speech_to_text 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 912dddca8963d0db76d95ca167a7a0935c264a39384c6baec8dc20c956b97f8a
4
- data.tar.gz: bbdeaf426301438a7009e38d07c008c0253b18e1c56e61c2c063e678d7c17128
3
+ metadata.gz: ac78d515a0b4deadd4b5f1046f2e42edb7dfd0b1334e58b51367c1ec6b132f18
4
+ data.tar.gz: e72d38855046135e7d0126f9bf82ffddbed63688593a9ae2474515099805daff
5
5
  SHA512:
6
- metadata.gz: 2dc6bc04f997a4f4101b3b5c45de7e354d11eacac6d3f7ad217df4eabc0afa1a30c1584c9bcc4ba63ab8008914a0a37da8515bbb64a03fb7c1390a0701e726cf
7
- data.tar.gz: 8980ccbe70389e5029a145a0a4bcfefa72b2e2f815f49daf42bb300291f7cd33e8b2da6f2a69dca79a548ea7225f7578d1199198b9bc8cc1556829fc464846d5
6
+ metadata.gz: '081da7bde03d0152012b1682d28395d89d6fe60b0f2ba420572d34738e01e46b2c0e8eed39ee64797093227235af89bd14c9da7f4b182aa35a7c35ca31b95896'
7
+ data.tar.gz: ae8fe4c2743dbb23a56aef3b5415bedf9cafddbf642f5d3b8e7f021c7a3986ca38e0c40ab0465da224976284f3322186b63b0e530ebcf5ba151d7a4887467169
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- speech_to_text (0.1.5)
4
+ speech_to_text (0.1.6)
5
5
  google-cloud-speech (= 0.35.0)
6
6
  google-cloud-storage (= 1.18.2)
7
7
  ibm_watson (~> 0.18.2)
@@ -18,8 +18,8 @@ module SpeechToText
18
18
  module MozillaDeepspeechS2T # rubocop:disable Style/Documentation
19
19
  include Util
20
20
 
21
- def self.create_job(audio, server_url, jobdetails_json)
22
- request = "curl -F \"file=@#{audio}\" \"#{server_url}/deepspeech/createjob\" > #{jobdetails_json}"
21
+ def self.create_job(audio, server_url, jobdetails_json, api_key)
22
+ request = "curl -F \"file=@#{audio}\" \"#{server_url}/deepspeech/createjob/#{api_key}\" > #{jobdetails_json}"
23
23
 
24
24
  Open3.popen2e(request) do |stdin, stdout_err, wait_thr|
25
25
  while line = stdout_err.gets
@@ -39,16 +39,32 @@ module SpeechToText
39
39
  data['job_id']
40
40
  end
41
41
 
42
- def self.checkstatus(job_id, server_url)
43
- uri = URI.parse("#{server_url}/deepspeech/checkstatus/#{job_id}")
44
- response = Net::HTTP.get_response(uri)
42
+ def self.checkstatus(job_id, server_url, api_key)
43
+ uri = URI.parse("#{server_url}/deepspeech/checkstatus/#{job_id}/#{api_key}")
44
+ request = Net::HTTP::Post.new(uri)
45
+
46
+ req_options = {
47
+ use_ssl: uri.scheme == "https",
48
+ }
49
+
50
+ response = Net::HTTP.start(uri.hostname, uri.port, req_options) do |http|
51
+ http.request(request)
52
+ end
45
53
  data = JSON.load response.body
46
54
  data['status']
47
55
  end
48
56
 
49
- def self.order_transcript(job_id, server_url)
50
- uri = URI.parse("#{server_url}/deepspeech/transcript/#{job_id}")
51
- response = Net::HTTP.get_response(uri)
57
+ def self.order_transcript(job_id, server_url, api_key)
58
+ uri = URI.parse("#{server_url}/deepspeech/transcript/#{job_id}/#{api_key}")
59
+ request = Net::HTTP::Post.new(uri)
60
+
61
+ req_options = {
62
+ use_ssl: uri.scheme == "https",
63
+ }
64
+
65
+ response = Net::HTTP.start(uri.hostname, uri.port, req_options) do |http|
66
+ http.request(request)
67
+ end
52
68
  data = JSON.load response.body
53
69
  data
54
70
  end
@@ -32,47 +32,60 @@ module SpeechToText
32
32
  "#{hh}:#{mm}:#{ss}"
33
33
  end
34
34
  # rubocop:enable Metrics/MethodLength
35
-
36
35
  # create and write the webvtt file
37
36
  # rubocop:disable Metrics/MethodLength
38
37
  def self.write_to_webvtt(vtt_file_path:, # rubocop:disable Metrics/AbcSize
39
38
  vtt_file_name:,
40
- myarray:,
39
+ text_array:,
41
40
  start_time:)
41
+ # Array format
42
+ # text_array = [start_timestamp, end_timestamp, word, start_time, end_time, word, ...]
43
+
44
+ # if we cut first few minutes from the audio then
45
+ # start_time carries that offset (instead of 0) and is added to every timestamp
46
+ start_time = start_time.to_i
42
47
 
43
- start_time = start_time.to_i
44
48
  filename = "#{vtt_file_path}/#{vtt_file_name}"
45
49
  file = File.open(filename, 'w')
46
- file.puts "WEBVTT\n\n"
47
-
48
- i = 0
49
- while i < myarray.length
50
-
51
- file.puts i / 30 + 1
52
- if i + 28 < myarray.length
53
- file.puts "#{seconds_to_timestamp (myarray[i] + start_time).to_i} --> #{seconds_to_timestamp (myarray[i + 28] + start_time).to_i}"
54
- file.puts "#{myarray[i + 2]} #{myarray[i + 5]} #{myarray[i + 8]} #{myarray[i + 11]} #{myarray[i + 14]}"
55
- file.puts "#{myarray[i + 17]} #{myarray[i + 20]} #{myarray[i + 23]} #{myarray[i + 26]} #{myarray[i + 29]}\n\n"
56
- else
57
- remainder = myarray.length - i
58
- file.puts "#{seconds_to_timestamp (myarray[i] + start_time).to_i} --> #{seconds_to_timestamp (myarray[myarray.length - 2] + start_time).to_i}"
59
- count = 0
60
- flag = true
61
- while count < remainder
62
- file.print "#{myarray[i + 2]} "
63
- if flag # rubocop:disable Metrics/BlockNesting
64
- # rubocop:disable Metrics/BlockNesting
65
- if count > 9
66
- file.print "\n"
67
- flag = false
68
- end
69
- # rubocop:enable Metrics/BlockNesting
70
- end
71
- i += 3
72
- count += 3
50
+ file.print "WEBVTT"
51
+
52
+ i = block_number = 0
53
+
54
+ #all the words are at position [2,5,8,11...]
55
+ word_index = 2
56
+
57
+ # one block will give total 10 words on screen at a time
58
+ # which contains total 30 index
59
+ # each word has 3 indexes in text_array [start_timestamp, end_timestamp, word,...]
60
+ block_size = 30
61
+
62
+ # each block contains 10 words, index range 0 to 29
63
+ # last end time will be at index = 28
64
+ end_timestamp = 28
65
+
66
+ # we need new lines after every 5 words so 6th word will be at index = 17 (6*3 - 1)
67
+ line_space_index = 17
68
+
69
+ while i < text_array.length
70
+
71
+ if i%3 == word_index #if index has word then print word
72
+ if i%block_size == line_space_index # if this is 6th word then print new line
73
+ file.puts
74
+ end
75
+ file.print "#{text_array[i]} "
76
+ elsif i%block_size == 0 #if index is 0,30,60... means starting a new block
77
+ block_number += 1
78
+ file.puts "\n\n"
79
+ file.puts block_number #print block number
80
+ file.print "#{seconds_to_timestamp(text_array[i] + start_time)} " #print start timestamps
81
+ if i + end_timestamp < text_array.length # End timestamp will be at 28th index in block of 30 indexes (10 words)
82
+ file.puts "--> #{seconds_to_timestamp(text_array[i+end_timestamp] + start_time)}"
83
+ else # For last block, there will not be total 30 indexes, so end timestamp will be second last index
84
+ file.puts "--> #{seconds_to_timestamp(text_array[text_array.length - 2] + start_time)}"
73
85
  end
86
+ else
74
87
  end
75
- i += 30
88
+ i += 1
76
89
  end
77
90
 
78
91
  file.close
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SpeechToText
4
- VERSION = '0.1.5'
4
+ VERSION = '0.1.6'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: speech_to_text
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Richard Alam
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-11-21 00:00:00.000000000 Z
11
+ date: 2019-12-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler