speech_to_text 0.1.5 → 0.1.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 912dddca8963d0db76d95ca167a7a0935c264a39384c6baec8dc20c956b97f8a
4
- data.tar.gz: bbdeaf426301438a7009e38d07c008c0253b18e1c56e61c2c063e678d7c17128
3
+ metadata.gz: ac78d515a0b4deadd4b5f1046f2e42edb7dfd0b1334e58b51367c1ec6b132f18
4
+ data.tar.gz: e72d38855046135e7d0126f9bf82ffddbed63688593a9ae2474515099805daff
5
5
  SHA512:
6
- metadata.gz: 2dc6bc04f997a4f4101b3b5c45de7e354d11eacac6d3f7ad217df4eabc0afa1a30c1584c9bcc4ba63ab8008914a0a37da8515bbb64a03fb7c1390a0701e726cf
7
- data.tar.gz: 8980ccbe70389e5029a145a0a4bcfefa72b2e2f815f49daf42bb300291f7cd33e8b2da6f2a69dca79a548ea7225f7578d1199198b9bc8cc1556829fc464846d5
6
+ metadata.gz: '081da7bde03d0152012b1682d28395d89d6fe60b0f2ba420572d34738e01e46b2c0e8eed39ee64797093227235af89bd14c9da7f4b182aa35a7c35ca31b95896'
7
+ data.tar.gz: ae8fe4c2743dbb23a56aef3b5415bedf9cafddbf642f5d3b8e7f021c7a3986ca38e0c40ab0465da224976284f3322186b63b0e530ebcf5ba151d7a4887467169
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- speech_to_text (0.1.5)
4
+ speech_to_text (0.1.6)
5
5
  google-cloud-speech (= 0.35.0)
6
6
  google-cloud-storage (= 1.18.2)
7
7
  ibm_watson (~> 0.18.2)
@@ -18,8 +18,8 @@ module SpeechToText
18
18
  module MozillaDeepspeechS2T # rubocop:disable Style/Documentation
19
19
  include Util
20
20
 
21
- def self.create_job(audio, server_url, jobdetails_json)
22
- request = "curl -F \"file=@#{audio}\" \"#{server_url}/deepspeech/createjob\" > #{jobdetails_json}"
21
+ def self.create_job(audio, server_url, jobdetails_json, api_key)
22
+ request = "curl -F \"file=@#{audio}\" \"#{server_url}/deepspeech/createjob/#{api_key}\" > #{jobdetails_json}"
23
23
 
24
24
  Open3.popen2e(request) do |stdin, stdout_err, wait_thr|
25
25
  while line = stdout_err.gets
@@ -39,16 +39,32 @@ module SpeechToText
39
39
  data['job_id']
40
40
  end
41
41
 
42
- def self.checkstatus(job_id, server_url)
43
- uri = URI.parse("#{server_url}/deepspeech/checkstatus/#{job_id}")
44
- response = Net::HTTP.get_response(uri)
42
+ def self.checkstatus(job_id, server_url, api_key)
43
+ uri = URI.parse("#{server_url}/deepspeech/checkstatus/#{job_id}/#{api_key}")
44
+ request = Net::HTTP::Post.new(uri)
45
+
46
+ req_options = {
47
+ use_ssl: uri.scheme == "https",
48
+ }
49
+
50
+ response = Net::HTTP.start(uri.hostname, uri.port, req_options) do |http|
51
+ http.request(request)
52
+ end
45
53
  data = JSON.load response.body
46
54
  data['status']
47
55
  end
48
56
 
49
- def self.order_transcript(job_id, server_url)
50
- uri = URI.parse("#{server_url}/deepspeech/transcript/#{job_id}")
51
- response = Net::HTTP.get_response(uri)
57
+ def self.order_transcript(job_id, server_url, api_key)
58
+ uri = URI.parse("#{server_url}/deepspeech/transcript/#{job_id}/#{api_key}")
59
+ request = Net::HTTP::Post.new(uri)
60
+
61
+ req_options = {
62
+ use_ssl: uri.scheme == "https",
63
+ }
64
+
65
+ response = Net::HTTP.start(uri.hostname, uri.port, req_options) do |http|
66
+ http.request(request)
67
+ end
52
68
  data = JSON.load response.body
53
69
  data
54
70
  end
@@ -32,47 +32,60 @@ module SpeechToText
32
32
  "#{hh}:#{mm}:#{ss}"
33
33
  end
34
34
  # rubocop:enable Metrics/MethodLength
35
-
36
35
  # create and write the webvtt file
37
36
  # rubocop:disable Metrics/MethodLength
38
37
  def self.write_to_webvtt(vtt_file_path:, # rubocop:disable Metrics/AbcSize
39
38
  vtt_file_name:,
40
- myarray:,
39
+ text_array:,
41
40
  start_time:)
41
+ # Array format
42
+ # text_array = [start_timestamp, end_timestamp, word, start_timestamp, end_timestamp, word, ...]
43
+
44
+ # if the first few minutes were cut from the audio, then
45
+ # start_time is added to every timestamp as the offset instead of 0
46
+ start_time = start_time.to_i
42
47
 
43
- start_time = start_time.to_i
44
48
  filename = "#{vtt_file_path}/#{vtt_file_name}"
45
49
  file = File.open(filename, 'w')
46
- file.puts "WEBVTT\n\n"
47
-
48
- i = 0
49
- while i < myarray.length
50
-
51
- file.puts i / 30 + 1
52
- if i + 28 < myarray.length
53
- file.puts "#{seconds_to_timestamp (myarray[i] + start_time).to_i} --> #{seconds_to_timestamp (myarray[i + 28] + start_time).to_i}"
54
- file.puts "#{myarray[i + 2]} #{myarray[i + 5]} #{myarray[i + 8]} #{myarray[i + 11]} #{myarray[i + 14]}"
55
- file.puts "#{myarray[i + 17]} #{myarray[i + 20]} #{myarray[i + 23]} #{myarray[i + 26]} #{myarray[i + 29]}\n\n"
56
- else
57
- remainder = myarray.length - i
58
- file.puts "#{seconds_to_timestamp (myarray[i] + start_time).to_i} --> #{seconds_to_timestamp (myarray[myarray.length - 2] + start_time).to_i}"
59
- count = 0
60
- flag = true
61
- while count < remainder
62
- file.print "#{myarray[i + 2]} "
63
- if flag # rubocop:disable Metrics/BlockNesting
64
- # rubocop:disable Metrics/BlockNesting
65
- if count > 9
66
- file.print "\n"
67
- flag = false
68
- end
69
- # rubocop:enable Metrics/BlockNesting
70
- end
71
- i += 3
72
- count += 3
50
+ file.print "WEBVTT"
51
+
52
+ i = block_number = 0
53
+
54
+ #all the words are at position [2,5,8,11...]
55
+ word_index = 2
56
+
57
+ # one block will give total 10 words on screen at a time
58
+ # which spans 30 indexes in total
59
+ # each word has 3 indexes in text_array [start_timestamp, end_timestamp, word,...]
60
+ block_size = 30
61
+
62
+ # each block contains 10 words, index range 0 to 29
63
+ # last end time will be at index = 28
64
+ end_timestamp = 28
65
+
66
+ # we need new lines after every 5 words so 6th word will be at index = 17 (6*3 - 1)
67
+ line_space_index = 17
68
+
69
+ while i < text_array.length
70
+
71
+ if i%3 == word_index #if index has word then print word
72
+ if i%block_size == line_space_index # if this is 6th word then print new line
73
+ file.puts
74
+ end
75
+ file.print "#{text_array[i]} "
76
+ elsif i%block_size == 0 #if index is 0,30,60... means starting a new block
77
+ block_number += 1
78
+ file.puts "\n\n"
79
+ file.puts block_number #print block number
80
+ file.print "#{seconds_to_timestamp(text_array[i] + start_time)} " #print start timestamps
81
+ if i + end_timestamp < text_array.length # End timestamp will be at 28th index in block of 30 indexes (10 words)
82
+ file.puts "--> #{seconds_to_timestamp(text_array[i+end_timestamp] + start_time)}"
83
+ else # For last block, there will not be total 30 indexes, so end timestamp will be second last index
84
+ file.puts "--> #{seconds_to_timestamp(text_array[text_array.length - 2] + start_time)}"
73
85
  end
86
+ else
74
87
  end
75
- i += 30
88
+ i += 1
76
89
  end
77
90
 
78
91
  file.close
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SpeechToText
4
- VERSION = '0.1.5'
4
+ VERSION = '0.1.6'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: speech_to_text
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Richard Alam
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-11-21 00:00:00.000000000 Z
11
+ date: 2019-12-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler