speech_to_text 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0601f9186723489cc821a5afe961b94925734a73586f134b43cd8c28ce6c4e68
4
- data.tar.gz: 7f7fd338e9255e5357a6f6a684131559b386f8377617f33ede8c1b90c8d777de
3
+ metadata.gz: 912dddca8963d0db76d95ca167a7a0935c264a39384c6baec8dc20c956b97f8a
4
+ data.tar.gz: bbdeaf426301438a7009e38d07c008c0253b18e1c56e61c2c063e678d7c17128
5
5
  SHA512:
6
- metadata.gz: e1f01caf754a1a015c7c82a2d8bf0e3112a7224859cbbf527d977a84cfc60b407cb9708cc756d63ebc175650282c64fb2f10c1de878aa7c38973dc392e492263
7
- data.tar.gz: 3a9ddd6b49011df32d52d50b4f3c83b48c78e11b3e0fac458cb1e75231769e86fa1b4600735cd56d1bd8676b1987f4a605aa936dfc2832af5288d49d5c92f7cd
6
+ metadata.gz: 2dc6bc04f997a4f4101b3b5c45de7e354d11eacac6d3f7ad217df4eabc0afa1a30c1584c9bcc4ba63ab8008914a0a37da8515bbb64a03fb7c1390a0701e726cf
7
+ data.tar.gz: 8980ccbe70389e5029a145a0a4bcfefa72b2e2f815f49daf42bb300291f7cd33e8b2da6f2a69dca79a548ea7225f7578d1199198b9bc8cc1556829fc464846d5
data/.gitignore CHANGED
@@ -7,3 +7,8 @@
7
7
  /spec/reports/
8
8
  /tmp/
9
9
  *.gem
10
+ *.mp3
11
+ *.mp4
12
+ *.webm
13
+ *.wav
14
+ *.flac
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- speech_to_text (0.1.4)
4
+ speech_to_text (0.1.5)
5
5
  google-cloud-speech (= 0.35.0)
6
6
  google-cloud-storage (= 1.18.2)
7
7
  ibm_watson (~> 0.18.2)
data/README.md CHANGED
@@ -79,7 +79,7 @@ bucket_name could be any string
79
79
 
80
80
  file = SpeechToText::GoogleS2T.google_storage(audio_file_path,audio_name,audio_content_type,bucket_name)
81
81
  operation_name = SpeechToText::GoogleS2T.create_job(audio_name,audio_content_type,bucket_name,language_code)
82
- data = SpeechToText::GoogleS2T.check_job(operation_name)
82
+ data = SpeechToText::GoogleS2T.check_status(operation_name)
83
83
  myarray = SpeechToText::GoogleS2T.create_array_google(data["results"])
84
84
  SpeechToText::GoogleS2T.delete_google_storage(bucket_name,audio_name,audio_content_type)
85
85
  ```
@@ -11,12 +11,24 @@
11
11
  require 'json'
12
12
  require 'net/http'
13
13
  require 'uri'
14
+ require 'open3'
14
15
 
15
16
  module SpeechToText
16
17
  module ThreePlaymediaS2T # rubocop:disable Style/Documentation
17
18
  def self.create_job(api_key, audio_file, name, create_job_file)
18
19
  cretae_job_command = "curl -X POST -F \"source_file=@#{audio_file}\" \"https://api.3playmedia.com/v3/files?api_key=#{api_key}&language_id=1&name=#{name}\" > #{create_job_file}"
19
- system(cretae_job_command)
20
+ Open3.popen2e(cretae_job_command) do |stdin, stdout_err, wait_thr|
21
+ while line = stdout_err.gets
22
+ puts "#{line}"
23
+ end
24
+
25
+ exit_status = wait_thr.value
26
+ unless exit_status.success?
27
+ puts '---------------------'
28
+ puts "FAILED to execute --> #{cretae_job_command}"
29
+ puts '---------------------'
30
+ end
31
+ end
20
32
  file = File.open(create_job_file, 'r')
21
33
  response = JSON.load file
22
34
  job_id = response['data']['id']
@@ -11,6 +11,7 @@
11
11
  require 'json'
12
12
  require 'net/http'
13
13
  require 'uri'
14
+ require 'open3'
14
15
  require_relative 'util.rb'
15
16
 
16
17
  module SpeechToText
@@ -19,7 +20,20 @@ module SpeechToText
19
20
 
20
21
  def self.create_job(audio, server_url, jobdetails_json)
21
22
  request = "curl -F \"file=@#{audio}\" \"#{server_url}/deepspeech/createjob\" > #{jobdetails_json}"
22
- system(request)
23
+
24
+ Open3.popen2e(request) do |stdin, stdout_err, wait_thr|
25
+ while line = stdout_err.gets
26
+ puts "#{line}"
27
+ end
28
+
29
+ exit_status = wait_thr.value
30
+ unless exit_status.success?
31
+ puts '---------------------'
32
+ puts "FAILED to execute --> #{request}"
33
+ puts '---------------------'
34
+ end
35
+ end
36
+
23
37
  file = File.open(jobdetails_json, 'r')
24
38
  data = JSON.load file
25
39
  data['job_id']
@@ -42,7 +56,20 @@ module SpeechToText
42
56
  # used by deepspeech server only
43
57
  def self.generate_transcript(audio, json_file, model_path)
44
58
  deepspeech_command = "#{model_path}/deepspeech --model #{model_path}/models/output_graph.pbmm --alphabet #{model_path}/models/alphabet.txt --lm #{model_path}/models/lm.binary --trie #{model_path}/models/trie -e --audio #{audio} > #{json_file}"
45
- system(deepspeech_command.to_s)
59
+
60
+ Open3.popen2e(deepspeech_command) do |stdin, stdout_err, wait_thr|
61
+ while line = stdout_err.gets
62
+ puts "#{line}"
63
+ end
64
+
65
+ exit_status = wait_thr.value
66
+ unless exit_status.success?
67
+ puts '---------------------'
68
+ puts "FAILED to execute --> #{deepspeech_command}"
69
+ puts '---------------------'
70
+ end
71
+ end
72
+
46
73
  end
47
74
 
48
75
  # rubocop:disable Metrics/MethodLength
@@ -70,18 +70,15 @@ module SpeechToText
70
70
  # construct a new operation object from the id
71
71
  speech = Google::Cloud::Speech.new(version: :v1p1beta1)
72
72
  operation2 = speech.get_operation operation_name
73
+ status = 'not found'
73
74
  if operation2.error?
74
75
  status = 'failed'
75
- return status
76
- end
77
-
78
- if operation2.done?
76
+ elsif operation2.done?
79
77
  status = 'completed'
80
- return status
78
+ else
79
+ status = 'inProgress'
81
80
  end
82
-
83
- status = 'inProgress'
84
- status
81
+ return status
85
82
  end
86
83
 
87
84
  def self.get_words(operation_name)
@@ -12,6 +12,7 @@ require_relative 'util.rb'
12
12
  require 'net/http'
13
13
  require 'uri'
14
14
  require 'json'
15
+ require 'open3'
15
16
 
16
17
  module SpeechToText
17
18
  module SpeechmaticsS2T # rubocop:disable Style/Documentation
@@ -24,7 +25,20 @@ module SpeechToText
24
25
  # rubocop:enable Metrics/ParameterLists
25
26
  # rubocop:enable Naming/VariableName
26
27
  upload_audio = "curl -F data_file=@#{audio_file_path}/#{audio_name}.#{audio_content_type} -F model=#{model} \"https://api.speechmatics.com/v1.0/user/#{userID}/jobs/?auth_token=#{authKey}\" > #{jobID_json_file}"
27
- system(upload_audio.to_s)
28
+
29
+ Open3.popen2e(upload_audio) do |stdin, stdout_err, wait_thr|
30
+ while line = stdout_err.gets
31
+ puts "#{line}"
32
+ end
33
+
34
+ exit_status = wait_thr.value
35
+ unless exit_status.success?
36
+ puts '---------------------'
37
+ puts "FAILED to execute --> #{upload_audio}"
38
+ puts '---------------------'
39
+ end
40
+ end
41
+
28
42
  file = File.open(jobID_json_file)
29
43
  data = JSON.load file
30
44
  jobID = data['id'] # rubocop:disable Naming/VariableName
@@ -8,6 +8,7 @@
8
8
  #
9
9
  # Copyright (c) 2019 BigBlueButton Inc. and by respective authors (see below).
10
10
  #
11
+ require 'open3'
11
12
 
12
13
  module SpeechToText
13
14
  module Util # rubocop:disable Style/Documentation
@@ -122,21 +123,34 @@ module SpeechToText
122
123
  audio_content_type:,
123
124
  **duration)
124
125
  # rubocop:enable Metrics/ParameterLists
125
-
126
+ video_to_audio_command = ''
126
127
  if duration.empty?
127
128
  video_to_audio_command = "ffmpeg -y -i #{video_file_path}/#{video_name}.#{video_content_type} -ac 1 -ar 16000 #{audio_file_path}/#{audio_name}.#{audio_content_type}"
128
- system(video_to_audio_command.to_s)
129
129
  elsif duration[:start_time].nil? && duration[:end_time] != nil
130
130
  video_to_audio_command = "ffmpeg -y -ss #{0.to_i} -i #{video_file_path}/#{video_name}.#{video_content_type} -t #{duration[:end_time]} -ac 1 -ar 16000 #{audio_file_path}/#{audio_name}.#{audio_content_type}"
131
- system(video_to_audio_command.to_s)
132
131
  elsif duration[:end_time].nil? && duration[:start_time] != nil
133
132
  video_to_audio_command = "ffmpeg -y -ss #{duration[:start_time]} -i #{video_file_path}/#{video_name}.#{video_content_type} -ac 1 -ar 16000 #{audio_file_path}/#{audio_name}.#{audio_content_type}"
134
- system(video_to_audio_command.to_s)
135
133
  else
136
- video_to_audio_command = "ffmpeg -y -ss #{duration[:start_time]} -i #{video_file_path}/#{video_name}.#{video_content_type} -to #{duration[:end_time]} -ac 1 -ar 16000 #{audio_file_path}/#{audio_name}.#{audio_content_type}"
137
- system(video_to_audio_command.to_s)
138
- end
139
-
134
+ video_to_audio_command = "ffmpeg -y -t #{duration[:end_time]} -i #{video_file_path}/#{video_name}.#{video_content_type} -ss #{duration[:start_time]} -ac 1 -ar 16000 #{audio_file_path}/#{audio_name}.#{audio_content_type}"
135
+ end
136
+
137
+ Open3.popen2e(video_to_audio_command) do |stdin, stdout_err, wait_thr|
138
+ while line = stdout_err.gets
139
+ puts "#{line}"
140
+ end
141
+
142
+ exit_status = wait_thr.value
143
+ unless exit_status.success?
144
+ puts '---------------------'
145
+ puts "FAILED to execute --> #{video_to_audio_command}"
146
+ puts '---------------------'
147
+ end
148
+ end
149
+
150
+ #Open3.popen3(video_to_audio_command.to_s) do |stdin, stdout, stderr, wait_thr|
151
+ # puts "stdout is:" + stdout.read
152
+ # puts "stderr is:" + stderr.read
153
+ #end
140
154
  end
141
155
  end
142
156
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SpeechToText
4
- VERSION = '0.1.4'
4
+ VERSION = '0.1.5'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: speech_to_text
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Richard Alam
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-11-20 00:00:00.000000000 Z
11
+ date: 2019-11-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -94,7 +94,6 @@ files:
94
94
  - LICENSE.txt
95
95
  - README.md
96
96
  - Rakefile
97
- - abc.mp3
98
97
  - bin/console
99
98
  - bin/setup
100
99
  - examples/3playmedia.rb
@@ -113,7 +112,6 @@ files:
113
112
  - resources/test/test.json
114
113
  - resources/test/video/video.mp4
115
114
  - speech_to_text.gemspec
116
- - webcams.webm
117
115
  homepage: https://github.com/bigbluebutton/speech_to_text
118
116
  licenses:
119
117
  - MIT
data/abc.mp3 DELETED
Binary file
data/webcams.webm DELETED
Binary file