speech_to_text 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +5 -0
- data/Gemfile.lock +1 -1
- data/README.md +1 -1
- data/lib/speech_to_text/3playmedia.rb +13 -1
- data/lib/speech_to_text/deepspeech.rb +29 -2
- data/lib/speech_to_text/google.rb +5 -8
- data/lib/speech_to_text/speechmatics.rb +15 -1
- data/lib/speech_to_text/util.rb +22 -8
- data/lib/speech_to_text/version.rb +1 -1
- metadata +2 -4
- data/abc.mp3 +0 -0
- data/webcams.webm +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 912dddca8963d0db76d95ca167a7a0935c264a39384c6baec8dc20c956b97f8a
|
4
|
+
data.tar.gz: bbdeaf426301438a7009e38d07c008c0253b18e1c56e61c2c063e678d7c17128
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2dc6bc04f997a4f4101b3b5c45de7e354d11eacac6d3f7ad217df4eabc0afa1a30c1584c9bcc4ba63ab8008914a0a37da8515bbb64a03fb7c1390a0701e726cf
|
7
|
+
data.tar.gz: 8980ccbe70389e5029a145a0a4bcfefa72b2e2f815f49daf42bb300291f7cd33e8b2da6f2a69dca79a548ea7225f7578d1199198b9bc8cc1556829fc464846d5
|
data/.gitignore
CHANGED
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -79,7 +79,7 @@ bucket_name could be any string
|
|
79
79
|
|
80
80
|
file = SpeechToText::GoogleS2T.google_storage(audio_file_path,audio_name,audio_content_type,bucket_name)
|
81
81
|
operation_name = SpeechToText::GoogleS2T.create_job(audio_name,audio_content_type,bucket_name,language_code)
|
82
|
-
data = SpeechToText::GoogleS2T.
|
82
|
+
data = SpeechToText::GoogleS2T.check_status(operation_name)
|
83
83
|
myarray = SpeechToText::GoogleS2T.create_array_google(data["results"])
|
84
84
|
SpeechToText::GoogleS2T.delete_google_storage(bucket_name,audio_name,audio_content_type)
|
85
85
|
```
|
data/lib/speech_to_text/3playmedia.rb
CHANGED
@@ -11,12 +11,24 @@
|
|
11
11
|
require 'json'
|
12
12
|
require 'net/http'
|
13
13
|
require 'uri'
|
14
|
+
require 'open3'
|
14
15
|
|
15
16
|
module SpeechToText
|
16
17
|
module ThreePlaymediaS2T # rubocop:disable Style/Documentation
|
17
18
|
def self.create_job(api_key, audio_file, name, create_job_file)
|
18
19
|
cretae_job_command = "curl -X POST -F \"source_file=@#{audio_file}\" \"https://api.3playmedia.com/v3/files?api_key=#{api_key}&language_id=1&name=#{name}\" > #{create_job_file}"
|
19
|
-
|
20
|
+
Open3.popen2e(cretae_job_command) do |stdin, stdout_err, wait_thr|
|
21
|
+
while line = stdout_err.gets
|
22
|
+
puts "#{line}"
|
23
|
+
end
|
24
|
+
|
25
|
+
exit_status = wait_thr.value
|
26
|
+
unless exit_status.success?
|
27
|
+
puts '---------------------'
|
28
|
+
puts "FAILED to execute --> #{cretae_job_command}"
|
29
|
+
puts '---------------------'
|
30
|
+
end
|
31
|
+
end
|
20
32
|
file = File.open(create_job_file, 'r')
|
21
33
|
response = JSON.load file
|
22
34
|
job_id = response['data']['id']
|
data/lib/speech_to_text/deepspeech.rb
CHANGED
@@ -11,6 +11,7 @@
|
|
11
11
|
require 'json'
|
12
12
|
require 'net/http'
|
13
13
|
require 'uri'
|
14
|
+
require 'open3'
|
14
15
|
require_relative 'util.rb'
|
15
16
|
|
16
17
|
module SpeechToText
|
@@ -19,7 +20,20 @@ module SpeechToText
|
|
19
20
|
|
20
21
|
def self.create_job(audio, server_url, jobdetails_json)
|
21
22
|
request = "curl -F \"file=@#{audio}\" \"#{server_url}/deepspeech/createjob\" > #{jobdetails_json}"
|
22
|
-
|
23
|
+
|
24
|
+
Open3.popen2e(request) do |stdin, stdout_err, wait_thr|
|
25
|
+
while line = stdout_err.gets
|
26
|
+
puts "#{line}"
|
27
|
+
end
|
28
|
+
|
29
|
+
exit_status = wait_thr.value
|
30
|
+
unless exit_status.success?
|
31
|
+
puts '---------------------'
|
32
|
+
puts "FAILED to execute --> #{request}"
|
33
|
+
puts '---------------------'
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
23
37
|
file = File.open(jobdetails_json, 'r')
|
24
38
|
data = JSON.load file
|
25
39
|
data['job_id']
|
@@ -42,7 +56,20 @@ module SpeechToText
|
|
42
56
|
# used by deepspeech server only
|
43
57
|
def self.generate_transcript(audio, json_file, model_path)
|
44
58
|
deepspeech_command = "#{model_path}/deepspeech --model #{model_path}/models/output_graph.pbmm --alphabet #{model_path}/models/alphabet.txt --lm #{model_path}/models/lm.binary --trie #{model_path}/models/trie -e --audio #{audio} > #{json_file}"
|
45
|
-
|
59
|
+
|
60
|
+
Open3.popen2e(deepspeech_command) do |stdin, stdout_err, wait_thr|
|
61
|
+
while line = stdout_err.gets
|
62
|
+
puts "#{line}"
|
63
|
+
end
|
64
|
+
|
65
|
+
exit_status = wait_thr.value
|
66
|
+
unless exit_status.success?
|
67
|
+
puts '---------------------'
|
68
|
+
puts "FAILED to execute --> #{deepspeech_command}"
|
69
|
+
puts '---------------------'
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
46
73
|
end
|
47
74
|
|
48
75
|
# rubocop:disable Metrics/MethodLength
|
data/lib/speech_to_text/google.rb
CHANGED
@@ -70,18 +70,15 @@ module SpeechToText
|
|
70
70
|
# construct a new operation object from the id
|
71
71
|
speech = Google::Cloud::Speech.new(version: :v1p1beta1)
|
72
72
|
operation2 = speech.get_operation operation_name
|
73
|
+
status = 'not found'
|
73
74
|
if operation2.error?
|
74
75
|
status = 'failed'
|
75
|
-
|
76
|
-
end
|
77
|
-
|
78
|
-
if operation2.done?
|
76
|
+
elsif operation2.done?
|
79
77
|
status = 'completed'
|
80
|
-
|
78
|
+
else
|
79
|
+
status = 'inProgress'
|
81
80
|
end
|
82
|
-
|
83
|
-
status = 'inProgress'
|
84
|
-
status
|
81
|
+
return status
|
85
82
|
end
|
86
83
|
|
87
84
|
def self.get_words(operation_name)
|
data/lib/speech_to_text/speechmatics.rb
CHANGED
@@ -12,6 +12,7 @@ require_relative 'util.rb'
|
|
12
12
|
require 'net/http'
|
13
13
|
require 'uri'
|
14
14
|
require 'json'
|
15
|
+
require 'open3'
|
15
16
|
|
16
17
|
module SpeechToText
|
17
18
|
module SpeechmaticsS2T # rubocop:disable Style/Documentation
|
@@ -24,7 +25,20 @@ module SpeechToText
|
|
24
25
|
# rubocop:enable Metrics/ParameterLists
|
25
26
|
# rubocop:enable Naming/VariableName
|
26
27
|
upload_audio = "curl -F data_file=@#{audio_file_path}/#{audio_name}.#{audio_content_type} -F model=#{model} \"https://api.speechmatics.com/v1.0/user/#{userID}/jobs/?auth_token=#{authKey}\" > #{jobID_json_file}"
|
27
|
-
|
28
|
+
|
29
|
+
Open3.popen2e(upload_audio) do |stdin, stdout_err, wait_thr|
|
30
|
+
while line = stdout_err.gets
|
31
|
+
puts "#{line}"
|
32
|
+
end
|
33
|
+
|
34
|
+
exit_status = wait_thr.value
|
35
|
+
unless exit_status.success?
|
36
|
+
puts '---------------------'
|
37
|
+
puts "FAILED to execute --> #{upload_audio}"
|
38
|
+
puts '---------------------'
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
28
42
|
file = File.open(jobID_json_file)
|
29
43
|
data = JSON.load file
|
30
44
|
jobID = data['id'] # rubocop:disable Naming/VariableName
|
data/lib/speech_to_text/util.rb
CHANGED
@@ -8,6 +8,7 @@
|
|
8
8
|
#
|
9
9
|
# Copyright (c) 2019 BigBlueButton Inc. and by respective authors (see below).
|
10
10
|
#
|
11
|
+
require 'open3'
|
11
12
|
|
12
13
|
module SpeechToText
|
13
14
|
module Util # rubocop:disable Style/Documentation
|
@@ -122,21 +123,34 @@ module SpeechToText
|
|
122
123
|
audio_content_type:,
|
123
124
|
**duration)
|
124
125
|
# rubocop:enable Metrics/ParameterLists
|
125
|
-
|
126
|
+
video_to_audio_command = ''
|
126
127
|
if duration.empty?
|
127
128
|
video_to_audio_command = "ffmpeg -y -i #{video_file_path}/#{video_name}.#{video_content_type} -ac 1 -ar 16000 #{audio_file_path}/#{audio_name}.#{audio_content_type}"
|
128
|
-
system(video_to_audio_command.to_s)
|
129
129
|
elsif duration[:start_time].nil? && duration[:end_time] != nil
|
130
130
|
video_to_audio_command = "ffmpeg -y -ss #{0.to_i} -i #{video_file_path}/#{video_name}.#{video_content_type} -t #{duration[:end_time]} -ac 1 -ar 16000 #{audio_file_path}/#{audio_name}.#{audio_content_type}"
|
131
|
-
system(video_to_audio_command.to_s)
|
132
131
|
elsif duration[:end_time].nil? && duration[:start_time] != nil
|
133
132
|
video_to_audio_command = "ffmpeg -y -ss #{duration[:start_time]} -i #{video_file_path}/#{video_name}.#{video_content_type} -ac 1 -ar 16000 #{audio_file_path}/#{audio_name}.#{audio_content_type}"
|
134
|
-
system(video_to_audio_command.to_s)
|
135
133
|
else
|
136
|
-
video_to_audio_command = "ffmpeg -y -
|
137
|
-
|
138
|
-
|
139
|
-
|
134
|
+
video_to_audio_command = "ffmpeg -y -t #{duration[:end_time]} -i #{video_file_path}/#{video_name}.#{video_content_type} -ss #{duration[:start_time]} -ac 1 -ar 16000 #{audio_file_path}/#{audio_name}.#{audio_content_type}"
|
135
|
+
end
|
136
|
+
|
137
|
+
Open3.popen2e(video_to_audio_command) do |stdin, stdout_err, wait_thr|
|
138
|
+
while line = stdout_err.gets
|
139
|
+
puts "#{line}"
|
140
|
+
end
|
141
|
+
|
142
|
+
exit_status = wait_thr.value
|
143
|
+
unless exit_status.success?
|
144
|
+
puts '---------------------'
|
145
|
+
puts "FAILED to execute --> #{video_to_audio_command}"
|
146
|
+
puts '---------------------'
|
147
|
+
end
|
148
|
+
end
|
149
|
+
|
150
|
+
#Open3.popen3(video_to_audio_command.to_s) do |stdin, stdout, stderr, wait_thr|
|
151
|
+
# puts "stdout is:" + stdout.read
|
152
|
+
# puts "stderr is:" + stderr.read
|
153
|
+
#end
|
140
154
|
end
|
141
155
|
end
|
142
156
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: speech_to_text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Richard Alam
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-11-
|
11
|
+
date: 2019-11-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -94,7 +94,6 @@ files:
|
|
94
94
|
- LICENSE.txt
|
95
95
|
- README.md
|
96
96
|
- Rakefile
|
97
|
-
- abc.mp3
|
98
97
|
- bin/console
|
99
98
|
- bin/setup
|
100
99
|
- examples/3playmedia.rb
|
@@ -113,7 +112,6 @@ files:
|
|
113
112
|
- resources/test/test.json
|
114
113
|
- resources/test/video/video.mp4
|
115
114
|
- speech_to_text.gemspec
|
116
|
-
- webcams.webm
|
117
115
|
homepage: https://github.com/bigbluebutton/speech_to_text
|
118
116
|
licenses:
|
119
117
|
- MIT
|
data/abc.mp3
DELETED
Binary file
|
data/webcams.webm
DELETED
Binary file
|