speech_to_text 0.1.4 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +5 -0
- data/Gemfile.lock +1 -1
- data/README.md +1 -1
- data/lib/speech_to_text/3playmedia.rb +13 -1
- data/lib/speech_to_text/deepspeech.rb +29 -2
- data/lib/speech_to_text/google.rb +5 -8
- data/lib/speech_to_text/speechmatics.rb +15 -1
- data/lib/speech_to_text/util.rb +22 -8
- data/lib/speech_to_text/version.rb +1 -1
- metadata +2 -4
- data/abc.mp3 +0 -0
- data/webcams.webm +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 912dddca8963d0db76d95ca167a7a0935c264a39384c6baec8dc20c956b97f8a
|
4
|
+
data.tar.gz: bbdeaf426301438a7009e38d07c008c0253b18e1c56e61c2c063e678d7c17128
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2dc6bc04f997a4f4101b3b5c45de7e354d11eacac6d3f7ad217df4eabc0afa1a30c1584c9bcc4ba63ab8008914a0a37da8515bbb64a03fb7c1390a0701e726cf
|
7
|
+
data.tar.gz: 8980ccbe70389e5029a145a0a4bcfefa72b2e2f815f49daf42bb300291f7cd33e8b2da6f2a69dca79a548ea7225f7578d1199198b9bc8cc1556829fc464846d5
|
data/.gitignore
CHANGED
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -79,7 +79,7 @@ bucket_name could be any string
|
|
79
79
|
|
80
80
|
file = SpeechToText::GoogleS2T.google_storage(audio_file_path,audio_name,audio_content_type,bucket_name)
|
81
81
|
operation_name = SpeechToText::GoogleS2T.create_job(audio_name,audio_content_type,bucket_name,language_code)
|
82
|
-
data = SpeechToText::GoogleS2T.
|
82
|
+
data = SpeechToText::GoogleS2T.check_status(operation_name)
|
83
83
|
myarray = SpeechToText::GoogleS2T.create_array_google(data["results"])
|
84
84
|
SpeechToText::GoogleS2T.delete_google_storage(bucket_name,audio_name,audio_content_type)
|
85
85
|
```
|
data/lib/speech_to_text/3playmedia.rb
CHANGED
@@ -11,12 +11,24 @@
|
|
11
11
|
require 'json'
|
12
12
|
require 'net/http'
|
13
13
|
require 'uri'
|
14
|
+
require 'open3'
|
14
15
|
|
15
16
|
module SpeechToText
|
16
17
|
module ThreePlaymediaS2T # rubocop:disable Style/Documentation
|
17
18
|
def self.create_job(api_key, audio_file, name, create_job_file)
|
18
19
|
cretae_job_command = "curl -X POST -F \"source_file=@#{audio_file}\" \"https://api.3playmedia.com/v3/files?api_key=#{api_key}&language_id=1&name=#{name}\" > #{create_job_file}"
|
19
|
-
|
20
|
+
Open3.popen2e(cretae_job_command) do |stdin, stdout_err, wait_thr|
|
21
|
+
while line = stdout_err.gets
|
22
|
+
puts "#{line}"
|
23
|
+
end
|
24
|
+
|
25
|
+
exit_status = wait_thr.value
|
26
|
+
unless exit_status.success?
|
27
|
+
puts '---------------------'
|
28
|
+
puts "FAILED to execute --> #{cretae_job_command}"
|
29
|
+
puts '---------------------'
|
30
|
+
end
|
31
|
+
end
|
20
32
|
file = File.open(create_job_file, 'r')
|
21
33
|
response = JSON.load file
|
22
34
|
job_id = response['data']['id']
|
data/lib/speech_to_text/deepspeech.rb
CHANGED
@@ -11,6 +11,7 @@
|
|
11
11
|
require 'json'
|
12
12
|
require 'net/http'
|
13
13
|
require 'uri'
|
14
|
+
require 'open3'
|
14
15
|
require_relative 'util.rb'
|
15
16
|
|
16
17
|
module SpeechToText
|
@@ -19,7 +20,20 @@ module SpeechToText
|
|
19
20
|
|
20
21
|
def self.create_job(audio, server_url, jobdetails_json)
|
21
22
|
request = "curl -F \"file=@#{audio}\" \"#{server_url}/deepspeech/createjob\" > #{jobdetails_json}"
|
22
|
-
|
23
|
+
|
24
|
+
Open3.popen2e(request) do |stdin, stdout_err, wait_thr|
|
25
|
+
while line = stdout_err.gets
|
26
|
+
puts "#{line}"
|
27
|
+
end
|
28
|
+
|
29
|
+
exit_status = wait_thr.value
|
30
|
+
unless exit_status.success?
|
31
|
+
puts '---------------------'
|
32
|
+
puts "FAILED to execute --> #{request}"
|
33
|
+
puts '---------------------'
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
23
37
|
file = File.open(jobdetails_json, 'r')
|
24
38
|
data = JSON.load file
|
25
39
|
data['job_id']
|
@@ -42,7 +56,20 @@ module SpeechToText
|
|
42
56
|
# used by deepspeech server only
|
43
57
|
def self.generate_transcript(audio, json_file, model_path)
|
44
58
|
deepspeech_command = "#{model_path}/deepspeech --model #{model_path}/models/output_graph.pbmm --alphabet #{model_path}/models/alphabet.txt --lm #{model_path}/models/lm.binary --trie #{model_path}/models/trie -e --audio #{audio} > #{json_file}"
|
45
|
-
|
59
|
+
|
60
|
+
Open3.popen2e(deepspeech_command) do |stdin, stdout_err, wait_thr|
|
61
|
+
while line = stdout_err.gets
|
62
|
+
puts "#{line}"
|
63
|
+
end
|
64
|
+
|
65
|
+
exit_status = wait_thr.value
|
66
|
+
unless exit_status.success?
|
67
|
+
puts '---------------------'
|
68
|
+
puts "FAILED to execute --> #{deepspeech_command}"
|
69
|
+
puts '---------------------'
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
46
73
|
end
|
47
74
|
|
48
75
|
# rubocop:disable Metrics/MethodLength
|
data/lib/speech_to_text/google.rb
CHANGED
@@ -70,18 +70,15 @@ module SpeechToText
|
|
70
70
|
# construct a new operation object from the id
|
71
71
|
speech = Google::Cloud::Speech.new(version: :v1p1beta1)
|
72
72
|
operation2 = speech.get_operation operation_name
|
73
|
+
status = 'not found'
|
73
74
|
if operation2.error?
|
74
75
|
status = 'failed'
|
75
|
-
|
76
|
-
end
|
77
|
-
|
78
|
-
if operation2.done?
|
76
|
+
elsif operation2.done?
|
79
77
|
status = 'completed'
|
80
|
-
|
78
|
+
else
|
79
|
+
status = 'inProgress'
|
81
80
|
end
|
82
|
-
|
83
|
-
status = 'inProgress'
|
84
|
-
status
|
81
|
+
return status
|
85
82
|
end
|
86
83
|
|
87
84
|
def self.get_words(operation_name)
|
data/lib/speech_to_text/speechmatics.rb
CHANGED
@@ -12,6 +12,7 @@ require_relative 'util.rb'
|
|
12
12
|
require 'net/http'
|
13
13
|
require 'uri'
|
14
14
|
require 'json'
|
15
|
+
require 'open3'
|
15
16
|
|
16
17
|
module SpeechToText
|
17
18
|
module SpeechmaticsS2T # rubocop:disable Style/Documentation
|
@@ -24,7 +25,20 @@ module SpeechToText
|
|
24
25
|
# rubocop:enable Metrics/ParameterLists
|
25
26
|
# rubocop:enable Naming/VariableName
|
26
27
|
upload_audio = "curl -F data_file=@#{audio_file_path}/#{audio_name}.#{audio_content_type} -F model=#{model} \"https://api.speechmatics.com/v1.0/user/#{userID}/jobs/?auth_token=#{authKey}\" > #{jobID_json_file}"
|
27
|
-
|
28
|
+
|
29
|
+
Open3.popen2e(upload_audio) do |stdin, stdout_err, wait_thr|
|
30
|
+
while line = stdout_err.gets
|
31
|
+
puts "#{line}"
|
32
|
+
end
|
33
|
+
|
34
|
+
exit_status = wait_thr.value
|
35
|
+
unless exit_status.success?
|
36
|
+
puts '---------------------'
|
37
|
+
puts "FAILED to execute --> #{upload_audio}"
|
38
|
+
puts '---------------------'
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
28
42
|
file = File.open(jobID_json_file)
|
29
43
|
data = JSON.load file
|
30
44
|
jobID = data['id'] # rubocop:disable Naming/VariableName
|
data/lib/speech_to_text/util.rb
CHANGED
@@ -8,6 +8,7 @@
|
|
8
8
|
#
|
9
9
|
# Copyright (c) 2019 BigBlueButton Inc. and by respective authors (see below).
|
10
10
|
#
|
11
|
+
require 'open3'
|
11
12
|
|
12
13
|
module SpeechToText
|
13
14
|
module Util # rubocop:disable Style/Documentation
|
@@ -122,21 +123,34 @@ module SpeechToText
|
|
122
123
|
audio_content_type:,
|
123
124
|
**duration)
|
124
125
|
# rubocop:enable Metrics/ParameterLists
|
125
|
-
|
126
|
+
video_to_audio_command = ''
|
126
127
|
if duration.empty?
|
127
128
|
video_to_audio_command = "ffmpeg -y -i #{video_file_path}/#{video_name}.#{video_content_type} -ac 1 -ar 16000 #{audio_file_path}/#{audio_name}.#{audio_content_type}"
|
128
|
-
system(video_to_audio_command.to_s)
|
129
129
|
elsif duration[:start_time].nil? && duration[:end_time] != nil
|
130
130
|
video_to_audio_command = "ffmpeg -y -ss #{0.to_i} -i #{video_file_path}/#{video_name}.#{video_content_type} -t #{duration[:end_time]} -ac 1 -ar 16000 #{audio_file_path}/#{audio_name}.#{audio_content_type}"
|
131
|
-
system(video_to_audio_command.to_s)
|
132
131
|
elsif duration[:end_time].nil? && duration[:start_time] != nil
|
133
132
|
video_to_audio_command = "ffmpeg -y -ss #{duration[:start_time]} -i #{video_file_path}/#{video_name}.#{video_content_type} -ac 1 -ar 16000 #{audio_file_path}/#{audio_name}.#{audio_content_type}"
|
134
|
-
system(video_to_audio_command.to_s)
|
135
133
|
else
|
136
|
-
video_to_audio_command = "ffmpeg -y -
|
137
|
-
|
138
|
-
|
139
|
-
|
134
|
+
video_to_audio_command = "ffmpeg -y -t #{duration[:end_time]} -i #{video_file_path}/#{video_name}.#{video_content_type} -ss #{duration[:start_time]} -ac 1 -ar 16000 #{audio_file_path}/#{audio_name}.#{audio_content_type}"
|
135
|
+
end
|
136
|
+
|
137
|
+
Open3.popen2e(video_to_audio_command) do |stdin, stdout_err, wait_thr|
|
138
|
+
while line = stdout_err.gets
|
139
|
+
puts "#{line}"
|
140
|
+
end
|
141
|
+
|
142
|
+
exit_status = wait_thr.value
|
143
|
+
unless exit_status.success?
|
144
|
+
puts '---------------------'
|
145
|
+
puts "FAILED to execute --> #{video_to_audio_command}"
|
146
|
+
puts '---------------------'
|
147
|
+
end
|
148
|
+
end
|
149
|
+
|
150
|
+
#Open3.popen3(video_to_audio_command.to_s) do |stdin, stdout, stderr, wait_thr|
|
151
|
+
# puts "stdout is:" + stdout.read
|
152
|
+
# puts "stderr is:" + stderr.read
|
153
|
+
#end
|
140
154
|
end
|
141
155
|
end
|
142
156
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: speech_to_text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Richard Alam
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-11-
|
11
|
+
date: 2019-11-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -94,7 +94,6 @@ files:
|
|
94
94
|
- LICENSE.txt
|
95
95
|
- README.md
|
96
96
|
- Rakefile
|
97
|
-
- abc.mp3
|
98
97
|
- bin/console
|
99
98
|
- bin/setup
|
100
99
|
- examples/3playmedia.rb
|
@@ -113,7 +112,6 @@ files:
|
|
113
112
|
- resources/test/test.json
|
114
113
|
- resources/test/video/video.mp4
|
115
114
|
- speech_to_text.gemspec
|
116
|
-
- webcams.webm
|
117
115
|
homepage: https://github.com/bigbluebutton/speech_to_text
|
118
116
|
licenses:
|
119
117
|
- MIT
|
data/abc.mp3
DELETED
Binary file
|
data/webcams.webm
DELETED
Binary file
|