speech_to_text 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/speech_to_text/deepspeech.rb +24 -8
- data/lib/speech_to_text/util.rb +44 -31
- data/lib/speech_to_text/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ac78d515a0b4deadd4b5f1046f2e42edb7dfd0b1334e58b51367c1ec6b132f18
|
4
|
+
data.tar.gz: e72d38855046135e7d0126f9bf82ffddbed63688593a9ae2474515099805daff
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '081da7bde03d0152012b1682d28395d89d6fe60b0f2ba420572d34738e01e46b2c0e8eed39ee64797093227235af89bd14c9da7f4b182aa35a7c35ca31b95896'
|
7
|
+
data.tar.gz: ae8fe4c2743dbb23a56aef3b5415bedf9cafddbf642f5d3b8e7f021c7a3986ca38e0c40ab0465da224976284f3322186b63b0e530ebcf5ba151d7a4887467169
|
data/Gemfile.lock
CHANGED
data/lib/speech_to_text/deepspeech.rb
CHANGED
@@ -18,8 +18,8 @@ module SpeechToText
|
|
18
18
|
module MozillaDeepspeechS2T # rubocop:disable Style/Documentation
|
19
19
|
include Util
|
20
20
|
|
21
|
-
def self.create_job(audio, server_url, jobdetails_json)
|
22
|
-
request = "curl -F \"file=@#{audio}\" \"#{server_url}/deepspeech/createjob\" > #{jobdetails_json}"
|
21
|
+
def self.create_job(audio, server_url, jobdetails_json, api_key)
|
22
|
+
request = "curl -F \"file=@#{audio}\" \"#{server_url}/deepspeech/createjob/#{api_key}\" > #{jobdetails_json}"
|
23
23
|
|
24
24
|
Open3.popen2e(request) do |stdin, stdout_err, wait_thr|
|
25
25
|
while line = stdout_err.gets
|
@@ -39,16 +39,32 @@ module SpeechToText
|
|
39
39
|
data['job_id']
|
40
40
|
end
|
41
41
|
|
42
|
-
def self.checkstatus(job_id, server_url)
|
43
|
-
uri = URI.parse("#{server_url}/deepspeech/checkstatus/#{job_id}")
|
44
|
-
|
42
|
+
def self.checkstatus(job_id, server_url, api_key)
|
43
|
+
uri = URI.parse("#{server_url}/deepspeech/checkstatus/#{job_id}/#{api_key}")
|
44
|
+
request = Net::HTTP::Post.new(uri)
|
45
|
+
|
46
|
+
req_options = {
|
47
|
+
use_ssl: uri.scheme == "https",
|
48
|
+
}
|
49
|
+
|
50
|
+
response = Net::HTTP.start(uri.hostname, uri.port, req_options) do |http|
|
51
|
+
http.request(request)
|
52
|
+
end
|
45
53
|
data = JSON.load response.body
|
46
54
|
data['status']
|
47
55
|
end
|
48
56
|
|
49
|
-
def self.order_transcript(job_id, server_url)
|
50
|
-
uri = URI.parse("#{server_url}/deepspeech/transcript/#{job_id}")
|
51
|
-
|
57
|
+
def self.order_transcript(job_id, server_url, api_key)
|
58
|
+
uri = URI.parse("#{server_url}/deepspeech/transcript/#{job_id}/#{api_key}")
|
59
|
+
request = Net::HTTP::Post.new(uri)
|
60
|
+
|
61
|
+
req_options = {
|
62
|
+
use_ssl: uri.scheme == "https",
|
63
|
+
}
|
64
|
+
|
65
|
+
response = Net::HTTP.start(uri.hostname, uri.port, req_options) do |http|
|
66
|
+
http.request(request)
|
67
|
+
end
|
52
68
|
data = JSON.load response.body
|
53
69
|
data
|
54
70
|
end
|
data/lib/speech_to_text/util.rb
CHANGED
@@ -32,47 +32,60 @@ module SpeechToText
|
|
32
32
|
"#{hh}:#{mm}:#{ss}"
|
33
33
|
end
|
34
34
|
# rubocop:enable Metrics/MethodLength
|
35
|
-
|
36
35
|
# create and write the webvtt file
|
37
36
|
# rubocop:disable Metrics/MethodLength
|
38
37
|
def self.write_to_webvtt(vtt_file_path:, # rubocop:disable Metrics/AbcSize
|
39
38
|
vtt_file_name:,
|
40
|
-
|
39
|
+
text_array:,
|
41
40
|
start_time:)
|
41
|
+
# Array format
|
42
|
+
# text_array = [start_timestamp, end_timestamp, word, start_time, end_time, word, ...]
|
43
|
+
|
44
|
+
# if we cut first few minutes from the audio then
|
45
|
+
# start time will be replaced instead of 0
|
46
|
+
start_time = start_time.to_i
|
42
47
|
|
43
|
-
start_time = start_time.to_i
|
44
48
|
filename = "#{vtt_file_path}/#{vtt_file_name}"
|
45
49
|
file = File.open(filename, 'w')
|
46
|
-
file.
|
47
|
-
|
48
|
-
i = 0
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
50
|
+
file.print "WEBVTT"
|
51
|
+
|
52
|
+
i = block_number = 0
|
53
|
+
|
54
|
+
#all the words are at position [2,5,8,11...]
|
55
|
+
word_index = 2
|
56
|
+
|
57
|
+
# one block will give total 10 words on screen at a time
|
58
|
+
# which contains total 30 index
|
59
|
+
# each word has 3 indexes in text_array [start_timestamp, end_timestamp, word,...]
|
60
|
+
block_size = 30
|
61
|
+
|
62
|
+
# each block contains 10 words index range o to 29
|
63
|
+
# last end time will be at index = 28
|
64
|
+
end_timestamp = 28
|
65
|
+
|
66
|
+
# we need new lines after every 5 words so 6th word will be at index = 17 (6*3 - 1)
|
67
|
+
line_space_index = 17
|
68
|
+
|
69
|
+
while i < text_array.length
|
70
|
+
|
71
|
+
if i%3 == word_index #if index has word then print word
|
72
|
+
if i%block_size == line_space_index # if this is 6th word then print new line
|
73
|
+
file.puts
|
74
|
+
end
|
75
|
+
file.print "#{text_array[i]} "
|
76
|
+
elsif i%block_size == 0 #if index is 0,30,60... means starting a new block
|
77
|
+
block_number += 1
|
78
|
+
file.puts "\n\n"
|
79
|
+
file.puts block_number #print block number
|
80
|
+
file.print "#{seconds_to_timestamp(text_array[i] + start_time)} " #print start timestamps
|
81
|
+
if i + end_timestamp < text_array.length # End timestamp will be at 28th index in block of 30 indexes (10 words)
|
82
|
+
file.puts "--> #{seconds_to_timestamp(text_array[i+end_timestamp] + start_time)}"
|
83
|
+
else # For last block, there will not be total 30 indexes, so end timestamp will be second last index
|
84
|
+
file.puts "--> #{seconds_to_timestamp(text_array[text_array.length - 2] + start_time)}"
|
73
85
|
end
|
86
|
+
else
|
74
87
|
end
|
75
|
-
i +=
|
88
|
+
i += 1
|
76
89
|
end
|
77
90
|
|
78
91
|
file.close
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: speech_to_text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.5
|
4
|
+
version: 0.1.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Richard Alam
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-12-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|