RubyGems - speech_to_text - Versions diffs - 0.1.5 → 0.1.6 - Mend

speech_to_text 0.1.5 → 0.1.6

Files changed (6) hide show

checksums.yaml +4 -4
data/Gemfile.lock +1 -1
data/lib/speech_to_text/deepspeech.rb +24 -8
data/lib/speech_to_text/util.rb +44 -31
data/lib/speech_to_text/version.rb +1 -1
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 912dddca8963d0db76d95ca167a7a0935c264a39384c6baec8dc20c956b97f8a
-  data.tar.gz: bbdeaf426301438a7009e38d07c008c0253b18e1c56e61c2c063e678d7c17128
+  metadata.gz: ac78d515a0b4deadd4b5f1046f2e42edb7dfd0b1334e58b51367c1ec6b132f18
+  data.tar.gz: e72d38855046135e7d0126f9bf82ffddbed63688593a9ae2474515099805daff
 SHA512:
-  metadata.gz: 2dc6bc04f997a4f4101b3b5c45de7e354d11eacac6d3f7ad217df4eabc0afa1a30c1584c9bcc4ba63ab8008914a0a37da8515bbb64a03fb7c1390a0701e726cf
-  data.tar.gz: 8980ccbe70389e5029a145a0a4bcfefa72b2e2f815f49daf42bb300291f7cd33e8b2da6f2a69dca79a548ea7225f7578d1199198b9bc8cc1556829fc464846d5
+  metadata.gz: '081da7bde03d0152012b1682d28395d89d6fe60b0f2ba420572d34738e01e46b2c0e8eed39ee64797093227235af89bd14c9da7f4b182aa35a7c35ca31b95896'
+  data.tar.gz: ae8fe4c2743dbb23a56aef3b5415bedf9cafddbf642f5d3b8e7f021c7a3986ca38e0c40ab0465da224976284f3322186b63b0e530ebcf5ba151d7a4887467169

data/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    speech_to_text (0.1.5)
+    speech_to_text (0.1.6)
       google-cloud-speech (= 0.35.0)
       google-cloud-storage (= 1.18.2)
       ibm_watson (~> 0.18.2)

data/lib/speech_to_text/deepspeech.rb CHANGED Viewed

@@ -18,8 +18,8 @@ module SpeechToText
   module MozillaDeepspeechS2T # rubocop:disable Style/Documentation
     include Util
-    def self.create_job(audio, server_url, jobdetails_json)
-      request = "curl -F \"file=@#{audio}\" \"#{server_url}/deepspeech/createjob\" > #{jobdetails_json}"
+    def self.create_job(audio, server_url, jobdetails_json, api_key)
+      request = "curl -F \"file=@#{audio}\" \"#{server_url}/deepspeech/createjob/#{api_key}\" > #{jobdetails_json}"
       Open3.popen2e(request) do |stdin, stdout_err, wait_thr|
         while line = stdout_err.gets
@@ -39,16 +39,32 @@ module SpeechToText
       data['job_id']
     end
-    def self.checkstatus(job_id, server_url)
-      uri = URI.parse("#{server_url}/deepspeech/checkstatus/#{job_id}")
-      response = Net::HTTP.get_response(uri)
+    def self.checkstatus(job_id, server_url, api_key)
+      uri = URI.parse("#{server_url}/deepspeech/checkstatus/#{job_id}/#{api_key}")
+      request = Net::HTTP::Post.new(uri)
+      req_options = {
+        use_ssl: uri.scheme == "https",
+      }
+      response = Net::HTTP.start(uri.hostname, uri.port, req_options) do |http|
+        http.request(request)
+      end
       data = JSON.load response.body
       data['status']
     end
-    def self.order_transcript(job_id, server_url)
-      uri = URI.parse("#{server_url}/deepspeech/transcript/#{job_id}")
-      response = Net::HTTP.get_response(uri)
+    def self.order_transcript(job_id, server_url, api_key)
+      uri = URI.parse("#{server_url}/deepspeech/transcript/#{job_id}/#{api_key}")
+      request = Net::HTTP::Post.new(uri)
+      req_options = {
+        use_ssl: uri.scheme == "https",
+      }
+      response = Net::HTTP.start(uri.hostname, uri.port, req_options) do |http|
+        http.request(request)
+      end
       data = JSON.load response.body
       data
     end

data/lib/speech_to_text/util.rb CHANGED Viewed

@@ -32,47 +32,60 @@ module SpeechToText
       "#{hh}:#{mm}:#{ss}"
     end
     # rubocop:enable Metrics/MethodLength
     # create and write the webvtt file
     # rubocop:disable Metrics/MethodLength
     def self.write_to_webvtt(vtt_file_path:, # rubocop:disable Metrics/AbcSize
                              vtt_file_name:,
-                             myarray:,
+                             text_array:,
                              start_time:)
+      # Array format:
+      # text_array = [start_timestamp, end_timestamp, word, start_timestamp, end_timestamp, word, ...]
+      # If the first few minutes were cut from the audio, start_time holds that
+      # offset (instead of 0) and is added to every timestamp written below.
+      start_time = start_time.to_i
-      start_time = start_time.to_i
       filename = "#{vtt_file_path}/#{vtt_file_name}"
       file = File.open(filename, 'w')
-      file.puts "WEBVTT\n\n"
-      i = 0
-      while i < myarray.length
-        file.puts i / 30 + 1
-        if i + 28 < myarray.length
-          file.puts "#{seconds_to_timestamp (myarray[i] + start_time).to_i} --> #{seconds_to_timestamp (myarray[i + 28] + start_time).to_i}"
-          file.puts "#{myarray[i + 2]} #{myarray[i + 5]} #{myarray[i + 8]} #{myarray[i + 11]} #{myarray[i + 14]}"
-          file.puts "#{myarray[i + 17]} #{myarray[i + 20]} #{myarray[i + 23]} #{myarray[i + 26]} #{myarray[i + 29]}\n\n"
-        else
-          remainder = myarray.length - i
-          file.puts "#{seconds_to_timestamp (myarray[i] + start_time).to_i} --> #{seconds_to_timestamp (myarray[myarray.length - 2] + start_time).to_i}"
-          count = 0
-          flag = true
-          while count < remainder
-            file.print "#{myarray[i + 2]} "
-            if flag # rubocop:disable Metrics/BlockNesting
-              # rubocop:disable Metrics/BlockNesting
-              if count > 9
-                file.print "\n"
-                flag = false
-              end
-              # rubocop:enable Metrics/BlockNesting
-            end
-            i += 3
-            count += 3
+      file.print "WEBVTT"
+      i = block_number = 0
+      #all the words are at position [2,5,8,11...]
+      word_index = 2
+      # one block shows a total of 10 words on screen at a time,
+      # which spans 30 entries of text_array, because
+      # each word takes 3 entries in text_array [start_timestamp, end_timestamp, word, ...]
+      block_size = 30
+      # each block contains 10 words, index range 0 to 29
+      # the last end timestamp of a block will be at index = 28
+      end_timestamp = 28
+      # we need new lines after every 5 words so 6th word will be at index = 17 (6*3 - 1)
+      line_space_index = 17
+      while i < text_array.length
+        if i%3 == word_index  #if index has word then print word
+          if i%block_size == line_space_index # if this is 6th word then print new line
+            file.puts
+          end
+          file.print "#{text_array[i]} "
+        elsif i%block_size == 0  #if index is 0,30,60... means starting a new block
+          block_number += 1
+          file.puts "\n\n"
+          file.puts block_number  #print block number
+          file.print "#{seconds_to_timestamp(text_array[i] + start_time)} "  #print start timestamps
+          if i + end_timestamp < text_array.length  # End timestamp will be at 28th index in block of 30 indexes (10 words)
+            file.puts "--> #{seconds_to_timestamp(text_array[i+end_timestamp] + start_time)}"
+          else  # For last block, there will not be total 30 indexes, so end timestamp will be second last index
+            file.puts "--> #{seconds_to_timestamp(text_array[text_array.length - 2] + start_time)}"
           end
+        else
         end
-        i += 30
+        i += 1
       end
       file.close

data/lib/speech_to_text/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module SpeechToText
-  VERSION = '0.1.5'
+  VERSION = '0.1.6'
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: speech_to_text
 version: !ruby/object:Gem::Version
-  version: 0.1.5
+  version: 0.1.6
 platform: ruby
 authors:
 - Richard Alam
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2019-11-21 00:00:00.000000000 Z
+date: 2019-12-05 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler