RubyGems - speech_to_text - Versions diffs - 0.1.5 → 0.1.6 - Mend

speech_to_text 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

checksums.yaml +4 -4
data/Gemfile.lock +1 -1
data/lib/speech_to_text/deepspeech.rb +24 -8
data/lib/speech_to_text/util.rb +44 -31
data/lib/speech_to_text/version.rb +1 -1
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 912dddca8963d0db76d95ca167a7a0935c264a39384c6baec8dc20c956b97f8a
-  data.tar.gz: bbdeaf426301438a7009e38d07c008c0253b18e1c56e61c2c063e678d7c17128
+  metadata.gz: ac78d515a0b4deadd4b5f1046f2e42edb7dfd0b1334e58b51367c1ec6b132f18
+  data.tar.gz: e72d38855046135e7d0126f9bf82ffddbed63688593a9ae2474515099805daff
 SHA512:
-  metadata.gz: 2dc6bc04f997a4f4101b3b5c45de7e354d11eacac6d3f7ad217df4eabc0afa1a30c1584c9bcc4ba63ab8008914a0a37da8515bbb64a03fb7c1390a0701e726cf
-  data.tar.gz: 8980ccbe70389e5029a145a0a4bcfefa72b2e2f815f49daf42bb300291f7cd33e8b2da6f2a69dca79a548ea7225f7578d1199198b9bc8cc1556829fc464846d5
+  metadata.gz: '081da7bde03d0152012b1682d28395d89d6fe60b0f2ba420572d34738e01e46b2c0e8eed39ee64797093227235af89bd14c9da7f4b182aa35a7c35ca31b95896'
+  data.tar.gz: ae8fe4c2743dbb23a56aef3b5415bedf9cafddbf642f5d3b8e7f021c7a3986ca38e0c40ab0465da224976284f3322186b63b0e530ebcf5ba151d7a4887467169

data/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    speech_to_text (0.1.5)
+    speech_to_text (0.1.6)
       google-cloud-speech (= 0.35.0)
       google-cloud-storage (= 1.18.2)
       ibm_watson (~> 0.18.2)

data/lib/speech_to_text/deepspeech.rb CHANGED Viewed

@@ -18,8 +18,8 @@ module SpeechToText
   module MozillaDeepspeechS2T # rubocop:disable Style/Documentation
     include Util
-    def self.create_job(audio, server_url, jobdetails_json)
-      request = "curl -F \"file=@#{audio}\" \"#{server_url}/deepspeech/createjob\" > #{jobdetails_json}"
+    def self.create_job(audio, server_url, jobdetails_json, api_key)
+      request = "curl -F \"file=@#{audio}\" \"#{server_url}/deepspeech/createjob/#{api_key}\" > #{jobdetails_json}"
       Open3.popen2e(request) do |stdin, stdout_err, wait_thr|
         while line = stdout_err.gets
@@ -39,16 +39,32 @@ module SpeechToText
       data['job_id']
     end
-    def self.checkstatus(job_id, server_url)
-      uri = URI.parse("#{server_url}/deepspeech/checkstatus/#{job_id}")
-      response = Net::HTTP.get_response(uri)
+    def self.checkstatus(job_id, server_url, api_key)
+      uri = URI.parse("#{server_url}/deepspeech/checkstatus/#{job_id}/#{api_key}")
+      request = Net::HTTP::Post.new(uri)
+      req_options = {
+        use_ssl: uri.scheme == "https",
+      }
+      response = Net::HTTP.start(uri.hostname, uri.port, req_options) do |http|
+        http.request(request)
+      end
       data = JSON.load response.body
       data['status']
     end
-    def self.order_transcript(job_id, server_url)
-      uri = URI.parse("#{server_url}/deepspeech/transcript/#{job_id}")
-      response = Net::HTTP.get_response(uri)
+    def self.order_transcript(job_id, server_url, api_key)
+      uri = URI.parse("#{server_url}/deepspeech/transcript/#{job_id}/#{api_key}")
+      request = Net::HTTP::Post.new(uri)
+      req_options = {
+        use_ssl: uri.scheme == "https",
+      }
+      response = Net::HTTP.start(uri.hostname, uri.port, req_options) do |http|
+        http.request(request)
+      end
       data = JSON.load response.body
       data
     end

data/lib/speech_to_text/util.rb CHANGED Viewed

@@ -32,47 +32,60 @@ module SpeechToText
       "#{hh}:#{mm}:#{ss}"
     end
     # rubocop:enable Metrics/MethodLength
     # create and write the webvtt file
     # rubocop:disable Metrics/MethodLength
     def self.write_to_webvtt(vtt_file_path:, # rubocop:disable Metrics/AbcSize
                              vtt_file_name:,
-                             myarray:,
+                             text_array:,
                              start_time:)
+      # Array format
+      # text_array = [start_timestamp, end_timestamp, word, start_time, end_time, word, ...]
+      # if the first few minutes were cut from the audio, start_time holds
+      # that offset (instead of 0) and is added to every timestamp below
+      start_time = start_time.to_i
-      start_time = start_time.to_i
       filename = "#{vtt_file_path}/#{vtt_file_name}"
       file = File.open(filename, 'w')
-      file.puts "WEBVTT\n\n"
-      i = 0
-      while i < myarray.length
-        file.puts i / 30 + 1
-        if i + 28 < myarray.length
-          file.puts "#{seconds_to_timestamp (myarray[i] + start_time).to_i} --> #{seconds_to_timestamp (myarray[i + 28] + start_time).to_i}"
-          file.puts "#{myarray[i + 2]} #{myarray[i + 5]} #{myarray[i + 8]} #{myarray[i + 11]} #{myarray[i + 14]}"
-          file.puts "#{myarray[i + 17]} #{myarray[i + 20]} #{myarray[i + 23]} #{myarray[i + 26]} #{myarray[i + 29]}\n\n"
-        else
-          remainder = myarray.length - i
-          file.puts "#{seconds_to_timestamp (myarray[i] + start_time).to_i} --> #{seconds_to_timestamp (myarray[myarray.length - 2] + start_time).to_i}"
-          count = 0
-          flag = true
-          while count < remainder
-            file.print "#{myarray[i + 2]} "
-            if flag # rubocop:disable Metrics/BlockNesting
-              # rubocop:disable Metrics/BlockNesting
-              if count > 9
-                file.print "\n"
-                flag = false
-              end
-              # rubocop:enable Metrics/BlockNesting
-            end
-            i += 3
-            count += 3
+      file.print "WEBVTT"
+      i = block_number = 0
+      #all the words are at position [2,5,8,11...]
+      word_index = 2
+      # one block will give total 10 words on screen at a time
+      # which contains total 30 index
+      # each word has 3 indexes in text_array [start_timestamp, end_timestamp, word,...]
+      block_size = 30
+      # each block contains 10 words, index range 0 to 29
+      # last end time will be at index = 28
+      end_timestamp = 28
+      # we need new lines after every 5 words so 6th word will be at index = 17 (6*3 - 1)
+      line_space_index = 17
+      while i < text_array.length
+        if i%3 == word_index  #if index has word then print word
+          if i%block_size == line_space_index # if this is 6th word then print new line
+            file.puts
+          end
+          file.print "#{text_array[i]} "
+        elsif i%block_size == 0  #if index is 0,30,60... means starting a new block
+          block_number += 1
+          file.puts "\n\n"
+          file.puts block_number  #print block number
+          file.print "#{seconds_to_timestamp(text_array[i] + start_time)} "  #print start timestamps
+          if i + end_timestamp < text_array.length  # End timestamp will be at 28th index in block of 30 indexes (10 words)
+            file.puts "--> #{seconds_to_timestamp(text_array[i+end_timestamp] + start_time)}"
+          else  # For last block, there will not be total 30 indexes, so end timestamp will be second last index
+            file.puts "--> #{seconds_to_timestamp(text_array[text_array.length - 2] + start_time)}"
           end
+        else
         end
-        i += 30
+        i += 1
       end
       file.close

data/lib/speech_to_text/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module SpeechToText
-  VERSION = '0.1.5'
+  VERSION = '0.1.6'
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: speech_to_text
 version: !ruby/object:Gem::Version
-  version: 0.1.5
+  version: 0.1.6
 platform: ruby
 authors:
 - Richard Alam
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2019-11-21 00:00:00.000000000 Z
+date: 2019-12-05 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler