RubyGems - sphinxtrain-ruby - Versions diffs - 0.0.2 → 0.0.3 - Mend

sphinxtrain-ruby 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

checksums.yaml +4 -4
data/README.md +10 -1
data/lib/sphinxtrain.rb +8 -1
data/lib/sphinxtrain/acoustic_model.rb +45 -0
data/lib/sphinxtrain/analyser.rb +9 -0
data/lib/sphinxtrain/trainer.rb +36 -33
data/lib/sphinxtrain/version.rb +1 -1
data/sphinxtrain-ruby.gemspec +1 -1
metadata +6 -5

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 7d2ce4b3bc287d0a77c6ccf9896f383aea9877ce
-  data.tar.gz: 9231781f44272e9669436d7534da94cbb6810a0b
+  metadata.gz: c757dbd07d39404598cbf6e08425f063fc1761fd
+  data.tar.gz: b2ab016cbc6adcda04478240898873cc8584988d
 SHA512:
-  metadata.gz: ff3208a722ed8f230052f405562cc1dc9f63c869fa9267dc6bebe1540a0500288c0f3ee216436be714ae61d2132f17450d973872242ecb76095350b87d187909
-  data.tar.gz: 63aa01337cc94d363358dff686c38547a8d903cc34e34a7d92ca473241490d8f979c82bff5e5cdd341f3cb69f1367a95b90a4c964fafb1fbf03e97686e4f2347
+  metadata.gz: 29da259c4c7be47c1a7c8514ec7b3038fb1b00aa734a10bd3046d9585b9e7ffe9c18cf304428681bd842807e166867f43569c7009346a59a62f826262ffcb274
+  data.tar.gz: 07b1d383642233e8f2d9c13bce6e9011023391fb1851a7dad1466e9387a680e45238e36f0b2dba58e449a0fe12d0ed23bd064ff51f2ddc7b82e11b3ba9b9dc8f

data/README.md CHANGED

@@ -32,7 +32,16 @@ $ gem install sphinxtrain-ruby
 ## Usage
-Run `sphinxtrain-ruby` from the command line and follow the instructions. See some example output [here](https://github.com/watsonbox/sphinxtrain-ruby/wiki/Example-Output).
+Run `sphinxtrain-ruby` from the command line and follow the instructions.  It will:
+1. Download and extract the Grasch Voxforge English 0.4 acoustic model (on first run)
+2. Download the CMU ARCTIC example sentences (on first run)
+3. Record the 20 example sentences. Press enter to record, speak sentence, then wait.
+4. Decode the sentences using the base acoustic model, giving an overall score.
+5. Duplicate and adapt the base acoustic model using the recorded sentences.
+6. Decode the sentences using the adapted acoustic model, giving an overall score.
+See some example output [here](https://github.com/watsonbox/sphinxtrain-ruby/wiki/Example-Output). All data is saved in `~/.sphinxtrain-ruby`.
 ## To Do

data/lib/sphinxtrain.rb CHANGED

@@ -5,9 +5,16 @@ require "word_aligner"
 require "sphinxtrain/version"
 require "sphinxtrain/analyser"
 require "sphinxtrain/map_adapter"
+require "sphinxtrain/acoustic_model"
 require "sphinxtrain/training_decoder"
 require "sphinxtrain/trainer"
 module Sphinxtrain
-  # Your code goes here...
+  def self.base_dir
+    File.join(Dir.home, '.sphinxtrain-ruby')
+  end
+  def self.recordings_dir
+    File.join(base_dir, 'recordings')
+  end
 end

data/lib/sphinxtrain/acoustic_model.rb ADDED

@@ -0,0 +1,45 @@
+module Sphinxtrain
+  class AcousticModel < Struct.new(:url)
+    MODEL_URLS = {
+      voxforge_grasch: "http://files.kde.org/accessibility/Simon/am/voxforge_en_sphinx.cd_cont_5000.tar.gz"
+    }
+    MODEL_DESCRIPTIONS = {
+      voxforge_grasch: "Grasch Voxforge English 0.4"
+    }
+    def self.voxforge_grasch
+      new MODEL_URLS[:voxforge_grasch]
+    end
+    def description
+      MODEL_DESCRIPTIONS[MODEL_URLS.invert[url]] || url
+    end
+    def downloaded?
+      File.exist?(downloaded_filename)
+    end
+    def downloaded_filename
+      File.basename(url)
+    end
+    def folder
+      File.basename(downloaded_filename, '.tar.gz')
+    end
+    def adapted_folder
+      folder + "_adapted"
+    end
+    def download!
+      `wget #{url}`
+      `tar xfz #{downloaded_filename}`
+    end
+    def duplicate!
+      FileUtils.rm_rf(adapted_folder) if Dir.exist?(adapted_folder)
+      FileUtils.cp_r(folder, adapted_folder)
+    end
+  end
+end

data/lib/sphinxtrain/analyser.rb CHANGED

@@ -7,11 +7,20 @@ module Sphinxtrain
     def analyse(sentences_file, recordings_dir)
       total = 0
+      first_decoding = true
       File.open(sentences_file).each_line.with_index do |transcription, index|
         transcription = transcription.downcase.gsub(/[,\.]/, '')
         file_path = File.join(recordings_dir, "arctic_#{(index + 1).to_s.rjust(4, "0")}.raw")
         decoder.decode file_path
+        # Repeat the first decoding after CMN estimations are calculated
+        # See https://github.com/watsonbox/pocketsphinx-ruby/issues/10
+        if first_decoding
+          first_decoding = false
+          redo
+        end
         hypothesis = decoder.hypothesis
         error_rate = WordAligner.align(transcription, hypothesis)
         total += error_rate.percentage_accurate

data/lib/sphinxtrain/trainer.rb CHANGED

@@ -2,42 +2,46 @@ require 'fileutils'
 module Sphinxtrain
   class Trainer
-    BASE_DIR = File.join(Dir.home, '.sphinxtrain-ruby')
-    #VOXFORGE_URL = "http://downloads.sourceforge.net/project/cmusphinx/Acoustic%20and%20Language%20Models/English%20Voxforge/voxforge-en-0.4.tar.gz"
-    VOXFORGE_URL = "http://files.kde.org/accessibility/Simon/am/voxforge_en_sphinx.cd_cont_5000.tar.gz"
-    VOXFORGE_FILE = File.basename(VOXFORGE_URL)
-    VOXFORGE_FOLDER = File.basename(VOXFORGE_FILE, '.tar.gz')
-    #VOXFORGE_MODEL = File.join(BASE_DIR, VOXFORGE_FOLDER, "model_parameters/voxforge_en_sphinx.cd_cont_5000")
-    VOXFORGE_MODEL = VOXFORGE_FOLDER
-    RECORDINGS_DIR = File.join(BASE_DIR, 'recordings')
-    NEW_MODEL = File.join(BASE_DIR, 'new_model')
+    def acoustic_model
+      @acoustic_model ||= AcousticModel.voxforge_grasch
+    end
     def train
       Pocketsphinx.disable_logging
-      Dir.mkdir BASE_DIR rescue Errno::EEXIST
-      Dir.chdir BASE_DIR do
-        download_voxforge unless File.exist?(VOXFORGE_FILE)
+      Dir.mkdir Sphinxtrain.base_dir rescue Errno::EEXIST
+      Dir.chdir Sphinxtrain.base_dir do
+        if acoustic_model.downloaded?
+          log "=> Using existing acoustic model #{acoustic_model.description}", :yellow
+        else
+          log "=> Downloading #{acoustic_model.description}..."
+          acoustic_model.download!
+        end
         download_assets unless arctic_file(:txt, :listoffiles, :transcription, :dic).all? { |f| File.exist? f }
-        record_sentences unless Dir.exist?(RECORDINGS_DIR)
-        analyse_model VOXFORGE_MODEL
+        if Dir.exist?(Sphinxtrain.recordings_dir)
+          log "=> Using sentences recorded in #{Sphinxtrain.recordings_dir}", :yellow
+        else
+          record_sentences
+        end
+        result = analyse_model
         duplicate_model
         adapt_model
-        analyse_model NEW_MODEL
+        adapted_result = analyse_model acoustic_model.adapted_folder
+        improvement = ((adapted_result/result)-1)*100
+        log "=> Adapted acoustic model improved by #{improvement}%. Test this model with:"
+        log "=> pocketsphinx_continuous -hmm #{File.join(Sphinxtrain.base_dir, acoustic_model.adapted_folder)} -inmic yes"
       end
     end
     private
-    def download_voxforge
-      log "=> Downloading Voxforge English 0.4 Acoustic Model..."
-      `wget #{VOXFORGE_URL}`
-      `tar xfz #{VOXFORGE_FILE}`
-    end
     def download_assets
       log "=> Downloading CMU ARCTIC Example Sentences..."
@@ -52,7 +56,7 @@ module Sphinxtrain
     def record_sentences
       log "=> Recording sentences..."
-      Dir.mkdir RECORDINGS_DIR unless Dir.exist?(RECORDINGS_DIR)
+      Dir.mkdir Sphinxtrain.recordings_dir unless Dir.exist?(Sphinxtrain.recordings_dir)
       recognizer = Pocketsphinx::LiveSpeechRecognizer.new
       decoder = TrainingDecoder.new(recognizer.decoder)
@@ -83,28 +87,27 @@ module Sphinxtrain
     def save_audio(data, sentence_index)
       raise "Can't save empty audio data" if data.nil? || data.empty?
-      File.open(File.join(RECORDINGS_DIR, "arctic_#{(sentence_index + 1).to_s.rjust(4, "0")}.raw"), "wb") do |file|
+      File.open(File.join(Sphinxtrain.recordings_dir, "arctic_#{(sentence_index + 1).to_s.rjust(4, "0")}.raw"), "wb") do |file|
         file.write data
       end
     end
-    def analyse_model(model)
+    def analyse_model(model_folder = acoustic_model.folder)
       log "=> Analysing acoustic model...\n"
-      result = Analyser.new(model).analyse(arctic_file(:txt), RECORDINGS_DIR) do |transcription, hypothesis, accuracy|
+      result = Analyser.new(model_folder).analyse(arctic_file(:txt), Sphinxtrain.recordings_dir) do |transcription, hypothesis, accuracy|
         puts "   ACTUAL: #{transcription}"
         puts "   RECORD: #{hypothesis}"
         puts "   RESULT: #{accuracy}\n\n"
       end
       puts "   OVERALL: #{result}\n\n"
+      result
     end
     def duplicate_model
       log "=> Duplicating Voxforge acoustic model..."
-      FileUtils.rm_rf(NEW_MODEL) if Dir.exist?(NEW_MODEL)
-      FileUtils.cp_r(VOXFORGE_MODEL, NEW_MODEL)
+      acoustic_model.duplicate!
     end
     # Follows process described here: http://cmusphinx.sourceforge.net/wiki/tutorialadapt
@@ -112,17 +115,17 @@ module Sphinxtrain
       log "=> Adapting Voxforge acoustic model..."
       MapAdapter.new(
-        old_model: VOXFORGE_MODEL,
-        new_model: NEW_MODEL,
-        recordings_dir: RECORDINGS_DIR,
+        old_model: acoustic_model.folder,
+        new_model: acoustic_model.adapted_folder,
+        recordings_dir: Sphinxtrain.recordings_dir,
         sentences_transcription: arctic_file(:transcription),
         sentences_files: arctic_file(:listoffiles),
         sentences_dict: arctic_file(:dic)
       ).adapt
     end
-    def log(message)
-      puts message.colorize(:green)
+    def log(message, color = :green)
+      puts message.colorize(color)
     end
   end
 end

data/lib/sphinxtrain/version.rb CHANGED

@@ -1,3 +1,3 @@
 module Sphinxtrain
-  VERSION = "0.0.2"
+  VERSION = "0.0.3"
 end

data/sphinxtrain-ruby.gemspec CHANGED

@@ -19,7 +19,7 @@ Gem::Specification.new do |spec|
   spec.require_paths = ["lib"]
   spec.required_ruby_version = '>= 2.1.0'
-  spec.add_dependency "pocketsphinx-ruby", "~> 0.2.0"
+  spec.add_dependency "pocketsphinx-ruby", "~> 0.3.0"
   spec.add_dependency "word_aligner", "~> 0.1.2"
   spec.add_dependency "colorize", "~> 0.7.3"

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: sphinxtrain-ruby
 version: !ruby/object:Gem::Version
-  version: 0.0.2
+  version: 0.0.3
 platform: ruby
 authors:
 - Howard Wilson
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-03-03 00:00:00.000000000 Z
+date: 2015-11-11 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: pocketsphinx-ruby
@@ -16,14 +16,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.2.0
+        version: 0.3.0
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.2.0
+        version: 0.3.0
 - !ruby/object:Gem::Dependency
   name: word_aligner
   requirement: !ruby/object:Gem::Requirement
@@ -112,6 +112,7 @@ files:
 - bin/sphinxtrain-ruby
 - lib/sphinxtrain-ruby.rb
 - lib/sphinxtrain.rb
+- lib/sphinxtrain/acoustic_model.rb
 - lib/sphinxtrain/analyser.rb
 - lib/sphinxtrain/map_adapter.rb
 - lib/sphinxtrain/trainer.rb
@@ -139,7 +140,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.2.2
+rubygems_version: 2.4.6
 signing_key:
 specification_version: 4
 summary: Toolkit for training/adapting CMU Sphinx acoustic models.