RubyGems - sphinxtrain-ruby - Versions diffs - 0.0.2 → 0.0.3 - Mend

sphinxtrain-ruby 0.0.2 → 0.0.3

Files changed (9) hide show

checksums.yaml +4 -4
data/README.md +10 -1
data/lib/sphinxtrain.rb +8 -1
data/lib/sphinxtrain/acoustic_model.rb +45 -0
data/lib/sphinxtrain/analyser.rb +9 -0
data/lib/sphinxtrain/trainer.rb +36 -33
data/lib/sphinxtrain/version.rb +1 -1
data/sphinxtrain-ruby.gemspec +1 -1
metadata +6 -5

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 7d2ce4b3bc287d0a77c6ccf9896f383aea9877ce
-  data.tar.gz: 9231781f44272e9669436d7534da94cbb6810a0b
+  metadata.gz: c757dbd07d39404598cbf6e08425f063fc1761fd
+  data.tar.gz: b2ab016cbc6adcda04478240898873cc8584988d
 SHA512:
-  metadata.gz: ff3208a722ed8f230052f405562cc1dc9f63c869fa9267dc6bebe1540a0500288c0f3ee216436be714ae61d2132f17450d973872242ecb76095350b87d187909
-  data.tar.gz: 63aa01337cc94d363358dff686c38547a8d903cc34e34a7d92ca473241490d8f979c82bff5e5cdd341f3cb69f1367a95b90a4c964fafb1fbf03e97686e4f2347
+  metadata.gz: 29da259c4c7be47c1a7c8514ec7b3038fb1b00aa734a10bd3046d9585b9e7ffe9c18cf304428681bd842807e166867f43569c7009346a59a62f826262ffcb274
+  data.tar.gz: 07b1d383642233e8f2d9c13bce6e9011023391fb1851a7dad1466e9387a680e45238e36f0b2dba58e449a0fe12d0ed23bd064ff51f2ddc7b82e11b3ba9b9dc8f

data/README.md CHANGED

@@ -32,7 +32,16 @@ $ gem install sphinxtrain-ruby
 ## Usage
-Run `sphinxtrain-ruby` from the command line and follow the instructions. See some example output [here](https://github.com/watsonbox/sphinxtrain-ruby/wiki/Example-Output).
+Run `sphinxtrain-ruby` from the command line and follow the instructions.  It will:
+1. Download and extract the Grasch Voxforge English 0.4 acoustic model (on first run)
+2. Download the CMU ARCTIC example sentences (on first run)
+3. Record the 20 example sentences. Press enter to record, speak sentence, then wait.
+4. Decode the sentences using the base acoustic model, giving an overall score.
+5. Duplicate and adapt the base acoustic model using the recorded sentences.
+6. Decode the sentences using the adapted acoustic model, giving an overall score.
+See some example output [here](https://github.com/watsonbox/sphinxtrain-ruby/wiki/Example-Output). All data is saved in `~/.sphinxtrain-ruby`.
 ## To Do

data/lib/sphinxtrain.rb CHANGED

@@ -5,9 +5,16 @@ require "word_aligner"
 require "sphinxtrain/version"
 require "sphinxtrain/analyser"
 require "sphinxtrain/map_adapter"
+require "sphinxtrain/acoustic_model"
 require "sphinxtrain/training_decoder"
 require "sphinxtrain/trainer"
 module Sphinxtrain
-  # Your code goes here...
+  def self.base_dir
+    File.join(Dir.home, '.sphinxtrain-ruby')
+  end
+  def self.recordings_dir
+    File.join(base_dir, 'recordings')
+  end
 end

data/lib/sphinxtrain/acoustic_model.rb ADDED

@@ -0,0 +1,45 @@
+module Sphinxtrain
+  class AcousticModel < Struct.new(:url)
+    MODEL_URLS = {
+      voxforge_grasch: "http://files.kde.org/accessibility/Simon/am/voxforge_en_sphinx.cd_cont_5000.tar.gz"
+    }
+    MODEL_DESCRIPTIONS = {
+      voxforge_grasch: "Grasch Voxforge English 0.4"
+    }
+    def self.voxforge_grasch
+      new MODEL_URLS[:voxforge_grasch]
+    end
+    def description
+      MODEL_DESCRIPTIONS[MODEL_URLS.invert[url]] || url
+    end
+    def downloaded?
+      File.exist?(downloaded_filename)
+    end
+    def downloaded_filename
+      File.basename(url)
+    end
+    def folder
+      File.basename(downloaded_filename, '.tar.gz')
+    end
+    def adapted_folder
+      folder + "_adapted"
+    end
+    def download!
+      `wget #{url}`
+      `tar xfz #{downloaded_filename}`
+    end
+    def duplicate!
+      FileUtils.rm_rf(adapted_folder) if Dir.exist?(adapted_folder)
+      FileUtils.cp_r(folder, adapted_folder)
+    end
+  end
+end

data/lib/sphinxtrain/analyser.rb CHANGED

@@ -7,11 +7,20 @@ module Sphinxtrain
     def analyse(sentences_file, recordings_dir)
       total = 0
+      first_decoding = true
       File.open(sentences_file).each_line.with_index do |transcription, index|
         transcription = transcription.downcase.gsub(/[,\.]/, '')
         file_path = File.join(recordings_dir, "arctic_#{(index + 1).to_s.rjust(4, "0")}.raw")
         decoder.decode file_path
+        # Repeat the first decoding after CMN estimations are calculated
+        # See https://github.com/watsonbox/pocketsphinx-ruby/issues/10
+        if first_decoding
+          first_decoding = false
+          redo
+        end
         hypothesis = decoder.hypothesis
         error_rate = WordAligner.align(transcription, hypothesis)
         total += error_rate.percentage_accurate

data/lib/sphinxtrain/trainer.rb CHANGED

@@ -2,42 +2,46 @@ require 'fileutils'
 module Sphinxtrain
   class Trainer
-    BASE_DIR = File.join(Dir.home, '.sphinxtrain-ruby')
-    #VOXFORGE_URL = "http://downloads.sourceforge.net/project/cmusphinx/Acoustic%20and%20Language%20Models/English%20Voxforge/voxforge-en-0.4.tar.gz"
-    VOXFORGE_URL = "http://files.kde.org/accessibility/Simon/am/voxforge_en_sphinx.cd_cont_5000.tar.gz"
-    VOXFORGE_FILE = File.basename(VOXFORGE_URL)
-    VOXFORGE_FOLDER = File.basename(VOXFORGE_FILE, '.tar.gz')
-    #VOXFORGE_MODEL = File.join(BASE_DIR, VOXFORGE_FOLDER, "model_parameters/voxforge_en_sphinx.cd_cont_5000")
-    VOXFORGE_MODEL = VOXFORGE_FOLDER
-    RECORDINGS_DIR = File.join(BASE_DIR, 'recordings')
-    NEW_MODEL = File.join(BASE_DIR, 'new_model')
+    def acoustic_model
+      @acoustic_model ||= AcousticModel.voxforge_grasch
+    end
     def train
       Pocketsphinx.disable_logging
-      Dir.mkdir BASE_DIR rescue Errno::EEXIST
-      Dir.chdir BASE_DIR do
-        download_voxforge unless File.exist?(VOXFORGE_FILE)
+      Dir.mkdir Sphinxtrain.base_dir rescue Errno::EEXIST
+      Dir.chdir Sphinxtrain.base_dir do
+        if acoustic_model.downloaded?
+          log "=> Using existing acoustic model #{acoustic_model.description}", :yellow
+        else
+          log "=> Downloading #{acoustic_model.description}..."
+          acoustic_model.download!
+        end
         download_assets unless arctic_file(:txt, :listoffiles, :transcription, :dic).all? { |f| File.exist? f }
-        record_sentences unless Dir.exist?(RECORDINGS_DIR)
-        analyse_model VOXFORGE_MODEL
+        if Dir.exist?(Sphinxtrain.recordings_dir)
+          log "=> Using sentences recorded in #{Sphinxtrain.recordings_dir}", :yellow
+        else
+          record_sentences
+        end
+        result = analyse_model
         duplicate_model
         adapt_model
-        analyse_model NEW_MODEL
+        adapted_result = analyse_model acoustic_model.adapted_folder
+        improvement = ((adapted_result/result)-1)*100
+        log "=> Adapted acoustic model improved by #{improvement}%. Test this model with:"
+        log "=> pocketsphinx_continuous -hmm #{File.join(Sphinxtrain.base_dir, acoustic_model.adapted_folder)} -inmic yes"
       end
     end
     private
-    def download_voxforge
-      log "=> Downloading Voxforge English 0.4 Acoustic Model..."
-      `wget #{VOXFORGE_URL}`
-      `tar xfz #{VOXFORGE_FILE}`
-    end
     def download_assets
       log "=> Downloading CMU ARCTIC Example Sentences..."
@@ -52,7 +56,7 @@ module Sphinxtrain
     def record_sentences
       log "=> Recording sentences..."
-      Dir.mkdir RECORDINGS_DIR unless Dir.exist?(RECORDINGS_DIR)
+      Dir.mkdir Sphinxtrain.recordings_dir unless Dir.exist?(Sphinxtrain.recordings_dir)
       recognizer = Pocketsphinx::LiveSpeechRecognizer.new
       decoder = TrainingDecoder.new(recognizer.decoder)
@@ -83,28 +87,27 @@ module Sphinxtrain
     def save_audio(data, sentence_index)
       raise "Can't save empty audio data" if data.nil? || data.empty?
-      File.open(File.join(RECORDINGS_DIR, "arctic_#{(sentence_index + 1).to_s.rjust(4, "0")}.raw"), "wb") do |file|
+      File.open(File.join(Sphinxtrain.recordings_dir, "arctic_#{(sentence_index + 1).to_s.rjust(4, "0")}.raw"), "wb") do |file|
         file.write data
       end
     end
-    def analyse_model(model)
+    def analyse_model(model_folder = acoustic_model.folder)
       log "=> Analysing acoustic model...\n"
-      result = Analyser.new(model).analyse(arctic_file(:txt), RECORDINGS_DIR) do |transcription, hypothesis, accuracy|
+      result = Analyser.new(model_folder).analyse(arctic_file(:txt), Sphinxtrain.recordings_dir) do |transcription, hypothesis, accuracy|
         puts "   ACTUAL: #{transcription}"
         puts "   RECORD: #{hypothesis}"
         puts "   RESULT: #{accuracy}\n\n"
       end
       puts "   OVERALL: #{result}\n\n"
+      result
     end
     def duplicate_model
       log "=> Duplicating Voxforge acoustic model..."
-      FileUtils.rm_rf(NEW_MODEL) if Dir.exist?(NEW_MODEL)
-      FileUtils.cp_r(VOXFORGE_MODEL, NEW_MODEL)
+      acoustic_model.duplicate!
     end
     # Follows process described here: http://cmusphinx.sourceforge.net/wiki/tutorialadapt
@@ -112,17 +115,17 @@ module Sphinxtrain
       log "=> Adapting Voxforge acoustic model..."
       MapAdapter.new(
-        old_model: VOXFORGE_MODEL,
-        new_model: NEW_MODEL,
-        recordings_dir: RECORDINGS_DIR,
+        old_model: acoustic_model.folder,
+        new_model: acoustic_model.adapted_folder,
+        recordings_dir: Sphinxtrain.recordings_dir,
         sentences_transcription: arctic_file(:transcription),
         sentences_files: arctic_file(:listoffiles),
         sentences_dict: arctic_file(:dic)
       ).adapt
     end
-    def log(message)
-      puts message.colorize(:green)
+    def log(message, color = :green)
+      puts message.colorize(color)
     end
   end
 end

data/lib/sphinxtrain/version.rb CHANGED

@@ -1,3 +1,3 @@
 module Sphinxtrain
-  VERSION = "0.0.2"
+  VERSION = "0.0.3"
 end

data/sphinxtrain-ruby.gemspec CHANGED

@@ -19,7 +19,7 @@ Gem::Specification.new do |spec|
   spec.require_paths = ["lib"]
   spec.required_ruby_version = '>= 2.1.0'
-  spec.add_dependency "pocketsphinx-ruby", "~> 0.2.0"
+  spec.add_dependency "pocketsphinx-ruby", "~> 0.3.0"
   spec.add_dependency "word_aligner", "~> 0.1.2"
   spec.add_dependency "colorize", "~> 0.7.3"

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: sphinxtrain-ruby
 version: !ruby/object:Gem::Version
-  version: 0.0.2
+  version: 0.0.3
 platform: ruby
 authors:
 - Howard Wilson
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-03-03 00:00:00.000000000 Z
+date: 2015-11-11 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: pocketsphinx-ruby
@@ -16,14 +16,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.2.0
+        version: 0.3.0
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.2.0
+        version: 0.3.0
 - !ruby/object:Gem::Dependency
   name: word_aligner
   requirement: !ruby/object:Gem::Requirement
@@ -112,6 +112,7 @@ files:
 - bin/sphinxtrain-ruby
 - lib/sphinxtrain-ruby.rb
 - lib/sphinxtrain.rb
+- lib/sphinxtrain/acoustic_model.rb
 - lib/sphinxtrain/analyser.rb
 - lib/sphinxtrain/map_adapter.rb
 - lib/sphinxtrain/trainer.rb
@@ -139,7 +140,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.2.2
+rubygems_version: 2.4.6
 signing_key:
 specification_version: 4
 summary: Toolkit for training/adapting CMU Sphinx acoustic models.