sphinxtrain-ruby 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7d2ce4b3bc287d0a77c6ccf9896f383aea9877ce
4
- data.tar.gz: 9231781f44272e9669436d7534da94cbb6810a0b
3
+ metadata.gz: c757dbd07d39404598cbf6e08425f063fc1761fd
4
+ data.tar.gz: b2ab016cbc6adcda04478240898873cc8584988d
5
5
  SHA512:
6
- metadata.gz: ff3208a722ed8f230052f405562cc1dc9f63c869fa9267dc6bebe1540a0500288c0f3ee216436be714ae61d2132f17450d973872242ecb76095350b87d187909
7
- data.tar.gz: 63aa01337cc94d363358dff686c38547a8d903cc34e34a7d92ca473241490d8f979c82bff5e5cdd341f3cb69f1367a95b90a4c964fafb1fbf03e97686e4f2347
6
+ metadata.gz: 29da259c4c7be47c1a7c8514ec7b3038fb1b00aa734a10bd3046d9585b9e7ffe9c18cf304428681bd842807e166867f43569c7009346a59a62f826262ffcb274
7
+ data.tar.gz: 07b1d383642233e8f2d9c13bce6e9011023391fb1851a7dad1466e9387a680e45238e36f0b2dba58e449a0fe12d0ed23bd064ff51f2ddc7b82e11b3ba9b9dc8f
data/README.md CHANGED
@@ -32,7 +32,16 @@ $ gem install sphinxtrain-ruby
32
32
 
33
33
  ## Usage
34
34
 
35
- Run `sphinxtrain-ruby` from the command line and follow the instructions. See some example output [here](https://github.com/watsonbox/sphinxtrain-ruby/wiki/Example-Output).
35
+ Run `sphinxtrain-ruby` from the command line and follow the instructions. It will:
36
+
37
+ 1. Download and extract the Grasch Voxforge English 0.4 acoustic model (on first run)
38
+ 2. Download the CMU ARCTIC example sentences (on first run)
39
+ 3. Record the 20 example sentences. Press enter to record, speak sentence, then wait.
40
+ 4. Decode the sentences using the base acoustic model, giving an overall score.
41
+ 5. Duplicate and adapt the base acoustic model using the recorded sentences.
42
+ 6. Decode the sentences using the adapted acoustic model, giving an overall score.
43
+
44
+ See some example output [here](https://github.com/watsonbox/sphinxtrain-ruby/wiki/Example-Output). All data is saved in `~/.sphinxtrain-ruby`.
36
45
 
37
46
 
38
47
  ## To Do
data/lib/sphinxtrain.rb CHANGED
@@ -5,9 +5,16 @@ require "word_aligner"
5
5
  require "sphinxtrain/version"
6
6
  require "sphinxtrain/analyser"
7
7
  require "sphinxtrain/map_adapter"
8
+ require "sphinxtrain/acoustic_model"
8
9
  require "sphinxtrain/training_decoder"
9
10
  require "sphinxtrain/trainer"
10
11
 
11
12
  module Sphinxtrain
12
- # Your code goes here...
13
+ def self.base_dir
14
+ File.join(Dir.home, '.sphinxtrain-ruby')
15
+ end
16
+
17
+ def self.recordings_dir
18
+ File.join(base_dir, 'recordings')
19
+ end
13
20
  end
data/lib/sphinxtrain/acoustic_model.rb ADDED
@@ -0,0 +1,45 @@
1
+ module Sphinxtrain
2
+ class AcousticModel < Struct.new(:url)
3
+ MODEL_URLS = {
4
+ voxforge_grasch: "http://files.kde.org/accessibility/Simon/am/voxforge_en_sphinx.cd_cont_5000.tar.gz"
5
+ }
6
+
7
+ MODEL_DESCRIPTIONS = {
8
+ voxforge_grasch: "Grasch Voxforge English 0.4"
9
+ }
10
+
11
+ def self.voxforge_grasch
12
+ new MODEL_URLS[:voxforge_grasch]
13
+ end
14
+
15
+ def description
16
+ MODEL_DESCRIPTIONS[MODEL_URLS.invert[url]] || url
17
+ end
18
+
19
+ def downloaded?
20
+ File.exist?(downloaded_filename)
21
+ end
22
+
23
+ def downloaded_filename
24
+ File.basename(url)
25
+ end
26
+
27
+ def folder
28
+ File.basename(downloaded_filename, '.tar.gz')
29
+ end
30
+
31
+ def adapted_folder
32
+ folder + "_adapted"
33
+ end
34
+
35
+ def download!
36
+ `wget #{url}`
37
+ `tar xfz #{downloaded_filename}`
38
+ end
39
+
40
+ def duplicate!
41
+ FileUtils.rm_rf(adapted_folder) if Dir.exist?(adapted_folder)
42
+ FileUtils.cp_r(folder, adapted_folder)
43
+ end
44
+ end
45
+ end
data/lib/sphinxtrain/analyser.rb CHANGED
@@ -7,11 +7,20 @@ module Sphinxtrain
7
7
 
8
8
  def analyse(sentences_file, recordings_dir)
9
9
  total = 0
10
+ first_decoding = true
10
11
 
11
12
  File.open(sentences_file).each_line.with_index do |transcription, index|
12
13
  transcription = transcription.downcase.gsub(/[,\.]/, '')
13
14
  file_path = File.join(recordings_dir, "arctic_#{(index + 1).to_s.rjust(4, "0")}.raw")
14
15
  decoder.decode file_path
16
+
17
+ # Repeat the first decoding after CMN estimations are calculated
18
+ # See https://github.com/watsonbox/pocketsphinx-ruby/issues/10
19
+ if first_decoding
20
+ first_decoding = false
21
+ redo
22
+ end
23
+
15
24
  hypothesis = decoder.hypothesis
16
25
  error_rate = WordAligner.align(transcription, hypothesis)
17
26
  total += error_rate.percentage_accurate
data/lib/sphinxtrain/trainer.rb CHANGED
@@ -2,42 +2,46 @@ require 'fileutils'
2
2
 
3
3
  module Sphinxtrain
4
4
  class Trainer
5
- BASE_DIR = File.join(Dir.home, '.sphinxtrain-ruby')
6
- #VOXFORGE_URL = "http://downloads.sourceforge.net/project/cmusphinx/Acoustic%20and%20Language%20Models/English%20Voxforge/voxforge-en-0.4.tar.gz"
7
- VOXFORGE_URL = "http://files.kde.org/accessibility/Simon/am/voxforge_en_sphinx.cd_cont_5000.tar.gz"
8
- VOXFORGE_FILE = File.basename(VOXFORGE_URL)
9
- VOXFORGE_FOLDER = File.basename(VOXFORGE_FILE, '.tar.gz')
10
- #VOXFORGE_MODEL = File.join(BASE_DIR, VOXFORGE_FOLDER, "model_parameters/voxforge_en_sphinx.cd_cont_5000")
11
- VOXFORGE_MODEL = VOXFORGE_FOLDER
12
- RECORDINGS_DIR = File.join(BASE_DIR, 'recordings')
13
- NEW_MODEL = File.join(BASE_DIR, 'new_model')
5
+ def acoustic_model
6
+ @acoustic_model ||= AcousticModel.voxforge_grasch
7
+ end
14
8
 
15
9
  def train
16
10
  Pocketsphinx.disable_logging
17
11
 
18
- Dir.mkdir BASE_DIR rescue Errno::EEXIST
19
- Dir.chdir BASE_DIR do
20
- download_voxforge unless File.exist?(VOXFORGE_FILE)
12
+ Dir.mkdir Sphinxtrain.base_dir rescue Errno::EEXIST
13
+ Dir.chdir Sphinxtrain.base_dir do
14
+ if acoustic_model.downloaded?
15
+ log "=> Using existing acoustic model #{acoustic_model.description}", :yellow
16
+ else
17
+ log "=> Downloading #{acoustic_model.description}..."
18
+ acoustic_model.download!
19
+ end
20
+
21
21
  download_assets unless arctic_file(:txt, :listoffiles, :transcription, :dic).all? { |f| File.exist? f }
22
- record_sentences unless Dir.exist?(RECORDINGS_DIR)
23
22
 
24
- analyse_model VOXFORGE_MODEL
23
+ if Dir.exist?(Sphinxtrain.recordings_dir)
24
+ log "=> Using sentences recorded in #{Sphinxtrain.recordings_dir}", :yellow
25
+ else
26
+ record_sentences
27
+ end
28
+
29
+ result = analyse_model
25
30
 
26
31
  duplicate_model
27
32
  adapt_model
28
33
 
29
- analyse_model NEW_MODEL
34
+ adapted_result = analyse_model acoustic_model.adapted_folder
35
+
36
+ improvement = ((adapted_result/result)-1)*100
37
+
38
+ log "=> Adapted acoustic model improved by #{improvement}%. Test this model with:"
39
+ log "=> pocketsphinx_continuous -hmm #{File.join(Sphinxtrain.base_dir, acoustic_model.adapted_folder)} -inmic yes"
30
40
  end
31
41
  end
32
42
 
33
43
  private
34
44
 
35
- def download_voxforge
36
- log "=> Downloading Voxforge English 0.4 Acoustic Model..."
37
- `wget #{VOXFORGE_URL}`
38
- `tar xfz #{VOXFORGE_FILE}`
39
- end
40
-
41
45
  def download_assets
42
46
  log "=> Downloading CMU ARCTIC Example Sentences..."
43
47
 
@@ -52,7 +56,7 @@ module Sphinxtrain
52
56
 
53
57
  def record_sentences
54
58
  log "=> Recording sentences..."
55
- Dir.mkdir RECORDINGS_DIR unless Dir.exist?(RECORDINGS_DIR)
59
+ Dir.mkdir Sphinxtrain.recordings_dir unless Dir.exist?(Sphinxtrain.recordings_dir)
56
60
 
57
61
  recognizer = Pocketsphinx::LiveSpeechRecognizer.new
58
62
  decoder = TrainingDecoder.new(recognizer.decoder)
@@ -83,28 +87,27 @@ module Sphinxtrain
83
87
  def save_audio(data, sentence_index)
84
88
  raise "Can't save empty audio data" if data.nil? || data.empty?
85
89
 
86
- File.open(File.join(RECORDINGS_DIR, "arctic_#{(sentence_index + 1).to_s.rjust(4, "0")}.raw"), "wb") do |file|
90
+ File.open(File.join(Sphinxtrain.recordings_dir, "arctic_#{(sentence_index + 1).to_s.rjust(4, "0")}.raw"), "wb") do |file|
87
91
  file.write data
88
92
  end
89
93
  end
90
94
 
91
- def analyse_model(model)
95
+ def analyse_model(model_folder = acoustic_model.folder)
92
96
  log "=> Analysing acoustic model...\n"
93
97
 
94
- result = Analyser.new(model).analyse(arctic_file(:txt), RECORDINGS_DIR) do |transcription, hypothesis, accuracy|
98
+ result = Analyser.new(model_folder).analyse(arctic_file(:txt), Sphinxtrain.recordings_dir) do |transcription, hypothesis, accuracy|
95
99
  puts " ACTUAL: #{transcription}"
96
100
  puts " RECORD: #{hypothesis}"
97
101
  puts " RESULT: #{accuracy}\n\n"
98
102
  end
99
103
 
100
104
  puts " OVERALL: #{result}\n\n"
105
+ result
101
106
  end
102
107
 
103
108
  def duplicate_model
104
109
  log "=> Duplicating Voxforge acoustic model..."
105
-
106
- FileUtils.rm_rf(NEW_MODEL) if Dir.exist?(NEW_MODEL)
107
- FileUtils.cp_r(VOXFORGE_MODEL, NEW_MODEL)
110
+ acoustic_model.duplicate!
108
111
  end
109
112
 
110
113
  # Follows process described here: http://cmusphinx.sourceforge.net/wiki/tutorialadapt
@@ -112,17 +115,17 @@ module Sphinxtrain
112
115
  log "=> Adapting Voxforge acoustic model..."
113
116
 
114
117
  MapAdapter.new(
115
- old_model: VOXFORGE_MODEL,
116
- new_model: NEW_MODEL,
117
- recordings_dir: RECORDINGS_DIR,
118
+ old_model: acoustic_model.folder,
119
+ new_model: acoustic_model.adapted_folder,
120
+ recordings_dir: Sphinxtrain.recordings_dir,
118
121
  sentences_transcription: arctic_file(:transcription),
119
122
  sentences_files: arctic_file(:listoffiles),
120
123
  sentences_dict: arctic_file(:dic)
121
124
  ).adapt
122
125
  end
123
126
 
124
- def log(message)
125
- puts message.colorize(:green)
127
+ def log(message, color = :green)
128
+ puts message.colorize(color)
126
129
  end
127
130
  end
128
131
  end
data/lib/sphinxtrain/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Sphinxtrain
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
@@ -19,7 +19,7 @@ Gem::Specification.new do |spec|
19
19
  spec.require_paths = ["lib"]
20
20
  spec.required_ruby_version = '>= 2.1.0'
21
21
 
22
- spec.add_dependency "pocketsphinx-ruby", "~> 0.2.0"
22
+ spec.add_dependency "pocketsphinx-ruby", "~> 0.3.0"
23
23
  spec.add_dependency "word_aligner", "~> 0.1.2"
24
24
  spec.add_dependency "colorize", "~> 0.7.3"
25
25
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sphinxtrain-ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Howard Wilson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-03-03 00:00:00.000000000 Z
11
+ date: 2015-11-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: pocketsphinx-ruby
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 0.2.0
19
+ version: 0.3.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 0.2.0
26
+ version: 0.3.0
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: word_aligner
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -112,6 +112,7 @@ files:
112
112
  - bin/sphinxtrain-ruby
113
113
  - lib/sphinxtrain-ruby.rb
114
114
  - lib/sphinxtrain.rb
115
+ - lib/sphinxtrain/acoustic_model.rb
115
116
  - lib/sphinxtrain/analyser.rb
116
117
  - lib/sphinxtrain/map_adapter.rb
117
118
  - lib/sphinxtrain/trainer.rb
@@ -139,7 +140,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
139
140
  version: '0'
140
141
  requirements: []
141
142
  rubyforge_project:
142
- rubygems_version: 2.2.2
143
+ rubygems_version: 2.4.6
143
144
  signing_key:
144
145
  specification_version: 4
145
146
  summary: Toolkit for training/adapting CMU Sphinx acoustic models.