sphinxtrain-ruby 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7d2ce4b3bc287d0a77c6ccf9896f383aea9877ce
4
- data.tar.gz: 9231781f44272e9669436d7534da94cbb6810a0b
3
+ metadata.gz: c757dbd07d39404598cbf6e08425f063fc1761fd
4
+ data.tar.gz: b2ab016cbc6adcda04478240898873cc8584988d
5
5
  SHA512:
6
- metadata.gz: ff3208a722ed8f230052f405562cc1dc9f63c869fa9267dc6bebe1540a0500288c0f3ee216436be714ae61d2132f17450d973872242ecb76095350b87d187909
7
- data.tar.gz: 63aa01337cc94d363358dff686c38547a8d903cc34e34a7d92ca473241490d8f979c82bff5e5cdd341f3cb69f1367a95b90a4c964fafb1fbf03e97686e4f2347
6
+ metadata.gz: 29da259c4c7be47c1a7c8514ec7b3038fb1b00aa734a10bd3046d9585b9e7ffe9c18cf304428681bd842807e166867f43569c7009346a59a62f826262ffcb274
7
+ data.tar.gz: 07b1d383642233e8f2d9c13bce6e9011023391fb1851a7dad1466e9387a680e45238e36f0b2dba58e449a0fe12d0ed23bd064ff51f2ddc7b82e11b3ba9b9dc8f
data/README.md CHANGED
@@ -32,7 +32,16 @@ $ gem install sphinxtrain-ruby
32
32
 
33
33
  ## Usage
34
34
 
35
- Run `sphinxtrain-ruby` from the command line and follow the instructions. See some example output [here](https://github.com/watsonbox/sphinxtrain-ruby/wiki/Example-Output).
35
+ Run `sphinxtrain-ruby` from the command line and follow the instructions. It will:
36
+
37
+ 1. Download and extract the Grasch Voxforge English 0.4 acoustic model (on first run)
38
+ 2. Download the CMU ARCTIC example sentences (on first run)
39
+ 3. Record the 20 example sentences. Press enter to record, speak the sentence, then wait.
40
+ 4. Decode the sentences using the base acoustic model, giving an overall score.
41
+ 5. Duplicate and adapt the base acoustic model using the recorded sentences.
42
+ 6. Decode the sentences using the adapted acoustic model, giving an overall score.
43
+
44
+ See some example output [here](https://github.com/watsonbox/sphinxtrain-ruby/wiki/Example-Output). All data is saved in `~/.sphinxtrain-ruby`.
36
45
 
37
46
 
38
47
  ## To Do
@@ -5,9 +5,16 @@ require "word_aligner"
5
5
  require "sphinxtrain/version"
6
6
  require "sphinxtrain/analyser"
7
7
  require "sphinxtrain/map_adapter"
8
+ require "sphinxtrain/acoustic_model"
8
9
  require "sphinxtrain/training_decoder"
9
10
  require "sphinxtrain/trainer"
10
11
 
11
12
  module Sphinxtrain
12
- # Your code goes here...
13
+ def self.base_dir
14
+ File.join(Dir.home, '.sphinxtrain-ruby')
15
+ end
16
+
17
+ def self.recordings_dir
18
+ File.join(base_dir, 'recordings')
19
+ end
13
20
  end
@@ -0,0 +1,45 @@
1
require 'fileutils'

module Sphinxtrain
  # An acoustic model archive identified by the URL it is downloaded from.
  #
  # Every path this class produces is a bare file or folder name derived from
  # the URL, so the filesystem methods (#downloaded?, #download!, #duplicate!)
  # operate relative to the current working directory.
  class AcousticModel < Struct.new(:url)
    # Known model identifiers mapped to their download URLs.
    MODEL_URLS = {
      voxforge_grasch: "http://files.kde.org/accessibility/Simon/am/voxforge_en_sphinx.cd_cont_5000.tar.gz"
    }

    # Human-readable names for the known model identifiers.
    MODEL_DESCRIPTIONS = {
      voxforge_grasch: "Grasch Voxforge English 0.4"
    }

    # Builds the model registered under :voxforge_grasch.
    #
    # @return [AcousticModel]
    def self.voxforge_grasch
      new MODEL_URLS[:voxforge_grasch]
    end

    # Returns the registered description for this model's URL, or the URL
    # itself when the URL is not one of the known models.
    #
    # @return [String]
    def description
      MODEL_DESCRIPTIONS[MODEL_URLS.key(url)] || url
    end

    # Whether the archive file is present in the current directory.
    #
    # @return [Boolean]
    def downloaded?
      File.exist?(downloaded_filename)
    end

    # File name of the archive: the last path component of the URL.
    #
    # @return [String]
    def downloaded_filename
      File.basename(url)
    end

    # Folder name of the model: the archive name without its `.tar.gz` suffix.
    #
    # @return [String]
    def folder
      File.basename(downloaded_filename, '.tar.gz')
    end

    # Folder name used for the adapted copy of the model.
    #
    # @return [String]
    def adapted_folder
      "#{folder}_adapted"
    end

    # Fetches the archive with `wget` and unpacks it with `tar`, both in the
    # current directory.
    #
    # The commands are run with an argument list rather than through a shell,
    # so characters in the URL are never interpreted as shell syntax.
    #
    # @raise [RuntimeError] if either command cannot be started or exits
    #   with a non-zero status
    # @return [nil]
    def download!
      run_command!("wget", url)
      run_command!("tar", "xfz", downloaded_filename)
    end

    # Replaces #adapted_folder with a fresh recursive copy of #folder.
    def duplicate!
      # rm_rf is a no-op when the target is absent, so no existence check is
      # needed; it also clears a stray regular file that would block the copy.
      FileUtils.rm_rf(adapted_folder)
      FileUtils.cp_r(folder, adapted_folder)
    end

    private

    # Runs an external command and raises unless it succeeds.
    def run_command!(*command)
      return if system(*command)

      raise "Command failed: #{command.join(' ')}"
    end
  end
end
@@ -7,11 +7,20 @@ module Sphinxtrain
7
7
 
8
8
  def analyse(sentences_file, recordings_dir)
9
9
  total = 0
10
+ first_decoding = true
10
11
 
11
12
  File.open(sentences_file).each_line.with_index do |transcription, index|
12
13
  transcription = transcription.downcase.gsub(/[,\.]/, '')
13
14
  file_path = File.join(recordings_dir, "arctic_#{(index + 1).to_s.rjust(4, "0")}.raw")
14
15
  decoder.decode file_path
16
+
17
+ # Repeat the first decoding after CMN estimations are calculated
18
+ # See https://github.com/watsonbox/pocketsphinx-ruby/issues/10
19
+ if first_decoding
20
+ first_decoding = false
21
+ redo
22
+ end
23
+
15
24
  hypothesis = decoder.hypothesis
16
25
  error_rate = WordAligner.align(transcription, hypothesis)
17
26
  total += error_rate.percentage_accurate
@@ -2,42 +2,46 @@ require 'fileutils'
2
2
 
3
3
  module Sphinxtrain
4
4
  class Trainer
5
- BASE_DIR = File.join(Dir.home, '.sphinxtrain-ruby')
6
- #VOXFORGE_URL = "http://downloads.sourceforge.net/project/cmusphinx/Acoustic%20and%20Language%20Models/English%20Voxforge/voxforge-en-0.4.tar.gz"
7
- VOXFORGE_URL = "http://files.kde.org/accessibility/Simon/am/voxforge_en_sphinx.cd_cont_5000.tar.gz"
8
- VOXFORGE_FILE = File.basename(VOXFORGE_URL)
9
- VOXFORGE_FOLDER = File.basename(VOXFORGE_FILE, '.tar.gz')
10
- #VOXFORGE_MODEL = File.join(BASE_DIR, VOXFORGE_FOLDER, "model_parameters/voxforge_en_sphinx.cd_cont_5000")
11
- VOXFORGE_MODEL = VOXFORGE_FOLDER
12
- RECORDINGS_DIR = File.join(BASE_DIR, 'recordings')
13
- NEW_MODEL = File.join(BASE_DIR, 'new_model')
5
+ def acoustic_model
6
+ @acoustic_model ||= AcousticModel.voxforge_grasch
7
+ end
14
8
 
15
9
  def train
16
10
  Pocketsphinx.disable_logging
17
11
 
18
- Dir.mkdir BASE_DIR rescue Errno::EEXIST
19
- Dir.chdir BASE_DIR do
20
- download_voxforge unless File.exist?(VOXFORGE_FILE)
12
+ Dir.mkdir Sphinxtrain.base_dir rescue Errno::EEXIST
13
+ Dir.chdir Sphinxtrain.base_dir do
14
+ if acoustic_model.downloaded?
15
+ log "=> Using existing acoustic model #{acoustic_model.description}", :yellow
16
+ else
17
+ log "=> Downloading #{acoustic_model.description}..."
18
+ acoustic_model.download!
19
+ end
20
+
21
21
  download_assets unless arctic_file(:txt, :listoffiles, :transcription, :dic).all? { |f| File.exist? f }
22
- record_sentences unless Dir.exist?(RECORDINGS_DIR)
23
22
 
24
- analyse_model VOXFORGE_MODEL
23
+ if Dir.exist?(Sphinxtrain.recordings_dir)
24
+ log "=> Using sentences recorded in #{Sphinxtrain.recordings_dir}", :yellow
25
+ else
26
+ record_sentences
27
+ end
28
+
29
+ result = analyse_model
25
30
 
26
31
  duplicate_model
27
32
  adapt_model
28
33
 
29
- analyse_model NEW_MODEL
34
+ adapted_result = analyse_model acoustic_model.adapted_folder
35
+
36
+ improvement = ((adapted_result/result)-1)*100
37
+
38
+ log "=> Adapted acoustic model improved by #{improvement}%. Test this model with:"
39
+ log "=> pocketsphinx_continuous -hmm #{File.join(Sphinxtrain.base_dir, acoustic_model.adapted_folder)} -inmic yes"
30
40
  end
31
41
  end
32
42
 
33
43
  private
34
44
 
35
- def download_voxforge
36
- log "=> Downloading Voxforge English 0.4 Acoustic Model..."
37
- `wget #{VOXFORGE_URL}`
38
- `tar xfz #{VOXFORGE_FILE}`
39
- end
40
-
41
45
  def download_assets
42
46
  log "=> Downloading CMU ARCTIC Example Sentences..."
43
47
 
@@ -52,7 +56,7 @@ module Sphinxtrain
52
56
 
53
57
  def record_sentences
54
58
  log "=> Recording sentences..."
55
- Dir.mkdir RECORDINGS_DIR unless Dir.exist?(RECORDINGS_DIR)
59
+ Dir.mkdir Sphinxtrain.recordings_dir unless Dir.exist?(Sphinxtrain.recordings_dir)
56
60
 
57
61
  recognizer = Pocketsphinx::LiveSpeechRecognizer.new
58
62
  decoder = TrainingDecoder.new(recognizer.decoder)
@@ -83,28 +87,27 @@ module Sphinxtrain
83
87
  def save_audio(data, sentence_index)
84
88
  raise "Can't save empty audio data" if data.nil? || data.empty?
85
89
 
86
- File.open(File.join(RECORDINGS_DIR, "arctic_#{(sentence_index + 1).to_s.rjust(4, "0")}.raw"), "wb") do |file|
90
+ File.open(File.join(Sphinxtrain.recordings_dir, "arctic_#{(sentence_index + 1).to_s.rjust(4, "0")}.raw"), "wb") do |file|
87
91
  file.write data
88
92
  end
89
93
  end
90
94
 
91
- def analyse_model(model)
95
+ def analyse_model(model_folder = acoustic_model.folder)
92
96
  log "=> Analysing acoustic model...\n"
93
97
 
94
- result = Analyser.new(model).analyse(arctic_file(:txt), RECORDINGS_DIR) do |transcription, hypothesis, accuracy|
98
+ result = Analyser.new(model_folder).analyse(arctic_file(:txt), Sphinxtrain.recordings_dir) do |transcription, hypothesis, accuracy|
95
99
  puts " ACTUAL: #{transcription}"
96
100
  puts " RECORD: #{hypothesis}"
97
101
  puts " RESULT: #{accuracy}\n\n"
98
102
  end
99
103
 
100
104
  puts " OVERALL: #{result}\n\n"
105
+ result
101
106
  end
102
107
 
103
108
  def duplicate_model
104
109
  log "=> Duplicating Voxforge acoustic model..."
105
-
106
- FileUtils.rm_rf(NEW_MODEL) if Dir.exist?(NEW_MODEL)
107
- FileUtils.cp_r(VOXFORGE_MODEL, NEW_MODEL)
110
+ acoustic_model.duplicate!
108
111
  end
109
112
 
110
113
  # Follows process described here: http://cmusphinx.sourceforge.net/wiki/tutorialadapt
@@ -112,17 +115,17 @@ module Sphinxtrain
112
115
  log "=> Adapting Voxforge acoustic model..."
113
116
 
114
117
  MapAdapter.new(
115
- old_model: VOXFORGE_MODEL,
116
- new_model: NEW_MODEL,
117
- recordings_dir: RECORDINGS_DIR,
118
+ old_model: acoustic_model.folder,
119
+ new_model: acoustic_model.adapted_folder,
120
+ recordings_dir: Sphinxtrain.recordings_dir,
118
121
  sentences_transcription: arctic_file(:transcription),
119
122
  sentences_files: arctic_file(:listoffiles),
120
123
  sentences_dict: arctic_file(:dic)
121
124
  ).adapt
122
125
  end
123
126
 
124
- def log(message)
125
- puts message.colorize(:green)
127
+ def log(message, color = :green)
128
+ puts message.colorize(color)
126
129
  end
127
130
  end
128
131
  end
@@ -1,3 +1,3 @@
1
1
  module Sphinxtrain
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
@@ -19,7 +19,7 @@ Gem::Specification.new do |spec|
19
19
  spec.require_paths = ["lib"]
20
20
  spec.required_ruby_version = '>= 2.1.0'
21
21
 
22
- spec.add_dependency "pocketsphinx-ruby", "~> 0.2.0"
22
+ spec.add_dependency "pocketsphinx-ruby", "~> 0.3.0"
23
23
  spec.add_dependency "word_aligner", "~> 0.1.2"
24
24
  spec.add_dependency "colorize", "~> 0.7.3"
25
25
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sphinxtrain-ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Howard Wilson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-03-03 00:00:00.000000000 Z
11
+ date: 2015-11-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: pocketsphinx-ruby
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 0.2.0
19
+ version: 0.3.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 0.2.0
26
+ version: 0.3.0
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: word_aligner
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -112,6 +112,7 @@ files:
112
112
  - bin/sphinxtrain-ruby
113
113
  - lib/sphinxtrain-ruby.rb
114
114
  - lib/sphinxtrain.rb
115
+ - lib/sphinxtrain/acoustic_model.rb
115
116
  - lib/sphinxtrain/analyser.rb
116
117
  - lib/sphinxtrain/map_adapter.rb
117
118
  - lib/sphinxtrain/trainer.rb
@@ -139,7 +140,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
139
140
  version: '0'
140
141
  requirements: []
141
142
  rubyforge_project:
142
- rubygems_version: 2.2.2
143
+ rubygems_version: 2.4.6
143
144
  signing_key:
144
145
  specification_version: 4
145
146
  summary: Toolkit for training/adapting CMU Sphinx acoustic models.