sphinxtrain-ruby 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +10 -1
- data/lib/sphinxtrain.rb +8 -1
- data/lib/sphinxtrain/acoustic_model.rb +45 -0
- data/lib/sphinxtrain/analyser.rb +9 -0
- data/lib/sphinxtrain/trainer.rb +36 -33
- data/lib/sphinxtrain/version.rb +1 -1
- data/sphinxtrain-ruby.gemspec +1 -1
- metadata +6 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c757dbd07d39404598cbf6e08425f063fc1761fd
|
4
|
+
data.tar.gz: b2ab016cbc6adcda04478240898873cc8584988d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 29da259c4c7be47c1a7c8514ec7b3038fb1b00aa734a10bd3046d9585b9e7ffe9c18cf304428681bd842807e166867f43569c7009346a59a62f826262ffcb274
|
7
|
+
data.tar.gz: 07b1d383642233e8f2d9c13bce6e9011023391fb1851a7dad1466e9387a680e45238e36f0b2dba58e449a0fe12d0ed23bd064ff51f2ddc7b82e11b3ba9b9dc8f
|
data/README.md
CHANGED
@@ -32,7 +32,16 @@ $ gem install sphinxtrain-ruby
|
|
32
32
|
|
33
33
|
## Usage
|
34
34
|
|
35
|
-
Run `sphinxtrain-ruby` from the command line and follow the instructions.
|
35
|
+
Run `sphinxtrain-ruby` from the command line and follow the instructions. It will:
|
36
|
+
|
37
|
+
1. Download and extract the Grasch Voxforge English 0.4 acoustic model (on first run)
|
38
|
+
2. Download the CMU ARCTIC example sentences (on first run)
|
39
|
+
3. Record the 20 example sentences. Press enter to record, speak the sentence, then wait.
|
40
|
+
4. Decode the sentences using the base acoustic model, giving an overall score.
|
41
|
+
5. Duplicate and adapt the base acoustic model using the recorded sentences.
|
42
|
+
6. Decode the sentences using the adapted acoustic model, giving an overall score.
|
43
|
+
|
44
|
+
See some example output [here](https://github.com/watsonbox/sphinxtrain-ruby/wiki/Example-Output). All data is saved in `~/.sphinxtrain-ruby`.
|
36
45
|
|
37
46
|
|
38
47
|
## To Do
|
data/lib/sphinxtrain.rb
CHANGED
@@ -5,9 +5,16 @@ require "word_aligner"
|
|
5
5
|
require "sphinxtrain/version"
|
6
6
|
require "sphinxtrain/analyser"
|
7
7
|
require "sphinxtrain/map_adapter"
|
8
|
+
require "sphinxtrain/acoustic_model"
|
8
9
|
require "sphinxtrain/training_decoder"
|
9
10
|
require "sphinxtrain/trainer"
|
10
11
|
|
11
12
|
# Namespace for the gem; also exposes the on-disk locations it works in.
module Sphinxtrain
  class << self
    # Directory in the user's home folder that holds all of the gem's data.
    def base_dir
      File.join(Dir.home, '.sphinxtrain-ruby')
    end

    # Sub-directory of base_dir that holds the recorded sentences.
    def recordings_dir
      File.join(base_dir, 'recordings')
    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'fileutils'

module Sphinxtrain
  # An acoustic model distributed as a .tar.gz archive at +url+.
  #
  # Every file and folder name used by this class is relative, so all
  # operations act on the current working directory.
  class AcousticModel < Struct.new(:url)
    # Archive URLs of the known models, keyed by model name.
    MODEL_URLS = {
      voxforge_grasch: "http://files.kde.org/accessibility/Simon/am/voxforge_en_sphinx.cd_cont_5000.tar.gz"
    }.freeze

    # Human-readable names of the known models, keyed by model name.
    MODEL_DESCRIPTIONS = {
      voxforge_grasch: "Grasch Voxforge English 0.4"
    }.freeze

    # Builds the model registered under :voxforge_grasch.
    def self.voxforge_grasch
      new MODEL_URLS[:voxforge_grasch]
    end

    # Returns the registered description for this model's URL, or the URL
    # itself when the model is not a known one.
    def description
      MODEL_DESCRIPTIONS.fetch(MODEL_URLS.key(url), url)
    end

    # True when the archive file is present in the current directory.
    def downloaded?
      File.exist?(downloaded_filename)
    end

    # File name of the archive (last path segment of the URL).
    def downloaded_filename
      File.basename(url)
    end

    # Name of the model folder: the archive name without ".tar.gz".
    def folder
      File.basename(downloaded_filename, '.tar.gz')
    end

    # Name of the folder that holds the adapted copy of the model.
    def adapted_folder
      folder + "_adapted"
    end

    # Fetches the archive with wget and unpacks it with tar, both in the
    # current directory.
    #
    # Raises RuntimeError when either command fails. The archive is removed
    # in that case so that downloaded? does not later report a partial or
    # corrupt download as present.
    def download!
      run!('wget', url)
      run!('tar', 'xfz', downloaded_filename)
    rescue StandardError
      FileUtils.rm_f(downloaded_filename)
      raise
    end

    # Replaces adapted_folder with a fresh recursive copy of folder.
    def duplicate!
      # rm_rf is a no-op when the folder is absent; it must go first because
      # cp_r would otherwise copy *into* an existing adapted folder.
      FileUtils.rm_rf(adapted_folder)
      FileUtils.cp_r(folder, adapted_folder)
    end

    private

    # Runs an external command without a shell (arguments are passed as-is,
    # so the URL is never shell-interpreted) and raises unless it succeeds.
    def run!(*command)
      system(*command) || raise("Command failed: #{command.join(' ')}")
    end
  end
end
|
data/lib/sphinxtrain/analyser.rb
CHANGED
@@ -7,11 +7,20 @@ module Sphinxtrain
|
|
7
7
|
|
8
8
|
def analyse(sentences_file, recordings_dir)
|
9
9
|
total = 0
|
10
|
+
first_decoding = true
|
10
11
|
|
11
12
|
File.open(sentences_file).each_line.with_index do |transcription, index|
|
12
13
|
transcription = transcription.downcase.gsub(/[,\.]/, '')
|
13
14
|
file_path = File.join(recordings_dir, "arctic_#{(index + 1).to_s.rjust(4, "0")}.raw")
|
14
15
|
decoder.decode file_path
|
16
|
+
|
17
|
+
# Repeat the first decoding after CMN estimations are calculated
|
18
|
+
# See https://github.com/watsonbox/pocketsphinx-ruby/issues/10
|
19
|
+
if first_decoding
|
20
|
+
first_decoding = false
|
21
|
+
redo
|
22
|
+
end
|
23
|
+
|
15
24
|
hypothesis = decoder.hypothesis
|
16
25
|
error_rate = WordAligner.align(transcription, hypothesis)
|
17
26
|
total += error_rate.percentage_accurate
|
data/lib/sphinxtrain/trainer.rb
CHANGED
@@ -2,42 +2,46 @@ require 'fileutils'
|
|
2
2
|
|
3
3
|
module Sphinxtrain
|
4
4
|
class Trainer
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
VOXFORGE_FILE = File.basename(VOXFORGE_URL)
|
9
|
-
VOXFORGE_FOLDER = File.basename(VOXFORGE_FILE, '.tar.gz')
|
10
|
-
#VOXFORGE_MODEL = File.join(BASE_DIR, VOXFORGE_FOLDER, "model_parameters/voxforge_en_sphinx.cd_cont_5000")
|
11
|
-
VOXFORGE_MODEL = VOXFORGE_FOLDER
|
12
|
-
RECORDINGS_DIR = File.join(BASE_DIR, 'recordings')
|
13
|
-
NEW_MODEL = File.join(BASE_DIR, 'new_model')
|
5
|
+
# Acoustic model this trainer works with; built on first use and then reused.
def acoustic_model
  return @acoustic_model if @acoustic_model

  @acoustic_model = AcousticModel.voxforge_grasch
end
|
14
8
|
|
15
9
|
# Runs the whole adaptation workflow inside Sphinxtrain.base_dir: fetch the
# base acoustic model and the example sentences when missing, record the
# sentences unless a recordings directory already exists, score the base
# model, duplicate and adapt it, score the adapted copy, and finally report
# the relative change together with a command for trying the adapted model.
def train
  Pocketsphinx.disable_logging

  # mkdir_p is a no-op when the directory already exists. The previous
  # `Dir.mkdir ... rescue Errno::EEXIST` modifier swallowed every
  # StandardError (e.g. EACCES), not just "already exists".
  FileUtils.mkdir_p Sphinxtrain.base_dir
  Dir.chdir Sphinxtrain.base_dir do
    if acoustic_model.downloaded?
      log "=> Using existing acoustic model #{acoustic_model.description}", :yellow
    else
      log "=> Downloading #{acoustic_model.description}..."
      acoustic_model.download!
    end

    download_assets unless arctic_file(:txt, :listoffiles, :transcription, :dic).all? { |f| File.exist? f }

    if Dir.exist?(Sphinxtrain.recordings_dir)
      log "=> Using sentences recorded in #{Sphinxtrain.recordings_dir}", :yellow
    else
      record_sentences
    end

    result = analyse_model

    duplicate_model
    adapt_model

    adapted_result = analyse_model acoustic_model.adapted_folder

    # NOTE(review): assumes analyse_model returns a Numeric score — confirm
    # against Analyser#analyse. A zero baseline has no meaningful relative
    # improvement, and to_f keeps Integer scores from being truncated.
    if result.zero?
      log "=> Base acoustic model scored 0, so the relative improvement is undefined. Test the adapted model with:"
    else
      improvement = ((adapted_result.to_f / result) - 1) * 100
      log "=> Adapted acoustic model improved by #{improvement}%. Test this model with:"
    end
    log "=> pocketsphinx_continuous -hmm #{File.join(Sphinxtrain.base_dir, acoustic_model.adapted_folder)} -inmic yes"
  end
end
|
32
42
|
|
33
43
|
private
|
34
44
|
|
35
|
-
def download_voxforge
|
36
|
-
log "=> Downloading Voxforge English 0.4 Acoustic Model..."
|
37
|
-
`wget #{VOXFORGE_URL}`
|
38
|
-
`tar xfz #{VOXFORGE_FILE}`
|
39
|
-
end
|
40
|
-
|
41
45
|
def download_assets
|
42
46
|
log "=> Downloading CMU ARCTIC Example Sentences..."
|
43
47
|
|
@@ -52,7 +56,7 @@ module Sphinxtrain
|
|
52
56
|
|
53
57
|
def record_sentences
|
54
58
|
log "=> Recording sentences..."
|
55
|
-
Dir.mkdir
|
59
|
+
Dir.mkdir Sphinxtrain.recordings_dir unless Dir.exist?(Sphinxtrain.recordings_dir)
|
56
60
|
|
57
61
|
recognizer = Pocketsphinx::LiveSpeechRecognizer.new
|
58
62
|
decoder = TrainingDecoder.new(recognizer.decoder)
|
@@ -83,28 +87,27 @@ module Sphinxtrain
|
|
83
87
|
# Writes the raw audio for one sentence into the recordings directory as
# arctic_NNNN.raw, where NNNN is the 1-based sentence number zero-padded to
# four digits. Raises when there is no audio data to write.
def save_audio(data, sentence_index)
  raise "Can't save empty audio data" if data.nil? || data.empty?

  recording_name = "arctic_#{(sentence_index + 1).to_s.rjust(4, "0")}.raw"
  recording_path = File.join(Sphinxtrain.recordings_dir, recording_name)

  File.open(recording_path, "wb") { |io| io.write data }
end
|
90
94
|
|
91
|
-
# Decodes the recorded sentences with the model in model_folder (the base
# model's folder by default), printing each transcription, hypothesis and
# per-sentence result, then the overall result. Returns the overall result.
def analyse_model(model_folder = acoustic_model.folder)
  log "=> Analysing acoustic model...\n"

  analyser = Analyser.new(model_folder)
  overall = analyser.analyse(arctic_file(:txt), Sphinxtrain.recordings_dir) do |actual, decoded, score|
    puts " ACTUAL: #{actual}"
    puts " RECORD: #{decoded}"
    puts " RESULT: #{score}\n\n"
  end

  puts " OVERALL: #{overall}\n\n"
  overall
end
|
102
107
|
|
103
108
|
# Announces the step, then asks the acoustic model to duplicate itself.
def duplicate_model
  log("=> Duplicating Voxforge acoustic model...")
  acoustic_model.duplicate!
end
|
109
112
|
|
110
113
|
# Follows process described here: http://cmusphinx.sourceforge.net/wiki/tutorialadapt
|
@@ -112,17 +115,17 @@ module Sphinxtrain
|
|
112
115
|
log "=> Adapting Voxforge acoustic model..."
|
113
116
|
|
114
117
|
MapAdapter.new(
|
115
|
-
old_model:
|
116
|
-
new_model:
|
117
|
-
recordings_dir:
|
118
|
+
old_model: acoustic_model.folder,
|
119
|
+
new_model: acoustic_model.adapted_folder,
|
120
|
+
recordings_dir: Sphinxtrain.recordings_dir,
|
118
121
|
sentences_transcription: arctic_file(:transcription),
|
119
122
|
sentences_files: arctic_file(:listoffiles),
|
120
123
|
sentences_dict: arctic_file(:dic)
|
121
124
|
).adapt
|
122
125
|
end
|
123
126
|
|
124
|
-
# Prints message in the given color (green unless another one is passed).
def log(message, color = :green)
  colored = message.colorize(color)
  puts colored
end
|
127
130
|
end
|
128
131
|
end
|
data/lib/sphinxtrain/version.rb
CHANGED
data/sphinxtrain-ruby.gemspec
CHANGED
@@ -19,7 +19,7 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
spec.required_ruby_version = '>= 2.1.0'
|
21
21
|
|
22
|
-
spec.add_dependency "pocketsphinx-ruby", "~> 0.
|
22
|
+
spec.add_dependency "pocketsphinx-ruby", "~> 0.3.0"
|
23
23
|
spec.add_dependency "word_aligner", "~> 0.1.2"
|
24
24
|
spec.add_dependency "colorize", "~> 0.7.3"
|
25
25
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sphinxtrain-ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Howard Wilson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-11-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: pocketsphinx-ruby
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.
|
19
|
+
version: 0.3.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.
|
26
|
+
version: 0.3.0
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: word_aligner
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -112,6 +112,7 @@ files:
|
|
112
112
|
- bin/sphinxtrain-ruby
|
113
113
|
- lib/sphinxtrain-ruby.rb
|
114
114
|
- lib/sphinxtrain.rb
|
115
|
+
- lib/sphinxtrain/acoustic_model.rb
|
115
116
|
- lib/sphinxtrain/analyser.rb
|
116
117
|
- lib/sphinxtrain/map_adapter.rb
|
117
118
|
- lib/sphinxtrain/trainer.rb
|
@@ -139,7 +140,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
139
140
|
version: '0'
|
140
141
|
requirements: []
|
141
142
|
rubyforge_project:
|
142
|
-
rubygems_version: 2.
|
143
|
+
rubygems_version: 2.4.6
|
143
144
|
signing_key:
|
144
145
|
specification_version: 4
|
145
146
|
summary: Toolkit for training/adapting CMU Sphinx acoustic models.
|