sphinxtrain-ruby 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +10 -1
- data/lib/sphinxtrain.rb +8 -1
- data/lib/sphinxtrain/acoustic_model.rb +45 -0
- data/lib/sphinxtrain/analyser.rb +9 -0
- data/lib/sphinxtrain/trainer.rb +36 -33
- data/lib/sphinxtrain/version.rb +1 -1
- data/sphinxtrain-ruby.gemspec +1 -1
- metadata +6 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c757dbd07d39404598cbf6e08425f063fc1761fd
|
4
|
+
data.tar.gz: b2ab016cbc6adcda04478240898873cc8584988d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 29da259c4c7be47c1a7c8514ec7b3038fb1b00aa734a10bd3046d9585b9e7ffe9c18cf304428681bd842807e166867f43569c7009346a59a62f826262ffcb274
|
7
|
+
data.tar.gz: 07b1d383642233e8f2d9c13bce6e9011023391fb1851a7dad1466e9387a680e45238e36f0b2dba58e449a0fe12d0ed23bd064ff51f2ddc7b82e11b3ba9b9dc8f
|
data/README.md
CHANGED
@@ -32,7 +32,16 @@ $ gem install sphinxtrain-ruby
|
|
32
32
|
|
33
33
|
## Usage
|
34
34
|
|
35
|
-
Run `sphinxtrain-ruby` from the command line and follow the instructions.
|
35
|
+
Run `sphinxtrain-ruby` from the command line and follow the instructions. It will:
|
36
|
+
|
37
|
+
1. Download and extract the Grasch Voxforge English 0.4 acoustic model (on first run)
|
38
|
+
2. Download the CMU ARCTIC example sentences (on first run)
|
39
|
+
3. Record the 20 example sentences. Press enter to record, speak sentence, then wait.
|
40
|
+
4. Decode the sentences using the base acoustic model, giving an overall score.
|
41
|
+
5. Duplicate and adapt the base acoustic model using the recorded sentences.
|
42
|
+
6. Decode the sentences using the adapted acoustic model, giving an overall score.
|
43
|
+
|
44
|
+
See some example output [here](https://github.com/watsonbox/sphinxtrain-ruby/wiki/Example-Output). All data is saved in `~/.sphinxtrain-ruby`.
|
36
45
|
|
37
46
|
|
38
47
|
## To Do
|
data/lib/sphinxtrain.rb
CHANGED
@@ -5,9 +5,16 @@ require "word_aligner"
|
|
5
5
|
require "sphinxtrain/version"
|
6
6
|
require "sphinxtrain/analyser"
|
7
7
|
require "sphinxtrain/map_adapter"
|
8
|
+
require "sphinxtrain/acoustic_model"
|
8
9
|
require "sphinxtrain/training_decoder"
|
9
10
|
require "sphinxtrain/trainer"
|
10
11
|
|
11
12
|
module Sphinxtrain
|
12
|
-
|
13
|
+
def self.base_dir
|
14
|
+
File.join(Dir.home, '.sphinxtrain-ruby')
|
15
|
+
end
|
16
|
+
|
17
|
+
def self.recordings_dir
|
18
|
+
File.join(base_dir, 'recordings')
|
19
|
+
end
|
13
20
|
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module Sphinxtrain
|
2
|
+
class AcousticModel < Struct.new(:url)
|
3
|
+
MODEL_URLS = {
|
4
|
+
voxforge_grasch: "http://files.kde.org/accessibility/Simon/am/voxforge_en_sphinx.cd_cont_5000.tar.gz"
|
5
|
+
}
|
6
|
+
|
7
|
+
MODEL_DESCRIPTIONS = {
|
8
|
+
voxforge_grasch: "Grasch Voxforge English 0.4"
|
9
|
+
}
|
10
|
+
|
11
|
+
def self.voxforge_grasch
|
12
|
+
new MODEL_URLS[:voxforge_grasch]
|
13
|
+
end
|
14
|
+
|
15
|
+
def description
|
16
|
+
MODEL_DESCRIPTIONS[MODEL_URLS.invert[url]] || url
|
17
|
+
end
|
18
|
+
|
19
|
+
def downloaded?
|
20
|
+
File.exist?(downloaded_filename)
|
21
|
+
end
|
22
|
+
|
23
|
+
def downloaded_filename
|
24
|
+
File.basename(url)
|
25
|
+
end
|
26
|
+
|
27
|
+
def folder
|
28
|
+
File.basename(downloaded_filename, '.tar.gz')
|
29
|
+
end
|
30
|
+
|
31
|
+
def adapted_folder
|
32
|
+
folder + "_adapted"
|
33
|
+
end
|
34
|
+
|
35
|
+
def download!
|
36
|
+
`wget #{url}`
|
37
|
+
`tar xfz #{downloaded_filename}`
|
38
|
+
end
|
39
|
+
|
40
|
+
def duplicate!
|
41
|
+
FileUtils.rm_rf(adapted_folder) if Dir.exist?(adapted_folder)
|
42
|
+
FileUtils.cp_r(folder, adapted_folder)
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
data/lib/sphinxtrain/analyser.rb
CHANGED
@@ -7,11 +7,20 @@ module Sphinxtrain
|
|
7
7
|
|
8
8
|
def analyse(sentences_file, recordings_dir)
|
9
9
|
total = 0
|
10
|
+
first_decoding = true
|
10
11
|
|
11
12
|
File.open(sentences_file).each_line.with_index do |transcription, index|
|
12
13
|
transcription = transcription.downcase.gsub(/[,\.]/, '')
|
13
14
|
file_path = File.join(recordings_dir, "arctic_#{(index + 1).to_s.rjust(4, "0")}.raw")
|
14
15
|
decoder.decode file_path
|
16
|
+
|
17
|
+
# Repeat the first decoding after CMN estimations are calculated
|
18
|
+
# See https://github.com/watsonbox/pocketsphinx-ruby/issues/10
|
19
|
+
if first_decoding
|
20
|
+
first_decoding = false
|
21
|
+
redo
|
22
|
+
end
|
23
|
+
|
15
24
|
hypothesis = decoder.hypothesis
|
16
25
|
error_rate = WordAligner.align(transcription, hypothesis)
|
17
26
|
total += error_rate.percentage_accurate
|
data/lib/sphinxtrain/trainer.rb
CHANGED
@@ -2,42 +2,46 @@ require 'fileutils'
|
|
2
2
|
|
3
3
|
module Sphinxtrain
|
4
4
|
class Trainer
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
VOXFORGE_FILE = File.basename(VOXFORGE_URL)
|
9
|
-
VOXFORGE_FOLDER = File.basename(VOXFORGE_FILE, '.tar.gz')
|
10
|
-
#VOXFORGE_MODEL = File.join(BASE_DIR, VOXFORGE_FOLDER, "model_parameters/voxforge_en_sphinx.cd_cont_5000")
|
11
|
-
VOXFORGE_MODEL = VOXFORGE_FOLDER
|
12
|
-
RECORDINGS_DIR = File.join(BASE_DIR, 'recordings')
|
13
|
-
NEW_MODEL = File.join(BASE_DIR, 'new_model')
|
5
|
+
def acoustic_model
|
6
|
+
@acoustic_model ||= AcousticModel.voxforge_grasch
|
7
|
+
end
|
14
8
|
|
15
9
|
def train
|
16
10
|
Pocketsphinx.disable_logging
|
17
11
|
|
18
|
-
Dir.mkdir
|
19
|
-
Dir.chdir
|
20
|
-
|
12
|
+
Dir.mkdir Sphinxtrain.base_dir rescue Errno::EEXIST
|
13
|
+
Dir.chdir Sphinxtrain.base_dir do
|
14
|
+
if acoustic_model.downloaded?
|
15
|
+
log "=> Using existing acoustic model #{acoustic_model.description}", :yellow
|
16
|
+
else
|
17
|
+
log "=> Downloading #{acoustic_model.description}..."
|
18
|
+
acoustic_model.download!
|
19
|
+
end
|
20
|
+
|
21
21
|
download_assets unless arctic_file(:txt, :listoffiles, :transcription, :dic).all? { |f| File.exist? f }
|
22
|
-
record_sentences unless Dir.exist?(RECORDINGS_DIR)
|
23
22
|
|
24
|
-
|
23
|
+
if Dir.exist?(Sphinxtrain.recordings_dir)
|
24
|
+
log "=> Using sentences recorded in #{Sphinxtrain.recordings_dir}", :yellow
|
25
|
+
else
|
26
|
+
record_sentences
|
27
|
+
end
|
28
|
+
|
29
|
+
result = analyse_model
|
25
30
|
|
26
31
|
duplicate_model
|
27
32
|
adapt_model
|
28
33
|
|
29
|
-
analyse_model
|
34
|
+
adapted_result = analyse_model acoustic_model.adapted_folder
|
35
|
+
|
36
|
+
improvement = ((adapted_result/result)-1)*100
|
37
|
+
|
38
|
+
log "=> Adapted acoustic model improved by #{improvement}%. Test this model with:"
|
39
|
+
log "=> pocketsphinx_continuous -hmm #{File.join(Sphinxtrain.base_dir, acoustic_model.adapted_folder)} -inmic yes"
|
30
40
|
end
|
31
41
|
end
|
32
42
|
|
33
43
|
private
|
34
44
|
|
35
|
-
def download_voxforge
|
36
|
-
log "=> Downloading Voxforge English 0.4 Acoustic Model..."
|
37
|
-
`wget #{VOXFORGE_URL}`
|
38
|
-
`tar xfz #{VOXFORGE_FILE}`
|
39
|
-
end
|
40
|
-
|
41
45
|
def download_assets
|
42
46
|
log "=> Downloading CMU ARCTIC Example Sentences..."
|
43
47
|
|
@@ -52,7 +56,7 @@ module Sphinxtrain
|
|
52
56
|
|
53
57
|
def record_sentences
|
54
58
|
log "=> Recording sentences..."
|
55
|
-
Dir.mkdir
|
59
|
+
Dir.mkdir Sphinxtrain.recordings_dir unless Dir.exist?(Sphinxtrain.recordings_dir)
|
56
60
|
|
57
61
|
recognizer = Pocketsphinx::LiveSpeechRecognizer.new
|
58
62
|
decoder = TrainingDecoder.new(recognizer.decoder)
|
@@ -83,28 +87,27 @@ module Sphinxtrain
|
|
83
87
|
def save_audio(data, sentence_index)
|
84
88
|
raise "Can't save empty audio data" if data.nil? || data.empty?
|
85
89
|
|
86
|
-
File.open(File.join(
|
90
|
+
File.open(File.join(Sphinxtrain.recordings_dir, "arctic_#{(sentence_index + 1).to_s.rjust(4, "0")}.raw"), "wb") do |file|
|
87
91
|
file.write data
|
88
92
|
end
|
89
93
|
end
|
90
94
|
|
91
|
-
def analyse_model(
|
95
|
+
def analyse_model(model_folder = acoustic_model.folder)
|
92
96
|
log "=> Analysing acoustic model...\n"
|
93
97
|
|
94
|
-
result = Analyser.new(
|
98
|
+
result = Analyser.new(model_folder).analyse(arctic_file(:txt), Sphinxtrain.recordings_dir) do |transcription, hypothesis, accuracy|
|
95
99
|
puts " ACTUAL: #{transcription}"
|
96
100
|
puts " RECORD: #{hypothesis}"
|
97
101
|
puts " RESULT: #{accuracy}\n\n"
|
98
102
|
end
|
99
103
|
|
100
104
|
puts " OVERALL: #{result}\n\n"
|
105
|
+
result
|
101
106
|
end
|
102
107
|
|
103
108
|
def duplicate_model
|
104
109
|
log "=> Duplicating Voxforge acoustic model..."
|
105
|
-
|
106
|
-
FileUtils.rm_rf(NEW_MODEL) if Dir.exist?(NEW_MODEL)
|
107
|
-
FileUtils.cp_r(VOXFORGE_MODEL, NEW_MODEL)
|
110
|
+
acoustic_model.duplicate!
|
108
111
|
end
|
109
112
|
|
110
113
|
# Follows process described here: http://cmusphinx.sourceforge.net/wiki/tutorialadapt
|
@@ -112,17 +115,17 @@ module Sphinxtrain
|
|
112
115
|
log "=> Adapting Voxforge acoustic model..."
|
113
116
|
|
114
117
|
MapAdapter.new(
|
115
|
-
old_model:
|
116
|
-
new_model:
|
117
|
-
recordings_dir:
|
118
|
+
old_model: acoustic_model.folder,
|
119
|
+
new_model: acoustic_model.adapted_folder,
|
120
|
+
recordings_dir: Sphinxtrain.recordings_dir,
|
118
121
|
sentences_transcription: arctic_file(:transcription),
|
119
122
|
sentences_files: arctic_file(:listoffiles),
|
120
123
|
sentences_dict: arctic_file(:dic)
|
121
124
|
).adapt
|
122
125
|
end
|
123
126
|
|
124
|
-
def log(message)
|
125
|
-
puts message.colorize(
|
127
|
+
def log(message, color = :green)
|
128
|
+
puts message.colorize(color)
|
126
129
|
end
|
127
130
|
end
|
128
131
|
end
|
data/lib/sphinxtrain/version.rb
CHANGED
data/sphinxtrain-ruby.gemspec
CHANGED
@@ -19,7 +19,7 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
spec.required_ruby_version = '>= 2.1.0'
|
21
21
|
|
22
|
-
spec.add_dependency "pocketsphinx-ruby", "~> 0.
|
22
|
+
spec.add_dependency "pocketsphinx-ruby", "~> 0.3.0"
|
23
23
|
spec.add_dependency "word_aligner", "~> 0.1.2"
|
24
24
|
spec.add_dependency "colorize", "~> 0.7.3"
|
25
25
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sphinxtrain-ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Howard Wilson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-11-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: pocketsphinx-ruby
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.
|
19
|
+
version: 0.3.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.
|
26
|
+
version: 0.3.0
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: word_aligner
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -112,6 +112,7 @@ files:
|
|
112
112
|
- bin/sphinxtrain-ruby
|
113
113
|
- lib/sphinxtrain-ruby.rb
|
114
114
|
- lib/sphinxtrain.rb
|
115
|
+
- lib/sphinxtrain/acoustic_model.rb
|
115
116
|
- lib/sphinxtrain/analyser.rb
|
116
117
|
- lib/sphinxtrain/map_adapter.rb
|
117
118
|
- lib/sphinxtrain/trainer.rb
|
@@ -139,7 +140,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
139
140
|
version: '0'
|
140
141
|
requirements: []
|
141
142
|
rubyforge_project:
|
142
|
-
rubygems_version: 2.
|
143
|
+
rubygems_version: 2.4.6
|
143
144
|
signing_key:
|
144
145
|
specification_version: 4
|
145
146
|
summary: Toolkit for training/adapting CMU Sphinx acoustic models.
|