word_aligner 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,44 @@
1
+ require 'ostruct'
2
+
3
module WordAligner
  # Read-only view over the counters produced by one alignment run.
  #
  # The hash handed to the constructor is exposed twice: every key becomes an
  # OpenStruct attribute, and the hash itself stays reachable through #data.
  # The reporting methods below only rename or combine three of those
  # attributes: `transcription_words`, `matching` and `align_cost`.
  class WordErrorRate < OpenStruct
    # The very hash object this instance was built from (not a copy).
    attr_reader :data

    # data - Hash of alignment counters; forwarded unchanged to OpenStruct.
    def initialize(data)
      super(data)
      @data = data
    end

    # Value of the `transcription_words` attribute.
    def words
      transcription_words
    end

    # Value of the `matching` attribute.
    def correct_words
      matching
    end

    # Value of the `align_cost` attribute.
    def incorrect_words
      align_cost
    end

    # Share of #correct_words relative to #words, as a Float percentage.
    def percentage_correct
      percent_rate(correct_words)
    end

    # Share of #incorrect_words relative to #words, as a Float percentage.
    def percentage_incorrect
      percent_rate(incorrect_words)
    end

    # 100 minus #percentage_incorrect. Drops below zero whenever
    # #incorrect_words exceeds the divisor used by #percentage_incorrect.
    def percentage_accurate
      100 - percentage_incorrect
    end

    private

    # Scales +value+ to a percentage of #words. The divisor never goes below
    # 1, so a zero word count cannot trigger a division by zero.
    def percent_rate(value)
      scaled = value * 100.0
      scaled / [words, 1].max
    end
  end
end
@@ -0,0 +1,31 @@
1
+ require 'spec_helper'
2
+
3
# One fixture record exposed through attribute readers, plus a cached
# alignment of its own `transcription` / `hypothesis` pair.
class Sample < OpenStruct
  # Returns the `data` of the word error rate reported by
  # WordAligner::Aligner for this record. A truthy result is computed once
  # and then served from the instance variable.
  def aligner_result
    return @aligner_result if @aligner_result

    aligner = WordAligner::Aligner.new(transcription, hypothesis)
    @aligner_result = aligner.word_error_rate.data
  end
end
9
+
10
module WordAligner
  # Regression suite: each record of the YAML fixture is wrapped in a Sample
  # and the aligner's result is expected to equal that record.
  describe Aligner do
    fixture_path = 'spec/sample_data/regression/sentences.yml'
    samples = YAML.load(File.read(fixture_path))

    samples.each.with_index do |sample, idx|
      describe "sample #{idx}" do
        subject { Sample.new(sample) }

        # Sanity-check the fixture record before comparing alignment output.
        [:hypothesis, :transcription].each do |field|
          its(field) { should_not be_nil }
          its(field) { should_not be_empty }
        end

        its(:aligner_result) { should eq sample }
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
+ require 'spec_helper'
2
+
3
module WordAligner
  # Unit spec for the derived figures of WordErrorRate.
  describe WordErrorRate do

    # Counters for an eight-word transcription with two substitutions.
    # insertions + deletions + substitutions adds up to align_cost, and
    # matching + substitutions adds up to transcription_words.
    let(:data) do
      {
        insertions: 0,
        substitutions: 2,
        deletions: 0,
        align_cost: 2,
        transcription_words: 8,
        matching: 6
      }
    end

    subject { WordErrorRate.new(data) }

    it { should be_a(WordErrorRate) }

    # Plain renames of the underlying counters.
    its(:words) { should eq(8) }
    its(:correct_words) { should eq(6) }
    its(:incorrect_words) { should eq(2) }

    # 6 of 8 correct, 2 of 8 incorrect, accuracy = 100 - 25.
    its(:percentage_correct) { should eq(75.0) }
    its(:percentage_incorrect) { should eq(25.0) }
    its(:percentage_accurate) { should eq(75.0) }
  end
end
@@ -0,0 +1,15 @@
1
+ require 'spec_helper'
2
+
3
# Smoke test for the module-level entry point.
describe WordAligner do
  describe '.align' do
    it 'returns a WordErrorRate' do
      result = WordAligner.align('hello world', 'hello wurld')

      expect(result).to be_a(WordAligner::WordErrorRate)
    end
  end
end
@@ -0,0 +1,51 @@
1
+ # USAGE: ruby grab_for_comparision.rb regression/sentences.txt.wa \
2
+ # regression/sentences.hypotheses.txt.wa > regression/sentences.yml
3
+ require 'yaml'
4
+
5
# Two positional arguments: the transcription file, then the hypothesis file.
transcription_file, hypothesis_file = ARGV
if transcription_file.nil? || hypothesis_file.nil?
  abort "USAGE: ruby #{File.basename($PROGRAM_NAME)} TRANSCRIPTION_FILE HYPOTHESIS_FILE"
end

# Argument-vector form: the paths reach perl verbatim, without passing through
# a shell, so spaces or metacharacters in a file name cannot split or alter
# the command.
command = ['perl', 'word_align.pl', transcription_file, hypothesis_file]
output = IO.popen(command, &:read).split("\n").map(&:strip)

# The same two files are read again so the original (unaligned) sentences can
# be stored next to the aligner's figures.
transcription_lines = File.readlines(transcription_file)
hypothesis_lines = File.readlines(hypothesis_file)
11
+
12
# Removes one trailing parenthesised group (plus the whitespace around it)
# from the end of +str+ and returns the result as a new String.
#
# The group itself must not contain parentheses. This keeps the match on the
# LAST group of the line, so text such as "foo(bar) now (utt_02)" only loses
# " (utt_02)" instead of everything from the first "(" onwards.
#
# str - String, expected to be a single line.
#
# Returns +str+ unchanged (as a copy) when no such trailing group exists.
def strip_id(str)
  str.sub(/\s*\([^()]+\)\s*$/, '')
end
15
+
16
records = []

# The aligner output is consumed four lines at a time: aligned transcription,
# aligned hypothesis, a statistics line and an edit-distance line.
loop do
  transcription = output.shift
  hypothesis = output.shift
  statistics = output.shift
  distance = output.shift

  # Stop at the first group whose third line carries no "Words:" figures.
  # `to_s` also covers exhausted or empty output, where `shift` returns nil
  # and calling a String method on it would raise NoMethodError.
  break unless statistics.to_s.include?('Words:')

  # Only the first three numbers of the statistics line are stored; any
  # further numbers on that line are ignored.
  words, correct, errors = statistics.scan(/\d+(?:[.]\d+)?/)
  insertions, deletions, substitutions = distance.scan(/\d+/)

  # Input files advance in lockstep with the output groups, one line each.
  transcription_line = strip_id(transcription_lines.shift.strip)
  hypothesis_line = strip_id(hypothesis_lines.shift.strip)

  # Records whose original transcription is blank are left out.
  next if transcription_line.empty?

  records << {
    transcription: transcription_line,
    hypothesis: hypothesis_line,
    insertions: insertions.to_i,
    deletions: deletions.to_i,
    substitutions: substitutions.to_i,
    matching: correct.to_i,
    align_cost: errors.to_i,
    transcription_words: words.to_i,
    aligned_transcription: strip_id(transcription),
    aligned_hypothesis: strip_id(hypothesis)
  }
end

puts records.to_yaml
@@ -0,0 +1,647 @@
1
+ ---
2
+ - :transcription: I think this might just work for fine
3
+ :hypothesis: I'd think this might just work for laine
4
+ :insertions: 0
5
+ :deletions: 0
6
+ :substitutions: 2
7
+ :matching: 6
8
+ :align_cost: 2
9
+ :transcription_words: 8
10
+ :aligned_transcription: I think this might just work for FINE
11
+ :aligned_hypothesis: I'D think this might just work for LAINE
12
+ - :transcription: did you register for a new account
13
+ :hypothesis: hit you register for a new account
14
+ :insertions: 0
15
+ :deletions: 0
16
+ :substitutions: 1
17
+ :matching: 6
18
+ :align_cost: 1
19
+ :transcription_words: 7
20
+ :aligned_transcription: DID you register for a new account
21
+ :aligned_hypothesis: HIT you register for a new account
22
+ - :transcription: I almost forgot to tell you about our next meeting
23
+ :hypothesis: I almost forgot to tell you about our next meeting
24
+ :insertions: 0
25
+ :deletions: 0
26
+ :substitutions: 0
27
+ :matching: 10
28
+ :align_cost: 0
29
+ :transcription_words: 10
30
+ :aligned_transcription: i almost forgot to tell you about our next meeting
31
+ :aligned_hypothesis: i almost forgot to tell you about our next meeting
32
+ - :transcription: I am not using bash anymore
33
+ :hypothesis: if I am not using bash anymore
34
+ :insertions: 1
35
+ :deletions: 0
36
+ :substitutions: 0
37
+ :matching: 6
38
+ :align_cost: 1
39
+ :transcription_words: 6
40
+ :aligned_transcription: '*** i am not using bash anymore'
41
+ :aligned_hypothesis: IF i am not using bash anymore
42
+ - :transcription: I think I might switch to a windows computer
43
+ :hypothesis: I think it might switch to a windows computer
44
+ :insertions: 0
45
+ :deletions: 0
46
+ :substitutions: 1
47
+ :matching: 8
48
+ :align_cost: 1
49
+ :transcription_words: 9
50
+ :aligned_transcription: i think I might switch to a windows computer
51
+ :aligned_hypothesis: i think IT might switch to a windows computer
52
+ - :transcription: it returns the current hypothesis
53
+ :hypothesis: it returns the current hypothesis
54
+ :insertions: 0
55
+ :deletions: 0
56
+ :substitutions: 0
57
+ :matching: 5
58
+ :align_cost: 0
59
+ :transcription_words: 5
60
+ :aligned_transcription: it returns the current hypothesis
61
+ :aligned_hypothesis: it returns the current hypothesis
62
+ - :transcription: you cannot code HTML by voice
63
+ :hypothesis: you cannot code HTML by avoids
64
+ :insertions: 0
65
+ :deletions: 0
66
+ :substitutions: 1
67
+ :matching: 5
68
+ :align_cost: 1
69
+ :transcription_words: 6
70
+ :aligned_transcription: you cannot code html by VOICE
71
+ :aligned_hypothesis: you cannot code html by AVOIDS
72
+ - :transcription: why exactly would you do that
73
+ :hypothesis: why exactly would you go that
74
+ :insertions: 0
75
+ :deletions: 0
76
+ :substitutions: 1
77
+ :matching: 5
78
+ :align_cost: 1
79
+ :transcription_words: 6
80
+ :aligned_transcription: why exactly would you DO that
81
+ :aligned_hypothesis: why exactly would you GO that
82
+ - :transcription: I didn't think about it like that
83
+ :hypothesis: I didn't think about it like that
84
+ :insertions: 0
85
+ :deletions: 0
86
+ :substitutions: 0
87
+ :matching: 7
88
+ :align_cost: 0
89
+ :transcription_words: 7
90
+ :aligned_transcription: i didn't think about it like that
91
+ :aligned_hypothesis: i didn't think about it like that
92
+ - :transcription: haven't you figured out the meaning of the algorithm yet
93
+ :hypothesis: haven't you fake it out the meaning of the I'd go with him yet
94
+ :insertions: 4
95
+ :deletions: 0
96
+ :substitutions: 2
97
+ :matching: 8
98
+ :align_cost: 6
99
+ :transcription_words: 10
100
+ :aligned_transcription: haven't you *** FIGURED out the meaning of the *** ***
101
+ *** ALGORITHM yet
102
+ :aligned_hypothesis: haven't you FAKE IT out the meaning of the I'D GO WITH
103
+ HIM yet
104
+ - :transcription: the algorithm is quite good
105
+ :hypothesis: the algorithm is quite good
106
+ :insertions: 0
107
+ :deletions: 0
108
+ :substitutions: 0
109
+ :matching: 5
110
+ :align_cost: 0
111
+ :transcription_words: 5
112
+ :aligned_transcription: the algorithm is quite good
113
+ :aligned_hypothesis: the algorithm is quite good
114
+ - :transcription: it responds in a custom way
115
+ :hypothesis: it's response in a custom way
116
+ :insertions: 0
117
+ :deletions: 0
118
+ :substitutions: 2
119
+ :matching: 4
120
+ :align_cost: 2
121
+ :transcription_words: 6
122
+ :aligned_transcription: IT RESPONDS in a custom way
123
+ :aligned_hypothesis: IT'S RESPONSE in a custom way
124
+ - :transcription: it won't work for HTML
125
+ :hypothesis: it won't work for HTML
126
+ :insertions: 0
127
+ :deletions: 0
128
+ :substitutions: 0
129
+ :matching: 5
130
+ :align_cost: 0
131
+ :transcription_words: 5
132
+ :aligned_transcription: it won't work for html
133
+ :aligned_hypothesis: it won't work for html
134
+ - :transcription: but it will work good for ruby on rails
135
+ :hypothesis: but it will work good for ruby on rails
136
+ :insertions: 0
137
+ :deletions: 0
138
+ :substitutions: 0
139
+ :matching: 9
140
+ :align_cost: 0
141
+ :transcription_words: 9
142
+ :aligned_transcription: but it will work good for ruby on rails
143
+ :aligned_hypothesis: but it will work good for ruby on rails
144
+ - :transcription: the user would decide when to update
145
+ :hypothesis: but the user would decide when to app to
146
+ :insertions: 2
147
+ :deletions: 0
148
+ :substitutions: 1
149
+ :matching: 6
150
+ :align_cost: 3
151
+ :transcription_words: 7
152
+ :aligned_transcription: '*** the user would decide when to *** UPDATE'
153
+ :aligned_hypothesis: BUT the user would decide when to APP TO
154
+ - :transcription: when you define a new class it is not there
155
+ :hypothesis: when you defining you close it is not there
156
+ :insertions: 0
157
+ :deletions: 1
158
+ :substitutions: 3
159
+ :matching: 6
160
+ :align_cost: 4
161
+ :transcription_words: 10
162
+ :aligned_transcription: when you DEFINE A NEW CLASS it is not there
163
+ :aligned_hypothesis: when you *** DEFINING YOU CLOSE it is not there
164
+ - :transcription: so the voice recognition system would learn new classes while you
165
+ are defining them
166
+ :hypothesis: so the voice recognition system would learn new classes where you are
167
+ defining them for
168
+ :insertions: 1
169
+ :deletions: 0
170
+ :substitutions: 1
171
+ :matching: 13
172
+ :align_cost: 2
173
+ :transcription_words: 14
174
+ :aligned_transcription: so the voice recognition system would learn new classes
175
+ WHILE you are defining them ***
176
+ :aligned_hypothesis: so the voice recognition system would learn new classes WHERE
177
+ you are defining them FOR
178
+ - :transcription: and it would build a custom language model based on ctags
179
+ :hypothesis: and it would build a custom language model based on see Tex
180
+ :insertions: 1
181
+ :deletions: 0
182
+ :substitutions: 1
183
+ :matching: 10
184
+ :align_cost: 2
185
+ :transcription_words: 11
186
+ :aligned_transcription: and it would build a custom language model based on ***
187
+ CTAGS
188
+ :aligned_hypothesis: and it would build a custom language model based on SEE
189
+ TEX
190
+ - :transcription: so it understands all your classes and methods like words
191
+ :hypothesis: so it understands Oreo classes and methods like words
192
+ :insertions: 0
193
+ :deletions: 1
194
+ :substitutions: 1
195
+ :matching: 8
196
+ :align_cost: 2
197
+ :transcription_words: 10
198
+ :aligned_transcription: so it understands ALL YOUR classes and methods like words
199
+ :aligned_hypothesis: so it understands *** OREO classes and methods like words
200
+ - :transcription: I think this might be a good idea
201
+ :hypothesis: I'd think this might be a good idea
202
+ :insertions: 0
203
+ :deletions: 0
204
+ :substitutions: 1
205
+ :matching: 7
206
+ :align_cost: 1
207
+ :transcription_words: 8
208
+ :aligned_transcription: I think this might be a good idea
209
+ :aligned_hypothesis: I'D think this might be a good idea
210
+ - :transcription: I think this should be much faster
211
+ :hypothesis: I think the should be much faster
212
+ :insertions: 0
213
+ :deletions: 0
214
+ :substitutions: 1
215
+ :matching: 6
216
+ :align_cost: 1
217
+ :transcription_words: 7
218
+ :aligned_transcription: i think THIS should be much faster
219
+ :aligned_hypothesis: i think THE should be much faster
220
+ - :transcription: did you see my latest commit
221
+ :hypothesis: and opted you ca latest committed up
222
+ :insertions: 1
223
+ :deletions: 0
224
+ :substitutions: 5
225
+ :matching: 1
226
+ :align_cost: 6
227
+ :transcription_words: 6
228
+ :aligned_transcription: '*** DID you SEE MY LATEST COMMIT'
229
+ :aligned_hypothesis: AND OPTED you CA LATEST COMMITTED UP
230
+ - :transcription: click on first result
231
+ :hypothesis: click on first result
232
+ :insertions: 0
233
+ :deletions: 0
234
+ :substitutions: 0
235
+ :matching: 4
236
+ :align_cost: 0
237
+ :transcription_words: 4
238
+ :aligned_transcription: click on first result
239
+ :aligned_hypothesis: click on first result
240
+ - :transcription: click on second result
241
+ :hypothesis: click on second free soft
242
+ :insertions: 1
243
+ :deletions: 0
244
+ :substitutions: 1
245
+ :matching: 3
246
+ :align_cost: 2
247
+ :transcription_words: 4
248
+ :aligned_transcription: click on second *** RESULT
249
+ :aligned_hypothesis: click on second FREE SOFT
250
+ - :transcription: go to Google
251
+ :hypothesis: go to Google on
252
+ :insertions: 1
253
+ :deletions: 0
254
+ :substitutions: 0
255
+ :matching: 3
256
+ :align_cost: 1
257
+ :transcription_words: 3
258
+ :aligned_transcription: go to google ***
259
+ :aligned_hypothesis: go to google ON
260
+ - :transcription: validates presence of name
261
+ :hypothesis: valid dates presence of name
262
+ :insertions: 1
263
+ :deletions: 0
264
+ :substitutions: 1
265
+ :matching: 3
266
+ :align_cost: 2
267
+ :transcription_words: 4
268
+ :aligned_transcription: '*** VALIDATES presence of name'
269
+ :aligned_hypothesis: VALID DATES presence of name
270
+ - :transcription: validates uniqueness of name
271
+ :hypothesis: wedded its uniqueness often name
272
+ :insertions: 1
273
+ :deletions: 0
274
+ :substitutions: 2
275
+ :matching: 2
276
+ :align_cost: 3
277
+ :transcription_words: 4
278
+ :aligned_transcription: '*** VALIDATES uniqueness OF name'
279
+ :aligned_hypothesis: WEDDED ITS uniqueness OFTEN name
280
+ - :transcription: belongs to language
281
+ :hypothesis: belongs to language
282
+ :insertions: 0
283
+ :deletions: 0
284
+ :substitutions: 0
285
+ :matching: 3
286
+ :align_cost: 0
287
+ :transcription_words: 3
288
+ :aligned_transcription: belongs to language
289
+ :aligned_hypothesis: belongs to language
290
+ - :transcription: belongs to user
291
+ :hypothesis: belongs to user
292
+ :insertions: 0
293
+ :deletions: 0
294
+ :substitutions: 0
295
+ :matching: 3
296
+ :align_cost: 0
297
+ :transcription_words: 3
298
+ :aligned_transcription: belongs to user
299
+ :aligned_hypothesis: belongs to user
300
+ - :transcription: it should have three actions
301
+ :hypothesis: it should have three actions
302
+ :insertions: 0
303
+ :deletions: 0
304
+ :substitutions: 0
305
+ :matching: 5
306
+ :align_cost: 0
307
+ :transcription_words: 5
308
+ :aligned_transcription: it should have three actions
309
+ :aligned_hypothesis: it should have three actions
310
+ - :transcription: I didn't think this would work
311
+ :hypothesis: I didn't think this would work
312
+ :insertions: 0
313
+ :deletions: 0
314
+ :substitutions: 0
315
+ :matching: 6
316
+ :align_cost: 0
317
+ :transcription_words: 6
318
+ :aligned_transcription: i didn't think this would work
319
+ :aligned_hypothesis: i didn't think this would work
320
+ - :transcription: I am now testing another recording
321
+ :hypothesis: I am not testing another recording
322
+ :insertions: 0
323
+ :deletions: 0
324
+ :substitutions: 1
325
+ :matching: 5
326
+ :align_cost: 1
327
+ :transcription_words: 6
328
+ :aligned_transcription: i am NOW testing another recording
329
+ :aligned_hypothesis: i am NOT testing another recording
330
+ - :transcription: hello and welcome
331
+ :hypothesis: hello and welcome
332
+ :insertions: 0
333
+ :deletions: 0
334
+ :substitutions: 0
335
+ :matching: 3
336
+ :align_cost: 0
337
+ :transcription_words: 3
338
+ :aligned_transcription: hello and welcome
339
+ :aligned_hypothesis: hello and welcome
340
+ - :transcription: by the way everything that you have just read was recognized by
341
+ my software
342
+ :hypothesis: by the way everything that you have just read was recognized by my
343
+ software
344
+ :insertions: 0
345
+ :deletions: 0
346
+ :substitutions: 0
347
+ :matching: 14
348
+ :align_cost: 0
349
+ :transcription_words: 14
350
+ :aligned_transcription: by the way everything that you have just read was recognized
351
+ by my software
352
+ :aligned_hypothesis: by the way everything that you have just read was recognized
353
+ by my software
354
+ - :transcription: with only minor errors in the recognition
355
+ :hypothesis: with only minor errors in the recognition
356
+ :insertions: 0
357
+ :deletions: 0
358
+ :substitutions: 0
359
+ :matching: 7
360
+ :align_cost: 0
361
+ :transcription_words: 7
362
+ :aligned_transcription: with only minor errors in the recognition
363
+ :aligned_hypothesis: with only minor errors in the recognition
364
+ - :transcription: please fetch the files from the server
365
+ :hypothesis: please search the files from the server
366
+ :insertions: 0
367
+ :deletions: 0
368
+ :substitutions: 1
369
+ :matching: 6
370
+ :align_cost: 1
371
+ :transcription_words: 7
372
+ :aligned_transcription: please FETCH the files from the server
373
+ :aligned_hypothesis: please SEARCH the files from the server
374
+ - :transcription: the real challenge is coming up with a good speech representation
375
+ of ruby
376
+ :hypothesis: the real challenge is coming up with Blake good speech representation
377
+ of rube
378
+ :insertions: 0
379
+ :deletions: 0
380
+ :substitutions: 2
381
+ :matching: 11
382
+ :align_cost: 2
383
+ :transcription_words: 13
384
+ :aligned_transcription: the real challenge is coming up with A good speech
385
+ representation of RUBY
386
+ :aligned_hypothesis: the real challenge is coming up with BLAKE good speech representation
387
+ of RUBE
388
+ - :transcription: that follows the principle of least surprise
389
+ :hypothesis: that follows the principle of the surprise
390
+ :insertions: 0
391
+ :deletions: 0
392
+ :substitutions: 1
393
+ :matching: 6
394
+ :align_cost: 1
395
+ :transcription_words: 7
396
+ :aligned_transcription: that follows the principle of LEAST surprise
397
+ :aligned_hypothesis: that follows the principle of THE surprise
398
+ - :transcription: and deals appropriately with ambiguous cases
399
+ :hypothesis: and this appropriately it was ambiguous cases
400
+ :insertions: 1
401
+ :deletions: 0
402
+ :substitutions: 2
403
+ :matching: 4
404
+ :align_cost: 3
405
+ :transcription_words: 6
406
+ :aligned_transcription: and DEALS appropriately *** WITH ambiguous cases
407
+ :aligned_hypothesis: and THIS appropriately IT WAS ambiguous cases
408
+ - :transcription: the good thing is that you can get rid of a lot of manual work
409
+ :hypothesis: the good thing is that you can get rid of a lot of manual work
410
+ :insertions: 0
411
+ :deletions: 0
412
+ :substitutions: 0
413
+ :matching: 15
414
+ :align_cost: 0
415
+ :transcription_words: 15
416
+ :aligned_transcription: the good thing is that you can get rid of a lot of manual
417
+ work
418
+ :aligned_hypothesis: the good thing is that you can get rid of a lot of manual
419
+ work
420
+ - :transcription: for example attribute accessors are nearly always placed at the
421
+ top of the file
422
+ :hypothesis: for example attribute excesses are nearly always placed at the top
423
+ of the file
424
+ :insertions: 0
425
+ :deletions: 0
426
+ :substitutions: 1
427
+ :matching: 13
428
+ :align_cost: 1
429
+ :transcription_words: 14
430
+ :aligned_transcription: for example attribute ACCESSORS are nearly always placed
431
+ at the top of the file
432
+ :aligned_hypothesis: for example attribute EXCESSES are nearly always placed at the
433
+ top of the file
434
+ - :transcription: so when you say something like
435
+ :hypothesis: so when you say something like
436
+ :insertions: 0
437
+ :deletions: 0
438
+ :substitutions: 0
439
+ :matching: 6
440
+ :align_cost: 0
441
+ :transcription_words: 6
442
+ :aligned_transcription: so when you say something like
443
+ :aligned_hypothesis: so when you say something like
444
+ - :transcription: attribute accessor file name
445
+ :hypothesis: attribute access server fine name
446
+ :insertions: 1
447
+ :deletions: 0
448
+ :substitutions: 2
449
+ :matching: 2
450
+ :align_cost: 3
451
+ :transcription_words: 4
452
+ :aligned_transcription: attribute *** ACCESSOR FILE name
453
+ :aligned_hypothesis: attribute ACCESS SERVER FINE name
454
+ - :transcription: it will automatically put the following line at the top of the file
455
+ in the right place
456
+ :hypothesis: it will automatically put the following line at the top of the file
457
+ in the right place
458
+ :insertions: 0
459
+ :deletions: 0
460
+ :substitutions: 0
461
+ :matching: 17
462
+ :align_cost: 0
463
+ :transcription_words: 17
464
+ :aligned_transcription: it will automatically put the following line at the top
465
+ of the file in the right place
466
+ :aligned_hypothesis: it will automatically put the following line at the top of the
467
+ file in the right place
468
+ - :transcription: so the whole approach works only with one unified style
469
+ :hypothesis: so the whole approach works only with one unified style
470
+ :insertions: 0
471
+ :deletions: 0
472
+ :substitutions: 0
473
+ :matching: 10
474
+ :align_cost: 0
475
+ :transcription_words: 10
476
+ :aligned_transcription: so the whole approach works only with one unified style
477
+ :aligned_hypothesis: so the whole approach works only with one unified style
478
+ - :transcription: which is the ruby best practices style which is published on github
479
+ :hypothesis: which is did ruby best practices style which is published on git help
480
+ :insertions: 1
481
+ :deletions: 0
482
+ :substitutions: 2
483
+ :matching: 10
484
+ :align_cost: 3
485
+ :transcription_words: 12
486
+ :aligned_transcription: which is THE ruby best practices style which is published
487
+ on *** GITHUB
488
+ :aligned_hypothesis: which is DID ruby best practices style which is published
489
+ on GIT HELP
490
+ - :transcription: I wonder if I should create a custom language model just for programming
491
+ :hypothesis: I wonder if I should create a custom language will just for programming
492
+ :insertions: 0
493
+ :deletions: 0
494
+ :substitutions: 1
495
+ :matching: 12
496
+ :align_cost: 1
497
+ :transcription_words: 13
498
+ :aligned_transcription: i wonder if i should create a custom language MODEL
499
+ just for programming
500
+ :aligned_hypothesis: i wonder if i should create a custom language WILL just
501
+ for programming
502
+ - :transcription: or if I should use the normal dictation model and just train it
503
+ for programming
504
+ :hypothesis: or if I should use that normal dictation on model and just train it
505
+ for programming
506
+ :insertions: 1
507
+ :deletions: 0
508
+ :substitutions: 1
509
+ :matching: 14
510
+ :align_cost: 2
511
+ :transcription_words: 15
512
+ :aligned_transcription: or if i should use THE normal dictation *** model and
513
+ just train it for programming
514
+ :aligned_hypothesis: or if i should use THAT normal dictation ON model and
515
+ just train it for programming
516
+ - :transcription: the advantage is that it would also recognize normal sentences such
517
+ as commit messages
518
+ :hypothesis: the advantage is that it would also recognized normal sentences such
519
+ as commit messages
520
+ :insertions: 0
521
+ :deletions: 0
522
+ :substitutions: 1
523
+ :matching: 13
524
+ :align_cost: 1
525
+ :transcription_words: 14
526
+ :aligned_transcription: the advantage is that it would also RECOGNIZE normal
527
+ sentences such as commit messages
528
+ :aligned_hypothesis: the advantage is that it would also RECOGNIZED normal sentences
529
+ such as commit messages
530
+ - :transcription: while the disadvantage is that it would not work as accurate on
531
+ programming messages
532
+ :hypothesis: why the disadvantage is that it would not work as a correct on programming
533
+ messages
534
+ :insertions: 1
535
+ :deletions: 0
536
+ :substitutions: 2
537
+ :matching: 12
538
+ :align_cost: 3
539
+ :transcription_words: 14
540
+ :aligned_transcription: WHILE the disadvantage is that it would not work as ***
541
+ ACCURATE on programming messages
542
+ :aligned_hypothesis: WHY the disadvantage is that it would not work as A CORRECT on programming
543
+ messages
544
+ - :transcription: another idea would be a hybrid approach
545
+ :hypothesis: another idea would be a hybrid approach
546
+ :insertions: 0
547
+ :deletions: 0
548
+ :substitutions: 0
549
+ :matching: 7
550
+ :align_cost: 0
551
+ :transcription_words: 7
552
+ :aligned_transcription: another idea would be a hybrid approach
553
+ :aligned_hypothesis: another idea would be a hybrid approach
554
+ - :transcription: that means whenever you are entering a string value or a commit
555
+ message it would switch automatically to the dictation language model
556
+ :hypothesis: that means whenever you are entering any string value or a commit message
557
+ it would switch automatically to the dictation language more
558
+ :insertions: 0
559
+ :deletions: 0
560
+ :substitutions: 2
561
+ :matching: 20
562
+ :align_cost: 2
563
+ :transcription_words: 22
564
+ :aligned_transcription: that means whenever you are entering A string value or a commit
565
+ message it would switch automatically to the dictation language MODEL
566
+ :aligned_hypothesis: that means whenever you are entering ANY string value or a commit
567
+ message it would switch automatically to the dictation language MORE
568
+ - :transcription: I am not sure if Google's voice recognition is actually that good
569
+ :hypothesis: I am not sure it's Google's voice recognition is actually dead code
570
+ :insertions: 0
571
+ :deletions: 0
572
+ :substitutions: 3
573
+ :matching: 9
574
+ :align_cost: 3
575
+ :transcription_words: 12
576
+ :aligned_transcription: i am not sure IF google's voice recognition is actually
577
+ THAT GOOD
578
+ :aligned_hypothesis: i am not sure IT'S google's voice recognition is actually
579
+ DEAD CODE
580
+ - :transcription: we can wait no problem
581
+ :hypothesis: we can wait no problem
582
+ :insertions: 0
583
+ :deletions: 0
584
+ :substitutions: 0
585
+ :matching: 5
586
+ :align_cost: 0
587
+ :transcription_words: 5
588
+ :aligned_transcription: we can wait no problem
589
+ :aligned_hypothesis: we can wait no problem
590
+ - :transcription: wow it's really fast
591
+ :hypothesis: wow it's really fast
592
+ :insertions: 0
593
+ :deletions: 0
594
+ :substitutions: 0
595
+ :matching: 4
596
+ :align_cost: 0
597
+ :transcription_words: 4
598
+ :aligned_transcription: wow it's really fast
599
+ :aligned_hypothesis: wow it's really fast
600
+ - :transcription: the URL is different
601
+ :hypothesis: do you are at it is different
602
+ :insertions: 3
603
+ :deletions: 0
604
+ :substitutions: 2
605
+ :matching: 2
606
+ :align_cost: 5
607
+ :transcription_words: 4
608
+ :aligned_transcription: '*** *** *** THE URL is different'
609
+ :aligned_hypothesis: DO YOU ARE AT IT is different
610
+ - :transcription: we would basically just need to change that
611
+ :hypothesis: we would basically just need to change that
612
+ :insertions: 0
613
+ :deletions: 0
614
+ :substitutions: 0
615
+ :matching: 8
616
+ :align_cost: 0
617
+ :transcription_words: 8
618
+ :aligned_transcription: we would basically just need to change that
619
+ :aligned_hypothesis: we would basically just need to change that
620
+ - :transcription: logos are symbols that attempt to visually represent the essence
621
+ of an organization
622
+ :hypothesis: logos are symbols that attempt to visually represent the essence of
623
+ an organization
624
+ :insertions: 0
625
+ :deletions: 0
626
+ :substitutions: 0
627
+ :matching: 13
628
+ :align_cost: 0
629
+ :transcription_words: 13
630
+ :aligned_transcription: logos are symbols that attempt to visually represent the
631
+ essence of an organization
632
+ :aligned_hypothesis: logos are symbols that attempt to visually represent the essence
633
+ of an organization
634
+ - :transcription: given that the new yahoo logo is a blandly cooperate humourless
635
+ confused jumble of unappealing elements
636
+ :hypothesis: given that the new yahoo rule is a plan to cooperate to over less confused
637
+ jumble of an unappealing elements
638
+ :insertions: 4
639
+ :deletions: 0
640
+ :substitutions: 3
641
+ :matching: 13
642
+ :align_cost: 7
643
+ :transcription_words: 16
644
+ :aligned_transcription: given that the new yahoo LOGO is a *** BLANDLY cooperate
645
+ *** *** HUMOURLESS confused jumble of *** unappealing elements
646
+ :aligned_hypothesis: given that the new yahoo RULE is a PLAN TO cooperate
647
+ TO OVER LESS confused jumble of AN unappealing elements