word_aligner 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,44 @@
1
+ require 'ostruct'
2
+
3
module WordAligner
  # Wraps the raw statistics hash of one alignment and derives word-level
  # rates from it.
  #
  # Every key of the hash is readable as an attribute (OpenStruct), and the
  # hash itself is kept untouched in #data. The derived readers below use the
  # +transcription_words+, +matching+ and +align_cost+ entries.
  class WordErrorRate < OpenStruct

    # The hash that was passed to the constructor, returned as-is.
    attr_reader :data

    # data - Hash of alignment statistics.
    def initialize(data)
      @data = data
      super(data)
    end

    # Value of the +transcription_words+ entry (nil when absent).
    def words
      transcription_words
    end

    # Value of the +matching+ entry (nil when absent).
    def correct_words
      matching
    end

    # Value of the +align_cost+ entry (nil when absent).
    def incorrect_words
      align_cost
    end

    # 100 minus #percentage_incorrect. Drops below zero whenever
    # +align_cost+ is larger than the word count.
    def percentage_accurate
      100 - percentage_incorrect
    end

    # #correct_words as a percentage of #words (Float).
    def percentage_correct
      percent_rate(correct_words)
    end

    # #incorrect_words as a percentage of #words (Float).
    def percentage_incorrect
      percent_rate(incorrect_words)
    end

    private

    # Returns +value+ as a percentage of #words.
    #
    # The divisor is clamped to at least 1 so an empty transcription does not
    # divide by zero. Missing entries (nil from OpenStruct) count as 0 so
    # incomplete data yields 0.0 instead of raising on nil.
    def percent_rate(value)
      (value || 0) * 100.0 / [words || 0, 1].max
    end

  end
end
@@ -0,0 +1,31 @@
1
+ require 'spec_helper'
2
+
3
# One regression sample. The hash it is built from supplies the
# +transcription+ and +hypothesis+ attributes read below.
class Sample < OpenStruct
  # Runs the sample through WordAligner::Aligner on first use and caches the
  # +data+ of the resulting word error rate for subsequent calls.
  def aligner_result
    @aligner_result ||= begin
      aligner = WordAligner::Aligner.new(transcription, hypothesis)
      aligner.word_error_rate.data
    end
  end
end
9
+
10
module WordAligner
  # Regression suite: every entry of the YAML fixture is wrapped in a Sample
  # and the aligner's data for it must equal the stored entry.
  describe Aligner do

    fixtures = YAML.load(File.read('spec/sample_data/regression/sentences.yml'))

    fixtures.each.with_index do |expected, position|

      describe "sample #{position}" do
        subject { Sample.new(expected) }

        # Sanity-check the fixture entry itself before comparing results.
        [:hypothesis, :transcription].each do |field|
          its(field) { should_not be_nil }
          its(field) { should_not be_empty }
        end

        its(:aligner_result) { should eq(expected) }
      end
    end
  end

end
@@ -0,0 +1,28 @@
1
+ require 'spec_helper'
2
+
3
module WordAligner
  describe WordErrorRate do

    # Raw statistics handed to the object under test.
    # NOTE(review): insertions: 8 looks inconsistent with align_cost: 2
    # (substitutions 2 + deletions 0); no example below reads it. Confirm
    # whether 0 was intended.
    let(:data) do
      {
        insertions: 8,
        substitutions: 2,
        deletions: 0,
        align_cost: 2,
        transcription_words: 8,
        matching: 6
      }
    end

    subject { described_class.new(data) }

    it { should be_a(described_class) }

    # Reader => value expected for the statistics above.
    {
      words: 8,
      correct_words: 6,
      incorrect_words: 2,
      percentage_correct: 75.0,
      percentage_incorrect: 25.0,
      percentage_accurate: 75.0
    }.each do |reader, expected|
      its(reader) { should eq(expected) }
    end
  end
end
@@ -0,0 +1,15 @@
1
+ require 'spec_helper'
2
+
3
describe WordAligner do

  describe '.align' do

    # The module-level entry point must hand back a WordErrorRate object.
    it 'returns a WordErrorRate' do
      result = WordAligner.align('hello world', 'hello wurld')

      expect(result).to be_a(WordAligner::WordErrorRate)
    end

  end

end
@@ -0,0 +1,51 @@
1
+ # USAGE: ruby grab_for_comparision.rb regression/sentences.txt.wa \
2
+ # regression/sentences.hypotheses.txt.wa > regression/sentences.yml
3
+ require 'yaml'
4
+
5
# The two input files are taken from the command line, in this order.
transcription_file, hypothesis_file = ARGV

unless transcription_file && hypothesis_file
  abort 'USAGE: ruby grab_for_comparision.rb TRANSCRIPTION_FILE HYPOTHESIS_FILE > OUTPUT.yml'
end

# Run word_align.pl with an argument vector instead of an interpolated shell
# string, so file names containing spaces or shell metacharacters are passed
# to perl unchanged and are never interpreted by a shell.
command = ['perl', 'word_align.pl', transcription_file, hypothesis_file]
output = IO.popen(command, &:read).split("\n").map(&:strip)

# Raw input lines, consumed in step with the aligner output further below.
transcription_lines = File.readlines(transcription_file)
hypothesis_lines = File.readlines(hypothesis_file)
11
+
12
# Removes a trailing "(id)" marker, together with the whitespace around it,
# from the end of +str+ and returns the result; +str+ itself is not modified.
#
# Only the final parenthesized group is removed. The group may not contain
# parentheses itself, so parenthesized text earlier in the line is preserved
# instead of being swallowed together with the id.
def strip_id(str)
  str.sub(/\s*\([^()]+\)\s*$/, '')
end
15
+
16
records = []

loop do
  # Each record is read as four consecutive output lines. shift(4) leaves the
  # missing ones nil once the output runs out.
  transcription, hypothesis, statistics, distance = output.shift(4)

  # Stop at the first group whose third line is not a "Words:" statistics
  # line. This includes running out of output, where statistics is nil and
  # calling match on it would raise NoMethodError.
  break unless statistics && statistics.match(/Words:/)

  # Only the first three numbers of the statistics line are used; any further
  # numbers on that line are ignored.
  words, correct, errors = statistics.scan(/\d+(?:[.]\d+)?/)
  insertions, deletions, substitutions = distance.scan(/\d+/)

  # Consume the matching raw input lines so they stay in step with the output.
  transcription_line = strip_id(transcription_lines.shift.strip)
  hypothesis_line = strip_id(hypothesis_lines.shift.strip)

  # Pairs with an empty transcription are left out of the fixture.
  next if transcription_line.empty?

  details = {
    transcription: transcription_line,
    hypothesis: hypothesis_line,
    insertions: insertions.to_i,
    deletions: deletions.to_i,
    substitutions: substitutions.to_i,
    matching: correct.to_i,
    align_cost: errors.to_i,
    transcription_words: words.to_i,
    aligned_transcription: strip_id(transcription),
    aligned_hypothesis: strip_id(hypothesis)
  }

  records << details
end

puts records.to_yaml
@@ -0,0 +1,647 @@
1
+ ---
2
+ - :transcription: I think this might just work for fine
3
+ :hypothesis: I'd think this might just work for laine
4
+ :insertions: 0
5
+ :deletions: 0
6
+ :substitutions: 2
7
+ :matching: 6
8
+ :align_cost: 2
9
+ :transcription_words: 8
10
+ :aligned_transcription: I think this might just work for FINE
11
+ :aligned_hypothesis: I'D think this might just work for LAINE
12
+ - :transcription: did you register for a new account
13
+ :hypothesis: hit you register for a new account
14
+ :insertions: 0
15
+ :deletions: 0
16
+ :substitutions: 1
17
+ :matching: 6
18
+ :align_cost: 1
19
+ :transcription_words: 7
20
+ :aligned_transcription: DID you register for a new account
21
+ :aligned_hypothesis: HIT you register for a new account
22
+ - :transcription: I almost forgot to tell you about our next meeting
23
+ :hypothesis: I almost forgot to tell you about our next meeting
24
+ :insertions: 0
25
+ :deletions: 0
26
+ :substitutions: 0
27
+ :matching: 10
28
+ :align_cost: 0
29
+ :transcription_words: 10
30
+ :aligned_transcription: i almost forgot to tell you about our next meeting
31
+ :aligned_hypothesis: i almost forgot to tell you about our next meeting
32
+ - :transcription: I am not using bash anymore
33
+ :hypothesis: if I am not using bash anymore
34
+ :insertions: 1
35
+ :deletions: 0
36
+ :substitutions: 0
37
+ :matching: 6
38
+ :align_cost: 1
39
+ :transcription_words: 6
40
+ :aligned_transcription: '*** i am not using bash anymore'
41
+ :aligned_hypothesis: IF i am not using bash anymore
42
+ - :transcription: I think I might switch to a windows computer
43
+ :hypothesis: I think it might switch to a windows computer
44
+ :insertions: 0
45
+ :deletions: 0
46
+ :substitutions: 1
47
+ :matching: 8
48
+ :align_cost: 1
49
+ :transcription_words: 9
50
+ :aligned_transcription: i think I might switch to a windows computer
51
+ :aligned_hypothesis: i think IT might switch to a windows computer
52
+ - :transcription: it returns the current hypothesis
53
+ :hypothesis: it returns the current hypothesis
54
+ :insertions: 0
55
+ :deletions: 0
56
+ :substitutions: 0
57
+ :matching: 5
58
+ :align_cost: 0
59
+ :transcription_words: 5
60
+ :aligned_transcription: it returns the current hypothesis
61
+ :aligned_hypothesis: it returns the current hypothesis
62
+ - :transcription: you cannot code HTML by voice
63
+ :hypothesis: you cannot code HTML by avoids
64
+ :insertions: 0
65
+ :deletions: 0
66
+ :substitutions: 1
67
+ :matching: 5
68
+ :align_cost: 1
69
+ :transcription_words: 6
70
+ :aligned_transcription: you cannot code html by VOICE
71
+ :aligned_hypothesis: you cannot code html by AVOIDS
72
+ - :transcription: why exactly would you do that
73
+ :hypothesis: why exactly would you go that
74
+ :insertions: 0
75
+ :deletions: 0
76
+ :substitutions: 1
77
+ :matching: 5
78
+ :align_cost: 1
79
+ :transcription_words: 6
80
+ :aligned_transcription: why exactly would you DO that
81
+ :aligned_hypothesis: why exactly would you GO that
82
+ - :transcription: I didn't think about it like that
83
+ :hypothesis: I didn't think about it like that
84
+ :insertions: 0
85
+ :deletions: 0
86
+ :substitutions: 0
87
+ :matching: 7
88
+ :align_cost: 0
89
+ :transcription_words: 7
90
+ :aligned_transcription: i didn't think about it like that
91
+ :aligned_hypothesis: i didn't think about it like that
92
+ - :transcription: haven't you figured out the meaning of the algorithm yet
93
+ :hypothesis: haven't you fake it out the meaning of the I'd go with him yet
94
+ :insertions: 4
95
+ :deletions: 0
96
+ :substitutions: 2
97
+ :matching: 8
98
+ :align_cost: 6
99
+ :transcription_words: 10
100
+ :aligned_transcription: haven't you *** FIGURED out the meaning of the *** ***
101
+ *** ALGORITHM yet
102
+ :aligned_hypothesis: haven't you FAKE IT out the meaning of the I'D GO WITH
103
+ HIM yet
104
+ - :transcription: the algorithm is quite good
105
+ :hypothesis: the algorithm is quite good
106
+ :insertions: 0
107
+ :deletions: 0
108
+ :substitutions: 0
109
+ :matching: 5
110
+ :align_cost: 0
111
+ :transcription_words: 5
112
+ :aligned_transcription: the algorithm is quite good
113
+ :aligned_hypothesis: the algorithm is quite good
114
+ - :transcription: it responds in a custom way
115
+ :hypothesis: it's response in a custom way
116
+ :insertions: 0
117
+ :deletions: 0
118
+ :substitutions: 2
119
+ :matching: 4
120
+ :align_cost: 2
121
+ :transcription_words: 6
122
+ :aligned_transcription: IT RESPONDS in a custom way
123
+ :aligned_hypothesis: IT'S RESPONSE in a custom way
124
+ - :transcription: it won't work for HTML
125
+ :hypothesis: it won't work for HTML
126
+ :insertions: 0
127
+ :deletions: 0
128
+ :substitutions: 0
129
+ :matching: 5
130
+ :align_cost: 0
131
+ :transcription_words: 5
132
+ :aligned_transcription: it won't work for html
133
+ :aligned_hypothesis: it won't work for html
134
+ - :transcription: but it will work good for ruby on rails
135
+ :hypothesis: but it will work good for ruby on rails
136
+ :insertions: 0
137
+ :deletions: 0
138
+ :substitutions: 0
139
+ :matching: 9
140
+ :align_cost: 0
141
+ :transcription_words: 9
142
+ :aligned_transcription: but it will work good for ruby on rails
143
+ :aligned_hypothesis: but it will work good for ruby on rails
144
+ - :transcription: the user would decide when to update
145
+ :hypothesis: but the user would decide when to app to
146
+ :insertions: 2
147
+ :deletions: 0
148
+ :substitutions: 1
149
+ :matching: 6
150
+ :align_cost: 3
151
+ :transcription_words: 7
152
+ :aligned_transcription: '*** the user would decide when to *** UPDATE'
153
+ :aligned_hypothesis: BUT the user would decide when to APP TO
154
+ - :transcription: when you define a new class it is not there
155
+ :hypothesis: when you defining you close it is not there
156
+ :insertions: 0
157
+ :deletions: 1
158
+ :substitutions: 3
159
+ :matching: 6
160
+ :align_cost: 4
161
+ :transcription_words: 10
162
+ :aligned_transcription: when you DEFINE A NEW CLASS it is not there
163
+ :aligned_hypothesis: when you *** DEFINING YOU CLOSE it is not there
164
+ - :transcription: so the voice recognition system would learn new classes while you
165
+ are defining them
166
+ :hypothesis: so the voice recognition system would learn new classes where you are
167
+ defining them for
168
+ :insertions: 1
169
+ :deletions: 0
170
+ :substitutions: 1
171
+ :matching: 13
172
+ :align_cost: 2
173
+ :transcription_words: 14
174
+ :aligned_transcription: so the voice recognition system would learn new classes
175
+ WHILE you are defining them ***
176
+ :aligned_hypothesis: so the voice recognition system would learn new classes WHERE
177
+ you are defining them FOR
178
+ - :transcription: and it would build a custom language model based on ctags
179
+ :hypothesis: and it would build a custom language model based on see Tex
180
+ :insertions: 1
181
+ :deletions: 0
182
+ :substitutions: 1
183
+ :matching: 10
184
+ :align_cost: 2
185
+ :transcription_words: 11
186
+ :aligned_transcription: and it would build a custom language model based on ***
187
+ CTAGS
188
+ :aligned_hypothesis: and it would build a custom language model based on SEE
189
+ TEX
190
+ - :transcription: so it understands all your classes and methods like words
191
+ :hypothesis: so it understands Oreo classes and methods like words
192
+ :insertions: 0
193
+ :deletions: 1
194
+ :substitutions: 1
195
+ :matching: 8
196
+ :align_cost: 2
197
+ :transcription_words: 10
198
+ :aligned_transcription: so it understands ALL YOUR classes and methods like words
199
+ :aligned_hypothesis: so it understands *** OREO classes and methods like words
200
+ - :transcription: I think this might be a good idea
201
+ :hypothesis: I'd think this might be a good idea
202
+ :insertions: 0
203
+ :deletions: 0
204
+ :substitutions: 1
205
+ :matching: 7
206
+ :align_cost: 1
207
+ :transcription_words: 8
208
+ :aligned_transcription: I think this might be a good idea
209
+ :aligned_hypothesis: I'D think this might be a good idea
210
+ - :transcription: I think this should be much faster
211
+ :hypothesis: I think the should be much faster
212
+ :insertions: 0
213
+ :deletions: 0
214
+ :substitutions: 1
215
+ :matching: 6
216
+ :align_cost: 1
217
+ :transcription_words: 7
218
+ :aligned_transcription: i think THIS should be much faster
219
+ :aligned_hypothesis: i think THE should be much faster
220
+ - :transcription: did you see my latest commit
221
+ :hypothesis: and opted you ca latest committed up
222
+ :insertions: 1
223
+ :deletions: 0
224
+ :substitutions: 5
225
+ :matching: 1
226
+ :align_cost: 6
227
+ :transcription_words: 6
228
+ :aligned_transcription: '*** DID you SEE MY LATEST COMMIT'
229
+ :aligned_hypothesis: AND OPTED you CA LATEST COMMITTED UP
230
+ - :transcription: click on first result
231
+ :hypothesis: click on first result
232
+ :insertions: 0
233
+ :deletions: 0
234
+ :substitutions: 0
235
+ :matching: 4
236
+ :align_cost: 0
237
+ :transcription_words: 4
238
+ :aligned_transcription: click on first result
239
+ :aligned_hypothesis: click on first result
240
+ - :transcription: click on second result
241
+ :hypothesis: click on second free soft
242
+ :insertions: 1
243
+ :deletions: 0
244
+ :substitutions: 1
245
+ :matching: 3
246
+ :align_cost: 2
247
+ :transcription_words: 4
248
+ :aligned_transcription: click on second *** RESULT
249
+ :aligned_hypothesis: click on second FREE SOFT
250
+ - :transcription: go to Google
251
+ :hypothesis: go to Google on
252
+ :insertions: 1
253
+ :deletions: 0
254
+ :substitutions: 0
255
+ :matching: 3
256
+ :align_cost: 1
257
+ :transcription_words: 3
258
+ :aligned_transcription: go to google ***
259
+ :aligned_hypothesis: go to google ON
260
+ - :transcription: validates presence of name
261
+ :hypothesis: valid dates presence of name
262
+ :insertions: 1
263
+ :deletions: 0
264
+ :substitutions: 1
265
+ :matching: 3
266
+ :align_cost: 2
267
+ :transcription_words: 4
268
+ :aligned_transcription: '*** VALIDATES presence of name'
269
+ :aligned_hypothesis: VALID DATES presence of name
270
+ - :transcription: validates uniqueness of name
271
+ :hypothesis: wedded its uniqueness often name
272
+ :insertions: 1
273
+ :deletions: 0
274
+ :substitutions: 2
275
+ :matching: 2
276
+ :align_cost: 3
277
+ :transcription_words: 4
278
+ :aligned_transcription: '*** VALIDATES uniqueness OF name'
279
+ :aligned_hypothesis: WEDDED ITS uniqueness OFTEN name
280
+ - :transcription: belongs to language
281
+ :hypothesis: belongs to language
282
+ :insertions: 0
283
+ :deletions: 0
284
+ :substitutions: 0
285
+ :matching: 3
286
+ :align_cost: 0
287
+ :transcription_words: 3
288
+ :aligned_transcription: belongs to language
289
+ :aligned_hypothesis: belongs to language
290
+ - :transcription: belongs to user
291
+ :hypothesis: belongs to user
292
+ :insertions: 0
293
+ :deletions: 0
294
+ :substitutions: 0
295
+ :matching: 3
296
+ :align_cost: 0
297
+ :transcription_words: 3
298
+ :aligned_transcription: belongs to user
299
+ :aligned_hypothesis: belongs to user
300
+ - :transcription: it should have three actions
301
+ :hypothesis: it should have three actions
302
+ :insertions: 0
303
+ :deletions: 0
304
+ :substitutions: 0
305
+ :matching: 5
306
+ :align_cost: 0
307
+ :transcription_words: 5
308
+ :aligned_transcription: it should have three actions
309
+ :aligned_hypothesis: it should have three actions
310
+ - :transcription: I didn't think this would work
311
+ :hypothesis: I didn't think this would work
312
+ :insertions: 0
313
+ :deletions: 0
314
+ :substitutions: 0
315
+ :matching: 6
316
+ :align_cost: 0
317
+ :transcription_words: 6
318
+ :aligned_transcription: i didn't think this would work
319
+ :aligned_hypothesis: i didn't think this would work
320
+ - :transcription: I am now testing another recording
321
+ :hypothesis: I am not testing another recording
322
+ :insertions: 0
323
+ :deletions: 0
324
+ :substitutions: 1
325
+ :matching: 5
326
+ :align_cost: 1
327
+ :transcription_words: 6
328
+ :aligned_transcription: i am NOW testing another recording
329
+ :aligned_hypothesis: i am NOT testing another recording
330
+ - :transcription: hello and welcome
331
+ :hypothesis: hello and welcome
332
+ :insertions: 0
333
+ :deletions: 0
334
+ :substitutions: 0
335
+ :matching: 3
336
+ :align_cost: 0
337
+ :transcription_words: 3
338
+ :aligned_transcription: hello and welcome
339
+ :aligned_hypothesis: hello and welcome
340
+ - :transcription: by the way everything that you have just read was recognized by
341
+ my software
342
+ :hypothesis: by the way everything that you have just read was recognized by my
343
+ software
344
+ :insertions: 0
345
+ :deletions: 0
346
+ :substitutions: 0
347
+ :matching: 14
348
+ :align_cost: 0
349
+ :transcription_words: 14
350
+ :aligned_transcription: by the way everything that you have just read was recognized
351
+ by my software
352
+ :aligned_hypothesis: by the way everything that you have just read was recognized
353
+ by my software
354
+ - :transcription: with only minor errors in the recognition
355
+ :hypothesis: with only minor errors in the recognition
356
+ :insertions: 0
357
+ :deletions: 0
358
+ :substitutions: 0
359
+ :matching: 7
360
+ :align_cost: 0
361
+ :transcription_words: 7
362
+ :aligned_transcription: with only minor errors in the recognition
363
+ :aligned_hypothesis: with only minor errors in the recognition
364
+ - :transcription: please fetch the files from the server
365
+ :hypothesis: please search the files from the server
366
+ :insertions: 0
367
+ :deletions: 0
368
+ :substitutions: 1
369
+ :matching: 6
370
+ :align_cost: 1
371
+ :transcription_words: 7
372
+ :aligned_transcription: please FETCH the files from the server
373
+ :aligned_hypothesis: please SEARCH the files from the server
374
+ - :transcription: the real challenge is coming up with a good speech representation
375
+ of ruby
376
+ :hypothesis: the real challenge is coming up with Blake good speech representation
377
+ of rube
378
+ :insertions: 0
379
+ :deletions: 0
380
+ :substitutions: 2
381
+ :matching: 11
382
+ :align_cost: 2
383
+ :transcription_words: 13
384
+ :aligned_transcription: the real challenge is coming up with A good speech
385
+ representation of RUBY
386
+ :aligned_hypothesis: the real challenge is coming up with BLAKE good speech representation
387
+ of RUBE
388
+ - :transcription: that follows the principle of least surprise
389
+ :hypothesis: that follows the principle of the surprise
390
+ :insertions: 0
391
+ :deletions: 0
392
+ :substitutions: 1
393
+ :matching: 6
394
+ :align_cost: 1
395
+ :transcription_words: 7
396
+ :aligned_transcription: that follows the principle of LEAST surprise
397
+ :aligned_hypothesis: that follows the principle of THE surprise
398
+ - :transcription: and deals appropriately with ambiguous cases
399
+ :hypothesis: and this appropriately it was ambiguous cases
400
+ :insertions: 1
401
+ :deletions: 0
402
+ :substitutions: 2
403
+ :matching: 4
404
+ :align_cost: 3
405
+ :transcription_words: 6
406
+ :aligned_transcription: and DEALS appropriately *** WITH ambiguous cases
407
+ :aligned_hypothesis: and THIS appropriately IT WAS ambiguous cases
408
+ - :transcription: the good thing is that you can get rid of a lot of manual work
409
+ :hypothesis: the good thing is that you can get rid of a lot of manual work
410
+ :insertions: 0
411
+ :deletions: 0
412
+ :substitutions: 0
413
+ :matching: 15
414
+ :align_cost: 0
415
+ :transcription_words: 15
416
+ :aligned_transcription: the good thing is that you can get rid of a lot of manual
417
+ work
418
+ :aligned_hypothesis: the good thing is that you can get rid of a lot of manual
419
+ work
420
+ - :transcription: for example attribute accessors are nearly always placed at the
421
+ top of the file
422
+ :hypothesis: for example attribute excesses are nearly always placed at the top
423
+ of the file
424
+ :insertions: 0
425
+ :deletions: 0
426
+ :substitutions: 1
427
+ :matching: 13
428
+ :align_cost: 1
429
+ :transcription_words: 14
430
+ :aligned_transcription: for example attribute ACCESSORS are nearly always placed
431
+ at the top of the file
432
+ :aligned_hypothesis: for example attribute EXCESSES are nearly always placed at the
433
+ top of the file
434
+ - :transcription: so when you say something like
435
+ :hypothesis: so when you say something like
436
+ :insertions: 0
437
+ :deletions: 0
438
+ :substitutions: 0
439
+ :matching: 6
440
+ :align_cost: 0
441
+ :transcription_words: 6
442
+ :aligned_transcription: so when you say something like
443
+ :aligned_hypothesis: so when you say something like
444
+ - :transcription: attribute accessor file name
445
+ :hypothesis: attribute access server fine name
446
+ :insertions: 1
447
+ :deletions: 0
448
+ :substitutions: 2
449
+ :matching: 2
450
+ :align_cost: 3
451
+ :transcription_words: 4
452
+ :aligned_transcription: attribute *** ACCESSOR FILE name
453
+ :aligned_hypothesis: attribute ACCESS SERVER FINE name
454
+ - :transcription: it will automatically put the following line at the top of the file
455
+ in the right place
456
+ :hypothesis: it will automatically put the following line at the top of the file
457
+ in the right place
458
+ :insertions: 0
459
+ :deletions: 0
460
+ :substitutions: 0
461
+ :matching: 17
462
+ :align_cost: 0
463
+ :transcription_words: 17
464
+ :aligned_transcription: it will automatically put the following line at the top
465
+ of the file in the right place
466
+ :aligned_hypothesis: it will automatically put the following line at the top of the
467
+ file in the right place
468
+ - :transcription: so the whole approach works only with one unified style
469
+ :hypothesis: so the whole approach works only with one unified style
470
+ :insertions: 0
471
+ :deletions: 0
472
+ :substitutions: 0
473
+ :matching: 10
474
+ :align_cost: 0
475
+ :transcription_words: 10
476
+ :aligned_transcription: so the whole approach works only with one unified style
477
+ :aligned_hypothesis: so the whole approach works only with one unified style
478
+ - :transcription: which is the ruby best practices style which is published on github
479
+ :hypothesis: which is did ruby best practices style which is published on git help
480
+ :insertions: 1
481
+ :deletions: 0
482
+ :substitutions: 2
483
+ :matching: 10
484
+ :align_cost: 3
485
+ :transcription_words: 12
486
+ :aligned_transcription: which is THE ruby best practices style which is published
487
+ on *** GITHUB
488
+ :aligned_hypothesis: which is DID ruby best practices style which is published
489
+ on GIT HELP
490
+ - :transcription: I wonder if I should create a custom language model just for programming
491
+ :hypothesis: I wonder if I should create a custom language will just for programming
492
+ :insertions: 0
493
+ :deletions: 0
494
+ :substitutions: 1
495
+ :matching: 12
496
+ :align_cost: 1
497
+ :transcription_words: 13
498
+ :aligned_transcription: i wonder if i should create a custom language MODEL
499
+ just for programming
500
+ :aligned_hypothesis: i wonder if i should create a custom language WILL just
501
+ for programming
502
+ - :transcription: or if I should use the normal dictation model and just train it
503
+ for programming
504
+ :hypothesis: or if I should use that normal dictation on model and just train it
505
+ for programming
506
+ :insertions: 1
507
+ :deletions: 0
508
+ :substitutions: 1
509
+ :matching: 14
510
+ :align_cost: 2
511
+ :transcription_words: 15
512
+ :aligned_transcription: or if i should use THE normal dictation *** model and
513
+ just train it for programming
514
+ :aligned_hypothesis: or if i should use THAT normal dictation ON model and
515
+ just train it for programming
516
+ - :transcription: the advantage is that it would also recognize normal sentences such
517
+ as commit messages
518
+ :hypothesis: the advantage is that it would also recognized normal sentences such
519
+ as commit messages
520
+ :insertions: 0
521
+ :deletions: 0
522
+ :substitutions: 1
523
+ :matching: 13
524
+ :align_cost: 1
525
+ :transcription_words: 14
526
+ :aligned_transcription: the advantage is that it would also RECOGNIZE normal
527
+ sentences such as commit messages
528
+ :aligned_hypothesis: the advantage is that it would also RECOGNIZED normal sentences
529
+ such as commit messages
530
+ - :transcription: while the disadvantage is that it would not work as accurate on
531
+ programming messages
532
+ :hypothesis: why the disadvantage is that it would not work as a correct on programming
533
+ messages
534
+ :insertions: 1
535
+ :deletions: 0
536
+ :substitutions: 2
537
+ :matching: 12
538
+ :align_cost: 3
539
+ :transcription_words: 14
540
+ :aligned_transcription: WHILE the disadvantage is that it would not work as ***
541
+ ACCURATE on programming messages
542
+ :aligned_hypothesis: WHY the disadvantage is that it would not work as A CORRECT on programming
543
+ messages
544
+ - :transcription: another idea would be a hybrid approach
545
+ :hypothesis: another idea would be a hybrid approach
546
+ :insertions: 0
547
+ :deletions: 0
548
+ :substitutions: 0
549
+ :matching: 7
550
+ :align_cost: 0
551
+ :transcription_words: 7
552
+ :aligned_transcription: another idea would be a hybrid approach
553
+ :aligned_hypothesis: another idea would be a hybrid approach
554
+ - :transcription: that means whenever you are entering a string value or a commit
555
+ message it would switch automatically to the dictation language model
556
+ :hypothesis: that means whenever you are entering any string value or a commit message
557
+ it would switch automatically to the dictation language more
558
+ :insertions: 0
559
+ :deletions: 0
560
+ :substitutions: 2
561
+ :matching: 20
562
+ :align_cost: 2
563
+ :transcription_words: 22
564
+ :aligned_transcription: that means whenever you are entering A string value or a commit
565
+ message it would switch automatically to the dictation language MODEL
566
+ :aligned_hypothesis: that means whenever you are entering ANY string value or a commit
567
+ message it would switch automatically to the dictation language MORE
568
+ - :transcription: I am not sure if Google's voice recognition is actually that good
569
+ :hypothesis: I am not sure it's Google's voice recognition is actually dead code
570
+ :insertions: 0
571
+ :deletions: 0
572
+ :substitutions: 3
573
+ :matching: 9
574
+ :align_cost: 3
575
+ :transcription_words: 12
576
+ :aligned_transcription: i am not sure IF google's voice recognition is actually
577
+ THAT GOOD
578
+ :aligned_hypothesis: i am not sure IT'S google's voice recognition is actually
579
+ DEAD CODE
580
+ - :transcription: we can wait no problem
581
+ :hypothesis: we can wait no problem
582
+ :insertions: 0
583
+ :deletions: 0
584
+ :substitutions: 0
585
+ :matching: 5
586
+ :align_cost: 0
587
+ :transcription_words: 5
588
+ :aligned_transcription: we can wait no problem
589
+ :aligned_hypothesis: we can wait no problem
590
+ - :transcription: wow it's really fast
591
+ :hypothesis: wow it's really fast
592
+ :insertions: 0
593
+ :deletions: 0
594
+ :substitutions: 0
595
+ :matching: 4
596
+ :align_cost: 0
597
+ :transcription_words: 4
598
+ :aligned_transcription: wow it's really fast
599
+ :aligned_hypothesis: wow it's really fast
600
+ - :transcription: the URL is different
601
+ :hypothesis: do you are at it is different
602
+ :insertions: 3
603
+ :deletions: 0
604
+ :substitutions: 2
605
+ :matching: 2
606
+ :align_cost: 5
607
+ :transcription_words: 4
608
+ :aligned_transcription: '*** *** *** THE URL is different'
609
+ :aligned_hypothesis: DO YOU ARE AT IT is different
610
+ - :transcription: we would basically just need to change that
611
+ :hypothesis: we would basically just need to change that
612
+ :insertions: 0
613
+ :deletions: 0
614
+ :substitutions: 0
615
+ :matching: 8
616
+ :align_cost: 0
617
+ :transcription_words: 8
618
+ :aligned_transcription: we would basically just need to change that
619
+ :aligned_hypothesis: we would basically just need to change that
620
+ - :transcription: logos are symbols that attempt to visually represent the essence
621
+ of an organization
622
+ :hypothesis: logos are symbols that attempt to visually represent the essence of
623
+ an organization
624
+ :insertions: 0
625
+ :deletions: 0
626
+ :substitutions: 0
627
+ :matching: 13
628
+ :align_cost: 0
629
+ :transcription_words: 13
630
+ :aligned_transcription: logos are symbols that attempt to visually represent the
631
+ essence of an organization
632
+ :aligned_hypothesis: logos are symbols that attempt to visually represent the essence
633
+ of an organization
634
+ - :transcription: given that the new yahoo logo is a blandly cooperate humourless
635
+ confused jumble of unappealing elements
636
+ :hypothesis: given that the new yahoo rule is a plan to cooperate to over less confused
637
+ jumble of an unappealing elements
638
+ :insertions: 4
639
+ :deletions: 0
640
+ :substitutions: 3
641
+ :matching: 13
642
+ :align_cost: 7
643
+ :transcription_words: 16
644
+ :aligned_transcription: given that the new yahoo LOGO is a *** BLANDLY cooperate
645
+ *** *** HUMOURLESS confused jumble of *** unappealing elements
646
+ :aligned_hypothesis: given that the new yahoo RULE is a PLAN TO cooperate
647
+ TO OVER LESS confused jumble of AN unappealing elements