RubyGems - word_aligner - Versions diffs - 0.1.0 - Mend

word_aligner 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

checksums.yaml +7 -0
data/Gemfile +18 -0
data/Gemfile.lock +115 -0
data/Guardfile +8 -0
data/LICENSE.txt +20 -0
data/README.md +45 -0
data/Rakefile +45 -0
data/VERSION +1 -0
data/lib/word_aligner.rb +11 -0
data/lib/word_aligner/aligner.rb +170 -0
data/lib/word_aligner/word_error_rate.rb +44 -0
data/spec/lib/word_aligner/aligner_spec.rb +31 -0
data/spec/lib/word_aligner/word_error_rate_spec.rb +28 -0
data/spec/lib/word_aligner_spec.rb +15 -0
data/spec/sample_data/grab_for_comparision.rb +51 -0
data/spec/sample_data/regression/sentences.yml +647 -0
data/spec/sample_data/source_data/enough.hypotheses.txt +1 -0
data/spec/sample_data/source_data/enough.txt +1 -0
data/spec/sample_data/source_data/exactly_data.hypotheses.txt +1 -0
data/spec/sample_data/source_data/exactly_data.txt +1 -0
data/spec/sample_data/source_data/hamlet.hypotheses.txt +2 -0
data/spec/sample_data/source_data/hamlet.txt +2 -0
data/spec/sample_data/source_data/that_might.hypotheses.txt +1 -0
data/spec/sample_data/source_data/that_might.txt +1 -0
data/spec/sample_data/word_align.pl +302 -0
data/spec/spec_helper.rb +20 -0
metadata +143 -0

data/lib/word_aligner/word_error_rate.rb ADDED Viewed

@@ -0,0 +1,44 @@
+require 'ostruct'
+module WordAligner
+  class WordErrorRate < OpenStruct # alignment counts plus percentages derived from them
+    attr_reader :data # the object handed to the constructor, kept as-is
+    def initialize(data)
+      @data = data
+      super(data) # OpenStruct turns each key of data into a reader method
+    end
+    def words # value of the transcription_words entry
+      transcription_words
+    end
+    def correct_words # value of the matching entry
+      matching
+    end
+    def incorrect_words # value of the align_cost entry
+      align_cost
+    end
+    def percentage_accurate # 100 minus percentage_incorrect; negative when incorrect_words exceeds words
+      100-percentage_incorrect
+    end
+    def percentage_correct # correct_words as a percentage of words
+      percent_rate(correct_words)
+    end
+    def percentage_incorrect # incorrect_words as a percentage of words
+      percent_rate(incorrect_words)
+    end
+    private
+    def percent_rate(value) # value as a Float percentage of words
+      value * 100.0 / [ words, 1].max # denominator is at least 1, so zero words cannot divide by zero
+    end
+  end
+end

data/spec/lib/word_aligner/aligner_spec.rb ADDED Viewed

@@ -0,0 +1,31 @@
+require 'spec_helper'
+class Sample < OpenStruct # wraps one regression record so its keys read as methods
+  def aligner_result # memoized: aligns transcription against hypothesis and returns the result's data
+    @aligner_result ||= WordAligner::Aligner.new(transcription, hypothesis)
+      .word_error_rate.data
+  end
+end
+module WordAligner
+  describe Aligner do # regression suite: one example group per record in sentences.yml
+    samples = YAML.load File.read('spec/sample_data/regression/sentences.yml') # relative path, resolved against the working directory
+    samples.each_with_index do |sample, idx|
+      describe "sample #{idx}" do
+        subject { Sample.new(sample) }
+        its(:hypothesis) { should_not be_nil } # sanity checks on the fixture record itself
+        its(:hypothesis) { should_not be_empty }
+        its(:transcription) { should_not be_nil }
+        its(:transcription) { should_not be_empty }
+        its(:aligner_result) { should eq sample } # the aligner's data must equal the whole stored record
+      end
+    end
+  end
+end

data/spec/lib/word_aligner/word_error_rate_spec.rb ADDED Viewed

@@ -0,0 +1,28 @@
+require 'spec_helper'
+module WordAligner
+  describe WordErrorRate do # checks the aliases and percentages computed from a fixed counts hash
+    let(:data) do
+      {
+        insertions:          8, # NOTE(review): looks inconsistent with align_cost 2; no expectation below reads it - confirm
+        substitutions:       2,
+        deletions:           0,
+        align_cost:          2,
+        transcription_words: 8,
+        matching:            6
+      }
+    end
+    subject { WordErrorRate.new(data) }
+    it { should be_a(WordErrorRate) }
+    its(:words)                { should eq(8) } # transcription_words
+    its(:correct_words)        { should eq(6) } # matching
+    its(:incorrect_words)      { should eq(2) } # align_cost
+    its(:percentage_correct)   { should eq(75.0) } # 6 of 8 words
+    its(:percentage_incorrect) { should eq(25.0) } # 2 of 8 words
+    its(:percentage_accurate)  { should eq(75.0) } # 100 - 25.0
+  end
+end

data/spec/lib/word_aligner_spec.rb ADDED Viewed

@@ -0,0 +1,15 @@
+require 'spec_helper'
+describe WordAligner do # smoke test for the module-level entry point
+  describe '.align' do
+    it 'returns a WordErrorRate' do
+      expect(
+        WordAligner.align('hello world', 'hello wurld') # only the result's class is checked, not its figures
+      ).to be_a(WordAligner::WordErrorRate)
+    end
+  end
+end

data/spec/sample_data/grab_for_comparision.rb ADDED Viewed

@@ -0,0 +1,51 @@
+# USAGE: ruby grab_for_comparision.rb regression/sentences.txt.wa \
+# regression/sentences.hypotheses.txt.wa > regression/sentences.yml
+require 'yaml'
+transcription_file, hypothesis_file = ARGV
+command = "perl word_align.pl #{transcription_file} #{hypothesis_file}"
+output  = `#{command}`.split("\n").map(&:strip)
+transcription_lines   = File.readlines(transcription_file)
+hypothesis_lines = File.readlines(hypothesis_file)
+def strip_id(str)
+  str.sub(/\s*\(.+?\)\s*$/, '')
+end
+records = []
+loop do
+  transcription = output.shift
+  hypothesis    = output.shift
+  statistics    = output.shift
+  distance      = output.shift
+  break unless statistics.match(/Words:/)
+  words, correct, errors,
+  percentage_correct, error, accuracy     = statistics.scan(/\d+(?:[.]\d+)?/)
+  insertions, deletions, substitutions = distance.scan(/\d+/)
+  transcription_line = strip_id(transcription_lines.shift.strip)
+  hypothesis_line    = strip_id(hypothesis_lines.shift.strip)
+  next if transcription_line.empty?
+  details = {
+    transcription:          transcription_line,
+    hypothesis:             hypothesis_line,
+    insertions:             insertions.to_i,
+    deletions:              deletions.to_i,
+    substitutions:          substitutions.to_i,
+    matching:               correct.to_i,
+    align_cost:             errors.to_i,
+    transcription_words:    words.to_i,
+    aligned_transcription:  strip_id(transcription),
+    aligned_hypothesis:     strip_id(hypothesis)
+  }
+  records << details
+end
+puts records.to_yaml

data/spec/sample_data/regression/sentences.yml ADDED Viewed

@@ -0,0 +1,647 @@
+---
+- :transcription: I think this might just work for fine
+  :hypothesis: I'd think this might just work for laine
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 2
+  :matching: 6
+  :align_cost: 2
+  :transcription_words: 8
+  :aligned_transcription: I   think this might just work for FINE
+  :aligned_hypothesis: I'D think this might just work for LAINE
+- :transcription: did you register for a new account
+  :hypothesis: hit you register for a new account
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 1
+  :matching: 6
+  :align_cost: 1
+  :transcription_words: 7
+  :aligned_transcription: DID you register for a   new account
+  :aligned_hypothesis: HIT you register for a   new account
+- :transcription: I almost forgot to tell you about our next meeting
+  :hypothesis: I almost forgot to tell you about our next meeting
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 0
+  :matching: 10
+  :align_cost: 0
+  :transcription_words: 10
+  :aligned_transcription: i   almost forgot to  tell you about our next meeting
+  :aligned_hypothesis: i   almost forgot to  tell you about our next meeting
+- :transcription: I am not using bash anymore
+  :hypothesis: if I am not using bash anymore
+  :insertions: 1
+  :deletions: 0
+  :substitutions: 0
+  :matching: 6
+  :align_cost: 1
+  :transcription_words: 6
+  :aligned_transcription: '*** i   am  not using bash anymore'
+  :aligned_hypothesis: IF  i   am  not using bash anymore
+- :transcription: I think I might switch to a windows computer
+  :hypothesis: I think it might switch to a windows computer
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 1
+  :matching: 8
+  :align_cost: 1
+  :transcription_words: 9
+  :aligned_transcription: i   think I   might switch to  a   windows computer
+  :aligned_hypothesis: i   think IT  might switch to  a   windows computer
+- :transcription: it returns the current hypothesis
+  :hypothesis: it returns the current hypothesis
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 0
+  :matching: 5
+  :align_cost: 0
+  :transcription_words: 5
+  :aligned_transcription: it  returns the current hypothesis
+  :aligned_hypothesis: it  returns the current hypothesis
+- :transcription: you cannot code HTML by voice
+  :hypothesis: you cannot code HTML by avoids
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 1
+  :matching: 5
+  :align_cost: 1
+  :transcription_words: 6
+  :aligned_transcription: you cannot code html by  VOICE
+  :aligned_hypothesis: you cannot code html by  AVOIDS
+- :transcription: why exactly would you do that
+  :hypothesis: why exactly would you go that
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 1
+  :matching: 5
+  :align_cost: 1
+  :transcription_words: 6
+  :aligned_transcription: why exactly would you DO  that
+  :aligned_hypothesis: why exactly would you GO  that
+- :transcription: I didn't think about it like that
+  :hypothesis: I didn't think about it like that
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 0
+  :matching: 7
+  :align_cost: 0
+  :transcription_words: 7
+  :aligned_transcription: i   didn't think about it  like that
+  :aligned_hypothesis: i   didn't think about it  like that
+- :transcription: haven't you figured out the meaning of the algorithm yet
+  :hypothesis: haven't you fake it out the meaning of the I'd go with him yet
+  :insertions: 4
+  :deletions: 0
+  :substitutions: 2
+  :matching: 8
+  :align_cost: 6
+  :transcription_words: 10
+  :aligned_transcription: haven't you ***  FIGURED out the meaning of  the *** ***
+    ***  ALGORITHM yet
+  :aligned_hypothesis: haven't you FAKE IT      out the meaning of  the I'D GO  WITH
+    HIM       yet
+- :transcription: the algorithm is quite good
+  :hypothesis: the algorithm is quite good
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 0
+  :matching: 5
+  :align_cost: 0
+  :transcription_words: 5
+  :aligned_transcription: the algorithm is  quite good
+  :aligned_hypothesis: the algorithm is  quite good
+- :transcription: it responds in a custom way
+  :hypothesis: it's response in a custom way
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 2
+  :matching: 4
+  :align_cost: 2
+  :transcription_words: 6
+  :aligned_transcription: IT   RESPONDS in  a   custom way
+  :aligned_hypothesis: IT'S RESPONSE in  a   custom way
+- :transcription: it won't work for HTML
+  :hypothesis: it won't work for HTML
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 0
+  :matching: 5
+  :align_cost: 0
+  :transcription_words: 5
+  :aligned_transcription: it  won't work for html
+  :aligned_hypothesis: it  won't work for html
+- :transcription: but it will work good for ruby on rails
+  :hypothesis: but it will work good for ruby on rails
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 0
+  :matching: 9
+  :align_cost: 0
+  :transcription_words: 9
+  :aligned_transcription: but it  will work good for ruby on  rails
+  :aligned_hypothesis: but it  will work good for ruby on  rails
+- :transcription: the user would decide when to update
+  :hypothesis: but the user would decide when to app to
+  :insertions: 2
+  :deletions: 0
+  :substitutions: 1
+  :matching: 6
+  :align_cost: 3
+  :transcription_words: 7
+  :aligned_transcription: '*** the user would decide when to  *** UPDATE'
+  :aligned_hypothesis: BUT the user would decide when to  APP TO
+- :transcription: when you define a new class it is not there
+  :hypothesis: when you defining you close it is not there
+  :insertions: 0
+  :deletions: 1
+  :substitutions: 3
+  :matching: 6
+  :align_cost: 4
+  :transcription_words: 10
+  :aligned_transcription: when you DEFINE A        NEW CLASS it  is  not there
+  :aligned_hypothesis: when you ***    DEFINING YOU CLOSE it  is  not there
+- :transcription: so the voice recognition system would learn new classes while you
+    are defining them
+  :hypothesis: so the voice recognition system would learn new classes where you are
+    defining them for
+  :insertions: 1
+  :deletions: 0
+  :substitutions: 1
+  :matching: 13
+  :align_cost: 2
+  :transcription_words: 14
+  :aligned_transcription: so  the voice recognition system would learn new classes
+    WHILE you are defining them ***
+  :aligned_hypothesis: so  the voice recognition system would learn new classes WHERE
+    you are defining them FOR
+- :transcription: and it would build a custom language model based on ctags
+  :hypothesis: and it would build a custom language model based on see Tex
+  :insertions: 1
+  :deletions: 0
+  :substitutions: 1
+  :matching: 10
+  :align_cost: 2
+  :transcription_words: 11
+  :aligned_transcription: and it  would build a   custom language model based on  ***
+    CTAGS
+  :aligned_hypothesis: and it  would build a   custom language model based on  SEE
+    TEX
+- :transcription: so it understands all your classes and methods like words
+  :hypothesis: so it understands Oreo classes and methods like words
+  :insertions: 0
+  :deletions: 1
+  :substitutions: 1
+  :matching: 8
+  :align_cost: 2
+  :transcription_words: 10
+  :aligned_transcription: so  it  understands ALL YOUR classes and methods like words
+  :aligned_hypothesis: so  it  understands *** OREO classes and methods like words
+- :transcription: I think this might be a good idea
+  :hypothesis: I'd think this might be a good idea
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 1
+  :matching: 7
+  :align_cost: 1
+  :transcription_words: 8
+  :aligned_transcription: I   think this might be  a   good idea
+  :aligned_hypothesis: I'D think this might be  a   good idea
+- :transcription: I think this should be much faster
+  :hypothesis: I think the should be much faster
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 1
+  :matching: 6
+  :align_cost: 1
+  :transcription_words: 7
+  :aligned_transcription: i   think THIS should be  much faster
+  :aligned_hypothesis: i   think THE  should be  much faster
+- :transcription: did you see my latest commit
+  :hypothesis: and opted you ca latest committed up
+  :insertions: 1
+  :deletions: 0
+  :substitutions: 5
+  :matching: 1
+  :align_cost: 6
+  :transcription_words: 6
+  :aligned_transcription: '*** DID   you SEE MY     LATEST    COMMIT'
+  :aligned_hypothesis: AND OPTED you CA  LATEST COMMITTED UP
+- :transcription: click on first result
+  :hypothesis: click on first result
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 0
+  :matching: 4
+  :align_cost: 0
+  :transcription_words: 4
+  :aligned_transcription: click on  first result
+  :aligned_hypothesis: click on  first result
+- :transcription: click on second result
+  :hypothesis: click on second free soft
+  :insertions: 1
+  :deletions: 0
+  :substitutions: 1
+  :matching: 3
+  :align_cost: 2
+  :transcription_words: 4
+  :aligned_transcription: click on  second ***  RESULT
+  :aligned_hypothesis: click on  second FREE SOFT
+- :transcription: go to Google
+  :hypothesis: go to Google on
+  :insertions: 1
+  :deletions: 0
+  :substitutions: 0
+  :matching: 3
+  :align_cost: 1
+  :transcription_words: 3
+  :aligned_transcription: go  to  google ***
+  :aligned_hypothesis: go  to  google ON
+- :transcription: validates presence of name
+  :hypothesis: valid dates presence of name
+  :insertions: 1
+  :deletions: 0
+  :substitutions: 1
+  :matching: 3
+  :align_cost: 2
+  :transcription_words: 4
+  :aligned_transcription: '***   VALIDATES presence of  name'
+  :aligned_hypothesis: VALID DATES     presence of  name
+- :transcription: validates uniqueness of name
+  :hypothesis: wedded its uniqueness often name
+  :insertions: 1
+  :deletions: 0
+  :substitutions: 2
+  :matching: 2
+  :align_cost: 3
+  :transcription_words: 4
+  :aligned_transcription: '***    VALIDATES uniqueness OF    name'
+  :aligned_hypothesis: WEDDED ITS       uniqueness OFTEN name
+- :transcription: belongs to language
+  :hypothesis: belongs to language
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 0
+  :matching: 3
+  :align_cost: 0
+  :transcription_words: 3
+  :aligned_transcription: belongs to  language
+  :aligned_hypothesis: belongs to  language
+- :transcription: belongs to user
+  :hypothesis: belongs to user
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 0
+  :matching: 3
+  :align_cost: 0
+  :transcription_words: 3
+  :aligned_transcription: belongs to  user
+  :aligned_hypothesis: belongs to  user
+- :transcription: it should have three actions
+  :hypothesis: it should have three actions
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 0
+  :matching: 5
+  :align_cost: 0
+  :transcription_words: 5
+  :aligned_transcription: it  should have three actions
+  :aligned_hypothesis: it  should have three actions
+- :transcription: I didn't think this would work
+  :hypothesis: I didn't think this would work
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 0
+  :matching: 6
+  :align_cost: 0
+  :transcription_words: 6
+  :aligned_transcription: i   didn't think this would work
+  :aligned_hypothesis: i   didn't think this would work
+- :transcription: I am now testing another recording
+  :hypothesis: I am not testing another recording
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 1
+  :matching: 5
+  :align_cost: 1
+  :transcription_words: 6
+  :aligned_transcription: i   am  NOW testing another recording
+  :aligned_hypothesis: i   am  NOT testing another recording
+- :transcription: hello and welcome
+  :hypothesis: hello and welcome
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 0
+  :matching: 3
+  :align_cost: 0
+  :transcription_words: 3
+  :aligned_transcription: hello and welcome
+  :aligned_hypothesis: hello and welcome
+- :transcription: by the way everything that you have just read was recognized by
+    my software
+  :hypothesis: by the way everything that you have just read was recognized by my
+    software
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 0
+  :matching: 14
+  :align_cost: 0
+  :transcription_words: 14
+  :aligned_transcription: by  the way everything that you have just read was recognized
+    by  my  software
+  :aligned_hypothesis: by  the way everything that you have just read was recognized
+    by  my  software
+- :transcription: with only minor errors in the recognition
+  :hypothesis: with only minor errors in the recognition
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 0
+  :matching: 7
+  :align_cost: 0
+  :transcription_words: 7
+  :aligned_transcription: with only minor errors in  the recognition
+  :aligned_hypothesis: with only minor errors in  the recognition
+- :transcription: please fetch the files from the server
+  :hypothesis: please search the files from the server
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 1
+  :matching: 6
+  :align_cost: 1
+  :transcription_words: 7
+  :aligned_transcription: please FETCH  the files from the server
+  :aligned_hypothesis: please SEARCH the files from the server
+- :transcription: the real challenge is coming up with a good speech representation
+    of ruby
+  :hypothesis: the real challenge is coming up with Blake good speech representation
+    of rube
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 2
+  :matching: 11
+  :align_cost: 2
+  :transcription_words: 13
+  :aligned_transcription: the real challenge is  coming up  with A     good speech
+    representation of  RUBY
+  :aligned_hypothesis: the real challenge is  coming up  with BLAKE good speech representation
+    of  RUBE
+- :transcription: that follows the principle of least surprise
+  :hypothesis: that follows the principle of the surprise
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 1
+  :matching: 6
+  :align_cost: 1
+  :transcription_words: 7
+  :aligned_transcription: that follows the principle of  LEAST surprise
+  :aligned_hypothesis: that follows the principle of  THE   surprise
+- :transcription: and deals appropriately with ambiguous cases
+  :hypothesis: and this appropriately it was ambiguous cases
+  :insertions: 1
+  :deletions: 0
+  :substitutions: 2
+  :matching: 4
+  :align_cost: 3
+  :transcription_words: 6
+  :aligned_transcription: and DEALS appropriately *** WITH ambiguous cases
+  :aligned_hypothesis: and THIS  appropriately IT  WAS  ambiguous cases
+- :transcription: the good thing is that you can get rid of a lot of manual work
+  :hypothesis: the good thing is that you can get rid of a lot of manual work
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 0
+  :matching: 15
+  :align_cost: 0
+  :transcription_words: 15
+  :aligned_transcription: the good thing is  that you can get rid of  a   lot of  manual
+    work
+  :aligned_hypothesis: the good thing is  that you can get rid of  a   lot of  manual
+    work
+- :transcription: for example attribute accessors are nearly always placed at the
+    top of the file
+  :hypothesis: for example attribute excesses are nearly always placed at the top
+    of the file
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 1
+  :matching: 13
+  :align_cost: 1
+  :transcription_words: 14
+  :aligned_transcription: for example attribute ACCESSORS are nearly always placed
+    at  the top of  the file
+  :aligned_hypothesis: for example attribute EXCESSES  are nearly always placed at  the
+    top of  the file
+- :transcription: so when you say something like
+  :hypothesis: so when you say something like
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 0
+  :matching: 6
+  :align_cost: 0
+  :transcription_words: 6
+  :aligned_transcription: so  when you say something like
+  :aligned_hypothesis: so  when you say something like
+- :transcription: attribute accessor file name
+  :hypothesis: attribute access server fine name
+  :insertions: 1
+  :deletions: 0
+  :substitutions: 2
+  :matching: 2
+  :align_cost: 3
+  :transcription_words: 4
+  :aligned_transcription: attribute ***    ACCESSOR FILE name
+  :aligned_hypothesis: attribute ACCESS SERVER   FINE name
+- :transcription: it will automatically put the following line at the top of the file
+    in the right place
+  :hypothesis: it will automatically put the following line at the top of the file
+    in the right place
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 0
+  :matching: 17
+  :align_cost: 0
+  :transcription_words: 17
+  :aligned_transcription: it  will automatically put the following line at  the top
+    of  the file in  the right place
+  :aligned_hypothesis: it  will automatically put the following line at  the top of  the
+    file in  the right place
+- :transcription: so the whole approach works only with one unified style
+  :hypothesis: so the whole approach works only with one unified style
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 0
+  :matching: 10
+  :align_cost: 0
+  :transcription_words: 10
+  :aligned_transcription: so  the whole approach works only with one unified style
+  :aligned_hypothesis: so  the whole approach works only with one unified style
+- :transcription: which is the ruby best practices style which is published on github
+  :hypothesis: which is did ruby best practices style which is published on git help
+  :insertions: 1
+  :deletions: 0
+  :substitutions: 2
+  :matching: 10
+  :align_cost: 3
+  :transcription_words: 12
+  :aligned_transcription: which is  THE ruby best practices style which is  published
+    on  *** GITHUB
+  :aligned_hypothesis: which is  DID ruby best practices style which is  published
+    on  GIT HELP
+- :transcription: I wonder if I should create a custom language model just for programming
+  :hypothesis: I wonder if I should create a custom language will just for programming
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 1
+  :matching: 12
+  :align_cost: 1
+  :transcription_words: 13
+  :aligned_transcription: i   wonder if  i   should create a   custom language MODEL
+    just for programming
+  :aligned_hypothesis: i   wonder if  i   should create a   custom language WILL  just
+    for programming
+- :transcription: or if I should use the normal dictation model and just train it
+    for programming
+  :hypothesis: or if I should use that normal dictation on model and just train it
+    for programming
+  :insertions: 1
+  :deletions: 0
+  :substitutions: 1
+  :matching: 14
+  :align_cost: 2
+  :transcription_words: 15
+  :aligned_transcription: or  if  i   should use THE  normal dictation *** model and
+    just train it  for programming
+  :aligned_hypothesis: or  if  i   should use THAT normal dictation ON  model and
+    just train it  for programming
+- :transcription: the advantage is that it would also recognize normal sentences such
+    as commit messages
+  :hypothesis: the advantage is that it would also recognized normal sentences such
+    as commit messages
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 1
+  :matching: 13
+  :align_cost: 1
+  :transcription_words: 14
+  :aligned_transcription: the advantage is  that it  would also RECOGNIZE  normal
+    sentences such as  commit messages
+  :aligned_hypothesis: the advantage is  that it  would also RECOGNIZED normal sentences
+    such as  commit messages
+- :transcription: while the disadvantage is that it would not work as accurate on
+    programming messages
+  :hypothesis: why the disadvantage is that it would not work as a correct on programming
+    messages
+  :insertions: 1
+  :deletions: 0
+  :substitutions: 2
+  :matching: 12
+  :align_cost: 3
+  :transcription_words: 14
+  :aligned_transcription: WHILE the disadvantage is  that it  would not work as  ***
+    ACCURATE on  programming messages
+  :aligned_hypothesis: WHY   the disadvantage is  that it  would not work as  A   CORRECT  on  programming
+    messages
+- :transcription: another idea would be a hybrid approach
+  :hypothesis: another idea would be a hybrid approach
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 0
+  :matching: 7
+  :align_cost: 0
+  :transcription_words: 7
+  :aligned_transcription: another idea would be  a   hybrid approach
+  :aligned_hypothesis: another idea would be  a   hybrid approach
+- :transcription: that means whenever you are entering a string value or a commit
+    message it would switch automatically to the dictation language model
+  :hypothesis: that means whenever you are entering any string value or a commit message
+    it would switch automatically to the dictation language more
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 2
+  :matching: 20
+  :align_cost: 2
+  :transcription_words: 22
+  :aligned_transcription: that means whenever you are entering A   string value or  a   commit
+    message it  would switch automatically to  the dictation language MODEL
+  :aligned_hypothesis: that means whenever you are entering ANY string value or  a   commit
+    message it  would switch automatically to  the dictation language MORE
+- :transcription: I am not sure if Google's voice recognition is actually that good
+  :hypothesis: I am not sure it's Google's voice recognition is actually dead code
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 3
+  :matching: 9
+  :align_cost: 3
+  :transcription_words: 12
+  :aligned_transcription: i   am  not sure IF   google's voice recognition is  actually
+    THAT GOOD
+  :aligned_hypothesis: i   am  not sure IT'S google's voice recognition is  actually
+    DEAD CODE
+- :transcription: we can wait no problem
+  :hypothesis: we can wait no problem
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 0
+  :matching: 5
+  :align_cost: 0
+  :transcription_words: 5
+  :aligned_transcription: we  can wait no  problem
+  :aligned_hypothesis: we  can wait no  problem
+- :transcription: wow it's really fast
+  :hypothesis: wow it's really fast
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 0
+  :matching: 4
+  :align_cost: 0
+  :transcription_words: 4
+  :aligned_transcription: wow it's really fast
+  :aligned_hypothesis: wow it's really fast
+- :transcription: the URL is different
+  :hypothesis: do you are at it is different
+  :insertions: 3
+  :deletions: 0
+  :substitutions: 2
+  :matching: 2
+  :align_cost: 5
+  :transcription_words: 4
+  :aligned_transcription: '*** *** *** THE URL is  different'
+  :aligned_hypothesis: DO  YOU ARE AT  IT  is  different
+- :transcription: we would basically just need to change that
+  :hypothesis: we would basically just need to change that
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 0
+  :matching: 8
+  :align_cost: 0
+  :transcription_words: 8
+  :aligned_transcription: we  would basically just need to  change that
+  :aligned_hypothesis: we  would basically just need to  change that
+- :transcription: logos are symbols that attempt to visually represent the essence
+    of an organization
+  :hypothesis: logos are symbols that attempt to visually represent the essence of
+    an organization
+  :insertions: 0
+  :deletions: 0
+  :substitutions: 0
+  :matching: 13
+  :align_cost: 0
+  :transcription_words: 13
+  :aligned_transcription: logos are symbols that attempt to  visually represent the
+    essence of  an  organization
+  :aligned_hypothesis: logos are symbols that attempt to  visually represent the essence
+    of  an  organization
+- :transcription: given that the new yahoo logo is a blandly cooperate humourless
+    confused jumble of unappealing elements
+  :hypothesis: given that the new yahoo rule is a plan to cooperate to over less confused
+    jumble of an unappealing elements
+  :insertions: 4
+  :deletions: 0
+  :substitutions: 3
+  :matching: 13
+  :align_cost: 7
+  :transcription_words: 16
+  :aligned_transcription: given that the new yahoo LOGO is  a   ***  BLANDLY cooperate
+    *** ***  HUMOURLESS confused jumble of  *** unappealing elements
+  :aligned_hypothesis: given that the new yahoo RULE is  a   PLAN TO      cooperate
+    TO  OVER LESS       confused jumble of  AN  unappealing elements