raingrams 0.0.9 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +9 -0
- data/Manifest.txt +10 -10
- data/README.txt +9 -7
- data/Rakefile +3 -6
- data/TODO.txt +6 -0
- data/lib/raingrams/bigram_model.rb +3 -7
- data/lib/raingrams/extensions/object.rb +4 -1
- data/lib/raingrams/extensions/string.rb +3 -0
- data/lib/raingrams/extensions.rb +0 -5
- data/lib/raingrams/hexagram_model.rb +3 -7
- data/lib/raingrams/model.rb +622 -61
- data/lib/raingrams/ngram.rb +50 -9
- data/lib/raingrams/ngram_set.rb +43 -0
- data/lib/raingrams/open_vocabulary/model.rb +12 -0
- data/lib/raingrams/open_vocabulary/open_model.rb +8 -4
- data/lib/raingrams/open_vocabulary.rb +0 -1
- data/lib/raingrams/pentagram_model.rb +3 -7
- data/lib/raingrams/probability_table.rb +153 -0
- data/lib/raingrams/quadgram_model.rb +3 -7
- data/lib/raingrams/raingrams.rb +10 -20
- data/lib/raingrams/tokens/start_sentence.rb +2 -2
- data/lib/raingrams/tokens/stop_sentence.rb +2 -2
- data/lib/raingrams/tokens/token.rb +49 -5
- data/lib/raingrams/tokens/unknown.rb +2 -2
- data/lib/raingrams/tokens.rb +1 -0
- data/lib/raingrams/trigram_model.rb +3 -7
- data/lib/raingrams/version.rb +1 -1
- data/lib/raingrams.rb +1 -1
- data/spec/ngram_set_spec.rb +54 -0
- data/spec/ngram_spec.rb +29 -0
- data/spec/probability_table_spec.rb +94 -0
- data/spec/raingrams_spec.rb +9 -0
- data/spec/spec_helper.rb +5 -0
- data/tasks/spec.rb +7 -0
- metadata +65 -55
- data/lib/raingrams/extensions/class.rb +0 -7
- data/lib/raingrams/extensions/false_class.rb +0 -7
- data/lib/raingrams/extensions/nil_class.rb +0 -7
- data/lib/raingrams/extensions/symbol.rb +0 -7
- data/lib/raingrams/extensions/true_class.rb +0 -7
- data/lib/raingrams/multigram_model.rb +0 -165
- data/lib/raingrams/open_vocabulary/multigram_model.rb +0 -12
- data/lib/raingrams/open_vocabulary/unigram_model.rb +0 -12
- data/lib/raingrams/unigram_model.rb +0 -70
- data/test/test_raingrams.rb +0 -0
| @@ -0,0 +1,94 @@ | |
| 1 | 
            +
            require 'raingrams/probability_table'
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require 'spec_helper'
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            describe ProbabilityTable do
         | 
| 6 | 
            +
              before(:all) do
         | 
| 7 | 
            +
                @grams = [:a, :b, :a, :a, :b, :c, :d, 2, 3, :a]
         | 
| 8 | 
            +
             | 
| 9 | 
            +
                @table = ProbabilityTable.new
         | 
| 10 | 
            +
                @grams.each { |g| @table.count(g) }
         | 
| 11 | 
            +
              end
         | 
| 12 | 
            +
             | 
| 13 | 
            +
              describe "empty table" do
         | 
| 14 | 
            +
                before(:all) do
         | 
| 15 | 
            +
                  @empty_table = ProbabilityTable.new
         | 
| 16 | 
            +
                end
         | 
| 17 | 
            +
             | 
| 18 | 
            +
                it "should not be dirty" do
         | 
| 19 | 
            +
                  @empty_table.should_not be_dirty
         | 
| 20 | 
            +
                end
         | 
| 21 | 
            +
             | 
| 22 | 
            +
                it "should be empty" do
         | 
| 23 | 
            +
                  @empty_table.should be_empty
         | 
| 24 | 
            +
                end
         | 
| 25 | 
            +
             | 
| 26 | 
            +
                it "should not have any frequencies" do
         | 
| 27 | 
            +
                  @empty_table.frequencies.should be_empty
         | 
| 28 | 
            +
                end
         | 
| 29 | 
            +
             | 
| 30 | 
            +
                it "should have no probabilities" do
         | 
| 31 | 
            +
                  @empty_table.probabilities.should be_empty
         | 
| 32 | 
            +
                end
         | 
| 33 | 
            +
             | 
| 34 | 
            +
                it "should have no grams" do
         | 
| 35 | 
            +
                  @empty_table.grams.should be_empty
         | 
| 36 | 
            +
                end
         | 
| 37 | 
            +
              end
         | 
| 38 | 
            +
             | 
| 39 | 
            +
              describe "un-built table" do
         | 
| 40 | 
            +
                it "should be dirty" do
         | 
| 41 | 
            +
                  @table.should be_dirty
         | 
| 42 | 
            +
                end
         | 
| 43 | 
            +
             | 
| 44 | 
            +
                it "should have the observed grams" do
         | 
| 45 | 
            +
                  (@table.grams - @grams.uniq).should be_empty
         | 
| 46 | 
            +
                end
         | 
| 47 | 
            +
             | 
| 48 | 
            +
                it "should have non-zero frequencies" do
         | 
| 49 | 
            +
                  @table.frequencies.each_value do |freq|
         | 
| 50 | 
            +
                    freq.should > 0
         | 
| 51 | 
            +
                  end
         | 
| 52 | 
            +
                end
         | 
| 53 | 
            +
             | 
| 54 | 
            +
                it "should have non-zero frequencies for grams it has observed" do
         | 
| 55 | 
            +
                  @grams.uniq.each do |g|
         | 
| 56 | 
            +
                    @table.frequency_of(g).should > 0
         | 
| 57 | 
            +
                  end
         | 
| 58 | 
            +
                end
         | 
| 59 | 
            +
             | 
| 60 | 
            +
                it "should return a zero frequency for unknown grams" do
         | 
| 61 | 
            +
                  @table.frequency_of(:x).should == 0
         | 
| 62 | 
            +
                end
         | 
| 63 | 
            +
             | 
| 64 | 
            +
                it "should not have any probabilities yet" do
         | 
| 65 | 
            +
                  @table.probabilities.should be_empty
         | 
| 66 | 
            +
                end
         | 
| 67 | 
            +
              end
         | 
| 68 | 
            +
             | 
| 69 | 
            +
              describe "built table" do
         | 
| 70 | 
            +
                before(:all) do
         | 
| 71 | 
            +
                  @table.build
         | 
| 72 | 
            +
                end
         | 
| 73 | 
            +
             | 
| 74 | 
            +
                it "should not be dirty" do
         | 
| 75 | 
            +
                  @table.should_not be_dirty
         | 
| 76 | 
            +
                end
         | 
| 77 | 
            +
             | 
| 78 | 
            +
                it "should return a zero probability for unknown grams" do
         | 
| 79 | 
            +
                  @table.probability_of(:x).should == 0.0
         | 
| 80 | 
            +
                end
         | 
| 81 | 
            +
             | 
| 82 | 
            +
                it "should have non-zero probabilities" do
         | 
| 83 | 
            +
                  @table.probabilities.each_value do |prob|
         | 
| 84 | 
            +
                    prob.should > 0.0
         | 
| 85 | 
            +
                  end
         | 
| 86 | 
            +
                end
         | 
| 87 | 
            +
             | 
| 88 | 
            +
                it "should have non-zero probabilities for grams it has observed" do
         | 
| 89 | 
            +
                  @grams.uniq.each do |g|
         | 
| 90 | 
            +
                    @table.probability_of(g).should > 0.0
         | 
| 91 | 
            +
                  end
         | 
| 92 | 
            +
                end
         | 
| 93 | 
            +
              end
         | 
| 94 | 
            +
            end
         | 
    
        data/spec/spec_helper.rb
    ADDED
    
    
    
        data/tasks/spec.rb
    ADDED
    
    
    
        metadata
    CHANGED
    
    | @@ -1,51 +1,54 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification 
         | 
| 2 | 
            -
            rubygems_version: 0.9.4
         | 
| 3 | 
            -
            specification_version: 1
         | 
| 4 2 | 
             
            name: raingrams
         | 
| 5 3 | 
             
            version: !ruby/object:Gem::Version 
         | 
| 6 | 
            -
              version: 0.0.9
         | 
| 7 | 
            -
            date: 2008-01-09 00:00:00 -08:00
         | 
| 8 | 
            -
            summary: Raingrams is a flexible and general-purpose ngrams library written in Ruby
         | 
| 9 | 
            -
            require_paths: 
         | 
| 10 | 
            -
            - lib
         | 
| 11 | 
            -
            email: postmodern.mod3@gmail.com
         | 
| 12 | 
            -
            homepage: "    by Postmodern Modulus III"
         | 
| 13 | 
            -
            rubyforge_project: raingrams
         | 
| 14 | 
            -
            description: "== FEATURES/PROBLEMS:  * Supports all non-zero ngram sizes. * Supports text and non-text grams. * Supports Open and Closed vocabulary models.  == REQUIREMENTS:  == INSTALL:  $ sudo gem install raingrams"
         | 
| 15 | 
            -
            autorequire: 
         | 
| 16 | 
            -
            default_executable: 
         | 
| 17 | 
            -
            bindir: bin
         | 
| 18 | 
            -
            has_rdoc: true
         | 
| 19 | 
            -
            required_ruby_version: !ruby/object:Gem::Version::Requirement 
         | 
| 20 | 
            -
              requirements: 
         | 
| 21 | 
            -
              - - ">"
         | 
| 22 | 
            -
                - !ruby/object:Gem::Version 
         | 
| 23 | 
            -
                  version: 0.0.0
         | 
| 24 | 
            -
              version: 
         | 
| 4 | 
            +
              version: 0.1.0
         | 
| 25 5 | 
             
            platform: ruby
         | 
| 26 | 
            -
            signing_key: 
         | 
| 27 | 
            -
            cert_chain: 
         | 
| 28 | 
            -
            post_install_message: 
         | 
| 29 6 | 
             
            authors: 
         | 
| 30 7 | 
             
            - Postmodern Modulus III
         | 
| 8 | 
            +
            autorequire: 
         | 
| 9 | 
            +
            bindir: bin
         | 
| 10 | 
            +
            cert_chain: []
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            date: 2008-10-06 00:00:00 -07:00
         | 
| 13 | 
            +
            default_executable: 
         | 
| 14 | 
            +
            dependencies: 
         | 
| 15 | 
            +
            - !ruby/object:Gem::Dependency 
         | 
| 16 | 
            +
              name: hoe
         | 
| 17 | 
            +
              type: :development
         | 
| 18 | 
            +
              version_requirement: 
         | 
| 19 | 
            +
              version_requirements: !ruby/object:Gem::Requirement 
         | 
| 20 | 
            +
                requirements: 
         | 
| 21 | 
            +
                - - ">="
         | 
| 22 | 
            +
                  - !ruby/object:Gem::Version 
         | 
| 23 | 
            +
                    version: 1.7.0
         | 
| 24 | 
            +
                version: 
         | 
| 25 | 
            +
            description: Raingrams is a flexible and general-purpose ngrams library written in Ruby. Raingrams supports any non-zero ngram size, text/non-text grams, multiple parsing styles and open/closed vocabulary models.
         | 
| 26 | 
            +
            email: 
         | 
| 27 | 
            +
            - postmodern.mod3@gmail.com
         | 
| 28 | 
            +
            executables: []
         | 
| 29 | 
            +
             | 
| 30 | 
            +
            extensions: []
         | 
| 31 | 
            +
             | 
| 32 | 
            +
            extra_rdoc_files: 
         | 
| 33 | 
            +
            - History.txt
         | 
| 34 | 
            +
            - LICENSE.txt
         | 
| 35 | 
            +
            - Manifest.txt
         | 
| 36 | 
            +
            - README.txt
         | 
| 37 | 
            +
            - TODO.txt
         | 
| 31 38 | 
             
            files: 
         | 
| 32 39 | 
             
            - History.txt
         | 
| 33 40 | 
             
            - LICENSE.txt
         | 
| 34 41 | 
             
            - Manifest.txt
         | 
| 35 42 | 
             
            - README.txt
         | 
| 43 | 
            +
            - TODO.txt
         | 
| 36 44 | 
             
            - Rakefile
         | 
| 37 45 | 
             
            - lib/raingrams.rb
         | 
| 38 46 | 
             
            - lib/raingrams/version.rb
         | 
| 39 47 | 
             
            - lib/raingrams/raingrams.rb
         | 
| 40 48 | 
             
            - lib/raingrams/exceptions/prefix_frequency_missing.rb
         | 
| 41 49 | 
             
            - lib/raingrams/exceptions.rb
         | 
| 42 | 
            -
            - lib/raingrams/extensions/class.rb
         | 
| 43 | 
            -
            - lib/raingrams/extensions/false_class.rb
         | 
| 44 | 
            -
            - lib/raingrams/extensions/nil_class.rb
         | 
| 45 50 | 
             
            - lib/raingrams/extensions/object.rb
         | 
| 46 51 | 
             
            - lib/raingrams/extensions/string.rb
         | 
| 47 | 
            -
            - lib/raingrams/extensions/symbol.rb
         | 
| 48 | 
            -
            - lib/raingrams/extensions/true_class.rb
         | 
| 49 52 | 
             
            - lib/raingrams/extensions.rb
         | 
| 50 53 | 
             
            - lib/raingrams/tokens/token.rb
         | 
| 51 54 | 
             
            - lib/raingrams/tokens/start_sentence.rb
         | 
| @@ -53,47 +56,54 @@ files: | |
| 53 56 | 
             
            - lib/raingrams/tokens/unknown.rb
         | 
| 54 57 | 
             
            - lib/raingrams/tokens.rb
         | 
| 55 58 | 
             
            - lib/raingrams/ngram.rb
         | 
| 59 | 
            +
            - lib/raingrams/ngram_set.rb
         | 
| 60 | 
            +
            - lib/raingrams/probability_table.rb
         | 
| 56 61 | 
             
            - lib/raingrams/model.rb
         | 
| 57 | 
            -
            - lib/raingrams/unigram_model.rb
         | 
| 58 | 
            -
            - lib/raingrams/multigram_model.rb
         | 
| 59 62 | 
             
            - lib/raingrams/bigram_model.rb
         | 
| 60 63 | 
             
            - lib/raingrams/trigram_model.rb
         | 
| 61 64 | 
             
            - lib/raingrams/quadgram_model.rb
         | 
| 62 65 | 
             
            - lib/raingrams/pentagram_model.rb
         | 
| 63 66 | 
             
            - lib/raingrams/hexagram_model.rb
         | 
| 64 67 | 
             
            - lib/raingrams/open_vocabulary/open_model.rb
         | 
| 65 | 
            -
            - lib/raingrams/open_vocabulary/unigram_model.rb
         | 
| 66 | 
            -
            - lib/raingrams/open_vocabulary/multigram_model.rb
         | 
| 68 | 
            +
            - lib/raingrams/open_vocabulary/model.rb
         | 
| 67 69 | 
             
            - lib/raingrams/open_vocabulary/bigram_model.rb
         | 
| 68 70 | 
             
            - lib/raingrams/open_vocabulary/trigram_model.rb
         | 
| 69 71 | 
             
            - lib/raingrams/open_vocabulary/quadgram_model.rb
         | 
| 70 72 | 
             
            - lib/raingrams/open_vocabulary/pentagram_model.rb
         | 
| 71 73 | 
             
            - lib/raingrams/open_vocabulary/hexagram_model.rb
         | 
| 72 74 | 
             
            - lib/raingrams/open_vocabulary.rb
         | 
| 73 | 
            -
            -  | 
| 74 | 
            -
             | 
| 75 | 
            -
            -  | 
| 75 | 
            +
            - tasks/spec.rb
         | 
| 76 | 
            +
            - spec/spec_helper.rb
         | 
| 77 | 
            +
            - spec/ngram_spec.rb
         | 
| 78 | 
            +
            - spec/ngram_set_spec.rb
         | 
| 79 | 
            +
            - spec/probability_table_spec.rb
         | 
| 80 | 
            +
            - spec/raingrams_spec.rb
         | 
| 81 | 
            +
            has_rdoc: true
         | 
| 82 | 
            +
            homepage: http://raingrams.rubyforge.org/
         | 
| 83 | 
            +
            post_install_message: 
         | 
| 76 84 | 
             
            rdoc_options: 
         | 
| 77 85 | 
             
            - --main
         | 
| 78 86 | 
             
            - README.txt
         | 
| 79 | 
            -
             | 
| 80 | 
            -
            -  | 
| 81 | 
            -
             | 
| 82 | 
            -
             | 
| 83 | 
            -
            -  | 
| 84 | 
            -
             | 
| 85 | 
            -
             | 
| 86 | 
            -
             | 
| 87 | 
            -
             | 
| 87 | 
            +
            require_paths: 
         | 
| 88 | 
            +
            - lib
         | 
| 89 | 
            +
            required_ruby_version: !ruby/object:Gem::Requirement 
         | 
| 90 | 
            +
              requirements: 
         | 
| 91 | 
            +
              - - ">="
         | 
| 92 | 
            +
                - !ruby/object:Gem::Version 
         | 
| 93 | 
            +
                  version: "0"
         | 
| 94 | 
            +
              version: 
         | 
| 95 | 
            +
            required_rubygems_version: !ruby/object:Gem::Requirement 
         | 
| 96 | 
            +
              requirements: 
         | 
| 97 | 
            +
              - - ">="
         | 
| 98 | 
            +
                - !ruby/object:Gem::Version 
         | 
| 99 | 
            +
                  version: "0"
         | 
| 100 | 
            +
              version: 
         | 
| 88 101 | 
             
            requirements: []
         | 
| 89 102 |  | 
| 90 | 
            -
             | 
| 91 | 
            -
             | 
| 92 | 
            -
             | 
| 93 | 
            -
             | 
| 94 | 
            -
             | 
| 95 | 
            -
             | 
| 96 | 
            -
             | 
| 97 | 
            -
                  - !ruby/object:Gem::Version 
         | 
| 98 | 
            -
                    version: 1.4.0
         | 
| 99 | 
            -
                version: 
         | 
| 103 | 
            +
            rubyforge_project: raingrams
         | 
| 104 | 
            +
            rubygems_version: 1.3.0
         | 
| 105 | 
            +
            signing_key: 
         | 
| 106 | 
            +
            specification_version: 2
         | 
| 107 | 
            +
            summary: Raingrams is a flexible and general-purpose ngrams library written in Ruby
         | 
| 108 | 
            +
            test_files: []
         | 
| 109 | 
            +
             | 
| @@ -1,165 +0,0 @@ | |
| 1 | 
            -
            require 'raingrams/model'
         | 
| 2 | 
            -
            require 'raingrams/tokens/start_sentence'
         | 
| 3 | 
            -
            require 'raingrams/tokens/stop_sentence'
         | 
| 4 | 
            -
            require 'raingrams/exceptions/prefix_frequency_missing'
         | 
| 5 | 
            -
             | 
| 6 | 
            -
            module Raingrams
         | 
| 7 | 
            -
              class MultigramModel < Model
         | 
| 8 | 
            -
             | 
| 9 | 
            -
                # Frequencies of n-1 grams
         | 
| 10 | 
            -
                attr_reader :prefix_frequency
         | 
| 11 | 
            -
             | 
| 12 | 
            -
                def initialize(opts={},&block)
         | 
| 13 | 
            -
                  @prefix_frequency = Hash.new { |hash,key| 0 }
         | 
| 14 | 
            -
             | 
| 15 | 
            -
                  super(opts) { |model| model.build(&block) }
         | 
| 16 | 
            -
                end
         | 
| 17 | 
            -
             | 
| 18 | 
            -
                def ngrams_from_words(words)
         | 
| 19 | 
            -
                  return (0...(words.length-@ngram_size+1)).map do |index|
         | 
| 20 | 
            -
                    Ngram.new(words[index,@ngram_size])
         | 
| 21 | 
            -
                  end
         | 
| 22 | 
            -
                end
         | 
| 23 | 
            -
             | 
| 24 | 
            -
                def ngrams_from_fragment(fragment)
         | 
| 25 | 
            -
                  ngrams_from_words(parse_sentence(fragment))
         | 
| 26 | 
            -
                end
         | 
| 27 | 
            -
             | 
| 28 | 
            -
                def ngrams_from_sentence(sentence)
         | 
| 29 | 
            -
                  ngrams_from_words(wrap_sentence(parse_sentence(sentence)))
         | 
| 30 | 
            -
                end
         | 
| 31 | 
            -
             | 
| 32 | 
            -
                def ngrams_from_text(text)
         | 
| 33 | 
            -
                  parse_text(text).inject([]) do |ngrams,sentence|
         | 
| 34 | 
            -
                    ngrams + ngrams_from_sentence(sentence)
         | 
| 35 | 
            -
                  end
         | 
| 36 | 
            -
                end
         | 
| 37 | 
            -
             | 
| 38 | 
            -
                def common_ngrams_from_words(words)
         | 
| 39 | 
            -
                  ngrams_from_words(words).select { |ngram| has_ngram?(ngram) }
         | 
| 40 | 
            -
                end
         | 
| 41 | 
            -
             | 
| 42 | 
            -
                def common_ngrams_from_fragment(fragment)
         | 
| 43 | 
            -
                  ngrams_from_fragment(words).select { |ngram| has_ngram?(ngram) }
         | 
| 44 | 
            -
                end
         | 
| 45 | 
            -
             | 
| 46 | 
            -
                def common_ngrams_from_sentence(sentence)
         | 
| 47 | 
            -
                  ngrams_from_sentence(sentence).select { |ngram| has_ngram?(ngram) }
         | 
| 48 | 
            -
                end
         | 
| 49 | 
            -
             | 
| 50 | 
            -
                def common_ngrams_from_text(text)
         | 
| 51 | 
            -
                  ngrams_from_text(text).select { |ngram| has_ngram?(ngram) }
         | 
| 52 | 
            -
                end
         | 
| 53 | 
            -
             | 
| 54 | 
            -
                def train_with_ngram(ngram)
         | 
| 55 | 
            -
                  @prefix_frequency[ngram.prefix] += 1
         | 
| 56 | 
            -
                  return super(ngram)
         | 
| 57 | 
            -
                end
         | 
| 58 | 
            -
             | 
| 59 | 
            -
                def train_with_sentence(sentence)
         | 
| 60 | 
            -
                  train_with_ngrams(ngrams_from_sentence(sentence))
         | 
| 61 | 
            -
                end
         | 
| 62 | 
            -
             | 
| 63 | 
            -
                def train_with_text(text)
         | 
| 64 | 
            -
                  train_with_ngrams(ngrams_from_text(text))
         | 
| 65 | 
            -
                end
         | 
| 66 | 
            -
             | 
| 67 | 
            -
                def build(&block)
         | 
| 68 | 
            -
                  clear_probabilities
         | 
| 69 | 
            -
             | 
| 70 | 
            -
                  block.call(self) if block
         | 
| 71 | 
            -
             | 
| 72 | 
            -
                  @frequency.each do |ngram,count|
         | 
| 73 | 
            -
                    prefix = ngram.prefix
         | 
| 74 | 
            -
             | 
| 75 | 
            -
                    unless @prefix_frequency[prefix]
         | 
| 76 | 
            -
                      raise(PrefixFrequencyMissing,"the model is missing the frequency of the ngram prefix #{prefix}",caller)
         | 
| 77 | 
            -
                    end
         | 
| 78 | 
            -
             | 
| 79 | 
            -
                    @probability[ngram] = count.to_f / @prefix_frequency[prefix].to_f
         | 
| 80 | 
            -
                  end
         | 
| 81 | 
            -
             | 
| 82 | 
            -
                  return self
         | 
| 83 | 
            -
                end
         | 
| 84 | 
            -
             | 
| 85 | 
            -
                def ngrams_prefixed_by(prefix)
         | 
| 86 | 
            -
                  ngrams_with { |ngram| ngram.prefixed_by?(prefix) }
         | 
| 87 | 
            -
                end
         | 
| 88 | 
            -
             | 
| 89 | 
            -
                def ngrams_postfixed_by(postfix)
         | 
| 90 | 
            -
                  ngrams_with { |ngram| ngram.prefixed_by?(postfix) }
         | 
| 91 | 
            -
                end
         | 
| 92 | 
            -
             | 
| 93 | 
            -
                def ngrams_preceeding(gram)
         | 
| 94 | 
            -
                  ngrams_ending_with(gram).map do |ngram|
         | 
| 95 | 
            -
                    ngrams_postfixed_by(ngram.prefix)
         | 
| 96 | 
            -
                  end
         | 
| 97 | 
            -
                end
         | 
| 98 | 
            -
             | 
| 99 | 
            -
                def ngrams_following(gram)
         | 
| 100 | 
            -
                  ngrams_starting_with(gram).map do |ngram|
         | 
| 101 | 
            -
                    ngrams_prefixed_by(ngram.postfix)
         | 
| 102 | 
            -
                  end
         | 
| 103 | 
            -
                end
         | 
| 104 | 
            -
             | 
| 105 | 
            -
                def grams_preceeding(gram)
         | 
| 106 | 
            -
                  ngrams_ending_with(gram).map do |ngram|
         | 
| 107 | 
            -
                    ngram[-2]
         | 
| 108 | 
            -
                  end
         | 
| 109 | 
            -
                end
         | 
| 110 | 
            -
             | 
| 111 | 
            -
                def grams_following(gram)
         | 
| 112 | 
            -
                  ngrams_starting_with(gram).map do |ngram|
         | 
| 113 | 
            -
                    ngram[1]
         | 
| 114 | 
            -
                  end
         | 
| 115 | 
            -
                end
         | 
| 116 | 
            -
             | 
| 117 | 
            -
                def fragment_probability(fragment)
         | 
| 118 | 
            -
                  probability_of_ngrams(ngrams_from_fragment(fragment))
         | 
| 119 | 
            -
                end
         | 
| 120 | 
            -
             | 
| 121 | 
            -
                def sentence_probability(sentence)
         | 
| 122 | 
            -
                  probability_of_ngrams(ngrams_from_sentence(sentence))
         | 
| 123 | 
            -
                end
         | 
| 124 | 
            -
             | 
| 125 | 
            -
                def text_probability(text)
         | 
| 126 | 
            -
                  probability_of_ngrams(ngrams_from_text(text))
         | 
| 127 | 
            -
                end
         | 
| 128 | 
            -
             | 
| 129 | 
            -
                def common_fragment_probability(fragment)
         | 
| 130 | 
            -
                  probability_of_ngrams(common_ngrams_from_fragment(fragment))
         | 
| 131 | 
            -
                end
         | 
| 132 | 
            -
             | 
| 133 | 
            -
                def common_sentence_probability(sentence)
         | 
| 134 | 
            -
                  probability_of_ngrams(common_ngrams_from_sentence(sentence))
         | 
| 135 | 
            -
                end
         | 
| 136 | 
            -
             | 
| 137 | 
            -
                def common_text_probability(fragment)
         | 
| 138 | 
            -
                  probability_of_ngrams(common_ngrams_from_text(text))
         | 
| 139 | 
            -
                end
         | 
| 140 | 
            -
             | 
| 141 | 
            -
                def similar_fragment_probability(other,fragment)
         | 
| 142 | 
            -
                  common_fragment_probability(fragment) * other.common_fragment_probability(fragment)
         | 
| 143 | 
            -
                end
         | 
| 144 | 
            -
             | 
| 145 | 
            -
                def similar_sentence_probability(other,sentence)
         | 
| 146 | 
            -
                  common_sentence_probability(sentence) * other.common_sentence_probability(sentence)
         | 
| 147 | 
            -
                end
         | 
| 148 | 
            -
             | 
| 149 | 
            -
                def similar_text_probability(other,text)
         | 
| 150 | 
            -
                  common_text_probability(text) * other.common_text_probability(text)
         | 
| 151 | 
            -
                end
         | 
| 152 | 
            -
             | 
| 153 | 
            -
                def clear
         | 
| 154 | 
            -
                  @prefix_frequency.clear
         | 
| 155 | 
            -
                  return super
         | 
| 156 | 
            -
                end
         | 
| 157 | 
            -
             | 
| 158 | 
            -
                protected
         | 
| 159 | 
            -
             | 
| 160 | 
            -
                def wrap_sentence(sentence)
         | 
| 161 | 
            -
                  (Tokens::StartSentence * @ngram_size) + sentence.to_a + (Tokens::StopSentence * @ngram_size)
         | 
| 162 | 
            -
                end
         | 
| 163 | 
            -
             | 
| 164 | 
            -
              end
         | 
| 165 | 
            -
            end
         | 
| @@ -1,70 +0,0 @@ | |
| 1 | 
            -
            require 'raingrams/model'
         | 
| 2 | 
            -
             | 
| 3 | 
            -
            module Raingrams
         | 
| 4 | 
            -
              class UnigramModel < Model
         | 
| 5 | 
            -
             | 
| 6 | 
            -
                def initialize(opts={},&block)
         | 
| 7 | 
            -
                  opts[:ngram_size] = 1
         | 
| 8 | 
            -
             | 
| 9 | 
            -
                  super(opts) { |model| model.build(&block) }
         | 
| 10 | 
            -
                end
         | 
| 11 | 
            -
             | 
| 12 | 
            -
                def ngrams_from_words(words)
         | 
| 13 | 
            -
                  words.map { |word| Ngram[word] }
         | 
| 14 | 
            -
                end
         | 
| 15 | 
            -
             | 
| 16 | 
            -
                def ngrams_from_fragment(fragment)
         | 
| 17 | 
            -
                  ngrams_from_words(parse_sentence(fragment))
         | 
| 18 | 
            -
                end
         | 
| 19 | 
            -
             | 
| 20 | 
            -
                def ngrams_from_sentence(sentence)
         | 
| 21 | 
            -
                  ngrams_from_fragment(sentence)
         | 
| 22 | 
            -
                end
         | 
| 23 | 
            -
             | 
| 24 | 
            -
                def ngrams_from_text(text)
         | 
| 25 | 
            -
                  parse_text(text).inject([]) do |ngrams,sentence|
         | 
| 26 | 
            -
                    ngrams + ngrams_from_sentence(sentence)
         | 
| 27 | 
            -
                  end
         | 
| 28 | 
            -
                end
         | 
| 29 | 
            -
             | 
| 30 | 
            -
                def train_with_sentence(sentence)
         | 
| 31 | 
            -
                  train_with_ngrams(ngrams_from_sentence(sentence))
         | 
| 32 | 
            -
                end
         | 
| 33 | 
            -
             | 
| 34 | 
            -
                def train_with_text(text)
         | 
| 35 | 
            -
                  train_with_ngrams(ngrams_from_text(text))
         | 
| 36 | 
            -
                end
         | 
| 37 | 
            -
             | 
| 38 | 
            -
                def gram_count
         | 
| 39 | 
            -
                  @frequency.values.inject do |sum,count|
         | 
| 40 | 
            -
                    sum + count
         | 
| 41 | 
            -
                  end
         | 
| 42 | 
            -
                end
         | 
| 43 | 
            -
             | 
| 44 | 
            -
                def build(&block)
         | 
| 45 | 
            -
                  clear_probabilities
         | 
| 46 | 
            -
             | 
| 47 | 
            -
                  block.call(self) if block
         | 
| 48 | 
            -
             | 
| 49 | 
            -
                  total_count = gram_count.to_f
         | 
| 50 | 
            -
                  @frequency.each do |ngram,count|
         | 
| 51 | 
            -
                    @probability[ngram] = count.to_f / total_count
         | 
| 52 | 
            -
                  end
         | 
| 53 | 
            -
             | 
| 54 | 
            -
                  return self
         | 
| 55 | 
            -
                end
         | 
| 56 | 
            -
             | 
| 57 | 
            -
                def fragment_probability(fragment)
         | 
| 58 | 
            -
                  probability_of_ngrams(ngrams_from_fragment(fragment))
         | 
| 59 | 
            -
                end
         | 
| 60 | 
            -
             | 
| 61 | 
            -
                def sentence_probability(sentence)
         | 
| 62 | 
            -
                  probability_of_ngrams(ngrams_from_sentence(sentence))
         | 
| 63 | 
            -
                end
         | 
| 64 | 
            -
             | 
| 65 | 
            -
                def text_probability(text)
         | 
| 66 | 
            -
                  probability_of_ngrams(ngrams_from_text(text))
         | 
| 67 | 
            -
                end
         | 
| 68 | 
            -
             | 
| 69 | 
            -
              end
         | 
| 70 | 
            -
            end
         | 
    
        data/test/test_raingrams.rb
    DELETED
    
    | 
            File without changes
         |