raingrams 0.0.9 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +9 -0
- data/Manifest.txt +10 -10
- data/README.txt +9 -7
- data/Rakefile +3 -6
- data/TODO.txt +6 -0
- data/lib/raingrams/bigram_model.rb +3 -7
- data/lib/raingrams/extensions/object.rb +4 -1
- data/lib/raingrams/extensions/string.rb +3 -0
- data/lib/raingrams/extensions.rb +0 -5
- data/lib/raingrams/hexagram_model.rb +3 -7
- data/lib/raingrams/model.rb +622 -61
- data/lib/raingrams/ngram.rb +50 -9
- data/lib/raingrams/ngram_set.rb +43 -0
- data/lib/raingrams/open_vocabulary/model.rb +12 -0
- data/lib/raingrams/open_vocabulary/open_model.rb +8 -4
- data/lib/raingrams/open_vocabulary.rb +0 -1
- data/lib/raingrams/pentagram_model.rb +3 -7
- data/lib/raingrams/probability_table.rb +153 -0
- data/lib/raingrams/quadgram_model.rb +3 -7
- data/lib/raingrams/raingrams.rb +10 -20
- data/lib/raingrams/tokens/start_sentence.rb +2 -2
- data/lib/raingrams/tokens/stop_sentence.rb +2 -2
- data/lib/raingrams/tokens/token.rb +49 -5
- data/lib/raingrams/tokens/unknown.rb +2 -2
- data/lib/raingrams/tokens.rb +1 -0
- data/lib/raingrams/trigram_model.rb +3 -7
- data/lib/raingrams/version.rb +1 -1
- data/lib/raingrams.rb +1 -1
- data/spec/ngram_set_spec.rb +54 -0
- data/spec/ngram_spec.rb +29 -0
- data/spec/probability_table_spec.rb +94 -0
- data/spec/raingrams_spec.rb +9 -0
- data/spec/spec_helper.rb +5 -0
- data/tasks/spec.rb +7 -0
- metadata +65 -55
- data/lib/raingrams/extensions/class.rb +0 -7
- data/lib/raingrams/extensions/false_class.rb +0 -7
- data/lib/raingrams/extensions/nil_class.rb +0 -7
- data/lib/raingrams/extensions/symbol.rb +0 -7
- data/lib/raingrams/extensions/true_class.rb +0 -7
- data/lib/raingrams/multigram_model.rb +0 -165
- data/lib/raingrams/open_vocabulary/multigram_model.rb +0 -12
- data/lib/raingrams/open_vocabulary/unigram_model.rb +0 -12
- data/lib/raingrams/unigram_model.rb +0 -70
- data/test/test_raingrams.rb +0 -0
    
        data/lib/raingrams/ngram.rb
    CHANGED
    
    | @@ -1,20 +1,53 @@ | |
| 1 | 
            +
            require 'raingrams/extensions'
         | 
| 2 | 
            +
             | 
| 1 3 | 
             
            module Raingrams
         | 
| 2 4 | 
             
              class Ngram < Array
         | 
| 3 5 |  | 
| 4 | 
            -
                 | 
| 5 | 
            -
             | 
| 6 | 
            +
                #
         | 
| 7 | 
            +
                # Creates a new Ngram object with the specified _objects_.
         | 
| 8 | 
            +
                #
         | 
| 9 | 
            +
                def initialize(objects)
         | 
| 10 | 
            +
                  super(objects.map { |obj| obj.to_gram })
         | 
| 11 | 
            +
                end
         | 
| 12 | 
            +
             | 
| 13 | 
            +
                #
         | 
| 14 | 
            +
                # Creates a new Ngram object from the specified _objects_.
         | 
| 15 | 
            +
                #
         | 
| 16 | 
            +
                def self.[](*objects)
         | 
| 17 | 
            +
                  self.new(objects)
         | 
| 18 | 
            +
                end
         | 
| 19 | 
            +
             | 
| 20 | 
            +
                #
         | 
| 21 | 
            +
                # Creates a new Ngram object by appending the specified _grams_ to the
         | 
| 22 | 
            +
                # ngram.
         | 
| 23 | 
            +
                #
         | 
| 24 | 
            +
                def +(grams)
         | 
| 25 | 
            +
                  if grams.kind_of?(Array)
         | 
| 26 | 
            +
                    return self.class.new(super(grams.map { |gram|
         | 
| 27 | 
            +
                      gram.to_gram
         | 
| 28 | 
            +
                    }))
         | 
| 29 | 
            +
                  else
         | 
| 30 | 
            +
                    return self.class.new(super([grams.to_gram]))
         | 
| 31 | 
            +
                  end
         | 
| 6 32 | 
             
                end
         | 
| 7 33 |  | 
| 8 | 
            -
                def  | 
| 9 | 
            -
                   | 
| 34 | 
            +
                def <<(gram)
         | 
| 35 | 
            +
                  super(gram.to_gram)
         | 
| 10 36 | 
             
                end
         | 
| 11 37 |  | 
| 38 | 
            +
                #
         | 
| 39 | 
            +
                # Returns the prefix of the ngram.
         | 
| 40 | 
            +
                #
         | 
| 12 41 | 
             
                def prefix
         | 
| 13 42 | 
             
                  self[0...length-1]
         | 
| 14 43 | 
             
                end
         | 
| 15 44 |  | 
| 16 | 
            -
                 | 
| 17 | 
            -
             | 
| 45 | 
            +
                #
         | 
| 46 | 
            +
                # Returns +true+ if the ngram is prefixed by the specified
         | 
| 47 | 
            +
                # _smaller_ngram_.
         | 
| 48 | 
            +
                #
         | 
| 49 | 
            +
                def prefixed_by?(smaller_ngram)
         | 
| 50 | 
            +
                  prefix == smaller_ngram
         | 
| 18 51 | 
             
                end
         | 
| 19 52 |  | 
| 20 53 | 
             
                def postfix
         | 
| @@ -22,21 +55,25 @@ module Raingrams | |
| 22 55 | 
             
                end
         | 
| 23 56 |  | 
| 24 57 | 
             
                def postfixed_by?(ngram)
         | 
| 25 | 
            -
                  postfix==ngram
         | 
| 58 | 
            +
                  postfix == ngram
         | 
| 26 59 | 
             
                end
         | 
| 27 60 |  | 
| 28 61 | 
             
                def starts_with?(obj)
         | 
| 29 | 
            -
                  self | 
| 62 | 
            +
                  self.first == obj.to_gram
         | 
| 30 63 | 
             
                end
         | 
| 31 64 |  | 
| 32 65 | 
             
                def ends_with?(obj)
         | 
| 33 | 
            -
                  self | 
| 66 | 
            +
                  self.last == obj.to_gram
         | 
| 34 67 | 
             
                end
         | 
| 35 68 |  | 
| 36 69 | 
             
                def include?(obj)
         | 
| 37 70 | 
             
                  super(obj.to_gram)
         | 
| 38 71 | 
             
                end
         | 
| 39 72 |  | 
| 73 | 
            +
                def includes?(*grams)
         | 
| 74 | 
            +
                  (self & grams) == grams
         | 
| 75 | 
            +
                end
         | 
| 76 | 
            +
             | 
| 40 77 | 
             
                def flatten
         | 
| 41 78 | 
             
                  self.dup
         | 
| 42 79 | 
             
                end
         | 
| @@ -49,5 +86,9 @@ module Raingrams | |
| 49 86 | 
             
                  join(', ')
         | 
| 50 87 | 
             
                end
         | 
| 51 88 |  | 
| 89 | 
            +
                def inspect
         | 
| 90 | 
            +
                  'Ngram[' + self.map { |gram| gram.inspect }.join(', ') + ']'
         | 
| 91 | 
            +
                end
         | 
| 92 | 
            +
             | 
| 52 93 | 
             
              end
         | 
| 53 94 | 
             
            end
         | 
| @@ -0,0 +1,43 @@ | |
| 1 | 
            +
            require 'raingrams/ngram'
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require 'set'
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            module Raingrams
              #
              # A Set of ngrams with convenience filters. Every filter returns a new
              # set of the same class rather than an Array.
              #
              class NgramSet < Set

                #
                # Returns a new set (of the receiver's class) holding every ngram for
                # which the given _block_ returns a true value.
                #
                def select(&block)
                  inject(self.class.new) do |matches,ngram|
                    matches << ngram if block.call(ngram)
                    matches
                  end
                end

                #
                # Selects the ngrams that answer true to <tt>prefixed_by?</tt> for
                # the specified _prefix_.
                #
                def prefixed_by(prefix)
                  select do |ngram|
                    ngram.prefixed_by?(prefix)
                  end
                end

                #
                # Selects the ngrams that answer true to <tt>postfixed_by?</tt> for
                # the specified _postfix_.
                #
                def postfixed_by(postfix)
                  select do |ngram|
                    ngram.postfixed_by?(postfix)
                  end
                end

                #
                # Selects the ngrams that answer true to <tt>starts_with?</tt> for
                # the specified _gram_.
                #
                def starts_with(gram)
                  select do |ngram|
                    ngram.starts_with?(gram)
                  end
                end

                #
                # Selects the ngrams that answer true to <tt>ends_with?</tt> for the
                # specified _gram_.
                #
                def ends_with(gram)
                  select do |ngram|
                    ngram.ends_with?(gram)
                  end
                end

                #
                # Selects the ngrams that answer true to <tt>include?</tt> for the
                # specified _gram_.
                #
                def including(gram)
                  select do |ngram|
                    ngram.include?(gram)
                  end
                end

                #
                # Selects the ngrams that answer true to <tt>includes?</tt> for all
                # of the specified _grams_.
                #
                def includes(*grams)
                  select do |ngram|
                    ngram.includes?(*grams)
                  end
                end

              end
            end
         | 
| @@ -7,14 +7,18 @@ module Raingrams | |
| 7 7 | 
             
                  # The fixed lexicon of this model
         | 
| 8 8 | 
             
                  attr_reader :lexicon
         | 
| 9 9 |  | 
| 10 | 
            -
                  def initialize( | 
| 11 | 
            -
                    @lexicon =  | 
| 10 | 
            +
                  def initialize(options={},&block)
         | 
| 11 | 
            +
                    @lexicon = (options[:lexicon] || [])
         | 
| 12 12 |  | 
| 13 | 
            -
                     | 
| 13 | 
            +
                    @lexicon.map! do |word|
         | 
| 14 | 
            +
                      word.to_gram
         | 
| 15 | 
            +
                    end
         | 
| 16 | 
            +
             | 
| 17 | 
            +
                    super(options,&block)
         | 
| 14 18 | 
             
                  end
         | 
| 15 19 |  | 
| 16 20 | 
             
                  def within_lexicon?(gram)
         | 
| 17 | 
            -
                    @lexicon.include?(gram)
         | 
| 21 | 
            +
                    @lexicon.include?(gram.to_gram)
         | 
| 18 22 | 
             
                  end
         | 
| 19 23 |  | 
| 20 24 | 
             
                  def train_ngram(ngram)
         | 
| @@ -1,13 +1,9 @@ | |
| 1 | 
            -
            require 'raingrams/ | 
| 1 | 
            +
            require 'raingrams/model'
         | 
| 2 2 |  | 
| 3 3 | 
             
            module Raingrams
         | 
| 4 | 
            -
              class PentagramModel <  | 
| 4 | 
            +
              class PentagramModel < Model
         | 
| 5 5 |  | 
| 6 | 
            -
                 | 
| 7 | 
            -
                  opts[:ngram_size] = 5
         | 
| 8 | 
            -
             | 
| 9 | 
            -
                  super(opts,&block)
         | 
| 10 | 
            -
                end
         | 
| 6 | 
            +
                ngram_size 5
         | 
| 11 7 |  | 
| 12 8 | 
             
              end
         | 
| 13 9 | 
             
            end
         | 
| @@ -0,0 +1,153 @@ | |
| 1 | 
            +
            module Raingrams
              #
              # Records how many times each gram has been counted and derives a
              # probability for each gram from those frequencies. Changing a
              # frequency marks the table as dirty; #build recomputes the
              # probabilities and clears the dirty flag.
              #
              class ProbabilityTable

                # Indicates whether the table needs to be rebuilt
                attr_reader :dirty

                # Frequencies of grams
                attr_reader :frequencies

                # Probabilities of grams
                attr_reader :probabilities

                #
                # Creates a new empty ProbabilityTable object.
                #
                def initialize
                  @dirty = false
                  @total = 0
                  @frequencies = {}
                  @probabilities = {}
                end

                #
                # Returns +true+ if the probability table is dirty and needs to be
                # rebuilt, returns +false+ otherwise.
                #
                def dirty?
                  @dirty == true
                end

                #
                # Returns +true+ if the probability table contains the specified _gram_,
                # returns +false+ otherwise.
                #
                def has_gram?(gram)
                  @frequencies.has_key?(gram)
                end

                #
                # Returns the grams within the probability table.
                #
                def grams
                  @frequencies.keys
                end

                #
                # Iterates over each gram in the probability table, passing each to the
                # given _block_.
                #
                def each_gram(&block)
                  @frequencies.each_key(&block)
                end

                #
                # Returns the frequency of the specified _gram_. Returns +0+ by default.
                #
                def frequency_of(gram)
                  @frequencies[gram] || 0
                end

                #
                # Returns the probability of the specified _gram_ occurring. Returns
                # <tt>0.0</tt> by default.
                #
                def probability_of(gram)
                  @probabilities[gram] || 0.0
                end

                alias [] probability_of

                #
                # Sets the frequency of the specified _gram_ to the specified _value_
                # and marks the probability table as dirty. Returns the _value_.
                #
                def set_count(gram,value)
                  @dirty = true
                  @frequencies[gram] = value
                end

                #
                # Increments the frequency of the specified _gram_ and marks the
                # probability table as dirty. Returns the new frequency.
                #
                def count(gram)
                  @dirty = true
                  @frequencies[gram] = frequency_of(gram) + 1
                end

                #
                # Returns the sum of all frequencies. The sum is recalculated only
                # while the table is marked as dirty; otherwise the previously
                # calculated total is returned. Returns +0+ for an empty table.
                #
                def total
                  if @dirty
                    # Seed the fold with 0 so that a table without any
                    # frequencies yields 0 instead of nil.
                    @total = @frequencies.values.inject(0) do |sum,freq|
                      sum + freq
                    end
                  end

                  @total
                end

                #
                # Builds the probability table using the recorded frequencies, if the
                # table is marked as dirty. Returns the probability table itself.
                #
                def build
                  if @dirty
                    current_total = total.to_f

                    @frequencies.each do |gram,frequency|
                      # Avoid 0.0 / 0.0 (NaN) when every frequency is zero.
                      @probabilities[gram] = if current_total == 0.0
                                               0.0
                                             else
                                               frequency.to_f / current_total
                                             end
                    end

                    @dirty = false
                  end

                  self
                end

                #
                # Returns +true+ if the total of all frequencies is zero, returns
                # +false+ otherwise.
                #
                def empty?
                  # Go through #total so that counts recorded since the last
                  # build are taken into account.
                  total == 0
                end

                #
                # Clears the probability table. Returns the probability table
                # itself.
                #
                def clear
                  @total = 0
                  @frequencies.clear
                  @probabilities.clear

                  # Nothing is left to rebuild once both tables are empty.
                  @dirty = false

                  self
                end

                #
                # Returns the frequencies form of the table while it is dirty,
                # otherwise the inspected probabilities.
                #
                def inspect
                  if @dirty
                    "#<ProbabilityTable @total=#{total} @frequencies=#{@frequencies.inspect}>"
                  else
                    @probabilities.inspect
                  end
                end

              end
            end
         | 
| @@ -1,13 +1,9 @@ | |
| 1 | 
            -
            require 'raingrams/ | 
| 1 | 
            +
            require 'raingrams/model'
         | 
| 2 2 |  | 
| 3 3 | 
             
            module Raingrams
         | 
| 4 | 
            -
              class QuadgramModel <  | 
| 4 | 
            +
              class QuadgramModel < Model
         | 
| 5 5 |  | 
| 6 | 
            -
                 | 
| 7 | 
            -
                  opts[:ngram_size] = 4
         | 
| 8 | 
            -
             | 
| 9 | 
            -
                  super(opts,&block)
         | 
| 10 | 
            -
                end
         | 
| 6 | 
            +
                ngram_size 4
         | 
| 11 7 |  | 
| 12 8 | 
             
              end
         | 
| 13 9 | 
             
            end
         | 
    
        data/lib/raingrams/raingrams.rb
    CHANGED
    
    | @@ -1,31 +1,21 @@ | |
| 1 | 
            -
            require 'raingrams/ | 
| 2 | 
            -
            require 'raingrams/ | 
| 3 | 
            -
            require 'raingrams/open_vocabulary/unigram_model'
         | 
| 4 | 
            -
            require 'raingrams/open_vocabulary/multigram_model'
         | 
| 1 | 
            +
            require 'raingrams/model'
         | 
| 2 | 
            +
            require 'raingrams/open_vocabulary/model'
         | 
| 5 3 |  | 
| 6 4 | 
             
            module Raingrams
         | 
| 7 | 
            -
              def Raingrams.closed_vocabulary_model( | 
| 8 | 
            -
                 | 
| 9 | 
            -
                  return UnigramModel.new(opts,&block)
         | 
| 10 | 
            -
                else
         | 
| 11 | 
            -
                  return MultigramModel.new(opts,&block)
         | 
| 12 | 
            -
                end
         | 
| 5 | 
            +
              def Raingrams.closed_vocabulary_model(options={},&block)
         | 
| 6 | 
            +
                Model.new(options,&block)
         | 
| 13 7 | 
             
              end
         | 
| 14 8 |  | 
| 15 | 
            -
              def Raingrams.open_vocabulary_model( | 
| 16 | 
            -
                 | 
| 17 | 
            -
                  return OpenVocabulary::UnigramModel.new(opts,&block)
         | 
| 18 | 
            -
                else
         | 
| 19 | 
            -
                  return OpenVocabulary::MultigramModel.new(opts,&block)
         | 
| 20 | 
            -
                end
         | 
| 9 | 
            +
              def Raingrams.open_vocabulary_model(options={},&block)
         | 
| 10 | 
            +
                OpenVocabulary::Model.new(options,&block)
         | 
| 21 11 | 
             
              end
         | 
| 22 12 |  | 
| 23 | 
            -
              def Raingrams.model( | 
| 24 | 
            -
                case  | 
| 13 | 
            +
              def Raingrams.model(options={},&block)
         | 
| 14 | 
            +
                case options[:vocabulary]
         | 
| 25 15 | 
             
                when :open, 'open'
         | 
| 26 | 
            -
                  return Raingrams.open_vocabulary_model( | 
| 16 | 
            +
                  return Raingrams.open_vocabulary_model(options,&block)
         | 
| 27 17 | 
             
                else
         | 
| 28 | 
            -
                  return Raingrams.closed_vocabulary_model( | 
| 18 | 
            +
                  return Raingrams.closed_vocabulary_model(options,&block)
         | 
| 29 19 | 
             
                end
         | 
| 30 20 | 
             
              end
         | 
| 31 21 | 
             
            end
         | 
| @@ -2,16 +2,60 @@ module Raingrams | |
| 2 2 | 
             
              module Tokens
         | 
| 3 3 | 
             
                class Token
         | 
| 4 4 |  | 
| 5 | 
            -
                   | 
| 5 | 
            +
                  # Gram form of the token
         | 
| 6 | 
            +
                  attr_reader :gram
         | 
| 7 | 
            +
             | 
| 8 | 
            +
                  #
         | 
| 9 | 
            +
                  # Creates a new Token object with the specified _gram_.
         | 
| 10 | 
            +
                  #
         | 
| 11 | 
            +
                  def initialize(gram)
         | 
| 12 | 
            +
                    @gram = gram
         | 
| 13 | 
            +
                  end
         | 
| 14 | 
            +
             | 
| 15 | 
            +
                  def to_gram
         | 
| 16 | 
            +
                    self
         | 
| 17 | 
            +
                  end
         | 
| 18 | 
            +
             | 
| 19 | 
            +
                  #
         | 
| 20 | 
            +
                  # Creates an Array of the specified _length_ containing the token.
         | 
| 21 | 
            +
                  #
         | 
| 22 | 
            +
                  def *(length)
         | 
| 6 23 | 
             
                    [self] * length
         | 
| 7 24 | 
             
                  end
         | 
| 8 25 |  | 
| 9 | 
            -
                   | 
| 10 | 
            -
             | 
| 26 | 
            +
                  #
         | 
| 27 | 
            +
                  # Returns +true+ if the token has the same gram as the _other_ token,
         | 
| 28 | 
            +
                  # returns +false+ otherwise.
         | 
| 29 | 
            +
                  #
         | 
| 30 | 
            +
                  def eql?(other)
         | 
| 31 | 
            +
                    if other.kind_of?(Token)
         | 
| 32 | 
            +
                      return (@gram == other.gram)
         | 
| 33 | 
            +
                    end
         | 
| 34 | 
            +
             | 
| 35 | 
            +
                    return false
         | 
| 36 | 
            +
                  end
         | 
| 37 | 
            +
             | 
| 38 | 
            +
                  alias == eql?
         | 
| 39 | 
            +
             | 
| 40 | 
            +
                  #
         | 
| 41 | 
            +
                  # Returns the String form of the token.
         | 
| 42 | 
            +
                  #
         | 
| 43 | 
            +
                  def to_s
         | 
| 44 | 
            +
                    @gram.to_s
         | 
| 45 | 
            +
                  end
         | 
| 46 | 
            +
             | 
| 47 | 
            +
                  #
         | 
| 48 | 
            +
                  # Returns the Symbol form of the token.
         | 
| 49 | 
            +
                  #
         | 
| 50 | 
            +
                  def to_sym
         | 
| 51 | 
            +
                    @gram.to_sym
         | 
| 11 52 | 
             
                  end
         | 
| 12 53 |  | 
| 13 | 
            -
                   | 
| 14 | 
            -
             | 
| 54 | 
            +
                  #
         | 
| 55 | 
            +
                  # Returns the String form of the token.
         | 
| 56 | 
            +
                  #
         | 
| 57 | 
            +
                  def inspect
         | 
| 58 | 
            +
                    @gram.to_s
         | 
| 15 59 | 
             
                  end
         | 
| 16 60 |  | 
| 17 61 | 
             
                end
         | 
    
        data/lib/raingrams/tokens.rb
    CHANGED
    
    
| @@ -1,13 +1,9 @@ | |
| 1 | 
            -
            require 'raingrams/ | 
| 1 | 
            +
            require 'raingrams/model'
         | 
| 2 2 |  | 
| 3 3 | 
             
            module Raingrams
         | 
| 4 | 
            -
              class TrigramModel <  | 
| 4 | 
            +
              class TrigramModel < Model
         | 
| 5 5 |  | 
| 6 | 
            -
                 | 
| 7 | 
            -
                  opts[:ngram_size] = 3
         | 
| 8 | 
            -
             | 
| 9 | 
            -
                  super(opts,&block)
         | 
| 10 | 
            -
                end
         | 
| 6 | 
            +
                ngram_size 3
         | 
| 11 7 |  | 
| 12 8 | 
             
              end
         | 
| 13 9 | 
             
            end
         | 
    
        data/lib/raingrams/version.rb
    CHANGED
    
    
    
        data/lib/raingrams.rb
    CHANGED
    
    
| @@ -0,0 +1,54 @@ | |
| 1 | 
            +
            require 'raingrams/ngram_set'
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require 'spec_helper'
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            describe NgramSet do
         | 
| 6 | 
            +
              before(:all) do
         | 
| 7 | 
            +
                @ngrams = NgramSet[
         | 
| 8 | 
            +
                  Ngram[:the, :dog],
         | 
| 9 | 
            +
                  Ngram[:dog, :jumped],
         | 
| 10 | 
            +
                  Ngram[:jumped, :through],
         | 
| 11 | 
            +
                  Ngram[:through, :the],
         | 
| 12 | 
            +
                  Ngram[:the, :hoop]
         | 
| 13 | 
            +
                ]
         | 
| 14 | 
            +
              end
         | 
| 15 | 
            +
             | 
| 16 | 
            +
              it "should select ngrams from the set" do
         | 
| 17 | 
            +
                @ngrams.select { |ngram|
         | 
| 18 | 
            +
                  ngram.starts_with?(:the)
         | 
| 19 | 
            +
                }.should == NgramSet[Ngram[:the, :dog], Ngram[:the, :hoop]]
         | 
| 20 | 
            +
              end
         | 
| 21 | 
            +
             | 
| 22 | 
            +
              it "should select ngrams with a specified prefixed" do
         | 
| 23 | 
            +
                @ngrams.prefixed_by(Ngram[:dog]).should == NgramSet[
         | 
| 24 | 
            +
                  Ngram[:dog, :jumped]
         | 
| 25 | 
            +
                ]
         | 
| 26 | 
            +
              end
         | 
| 27 | 
            +
             | 
| 28 | 
            +
              it "should select ngrams with a specified postfix" do
         | 
| 29 | 
            +
                @ngrams.postfixed_by(Ngram[:through]).should == NgramSet[
         | 
| 30 | 
            +
                  Ngram[:jumped, :through]
         | 
| 31 | 
            +
                ]
         | 
| 32 | 
            +
              end
         | 
| 33 | 
            +
             | 
| 34 | 
            +
              it "should select ngrams starting with a specified gram" do
         | 
| 35 | 
            +
                @ngrams.starts_with(:jumped).should == NgramSet[Ngram[:jumped, :through]]
         | 
| 36 | 
            +
              end
         | 
| 37 | 
            +
             | 
| 38 | 
            +
              it "should select ngrams ending with a specified gram" do
         | 
| 39 | 
            +
                @ngrams.ends_with(:dog).should == NgramSet[Ngram[:the, :dog]]
         | 
| 40 | 
            +
              end
         | 
| 41 | 
            +
             | 
| 42 | 
            +
              it "should select ngrams including a specified gram" do
         | 
| 43 | 
            +
                @ngrams.including(:dog).should == NgramSet[
         | 
| 44 | 
            +
                  Ngram[:the, :dog],
         | 
| 45 | 
            +
                  Ngram[:dog, :jumped]
         | 
| 46 | 
            +
                ]
         | 
| 47 | 
            +
              end
         | 
| 48 | 
            +
             | 
| 49 | 
            +
              it "should select ngrams which includes specified grams" do
         | 
| 50 | 
            +
                @ngrams.includes(:the, :dog).should == NgramSet[
         | 
| 51 | 
            +
                  Ngram[:the, :dog],
         | 
| 52 | 
            +
                ]
         | 
| 53 | 
            +
              end
         | 
| 54 | 
            +
            end
         | 
    
        data/spec/ngram_spec.rb
    ADDED
    
    | @@ -0,0 +1,29 @@ | |
| 1 | 
            +
            require 'raingrams/ngram'
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require 'spec_helper'
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            describe Ngram do
         | 
| 6 | 
            +
              before(:all) do
         | 
| 7 | 
            +
                @ngram = Ngram[:one, :two, :three]
         | 
| 8 | 
            +
              end
         | 
| 9 | 
            +
             | 
| 10 | 
            +
              it "should have a prefix" do
         | 
| 11 | 
            +
                @ngram.prefix.should == Ngram[:one, :two]
         | 
| 12 | 
            +
              end
         | 
| 13 | 
            +
             | 
| 14 | 
            +
              it "should have a postfix" do
         | 
| 15 | 
            +
                @ngram.postfix.should == Ngram[:two, :three]
         | 
| 16 | 
            +
              end
         | 
| 17 | 
            +
             | 
| 18 | 
            +
              it "should begin with a gram" do
         | 
| 19 | 
            +
                @ngram.starts_with?(:one).should == true
         | 
| 20 | 
            +
              end
         | 
| 21 | 
            +
             | 
| 22 | 
            +
              it "should end with a gram" do
         | 
| 23 | 
            +
                @ngram.ends_with?(:three).should == true
         | 
| 24 | 
            +
              end
         | 
| 25 | 
            +
             | 
| 26 | 
            +
              it "should include certain grams" do
         | 
| 27 | 
            +
                @ngram.includes?(:one, :three).should == true
         | 
| 28 | 
            +
              end
         | 
| 29 | 
            +
            end
         |