raingrams 0.0.9 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. data/History.txt +9 -0
  2. data/Manifest.txt +10 -10
  3. data/README.txt +9 -7
  4. data/Rakefile +3 -6
  5. data/TODO.txt +6 -0
  6. data/lib/raingrams/bigram_model.rb +3 -7
  7. data/lib/raingrams/extensions/object.rb +4 -1
  8. data/lib/raingrams/extensions/string.rb +3 -0
  9. data/lib/raingrams/extensions.rb +0 -5
  10. data/lib/raingrams/hexagram_model.rb +3 -7
  11. data/lib/raingrams/model.rb +622 -61
  12. data/lib/raingrams/ngram.rb +50 -9
  13. data/lib/raingrams/ngram_set.rb +43 -0
  14. data/lib/raingrams/open_vocabulary/model.rb +12 -0
  15. data/lib/raingrams/open_vocabulary/open_model.rb +8 -4
  16. data/lib/raingrams/open_vocabulary.rb +0 -1
  17. data/lib/raingrams/pentagram_model.rb +3 -7
  18. data/lib/raingrams/probability_table.rb +153 -0
  19. data/lib/raingrams/quadgram_model.rb +3 -7
  20. data/lib/raingrams/raingrams.rb +10 -20
  21. data/lib/raingrams/tokens/start_sentence.rb +2 -2
  22. data/lib/raingrams/tokens/stop_sentence.rb +2 -2
  23. data/lib/raingrams/tokens/token.rb +49 -5
  24. data/lib/raingrams/tokens/unknown.rb +2 -2
  25. data/lib/raingrams/tokens.rb +1 -0
  26. data/lib/raingrams/trigram_model.rb +3 -7
  27. data/lib/raingrams/version.rb +1 -1
  28. data/lib/raingrams.rb +1 -1
  29. data/spec/ngram_set_spec.rb +54 -0
  30. data/spec/ngram_spec.rb +29 -0
  31. data/spec/probability_table_spec.rb +94 -0
  32. data/spec/raingrams_spec.rb +9 -0
  33. data/spec/spec_helper.rb +5 -0
  34. data/tasks/spec.rb +7 -0
  35. metadata +65 -55
  36. data/lib/raingrams/extensions/class.rb +0 -7
  37. data/lib/raingrams/extensions/false_class.rb +0 -7
  38. data/lib/raingrams/extensions/nil_class.rb +0 -7
  39. data/lib/raingrams/extensions/symbol.rb +0 -7
  40. data/lib/raingrams/extensions/true_class.rb +0 -7
  41. data/lib/raingrams/multigram_model.rb +0 -165
  42. data/lib/raingrams/open_vocabulary/multigram_model.rb +0 -12
  43. data/lib/raingrams/open_vocabulary/unigram_model.rb +0 -12
  44. data/lib/raingrams/unigram_model.rb +0 -70
  45. data/test/test_raingrams.rb +0 -0
data/History.txt CHANGED
@@ -1,3 +1,12 @@
1
+ == 0.1.0 / 2008-10-06
2
+
3
+ * Various bug fixes.
4
+ * Added NgramSet and ProbabilityTable classes.
5
+ * Merged NgramModel with the Model class.
6
+ * Refactored the Model class.
7
+ * Added random_gram_sentence, random_sentence, random_paragraph and
8
+ random_text methods to the Model class.
9
+
1
10
  == 0.0.9 / 2008-01-09
2
11
 
3
12
  * Initial release.
data/Manifest.txt CHANGED
@@ -2,19 +2,15 @@ History.txt
2
2
  LICENSE.txt
3
3
  Manifest.txt
4
4
  README.txt
5
+ TODO.txt
5
6
  Rakefile
6
7
  lib/raingrams.rb
7
8
  lib/raingrams/version.rb
8
9
  lib/raingrams/raingrams.rb
9
10
  lib/raingrams/exceptions/prefix_frequency_missing.rb
10
11
  lib/raingrams/exceptions.rb
11
- lib/raingrams/extensions/class.rb
12
- lib/raingrams/extensions/false_class.rb
13
- lib/raingrams/extensions/nil_class.rb
14
12
  lib/raingrams/extensions/object.rb
15
13
  lib/raingrams/extensions/string.rb
16
- lib/raingrams/extensions/symbol.rb
17
- lib/raingrams/extensions/true_class.rb
18
14
  lib/raingrams/extensions.rb
19
15
  lib/raingrams/tokens/token.rb
20
16
  lib/raingrams/tokens/start_sentence.rb
@@ -22,21 +18,25 @@ lib/raingrams/tokens/stop_sentence.rb
22
18
  lib/raingrams/tokens/unknown.rb
23
19
  lib/raingrams/tokens.rb
24
20
  lib/raingrams/ngram.rb
21
+ lib/raingrams/ngram_set.rb
22
+ lib/raingrams/probability_table.rb
25
23
  lib/raingrams/model.rb
26
- lib/raingrams/unigram_model.rb
27
- lib/raingrams/multigram_model.rb
28
24
  lib/raingrams/bigram_model.rb
29
25
  lib/raingrams/trigram_model.rb
30
26
  lib/raingrams/quadgram_model.rb
31
27
  lib/raingrams/pentagram_model.rb
32
28
  lib/raingrams/hexagram_model.rb
33
29
  lib/raingrams/open_vocabulary/open_model.rb
34
- lib/raingrams/open_vocabulary/unigram_model.rb
35
- lib/raingrams/open_vocabulary/multigram_model.rb
30
+ lib/raingrams/open_vocabulary/model.rb
36
31
  lib/raingrams/open_vocabulary/bigram_model.rb
37
32
  lib/raingrams/open_vocabulary/trigram_model.rb
38
33
  lib/raingrams/open_vocabulary/quadgram_model.rb
39
34
  lib/raingrams/open_vocabulary/pentagram_model.rb
40
35
  lib/raingrams/open_vocabulary/hexagram_model.rb
41
36
  lib/raingrams/open_vocabulary.rb
42
- test/test_raingrams.rb
37
+ tasks/spec.rb
38
+ spec/spec_helper.rb
39
+ spec/ngram_spec.rb
40
+ spec/ngram_set_spec.rb
41
+ spec/probability_table_spec.rb
42
+ spec/raingrams_spec.rb
data/README.txt CHANGED
@@ -1,6 +1,7 @@
1
- Raingrams
2
- by Postmodern Modulus III
3
- http://rubyforge.net/projects/raingrams/
1
+ = Raingrams
2
+
3
+ * http://raingrams.rubyforge.org/
4
+ * Postmodern Modulus III (postmodern.mod3@gmail.com)
4
5
 
5
6
  == DESCRIPTION:
6
7
 
@@ -8,13 +9,14 @@ Raingrams is a flexible and general-purpose ngrams library written in Ruby.
8
9
  Raingrams supports any non-zero ngram size, text/non-text grams, multiple
9
10
  parsing styles and open/closed vocabulary models.
10
11
 
11
- == FEATURES/PROBLEMS:
12
+ == FEATURES:
12
13
 
13
- * Supports all non-zero ngram sizes.
14
+ * Supports all ngram sizes above 1.
14
15
  * Supports text and non-text grams.
15
16
  * Supports Open and Closed vocabulary models.
16
-
17
- == REQUIREMENTS:
17
+ * Supports calculating the similarity and commonality of sample text against
18
+ specified models.
19
+ * Supports generating random text from models.
18
20
 
19
21
  == INSTALL:
20
22
 
data/Rakefile CHANGED
@@ -2,16 +2,13 @@
2
2
 
3
3
  require 'rubygems'
4
4
  require 'hoe'
5
+ require './tasks/spec.rb'
5
6
  require './lib/raingrams/version.rb'
6
7
 
7
8
  Hoe.new('raingrams', Raingrams::VERSION) do |p|
8
9
  p.rubyforge_name = 'raingrams'
9
- p.author = 'Postmodern Modulus III'
10
- p.email = 'postmodern.mod3@gmail.com'
11
- p.summary = 'Raingrams is a flexible and general-purpose ngrams library written in Ruby'
12
- p.description = p.paragraphs_of('README.txt', 2..5).join("\n\n")
13
- p.url = p.paragraphs_of('README.txt', 0).first.split(/\n/)[1..-1]
14
- p.changes = p.paragraphs_of('History.txt', 0..1).join("\n\n")
10
+ p.developer('Postmodern Modulus III', 'postmodern.mod3@gmail.com')
11
+ p.remote_rdoc_dir = 'docs'
15
12
  end
16
13
 
17
14
  # vim: syntax=Ruby
data/TODO.txt ADDED
@@ -0,0 +1,6 @@
1
+ == TODO:
2
+
3
+ * Add specs for the Model class.
4
+ * Add options to Model#random_sentence for weighting certain grams.
5
+ * Add a command-line utility to utilize the Raingrams API.
6
+
@@ -1,13 +1,9 @@
1
- require 'raingrams/multigram_model'
1
+ require 'raingrams/model'
2
2
 
3
3
  module Raingrams
4
- class BigramModel < MultigramModel
4
+ class BigramModel < Model
5
5
 
6
- def initialize(opts={},&block)
7
- opts[:ngram_size] = 2
8
-
9
- super(opts,&block)
10
- end
6
+ ngram_size 2
11
7
 
12
8
  end
13
9
  end
@@ -1,7 +1,10 @@
1
1
  class Object
2
2
 
3
+ #
4
+ # Returns the object.
5
+ #
3
6
  def to_gram
4
- self.dup.freeze
7
+ self
5
8
  end
6
9
 
7
10
  end
@@ -1,5 +1,8 @@
1
1
  class String
2
2
 
3
+ #
4
+ # Interns the string for usage as a gram.
5
+ #
3
6
  def to_gram
4
7
  intern
5
8
  end
@@ -1,7 +1,2 @@
1
- require 'raingrams/extensions/class'
2
- require 'raingrams/extensions/nil_class'
3
- require 'raingrams/extensions/true_class'
4
- require 'raingrams/extensions/false_class'
5
- require 'raingrams/extensions/symbol'
6
1
  require 'raingrams/extensions/string'
7
2
  require 'raingrams/extensions/object'
@@ -1,13 +1,9 @@
1
- require 'raingrams/multigram_model'
1
+ require 'raingrams/model'
2
2
 
3
3
  module Raingrams
4
- class HexagramModel < MultigramModel
4
+ class HexagramModel < Model
5
5
 
6
- def initialize(opts={},&block)
7
- opts[:ngram_size] = 6
8
-
9
- super(opts,&block)
10
- end
6
+ ngram_size 6
11
7
 
12
8
  end
13
9
  end