raingrams 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +28 -0
- data/Manifest.txt +9 -0
- data/README.txt +46 -2
- data/Rakefile +1 -0
- data/TODO.txt +0 -1
- data/lib/raingrams/model.rb +204 -40
- data/lib/raingrams/ngram.rb +6 -2
- data/lib/raingrams/ngram_set.rb +6 -2
- data/lib/raingrams/version.rb +1 -1
- data/spec/bigram_model_spec.rb +111 -0
- data/spec/helpers/training.rb +8 -0
- data/spec/helpers.rb +1 -0
- data/spec/model_examples.rb +83 -0
- data/spec/model_spec.rb +118 -0
- data/spec/ngram_set_spec.rb +11 -2
- data/spec/ngram_spec.rb +1 -1
- data/spec/pentagram_model_spec.rb +101 -0
- data/spec/quadgram_model_spec.rb +106 -0
- data/spec/spec_helper.rb +2 -0
- data/spec/training/snowcrash.txt +88 -0
- data/spec/trigram_model_spec.rb +109 -0
- metadata +24 -4
data/History.txt
CHANGED
@@ -1,3 +1,31 @@
|
|
1
|
+
== 0.1.1 / 2008-10-12
|
2
|
+
|
3
|
+
* Improved the parsing abilities of Model#parse_sentence and
|
4
|
+
Model#parse_text.
|
5
|
+
* Fixed a bug in Model#has_ngram?.
|
6
|
+
* Fixed a bug in Model#ngrams_starting_with.
|
7
|
+
* Removed Model#probability_of_gram, for now at least.
|
8
|
+
* Renamed Ngram#includes? to Ngram#includes_all?.
|
9
|
+
* Renamed Model#ngrams_including to Model#ngrams_including_all.
|
10
|
+
* Renamed Model#frequencies_of_ngrams to Model#frequency_of_ngrams.
|
11
|
+
* Added the following methods:
|
12
|
+
* Ngram#includes_any?.
|
13
|
+
* Model.open.
|
14
|
+
* Model.train_with_paragraph.
|
15
|
+
* Model.train_with_text.
|
16
|
+
* Model.train_with_file.
|
17
|
+
* Model.train_with_url.
|
18
|
+
* Model#has_gram?.
|
19
|
+
* Model#ngrams_including_all.
|
20
|
+
* Model#ngrams_from_paragraph.
|
21
|
+
* Model#train_with_paragraph.
|
22
|
+
* Model#train_with_file.
|
23
|
+
* Model#train_with_url.
|
24
|
+
* Model#frequency_of_ngram.
|
25
|
+
* Model#frequencies_for.
|
26
|
+
* Model#frequencies_of_ngrams.
|
27
|
+
* Model#save.
|
28
|
+
|
1
29
|
== 0.1.0 / 2008-10-06
|
2
30
|
|
3
31
|
* Various bug fixes.
|
data/Manifest.txt
CHANGED
@@ -35,8 +35,17 @@ lib/raingrams/open_vocabulary/pentagram_model.rb
|
|
35
35
|
lib/raingrams/open_vocabulary/hexagram_model.rb
|
36
36
|
lib/raingrams/open_vocabulary.rb
|
37
37
|
tasks/spec.rb
|
38
|
+
spec/training/snowcrash.txt
|
39
|
+
spec/helpers/training.rb
|
40
|
+
spec/helpers.rb
|
38
41
|
spec/spec_helper.rb
|
39
42
|
spec/ngram_spec.rb
|
40
43
|
spec/ngram_set_spec.rb
|
41
44
|
spec/probability_table_spec.rb
|
42
45
|
spec/raingrams_spec.rb
|
46
|
+
spec/model_spec.rb
|
47
|
+
spec/model_examples.rb
|
48
|
+
spec/bigram_model_spec.rb
|
49
|
+
spec/trigram_model_spec.rb
|
50
|
+
spec/quadgram_model_spec.rb
|
51
|
+
spec/pentagram_model_spec.rb
|
data/README.txt
CHANGED
@@ -6,22 +6,66 @@
|
|
6
6
|
== DESCRIPTION:
|
7
7
|
|
8
8
|
Raingrams is a flexible and general-purpose ngrams library written in Ruby.
|
9
|
-
Raingrams supports
|
9
|
+
Raingrams supports ngram sizes greater than 1, text/non-text grams, multiple
|
10
10
|
parsing styles and open/closed vocabulary models.
|
11
11
|
|
12
12
|
== FEATURES:
|
13
13
|
|
14
|
-
* Supports
|
14
|
+
* Supports ngram sizes greater than 1.
|
15
15
|
* Supports text and non-text grams.
|
16
16
|
* Supports Open and Closed vocabulary models.
|
17
17
|
* Supports calculating the similarity and commonality of sample text against
|
18
18
|
specified models.
|
19
19
|
* Supports generating random text from models.
|
20
20
|
|
21
|
+
== REQUIREMENTS:
|
22
|
+
|
23
|
+
* Hpricot
|
24
|
+
|
21
25
|
== INSTALL:
|
22
26
|
|
23
27
|
$ sudo gem install raingrams
|
24
28
|
|
29
|
+
== EXAMPLES:
|
30
|
+
|
31
|
+
* Train a model with ycombinator comments:
|
32
|
+
|
33
|
+
require 'raingrams'
|
34
|
+
require 'hpricot'
|
35
|
+
require 'open-uri'
|
36
|
+
|
37
|
+
include Raingrams
|
38
|
+
|
39
|
+
model = BigramModel.build do |model|
|
40
|
+
doc = Hpricot(open('http://news.ycombinator.org/newcomments'))
|
41
|
+
doc.search('span.comment') do |span|
|
42
|
+
model.train_with_text(span.inner_text)
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
* Update a trained model:
|
47
|
+
|
48
|
+
model.train_with_text %{Interesting videos. Anders talks about functional
|
49
|
+
support on .net, concurrency, immutability. Guy Steele talks about
|
50
|
+
Fortress on JVM. Too bad they are afraid of macros (access to AST),
|
51
|
+
though Steele does say Fortress has some support.}
|
52
|
+
|
53
|
+
model.refresh
|
54
|
+
|
55
|
+
* Generate a random sentence:
|
56
|
+
|
57
|
+
model.random_sentence
|
58
|
+
# => "OTOOH if you use slicehost even offer to bash Apple makes it will
|
59
|
+
exit and its 38 month ago based configuration of little networks created."
|
60
|
+
|
61
|
+
* Dump a model to a file, to be marshaled later:
|
62
|
+
|
63
|
+
model.save('path/for/model')
|
64
|
+
|
65
|
+
* Load a model from a file:
|
66
|
+
|
67
|
+
Model.open('path/for/model')
|
68
|
+
|
25
69
|
== LICENSE:
|
26
70
|
|
27
71
|
The MIT License
|
data/Rakefile
CHANGED
data/TODO.txt
CHANGED
data/lib/raingrams/model.rb
CHANGED
@@ -4,6 +4,8 @@ require 'raingrams/probability_table'
|
|
4
4
|
require 'raingrams/tokens'
|
5
5
|
|
6
6
|
require 'set'
|
7
|
+
require 'hpricot'
|
8
|
+
require 'open-uri'
|
7
9
|
|
8
10
|
module Raingrams
|
9
11
|
class Model
|
@@ -56,6 +58,7 @@ module Raingrams
|
|
56
58
|
@ignore_punctuation = true
|
57
59
|
@ignore_urls = true
|
58
60
|
@ignore_phone_numbers = false
|
61
|
+
@ignore_references = false
|
59
62
|
|
60
63
|
if options.has_key?(:ignore_case)
|
61
64
|
@ignore_case = options[:ignore_case]
|
@@ -73,14 +76,19 @@ module Raingrams
|
|
73
76
|
@ignore_phone_numbers = options[:ignore_phone_numbers]
|
74
77
|
end
|
75
78
|
|
79
|
+
if options.has_key?(:ignore_references)
|
80
|
+
@ignore_references = options[:ignore_references]
|
81
|
+
end
|
82
|
+
|
76
83
|
@prefixes = {}
|
77
84
|
|
78
85
|
block.call(self) if block
|
79
86
|
end
|
80
87
|
|
81
88
|
#
|
82
|
-
# Creates a new
|
83
|
-
# _block_ is given, it will be passed the newly created model.
|
89
|
+
# Creates a new model object with the given _options_. If a
|
90
|
+
# _block_ is given, it will be passed the newly created model. After
|
91
|
+
# the block has been called the model will be built.
|
84
92
|
#
|
85
93
|
def self.build(options={},&block)
|
86
94
|
self.new(options) do |model|
|
@@ -88,16 +96,74 @@ module Raingrams
|
|
88
96
|
end
|
89
97
|
end
|
90
98
|
|
99
|
+
#
|
100
|
+
# Creates a new model object with the given _options_ and trains it
|
101
|
+
# with the specified _paragraph_.
|
102
|
+
#
|
103
|
+
def self.train_with_paragraph(paragraph,options={})
|
104
|
+
self.build(options) do |model|
|
105
|
+
model.train_with_paragraph(paragraph)
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
109
|
+
#
|
110
|
+
# Creates a new model object with the given _options_ and trains it
|
111
|
+
# with the specified _text_.
|
112
|
+
#
|
113
|
+
def self.train_with_text(text,options={})
|
114
|
+
self.build(options) do |model|
|
115
|
+
model.train_with_text(text)
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
#
|
120
|
+
# Creates a new model object with the given _options_ and trains it
|
121
|
+
# with the contents of the specified _path_.
|
122
|
+
#
|
123
|
+
def self.train_with_file(path,options={})
|
124
|
+
self.build(options) do |model|
|
125
|
+
model.train_with_file(path)
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
129
|
+
#
|
130
|
+
# Creates a new model object with the given _options_ and trains it
|
131
|
+
# with the inner text of the paragraphs tags at the specified _url_.
|
132
|
+
#
|
133
|
+
def self.train_with_url(url,options={})
|
134
|
+
self.build(options) do |model|
|
135
|
+
model.train_with_url(url)
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
#
|
140
|
+
# Marshals a model from the contents of the file at the specified
|
141
|
+
# _path_.
|
142
|
+
#
|
143
|
+
def self.open(path)
|
144
|
+
model = nil
|
145
|
+
|
146
|
+
File.open(path) do |file|
|
147
|
+
model = Marshal.load(file)
|
148
|
+
end
|
149
|
+
|
150
|
+
return model
|
151
|
+
end
|
152
|
+
|
91
153
|
#
|
92
154
|
# Parses the specified _sentence_ and returns an Array of tokens.
|
93
155
|
#
|
94
156
|
def parse_sentence(sentence)
|
95
|
-
|
96
|
-
|
157
|
+
sentence = sentence.to_s
|
158
|
+
|
159
|
+
if @ignore_punctuation
|
160
|
+
# eat trailing punctuation
|
161
|
+
sentence.gsub!(/[\.\?!]*$/,'')
|
162
|
+
end
|
97
163
|
|
98
164
|
if @ignore_urls
|
99
165
|
# remove URLs
|
100
|
-
sentence.gsub!(/\s*\w+:\/\/[\w
|
166
|
+
sentence.gsub!(/\s*\w+:\/\/[\w\/\+_\-,:%\d\.\-\?&=]*\s*/,' ')
|
101
167
|
end
|
102
168
|
|
103
169
|
if @ignore_phone_numbers
|
@@ -107,7 +173,7 @@ module Raingrams
|
|
107
173
|
|
108
174
|
if @ignore_references
|
109
175
|
# remove RFC style references
|
110
|
-
sentence.gsub!(/\s
|
176
|
+
sentence.gsub!(/\s*[\(\{\[]\d+[\)\}\]]\s*/,' ')
|
111
177
|
end
|
112
178
|
|
113
179
|
if @ignore_case
|
@@ -117,10 +183,10 @@ module Raingrams
|
|
117
183
|
|
118
184
|
if @ignore_punctuation
|
119
185
|
# split and ignore punctuation characters
|
120
|
-
return sentence.scan(/\w+[_\.:']
|
186
|
+
return sentence.scan(/\w+[\-_\.:']\w+|\w+/)
|
121
187
|
else
|
122
188
|
# split and accept punctuation characters
|
123
|
-
return sentence.scan(/[\w\-_
|
189
|
+
return sentence.scan(/[\w\-_,:;\.\?\!'"\\\/]+/)
|
124
190
|
end
|
125
191
|
end
|
126
192
|
|
@@ -128,7 +194,7 @@ module Raingrams
|
|
128
194
|
# Parses the specified _text_ and returns an Array of sentences.
|
129
195
|
#
|
130
196
|
def parse_text(text)
|
131
|
-
text.to_s.scan(/[^\s\.\?!][^\.\?!]
|
197
|
+
text.to_s.scan(/[^\s\.\?!][^\.\?!]*[\.\?\!]/)
|
132
198
|
end
|
133
199
|
|
134
200
|
#
|
@@ -138,8 +204,8 @@ module Raingrams
|
|
138
204
|
ngram_set = NgramSet.new
|
139
205
|
|
140
206
|
@prefixes.each do |prefix,table|
|
141
|
-
table.each_gram do |
|
142
|
-
ngram_set << (prefix +
|
207
|
+
table.each_gram do |postfix_gram|
|
208
|
+
ngram_set << (prefix + postfix_gram)
|
143
209
|
end
|
144
210
|
end
|
145
211
|
|
@@ -151,7 +217,11 @@ module Raingrams
|
|
151
217
|
# +false+ otherwise.
|
152
218
|
#
|
153
219
|
def has_ngram?(ngram)
|
154
|
-
@prefixes
|
220
|
+
if @prefixes.has_key?(ngram.prefix)
|
221
|
+
return @prefixes[ngram.prefix].has_gram?(ngram.last)
|
222
|
+
else
|
223
|
+
return false
|
224
|
+
end
|
155
225
|
end
|
156
226
|
|
157
227
|
#
|
@@ -160,8 +230,8 @@ module Raingrams
|
|
160
230
|
#
|
161
231
|
def each_ngram(&block)
|
162
232
|
@prefixes.each do |prefix,table|
|
163
|
-
table.each_gram do |
|
164
|
-
block.call(prefix +
|
233
|
+
table.each_gram do |postfix_gram|
|
234
|
+
block.call(prefix + postfix_gram) if block
|
165
235
|
end
|
166
236
|
end
|
167
237
|
|
@@ -178,7 +248,7 @@ module Raingrams
|
|
178
248
|
selected_ngrams << ngram if block.call(ngram)
|
179
249
|
end
|
180
250
|
|
181
|
-
return
|
251
|
+
return selected_ngrams
|
182
252
|
end
|
183
253
|
|
184
254
|
#
|
@@ -221,8 +291,8 @@ module Raingrams
|
|
221
291
|
|
222
292
|
@prefixes.each do |prefix,table|
|
223
293
|
if prefix.first == gram
|
224
|
-
table.each_gram do |
|
225
|
-
ngram_set << (prefix +
|
294
|
+
table.each_gram do |postfix_gram|
|
295
|
+
ngram_set << (prefix + postfix_gram)
|
226
296
|
end
|
227
297
|
end
|
228
298
|
end
|
@@ -246,20 +316,20 @@ module Raingrams
|
|
246
316
|
end
|
247
317
|
|
248
318
|
#
|
249
|
-
# Returns the ngrams including the specified _grams_.
|
319
|
+
# Returns the ngrams including any of the specified _grams_.
|
250
320
|
#
|
251
|
-
def
|
321
|
+
def ngrams_including_any(*grams)
|
252
322
|
ngram_set = NgramSet.new
|
253
323
|
|
254
324
|
@prefixes.each do |prefix,table|
|
255
|
-
if prefix.
|
256
|
-
table.each_gram do |
|
257
|
-
ngram_set << (prefix +
|
325
|
+
if prefix.includes_any?(*grams)
|
326
|
+
table.each_gram do |postfix_gram|
|
327
|
+
ngram_set << (prefix + postfix_gram)
|
258
328
|
end
|
259
329
|
else
|
260
|
-
table.each_gram do |
|
261
|
-
if grams.include?(
|
262
|
-
ngram_set << (prefix +
|
330
|
+
table.each_gram do |postfix_gram|
|
331
|
+
if grams.include?(postfix_gram)
|
332
|
+
ngram_set << (prefix + postfix_gram)
|
263
333
|
end
|
264
334
|
end
|
265
335
|
end
|
@@ -268,6 +338,19 @@ module Raingrams
|
|
268
338
|
return ngram_set
|
269
339
|
end
|
270
340
|
|
341
|
+
#
|
342
|
+
# Returns the ngrams including all of the specified _grams_.
|
343
|
+
#
|
344
|
+
def ngrams_including_all(*grams)
|
345
|
+
ngram_set = NgramSet.new
|
346
|
+
|
347
|
+
each_ngram do |ngram|
|
348
|
+
ngram_set << ngram if ngram.includes_all?(*grams)
|
349
|
+
end
|
350
|
+
|
351
|
+
return ngram_set
|
352
|
+
end
|
353
|
+
|
271
354
|
#
|
272
355
|
# Returns the ngrams extracted from the specified _words_.
|
273
356
|
#
|
@@ -300,6 +383,8 @@ module Raingrams
|
|
300
383
|
end
|
301
384
|
end
|
302
385
|
|
386
|
+
alias ngrams_from_paragraph ngrams_from_text
|
387
|
+
|
303
388
|
#
|
304
389
|
# Returns all ngrams which precede the specified _gram_.
|
305
390
|
#
|
@@ -334,7 +419,19 @@ module Raingrams
|
|
334
419
|
# Returns all grams within the model.
|
335
420
|
#
|
336
421
|
def grams
|
337
|
-
@prefixes.keys.
|
422
|
+
@prefixes.keys.inject(Set.new) do |all_grams,gram|
|
423
|
+
all_grams + gram
|
424
|
+
end
|
425
|
+
end
|
426
|
+
|
427
|
+
#
|
428
|
+
# Returns +true+ if the model contains the specified _gram_, returns
|
429
|
+
# +false+ otherwise.
|
430
|
+
#
|
431
|
+
def has_gram?(gram)
|
432
|
+
@prefixes.keys.any? do |prefix|
|
433
|
+
prefix.include?(gram)
|
434
|
+
end
|
338
435
|
end
|
339
436
|
|
340
437
|
#
|
@@ -376,7 +473,7 @@ module Raingrams
|
|
376
473
|
# within the model.
|
377
474
|
#
|
378
475
|
def common_ngrams_from_fragment(fragment)
|
379
|
-
ngrams_from_fragment(
|
476
|
+
ngrams_from_fragment(fragment).select { |ngram| has_ngram?(ngram) }
|
380
477
|
end
|
381
478
|
|
382
479
|
#
|
@@ -423,6 +520,13 @@ module Raingrams
|
|
423
520
|
train_with_ngrams(ngrams_from_sentence(sentence))
|
424
521
|
end
|
425
522
|
|
523
|
+
#
|
524
|
+
# Train the model with the specified _paragraph_.
|
525
|
+
#
|
526
|
+
def train_with_paragraph(paragraph)
|
527
|
+
train_with_ngrams(ngrams_from_paragraph(paragraphs))
|
528
|
+
end
|
529
|
+
|
426
530
|
#
|
427
531
|
# Train the model with the specified _text_.
|
428
532
|
#
|
@@ -430,6 +534,39 @@ module Raingrams
|
|
430
534
|
train_with_ngrams(ngrams_from_text(text))
|
431
535
|
end
|
432
536
|
|
537
|
+
#
|
538
|
+
# Train the model with the contents of the specified _path_.
|
539
|
+
#
|
540
|
+
def train_with_file(path)
|
541
|
+
train_with_text(File.read(path))
|
542
|
+
end
|
543
|
+
|
544
|
+
#
|
545
|
+
# Train the model with the inner text of the paragraph tags at the
|
546
|
+
# specified _url_.
|
547
|
+
#
|
548
|
+
def train_with_url(url)
|
549
|
+
doc = Hpricot(open(url))
|
550
|
+
|
551
|
+
return doc.search('p').map do |p|
|
552
|
+
train_with_paragraph(p.inner_text)
|
553
|
+
end
|
554
|
+
end
|
555
|
+
|
556
|
+
#
|
557
|
+
# Returns the observed frequency of the specified _ngram_ within
|
558
|
+
# the training text.
|
559
|
+
#
|
560
|
+
def frequency_of_ngram(ngram)
|
561
|
+
prefix = ngram.prefix
|
562
|
+
|
563
|
+
if @prefixes.has_key?(prefix)
|
564
|
+
return @prefixes[prefix].frequency_of(ngram.last)
|
565
|
+
else
|
566
|
+
return 0
|
567
|
+
end
|
568
|
+
end
|
569
|
+
|
433
570
|
#
|
434
571
|
# Returns the probability of the specified _ngram_ occurring within
|
435
572
|
# arbitrary text.
|
@@ -444,6 +581,20 @@ module Raingrams
|
|
444
581
|
end
|
445
582
|
end
|
446
583
|
|
584
|
+
#
|
585
|
+
# Returns the observed frequency of the specified _ngrams_ occurring
|
586
|
+
# within the training text.
|
587
|
+
#
|
588
|
+
def frequencies_for(ngrams)
|
589
|
+
table = {}
|
590
|
+
|
591
|
+
ngrams.each do |ngram|
|
592
|
+
table[ngram] = frequency_of_ngram(ngram)
|
593
|
+
end
|
594
|
+
|
595
|
+
return table
|
596
|
+
end
|
597
|
+
|
447
598
|
#
|
448
599
|
# Returns the probability of the specified _ngrams_ occurring within
|
449
600
|
# arbitrary text.
|
@@ -458,6 +609,16 @@ module Raingrams
|
|
458
609
|
return table
|
459
610
|
end
|
460
611
|
|
612
|
+
#
|
613
|
+
# Returns the total observed frequency of the specified _ngrams_
|
614
|
+
# occurring within the training text.
|
615
|
+
#
|
616
|
+
def frequency_of_ngrams(ngrams)
|
617
|
+
frequencies_for(ngrams).values.inject do |total,freq|
|
618
|
+
total + freq
|
619
|
+
end
|
620
|
+
end
|
621
|
+
|
461
622
|
#
|
462
623
|
# Returns the joint probability of the specified _ngrams_ occurring
|
463
624
|
# within arbitrary text.
|
@@ -468,14 +629,6 @@ module Raingrams
|
|
468
629
|
end
|
469
630
|
end
|
470
631
|
|
471
|
-
#
|
472
|
-
# Returns the probably of the specified _gram_ occurring within
|
473
|
-
# arbitrary text.
|
474
|
-
#
|
475
|
-
def probability_of_gram(gram)
|
476
|
-
probability_of_ngrams(ngrams_starting_with(gram))
|
477
|
-
end
|
478
|
-
|
479
632
|
#
|
480
633
|
# Returns the probability of the specified _fragment_ occurring within
|
481
634
|
# arbitrary text.
|
@@ -582,9 +735,6 @@ module Raingrams
|
|
582
735
|
grams = []
|
583
736
|
last_ngram = @starting_ngram
|
584
737
|
|
585
|
-
# prime the grams
|
586
|
-
grams += @starting_ngram
|
587
|
-
|
588
738
|
loop do
|
589
739
|
next_ngrams = ngrams_prefixed_by(last_ngram.postfix).to_a
|
590
740
|
last_ngram = next_ngrams[rand(next_ngrams.length)]
|
@@ -592,8 +742,11 @@ module Raingrams
|
|
592
742
|
if last_ngram.nil?
|
593
743
|
return []
|
594
744
|
else
|
595
|
-
|
596
|
-
|
745
|
+
last_gram = last_ngram.last
|
746
|
+
|
747
|
+
break if last_gram == Tokens.stop
|
748
|
+
|
749
|
+
grams << last_gram
|
597
750
|
end
|
598
751
|
end
|
599
752
|
|
@@ -690,6 +843,17 @@ module Raingrams
|
|
690
843
|
return self
|
691
844
|
end
|
692
845
|
|
846
|
+
#
|
847
|
+
# Saves the model to the file at the specified _path_.
|
848
|
+
#
|
849
|
+
def save(path)
|
850
|
+
File.open(path,'w') do |file|
|
851
|
+
Marshal.dump(self,file)
|
852
|
+
end
|
853
|
+
|
854
|
+
return self
|
855
|
+
end
|
856
|
+
|
693
857
|
protected
|
694
858
|
|
695
859
|
#
|
data/lib/raingrams/ngram.rb
CHANGED
@@ -70,8 +70,12 @@ module Raingrams
|
|
70
70
|
super(obj.to_gram)
|
71
71
|
end
|
72
72
|
|
73
|
-
def
|
74
|
-
|
73
|
+
def includes_any?(*grams)
|
74
|
+
grams.any? { |gram| include?(gram) }
|
75
|
+
end
|
76
|
+
|
77
|
+
def includes_all?(*grams)
|
78
|
+
grams.all? { |gram| include?(gram) }
|
75
79
|
end
|
76
80
|
|
77
81
|
def flatten
|
data/lib/raingrams/ngram_set.rb
CHANGED
@@ -35,8 +35,12 @@ module Raingrams
|
|
35
35
|
select { |ngram| ngram.include?(gram) }
|
36
36
|
end
|
37
37
|
|
38
|
-
def
|
39
|
-
select { |ngram| ngram.
|
38
|
+
def including_any(*grams)
|
39
|
+
select { |ngram| ngram.includes_any?(*grams) }
|
40
|
+
end
|
41
|
+
|
42
|
+
def including_all(*grams)
|
43
|
+
select { |ngram| ngram.includes_all?(*grams) }
|
40
44
|
end
|
41
45
|
|
42
46
|
end
|
data/lib/raingrams/version.rb
CHANGED