raingrams 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +28 -0
- data/Manifest.txt +9 -0
- data/README.txt +46 -2
- data/Rakefile +1 -0
- data/TODO.txt +0 -1
- data/lib/raingrams/model.rb +204 -40
- data/lib/raingrams/ngram.rb +6 -2
- data/lib/raingrams/ngram_set.rb +6 -2
- data/lib/raingrams/version.rb +1 -1
- data/spec/bigram_model_spec.rb +111 -0
- data/spec/helpers/training.rb +8 -0
- data/spec/helpers.rb +1 -0
- data/spec/model_examples.rb +83 -0
- data/spec/model_spec.rb +118 -0
- data/spec/ngram_set_spec.rb +11 -2
- data/spec/ngram_spec.rb +1 -1
- data/spec/pentagram_model_spec.rb +101 -0
- data/spec/quadgram_model_spec.rb +106 -0
- data/spec/spec_helper.rb +2 -0
- data/spec/training/snowcrash.txt +88 -0
- data/spec/trigram_model_spec.rb +109 -0
- metadata +24 -4
data/History.txt
CHANGED
@@ -1,3 +1,31 @@
|
|
1
|
+
== 0.1.1 / 2008-10-12
|
2
|
+
|
3
|
+
* Improved the parsing abilities of Model#parse_sentence and
|
4
|
+
Model#parse_text.
|
5
|
+
* Fixed a bug in Model#has_ngram?.
|
6
|
+
* Fixed a bug in Model#ngrams_starting_with.
|
7
|
+
* Removed Model#probability_of_gram, for now at least.
|
8
|
+
* Renamed Ngram#includes? to Ngram#includes_all?.
|
9
|
+
* Renamed Model#ngrams_including to Model#ngrams_including_all.
|
10
|
+
* Renamed Model#frequencies_of_ngrams to Model#frequency_of_ngrams.
|
11
|
+
* Added the following methods:
|
12
|
+
* Ngram#includes_any?.
|
13
|
+
* Model.open.
|
14
|
+
* Model.train_with_paragraph.
|
15
|
+
* Model.train_with_text.
|
16
|
+
* Model.train_with_file.
|
17
|
+
* Model.train_with_url.
|
18
|
+
* Model#has_gram?.
|
19
|
+
* Model#ngrams_including_all.
|
20
|
+
* Model#ngrams_from_paragraph.
|
21
|
+
* Model#train_with_paragraph.
|
22
|
+
* Model#train_with_file.
|
23
|
+
* Model#train_with_url.
|
24
|
+
* Model#frequency_of_ngram.
|
25
|
+
* Model#frequencies_for.
|
26
|
+
* Model#frequencies_of_ngrams.
|
27
|
+
* Model#save.
|
28
|
+
|
1
29
|
== 0.1.0 / 2008-10-06
|
2
30
|
|
3
31
|
* Various bug fixes.
|
data/Manifest.txt
CHANGED
@@ -35,8 +35,17 @@ lib/raingrams/open_vocabulary/pentagram_model.rb
|
|
35
35
|
lib/raingrams/open_vocabulary/hexagram_model.rb
|
36
36
|
lib/raingrams/open_vocabulary.rb
|
37
37
|
tasks/spec.rb
|
38
|
+
spec/training/snowcrash.txt
|
39
|
+
spec/helpers/training.rb
|
40
|
+
spec/helpers.rb
|
38
41
|
spec/spec_helper.rb
|
39
42
|
spec/ngram_spec.rb
|
40
43
|
spec/ngram_set_spec.rb
|
41
44
|
spec/probability_table_spec.rb
|
42
45
|
spec/raingrams_spec.rb
|
46
|
+
spec/model_spec.rb
|
47
|
+
spec/model_examples.rb
|
48
|
+
spec/bigram_model_spec.rb
|
49
|
+
spec/trigram_model_spec.rb
|
50
|
+
spec/quadgram_model_spec.rb
|
51
|
+
spec/pentagram_model_spec.rb
|
data/README.txt
CHANGED
@@ -6,22 +6,66 @@
|
|
6
6
|
== DESCRIPTION:
|
7
7
|
|
8
8
|
Raingrams is a flexible and general-purpose ngrams library written in Ruby.
|
9
|
-
Raingrams supports
|
9
|
+
Raingrams supports ngram sizes greater than 1, text/non-text grams, multiple
|
10
10
|
parsing styles and open/closed vocabulary models.
|
11
11
|
|
12
12
|
== FEATURES:
|
13
13
|
|
14
|
-
* Supports
|
14
|
+
* Supports ngram sizes greater than 1.
|
15
15
|
* Supports text and non-text grams.
|
16
16
|
* Supports Open and Closed vocabulary models.
|
17
17
|
* Supports calculating the similarity and commonality of sample text against
|
18
18
|
specified models.
|
19
19
|
* Supports generating random text from models.
|
20
20
|
|
21
|
+
== REQUIREMENTS:
|
22
|
+
|
23
|
+
* Hpricot
|
24
|
+
|
21
25
|
== INSTALL:
|
22
26
|
|
23
27
|
$ sudo gem install raingrams
|
24
28
|
|
29
|
+
== EXAMPLES:
|
30
|
+
|
31
|
+
* Train a model with ycombinator comments:
|
32
|
+
|
33
|
+
require 'raingrams'
|
34
|
+
require 'hpricot'
|
35
|
+
require 'open-uri'
|
36
|
+
|
37
|
+
include Raingrams
|
38
|
+
|
39
|
+
model = BigramModel.build do |model|
|
40
|
+
doc = Hpricot(open('http://news.ycombinator.org/newcomments'))
|
41
|
+
doc.search('span.comment') do |span|
|
42
|
+
model.train_with_text(span.inner_text)
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
* Update a trained model:
|
47
|
+
|
48
|
+
model.train_with_text %{Interesting videos. Anders talks about functional
|
49
|
+
support on .net, concurrency, immutability. Guy Steele talks about
|
50
|
+
Fortress on JVM. Too bad they are afraid of macros (access to AST),
|
51
|
+
though Steele does say Fortress has some support.}
|
52
|
+
|
53
|
+
model.refresh
|
54
|
+
|
55
|
+
* Generate a random sentence:
|
56
|
+
|
57
|
+
model.random_sentence
|
58
|
+
# => "OTOOH if you use slicehost even offer to bash Apple makes it will
|
59
|
+
exit and its 38 month ago based configuration of little networks created."
|
60
|
+
|
61
|
+
* Dump a model to a file, to be marshaled later:
|
62
|
+
|
63
|
+
model.save('path/for/model')
|
64
|
+
|
65
|
+
* Load a model from a file:
|
66
|
+
|
67
|
+
Model.open('path/for/model')
|
68
|
+
|
25
69
|
== LICENSE:
|
26
70
|
|
27
71
|
The MIT License
|
data/Rakefile
CHANGED
data/TODO.txt
CHANGED
data/lib/raingrams/model.rb
CHANGED
@@ -4,6 +4,8 @@ require 'raingrams/probability_table'
|
|
4
4
|
require 'raingrams/tokens'
|
5
5
|
|
6
6
|
require 'set'
|
7
|
+
require 'hpricot'
|
8
|
+
require 'open-uri'
|
7
9
|
|
8
10
|
module Raingrams
|
9
11
|
class Model
|
@@ -56,6 +58,7 @@ module Raingrams
|
|
56
58
|
@ignore_punctuation = true
|
57
59
|
@ignore_urls = true
|
58
60
|
@ignore_phone_numbers = false
|
61
|
+
@ignore_references = false
|
59
62
|
|
60
63
|
if options.has_key?(:ignore_case)
|
61
64
|
@ignore_case = options[:ignore_case]
|
@@ -73,14 +76,19 @@ module Raingrams
|
|
73
76
|
@ignore_phone_numbers = options[:ignore_phone_numbers]
|
74
77
|
end
|
75
78
|
|
79
|
+
if options.has_key?(:ignore_references)
|
80
|
+
@ignore_references = options[:ignore_references]
|
81
|
+
end
|
82
|
+
|
76
83
|
@prefixes = {}
|
77
84
|
|
78
85
|
block.call(self) if block
|
79
86
|
end
|
80
87
|
|
81
88
|
#
|
82
|
-
# Creates a new
|
83
|
-
# _block_ is given, it will be passed the newly created model.
|
89
|
+
# Creates a new model object with the given _options_. If a
|
90
|
+
# _block_ is given, it will be passed the newly created model. After
|
91
|
+
# the block has been called the model will be built.
|
84
92
|
#
|
85
93
|
def self.build(options={},&block)
|
86
94
|
self.new(options) do |model|
|
@@ -88,16 +96,74 @@ module Raingrams
|
|
88
96
|
end
|
89
97
|
end
|
90
98
|
|
99
|
+
#
|
100
|
+
# Creates a new model object with the given _options_ and trains it
|
101
|
+
# with the specified _paragraph_.
|
102
|
+
#
|
103
|
+
def self.train_with_paragraph(paragraph,options={})
|
104
|
+
self.build(options) do |model|
|
105
|
+
model.train_with_paragraph(paragraph)
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
109
|
+
#
|
110
|
+
# Creates a new model object with the given _options_ and trains it
|
111
|
+
# with the specified _text_.
|
112
|
+
#
|
113
|
+
def self.train_with_text(text,options={})
|
114
|
+
self.build(options) do |model|
|
115
|
+
model.train_with_text(text)
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
#
|
120
|
+
# Creates a new model object with the given _options_ and trains it
|
121
|
+
# with the contents of the specified _path_.
|
122
|
+
#
|
123
|
+
def self.train_with_file(path,options={})
|
124
|
+
self.build(options) do |model|
|
125
|
+
model.train_with_file(path)
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
129
|
+
#
|
130
|
+
# Creates a new model object with the given _options_ and trains it
|
131
|
+
# with the inner text of the paragraph tags at the specified _url_.
|
132
|
+
#
|
133
|
+
def self.train_with_url(url,options={})
|
134
|
+
self.build(options) do |model|
|
135
|
+
model.train_with_url(url)
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
#
|
140
|
+
# Marshals a model from the contents of the file at the specified
|
141
|
+
# _path_.
|
142
|
+
#
|
143
|
+
def self.open(path)
|
144
|
+
model = nil
|
145
|
+
|
146
|
+
File.open(path) do |file|
|
147
|
+
model = Marshal.load(file)
|
148
|
+
end
|
149
|
+
|
150
|
+
return model
|
151
|
+
end
|
152
|
+
|
91
153
|
#
|
92
154
|
# Parses the specified _sentence_ and returns an Array of tokens.
|
93
155
|
#
|
94
156
|
def parse_sentence(sentence)
|
95
|
-
|
96
|
-
|
157
|
+
sentence = sentence.to_s
|
158
|
+
|
159
|
+
if @ignore_punctuation
|
160
|
+
# eat trailing punctuation
|
161
|
+
sentence.gsub!(/[\.\?!]*$/,'')
|
162
|
+
end
|
97
163
|
|
98
164
|
if @ignore_urls
|
99
165
|
# remove URLs
|
100
|
-
sentence.gsub!(/\s*\w+:\/\/[\w
|
166
|
+
sentence.gsub!(/\s*\w+:\/\/[\w\/\+_\-,:%\d\.\-\?&=]*\s*/,' ')
|
101
167
|
end
|
102
168
|
|
103
169
|
if @ignore_phone_numbers
|
@@ -107,7 +173,7 @@ module Raingrams
|
|
107
173
|
|
108
174
|
if @ignore_references
|
109
175
|
# remove RFC style references
|
110
|
-
sentence.gsub!(/\s
|
176
|
+
sentence.gsub!(/\s*[\(\{\[]\d+[\)\}\]]\s*/,' ')
|
111
177
|
end
|
112
178
|
|
113
179
|
if @ignore_case
|
@@ -117,10 +183,10 @@ module Raingrams
|
|
117
183
|
|
118
184
|
if @ignore_punctuation
|
119
185
|
# split and ignore punctuation characters
|
120
|
-
return sentence.scan(/\w+[_\.:']
|
186
|
+
return sentence.scan(/\w+[\-_\.:']\w+|\w+/)
|
121
187
|
else
|
122
188
|
# split and accept punctuation characters
|
123
|
-
return sentence.scan(/[\w\-_
|
189
|
+
return sentence.scan(/[\w\-_,:;\.\?\!'"\\\/]+/)
|
124
190
|
end
|
125
191
|
end
|
126
192
|
|
@@ -128,7 +194,7 @@ module Raingrams
|
|
128
194
|
# Parses the specified _text_ and returns an Array of sentences.
|
129
195
|
#
|
130
196
|
def parse_text(text)
|
131
|
-
text.to_s.scan(/[^\s\.\?!][^\.\?!]
|
197
|
+
text.to_s.scan(/[^\s\.\?!][^\.\?!]*[\.\?\!]/)
|
132
198
|
end
|
133
199
|
|
134
200
|
#
|
@@ -138,8 +204,8 @@ module Raingrams
|
|
138
204
|
ngram_set = NgramSet.new
|
139
205
|
|
140
206
|
@prefixes.each do |prefix,table|
|
141
|
-
table.each_gram do |
|
142
|
-
ngram_set << (prefix +
|
207
|
+
table.each_gram do |postfix_gram|
|
208
|
+
ngram_set << (prefix + postfix_gram)
|
143
209
|
end
|
144
210
|
end
|
145
211
|
|
@@ -151,7 +217,11 @@ module Raingrams
|
|
151
217
|
# +false+ otherwise.
|
152
218
|
#
|
153
219
|
def has_ngram?(ngram)
|
154
|
-
@prefixes
|
220
|
+
if @prefixes.has_key?(ngram.prefix)
|
221
|
+
return @prefixes[ngram.prefix].has_gram?(ngram.last)
|
222
|
+
else
|
223
|
+
return false
|
224
|
+
end
|
155
225
|
end
|
156
226
|
|
157
227
|
#
|
@@ -160,8 +230,8 @@ module Raingrams
|
|
160
230
|
#
|
161
231
|
def each_ngram(&block)
|
162
232
|
@prefixes.each do |prefix,table|
|
163
|
-
table.each_gram do |
|
164
|
-
block.call(prefix +
|
233
|
+
table.each_gram do |postfix_gram|
|
234
|
+
block.call(prefix + postfix_gram) if block
|
165
235
|
end
|
166
236
|
end
|
167
237
|
|
@@ -178,7 +248,7 @@ module Raingrams
|
|
178
248
|
selected_ngrams << ngram if block.call(ngram)
|
179
249
|
end
|
180
250
|
|
181
|
-
return
|
251
|
+
return selected_ngrams
|
182
252
|
end
|
183
253
|
|
184
254
|
#
|
@@ -221,8 +291,8 @@ module Raingrams
|
|
221
291
|
|
222
292
|
@prefixes.each do |prefix,table|
|
223
293
|
if prefix.first == gram
|
224
|
-
table.each_gram do |
|
225
|
-
ngram_set << (prefix +
|
294
|
+
table.each_gram do |postfix_gram|
|
295
|
+
ngram_set << (prefix + postfix_gram)
|
226
296
|
end
|
227
297
|
end
|
228
298
|
end
|
@@ -246,20 +316,20 @@ module Raingrams
|
|
246
316
|
end
|
247
317
|
|
248
318
|
#
|
249
|
-
# Returns the ngrams including the specified _grams_.
|
319
|
+
# Returns the ngrams including any of the specified _grams_.
|
250
320
|
#
|
251
|
-
def
|
321
|
+
def ngrams_including_any(*grams)
|
252
322
|
ngram_set = NgramSet.new
|
253
323
|
|
254
324
|
@prefixes.each do |prefix,table|
|
255
|
-
if prefix.
|
256
|
-
table.each_gram do |
|
257
|
-
ngram_set << (prefix +
|
325
|
+
if prefix.includes_any?(*grams)
|
326
|
+
table.each_gram do |postfix_gram|
|
327
|
+
ngram_set << (prefix + postfix_gram)
|
258
328
|
end
|
259
329
|
else
|
260
|
-
table.each_gram do |
|
261
|
-
if grams.include?(
|
262
|
-
ngram_set << (prefix +
|
330
|
+
table.each_gram do |postfix_gram|
|
331
|
+
if grams.include?(postfix_gram)
|
332
|
+
ngram_set << (prefix + postfix_gram)
|
263
333
|
end
|
264
334
|
end
|
265
335
|
end
|
@@ -268,6 +338,19 @@ module Raingrams
|
|
268
338
|
return ngram_set
|
269
339
|
end
|
270
340
|
|
341
|
+
#
|
342
|
+
# Returns the ngrams including all of the specified _grams_.
|
343
|
+
#
|
344
|
+
def ngrams_including_all(*grams)
|
345
|
+
ngram_set = NgramSet.new
|
346
|
+
|
347
|
+
each_ngram do |ngram|
|
348
|
+
ngram_set << ngram if ngram.includes_all?(*grams)
|
349
|
+
end
|
350
|
+
|
351
|
+
return ngram_set
|
352
|
+
end
|
353
|
+
|
271
354
|
#
|
272
355
|
# Returns the ngrams extracted from the specified _words_.
|
273
356
|
#
|
@@ -300,6 +383,8 @@ module Raingrams
|
|
300
383
|
end
|
301
384
|
end
|
302
385
|
|
386
|
+
alias ngrams_from_paragraph ngrams_from_text
|
387
|
+
|
303
388
|
#
|
304
389
|
# Returns all ngrams which precede the specified _gram_.
|
305
390
|
#
|
@@ -334,7 +419,19 @@ module Raingrams
|
|
334
419
|
# Returns all grams within the model.
|
335
420
|
#
|
336
421
|
def grams
|
337
|
-
@prefixes.keys.
|
422
|
+
@prefixes.keys.inject(Set.new) do |all_grams,gram|
|
423
|
+
all_grams + gram
|
424
|
+
end
|
425
|
+
end
|
426
|
+
|
427
|
+
#
|
428
|
+
# Returns +true+ if the model contains the specified _gram_, returns
|
429
|
+
# +false+ otherwise.
|
430
|
+
#
|
431
|
+
def has_gram?(gram)
|
432
|
+
@prefixes.keys.any? do |prefix|
|
433
|
+
prefix.include?(gram)
|
434
|
+
end
|
338
435
|
end
|
339
436
|
|
340
437
|
#
|
@@ -376,7 +473,7 @@ module Raingrams
|
|
376
473
|
# within the model.
|
377
474
|
#
|
378
475
|
def common_ngrams_from_fragment(fragment)
|
379
|
-
ngrams_from_fragment(
|
476
|
+
ngrams_from_fragment(fragment).select { |ngram| has_ngram?(ngram) }
|
380
477
|
end
|
381
478
|
|
382
479
|
#
|
@@ -423,6 +520,13 @@ module Raingrams
|
|
423
520
|
train_with_ngrams(ngrams_from_sentence(sentence))
|
424
521
|
end
|
425
522
|
|
523
|
+
#
|
524
|
+
# Train the model with the specified _paragraph_.
|
525
|
+
#
|
526
|
+
def train_with_paragraph(paragraph)
|
527
|
+
train_with_ngrams(ngrams_from_paragraph(paragraphs))
|
528
|
+
end
|
529
|
+
|
426
530
|
#
|
427
531
|
# Train the model with the specified _text_.
|
428
532
|
#
|
@@ -430,6 +534,39 @@ module Raingrams
|
|
430
534
|
train_with_ngrams(ngrams_from_text(text))
|
431
535
|
end
|
432
536
|
|
537
|
+
#
|
538
|
+
# Train the model with the contents of the specified _path_.
|
539
|
+
#
|
540
|
+
def train_with_file(path)
|
541
|
+
train_with_text(File.read(path))
|
542
|
+
end
|
543
|
+
|
544
|
+
#
|
545
|
+
# Train the model with the inner text of the paragraph tags at the
|
546
|
+
# specified _url_.
|
547
|
+
#
|
548
|
+
def train_with_url(url)
|
549
|
+
doc = Hpricot(open(url))
|
550
|
+
|
551
|
+
return doc.search('p').map do |p|
|
552
|
+
train_with_paragraph(p.inner_text)
|
553
|
+
end
|
554
|
+
end
|
555
|
+
|
556
|
+
#
|
557
|
+
# Returns the observed frequency of the specified _ngram_ within
|
558
|
+
# the training text.
|
559
|
+
#
|
560
|
+
def frequency_of_ngram(ngram)
|
561
|
+
prefix = ngram.prefix
|
562
|
+
|
563
|
+
if @prefixes.has_key?(prefix)
|
564
|
+
return @prefixes[prefix].frequency_of(ngram.last)
|
565
|
+
else
|
566
|
+
return 0
|
567
|
+
end
|
568
|
+
end
|
569
|
+
|
433
570
|
#
|
434
571
|
# Returns the probability of the specified _ngram_ occurring within
|
435
572
|
# arbitrary text.
|
@@ -444,6 +581,20 @@ module Raingrams
|
|
444
581
|
end
|
445
582
|
end
|
446
583
|
|
584
|
+
#
|
585
|
+
# Returns the observed frequency of the specified _ngrams_ occurring
|
586
|
+
# within the training text.
|
587
|
+
#
|
588
|
+
def frequencies_for(ngrams)
|
589
|
+
table = {}
|
590
|
+
|
591
|
+
ngrams.each do |ngram|
|
592
|
+
table[ngram] = frequency_of_ngram(ngram)
|
593
|
+
end
|
594
|
+
|
595
|
+
return table
|
596
|
+
end
|
597
|
+
|
447
598
|
#
|
448
599
|
# Returns the probability of the specified _ngrams_ occurring within
|
449
600
|
# arbitrary text.
|
@@ -458,6 +609,16 @@ module Raingrams
|
|
458
609
|
return table
|
459
610
|
end
|
460
611
|
|
612
|
+
#
|
613
|
+
# Returns the total observed frequency of the specified _ngrams_
|
614
|
+
# occurring within the training text.
|
615
|
+
#
|
616
|
+
def frequency_of_ngrams(ngrams)
|
617
|
+
frequencies_for(ngrams).values.inject do |total,freq|
|
618
|
+
total + freq
|
619
|
+
end
|
620
|
+
end
|
621
|
+
|
461
622
|
#
|
462
623
|
# Returns the joint probability of the specified _ngrams_ occurring
|
463
624
|
# within arbitrary text.
|
@@ -468,14 +629,6 @@ module Raingrams
|
|
468
629
|
end
|
469
630
|
end
|
470
631
|
|
471
|
-
#
|
472
|
-
# Returns the probably of the specified _gram_ occurring within
|
473
|
-
# arbitrary text.
|
474
|
-
#
|
475
|
-
def probability_of_gram(gram)
|
476
|
-
probability_of_ngrams(ngrams_starting_with(gram))
|
477
|
-
end
|
478
|
-
|
479
632
|
#
|
480
633
|
# Returns the probability of the specified _fragment_ occurring within
|
481
634
|
# arbitrary text.
|
@@ -582,9 +735,6 @@ module Raingrams
|
|
582
735
|
grams = []
|
583
736
|
last_ngram = @starting_ngram
|
584
737
|
|
585
|
-
# prime the grams
|
586
|
-
grams += @starting_ngram
|
587
|
-
|
588
738
|
loop do
|
589
739
|
next_ngrams = ngrams_prefixed_by(last_ngram.postfix).to_a
|
590
740
|
last_ngram = next_ngrams[rand(next_ngrams.length)]
|
@@ -592,8 +742,11 @@ module Raingrams
|
|
592
742
|
if last_ngram.nil?
|
593
743
|
return []
|
594
744
|
else
|
595
|
-
|
596
|
-
|
745
|
+
last_gram = last_ngram.last
|
746
|
+
|
747
|
+
break if last_gram == Tokens.stop
|
748
|
+
|
749
|
+
grams << last_gram
|
597
750
|
end
|
598
751
|
end
|
599
752
|
|
@@ -690,6 +843,17 @@ module Raingrams
|
|
690
843
|
return self
|
691
844
|
end
|
692
845
|
|
846
|
+
#
|
847
|
+
# Saves the model to the file at the specified _path_.
|
848
|
+
#
|
849
|
+
def save(path)
|
850
|
+
File.open(path,'w') do |file|
|
851
|
+
Marshal.dump(self,file)
|
852
|
+
end
|
853
|
+
|
854
|
+
return self
|
855
|
+
end
|
856
|
+
|
693
857
|
protected
|
694
858
|
|
695
859
|
#
|
data/lib/raingrams/ngram.rb
CHANGED
@@ -70,8 +70,12 @@ module Raingrams
|
|
70
70
|
super(obj.to_gram)
|
71
71
|
end
|
72
72
|
|
73
|
-
def
|
74
|
-
|
73
|
+
def includes_any?(*grams)
|
74
|
+
grams.any? { |gram| include?(gram) }
|
75
|
+
end
|
76
|
+
|
77
|
+
def includes_all?(*grams)
|
78
|
+
grams.all? { |gram| include?(gram) }
|
75
79
|
end
|
76
80
|
|
77
81
|
def flatten
|
data/lib/raingrams/ngram_set.rb
CHANGED
@@ -35,8 +35,12 @@ module Raingrams
|
|
35
35
|
select { |ngram| ngram.include?(gram) }
|
36
36
|
end
|
37
37
|
|
38
|
-
def
|
39
|
-
select { |ngram| ngram.
|
38
|
+
def including_any(*grams)
|
39
|
+
select { |ngram| ngram.includes_any?(*grams) }
|
40
|
+
end
|
41
|
+
|
42
|
+
def including_all(*grams)
|
43
|
+
select { |ngram| ngram.includes_all?(*grams) }
|
40
44
|
end
|
41
45
|
|
42
46
|
end
|
data/lib/raingrams/version.rb
CHANGED