raingrams 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +28 -0
- data/Manifest.txt +9 -0
- data/README.txt +46 -2
- data/Rakefile +1 -0
- data/TODO.txt +0 -1
- data/lib/raingrams/model.rb +204 -40
- data/lib/raingrams/ngram.rb +6 -2
- data/lib/raingrams/ngram_set.rb +6 -2
- data/lib/raingrams/version.rb +1 -1
- data/spec/bigram_model_spec.rb +111 -0
- data/spec/helpers/training.rb +8 -0
- data/spec/helpers.rb +1 -0
- data/spec/model_examples.rb +83 -0
- data/spec/model_spec.rb +118 -0
- data/spec/ngram_set_spec.rb +11 -2
- data/spec/ngram_spec.rb +1 -1
- data/spec/pentagram_model_spec.rb +101 -0
- data/spec/quadgram_model_spec.rb +106 -0
- data/spec/spec_helper.rb +2 -0
- data/spec/training/snowcrash.txt +88 -0
- data/spec/trigram_model_spec.rb +109 -0
- metadata +24 -4
@@ -0,0 +1,111 @@
|
|
1
|
+
require 'raingrams/bigram_model'
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
require 'model_examples'
|
5
|
+
|
6
|
+
# Specs for BigramModel: the shared "Model" examples plus bigram-specific
# checks of ngram extraction, frequencies and probabilities. The model is
# trained once, from the :snowcrash training text, and shared by all examples.
describe BigramModel do
  before(:all) do
    @model = BigramModel.train_with_text(Training.text_for(:snowcrash))
  end

  # Relative difference between a computed probability and the expected value.
  #
  # The expected values below are written with 15 significant digits, so a
  # correct result differs from them by a relative error of at most ~5e-15.
  # Comparing numerically (instead of comparing Float#to_s output against a
  # string) keeps the examples independent of how a given Ruby version
  # formats floats.
  #
  # NOTE(review): assumes the probability methods return Floats -- confirm.
  def relative_error(actual, expected)
    (actual - expected).abs / expected.abs
  end

  it_should_behave_like "Model"

  it "should return ngrams from specified words" do
    words = %w{Why is the Deliverator so equipped}
    ngrams = [
      Ngram[:Why, :is],
      Ngram[:is, :the],
      Ngram[:the, :Deliverator],
      Ngram[:Deliverator, :so],
      Ngram[:so, :equipped]
    ]

    @model.ngrams_from_words(words).should == ngrams
  end

  it "should return common ngrams from words" do
    words = %w{The Deliverator is a future Archetype}
    ngrams = [
      Ngram[:The, :Deliverator],
      Ngram[:Deliverator, :is],
      Ngram[:is, :a]
    ]

    @model.common_ngrams_from_words(words).should == ngrams
  end

  it "should return common ngrams from a specified fragment of text" do
    fragment = %{The Deliverator is a future Archetype}
    ngrams = [
      Ngram[:The, :Deliverator],
      Ngram[:Deliverator, :is],
      Ngram[:is, :a]
    ]

    @model.common_ngrams_from_fragment(fragment).should == ngrams
  end

  it "should return common ngrams from a specified sentence" do
    sentence = %{The Deliverator is a future Archetype.}
    # Unlike a fragment, a sentence is also expected to yield ngrams made of
    # the start and stop tokens.
    ngrams = [
      Ngram[Tokens.start, Tokens.start],
      Ngram[Tokens.start, :The],
      Ngram[:The, :Deliverator],
      Ngram[:Deliverator, :is],
      Ngram[:is, :a],
      Ngram[Tokens.stop, Tokens.stop]
    ]

    @model.common_ngrams_from_sentence(sentence).should == ngrams
  end

  it "should have a frequency for a specified ngram" do
    ngram = Ngram[:teensy, :darts]

    @model.frequency_of_ngram(ngram).should == 1
  end

  it "should have a probability for a specified ngram" do
    ngram = Ngram[:teensy, :darts]

    @model.probability_of_ngram(ngram).should == 1.0
  end

  it "should have a frequency for specified ngrams" do
    ngrams = NgramSet[
      Ngram[:but, :excess],
      Ngram[:freshly, :napalmed],
      Ngram[:sintered, :armorgel]
    ]

    @model.frequency_of_ngrams(ngrams).should == 3
  end

  it "should have a probability of specified ngrams" do
    ngrams = NgramSet[
      Ngram[:The, :Deliverator],
      Ngram[:Deliverator, :belongs],
      Ngram[:belongs, :to]
    ]

    probability = @model.probability_of_ngrams(ngrams)

    relative_error(probability, 0.0112293144208038).should < 1e-14
  end

  it "should have a probability for a specified fragment of text" do
    fragment = %{The Deliverator belongs to}

    probability = @model.fragment_probability(fragment)

    relative_error(probability, 0.0112293144208038).should < 1e-14
  end

  it "should have a probability for a specified sentence" do
    sentence = %{The Deliverator used to make software.}

    probability = @model.sentence_probability(sentence)

    relative_error(probability, 4.10042780102381e-07).should < 1e-14
  end

  it "should have a probability for specified text" do
    text = %{The Deliverator used to make software. Still does, sometimes.}

    probability = @model.text_probability(text)

    relative_error(probability, 2.40635434332383e-10).should < 1e-14
  end
end
|
data/spec/helpers.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'helpers/training'
|
@@ -0,0 +1,83 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Shared examples for model specs. The including example group is expected to
# assign the model under test to @model before these examples run.
shared_examples_for "Model" do
  it "should have ngrams" do
    @model.ngrams.each { |known| @model.has_ngram?(known).should == true }
  end

  it "should be able to iterate through all ngrams" do
    @model.each_ngram { |visited| @model.has_ngram?(visited).should == true }
  end

  it "should be able to select ngrams with certain properties" do
    selected = @model.ngrams_with { |candidate| candidate.include?(:the) }

    # Everything that was selected must satisfy the selection criterion.
    selected.each { |match| match.include?(:the).should == true }
  end

  it "should be able to select ngrams starting with a specified gram" do
    matches = @model.ngrams_starting_with(:filtering)

    matches.each { |match| match.starts_with?(:filtering).should == true }
  end

  it "should be able to select ngrams ending with a specified gram" do
    matches = @model.ngrams_ending_with(:sword)

    matches.each { |match| match.ends_with?(:sword).should == true }
  end

  it "should be able to select ngrams including any of the specified grams" do
    matches = @model.ngrams_including_any(:The, :Deliverator)

    matches.each do |match|
      match.includes_any?(:The, :Deliverator).should == true
    end
  end

  it "should be able to select ngrams including all of the specified grams" do
    matches = @model.ngrams_including_all(:activated, :charcoal)

    matches.each do |match|
      match.includes_all?(:activated, :charcoal).should == true
    end
  end

  it "should have grams" do
    @model.grams.each { |known| @model.has_gram?(known).should == true }
  end

  it "should provide a random ngram" do
    picked = @model.random_ngram

    @model.has_ngram?(picked).should == true
  end

  # The three generator examples below check that every ngram found in the
  # generated output is an ngram the model already has.

  it "should generate a random sentence" do
    generated = @model.ngrams_from_sentence(@model.random_sentence)

    generated.each { |found| @model.has_ngram?(found).should == true }
  end

  it "should generate a random paragraph" do
    generated = @model.ngrams_from_paragraph(@model.random_paragraph)

    generated.each { |found| @model.has_ngram?(found).should == true }
  end

  it "should generate a random text" do
    generated = @model.ngrams_from_text(@model.random_text)

    generated.each { |found| @model.has_ngram?(found).should == true }
  end
end
|
data/spec/model_spec.rb
ADDED
@@ -0,0 +1,118 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'raingrams/model'
|
4
|
+
|
5
|
+
# Specs for the text and sentence parsing of Model, exercised through several
# bigram models that differ only in their parsing options.
describe Model do
  before(:all) do
    # Every model here uses an ngram size of 2; only the extra option varies.
    bigram_model = lambda do |extra_options|
      Model.new({:ngram_size => 2}.merge(extra_options))
    end

    @model              = bigram_model.call({})
    @phone_number_model = bigram_model.call(:ignore_phone_numbers => true)
    @references_model   = bigram_model.call(:ignore_references => true)
    @case_model         = bigram_model.call(:ignore_case => true)
    @punctuation_model  = bigram_model.call(:ignore_punctuation => false)
  end

  it "should parse text into sentences" do
    input = "The Deliverator belongs to an elite order, a hallowed sub-category. He's got esprit up to here."
    expected = [
      "The Deliverator belongs to an elite order, a hallowed sub-category.",
      "He's got esprit up to here."
    ]

    @model.parse_text(input).should == expected
  end

  it "should parse words from a sentence" do
    input = "The Deliverator is in touch with the road, starts like a bad day, stops on a peseta."
    expected = %w[The Deliverator is in touch with the road starts like a bad day stops on a peseta]

    @model.parse_sentence(input).should == expected
  end

  it "should ignore URLs by default while parsing a sentence" do
    input = "Click on the following link: http://www.example.com/"
    expected = %w[Click on the following link]

    @model.parse_sentence(input).should == expected
  end

  it "should ignore short URIs by default while parsing a sentence" do
    input = "Click on the following link: jabber://"
    expected = %w[Click on the following link]

    @model.parse_sentence(input).should == expected
  end

  it "should ignore complex HTTP URLs by default while parsing a sentence" do
    input = "Click on the following link: http://www.google.com/search?hl=en&client=firefox-a&rls=org.mozilla:en-US:official&hs=jU&q=ruby+datamapper&start=20&sa=N"
    expected = %w[Click on the following link]

    @model.parse_sentence(input).should == expected
  end

  it "may ignore phone numbers while parsing a sentence" do
    input = "Call me before 12, 1-888-444-2222."
    expected = %w[Call me before 12]

    @phone_number_model.parse_sentence(input).should == expected
  end

  it "may ignore long-distance phone numbers while parsing a sentence" do
    input = "Call me before 12, 1-444-2222."
    expected = %w[Call me before 12]

    @phone_number_model.parse_sentence(input).should == expected
  end

  it "may ignore short phone numbers while parsing a sentence" do
    input = "Call me before 12, 444-2222."
    expected = %w[Call me before 12]

    @phone_number_model.parse_sentence(input).should == expected
  end

  it "may ignore RFC style references while parsing a sentence" do
    input = "As one can see, it has failed [1]."
    expected = %w[As one can see it has failed]

    @references_model.parse_sentence(input).should == expected
  end

  it "should ignore punctuation by default while parsing a sentence" do
    input = "Oh, they used to argue over times, many corporate driver-years lost to it: homeowners, red-faced and sweaty with their own lies, stinking of Old Spice and job-related stress, standing in their glowing yellow doorways brandishing their Seikos and waving at the clock over the kitchen sink, I swear, can't you guys tell time?"
    # Hyphens and apostrophes inside words are expected to survive.
    expected = %w[
      Oh they used to argue over times many corporate driver-years lost to it homeowners red-faced and sweaty with their own lies stinking of Old Spice and job-related stress standing in their glowing yellow doorways brandishing their Seikos and waving at the clock over the kitchen sink I swear can't you guys tell time
    ]

    @model.parse_sentence(input).should == expected
  end

  it "may ignore case while parsing a sentence" do
    input = "The Deliverator is in touch with the road, starts like a bad day, stops on a peseta."
    expected = %w[the deliverator is in touch with the road starts like a bad day stops on a peseta]

    @case_model.parse_sentence(input).should == expected
  end

  it "may preserve punctuation while parsing a sentence" do
    input = "Oh, they used to argue over times, many corporate driver-years lost to it: homeowners, red-faced and sweaty with their own lies, stinking of Old Spice and job-related stress, standing in their glowing yellow doorways brandishing their Seikos and waving at the clock over the kitchen sink, I swear, can't you guys tell time?"
    # With punctuation preserved, the expected words are the whitespace-separated tokens of the input.
    expected = %w[Oh, they used to argue over times, many corporate driver-years lost to it: homeowners, red-faced and sweaty with their own lies, stinking of Old Spice and job-related stress, standing in their glowing yellow doorways brandishing their Seikos and waving at the clock over the kitchen sink, I swear, can't you guys tell time?]

    @punctuation_model.parse_sentence(input).should == expected
  end
end
|
data/spec/ngram_set_spec.rb
CHANGED
@@ -46,9 +46,18 @@ describe NgramSet do
|
|
46
46
|
]
|
47
47
|
end
|
48
48
|
|
49
|
-
it "should select ngrams which
|
50
|
-
@ngrams.
|
49
|
+
it "should select ngrams which include any of the specified grams" do
|
50
|
+
@ngrams.including_any(:the, :dog).should == NgramSet[
|
51
51
|
Ngram[:the, :dog],
|
52
|
+
Ngram[:dog, :jumped],
|
53
|
+
Ngram[:through, :the],
|
54
|
+
Ngram[:the, :hoop]
|
55
|
+
]
|
56
|
+
end
|
57
|
+
|
58
|
+
it "should select ngrams which include all of the specified grams" do
|
59
|
+
@ngrams.including_all(:the, :dog).should == NgramSet[
|
60
|
+
Ngram[:the, :dog]
|
52
61
|
]
|
53
62
|
end
|
54
63
|
end
|
data/spec/ngram_spec.rb
CHANGED
@@ -0,0 +1,101 @@
|
|
1
|
+
require 'raingrams/pentagram_model'
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
require 'model_examples'
|
5
|
+
|
6
|
+
# Specs for PentagramModel: the shared "Model" examples plus checks of ngram
# extraction, frequencies and probabilities for ngrams of five grams. The
# model is built once, from the :snowcrash training text, and shared by all
# examples.
describe PentagramModel do
  before(:all) do
    @model = PentagramModel.build do |model|
      model.train_with_text(Training.text_for(:snowcrash))
    end
  end

  # Relative difference between a computed probability and the expected value.
  #
  # The expected values below are written with at most 15 significant digits,
  # so a correct result differs from them by a relative error of at most
  # ~5e-15. Comparing numerically (instead of comparing Float#to_s output
  # against a string) keeps the examples independent of how a given Ruby
  # version formats floats.
  #
  # NOTE(review): assumes the probability methods return Floats -- confirm.
  def relative_error(actual, expected)
    (actual - expected).abs / expected.abs
  end

  it_should_behave_like "Model"

  it "should return ngrams from specified words" do
    words = %w{Why is the Deliverator so equipped}
    ngrams = [
      Ngram[:Why, :is, :the, :Deliverator, :so],
      Ngram[:is, :the, :Deliverator, :so, :equipped]
    ]

    @model.ngrams_from_words(words).should == ngrams
  end

  it "should return common ngrams from words" do
    words = %w{The Deliverator is a future Archetype}
    # No ngram of these words is expected to be common with the model.
    ngrams = []

    @model.common_ngrams_from_words(words).should == ngrams
  end

  it "should return common ngrams from a specified fragment of text" do
    fragment = %{The Deliverator is a future Archetype}
    ngrams = []

    @model.common_ngrams_from_fragment(fragment).should == ngrams
  end

  it "should return common ngrams from a specified sentence" do
    sentence = %{The Deliverator is a future Archetype.}
    # Unlike a fragment, a sentence is also expected to yield ngrams that
    # contain the start and stop tokens.
    ngrams = [
      Ngram[Tokens.start, Tokens.start, Tokens.start, Tokens.start, Tokens.start],
      Ngram[Tokens.start, Tokens.start, Tokens.start, Tokens.start, :The],
      Ngram[Tokens.start, Tokens.start, Tokens.start, :The, :Deliverator],
      Ngram[Tokens.start, Tokens.start, :The, :Deliverator, :is],
      Ngram[Tokens.start, :The, :Deliverator, :is, :a],
      Ngram[Tokens.stop, Tokens.stop, Tokens.stop, Tokens.stop, Tokens.stop]
    ]

    @model.common_ngrams_from_sentence(sentence).should == ngrams
  end

  it "should have a frequency for a specified ngram" do
    ngram = Ngram[:it, :fires, :teensy, :darts, :that]

    @model.frequency_of_ngram(ngram).should == 1
  end

  it "should have a probability for a specified ngram" do
    ngram = Ngram[:it, :fires, :teensy, :darts, :that]

    @model.probability_of_ngram(ngram).should == 1.0
  end

  it "should have a frequency for specified ngrams" do
    ngrams = NgramSet[
      Ngram[:but, :excess, :perspiration, :wafts, :through],
      Ngram[:through, :a, :freshly, :napalmed, :forest],
      Ngram[:the, :suit, :has, :sintered, :armorgel]
    ]

    @model.frequency_of_ngrams(ngrams).should == 3
  end

  it "should have a probability of specified ngrams" do
    ngrams = NgramSet[
      Ngram[:The, :Deliverator, :belongs, :to, :an],
      Ngram[:Deliverator, :belongs, :to, :an, :elite]
    ]

    probability = @model.probability_of_ngrams(ngrams)

    relative_error(probability, 1.0).should < 1e-14
  end

  it "should have a probability for a specified fragment of text" do
    fragment = %{The Deliverator belongs to an}

    probability = @model.fragment_probability(fragment)

    relative_error(probability, 1.0).should < 1e-14
  end

  it "should have a probability for a specified sentence" do
    sentence = %{So now he has this other job.}

    probability = @model.sentence_probability(sentence)

    relative_error(probability, 0.00117370892018779).should < 1e-14
  end

  it "should have a probability for specified text" do
    text = %{So now he has this other job. No brightness or creativity involved-but no cooperation either.}

    probability = @model.text_probability(text)

    relative_error(probability, 2.75518525865679e-06).should < 1e-14
  end
end
|
@@ -0,0 +1,106 @@
|
|
1
|
+
require 'raingrams/quadgram_model'
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
require 'model_examples'
|
5
|
+
|
6
|
+
# Specs for QuadgramModel: the shared "Model" examples plus checks of ngram
# extraction, frequencies and probabilities for ngrams of four grams. The
# model is built once, from the :snowcrash training text, and shared by all
# examples.
describe QuadgramModel do
  before(:all) do
    @model = QuadgramModel.build do |model|
      model.train_with_text(Training.text_for(:snowcrash))
    end
  end

  # Relative difference between a computed probability and the expected value.
  #
  # The expected values below are written with at most 15 significant digits,
  # so a correct result differs from them by a relative error of at most
  # ~5e-15. Comparing numerically (instead of comparing Float#to_s output
  # against a string) keeps the examples independent of how a given Ruby
  # version formats floats.
  #
  # NOTE(review): assumes the probability methods return Floats -- confirm.
  def relative_error(actual, expected)
    (actual - expected).abs / expected.abs
  end

  it_should_behave_like "Model"

  it "should return ngrams from specified words" do
    words = %w{Why is the Deliverator so equipped}
    ngrams = [
      Ngram[:Why, :is, :the, :Deliverator],
      Ngram[:is, :the, :Deliverator, :so],
      Ngram[:the, :Deliverator, :so, :equipped]
    ]

    @model.ngrams_from_words(words).should == ngrams
  end

  it "should return common ngrams from words" do
    words = %w{The Deliverator is a future Archetype}
    ngrams = [
      Ngram[:The, :Deliverator, :is, :a]
    ]

    @model.common_ngrams_from_words(words).should == ngrams
  end

  it "should return common ngrams from a specified fragment of text" do
    fragment = %{The Deliverator is a future Archetype}
    ngrams = [
      Ngram[:The, :Deliverator, :is, :a]
    ]

    @model.common_ngrams_from_fragment(fragment).should == ngrams
  end

  it "should return common ngrams from a specified sentence" do
    sentence = %{The Deliverator is a future Archetype.}
    # Unlike a fragment, a sentence is also expected to yield ngrams that
    # contain the start and stop tokens.
    ngrams = [
      Ngram[Tokens.start, Tokens.start, Tokens.start, Tokens.start],
      Ngram[Tokens.start, Tokens.start, Tokens.start, :The],
      Ngram[Tokens.start, Tokens.start, :The, :Deliverator],
      Ngram[Tokens.start, :The, :Deliverator, :is],
      Ngram[:The, :Deliverator, :is, :a],
      Ngram[Tokens.stop, Tokens.stop, Tokens.stop, Tokens.stop]
    ]

    @model.common_ngrams_from_sentence(sentence).should == ngrams
  end

  it "should have a frequency for a specified ngram" do
    ngram = Ngram[:it, :fires, :teensy, :darts]

    @model.frequency_of_ngram(ngram).should == 1
  end

  it "should have a probability for a specified ngram" do
    ngram = Ngram[:it, :fires, :teensy, :darts]

    @model.probability_of_ngram(ngram).should == 1.0
  end

  it "should have a frequency for specified ngrams" do
    ngrams = NgramSet[
      Ngram[:but, :excess, :perspiration, :wafts],
      Ngram[:a, :freshly, :napalmed, :forest],
      Ngram[:suit, :has, :sintered, :armorgel]
    ]

    @model.frequency_of_ngrams(ngrams).should == 3
  end

  it "should have a probability of specified ngrams" do
    ngrams = NgramSet[
      Ngram[:The, :Deliverator, :belongs, :to],
      Ngram[:Deliverator, :belongs, :to, :an]
    ]

    probability = @model.probability_of_ngrams(ngrams)

    relative_error(probability, 1.0).should < 1e-14
  end

  it "should have a probability for a specified fragment of text" do
    fragment = %{The Deliverator belongs to}

    probability = @model.fragment_probability(fragment)

    relative_error(probability, 1.0).should < 1e-14
  end

  it "should have a probability for a specified sentence" do
    sentence = %{So now he has this other job.}

    probability = @model.sentence_probability(sentence)

    relative_error(probability, 0.00117370892018779).should < 1e-14
  end

  it "should have a probability for specified text" do
    text = %{So now he has this other job. No brightness or creativity involved-but no cooperation either.}

    probability = @model.text_probability(text)

    relative_error(probability, 2.75518525865679e-06).should < 1e-14
  end
end
|