raingrams 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +28 -0
- data/Manifest.txt +9 -0
- data/README.txt +46 -2
- data/Rakefile +1 -0
- data/TODO.txt +0 -1
- data/lib/raingrams/model.rb +204 -40
- data/lib/raingrams/ngram.rb +6 -2
- data/lib/raingrams/ngram_set.rb +6 -2
- data/lib/raingrams/version.rb +1 -1
- data/spec/bigram_model_spec.rb +111 -0
- data/spec/helpers/training.rb +8 -0
- data/spec/helpers.rb +1 -0
- data/spec/model_examples.rb +83 -0
- data/spec/model_spec.rb +118 -0
- data/spec/ngram_set_spec.rb +11 -2
- data/spec/ngram_spec.rb +1 -1
- data/spec/pentagram_model_spec.rb +101 -0
- data/spec/quadgram_model_spec.rb +106 -0
- data/spec/spec_helper.rb +2 -0
- data/spec/training/snowcrash.txt +88 -0
- data/spec/trigram_model_spec.rb +109 -0
- metadata +24 -4
data/spec/bigram_model_spec.rb
ADDED
@@ -0,0 +1,111 @@
|
|
1
|
+
require 'raingrams/bigram_model'
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
require 'model_examples'
|
5
|
+
|
6
|
+
describe BigramModel do
|
7
|
+
before(:all) do
|
8
|
+
@model = BigramModel.train_with_text(Training.text_for(:snowcrash))
|
9
|
+
end
|
10
|
+
|
11
|
+
it_should_behave_like "Model"
|
12
|
+
|
13
|
+
it "should return ngrams from specified words" do
|
14
|
+
words = %w{Why is the Deliverator so equipped}
|
15
|
+
ngrams = [
|
16
|
+
Ngram[:Why, :is],
|
17
|
+
Ngram[:is, :the],
|
18
|
+
Ngram[:the, :Deliverator],
|
19
|
+
Ngram[:Deliverator, :so],
|
20
|
+
Ngram[:so, :equipped]
|
21
|
+
]
|
22
|
+
|
23
|
+
@model.ngrams_from_words(words).should == ngrams
|
24
|
+
end
|
25
|
+
|
26
|
+
it "should return common ngrams from words" do
|
27
|
+
words = %w{The Deliverator is a future Archetype}
|
28
|
+
ngrams = [
|
29
|
+
Ngram[:The, :Deliverator],
|
30
|
+
Ngram[:Deliverator, :is],
|
31
|
+
Ngram[:is, :a]
|
32
|
+
]
|
33
|
+
|
34
|
+
@model.common_ngrams_from_words(words).should == ngrams
|
35
|
+
end
|
36
|
+
|
37
|
+
it "should return common ngrams from a specified fragment of text" do
|
38
|
+
fragment = %{The Deliverator is a future Archetype}
|
39
|
+
ngrams = [
|
40
|
+
Ngram[:The, :Deliverator],
|
41
|
+
Ngram[:Deliverator, :is],
|
42
|
+
Ngram[:is, :a]
|
43
|
+
]
|
44
|
+
|
45
|
+
@model.common_ngrams_from_fragment(fragment).should == ngrams
|
46
|
+
end
|
47
|
+
|
48
|
+
it "should return common ngrams from a specified sentence" do
|
49
|
+
sentence = %{The Deliverator is a future Archetype.}
|
50
|
+
ngrams = [
|
51
|
+
Ngram[Tokens.start, Tokens.start],
|
52
|
+
Ngram[Tokens.start, :The],
|
53
|
+
Ngram[:The, :Deliverator],
|
54
|
+
Ngram[:Deliverator, :is],
|
55
|
+
Ngram[:is, :a],
|
56
|
+
Ngram[Tokens.stop, Tokens.stop]
|
57
|
+
]
|
58
|
+
|
59
|
+
@model.common_ngrams_from_sentence(sentence).should == ngrams
|
60
|
+
end
|
61
|
+
|
62
|
+
it "should have a frequency for a specified ngram" do
|
63
|
+
ngram = Ngram[:teensy, :darts]
|
64
|
+
|
65
|
+
@model.frequency_of_ngram(ngram).should == 1
|
66
|
+
end
|
67
|
+
|
68
|
+
it "should have a probability for a specified ngram" do
|
69
|
+
ngram = Ngram[:teensy, :darts]
|
70
|
+
|
71
|
+
@model.probability_of_ngram(ngram).should == 1.0
|
72
|
+
end
|
73
|
+
|
74
|
+
it "should have a frequency for specified ngrams" do
|
75
|
+
ngrams = NgramSet[
|
76
|
+
Ngram[:but, :excess],
|
77
|
+
Ngram[:freshly, :napalmed],
|
78
|
+
Ngram[:sintered, :armorgel]
|
79
|
+
]
|
80
|
+
|
81
|
+
@model.frequency_of_ngrams(ngrams).should == 3
|
82
|
+
end
|
83
|
+
|
84
|
+
it "should have a probability of specified ngrams" do
|
85
|
+
ngrams = NgramSet[
|
86
|
+
Ngram[:The, :Deliverator],
|
87
|
+
Ngram[:Deliverator, :belongs],
|
88
|
+
Ngram[:belongs, :to]
|
89
|
+
]
|
90
|
+
|
91
|
+
@model.probability_of_ngrams(ngrams).to_s.should == '0.0112293144208038'
|
92
|
+
end
|
93
|
+
|
94
|
+
it "should have a probability for a specified fragment of text" do
|
95
|
+
fragment = %{The Deliverator belongs to}
|
96
|
+
|
97
|
+
@model.fragment_probability(fragment).to_s.should == '0.0112293144208038'
|
98
|
+
end
|
99
|
+
|
100
|
+
it "should have a probability for a specified sentence" do
|
101
|
+
sentence = %{The Deliverator used to make software.}
|
102
|
+
|
103
|
+
@model.sentence_probability(sentence).to_s.should == '4.10042780102381e-07'
|
104
|
+
end
|
105
|
+
|
106
|
+
it "should have a probability for specified text" do
|
107
|
+
text = %{The Deliverator used to make software. Still does, sometimes.}
|
108
|
+
|
109
|
+
@model.text_probability(text).to_s.should == '2.40635434332383e-10'
|
110
|
+
end
|
111
|
+
end
|
data/spec/helpers.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'helpers/training'
|
data/spec/model_examples.rb
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
shared_examples_for "Model" do
|
4
|
+
it "should have ngrams" do
|
5
|
+
@model.ngrams.each do |ngram|
|
6
|
+
@model.has_ngram?(ngram).should == true
|
7
|
+
end
|
8
|
+
end
|
9
|
+
|
10
|
+
it "should be able to iterate through all ngrams" do
|
11
|
+
@model.each_ngram do |ngram|
|
12
|
+
@model.has_ngram?(ngram).should == true
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
it "should be able to select ngrams with certain properties" do
|
17
|
+
ngrams = @model.ngrams_with do |ngram|
|
18
|
+
ngram.include?(:the)
|
19
|
+
end
|
20
|
+
|
21
|
+
ngrams.each do |ngram|
|
22
|
+
ngram.include?(:the).should == true
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
it "should be able to select ngrams starting with a specified gram" do
|
27
|
+
@model.ngrams_starting_with(:filtering).each do |ngram|
|
28
|
+
ngram.starts_with?(:filtering).should == true
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
it "should be able to select ngrams ending with a specified gram" do
|
33
|
+
@model.ngrams_ending_with(:sword).each do |ngram|
|
34
|
+
ngram.ends_with?(:sword).should == true
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
it "should be able to select ngrams including any of the specified grams" do
|
39
|
+
@model.ngrams_including_any(:The, :Deliverator).each do |ngram|
|
40
|
+
ngram.includes_any?(:The, :Deliverator).should == true
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
it "should be able to select ngrams including all of the specified grams" do
|
45
|
+
@model.ngrams_including_all(:activated, :charcoal).each do |ngram|
|
46
|
+
ngram.includes_all?(:activated, :charcoal).should == true
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
it "should have grams" do
|
51
|
+
@model.grams.each do |gram|
|
52
|
+
@model.has_gram?(gram).should == true
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
it "should provide a random ngram" do
|
57
|
+
@model.has_ngram?(@model.random_ngram).should == true
|
58
|
+
end
|
59
|
+
|
60
|
+
it "should generate a random sentence" do
|
61
|
+
sentence = @model.random_sentence
|
62
|
+
|
63
|
+
@model.ngrams_from_sentence(sentence).each do |ngram|
|
64
|
+
@model.has_ngram?(ngram).should == true
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
it "should generate a random paragraph" do
|
69
|
+
paragraph = @model.random_paragraph
|
70
|
+
|
71
|
+
@model.ngrams_from_paragraph(paragraph).each do |ngram|
|
72
|
+
@model.has_ngram?(ngram).should == true
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
it "should generate a random text" do
|
77
|
+
text = @model.random_text
|
78
|
+
|
79
|
+
@model.ngrams_from_text(text).each do |ngram|
|
80
|
+
@model.has_ngram?(ngram).should == true
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
data/spec/model_spec.rb
ADDED
@@ -0,0 +1,118 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'raingrams/model'
|
4
|
+
|
5
|
+
describe Model do
|
6
|
+
before(:all) do
|
7
|
+
@model = Model.new(:ngram_size => 2)
|
8
|
+
|
9
|
+
@phone_number_model = Model.new(
|
10
|
+
:ngram_size => 2,
|
11
|
+
:ignore_phone_numbers => true
|
12
|
+
)
|
13
|
+
|
14
|
+
@references_model = Model.new(
|
15
|
+
:ngram_size => 2,
|
16
|
+
:ignore_references => true
|
17
|
+
)
|
18
|
+
|
19
|
+
@case_model = Model.new(
|
20
|
+
:ngram_size => 2,
|
21
|
+
:ignore_case => true
|
22
|
+
)
|
23
|
+
|
24
|
+
@punctuation_model = Model.new(
|
25
|
+
:ngram_size => 2,
|
26
|
+
:ignore_punctuation => false
|
27
|
+
)
|
28
|
+
end
|
29
|
+
|
30
|
+
it "should parse text into sentences" do
|
31
|
+
text = %{The Deliverator belongs to an elite order, a hallowed sub-category. He's got esprit up to here.}
|
32
|
+
sentences = [
|
33
|
+
"The Deliverator belongs to an elite order, a hallowed sub-category.",
|
34
|
+
"He's got esprit up to here."
|
35
|
+
]
|
36
|
+
|
37
|
+
@model.parse_text(text).should == sentences
|
38
|
+
end
|
39
|
+
|
40
|
+
it "should parse words from a sentence" do
|
41
|
+
sentence = %{The Deliverator is in touch with the road, starts like a bad day, stops on a peseta.}
|
42
|
+
words = %w{The Deliverator is in touch with the road starts like a bad day stops on a peseta}
|
43
|
+
|
44
|
+
@model.parse_sentence(sentence).should == words
|
45
|
+
end
|
46
|
+
|
47
|
+
it "should ignore URLs by default while parsing a sentence" do
|
48
|
+
sentence = %{Click on the following link: http://www.example.com/}
|
49
|
+
words = %w{Click on the following link}
|
50
|
+
|
51
|
+
@model.parse_sentence(sentence).should == words
|
52
|
+
end
|
53
|
+
|
54
|
+
it "should ignore short URIs by default while parsing a sentence" do
|
55
|
+
sentence = %{Click on the following link: jabber://}
|
56
|
+
words = %w{Click on the following link}
|
57
|
+
|
58
|
+
@model.parse_sentence(sentence).should == words
|
59
|
+
end
|
60
|
+
|
61
|
+
it "should ignore complex HTTP URLs by default while parsing a sentence" do
|
62
|
+
sentence = %{Click on the following link: http://www.google.com/search?hl=en&client=firefox-a&rls=org.mozilla:en-US:official&hs=jU&q=ruby+datamapper&start=20&sa=N}
|
63
|
+
words = %w{Click on the following link}
|
64
|
+
|
65
|
+
@model.parse_sentence(sentence).should == words
|
66
|
+
end
|
67
|
+
|
68
|
+
it "may ignore phone numbers while parsing a sentence" do
|
69
|
+
sentence = %{Call me before 12, 1-888-444-2222.}
|
70
|
+
words = %w{Call me before 12}
|
71
|
+
|
72
|
+
@phone_number_model.parse_sentence(sentence).should == words
|
73
|
+
end
|
74
|
+
|
75
|
+
it "may ignore long-distance phone numbers while parsing a sentence" do
|
76
|
+
sentence = %{Call me before 12, 1-444-2222.}
|
77
|
+
words = %w{Call me before 12}
|
78
|
+
|
79
|
+
@phone_number_model.parse_sentence(sentence).should == words
|
80
|
+
end
|
81
|
+
|
82
|
+
it "may ignore short phone numbers while parsing a sentence" do
|
83
|
+
sentence = %{Call me before 12, 444-2222.}
|
84
|
+
words = %w{Call me before 12}
|
85
|
+
|
86
|
+
@phone_number_model.parse_sentence(sentence).should == words
|
87
|
+
end
|
88
|
+
|
89
|
+
it "may ignore RFC style references while parsing a sentence" do
|
90
|
+
sentence = %{As one can see, it has failed [1].}
|
91
|
+
words = %w{As one can see it has failed}
|
92
|
+
|
93
|
+
@references_model.parse_sentence(sentence).should == words
|
94
|
+
end
|
95
|
+
|
96
|
+
it "should ignore punctuation by default while parsing a sentence" do
|
97
|
+
sentence = %{Oh, they used to argue over times, many corporate driver-years lost to it: homeowners, red-faced and sweaty with their own lies, stinking of Old Spice and job-related stress, standing in their glowing yellow doorways brandishing their Seikos and waving at the clock over the kitchen sink, I swear, can't you guys tell time?}
|
98
|
+
words = %w{
|
99
|
+
Oh they used to argue over times many corporate driver-years lost to it homeowners red-faced and sweaty with their own lies stinking of Old Spice and job-related stress standing in their glowing yellow doorways brandishing their Seikos and waving at the clock over the kitchen sink I swear can't you guys tell time
|
100
|
+
}
|
101
|
+
|
102
|
+
@model.parse_sentence(sentence).should == words
|
103
|
+
end
|
104
|
+
|
105
|
+
it "may ignore case while parsing a sentence" do
|
106
|
+
sentence = %{The Deliverator is in touch with the road, starts like a bad day, stops on a peseta.}
|
107
|
+
words = %w{the deliverator is in touch with the road starts like a bad day stops on a peseta}
|
108
|
+
|
109
|
+
@case_model.parse_sentence(sentence).should == words
|
110
|
+
end
|
111
|
+
|
112
|
+
it "may preserve punctuation while parsing a sentence" do
|
113
|
+
sentence = %{Oh, they used to argue over times, many corporate driver-years lost to it: homeowners, red-faced and sweaty with their own lies, stinking of Old Spice and job-related stress, standing in their glowing yellow doorways brandishing their Seikos and waving at the clock over the kitchen sink, I swear, can't you guys tell time?}
|
114
|
+
words = %w{Oh, they used to argue over times, many corporate driver-years lost to it: homeowners, red-faced and sweaty with their own lies, stinking of Old Spice and job-related stress, standing in their glowing yellow doorways brandishing their Seikos and waving at the clock over the kitchen sink, I swear, can't you guys tell time?}
|
115
|
+
|
116
|
+
@punctuation_model.parse_sentence(sentence).should == words
|
117
|
+
end
|
118
|
+
end
|
data/spec/ngram_set_spec.rb
CHANGED
@@ -46,9 +46,18 @@ describe NgramSet do
|
|
46
46
|
]
|
47
47
|
end
|
48
48
|
|
49
|
-
it "should select ngrams which
|
50
|
-
@ngrams.
|
49
|
+
it "should select ngrams which include any of the specified grams" do
|
50
|
+
@ngrams.including_any(:the, :dog).should == NgramSet[
|
51
51
|
Ngram[:the, :dog],
|
52
|
+
Ngram[:dog, :jumped],
|
53
|
+
Ngram[:through, :the],
|
54
|
+
Ngram[:the, :hoop]
|
55
|
+
]
|
56
|
+
end
|
57
|
+
|
58
|
+
it "should select ngrams which include all of the specified grams" do
|
59
|
+
@ngrams.including_all(:the, :dog).should == NgramSet[
|
60
|
+
Ngram[:the, :dog]
|
52
61
|
]
|
53
62
|
end
|
54
63
|
end
|
data/spec/ngram_spec.rb
CHANGED
data/spec/pentagram_model_spec.rb
ADDED
@@ -0,0 +1,101 @@
|
|
1
|
+
require 'raingrams/pentagram_model'
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
require 'model_examples'
|
5
|
+
|
6
|
+
describe PentagramModel do
|
7
|
+
before(:all) do
|
8
|
+
@model = PentagramModel.build do |model|
|
9
|
+
model.train_with_text(Training.text_for(:snowcrash))
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
it_should_behave_like "Model"
|
14
|
+
|
15
|
+
it "should return ngrams from specified words" do
|
16
|
+
words = %w{Why is the Deliverator so equipped}
|
17
|
+
ngrams = [
|
18
|
+
Ngram[:Why, :is, :the, :Deliverator, :so],
|
19
|
+
Ngram[:is, :the, :Deliverator, :so, :equipped]
|
20
|
+
]
|
21
|
+
|
22
|
+
@model.ngrams_from_words(words).should == ngrams
|
23
|
+
end
|
24
|
+
|
25
|
+
it "should return common ngrams from words" do
|
26
|
+
words = %w{The Deliverator is a future Archetype}
|
27
|
+
ngrams = []
|
28
|
+
|
29
|
+
@model.common_ngrams_from_words(words).should == ngrams
|
30
|
+
end
|
31
|
+
|
32
|
+
it "should return common ngrams from a specified fragment of text" do
|
33
|
+
fragment = %{The Deliverator is a future Archetype}
|
34
|
+
ngrams = []
|
35
|
+
|
36
|
+
@model.common_ngrams_from_fragment(fragment).should == ngrams
|
37
|
+
end
|
38
|
+
|
39
|
+
it "should return common ngrams from a specified sentence" do
|
40
|
+
sentence = %{The Deliverator is a future Archetype.}
|
41
|
+
ngrams = [
|
42
|
+
Ngram[Tokens.start, Tokens.start, Tokens.start, Tokens.start, Tokens.start],
|
43
|
+
Ngram[Tokens.start, Tokens.start, Tokens.start, Tokens.start, :The],
|
44
|
+
Ngram[Tokens.start, Tokens.start, Tokens.start, :The, :Deliverator],
|
45
|
+
Ngram[Tokens.start, Tokens.start, :The, :Deliverator, :is],
|
46
|
+
Ngram[Tokens.start, :The, :Deliverator, :is, :a],
|
47
|
+
Ngram[Tokens.stop, Tokens.stop, Tokens.stop, Tokens.stop, Tokens.stop]
|
48
|
+
]
|
49
|
+
|
50
|
+
@model.common_ngrams_from_sentence(sentence).should == ngrams
|
51
|
+
end
|
52
|
+
|
53
|
+
it "should have a frequency for a specified ngram" do
|
54
|
+
ngram = Ngram[:it, :fires, :teensy, :darts, :that]
|
55
|
+
|
56
|
+
@model.frequency_of_ngram(ngram).should == 1
|
57
|
+
end
|
58
|
+
|
59
|
+
it "should have a probability for a specified ngram" do
|
60
|
+
ngram = Ngram[:it, :fires, :teensy, :darts, :that]
|
61
|
+
|
62
|
+
@model.probability_of_ngram(ngram).should == 1.0
|
63
|
+
end
|
64
|
+
|
65
|
+
it "should have a frequency for specified ngrams" do
|
66
|
+
ngrams = NgramSet[
|
67
|
+
Ngram[:but, :excess, :perspiration, :wafts, :through],
|
68
|
+
Ngram[:through, :a, :freshly, :napalmed, :forest],
|
69
|
+
Ngram[:the, :suit, :has, :sintered, :armorgel]
|
70
|
+
]
|
71
|
+
|
72
|
+
@model.frequency_of_ngrams(ngrams).should == 3
|
73
|
+
end
|
74
|
+
|
75
|
+
it "should have a probability of specified ngrams" do
|
76
|
+
ngrams = NgramSet[
|
77
|
+
Ngram[:The, :Deliverator, :belongs, :to, :an],
|
78
|
+
Ngram[:Deliverator, :belongs, :to, :an, :elite]
|
79
|
+
]
|
80
|
+
|
81
|
+
@model.probability_of_ngrams(ngrams).to_s.should == '1.0'
|
82
|
+
end
|
83
|
+
|
84
|
+
it "should have a probability for a specified fragment of text" do
|
85
|
+
fragment = %{The Deliverator belongs to an}
|
86
|
+
|
87
|
+
@model.fragment_probability(fragment).to_s.should == '1.0'
|
88
|
+
end
|
89
|
+
|
90
|
+
it "should have a probability for a specified sentence" do
|
91
|
+
sentence = %{So now he has this other job.}
|
92
|
+
|
93
|
+
@model.sentence_probability(sentence).to_s.should == '0.00117370892018779'
|
94
|
+
end
|
95
|
+
|
96
|
+
it "should have a probability for specified text" do
|
97
|
+
text = %{So now he has this other job. No brightness or creativity involved-but no cooperation either.}
|
98
|
+
|
99
|
+
@model.text_probability(text).to_s.should == '2.75518525865679e-06'
|
100
|
+
end
|
101
|
+
end
|
data/spec/quadgram_model_spec.rb
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
require 'raingrams/quadgram_model'
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
require 'model_examples'
|
5
|
+
|
6
|
+
describe QuadgramModel do
|
7
|
+
before(:all) do
|
8
|
+
@model = QuadgramModel.build do |model|
|
9
|
+
model.train_with_text(Training.text_for(:snowcrash))
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
it_should_behave_like "Model"
|
14
|
+
|
15
|
+
it "should return ngrams from specified words" do
|
16
|
+
words = %w{Why is the Deliverator so equipped}
|
17
|
+
ngrams = [
|
18
|
+
Ngram[:Why, :is, :the, :Deliverator],
|
19
|
+
Ngram[:is, :the, :Deliverator, :so],
|
20
|
+
Ngram[:the, :Deliverator, :so, :equipped]
|
21
|
+
]
|
22
|
+
|
23
|
+
@model.ngrams_from_words(words).should == ngrams
|
24
|
+
end
|
25
|
+
|
26
|
+
it "should return common ngrams from words" do
|
27
|
+
words = %w{The Deliverator is a future Archetype}
|
28
|
+
ngrams = [
|
29
|
+
Ngram[:The, :Deliverator, :is, :a]
|
30
|
+
]
|
31
|
+
|
32
|
+
@model.common_ngrams_from_words(words).should == ngrams
|
33
|
+
end
|
34
|
+
|
35
|
+
it "should return common ngrams from a specified fragment of text" do
|
36
|
+
fragment = %{The Deliverator is a future Archetype}
|
37
|
+
ngrams = [
|
38
|
+
Ngram[:The, :Deliverator, :is, :a]
|
39
|
+
]
|
40
|
+
|
41
|
+
@model.common_ngrams_from_fragment(fragment).should == ngrams
|
42
|
+
end
|
43
|
+
|
44
|
+
it "should return common ngrams from a specified sentence" do
|
45
|
+
sentence = %{The Deliverator is a future Archetype.}
|
46
|
+
ngrams = [
|
47
|
+
Ngram[Tokens.start, Tokens.start, Tokens.start, Tokens.start],
|
48
|
+
Ngram[Tokens.start, Tokens.start, Tokens.start, :The],
|
49
|
+
Ngram[Tokens.start, Tokens.start, :The, :Deliverator],
|
50
|
+
Ngram[Tokens.start, :The, :Deliverator, :is],
|
51
|
+
Ngram[:The, :Deliverator, :is, :a],
|
52
|
+
Ngram[Tokens.stop, Tokens.stop, Tokens.stop, Tokens.stop]
|
53
|
+
]
|
54
|
+
|
55
|
+
@model.common_ngrams_from_sentence(sentence).should == ngrams
|
56
|
+
end
|
57
|
+
|
58
|
+
it "should have a frequency for a specified ngram" do
|
59
|
+
ngram = Ngram[:it, :fires, :teensy, :darts]
|
60
|
+
|
61
|
+
@model.frequency_of_ngram(ngram).should == 1
|
62
|
+
end
|
63
|
+
|
64
|
+
it "should have a probability for a specified ngram" do
|
65
|
+
ngram = Ngram[:it, :fires, :teensy, :darts]
|
66
|
+
|
67
|
+
@model.probability_of_ngram(ngram).should == 1.0
|
68
|
+
end
|
69
|
+
|
70
|
+
it "should have a frequency for specified ngrams" do
|
71
|
+
ngrams = NgramSet[
|
72
|
+
Ngram[:but, :excess, :perspiration, :wafts],
|
73
|
+
Ngram[:a, :freshly, :napalmed, :forest],
|
74
|
+
Ngram[:suit, :has, :sintered, :armorgel]
|
75
|
+
]
|
76
|
+
|
77
|
+
@model.frequency_of_ngrams(ngrams).should == 3
|
78
|
+
end
|
79
|
+
|
80
|
+
it "should have a probability of specified ngrams" do
|
81
|
+
ngrams = NgramSet[
|
82
|
+
Ngram[:The, :Deliverator, :belongs, :to],
|
83
|
+
Ngram[:Deliverator, :belongs, :to, :an]
|
84
|
+
]
|
85
|
+
|
86
|
+
@model.probability_of_ngrams(ngrams).to_s.should == '1.0'
|
87
|
+
end
|
88
|
+
|
89
|
+
it "should have a probability for a specified fragment of text" do
|
90
|
+
fragment = %{The Deliverator belongs to}
|
91
|
+
|
92
|
+
@model.fragment_probability(fragment).to_s.should == '1.0'
|
93
|
+
end
|
94
|
+
|
95
|
+
it "should have a probability for a specified sentence" do
|
96
|
+
sentence = %{So now he has this other job.}
|
97
|
+
|
98
|
+
@model.sentence_probability(sentence).to_s.should == '0.00117370892018779'
|
99
|
+
end
|
100
|
+
|
101
|
+
it "should have a probability for specified text" do
|
102
|
+
text = %{So now he has this other job. No brightness or creativity involved-but no cooperation either.}
|
103
|
+
|
104
|
+
@model.text_probability(text).to_s.should == '2.75518525865679e-06'
|
105
|
+
end
|
106
|
+
end
|