twitter_ebooks 2.3.2 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.travis.yml +7 -0
- data/README.md +60 -30
- data/bin/ebooks +239 -117
- data/lib/twitter_ebooks.rb +2 -2
- data/lib/twitter_ebooks/archive.rb +12 -9
- data/lib/twitter_ebooks/bot.rb +343 -109
- data/lib/twitter_ebooks/model.rb +104 -22
- data/lib/twitter_ebooks/nlp.rb +46 -13
- data/lib/twitter_ebooks/suffix.rb +9 -1
- data/lib/twitter_ebooks/version.rb +1 -1
- data/skeleton/Gemfile +1 -1
- data/skeleton/Procfile +1 -1
- data/skeleton/bots.rb +35 -22
- data/spec/bot_spec.rb +178 -0
- data/spec/model_spec.rb +18 -2
- data/twitter_ebooks.gemspec +7 -3
- metadata +72 -20
- data/lib/twitter_ebooks/markov.rb +0 -82
- data/skeleton/run.rb +0 -9
- data/test/corpus/0xabad1dea.tweets +0 -14696
- data/test/keywords.rb +0 -18
- data/test/tokenize.rb +0 -18
data/spec/bot_spec.rb
ADDED
@@ -0,0 +1,178 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'memory_profiler'
|
3
|
+
require 'tempfile'
|
4
|
+
require 'timecop'
|
5
|
+
|
6
|
+
class TestBot < Ebooks::Bot
|
7
|
+
attr_accessor :twitter
|
8
|
+
|
9
|
+
def configure
|
10
|
+
end
|
11
|
+
|
12
|
+
def on_direct_message(dm)
|
13
|
+
reply dm, "echo: #{dm.text}"
|
14
|
+
end
|
15
|
+
|
16
|
+
def on_mention(tweet)
|
17
|
+
reply tweet, "echo: #{meta(tweet).mentionless}"
|
18
|
+
end
|
19
|
+
|
20
|
+
def on_timeline(tweet)
|
21
|
+
reply tweet, "fine tweet good sir"
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
module Ebooks::Test
|
26
|
+
# Generates a random twitter id
|
27
|
+
def twitter_id
|
28
|
+
(rand*10**18).to_i
|
29
|
+
end
|
30
|
+
|
31
|
+
# Creates a mock direct message
|
32
|
+
# @param username User sending the DM
|
33
|
+
# @param text DM content
|
34
|
+
def mock_dm(username, text)
|
35
|
+
Twitter::DirectMessage.new(id: twitter_id,
|
36
|
+
sender: { id: twitter_id, screen_name: username},
|
37
|
+
text: text)
|
38
|
+
end
|
39
|
+
|
40
|
+
# Creates a mock tweet
|
41
|
+
# @param username User sending the tweet
|
42
|
+
# @param text Tweet content
|
43
|
+
def mock_tweet(username, text, extra={})
|
44
|
+
mentions = text.split.find_all { |x| x.start_with?('@') }
|
45
|
+
tweet = Twitter::Tweet.new({
|
46
|
+
id: twitter_id,
|
47
|
+
in_reply_to_status_id: 'mock-link',
|
48
|
+
user: { id: twitter_id, screen_name: username },
|
49
|
+
text: text,
|
50
|
+
created_at: Time.now.to_s,
|
51
|
+
entities: {
|
52
|
+
user_mentions: mentions.map { |m|
|
53
|
+
{ screen_name: m.split('@')[1],
|
54
|
+
indices: [text.index(m), text.index(m)+m.length] }
|
55
|
+
}
|
56
|
+
}
|
57
|
+
}.merge!(extra))
|
58
|
+
tweet
|
59
|
+
end
|
60
|
+
|
61
|
+
def twitter_spy(bot)
|
62
|
+
twitter = spy("twitter")
|
63
|
+
allow(twitter).to receive(:update).and_return(mock_tweet(bot.username, "test tweet"))
|
64
|
+
twitter
|
65
|
+
end
|
66
|
+
|
67
|
+
def simulate(bot, &b)
|
68
|
+
bot.twitter = twitter_spy(bot)
|
69
|
+
b.call
|
70
|
+
end
|
71
|
+
|
72
|
+
def expect_direct_message(bot, content)
|
73
|
+
expect(bot.twitter).to have_received(:create_direct_message).with(anything(), content, {})
|
74
|
+
bot.twitter = twitter_spy(bot)
|
75
|
+
end
|
76
|
+
|
77
|
+
def expect_tweet(bot, content)
|
78
|
+
expect(bot.twitter).to have_received(:update).with(content, anything())
|
79
|
+
bot.twitter = twitter_spy(bot)
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
|
84
|
+
describe Ebooks::Bot do
|
85
|
+
include Ebooks::Test
|
86
|
+
let(:bot) { TestBot.new('test_ebooks') }
|
87
|
+
|
88
|
+
before { Timecop.freeze }
|
89
|
+
after { Timecop.return }
|
90
|
+
|
91
|
+
it "responds to dms" do
|
92
|
+
simulate(bot) do
|
93
|
+
bot.receive_event(mock_dm("m1sp", "this is a dm"))
|
94
|
+
expect_direct_message(bot, "echo: this is a dm")
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
it "responds to mentions" do
|
99
|
+
simulate(bot) do
|
100
|
+
bot.receive_event(mock_tweet("m1sp", "@test_ebooks this is a mention"))
|
101
|
+
expect_tweet(bot, "@m1sp echo: this is a mention")
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
it "responds to timeline tweets" do
|
106
|
+
simulate(bot) do
|
107
|
+
bot.receive_event(mock_tweet("m1sp", "some excellent tweet"))
|
108
|
+
expect_tweet(bot, "@m1sp fine tweet good sir")
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
it "links tweets to conversations correctly" do
|
113
|
+
tweet1 = mock_tweet("m1sp", "tweet 1", id: 1, in_reply_to_status_id: nil)
|
114
|
+
|
115
|
+
tweet2 = mock_tweet("m1sp", "tweet 2", id: 2, in_reply_to_status_id: 1)
|
116
|
+
|
117
|
+
tweet3 = mock_tweet("m1sp", "tweet 3", id: 3, in_reply_to_status_id: nil)
|
118
|
+
|
119
|
+
bot.conversation(tweet1).add(tweet1)
|
120
|
+
expect(bot.conversation(tweet2)).to eq(bot.conversation(tweet1))
|
121
|
+
|
122
|
+
bot.conversation(tweet2).add(tweet2)
|
123
|
+
expect(bot.conversation(tweet3)).to_not eq(bot.conversation(tweet2))
|
124
|
+
end
|
125
|
+
|
126
|
+
it "stops mentioning people after a certain limit" do
|
127
|
+
simulate(bot) do
|
128
|
+
bot.receive_event(mock_tweet("spammer", "@test_ebooks @m1sp 1"))
|
129
|
+
expect_tweet(bot, "@spammer @m1sp echo: 1")
|
130
|
+
|
131
|
+
Timecop.travel(Time.now + 60)
|
132
|
+
bot.receive_event(mock_tweet("spammer", "@test_ebooks @m1sp 2"))
|
133
|
+
expect_tweet(bot, "@spammer @m1sp echo: 2")
|
134
|
+
|
135
|
+
Timecop.travel(Time.now + 60)
|
136
|
+
bot.receive_event(mock_tweet("spammer", "@test_ebooks @m1sp 3"))
|
137
|
+
expect_tweet(bot, "@spammer echo: 3")
|
138
|
+
end
|
139
|
+
end
|
140
|
+
|
141
|
+
it "doesn't stop mentioning them if they reply" do
|
142
|
+
simulate(bot) do
|
143
|
+
bot.receive_event(mock_tweet("spammer", "@test_ebooks @m1sp 4"))
|
144
|
+
expect_tweet(bot, "@spammer @m1sp echo: 4")
|
145
|
+
|
146
|
+
Timecop.travel(Time.now + 60)
|
147
|
+
bot.receive_event(mock_tweet("m1sp", "@spammer @test_ebooks 5"))
|
148
|
+
expect_tweet(bot, "@m1sp @spammer echo: 5")
|
149
|
+
|
150
|
+
Timecop.travel(Time.now + 60)
|
151
|
+
bot.receive_event(mock_tweet("spammer", "@test_ebooks @m1sp 6"))
|
152
|
+
expect_tweet(bot, "@spammer @m1sp echo: 6")
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
156
|
+
it "doesn't get into infinite bot conversations" do
|
157
|
+
simulate(bot) do
|
158
|
+
bot.receive_event(mock_tweet("spammer", "@test_ebooks @m1sp 7"))
|
159
|
+
expect_tweet(bot, "@spammer @m1sp echo: 7")
|
160
|
+
|
161
|
+
Timecop.travel(Time.now + 10)
|
162
|
+
bot.receive_event(mock_tweet("spammer", "@test_ebooks @m1sp 8"))
|
163
|
+
expect_tweet(bot, "@spammer @m1sp echo: 8")
|
164
|
+
|
165
|
+
Timecop.travel(Time.now + 10)
|
166
|
+
bot.receive_event(mock_tweet("spammer", "@test_ebooks @m1sp 9"))
|
167
|
+
expect(bot.twitter).to_not have_received(:update)
|
168
|
+
end
|
169
|
+
end
|
170
|
+
|
171
|
+
it "blocks blacklisted users on contact" do
|
172
|
+
simulate(bot) do
|
173
|
+
bot.blacklist = ["spammer"]
|
174
|
+
bot.receive_event(mock_tweet("spammer", "@test_ebooks @m1sp 7"))
|
175
|
+
expect(bot.twitter).to have_received(:block).with("spammer")
|
176
|
+
end
|
177
|
+
end
|
178
|
+
end
|
data/spec/model_spec.rb
CHANGED
@@ -22,12 +22,28 @@ describe Ebooks::Model do
|
|
22
22
|
end
|
23
23
|
end
|
24
24
|
|
25
|
-
it "
|
25
|
+
it "consumes, saves and loads models correctly" do
|
26
|
+
model = nil
|
27
|
+
|
26
28
|
report = MemoryUsage.report do
|
27
29
|
model = Ebooks::Model.consume(path("data/0xabad1dea.json"))
|
28
30
|
end
|
31
|
+
expect(report.total_memsize).to be < 200000000
|
32
|
+
|
33
|
+
file = Tempfile.new("0xabad1dea")
|
34
|
+
model.save(file.path)
|
35
|
+
|
36
|
+
report2 = MemoryUsage.report do
|
37
|
+
model = Ebooks::Model.load(file.path)
|
38
|
+
end
|
39
|
+
expect(report2.total_memsize).to be < 3000000
|
40
|
+
|
41
|
+
expect(model.tokens[0]).to be_a String
|
42
|
+
expect(model.sentences[0][0]).to be_a Fixnum
|
43
|
+
expect(model.mentions[0][0]).to be_a Fixnum
|
44
|
+
expect(model.keywords[0]).to be_a String
|
29
45
|
|
30
|
-
|
46
|
+
puts "0xabad1dea.model uses #{report2.total_memsize} bytes in memory"
|
31
47
|
end
|
32
48
|
|
33
49
|
describe '.consume' do
|
data/twitter_ebooks.gemspec
CHANGED
@@ -16,16 +16,20 @@ Gem::Specification.new do |gem|
|
|
16
16
|
gem.version = Ebooks::VERSION
|
17
17
|
|
18
18
|
gem.add_development_dependency 'rspec'
|
19
|
+
gem.add_development_dependency 'rspec-mocks'
|
19
20
|
gem.add_development_dependency 'memory_profiler'
|
21
|
+
gem.add_development_dependency 'timecop'
|
20
22
|
gem.add_development_dependency 'pry-byebug'
|
23
|
+
gem.add_development_dependency 'yard'
|
21
24
|
|
22
|
-
gem.add_runtime_dependency 'twitter', '~>
|
23
|
-
gem.add_runtime_dependency 'simple_oauth'
|
24
|
-
gem.add_runtime_dependency '
|
25
|
+
gem.add_runtime_dependency 'twitter', '~> 5.0'
|
26
|
+
gem.add_runtime_dependency 'simple_oauth'
|
27
|
+
gem.add_runtime_dependency 'eventmachine', '~> 1.0.3'
|
25
28
|
gem.add_runtime_dependency 'rufus-scheduler'
|
26
29
|
gem.add_runtime_dependency 'gingerice'
|
27
30
|
gem.add_runtime_dependency 'htmlentities'
|
28
31
|
gem.add_runtime_dependency 'engtagger'
|
29
32
|
gem.add_runtime_dependency 'fast-stemmer'
|
30
33
|
gem.add_runtime_dependency 'highscore'
|
34
|
+
gem.add_runtime_dependency 'pry'
|
31
35
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitter_ebooks
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 3.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jaiden Mispy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-12-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rspec-mocks
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: memory_profiler
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -38,6 +52,20 @@ dependencies:
|
|
38
52
|
- - ">="
|
39
53
|
- !ruby/object:Gem::Version
|
40
54
|
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: timecop
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
41
69
|
- !ruby/object:Gem::Dependency
|
42
70
|
name: pry-byebug
|
43
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -53,35 +81,35 @@ dependencies:
|
|
53
81
|
- !ruby/object:Gem::Version
|
54
82
|
version: '0'
|
55
83
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
84
|
+
name: yard
|
57
85
|
requirement: !ruby/object:Gem::Requirement
|
58
86
|
requirements:
|
59
|
-
- - "
|
87
|
+
- - ">="
|
60
88
|
- !ruby/object:Gem::Version
|
61
|
-
version: '
|
62
|
-
type: :
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
63
91
|
prerelease: false
|
64
92
|
version_requirements: !ruby/object:Gem::Requirement
|
65
93
|
requirements:
|
66
|
-
- - "
|
94
|
+
- - ">="
|
67
95
|
- !ruby/object:Gem::Version
|
68
|
-
version: '
|
96
|
+
version: '0'
|
69
97
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
98
|
+
name: twitter
|
71
99
|
requirement: !ruby/object:Gem::Requirement
|
72
100
|
requirements:
|
73
101
|
- - "~>"
|
74
102
|
- !ruby/object:Gem::Version
|
75
|
-
version:
|
103
|
+
version: '5.0'
|
76
104
|
type: :runtime
|
77
105
|
prerelease: false
|
78
106
|
version_requirements: !ruby/object:Gem::Requirement
|
79
107
|
requirements:
|
80
108
|
- - "~>"
|
81
109
|
- !ruby/object:Gem::Version
|
82
|
-
version:
|
110
|
+
version: '5.0'
|
83
111
|
- !ruby/object:Gem::Dependency
|
84
|
-
name:
|
112
|
+
name: simple_oauth
|
85
113
|
requirement: !ruby/object:Gem::Requirement
|
86
114
|
requirements:
|
87
115
|
- - ">="
|
@@ -94,6 +122,20 @@ dependencies:
|
|
94
122
|
- - ">="
|
95
123
|
- !ruby/object:Gem::Version
|
96
124
|
version: '0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: eventmachine
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: 1.0.3
|
132
|
+
type: :runtime
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - "~>"
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: 1.0.3
|
97
139
|
- !ruby/object:Gem::Dependency
|
98
140
|
name: rufus-scheduler
|
99
141
|
requirement: !ruby/object:Gem::Requirement
|
@@ -178,6 +220,20 @@ dependencies:
|
|
178
220
|
- - ">="
|
179
221
|
- !ruby/object:Gem::Version
|
180
222
|
version: '0'
|
223
|
+
- !ruby/object:Gem::Dependency
|
224
|
+
name: pry
|
225
|
+
requirement: !ruby/object:Gem::Requirement
|
226
|
+
requirements:
|
227
|
+
- - ">="
|
228
|
+
- !ruby/object:Gem::Version
|
229
|
+
version: '0'
|
230
|
+
type: :runtime
|
231
|
+
prerelease: false
|
232
|
+
version_requirements: !ruby/object:Gem::Requirement
|
233
|
+
requirements:
|
234
|
+
- - ">="
|
235
|
+
- !ruby/object:Gem::Version
|
236
|
+
version: '0'
|
181
237
|
description: Markov chains for all your friends~
|
182
238
|
email:
|
183
239
|
- "^_^@mispy.me"
|
@@ -188,6 +244,7 @@ extra_rdoc_files: []
|
|
188
244
|
files:
|
189
245
|
- ".gitignore"
|
190
246
|
- ".rspec"
|
247
|
+
- ".travis.yml"
|
191
248
|
- Gemfile
|
192
249
|
- LICENSE
|
193
250
|
- README.md
|
@@ -199,7 +256,6 @@ files:
|
|
199
256
|
- lib/twitter_ebooks.rb
|
200
257
|
- lib/twitter_ebooks/archive.rb
|
201
258
|
- lib/twitter_ebooks/bot.rb
|
202
|
-
- lib/twitter_ebooks/markov.rb
|
203
259
|
- lib/twitter_ebooks/model.rb
|
204
260
|
- lib/twitter_ebooks/nlp.rb
|
205
261
|
- lib/twitter_ebooks/suffix.rb
|
@@ -210,15 +266,12 @@ files:
|
|
210
266
|
- skeleton/bots.rb
|
211
267
|
- skeleton/corpus/.gitignore
|
212
268
|
- skeleton/model/.gitignore
|
213
|
-
-
|
269
|
+
- spec/bot_spec.rb
|
214
270
|
- spec/data/0xabad1dea.json
|
215
271
|
- spec/data/0xabad1dea.model
|
216
272
|
- spec/memprof.rb
|
217
273
|
- spec/model_spec.rb
|
218
274
|
- spec/spec_helper.rb
|
219
|
-
- test/corpus/0xabad1dea.tweets
|
220
|
-
- test/keywords.rb
|
221
|
-
- test/tokenize.rb
|
222
275
|
- twitter_ebooks.gemspec
|
223
276
|
homepage: ''
|
224
277
|
licenses: []
|
@@ -244,11 +297,10 @@ signing_key:
|
|
244
297
|
specification_version: 4
|
245
298
|
summary: Markov chains for all your friends~
|
246
299
|
test_files:
|
300
|
+
- spec/bot_spec.rb
|
247
301
|
- spec/data/0xabad1dea.json
|
248
302
|
- spec/data/0xabad1dea.model
|
249
303
|
- spec/memprof.rb
|
250
304
|
- spec/model_spec.rb
|
251
305
|
- spec/spec_helper.rb
|
252
|
-
|
253
|
-
- test/keywords.rb
|
254
|
-
- test/tokenize.rb
|
306
|
+
has_rdoc:
|
@@ -1,82 +0,0 @@
|
|
1
|
-
module Ebooks
|
2
|
-
# Special INTERIM token represents sentence boundaries
|
3
|
-
# This is so we can include start and end of statements in model
|
4
|
-
# Due to the way the sentence tokenizer works, can correspond
|
5
|
-
# to multiple actual parts of text (such as ^, $, \n and .?!)
|
6
|
-
INTERIM = :interim
|
7
|
-
|
8
|
-
# This is an ngram-based Markov model optimized to build from a
|
9
|
-
# tokenized sentence list without requiring too much transformation
|
10
|
-
class MarkovModel
|
11
|
-
def self.build(sentences)
|
12
|
-
MarkovModel.new.consume(sentences)
|
13
|
-
end
|
14
|
-
|
15
|
-
def consume(sentences)
|
16
|
-
# These models are of the form ngram => [[sentence_pos, token_pos] || INTERIM, ...]
|
17
|
-
# We map by both bigrams and unigrams so we can fall back to the latter in
|
18
|
-
# cases where an input bigram is unavailable, such as starting a sentence
|
19
|
-
@sentences = sentences
|
20
|
-
@unigrams = {}
|
21
|
-
@bigrams = {}
|
22
|
-
|
23
|
-
sentences.each_with_index do |tokens, i|
|
24
|
-
last_token = INTERIM
|
25
|
-
tokens.each_with_index do |token, j|
|
26
|
-
@unigrams[last_token] ||= []
|
27
|
-
@unigrams[last_token] << [i, j]
|
28
|
-
|
29
|
-
@bigrams[last_token] ||= {}
|
30
|
-
@bigrams[last_token][token] ||= []
|
31
|
-
|
32
|
-
if j == tokens.length-1 # Mark sentence endings
|
33
|
-
@unigrams[token] ||= []
|
34
|
-
@unigrams[token] << INTERIM
|
35
|
-
@bigrams[last_token][token] << INTERIM
|
36
|
-
else
|
37
|
-
@bigrams[last_token][token] << [i, j+1]
|
38
|
-
end
|
39
|
-
|
40
|
-
last_token = token
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
self
|
45
|
-
end
|
46
|
-
|
47
|
-
def find_token(index)
|
48
|
-
if index == INTERIM
|
49
|
-
INTERIM
|
50
|
-
else
|
51
|
-
@sentences[index[0]][index[1]]
|
52
|
-
end
|
53
|
-
end
|
54
|
-
|
55
|
-
def chain(tokens)
|
56
|
-
if tokens.length == 1
|
57
|
-
matches = @unigrams[tokens[-1]]
|
58
|
-
else
|
59
|
-
matches = @bigrams[tokens[-2]][tokens[-1]]
|
60
|
-
matches = @unigrams[tokens[-1]] if matches.length < 2
|
61
|
-
end
|
62
|
-
|
63
|
-
if matches.empty?
|
64
|
-
# This should never happen unless a strange token is
|
65
|
-
# supplied from outside the dataset
|
66
|
-
raise ArgumentError, "Unable to continue chain for: #{tokens.inspect}"
|
67
|
-
end
|
68
|
-
|
69
|
-
next_token = find_token(matches.sample)
|
70
|
-
|
71
|
-
if next_token == INTERIM # We chose to end the sentence
|
72
|
-
return tokens
|
73
|
-
else
|
74
|
-
return chain(tokens + [next_token])
|
75
|
-
end
|
76
|
-
end
|
77
|
-
|
78
|
-
def generate
|
79
|
-
NLP.reconstruct(chain([INTERIM]))
|
80
|
-
end
|
81
|
-
end
|
82
|
-
end
|