twitter_ebooks 2.3.2 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,178 @@
1
+ require 'spec_helper'
2
+ require 'memory_profiler'
3
+ require 'tempfile'
4
+ require 'timecop'
5
+
6
+ class TestBot < Ebooks::Bot
7
+ attr_accessor :twitter
8
+
9
+ def configure
10
+ end
11
+
12
+ def on_direct_message(dm)
13
+ reply dm, "echo: #{dm.text}"
14
+ end
15
+
16
+ def on_mention(tweet)
17
+ reply tweet, "echo: #{meta(tweet).mentionless}"
18
+ end
19
+
20
+ def on_timeline(tweet)
21
+ reply tweet, "fine tweet good sir"
22
+ end
23
+ end
24
+
25
+ module Ebooks::Test
26
+ # Generates a random twitter id
27
+ def twitter_id
28
+ (rand*10**18).to_i
29
+ end
30
+
31
+ # Creates a mock direct message
32
+ # @param username User sending the DM
33
+ # @param text DM content
34
+ def mock_dm(username, text)
35
+ Twitter::DirectMessage.new(id: twitter_id,
36
+ sender: { id: twitter_id, screen_name: username},
37
+ text: text)
38
+ end
39
+
40
+ # Creates a mock tweet
41
+ # @param username User sending the tweet
42
+ # @param text Tweet content
43
+ def mock_tweet(username, text, extra={})
44
+ mentions = text.split.find_all { |x| x.start_with?('@') }
45
+ tweet = Twitter::Tweet.new({
46
+ id: twitter_id,
47
+ in_reply_to_status_id: 'mock-link',
48
+ user: { id: twitter_id, screen_name: username },
49
+ text: text,
50
+ created_at: Time.now.to_s,
51
+ entities: {
52
+ user_mentions: mentions.map { |m|
53
+ { screen_name: m.split('@')[1],
54
+ indices: [text.index(m), text.index(m)+m.length] }
55
+ }
56
+ }
57
+ }.merge!(extra))
58
+ tweet
59
+ end
60
+
61
+ def twitter_spy(bot)
62
+ twitter = spy("twitter")
63
+ allow(twitter).to receive(:update).and_return(mock_tweet(bot.username, "test tweet"))
64
+ twitter
65
+ end
66
+
67
+ def simulate(bot, &b)
68
+ bot.twitter = twitter_spy(bot)
69
+ b.call
70
+ end
71
+
72
+ def expect_direct_message(bot, content)
73
+ expect(bot.twitter).to have_received(:create_direct_message).with(anything(), content, {})
74
+ bot.twitter = twitter_spy(bot)
75
+ end
76
+
77
+ def expect_tweet(bot, content)
78
+ expect(bot.twitter).to have_received(:update).with(content, anything())
79
+ bot.twitter = twitter_spy(bot)
80
+ end
81
+ end
82
+
83
+
84
+ describe Ebooks::Bot do
85
+ include Ebooks::Test
86
+ let(:bot) { TestBot.new('test_ebooks') }
87
+
88
+ before { Timecop.freeze }
89
+ after { Timecop.return }
90
+
91
+ it "responds to dms" do
92
+ simulate(bot) do
93
+ bot.receive_event(mock_dm("m1sp", "this is a dm"))
94
+ expect_direct_message(bot, "echo: this is a dm")
95
+ end
96
+ end
97
+
98
+ it "responds to mentions" do
99
+ simulate(bot) do
100
+ bot.receive_event(mock_tweet("m1sp", "@test_ebooks this is a mention"))
101
+ expect_tweet(bot, "@m1sp echo: this is a mention")
102
+ end
103
+ end
104
+
105
+ it "responds to timeline tweets" do
106
+ simulate(bot) do
107
+ bot.receive_event(mock_tweet("m1sp", "some excellent tweet"))
108
+ expect_tweet(bot, "@m1sp fine tweet good sir")
109
+ end
110
+ end
111
+
112
+ it "links tweets to conversations correctly" do
113
+ tweet1 = mock_tweet("m1sp", "tweet 1", id: 1, in_reply_to_status_id: nil)
114
+
115
+ tweet2 = mock_tweet("m1sp", "tweet 2", id: 2, in_reply_to_status_id: 1)
116
+
117
+ tweet3 = mock_tweet("m1sp", "tweet 3", id: 3, in_reply_to_status_id: nil)
118
+
119
+ bot.conversation(tweet1).add(tweet1)
120
+ expect(bot.conversation(tweet2)).to eq(bot.conversation(tweet1))
121
+
122
+ bot.conversation(tweet2).add(tweet2)
123
+ expect(bot.conversation(tweet3)).to_not eq(bot.conversation(tweet2))
124
+ end
125
+
126
+ it "stops mentioning people after a certain limit" do
127
+ simulate(bot) do
128
+ bot.receive_event(mock_tweet("spammer", "@test_ebooks @m1sp 1"))
129
+ expect_tweet(bot, "@spammer @m1sp echo: 1")
130
+
131
+ Timecop.travel(Time.now + 60)
132
+ bot.receive_event(mock_tweet("spammer", "@test_ebooks @m1sp 2"))
133
+ expect_tweet(bot, "@spammer @m1sp echo: 2")
134
+
135
+ Timecop.travel(Time.now + 60)
136
+ bot.receive_event(mock_tweet("spammer", "@test_ebooks @m1sp 3"))
137
+ expect_tweet(bot, "@spammer echo: 3")
138
+ end
139
+ end
140
+
141
+ it "doesn't stop mentioning them if they reply" do
142
+ simulate(bot) do
143
+ bot.receive_event(mock_tweet("spammer", "@test_ebooks @m1sp 4"))
144
+ expect_tweet(bot, "@spammer @m1sp echo: 4")
145
+
146
+ Timecop.travel(Time.now + 60)
147
+ bot.receive_event(mock_tweet("m1sp", "@spammer @test_ebooks 5"))
148
+ expect_tweet(bot, "@m1sp @spammer echo: 5")
149
+
150
+ Timecop.travel(Time.now + 60)
151
+ bot.receive_event(mock_tweet("spammer", "@test_ebooks @m1sp 6"))
152
+ expect_tweet(bot, "@spammer @m1sp echo: 6")
153
+ end
154
+ end
155
+
156
+ it "doesn't get into infinite bot conversations" do
157
+ simulate(bot) do
158
+ bot.receive_event(mock_tweet("spammer", "@test_ebooks @m1sp 7"))
159
+ expect_tweet(bot, "@spammer @m1sp echo: 7")
160
+
161
+ Timecop.travel(Time.now + 10)
162
+ bot.receive_event(mock_tweet("spammer", "@test_ebooks @m1sp 8"))
163
+ expect_tweet(bot, "@spammer @m1sp echo: 8")
164
+
165
+ Timecop.travel(Time.now + 10)
166
+ bot.receive_event(mock_tweet("spammer", "@test_ebooks @m1sp 9"))
167
+ expect(bot.twitter).to_not have_received(:update)
168
+ end
169
+ end
170
+
171
+ it "blocks blacklisted users on contact" do
172
+ simulate(bot) do
173
+ bot.blacklist = ["spammer"]
174
+ bot.receive_event(mock_tweet("spammer", "@test_ebooks @m1sp 7"))
175
+ expect(bot.twitter).to have_received(:block).with("spammer")
176
+ end
177
+ end
178
+ end
@@ -22,12 +22,28 @@ describe Ebooks::Model do
22
22
  end
23
23
  end
24
24
 
25
- it "does not use a ridiculous amount of memory" do
25
+ it "consumes, saves and loads models correctly" do
26
+ model = nil
27
+
26
28
  report = MemoryUsage.report do
27
29
  model = Ebooks::Model.consume(path("data/0xabad1dea.json"))
28
30
  end
31
+ expect(report.total_memsize).to be < 200000000
32
+
33
+ file = Tempfile.new("0xabad1dea")
34
+ model.save(file.path)
35
+
36
+ report2 = MemoryUsage.report do
37
+ model = Ebooks::Model.load(file.path)
38
+ end
39
+ expect(report2.total_memsize).to be < 3000000
40
+
41
+ expect(model.tokens[0]).to be_a String
42
+ expect(model.sentences[0][0]).to be_a Fixnum
43
+ expect(model.mentions[0][0]).to be_a Fixnum
44
+ expect(model.keywords[0]).to be_a String
29
45
 
30
- expect(report.total_memsize).to be < 1000000000
46
+ puts "0xabad1dea.model uses #{report2.total_memsize} bytes in memory"
31
47
  end
32
48
 
33
49
  describe '.consume' do
@@ -16,16 +16,20 @@ Gem::Specification.new do |gem|
16
16
  gem.version = Ebooks::VERSION
17
17
 
18
18
  gem.add_development_dependency 'rspec'
19
+ gem.add_development_dependency 'rspec-mocks'
19
20
  gem.add_development_dependency 'memory_profiler'
21
+ gem.add_development_dependency 'timecop'
20
22
  gem.add_development_dependency 'pry-byebug'
23
+ gem.add_development_dependency 'yard'
21
24
 
22
- gem.add_runtime_dependency 'twitter', '~> 4.0'
23
- gem.add_runtime_dependency 'simple_oauth', '~> 0.2.0'
24
- gem.add_runtime_dependency 'tweetstream'
25
+ gem.add_runtime_dependency 'twitter', '~> 5.0'
26
+ gem.add_runtime_dependency 'simple_oauth'
27
+ gem.add_runtime_dependency 'eventmachine', '~> 1.0.3'
25
28
  gem.add_runtime_dependency 'rufus-scheduler'
26
29
  gem.add_runtime_dependency 'gingerice'
27
30
  gem.add_runtime_dependency 'htmlentities'
28
31
  gem.add_runtime_dependency 'engtagger'
29
32
  gem.add_runtime_dependency 'fast-stemmer'
30
33
  gem.add_runtime_dependency 'highscore'
34
+ gem.add_runtime_dependency 'pry'
31
35
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twitter_ebooks
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.3.2
4
+ version: 3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jaiden Mispy
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-25 00:00:00.000000000 Z
11
+ date: 2014-12-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rspec-mocks
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: memory_profiler
29
43
  requirement: !ruby/object:Gem::Requirement
@@ -38,6 +52,20 @@ dependencies:
38
52
  - - ">="
39
53
  - !ruby/object:Gem::Version
40
54
  version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: timecop
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
41
69
  - !ruby/object:Gem::Dependency
42
70
  name: pry-byebug
43
71
  requirement: !ruby/object:Gem::Requirement
@@ -53,35 +81,35 @@ dependencies:
53
81
  - !ruby/object:Gem::Version
54
82
  version: '0'
55
83
  - !ruby/object:Gem::Dependency
56
- name: twitter
84
+ name: yard
57
85
  requirement: !ruby/object:Gem::Requirement
58
86
  requirements:
59
- - - "~>"
87
+ - - ">="
60
88
  - !ruby/object:Gem::Version
61
- version: '4.0'
62
- type: :runtime
89
+ version: '0'
90
+ type: :development
63
91
  prerelease: false
64
92
  version_requirements: !ruby/object:Gem::Requirement
65
93
  requirements:
66
- - - "~>"
94
+ - - ">="
67
95
  - !ruby/object:Gem::Version
68
- version: '4.0'
96
+ version: '0'
69
97
  - !ruby/object:Gem::Dependency
70
- name: simple_oauth
98
+ name: twitter
71
99
  requirement: !ruby/object:Gem::Requirement
72
100
  requirements:
73
101
  - - "~>"
74
102
  - !ruby/object:Gem::Version
75
- version: 0.2.0
103
+ version: '5.0'
76
104
  type: :runtime
77
105
  prerelease: false
78
106
  version_requirements: !ruby/object:Gem::Requirement
79
107
  requirements:
80
108
  - - "~>"
81
109
  - !ruby/object:Gem::Version
82
- version: 0.2.0
110
+ version: '5.0'
83
111
  - !ruby/object:Gem::Dependency
84
- name: tweetstream
112
+ name: simple_oauth
85
113
  requirement: !ruby/object:Gem::Requirement
86
114
  requirements:
87
115
  - - ">="
@@ -94,6 +122,20 @@ dependencies:
94
122
  - - ">="
95
123
  - !ruby/object:Gem::Version
96
124
  version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: eventmachine
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: 1.0.3
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: 1.0.3
97
139
  - !ruby/object:Gem::Dependency
98
140
  name: rufus-scheduler
99
141
  requirement: !ruby/object:Gem::Requirement
@@ -178,6 +220,20 @@ dependencies:
178
220
  - - ">="
179
221
  - !ruby/object:Gem::Version
180
222
  version: '0'
223
+ - !ruby/object:Gem::Dependency
224
+ name: pry
225
+ requirement: !ruby/object:Gem::Requirement
226
+ requirements:
227
+ - - ">="
228
+ - !ruby/object:Gem::Version
229
+ version: '0'
230
+ type: :runtime
231
+ prerelease: false
232
+ version_requirements: !ruby/object:Gem::Requirement
233
+ requirements:
234
+ - - ">="
235
+ - !ruby/object:Gem::Version
236
+ version: '0'
181
237
  description: Markov chains for all your friends~
182
238
  email:
183
239
  - "^_^@mispy.me"
@@ -188,6 +244,7 @@ extra_rdoc_files: []
188
244
  files:
189
245
  - ".gitignore"
190
246
  - ".rspec"
247
+ - ".travis.yml"
191
248
  - Gemfile
192
249
  - LICENSE
193
250
  - README.md
@@ -199,7 +256,6 @@ files:
199
256
  - lib/twitter_ebooks.rb
200
257
  - lib/twitter_ebooks/archive.rb
201
258
  - lib/twitter_ebooks/bot.rb
202
- - lib/twitter_ebooks/markov.rb
203
259
  - lib/twitter_ebooks/model.rb
204
260
  - lib/twitter_ebooks/nlp.rb
205
261
  - lib/twitter_ebooks/suffix.rb
@@ -210,15 +266,12 @@ files:
210
266
  - skeleton/bots.rb
211
267
  - skeleton/corpus/.gitignore
212
268
  - skeleton/model/.gitignore
213
- - skeleton/run.rb
269
+ - spec/bot_spec.rb
214
270
  - spec/data/0xabad1dea.json
215
271
  - spec/data/0xabad1dea.model
216
272
  - spec/memprof.rb
217
273
  - spec/model_spec.rb
218
274
  - spec/spec_helper.rb
219
- - test/corpus/0xabad1dea.tweets
220
- - test/keywords.rb
221
- - test/tokenize.rb
222
275
  - twitter_ebooks.gemspec
223
276
  homepage: ''
224
277
  licenses: []
@@ -244,11 +297,10 @@ signing_key:
244
297
  specification_version: 4
245
298
  summary: Markov chains for all your friends~
246
299
  test_files:
300
+ - spec/bot_spec.rb
247
301
  - spec/data/0xabad1dea.json
248
302
  - spec/data/0xabad1dea.model
249
303
  - spec/memprof.rb
250
304
  - spec/model_spec.rb
251
305
  - spec/spec_helper.rb
252
- - test/corpus/0xabad1dea.tweets
253
- - test/keywords.rb
254
- - test/tokenize.rb
306
+ has_rdoc:
@@ -1,82 +0,0 @@
1
- module Ebooks
2
- # Special INTERIM token represents sentence boundaries
3
- # This is so we can include start and end of statements in model
4
- # Due to the way the sentence tokenizer works, can correspond
5
- # to multiple actual parts of text (such as ^, $, \n and .?!)
6
- INTERIM = :interim
7
-
8
- # This is an ngram-based Markov model optimized to build from a
9
- # tokenized sentence list without requiring too much transformation
10
- class MarkovModel
11
- def self.build(sentences)
12
- MarkovModel.new.consume(sentences)
13
- end
14
-
15
- def consume(sentences)
16
- # These models are of the form ngram => [[sentence_pos, token_pos] || INTERIM, ...]
17
- # We map by both bigrams and unigrams so we can fall back to the latter in
18
- # cases where an input bigram is unavailable, such as starting a sentence
19
- @sentences = sentences
20
- @unigrams = {}
21
- @bigrams = {}
22
-
23
- sentences.each_with_index do |tokens, i|
24
- last_token = INTERIM
25
- tokens.each_with_index do |token, j|
26
- @unigrams[last_token] ||= []
27
- @unigrams[last_token] << [i, j]
28
-
29
- @bigrams[last_token] ||= {}
30
- @bigrams[last_token][token] ||= []
31
-
32
- if j == tokens.length-1 # Mark sentence endings
33
- @unigrams[token] ||= []
34
- @unigrams[token] << INTERIM
35
- @bigrams[last_token][token] << INTERIM
36
- else
37
- @bigrams[last_token][token] << [i, j+1]
38
- end
39
-
40
- last_token = token
41
- end
42
- end
43
-
44
- self
45
- end
46
-
47
- def find_token(index)
48
- if index == INTERIM
49
- INTERIM
50
- else
51
- @sentences[index[0]][index[1]]
52
- end
53
- end
54
-
55
- def chain(tokens)
56
- if tokens.length == 1
57
- matches = @unigrams[tokens[-1]]
58
- else
59
- matches = @bigrams[tokens[-2]][tokens[-1]]
60
- matches = @unigrams[tokens[-1]] if matches.length < 2
61
- end
62
-
63
- if matches.empty?
64
- # This should never happen unless a strange token is
65
- # supplied from outside the dataset
66
- raise ArgumentError, "Unable to continue chain for: #{tokens.inspect}"
67
- end
68
-
69
- next_token = find_token(matches.sample)
70
-
71
- if next_token == INTERIM # We chose to end the sentence
72
- return tokens
73
- else
74
- return chain(tokens + [next_token])
75
- end
76
- end
77
-
78
- def generate
79
- NLP.reconstruct(chain([INTERIM]))
80
- end
81
- end
82
- end