twitter_ebooks 2.3.2 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  $debug = false
2
2
 
3
3
  def log(*args)
4
- STDERR.puts args.map(&:to_s).join(' ')
4
+ STDERR.print args.map(&:to_s).join(' ') + "\n"
5
5
  STDERR.flush
6
6
  end
7
7
 
@@ -11,11 +11,11 @@ module Ebooks
11
11
  SKELETON_PATH = File.join(GEM_PATH, 'skeleton')
12
12
  TEST_PATH = File.join(GEM_PATH, 'test')
13
13
  TEST_CORPUS_PATH = File.join(TEST_PATH, 'corpus/0xabad1dea.tweets')
14
+ INTERIM = :interim
14
15
  end
15
16
 
16
17
  require 'twitter_ebooks/nlp'
17
18
  require 'twitter_ebooks/archive'
18
- require 'twitter_ebooks/markov'
19
19
  require 'twitter_ebooks/suffix'
20
20
  require 'twitter_ebooks/model'
21
21
  require 'twitter_ebooks/bot'
@@ -21,9 +21,9 @@ module Ebooks
21
21
  @config[:consumer_key] = STDIN.gets.chomp
22
22
  print "Consumer secret: "
23
23
  @config[:consumer_secret] = STDIN.gets.chomp
24
- print "Oauth token: "
24
+ print "Access token: "
25
25
  @config[:oauth_token] = STDIN.gets.chomp
26
- print "Oauth secret: "
26
+ print "Access secret: "
27
27
  @config[:oauth_token_secret] = STDIN.gets.chomp
28
28
 
29
29
  File.open(CONFIG_PATH, 'w') do |f|
@@ -31,19 +31,22 @@ module Ebooks
31
31
  end
32
32
  end
33
33
 
34
- Twitter.configure do |config|
34
+ Twitter::REST::Client.new do |config|
35
35
  config.consumer_key = @config[:consumer_key]
36
36
  config.consumer_secret = @config[:consumer_secret]
37
- config.oauth_token = @config[:oauth_token]
38
- config.oauth_token_secret = @config[:oauth_token_secret]
37
+ config.access_token = @config[:oauth_token]
38
+ config.access_token_secret = @config[:oauth_token_secret]
39
39
  end
40
-
41
- Twitter::Client.new
42
40
  end
43
41
 
44
- def initialize(username, path, client=nil)
42
+ def initialize(username, path=nil, client=nil)
45
43
  @username = username
46
- @path = path || "#{username}.json"
44
+ @path = path || "corpus/#{username}.json"
45
+
46
+ if File.directory?(@path)
47
+ @path = File.join(@path, "#{username}.json")
48
+ end
49
+
47
50
  @client = client || make_client
48
51
 
49
52
  if File.exists?(@path)
@@ -1,180 +1,414 @@
1
- #!/usr/bin/env ruby
2
1
  # encoding: utf-8
3
2
  require 'twitter'
4
- require 'tweetstream'
5
3
  require 'rufus/scheduler'
6
4
 
7
5
  module Ebooks
8
- class Bot
9
- attr_accessor :consumer_key, :consumer_secret,
10
- :oauth_token, :oauth_token_secret
6
# Raised when a bot is configured incorrectly (for example a nil username).
# Derives from StandardError so that a plain `rescue` clause can handle it;
# code that rescues ConfigurationError or Exception keeps working unchanged.
class ConfigurationError < StandardError
end
11
8
 
12
- attr_accessor :username
9
# Represents a single reply tree of tweets
class Conversation
  # @return [Time] when the conversation was created or last had a tweet added
  attr_reader :last_update

  # @param bot [Ebooks::Bot]
  def initialize(bot)
    @bot = bot
    @tweets = []
    @last_update = Time.now
  end

  # Append a tweet and refresh the last-update timestamp
  # @param tweet [Twitter::Tweet] tweet to add
  def add(tweet)
    @tweets << tweet
    @last_update = Time.now
  end

  # Make an informed guess as to whether a user is a bot based
  # on their behavior in this conversation: three tweets less than
  # 30 seconds apart, or a username containing "ebooks".
  # @param username [String]
  # @return [Boolean]
  def is_bot?(username)
    usertweets = tweets_by(username, @tweets)

    if usertweets.length > 2 &&
       (usertweets[-1].created_at - usertweets[-3].created_at) < 30
      return true
    end

    username.include?("ebooks")
  end

  # Figure out whether to keep this user in the reply prefix.
  # We want to avoid spamming non-participating users, so once the
  # conversation is longer than four tweets the user must have written
  # one of the last four.
  # @param username [String] compared case-insensitively
  # @return [Boolean]
  def can_include?(username)
    @tweets.length <= 4 || !tweets_by(username, @tweets.last(4)).empty?
  end

  private

  # Select the tweets written by username, ignoring case so that callers
  # may pass either the canonical or a downcased screen name.
  def tweets_by(username, tweets)
    wanted = username.downcase
    tweets.select { |t| t.user.screen_name.downcase == wanted }
  end
end
26
47
 
27
- # Override with callback
28
- b.call(self)
48
# Meta information about a tweet that we calculate for ourselves
class TweetMeta
  # @return [Array<String>] usernames mentioned in tweet
  attr_accessor :mentions
  # @return [String] text of tweets with mentions removed
  attr_accessor :mentionless
  # @return [Array<String>] usernames to include in a reply
  attr_accessor :reply_mentions
  # @return [String] mentions to start reply with
  attr_accessor :reply_prefix
  # @return [Integer] available chars for reply
  attr_accessor :limit

  # @return [Ebooks::Bot] associated bot
  attr_accessor :bot
  # @return [Twitter::Tweet] associated tweet
  attr_accessor :tweet

  # Check whether this tweet mentions our bot
  # @return [Boolean]
  def mentions_bot?
    # To check if this is someone talking to us, ensure:
    # - The tweet mentions list contains our username
    # - The tweet is not being retweeted by somebody else
    # - Or soft-retweeted by somebody else
    @mentions.map(&:downcase).include?(@bot.username.downcase) &&
      !@tweet.retweeted_status? &&
      !@tweet.text.start_with?('RT ')
  end

  # @param bot [Ebooks::Bot]
  # @param ev [Twitter::Tweet]
  def initialize(bot, ev)
    @bot = bot
    @tweet = ev

    entities = ev.attrs[:entities][:user_mentions]
    @mentions = entities.map { |x| x[:screen_name] }

    # Process mentions to figure out who to reply to
    # i.e. not self and nobody who has seen too many secondary mentions.
    # Both sides of the self check are downcased; the conversation is
    # asked about the screen name exactly as it appears in the tweet.
    own_name = @bot.username.downcase
    reply_mentions = @mentions.reject do |m|
      m.downcase == own_name || !@bot.conversation(ev).can_include?(m)
    end
    @reply_mentions = ([ev.user.screen_name] + reply_mentions).uniq

    @reply_prefix = @reply_mentions.map { |m| '@' + m }.join(' ') + ' '
    @limit = 140 - @reply_prefix.length

    # Cut mentions out back-to-front so earlier indices stay valid
    mless = ev.text
    begin
      entities.reverse_each do |entity|
        last = mless[entity[:indices][1]..-1] || ''
        mless = mless[0...entity[:indices][0]] + last.strip
      end
    rescue StandardError
      # Dump the offending input for debugging, then propagate the error
      p entities
      p ev.text
      raise
    end
    @mentionless = mless
  end
end
32
109
 
110
+ class Bot
111
+ # @return [String] OAuth consumer key for a Twitter app
112
+ attr_accessor :consumer_key
113
+ # @return [String] OAuth consumer secret for a Twitter app
114
+ attr_accessor :consumer_secret
115
+ # @return [String] OAuth access token from `ebooks auth`
116
+ attr_accessor :access_token
117
+ # @return [String] OAuth access secret from `ebooks auth`
118
+ attr_accessor :access_token_secret
119
+ # @return [String] Twitter username of bot
120
+ attr_accessor :username
121
+ # @return [Array<String>] list of usernames to block on contact
122
+ attr_accessor :blacklist
123
+ # @return [Hash{String => Ebooks::Conversation}] maps tweet ids to their conversation contexts
124
+ attr_accessor :conversations
125
+ # @return [Range, Integer] range of seconds to delay in delay method
126
+ attr_accessor :delay_range
127
+
128
+ # @return [Array] list of all defined bots
129
+ def self.all; @@all ||= []; end
130
+
131
+ # Fetches a bot by username
132
+ # @param username [String]
133
+ # @return [Ebooks::Bot]
134
+ def self.get(username)
135
+ all.find { |bot| bot.username == username }
136
+ end
137
+
138
+ # Logs info to stdout in the context of this bot
33
139
  def log(*args)
34
- STDOUT.puts "@#{@username}: " + args.map(&:to_s).join(' ')
140
+ STDOUT.print "@#{@username}: " + args.map(&:to_s).join(' ') + "\n"
35
141
  STDOUT.flush
36
142
  end
37
143
 
38
- def configure
39
- TweetStream.configure do |config|
144
+ # Initializes and configures bot
145
+ # @param args Arguments passed to configure method
146
+ # @param b Block to call with new bot
147
+ def initialize(username, &b)
148
+ @blacklist ||= []
149
+ @conversations ||= {}
150
+ # Tweet ids we've already observed, to avoid duplication
151
+ @seen_tweets ||= {}
152
+
153
+ @username = username
154
+ configure
155
+
156
+ b.call(self) unless b.nil?
157
+ Bot.all << self
158
+ end
159
+
160
# Find or create the conversation context for this tweet.
# The conversation is looked up under the id of the tweet being replied
# to, then under the tweet's own id, and is registered under both.
# @param tweet [Twitter::Tweet]
# @return [Ebooks::Conversation]
def conversation(tweet)
  conv = if tweet.in_reply_to_status_id?
    @conversations[tweet.in_reply_to_status_id]
  end

  conv ||= @conversations[tweet.id] || Conversation.new(self)

  if tweet.in_reply_to_status_id?
    @conversations[tweet.in_reply_to_status_id] = conv
  end
  @conversations[tweet.id] = conv

  # Expire any old conversations to prevent memory growth.
  # delete_if removes entries safely while walking the hash, and the
  # timestamp is taken once rather than once per entry.
  now = Time.now
  @conversations.delete_if do |_id, other|
    other != conv && now - other.last_update > 3600
  end

  conv
end
186
+
187
+ # @return [Twitter::REST::Client] underlying REST client from twitter gem
188
+ def twitter
189
+ @twitter ||= Twitter::REST::Client.new do |config|
40
190
  config.consumer_key = @consumer_key
41
191
  config.consumer_secret = @consumer_secret
42
- config.oauth_token = @oauth_token
43
- config.oauth_token_secret = @oauth_token_secret
192
+ config.access_token = @access_token
193
+ config.access_token_secret = @access_token_secret
44
194
  end
195
+ end
45
196
 
46
- Twitter.configure do |config|
197
+ # @return [Twitter::Streaming::Client] underlying streaming client from twitter gem
198
+ def stream
199
+ @stream ||= Twitter::Streaming::Client.new do |config|
47
200
  config.consumer_key = @consumer_key
48
201
  config.consumer_secret = @consumer_secret
49
- config.oauth_token = @oauth_token
50
- config.oauth_token_secret = @oauth_token_secret
202
+ config.access_token = @access_token
203
+ config.access_token_secret = @access_token_secret
204
+ end
205
+ end
206
+
207
+ # Calculate some meta information about a tweet relevant for replying
208
+ # @param ev [Twitter::Tweet]
209
+ # @return [Ebooks::TweetMeta]
210
+ def meta(ev)
211
+ TweetMeta.new(self, ev)
212
+ end
213
+
214
# Receive an event from the twitter stream and dispatch it through `fire`
# as :direct_message, :follow, :mention or :timeline.
# @param ev [Object] Twitter streaming event
def receive_event(ev)
  if ev.is_a? Array # Initial array sent on first connection
    log "Online!"
    return
  end

  if ev.is_a? Twitter::DirectMessage
    return if ev.sender.screen_name == @username # Don't reply to self
    log "DM from @#{ev.sender.screen_name}: #{ev.text}"
    fire(:direct_message, ev)

  elsif ev.respond_to?(:name) && ev.name == :follow
    return if ev.source.screen_name == @username
    log "Followed by #{ev.source.screen_name}"
    fire(:follow, ev.source)

  elsif ev.is_a? Twitter::Tweet
    return unless ev.text # If it's not a text-containing tweet, ignore it
    return if ev.user.screen_name == @username # Ignore our own tweets

    tweet_meta = meta(ev)

    if blacklisted?(ev.user.screen_name)
      log "Blocking blacklisted user @#{ev.user.screen_name}"
      # Go through the accessor so the REST client is built on demand
      # instead of relying on the instance variable already being set
      twitter.block(ev.user.screen_name)
    end

    # Avoid responding to duplicate tweets
    if @seen_tweets[ev.id]
      log "Not firing event for duplicate tweet #{ev.id}"
      return
    else
      @seen_tweets[ev.id] = true
    end

    if tweet_meta.mentions_bot?
      log "Mention from @#{ev.user.screen_name}: #{ev.text}"
      conversation(ev).add(ev)
      fire(:mention, ev)
    else
      fire(:timeline, ev)
    end

  elsif ev.is_a?(Twitter::Streaming::DeletedTweet) ||
        ev.is_a?(Twitter::Streaming::Event)
    # pass
  else
    # Unrecognised event type: log it so it can be inspected
    log ev
  end
end
79
266
 
80
- @stream.on_event(:follow) do |event|
81
- next if event[:source][:screen_name] == @username
82
- log "Followed by #{event[:source][:screen_name]}"
83
- @on_follow.call(event[:source]) if @on_follow
267
# Configures client and fires startup event.
# Logs a hint and exits the process with status 1 when the consumer or
# access credentials are nil or empty.
# @raise [ConfigurationError] if the bot username is nil
def prepare
  # Sanity check
  if @username.nil?
    raise ConfigurationError, "bot username cannot be nil"
  end

  missing = ->(value) { value.nil? || value.empty? }

  # Both the key and the secret have to be present
  if missing.call(@consumer_key) || missing.call(@consumer_secret)
    log "Missing consumer_key or consumer_secret. These details can be acquired by registering a Twitter app at https://apps.twitter.com/"
    exit 1
  end

  if missing.call(@access_token) || missing.call(@access_token_secret)
    log "Missing access_token or access_token_secret. Please run `ebooks auth`."
    exit 1
  end

  # Build the REST client up front, then notify the bot
  twitter
  fire(:startup)
end
98
289
 
99
- reply_mentions = mentions.reject { |m| m.downcase == @username.downcase }
100
- reply_mentions = [ev[:user][:screen_name]] + reply_mentions
290
+ # Start running user event stream
291
+ def start
292
+ log "starting tweet stream"
101
293
 
102
- meta[:reply_prefix] = reply_mentions.uniq.map { |m| '@'+m }.join(' ') + ' '
103
- meta[:limit] = 140 - meta[:reply_prefix].length
294
+ stream.user do |ev|
295
+ receive_event ev
296
+ end
297
+ end
104
298
 
105
- mless = ev[:text]
106
- begin
107
- ev.attrs[:entities][:user_mentions].reverse.each do |entity|
108
- last = mless[entity[:indices][1]..-1]||''
109
- mless = mless[0...entity[:indices][0]] + last.strip
110
- end
111
- rescue Exception
112
- p ev.attrs[:entities][:user_mentions]
113
- p ev[:text]
114
- raise
115
- end
116
- meta[:mentionless] = mless
117
-
118
- # To check if this is a mention, ensure:
119
- # - The tweet mentions list contains our username
120
- # - The tweet is not being retweeted by somebody else
121
- # - Or soft-retweeted by somebody else
122
- if mentions.map(&:downcase).include?(@username.downcase) && !ev[:retweeted_status] && !ev[:text].start_with?('RT ')
123
- log "Mention from @#{ev[:user][:screen_name]}: #{ev[:text]}"
124
- @on_mention.call(ev, meta) if @on_mention
125
- else
126
- @on_timeline.call(ev, meta) if @on_timeline
127
- end
299
+ # Fire an event
300
+ # @param event [Symbol] event to fire
301
+ # @param args arguments for event handler
302
+ def fire(event, *args)
303
+ handler = "on_#{event}".to_sym
304
+ if respond_to? handler
305
+ self.send(handler, *args)
128
306
  end
129
307
  end
130
308
 
131
- # Wrapper for EM.add_timer
132
- # Delays add a greater sense of humanity to bot behaviour
133
- def delay(time, &b)
134
- time = time.to_a.sample unless time.is_a? Integer
135
- EM.add_timer(time, &b)
309
# Delay an action for a variable period of time.
# Sleeps in the calling thread, then runs the block and returns its value.
# @param range [Range, Numeric] range of seconds to sample a delay from,
#   or a fixed number of seconds
def delay(range=@delay_range, &b)
  # A plain number is used as-is; anything else is sampled from
  time = range.is_a?(Numeric) ? range : range.to_a.sample
  sleep time
  b.call
end
316
+
317
# Check if a username is blacklisted (exact match against the blacklist)
# @param username [String]
# @return [Boolean]
def blacklisted?(username)
  @blacklist.include?(username)
end
137
327
 
138
328
  # Reply to a tweet or a DM.
139
- # Applies configurable @reply_delay range
329
+ # @param ev [Twitter::Tweet, Twitter::DirectMessage]
330
+ # @param text [String] contents of reply excluding reply_prefix
331
+ # @param opts [Hash] additional params to pass to twitter gem
140
332
  def reply(ev, text, opts={})
141
333
  opts = opts.clone
142
334
 
143
335
  if ev.is_a? Twitter::DirectMessage
144
- log "Sending DM to @#{ev[:sender][:screen_name]}: #{text}"
145
- @twitter.direct_message_create(ev[:sender][:screen_name], text, opts)
336
+ log "Sending DM to @#{ev.sender.screen_name}: #{text}"
337
+ twitter.create_direct_message(ev.sender.screen_name, text, opts)
146
338
  elsif ev.is_a? Twitter::Tweet
147
- log "Replying to @#{ev[:user][:screen_name]} with: #{text}"
148
- @twitter.update(text, in_reply_to_status_id: ev[:id])
339
+ meta = meta(ev)
340
+
341
+ if conversation(ev).is_bot?(ev.user.screen_name)
342
+ log "Not replying to suspected bot @#{ev.user.screen_name}"
343
+ return false
344
+ end
345
+
346
+ log "Replying to @#{ev.user.screen_name} with: #{meta.reply_prefix + text}"
347
+ tweet = twitter.update(meta.reply_prefix + text, in_reply_to_status_id: ev.id)
348
+ conversation(tweet).add(tweet)
349
+ tweet
149
350
  else
150
351
  raise Exception("Don't know how to reply to a #{ev.class}")
151
352
  end
152
353
  end
153
354
 
154
- def scheduler
155
- @scheduler ||= Rufus::Scheduler.new
355
+ # Favorite a tweet
356
+ # @param tweet [Twitter::Tweet]
357
+ def favorite(tweet)
358
+ log "Favoriting @#{tweet.user.screen_name}: #{tweet.text}"
359
+
360
+ begin
361
+ twitter.favorite(tweet.id)
362
+ rescue Twitter::Error::Forbidden
363
+ log "Already favorited: #{tweet.user.screen_name}: #{tweet.text}"
364
+ end
365
+ end
366
+
367
+ # Retweet a tweet
368
+ # @param tweet [Twitter::Tweet]
369
+ def retweet(tweet)
370
+ log "Retweeting @#{tweet.user.screen_name}: #{tweet.text}"
371
+
372
+ begin
373
+ twitter.retweet(tweet.id)
374
+ rescue Twitter::Error::Forbidden
375
+ log "Already retweeted: #{tweet.user.screen_name}: #{tweet.text}"
376
+ end
377
+ end
378
+
379
+ # Follow a user
380
+ # @param user [String] username or user id
381
+ def follow(user, *args)
382
+ log "Following #{user}"
383
+ twitter.follow(user, *args)
384
+ end
385
+
386
# Unfollow a user
# @param user [String] username or user id
# @param args additional arguments forwarded to the twitter client
def unfollow(user, *args)
  log "Unfollowing #{user}"
  twitter.unfollow(user, *args)
end
157
392
 
158
- def follow(*args)
159
- log "Following #{args}"
160
- @twitter.follow(*args)
393
+ # Tweet something
394
+ # @param text [String]
395
+ def tweet(text, *args)
396
+ log "Tweeting '#{text}'"
397
+ twitter.update(text, *args)
161
398
  end
162
399
 
163
- def tweet(*args)
164
- log "Tweeting #{args.inspect}"
165
- @twitter.update(*args)
400
+ # Get a scheduler for this bot
401
+ # @return [Rufus::Scheduler]
402
+ def scheduler
403
+ @scheduler ||= Rufus::Scheduler.new
166
404
  end
167
405
 
168
- # could easily just be *args however the separation keeps it clean.
406
+ # Tweet some text with an image
407
+ # @param txt [String]
408
+ # @param pic [String] filename
169
409
  def pictweet(txt, pic, *args)
170
410
  log "Tweeting #{txt.inspect} - #{pic} #{args}"
171
- @twitter.update_with_media(txt, File.new(pic), *args)
411
+ twitter.update_with_media(txt, File.new(pic), *args)
172
412
  end
173
-
174
- def on_startup(&b); @on_startup = b; end
175
- def on_follow(&b); @on_follow = b; end
176
- def on_mention(&b); @on_mention = b; end
177
- def on_timeline(&b); @on_timeline = b; end
178
- def on_message(&b); @on_message = b; end
179
413
  end
180
414
  end