twitter_ebooks 2.3.2 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,7 +1,7 @@
1
1
  $debug = false
2
2
 
3
3
  def log(*args)
4
- STDERR.puts args.map(&:to_s).join(' ')
4
+ STDERR.print args.map(&:to_s).join(' ') + "\n"
5
5
  STDERR.flush
6
6
  end
7
7
 
@@ -11,11 +11,11 @@ module Ebooks
11
11
  SKELETON_PATH = File.join(GEM_PATH, 'skeleton')
12
12
  TEST_PATH = File.join(GEM_PATH, 'test')
13
13
  TEST_CORPUS_PATH = File.join(TEST_PATH, 'corpus/0xabad1dea.tweets')
14
+ INTERIM = :interim
14
15
  end
15
16
 
16
17
  require 'twitter_ebooks/nlp'
17
18
  require 'twitter_ebooks/archive'
18
- require 'twitter_ebooks/markov'
19
19
  require 'twitter_ebooks/suffix'
20
20
  require 'twitter_ebooks/model'
21
21
  require 'twitter_ebooks/bot'
@@ -21,9 +21,9 @@ module Ebooks
21
21
  @config[:consumer_key] = STDIN.gets.chomp
22
22
  print "Consumer secret: "
23
23
  @config[:consumer_secret] = STDIN.gets.chomp
24
- print "Oauth token: "
24
+ print "Access token: "
25
25
  @config[:oauth_token] = STDIN.gets.chomp
26
- print "Oauth secret: "
26
+ print "Access secret: "
27
27
  @config[:oauth_token_secret] = STDIN.gets.chomp
28
28
 
29
29
  File.open(CONFIG_PATH, 'w') do |f|
@@ -31,19 +31,22 @@ module Ebooks
31
31
  end
32
32
  end
33
33
 
34
- Twitter.configure do |config|
34
+ Twitter::REST::Client.new do |config|
35
35
  config.consumer_key = @config[:consumer_key]
36
36
  config.consumer_secret = @config[:consumer_secret]
37
- config.oauth_token = @config[:oauth_token]
38
- config.oauth_token_secret = @config[:oauth_token_secret]
37
+ config.access_token = @config[:oauth_token]
38
+ config.access_token_secret = @config[:oauth_token_secret]
39
39
  end
40
-
41
- Twitter::Client.new
42
40
  end
43
41
 
44
- def initialize(username, path, client=nil)
42
+ def initialize(username, path=nil, client=nil)
45
43
  @username = username
46
- @path = path || "#{username}.json"
44
+ @path = path || "corpus/#{username}.json"
45
+
46
+ if File.directory?(@path)
47
+ @path = File.join(@path, "#{username}.json")
48
+ end
49
+
47
50
  @client = client || make_client
48
51
 
49
52
  if File.exists?(@path)
@@ -1,180 +1,414 @@
1
- #!/usr/bin/env ruby
2
1
  # encoding: utf-8
3
2
  require 'twitter'
4
- require 'tweetstream'
5
3
  require 'rufus/scheduler'
6
4
 
7
5
  module Ebooks
8
- class Bot
9
- attr_accessor :consumer_key, :consumer_secret,
10
- :oauth_token, :oauth_token_secret
6
# Raised when a bot cannot run because required configuration is missing
# (for example a nil username).
#
# Derives from StandardError rather than Exception so that an ordinary
# `rescue` / `rescue => e` can handle it; `rescue ConfigurationError` and
# `rescue Exception` keep working as before.
class ConfigurationError < StandardError
end
11
8
 
12
- attr_accessor :username
9
# Represents a single reply tree of tweets
class Conversation
  # @return [Time] when a tweet was last added (or the conversation created)
  attr_reader :last_update

  # @param bot [Ebooks::Bot]
  def initialize(bot)
    @bot = bot
    @tweets = []
    @last_update = Time.now
  end

  # Records a tweet as part of this conversation and refreshes last_update
  # @param tweet [Twitter::Tweet] tweet to add
  def add(tweet)
    @tweets.push(tweet)
    @last_update = Time.now
  end

  # Make an informed guess as to whether a user is a bot based
  # on their behavior in this conversation
  # @param username [String]
  # @return [Boolean]
  def is_bot?(username)
    authored = @tweets.select { |t| t.user.screen_name == username }

    # Three tweets from the same user within 30 seconds looks automated
    rapid_fire = authored.length > 2 &&
                 (authored[-1].created_at - authored[-3].created_at) < 30
    return true if rapid_fire

    username.include?("ebooks")
  end

  # Figure out whether to keep this user in the reply prefix
  # We want to avoid spamming non-participating users
  # @param username [String]
  # @return [Boolean] true while the conversation has at most four tweets,
  #   or when the user authored one of the last four
  def can_include?(username)
    return true if @tweets.length <= 4

    @tweets.last(4).any? { |t| t.user.screen_name == username }
  end
end
26
47
 
27
- # Override with callback
28
- b.call(self)
48
# Meta information about a tweet that we calculate for ourselves
class TweetMeta
  # @return [Array<String>] usernames mentioned in tweet
  attr_accessor :mentions
  # @return [String] text of tweets with mentions removed
  attr_accessor :mentionless
  # @return [Array<String>] usernames to include in a reply
  attr_accessor :reply_mentions
  # @return [String] mentions to start reply with
  attr_accessor :reply_prefix
  # @return [Integer] available chars for reply
  attr_accessor :limit

  # @return [Ebooks::Bot] associated bot
  attr_accessor :bot
  # @return [Twitter::Tweet] associated tweet
  attr_accessor :tweet

  # Check whether this tweet mentions our bot
  # @return [Boolean]
  def mentions_bot?
    # To check if this is someone talking to us, ensure:
    # - The tweet mentions list contains our username
    # - The tweet is not being retweeted by somebody else
    # - Or soft-retweeted by somebody else
    @mentions.map(&:downcase).include?(@bot.username.downcase) &&
      !@tweet.retweeted_status? &&
      !@tweet.text.start_with?('RT ')
  end

  # @param bot [Ebooks::Bot]
  # @param ev [Twitter::Tweet]
  def initialize(bot, ev)
    @bot = bot
    @tweet = ev

    entities = ev.attrs[:entities][:user_mentions]
    @mentions = entities.map { |x| x[:screen_name] }

    # Process mentions to figure out who to reply to
    # i.e. not self and nobody who has seen too many secondary mentions
    own_name = @bot.username.downcase
    reply_mentions = @mentions.reject do |m|
      # Compare with self case-insensitively (both sides downcased), but hand
      # the name to the conversation unchanged: can_include? matches it
      # against screen names with ==.
      m.downcase == own_name || !@bot.conversation(ev).can_include?(m)
    end
    @reply_mentions = ([ev.user.screen_name] + reply_mentions).uniq

    @reply_prefix = @reply_mentions.map { |m| '@' + m }.join(' ') + ' '
    @limit = 140 - @reply_prefix.length

    mless = ev.text
    begin
      # Cut mentions out back-to-front so earlier indices stay valid
      entities.reverse.each do |entity|
        last = mless[entity[:indices][1]..-1] || ''
        mless = mless[0...entity[:indices][0]] + last.strip
      end
    rescue StandardError
      # Dump the offending input for debugging, then let the error propagate
      p entities
      p ev.text
      raise
    end
    @mentionless = mless
  end
end
32
109
 
110
+ class Bot
111
+ # @return [String] OAuth consumer key for a Twitter app
112
+ attr_accessor :consumer_key
113
+ # @return [String] OAuth consumer secret for a Twitter app
114
+ attr_accessor :consumer_secret
115
+ # @return [String] OAuth access token from `ebooks auth`
116
+ attr_accessor :access_token
117
+ # @return [String] OAuth access secret from `ebooks auth`
118
+ attr_accessor :access_token_secret
119
+ # @return [String] Twitter username of bot
120
+ attr_accessor :username
121
+ # @return [Array<String>] list of usernames to block on contact
122
+ attr_accessor :blacklist
123
+ # @return [Hash{String => Ebooks::Conversation}] maps tweet ids to their conversation contexts
124
+ attr_accessor :conversations
125
+ # @return [Range, Integer] range of seconds to delay in delay method
126
+ attr_accessor :delay_range
127
+
128
# Registry of every bot instance created so far; the list is created
# on first access.
# @return [Array] list of all defined bots
def self.all
  @@all ||= []
end

# Fetches a bot by username
# @param username [String]
# @return [Ebooks::Bot, nil] the first registered bot whose username
#   equals the argument, or nil when none does
def self.get(username)
  all.find { |candidate| candidate.username == username }
end
137
+
138
+ # Logs info to stdout in the context of this bot
33
139
  def log(*args)
34
- STDOUT.puts "@#{@username}: " + args.map(&:to_s).join(' ')
140
+ STDOUT.print "@#{@username}: " + args.map(&:to_s).join(' ') + "\n"
35
141
  STDOUT.flush
36
142
  end
37
143
 
38
- def configure
39
- TweetStream.configure do |config|
144
# Initializes and configures bot, then registers it in Bot.all
# @param username [String] Twitter username of bot
# @param b Optional block, called with the new bot after configure has run
def initialize(username, &b)
  # ||= keeps any value that is already set
  @blacklist ||= []
  @conversations ||= {}
  # Tweet ids we've already observed, to avoid duplication
  @seen_tweets ||= {}

  @username = username
  configure

  b.call(self) if b
  Bot.all.push(self)
end
159
+
160
# Find or create the conversation context for this tweet
#
# The conversation is looked up first under the id of the tweet being
# replied to, then under the tweet's own id. Whichever is found (or a
# new Conversation) is stored under both ids.
# @param tweet [Twitter::Tweet]
# @return [Ebooks::Conversation]
def conversation(tweet)
  is_reply = tweet.in_reply_to_status_id?

  thread = @conversations[tweet.in_reply_to_status_id] if is_reply
  thread ||= @conversations[tweet.id] || Conversation.new(self)

  @conversations[tweet.in_reply_to_status_id] = thread if is_reply
  @conversations[tweet.id] = thread

  # Expire any old conversations to prevent memory growth
  @conversations.delete_if do |_id, other|
    other != thread && Time.now - other.last_update > 3600
  end

  thread
end
186
+
187
# Builds the REST client from this bot's credentials on first use and
# reuses the same instance afterwards.
# @return [Twitter::REST::Client] underlying REST client from twitter gem
def twitter
  return @twitter if @twitter

  @twitter = Twitter::REST::Client.new do |client|
    client.consumer_key = @consumer_key
    client.consumer_secret = @consumer_secret
    client.access_token = @access_token
    client.access_token_secret = @access_token_secret
  end
end
45
196
 
46
- Twitter.configure do |config|
197
# Builds the streaming client from this bot's credentials on first use
# and reuses the same instance afterwards.
# @return [Twitter::Streaming::Client] underlying streaming client from twitter gem
def stream
  return @stream if @stream

  @stream = Twitter::Streaming::Client.new do |client|
    client.consumer_key = @consumer_key
    client.consumer_secret = @consumer_secret
    client.access_token = @access_token
    client.access_token_secret = @access_token_secret
  end
end
206
+
207
# Calculate some meta information about a tweet relevant for replying
# @param ev [Twitter::Tweet]
# @return [Ebooks::TweetMeta] a new TweetMeta built from this bot and the tweet
def meta(ev)
  bot = self
  TweetMeta.new(bot, ev)
end
213
+
214
# Receive an event from the twitter stream and dispatch it to handlers.
#
# Fires :direct_message for DMs, :follow (with the following user) for
# follow events, and :mention or :timeline for tweets. Events caused by the
# bot itself, textless tweets and already-seen tweet ids are ignored.
# @param ev [Object] Twitter streaming event
def receive_event(ev)
  if ev.is_a? Array # Initial array sent on first connection
    log "Online!"
    return
  end

  if ev.is_a? Twitter::DirectMessage
    return if ev.sender.screen_name == @username # Don't reply to self
    log "DM from @#{ev.sender.screen_name}: #{ev.text}"
    fire(:direct_message, ev)

  elsif ev.respond_to?(:name) && ev.name == :follow
    return if ev.source.screen_name == @username
    log "Followed by #{ev.source.screen_name}"
    fire(:follow, ev.source)

  elsif ev.is_a? Twitter::Tweet
    return unless ev.text # If it's not a text-containing tweet, ignore it
    return if ev.user.screen_name == @username # Ignore our own tweets

    info = meta(ev)

    if blacklisted?(ev.user.screen_name)
      log "Blocking blacklisted user @#{ev.user.screen_name}"
      # Use the memoizing accessor: @twitter is nil until the client has
      # been built, so calling through the ivar could hit nil.
      twitter.block(ev.user.screen_name)
    end

    # Avoid responding to duplicate tweets
    if @seen_tweets[ev.id]
      log "Not firing event for duplicate tweet #{ev.id}"
      return
    else
      @seen_tweets[ev.id] = true
    end

    if info.mentions_bot?
      log "Mention from @#{ev.user.screen_name}: #{ev.text}"
      conversation(ev).add(ev)
      fire(:mention, ev)
    else
      fire(:timeline, ev)
    end

  elsif ev.is_a?(Twitter::Streaming::DeletedTweet) ||
        ev.is_a?(Twitter::Streaming::Event)
    # pass
  else
    # Unknown event type: log it
    log ev
  end
end
79
266
 
80
- @stream.on_event(:follow) do |event|
81
- next if event[:source][:screen_name] == @username
82
- log "Followed by #{event[:source][:screen_name]}"
83
- @on_follow.call(event[:source]) if @on_follow
267
# Validates credentials, builds the REST client and fires the startup event.
#
# Logs a hint and exits the process with status 1 when any OAuth credential
# is nil or empty.
# @raise [ConfigurationError] if the bot username is nil
def prepare
  # Sanity check
  raise ConfigurationError, "bot username cannot be nil" if @username.nil?

  missing = ->(value) { value.nil? || value.empty? }

  # Both consumer values are checked for emptiness, not consumer_key twice
  if missing.call(@consumer_key) || missing.call(@consumer_secret)
    log "Missing consumer_key or consumer_secret. These details can be acquired by registering a Twitter app at https://apps.twitter.com/"
    exit 1
  end

  if missing.call(@access_token) || missing.call(@access_token_secret)
    log "Missing access_token or access_token_secret. Please run `ebooks auth`."
    exit 1
  end

  # Instantiate the memoized client before any handler runs
  twitter
  fire(:startup)
end
98
289
 
99
- reply_mentions = mentions.reject { |m| m.downcase == @username.downcase }
100
- reply_mentions = [ev[:user][:screen_name]] + reply_mentions
290
# Start running user event stream
# Every event yielded by the streaming client is handed to receive_event.
def start
  log "starting tweet stream"

  stream.user { |event| receive_event(event) }
end
104
298
 
105
- mless = ev[:text]
106
- begin
107
- ev.attrs[:entities][:user_mentions].reverse.each do |entity|
108
- last = mless[entity[:indices][1]..-1]||''
109
- mless = mless[0...entity[:indices][0]] + last.strip
110
- end
111
- rescue Exception
112
- p ev.attrs[:entities][:user_mentions]
113
- p ev[:text]
114
- raise
115
- end
116
- meta[:mentionless] = mless
117
-
118
- # To check if this is a mention, ensure:
119
- # - The tweet mentions list contains our username
120
- # - The tweet is not being retweeted by somebody else
121
- # - Or soft-retweeted by somebody else
122
- if mentions.map(&:downcase).include?(@username.downcase) && !ev[:retweeted_status] && !ev[:text].start_with?('RT ')
123
- log "Mention from @#{ev[:user][:screen_name]}: #{ev[:text]}"
124
- @on_mention.call(ev, meta) if @on_mention
125
- else
126
- @on_timeline.call(ev, meta) if @on_timeline
127
- end
299
# Fire an event
# Calls the on_<event> method with the given arguments when this object
# responds to it; otherwise does nothing.
# @param event [Symbol] event to fire
# @param args arguments for event handler
# @return the handler's result, or nil when there is no handler
def fire(event, *args)
  handler = :"on_#{event}"
  return unless respond_to?(handler)

  send(handler, *args)
end
130
308
 
131
- # Wrapper for EM.add_timer
132
- # Delays add a greater sense of humanity to bot behaviour
133
- def delay(time, &b)
134
- time = time.to_a.sample unless time.is_a? Integer
135
- EM.add_timer(time, &b)
309
# Delay an action for a variable period of time
#
# A number is used as the delay directly; anything else is converted with
# to_a and one element is picked at random.
# @param range [Range, Integer] range of seconds to choose for delay
#   (defaults to the bot's delay_range)
# @param b block to run once the delay has elapsed
# @return the block's result
# @raise [ArgumentError] if no delay can be determined (e.g. range is nil)
def delay(range=@delay_range, &b)
  # Numeric input must be assigned too; otherwise time stays nil
  time = range.is_a?(Numeric) ? range : range.to_a.sample
  raise ArgumentError, "no delay given and no delay_range configured" if time.nil?

  sleep time
  b.call
end
+ end
316
+
317
# Check if a username is blacklisted
#
# The comparison ignores letter case, in line with how mentions of the bot
# are matched elsewhere in this file.
# @param username [String]
# @return [Boolean]
def blacklisted?(username)
  wanted = username.downcase
  @blacklist.any? { |entry| entry.downcase == wanted }
end
137
327
 
138
328
  # Reply to a tweet or a DM.
139
- # Applies configurable @reply_delay range
329
+ # @param ev [Twitter::Tweet, Twitter::DirectMessage]
330
+ # @param text [String] contents of reply excluding reply_prefix
331
+ # @param opts [Hash] additional params to pass to twitter gem
140
332
  def reply(ev, text, opts={})
141
333
  opts = opts.clone
142
334
 
143
335
  if ev.is_a? Twitter::DirectMessage
144
- log "Sending DM to @#{ev[:sender][:screen_name]}: #{text}"
145
- @twitter.direct_message_create(ev[:sender][:screen_name], text, opts)
336
+ log "Sending DM to @#{ev.sender.screen_name}: #{text}"
337
+ twitter.create_direct_message(ev.sender.screen_name, text, opts)
146
338
  elsif ev.is_a? Twitter::Tweet
147
- log "Replying to @#{ev[:user][:screen_name]} with: #{text}"
148
- @twitter.update(text, in_reply_to_status_id: ev[:id])
339
+ meta = meta(ev)
340
+
341
+ if conversation(ev).is_bot?(ev.user.screen_name)
342
+ log "Not replying to suspected bot @#{ev.user.screen_name}"
343
+ return false
344
+ end
345
+
346
+ log "Replying to @#{ev.user.screen_name} with: #{meta.reply_prefix + text}"
347
+ tweet = twitter.update(meta.reply_prefix + text, in_reply_to_status_id: ev.id)
348
+ conversation(tweet).add(tweet)
349
+ tweet
149
350
  else
150
351
  raise Exception("Don't know how to reply to a #{ev.class}")
151
352
  end
152
353
  end
153
354
 
154
- def scheduler
155
- @scheduler ||= Rufus::Scheduler.new
355
# Favorite a tweet
# A Twitter::Error::Forbidden from the client is logged as
# "Already favorited" instead of being raised.
# @param tweet [Twitter::Tweet]
def favorite(tweet)
  author = tweet.user.screen_name
  body = tweet.text
  log "Favoriting @#{author}: #{body}"

  begin
    twitter.favorite(tweet.id)
  rescue Twitter::Error::Forbidden
    log "Already favorited: #{author}: #{body}"
  end
end
366
+
367
# Retweet a tweet
# A Twitter::Error::Forbidden from the client is logged as
# "Already retweeted" instead of being raised.
# @param tweet [Twitter::Tweet]
def retweet(tweet)
  author = tweet.user.screen_name
  body = tweet.text
  log "Retweeting @#{author}: #{body}"

  begin
    twitter.retweet(tweet.id)
  rescue Twitter::Error::Forbidden
    log "Already retweeted: #{author}: #{body}"
  end
end
378
+
379
# Follow a user
# @param user [String] username or user id
# @param args extra arguments handed through to the twitter client
def follow(user, *args)
  log("Following #{user}")
  client = twitter
  client.follow(user, *args)
end
385
+
386
# Unfollow a user
# @param user [String] username or user id
# @param args extra arguments handed through to the twitter client
def unfollow(user, *args)
  log "Unfollowing #{user}"
  # Was misspelled `twiter`, which raised NameError on every call
  twitter.unfollow(user, *args)
end
157
392
 
158
- def follow(*args)
159
- log "Following #{args}"
160
- @twitter.follow(*args)
393
# Tweet something
# @param text [String]
# @param args extra arguments handed through to the twitter client
def tweet(text, *args)
  log("Tweeting '#{text}'")
  client = twitter
  client.update(text, *args)
end
162
399
 
163
- def tweet(*args)
164
- log "Tweeting #{args.inspect}"
165
- @twitter.update(*args)
400
# Get a scheduler for this bot
# The scheduler is created on first use and reused on later calls.
# @return [Rufus::Scheduler]
def scheduler
  return @scheduler if @scheduler

  @scheduler = Rufus::Scheduler.new
end
167
405
 
168
- # could easily just be *args however the separation keeps it clean.
406
# Tweet some text with an image
# @param txt [String]
# @param pic [String] filename
# @param args extra arguments handed through to the twitter client
# @return the twitter client's result
def pictweet(txt, pic, *args)
  log "Tweeting #{txt.inspect} - #{pic} #{args}"
  # Block form closes the file handle once the upload call returns,
  # including when it raises.
  File.open(pic) do |media|
    twitter.update_with_media(txt, media, *args)
  end
end
173
-
174
- def on_startup(&b); @on_startup = b; end
175
- def on_follow(&b); @on_follow = b; end
176
- def on_mention(&b); @on_mention = b; end
177
- def on_timeline(&b); @on_timeline = b; end
178
- def on_message(&b); @on_message = b; end
179
413
  end
180
414
  end