twitter_ebooks_poll 3.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,116 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+
4
+ require 'twitter'
5
+ require 'json'
6
+
7
# Location of the JSON file holding the OAuth credentials used for archiving;
# it lives directly under the directory named by the HOME environment variable.
CONFIG_PATH = ENV['HOME'].to_s + '/.ebooksrc'
8
+
9
+ module Ebooks
10
# Maintains a local JSON archive of a single user's tweets and keeps it in
# sync with Twitter through a REST client.
class Archive
  # @return [Array<Hash>, nil] archived tweet attribute hashes, newest first;
  #   nil until an archive has been loaded or tweets have been downloaded
  attr_reader :tweets

  # Builds a REST client from the credentials stored at CONFIG_PATH. When that
  # file does not exist the credentials are prompted for on STDIN and saved.
  # @return [Twitter::REST::Client]
  def make_client
    if File.exist?(CONFIG_PATH)
      @config = JSON.parse(File.read(CONFIG_PATH), symbolize_names: true)
    else
      @config = {}

      puts "As Twitter no longer allows anonymous API access, you'll need to enter the auth details of any account to use for archiving. These will be stored in #{CONFIG_PATH} if you need to change them later."
      @config[:consumer_key] = prompt_for("Consumer key: ")
      @config[:consumer_secret] = prompt_for("Consumer secret: ")
      @config[:oauth_token] = prompt_for("Access token: ")
      @config[:oauth_token_secret] = prompt_for("Access secret: ")

      # The file holds secrets, so create it readable by the owner only.
      File.open(CONFIG_PATH, 'w', 0o600) do |f|
        f.write(JSON.pretty_generate(@config))
      end
    end

    Twitter::REST::Client.new do |config|
      config.consumer_key = @config[:consumer_key]
      config.consumer_secret = @config[:consumer_secret]
      config.access_token = @config[:oauth_token]
      config.access_token_secret = @config[:oauth_token_secret]
    end
  end

  # @param username [String] account whose tweets are archived
  # @param path [String, nil] archive file, or a directory in which
  #   "<username>.json" is used; defaults to "corpus/<username>.json"
  # @param client [Object, nil] REST client to use; built with #make_client
  #   when omitted
  def initialize(username, path=nil, client=nil)
    @username = username
    @path = path || "corpus/#{username}.json"
    @path = File.join(@path, "#{username}.json") if File.directory?(@path)

    @client = client || make_client

    if File.exist?(@path) && !File.zero?(@path)
      @filetext = File.read(@path, :encoding => 'utf-8')
      @tweets = JSON.parse(@filetext, symbolize_names: true)
      log "Currently #{@tweets.length} tweets for #{@username}"
    else
      @tweets = nil
      log "New archive for @#{username} at #{@path}"
    end
  end

  # Downloads new tweets and rewrites the archive file.
  #
  # We use this structure to ensure that
  # a) if there's an issue opening the file, we error out before download
  # b) if there's an issue during download we restore the original
  def sync
    File.open(@path, 'w') do |file|
      begin
        sync_to(file)
      rescue Exception
        # Deliberately broad: an interrupt must also restore the previous
        # contents, and the exception is always re-raised afterwards.
        file.rewind
        file.truncate(0)
        file.write(@filetext.to_s)
        raise
      end
    end
  end

  # Fetches every tweet newer than the newest archived one, prepends them to
  # the archive and writes the whole archive to +file+ as pretty JSON.
  # @param file [IO] writable stream receiving the serialized archive
  def sync_to(file)
    tweets = []
    max_id = nil

    opts = {
      count: 200,
      #include_rts: false,
      trim_user: true
    }

    # An existing but empty archive has no newest tweet to start from.
    opts[:since_id] = @tweets[0][:id] unless @tweets.nil? || @tweets.empty?

    loop do
      opts[:max_id] = max_id unless max_id.nil?
      begin
        page = @client.user_timeline(@username, opts)
      rescue Twitter::Error::TooManyRequests
        log "Rate limit exceeded. Waiting for 5 mins before retry."
        sleep 60*5
        retry
      end
      break if page.empty?
      tweets += page
      log "Received #{tweets.length} new tweets"
      # max_id is inclusive, so step below the oldest tweet already received;
      # otherwise every page would repeat the previous page's last tweet.
      max_id = page.last.id - 1
    end

    if tweets.empty?
      log "No new tweets"
    else
      fresh = tweets.map { |tw| tw.attrs.reject { |key, _| key == :entities } }
      @tweets = fresh + (@tweets || [])
    end
    # Never serialize nil: a file containing "null" cannot be reloaded as an archive.
    file.write(JSON.pretty_generate(@tweets || []))
  end

  private

  # Prints +label+ and returns one line read from STDIN without its newline
  # ("" when STDIN is at end of input).
  def prompt_for(label)
    print label
    STDIN.gets.to_s.chomp
  end
end
116
+ end
@@ -0,0 +1,521 @@
1
+ # encoding: utf-8
2
+ require 'twitter'
3
+ require 'rufus/scheduler'
4
+
5
+ # Monkeypatch hack to fix upstream dependency issue
6
+ # https://github.com/sferik/twitter/issues/709
7
class HTTP::URI
  # Workaround for the upstream dependency issue linked above.
  # @return [Integer, nil] 443 for HTTPS URIs, nil for every other scheme
  def port
    https? ? 443 : nil
  end
end
12
+
13
+ module Ebooks
14
# Raised when a bot is set up incorrectly (for example when `configure` was
# not overridden or the username is missing). Inherits from StandardError so
# that an ordinary `rescue => e` handles it like any other application error.
class ConfigurationError < StandardError
end
16
+
17
+ # Represents a single reply tree of tweets
18
# Represents a single reply tree of tweets
class Conversation
  # @return [Time] when the conversation was created or last received a tweet
  attr_reader :last_update

  # @param bot [Ebooks::Bot]
  def initialize(bot)
    @bot = bot
    @tweets = []
    @last_update = Time.now
  end

  # Records a tweet as part of this conversation and refreshes last_update.
  # @param tweet [Twitter::Tweet] tweet to add
  def add(tweet)
    @tweets << tweet
    @last_update = Time.now
  end

  # Make an informed guess as to whether a user is a bot based
  # on their behavior in this conversation. A user is only ever flagged once
  # they have more than two tweets here, and then when their name contains
  # "ebooks" (in any letter case) or their last three tweets span under 12 seconds.
  # @param username [String]
  # @return [Boolean]
  def is_bot?(username)
    name = username.downcase
    usertweets = @tweets.select { |t| t.user.screen_name.downcase == name }
    return false unless usertweets.length > 2

    name.include?('ebooks') ||
      (usertweets[-1].created_at - usertweets[-3].created_at) < 12
  end

  # Figure out whether to keep this user in the reply prefix
  # We want to avoid spamming non-participating users
  # @param username [String]
  # @return [Boolean] true while the conversation has at most four tweets, or
  #   when the user has tweeted in it
  def can_include?(username)
    name = username.downcase
    @tweets.length <= 4 ||
      @tweets.any? { |t| t.user.screen_name.downcase == name }
  end
end
53
+
54
+ # Meta information about a tweet that we calculate for ourselves
55
# Meta information about a tweet that we calculate for ourselves
class TweetMeta
  # Image size names that are appended to a media URI as ":<size>".
  MEDIA_SIZES = %w[thumb small medium large]

  # @return [Array<String>] usernames mentioned in tweet
  attr_accessor :mentions
  # @return [String] text of tweets with mentions removed
  attr_accessor :mentionless
  # @return [Array<String>] usernames to include in a reply
  attr_accessor :reply_mentions
  # @return [String] mentions to start reply with
  attr_accessor :reply_prefix
  # @return [Integer] available chars for reply
  attr_accessor :limit

  # @return [Ebooks::Bot] associated bot
  attr_accessor :bot
  # @return [Twitter::Tweet] associated tweet
  attr_accessor :tweet

  # Check whether this tweet mentions our bot.
  # It counts as someone talking to us only when the mention list contains
  # our username, the tweet is not a retweet, and its text does not look like
  # a soft retweet (a quote mark, RT, via, by or from directly before an @).
  # @return [Boolean]
  def mentions_bot?
    return false unless @mentions.map(&:downcase).include?(@bot.username.downcase)
    return false if @tweet.retweeted_status?

    !@tweet.text.match(/([`'‘’"“”]|RT|via|by|from)\s*@/i)
  end

  # @param bot [Ebooks::Bot]
  # @param ev [Twitter::Tweet]
  def initialize(bot, ev)
    @bot = bot
    @tweet = ev

    @mentions = ev.attrs[:entities][:user_mentions].map { |x| x[:screen_name] }

    # Decide who to reply to: never ourselves, and nobody the conversation
    # says should no longer be included.
    extra = @mentions.reject do |name|
      name.downcase == @bot.username.downcase || !@bot.conversation(ev).can_include?(name)
    end
    @reply_mentions = ([ev.user.screen_name] + extra).uniq

    @reply_prefix = @reply_mentions.map { |name| '@' + name }.join(' ') + ' '
    @limit = 140 - @reply_prefix.length

    # Cut the mentions out of the text back to front so that the indices of
    # the mentions still to be removed stay valid.
    stripped = ev.text
    begin
      ev.attrs[:entities][:user_mentions].reverse_each do |entity|
        tail = stripped[entity[:indices][1]..-1] || ''
        stripped = stripped[0...entity[:indices][0]] + tail.strip
      end
    rescue Exception
      # Dump the offending input for debugging, then let the error propagate.
      p ev.attrs[:entities][:user_mentions]
      p ev.text
      raise
    end
    @mentionless = stripped
  end

  # Get an array of media uris in tweet.
  # @param size_input [String] A twitter image size to return. Supported sizes
  #   are thumb, small, medium and large; any other value adds no size suffix
  # @return [Array<String>] image URIs included in tweet
  def media_uris(size_input = '')
    suffix = MEDIA_SIZES.include?(size_input) ? ":#{size_input}" : ''
    return [] unless @tweet.media?

    @tweet.media.each_with_object([]) do |item, uris|
      uris << item.media_url.to_s + suffix
    end
  end
end
143
+
144
# Base class for bots. It holds the OAuth credentials, tracks conversations,
# polls the home and mentions timelines and dispatches what it receives to
# optional `on_<event>` handler methods (see #fire). Subclasses must override
# #configure.
class Bot
  # @return [String] OAuth consumer key for a Twitter app
  attr_accessor :consumer_key
  # @return [String] OAuth consumer secret for a Twitter app
  attr_accessor :consumer_secret
  # @return [String] OAuth access token from `ebooks auth`
  attr_accessor :access_token
  # @return [String] OAuth access secret from `ebooks auth`
  attr_accessor :access_token_secret
  # @return [Twitter::User] Twitter user object of bot
  attr_accessor :user
  # @return [String] Twitter username of bot
  attr_accessor :username
  # @return [Array<String>] list of usernames to block on contact
  attr_accessor :blacklist
  # @return [Hash{String => Ebooks::Conversation}] maps tweet ids to their conversation contexts
  attr_accessor :conversations
  # @return [Range, Integer] range of seconds to delay in delay method
  attr_accessor :delay_range

  # @return [Array] list of all defined bots
  def self.all
    @@all ||= []
  end

  # Fetches a bot by username (compared case-insensitively)
  # @param username [String]
  # @return [Ebooks::Bot, nil] nil when no bot has that username
  def self.get(username)
    all.find { |bot| bot.username.downcase == username.downcase }
  end

  # Logs info to stdout in the context of this bot
  # @param args objects to print, joined with spaces
  def log(*args)
    STDOUT.print "@#{@username}: " + args.map(&:to_s).join(' ') + "\n"
    STDOUT.flush
  end

  # Initializes and configures bot, then registers it in Bot.all
  # @param username [String] expected Twitter username of the bot
  # @param b Block to call with new bot, after #configure has run
  def initialize(username, &b)
    @blacklist ||= []
    @conversations ||= {}
    # Tweet ids we've already observed, to avoid duplication
    @seen_tweets ||= {}

    @username = username
    @delay_range ||= 1..6
    configure

    b.call(self) unless b.nil?
    Bot.all << self
  end

  # Hook for subclasses to set credentials and options.
  # @raise [ConfigurationError] always, unless overridden
  def configure
    raise ConfigurationError, "Please override the 'configure' method for subclasses of Ebooks::Bot."
  end

  # Find or create the conversation context for this tweet
  # @param tweet [Twitter::Tweet]
  # @return [Ebooks::Conversation]
  def conversation(tweet)
    is_reply = tweet.in_reply_to_status_id?

    conv = @conversations[tweet.in_reply_to_status_id] if is_reply
    conv = @conversations[tweet.id] || Conversation.new(self) if conv.nil?

    @conversations[tweet.in_reply_to_status_id] = conv if is_reply
    @conversations[tweet.id] = conv

    # Expire any old conversations to prevent memory growth
    now = Time.now
    @conversations.delete_if do |_id, other|
      other != conv && now - other.last_update > 3600
    end

    conv
  end

  # @return [Twitter::REST::Client] underlying REST client from twitter gem
  def twitter
    @twitter ||= Twitter::REST::Client.new do |config|
      config.consumer_key = @consumer_key
      config.consumer_secret = @consumer_secret
      config.access_token = @access_token
      config.access_token_secret = @access_token_secret
    end
  end

  # @return [Twitter::Streaming::Client] underlying streaming client from twitter gem
  def stream
    @stream ||= Twitter::Streaming::Client.new do |config|
      config.consumer_key = @consumer_key
      config.consumer_secret = @consumer_secret
      config.access_token = @access_token
      config.access_token_secret = @access_token_secret
    end
  end

  # Calculate some meta information about a tweet relevant for replying
  # @param ev [Twitter::Tweet]
  # @return [Ebooks::TweetMeta]
  def meta(ev)
    TweetMeta.new(self, ev)
  end

  # Receive an event (from the stream or from polling) and fire the matching
  # handler: :connect, :message, :retweet, :mention, :timeline, :follow,
  # :favorite / :unfavorite, or a user update.
  # @param ev [Object] Twitter event
  def receive_event(ev)
    case ev
    when Array # Initial array sent on first connection
      log "Online!"
      fire(:connect, ev)
      return
    when Twitter::DirectMessage
      return if ev.sender.id == @user.id # Don't reply to self
      log "DM from @#{ev.sender.screen_name}: #{ev.text}"
      fire(:message, ev)
    when Twitter::Tweet
      return unless ev.text # If it's not a text-containing tweet, ignore it
      return if ev.user.id == @user.id # Ignore our own tweets

      if ev.retweet? && ev.retweeted_tweet.user.id == @user.id
        # Someone retweeted our tweet!
        fire(:retweet, ev)
        return
      end

      tweet_meta = meta(ev)

      if blacklisted?(ev.user.screen_name)
        log "Blocking blacklisted user @#{ev.user.screen_name}"
        # Go through the memoizing accessor: the raw instance variable is nil
        # until the client has been built.
        twitter.block(ev.user.screen_name)
      end

      # Avoid responding to duplicate tweets
      if @seen_tweets[ev.id]
        log "Not firing event for duplicate tweet #{ev.id}"
        return
      else
        @seen_tweets[ev.id] = true
      end

      if tweet_meta.mentions_bot?
        log "Mention from @#{ev.user.screen_name}: #{ev.text}"
        conversation(ev).add(ev)
        fire(:mention, ev)
      else
        fire(:timeline, ev)
      end
    when Twitter::Streaming::Event
      case ev.name
      when :follow
        return if ev.source.id == @user.id
        log "Followed by #{ev.source.screen_name}"
        fire(:follow, ev.source)
      when :favorite, :unfavorite
        return if ev.source.id == @user.id # Ignore our own favorites
        log "@#{ev.source.screen_name} #{ev.name.to_s}d: #{ev.target_object.text}"
        fire(ev.name, ev.source, ev.target_object)
      when :user_update
        update_myself ev.source
      end
    when Twitter::Streaming::DeletedTweet
      # Pass
    else
      log ev
    end
  end

  # Updates @user and @username, then fires :user_update.
  # @param new_me [Twitter::User] replaces @user only when none is set yet or
  #   when it has the same id as the current one
  def update_myself(new_me=twitter.user)
    @user = new_me if @user.nil? || new_me.id == @user.id
    @username = @user.screen_name
    log 'User information updated'
    fire(:user_update)
  end

  # Checks the configuration, loads the bot's user and fires the startup
  # event. Exits the process with status 1 when credentials are missing.
  # @raise [ConfigurationError] when the username is nil
  def prepare
    # Sanity check
    if @username.nil?
      raise ConfigurationError, "bot username cannot be nil"
    end

    if @consumer_key.nil? || @consumer_key.empty? ||
       @consumer_secret.nil? || @consumer_secret.empty?
      log "Missing consumer_key or consumer_secret. These details can be acquired by registering a Twitter app at https://apps.twitter.com/"
      exit 1
    end

    if @access_token.nil? || @access_token.empty? ||
       @access_token_secret.nil? || @access_token_secret.empty?
      log "Missing access_token or access_token_secret. Please run `ebooks auth`."
      exit 1
    end

    # Save old name
    old_name = username
    # Load user object and actual username
    update_myself
    # Warn about mismatches unless it was clearly intentional
    log "warning: bot expected to be @#{old_name} but connected to @#{username}" unless username == old_name || old_name.empty?

    fire(:startup)
  end

  # Start polling timelines. The ids of the newest tweet and mention seen are
  # persisted to "<username>.json" in the working directory so that polling
  # resumes from there on the next start.
  def start
    log "starting Twitter timeline polling"

    latest_tweet = 0
    latest_mention = 0
    # 200 is the largest page size the timeline endpoints accept.
    options_home = {count: 200}
    options_mention = {count: 200}
    persistence_file = "#{@username}.json"

    # Read last polled tweets from a persisted file, if exists
    if File.exist?(persistence_file)
      json = JSON.parse(File.read(persistence_file))
      # to_i maps a missing or null id to 0 so the comparisons below stay valid.
      latest_tweet = json['latest_tweet'].to_i
      latest_mention = json['latest_mention'].to_i
      options_home[:since_id] = latest_tweet if latest_tweet > 0
      options_mention[:since_id] = latest_mention if latest_mention > 0
      log "starting home timeline after tweet ##{latest_tweet}"
      log "starting mentions after tweet ##{latest_mention}"
    end

    # Writes both ids in one call; File.write closes the file even on error.
    save_state = lambda do
      state = {latest_tweet: latest_tweet, latest_mention: latest_mention}
      File.write(persistence_file, state.to_json + "\n")
    end

    # Poll home timeline every 70s (rate limit is 15 GETs/15min)
    scheduler.every '70s' do
      tweets = twitter.home_timeline(options_home)
      log "#{tweets.size} new tweets in timeline"
      tweets.each do |ev|
        latest_tweet = ev.id if ev.id > latest_tweet
        receive_event ev
        options_home[:since_id] = latest_tweet
      end
      save_state.call
    end

    # Poll mentions timeline every 20s (rate limit is 75 GETs/15min)
    scheduler.every '20s' do
      mentions = twitter.mentions_timeline(options_mention)
      log "#{mentions.size} new mentions in timeline"
      mentions.each do |ev|
        latest_mention = ev.id if ev.id > latest_mention
        receive_event ev
        options_mention[:since_id] = latest_mention
      end
      save_state.call
    end
  end

  # Fire an event: calls the public method "on_<event>" when this bot defines one
  # @param event [Symbol] event to fire
  # @param args arguments for event handler
  def fire(event, *args)
    handler = "on_#{event}".to_sym
    if respond_to? handler
      self.send(handler, *args)
    end
  end

  # Delay an action for a variable period of time
  # @param range [Range, Integer] range of seconds to choose a random delay
  #   from, or an exact number of seconds
  # @return the value of the block
  def delay(range=@delay_range, &b)
    time = range.is_a?(Integer) ? range : rand(range)
    sleep time
    b.call
  end

  # Check if a username is blacklisted (compared case-insensitively)
  # @param username [String]
  # @return [Boolean]
  def blacklisted?(username)
    @blacklist.map(&:downcase).include?(username.downcase)
  end

  # Reply to a tweet or a DM.
  # @param ev [Twitter::Tweet, Twitter::DirectMessage]
  # @param text [String] contents of reply excluding reply_prefix
  # @param opts [Hash] additional params to pass to twitter gem
  # @return [Twitter::Tweet, false] for tweets: the posted reply, or false
  #   when the author looks like a bot and no reply was sent
  # @raise [ArgumentError] when ev is neither a tweet nor a direct message
  def reply(ev, text, opts={})
    opts = opts.clone

    case ev
    when Twitter::DirectMessage
      log "Sending DM to @#{ev.sender.screen_name}: #{text}"
      twitter.create_direct_message(ev.sender.screen_name, text, opts)
    when Twitter::Tweet
      tweet_meta = meta(ev)

      if conversation(ev).is_bot?(ev.user.screen_name)
        log "Not replying to suspected bot @#{ev.user.screen_name}"
        return false
      end

      # Only add the prefix when the text does not already address the author.
      text = tweet_meta.reply_prefix + text unless text.match(/@#{Regexp.escape ev.user.screen_name}/i)
      log "Replying to @#{ev.user.screen_name} with: #{text}"
      tweet = twitter.update(text, opts.merge(in_reply_to_status_id: ev.id))
      conversation(tweet).add(tweet)
      tweet
    else
      raise ArgumentError, "Don't know how to reply to a #{ev.class}"
    end
  end

  # Favorite a tweet; a Forbidden response is logged as "already favorited"
  # @param tweet [Twitter::Tweet]
  def favorite(tweet)
    log "Favoriting @#{tweet.user.screen_name}: #{tweet.text}"

    begin
      twitter.favorite(tweet.id)
    rescue Twitter::Error::Forbidden
      log "Already favorited: #{tweet.user.screen_name}: #{tweet.text}"
    end
  end

  # Retweet a tweet; a Forbidden response is logged as "already retweeted"
  # @param tweet [Twitter::Tweet]
  def retweet(tweet)
    log "Retweeting @#{tweet.user.screen_name}: #{tweet.text}"

    begin
      twitter.retweet(tweet.id)
    rescue Twitter::Error::Forbidden
      log "Already retweeted: #{tweet.user.screen_name}: #{tweet.text}"
    end
  end

  # Follow a user
  # @param user [String] username or user id
  def follow(user, *args)
    log "Following #{user}"
    twitter.follow(user, *args)
  end

  # Unfollow a user
  # @param user [String] username or user id
  def unfollow(user, *args)
    log "Unfollowing #{user}"
    twitter.unfollow(user, *args)
  end

  # Tweet something
  # @param text [String]
  def tweet(text, *args)
    log "Tweeting '#{text}'"
    twitter.update(text, *args)
  end

  # Get a scheduler for this bot
  # @return [Rufus::Scheduler]
  def scheduler
    @scheduler ||= Rufus::Scheduler.new
  end

  # Tweet some text with an image
  # @param txt [String]
  # @param pic [String] filename
  def pictweet(txt, pic, *args)
    log "Tweeting #{txt.inspect} - #{pic} #{args}"
    # Block form so the image file is closed once the upload call returns.
    File.open(pic) do |media|
      twitter.update_with_media(txt, media, *args)
    end
  end
end
521
+ end