bot_twitter_ebooks 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,22 @@
1
# Global debug flag, initialised to false. Nothing visible in this section
# reads it.
$debug = false

# Writes one line to standard error.
#
# Each argument is converted with #to_s, the pieces are joined with single
# spaces, a trailing newline is appended, and the stream is flushed so the
# message appears immediately.
#
# @param args [Array<Object>] values to print
# @return [IO] the result of STDERR.flush
def log(*args)
  line = args.map(&:to_s).join(' ')
  STDERR.print "#{line}\n"
  STDERR.flush
end
7
+
8
# Top-level namespace. Defines filesystem locations derived from the location
# of this file, plus the INTERIM marker symbol.
module Ebooks
  # Absolute path of the directory one level above this file's directory.
  GEM_PATH = File.expand_path('..', File.dirname(__FILE__))

  # Sub-directories of GEM_PATH.
  DATA_PATH = File.join(GEM_PATH, 'data')
  SKELETON_PATH = File.join(GEM_PATH, 'skeleton')
  TEST_PATH = File.join(GEM_PATH, 'test')

  # Corpus file located under TEST_PATH.
  TEST_CORPUS_PATH = File.join(TEST_PATH, 'corpus', 'elonmusk.tweets')

  # Marker symbol; its consumers are not visible in this section.
  INTERIM = :interim
end
16
+
17
+ require 'bot_twitter_ebooks/nlp'
18
+ require 'bot_twitter_ebooks/archive'
19
+ require 'bot_twitter_ebooks/sync'
20
+ require 'bot_twitter_ebooks/suffix'
21
+ require 'bot_twitter_ebooks/model'
22
+ require 'bot_twitter_ebooks/bot'
@@ -0,0 +1,117 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+
4
+ require 'twitter'
5
+ require 'json'
6
+
7
# Location of the JSON file holding the archiver's API credentials:
# ".ebooksrc" directly under $HOME (an unset HOME contributes an empty string).
CONFIG_PATH = ENV['HOME'].to_s + '/.ebooksrc'
8
+
9
module Ebooks
  # Local JSON archive of one user's tweets, kept up to date through the
  # Twitter REST API.
  #
  # The archive file contains a JSON array of `{ "text": ..., "id": ... }`
  # objects with the most recently fetched tweets first.
  class Archive
    # @return [Array<Hash>, nil] archived tweets as `{text:, id:}` hashes;
    #   nil when no archive file existed and nothing has been fetched yet
    attr_reader :tweets

    # Builds a REST client from the credentials stored in CONFIG_PATH.
    # When that file does not exist the credentials are requested on STDIN
    # and saved to CONFIG_PATH for later runs.
    #
    # @return [Twitter::REST::Client]
    def make_client
      if File.exist?(CONFIG_PATH)
        @config = JSON.parse(File.read(CONFIG_PATH), symbolize_names: true)
      else
        @config = {}

        puts "As Twitter no longer allows anonymous API access, you'll need to enter the auth details of any account to use for archiving. These will be stored in #{CONFIG_PATH} if you need to change them later."
        {
          consumer_key: "Consumer key: ",
          consumer_secret: "Consumer secret: ",
          oauth_token: "Access token: ",
          oauth_token_secret: "Access secret: "
        }.each do |key, prompt|
          print prompt
          @config[key] = STDIN.gets.chomp
        end

        # The file holds OAuth secrets, so create it readable by the owner only.
        File.open(CONFIG_PATH, 'w', 0o600) do |f|
          f.write(JSON.pretty_generate(@config))
        end
      end

      Twitter::REST::Client.new do |config|
        config.consumer_key = @config[:consumer_key]
        config.consumer_secret = @config[:consumer_secret]
        config.access_token = @config[:oauth_token]
        config.access_token_secret = @config[:oauth_token_secret]
      end
    end

    # Opens (or prepares to create) the archive for a user.
    #
    # @param username [String] account whose timeline is archived
    # @param path [String, nil] archive file, or a directory in which
    #   "<username>.json" is used; defaults to "corpus/<username>.json"
    # @param client [#user_timeline, nil] API client; built with #make_client
    #   when omitted
    def initialize(username, path=nil, client=nil)
      @username = username
      @path = path || "corpus/#{username}.json"
      @path = File.join(@path, "#{username}.json") if File.directory?(@path)

      @client = client || make_client

      # File.exist? replaces File.exists?, which no longer exists in Ruby 3.2+.
      if File.exist?(@path) && !File.zero?(@path)
        @filetext = File.read(@path, encoding: 'utf-8')
        # A file containing JSON `null` parses to nil; treat it as empty.
        @tweets = JSON.parse(@filetext, symbolize_names: true) || []
        log "Currently #{@tweets.length} tweets for #{@username}"
      else
        @tweets = nil
        log "New archive for @#{username} at #{@path}"
      end
    end

    # Downloads new tweets and rewrites the archive file.
    #
    # We use this structure to ensure that
    # a) if there's an issue opening the file, we error out before download
    # b) if there's an issue during download we restore the original
    #
    # @return [Integer] number of bytes written by the final write
    # @raise [Exception] whatever #sync_to raised, after the original file
    #   contents have been written back
    def sync
      File.open(@path, 'w') do |file|
        begin
          sync_to(file)
        rescue Exception # deliberately broad (covers Interrupt); always re-raised
          file.rewind
          file.truncate(0) # drop any partial output before restoring
          file.write(@filetext)
          raise
        end
      end
    end

    # Fetches every tweet newer than the newest archived one, prepends the
    # results to the in-memory archive and writes the archive to `file`.
    #
    # @param file [IO] writable stream that receives the JSON document
    # @return [Integer] number of bytes written
    def sync_to(file)
      tweets = []
      max_id = nil

      opts = {
        count: 200,
        #include_rts: false,
        tweet_mode: "extended",
        trim_user: true
      }

      # Only ask for tweets newer than the newest archived one; an empty
      # archive has no such tweet.
      newest = @tweets && @tweets.first
      opts[:since_id] = newest[:id] if newest

      loop do
        opts[:max_id] = max_id unless max_id.nil?
        begin
          page = @client.user_timeline(@username, opts)
        rescue Twitter::Error::TooManyRequests
          log "Rate limit exceeded. Waiting for 15 mins before retry."
          sleep 60*15
          retry
        end
        # max_id below already excludes the last tweet received, so paging is
        # finished only when a page comes back empty. Stopping on a page of
        # one would discard that tweet.
        break if page.empty?
        tweets += page
        log "Received #{tweets.length} new tweets"
        max_id = page.last.id - 1
      end

      if tweets.empty?
        log "No new tweets"
      else
        fresh = tweets.map do |tw|
          attrs = tw.attrs
          { text: attrs[:full_text], id: attrs[:id] }
        end
        @tweets = fresh + (@tweets || [])
      end
      # Always persist an array so the file can be loaded again later.
      file.write(JSON.pretty_generate(@tweets || []))
    end
  end
end
@@ -0,0 +1,481 @@
1
+ # encoding: utf-8
2
+ require 'twitter'
3
+ require 'rufus/scheduler'
4
+
5
# Monkeypatch hack to fix upstream dependency issue
# https://github.com/sferik/twitter/issues/709
class HTTP::URI
  # @return [Integer, nil] 443 when the URI uses HTTPS, otherwise nil
  def port
    https? ? 443 : nil
  end
end
12
+
13
+ module Ebooks
14
# Raised when a bot has not been configured correctly.
# Derives directly from Exception, so a bare `rescue` (which only catches
# StandardError) does not intercept it.
class ConfigurationError < Exception; end
16
+
17
# Represents a single reply tree of tweets
class Conversation
  # @return [Time] when this conversation was created or last had a tweet added
  attr_reader :last_update

  # @param bot [Ebooks::Bot]
  def initialize(bot)
    @bot = bot
    @tweets = []
    @last_update = Time.now
  end

  # Records a tweet as part of this conversation and refreshes #last_update.
  # @param tweet [Twitter::Tweet] tweet to add
  # @return [Time] the new last-update time
  def add(tweet)
    @tweets << tweet
    @last_update = Time.now
  end

  # Make an informed guess as to whether a user is a bot based
  # on their behavior in this conversation.
  #
  # A user with more than two tweets here is considered a bot when the
  # username contains "ebooks" (compared case-insensitively, like every other
  # username comparison in this class) or when their last three tweets were
  # created less than 12 seconds apart.
  #
  # @param username [String]
  # @return [Boolean]
  def is_bot?(username)
    name = username.downcase
    usertweets = @tweets.select { |t| t.user.screen_name.downcase == name }
    return false unless usertweets.length > 2

    name.include?('ebooks') ||
      (usertweets[-1].created_at - usertweets[-3].created_at) < 12
  end

  # Figure out whether to keep this user in the reply prefix
  # We want to avoid spamming non-participating users
  #
  # @param username [String]
  # @return [Boolean] true while the conversation has at most four tweets, or
  #   when the user has tweeted in it
  def can_include?(username)
    name = username.downcase
    @tweets.length <= 4 ||
      @tweets.any? { |t| t.user.screen_name.downcase == name }
  end
end
53
+
54
# Meta information about a tweet that we calculate for ourselves
class TweetMeta
  # @return [Array<String>] usernames mentioned in tweet
  attr_accessor :mentions
  # @return [String] text of tweets with mentions removed
  attr_accessor :mentionless
  # @return [Array<String>] usernames to include in a reply
  attr_accessor :reply_mentions
  # @return [String] mentions to start reply with
  attr_accessor :reply_prefix
  # @return [Integer] available chars for reply
  attr_accessor :limit

  # @return [Ebooks::Bot] associated bot
  attr_accessor :bot
  # @return [Twitter::Tweet] associated tweet
  attr_accessor :tweet

  # Check whether this tweet mentions our bot.
  #
  # All three conditions must hold:
  # - the tweet's mention list contains our username (case-insensitive)
  # - the tweet is not a retweet by somebody else
  # - the text does not look like a manual ("soft") retweet or quote
  # @return [Boolean]
  def mentions_bot?
    return false unless @mentions.map(&:downcase).include?(@bot.username.downcase)
    return false if @tweet.retweeted_status?

    !@tweet.text.match(/([`'‘’"“”]|RT|via|by|from)\s*@/i)
  end

  # @param bot [Ebooks::Bot]
  # @param ev [Twitter::Tweet]
  def initialize(bot, ev)
    @bot = bot
    @tweet = ev

    mention_entities = ev.attrs[:entities][:user_mentions]
    @mentions = mention_entities.map { |entity| entity[:screen_name] }

    # Work out who the reply should address: never ourselves, and nobody the
    # conversation says has already seen too many secondary mentions.
    others = @mentions.reject do |name|
      name.downcase == @bot.username.downcase ||
        !@bot.conversation(ev).can_include?(name)
    end
    @reply_mentions = ([ev.user.screen_name] + others).uniq

    handles = @reply_mentions.map { |name| '@' + name }
    @reply_prefix = handles.join(' ') + ' '
    @limit = 140 - @reply_prefix.length

    # Cut the mentions out of the text, last one first so that the indices of
    # the earlier entities remain valid.
    stripped = ev.text
    begin
      mention_entities.reverse.each do |entity|
        bounds = entity[:indices]
        tail = stripped[bounds[1]..-1] || ''
        stripped = stripped[0...bounds[0]] + tail.strip
      end
    rescue Exception
      # Dump the offending input before re-raising.
      p ev.attrs[:entities][:user_mentions]
      p ev.text
      raise
    end
    @mentionless = stripped
  end

  # Get an array of media uris in tweet.
  # @param size [String] A twitter image size to return. Supported sizes are
  #   thumb, small, medium, large; any other value adds no size suffix
  # @return [Array<String>] image URIs included in tweet
  def media_uris(size_input = '')
    suffix = %w[thumb small medium large].include?(size_input) ? ":#{size_input}" : ''
    return [] unless @tweet.media?

    @tweet.media.map { |item| item.media_url.to_s + suffix }
  end
end
143
+
144
# A Twitter bot. Holds credentials and per-bot state, dispatches streaming
# events to optional `on_<event>` handler methods, and wraps common account
# actions. Subclasses must override #configure.
class Bot
  # @return [String] OAuth consumer key for a Twitter app
  attr_accessor :consumer_key
  # @return [String] OAuth consumer secret for a Twitter app
  attr_accessor :consumer_secret
  # @return [String] OAuth access token from `ebooks auth`
  attr_accessor :access_token
  # @return [String] OAuth access secret from `ebooks auth`
  attr_accessor :access_token_secret
  # @return [Twitter::User] Twitter user object of bot
  attr_accessor :user
  # @return [String] Twitter username of bot
  attr_accessor :username
  # @return [Array<String>] list of usernames to block on contact
  attr_accessor :blacklist
  # @return [Hash{String => Ebooks::Conversation}] maps tweet ids to their conversation contexts
  attr_accessor :conversations
  # @return [Range, Integer] range of seconds to delay in delay method
  attr_accessor :delay_range

  # @return [Array] list of all defined bots
  def self.all; @@all ||= []; end

  # Fetches a bot by username
  # @param username [String]
  # @return [Ebooks::Bot, nil] nil when no bot has that username
  def self.get(username)
    all.find { |bot| bot.username.downcase == username.downcase }
  end

  # Logs info to stdout in the context of this bot
  def log(*args)
    STDOUT.print "@#{@username}: " + args.map(&:to_s).join(' ') + "\n"
    STDOUT.flush
  end

  # Initializes and configures bot, then registers it in Bot.all
  # @param username [String] Twitter username of the bot
  # @param b Block to call with new bot
  # @raise [ConfigurationError] when #configure has not been overridden
  def initialize(username, &b)
    @blacklist ||= []
    @conversations ||= {}
    # Tweet ids we've already observed, to avoid duplication
    @seen_tweets ||= {}

    @username = username
    @delay_range ||= 1..6
    configure

    b.call(self) unless b.nil?
    Bot.all << self
  end

  # Hook for subclasses to set credentials and options.
  # @raise [ConfigurationError] always, unless overridden
  def configure
    raise ConfigurationError, "Please override the 'configure' method for subclasses of Ebooks::Bot."
  end

  # Find or create the conversation context for this tweet
  # @param tweet [Twitter::Tweet]
  # @return [Ebooks::Conversation]
  def conversation(tweet)
    conv = if tweet.in_reply_to_status_id?
      @conversations[tweet.in_reply_to_status_id]
    end

    if conv.nil?
      conv = @conversations[tweet.id] || Conversation.new(self)
    end

    if tweet.in_reply_to_status_id?
      @conversations[tweet.in_reply_to_status_id] = conv
    end
    @conversations[tweet.id] = conv

    # Expire any old conversations to prevent memory growth.
    # delete_if is the supported way to remove entries while walking a hash.
    @conversations.delete_if do |_id, other|
      other != conv && Time.now - other.last_update > 3600
    end

    conv
  end

  # @return [Twitter::REST::Client] underlying REST client from twitter gem
  def twitter
    @twitter ||= Twitter::REST::Client.new do |config|
      config.consumer_key = @consumer_key
      config.consumer_secret = @consumer_secret
      config.access_token = @access_token
      config.access_token_secret = @access_token_secret
    end
  end

  # @return [Twitter::Streaming::Client] underlying streaming client from twitter gem
  def stream
    @stream ||= Twitter::Streaming::Client.new do |config|
      config.consumer_key = @consumer_key
      config.consumer_secret = @consumer_secret
      config.access_token = @access_token
      config.access_token_secret = @access_token_secret
    end
  end

  # Calculate some meta information about a tweet relevant for replying
  # @param ev [Twitter::Tweet]
  # @return [Ebooks::TweetMeta]
  def meta(ev)
    TweetMeta.new(self, ev)
  end

  # Receive an event from the twitter stream
  # @param ev [Object] Twitter streaming event
  def receive_event(ev)
    case ev
    when Array # Initial array sent on first connection
      log "Online!"
      fire(:connect, ev)
      return
    when Twitter::DirectMessage
      return if ev.sender.id == @user.id # Don't reply to self
      log "DM from @#{ev.sender.screen_name}: #{ev.text}"
      fire(:message, ev)
    when Twitter::Tweet
      return unless ev.text # If it's not a text-containing tweet, ignore it
      return if ev.user.id == @user.id # Ignore our own tweets

      if ev.retweet? && ev.retweeted_tweet.user.id == @user.id
        # Someone retweeted our tweet!
        fire(:retweet, ev)
        return
      end

      tweet_meta = meta(ev)

      if blacklisted?(ev.user.screen_name)
        log "Blocking blacklisted user @#{ev.user.screen_name}"
        # Use the memoised accessor: the bare instance variable is nil until
        # the REST client has been built.
        twitter.block(ev.user.screen_name)
      end

      # Avoid responding to duplicate tweets
      if @seen_tweets[ev.id]
        log "Not firing event for duplicate tweet #{ev.id}"
        return
      else
        @seen_tweets[ev.id] = true
      end

      if tweet_meta.mentions_bot?
        log "Mention from @#{ev.user.screen_name}: #{ev.text}"
        conversation(ev).add(ev)
        fire(:mention, ev)
      else
        fire(:timeline, ev)
      end
    when Twitter::Streaming::Event
      case ev.name
      when :follow
        return if ev.source.id == @user.id
        log "Followed by #{ev.source.screen_name}"
        fire(:follow, ev.source)
      when :favorite, :unfavorite
        return if ev.source.id == @user.id # Ignore our own favorites
        log "@#{ev.source.screen_name} #{ev.name.to_s}d: #{ev.target_object.text}"
        fire(ev.name, ev.source, ev.target_object)
      when :user_update
        update_myself ev.source
      end
    when Twitter::Streaming::DeletedTweet
      # Pass
    else
      log ev
    end
  end

  # Updates @user and calls on_user_update.
  # @param new_me [Twitter::User] fresh user object; fetched from the API
  #   when omitted. Ignored if it belongs to a different account than @user.
  def update_myself(new_me=twitter.user)
    @user = new_me if @user.nil? || new_me.id == @user.id
    @username = @user.screen_name
    log 'User information updated'
    fire(:user_update)
  end

  # Configures client and fires startup event.
  # Exits the process with status 1 when any credential is missing.
  # @raise [ConfigurationError] when the username is nil
  def prepare
    # Sanity check
    if @username.nil?
      raise ConfigurationError, "bot username cannot be nil"
    end

    # Both consumer credentials must be present and non-empty.
    if [@consumer_key, @consumer_secret].any? { |value| value.nil? || value.empty? }
      log "Missing consumer_key or consumer_secret. These details can be acquired by registering a Twitter app at https://apps.twitter.com/"
      exit 1
    end

    if [@access_token, @access_token_secret].any? { |value| value.nil? || value.empty? }
      log "Missing access_token or access_token_secret. Please run `ebooks auth`."
      exit 1
    end

    # Save old name
    old_name = username
    # Load user object and actual username
    update_myself
    # Warn about mismatches unless it was clearly intentional
    log "warning: bot expected to be @#{old_name} but connected to @#{username}" unless username == old_name || old_name.empty?

    fire(:startup)
  end

  # Start running user event stream
  def start
    log "starting tweet stream"

    stream.user do |ev|
      receive_event ev
    end
  end

  # Fire an event
  # @param event [Symbol] event to fire
  # @param args arguments for event handler
  # @return the handler's result, or nil when no `on_<event>` method exists
  def fire(event, *args)
    handler = "on_#{event}".to_sym
    send(handler, *args) if respond_to? handler
  end

  # Delay an action for a variable period of time
  # @param range [Range, Integer] range of seconds to choose for delay; an
  #   Integer is used as the exact number of seconds
  # @return the block's result
  def delay(range=@delay_range, &b)
    time = range.is_a?(Integer) ? range : rand(range)
    sleep time
    b.call
  end

  # Check if a username is blacklisted
  # @param username [String]
  # @return [Boolean]
  def blacklisted?(username)
    @blacklist.map(&:downcase).include?(username.downcase)
  end

  # Reply to a tweet or a DM.
  # @param ev [Twitter::Tweet, Twitter::DirectMessage]
  # @param text [String] contents of reply excluding reply_prefix
  # @param opts [Hash] additional params to pass to twitter gem
  # @return [Twitter::Tweet, Object, false] the posted tweet, the result of
  #   sending the DM, or false when the recipient is a suspected bot
  # @raise [ArgumentError] when ev is neither a tweet nor a direct message
  def reply(ev, text, opts={})
    opts = opts.clone

    if ev.is_a? Twitter::DirectMessage
      log "Sending DM to @#{ev.sender.screen_name}: #{text}"
      twitter.create_direct_message(ev.sender.screen_name, text, opts)
    elsif ev.is_a? Twitter::Tweet
      tweet_meta = meta(ev)

      if conversation(ev).is_bot?(ev.user.screen_name)
        log "Not replying to suspected bot @#{ev.user.screen_name}"
        return false
      end

      # Only add the prefix when the text does not already address the author.
      text = tweet_meta.reply_prefix + text unless text.match(/@#{Regexp.escape ev.user.screen_name}/i)
      log "Replying to @#{ev.user.screen_name} with: #{text}"
      tweet = twitter.update(text, opts.merge(in_reply_to_status_id: ev.id))
      conversation(tweet).add(tweet)
      tweet
    else
      raise ArgumentError, "Don't know how to reply to a #{ev.class}"
    end
  end

  # Favorite a tweet
  # @param tweet [Twitter::Tweet]
  def favorite(tweet)
    log "Favoriting @#{tweet.user.screen_name}: #{tweet.text}"

    begin
      twitter.favorite(tweet.id)
    rescue Twitter::Error::Forbidden
      log "Already favorited: #{tweet.user.screen_name}: #{tweet.text}"
    end
  end

  # Retweet a tweet
  # @param tweet [Twitter::Tweet]
  def retweet(tweet)
    log "Retweeting @#{tweet.user.screen_name}: #{tweet.text}"

    begin
      twitter.retweet(tweet.id)
    rescue Twitter::Error::Forbidden
      log "Already retweeted: #{tweet.user.screen_name}: #{tweet.text}"
    end
  end

  # Follow a user
  # @param user [String] username or user id
  def follow(user, *args)
    log "Following #{user}"
    twitter.follow(user, *args)
  end

  # Unfollow a user
  # @param user [String] username or user id
  def unfollow(user, *args)
    log "Unfollowing #{user}"
    twitter.unfollow(user, *args)
  end

  # Tweet something
  # @param text [String]
  def tweet(text, *args)
    log "Tweeting '#{text}'"
    twitter.update(text, *args)
  end

  # Get a scheduler for this bot
  # @return [Rufus::Scheduler]
  def scheduler
    @scheduler ||= Rufus::Scheduler.new
  end

  # Tweet some text with an image
  # @param txt [String]
  # @param pic [String] filename
  def pictweet(txt, pic, *args)
    log "Tweeting #{txt.inspect} - #{pic} #{args}"
    # Block form closes the image file once the upload call returns.
    File.open(pic) { |image| twitter.update_with_media(txt, image, *args) }
  end
end
481
+ end