bot_twitter_ebooks 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,22 @@
1
# Global debug flag, initialised to false.
$debug = false

# Writes one diagnostic line to standard error and flushes it immediately.
# @param args [Array<Object>] values to print; each is converted with #to_s
#   and the results are joined with single spaces
def log(*args)
  line = args.map { |arg| arg.to_s }.join(' ')
  STDERR.print "#{line}\n"
  STDERR.flush
end
7
+
8
# Namespace constants: filesystem locations derived from this file's parent
# directory, plus the INTERIM marker symbol.
module Ebooks
  # Directory one level above the directory containing this file.
  GEM_PATH = File.expand_path('..', File.dirname(__FILE__))
  DATA_PATH = File.join(GEM_PATH, 'data')
  SKELETON_PATH = File.join(GEM_PATH, 'skeleton')
  TEST_PATH = File.join(GEM_PATH, 'test')
  TEST_CORPUS_PATH = File.join(TEST_PATH, 'corpus', 'elonmusk.tweets')
  INTERIM = :interim
end
16
+
17
+ require 'bot_twitter_ebooks/nlp'
18
+ require 'bot_twitter_ebooks/archive'
19
+ require 'bot_twitter_ebooks/sync'
20
+ require 'bot_twitter_ebooks/suffix'
21
+ require 'bot_twitter_ebooks/model'
22
+ require 'bot_twitter_ebooks/bot'
@@ -0,0 +1,117 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+
4
+ require 'twitter'
5
+ require 'json'
6
+
7
+ CONFIG_PATH = "#{ENV['HOME']}/.ebooksrc"
8
+
9
+ module Ebooks
10
# A JSON-backed archive of one user's tweets, stored newest-first as an
# array of { text:, id: } hashes.
class Archive
  # @return [Array<Hash>, nil] archived tweets, or nil for an archive that
  #   has no file content yet
  attr_reader :tweets

  # Builds a REST client from the credentials in CONFIG_PATH, prompting on
  # STDIN and saving them there if the file does not exist.
  # @return [Twitter::REST::Client]
  def make_client
    if File.exist?(CONFIG_PATH)
      @config = JSON.parse(File.read(CONFIG_PATH), symbolize_names: true)
    else
      @config = {}

      puts "As Twitter no longer allows anonymous API access, you'll need to enter the auth details of any account to use for archiving. These will be stored in #{CONFIG_PATH} if you need to change them later."
      prompts = {
        consumer_key: "Consumer key: ",
        consumer_secret: "Consumer secret: ",
        oauth_token: "Access token: ",
        oauth_token_secret: "Access secret: "
      }
      prompts.each do |key, prompt|
        print prompt
        @config[key] = STDIN.gets.chomp
      end

      # The file holds API secrets, so create it readable by the owner only.
      File.open(CONFIG_PATH, 'w', 0o600) do |f|
        f.write(JSON.pretty_generate(@config))
      end
    end

    Twitter::REST::Client.new do |config|
      config.consumer_key = @config[:consumer_key]
      config.consumer_secret = @config[:consumer_secret]
      config.access_token = @config[:oauth_token]
      config.access_token_secret = @config[:oauth_token_secret]
    end
  end

  # @param username [String] account whose timeline is archived
  # @param path [String, nil] archive file, or a directory in which
  #   "<username>.json" is used; defaults to "corpus/<username>.json"
  # @param client [Object, nil] object responding to #user_timeline;
  #   built with make_client when omitted
  def initialize(username, path=nil, client=nil)
    @username = username
    @path = path || "corpus/#{username}.json"
    @path = File.join(@path, "#{username}.json") if File.directory?(@path)

    @client = client || make_client

    if File.exist?(@path) && !File.zero?(@path)
      @filetext = File.read(@path, :encoding => 'utf-8')
      # A file containing JSON null is treated as an empty archive.
      @tweets = JSON.parse(@filetext, symbolize_names: true) || []
      log "Currently #{@tweets.length} tweets for #{@username}"
    else
      @tweets = nil
      log "New archive for @#{username} at #{@path}"
    end
  end

  # Downloads new tweets and rewrites the archive file.
  #
  # We use this structure to ensure that
  # a) if there's an issue opening the file, we error out before download
  # b) if there's an issue during download we restore the original
  def sync
    File.open(@path, 'w') do |file|
      begin
        sync_to(file)
      rescue Exception
        # Deliberately broad: the original content is restored for any
        # failure (including interrupts) and the error is re-raised.
        file.rewind
        file.truncate(0) # drop anything partially written
        file.write(@filetext) unless @filetext.nil?
        raise
      end
    end
  end

  # Fetches every tweet newer than the newest archived one, prepends them to
  # @tweets and writes the whole archive to +file+ as JSON.
  # @param file [IO] open, writable archive file
  def sync_to(file)
    tweets = []
    max_id = nil

    opts = {
      count: 200,
      #include_rts: false,
      tweet_mode: "extended",
      trim_user: true
    }

    opts[:since_id] = @tweets.first[:id] unless @tweets.nil? || @tweets.empty?

    loop do
      opts[:max_id] = max_id unless max_id.nil?
      begin
        page = @client.user_timeline(@username, opts)
      rescue Twitter::Error::TooManyRequests
        log "Rate limit exceeded. Waiting for 15 mins before retry."
        sleep 60*15
        retry
      end
      # max_id below already excludes the last tweet seen, so a page holding
      # a single tweet is genuinely new; only an empty page ends paging.
      break if page.empty?
      tweets += page
      log "Received #{tweets.length} new tweets"
      max_id = page.last.id - 1
    end

    @tweets ||= []
    if tweets.empty?
      log "No new tweets"
    else
      fresh = tweets.map do |tw|
        { text: tw.attrs[:full_text], id: tw.attrs[:id] }
      end
      @tweets = fresh + @tweets
    end
    file.write(JSON.pretty_generate(@tweets))
  end
end
117
+ end
@@ -0,0 +1,481 @@
1
+ # encoding: utf-8
2
+ require 'twitter'
3
+ require 'rufus/scheduler'
4
+
5
# Monkeypatch hack to fix upstream dependency issue
# https://github.com/sferik/twitter/issues/709
class HTTP::URI
  # @return [Integer, nil] 443 when the URI is https, nil otherwise
  def port
    https? ? 443 : nil
  end
end
12
+
13
+ module Ebooks
14
# Raised when a bot is not configured correctly. Derives from StandardError
# so that a plain `rescue` can handle it.
class ConfigurationError < StandardError
end
16
+
17
# Represents a single reply tree of tweets
class Conversation
  # @return [Time] when the conversation was created or last added to
  attr_reader :last_update

  # @param bot [Ebooks::Bot]
  def initialize(bot)
    @bot = bot
    @tweets = []
    @last_update = Time.now
  end

  # @param tweet [Twitter::Tweet] tweet to add
  def add(tweet)
    @tweets << tweet
    @last_update = Time.now
  end

  # Make an informed guess as to whether a user is a bot based
  # on their behavior in this conversation
  # @param username [String] compared case-insensitively
  # @return [Boolean] true once the user has more than two tweets here and
  #   either their name contains "ebooks" or their last three tweets were
  #   created less than 12 seconds apart
  def is_bot?(username)
    name = username.downcase
    usertweets = @tweets.select { |t| t.user.screen_name.downcase == name }
    return false unless usertweets.length > 2

    name.include?('ebooks') ||
      (usertweets[-1].created_at - usertweets[-3].created_at) < 12
  end

  # Figure out whether to keep this user in the reply prefix
  # We want to avoid spamming non-participating users
  # @param username [String] compared case-insensitively
  # @return [Boolean] true while the conversation has at most four tweets,
  #   or when the user has tweeted in it
  def can_include?(username)
    @tweets.length <= 4 ||
      @tweets.any? { |t| t.user.screen_name.downcase == username.downcase }
  end
end
53
+
54
# Meta information about a tweet that we calculate for ourselves
class TweetMeta
  # @return [Array<String>] usernames mentioned in tweet
  attr_accessor :mentions
  # @return [String] text of tweets with mentions removed
  attr_accessor :mentionless
  # @return [Array<String>] usernames to include in a reply
  attr_accessor :reply_mentions
  # @return [String] mentions to start reply with
  attr_accessor :reply_prefix
  # @return [Integer] available chars for reply
  attr_accessor :limit

  # @return [Ebooks::Bot] associated bot
  attr_accessor :bot
  # @return [Twitter::Tweet] associated tweet
  attr_accessor :tweet

  # @param bot [Ebooks::Bot]
  # @param ev [Twitter::Tweet]
  def initialize(bot, ev)
    @bot = bot
    @tweet = ev

    mention_entities = ev.attrs[:entities][:user_mentions]
    @mentions = mention_entities.map { |entity| entity[:screen_name] }

    # Reply to the author plus every mentioned user that is neither the bot
    # itself nor excluded by the conversation's can_include? check.
    others = @mentions.select do |name|
      name.downcase != @bot.username.downcase && @bot.conversation(ev).can_include?(name)
    end
    @reply_mentions = [ev.user.screen_name].concat(others).uniq

    @reply_prefix = @reply_mentions.map { |name| '@' + name }.join(' ') + ' '
    @limit = 140 - @reply_prefix.length

    # Cut mentions out back-to-front so earlier indices stay valid.
    stripped = ev.text
    begin
      mention_entities.reverse_each do |entity|
        start = entity[:indices][0]
        stop = entity[:indices][1]
        tail = stripped[stop..-1] || ''
        stripped = stripped[0...start] + tail.strip
      end
    rescue Exception
      # Dump the offending data for debugging, then propagate the error.
      p mention_entities
      p ev.text
      raise
    end
    @mentionless = stripped
  end

  # Check whether this tweet mentions our bot
  # @return [Boolean]
  def mentions_bot?
    # Someone is talking to us only when our username is in the mention list...
    return false unless @mentions.map(&:downcase).include?(@bot.username.downcase)
    # ...and the tweet is not a retweet by somebody else...
    return false if @tweet.retweeted_status?

    # ...nor a soft retweet / quote of a mention.
    !@tweet.text.match(/([`'‘’"“”]|RT|via|by|from)\s*@/i)
  end

  # Get an array of media uris in tweet.
  # @param size_input [String] one of thumb, small, medium or large; any
  #   other value (including the default) adds no size suffix
  # @return [Array<String>] image URIs included in tweet
  def media_uris(size_input = '')
    suffix = %w[thumb small medium large].include?(size_input) ? ":#{size_input}" : ''
    return [] unless @tweet.media?

    @tweet.media.map { |item| item.media_url.to_s + suffix }
  end
end
143
+
144
+ class Bot
145
+ # @return [String] OAuth consumer key for a Twitter app
146
+ attr_accessor :consumer_key
147
+ # @return [String] OAuth consumer secret for a Twitter app
148
+ attr_accessor :consumer_secret
149
+ # @return [String] OAuth access token from `ebooks auth`
150
+ attr_accessor :access_token
151
+ # @return [String] OAuth access secret from `ebooks auth`
152
+ attr_accessor :access_token_secret
153
+ # @return [Twitter::User] Twitter user object of bot
154
+ attr_accessor :user
155
+ # @return [String] Twitter username of bot
156
+ attr_accessor :username
157
+ # @return [Array<String>] list of usernames to block on contact
158
+ attr_accessor :blacklist
159
+ # @return [Hash{String => Ebooks::Conversation}] maps tweet ids to their conversation contexts
160
+ attr_accessor :conversations
161
+ # @return [Range, Integer] range of seconds to delay in delay method
162
+ attr_accessor :delay_range
163
+
164
# Lazily creates the shared registry on first use.
# @return [Array] list of all defined bots
def self.all
  @@all ||= []
end
166
+
167
# Looks up a registered bot, ignoring case in the username.
# @param username [String]
# @return [Ebooks::Bot, nil] the first matching bot, or nil if none matches
def self.get(username)
  all.detect do |candidate|
    candidate.username.downcase == username.downcase
  end
end
173
+
174
# Prints a line to stdout prefixed with this bot's "@username: " and
# flushes it immediately.
# @param args [Array<Object>] values to print, joined with single spaces
def log(*args)
  message = args.map { |arg| arg.to_s }.join(' ')
  STDOUT.print "@#{@username}: " + message + "\n"
  STDOUT.flush
end
179
+
180
# Sets default state, runs the configure hook, yields the bot to the
# optional block and finally adds it to Bot.all.
# @param username [String] Twitter username of the bot
# @param b block called with the new bot after configure has run
def initialize(username, &b)
  @username = username

  # Defaults; ||= leaves anything that is already set untouched.
  @blacklist ||= []
  @conversations ||= {}
  @delay_range ||= 1..6
  # Tweet ids we've already observed, to avoid duplication
  @seen_tweets ||= {}

  configure

  yield self if block_given?
  Bot.all.push(self)
end
196
+
197
# Configuration hook called from initialize. This base version always
# raises; subclasses are expected to override it.
# @raise [ConfigurationError]
def configure
  message = "Please override the 'configure' method for subclasses of Ebooks::Bot."
  raise ConfigurationError, message
end
200
+
201
# Find or create the conversation context for this tweet
#
# The conversation is looked up by the id of the tweet being replied to,
# then by the tweet's own id, and is stored under both afterwards.
# @param tweet [Twitter::Tweet]
# @return [Ebooks::Conversation]
def conversation(tweet)
  parent_id = tweet.in_reply_to_status_id if tweet.in_reply_to_status_id?

  conv = @conversations[parent_id] unless parent_id.nil?
  conv ||= @conversations[tweet.id] || Conversation.new(self)

  @conversations[parent_id] = conv unless parent_id.nil?
  @conversations[tweet.id] = conv

  # Expire any old conversations to prevent memory growth
  # (delete_if avoids deleting from the hash inside a plain #each).
  @conversations.delete_if do |_id, other|
    other != conv && Time.now - other.last_update > 3600
  end

  conv
end
227
+
228
# Built on first use from this bot's four OAuth credentials, then reused.
# @return [Twitter::REST::Client] underlying REST client from twitter gem
def twitter
  return @twitter if @twitter

  @twitter = Twitter::REST::Client.new do |client|
    client.consumer_key = @consumer_key
    client.consumer_secret = @consumer_secret
    client.access_token = @access_token
    client.access_token_secret = @access_token_secret
  end
end
237
+
238
# Built on first use from this bot's four OAuth credentials, then reused.
# @return [Twitter::Streaming::Client] underlying streaming client from twitter gem
def stream
  return @stream if @stream

  @stream = Twitter::Streaming::Client.new do |client|
    client.consumer_key = @consumer_key
    client.consumer_secret = @consumer_secret
    client.access_token = @access_token
    client.access_token_secret = @access_token_secret
  end
end
247
+
248
# Wraps a tweet in a TweetMeta bound to this bot; a new object is built on
# every call.
# @param ev [Twitter::Tweet]
# @return [Ebooks::TweetMeta]
def meta(ev)
  owner = self
  TweetMeta.new(owner, ev)
end
254
+
255
# Receive an event from the twitter stream and dispatch it to the matching
# on_* handler through fire.
# @param ev [Object] Twitter streaming event
def receive_event(ev)
  case ev
  when Array # Initial array sent on first connection
    log "Online!"
    fire(:connect, ev)
    return
  when Twitter::DirectMessage
    return if ev.sender.id == @user.id # Don't reply to self
    log "DM from @#{ev.sender.screen_name}: #{ev.text}"
    fire(:message, ev)
  when Twitter::Tweet
    return unless ev.text # If it's not a text-containing tweet, ignore it
    return if ev.user.id == @user.id # Ignore our own tweets

    if ev.retweet? && ev.retweeted_tweet.user.id == @user.id
      # Someone retweeted our tweet!
      fire(:retweet, ev)
      return
    end

    tweet_meta = meta(ev)

    if blacklisted?(ev.user.screen_name)
      log "Blocking blacklisted user @#{ev.user.screen_name}"
      # Go through the memoizing accessor: the @twitter ivar is nil until
      # the accessor has been called at least once.
      twitter.block(ev.user.screen_name)
    end

    # Avoid responding to duplicate tweets
    if @seen_tweets[ev.id]
      log "Not firing event for duplicate tweet #{ev.id}"
      return
    else
      @seen_tweets[ev.id] = true
    end

    if tweet_meta.mentions_bot?
      log "Mention from @#{ev.user.screen_name}: #{ev.text}"
      conversation(ev).add(ev)
      fire(:mention, ev)
    else
      fire(:timeline, ev)
    end
  when Twitter::Streaming::Event
    case ev.name
    when :follow
      return if ev.source.id == @user.id
      log "Followed by #{ev.source.screen_name}"
      fire(:follow, ev.source)
    when :favorite, :unfavorite
      return if ev.source.id == @user.id # Ignore our own favorites
      log "@#{ev.source.screen_name} #{ev.name.to_s}d: #{ev.target_object.text}"
      fire(ev.name, ev.source, ev.target_object)
    when :user_update
      update_myself ev.source
    end
  when Twitter::Streaming::DeletedTweet
    # Pass
  else
    # Unknown event type: just log it.
    log ev
  end
end
318
+
319
# Refreshes @user and @username, then fires the :user_update event.
# @user is replaced only when it is unset or new_me has the same id.
# @param new_me [Twitter::User] defaults to the user returned by the
#   REST client
def update_myself(new_me=twitter.user)
  if @user.nil? || new_me.id == @user.id
    @user = new_me
  end
  @username = @user.screen_name

  log 'User information updated'
  fire(:user_update)
end
326
+
327
# Validates credentials, loads the bot's user information and fires the
# :startup event.
# @raise [ConfigurationError] if the username is nil
# Exits the process with status 1 when any OAuth credential is nil or empty.
def prepare
  # Sanity check
  raise ConfigurationError, "bot username cannot be nil" if @username.nil?

  blank = ->(value) { value.nil? || value.empty? }

  if blank.call(@consumer_key) || blank.call(@consumer_secret)
    log "Missing consumer_key or consumer_secret. These details can be acquired by registering a Twitter app at https://apps.twitter.com/"
    exit 1
  end

  if blank.call(@access_token) || blank.call(@access_token_secret)
    log "Missing access_token or access_token_secret. Please run `ebooks auth`."
    exit 1
  end

  # Save old name
  old_name = username
  # Load user object and actual username
  update_myself
  # Warn about mismatches unless it was clearly intentional
  log "warning: bot expected to be @#{old_name} but connected to @#{username}" unless username == old_name || old_name.empty?

  fire(:startup)
end
355
+
356
# Opens the user stream and hands every event it yields to receive_event.
def start
  log "starting tweet stream"

  stream.user { |event| receive_event(event) }
end
364
+
365
# Calls this bot's on_<event> method if it responds to one.
# @param event [Symbol] event to fire
# @param args arguments for event handler
# @return the handler's result, or nil when no handler exists
def fire(event, *args)
  handler = :"on_#{event}"
  return unless respond_to?(handler)

  send(handler, *args)
end
374
+
375
# Delay an action for a variable period of time
# @param range [Range, Integer] an Integer sleeps exactly that many
#   seconds; anything else is passed to rand to pick the delay
# @param b block to run after sleeping
# @return the block's result, or nil when no block is given
def delay(range=@delay_range, &b)
  # An Integer used to leave the delay unset, making sleep raise TypeError.
  time = range.is_a?(Integer) ? range : rand(range)
  sleep time
  b.call unless b.nil?
end
382
+
383
# Case-insensitive membership test against @blacklist.
# @param username [String]
# @return [Boolean]
def blacklisted?(username)
  lowered = @blacklist.map { |entry| entry.downcase }
  lowered.include?(username.downcase) ? true : false
end
393
+
394
# Reply to a tweet or a DM.
# @param ev [Twitter::Tweet, Twitter::DirectMessage]
# @param text [String] contents of reply excluding reply_prefix
# @param opts [Hash] additional params to pass to twitter gem
# @return the client's response (the new tweet for tweet replies), or false
#   when the author looks like a bot and no reply is sent
# @raise [ArgumentError] if ev is neither a tweet nor a direct message
def reply(ev, text, opts={})
  opts = opts.clone # never mutate the caller's hash

  if ev.is_a? Twitter::DirectMessage
    log "Sending DM to @#{ev.sender.screen_name}: #{text}"
    twitter.create_direct_message(ev.sender.screen_name, text, opts)
  elsif ev.is_a? Twitter::Tweet
    tweet_meta = meta(ev)

    if conversation(ev).is_bot?(ev.user.screen_name)
      log "Not replying to suspected bot @#{ev.user.screen_name}"
      return false
    end

    # Add the mention prefix unless the text already addresses the author.
    text = tweet_meta.reply_prefix + text unless text.match(/@#{Regexp.escape ev.user.screen_name}/i)
    log "Replying to @#{ev.user.screen_name} with: #{text}"
    tweet = twitter.update(text, opts.merge(in_reply_to_status_id: ev.id))
    conversation(tweet).add(tweet)
    tweet
  else
    raise ArgumentError, "Don't know how to reply to a #{ev.class}"
  end
end
421
+
422
# Favorites a tweet through the REST client; a Forbidden error from the
# client is logged as "already favorited" instead of being raised.
# @param tweet [Twitter::Tweet]
def favorite(tweet)
  summary = "#{tweet.user.screen_name}: #{tweet.text}"
  log "Favoriting @#{summary}"

  begin
    twitter.favorite(tweet.id)
  rescue Twitter::Error::Forbidden
    log "Already favorited: #{summary}"
  end
end
433
+
434
# Retweets a tweet through the REST client; a Forbidden error from the
# client is logged as "already retweeted" instead of being raised.
# @param tweet [Twitter::Tweet]
def retweet(tweet)
  summary = "#{tweet.user.screen_name}: #{tweet.text}"
  log "Retweeting @#{summary}"

  begin
    twitter.retweet(tweet.id)
  rescue Twitter::Error::Forbidden
    log "Already retweeted: #{summary}"
  end
end
445
+
446
# Logs the action, then forwards to the REST client's follow.
# @param user [String] username or user id
# @param args extra arguments passed through to the client
def follow(user, *args)
  log("Following #{user}")
  client = twitter
  client.follow(user, *args)
end
452
+
453
# Logs the action, then forwards to the REST client's unfollow.
# @param user [String] username or user id
# @param args extra arguments passed through to the client
def unfollow(user, *args)
  log("Unfollowing #{user}")
  client = twitter
  client.unfollow(user, *args)
end
459
+
460
# Logs the text, then posts it with the REST client's update.
# @param text [String]
# @param args extra arguments passed through to the client
def tweet(text, *args)
  log("Tweeting '#{text}'")
  client = twitter
  client.update(text, *args)
end
466
+
467
# Created on first use and reused on later calls.
# @return [Rufus::Scheduler]
def scheduler
  return @scheduler if @scheduler

  @scheduler = Rufus::Scheduler.new
end
472
+
473
# Tweet some text with an image
# @param txt [String]
# @param pic [String] filename
# @param args extra arguments passed through to update_with_media
# @return the client's response
def pictweet(txt, pic, *args)
  log "Tweeting #{txt.inspect} - #{pic} #{args}"
  # Block form closes the image file once the upload call returns.
  File.open(pic) do |image|
    twitter.update_with_media(txt, image, *args)
  end
end
480
+ end
481
+ end