twitter_ebooks 2.2.6 → 2.2.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rspec +1 -0
- data/Gemfile +1 -1
- data/README.md +2 -1
- data/lib/twitter_ebooks/bot.rb +20 -15
- data/lib/twitter_ebooks/model.rb +26 -25
- data/lib/twitter_ebooks/nlp.rb +1 -1
- data/lib/twitter_ebooks/version.rb +1 -1
- data/lib/twitter_ebooks.rb +0 -2
- data/skeleton/bots.rb +1 -0
- data/spec/data/0xabad1dea.json +203945 -0
- data/spec/data/0xabad1dea.model +0 -0
- data/spec/memprof.rb +37 -0
- data/spec/model_spec.rb +14 -0
- data/spec/spec_helper.rb +6 -0
- data/twitter_ebooks.gemspec +6 -3
- metadata +49 -11
- data/script/process_anc_data.rb +0 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 69ac58617389d783dde49b9295bc12f00380ee2f
|
4
|
+
data.tar.gz: 6afd456d5eae6d74a8330240c40e427640255dda
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4990cbbe8e4fa74bbf8b935598474d245de148464679b85a1908abe9612653849981f38343fb7f1706b241cc85ba166d45a89a8ee030d43878c91361f5d2dff4
|
7
|
+
data.tar.gz: 67bdec6974c5aea4bebf299045653b3f71cb3a69becd49b926c956bd22da546d01fc276334034c39175a9f329bf325535344a9e77f2036ab76085b95073c49a1
|
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# twitter\_ebooks 2.2.
|
1
|
+
# twitter\_ebooks 2.2.7
|
2
2
|
|
3
3
|
Rewrite of my twitter\_ebooks code. While the original was solely a tweeting Markov generator, this framework helps you build any kind of interactive twitterbot which responds to mentions/DMs. See [ebooks\_example](https://github.com/mispy/ebooks_example) for an example of a full bot.
|
4
4
|
|
@@ -50,6 +50,7 @@ Ebooks::Bot.new("abby_ebooks") do |bot|
|
|
50
50
|
# Tweet something every 24 hours
|
51
51
|
# See https://github.com/jmettraux/rufus-scheduler
|
52
52
|
# bot.tweet("hi")
|
53
|
+
# bot.pictweet("hi", "cuteselfie.jpg", :possibly_sensitive => true)
|
53
54
|
end
|
54
55
|
end
|
55
56
|
```
|
data/lib/twitter_ebooks/bot.rb
CHANGED
@@ -43,16 +43,15 @@ module Ebooks
|
|
43
43
|
config.oauth_token_secret = @oauth_token_secret
|
44
44
|
end
|
45
45
|
|
46
|
-
Twitter.
|
46
|
+
@twitter = Twitter::REST::Client.new do |config|
|
47
47
|
config.consumer_key = @consumer_key
|
48
48
|
config.consumer_secret = @consumer_secret
|
49
|
-
config.
|
50
|
-
config.
|
49
|
+
config.access_token = @access_token
|
50
|
+
config.access_token_secret = @access_token_secret
|
51
51
|
end
|
52
52
|
|
53
53
|
needs_stream = [@on_follow, @on_message, @on_mention, @on_timeline].any? {|e| !e.nil?}
|
54
54
|
|
55
|
-
@twitter = Twitter::Client.new
|
56
55
|
@stream = TweetStream::Client.new if needs_stream
|
57
56
|
end
|
58
57
|
|
@@ -89,19 +88,19 @@ module Ebooks
|
|
89
88
|
end
|
90
89
|
|
91
90
|
@stream.userstream do |ev|
|
92
|
-
next unless ev
|
93
|
-
next if ev
|
91
|
+
next unless ev.text # If it's not a text-containing tweet, ignore it
|
92
|
+
next if ev.user.screen_name == @username # Ignore our own tweets
|
94
93
|
|
95
94
|
meta = {}
|
96
95
|
mentions = ev.attrs[:entities][:user_mentions].map { |x| x[:screen_name] }
|
97
96
|
|
98
97
|
reply_mentions = mentions.reject { |m| m.downcase == @username.downcase }
|
99
|
-
reply_mentions = [ev
|
98
|
+
reply_mentions = [ev.user.screen_name] + reply_mentions
|
100
99
|
|
101
100
|
meta[:reply_prefix] = reply_mentions.uniq.map { |m| '@'+m }.join(' ') + ' '
|
102
101
|
meta[:limit] = 140 - meta[:reply_prefix].length
|
103
102
|
|
104
|
-
mless = ev
|
103
|
+
mless = ev.text
|
105
104
|
begin
|
106
105
|
ev.attrs[:entities][:user_mentions].reverse.each do |entity|
|
107
106
|
last = mless[entity[:indices][1]..-1]||''
|
@@ -109,7 +108,7 @@ module Ebooks
|
|
109
108
|
end
|
110
109
|
rescue Exception
|
111
110
|
p ev.attrs[:entities][:user_mentions]
|
112
|
-
p ev
|
111
|
+
p ev.text
|
113
112
|
raise
|
114
113
|
end
|
115
114
|
meta[:mentionless] = mless
|
@@ -118,8 +117,8 @@ module Ebooks
|
|
118
117
|
# - The tweet mentions list contains our username
|
119
118
|
# - The tweet is not being retweeted by somebody else
|
120
119
|
# - Or soft-retweeted by somebody else
|
121
|
-
if mentions.map(&:downcase).include?(@username.downcase) && !ev
|
122
|
-
log "Mention from @#{ev
|
120
|
+
if mentions.map(&:downcase).include?(@username.downcase) && !ev.retweeted_status && !ev.text.start_with?('RT ')
|
121
|
+
log "Mention from @#{ev.user.screen_name}: #{ev.text}"
|
123
122
|
@on_mention.call(ev, meta) if @on_mention
|
124
123
|
else
|
125
124
|
@on_timeline.call(ev, meta) if @on_timeline
|
@@ -140,11 +139,11 @@ module Ebooks
|
|
140
139
|
opts = opts.clone
|
141
140
|
|
142
141
|
if ev.is_a? Twitter::DirectMessage
|
143
|
-
log "Sending DM to @#{ev
|
144
|
-
@twitter.direct_message_create(ev
|
142
|
+
log "Sending DM to @#{ev.sender.screen_name}: #{text}"
|
143
|
+
@twitter.direct_message_create(ev.sender.screen_name, text, opts)
|
145
144
|
elsif ev.is_a? Twitter::Tweet
|
146
|
-
log "Replying to @#{ev
|
147
|
-
@twitter.update(text, in_reply_to_status_id: ev
|
145
|
+
log "Replying to @#{ev.user.screen_name} with: #{text}"
|
146
|
+
@twitter.update(text, in_reply_to_status_id: ev.id)
|
148
147
|
else
|
149
148
|
raise Exception("Don't know how to reply to a #{ev.class}")
|
150
149
|
end
|
@@ -164,6 +163,12 @@ module Ebooks
|
|
164
163
|
@twitter.update(*args)
|
165
164
|
end
|
166
165
|
|
166
|
+
# This could easily take just *args; however, keeping txt and pic as separate parameters keeps it clean.
|
167
|
+
def pictweet(txt, pic, *args)
|
168
|
+
log "Tweeting #{txt.inspect} - #{pic} #{args}"
|
169
|
+
@twitter.update_with_media(txt, File.new(pic), *args)
|
170
|
+
end
|
171
|
+
|
167
172
|
def on_startup(&b); @on_startup = b; end
|
168
173
|
def on_follow(&b); @on_follow = b; end
|
169
174
|
def on_mention(&b); @on_mention = b; end
|
data/lib/twitter_ebooks/model.rb
CHANGED
@@ -18,14 +18,28 @@ module Ebooks
|
|
18
18
|
Marshal.load(File.open(path, 'rb') { |f| f.read })
|
19
19
|
end
|
20
20
|
|
21
|
+
def mass_tokenize(text)
|
22
|
+
sentences = NLP.sentences(text)
|
23
|
+
tokens = []
|
24
|
+
|
25
|
+
sentences.each do |s|
|
26
|
+
tokens << NLP.tokenize(s).reject do |t|
|
27
|
+
# Don't include usernames/urls as tokens
|
28
|
+
t.include?('@') || t.include?('http')
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
tokens
|
33
|
+
end
|
34
|
+
|
21
35
|
def consume(path)
|
22
36
|
content = File.read(path, :encoding => 'utf-8')
|
23
37
|
@hash = Digest::MD5.hexdigest(content)
|
24
38
|
|
25
39
|
if path.split('.')[-1] == "json"
|
26
40
|
log "Reading json corpus from #{path}"
|
27
|
-
lines = JSON.parse(content
|
28
|
-
tweet[
|
41
|
+
lines = JSON.parse(content).map do |tweet|
|
42
|
+
tweet['text']
|
29
43
|
end
|
30
44
|
elsif path.split('.')[-1] == "csv"
|
31
45
|
log "Reading CSV corpus from #{path}"
|
@@ -42,41 +56,28 @@ module Ebooks
|
|
42
56
|
|
43
57
|
log "Removing commented lines and sorting mentions"
|
44
58
|
|
45
|
-
|
59
|
+
statements = []
|
46
60
|
mentions = []
|
47
61
|
lines.each do |l|
|
48
62
|
next if l.start_with?('#') # Remove commented lines
|
49
63
|
next if l.include?('RT') || l.include?('MT') # Remove soft retweets
|
50
|
-
|
64
|
+
|
51
65
|
if l.include?('@')
|
52
|
-
|
66
|
+
statements << NLP.normalize(l)
|
53
67
|
else
|
54
|
-
|
68
|
+
mentions << NLP.normalize(l)
|
55
69
|
end
|
56
70
|
end
|
57
|
-
text = NLP.normalize(keeping.join("\n")) # Normalize weird characters
|
58
|
-
mention_text = NLP.normalize(mentions.join("\n"))
|
59
71
|
|
60
|
-
|
72
|
+
text = statements.join("\n")
|
73
|
+
mention_text = mentions.join("\n")
|
61
74
|
|
62
|
-
statements =
|
63
|
-
mentions = NLP.sentences(mention_text)
|
75
|
+
lines = nil; statements = nil; mentions = nil # Allow garbage collection
|
64
76
|
|
65
|
-
log "Tokenizing #{
|
66
|
-
@sentences = []
|
67
|
-
@mentions = []
|
77
|
+
log "Tokenizing #{text.count('\n')} statements and #{mention_text.count('\n')} mentions"
|
68
78
|
|
69
|
-
|
70
|
-
|
71
|
-
t.include?('@') || t.include?('http')
|
72
|
-
end
|
73
|
-
end
|
74
|
-
|
75
|
-
mentions.each do |s|
|
76
|
-
@mentions << NLP.tokenize(s).reject do |t|
|
77
|
-
t.include?('@') || t.include?('http')
|
78
|
-
end
|
79
|
-
end
|
79
|
+
@sentences = mass_tokenize(text)
|
80
|
+
@mentions = mass_tokenize(mention_text)
|
80
81
|
|
81
82
|
log "Ranking keywords"
|
82
83
|
@keywords = NLP.keywords(@sentences)
|
data/lib/twitter_ebooks/nlp.rb
CHANGED
data/lib/twitter_ebooks.rb
CHANGED
data/skeleton/bots.rb
CHANGED