twitter_ebooks 2.2.6 → 2.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec +1 -0
- data/Gemfile +1 -1
- data/README.md +2 -1
- data/lib/twitter_ebooks/bot.rb +20 -15
- data/lib/twitter_ebooks/model.rb +26 -25
- data/lib/twitter_ebooks/nlp.rb +1 -1
- data/lib/twitter_ebooks/version.rb +1 -1
- data/lib/twitter_ebooks.rb +0 -2
- data/skeleton/bots.rb +1 -0
- data/spec/data/0xabad1dea.json +203945 -0
- data/spec/data/0xabad1dea.model +0 -0
- data/spec/memprof.rb +37 -0
- data/spec/model_spec.rb +14 -0
- data/spec/spec_helper.rb +6 -0
- data/twitter_ebooks.gemspec +6 -3
- metadata +49 -11
- data/script/process_anc_data.rb +0 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 69ac58617389d783dde49b9295bc12f00380ee2f
|
4
|
+
data.tar.gz: 6afd456d5eae6d74a8330240c40e427640255dda
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4990cbbe8e4fa74bbf8b935598474d245de148464679b85a1908abe9612653849981f38343fb7f1706b241cc85ba166d45a89a8ee030d43878c91361f5d2dff4
|
7
|
+
data.tar.gz: 67bdec6974c5aea4bebf299045653b3f71cb3a69becd49b926c956bd22da546d01fc276334034c39175a9f329bf325535344a9e77f2036ab76085b95073c49a1
|
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# twitter\_ebooks 2.2.
|
1
|
+
# twitter\_ebooks 2.2.7
|
2
2
|
|
3
3
|
Rewrite of my twitter\_ebooks code. While the original was solely a tweeting Markov generator, this framework helps you build any kind of interactive twitterbot which responds to mentions/DMs. See [ebooks\_example](https://github.com/mispy/ebooks_example) for an example of a full bot.
|
4
4
|
|
@@ -50,6 +50,7 @@ Ebooks::Bot.new("abby_ebooks") do |bot|
|
|
50
50
|
# Tweet something every 24 hours
|
51
51
|
# See https://github.com/jmettraux/rufus-scheduler
|
52
52
|
# bot.tweet("hi")
|
53
|
+
# bot.pictweet("hi", "cuteselfie.jpg", ":possibly_sensitive => true")
|
53
54
|
end
|
54
55
|
end
|
55
56
|
```
|
data/lib/twitter_ebooks/bot.rb
CHANGED
@@ -43,16 +43,15 @@ module Ebooks
|
|
43
43
|
config.oauth_token_secret = @oauth_token_secret
|
44
44
|
end
|
45
45
|
|
46
|
-
Twitter.
|
46
|
+
@twitter = Twitter::REST::Client.new do |config|
|
47
47
|
config.consumer_key = @consumer_key
|
48
48
|
config.consumer_secret = @consumer_secret
|
49
|
-
config.
|
50
|
-
config.
|
49
|
+
config.access_token = @access_token
|
50
|
+
config.access_token_secret = @access_token_secret
|
51
51
|
end
|
52
52
|
|
53
53
|
needs_stream = [@on_follow, @on_message, @on_mention, @on_timeline].any? {|e| !e.nil?}
|
54
54
|
|
55
|
-
@twitter = Twitter::Client.new
|
56
55
|
@stream = TweetStream::Client.new if needs_stream
|
57
56
|
end
|
58
57
|
|
@@ -89,19 +88,19 @@ module Ebooks
|
|
89
88
|
end
|
90
89
|
|
91
90
|
@stream.userstream do |ev|
|
92
|
-
next unless ev
|
93
|
-
next if ev
|
91
|
+
next unless ev.text # If it's not a text-containing tweet, ignore it
|
92
|
+
next if ev.user.screen_name == @username # Ignore our own tweets
|
94
93
|
|
95
94
|
meta = {}
|
96
95
|
mentions = ev.attrs[:entities][:user_mentions].map { |x| x[:screen_name] }
|
97
96
|
|
98
97
|
reply_mentions = mentions.reject { |m| m.downcase == @username.downcase }
|
99
|
-
reply_mentions = [ev
|
98
|
+
reply_mentions = [ev.user.screen_name] + reply_mentions
|
100
99
|
|
101
100
|
meta[:reply_prefix] = reply_mentions.uniq.map { |m| '@'+m }.join(' ') + ' '
|
102
101
|
meta[:limit] = 140 - meta[:reply_prefix].length
|
103
102
|
|
104
|
-
mless = ev
|
103
|
+
mless = ev.text
|
105
104
|
begin
|
106
105
|
ev.attrs[:entities][:user_mentions].reverse.each do |entity|
|
107
106
|
last = mless[entity[:indices][1]..-1]||''
|
@@ -109,7 +108,7 @@ module Ebooks
|
|
109
108
|
end
|
110
109
|
rescue Exception
|
111
110
|
p ev.attrs[:entities][:user_mentions]
|
112
|
-
p ev
|
111
|
+
p ev.text
|
113
112
|
raise
|
114
113
|
end
|
115
114
|
meta[:mentionless] = mless
|
@@ -118,8 +117,8 @@ module Ebooks
|
|
118
117
|
# - The tweet mentions list contains our username
|
119
118
|
# - The tweet is not being retweeted by somebody else
|
120
119
|
# - Or soft-retweeted by somebody else
|
121
|
-
if mentions.map(&:downcase).include?(@username.downcase) && !ev
|
122
|
-
log "Mention from @#{ev
|
120
|
+
if mentions.map(&:downcase).include?(@username.downcase) && !ev.retweeted_status && !ev.text.start_with?('RT ')
|
121
|
+
log "Mention from @#{ev.user.screen_name}: #{ev.text}"
|
123
122
|
@on_mention.call(ev, meta) if @on_mention
|
124
123
|
else
|
125
124
|
@on_timeline.call(ev, meta) if @on_timeline
|
@@ -140,11 +139,11 @@ module Ebooks
|
|
140
139
|
opts = opts.clone
|
141
140
|
|
142
141
|
if ev.is_a? Twitter::DirectMessage
|
143
|
-
log "Sending DM to @#{ev
|
144
|
-
@twitter.direct_message_create(ev
|
142
|
+
log "Sending DM to @#{ev.sender.screen_name}: #{text}"
|
143
|
+
@twitter.direct_message_create(ev.sender.screen_name, text, opts)
|
145
144
|
elsif ev.is_a? Twitter::Tweet
|
146
|
-
log "Replying to @#{ev
|
147
|
-
@twitter.update(text, in_reply_to_status_id: ev
|
145
|
+
log "Replying to @#{ev.user.screen_name} with: #{text}"
|
146
|
+
@twitter.update(text, in_reply_to_status_id: ev.id)
|
148
147
|
else
|
149
148
|
raise Exception("Don't know how to reply to a #{ev.class}")
|
150
149
|
end
|
@@ -164,6 +163,12 @@ module Ebooks
|
|
164
163
|
@twitter.update(*args)
|
165
164
|
end
|
166
165
|
|
166
|
+
# could easily just be *args however the separation keeps it clean.
|
167
|
+
def pictweet(txt, pic, *args)
|
168
|
+
log "Tweeting #{txt.inspect} - #{pic} #{args}"
|
169
|
+
@twitter.update_with_media(txt, File.new(pic), *args)
|
170
|
+
end
|
171
|
+
|
167
172
|
def on_startup(&b); @on_startup = b; end
|
168
173
|
def on_follow(&b); @on_follow = b; end
|
169
174
|
def on_mention(&b); @on_mention = b; end
|
data/lib/twitter_ebooks/model.rb
CHANGED
@@ -18,14 +18,28 @@ module Ebooks
|
|
18
18
|
Marshal.load(File.open(path, 'rb') { |f| f.read })
|
19
19
|
end
|
20
20
|
|
21
|
+
def mass_tokenize(text)
|
22
|
+
sentences = NLP.sentences(text)
|
23
|
+
tokens = []
|
24
|
+
|
25
|
+
sentences.each do |s|
|
26
|
+
tokens << NLP.tokenize(s).reject do |t|
|
27
|
+
# Don't include usernames/urls as tokens
|
28
|
+
t.include?('@') || t.include?('http')
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
tokens
|
33
|
+
end
|
34
|
+
|
21
35
|
def consume(path)
|
22
36
|
content = File.read(path, :encoding => 'utf-8')
|
23
37
|
@hash = Digest::MD5.hexdigest(content)
|
24
38
|
|
25
39
|
if path.split('.')[-1] == "json"
|
26
40
|
log "Reading json corpus from #{path}"
|
27
|
-
lines = JSON.parse(content
|
28
|
-
tweet[
|
41
|
+
lines = JSON.parse(content).map do |tweet|
|
42
|
+
tweet['text']
|
29
43
|
end
|
30
44
|
elsif path.split('.')[-1] == "csv"
|
31
45
|
log "Reading CSV corpus from #{path}"
|
@@ -42,41 +56,28 @@ module Ebooks
|
|
42
56
|
|
43
57
|
log "Removing commented lines and sorting mentions"
|
44
58
|
|
45
|
-
|
59
|
+
statements = []
|
46
60
|
mentions = []
|
47
61
|
lines.each do |l|
|
48
62
|
next if l.start_with?('#') # Remove commented lines
|
49
63
|
next if l.include?('RT') || l.include?('MT') # Remove soft retweets
|
50
|
-
|
64
|
+
|
51
65
|
if l.include?('@')
|
52
|
-
|
66
|
+
statements << NLP.normalize(l)
|
53
67
|
else
|
54
|
-
|
68
|
+
mentions << NLP.normalize(l)
|
55
69
|
end
|
56
70
|
end
|
57
|
-
text = NLP.normalize(keeping.join("\n")) # Normalize weird characters
|
58
|
-
mention_text = NLP.normalize(mentions.join("\n"))
|
59
71
|
|
60
|
-
|
72
|
+
text = statements.join("\n")
|
73
|
+
mention_text = mentions.join("\n")
|
61
74
|
|
62
|
-
statements =
|
63
|
-
mentions = NLP.sentences(mention_text)
|
75
|
+
lines = nil; statements = nil; mentions = nil # Allow garbage collection
|
64
76
|
|
65
|
-
log "Tokenizing #{
|
66
|
-
@sentences = []
|
67
|
-
@mentions = []
|
77
|
+
log "Tokenizing #{text.count('\n')} statements and #{mention_text.count('\n')} mentions"
|
68
78
|
|
69
|
-
|
70
|
-
|
71
|
-
t.include?('@') || t.include?('http')
|
72
|
-
end
|
73
|
-
end
|
74
|
-
|
75
|
-
mentions.each do |s|
|
76
|
-
@mentions << NLP.tokenize(s).reject do |t|
|
77
|
-
t.include?('@') || t.include?('http')
|
78
|
-
end
|
79
|
-
end
|
79
|
+
@sentences = mass_tokenize(text)
|
80
|
+
@mentions = mass_tokenize(mention_text)
|
80
81
|
|
81
82
|
log "Ranking keywords"
|
82
83
|
@keywords = NLP.keywords(@sentences)
|
data/lib/twitter_ebooks/nlp.rb
CHANGED
data/lib/twitter_ebooks.rb
CHANGED
data/skeleton/bots.rb
CHANGED