twitter_ebooks 2.0.0
- data/.gitignore +1 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +78 -0
- data/NOTES.md +4 -0
- data/README.md +20 -0
- data/bin/ebooks +83 -0
- data/data/adjectives.txt +1466 -0
- data/data/nouns.txt +2193 -0
- data/data/stopwords.txt +639 -0
- data/lib/twitter_ebooks/archiver.rb +86 -0
- data/lib/twitter_ebooks/bot.rb +145 -0
- data/lib/twitter_ebooks/markov.rb +89 -0
- data/lib/twitter_ebooks/model.rb +147 -0
- data/lib/twitter_ebooks/nlp.rb +142 -0
- data/lib/twitter_ebooks/version.rb +3 -0
- data/lib/twitter_ebooks.rb +20 -0
- data/skeleton/Procfile +1 -0
- data/skeleton/bots.rb +47 -0
- data/skeleton/corpus/README.md +1 -0
- data/skeleton/model/README.md +1 -0
- data/test/corpus/0xabad1dea.tweets +14696 -0
- data/test/tokenize.rb +18 -0
- data/twitter_ebooks.gemspec +30 -0
- metadata +247 -0
data/lib/twitter_ebooks/bot.rb
ADDED
@@ -0,0 +1,145 @@
+#!/usr/bin/env ruby
+require 'twitter'
+require 'tweetstream'
+require 'rufus/scheduler'
+
+module Ebooks
+  class Bot
+    attr_accessor :consumer_key, :consumer_secret,
+                  :oauth_token, :oauth_token_secret
+
+    attr_accessor :username
+
+    attr_reader :twitter, :stream
+
+    @@all = [] # List of all defined bots
+    def self.all; @@all; end
+
+    def initialize(username, &b)
+      # Set defaults
+      @username = username
+
+      # Override with callback
+      b.call(self)
+
+      Bot.all.push(self)
+    end
+
+    def log(*args)
+      STDERR.puts "@#{@username}: " + args.map(&:to_s).join(' ')
+      STDERR.flush
+    end
+
+    # Connects to tweetstream and opens event handlers for this bot
+    def start
+      TweetStream.configure do |config|
+        config.consumer_key = @consumer_key
+        config.consumer_secret = @consumer_secret
+        config.oauth_token = @oauth_token
+        config.oauth_token_secret = @oauth_token_secret
+      end
+
+      Twitter.configure do |config|
+        config.consumer_key = @consumer_key
+        config.consumer_secret = @consumer_secret
+        config.oauth_token = @oauth_token
+        config.oauth_token_secret = @oauth_token_secret
+      end
+
+      @twitter = Twitter::Client.new
+      @stream = TweetStream::Client.new
+
+      @stream.on_error do |msg|
+        log "ERROR: #{msg}"
+      end
+
+      @stream.on_inited do
+        log "Online!"
+      end
+
+      @stream.on_event(:follow) do |event|
+        log "Followed by #{event[:source][:screen_name]}"
+        @on_follow.call(event[:source])
+      end
+
+      @stream.on_direct_message do |dm|
+        next if dm[:sender][:screen_name] == @username # Don't reply to self
+        log "DM from @#{dm[:sender][:screen_name]}: #{dm[:text]}"
+        @on_message.call(dm)
+      end
+
+      @stream.userstream do |ev|
+        next unless ev[:text] # If it's not a text-containing tweet, ignore it
+        next if ev[:user][:screen_name] == @username # Ignore our own tweets
+
+        meta = {}
+        mentions = ev.attrs[:entities][:user_mentions].map { |x| x[:screen_name] }
+
+        reply_mentions = mentions.reject { |m| m.downcase == @username }
+        reply_mentions << ev[:user][:screen_name]
+
+        meta[:reply_prefix] = reply_mentions.uniq.map { |m| '@'+m }.join(' ') + ' '
+
+        # Remove mention entities from the tweet text, walking them in
+        # reverse so earlier indices stay valid
+        mless = ev[:text]
+        ev.attrs[:entities][:user_mentions].reverse.each do |entity|
+          mless = mless[0...entity[:indices][0]] + mless[entity[:indices][1]+1..-1]
+        end
+        meta[:mentionless] = mless
+
+        # To check if this is a mention, ensure:
+        # - The tweet mentions list contains our username
+        # - The tweet is not being retweeted by somebody else
+        # - Or soft-retweeted by somebody else
+        if mentions.map(&:downcase).include?(@username) && !ev[:retweeted_status] && !ev[:text].start_with?('RT ')
+          log "Mention from #{ev[:user][:screen_name]}: #{ev[:text]}"
+          @on_mention.call(ev, meta)
+        else
+          @on_timeline.call(ev, meta)
+        end
+      end
+    end
+
+    # Wrapper for EM.add_timer
+    # Delays add a greater sense of humanity to bot behaviour
+    def delay(time, &b)
+      time = time.to_a.sample unless time.is_a? Integer
+      EM.add_timer(time, &b)
+    end
+
+    # Reply to a tweet or a DM.
+    # Applies the configurable @reply_delay range
+    def reply(ev, text, opts={})
+      opts = opts.clone
+      delay = @reply_delay.to_a.sample
+
+      if ev.is_a? Twitter::DirectMessage
+        log "Sending DM to @#{ev[:sender][:screen_name]}: #{text}"
+        @twitter.direct_message_create(ev[:sender][:screen_name], text, opts)
+      elsif ev.is_a? Twitter::Tweet
+        log "Replying to @#{ev[:user][:screen_name]}: #{text}"
+        @twitter.update(text, in_reply_to_status_id: ev[:id])
+      else
+        raise Exception.new("Don't know how to reply to a #{ev.class}")
+      end
+    end
+
+    def scheduler
+      @scheduler ||= Rufus::Scheduler.new
+    end
+
+    def follow(*args)
+      log "Following #{args}"
+      @twitter.follow(*args)
+    end
+
+    def tweet(*args)
+      log "Tweeting #{args.inspect}"
+      @twitter.update(*args)
+    end
+
+    def on_follow(&b); @on_follow = b; end
+    def on_mention(&b); @on_mention = b; end
+    def on_timeline(&b); @on_timeline = b; end
+    def on_message(&b); @on_message = b; end
+  end
+end
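
As a standalone illustration (not part of the diff; the tweet text, screen names, and entity indices below are assumed sample values), this is what the userstream handler above computes for an incoming mention. Suppose the bot runs as @example_ebooks and @author tweets "@example_ebooks @friend check this out":

    username = "example_ebooks"
    text     = "@example_ebooks @friend check this out"
    entities = [{ screen_name: "example_ebooks", indices: [0, 15] },
                { screen_name: "friend",         indices: [16, 23] }]

    # Everyone to address in a reply, except the bot itself
    mentions = entities.map { |x| x[:screen_name] }
    reply_mentions = mentions.reject { |m| m.downcase == username }
    reply_mentions << "author" # the sender is always addressed
    reply_mentions.uniq.map { |m| '@' + m }.join(' ') + ' '
    # => "@friend @author "

    # Strip mention entities out of the text, in reverse index order
    mless = text.dup
    entities.reverse.each do |e|
      mless = mless[0...e[:indices][0]] + mless[e[:indices][1]+1..-1]
    end
    mless # => "check this out"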
data/lib/twitter_ebooks/markov.rb
ADDED
@@ -0,0 +1,89 @@
+module Ebooks
+  class MarkovModel
+    INTERIM = :interim # Special token marking newline/^/$ boundaries
+
+    attr_accessor :tokens
+    attr_reader :depth
+
+    def represent(token)
+      if token.nil? || token == "\n" || token.empty?
+        INTERIM
+      else
+        token
+      end
+    end
+
+    def consume(tokenized, depth=2)
+      @tokens = [INTERIM]
+      @depth = depth
+
+      tokenized.each do |tokens|
+        @tokens += tokens
+        @tokens << INTERIM
+      end
+
+      @model = {}
+
+      @tokens.each_with_index do |token, i|
+        # Gather up to @depth preceding tokens
+        prev_tokens = []
+        @depth.downto(1) do |j|
+          next if i-j < 0
+          prev_tokens << represent(@tokens[i-j])
+        end
+
+        # Record this token as a continuation of each trailing ngram
+        1.upto(@depth) do |j|
+          break if j > prev_tokens.length
+          ngram = prev_tokens.last(j)
+
+          unless ngram == INTERIM && prev_tokens[-1] == INTERIM
+            @model[ngram] ||= []
+            @model[ngram] << represent(token)
+          end
+        end
+      end
+
+      self
+    end
+
+    def chain(tokens)
+      next_token = nil
+      # Prefer the longest matching context
+      @depth.downto(1).each do |i|
+        next if tokens.length < i
+        matches = @model[tokens.last(i)]
+        if matches
+          next_token = matches.sample
+          break
+        end
+      end
+
+      raise ArgumentError if next_token.nil?
+
+      if next_token == INTERIM
+        return tokens
+      else
+        return chain(tokens + [next_token])
+      end
+    end
+
+    def generate
+      tokens = chain([@model[[INTERIM]].sample])
+      NLP.reconstruct(tokens)
+    end
+
+    def serialize
+      { 'model' => @model,
+        'depth' => @depth }
+    end
+
+    def deserialize(data)
+      @model = data['model']
+      @depth = data['depth']
+      self
+    end
+  end
+end
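
A quick sketch (not in the diff; the sample sentences are assumptions) of driving MarkovModel directly. Input is an array of tokenized sentences, the same shape Model#consume produces via NLP.tokenize:

    require 'twitter_ebooks'

    tokenized = [
      %w(the cat sat on the mat .),
      %w(the dog sat on the rug .)
    ]

    markov = Ebooks::MarkovModel.new.consume(tokenized, 2)
    markov.generate
    # A random walk over 1- and 2-token contexts, e.g. "the cat sat on the rug."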
data/lib/twitter_ebooks/model.rb
ADDED
@@ -0,0 +1,147 @@
+#!/usr/bin/env ruby
+# encoding: utf-8
+
+require 'json'
+require 'set'
+require 'digest/md5'
+
+module Ebooks
+  class Model
+    attr_accessor :hash, :sentences, :tokenized, :markov
+
+    def self.consume(txtpath)
+      Model.new.consume(txtpath)
+    end
+
+    def self.load(path)
+      data = Marshal.load(File.read(path))
+      Model.new.deserialize(data)
+    end
+
+    def consume(txtpath)
+      # Record hash of source file so we know to update later
+      @hash = Digest::MD5.hexdigest(File.read(txtpath))
+
+      text = File.read(txtpath)
+      log "Removing commented lines and mentions"
+
+      lines = text.split("\n")
+      keeping = []
+      lines.each do |l|
+        next if l.start_with?('#') || l.include?('RT')
+        processed = l.split.reject { |w| w.include?('@') || w.include?('http') }
+        keeping << processed.join(' ')
+      end
+      text = NLP.normalize(keeping.join("\n"))
+
+      log "Segmenting text into sentences of 140 characters or less"
+      @sentences = NLP.sentences(text).reject do |s|
+        s.length > 140 || s.count('"')%2 != 0
+      end
+
+      log "Tokenizing #{@sentences.length} sentences"
+      @tokenized = @sentences.map { |sent| NLP.tokenize(sent) }
+      @tokensets = @tokenized.map { |tokens| NLP.tokenset(tokens) }
+
+      log "Building markov model (this may take a while)"
+      @markov = MarkovModel.new.consume(@tokenized)
+
+      self
+    end
+
+    # Produces a hash with the data needed to quickly
+    # reconstruct this corpus object
+    def serialize
+      return { 'hash' => @hash,
+               'tokenized' => @tokenized,
+               'tokensets' => @tokensets,
+               'markov' => @markov.serialize }
+    end
+
+    def save(path)
+      data = self.serialize
+      File.open(path, 'w') do |f|
+        f.write(Marshal.dump(data))
+      end
+      self
+    end
+
+    def deserialize(data)
+      @hash = data['hash']
+      @tokenized = data['tokenized']
+      @tokensets = data['tokensets']
+      @markov = MarkovModel.new.deserialize(data['markov'])
+      self
+    end
+
+    def replace_noun(sent)
+      tagged = NLP.tagger.add_tags(sent)
+
+      nouns = tagged.scan(/<nn>([^<]+)<\/nn>/).flatten
+      to_replace = nouns.reject { |n| ['much'].include?(n) }.sample
+      return sent if to_replace.nil?
+      replacement = NLP.nouns.sample
+      if to_replace.en.plural.length <= to_replace.length
+        replacement = replacement.en.plural(1)
+      end
+      sent = sent.gsub(/(?<=\W)#{to_replace}(?=\W)/, replacement)
+      sent.gsub(/(?<=\W)(a|an) #{replacement}(?=\W)/, replacement.en.a)
+    end
+
+    def fix(tweet)
+      # This seems to require an external api call
+      begin
+        fixer = NLP.gingerice.parse(tweet)
+        log fixer if fixer['corrections']
+        tweet = fixer['result']
+      rescue Exception => e
+        log e.message
+        log e.backtrace
+      end
+
+      NLP.htmlentities.decode tweet
+    end
+
+    def markov_statement(limit=140, markov=nil)
+      markov ||= @markov
+      tweet = ""
+
+      while (tweet = markov.generate) do
+        next if tweet.length > limit
+        next if NLP.unmatched_enclosers?(tweet)
+        break if tweet.length > limit*0.4 || rand > 0.8
+      end
+
+      fix tweet
+    end
+
+    # Generates a response by looking for related sentences
+    # in the corpus and building a smaller markov model from these
+    def markov_response(input, limit=140)
+      inputset = NLP.tokenset(input)
+      log "Input tokenset: #{inputset.to_a}"
+
+      if inputset.empty?
+        # Very uninteresting input; no relevant response possible
+        return markov_statement(limit)
+      end
+
+      # Let's find all the sentences that might be relevant
+      relevant = []
+      @tokensets.each_with_index do |set, i|
+        if inputset.intersection(set).length > 0
+          relevant << @tokenized[i]
+        end
+      end
+
+      log "Found #{relevant.length} relevant tokenset matches"
+
+      if relevant.length < 3
+        return markov_statement(limit)
+      end
+
+      markov = MarkovModel.new.consume(relevant.sample(100))
+      markov_statement(limit, markov)
+    end
+  end
+end
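
The intended end-to-end flow, as a sketch (not in the diff; the paths are hypothetical, and the `ebooks consume`/`ebooks gen` commands described in the skeleton READMEs wrap the same calls). Note that Model#fix calls out to an external proofreading API via Gingerice, so generation wants network access:

    require 'twitter_ebooks'

    model = Ebooks::Model.consume("corpus/tweets.txt") # one tweet per line
    model.save("model/tweets.model")

    model = Ebooks::Model.load("model/tweets.model")
    model.markov_statement(140)                # free-form statement
    model.markov_response("what about pasta?") # reply built from related sentences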
data/lib/twitter_ebooks/nlp.rb
ADDED
@@ -0,0 +1,142 @@
+# encoding: utf-8
+
+require 'linguistics'
+Linguistics.use(:en, classes: [String])
+
+module Ebooks
+  module NLP
+    # We don't necessarily want to use all of this stuff all the time
+    # Only load it when it is needed
+
+    def self.stopwords
+      @stopwords ||= File.read(File.join(DATA_PATH, 'stopwords.txt')).split
+    end
+
+    def self.nouns
+      @nouns ||= File.read(File.join(DATA_PATH, 'nouns.txt')).split
+    end
+
+    def self.adjectives
+      @adjectives ||= File.read(File.join(DATA_PATH, 'adjectives.txt')).split
+    end
+
+    def self.tokenizer
+      # This tokenizer is used for dividing sentences into words
+      # It's too slow for finding sentences in paragraphs, hence tactful
+      require 'tokenizer'
+      @tokenizer ||= Tokenizer::Tokenizer.new(:en)
+    end
+
+    def self.tactful
+      require 'tactful_tokenizer'
+      @tactful ||= TactfulTokenizer::Model.new
+    end
+
+    def self.tagger
+      require 'engtagger'
+      @tagger ||= EngTagger.new
+    end
+
+    def self.stemmer
+      require 'lingua/stemmer'
+      @stemmer ||= Lingua::Stemmer.new
+    end
+
+    def self.gingerice
+      require 'gingerice'
+      Gingerice::Parser.new # No caching for this one
+    end
+
+    def self.htmlentities
+      require 'htmlentities'
+      @htmlentities ||= HTMLEntities.new
+    end
+
+    ### Utility functions which wrap the above
+
+    def self.sentences(text)
+      tactful.tokenize_text(text)
+    end
+
+    def self.normalize(text)
+      htmlentities.decode text.gsub('“', '"').gsub('”', '"').gsub('’', "'").gsub('…', '...')
+    end
+
+    def self.tokenize(sentence)
+      # This is hacky, but an ad hoc approach seems to be
+      # most reliable for now. Tokenization libraries have oddities
+      # that are hard to correct.
+      sentence.split(/\s/).map do |token|
+        exceptions = [/^\w\)$/, /^@/, /^#/, /^:\w$/, /^http/]
+        if exceptions.find { |r| r.match(token) }
+          token
+        else
+          token.split(/(?<=^[#{PUNCTUATION}])(?=[a-zA-Z])|(?<=[a-zA-Z])(?=[#{PUNCTUATION}]+$)/)
+        end
+      end.flatten
+    end
+
+    def self.tokenset(sentence)
+      tokens = sentence.is_a?(Array) ? sentence : tokenize(sentence)
+      tokens.map(&:downcase)
+            .reject { |token| stopwords.include?(token) }
+            .to_set
+    end
+
+    def self.space_between?(token1, token2)
+      p1 = self.punctuation?(token1)
+      p2 = self.punctuation?(token2)
+      if p1 && p2 # "foo?!"
+        false
+      elsif !p1 && p2 # "foo."
+        false
+      elsif p1 && !p2 # "foo. rah"
+        true
+      else # "foo rah"
+        true
+      end
+    end
+
+    def self.reconstruct(tokens)
+      # Put tokens back together into a nice looking sentence
+      text = ""
+      last_token = nil
+      tokens.each do |token|
+        text += ' ' if last_token && space_between?(last_token, token)
+        text += token
+        last_token = token
+      end
+      text
+    end
+
+    # Deliberately limit our punctuation handling to stuff we can do consistently
+    # It'll just be a part of a token if we don't split it out, and that's fine
+    PUNCTUATION = ".?!,"
+
+    def self.punctuation?(token)
+      (token.chars.to_set - PUNCTUATION.chars.to_set).empty?
+    end
+
+    def self.unmatched_enclosers?(text)
+      # Weird quotes are an instant giveaway. Let's do paren-matching.
+      enclosers = ['**', '""', '()', '[]', '``']
+      enclosers.each do |pair|
+        starter = Regexp.new('(\W|^)' + Regexp.escape(pair[0]) + '\S')
+        ender = Regexp.new('\S' + Regexp.escape(pair[1]) + '(\W|$)')
+
+        opened = 0
+
+        tokenize(text).each do |token|
+          opened += 1 if token.match(starter)
+          opened -= 1 if token.match(ender)
+
+          return true if opened < 0 # Too many ends!
+        end
+
+        return true if opened != 0 # Mismatch somewhere.
+      end
+
+      false
+    end
+  end
+end
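
A few examples (assumed, not in the diff) of what these helpers produce. tokenize only splits out the characters listed in PUNCTUATION, and reconstruct is roughly its inverse:

    require 'twitter_ebooks'

    tokens = Ebooks::NLP.tokenize("Hello there, world!")
    # => ["Hello", "there", ",", "world", "!"]

    Ebooks::NLP.reconstruct(tokens)
    # => "Hello there, world!"

    # Downcased, stopwords dropped (assuming "the" and "on" are in stopwords.txt)
    Ebooks::NLP.tokenset("The cat sat on the mat")
    # => #<Set: {"cat", "sat", "mat"}>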
data/lib/twitter_ebooks.rb
ADDED
@@ -0,0 +1,20 @@
+gem 'minitest'
+
+def log(*args)
+  STDERR.puts args.map(&:to_s).join(' ')
+  STDERR.flush
+end
+
+module Ebooks
+  GEM_PATH = File.expand_path(File.join(File.dirname(__FILE__), '..'))
+  DATA_PATH = File.join(GEM_PATH, 'data')
+  SKELETON_PATH = File.join(GEM_PATH, 'skeleton')
+  TEST_PATH = File.join(GEM_PATH, 'test')
+  TEST_CORPUS_PATH = File.join(TEST_PATH, 'corpus/0xabad1dea.tweets')
+end
+
+require 'twitter_ebooks/nlp'
+require 'twitter_ebooks/archiver'
+require 'twitter_ebooks/markov'
+require 'twitter_ebooks/model'
+require 'twitter_ebooks/bot'
data/skeleton/Procfile
ADDED
@@ -0,0 +1 @@
+worker: ruby bots.rb start
data/skeleton/bots.rb
ADDED
@@ -0,0 +1,47 @@
+#!/usr/bin/env ruby
+
+require 'twitter_ebooks'
+
+# This is an example bot definition with event handlers commented out
+# You can define as many of these as you like; they will run simultaneously
+
+Ebooks::Bot.new("{{BOT_NAME}}") do |bot|
+  # Consumer details come from registering an app at https://dev.twitter.com/
+  # OAuth details can be fetched with https://github.com/marcel/twurl
+  bot.consumer_key = "" # Your app consumer key
+  bot.consumer_secret = "" # Your app consumer secret
+  bot.oauth_token = "" # Token connecting the app to this account
+  bot.oauth_token_secret = "" # Secret connecting the app to this account
+
+  bot.on_message do |dm|
+    # Reply to a DM
+    # bot.reply(dm, "secret secrets")
+  end
+
+  bot.on_follow do |user|
+    # Follow a user back
+    # bot.follow(user[:screen_name])
+  end
+
+  bot.on_mention do |tweet, meta|
+    # Reply to a mention
+    # bot.reply(tweet, meta[:reply_prefix] + "oh hullo")
+  end
+
+  bot.on_timeline do |tweet, meta|
+    # Reply to a tweet in the bot's timeline
+    # bot.reply(tweet, meta[:reply_prefix] + "nice tweet")
+  end
+
+  bot.scheduler.every '24h' do
+    # Tweet something every 24 hours
+    # See https://github.com/jmettraux/rufus-scheduler
+    # bot.tweet("hi")
+  end
+end
+
+EM.run do
+  Ebooks::Bot.all.each do |bot|
+    bot.start
+  end
+end
data/skeleton/corpus/README.md
ADDED
@@ -0,0 +1 @@
+Put raw text files in here and process them with `ebooks consume` to make Markov models.
data/skeleton/model/README.md
ADDED
@@ -0,0 +1 @@
+This is where the output of `ebooks consume <corpus_path>` goes. You can load these files with `Model.load(path)`, or use `ebooks gen <path>` to test them.