twitter_ebooks 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +78 -0
- data/NOTES.md +4 -0
- data/README.md +20 -0
- data/bin/ebooks +83 -0
- data/data/adjectives.txt +1466 -0
- data/data/nouns.txt +2193 -0
- data/data/stopwords.txt +639 -0
- data/lib/twitter_ebooks/archiver.rb +86 -0
- data/lib/twitter_ebooks/bot.rb +145 -0
- data/lib/twitter_ebooks/markov.rb +89 -0
- data/lib/twitter_ebooks/model.rb +147 -0
- data/lib/twitter_ebooks/nlp.rb +142 -0
- data/lib/twitter_ebooks/version.rb +3 -0
- data/lib/twitter_ebooks.rb +20 -0
- data/skeleton/Procfile +1 -0
- data/skeleton/bots.rb +47 -0
- data/skeleton/corpus/README.md +1 -0
- data/skeleton/model/README.md +1 -0
- data/test/corpus/0xabad1dea.tweets +14696 -0
- data/test/tokenize.rb +18 -0
- data/twitter_ebooks.gemspec +30 -0
- metadata +247 -0
data/lib/twitter_ebooks/bot.rb
ADDED
@@ -0,0 +1,145 @@
#!/usr/bin/env ruby
require 'twitter'
require 'tweetstream'
require 'rufus/scheduler'

module Ebooks
  class Bot
    attr_accessor :consumer_key, :consumer_secret,
                  :oauth_token, :oauth_token_secret

    attr_accessor :username

    attr_reader :twitter, :stream

    @@all = [] # List of all defined bots
    def self.all; @@all; end

    def initialize(username, &b)
      # Set defaults
      @username = username

      # Override with callback
      b.call(self)

      Bot.all.push(self)
    end

    def log(*args)
      STDERR.puts "@#{@username}: " + args.map(&:to_s).join(' ')
      STDERR.flush
    end

    # Connects to tweetstream and opens event handlers for this bot
    def start
      TweetStream.configure do |config|
        config.consumer_key = @consumer_key
        config.consumer_secret = @consumer_secret
        config.oauth_token = @oauth_token
        config.oauth_token_secret = @oauth_token_secret
      end

      Twitter.configure do |config|
        config.consumer_key = @consumer_key
        config.consumer_secret = @consumer_secret
        config.oauth_token = @oauth_token
        config.oauth_token_secret = @oauth_token_secret
      end

      @twitter = Twitter::Client.new
      @stream = TweetStream::Client.new

      @stream.on_error do |msg|
        log "ERROR: #{msg}"
      end

      @stream.on_inited do
        log "Online!"
      end

      @stream.on_event(:follow) do |event|
        log "Followed by #{event[:source][:screen_name]}"
        @on_follow.call(event[:source])
      end

      @stream.on_direct_message do |dm|
        next if dm[:sender][:screen_name] == @username # Don't reply to self
        log "DM from @#{dm[:sender][:screen_name]}: #{dm[:text]}"
        @on_message.call(dm)
      end

      @stream.userstream do |ev|
        next unless ev[:text] # If it's not a text-containing tweet, ignore it
        next if ev[:user][:screen_name] == @username # Ignore our own tweets

        meta = {}
        mentions = ev.attrs[:entities][:user_mentions].map { |x| x[:screen_name] }

        reply_mentions = mentions.reject { |m| m.downcase == @username }
        reply_mentions << ev[:user][:screen_name]

        meta[:reply_prefix] = reply_mentions.uniq.map { |m| '@'+m }.join(' ') + ' '

        mless = ev[:text]
        ev.attrs[:entities][:user_mentions].reverse.each do |entity|
          mless = mless[0...entity[:indices][0]] + mless[entity[:indices][1]+1..-1]
        end
        meta[:mentionless] = mless

        # To check if this is a mention, ensure:
        # - The tweet mentions list contains our username
        # - The tweet is not being retweeted by somebody else
        # - Or soft-retweeted by somebody else
        if mentions.map(&:downcase).include?(@username) && !ev[:retweeted_status] && !ev[:text].start_with?('RT ')
          log "Mention from #{ev[:user][:screen_name]}: #{ev[:text]}"
          @on_mention.call(ev, meta)
        else
          @on_timeline.call(ev, meta)
        end
      end
    end

    # Wrapper for EM.add_timer
    # Delays add a greater sense of humanity to bot behaviour
    def delay(time, &b)
      time = time.to_a.sample unless time.is_a? Integer
      EM.add_timer(time, &b)
    end

    # Reply to a tweet or a DM.
    # Applies configurable @reply_delay range
    def reply(ev, text, opts={})
      opts = opts.clone
      delay = @reply_delay.to_a.sample

      if ev.is_a? Twitter::DirectMessage
        log "Sending DM to @#{ev[:sender][:screen_name]}: #{text}"
        @twitter.direct_message_create(ev[:sender][:screen_name], text, opts)
      elsif ev.is_a? Twitter::Tweet
        log "Replying to @#{ev[:user][:screen_name]}: #{text}"
        @twitter.update(text, in_reply_to_status_id: ev[:id])
      else
        raise Exception("Don't know how to reply to a #{ev.class}")
      end
    end

    def scheduler
      @scheduler ||= Rufus::Scheduler.new
    end

    def follow(*args)
      log "Following #{args}"
      @twitter.follow(*args)
    end

    def tweet(*args)
      log "Tweeting #{args.inspect}"
      @twitter.update(*args)
    end

    def on_follow(&b); @on_follow = b; end
    def on_mention(&b); @on_mention = b; end
    def on_timeline(&b); @on_timeline = b; end
    def on_message(&b); @on_message = b; end
  end
end
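For orientation, a minimal usage sketch (not part of the gem itself; the handle and reply text are invented). The meta hash assembled in the userstream handler above is what the on_mention and on_timeline callbacks receive:

require 'twitter_ebooks'

Ebooks::Bot.new("example_ebooks") do |bot|
  bot.consumer_key = "..." # plus the other three credentials
  bot.on_mention do |tweet, meta|
    # meta[:reply_prefix] => "@original_author @other_mentions ", built above
    # meta[:mentionless]  => the tweet text with @-mentions stripped
    bot.reply(tweet, meta[:reply_prefix] + "hello!")
  end
end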
data/lib/twitter_ebooks/markov.rb
ADDED
@@ -0,0 +1,89 @@
module Ebooks
  class MarkovModel
    INTERIM = :interim # Special token marking newline/^/$ boundaries

    attr_accessor :tokens
    attr_reader :depth

    def represent(token)
      if token.nil? || token == "\n" || token.empty?
        INTERIM
      else
        token
      end
    end

    def consume(tokenized, depth=2)
      @tokens = [INTERIM]
      @depth = depth

      tokenized.each do |tokens|
        @tokens += tokens
        @tokens << INTERIM
      end

      @model = {}

      @tokens.each_with_index do |token, i|
        prev_tokens = []

        @depth.downto(1) do |j|
          if i-j < 0; next
          else; prev = represent(@tokens[i-j])
          end
          prev_tokens << prev
        end

        1.upto(@depth) do |j|
          break if j > prev_tokens.length
          ngram = prev_tokens.last(j)

          unless ngram == INTERIM && prev_tokens[-1] == INTERIM
            @model[ngram] ||= []
            @model[ngram] << represent(token)
          end
        end
      end

      self
    end

    def chain(tokens)
      next_token = nil
      @depth.downto(1).each do |i|
        next if tokens.length < i
        matches = @model[tokens.last(i)]
        if matches
          #p tokens.last(i)
          #puts "=> #{matches.inspect}"
          next_token = matches.sample
          break
        end
      end

      raise ArgumentError if next_token.nil?

      if next_token == INTERIM
        return tokens
      else
        return chain(tokens + [next_token])
      end
    end

    def generate
      tokens = chain([@model[[INTERIM]].sample])
      NLP.reconstruct(tokens)
    end

    def serialize
      { 'model' => @model,
        'depth' => @depth }
    end

    def deserialize(data)
      @model = data['model']
      @depth = data['depth']
      self
    end
  end
end
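A hedged usage sketch of MarkovModel on a toy corpus (the token arrays here are invented; in the gem they come from NLP.tokenize via Model#consume below):

require 'twitter_ebooks'

markov = Ebooks::MarkovModel.new
markov.consume([
  %w(the cat sat on the mat .),
  %w(the dog slept on the rug .)
], 2)

puts markov.generate  # e.g. "the cat slept on the rug."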
data/lib/twitter_ebooks/model.rb
ADDED
@@ -0,0 +1,147 @@
#!/usr/bin/env ruby
# encoding: utf-8

require 'json'
require 'set'
require 'digest/md5'

module Ebooks
  class Model
    attr_accessor :hash, :sentences, :tokenized, :markov

    def self.consume(txtpath)
      Model.new.consume(txtpath)
    end

    def self.load(path)
      data = Marshal.load(File.read(path))
      Model.new.deserialize(data)
    end

    def consume(txtpath)
      # Record hash of source file so we know to update later
      @hash = Digest::MD5.hexdigest(File.read(txtpath))

      text = File.read(txtpath)
      log "Removing commented lines and mentions"

      lines = text.split("\n")
      keeping = []
      lines.each do |l|
        next if l.start_with?('#') || l.include?('RT')
        processed = l.split.reject { |w| w.include?('@') || w.include?('http') }
        keeping << processed.join(' ')
      end
      text = NLP.normalize(keeping.join("\n"))

      log "Segmenting text into sentences of 140 characters or less"
      @sentences = NLP.sentences(text).reject do |s|
        s.length > 140 || s.count('"')%2 != 0
      end

      log "Tokenizing #{@sentences.length} sentences"
      @tokenized = @sentences.map { |sent| NLP.tokenize(sent) }
      @tokensets = @tokenized.map { |tokens| NLP.tokenset(tokens) }

      log "Building markov model (this may take a while)"
      @markov = MarkovModel.new.consume(@tokenized)

      self
    end

    # Produces a hash with the data needed to quickly
    # reconstruct this corpus object
    def serialize
      return { 'hash' => @hash,
               'tokenized' => @tokenized,
               'tokensets' => @tokensets,
               'markov' => @markov.serialize }
    end

    def save(path)
      data = self.serialize
      File.open(path, 'w') do |f|
        f.write(Marshal.dump(data))
      end
      self
    end

    def deserialize(data)
      @hash = data['hash']
      @tokenized = data['tokenized']
      @tokensets = data['tokensets']
      @markov = MarkovModel.new.deserialize(data['markov'])
      self
    end

    def replace_noun(sent)
      tagged = NLP.tagger.add_tags(sent)

      nouns = tagged.scan(/<nn>([^<]+)<\/nn>/).flatten
      to_replace = nouns.reject { |n| ['much'].include?(n) }.sample
      return sent if to_replace.nil?
      replacement = NLP.nouns.sample
      if to_replace.en.plural.length <= to_replace.length
        replacement = replacement.en.plural(1)
      end
      sent = sent.gsub(/(?<=\W)#{to_replace}(?=\W)/, replacement)
      sent.gsub(/(?<=\W)(a|an) #{replacement}(?=\W)/, replacement.en.a)
    end

    def fix(tweet)
      # This seems to require an external api call
      begin
        fixer = NLP.gingerice.parse(tweet)
        log fixer if fixer['corrections']
        tweet = fixer['result']
      rescue Exception => e
        log e.message
        log e.backtrace
      end

      NLP.htmlentities.decode tweet
    end

    def markov_statement(limit=140, markov=nil)
      markov ||= @markov
      tweet = ""

      while (tweet = markov.generate) do
        next if tweet.length > limit
        next if NLP.unmatched_enclosers?(tweet)
        break if tweet.length > limit*0.4 || rand > 0.8
      end

      fix tweet
    end

    # Generates a response by looking for related sentences
    # in the corpus and building a smaller markov model from these
    def markov_response(input, limit=140)
      inputset = NLP.tokenset(input)
      log "Input tokenset: #{inputset.to_a}"

      if inputset.empty?
        # Very uninteresting input; no relevant response possible
        return markov_statement(limit)
      end

      # Let's find all the sentences that might be relevant
      relevant = []
      @tokensets.each_with_index.map do |set, i|
        if inputset.intersection(set).length > 0
          relevant << @tokenized[i]
        end
      end

      log "Found #{relevant.length} relevant tokenset matches"

      if relevant.length < 3
        return markov_statement(limit)
      end

      markov = MarkovModel.new.consume(relevant.sample(100))
      markov_statement(limit, markov)
    end
  end
end
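A hedged sketch of the Model lifecycle that the `ebooks consume` and `ebooks gen` commands (see the skeleton READMEs below) drive; the file paths here are placeholders:

require 'twitter_ebooks'

model = Ebooks::Model.consume("corpus/example.tweets") # filter, tokenize, build markov model
model.save("model/example.model")                      # Marshal the serialized hash to disk

model = Ebooks::Model.load("model/example.model")
puts model.markov_statement(140)                       # free-standing tweet-length output
puts model.markov_response("tell me about robots")     # reply seeded from related sentences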
data/lib/twitter_ebooks/nlp.rb
ADDED
@@ -0,0 +1,142 @@
# encoding: utf-8

require 'linguistics'
Linguistics.use(:en, classes: [String])

module Ebooks
  module NLP
    # We don't necessarily want to use all of this stuff all the time
    # Only load it when it is needed

    def self.stopwords
      @stopwords ||= File.read(File.join(DATA_PATH, 'stopwords.txt')).split
    end

    def self.nouns
      @nouns ||= File.read(File.join(DATA_PATH, 'nouns.txt')).split
    end

    def self.adjectives
      @adjectives ||= File.read(File.join(DATA_PATH, 'adjectives.txt')).split
    end

    def self.tokenizer
      # This tokenizer is used for dividing sentences into words
      # It's too slow for finding sentences in paragraphs, hence tactful
      require 'tokenizer'
      @tokenizer ||= Tokenizer::Tokenizer.new(:en)
    end

    def self.tactful
      require 'tactful_tokenizer'
      @tactful ||= TactfulTokenizer::Model.new
    end

    def self.tagger
      require 'engtagger'
      @tagger ||= EngTagger.new
    end

    def self.stemmer
      require 'lingua/stemmer'
      @stemmer ||= Lingua::Stemmer.new
    end

    def self.gingerice
      require 'gingerice'
      Gingerice::Parser.new # No caching for this one
    end

    def self.htmlentities
      require 'htmlentities'
      @htmlentities ||= HTMLEntities.new
    end

    ### Utility functions which wrap the above

    def self.sentences(text)
      tactful.tokenize_text(text)
    end

    def self.normalize(text)
      htmlentities.decode text.gsub('“', '"').gsub('”', '"').gsub('’', "'").gsub('…', '...')
    end

    def self.tokenize(sentence)
      # This is hacky, but an ad hoc approach seems to be
      # most reliable for now. Tokenization libraries have oddities
      # that are hard to correct.
      sentence.split(/\s/).map do |token|
        exceptions = [/^\w\)$/, /^@/, /^#/, /^:\w$/, /^:\w$/, /^http/]
        if exceptions.find { |r| r.match(token) }
          token
        else
          token.split(/(?<=^[#{PUNCTUATION}])(?=[a-zA-Z])|(?<=[a-zA-Z])(?=[#{PUNCTUATION}]+$)/)
        end
      end.flatten
    end

    def self.tokenset(sentence)
      tokens = sentence.is_a?(Array) ? sentence : tokenize(sentence)
      tokens.map(&:downcase)
            .reject { |token| stopwords.include?(token) }
            .to_set
    end

    def self.space_between?(token1, token2)
      p1 = self.punctuation?(token1)
      p2 = self.punctuation?(token2)
      if p1 && p2 # "foo?!"
        false
      elsif !p1 && p2 # "foo."
        false
      elsif p1 && !p2 # "foo. rah"
        true
      else # "foo rah"
        true
      end
    end

    def self.reconstruct(tokens)
      # Put tokens back together into a nice looking sentence
      text = ""
      last_token = nil
      tokens.each do |token|
        text += ' ' if last_token && space_between?(last_token, token)
        text += token
        last_token = token
      end
      text
    end

    # Deliberately limit our punctuation handling to stuff we can do consistently
    # It'll just be a part of a token if we don't split it out, and that's fine
    PUNCTUATION = ".?!,"

    def self.punctuation?(token)
      (token.chars.to_set - PUNCTUATION.chars.to_set).empty?
    end

    def self.unmatched_enclosers?(text)
      # Weird quotes are an instant giveaway. Let's do paren-matching.
      enclosers = ['**', '""', '()', '[]', '``']
      enclosers.each do |pair|
        starter = Regexp.new('(\W|^)' + Regexp.escape(pair[0]) + '\S')
        ender = Regexp.new('\S' + Regexp.escape(pair[1]) + '(\W|$)')

        opened = 0

        tokenize(text).each do |token|
          opened += 1 if token.match(starter)
          opened -= 1 if token.match(ender)

          return true if opened < 0 # Too many ends!
        end

        return true if opened != 0 # Mismatch somewhere.
      end

      false
    end
  end
end
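A small illustrative session with the NLP helpers above (not shipped with the gem; the exact tokenset depends on the bundled stopwords.txt):

require 'twitter_ebooks'

tokens = Ebooks::NLP.tokenize("Hello there, world!")
# => ["Hello", "there", ",", "world", "!"]

Ebooks::NLP.reconstruct(tokens)
# => "Hello there, world!"

Ebooks::NLP.tokenset(tokens)
# => Set of downcased tokens minus anything listed in stopwords.txt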
data/lib/twitter_ebooks.rb
ADDED
@@ -0,0 +1,20 @@
gem 'minitest'

def log(*args)
  STDERR.puts args.map(&:to_s).join(' ')
  STDERR.flush
end

module Ebooks
  GEM_PATH = File.expand_path(File.join(File.dirname(__FILE__), '..'))
  DATA_PATH = File.join(GEM_PATH, 'data')
  SKELETON_PATH = File.join(GEM_PATH, 'skeleton')
  TEST_PATH = File.join(GEM_PATH, 'test')
  TEST_CORPUS_PATH = File.join(TEST_PATH, 'corpus/0xabad1dea.tweets')
end

require 'twitter_ebooks/nlp'
require 'twitter_ebooks/archiver'
require 'twitter_ebooks/markov'
require 'twitter_ebooks/model'
require 'twitter_ebooks/bot'
data/skeleton/Procfile
ADDED
@@ -0,0 +1 @@
worker: ruby bots.rb start
data/skeleton/bots.rb
ADDED
@@ -0,0 +1,47 @@
#!/usr/bin/env ruby

require 'twitter_ebooks'

# This is an example bot definition with event handlers commented out
# You can define as many of these as you like; they will run simultaneously

Ebooks::Bot.new("{{BOT_NAME}}") do |bot|
  # Consumer details come from registering an app at https://dev.twitter.com/
  # OAuth details can be fetched with https://github.com/marcel/twurl
  bot.consumer_key = "" # Your app consumer key
  bot.consumer_secret = "" # Your app consumer secret
  bot.oauth_token = "" # Token connecting the app to this account
  bot.oauth_token_secret = "" # Secret connecting the app to this account

  bot.on_message do |dm|
    # Reply to a DM
    # bot.reply(dm, "secret secrets")
  end

  bot.on_follow do |user|
    # Follow a user back
    # bot.follow(user[:screen_name])
  end

  bot.on_mention do |tweet, meta|
    # Reply to a mention
    # bot.reply(tweet, meta[:reply_prefix] + "oh hullo")
  end

  bot.on_timeline do |tweet, meta|
    # Reply to a tweet in the bot's timeline
    # bot.reply(tweet, meta[:reply_prefix] + "nice tweet")
  end

  bot.scheduler.every '24h' do
    # Tweet something every 24 hours
    # See https://github.com/jmettraux/rufus-scheduler
    # bot.tweet("hi")
  end
end

EM.run do
  Ebooks::Bot.all.each do |bot|
    bot.start
  end
end
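The Procfile above (`worker: ruby bots.rb start`) runs this skeleton as a long-lived worker process, for example under foreman or on Heroku; running `ruby bots.rb` directly should also work, since the EM.run block at the bottom starts every bot defined in the file.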
data/skeleton/corpus/README.md
ADDED
@@ -0,0 +1 @@
Put raw text files in here and process them with `ebooks consume` to make Markov models.
data/skeleton/model/README.md
ADDED
@@ -0,0 +1 @@
This is where the output of `ebooks consume <corpus_path>` goes. You can load these files using Model.load(path), and `ebooks gen <path>` for testing.