twitter_ebooks 2.0.3 → 2.0.4

@@ -15,6 +15,10 @@ module Ebooks
     @@all = [] # List of all defined bots
     def self.all; @@all; end
 
+    def self.get(name)
+      all.find { |bot| bot.username == name }
+    end
+
     def initialize(username, &b)
       # Set defaults
       @username = username
@@ -30,8 +34,7 @@ module Ebooks
       STDERR.flush
     end
 
-    # Connects to tweetstream and opens event handlers for this bot
-    def start
+    def configure
       TweetStream.configure do |config|
         config.consumer_key = @consumer_key
         config.consumer_secret = @consumer_secret
@@ -48,6 +51,13 @@ module Ebooks
 
       @twitter = Twitter::Client.new
       @stream = TweetStream::Client.new
+    end
+
+    # Connects to tweetstream and opens event handlers for this bot
+    def start
+      configure
+
+      @on_startup.call if @on_startup
 
       @stream.on_error do |msg|
         log "ERROR: #{msg}"
@@ -77,13 +87,20 @@ module Ebooks
         mentions = ev.attrs[:entities][:user_mentions].map { |x| x[:screen_name] }
 
         reply_mentions = mentions.reject { |m| m.downcase == @username.downcase }
-        reply_mentions << ev[:user][:screen_name]
+        reply_mentions = [ev[:user][:screen_name]] + reply_mentions
 
         meta[:reply_prefix] = reply_mentions.uniq.map { |m| '@'+m }.join(' ') + ' '
+        meta[:limit] = 140 - meta[:reply_prefix].length
 
         mless = ev[:text]
-        ev.attrs[:entities][:user_mentions].reverse.each do |entity|
-          mless = mless[0...entity[:indices][0]] + mless[entity[:indices][1]+1..-1]
+        begin
+          ev.attrs[:entities][:user_mentions].reverse.each do |entity|
+            mless = mless[0...entity[:indices][0]] + mless[entity[:indices][1]+1..-1]
+          end
+        rescue Exception
+          p ev.attrs[:entities][:user_mentions]
+          p ev[:text]
+          raise
         end
         meta[:mentionless] = mless
 
@@ -92,7 +109,7 @@ module Ebooks
         # - The tweet is not being retweeted by somebody else
         # - Or soft-retweeted by somebody else
         if mentions.map(&:downcase).include?(@username.downcase) && !ev[:retweeted_status] && !ev[:text].start_with?('RT ')
-          log "Mention from #{ev[:user][:screen_name]}: #{ev[:text]}"
+          log "Mention from @#{ev[:user][:screen_name]}: #{ev[:text]}"
           @on_mention.call(ev, meta)
         else
           @on_timeline.call(ev, meta)
@@ -117,7 +134,7 @@ module Ebooks
       log "Sending DM to @#{ev[:sender][:screen_name]}: #{text}"
       @twitter.direct_message_create(ev[:sender][:screen_name], text, opts)
     elsif ev.is_a? Twitter::Tweet
-      log "Replying to @#{ev[:user][:screen_name]}: #{text}"
+      log "Replying to @#{ev[:user][:screen_name]} with: #{text}"
       @twitter.update(text, in_reply_to_status_id: ev[:id])
     else
       raise Exception("Don't know how to reply to a #{ev.class}")
@@ -138,6 +155,7 @@ module Ebooks
       @twitter.update(*args)
     end
 
+    def on_startup(&b); @on_startup = b; end
     def on_follow(&b); @on_follow = b; end
     def on_mention(&b); @on_mention = b; end
     def on_timeline(&b); @on_timeline = b; end
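
Taken together, the bot changes split credential/client setup (`configure`) out of `start`, add an `on_startup` hook that fires before the stream handlers, add `Bot.get` for looking a bot up by username, and pass `meta[:limit]` (140 minus the reply prefix) to mention handlers. Below is a minimal sketch of a bot definition using the new hooks; the bot name and handler bodies are invented, and the credential setters follow the existing skeleton rather than anything introduced in this diff.

    # Illustrative sketch only -- not part of the diff.
    require 'twitter_ebooks'

    Ebooks::Bot.new("example_ebooks") do |bot|
      # Set bot.consumer_key / consumer_secret / oauth credentials here,
      # exactly as in the generated skeleton bots.rb

      bot.on_startup do
        puts "connected as @example_ebooks"  # runs after configure, before stream events
      end

      bot.on_mention do |ev, meta|
        # meta[:reply_prefix] is "@sender @other " and meta[:limit] is how many
        # characters remain after it (140 - prefix length, per the diff above)
        bot.reply(ev, meta[:reply_prefix] + "hi!")
      end
    end

    # Ebooks::Bot.get("example_ebooks") now returns the instance defined above.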
@@ -1,69 +1,73 @@
 module Ebooks
+  # Special INTERIM token represents sentence boundaries
+  # This is so we can include start and end of statements in model
+  # Due to the way the sentence tokenizer works, can correspond
+  # to multiple actual parts of text (such as ^, $, \n and .?!)
+  INTERIM = :interim
+
+  # This is an ngram-based Markov model optimized to build from a
+  # tokenized sentence list without requiring too much transformation
   class MarkovModel
-    INTERIM = :interim # Special token marking newline/^/$ boundaries
-
-    attr_accessor :tokens
-    attr_reader :depth
-
-    def represent(token)
-      if token.nil? || token == "\n" || token.empty?
-        INTERIM
-      else
-        token
-      end
+    def self.build(sentences)
+      MarkovModel.new.consume(sentences)
     end
 
-    def consume(tokenized, depth=2)
-      @tokens = [INTERIM]
-      @depth = depth
-
-      tokenized.each do |tokens|
-        @tokens += tokens
-        @tokens << INTERIM
-      end
-
-      @model = {}
-
-      @tokens.each_with_index do |token, i|
-        prev_tokens = []
-
-        @depth.downto(1) do |j|
-          if i-j < 0; next
-          else; prev = represent(@tokens[i-j])
+    def consume(sentences)
+      # These models are of the form ngram => [[sentence_pos, token_pos] || INTERIM, ...]
+      # We map by both bigrams and unigrams so we can fall back to the latter in
+      # cases where an input bigram is unavailable, such as starting a sentence
+      @sentences = sentences
+      @unigrams = {}
+      @bigrams = {}
+
+      sentences.each_with_index do |tokens, i|
+        last_token = INTERIM
+        tokens.each_with_index do |token, j|
+          @unigrams[last_token] ||= []
+          @unigrams[last_token] << [i, j]
+
+          @bigrams[last_token] ||= {}
+          @bigrams[last_token][token] ||= []
+
+          if j == tokens.length-1 # Mark sentence endings
+            @unigrams[token] ||= []
+            @unigrams[token] << INTERIM
+            @bigrams[last_token][token] << INTERIM
+          else
+            @bigrams[last_token][token] << [i, j+1]
           end
-          prev_tokens << prev
-        end
 
-        1.upto(@depth) do |j|
-          break if j > prev_tokens.length
-          ngram = prev_tokens.last(j)
-
-          unless ngram == INTERIM && prev_tokens[-1] == INTERIM
-            @model[ngram] ||= []
-            @model[ngram] << represent(token)
-          end
+          last_token = token
         end
       end
 
       self
     end
 
+    def find_token(index)
+      if index == INTERIM
+        INTERIM
+      else
+        @sentences[index[0]][index[1]]
+      end
+    end
+
     def chain(tokens)
-      next_token = nil
-      @depth.downto(1).each do |i|
-        next if tokens.length < i
-        matches = @model[tokens.last(i)]
-        if matches
-          #p tokens.last(i)
-          #puts "=> #{matches.inspect}"
-          next_token = matches.sample
-          break
-        end
+      if tokens.length == 1
+        matches = @unigrams[tokens[0]]
+      else
+        matches = @bigrams[tokens[-2]][tokens[-1]]
       end
 
-      raise ArgumentError if next_token.nil?
+      if matches.empty?
+        # This should never happen unless a strange token is
+        # supplied from outside the dataset
+        raise ArgumentError, "Unable to continue chain for: #{tokens.inspect}"
+      end
 
-      if next_token == INTERIM
+      next_token = find_token(matches.sample)
+
+      if next_token == INTERIM # We chose to end the sentence
        return tokens
      else
        return chain(tokens + [next_token])
@@ -71,19 +75,7 @@ module Ebooks
     end
 
     def generate
-      tokens = chain([@model[[INTERIM]].sample])
-      NLP.reconstruct(tokens)
-    end
-
-    def serialize
-      { 'model' => @model,
-        'depth' => @depth }
-    end
-
-    def deserialize(data)
-      @model = data['model']
-      @depth = data['depth']
-      self
+      NLP.reconstruct(chain([INTERIM]))
     end
   end
 end
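
The rewritten model never copies tokens: `@unigrams` and `@bigrams` map a token (or `INTERIM`) to `[sentence_index, token_index]` pointers back into `@sentences`, with `INTERIM` marking sentence ends, and `chain`/`find_token` walk those pointers until an end marker is drawn. A hand-traced illustration of what `consume` above builds for a toy one-sentence corpus (the corpus itself is made up):

    # Illustrative only; assumes the twitter_ebooks code above is loaded.
    sentences = [%w(hello world)]                 # one pre-tokenized sentence
    model = Ebooks::MarkovModel.build(sentences)

    # Per consume above, the internal indexes are:
    #   @unigrams == { :interim => [[0, 0]], "hello" => [[0, 1]], "world" => [:interim] }
    #   @bigrams  == { :interim => { "hello" => [[0, 1]] },
    #                  "hello"  => { "world" => [:interim] } }
    #
    # chain([INTERIM]) samples @unigrams[:interim] -> [0, 0] ("hello"),
    # then @bigrams[:interim]["hello"] -> [0, 1] ("world"),
    # then @bigrams["hello"]["world"] -> :interim, so the sentence ends.
    model.generate  # => "hello world"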
@@ -7,15 +7,14 @@ require 'digest/md5'
 
 module Ebooks
   class Model
-    attr_accessor :hash, :sentences, :tokenized, :markov
+    attr_accessor :hash, :sentences, :markov, :keywords
 
     def self.consume(txtpath)
       Model.new.consume(txtpath)
     end
 
     def self.load(path)
-      data = Marshal.load(File.read(path))
-      Model.new.deserialize(data)
+      Marshal.load(File.read(path))
     end
 
     def consume(txtpath)
@@ -23,7 +22,7 @@ module Ebooks
       @hash = Digest::MD5.hexdigest(File.read(txtpath))
 
       text = File.read(txtpath)
-      log "Removing commented lines and mentions"
+      log "Removing commented lines and mention tokens"
 
       lines = text.split("\n")
       keeping = []
@@ -34,70 +33,43 @@ module Ebooks
       end
       text = NLP.normalize(keeping.join("\n"))
 
-      log "Segmenting text into sentences of 140 characters or less"
-      @sentences = NLP.sentences(text).reject do |s|
-        s.length > 140 || s.count('"')%2 != 0
-      end
+      log "Segmenting text into sentences"
 
-      log "Tokenizing #{@sentences.length} sentences"
-      @tokenized = @sentences.map { |sent| NLP.tokenize(sent) }
-      @tokensets = @tokenized.map { |tokens| NLP.tokenset(tokens) }
+      sentences = NLP.sentences(text)
 
-      log "Building markov model (this may take a while)"
-      @markov = MarkovModel.new.consume(@tokenized)
+      log "Tokenizing #{sentences.length} sentences"
+      @sentences = sentences.map { |sent| NLP.tokenize(sent) }
 
-      self
-    end
+      log "Building markov model"
+      @markov = MarkovModel.build(@sentences)
 
-    # Produces a hash with the data needed to quickly
-    # reconstruct this corpus object
-    def serialize
-      return { 'hash' => @hash,
-               'tokenized' => @tokenized,
-               'tokensets' => @tokensets,
-               'markov' => @markov.serialize }
+      log "Ranking keywords"
+      require 'benchmark'
+      puts Benchmark.measure {
+        @keywords = NLP.keywords(@sentences)
+        p @keywords.top(100)
+      }
+
+      self
     end
 
     def save(path)
-      data = self.serialize
       File.open(path, 'w') do |f|
-        f.write(Marshal.dump(data))
+        f.write(Marshal.dump(self))
       end
       self
     end
 
-    def deserialize(data)
-      @hash = data['hash']
-      @tokenized = data['tokenized']
-      @tokensets = data['tokensets']
-      @markov = MarkovModel.new.deserialize(data['markov'])
-      self
-    end
-
-    def replace_noun(sent)
-      tagged = NLP.tagger.add_tags(sent)
-
-      nouns = tagged.scan(/<nn>([^<]+)<\/nn>/).flatten
-      to_replace = nouns.reject { |n| ['much'].include?(n) }.sample
-      return sent if to_replace.nil?
-      replacement = NLP.nouns.sample
-      if to_replace.en.plural.length <= to_replace.length
-        replacement = replacement.en.plural(1)
-      end
-      sent = sent.gsub(/(?<=\W)#{to_replace}(?=\W)/, replacement)
-      sent.gsub(/(?<=\W)(a|an) #{replacement}(?=\W)/, replacement.en.a)
-    end
-
     def fix(tweet)
       # This seems to require an external api call
-      begin
-        fixer = NLP.gingerice.parse(tweet)
-        log fixer if fixer['corrections']
-        tweet = fixer['result']
-      rescue Exception => e
-        log e.message
-        log e.backtrace
-      end
+      #begin
+      # fixer = NLP.gingerice.parse(tweet)
+      # log fixer if fixer['corrections']
+      # tweet = fixer['result']
+      #rescue Exception => e
+      # log e.message
+      # log e.backtrace
+      #end
 
       NLP.htmlentities.decode tweet
     end
@@ -115,33 +87,44 @@ module Ebooks
       fix tweet
     end
 
-    # Generates a response by looking for related sentences
-    # in the corpus and building a smaller markov model from these
-    def markov_response(input, limit=140)
-      inputset = NLP.tokenset(input)
-      log "Input tokenset: #{inputset.to_a}"
+    # Finds all relevant tokenized sentences to given input by
+    # comparing non-stopword token overlaps
+    def relevant_sentences(input)
+      relevant = []
+      slightly_relevant = []
 
-      if inputset.empty?
-        # Very uninteresting input; no relevant response possible
-        return markov_statement(limit)
-      end
+      tokenized = NLP.tokenize(input)
 
-      # Let's find all the sentences that might be relevant
-      relevant = []
-      @tokensets.each_with_index.map do |set, i|
-        if inputset.intersection(set).length > 0
-          relevant << @tokenized[i]
+      @sentences.each do |sent|
+        tokenized.each do |token|
+          if sent.include?(token)
+            relevant << sent unless NLP.stopword?(token)
+            slightly_relevant << sent
+          end
         end
       end
 
-      log "Found #{relevant.length} relevant tokenset matches"
+      [relevant, slightly_relevant]
+    end
 
-      if relevant.length < 3
-        return markov_statement(limit)
+    # Generates a response by looking for related sentences
+    # in the corpus and building a smaller markov model from these
+    def markov_response(input, limit=140)
+      # First try
+      relevant, slightly_relevant = relevant_sentences(input)
+
+      p relevant
+      p slightly_relevant.length
+
+      if relevant.length >= 3
+        markov = MarkovModel.new.consume(relevant)
+        markov_statement(limit, markov)
+      elsif slightly_relevant.length > 5
+        markov = MarkovModel.new.consume(slightly_relevant)
+        markov_statement(limit, markov)
+      else
+        markov_statement(limit)
       end
-
-      markov = MarkovModel.new.consume(relevant.sample(100))
-      markov_statement(limit, markov)
     end
   end
 end
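
With serialization now handled by Marshal-dumping the whole object and replies driven by `relevant_sentences`, the high-level flow is: consume a text file, save or load the model, then ask it for statements or replies. A rough usage sketch follows; the file paths are placeholders, and `markov_statement` is the existing generator (unchanged in this diff) that `markov_response` falls back to.

    # Illustrative sketch; paths are placeholders.
    require 'twitter_ebooks'

    model = Ebooks::Model.consume("corpus/example.txt")  # segment, tokenize, build markov + keywords
    model.save("model/example.model")                    # Marshal.dump(self), per save above

    model = Ebooks::Model.load("model/example.model")    # Marshal.load, per self.load above
    puts model.markov_statement(140)                     # statement from the full corpus model
    puts model.markov_response("tell me about ruby", 140) # tries token-overlapping sentences first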
@@ -1,12 +1,16 @@
 # encoding: utf-8
-
-require 'linguistics'
-Linguistics.use(:en, classes: [String])
+require 'fast-stemmer'
+require 'highscore'
 
 module Ebooks
   module NLP
-    # We don't necessarily want to use all of this stuff all the time
-    # Only load it when it is needed
+    # We deliberately limit our punctuation handling to stuff we can do consistently
+    # It'll just be a part of another token if we don't split it out, and that's fine
+    PUNCTUATION = ".?!,"
+
+    # Lazy-load NLP libraries and resources
+    # Some of this stuff is pretty heavy and we don't necessarily need
+    # to be using it all of the time
 
     def self.stopwords
       @stopwords ||= File.read(File.join(DATA_PATH, 'stopwords.txt')).split
@@ -19,89 +23,102 @@ module Ebooks
     def self.adjectives
       @adjectives ||= File.read(File.join(DATA_PATH, 'adjectives.txt')).split
     end
-
-    def self.tokenizer
-      # This tokenizer is used for dividing sentences into words
-      # It's too slow for finding sentences in paragraphs, hence tactful
-      require 'tokenizer'
-      @tokenizer ||= Tokenizer::Tokenizer.new(:en)
-    end
-
-    def self.tactful
-      require 'tactful_tokenizer'
-      @tactful ||= TactfulTokenizer::Model.new
+
+    def self.wordfreq
+      @wordfreq ||= JSON.load(File.read(File.join(DATA_PATH, 'wordfreq.json')))
     end
 
+    # POS tagger
     def self.tagger
       require 'engtagger'
       @tagger ||= EngTagger.new
     end
 
-    def self.stemmer
-      require 'lingua/stemmer'
-      @stemmer ||= Lingua::Stemmer.new
-    end
-
+    # Gingerice text correction service
     def self.gingerice
       require 'gingerice'
       Gingerice::Parser.new # No caching for this one
     end
 
+    # For decoding html entities
     def self.htmlentities
       require 'htmlentities'
       @htmlentities ||= HTMLEntities.new
     end
 
-    ### Utility functions which wrap the above
+    ### Utility functions
 
-    def self.sentences(text)
-      tactful.tokenize_text(text)
-    end
-
+    # We don't really want to deal with all this weird unicode punctuation
     def self.normalize(text)
       htmlentities.decode text.gsub('“', '"').gsub('”', '"').gsub('’', "'").gsub('…', '...')
     end
 
+    # Split text into sentences
+    # We use ad hoc approach because fancy libraries do not deal
+    # especially well with tweet formatting, and we can fake solving
+    # the quote problem during generation
+    def self.sentences(text)
+      text.split(/\n+|(?<=[.?!])\s+/)
+    end
+
+    # Split a sentence into word-level tokens
+    # As above, this is ad hoc because tokenization libraries
+    # do not behave well wrt. things like emoticons and timestamps
     def self.tokenize(sentence)
-      # This is hacky, but an ad hoc approach seems to be
-      # most reliable for now. Tokenization libraries have oddities
-      # that are hard to correct.
-      sentence.split(/\s/).map do |token|
-        exceptions = [/^\w\)$/, /^@/, /^#/, /^:\w$/, /^:\w$/, /^http/]
-        if exceptions.find { |r| r.match(token) }
-          token
-        else
-          token.split(/(?<=^[#{PUNCTUATION}])(?=[a-zA-Z])|(?<=[a-zA-Z])(?=[#{PUNCTUATION}]+$)/)
-        end
-      end.flatten
+      regex = /\s+|(?<=[#{PUNCTUATION}])(?=[a-zA-Z])|(?<=[a-zA-Z])(?=[#{PUNCTUATION}]+)/
+      sentence.split(regex)
     end
 
-    def self.tokenset(sentence)
+    def self.stem(word)
+      Stemmer::stem_word(word.downcase)
+    end
+
+    def self.keywords(sentences)
+      # Preprocess to remove stopwords (highscore's blacklist is v. slow)
+      text = sentences.flatten.reject { |t| stopword?(t) }.join(' ')
+
+      text = Highscore::Content.new(text)
+
+      text.configure do
+        #set :multiplier, 2
+        #set :upper_case, 3
+        #set :long_words, 2
+        #set :long_words_threshold, 15
+        #set :vowels, 1 # => default: 0 = not considered
+        #set :consonants, 5 # => default: 0 = not considered
+        #set :ignore_case, true # => default: false
+        set :word_pattern, /(?<!@)(?<=\s)[\w']+/ # => default: /\w+/
+        #set :stemming, true # => default: false
+      end
+
+      text.keywords
+    end
+
+    def self.stemset(sentence)
       tokens = sentence.is_a?(Array) ? sentence : tokenize(sentence)
       tokens.map(&:downcase)
         .reject { |token| stopwords.include?(token) }
+        .map { |t| stemmer.stem(t) }
        .to_set
    end
 
-    def self.space_between?(token1, token2)
-      p1 = self.punctuation?(token1)
-      p2 = self.punctuation?(token2)
-      if p1 && p2 # "foo?!"
-        false
-      elsif !p1 && p2 # "foo."
-        false
-      elsif p1 && !p2 # "foo. rah"
-        true
-      else # "foo rah"
-        true
+    # Builds a token stem frequency map
+    def self.stemfreq(sentences)
+      freqmap = {}
+      sentences.flatten.each do |token|
+        stem = NLP.stem(token)
+        freqmap[stem] ||= 0
+        freqmap[stem] += 1
       end
+      freqmap
     end
 
+    # Takes a list of tokens and builds a nice-looking sentence
     def self.reconstruct(tokens)
-      # Put tokens back together into a nice looking sentence
       text = ""
       last_token = nil
       tokens.each do |token|
+        next if token == INTERIM
        text += ' ' if last_token && space_between?(last_token, token)
        text += token
        last_token = token
@@ -109,17 +126,35 @@ module Ebooks
       text
     end
 
-    # Deliberately limit our punctuation handling to stuff we can do consistently
-    # It'll just be a part of a token if we don't split it out, and that's fine
-    PUNCTUATION = ".?!,"
+    # Determine if we need to insert a space between two tokens
+    def self.space_between?(token1, token2)
+      p1 = self.punctuation?(token1)
+      p2 = self.punctuation?(token2)
+      if p1 && p2 # "foo?!"
+        false
+      elsif !p1 && p2 # "foo."
+        false
+      elsif p1 && !p2 # "foo. rah"
+        true
+      else # "foo rah"
+        true
+      end
+    end
 
     def self.punctuation?(token)
       (token.chars.to_set - PUNCTUATION.chars.to_set).empty?
     end
 
+    def self.stopword?(token)
+      @stopword_set ||= stopwords.map(&:downcase).to_set
+      @stopword_set.include?(token.downcase)
+    end
+
+    # Determine if a sample of text contains unmatched brackets or quotes
+    # This is one of the more frequent and noticeable failure modes for
+    # the markov generator; we can just tell it to retry
     def self.unmatched_enclosers?(text)
-      # Weird quotes are an instant giveaway. Let's do paren-matching.
-      enclosers = ['**', '""', '()', '[]', '``']
+      enclosers = ['**', '""', '()', '[]', '``', "''"]
       enclosers.each do |pair|
         starter = Regexp.new('(\W|^)' + Regexp.escape(pair[0]) + '\S')
         ender = Regexp.new('\S' + Regexp.escape(pair[1]) + '(\W|$)')
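
The replacement tokenizer is deliberately simple enough that `reconstruct` plus `space_between?` can invert it. A hand-traced illustration of the expected behaviour of the helpers above (results are traced from the regexes shown, not captured from a run):

    require 'twitter_ebooks'

    Ebooks::NLP.tokenize("hi there, bot.")
    # => ["hi", "there", ",", "bot", "."]

    Ebooks::NLP.reconstruct(["hi", "there", ",", "bot", "."])
    # => "hi there, bot."

    Ebooks::NLP.sentences("one. two!\nthree")
    # => ["one.", "two!", "three"]

    Ebooks::NLP.stopword?("the")
    # => true, backed by data/stopwords.txt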
@@ -1,3 +1,3 @@
 module Ebooks
-  VERSION = "2.0.3"
+  VERSION = "2.0.4"
 end
@@ -0,0 +1,19 @@
+#!/usr/bin/env ruby
+# encoding: utf-8
+
+require 'json'
+
+freqmap = {}
+
+data = File.read("data/ANC-all-count.txt")
+data = data.unpack("C*").pack("U*")
+
+data.lines.each do |l|
+  vals = l.split("\t")
+
+  freqmap[vals[0]] = vals[-1].to_i
+end
+
+File.open("data/wordfreq.json", 'w') do |f|
+  f.write(JSON.dump(freqmap))
+end
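
This one-off script converts the ANC frequency list into data/wordfreq.json, the file the new `NLP.wordfreq` helper above lazy-loads. A brief hedged sketch of how that data would be read back (the example key is arbitrary):

    require 'twitter_ebooks'

    freq = Ebooks::NLP.wordfreq  # JSON.load of data/wordfreq.json, per nlp.rb above
    freq["the"]                  # => raw occurrence count from the ANC list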
data/skeleton/Procfile CHANGED
@@ -1 +1 @@
-worker: ruby bots.rb start
+worker: ruby run.rb start
data/skeleton/bots.rb CHANGED
@@ -39,9 +39,3 @@ Ebooks::Bot.new("{{BOT_NAME}}") do |bot|
     # bot.tweet("hi")
   end
 end
-
-EM.run do
-  Ebooks::Bot.all.each do |bot|
-    bot.start
-  end
-end
@@ -1 +1 @@
-Put raw text files in here and process them with `ebooks consume` to make Markov models.
+Put any raw text files in here to be processed.
data/skeleton/run.rb ADDED
@@ -0,0 +1,9 @@
+#!/usr/bin/env ruby
+
+require_relative 'bots'
+
+EM.run do
+  Ebooks::Bot.all.each do |bot|
+    bot.start
+  end
+end