twitter_ebooks 2.0.3 → 2.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,6 +15,10 @@ module Ebooks
     @@all = [] # List of all defined bots
     def self.all; @@all; end
 
+    def self.get(name)
+      all.find { |bot| bot.username == name }
+    end
+
     def initialize(username, &b)
       # Set defaults
       @username = username
@@ -30,8 +34,7 @@ module Ebooks
      STDERR.flush
    end
 
-    # Connects to tweetstream and opens event handlers for this bot
-    def start
+    def configure
      TweetStream.configure do |config|
        config.consumer_key = @consumer_key
        config.consumer_secret = @consumer_secret
@@ -48,6 +51,13 @@ module Ebooks
 
      @twitter = Twitter::Client.new
      @stream = TweetStream::Client.new
+    end
+
+    # Connects to tweetstream and opens event handlers for this bot
+    def start
+      configure
+
+      @on_startup.call if @on_startup
 
      @stream.on_error do |msg|
        log "ERROR: #{msg}"
@@ -77,13 +87,20 @@ module Ebooks
      mentions = ev.attrs[:entities][:user_mentions].map { |x| x[:screen_name] }
 
      reply_mentions = mentions.reject { |m| m.downcase == @username.downcase }
-      reply_mentions << ev[:user][:screen_name]
+      reply_mentions = [ev[:user][:screen_name]] + reply_mentions
 
      meta[:reply_prefix] = reply_mentions.uniq.map { |m| '@'+m }.join(' ') + ' '
+      meta[:limit] = 140 - meta[:reply_prefix].length
 
      mless = ev[:text]
-      ev.attrs[:entities][:user_mentions].reverse.each do |entity|
-        mless = mless[0...entity[:indices][0]] + mless[entity[:indices][1]+1..-1]
+      begin
+        ev.attrs[:entities][:user_mentions].reverse.each do |entity|
+          mless = mless[0...entity[:indices][0]] + mless[entity[:indices][1]+1..-1]
+        end
+      rescue Exception
+        p ev.attrs[:entities][:user_mentions]
+        p ev[:text]
+        raise
      end
      meta[:mentionless] = mless
 
@@ -92,7 +109,7 @@ module Ebooks
      # - The tweet is not being retweeted by somebody else
      # - Or soft-retweeted by somebody else
      if mentions.map(&:downcase).include?(@username.downcase) && !ev[:retweeted_status] && !ev[:text].start_with?('RT ')
-        log "Mention from #{ev[:user][:screen_name]}: #{ev[:text]}"
+        log "Mention from @#{ev[:user][:screen_name]}: #{ev[:text]}"
        @on_mention.call(ev, meta)
      else
        @on_timeline.call(ev, meta)
@@ -117,7 +134,7 @@ module Ebooks
        log "Sending DM to @#{ev[:sender][:screen_name]}: #{text}"
        @twitter.direct_message_create(ev[:sender][:screen_name], text, opts)
      elsif ev.is_a? Twitter::Tweet
-        log "Replying to @#{ev[:user][:screen_name]}: #{text}"
+        log "Replying to @#{ev[:user][:screen_name]} with: #{text}"
        @twitter.update(text, in_reply_to_status_id: ev[:id])
      else
        raise Exception("Don't know how to reply to a #{ev.class}")
@@ -138,6 +155,7 @@ module Ebooks
      @twitter.update(*args)
    end
 
+    def on_startup(&b); @on_startup = b; end
    def on_follow(&b); @on_follow = b; end
    def on_mention(&b); @on_mention = b; end
    def on_timeline(&b); @on_timeline = b; end
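
Taken together, the bot changes above split stream setup into a separate `configure` step, add an `on_startup` hook that fires inside `start` before any stream events arrive, and add `Ebooks::Bot.get` for looking up a registered bot by username. A rough usage sketch, assuming the `reply`/`tweet` helpers whose bodies appear in the hunks above (the bot name and handler bodies are placeholders, not from the package):

    require 'twitter_ebooks'

    Ebooks::Bot.new("example_ebooks") do |bot|   # placeholder username
      bot.on_startup do
        # Runs once after configure, before the stream handlers are attached
        bot.tweet("hello")
      end

      bot.on_mention do |tweet, meta|
        # meta[:limit] (new in 2.0.4) is 140 minus the length of meta[:reply_prefix]
        bot.reply(tweet, meta[:reply_prefix] + "hi there")
      end
    end

    # Registered bots can now be fetched by name:
    bot = Ebooks::Bot.get("example_ebooks")
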
@@ -1,69 +1,73 @@
 module Ebooks
+  # Special INTERIM token represents sentence boundaries
+  # This is so we can include start and end of statements in model
+  # Due to the way the sentence tokenizer works, can correspond
+  # to multiple actual parts of text (such as ^, $, \n and .?!)
+  INTERIM = :interim
+
+  # This is an ngram-based Markov model optimized to build from a
+  # tokenized sentence list without requiring too much transformation
   class MarkovModel
-    INTERIM = :interim # Special token marking newline/^/$ boundaries
-
-    attr_accessor :tokens
-    attr_reader :depth
-
-    def represent(token)
-      if token.nil? || token == "\n" || token.empty?
-        INTERIM
-      else
-        token
-      end
+    def self.build(sentences)
+      MarkovModel.new.consume(sentences)
    end
 
-    def consume(tokenized, depth=2)
-      @tokens = [INTERIM]
-      @depth = depth
-
-      tokenized.each do |tokens|
-        @tokens += tokens
-        @tokens << INTERIM
-      end
-
-      @model = {}
-
-      @tokens.each_with_index do |token, i|
-        prev_tokens = []
-
-        @depth.downto(1) do |j|
-          if i-j < 0; next
-          else; prev = represent(@tokens[i-j])
+    def consume(sentences)
+      # These models are of the form ngram => [[sentence_pos, token_pos] || INTERIM, ...]
+      # We map by both bigrams and unigrams so we can fall back to the latter in
+      # cases where an input bigram is unavailable, such as starting a sentence
+      @sentences = sentences
+      @unigrams = {}
+      @bigrams = {}
+
+      sentences.each_with_index do |tokens, i|
+        last_token = INTERIM
+        tokens.each_with_index do |token, j|
+          @unigrams[last_token] ||= []
+          @unigrams[last_token] << [i, j]
+
+          @bigrams[last_token] ||= {}
+          @bigrams[last_token][token] ||= []
+
+          if j == tokens.length-1 # Mark sentence endings
+            @unigrams[token] ||= []
+            @unigrams[token] << INTERIM
+            @bigrams[last_token][token] << INTERIM
+          else
+            @bigrams[last_token][token] << [i, j+1]
          end
-          prev_tokens << prev
-        end
 
-        1.upto(@depth) do |j|
-          break if j > prev_tokens.length
-          ngram = prev_tokens.last(j)
-
-          unless ngram == INTERIM && prev_tokens[-1] == INTERIM
-            @model[ngram] ||= []
-            @model[ngram] << represent(token)
-          end
+          last_token = token
        end
      end
 
      self
    end
 
+    def find_token(index)
+      if index == INTERIM
+        INTERIM
+      else
+        @sentences[index[0]][index[1]]
+      end
+    end
+
    def chain(tokens)
-      next_token = nil
-      @depth.downto(1).each do |i|
-        next if tokens.length < i
-        matches = @model[tokens.last(i)]
-        if matches
-          #p tokens.last(i)
-          #puts "=> #{matches.inspect}"
-          next_token = matches.sample
-          break
-        end
+      if tokens.length == 1
+        matches = @unigrams[tokens[0]]
+      else
+        matches = @bigrams[tokens[-2]][tokens[-1]]
      end
 
-      raise ArgumentError if next_token.nil?
+      if matches.empty?
+        # This should never happen unless a strange token is
+        # supplied from outside the dataset
+        raise ArgumentError, "Unable to continue chain for: #{tokens.inspect}"
+      end
 
-      if next_token == INTERIM
+      next_token = find_token(matches.sample)
+
+      if next_token == INTERIM # We chose to end the sentence
        return tokens
      else
        return chain(tokens + [next_token])
@@ -71,19 +75,7 @@ module Ebooks
    end
 
    def generate
-      tokens = chain([@model[[INTERIM]].sample])
-      NLP.reconstruct(tokens)
-    end
-
-    def serialize
-      { 'model' => @model,
-        'depth' => @depth }
-    end
-
-    def deserialize(data)
-      @model = data['model']
-      @depth = data['depth']
-      self
+      NLP.reconstruct(chain([INTERIM]))
    end
  end
 end
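
The rewritten MarkovModel indexes `[sentence, position]` pairs under unigram and bigram keys and marks sentence boundaries with `INTERIM`, replacing the old depth-based ngram hash and its serialize/deserialize pair. A minimal sketch of how it is driven (the toy sentences are invented for illustration):

    # Tokenized sentences, as produced by NLP.tokenize in this release
    sentences = [
      %w(the cat sat on the mat .),
      %w(the cat slept all day .)
    ]

    model = Ebooks::MarkovModel.build(sentences)  # shorthand for MarkovModel.new.consume(sentences)

    # chain([INTERIM]) starts from a sentence boundary, follows bigram matches
    # (falling back to unigrams when only one token is known), and stops when
    # it samples an INTERIM marker; generate reconstructs the result as text
    puts model.generate
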
@@ -7,15 +7,14 @@ require 'digest/md5'
 
 module Ebooks
  class Model
-    attr_accessor :hash, :sentences, :tokenized, :markov
+    attr_accessor :hash, :sentences, :markov, :keywords
 
    def self.consume(txtpath)
      Model.new.consume(txtpath)
    end
 
    def self.load(path)
-      data = Marshal.load(File.read(path))
-      Model.new.deserialize(data)
+      Marshal.load(File.read(path))
    end
 
    def consume(txtpath)
@@ -23,7 +22,7 @@ module Ebooks
      @hash = Digest::MD5.hexdigest(File.read(txtpath))
 
      text = File.read(txtpath)
-      log "Removing commented lines and mentions"
+      log "Removing commented lines and mention tokens"
 
      lines = text.split("\n")
      keeping = []
@@ -34,70 +33,43 @@ module Ebooks
      end
      text = NLP.normalize(keeping.join("\n"))
 
-      log "Segmenting text into sentences of 140 characters or less"
-      @sentences = NLP.sentences(text).reject do |s|
-        s.length > 140 || s.count('"')%2 != 0
-      end
+      log "Segmenting text into sentences"
 
-      log "Tokenizing #{@sentences.length} sentences"
-      @tokenized = @sentences.map { |sent| NLP.tokenize(sent) }
-      @tokensets = @tokenized.map { |tokens| NLP.tokenset(tokens) }
+      sentences = NLP.sentences(text)
 
-      log "Building markov model (this may take a while)"
-      @markov = MarkovModel.new.consume(@tokenized)
+      log "Tokenizing #{sentences.length} sentences"
+      @sentences = sentences.map { |sent| NLP.tokenize(sent) }
 
-      self
-    end
+      log "Building markov model"
+      @markov = MarkovModel.build(@sentences)
 
-    # Produces a hash with the data needed to quickly
-    # reconstruct this corpus object
-    def serialize
-      return { 'hash' => @hash,
-               'tokenized' => @tokenized,
-               'tokensets' => @tokensets,
-               'markov' => @markov.serialize }
+      log "Ranking keywords"
+      require 'benchmark'
+      puts Benchmark.measure {
+        @keywords = NLP.keywords(@sentences)
+        p @keywords.top(100)
+      }
+
+      self
    end
 
    def save(path)
-      data = self.serialize
      File.open(path, 'w') do |f|
-        f.write(Marshal.dump(data))
+        f.write(Marshal.dump(self))
      end
      self
    end
 
-    def deserialize(data)
-      @hash = data['hash']
-      @tokenized = data['tokenized']
-      @tokensets = data['tokensets']
-      @markov = MarkovModel.new.deserialize(data['markov'])
-      self
-    end
-
-    def replace_noun(sent)
-      tagged = NLP.tagger.add_tags(sent)
-
-      nouns = tagged.scan(/<nn>([^<]+)<\/nn>/).flatten
-      to_replace = nouns.reject { |n| ['much'].include?(n) }.sample
-      return sent if to_replace.nil?
-      replacement = NLP.nouns.sample
-      if to_replace.en.plural.length <= to_replace.length
-        replacement = replacement.en.plural(1)
-      end
-      sent = sent.gsub(/(?<=\W)#{to_replace}(?=\W)/, replacement)
-      sent.gsub(/(?<=\W)(a|an) #{replacement}(?=\W)/, replacement.en.a)
-    end
-
    def fix(tweet)
      # This seems to require an external api call
-      begin
-        fixer = NLP.gingerice.parse(tweet)
-        log fixer if fixer['corrections']
-        tweet = fixer['result']
-      rescue Exception => e
-        log e.message
-        log e.backtrace
-      end
+      #begin
+      #  fixer = NLP.gingerice.parse(tweet)
+      #  log fixer if fixer['corrections']
+      #  tweet = fixer['result']
+      #rescue Exception => e
+      #  log e.message
+      #  log e.backtrace
+      #end
 
      NLP.htmlentities.decode tweet
    end
@@ -115,33 +87,44 @@ module Ebooks
      fix tweet
    end
 
-    # Generates a response by looking for related sentences
-    # in the corpus and building a smaller markov model from these
-    def markov_response(input, limit=140)
-      inputset = NLP.tokenset(input)
-      log "Input tokenset: #{inputset.to_a}"
+    # Finds all relevant tokenized sentences to given input by
+    # comparing non-stopword token overlaps
+    def relevant_sentences(input)
+      relevant = []
+      slightly_relevant = []
 
-      if inputset.empty?
-        # Very uninteresting input; no relevant response possible
-        return markov_statement(limit)
-      end
+      tokenized = NLP.tokenize(input)
 
-      # Let's find all the sentences that might be relevant
-      relevant = []
-      @tokensets.each_with_index.map do |set, i|
-        if inputset.intersection(set).length > 0
-          relevant << @tokenized[i]
+      @sentences.each do |sent|
+        tokenized.each do |token|
+          if sent.include?(token)
+            relevant << sent unless NLP.stopword?(token)
+            slightly_relevant << sent
+          end
        end
      end
 
-      log "Found #{relevant.length} relevant tokenset matches"
+      [relevant, slightly_relevant]
+    end
 
-      if relevant.length < 3
-        return markov_statement(limit)
+    # Generates a response by looking for related sentences
+    # in the corpus and building a smaller markov model from these
+    def markov_response(input, limit=140)
+      # First try
+      relevant, slightly_relevant = relevant_sentences(input)
+
+      p relevant
+      p slightly_relevant.length
+
+      if relevant.length >= 3
+        markov = MarkovModel.new.consume(relevant)
+        markov_statement(limit, markov)
+      elsif slightly_relevant.length > 5
+        markov = MarkovModel.new.consume(slightly_relevant)
+        markov_statement(limit, markov)
+      else
+        markov_statement(limit)
      end
-
-      markov = MarkovModel.new.consume(relevant.sample(100))
-      markov_statement(limit, markov)
    end
  end
 end
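
Response generation is now tiered: `relevant_sentences` splits the corpus into sentences sharing a non-stopword token with the input (`relevant`) and sentences sharing any token at all (`slightly_relevant`), and `markov_response` builds a temporary model from whichever set is large enough before falling back to the full corpus model. Roughly (the model path and input string below are placeholders):

    model = Ebooks::Model.load("model/example.model")   # placeholder path

    # >= 3 relevant sentences         -> model built from those sentences
    # otherwise > 5 slightly relevant -> model built from the looser match set
    # otherwise                       -> markov_statement from the full corpus model
    puts model.markov_response("what do you think of cats?", 140)
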
@@ -1,12 +1,16 @@
 # encoding: utf-8
-
-require 'linguistics'
-Linguistics.use(:en, classes: [String])
+require 'fast-stemmer'
+require 'highscore'
 
 module Ebooks
  module NLP
-    # We don't necessarily want to use all of this stuff all the time
-    # Only load it when it is needed
+    # We deliberately limit our punctuation handling to stuff we can do consistently
+    # It'll just be a part of another token if we don't split it out, and that's fine
+    PUNCTUATION = ".?!,"
+
+    # Lazy-load NLP libraries and resources
+    # Some of this stuff is pretty heavy and we don't necessarily need
+    # to be using it all of the time
 
    def self.stopwords
      @stopwords ||= File.read(File.join(DATA_PATH, 'stopwords.txt')).split
@@ -19,89 +23,102 @@ module Ebooks
    def self.adjectives
      @adjectives ||= File.read(File.join(DATA_PATH, 'adjectives.txt')).split
    end
-
-    def self.tokenizer
-      # This tokenizer is used for dividing sentences into words
-      # It's too slow for finding sentences in paragraphs, hence tactful
-      require 'tokenizer'
-      @tokenizer ||= Tokenizer::Tokenizer.new(:en)
-    end
-
-    def self.tactful
-      require 'tactful_tokenizer'
-      @tactful ||= TactfulTokenizer::Model.new
+
+    def self.wordfreq
+      @wordfreq ||= JSON.load(File.read(File.join(DATA_PATH, 'wordfreq.json')))
    end
 
+    # POS tagger
    def self.tagger
      require 'engtagger'
      @tagger ||= EngTagger.new
    end
 
-    def self.stemmer
-      require 'lingua/stemmer'
-      @stemmer ||= Lingua::Stemmer.new
-    end
-
+    # Gingerice text correction service
    def self.gingerice
      require 'gingerice'
      Gingerice::Parser.new # No caching for this one
    end
 
+    # For decoding html entities
    def self.htmlentities
      require 'htmlentities'
      @htmlentities ||= HTMLEntities.new
    end
 
-    ### Utility functions which wrap the above
+    ### Utility functions
 
-    def self.sentences(text)
-      tactful.tokenize_text(text)
-    end
-
+    # We don't really want to deal with all this weird unicode punctuation
    def self.normalize(text)
      htmlentities.decode text.gsub('“', '"').gsub('”', '"').gsub('’', "'").gsub('…', '...')
    end
 
+    # Split text into sentences
+    # We use ad hoc approach because fancy libraries do not deal
+    # especially well with tweet formatting, and we can fake solving
+    # the quote problem during generation
+    def self.sentences(text)
+      text.split(/\n+|(?<=[.?!])\s+/)
+    end
+
+    # Split a sentence into word-level tokens
+    # As above, this is ad hoc because tokenization libraries
+    # do not behave well wrt. things like emoticons and timestamps
    def self.tokenize(sentence)
-      # This is hacky, but an ad hoc approach seems to be
-      # most reliable for now. Tokenization libraries have oddities
-      # that are hard to correct.
-      sentence.split(/\s/).map do |token|
-        exceptions = [/^\w\)$/, /^@/, /^#/, /^:\w$/, /^:\w$/, /^http/]
-        if exceptions.find { |r| r.match(token) }
-          token
-        else
-          token.split(/(?<=^[#{PUNCTUATION}])(?=[a-zA-Z])|(?<=[a-zA-Z])(?=[#{PUNCTUATION}]+$)/)
-        end
-      end.flatten
+      regex = /\s+|(?<=[#{PUNCTUATION}])(?=[a-zA-Z])|(?<=[a-zA-Z])(?=[#{PUNCTUATION}]+)/
+      sentence.split(regex)
    end
 
-    def self.tokenset(sentence)
+    def self.stem(word)
+      Stemmer::stem_word(word.downcase)
+    end
+
+    def self.keywords(sentences)
+      # Preprocess to remove stopwords (highscore's blacklist is v. slow)
+      text = sentences.flatten.reject { |t| stopword?(t) }.join(' ')
+
+      text = Highscore::Content.new(text)
+
+      text.configure do
+        #set :multiplier, 2
+        #set :upper_case, 3
+        #set :long_words, 2
+        #set :long_words_threshold, 15
+        #set :vowels, 1 # => default: 0 = not considered
+        #set :consonants, 5 # => default: 0 = not considered
+        #set :ignore_case, true # => default: false
+        set :word_pattern, /(?<!@)(?<=\s)[\w']+/ # => default: /\w+/
+        #set :stemming, true # => default: false
+      end
+
+      text.keywords
+    end
+
+    def self.stemset(sentence)
      tokens = sentence.is_a?(Array) ? sentence : tokenize(sentence)
      tokens.map(&:downcase)
            .reject { |token| stopwords.include?(token) }
+           .map { |t| stemmer.stem(t) }
            .to_set
    end
 
-    def self.space_between?(token1, token2)
-      p1 = self.punctuation?(token1)
-      p2 = self.punctuation?(token2)
-      if p1 && p2 # "foo?!"
-        false
-      elsif !p1 && p2 # "foo."
-        false
-      elsif p1 && !p2 # "foo. rah"
-        true
-      else # "foo rah"
-        true
+    # Builds a token stem frequency map
+    def self.stemfreq(sentences)
+      freqmap = {}
+      sentences.flatten.each do |token|
+        stem = NLP.stem(token)
+        freqmap[stem] ||= 0
+        freqmap[stem] += 1
      end
+      freqmap
    end
 
+    # Takes a list of tokens and builds a nice-looking sentence
    def self.reconstruct(tokens)
-      # Put tokens back together into a nice looking sentence
      text = ""
      last_token = nil
      tokens.each do |token|
+        next if token == INTERIM
        text += ' ' if last_token && space_between?(last_token, token)
        text += token
        last_token = token
@@ -109,17 +126,35 @@ module Ebooks
      text
    end
 
-    # Deliberately limit our punctuation handling to stuff we can do consistently
-    # It'll just be a part of a token if we don't split it out, and that's fine
-    PUNCTUATION = ".?!,"
+    # Determine if we need to insert a space between two tokens
+    def self.space_between?(token1, token2)
+      p1 = self.punctuation?(token1)
+      p2 = self.punctuation?(token2)
+      if p1 && p2 # "foo?!"
+        false
+      elsif !p1 && p2 # "foo."
+        false
+      elsif p1 && !p2 # "foo. rah"
+        true
+      else # "foo rah"
+        true
+      end
+    end
 
    def self.punctuation?(token)
      (token.chars.to_set - PUNCTUATION.chars.to_set).empty?
    end
 
+    def self.stopword?(token)
+      @stopword_set ||= stopwords.map(&:downcase).to_set
+      @stopword_set.include?(token.downcase)
+    end
+
+    # Determine if a sample of text contains unmatched brackets or quotes
+    # This is one of the more frequent and noticeable failure modes for
+    # the markov generator; we can just tell it to retry
    def self.unmatched_enclosers?(text)
-      # Weird quotes are an instant giveaway. Let's do paren-matching.
-      enclosers = ['**', '""', '()', '[]', '``']
+      enclosers = ['**', '""', '()', '[]', '``', "''"]
      enclosers.each do |pair|
        starter = Regexp.new('(\W|^)' + Regexp.escape(pair[0]) + '\S')
        ender = Regexp.new('\S' + Regexp.escape(pair[1]) + '(\W|$)')
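
With the tokenizer and tactful_tokenizer dependencies dropped, sentence and token splitting are just the two regexes above, built around the `PUNCTUATION` set. A quick illustration of the expected behaviour on made-up input:

    Ebooks::NLP.sentences("hello there! nice day.\nsee you")
    # => ["hello there!", "nice day.", "see you"]

    Ebooks::NLP.tokenize("nice day, isn't it?")
    # => ["nice", "day", ",", "isn't", "it", "?"]
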
@@ -1,3 +1,3 @@
 module Ebooks
-  VERSION = "2.0.3"
+  VERSION = "2.0.4"
 end
@@ -0,0 +1,19 @@
+#!/usr/bin/env ruby
+# encoding: utf-8
+
+require 'json'
+
+freqmap = {}
+
+data = File.read("data/ANC-all-count.txt")
+data = data.unpack("C*").pack("U*")
+
+data.lines.each do |l|
+  vals = l.split("\t")
+
+  freqmap[vals[0]] = vals[-1].to_i
+end
+
+File.open("data/wordfreq.json", 'w') do |f|
+  f.write(JSON.dump(freqmap))
+end
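
This new script (the data behind `NLP.wordfreq` above) converts the ANC frequency list into `data/wordfreq.json`. It reads tab-separated lines, taking the word from the first column and the count from the last; a tiny self-contained illustration of the same transformation (the sample line and count are invented, not taken from the real file):

    require 'json'

    sample = "the\tthe\tDT\t22038615"          # illustrative line only
    vals = sample.split("\t")
    puts JSON.dump(vals[0] => vals[-1].to_i)   # => {"the":22038615}
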
data/skeleton/Procfile CHANGED
@@ -1 +1 @@
-worker: ruby bots.rb start
+worker: ruby run.rb start
data/skeleton/bots.rb CHANGED
@@ -39,9 +39,3 @@ Ebooks::Bot.new("{{BOT_NAME}}") do |bot|
    # bot.tweet("hi")
  end
 end
-
-EM.run do
-  Ebooks::Bot.all.each do |bot|
-    bot.start
-  end
-end
@@ -1 +1 @@
-Put raw text files in here and process them with `ebooks consume` to make Markov models.
+Put any raw text files in here to be processed.
data/skeleton/run.rb ADDED
@@ -0,0 +1,9 @@
+#!/usr/bin/env ruby
+
+require_relative 'bots'
+
+EM.run do
+  Ebooks::Bot.all.each do |bot|
+    bot.start
+  end
+end