twitter_ebooks 3.1.0 → 3.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/bin/ebooks +33 -0
- data/lib/twitter_ebooks/model.rb +41 -3
- data/lib/twitter_ebooks/nlp.rb +1 -1
- data/lib/twitter_ebooks/suffix.rb +3 -0
- data/lib/twitter_ebooks/version.rb +1 -1
- data/twitter_ebooks.gemspec +1 -1
- metadata +6 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: eb109277da9591a48da1231fe81690a569a94e16
|
4
|
+
data.tar.gz: 84a3a30a0e8bdec98e6998a22897617d6349807a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e492aa786f3d29d16fa878cc3bcb615ba7258ee01d5b5f4a1817279bc08bb4d0d485e78696cd9f39ebe2cc6b81aa01187aa69b735c9ea5a9b9fe496f00954b1c
|
7
|
+
data.tar.gz: 5208f5d31d895ecc60ab6829f26cea43c80d59b39f53063911593db7914de80cf6d7cd852a4ba65f6bd6435bac09d27de5e5f089d1427344cbe3d04c5d4e5146
|
data/README.md
CHANGED
@@ -135,10 +135,10 @@ The secondary function is the "interesting keywords" list. For example, I use th
|
|
135
135
|
|
136
136
|
``` ruby
|
137
137
|
top100 = model.keywords.take(100)
|
138
|
-
tokens = Ebooks::NLP.tokenize(tweet
|
138
|
+
tokens = Ebooks::NLP.tokenize(tweet.text)
|
139
139
|
|
140
140
|
if tokens.find { |t| top100.include?(t) }
|
141
|
-
|
141
|
+
favorite(tweet)
|
142
142
|
end
|
143
143
|
```
|
144
144
|
|
data/bin/ebooks
CHANGED
@@ -25,9 +25,11 @@ Usage:
|
|
25
25
|
ebooks auth
|
26
26
|
ebooks consume <corpus_path> [corpus_path2] [...]
|
27
27
|
ebooks consume-all <model_name> <corpus_path> [corpus_path2] [...]
|
28
|
+
ebooks append <model_name> <corpus_path>
|
28
29
|
ebooks gen <model_path> [input]
|
29
30
|
ebooks archive <username> [path]
|
30
31
|
ebooks tweet <model_path> <botname>
|
32
|
+
ebooks version
|
31
33
|
STR
|
32
34
|
|
33
35
|
def self.help(command=nil)
|
@@ -115,6 +117,24 @@ STR
|
|
115
117
|
log "Corpuses consumed to #{outpath}"
|
116
118
|
end
|
117
119
|
|
120
|
+
HELP.append = <<-STR
|
121
|
+
Usage: ebooks append <model_name> <corpus_path>
|
122
|
+
|
123
|
+
Process then append the provided corpus to the model
|
124
|
+
instead of overwriting.
|
125
|
+
STR
|
126
|
+
|
127
|
+
def self.append(name, path)
|
128
|
+
if !name || !path
|
129
|
+
help :append
|
130
|
+
exit 1
|
131
|
+
end
|
132
|
+
|
133
|
+
Ebooks::Model.consume(path).append(File.join(APP_PATH,'model',"#{name}.model"))
|
134
|
+
log "Corpus appended to #{name}.model"
|
135
|
+
end
|
136
|
+
|
137
|
+
|
118
138
|
HELP.jsonify = <<-STR
|
119
139
|
Usage: ebooks jsonify <tweets.csv> [tweets.csv2] [...]
|
120
140
|
|
@@ -275,6 +295,17 @@ STR
|
|
275
295
|
require 'pry'; Ebooks.module_exec { pry }
|
276
296
|
end
|
277
297
|
|
298
|
+
HELP.version = <<-STR
|
299
|
+
Usage: ebooks version
|
300
|
+
|
301
|
+
Shows you twitter_ebooks' version number.
|
302
|
+
STR
|
303
|
+
|
304
|
+
def self.version
|
305
|
+
require File.expand_path('../../lib/twitter_ebooks/version', __FILE__)
|
306
|
+
log Ebooks::VERSION
|
307
|
+
end
|
308
|
+
|
278
309
|
HELP.start = <<-STR
|
279
310
|
Usage: ebooks s[tart] [botname]
|
280
311
|
|
@@ -368,6 +399,7 @@ STR
|
|
368
399
|
when "new" then new(args[1])
|
369
400
|
when "consume" then consume(args[1..-1])
|
370
401
|
when "consume-all" then consume_all(args[1], args[2..-1])
|
402
|
+
when "append" then append(args[1],args[2])
|
371
403
|
when "gen" then gen(args[1], args[2..-1].join(' '))
|
372
404
|
when "archive" then archive(args[1], args[2])
|
373
405
|
when "tweet" then tweet(args[1], args[2])
|
@@ -378,6 +410,7 @@ STR
|
|
378
410
|
when "start" then start(args[1])
|
379
411
|
when "s" then start(args[1])
|
380
412
|
when "help" then help(args[1])
|
413
|
+
when "version" then version
|
381
414
|
else
|
382
415
|
log "No such command '#{args[0]}'"
|
383
416
|
help
|
data/lib/twitter_ebooks/model.rb
CHANGED
@@ -69,6 +69,35 @@ module Ebooks
|
|
69
69
|
self
|
70
70
|
end
|
71
71
|
|
72
|
+
# Append a generated model to existing model file instead of overwriting it
|
73
|
+
# @param path [String]
|
74
|
+
def append(path)
|
75
|
+
existing = File.file?(path)
|
76
|
+
if !existing
|
77
|
+
log "No existing model found at #{path}"
|
78
|
+
return
|
79
|
+
else
|
80
|
+
#read-in and deserialize existing model
|
81
|
+
props = Marshal.load(File.open(path,'rb') { |old| old.read })
|
82
|
+
old_tokens = props[:tokens]
|
83
|
+
old_sentences = props[:sentences]
|
84
|
+
old_mentions = props[:mentions]
|
85
|
+
old_keywords = props[:keywords]
|
86
|
+
|
87
|
+
#append existing properties to new ones and overwrite with new model
|
88
|
+
File.open(path, 'wb') do |f|
|
89
|
+
f.write(Marshal.dump({
|
90
|
+
tokens: @tokens.concat(old_tokens),
|
91
|
+
sentences: @sentences.concat(old_sentences),
|
92
|
+
mentions: @mentions.concat(old_mentions),
|
93
|
+
keywords: @keywords.concat(old_keywords)
|
94
|
+
}))
|
95
|
+
end
|
96
|
+
end
|
97
|
+
self
|
98
|
+
end
|
99
|
+
|
100
|
+
|
72
101
|
def initialize
|
73
102
|
@tokens = []
|
74
103
|
|
@@ -80,7 +109,13 @@ module Ebooks
|
|
80
109
|
# @param token [String]
|
81
110
|
# @return [Integer]
|
82
111
|
def tikify(token)
|
83
|
-
@tikis
|
112
|
+
if @tikis.has_key?(token) then
|
113
|
+
return @tikis[token]
|
114
|
+
else
|
115
|
+
(@tokens.length+1)%1000 == 0 and puts "#{@tokens.length+1} tokens"
|
116
|
+
@tokens << token
|
117
|
+
return @tikis[token] = @tokens.length-1
|
118
|
+
end
|
84
119
|
end
|
85
120
|
|
86
121
|
# Convert a body of text into arrays of tikis
|
@@ -143,8 +178,8 @@ module Ebooks
|
|
143
178
|
end
|
144
179
|
end
|
145
180
|
|
146
|
-
text = statements.join("\n")
|
147
|
-
mention_text = mentions.join("\n")
|
181
|
+
text = statements.join("\n").encode('UTF-8', :invalid => :replace)
|
182
|
+
mention_text = mentions.join("\n").encode('UTF-8', :invalid => :replace)
|
148
183
|
|
149
184
|
lines = nil; statements = nil; mentions = nil # Allow garbage collection
|
150
185
|
|
@@ -155,6 +190,7 @@ module Ebooks
|
|
155
190
|
|
156
191
|
log "Ranking keywords"
|
157
192
|
@keywords = NLP.keywords(text).top(200).map(&:to_s)
|
193
|
+
log "Top keywords: #{@keywords[0]} #{@keywords[1]} #{@keywords[2]}"
|
158
194
|
|
159
195
|
self
|
160
196
|
end
|
@@ -218,6 +254,7 @@ module Ebooks
|
|
218
254
|
tweet = ""
|
219
255
|
|
220
256
|
while (tikis = generator.generate(3, :bigrams)) do
|
257
|
+
log "Attempting to produce tweet try #{retries+1}/#{retry_limit}"
|
221
258
|
next if tikis.length <= 3 && !responding
|
222
259
|
break if valid_tweet?(tikis, limit)
|
223
260
|
|
@@ -226,6 +263,7 @@ module Ebooks
|
|
226
263
|
end
|
227
264
|
|
228
265
|
if verbatim?(tikis) && tikis.length > 3 # We made a verbatim tweet by accident
|
266
|
+
log "Attempting to produce unigram tweet try #{retries+1}/#{retry_limit}"
|
229
267
|
while (tikis = generator.generate(3, :unigrams)) do
|
230
268
|
break if valid_tweet?(tikis, limit) && !verbatim?(tikis)
|
231
269
|
|
data/lib/twitter_ebooks/nlp.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
require 'fast-stemmer'
|
3
3
|
require 'highscore'
|
4
|
+
require 'htmlentities'
|
4
5
|
|
5
6
|
module Ebooks
|
6
7
|
module NLP
|
@@ -42,7 +43,6 @@ module Ebooks
|
|
42
43
|
# Lazily load HTML entity decoder
|
43
44
|
# @return [HTMLEntities]
|
44
45
|
def self.htmlentities
|
45
|
-
require 'htmlentities'
|
46
46
|
@htmlentities ||= HTMLEntities.new
|
47
47
|
end
|
48
48
|
|
data/lib/twitter_ebooks/suffix.rb
CHANGED
@@ -19,6 +19,9 @@ module Ebooks
|
|
19
19
|
@bigrams = {}
|
20
20
|
|
21
21
|
@sentences.each_with_index do |tikis, i|
|
22
|
+
if (i % 10000 == 0) then
|
23
|
+
log ("Building: sentence #{i} of #{sentences.length}")
|
24
|
+
end
|
22
25
|
last_tiki = INTERIM
|
23
26
|
tikis.each_with_index do |tiki, j|
|
24
27
|
@unigrams[last_tiki] ||= []
|
data/twitter_ebooks.gemspec
CHANGED
@@ -22,7 +22,7 @@ Gem::Specification.new do |gem|
|
|
22
22
|
gem.add_development_dependency 'pry-byebug'
|
23
23
|
gem.add_development_dependency 'yard'
|
24
24
|
|
25
|
-
gem.add_runtime_dependency 'twitter', '~> 5.
|
25
|
+
gem.add_runtime_dependency 'twitter', '~> 5.15'
|
26
26
|
gem.add_runtime_dependency 'rufus-scheduler'
|
27
27
|
gem.add_runtime_dependency 'gingerice'
|
28
28
|
gem.add_runtime_dependency 'htmlentities'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitter_ebooks
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1.0
|
4
|
+
version: 3.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jaiden Mispy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-09-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
@@ -100,14 +100,14 @@ dependencies:
|
|
100
100
|
requirements:
|
101
101
|
- - "~>"
|
102
102
|
- !ruby/object:Gem::Version
|
103
|
-
version: '5.
|
103
|
+
version: '5.15'
|
104
104
|
type: :runtime
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
|
-
version: '5.
|
110
|
+
version: '5.15'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
112
|
name: rufus-scheduler
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
@@ -278,7 +278,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
278
278
|
version: '0'
|
279
279
|
requirements: []
|
280
280
|
rubyforge_project:
|
281
|
-
rubygems_version: 2.4.
|
281
|
+
rubygems_version: 2.4.8
|
282
282
|
signing_key:
|
283
283
|
specification_version: 4
|
284
284
|
summary: Markov chains for all your friends~
|
@@ -289,3 +289,4 @@ test_files:
|
|
289
289
|
- spec/memprof.rb
|
290
290
|
- spec/model_spec.rb
|
291
291
|
- spec/spec_helper.rb
|
292
|
+
has_rdoc:
|