twitter_ebooks 3.1.0 → 3.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/bin/ebooks +33 -0
- data/lib/twitter_ebooks/model.rb +41 -3
- data/lib/twitter_ebooks/nlp.rb +1 -1
- data/lib/twitter_ebooks/suffix.rb +3 -0
- data/lib/twitter_ebooks/version.rb +1 -1
- data/twitter_ebooks.gemspec +1 -1
- metadata +6 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: eb109277da9591a48da1231fe81690a569a94e16
|
4
|
+
data.tar.gz: 84a3a30a0e8bdec98e6998a22897617d6349807a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e492aa786f3d29d16fa878cc3bcb615ba7258ee01d5b5f4a1817279bc08bb4d0d485e78696cd9f39ebe2cc6b81aa01187aa69b735c9ea5a9b9fe496f00954b1c
|
7
|
+
data.tar.gz: 5208f5d31d895ecc60ab6829f26cea43c80d59b39f53063911593db7914de80cf6d7cd852a4ba65f6bd6435bac09d27de5e5f089d1427344cbe3d04c5d4e5146
|
data/README.md
CHANGED
@@ -135,10 +135,10 @@ The secondary function is the "interesting keywords" list. For example, I use th
|
|
135
135
|
|
136
136
|
``` ruby
|
137
137
|
top100 = model.keywords.take(100)
|
138
|
-
tokens = Ebooks::NLP.tokenize(tweet
|
138
|
+
tokens = Ebooks::NLP.tokenize(tweet.text)
|
139
139
|
|
140
140
|
if tokens.find { |t| top100.include?(t) }
|
141
|
-
|
141
|
+
favorite(tweet)
|
142
142
|
end
|
143
143
|
```
|
144
144
|
|
data/bin/ebooks
CHANGED
@@ -25,9 +25,11 @@ Usage:
|
|
25
25
|
ebooks auth
|
26
26
|
ebooks consume <corpus_path> [corpus_path2] [...]
|
27
27
|
ebooks consume-all <model_name> <corpus_path> [corpus_path2] [...]
|
28
|
+
ebooks append <model_name> <corpus_path>
|
28
29
|
ebooks gen <model_path> [input]
|
29
30
|
ebooks archive <username> [path]
|
30
31
|
ebooks tweet <model_path> <botname>
|
32
|
+
ebooks version
|
31
33
|
STR
|
32
34
|
|
33
35
|
def self.help(command=nil)
|
@@ -115,6 +117,24 @@ STR
|
|
115
117
|
log "Corpuses consumed to #{outpath}"
|
116
118
|
end
|
117
119
|
|
120
|
+
HELP.append = <<-STR
|
121
|
+
Usage: ebooks append <model_name> <corpus_path>
|
122
|
+
|
123
|
+
Process then append the provided corpus to the model
|
124
|
+
instead of overwriting.
|
125
|
+
STR
|
126
|
+
|
127
|
+
def self.append(name, path)
|
128
|
+
if !name || !path
|
129
|
+
help :append
|
130
|
+
exit 1
|
131
|
+
end
|
132
|
+
|
133
|
+
Ebooks::Model.consume(path).append(File.join(APP_PATH,'model',"#{name}.model"))
|
134
|
+
log "Corpus appended to #{name}.model"
|
135
|
+
end
|
136
|
+
|
137
|
+
|
118
138
|
HELP.jsonify = <<-STR
|
119
139
|
Usage: ebooks jsonify <tweets.csv> [tweets.csv2] [...]
|
120
140
|
|
@@ -275,6 +295,17 @@ STR
|
|
275
295
|
require 'pry'; Ebooks.module_exec { pry }
|
276
296
|
end
|
277
297
|
|
298
|
+
HELP.version = <<-STR
|
299
|
+
Usage: ebooks version
|
300
|
+
|
301
|
+
Shows you twitter_ebooks' version number.
|
302
|
+
STR
|
303
|
+
|
304
|
+
def self.version
|
305
|
+
require File.expand_path('../../lib/twitter_ebooks/version', __FILE__)
|
306
|
+
log Ebooks::VERSION
|
307
|
+
end
|
308
|
+
|
278
309
|
HELP.start = <<-STR
|
279
310
|
Usage: ebooks s[tart] [botname]
|
280
311
|
|
@@ -368,6 +399,7 @@ STR
|
|
368
399
|
when "new" then new(args[1])
|
369
400
|
when "consume" then consume(args[1..-1])
|
370
401
|
when "consume-all" then consume_all(args[1], args[2..-1])
|
402
|
+
when "append" then append(args[1],args[2])
|
371
403
|
when "gen" then gen(args[1], args[2..-1].join(' '))
|
372
404
|
when "archive" then archive(args[1], args[2])
|
373
405
|
when "tweet" then tweet(args[1], args[2])
|
@@ -378,6 +410,7 @@ STR
|
|
378
410
|
when "start" then start(args[1])
|
379
411
|
when "s" then start(args[1])
|
380
412
|
when "help" then help(args[1])
|
413
|
+
when "version" then version
|
381
414
|
else
|
382
415
|
log "No such command '#{args[0]}'"
|
383
416
|
help
|
data/lib/twitter_ebooks/model.rb
CHANGED
@@ -69,6 +69,35 @@ module Ebooks
|
|
69
69
|
self
|
70
70
|
end
|
71
71
|
|
72
|
+
# Append a generated model to existing model file instead of overwriting it
|
73
|
+
# @param path [String]
|
74
|
+
def append(path)
|
75
|
+
existing = File.file?(path)
|
76
|
+
if !existing
|
77
|
+
log "No existing model found at #{path}"
|
78
|
+
return
|
79
|
+
else
|
80
|
+
#read-in and deserialize existing model
|
81
|
+
props = Marshal.load(File.open(path,'rb') { |old| old.read })
|
82
|
+
old_tokens = props[:tokens]
|
83
|
+
old_sentences = props[:sentences]
|
84
|
+
old_mentions = props[:mentions]
|
85
|
+
old_keywords = props[:keywords]
|
86
|
+
|
87
|
+
#append existing properties to new ones and overwrite with new model
|
88
|
+
File.open(path, 'wb') do |f|
|
89
|
+
f.write(Marshal.dump({
|
90
|
+
tokens: @tokens.concat(old_tokens),
|
91
|
+
sentences: @sentences.concat(old_sentences),
|
92
|
+
mentions: @mentions.concat(old_mentions),
|
93
|
+
keywords: @keywords.concat(old_keywords)
|
94
|
+
}))
|
95
|
+
end
|
96
|
+
end
|
97
|
+
self
|
98
|
+
end
|
99
|
+
|
100
|
+
|
72
101
|
def initialize
|
73
102
|
@tokens = []
|
74
103
|
|
@@ -80,7 +109,13 @@ module Ebooks
|
|
80
109
|
# @param token [String]
|
81
110
|
# @return [Integer]
|
82
111
|
def tikify(token)
|
83
|
-
@tikis
|
112
|
+
if @tikis.has_key?(token) then
|
113
|
+
return @tikis[token]
|
114
|
+
else
|
115
|
+
(@tokens.length+1)%1000 == 0 and puts "#{@tokens.length+1} tokens"
|
116
|
+
@tokens << token
|
117
|
+
return @tikis[token] = @tokens.length-1
|
118
|
+
end
|
84
119
|
end
|
85
120
|
|
86
121
|
# Convert a body of text into arrays of tikis
|
@@ -143,8 +178,8 @@ module Ebooks
|
|
143
178
|
end
|
144
179
|
end
|
145
180
|
|
146
|
-
text = statements.join("\n")
|
147
|
-
mention_text = mentions.join("\n")
|
181
|
+
text = statements.join("\n").encode('UTF-8', :invalid => :replace)
|
182
|
+
mention_text = mentions.join("\n").encode('UTF-8', :invalid => :replace)
|
148
183
|
|
149
184
|
lines = nil; statements = nil; mentions = nil # Allow garbage collection
|
150
185
|
|
@@ -155,6 +190,7 @@ module Ebooks
|
|
155
190
|
|
156
191
|
log "Ranking keywords"
|
157
192
|
@keywords = NLP.keywords(text).top(200).map(&:to_s)
|
193
|
+
log "Top keywords: #{@keywords[0]} #{@keywords[1]} #{@keywords[2]}"
|
158
194
|
|
159
195
|
self
|
160
196
|
end
|
@@ -218,6 +254,7 @@ module Ebooks
|
|
218
254
|
tweet = ""
|
219
255
|
|
220
256
|
while (tikis = generator.generate(3, :bigrams)) do
|
257
|
+
log "Attempting to produce tweet try #{retries+1}/#{retry_limit}"
|
221
258
|
next if tikis.length <= 3 && !responding
|
222
259
|
break if valid_tweet?(tikis, limit)
|
223
260
|
|
@@ -226,6 +263,7 @@ module Ebooks
|
|
226
263
|
end
|
227
264
|
|
228
265
|
if verbatim?(tikis) && tikis.length > 3 # We made a verbatim tweet by accident
|
266
|
+
log "Attempting to produce unigram tweet try #{retries+1}/#{retry_limit}"
|
229
267
|
while (tikis = generator.generate(3, :unigrams)) do
|
230
268
|
break if valid_tweet?(tikis, limit) && !verbatim?(tikis)
|
231
269
|
|
data/lib/twitter_ebooks/nlp.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
require 'fast-stemmer'
|
3
3
|
require 'highscore'
|
4
|
+
require 'htmlentities'
|
4
5
|
|
5
6
|
module Ebooks
|
6
7
|
module NLP
|
@@ -42,7 +43,6 @@ module Ebooks
|
|
42
43
|
# Lazily load HTML entity decoder
|
43
44
|
# @return [HTMLEntities]
|
44
45
|
def self.htmlentities
|
45
|
-
require 'htmlentities'
|
46
46
|
@htmlentities ||= HTMLEntities.new
|
47
47
|
end
|
48
48
|
|
data/lib/twitter_ebooks/suffix.rb
CHANGED
@@ -19,6 +19,9 @@ module Ebooks
|
|
19
19
|
@bigrams = {}
|
20
20
|
|
21
21
|
@sentences.each_with_index do |tikis, i|
|
22
|
+
if (i % 10000 == 0) then
|
23
|
+
log ("Building: sentence #{i} of #{sentences.length}")
|
24
|
+
end
|
22
25
|
last_tiki = INTERIM
|
23
26
|
tikis.each_with_index do |tiki, j|
|
24
27
|
@unigrams[last_tiki] ||= []
|
data/twitter_ebooks.gemspec
CHANGED
@@ -22,7 +22,7 @@ Gem::Specification.new do |gem|
|
|
22
22
|
gem.add_development_dependency 'pry-byebug'
|
23
23
|
gem.add_development_dependency 'yard'
|
24
24
|
|
25
|
-
gem.add_runtime_dependency 'twitter', '~> 5.
|
25
|
+
gem.add_runtime_dependency 'twitter', '~> 5.15'
|
26
26
|
gem.add_runtime_dependency 'rufus-scheduler'
|
27
27
|
gem.add_runtime_dependency 'gingerice'
|
28
28
|
gem.add_runtime_dependency 'htmlentities'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitter_ebooks
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1.0
|
4
|
+
version: 3.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jaiden Mispy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-09-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
@@ -100,14 +100,14 @@ dependencies:
|
|
100
100
|
requirements:
|
101
101
|
- - "~>"
|
102
102
|
- !ruby/object:Gem::Version
|
103
|
-
version: '5.
|
103
|
+
version: '5.15'
|
104
104
|
type: :runtime
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
|
-
version: '5.
|
110
|
+
version: '5.15'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
112
|
name: rufus-scheduler
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
@@ -278,7 +278,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
278
278
|
version: '0'
|
279
279
|
requirements: []
|
280
280
|
rubyforge_project:
|
281
|
-
rubygems_version: 2.4.
|
281
|
+
rubygems_version: 2.4.8
|
282
282
|
signing_key:
|
283
283
|
specification_version: 4
|
284
284
|
summary: Markov chains for all your friends~
|
@@ -289,3 +289,4 @@ test_files:
|
|
289
289
|
- spec/memprof.rb
|
290
290
|
- spec/model_spec.rb
|
291
291
|
- spec/spec_helper.rb
|
292
|
+
has_rdoc:
|