twitter_ebooks 3.1.0 → 3.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0bf168a080a0aabf939715c3dcb59655fca83463
4
- data.tar.gz: 88ddb4ead32b0ca21db59b6edb4dcd97cf0d0594
3
+ metadata.gz: eb109277da9591a48da1231fe81690a569a94e16
4
+ data.tar.gz: 84a3a30a0e8bdec98e6998a22897617d6349807a
5
5
  SHA512:
6
- metadata.gz: 13873b6ea1ba04b79e0b5b44e735aa41a97b8aeaee798eee2e3cc43795dabb8bc4149ecc37418d341818d671bee7fd91f10316d19e53c941a8d652fc7dae1cc7
7
- data.tar.gz: 48b0e54e02267df3e5cc12c7a75d2deea62adfeaed9cf1e8274b6e30917cda55041f428ff3cd5bd5cf62eb1f8c84fe0880b1bb002e0c0db650892885ff221978
6
+ metadata.gz: e492aa786f3d29d16fa878cc3bcb615ba7258ee01d5b5f4a1817279bc08bb4d0d485e78696cd9f39ebe2cc6b81aa01187aa69b735c9ea5a9b9fe496f00954b1c
7
+ data.tar.gz: 5208f5d31d895ecc60ab6829f26cea43c80d59b39f53063911593db7914de80cf6d7cd852a4ba65f6bd6435bac09d27de5e5f089d1427344cbe3d04c5d4e5146
data/README.md CHANGED
@@ -135,10 +135,10 @@ The secondary function is the "interesting keywords" list. For example, I use th
135
135
 
136
136
  ``` ruby
137
137
  top100 = model.keywords.take(100)
138
- tokens = Ebooks::NLP.tokenize(tweet[:text])
138
+ tokens = Ebooks::NLP.tokenize(tweet.text)
139
139
 
140
140
  if tokens.find { |t| top100.include?(t) }
141
- bot.favorite(tweet[:id])
141
+ favorite(tweet)
142
142
  end
143
143
  ```
144
144
 
data/bin/ebooks CHANGED
@@ -25,9 +25,11 @@ Usage:
25
25
  ebooks auth
26
26
  ebooks consume <corpus_path> [corpus_path2] [...]
27
27
  ebooks consume-all <model_name> <corpus_path> [corpus_path2] [...]
28
+ ebooks append <model_name> <corpus_path>
28
29
  ebooks gen <model_path> [input]
29
30
  ebooks archive <username> [path]
30
31
  ebooks tweet <model_path> <botname>
32
+ ebooks version
31
33
  STR
32
34
 
33
35
  def self.help(command=nil)
@@ -115,6 +117,24 @@ STR
115
117
  log "Corpuses consumed to #{outpath}"
116
118
  end
117
119
 
120
+ HELP.append = <<-STR
121
+ Usage: ebooks append <model_name> <corpus_path>
122
+
123
+ Process then append the provided corpus to the model
124
+ instead of overwriting.
125
+ STR
126
+
127
+ def self.append(name, path)
128
+ if !name || !path
129
+ help :append
130
+ exit 1
131
+ end
132
+
133
+ Ebooks::Model.consume(path).append(File.join(APP_PATH,'model',"#{name}.model"))
134
+ log "Corpus appended to #{name}.model"
135
+ end
136
+
137
+
118
138
  HELP.jsonify = <<-STR
119
139
  Usage: ebooks jsonify <tweets.csv> [tweets.csv2] [...]
120
140
 
@@ -275,6 +295,17 @@ STR
275
295
  require 'pry'; Ebooks.module_exec { pry }
276
296
  end
277
297
 
298
+ HELP.version = <<-STR
299
+ Usage: ebooks version
300
+
301
+ Shows you twitter_ebooks' version number.
302
+ STR
303
+
304
+ def self.version
305
+ require File.expand_path('../../lib/twitter_ebooks/version', __FILE__)
306
+ log Ebooks::VERSION
307
+ end
308
+
278
309
  HELP.start = <<-STR
279
310
  Usage: ebooks s[tart] [botname]
280
311
 
@@ -368,6 +399,7 @@ STR
368
399
  when "new" then new(args[1])
369
400
  when "consume" then consume(args[1..-1])
370
401
  when "consume-all" then consume_all(args[1], args[2..-1])
402
+ when "append" then append(args[1],args[2])
371
403
  when "gen" then gen(args[1], args[2..-1].join(' '))
372
404
  when "archive" then archive(args[1], args[2])
373
405
  when "tweet" then tweet(args[1], args[2])
@@ -378,6 +410,7 @@ STR
378
410
  when "start" then start(args[1])
379
411
  when "s" then start(args[1])
380
412
  when "help" then help(args[1])
413
+ when "version" then version
381
414
  else
382
415
  log "No such command '#{args[0]}'"
383
416
  help
@@ -69,6 +69,35 @@ module Ebooks
69
69
  self
70
70
  end
71
71
 
72
+ # Append a generated model to existing model file instead of overwriting it
73
+ # @param path [String]
74
+ def append(path)
75
+ existing = File.file?(path)
76
+ if !existing
77
+ log "No existing model found at #{path}"
78
+ return
79
+ else
80
+ #read-in and deserialize existing model
81
+ props = Marshal.load(File.open(path,'rb') { |old| old.read })
82
+ old_tokens = props[:tokens]
83
+ old_sentences = props[:sentences]
84
+ old_mentions = props[:mentions]
85
+ old_keywords = props[:keywords]
86
+
87
+ #append existing properties to new ones and overwrite with new model
88
+ File.open(path, 'wb') do |f|
89
+ f.write(Marshal.dump({
90
+ tokens: @tokens.concat(old_tokens),
91
+ sentences: @sentences.concat(old_sentences),
92
+ mentions: @mentions.concat(old_mentions),
93
+ keywords: @keywords.concat(old_keywords)
94
+ }))
95
+ end
96
+ end
97
+ self
98
+ end
99
+
100
+
72
101
  def initialize
73
102
  @tokens = []
74
103
 
@@ -80,7 +109,13 @@ module Ebooks
80
109
  # @param token [String]
81
110
  # @return [Integer]
82
111
  def tikify(token)
83
- @tikis[token] or (@tokens << token and @tikis[token] = @tokens.length-1)
112
+ if @tikis.has_key?(token) then
113
+ return @tikis[token]
114
+ else
115
+ (@tokens.length+1)%1000 == 0 and puts "#{@tokens.length+1} tokens"
116
+ @tokens << token
117
+ return @tikis[token] = @tokens.length-1
118
+ end
84
119
  end
85
120
 
86
121
  # Convert a body of text into arrays of tikis
@@ -143,8 +178,8 @@ module Ebooks
143
178
  end
144
179
  end
145
180
 
146
- text = statements.join("\n")
147
- mention_text = mentions.join("\n")
181
+ text = statements.join("\n").encode('UTF-8', :invalid => :replace)
182
+ mention_text = mentions.join("\n").encode('UTF-8', :invalid => :replace)
148
183
 
149
184
  lines = nil; statements = nil; mentions = nil # Allow garbage collection
150
185
 
@@ -155,6 +190,7 @@ module Ebooks
155
190
 
156
191
  log "Ranking keywords"
157
192
  @keywords = NLP.keywords(text).top(200).map(&:to_s)
193
+ log "Top keywords: #{@keywords[0]} #{@keywords[1]} #{@keywords[2]}"
158
194
 
159
195
  self
160
196
  end
@@ -218,6 +254,7 @@ module Ebooks
218
254
  tweet = ""
219
255
 
220
256
  while (tikis = generator.generate(3, :bigrams)) do
257
+ log "Attempting to produce tweet try #{retries+1}/#{retry_limit}"
221
258
  next if tikis.length <= 3 && !responding
222
259
  break if valid_tweet?(tikis, limit)
223
260
 
@@ -226,6 +263,7 @@ module Ebooks
226
263
  end
227
264
 
228
265
  if verbatim?(tikis) && tikis.length > 3 # We made a verbatim tweet by accident
266
+ log "Attempting to produce unigram tweet try #{retries+1}/#{retry_limit}"
229
267
  while (tikis = generator.generate(3, :unigrams)) do
230
268
  break if valid_tweet?(tikis, limit) && !verbatim?(tikis)
231
269
 
@@ -1,6 +1,7 @@
1
1
  # encoding: utf-8
2
2
  require 'fast-stemmer'
3
3
  require 'highscore'
4
+ require 'htmlentities'
4
5
 
5
6
  module Ebooks
6
7
  module NLP
@@ -42,7 +43,6 @@ module Ebooks
42
43
  # Lazily load HTML entity decoder
43
44
  # @return [HTMLEntities]
44
45
  def self.htmlentities
45
- require 'htmlentities'
46
46
  @htmlentities ||= HTMLEntities.new
47
47
  end
48
48
 
@@ -19,6 +19,9 @@ module Ebooks
19
19
  @bigrams = {}
20
20
 
21
21
  @sentences.each_with_index do |tikis, i|
22
+ if (i % 10000 == 0) then
23
+ log ("Building: sentence #{i} of #{sentences.length}")
24
+ end
22
25
  last_tiki = INTERIM
23
26
  tikis.each_with_index do |tiki, j|
24
27
  @unigrams[last_tiki] ||= []
@@ -1,3 +1,3 @@
1
1
  module Ebooks
2
- VERSION = "3.1.0"
2
+ VERSION = "3.1.1"
3
3
  end
@@ -22,7 +22,7 @@ Gem::Specification.new do |gem|
22
22
  gem.add_development_dependency 'pry-byebug'
23
23
  gem.add_development_dependency 'yard'
24
24
 
25
- gem.add_runtime_dependency 'twitter', '~> 5.0'
25
+ gem.add_runtime_dependency 'twitter', '~> 5.15'
26
26
  gem.add_runtime_dependency 'rufus-scheduler'
27
27
  gem.add_runtime_dependency 'gingerice'
28
28
  gem.add_runtime_dependency 'htmlentities'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twitter_ebooks
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.0
4
+ version: 3.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jaiden Mispy
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-05-24 00:00:00.000000000 Z
11
+ date: 2015-09-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
@@ -100,14 +100,14 @@ dependencies:
100
100
  requirements:
101
101
  - - "~>"
102
102
  - !ruby/object:Gem::Version
103
- version: '5.0'
103
+ version: '5.15'
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: '5.0'
110
+ version: '5.15'
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: rufus-scheduler
113
113
  requirement: !ruby/object:Gem::Requirement
@@ -278,7 +278,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
278
278
  version: '0'
279
279
  requirements: []
280
280
  rubyforge_project:
281
- rubygems_version: 2.4.6
281
+ rubygems_version: 2.4.8
282
282
  signing_key:
283
283
  specification_version: 4
284
284
  summary: Markov chains for all your friends~
@@ -289,3 +289,4 @@ test_files:
289
289
  - spec/memprof.rb
290
290
  - spec/model_spec.rb
291
291
  - spec/spec_helper.rb
292
+ has_rdoc: