twitter_ebooks 3.1.0 → 3.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0bf168a080a0aabf939715c3dcb59655fca83463
4
- data.tar.gz: 88ddb4ead32b0ca21db59b6edb4dcd97cf0d0594
3
+ metadata.gz: eb109277da9591a48da1231fe81690a569a94e16
4
+ data.tar.gz: 84a3a30a0e8bdec98e6998a22897617d6349807a
5
5
  SHA512:
6
- metadata.gz: 13873b6ea1ba04b79e0b5b44e735aa41a97b8aeaee798eee2e3cc43795dabb8bc4149ecc37418d341818d671bee7fd91f10316d19e53c941a8d652fc7dae1cc7
7
- data.tar.gz: 48b0e54e02267df3e5cc12c7a75d2deea62adfeaed9cf1e8274b6e30917cda55041f428ff3cd5bd5cf62eb1f8c84fe0880b1bb002e0c0db650892885ff221978
6
+ metadata.gz: e492aa786f3d29d16fa878cc3bcb615ba7258ee01d5b5f4a1817279bc08bb4d0d485e78696cd9f39ebe2cc6b81aa01187aa69b735c9ea5a9b9fe496f00954b1c
7
+ data.tar.gz: 5208f5d31d895ecc60ab6829f26cea43c80d59b39f53063911593db7914de80cf6d7cd852a4ba65f6bd6435bac09d27de5e5f089d1427344cbe3d04c5d4e5146
data/README.md CHANGED
@@ -135,10 +135,10 @@ The secondary function is the "interesting keywords" list. For example, I use th
135
135
 
136
136
  ``` ruby
137
137
  top100 = model.keywords.take(100)
138
- tokens = Ebooks::NLP.tokenize(tweet[:text])
138
+ tokens = Ebooks::NLP.tokenize(tweet.text)
139
139
 
140
140
  if tokens.find { |t| top100.include?(t) }
141
- bot.favorite(tweet[:id])
141
+ favorite(tweet)
142
142
  end
143
143
  ```
144
144
 
data/bin/ebooks CHANGED
@@ -25,9 +25,11 @@ Usage:
25
25
  ebooks auth
26
26
  ebooks consume <corpus_path> [corpus_path2] [...]
27
27
  ebooks consume-all <model_name> <corpus_path> [corpus_path2] [...]
28
+ ebooks append <model_name> <corpus_path>
28
29
  ebooks gen <model_path> [input]
29
30
  ebooks archive <username> [path]
30
31
  ebooks tweet <model_path> <botname>
32
+ ebooks version
31
33
  STR
32
34
 
33
35
  def self.help(command=nil)
@@ -115,6 +117,24 @@ STR
115
117
  log "Corpuses consumed to #{outpath}"
116
118
  end
117
119
 
120
+ HELP.append = <<-STR
121
+ Usage: ebooks append <model_name> <corpus_path>
122
+
123
+ Process then append the provided corpus to the model
124
+ instead of overwriting.
125
+ STR
126
+
127
+ def self.append(name, path)
128
+ if !name || !path
129
+ help :append
130
+ exit 1
131
+ end
132
+
133
+ Ebooks::Model.consume(path).append(File.join(APP_PATH,'model',"#{name}.model"))
134
+ log "Corpus appended to #{name}.model"
135
+ end
136
+
137
+
118
138
  HELP.jsonify = <<-STR
119
139
  Usage: ebooks jsonify <tweets.csv> [tweets.csv2] [...]
120
140
 
@@ -275,6 +295,17 @@ STR
275
295
  require 'pry'; Ebooks.module_exec { pry }
276
296
  end
277
297
 
298
+ HELP.version = <<-STR
299
+ Usage: ebooks version
300
+
301
+ Shows you twitter_ebooks' version number.
302
+ STR
303
+
304
+ def self.version
305
+ require File.expand_path('../../lib/twitter_ebooks/version', __FILE__)
306
+ log Ebooks::VERSION
307
+ end
308
+
278
309
  HELP.start = <<-STR
279
310
  Usage: ebooks s[tart] [botname]
280
311
 
@@ -368,6 +399,7 @@ STR
368
399
  when "new" then new(args[1])
369
400
  when "consume" then consume(args[1..-1])
370
401
  when "consume-all" then consume_all(args[1], args[2..-1])
402
+ when "append" then append(args[1],args[2])
371
403
  when "gen" then gen(args[1], args[2..-1].join(' '))
372
404
  when "archive" then archive(args[1], args[2])
373
405
  when "tweet" then tweet(args[1], args[2])
@@ -378,6 +410,7 @@ STR
378
410
  when "start" then start(args[1])
379
411
  when "s" then start(args[1])
380
412
  when "help" then help(args[1])
413
+ when "version" then version
381
414
  else
382
415
  log "No such command '#{args[0]}'"
383
416
  help
@@ -69,6 +69,35 @@ module Ebooks
69
69
  self
70
70
  end
71
71
 
72
+ # Append a generated model to existing model file instead of overwriting it
73
+ # @param path [String]
74
+ def append(path)
75
+ existing = File.file?(path)
76
+ if !existing
77
+ log "No existing model found at #{path}"
78
+ return
79
+ else
80
+ #read-in and deserialize existing model
81
+ props = Marshal.load(File.open(path,'rb') { |old| old.read })
82
+ old_tokens = props[:tokens]
83
+ old_sentences = props[:sentences]
84
+ old_mentions = props[:mentions]
85
+ old_keywords = props[:keywords]
86
+
87
+ #append existing properties to new ones and overwrite with new model
88
+ File.open(path, 'wb') do |f|
89
+ f.write(Marshal.dump({
90
+ tokens: @tokens.concat(old_tokens),
91
+ sentences: @sentences.concat(old_sentences),
92
+ mentions: @mentions.concat(old_mentions),
93
+ keywords: @keywords.concat(old_keywords)
94
+ }))
95
+ end
96
+ end
97
+ self
98
+ end
99
+
100
+
72
101
  def initialize
73
102
  @tokens = []
74
103
 
@@ -80,7 +109,13 @@ module Ebooks
80
109
  # @param token [String]
81
110
  # @return [Integer]
82
111
  def tikify(token)
83
- @tikis[token] or (@tokens << token and @tikis[token] = @tokens.length-1)
112
+ if @tikis.has_key?(token) then
113
+ return @tikis[token]
114
+ else
115
+ (@tokens.length+1)%1000 == 0 and puts "#{@tokens.length+1} tokens"
116
+ @tokens << token
117
+ return @tikis[token] = @tokens.length-1
118
+ end
84
119
  end
85
120
 
86
121
  # Convert a body of text into arrays of tikis
@@ -143,8 +178,8 @@ module Ebooks
143
178
  end
144
179
  end
145
180
 
146
- text = statements.join("\n")
147
- mention_text = mentions.join("\n")
181
+ text = statements.join("\n").encode('UTF-8', :invalid => :replace)
182
+ mention_text = mentions.join("\n").encode('UTF-8', :invalid => :replace)
148
183
 
149
184
  lines = nil; statements = nil; mentions = nil # Allow garbage collection
150
185
 
@@ -155,6 +190,7 @@ module Ebooks
155
190
 
156
191
  log "Ranking keywords"
157
192
  @keywords = NLP.keywords(text).top(200).map(&:to_s)
193
+ log "Top keywords: #{@keywords[0]} #{@keywords[1]} #{@keywords[2]}"
158
194
 
159
195
  self
160
196
  end
@@ -218,6 +254,7 @@ module Ebooks
218
254
  tweet = ""
219
255
 
220
256
  while (tikis = generator.generate(3, :bigrams)) do
257
+ log "Attempting to produce tweet try #{retries+1}/#{retry_limit}"
221
258
  next if tikis.length <= 3 && !responding
222
259
  break if valid_tweet?(tikis, limit)
223
260
 
@@ -226,6 +263,7 @@ module Ebooks
226
263
  end
227
264
 
228
265
  if verbatim?(tikis) && tikis.length > 3 # We made a verbatim tweet by accident
266
+ log "Attempting to produce unigram tweet try #{retries+1}/#{retry_limit}"
229
267
  while (tikis = generator.generate(3, :unigrams)) do
230
268
  break if valid_tweet?(tikis, limit) && !verbatim?(tikis)
231
269
 
@@ -1,6 +1,7 @@
1
1
  # encoding: utf-8
2
2
  require 'fast-stemmer'
3
3
  require 'highscore'
4
+ require 'htmlentities'
4
5
 
5
6
  module Ebooks
6
7
  module NLP
@@ -42,7 +43,6 @@ module Ebooks
42
43
  # Lazily load HTML entity decoder
43
44
  # @return [HTMLEntities]
44
45
  def self.htmlentities
45
- require 'htmlentities'
46
46
  @htmlentities ||= HTMLEntities.new
47
47
  end
48
48
 
@@ -19,6 +19,9 @@ module Ebooks
19
19
  @bigrams = {}
20
20
 
21
21
  @sentences.each_with_index do |tikis, i|
22
+ if (i % 10000 == 0) then
23
+ log ("Building: sentence #{i} of #{sentences.length}")
24
+ end
22
25
  last_tiki = INTERIM
23
26
  tikis.each_with_index do |tiki, j|
24
27
  @unigrams[last_tiki] ||= []
@@ -1,3 +1,3 @@
1
1
  module Ebooks
2
- VERSION = "3.1.0"
2
+ VERSION = "3.1.1"
3
3
  end
@@ -22,7 +22,7 @@ Gem::Specification.new do |gem|
22
22
  gem.add_development_dependency 'pry-byebug'
23
23
  gem.add_development_dependency 'yard'
24
24
 
25
- gem.add_runtime_dependency 'twitter', '~> 5.0'
25
+ gem.add_runtime_dependency 'twitter', '~> 5.15'
26
26
  gem.add_runtime_dependency 'rufus-scheduler'
27
27
  gem.add_runtime_dependency 'gingerice'
28
28
  gem.add_runtime_dependency 'htmlentities'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twitter_ebooks
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.0
4
+ version: 3.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jaiden Mispy
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-05-24 00:00:00.000000000 Z
11
+ date: 2015-09-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
@@ -100,14 +100,14 @@ dependencies:
100
100
  requirements:
101
101
  - - "~>"
102
102
  - !ruby/object:Gem::Version
103
- version: '5.0'
103
+ version: '5.15'
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: '5.0'
110
+ version: '5.15'
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: rufus-scheduler
113
113
  requirement: !ruby/object:Gem::Requirement
@@ -278,7 +278,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
278
278
  version: '0'
279
279
  requirements: []
280
280
  rubyforge_project:
281
- rubygems_version: 2.4.6
281
+ rubygems_version: 2.4.8
282
282
  signing_key:
283
283
  specification_version: 4
284
284
  summary: Markov chains for all your friends~
@@ -289,3 +289,4 @@ test_files:
289
289
  - spec/memprof.rb
290
290
  - spec/model_spec.rb
291
291
  - spec/spec_helper.rb
292
+ has_rdoc: