twitter_ebooks 3.1.2 → 3.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/ebooks +10 -3
- data/lib/twitter_ebooks/bot.rb +8 -0
- data/lib/twitter_ebooks/model.rb +3 -4
- data/lib/twitter_ebooks/nlp.rb +3 -3
- data/lib/twitter_ebooks/version.rb +1 -1
- data/{data → skeleton}/stopwords.txt +0 -0
- data/spec/model_spec.rb +1 -1
- data/twitter_ebooks.gemspec +1 -1
- metadata +7 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b8c70b5760bf1a3545b08b1899585ce08e0e10c5
|
4
|
+
data.tar.gz: 3d98fe56163d2682c516690ab14564c3d6b573dd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8dddfdc46f98ef0184cf8929ae310d9ce1527fad7a22d75301a33d18bc0389542ab05ac802723d47c3455e2141f07c882163c409de7a56d15651fc3e01dd4ddd
|
7
|
+
data.tar.gz: 97e4d64f43cdb32d4dcecb63fc85634c6c114557faa22cb4f6919c3918feeee9979987fb05c23b4660c60f3862d10724db82f8c0d34ac91ca20e30b7ae2a80d3
|
data/bin/ebooks
CHANGED
@@ -93,7 +93,9 @@ STR
|
|
93
93
|
filename = File.basename(path)
|
94
94
|
shortname = filename.split('.')[0..-2].join('.')
|
95
95
|
|
96
|
+
FileUtils.mkdir_p(File.join(APP_PATH, 'model'))
|
96
97
|
outpath = File.join(APP_PATH, 'model', "#{shortname}.model")
|
98
|
+
|
97
99
|
Ebooks::Model.consume(path).save(outpath)
|
98
100
|
log "Corpus consumed to #{outpath}"
|
99
101
|
end
|
@@ -120,7 +122,7 @@ STR
|
|
120
122
|
HELP.append = <<-STR
|
121
123
|
Usage: ebooks append <model_name> <corpus_path>
|
122
124
|
|
123
|
-
Process then append the provided corpus to the model
|
125
|
+
Process then append the provided corpus to the model
|
124
126
|
instead of overwriting.
|
125
127
|
STR
|
126
128
|
|
@@ -133,7 +135,7 @@ STR
|
|
133
135
|
Ebooks::Model.consume(path).append(File.join(APP_PATH,'model',"#{name}.model"))
|
134
136
|
log "Corpus appended to #{name}.model"
|
135
137
|
end
|
136
|
-
|
138
|
+
|
137
139
|
|
138
140
|
HELP.jsonify = <<-STR
|
139
141
|
Usage: ebooks jsonify <tweets.csv> [tweets.csv2] [...]
|
@@ -209,6 +211,11 @@ STR
|
|
209
211
|
Output defaults to corpus/<username>.json
|
210
212
|
Due to API limitations, this can only receive up to ~3000 tweets
|
211
213
|
into the past.
|
214
|
+
|
215
|
+
The first time you run archive, you will need to enter the auth
|
216
|
+
details of some account to use for accessing the API. This info
|
217
|
+
will then be stored in ~/.ebooksrc for later use, and can be
|
218
|
+
modified there if needed.
|
212
219
|
STR
|
213
220
|
|
214
221
|
def self.archive(username, outpath=nil)
|
@@ -300,7 +307,7 @@ STR
|
|
300
307
|
|
301
308
|
Shows you twitter_ebooks' version number.
|
302
309
|
STR
|
303
|
-
|
310
|
+
|
304
311
|
def self.version
|
305
312
|
require File.expand_path('../../lib/twitter_ebooks/version', __FILE__)
|
306
313
|
log Ebooks::VERSION
|
data/lib/twitter_ebooks/bot.rb
CHANGED
@@ -2,6 +2,14 @@
|
|
2
2
|
require 'twitter'
|
3
3
|
require 'rufus/scheduler'
|
4
4
|
|
5
|
+
# Monkeypatch hack to fix upstream dependency issue
|
6
|
+
# https://github.com/sferik/twitter/issues/709
|
7
|
+
class HTTP::URI
|
8
|
+
def port
|
9
|
+
443 if self.https?
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
5
13
|
module Ebooks
|
6
14
|
class ConfigurationError < Exception
|
7
15
|
end
|
data/lib/twitter_ebooks/model.rb
CHANGED
@@ -96,7 +96,7 @@ module Ebooks
|
|
96
96
|
end
|
97
97
|
self
|
98
98
|
end
|
99
|
-
|
99
|
+
|
100
100
|
|
101
101
|
def initialize
|
102
102
|
@tokens = []
|
@@ -113,7 +113,7 @@ module Ebooks
|
|
113
113
|
return @tikis[token]
|
114
114
|
else
|
115
115
|
(@tokens.length+1)%1000 == 0 and puts "#{@tokens.length+1} tokens"
|
116
|
-
@tokens << token
|
116
|
+
@tokens << token
|
117
117
|
return @tikis[token] = @tokens.length-1
|
118
118
|
end
|
119
119
|
end
|
@@ -255,8 +255,7 @@ module Ebooks
|
|
255
255
|
|
256
256
|
while (tikis = generator.generate(3, :bigrams)) do
|
257
257
|
log "Attempting to produce tweet try #{retries+1}/#{retry_limit}"
|
258
|
-
|
259
|
-
break if valid_tweet?(tikis, limit)
|
258
|
+
break if (tikis.length > 3 || responding) && valid_tweet?(tikis, limit)
|
260
259
|
|
261
260
|
retries += 1
|
262
261
|
break if retries >= retry_limit
|
data/lib/twitter_ebooks/nlp.rb
CHANGED
@@ -14,10 +14,10 @@ module Ebooks
|
|
14
14
|
# to be using it all of the time
|
15
15
|
|
16
16
|
# Lazily loads an array of stopwords
|
17
|
-
# Stopwords are common
|
17
|
+
# Stopwords are common words that should often be ignored
|
18
18
|
# @return [Array<String>]
|
19
19
|
def self.stopwords
|
20
|
-
@stopwords ||= File.
|
20
|
+
@stopwords ||= File.exists?('stopwords.txt') ? File.read('stopwords.txt').split : []
|
21
21
|
end
|
22
22
|
|
23
23
|
# Lazily loads an array of known English nouns
|
@@ -99,7 +99,7 @@ module Ebooks
|
|
99
99
|
#set :vowels, 1 # => default: 0 = not considered
|
100
100
|
#set :consonants, 5 # => default: 0 = not considered
|
101
101
|
#set :ignore_case, true # => default: false
|
102
|
-
set :word_pattern, /(?<!@)(?<=\s)[\
|
102
|
+
set :word_pattern, /(?<!@)(?<=\s)[\p{Word}']+/ # => default: /\w+/
|
103
103
|
#set :stemming, true # => default: false
|
104
104
|
end
|
105
105
|
|
File without changes
|
data/spec/model_spec.rb
CHANGED
@@ -36,7 +36,7 @@ describe Ebooks::Model do
|
|
36
36
|
report2 = MemoryUsage.report do
|
37
37
|
model = Ebooks::Model.load(file.path)
|
38
38
|
end
|
39
|
-
expect(report2.total_memsize).to be <
|
39
|
+
expect(report2.total_memsize).to be < 4000000
|
40
40
|
|
41
41
|
expect(model.tokens[0]).to be_a String
|
42
42
|
expect(model.sentences[0][0]).to be_a Fixnum
|
data/twitter_ebooks.gemspec
CHANGED
@@ -22,7 +22,7 @@ Gem::Specification.new do |gem|
|
|
22
22
|
gem.add_development_dependency 'pry-byebug'
|
23
23
|
gem.add_development_dependency 'yard'
|
24
24
|
|
25
|
-
gem.add_runtime_dependency 'twitter', '
|
25
|
+
gem.add_runtime_dependency 'twitter', '~> 5.15'
|
26
26
|
gem.add_runtime_dependency 'rufus-scheduler'
|
27
27
|
gem.add_runtime_dependency 'gingerice'
|
28
28
|
gem.add_runtime_dependency 'htmlentities'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitter_ebooks
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1.2
|
4
|
+
version: 3.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jaiden Mispy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-01-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
@@ -98,16 +98,16 @@ dependencies:
|
|
98
98
|
name: twitter
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
|
-
- -
|
101
|
+
- - "~>"
|
102
102
|
- !ruby/object:Gem::Version
|
103
|
-
version: '5.
|
103
|
+
version: '5.15'
|
104
104
|
type: :runtime
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
|
-
- -
|
108
|
+
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
|
-
version: '5.
|
110
|
+
version: '5.15'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
112
|
name: rufus-scheduler
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
@@ -238,7 +238,6 @@ files:
|
|
238
238
|
- bin/ebooks
|
239
239
|
- data/adjectives.txt
|
240
240
|
- data/nouns.txt
|
241
|
-
- data/stopwords.txt
|
242
241
|
- lib/twitter_ebooks.rb
|
243
242
|
- lib/twitter_ebooks/archive.rb
|
244
243
|
- lib/twitter_ebooks/bot.rb
|
@@ -252,6 +251,7 @@ files:
|
|
252
251
|
- skeleton/corpus/.gitignore
|
253
252
|
- skeleton/gitignore
|
254
253
|
- skeleton/model/.gitignore
|
254
|
+
- skeleton/stopwords.txt
|
255
255
|
- spec/bot_spec.rb
|
256
256
|
- spec/data/0xabad1dea.json
|
257
257
|
- spec/data/0xabad1dea.model
|