twitter_ebooks 3.1.2 → 3.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/ebooks +10 -3
- data/lib/twitter_ebooks/bot.rb +8 -0
- data/lib/twitter_ebooks/model.rb +3 -4
- data/lib/twitter_ebooks/nlp.rb +3 -3
- data/lib/twitter_ebooks/version.rb +1 -1
- data/{data → skeleton}/stopwords.txt +0 -0
- data/spec/model_spec.rb +1 -1
- data/twitter_ebooks.gemspec +1 -1
- metadata +7 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b8c70b5760bf1a3545b08b1899585ce08e0e10c5
|
4
|
+
data.tar.gz: 3d98fe56163d2682c516690ab14564c3d6b573dd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8dddfdc46f98ef0184cf8929ae310d9ce1527fad7a22d75301a33d18bc0389542ab05ac802723d47c3455e2141f07c882163c409de7a56d15651fc3e01dd4ddd
|
7
|
+
data.tar.gz: 97e4d64f43cdb32d4dcecb63fc85634c6c114557faa22cb4f6919c3918feeee9979987fb05c23b4660c60f3862d10724db82f8c0d34ac91ca20e30b7ae2a80d3
|
data/bin/ebooks
CHANGED
@@ -93,7 +93,9 @@ STR
|
|
93
93
|
filename = File.basename(path)
|
94
94
|
shortname = filename.split('.')[0..-2].join('.')
|
95
95
|
|
96
|
+
FileUtils.mkdir_p(File.join(APP_PATH, 'model'))
|
96
97
|
outpath = File.join(APP_PATH, 'model', "#{shortname}.model")
|
98
|
+
|
97
99
|
Ebooks::Model.consume(path).save(outpath)
|
98
100
|
log "Corpus consumed to #{outpath}"
|
99
101
|
end
|
@@ -120,7 +122,7 @@ STR
|
|
120
122
|
HELP.append = <<-STR
|
121
123
|
Usage: ebooks append <model_name> <corpus_path>
|
122
124
|
|
123
|
-
Process then append the provided corpus to the model
|
125
|
+
Process then append the provided corpus to the model
|
124
126
|
instead of overwriting.
|
125
127
|
STR
|
126
128
|
|
@@ -133,7 +135,7 @@ STR
|
|
133
135
|
Ebooks::Model.consume(path).append(File.join(APP_PATH,'model',"#{name}.model"))
|
134
136
|
log "Corpus appended to #{name}.model"
|
135
137
|
end
|
136
|
-
|
138
|
+
|
137
139
|
|
138
140
|
HELP.jsonify = <<-STR
|
139
141
|
Usage: ebooks jsonify <tweets.csv> [tweets.csv2] [...]
|
@@ -209,6 +211,11 @@ STR
|
|
209
211
|
Output defaults to corpus/<username>.json
|
210
212
|
Due to API limitations, this can only receive up to ~3000 tweets
|
211
213
|
into the past.
|
214
|
+
|
215
|
+
The first time you run archive, you will need to enter the auth
|
216
|
+
details of some account to use for accessing the API. This info
|
217
|
+
will then be stored in ~/.ebooksrc for later use, and can be
|
218
|
+
modified there if needed.
|
212
219
|
STR
|
213
220
|
|
214
221
|
def self.archive(username, outpath=nil)
|
@@ -300,7 +307,7 @@ STR
|
|
300
307
|
|
301
308
|
Shows you twitter_ebooks' version number.
|
302
309
|
STR
|
303
|
-
|
310
|
+
|
304
311
|
def self.version
|
305
312
|
require File.expand_path('../../lib/twitter_ebooks/version', __FILE__)
|
306
313
|
log Ebooks::VERSION
|
data/lib/twitter_ebooks/bot.rb
CHANGED
@@ -2,6 +2,14 @@
|
|
2
2
|
require 'twitter'
|
3
3
|
require 'rufus/scheduler'
|
4
4
|
|
5
|
+
# Monkeypatch hack to fix upstream dependency issue
|
6
|
+
# https://github.com/sferik/twitter/issues/709
|
7
|
+
class HTTP::URI
|
8
|
+
def port
|
9
|
+
443 if self.https?
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
5
13
|
module Ebooks
|
6
14
|
class ConfigurationError < Exception
|
7
15
|
end
|
data/lib/twitter_ebooks/model.rb
CHANGED
@@ -96,7 +96,7 @@ module Ebooks
|
|
96
96
|
end
|
97
97
|
self
|
98
98
|
end
|
99
|
-
|
99
|
+
|
100
100
|
|
101
101
|
def initialize
|
102
102
|
@tokens = []
|
@@ -113,7 +113,7 @@ module Ebooks
|
|
113
113
|
return @tikis[token]
|
114
114
|
else
|
115
115
|
(@tokens.length+1)%1000 == 0 and puts "#{@tokens.length+1} tokens"
|
116
|
-
@tokens << token
|
116
|
+
@tokens << token
|
117
117
|
return @tikis[token] = @tokens.length-1
|
118
118
|
end
|
119
119
|
end
|
@@ -255,8 +255,7 @@ module Ebooks
|
|
255
255
|
|
256
256
|
while (tikis = generator.generate(3, :bigrams)) do
|
257
257
|
log "Attempting to produce tweet try #{retries+1}/#{retry_limit}"
|
258
|
-
|
259
|
-
break if valid_tweet?(tikis, limit)
|
258
|
+
break if (tikis.length > 3 || responding) && valid_tweet?(tikis, limit)
|
260
259
|
|
261
260
|
retries += 1
|
262
261
|
break if retries >= retry_limit
|
data/lib/twitter_ebooks/nlp.rb
CHANGED
@@ -14,10 +14,10 @@ module Ebooks
|
|
14
14
|
# to be using it all of the time
|
15
15
|
|
16
16
|
# Lazily loads an array of stopwords
|
17
|
-
# Stopwords are common
|
17
|
+
# Stopwords are common words that should often be ignored
|
18
18
|
# @return [Array<String>]
|
19
19
|
def self.stopwords
|
20
|
-
@stopwords ||= File.
|
20
|
+
@stopwords ||= File.exists?('stopwords.txt') ? File.read('stopwords.txt').split : []
|
21
21
|
end
|
22
22
|
|
23
23
|
# Lazily loads an array of known English nouns
|
@@ -99,7 +99,7 @@ module Ebooks
|
|
99
99
|
#set :vowels, 1 # => default: 0 = not considered
|
100
100
|
#set :consonants, 5 # => default: 0 = not considered
|
101
101
|
#set :ignore_case, true # => default: false
|
102
|
-
set :word_pattern, /(?<!@)(?<=\s)[\
|
102
|
+
set :word_pattern, /(?<!@)(?<=\s)[\p{Word}']+/ # => default: /\w+/
|
103
103
|
#set :stemming, true # => default: false
|
104
104
|
end
|
105
105
|
|
File without changes
|
data/spec/model_spec.rb
CHANGED
@@ -36,7 +36,7 @@ describe Ebooks::Model do
|
|
36
36
|
report2 = MemoryUsage.report do
|
37
37
|
model = Ebooks::Model.load(file.path)
|
38
38
|
end
|
39
|
-
expect(report2.total_memsize).to be <
|
39
|
+
expect(report2.total_memsize).to be < 4000000
|
40
40
|
|
41
41
|
expect(model.tokens[0]).to be_a String
|
42
42
|
expect(model.sentences[0][0]).to be_a Fixnum
|
data/twitter_ebooks.gemspec
CHANGED
@@ -22,7 +22,7 @@ Gem::Specification.new do |gem|
|
|
22
22
|
gem.add_development_dependency 'pry-byebug'
|
23
23
|
gem.add_development_dependency 'yard'
|
24
24
|
|
25
|
-
gem.add_runtime_dependency 'twitter', '
|
25
|
+
gem.add_runtime_dependency 'twitter', '~> 5.15'
|
26
26
|
gem.add_runtime_dependency 'rufus-scheduler'
|
27
27
|
gem.add_runtime_dependency 'gingerice'
|
28
28
|
gem.add_runtime_dependency 'htmlentities'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitter_ebooks
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1.2
|
4
|
+
version: 3.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jaiden Mispy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-01-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
@@ -98,16 +98,16 @@ dependencies:
|
|
98
98
|
name: twitter
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
|
-
- -
|
101
|
+
- - "~>"
|
102
102
|
- !ruby/object:Gem::Version
|
103
|
-
version: '5.
|
103
|
+
version: '5.15'
|
104
104
|
type: :runtime
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
|
-
- -
|
108
|
+
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
|
-
version: '5.
|
110
|
+
version: '5.15'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
112
|
name: rufus-scheduler
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
@@ -238,7 +238,6 @@ files:
|
|
238
238
|
- bin/ebooks
|
239
239
|
- data/adjectives.txt
|
240
240
|
- data/nouns.txt
|
241
|
-
- data/stopwords.txt
|
242
241
|
- lib/twitter_ebooks.rb
|
243
242
|
- lib/twitter_ebooks/archive.rb
|
244
243
|
- lib/twitter_ebooks/bot.rb
|
@@ -252,6 +251,7 @@ files:
|
|
252
251
|
- skeleton/corpus/.gitignore
|
253
252
|
- skeleton/gitignore
|
254
253
|
- skeleton/model/.gitignore
|
254
|
+
- skeleton/stopwords.txt
|
255
255
|
- spec/bot_spec.rb
|
256
256
|
- spec/data/0xabad1dea.json
|
257
257
|
- spec/data/0xabad1dea.model
|