twitter_ebooks 2.0.3 → 2.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile.lock +12 -12
- data/bin/ebooks +21 -6
- data/data/ANC-all-count.txt +297241 -0
- data/data/stopwords.txt +204 -0
- data/data/wordfreq.json +1 -0
- data/lib/twitter_ebooks/bot.rb +25 -7
- data/lib/twitter_ebooks/markov.rb +55 -63
- data/lib/twitter_ebooks/model.rb +57 -74
- data/lib/twitter_ebooks/nlp.rb +90 -55
- data/lib/twitter_ebooks/version.rb +1 -1
- data/script/process_anc_data.rb +19 -0
- data/skeleton/Procfile +1 -1
- data/skeleton/bots.rb +0 -6
- data/skeleton/corpus/README.md +1 -1
- data/skeleton/run.rb +9 -0
- data/test/keywords.rb +18 -0
- data/twitter_ebooks.gemspec +3 -5
- metadata +13 -40
- data/skeleton/model/README.md +0 -1
data/test/keywords.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
|
4
|
+
require 'twitter_ebooks'
|
5
|
+
require 'minitest/autorun'
|
6
|
+
require 'benchmark'
|
7
|
+
|
8
|
+
module Ebooks
|
9
|
+
class TestKeywords < Minitest::Test
|
10
|
+
corpus = NLP.normalize(File.read(ARGV[0]))
|
11
|
+
puts "Finding and ranking keywords"
|
12
|
+
puts Benchmark.measure {
|
13
|
+
NLP.keywords(corpus).top(50).each do |keyword|
|
14
|
+
puts "#{keyword.text} #{keyword.weight}"
|
15
|
+
end
|
16
|
+
}
|
17
|
+
end
|
18
|
+
end
|
data/twitter_ebooks.gemspec
CHANGED
@@ -19,12 +19,10 @@ Gem::Specification.new do |gem|
|
|
19
19
|
|
20
20
|
gem.add_runtime_dependency 'twitter'
|
21
21
|
gem.add_runtime_dependency 'tweetstream'
|
22
|
-
gem.add_runtime_dependency 'tactful_tokenizer'
|
23
|
-
gem.add_runtime_dependency 'tokenizer'
|
24
|
-
gem.add_runtime_dependency 'ruby-stemmer'
|
25
|
-
gem.add_runtime_dependency 'engtagger'
|
26
|
-
gem.add_runtime_dependency 'linguistics'
|
27
22
|
gem.add_runtime_dependency 'rufus-scheduler'
|
28
23
|
gem.add_runtime_dependency 'gingerice'
|
29
24
|
gem.add_runtime_dependency 'htmlentities'
|
25
|
+
gem.add_runtime_dependency 'engtagger'
|
26
|
+
gem.add_runtime_dependency 'fast-stemmer'
|
27
|
+
gem.add_runtime_dependency 'highscore'
|
30
28
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitter_ebooks
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.3
|
4
|
+
version: 2.0.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-11-
|
12
|
+
date: 2013-11-06 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: minitest
|
@@ -60,7 +60,7 @@ dependencies:
|
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
62
|
- !ruby/object:Gem::Dependency
|
63
|
-
name: tactful_tokenizer
|
63
|
+
name: rufus-scheduler
|
64
64
|
requirement: !ruby/object:Gem::Requirement
|
65
65
|
none: false
|
66
66
|
requirements:
|
@@ -76,7 +76,7 @@ dependencies:
|
|
76
76
|
- !ruby/object:Gem::Version
|
77
77
|
version: '0'
|
78
78
|
- !ruby/object:Gem::Dependency
|
79
|
-
name: tokenizer
|
79
|
+
name: gingerice
|
80
80
|
requirement: !ruby/object:Gem::Requirement
|
81
81
|
none: false
|
82
82
|
requirements:
|
@@ -92,7 +92,7 @@ dependencies:
|
|
92
92
|
- !ruby/object:Gem::Version
|
93
93
|
version: '0'
|
94
94
|
- !ruby/object:Gem::Dependency
|
95
|
-
name: ruby-stemmer
|
95
|
+
name: htmlentities
|
96
96
|
requirement: !ruby/object:Gem::Requirement
|
97
97
|
none: false
|
98
98
|
requirements:
|
@@ -124,39 +124,7 @@ dependencies:
|
|
124
124
|
- !ruby/object:Gem::Version
|
125
125
|
version: '0'
|
126
126
|
- !ruby/object:Gem::Dependency
|
127
|
-
name: linguistics
|
128
|
-
requirement: !ruby/object:Gem::Requirement
|
129
|
-
none: false
|
130
|
-
requirements:
|
131
|
-
- - ! '>='
|
132
|
-
- !ruby/object:Gem::Version
|
133
|
-
version: '0'
|
134
|
-
type: :runtime
|
135
|
-
prerelease: false
|
136
|
-
version_requirements: !ruby/object:Gem::Requirement
|
137
|
-
none: false
|
138
|
-
requirements:
|
139
|
-
- - ! '>='
|
140
|
-
- !ruby/object:Gem::Version
|
141
|
-
version: '0'
|
142
|
-
- !ruby/object:Gem::Dependency
|
143
|
-
name: rufus-scheduler
|
144
|
-
requirement: !ruby/object:Gem::Requirement
|
145
|
-
none: false
|
146
|
-
requirements:
|
147
|
-
- - ! '>='
|
148
|
-
- !ruby/object:Gem::Version
|
149
|
-
version: '0'
|
150
|
-
type: :runtime
|
151
|
-
prerelease: false
|
152
|
-
version_requirements: !ruby/object:Gem::Requirement
|
153
|
-
none: false
|
154
|
-
requirements:
|
155
|
-
- - ! '>='
|
156
|
-
- !ruby/object:Gem::Version
|
157
|
-
version: '0'
|
158
|
-
- !ruby/object:Gem::Dependency
|
159
|
-
name: gingerice
|
127
|
+
name: fast-stemmer
|
160
128
|
requirement: !ruby/object:Gem::Requirement
|
161
129
|
none: false
|
162
130
|
requirements:
|
@@ -172,7 +140,7 @@ dependencies:
|
|
172
140
|
- !ruby/object:Gem::Version
|
173
141
|
version: '0'
|
174
142
|
- !ruby/object:Gem::Dependency
|
175
|
-
name: htmlentities
|
143
|
+
name: highscore
|
176
144
|
requirement: !ruby/object:Gem::Requirement
|
177
145
|
none: false
|
178
146
|
requirements:
|
@@ -203,9 +171,11 @@ files:
|
|
203
171
|
- README.md
|
204
172
|
- Rakefile
|
205
173
|
- bin/ebooks
|
174
|
+
- data/ANC-all-count.txt
|
206
175
|
- data/adjectives.txt
|
207
176
|
- data/nouns.txt
|
208
177
|
- data/stopwords.txt
|
178
|
+
- data/wordfreq.json
|
209
179
|
- lib/twitter_ebooks.rb
|
210
180
|
- lib/twitter_ebooks/archiver.rb
|
211
181
|
- lib/twitter_ebooks/bot.rb
|
@@ -213,12 +183,14 @@ files:
|
|
213
183
|
- lib/twitter_ebooks/model.rb
|
214
184
|
- lib/twitter_ebooks/nlp.rb
|
215
185
|
- lib/twitter_ebooks/version.rb
|
186
|
+
- script/process_anc_data.rb
|
216
187
|
- skeleton/.gitignore
|
217
188
|
- skeleton/Procfile
|
218
189
|
- skeleton/bots.rb
|
219
190
|
- skeleton/corpus/README.md
|
220
|
-
- skeleton/model/README.md
|
191
|
+
- skeleton/run.rb
|
221
192
|
- test/corpus/0xabad1dea.tweets
|
193
|
+
- test/keywords.rb
|
222
194
|
- test/tokenize.rb
|
223
195
|
- twitter_ebooks.gemspec
|
224
196
|
homepage: ''
|
@@ -247,4 +219,5 @@ specification_version: 3
|
|
247
219
|
summary: Markov chains for all your friends~
|
248
220
|
test_files:
|
249
221
|
- test/corpus/0xabad1dea.tweets
|
222
|
+
- test/keywords.rb
|
250
223
|
- test/tokenize.rb
|
data/skeleton/model/README.md
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
This is where the output of `ebooks consume <corpus_path>` goes. You can load these files using Model.load(path), and `ebooks gen <path>` for testing.
|