twitter_ebooks 2.2.6 → 2.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec +1 -0
- data/Gemfile +1 -1
- data/README.md +2 -1
- data/lib/twitter_ebooks/bot.rb +20 -15
- data/lib/twitter_ebooks/model.rb +26 -25
- data/lib/twitter_ebooks/nlp.rb +1 -1
- data/lib/twitter_ebooks/version.rb +1 -1
- data/lib/twitter_ebooks.rb +0 -2
- data/skeleton/bots.rb +1 -0
- data/spec/data/0xabad1dea.json +203945 -0
- data/spec/data/0xabad1dea.model +0 -0
- data/spec/memprof.rb +37 -0
- data/spec/model_spec.rb +14 -0
- data/spec/spec_helper.rb +6 -0
- data/twitter_ebooks.gemspec +6 -3
- metadata +49 -11
- data/script/process_anc_data.rb +0 -19
Binary file
|
data/spec/memprof.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'objspace'
|
2
|
+
|
3
|
+
module MemoryUsage
|
4
|
+
MemoryReport = Struct.new(:total_memsize)
|
5
|
+
|
6
|
+
def self.full_gc
|
7
|
+
GC.start(full_mark: true)
|
8
|
+
end
|
9
|
+
|
10
|
+
def self.report(&block)
|
11
|
+
rvalue_size = GC::INTERNAL_CONSTANTS[:RVALUE_SIZE]
|
12
|
+
|
13
|
+
full_gc
|
14
|
+
GC.disable
|
15
|
+
|
16
|
+
total_memsize = 0
|
17
|
+
|
18
|
+
generation = nil
|
19
|
+
ObjectSpace.trace_object_allocations do
|
20
|
+
generation = GC.count
|
21
|
+
block.call
|
22
|
+
end
|
23
|
+
|
24
|
+
ObjectSpace.each_object do |obj|
|
25
|
+
next unless generation == ObjectSpace.allocation_generation(obj)
|
26
|
+
memsize = ObjectSpace.memsize_of(obj) + rvalue_size
|
27
|
+
# compensate for API bug
|
28
|
+
memsize = rvalue_size if memsize > 100_000_000_000
|
29
|
+
total_memsize += memsize
|
30
|
+
end
|
31
|
+
|
32
|
+
GC.enable
|
33
|
+
full_gc
|
34
|
+
|
35
|
+
return MemoryReport.new(total_memsize)
|
36
|
+
end
|
37
|
+
end
|
data/spec/model_spec.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'memory_profiler'
|
3
|
+
|
4
|
+
def Process.rss; `ps -o rss= -p #{Process.pid}`.chomp.to_i; end
|
5
|
+
|
6
|
+
describe Ebooks::Model do
|
7
|
+
it "does not use a ridiculous amount of memory" do
|
8
|
+
report = MemoryUsage.report do
|
9
|
+
model = Ebooks::Model.consume(path("data/0xabad1dea.json"))
|
10
|
+
end
|
11
|
+
|
12
|
+
expect(report.total_memsize).to be < 1000000000
|
13
|
+
end
|
14
|
+
end
|
data/spec/spec_helper.rb
ADDED
data/twitter_ebooks.gemspec
CHANGED
@@ -15,10 +15,13 @@ Gem::Specification.new do |gem|
|
|
15
15
|
gem.require_paths = ["lib"]
|
16
16
|
gem.version = Ebooks::VERSION
|
17
17
|
|
18
|
-
gem.
|
18
|
+
gem.add_development_dependency 'rspec'
|
19
|
+
gem.add_development_dependency 'memory_profiler'
|
20
|
+
gem.add_development_dependency 'pry-byebug'
|
19
21
|
|
20
|
-
|
21
|
-
gem.add_runtime_dependency '
|
22
|
+
|
23
|
+
gem.add_runtime_dependency 'twitter', '~> 5.1'
|
24
|
+
gem.add_runtime_dependency 'tweetstream'
|
22
25
|
gem.add_runtime_dependency 'rufus-scheduler'
|
23
26
|
gem.add_runtime_dependency 'gingerice'
|
24
27
|
gem.add_runtime_dependency 'htmlentities'
|
metadata
CHANGED
@@ -1,23 +1,51 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitter_ebooks
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.2.6
|
4
|
+
version: 2.2.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jaiden Mispy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-10-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: rspec
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0'
|
20
|
-
type: :
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: memory_profiler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: pry-byebug
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
21
49
|
prerelease: false
|
22
50
|
version_requirements: !ruby/object:Gem::Requirement
|
23
51
|
requirements:
|
@@ -30,28 +58,28 @@ dependencies:
|
|
30
58
|
requirements:
|
31
59
|
- - "~>"
|
32
60
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
61
|
+
version: '5.1'
|
34
62
|
type: :runtime
|
35
63
|
prerelease: false
|
36
64
|
version_requirements: !ruby/object:Gem::Requirement
|
37
65
|
requirements:
|
38
66
|
- - "~>"
|
39
67
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
68
|
+
version: '5.1'
|
41
69
|
- !ruby/object:Gem::Dependency
|
42
70
|
name: tweetstream
|
43
71
|
requirement: !ruby/object:Gem::Requirement
|
44
72
|
requirements:
|
45
|
-
- -
|
73
|
+
- - ">="
|
46
74
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
75
|
+
version: '0'
|
48
76
|
type: :runtime
|
49
77
|
prerelease: false
|
50
78
|
version_requirements: !ruby/object:Gem::Requirement
|
51
79
|
requirements:
|
52
|
-
- -
|
80
|
+
- - ">="
|
53
81
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
82
|
+
version: '0'
|
55
83
|
- !ruby/object:Gem::Dependency
|
56
84
|
name: rufus-scheduler
|
57
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -145,6 +173,7 @@ extensions: []
|
|
145
173
|
extra_rdoc_files: []
|
146
174
|
files:
|
147
175
|
- ".gitignore"
|
176
|
+
- ".rspec"
|
148
177
|
- Gemfile
|
149
178
|
- LICENSE
|
150
179
|
- README.md
|
@@ -161,7 +190,6 @@ files:
|
|
161
190
|
- lib/twitter_ebooks/nlp.rb
|
162
191
|
- lib/twitter_ebooks/suffix.rb
|
163
192
|
- lib/twitter_ebooks/version.rb
|
164
|
-
- script/process_anc_data.rb
|
165
193
|
- skeleton/.gitignore
|
166
194
|
- skeleton/Gemfile
|
167
195
|
- skeleton/Procfile
|
@@ -169,6 +197,11 @@ files:
|
|
169
197
|
- skeleton/corpus/.gitignore
|
170
198
|
- skeleton/model/.gitignore
|
171
199
|
- skeleton/run.rb
|
200
|
+
- spec/data/0xabad1dea.json
|
201
|
+
- spec/data/0xabad1dea.model
|
202
|
+
- spec/memprof.rb
|
203
|
+
- spec/model_spec.rb
|
204
|
+
- spec/spec_helper.rb
|
172
205
|
- test/corpus/0xabad1dea.tweets
|
173
206
|
- test/keywords.rb
|
174
207
|
- test/tokenize.rb
|
@@ -197,6 +230,11 @@ signing_key:
|
|
197
230
|
specification_version: 4
|
198
231
|
summary: Markov chains for all your friends~
|
199
232
|
test_files:
|
233
|
+
- spec/data/0xabad1dea.json
|
234
|
+
- spec/data/0xabad1dea.model
|
235
|
+
- spec/memprof.rb
|
236
|
+
- spec/model_spec.rb
|
237
|
+
- spec/spec_helper.rb
|
200
238
|
- test/corpus/0xabad1dea.tweets
|
201
239
|
- test/keywords.rb
|
202
240
|
- test/tokenize.rb
|
data/script/process_anc_data.rb
DELETED
@@ -1,19 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
# encoding: utf-8
|
3
|
-
|
4
|
-
require 'json'
|
5
|
-
|
6
|
-
freqmap = {}
|
7
|
-
|
8
|
-
data = File.read("data/ANC-all-count.txt")
|
9
|
-
data = data.unpack("C*").pack("U*")
|
10
|
-
|
11
|
-
data.lines.each do |l|
|
12
|
-
vals = l.split("\t")
|
13
|
-
|
14
|
-
freqmap[vals[0]] = vals[-1].to_i
|
15
|
-
end
|
16
|
-
|
17
|
-
File.open("data/wordfreq.json", 'w') do |f|
|
18
|
-
f.write(JSON.dump(freqmap))
|
19
|
-
end
|