twitter_ebooks 2.2.6 → 2.2.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rspec +1 -0
- data/Gemfile +1 -1
- data/README.md +2 -1
- data/lib/twitter_ebooks/bot.rb +20 -15
- data/lib/twitter_ebooks/model.rb +26 -25
- data/lib/twitter_ebooks/nlp.rb +1 -1
- data/lib/twitter_ebooks/version.rb +1 -1
- data/lib/twitter_ebooks.rb +0 -2
- data/skeleton/bots.rb +1 -0
- data/spec/data/0xabad1dea.json +203945 -0
- data/spec/data/0xabad1dea.model +0 -0
- data/spec/memprof.rb +37 -0
- data/spec/model_spec.rb +14 -0
- data/spec/spec_helper.rb +6 -0
- data/twitter_ebooks.gemspec +6 -3
- metadata +49 -11
- data/script/process_anc_data.rb +0 -19
Binary file
|
data/spec/memprof.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'objspace'
|
2
|
+
|
3
|
+
# Rough accounting of the memory held by objects allocated while a block runs.
#
# Usage:
#   report = MemoryUsage.report { build_something }
#   report.total_memsize # => Integer (bytes, as estimated by ObjectSpace)
module MemoryUsage
  # Result of one measurement; +total_memsize+ is the summed size estimate.
  MemoryReport = Struct.new(:total_memsize)

  # Runs a garbage collection with a full mark phase.
  def self.full_gc
    GC.start(full_mark: true)
  end

  # Size of one object slot as reported by the running interpreter.
  #
  # The key is looked up under both names it may be published as
  # (:RVALUE_SIZE, then :BASE_SLOT_SIZE). If neither is present the slot
  # overhead is counted as 0 rather than letting the arithmetic below fail
  # on nil.
  def self.slot_size
    constants = GC::INTERNAL_CONSTANTS
    constants[:RVALUE_SIZE] || constants[:BASE_SLOT_SIZE] || 0
  end
  private_class_method :slot_size

  # Measures the objects allocated by the given block.
  #
  # GC is disabled for the duration of the measurement so that every object
  # allocated by the block shares one allocation generation and is still
  # present when the heap is walked afterwards. GC is always re-enabled (and
  # a full collection run) before returning, even when the block raises.
  #
  # @yield the code whose allocations should be measured
  # @return [MemoryReport] summed memsize of the objects allocated in the block
  # @raise [ArgumentError] if no block is given
  def self.report(&block)
    raise ArgumentError, 'MemoryUsage.report requires a block' unless block

    rvalue_size = slot_size
    total_memsize = 0

    full_gc
    GC.disable

    begin
      generation = nil
      ObjectSpace.trace_object_allocations do
        # With GC disabled the count cannot advance, so this value identifies
        # every object allocated while the block runs.
        generation = GC.count
        block.call
      end

      ObjectSpace.each_object do |obj|
        next unless generation == ObjectSpace.allocation_generation(obj)

        memsize = ObjectSpace.memsize_of(obj) + rvalue_size
        # compensate for API bug: absurdly large values are treated as a
        # bare slot instead of being trusted.
        memsize = rvalue_size if memsize > 100_000_000_000
        total_memsize += memsize
      end
    ensure
      # Never leave the process with GC switched off.
      GC.enable
      full_gc
    end

    MemoryReport.new(total_memsize)
  end
end
|
data/spec/model_spec.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'memory_profiler'
|
3
|
+
|
4
|
+
# Adds a singleton method to Process that shells out to `ps` for the current
# pid and returns the chomped output converted with to_i (0 if the output is
# not numeric). NOTE(review): presumably the resident set size in kilobytes,
# but the unit depends on the platform's `ps` — confirm before relying on it.
def Process.rss; `ps -o rss= -p #{Process.pid}`.chomp.to_i; end
|
5
|
+
|
6
|
+
# Memory regression check: consuming the bundled JSON corpus must stay under
# a fixed ceiling of measured object memory.
describe Ebooks::Model do
  it "does not use a ridiculous amount of memory" do
    # Only the allocations made while consuming matter here, so the resulting
    # model is not bound to a local (avoids an assigned-but-unused variable).
    # NOTE(review): `path` is assumed to be a helper provided by spec_helper.
    report = MemoryUsage.report do
      Ebooks::Model.consume(path("data/0xabad1dea.json"))
    end

    expect(report.total_memsize).to be < 1_000_000_000
  end
end
|
data/spec/spec_helper.rb
ADDED
data/twitter_ebooks.gemspec
CHANGED
@@ -15,10 +15,13 @@ Gem::Specification.new do |gem|
|
|
15
15
|
gem.require_paths = ["lib"]
|
16
16
|
gem.version = Ebooks::VERSION
|
17
17
|
|
18
|
-
gem.
|
18
|
+
gem.add_development_dependency 'rspec'
|
19
|
+
gem.add_development_dependency 'memory_profiler'
|
20
|
+
gem.add_development_dependency 'pry-byebug'
|
19
21
|
|
20
|
-
|
21
|
-
gem.add_runtime_dependency '
|
22
|
+
|
23
|
+
gem.add_runtime_dependency 'twitter', '~> 5.1'
|
24
|
+
gem.add_runtime_dependency 'tweetstream'
|
22
25
|
gem.add_runtime_dependency 'rufus-scheduler'
|
23
26
|
gem.add_runtime_dependency 'gingerice'
|
24
27
|
gem.add_runtime_dependency 'htmlentities'
|
metadata
CHANGED
@@ -1,23 +1,51 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitter_ebooks
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.2.
|
4
|
+
version: 2.2.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jaiden Mispy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-10-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: rspec
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0'
|
20
|
-
type: :
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: memory_profiler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: pry-byebug
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
21
49
|
prerelease: false
|
22
50
|
version_requirements: !ruby/object:Gem::Requirement
|
23
51
|
requirements:
|
@@ -30,28 +58,28 @@ dependencies:
|
|
30
58
|
requirements:
|
31
59
|
- - "~>"
|
32
60
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
61
|
+
version: '5.1'
|
34
62
|
type: :runtime
|
35
63
|
prerelease: false
|
36
64
|
version_requirements: !ruby/object:Gem::Requirement
|
37
65
|
requirements:
|
38
66
|
- - "~>"
|
39
67
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
68
|
+
version: '5.1'
|
41
69
|
- !ruby/object:Gem::Dependency
|
42
70
|
name: tweetstream
|
43
71
|
requirement: !ruby/object:Gem::Requirement
|
44
72
|
requirements:
|
45
|
-
- -
|
73
|
+
- - ">="
|
46
74
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
75
|
+
version: '0'
|
48
76
|
type: :runtime
|
49
77
|
prerelease: false
|
50
78
|
version_requirements: !ruby/object:Gem::Requirement
|
51
79
|
requirements:
|
52
|
-
- -
|
80
|
+
- - ">="
|
53
81
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
82
|
+
version: '0'
|
55
83
|
- !ruby/object:Gem::Dependency
|
56
84
|
name: rufus-scheduler
|
57
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -145,6 +173,7 @@ extensions: []
|
|
145
173
|
extra_rdoc_files: []
|
146
174
|
files:
|
147
175
|
- ".gitignore"
|
176
|
+
- ".rspec"
|
148
177
|
- Gemfile
|
149
178
|
- LICENSE
|
150
179
|
- README.md
|
@@ -161,7 +190,6 @@ files:
|
|
161
190
|
- lib/twitter_ebooks/nlp.rb
|
162
191
|
- lib/twitter_ebooks/suffix.rb
|
163
192
|
- lib/twitter_ebooks/version.rb
|
164
|
-
- script/process_anc_data.rb
|
165
193
|
- skeleton/.gitignore
|
166
194
|
- skeleton/Gemfile
|
167
195
|
- skeleton/Procfile
|
@@ -169,6 +197,11 @@ files:
|
|
169
197
|
- skeleton/corpus/.gitignore
|
170
198
|
- skeleton/model/.gitignore
|
171
199
|
- skeleton/run.rb
|
200
|
+
- spec/data/0xabad1dea.json
|
201
|
+
- spec/data/0xabad1dea.model
|
202
|
+
- spec/memprof.rb
|
203
|
+
- spec/model_spec.rb
|
204
|
+
- spec/spec_helper.rb
|
172
205
|
- test/corpus/0xabad1dea.tweets
|
173
206
|
- test/keywords.rb
|
174
207
|
- test/tokenize.rb
|
@@ -197,6 +230,11 @@ signing_key:
|
|
197
230
|
specification_version: 4
|
198
231
|
summary: Markov chains for all your friends~
|
199
232
|
test_files:
|
233
|
+
- spec/data/0xabad1dea.json
|
234
|
+
- spec/data/0xabad1dea.model
|
235
|
+
- spec/memprof.rb
|
236
|
+
- spec/model_spec.rb
|
237
|
+
- spec/spec_helper.rb
|
200
238
|
- test/corpus/0xabad1dea.tweets
|
201
239
|
- test/keywords.rb
|
202
240
|
- test/tokenize.rb
|
data/script/process_anc_data.rb
DELETED
@@ -1,19 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
# encoding: utf-8
|
3
|
-
|
4
|
-
require 'json'
|
5
|
-
|
6
|
-
freqmap = {}
|
7
|
-
|
8
|
-
data = File.read("data/ANC-all-count.txt")
|
9
|
-
data = data.unpack("C*").pack("U*")
|
10
|
-
|
11
|
-
data.lines.each do |l|
|
12
|
-
vals = l.split("\t")
|
13
|
-
|
14
|
-
freqmap[vals[0]] = vals[-1].to_i
|
15
|
-
end
|
16
|
-
|
17
|
-
File.open("data/wordfreq.json", 'w') do |f|
|
18
|
-
f.write(JSON.dump(freqmap))
|
19
|
-
end
|