gh-archive 0.2 → 0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/gh-archive.rb +25 -18
- metadata +10 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0a12aa7954977bd9a8755f560530dd8db96b94e06f486e0a4155838ceb55e7f3
|
4
|
+
data.tar.gz: 8fc967a2e7a9bb0848c6e62a30231180c9a90ee96d72d6524c6b778ce54cbcbd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6f7c2ac9526de7e352825ed26c2ec3fc5375000e399529f45a07db7fbdaeef361f01a4782eb825f286e55311c3fc81dda7ecbcd509519e78da22147d1998771a
|
7
|
+
data.tar.gz: ab2b5f37f433ccde1d92406bd921c0c7a3a6ccaab41a3c11347a113c8c3db63beacf174ba6368be227302500b83bba59415f2cd85df59f9aff3f7f86f5c8b69c
|
data/lib/gh-archive.rb
CHANGED
@@ -3,6 +3,7 @@ require 'json'
|
|
3
3
|
require 'open-uri'
|
4
4
|
require 'zlib'
|
5
5
|
require 'logger'
|
6
|
+
require 'tmpdir'
|
6
7
|
|
7
8
|
module GHAUtils
|
8
9
|
def get_gha_filename(date)
|
@@ -101,11 +102,12 @@ class GHAProvider
|
|
101
102
|
end
|
102
103
|
|
103
104
|
class OnlineGHAProvider < GHAProvider
|
104
|
-
def initialize(max_retries = 3, proactive = false)
|
105
|
+
def initialize(max_retries = 3, proactive = false, proactive_pool_size = 10)
|
105
106
|
super()
|
106
107
|
|
107
108
|
@max_retries = max_retries
|
108
109
|
@proactive = proactive
|
110
|
+
@proactive_pool_size = proactive_pool_size
|
109
111
|
@cache = Cache.new
|
110
112
|
end
|
111
113
|
|
@@ -115,7 +117,7 @@ class OnlineGHAProvider < GHAProvider
|
|
115
117
|
filename = self.get_gha_filename(current_time)
|
116
118
|
|
117
119
|
if @cache.has?(filename)
|
118
|
-
result =
|
120
|
+
result = @cache.get(filename)
|
119
121
|
else
|
120
122
|
URI.open("http://data.gharchive.org/#{filename}") do |gz|
|
121
123
|
# Save to cache
|
@@ -123,7 +125,7 @@ class OnlineGHAProvider < GHAProvider
|
|
123
125
|
end
|
124
126
|
end
|
125
127
|
rescue
|
126
|
-
@logger.
|
128
|
+
@logger.warn($!)
|
127
129
|
end
|
128
130
|
end
|
129
131
|
|
@@ -136,10 +138,12 @@ class OnlineGHAProvider < GHAProvider
|
|
136
138
|
filename = self.get_gha_filename(current_time)
|
137
139
|
|
138
140
|
URI.open("http://data.gharchive.org/#{filename}") do |gz|
|
139
|
-
|
141
|
+
content = self.read_gha_file(gz)
|
142
|
+
@cache.put(filename, content)
|
140
143
|
return
|
141
144
|
end
|
142
145
|
rescue
|
146
|
+
p $!
|
143
147
|
end
|
144
148
|
end
|
145
149
|
end
|
@@ -148,17 +152,19 @@ class OnlineGHAProvider < GHAProvider
|
|
148
152
|
if @proactive
|
149
153
|
@logger.info("Proactive download thread started")
|
150
154
|
Thread.start do
|
155
|
+
pool = []
|
151
156
|
self.each_date(from, to) do |current_date|
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
if @cache.full?
|
156
|
-
@logger.info("Full cache. Waiting...")
|
157
|
+
while pool.size > @proactive_pool_size || @cache.full?
|
158
|
+
pool.delete_if { |t| !t.alive? }
|
159
|
+
sleep 0.1
|
157
160
|
end
|
158
161
|
|
159
|
-
|
160
|
-
|
162
|
+
pool << Thread.start do
|
163
|
+
self.cache(current_date)
|
164
|
+
@logger.info("Proactively cached #{current_date}. Cache size: #{@cache.size}")
|
161
165
|
end
|
166
|
+
|
167
|
+
pool.delete_if { |t| !t.alive? }
|
162
168
|
end
|
163
169
|
end
|
164
170
|
end
|
@@ -175,21 +181,22 @@ class OnlineGHAProvider < GHAProvider
|
|
175
181
|
end
|
176
182
|
|
177
183
|
def put(name, content)
|
178
|
-
|
179
|
-
|
180
|
-
|
184
|
+
#filename = "#@folder/#{name}"
|
185
|
+
#File.open(filename, 'w') do |f|
|
186
|
+
#f << content
|
187
|
+
#end
|
181
188
|
|
182
189
|
@mutex.synchronize do
|
183
|
-
@cache[name] =
|
190
|
+
@cache[name] = content
|
184
191
|
end
|
185
192
|
end
|
186
193
|
|
187
194
|
def get(name)
|
188
195
|
@mutex.synchronize do
|
189
|
-
return
|
196
|
+
return @cache.delete(name)
|
190
197
|
end
|
191
198
|
ensure
|
192
|
-
self.unload(name)
|
199
|
+
#self.unload(name)
|
193
200
|
end
|
194
201
|
|
195
202
|
def unload(name)
|
@@ -204,7 +211,7 @@ class OnlineGHAProvider < GHAProvider
|
|
204
211
|
|
205
212
|
def size
|
206
213
|
@mutex.synchronize do
|
207
|
-
@cache.size
|
214
|
+
return @cache.size
|
208
215
|
end
|
209
216
|
end
|
210
217
|
|
metadata
CHANGED
@@ -1,33 +1,33 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gh-archive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.2'
|
4
|
+
version: '0.3'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Simone Scalabrino
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-08-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: code-assertions
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: 1.1.2
|
20
|
-
- - "
|
20
|
+
- - ">="
|
21
21
|
- !ruby/object:Gem::Version
|
22
22
|
version: 1.1.2
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
25
|
version_requirements: !ruby/object:Gem::Requirement
|
26
26
|
requirements:
|
27
|
-
- - "
|
27
|
+
- - "~>"
|
28
28
|
- !ruby/object:Gem::Version
|
29
29
|
version: 1.1.2
|
30
|
-
- - "
|
30
|
+
- - ">="
|
31
31
|
- !ruby/object:Gem::Version
|
32
32
|
version: 1.1.2
|
33
33
|
description: Download and analyze the GitHub events stored at GitHub archive
|
@@ -41,7 +41,7 @@ homepage: https://github.com/intersimone999/gh-archive
|
|
41
41
|
licenses:
|
42
42
|
- GPL-3.0-only
|
43
43
|
metadata: {}
|
44
|
-
post_install_message:
|
44
|
+
post_install_message:
|
45
45
|
rdoc_options: []
|
46
46
|
require_paths:
|
47
47
|
- lib
|
@@ -56,8 +56,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
56
56
|
- !ruby/object:Gem::Version
|
57
57
|
version: '0'
|
58
58
|
requirements: []
|
59
|
-
rubygems_version: 3.
|
60
|
-
signing_key:
|
59
|
+
rubygems_version: 3.2.21
|
60
|
+
signing_key:
|
61
61
|
specification_version: 4
|
62
62
|
summary: GitHub Archive mining utility
|
63
63
|
test_files: []
|