gh-archive 0.2 → 0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/gh-archive.rb +25 -18
- metadata +10 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0a12aa7954977bd9a8755f560530dd8db96b94e06f486e0a4155838ceb55e7f3
|
4
|
+
data.tar.gz: 8fc967a2e7a9bb0848c6e62a30231180c9a90ee96d72d6524c6b778ce54cbcbd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6f7c2ac9526de7e352825ed26c2ec3fc5375000e399529f45a07db7fbdaeef361f01a4782eb825f286e55311c3fc81dda7ecbcd509519e78da22147d1998771a
|
7
|
+
data.tar.gz: ab2b5f37f433ccde1d92406bd921c0c7a3a6ccaab41a3c11347a113c8c3db63beacf174ba6368be227302500b83bba59415f2cd85df59f9aff3f7f86f5c8b69c
|
data/lib/gh-archive.rb
CHANGED
@@ -3,6 +3,7 @@ require 'json'
|
|
3
3
|
require 'open-uri'
|
4
4
|
require 'zlib'
|
5
5
|
require 'logger'
|
6
|
+
require 'tmpdir'
|
6
7
|
|
7
8
|
module GHAUtils
|
8
9
|
def get_gha_filename(date)
|
@@ -101,11 +102,12 @@ class GHAProvider
|
|
101
102
|
end
|
102
103
|
|
103
104
|
class OnlineGHAProvider < GHAProvider
|
104
|
-
def initialize(max_retries = 3, proactive = false)
|
105
|
+
def initialize(max_retries = 3, proactive = false, proactive_pool_size = 10)
|
105
106
|
super()
|
106
107
|
|
107
108
|
@max_retries = max_retries
|
108
109
|
@proactive = proactive
|
110
|
+
@proactive_pool_size = proactive_pool_size
|
109
111
|
@cache = Cache.new
|
110
112
|
end
|
111
113
|
|
@@ -115,7 +117,7 @@ class OnlineGHAProvider < GHAProvider
|
|
115
117
|
filename = self.get_gha_filename(current_time)
|
116
118
|
|
117
119
|
if @cache.has?(filename)
|
118
|
-
result =
|
120
|
+
result = @cache.get(filename)
|
119
121
|
else
|
120
122
|
URI.open("http://data.gharchive.org/#{filename}") do |gz|
|
121
123
|
# Save to cache
|
@@ -123,7 +125,7 @@ class OnlineGHAProvider < GHAProvider
|
|
123
125
|
end
|
124
126
|
end
|
125
127
|
rescue
|
126
|
-
@logger.
|
128
|
+
@logger.warn($!)
|
127
129
|
end
|
128
130
|
end
|
129
131
|
|
@@ -136,10 +138,12 @@ class OnlineGHAProvider < GHAProvider
|
|
136
138
|
filename = self.get_gha_filename(current_time)
|
137
139
|
|
138
140
|
URI.open("http://data.gharchive.org/#{filename}") do |gz|
|
139
|
-
|
141
|
+
content = self.read_gha_file(gz)
|
142
|
+
@cache.put(filename, content)
|
140
143
|
return
|
141
144
|
end
|
142
145
|
rescue
|
146
|
+
p $!
|
143
147
|
end
|
144
148
|
end
|
145
149
|
end
|
@@ -148,17 +152,19 @@ class OnlineGHAProvider < GHAProvider
|
|
148
152
|
if @proactive
|
149
153
|
@logger.info("Proactive download thread started")
|
150
154
|
Thread.start do
|
155
|
+
pool = []
|
151
156
|
self.each_date(from, to) do |current_date|
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
if @cache.full?
|
156
|
-
@logger.info("Full cache. Waiting...")
|
157
|
+
while pool.size > @proactive_pool_size || @cache.full?
|
158
|
+
pool.delete_if { |t| !t.alive? }
|
159
|
+
sleep 0.1
|
157
160
|
end
|
158
161
|
|
159
|
-
|
160
|
-
|
162
|
+
pool << Thread.start do
|
163
|
+
self.cache(current_date)
|
164
|
+
@logger.info("Proactively cached #{current_date}. Cache size: #{@cache.size}")
|
161
165
|
end
|
166
|
+
|
167
|
+
pool.delete_if { |t| !t.alive? }
|
162
168
|
end
|
163
169
|
end
|
164
170
|
end
|
@@ -175,21 +181,22 @@ class OnlineGHAProvider < GHAProvider
|
|
175
181
|
end
|
176
182
|
|
177
183
|
def put(name, content)
|
178
|
-
|
179
|
-
|
180
|
-
|
184
|
+
#filename = "#@folder/#{name}"
|
185
|
+
#File.open(filename, 'w') do |f|
|
186
|
+
#f << content
|
187
|
+
#end
|
181
188
|
|
182
189
|
@mutex.synchronize do
|
183
|
-
@cache[name] =
|
190
|
+
@cache[name] = content
|
184
191
|
end
|
185
192
|
end
|
186
193
|
|
187
194
|
def get(name)
|
188
195
|
@mutex.synchronize do
|
189
|
-
return
|
196
|
+
return @cache.delete(name)
|
190
197
|
end
|
191
198
|
ensure
|
192
|
-
self.unload(name)
|
199
|
+
#self.unload(name)
|
193
200
|
end
|
194
201
|
|
195
202
|
def unload(name)
|
@@ -204,7 +211,7 @@ class OnlineGHAProvider < GHAProvider
|
|
204
211
|
|
205
212
|
def size
|
206
213
|
@mutex.synchronize do
|
207
|
-
@cache.size
|
214
|
+
return @cache.size
|
208
215
|
end
|
209
216
|
end
|
210
217
|
|
metadata
CHANGED
@@ -1,33 +1,33 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gh-archive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.2'
|
4
|
+
version: '0.3'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Simone Scalabrino
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-08-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: code-assertions
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: 1.1.2
|
20
|
-
- - "
|
20
|
+
- - ">="
|
21
21
|
- !ruby/object:Gem::Version
|
22
22
|
version: 1.1.2
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
25
|
version_requirements: !ruby/object:Gem::Requirement
|
26
26
|
requirements:
|
27
|
-
- - "
|
27
|
+
- - "~>"
|
28
28
|
- !ruby/object:Gem::Version
|
29
29
|
version: 1.1.2
|
30
|
-
- - "
|
30
|
+
- - ">="
|
31
31
|
- !ruby/object:Gem::Version
|
32
32
|
version: 1.1.2
|
33
33
|
description: Download and analyze the GitHub events stored at GitHub archive
|
@@ -41,7 +41,7 @@ homepage: https://github.com/intersimone999/gh-archive
|
|
41
41
|
licenses:
|
42
42
|
- GPL-3.0-only
|
43
43
|
metadata: {}
|
44
|
-
post_install_message:
|
44
|
+
post_install_message:
|
45
45
|
rdoc_options: []
|
46
46
|
require_paths:
|
47
47
|
- lib
|
@@ -56,8 +56,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
56
56
|
- !ruby/object:Gem::Version
|
57
57
|
version: '0'
|
58
58
|
requirements: []
|
59
|
-
rubygems_version: 3.
|
60
|
-
signing_key:
|
59
|
+
rubygems_version: 3.2.21
|
60
|
+
signing_key:
|
61
61
|
specification_version: 4
|
62
62
|
summary: GitHub Archive mining utility
|
63
63
|
test_files: []
|