gh-archive 0.5 → 0.6
This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/gh-archive.rb +78 -39
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 91b0e957c5176b791d4f49e382680865405e7a6b2b29b349bcbb78b92d884e02
|
4
|
+
data.tar.gz: f8ddae3d80e80a24931d8632c9798c8f01520e1fe5ac8a85079c0d0e85eadcbc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f7b24be932f58142b36887671b4265e25631345e7b81cc36b264be4a018fc0c4a88b853ae384dc8472876bf0996e904bf499007adc3091ddb511f28c828090fc
|
7
|
+
data.tar.gz: 5cbb83495b9bb397a41022cb1bf4bce0344c735d16f9f43fb181b4b109948ac7a75bd44c693338f02b1ff17eeeeb5b83a6add9deeeedbd379a8848614041a3f5
|
data/lib/gh-archive.rb
CHANGED
@@ -14,14 +14,19 @@ module GHAUtils
|
|
14
14
|
|
15
15
|
def read_gha_file_content(gz)
|
16
16
|
gzip = Zlib::GzipReader.new(gz)
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
return content
|
17
|
+
return gzip.read
|
18
|
+
ensure
|
19
|
+
gzip.close if gzip
|
21
20
|
end
|
22
21
|
|
23
|
-
def read_gha_file(
|
24
|
-
|
22
|
+
def read_gha_file(file)
|
23
|
+
if file.path.end_with?(".json")
|
24
|
+
content = file.read
|
25
|
+
elsif file.path.end_with?(".gz") || file.path.start_with?("/tmp/open-uri")
|
26
|
+
content = read_gha_file_content(file)
|
27
|
+
else
|
28
|
+
raise "Invalid file extension for #{file.path}: expected `.json.gz` or `json`,"
|
29
|
+
end
|
25
30
|
|
26
31
|
result = []
|
27
32
|
content.lines.each do |line|
|
@@ -31,11 +36,11 @@ module GHAUtils
|
|
31
36
|
return result
|
32
37
|
end
|
33
38
|
|
34
|
-
def
|
35
|
-
|
36
|
-
while
|
37
|
-
yield
|
38
|
-
|
39
|
+
def each_time(from, to)
|
40
|
+
current_time = from
|
41
|
+
while current_time < to
|
42
|
+
yield current_time
|
43
|
+
current_time += 3600
|
39
44
|
end
|
40
45
|
end
|
41
46
|
end
|
@@ -73,13 +78,18 @@ class GHAProvider
|
|
73
78
|
end
|
74
79
|
|
75
80
|
def each(from = Time.gm(2015, 1, 1), to = Time.now)
|
76
|
-
|
81
|
+
exceptions = []
|
82
|
+
|
83
|
+
self.each_time(from, to) do |current_time|
|
77
84
|
events = []
|
78
85
|
begin
|
79
|
-
events = self.get(
|
80
|
-
|
81
|
-
|
82
|
-
|
86
|
+
events = self.get(current_time)
|
87
|
+
rescue GHAException => e
|
88
|
+
@logger.warn(e.message)
|
89
|
+
next
|
90
|
+
rescue => e
|
91
|
+
@logger.error("An exception occurred for #{current_time}: #{e.message}")
|
92
|
+
exceptions << e
|
83
93
|
next
|
84
94
|
end
|
85
95
|
|
@@ -94,12 +104,19 @@ class GHAProvider
|
|
94
104
|
end
|
95
105
|
next if skip
|
96
106
|
|
97
|
-
yield event,
|
107
|
+
yield event, current_time
|
98
108
|
end
|
99
109
|
|
110
|
+
@logger.info("Scanned #{current_time}")
|
111
|
+
|
100
112
|
events.clear
|
101
113
|
GC.start
|
102
114
|
end
|
115
|
+
|
116
|
+
return exceptions
|
117
|
+
end
|
118
|
+
|
119
|
+
class GHAException < Exception
|
103
120
|
end
|
104
121
|
end
|
105
122
|
|
@@ -132,16 +149,21 @@ class OnlineGHAProvider < GHAProvider
|
|
132
149
|
return self.read_gha_file(gz)
|
133
150
|
end
|
134
151
|
end
|
135
|
-
rescue Errno::ECONNRESET
|
152
|
+
rescue Errno::ECONNRESET => e
|
153
|
+
@logger.warn("A server error temporary prevented the download of #{current_time}: " + e.message)
|
136
154
|
next
|
137
|
-
rescue
|
138
|
-
|
139
|
-
|
140
|
-
|
155
|
+
rescue OpenURI::HTTPError => e
|
156
|
+
code = e.io.status[0]
|
157
|
+
if code.start_with?("5")
|
158
|
+
@logger.warn("A server error temporary prevented the download of #{current_time}: " + e.message)
|
159
|
+
next
|
160
|
+
else
|
161
|
+
raise e
|
162
|
+
end
|
141
163
|
end
|
142
164
|
end
|
143
165
|
|
144
|
-
raise DownloadArchiveException, "Exceeded maximum number of tentative downloads."
|
166
|
+
raise DownloadArchiveException, "Exceeded maximum number of tentative downloads for #{current_time}."
|
145
167
|
end
|
146
168
|
|
147
169
|
def cache(current_time)
|
@@ -157,12 +179,17 @@ class OnlineGHAProvider < GHAProvider
|
|
157
179
|
@cache.put(filename, content)
|
158
180
|
return
|
159
181
|
end
|
160
|
-
rescue Errno::ECONNRESET
|
182
|
+
rescue Errno::ECONNRESET => e
|
183
|
+
@logger.warn("A server error temporary prevented the download of #{current_time}: " + e.message)
|
161
184
|
next
|
162
|
-
rescue
|
163
|
-
|
164
|
-
|
165
|
-
|
185
|
+
rescue OpenURI::HTTPError => e
|
186
|
+
code = e.io.status[0]
|
187
|
+
if code.start_with?("5")
|
188
|
+
@logger.warn("A server error temporary prevented the download of #{current_time}: " + e.message)
|
189
|
+
next
|
190
|
+
else
|
191
|
+
raise e
|
192
|
+
end
|
166
193
|
end
|
167
194
|
end
|
168
195
|
end
|
@@ -172,11 +199,11 @@ class OnlineGHAProvider < GHAProvider
|
|
172
199
|
any_ready = Thread.promise
|
173
200
|
|
174
201
|
@logger.info("Proactively scheduling download tasks...")
|
175
|
-
self.
|
176
|
-
@pool.process(
|
177
|
-
cache(
|
202
|
+
self.each_time(from, to) do |current_time|
|
203
|
+
@pool.process(current_time) do |current_time|
|
204
|
+
cache(current_time)
|
178
205
|
any_ready << true
|
179
|
-
@logger.info("Proactively cached #{
|
206
|
+
@logger.info("Proactively cached #{current_time}. Cache size: #{@cache.size}")
|
180
207
|
end
|
181
208
|
end
|
182
209
|
|
@@ -221,7 +248,7 @@ class OnlineGHAProvider < GHAProvider
|
|
221
248
|
end
|
222
249
|
end
|
223
250
|
|
224
|
-
class DownloadArchiveException <
|
251
|
+
class DownloadArchiveException < GHAProvider::GHAException
|
225
252
|
end
|
226
253
|
end
|
227
254
|
|
@@ -234,8 +261,20 @@ class FolderGHAProvider < GHAProvider
|
|
234
261
|
|
235
262
|
def get(current_time)
|
236
263
|
filename = self.get_gha_filename(current_time)
|
237
|
-
File.
|
238
|
-
|
264
|
+
complete_filename = File.join(@folder, filename)
|
265
|
+
mode = "rb"
|
266
|
+
|
267
|
+
unless FileTest.exist?(complete_filename)
|
268
|
+
complete_filename = complete_filename.sub(".gz", "")
|
269
|
+
mode = "r"
|
270
|
+
end
|
271
|
+
|
272
|
+
unless FileTest.exist?(complete_filename)
|
273
|
+
raise GHAException.new("Cannot find any file (neither `.json.gz` nor `.json`) for #{current_time}")
|
274
|
+
end
|
275
|
+
|
276
|
+
File.open(complete_filename, mode) do |file|
|
277
|
+
return self.read_gha_file(file)
|
239
278
|
end
|
240
279
|
end
|
241
280
|
end
|
@@ -264,17 +303,17 @@ class GHADownloader
|
|
264
303
|
|
265
304
|
def download(from = Time.gm(2015, 1, 1), to = Time.now)
|
266
305
|
archive = []
|
267
|
-
self.
|
268
|
-
filename = self.get_gha_filename(
|
306
|
+
self.each_time(from, to) do |current_time|
|
307
|
+
filename = self.get_gha_filename(current_time)
|
269
308
|
out_filename = filename.clone
|
270
309
|
out_filename.gsub!(".json.gz", ".json") if @decompress
|
271
310
|
|
272
311
|
target_file = File.join(@folder, out_filename)
|
273
312
|
if FileTest.exist?(target_file)
|
274
|
-
@logger.info("Skipping existing file for #{
|
313
|
+
@logger.info("Skipping existing file for #{current_time}")
|
275
314
|
next
|
276
315
|
else
|
277
|
-
@logger.info("Downloading file for #{
|
316
|
+
@logger.info("Downloading file for #{current_time}")
|
278
317
|
end
|
279
318
|
|
280
319
|
File.open(target_file, 'w') do |f|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gh-archive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.
|
4
|
+
version: '0.6'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Simone Scalabrino
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-08-
|
11
|
+
date: 2021-08-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: code-assertions
|