gh-archive 0.5 → 0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/gh-archive.rb +78 -39
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 91b0e957c5176b791d4f49e382680865405e7a6b2b29b349bcbb78b92d884e02
|
4
|
+
data.tar.gz: f8ddae3d80e80a24931d8632c9798c8f01520e1fe5ac8a85079c0d0e85eadcbc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f7b24be932f58142b36887671b4265e25631345e7b81cc36b264be4a018fc0c4a88b853ae384dc8472876bf0996e904bf499007adc3091ddb511f28c828090fc
|
7
|
+
data.tar.gz: 5cbb83495b9bb397a41022cb1bf4bce0344c735d16f9f43fb181b4b109948ac7a75bd44c693338f02b1ff17eeeeb5b83a6add9deeeedbd379a8848614041a3f5
|
data/lib/gh-archive.rb
CHANGED
@@ -14,14 +14,19 @@ module GHAUtils
|
|
14
14
|
|
15
15
|
def read_gha_file_content(gz)
|
16
16
|
gzip = Zlib::GzipReader.new(gz)
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
return content
|
17
|
+
return gzip.read
|
18
|
+
ensure
|
19
|
+
gzip.close if gzip
|
21
20
|
end
|
22
21
|
|
23
|
-
def read_gha_file(
|
24
|
-
|
22
|
+
def read_gha_file(file)
|
23
|
+
if file.path.end_with?(".json")
|
24
|
+
content = file.read
|
25
|
+
elsif file.path.end_with?(".gz") || file.path.start_with?("/tmp/open-uri")
|
26
|
+
content = read_gha_file_content(file)
|
27
|
+
else
|
28
|
+
raise "Invalid file extension for #{file.path}: expected `.json.gz` or `json`,"
|
29
|
+
end
|
25
30
|
|
26
31
|
result = []
|
27
32
|
content.lines.each do |line|
|
@@ -31,11 +36,11 @@ module GHAUtils
|
|
31
36
|
return result
|
32
37
|
end
|
33
38
|
|
34
|
-
def
|
35
|
-
|
36
|
-
while
|
37
|
-
yield
|
38
|
-
|
39
|
+
def each_time(from, to)
|
40
|
+
current_time = from
|
41
|
+
while current_time < to
|
42
|
+
yield current_time
|
43
|
+
current_time += 3600
|
39
44
|
end
|
40
45
|
end
|
41
46
|
end
|
@@ -73,13 +78,18 @@ class GHAProvider
|
|
73
78
|
end
|
74
79
|
|
75
80
|
def each(from = Time.gm(2015, 1, 1), to = Time.now)
|
76
|
-
|
81
|
+
exceptions = []
|
82
|
+
|
83
|
+
self.each_time(from, to) do |current_time|
|
77
84
|
events = []
|
78
85
|
begin
|
79
|
-
events = self.get(
|
80
|
-
|
81
|
-
|
82
|
-
|
86
|
+
events = self.get(current_time)
|
87
|
+
rescue GHAException => e
|
88
|
+
@logger.warn(e.message)
|
89
|
+
next
|
90
|
+
rescue => e
|
91
|
+
@logger.error("An exception occurred for #{current_time}: #{e.message}")
|
92
|
+
exceptions << e
|
83
93
|
next
|
84
94
|
end
|
85
95
|
|
@@ -94,12 +104,19 @@ class GHAProvider
|
|
94
104
|
end
|
95
105
|
next if skip
|
96
106
|
|
97
|
-
yield event,
|
107
|
+
yield event, current_time
|
98
108
|
end
|
99
109
|
|
110
|
+
@logger.info("Scanned #{current_time}")
|
111
|
+
|
100
112
|
events.clear
|
101
113
|
GC.start
|
102
114
|
end
|
115
|
+
|
116
|
+
return exceptions
|
117
|
+
end
|
118
|
+
|
119
|
+
class GHAException < Exception
|
103
120
|
end
|
104
121
|
end
|
105
122
|
|
@@ -132,16 +149,21 @@ class OnlineGHAProvider < GHAProvider
|
|
132
149
|
return self.read_gha_file(gz)
|
133
150
|
end
|
134
151
|
end
|
135
|
-
rescue Errno::ECONNRESET
|
152
|
+
rescue Errno::ECONNRESET => e
|
153
|
+
@logger.warn("A server error temporary prevented the download of #{current_time}: " + e.message)
|
136
154
|
next
|
137
|
-
rescue
|
138
|
-
|
139
|
-
|
140
|
-
|
155
|
+
rescue OpenURI::HTTPError => e
|
156
|
+
code = e.io.status[0]
|
157
|
+
if code.start_with?("5")
|
158
|
+
@logger.warn("A server error temporary prevented the download of #{current_time}: " + e.message)
|
159
|
+
next
|
160
|
+
else
|
161
|
+
raise e
|
162
|
+
end
|
141
163
|
end
|
142
164
|
end
|
143
165
|
|
144
|
-
raise DownloadArchiveException, "Exceeded maximum number of tentative downloads."
|
166
|
+
raise DownloadArchiveException, "Exceeded maximum number of tentative downloads for #{current_time}."
|
145
167
|
end
|
146
168
|
|
147
169
|
def cache(current_time)
|
@@ -157,12 +179,17 @@ class OnlineGHAProvider < GHAProvider
|
|
157
179
|
@cache.put(filename, content)
|
158
180
|
return
|
159
181
|
end
|
160
|
-
rescue Errno::ECONNRESET
|
182
|
+
rescue Errno::ECONNRESET => e
|
183
|
+
@logger.warn("A server error temporary prevented the download of #{current_time}: " + e.message)
|
161
184
|
next
|
162
|
-
rescue
|
163
|
-
|
164
|
-
|
165
|
-
|
185
|
+
rescue OpenURI::HTTPError => e
|
186
|
+
code = e.io.status[0]
|
187
|
+
if code.start_with?("5")
|
188
|
+
@logger.warn("A server error temporary prevented the download of #{current_time}: " + e.message)
|
189
|
+
next
|
190
|
+
else
|
191
|
+
raise e
|
192
|
+
end
|
166
193
|
end
|
167
194
|
end
|
168
195
|
end
|
@@ -172,11 +199,11 @@ class OnlineGHAProvider < GHAProvider
|
|
172
199
|
any_ready = Thread.promise
|
173
200
|
|
174
201
|
@logger.info("Proactively scheduling download tasks...")
|
175
|
-
self.
|
176
|
-
@pool.process(
|
177
|
-
cache(
|
202
|
+
self.each_time(from, to) do |current_time|
|
203
|
+
@pool.process(current_time) do |current_time|
|
204
|
+
cache(current_time)
|
178
205
|
any_ready << true
|
179
|
-
@logger.info("Proactively cached #{
|
206
|
+
@logger.info("Proactively cached #{current_time}. Cache size: #{@cache.size}")
|
180
207
|
end
|
181
208
|
end
|
182
209
|
|
@@ -221,7 +248,7 @@ class OnlineGHAProvider < GHAProvider
|
|
221
248
|
end
|
222
249
|
end
|
223
250
|
|
224
|
-
class DownloadArchiveException <
|
251
|
+
class DownloadArchiveException < GHAProvider::GHAException
|
225
252
|
end
|
226
253
|
end
|
227
254
|
|
@@ -234,8 +261,20 @@ class FolderGHAProvider < GHAProvider
|
|
234
261
|
|
235
262
|
def get(current_time)
|
236
263
|
filename = self.get_gha_filename(current_time)
|
237
|
-
File.
|
238
|
-
|
264
|
+
complete_filename = File.join(@folder, filename)
|
265
|
+
mode = "rb"
|
266
|
+
|
267
|
+
unless FileTest.exist?(complete_filename)
|
268
|
+
complete_filename = complete_filename.sub(".gz", "")
|
269
|
+
mode = "r"
|
270
|
+
end
|
271
|
+
|
272
|
+
unless FileTest.exist?(complete_filename)
|
273
|
+
raise GHAException.new("Cannot find any file (neither `.json.gz` nor `.json`) for #{current_time}")
|
274
|
+
end
|
275
|
+
|
276
|
+
File.open(complete_filename, mode) do |file|
|
277
|
+
return self.read_gha_file(file)
|
239
278
|
end
|
240
279
|
end
|
241
280
|
end
|
@@ -264,17 +303,17 @@ class GHADownloader
|
|
264
303
|
|
265
304
|
def download(from = Time.gm(2015, 1, 1), to = Time.now)
|
266
305
|
archive = []
|
267
|
-
self.
|
268
|
-
filename = self.get_gha_filename(
|
306
|
+
self.each_time(from, to) do |current_time|
|
307
|
+
filename = self.get_gha_filename(current_time)
|
269
308
|
out_filename = filename.clone
|
270
309
|
out_filename.gsub!(".json.gz", ".json") if @decompress
|
271
310
|
|
272
311
|
target_file = File.join(@folder, out_filename)
|
273
312
|
if FileTest.exist?(target_file)
|
274
|
-
@logger.info("Skipping existing file for #{
|
313
|
+
@logger.info("Skipping existing file for #{current_time}")
|
275
314
|
next
|
276
315
|
else
|
277
|
-
@logger.info("Downloading file for #{
|
316
|
+
@logger.info("Downloading file for #{current_time}")
|
278
317
|
end
|
279
318
|
|
280
319
|
File.open(target_file, 'w') do |f|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gh-archive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.
|
4
|
+
version: '0.6'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Simone Scalabrino
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-08-
|
11
|
+
date: 2021-08-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: code-assertions
|