gh-archive 0.5 → 0.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/gh-archive.rb +78 -39
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 011777addb798b172d58ffaac2b509ecf85288ee90cd28726c6303d14d39db1b
4
- data.tar.gz: d8714b155567039e5de81f5ae36473c291f0af86701afaebf4527ab962dca240
3
+ metadata.gz: 91b0e957c5176b791d4f49e382680865405e7a6b2b29b349bcbb78b92d884e02
4
+ data.tar.gz: f8ddae3d80e80a24931d8632c9798c8f01520e1fe5ac8a85079c0d0e85eadcbc
5
5
  SHA512:
6
- metadata.gz: db6a72c3e6e31490c0a3b574ee0edf8f8995434f7ba32b6eb93c4ff35b3a8b0bd3e35c85ea207cb000b399fa71b524578067dc923752d5951f156b0f0d21df23
7
- data.tar.gz: a0bac6036c2147e0bd933209f458cb272f6a20d669f2616fc4aa2e6b0a257354e704bcf48f3d658241d81d10d09a94ae07c899223862177fbe5315b9719b4874
6
+ metadata.gz: f7b24be932f58142b36887671b4265e25631345e7b81cc36b264be4a018fc0c4a88b853ae384dc8472876bf0996e904bf499007adc3091ddb511f28c828090fc
7
+ data.tar.gz: 5cbb83495b9bb397a41022cb1bf4bce0344c735d16f9f43fb181b4b109948ac7a75bd44c693338f02b1ff17eeeeb5b83a6add9deeeedbd379a8848614041a3f5
data/lib/gh-archive.rb CHANGED
@@ -14,14 +14,19 @@ module GHAUtils
14
14
 
15
15
  def read_gha_file_content(gz)
16
16
  gzip = Zlib::GzipReader.new(gz)
17
- content = gzip.read
18
- gzip.close
19
-
20
- return content
17
+ return gzip.read
18
+ ensure
19
+ gzip.close if gzip
21
20
  end
22
21
 
23
- def read_gha_file(gz)
24
- content = read_gha_file_content(gz)
22
+ def read_gha_file(file)
23
+ if file.path.end_with?(".json")
24
+ content = file.read
25
+ elsif file.path.end_with?(".gz") || file.path.start_with?("/tmp/open-uri")
26
+ content = read_gha_file_content(file)
27
+ else
28
+ raise "Invalid file extension for #{file.path}: expected `.json.gz` or `json`,"
29
+ end
25
30
 
26
31
  result = []
27
32
  content.lines.each do |line|
@@ -31,11 +36,11 @@ module GHAUtils
31
36
  return result
32
37
  end
33
38
 
34
- def each_date(from, to)
35
- current_date = from
36
- while current_date < to
37
- yield current_date
38
- current_date += 3600
39
+ def each_time(from, to)
40
+ current_time = from
41
+ while current_time < to
42
+ yield current_time
43
+ current_time += 3600
39
44
  end
40
45
  end
41
46
  end
@@ -73,13 +78,18 @@ class GHAProvider
73
78
  end
74
79
 
75
80
  def each(from = Time.gm(2015, 1, 1), to = Time.now)
76
- self.each_date(from, to) do |current_date|
81
+ exceptions = []
82
+
83
+ self.each_time(from, to) do |current_time|
77
84
  events = []
78
85
  begin
79
- events = self.get(current_date)
80
- @logger.info("Scanned #{current_date}")
81
- rescue
82
- @logger.error($!)
86
+ events = self.get(current_time)
87
+ rescue GHAException => e
88
+ @logger.warn(e.message)
89
+ next
90
+ rescue => e
91
+ @logger.error("An exception occurred for #{current_time}: #{e.message}")
92
+ exceptions << e
83
93
  next
84
94
  end
85
95
 
@@ -94,12 +104,19 @@ class GHAProvider
94
104
  end
95
105
  next if skip
96
106
 
97
- yield event, current_date
107
+ yield event, current_time
98
108
  end
99
109
 
110
+ @logger.info("Scanned #{current_time}")
111
+
100
112
  events.clear
101
113
  GC.start
102
114
  end
115
+
116
+ return exceptions
117
+ end
118
+
119
+ class GHAException < Exception
103
120
  end
104
121
  end
105
122
 
@@ -132,16 +149,21 @@ class OnlineGHAProvider < GHAProvider
132
149
  return self.read_gha_file(gz)
133
150
  end
134
151
  end
135
- rescue Errno::ECONNRESET
152
+ rescue Errno::ECONNRESET => e
153
+ @logger.warn("A server error temporary prevented the download of #{current_time}: " + e.message)
136
154
  next
137
- rescue Zlib::GzipFile::Error
138
- raise $!
139
- rescue
140
- @logger.warn($!)
155
+ rescue OpenURI::HTTPError => e
156
+ code = e.io.status[0]
157
+ if code.start_with?("5")
158
+ @logger.warn("A server error temporary prevented the download of #{current_time}: " + e.message)
159
+ next
160
+ else
161
+ raise e
162
+ end
141
163
  end
142
164
  end
143
165
 
144
- raise DownloadArchiveException, "Exceeded maximum number of tentative downloads."
166
+ raise DownloadArchiveException, "Exceeded maximum number of tentative downloads for #{current_time}."
145
167
  end
146
168
 
147
169
  def cache(current_time)
@@ -157,12 +179,17 @@ class OnlineGHAProvider < GHAProvider
157
179
  @cache.put(filename, content)
158
180
  return
159
181
  end
160
- rescue Errno::ECONNRESET
182
+ rescue Errno::ECONNRESET => e
183
+ @logger.warn("A server error temporary prevented the download of #{current_time}: " + e.message)
161
184
  next
162
- rescue Zlib::GzipFile::Error
163
- raise $!
164
- rescue
165
- @logger.warn($!)
185
+ rescue OpenURI::HTTPError => e
186
+ code = e.io.status[0]
187
+ if code.start_with?("5")
188
+ @logger.warn("A server error temporary prevented the download of #{current_time}: " + e.message)
189
+ next
190
+ else
191
+ raise e
192
+ end
166
193
  end
167
194
  end
168
195
  end
@@ -172,11 +199,11 @@ class OnlineGHAProvider < GHAProvider
172
199
  any_ready = Thread.promise
173
200
 
174
201
  @logger.info("Proactively scheduling download tasks...")
175
- self.each_date(from, to) do |current_date|
176
- @pool.process(current_date) do |current_date|
177
- cache(current_date)
202
+ self.each_time(from, to) do |current_time|
203
+ @pool.process(current_time) do |current_time|
204
+ cache(current_time)
178
205
  any_ready << true
179
- @logger.info("Proactively cached #{current_date}. Cache size: #{@cache.size}")
206
+ @logger.info("Proactively cached #{current_time}. Cache size: #{@cache.size}")
180
207
  end
181
208
  end
182
209
 
@@ -221,7 +248,7 @@ class OnlineGHAProvider < GHAProvider
221
248
  end
222
249
  end
223
250
 
224
- class DownloadArchiveException < Exception
251
+ class DownloadArchiveException < GHAProvider::GHAException
225
252
  end
226
253
  end
227
254
 
@@ -234,8 +261,20 @@ class FolderGHAProvider < GHAProvider
234
261
 
235
262
  def get(current_time)
236
263
  filename = self.get_gha_filename(current_time)
237
- File.open(File.join(@folder, filename), "rb") do |gz|
238
- return self.read_gha_file(gz)
264
+ complete_filename = File.join(@folder, filename)
265
+ mode = "rb"
266
+
267
+ unless FileTest.exist?(complete_filename)
268
+ complete_filename = complete_filename.sub(".gz", "")
269
+ mode = "r"
270
+ end
271
+
272
+ unless FileTest.exist?(complete_filename)
273
+ raise GHAException.new("Cannot find any file (neither `.json.gz` nor `.json`) for #{current_time}")
274
+ end
275
+
276
+ File.open(complete_filename, mode) do |file|
277
+ return self.read_gha_file(file)
239
278
  end
240
279
  end
241
280
  end
@@ -264,17 +303,17 @@ class GHADownloader
264
303
 
265
304
  def download(from = Time.gm(2015, 1, 1), to = Time.now)
266
305
  archive = []
267
- self.each_date(from, to) do |current_date|
268
- filename = self.get_gha_filename(current_date)
306
+ self.each_time(from, to) do |current_time|
307
+ filename = self.get_gha_filename(current_time)
269
308
  out_filename = filename.clone
270
309
  out_filename.gsub!(".json.gz", ".json") if @decompress
271
310
 
272
311
  target_file = File.join(@folder, out_filename)
273
312
  if FileTest.exist?(target_file)
274
- @logger.info("Skipping existing file for #{current_date}")
313
+ @logger.info("Skipping existing file for #{current_time}")
275
314
  next
276
315
  else
277
- @logger.info("Downloading file for #{current_date}")
316
+ @logger.info("Downloading file for #{current_time}")
278
317
  end
279
318
 
280
319
  File.open(target_file, 'w') do |f|
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gh-archive
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.5'
4
+ version: '0.6'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Simone Scalabrino
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-08-12 00:00:00.000000000 Z
11
+ date: 2021-08-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: code-assertions