gh-archive 0.5 → 0.6

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/gh-archive.rb +78 -39
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 011777addb798b172d58ffaac2b509ecf85288ee90cd28726c6303d14d39db1b
4
- data.tar.gz: d8714b155567039e5de81f5ae36473c291f0af86701afaebf4527ab962dca240
3
+ metadata.gz: 91b0e957c5176b791d4f49e382680865405e7a6b2b29b349bcbb78b92d884e02
4
+ data.tar.gz: f8ddae3d80e80a24931d8632c9798c8f01520e1fe5ac8a85079c0d0e85eadcbc
5
5
  SHA512:
6
- metadata.gz: db6a72c3e6e31490c0a3b574ee0edf8f8995434f7ba32b6eb93c4ff35b3a8b0bd3e35c85ea207cb000b399fa71b524578067dc923752d5951f156b0f0d21df23
7
- data.tar.gz: a0bac6036c2147e0bd933209f458cb272f6a20d669f2616fc4aa2e6b0a257354e704bcf48f3d658241d81d10d09a94ae07c899223862177fbe5315b9719b4874
6
+ metadata.gz: f7b24be932f58142b36887671b4265e25631345e7b81cc36b264be4a018fc0c4a88b853ae384dc8472876bf0996e904bf499007adc3091ddb511f28c828090fc
7
+ data.tar.gz: 5cbb83495b9bb397a41022cb1bf4bce0344c735d16f9f43fb181b4b109948ac7a75bd44c693338f02b1ff17eeeeb5b83a6add9deeeedbd379a8848614041a3f5
data/lib/gh-archive.rb CHANGED
@@ -14,14 +14,19 @@ module GHAUtils
14
14
 
15
15
  def read_gha_file_content(gz)
16
16
  gzip = Zlib::GzipReader.new(gz)
17
- content = gzip.read
18
- gzip.close
19
-
20
- return content
17
+ return gzip.read
18
+ ensure
19
+ gzip.close if gzip
21
20
  end
22
21
 
23
- def read_gha_file(gz)
24
- content = read_gha_file_content(gz)
22
+ def read_gha_file(file)
23
+ if file.path.end_with?(".json")
24
+ content = file.read
25
+ elsif file.path.end_with?(".gz") || file.path.start_with?("/tmp/open-uri")
26
+ content = read_gha_file_content(file)
27
+ else
28
+ raise "Invalid file extension for #{file.path}: expected `.json.gz` or `json`,"
29
+ end
25
30
 
26
31
  result = []
27
32
  content.lines.each do |line|
@@ -31,11 +36,11 @@ module GHAUtils
31
36
  return result
32
37
  end
33
38
 
34
- def each_date(from, to)
35
- current_date = from
36
- while current_date < to
37
- yield current_date
38
- current_date += 3600
39
+ def each_time(from, to)
40
+ current_time = from
41
+ while current_time < to
42
+ yield current_time
43
+ current_time += 3600
39
44
  end
40
45
  end
41
46
  end
@@ -73,13 +78,18 @@ class GHAProvider
73
78
  end
74
79
 
75
80
  def each(from = Time.gm(2015, 1, 1), to = Time.now)
76
- self.each_date(from, to) do |current_date|
81
+ exceptions = []
82
+
83
+ self.each_time(from, to) do |current_time|
77
84
  events = []
78
85
  begin
79
- events = self.get(current_date)
80
- @logger.info("Scanned #{current_date}")
81
- rescue
82
- @logger.error($!)
86
+ events = self.get(current_time)
87
+ rescue GHAException => e
88
+ @logger.warn(e.message)
89
+ next
90
+ rescue => e
91
+ @logger.error("An exception occurred for #{current_time}: #{e.message}")
92
+ exceptions << e
83
93
  next
84
94
  end
85
95
 
@@ -94,12 +104,19 @@ class GHAProvider
94
104
  end
95
105
  next if skip
96
106
 
97
- yield event, current_date
107
+ yield event, current_time
98
108
  end
99
109
 
110
+ @logger.info("Scanned #{current_time}")
111
+
100
112
  events.clear
101
113
  GC.start
102
114
  end
115
+
116
+ return exceptions
117
+ end
118
+
119
+ class GHAException < Exception
103
120
  end
104
121
  end
105
122
 
@@ -132,16 +149,21 @@ class OnlineGHAProvider < GHAProvider
132
149
  return self.read_gha_file(gz)
133
150
  end
134
151
  end
135
- rescue Errno::ECONNRESET
152
+ rescue Errno::ECONNRESET => e
153
+ @logger.warn("A server error temporary prevented the download of #{current_time}: " + e.message)
136
154
  next
137
- rescue Zlib::GzipFile::Error
138
- raise $!
139
- rescue
140
- @logger.warn($!)
155
+ rescue OpenURI::HTTPError => e
156
+ code = e.io.status[0]
157
+ if code.start_with?("5")
158
+ @logger.warn("A server error temporary prevented the download of #{current_time}: " + e.message)
159
+ next
160
+ else
161
+ raise e
162
+ end
141
163
  end
142
164
  end
143
165
 
144
- raise DownloadArchiveException, "Exceeded maximum number of tentative downloads."
166
+ raise DownloadArchiveException, "Exceeded maximum number of tentative downloads for #{current_time}."
145
167
  end
146
168
 
147
169
  def cache(current_time)
@@ -157,12 +179,17 @@ class OnlineGHAProvider < GHAProvider
157
179
  @cache.put(filename, content)
158
180
  return
159
181
  end
160
- rescue Errno::ECONNRESET
182
+ rescue Errno::ECONNRESET => e
183
+ @logger.warn("A server error temporary prevented the download of #{current_time}: " + e.message)
161
184
  next
162
- rescue Zlib::GzipFile::Error
163
- raise $!
164
- rescue
165
- @logger.warn($!)
185
+ rescue OpenURI::HTTPError => e
186
+ code = e.io.status[0]
187
+ if code.start_with?("5")
188
+ @logger.warn("A server error temporary prevented the download of #{current_time}: " + e.message)
189
+ next
190
+ else
191
+ raise e
192
+ end
166
193
  end
167
194
  end
168
195
  end
@@ -172,11 +199,11 @@ class OnlineGHAProvider < GHAProvider
172
199
  any_ready = Thread.promise
173
200
 
174
201
  @logger.info("Proactively scheduling download tasks...")
175
- self.each_date(from, to) do |current_date|
176
- @pool.process(current_date) do |current_date|
177
- cache(current_date)
202
+ self.each_time(from, to) do |current_time|
203
+ @pool.process(current_time) do |current_time|
204
+ cache(current_time)
178
205
  any_ready << true
179
- @logger.info("Proactively cached #{current_date}. Cache size: #{@cache.size}")
206
+ @logger.info("Proactively cached #{current_time}. Cache size: #{@cache.size}")
180
207
  end
181
208
  end
182
209
 
@@ -221,7 +248,7 @@ class OnlineGHAProvider < GHAProvider
221
248
  end
222
249
  end
223
250
 
224
- class DownloadArchiveException < Exception
251
+ class DownloadArchiveException < GHAProvider::GHAException
225
252
  end
226
253
  end
227
254
 
@@ -234,8 +261,20 @@ class FolderGHAProvider < GHAProvider
234
261
 
235
262
  def get(current_time)
236
263
  filename = self.get_gha_filename(current_time)
237
- File.open(File.join(@folder, filename), "rb") do |gz|
238
- return self.read_gha_file(gz)
264
+ complete_filename = File.join(@folder, filename)
265
+ mode = "rb"
266
+
267
+ unless FileTest.exist?(complete_filename)
268
+ complete_filename = complete_filename.sub(".gz", "")
269
+ mode = "r"
270
+ end
271
+
272
+ unless FileTest.exist?(complete_filename)
273
+ raise GHAException.new("Cannot find any file (neither `.json.gz` nor `.json`) for #{current_time}")
274
+ end
275
+
276
+ File.open(complete_filename, mode) do |file|
277
+ return self.read_gha_file(file)
239
278
  end
240
279
  end
241
280
  end
@@ -264,17 +303,17 @@ class GHADownloader
264
303
 
265
304
  def download(from = Time.gm(2015, 1, 1), to = Time.now)
266
305
  archive = []
267
- self.each_date(from, to) do |current_date|
268
- filename = self.get_gha_filename(current_date)
306
+ self.each_time(from, to) do |current_time|
307
+ filename = self.get_gha_filename(current_time)
269
308
  out_filename = filename.clone
270
309
  out_filename.gsub!(".json.gz", ".json") if @decompress
271
310
 
272
311
  target_file = File.join(@folder, out_filename)
273
312
  if FileTest.exist?(target_file)
274
- @logger.info("Skipping existing file for #{current_date}")
313
+ @logger.info("Skipping existing file for #{current_time}")
275
314
  next
276
315
  else
277
- @logger.info("Downloading file for #{current_date}")
316
+ @logger.info("Downloading file for #{current_time}")
278
317
  end
279
318
 
280
319
  File.open(target_file, 'w') do |f|
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gh-archive
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.5'
4
+ version: '0.6'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Simone Scalabrino
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-08-12 00:00:00.000000000 Z
11
+ date: 2021-08-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: code-assertions