gh-archive 0.12 → 0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/gh-archive.rb +17 -3
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1f339fca5ebd3f7ee085fa257b47567993ab776efd3ba143e424a6bab2ca1712
|
4
|
+
data.tar.gz: bd3709e9067fbc5ba0a7b92f156c3fa6f87d26bc81b0a969f7020526f31b5264
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9d56ecf4dc4101cf162d02e49f62dc77592bc652b9933b7413cb40f56eb25c595a20eed8fbc42e1f96830048d16520299972094b0d29b14f400befc5c21e1672
|
7
|
+
data.tar.gz: 5a7e8c158271b1b540e76e1b68f93ece7cebed8d29057728c6ac93b536f3f3417b89fffddcbe57c80e75269cda9ff719fef84e5bb0558dd52fd8842f80e44f55
|
data/lib/gh-archive.rb
CHANGED
@@ -285,7 +285,12 @@ class OnlineGHAProvider < GHAProvider
|
|
285
285
|
sleep 1
|
286
286
|
end
|
287
287
|
|
288
|
-
|
288
|
+
data = @cache.get(filename)
|
289
|
+
if data
|
290
|
+
return data
|
291
|
+
else
|
292
|
+
raise DownloadArchiveException, "Could not scan #{filename}: data unavailable."
|
293
|
+
end
|
289
294
|
else
|
290
295
|
URI.open("http://data.gharchive.org/#{filename}") do |gz|
|
291
296
|
return self.read_gha_file(gz)
|
@@ -313,9 +318,10 @@ class OnlineGHAProvider < GHAProvider
|
|
313
318
|
while @cache.full?
|
314
319
|
sleep 1
|
315
320
|
end
|
321
|
+
|
322
|
+
filename = self.get_gha_filename(current_time)
|
316
323
|
@max_retries.times do
|
317
324
|
begin
|
318
|
-
filename = self.get_gha_filename(current_time)
|
319
325
|
URI.open("http://data.gharchive.org/#{filename}") do |gz|
|
320
326
|
content = self.read_gha_file(gz)
|
321
327
|
@cache.put(filename, content)
|
@@ -329,11 +335,17 @@ class OnlineGHAProvider < GHAProvider
|
|
329
335
|
if code.start_with?("5")
|
330
336
|
@logger.warn("A server error temporary prevented the download of #{current_time}: " + e.message)
|
331
337
|
next
|
338
|
+
elsif code == "404"
|
339
|
+
@logger.error("File for #{current_time} not found. Skipping because: " + e.message)
|
332
340
|
else
|
333
341
|
raise e
|
334
342
|
end
|
343
|
+
rescue Zlib::GzipFile::Error => e
|
344
|
+
@logger.warn("Could not unzip, cache and analyze the zip at #{current_time}: " + e.message)
|
335
345
|
end
|
336
346
|
end
|
347
|
+
|
348
|
+
@cache.put(filename, nil) unless @cache.has?(filename)
|
337
349
|
end
|
338
350
|
|
339
351
|
def each(from = Time.gm(2015, 1, 1), to = Time.now)
|
@@ -383,7 +395,9 @@ class OnlineGHAProvider < GHAProvider
|
|
383
395
|
end
|
384
396
|
|
385
397
|
def has?(name)
|
386
|
-
|
398
|
+
@mutex.synchronize do
|
399
|
+
return @cache.has_key?(name)
|
400
|
+
end
|
387
401
|
end
|
388
402
|
|
389
403
|
def full?
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gh-archive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.12'
|
4
|
+
version: '0.16'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Simone Scalabrino
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-10-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: code-assertions
|
@@ -63,7 +63,7 @@ homepage: https://github.com/intersimone999/gh-archive
|
|
63
63
|
licenses:
|
64
64
|
- GPL-3.0-only
|
65
65
|
metadata: {}
|
66
|
-
post_install_message:
|
66
|
+
post_install_message:
|
67
67
|
rdoc_options: []
|
68
68
|
require_paths:
|
69
69
|
- lib
|
@@ -78,8 +78,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
78
78
|
- !ruby/object:Gem::Version
|
79
79
|
version: '0'
|
80
80
|
requirements: []
|
81
|
-
rubygems_version: 3.2.
|
82
|
-
signing_key:
|
81
|
+
rubygems_version: 3.2.21
|
82
|
+
signing_key:
|
83
83
|
specification_version: 4
|
84
84
|
summary: GitHub Archive mining utility
|
85
85
|
test_files: []
|