gh-archive 0.12 → 0.16

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/gh-archive.rb +17 -3
  3. metadata +6 -6
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9e36482fd8eeb76b12db28c1e68ad836d77a5985af6aa0e570a025bcb664b1a0
4
- data.tar.gz: a91e869a8e3f614e8280f03749bcd31ce0de3c7c4e9dc9dd3d777c1a3f1e0d3f
3
+ metadata.gz: 1f339fca5ebd3f7ee085fa257b47567993ab776efd3ba143e424a6bab2ca1712
4
+ data.tar.gz: bd3709e9067fbc5ba0a7b92f156c3fa6f87d26bc81b0a969f7020526f31b5264
5
5
  SHA512:
6
- metadata.gz: 4423afb5e0538be2abbe4ac21aeba3919081f0c61943ad9801add90b85b6f2e9df907836c13e00c6b300b28766c65c5cb2e78bc040393ebd38c9618092f2957d
7
- data.tar.gz: c0caf8f4e47419744f7694748ed608c2e7da8fc0094477ce96bad211cf0b398ef5d43550fdcfd25ba1bc820cafd470ebc4f7526c39c171ef991bfc5fd2399882
6
+ metadata.gz: 9d56ecf4dc4101cf162d02e49f62dc77592bc652b9933b7413cb40f56eb25c595a20eed8fbc42e1f96830048d16520299972094b0d29b14f400befc5c21e1672
7
+ data.tar.gz: 5a7e8c158271b1b540e76e1b68f93ece7cebed8d29057728c6ac93b536f3f3417b89fffddcbe57c80e75269cda9ff719fef84e5bb0558dd52fd8842f80e44f55
data/lib/gh-archive.rb CHANGED
@@ -285,7 +285,12 @@ class OnlineGHAProvider < GHAProvider
285
285
  sleep 1
286
286
  end
287
287
 
288
- return @cache.get(filename)
288
+ data = @cache.get(filename)
289
+ if data
290
+ return data
291
+ else
292
+ raise DownloadArchiveException, "Could not scan #{filename}: data unavailable."
293
+ end
289
294
  else
290
295
  URI.open("http://data.gharchive.org/#{filename}") do |gz|
291
296
  return self.read_gha_file(gz)
@@ -313,9 +318,10 @@ class OnlineGHAProvider < GHAProvider
313
318
  while @cache.full?
314
319
  sleep 1
315
320
  end
321
+
322
+ filename = self.get_gha_filename(current_time)
316
323
  @max_retries.times do
317
324
  begin
318
- filename = self.get_gha_filename(current_time)
319
325
  URI.open("http://data.gharchive.org/#{filename}") do |gz|
320
326
  content = self.read_gha_file(gz)
321
327
  @cache.put(filename, content)
@@ -329,11 +335,17 @@ class OnlineGHAProvider < GHAProvider
329
335
  if code.start_with?("5")
330
336
  @logger.warn("A server error temporary prevented the download of #{current_time}: " + e.message)
331
337
  next
338
+ elsif code == "404"
339
+ @logger.error("File for #{current_time} not found. Skipping because: " + e.message)
332
340
  else
333
341
  raise e
334
342
  end
343
+ rescue Zlib::GzipFile::Error => e
344
+ @logger.warn("Could not unzip, cache and analyze the zip at #{current_time}: " + e.message)
335
345
  end
336
346
  end
347
+
348
+ @cache.put(filename, nil) unless @cache.has?(filename)
337
349
  end
338
350
 
339
351
  def each(from = Time.gm(2015, 1, 1), to = Time.now)
@@ -383,7 +395,9 @@ class OnlineGHAProvider < GHAProvider
383
395
  end
384
396
 
385
397
  def has?(name)
386
- return @cache.has_key?(name)
398
+ @mutex.synchronize do
399
+ return @cache.has_key?(name)
400
+ end
387
401
  end
388
402
 
389
403
  def full?
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gh-archive
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.12'
4
+ version: '0.16'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Simone Scalabrino
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-08-15 00:00:00.000000000 Z
11
+ date: 2021-10-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: code-assertions
@@ -63,7 +63,7 @@ homepage: https://github.com/intersimone999/gh-archive
63
63
  licenses:
64
64
  - GPL-3.0-only
65
65
  metadata: {}
66
- post_install_message:
66
+ post_install_message:
67
67
  rdoc_options: []
68
68
  require_paths:
69
69
  - lib
@@ -78,8 +78,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
78
78
  - !ruby/object:Gem::Version
79
79
  version: '0'
80
80
  requirements: []
81
- rubygems_version: 3.2.22
82
- signing_key:
81
+ rubygems_version: 3.2.21
82
+ signing_key:
83
83
  specification_version: 4
84
84
  summary: GitHub Archive mining utility
85
85
  test_files: []