gh-archive 0.7 → 0.11

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 40425d8038835d1e22dc684d3eb1570d06524cd2eb2216a5a6b140a33636da23
4
- data.tar.gz: a188da9ef8eb14116ca6abbdfdd933fa01cbe5900ad2e2574108e9ed0223e7af
3
+ metadata.gz: 652cb53430d2e6a230d8f11465b81685f2e92107868345566d16b07d4c4231ed
4
+ data.tar.gz: abaec255c079e5a959fd51f7b1e7ca15d72c21eaaeac98d34d13309ec91eedeb
5
5
  SHA512:
6
- metadata.gz: 19edf6c81e859d32a188e433dcba0f0e7cf76da43c7b65245ea9c97ef6f419add6a30192701841e82a5203856998f2905f0d6ee677027d78e32b68c64190dbe1
7
- data.tar.gz: c8e7aecf5cfb326e2a0e09c4ceed3e148db4b7520252c2bcecfedb008d86726a31ad33fad944de0b8eadeff0f73ec552e5e71d98add6ce692f9a81b326e9631c
6
+ metadata.gz: 829b1d53a7b72f4ea83712e8a95ba059e48dff7c982e811d30d27018e45e1b7010ace2a4a3f2641318a598e765a6d2c6138b319f9d67dd2604617dc2785175be
7
+ data.tar.gz: fd85da1c6630e8b534fb34fc9b5a08fcda55e3d2606ca393157134b6d85da2a9a7f92bc87ae31369441da1799cfd2178f0af9703ca997d10296c3f9a5ab1e4f9
data/lib/gh-archive.rb CHANGED
@@ -7,7 +7,7 @@ require 'tmpdir'
7
7
  require 'thread/pool'
8
8
  require 'thread/promise'
9
9
 
10
- require 'gh-archive/events'
10
+ require_relative File.expand_path('../gh-archive/events', __FILE__)
11
11
 
12
12
  module GHAUtils
13
13
  def get_gha_filename(date)
@@ -56,9 +56,16 @@ class GHAProvider
56
56
  @includes = {}
57
57
  @excludes = {}
58
58
 
59
+ @checkpoint_name = nil
59
60
  @use_json = true
60
61
  end
61
62
 
63
+ def use_checkpoint(filename)
64
+ @checkpoint_name = filename
65
+
66
+ return self
67
+ end
68
+
62
69
  def parse_events
63
70
  @use_json = false
64
71
 
@@ -67,7 +74,10 @@ class GHAProvider
67
74
 
68
75
  def logger=(logger)
69
76
  @logger = logger
77
+
78
+ return self
70
79
  end
80
+ alias :use_logger :logger=
71
81
 
72
82
  def get(date)
73
83
  raise "Not implemented"
@@ -91,11 +101,45 @@ class GHAProvider
91
101
  return self
92
102
  end
93
103
 
104
+ def restore_checkpoint(from)
105
+ if @checkpoint_name && FileTest.exist?(@checkpoint_name)
106
+ # Note that this throws an exception if the file is not readable. This is the intended behavior.
107
+ # As opposed to that, failing to save the checkpoint information just results in a warning on the log.
108
+ loaded_from = Marshal.load(File.read(@checkpoint_name))
109
+ raise "The loaded checkpoint (#{loaded_from}) occurs before the current from date (#{from})" if loaded_from < from
110
+
111
+ @logger.info("Valid checkpoint loaded. Restored execution from #{loaded_from}.")
112
+
113
+ return loaded_from
114
+ else
115
+ return from
116
+ end
117
+ end
118
+
119
+ def update_checkpoint(current_time)
120
+ if @checkpoint_name
121
+ begin
122
+ File.open(@checkpoint_name, "wb") do |f|
123
+ f.write(Marshal.dump(current_time))
124
+ end
125
+ rescue
126
+ @logger.warn(
127
+ "Unable to save the checkpoint at the specified location (#{File.expand_path(@checkpoint_name)})."
128
+ )
129
+ end
130
+ end
131
+ end
132
+
94
133
  def each(from = Time.gm(2015, 1, 1), to = Time.now)
95
134
  exceptions = []
96
135
 
136
+ from = restore_checkpoint(from)
137
+
97
138
  self.each_time(from, to) do |current_time|
98
139
  events = []
140
+
141
+ update_checkpoint(current_time)
142
+
99
143
  begin
100
144
  events = self.get(current_time)
101
145
  rescue GHAException => e
@@ -131,6 +175,8 @@ class GHAProvider
131
175
  GC.start
132
176
  end
133
177
 
178
+ update_checkpoint(to)
179
+
134
180
  return exceptions
135
181
  end
136
182
 
@@ -142,13 +188,25 @@ class OnlineGHAProvider < GHAProvider
142
188
  def initialize(max_retries = 3, proactive = false, proactive_pool_size = 10)
143
189
  super()
144
190
 
145
- @max_retries = max_retries
146
- @proactive = proactive
147
- @proactive_pool_size = proactive_pool_size
148
- @pool = Thread.pool(proactive_pool_size)
191
+ self.max_retries(max_retries)
192
+ self.proactive(proactive_pool_size) if proactive
193
+
149
194
  @cache = Cache.new
150
195
  end
151
196
 
197
+ def max_retries(n)
198
+ @max_retries = n
199
+
200
+ return self
201
+ end
202
+
203
+ def proactive(pool_size = 10)
204
+ @proactive = true
205
+ @pool = Thread.pool(pool_size)
206
+
207
+ return self
208
+ end
209
+
152
210
  def get(current_time)
153
211
  @max_retries.times do
154
212
  begin
@@ -214,10 +272,11 @@ class OnlineGHAProvider < GHAProvider
214
272
 
215
273
  def each(from = Time.gm(2015, 1, 1), to = Time.now)
216
274
  if @proactive
275
+ real_from = restore_checkpoint(from)
217
276
  any_ready = Thread.promise
218
277
 
219
278
  @logger.info("Proactively scheduling download tasks...")
220
- self.each_time(from, to) do |current_time|
279
+ self.each_time(real_from, to) do |current_time|
221
280
  @pool.process(current_time) do |current_time|
222
281
  cache(current_time)
223
282
  any_ready << true
@@ -1,5 +1,5 @@
1
1
  require 'time'
2
- require 'gh-archive/entities'
2
+ require_relative File.expand_path('../entities', __FILE__)
3
3
 
4
4
  module GHArchive
5
5
  class Event
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gh-archive
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.7'
4
+ version: '0.11'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Simone Scalabrino
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-08-13 00:00:00.000000000 Z
11
+ date: 2021-08-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: code-assertions
@@ -63,7 +63,7 @@ homepage: https://github.com/intersimone999/gh-archive
63
63
  licenses:
64
64
  - GPL-3.0-only
65
65
  metadata: {}
66
- post_install_message:
66
+ post_install_message:
67
67
  rdoc_options: []
68
68
  require_paths:
69
69
  - lib
@@ -78,8 +78,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
78
78
  - !ruby/object:Gem::Version
79
79
  version: '0'
80
80
  requirements: []
81
- rubygems_version: 3.2.21
82
- signing_key:
81
+ rubygems_version: 3.2.22
82
+ signing_key:
83
83
  specification_version: 4
84
84
  summary: GitHub Archive mining utility
85
85
  test_files: []