gh-archive 0.7 → 0.11

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 40425d8038835d1e22dc684d3eb1570d06524cd2eb2216a5a6b140a33636da23
4
- data.tar.gz: a188da9ef8eb14116ca6abbdfdd933fa01cbe5900ad2e2574108e9ed0223e7af
3
+ metadata.gz: 652cb53430d2e6a230d8f11465b81685f2e92107868345566d16b07d4c4231ed
4
+ data.tar.gz: abaec255c079e5a959fd51f7b1e7ca15d72c21eaaeac98d34d13309ec91eedeb
5
5
  SHA512:
6
- metadata.gz: 19edf6c81e859d32a188e433dcba0f0e7cf76da43c7b65245ea9c97ef6f419add6a30192701841e82a5203856998f2905f0d6ee677027d78e32b68c64190dbe1
7
- data.tar.gz: c8e7aecf5cfb326e2a0e09c4ceed3e148db4b7520252c2bcecfedb008d86726a31ad33fad944de0b8eadeff0f73ec552e5e71d98add6ce692f9a81b326e9631c
6
+ metadata.gz: 829b1d53a7b72f4ea83712e8a95ba059e48dff7c982e811d30d27018e45e1b7010ace2a4a3f2641318a598e765a6d2c6138b319f9d67dd2604617dc2785175be
7
+ data.tar.gz: fd85da1c6630e8b534fb34fc9b5a08fcda55e3d2606ca393157134b6d85da2a9a7f92bc87ae31369441da1799cfd2178f0af9703ca997d10296c3f9a5ab1e4f9
data/lib/gh-archive.rb CHANGED
@@ -7,7 +7,7 @@ require 'tmpdir'
7
7
  require 'thread/pool'
8
8
  require 'thread/promise'
9
9
 
10
- require 'gh-archive/events'
10
+ require_relative File.expand_path('../gh-archive/events', __FILE__)
11
11
 
12
12
  module GHAUtils
13
13
  def get_gha_filename(date)
@@ -56,9 +56,16 @@ class GHAProvider
56
56
  @includes = {}
57
57
  @excludes = {}
58
58
 
59
+ @checkpoint_name = nil
59
60
  @use_json = true
60
61
  end
61
62
 
63
+ def use_checkpoint(filename)
64
+ @checkpoint_name = filename
65
+
66
+ return self
67
+ end
68
+
62
69
  def parse_events
63
70
  @use_json = false
64
71
 
@@ -67,7 +74,10 @@ class GHAProvider
67
74
 
68
75
  def logger=(logger)
69
76
  @logger = logger
77
+
78
+ return self
70
79
  end
80
+ alias :use_logger :logger=
71
81
 
72
82
  def get(date)
73
83
  raise "Not implemented"
@@ -91,11 +101,45 @@ class GHAProvider
91
101
  return self
92
102
  end
93
103
 
104
+ def restore_checkpoint(from)
105
+ if @checkpoint_name && FileTest.exist?(@checkpoint_name)
106
+ # Note that this throws an exception if the file is not readable. This is the intended behavior.
107
+ # As opposed to that, failing to save the checkpoint information just results in a warning on the log.
108
+ loaded_from = Marshal.load(File.read(@checkpoint_name))
109
+ raise "The loaded checkpoint (#{loaded_from}) occurs before the current from date (#{from})" if loaded_from < from
110
+
111
+ @logger.info("Valid checkpoint loaded. Restored execution from #{loaded_from}.")
112
+
113
+ return loaded_from
114
+ else
115
+ return from
116
+ end
117
+ end
118
+
119
+ def update_checkpoint(current_time)
120
+ if @checkpoint_name
121
+ begin
122
+ File.open(@checkpoint_name, "wb") do |f|
123
+ f.write(Marshal.dump(current_time))
124
+ end
125
+ rescue
126
+ @logger.warn(
127
+ "Unable to save the checkpoint at the specified location (#{File.expand_path(@checkpoint_name)})."
128
+ )
129
+ end
130
+ end
131
+ end
132
+
94
133
  def each(from = Time.gm(2015, 1, 1), to = Time.now)
95
134
  exceptions = []
96
135
 
136
+ from = restore_checkpoint(from)
137
+
97
138
  self.each_time(from, to) do |current_time|
98
139
  events = []
140
+
141
+ update_checkpoint(current_time)
142
+
99
143
  begin
100
144
  events = self.get(current_time)
101
145
  rescue GHAException => e
@@ -131,6 +175,8 @@ class GHAProvider
131
175
  GC.start
132
176
  end
133
177
 
178
+ update_checkpoint(to)
179
+
134
180
  return exceptions
135
181
  end
136
182
 
@@ -142,13 +188,25 @@ class OnlineGHAProvider < GHAProvider
142
188
  def initialize(max_retries = 3, proactive = false, proactive_pool_size = 10)
143
189
  super()
144
190
 
145
- @max_retries = max_retries
146
- @proactive = proactive
147
- @proactive_pool_size = proactive_pool_size
148
- @pool = Thread.pool(proactive_pool_size)
191
+ self.max_retries(max_retries)
192
+ self.proactive(proactive_pool_size) if proactive
193
+
149
194
  @cache = Cache.new
150
195
  end
151
196
 
197
+ def max_retries(n)
198
+ @max_retries = n
199
+
200
+ return self
201
+ end
202
+
203
+ def proactive(pool_size = 10)
204
+ @proactive = true
205
+ @pool = Thread.pool(pool_size)
206
+
207
+ return self
208
+ end
209
+
152
210
  def get(current_time)
153
211
  @max_retries.times do
154
212
  begin
@@ -214,10 +272,11 @@ class OnlineGHAProvider < GHAProvider
214
272
 
215
273
  def each(from = Time.gm(2015, 1, 1), to = Time.now)
216
274
  if @proactive
275
+ real_from = restore_checkpoint(from)
217
276
  any_ready = Thread.promise
218
277
 
219
278
  @logger.info("Proactively scheduling download tasks...")
220
- self.each_time(from, to) do |current_time|
279
+ self.each_time(real_from, to) do |current_time|
221
280
  @pool.process(current_time) do |current_time|
222
281
  cache(current_time)
223
282
  any_ready << true
@@ -1,5 +1,5 @@
1
1
  require 'time'
2
- require 'gh-archive/entities'
2
+ require_relative File.expand_path('../entities', __FILE__)
3
3
 
4
4
  module GHArchive
5
5
  class Event
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gh-archive
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.7'
4
+ version: '0.11'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Simone Scalabrino
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-08-13 00:00:00.000000000 Z
11
+ date: 2021-08-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: code-assertions
@@ -63,7 +63,7 @@ homepage: https://github.com/intersimone999/gh-archive
63
63
  licenses:
64
64
  - GPL-3.0-only
65
65
  metadata: {}
66
- post_install_message:
66
+ post_install_message:
67
67
  rdoc_options: []
68
68
  require_paths:
69
69
  - lib
@@ -78,8 +78,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
78
78
  - !ruby/object:Gem::Version
79
79
  version: '0'
80
80
  requirements: []
81
- rubygems_version: 3.2.21
82
- signing_key:
81
+ rubygems_version: 3.2.22
82
+ signing_key:
83
83
  specification_version: 4
84
84
  summary: GitHub Archive mining utility
85
85
  test_files: []