gh-archive 0.7 → 0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/gh-archive.rb +65 -6
- data/lib/gh-archive/events.rb +1 -1
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 652cb53430d2e6a230d8f11465b81685f2e92107868345566d16b07d4c4231ed
|
4
|
+
data.tar.gz: abaec255c079e5a959fd51f7b1e7ca15d72c21eaaeac98d34d13309ec91eedeb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 829b1d53a7b72f4ea83712e8a95ba059e48dff7c982e811d30d27018e45e1b7010ace2a4a3f2641318a598e765a6d2c6138b319f9d67dd2604617dc2785175be
|
7
|
+
data.tar.gz: fd85da1c6630e8b534fb34fc9b5a08fcda55e3d2606ca393157134b6d85da2a9a7f92bc87ae31369441da1799cfd2178f0af9703ca997d10296c3f9a5ab1e4f9
|
data/lib/gh-archive.rb
CHANGED
@@ -7,7 +7,7 @@ require 'tmpdir'
|
|
7
7
|
require 'thread/pool'
|
8
8
|
require 'thread/promise'
|
9
9
|
|
10
|
-
|
10
|
+
require_relative File.expand_path('../gh-archive/events', __FILE__)
|
11
11
|
|
12
12
|
module GHAUtils
|
13
13
|
def get_gha_filename(date)
|
@@ -56,9 +56,16 @@ class GHAProvider
|
|
56
56
|
@includes = {}
|
57
57
|
@excludes = {}
|
58
58
|
|
59
|
+
@checkpoint_name = nil
|
59
60
|
@use_json = true
|
60
61
|
end
|
61
62
|
|
63
|
+
def use_checkpoint(filename)
|
64
|
+
@checkpoint_name = filename
|
65
|
+
|
66
|
+
return self
|
67
|
+
end
|
68
|
+
|
62
69
|
def parse_events
|
63
70
|
@use_json = false
|
64
71
|
|
@@ -67,7 +74,10 @@ class GHAProvider
|
|
67
74
|
|
68
75
|
def logger=(logger)
|
69
76
|
@logger = logger
|
77
|
+
|
78
|
+
return self
|
70
79
|
end
|
80
|
+
alias :use_logger :logger=
|
71
81
|
|
72
82
|
def get(date)
|
73
83
|
raise "Not implemented"
|
@@ -91,11 +101,45 @@ class GHAProvider
|
|
91
101
|
return self
|
92
102
|
end
|
93
103
|
|
104
|
+
def restore_checkpoint(from)
|
105
|
+
if @checkpoint_name && FileTest.exist?(@checkpoint_name)
|
106
|
+
# Note that this throws an exception if the file is not readable. This is the intended behavior.
|
107
|
+
# As opposed to that, failing to save the checkpoint information just results in a warning on the log.
|
108
|
+
loaded_from = Marshal.load(File.read(@checkpoint_name))
|
109
|
+
raise "The loaded checkpoint (#{loaded_from}) occurs before the current from date (#{from})" if loaded_from < from
|
110
|
+
|
111
|
+
@logger.info("Valid checkpoint loaded. Restored execution from #{loaded_from}.")
|
112
|
+
|
113
|
+
return loaded_from
|
114
|
+
else
|
115
|
+
return from
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
def update_checkpoint(current_time)
|
120
|
+
if @checkpoint_name
|
121
|
+
begin
|
122
|
+
File.open(@checkpoint_name, "wb") do |f|
|
123
|
+
f.write(Marshal.dump(current_time))
|
124
|
+
end
|
125
|
+
rescue
|
126
|
+
@logger.warn(
|
127
|
+
"Unable to save the checkpoint at the specified location (#{File.expand_path(@checkpoint_name)})."
|
128
|
+
)
|
129
|
+
end
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
94
133
|
def each(from = Time.gm(2015, 1, 1), to = Time.now)
|
95
134
|
exceptions = []
|
96
135
|
|
136
|
+
from = restore_checkpoint(from)
|
137
|
+
|
97
138
|
self.each_time(from, to) do |current_time|
|
98
139
|
events = []
|
140
|
+
|
141
|
+
update_checkpoint(current_time)
|
142
|
+
|
99
143
|
begin
|
100
144
|
events = self.get(current_time)
|
101
145
|
rescue GHAException => e
|
@@ -131,6 +175,8 @@ class GHAProvider
|
|
131
175
|
GC.start
|
132
176
|
end
|
133
177
|
|
178
|
+
update_checkpoint(to)
|
179
|
+
|
134
180
|
return exceptions
|
135
181
|
end
|
136
182
|
|
@@ -142,13 +188,25 @@ class OnlineGHAProvider < GHAProvider
|
|
142
188
|
def initialize(max_retries = 3, proactive = false, proactive_pool_size = 10)
|
143
189
|
super()
|
144
190
|
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
@pool = Thread.pool(proactive_pool_size)
|
191
|
+
self.max_retries(max_retries)
|
192
|
+
self.proactive(proactive_pool_size) if proactive
|
193
|
+
|
149
194
|
@cache = Cache.new
|
150
195
|
end
|
151
196
|
|
197
|
+
def max_retries(n)
|
198
|
+
@max_retries = n
|
199
|
+
|
200
|
+
return self
|
201
|
+
end
|
202
|
+
|
203
|
+
def proactive(pool_size = 10)
|
204
|
+
@proactive = true
|
205
|
+
@pool = Thread.pool(pool_size)
|
206
|
+
|
207
|
+
return self
|
208
|
+
end
|
209
|
+
|
152
210
|
def get(current_time)
|
153
211
|
@max_retries.times do
|
154
212
|
begin
|
@@ -214,10 +272,11 @@ class OnlineGHAProvider < GHAProvider
|
|
214
272
|
|
215
273
|
def each(from = Time.gm(2015, 1, 1), to = Time.now)
|
216
274
|
if @proactive
|
275
|
+
real_from = restore_checkpoint(from)
|
217
276
|
any_ready = Thread.promise
|
218
277
|
|
219
278
|
@logger.info("Proactively scheduling download tasks...")
|
220
|
-
self.each_time(from, to) do |current_time|
|
279
|
+
self.each_time(real_from, to) do |current_time|
|
221
280
|
@pool.process(current_time) do |current_time|
|
222
281
|
cache(current_time)
|
223
282
|
any_ready << true
|
data/lib/gh-archive/events.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gh-archive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.7'
|
4
|
+
version: '0.11'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Simone Scalabrino
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-08-
|
11
|
+
date: 2021-08-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: code-assertions
|
@@ -63,7 +63,7 @@ homepage: https://github.com/intersimone999/gh-archive
|
|
63
63
|
licenses:
|
64
64
|
- GPL-3.0-only
|
65
65
|
metadata: {}
|
66
|
-
post_install_message:
|
66
|
+
post_install_message:
|
67
67
|
rdoc_options: []
|
68
68
|
require_paths:
|
69
69
|
- lib
|
@@ -78,8 +78,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
78
78
|
- !ruby/object:Gem::Version
|
79
79
|
version: '0'
|
80
80
|
requirements: []
|
81
|
-
rubygems_version: 3.2.
|
82
|
-
signing_key:
|
81
|
+
rubygems_version: 3.2.22
|
82
|
+
signing_key:
|
83
83
|
specification_version: 4
|
84
84
|
summary: GitHub Archive mining utility
|
85
85
|
test_files: []
|