gh-archive 0.7 → 0.11
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/gh-archive.rb +65 -6
- data/lib/gh-archive/events.rb +1 -1
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 652cb53430d2e6a230d8f11465b81685f2e92107868345566d16b07d4c4231ed
|
4
|
+
data.tar.gz: abaec255c079e5a959fd51f7b1e7ca15d72c21eaaeac98d34d13309ec91eedeb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 829b1d53a7b72f4ea83712e8a95ba059e48dff7c982e811d30d27018e45e1b7010ace2a4a3f2641318a598e765a6d2c6138b319f9d67dd2604617dc2785175be
|
7
|
+
data.tar.gz: fd85da1c6630e8b534fb34fc9b5a08fcda55e3d2606ca393157134b6d85da2a9a7f92bc87ae31369441da1799cfd2178f0af9703ca997d10296c3f9a5ab1e4f9
|
data/lib/gh-archive.rb
CHANGED
@@ -7,7 +7,7 @@ require 'tmpdir'
|
|
7
7
|
require 'thread/pool'
|
8
8
|
require 'thread/promise'
|
9
9
|
|
10
|
-
|
10
|
+
require_relative File.expand_path('../gh-archive/events', __FILE__)
|
11
11
|
|
12
12
|
module GHAUtils
|
13
13
|
def get_gha_filename(date)
|
@@ -56,9 +56,16 @@ class GHAProvider
|
|
56
56
|
@includes = {}
|
57
57
|
@excludes = {}
|
58
58
|
|
59
|
+
@checkpoint_name = nil
|
59
60
|
@use_json = true
|
60
61
|
end
|
61
62
|
|
63
|
+
def use_checkpoint(filename)
|
64
|
+
@checkpoint_name = filename
|
65
|
+
|
66
|
+
return self
|
67
|
+
end
|
68
|
+
|
62
69
|
def parse_events
|
63
70
|
@use_json = false
|
64
71
|
|
@@ -67,7 +74,10 @@ class GHAProvider
|
|
67
74
|
|
68
75
|
def logger=(logger)
|
69
76
|
@logger = logger
|
77
|
+
|
78
|
+
return self
|
70
79
|
end
|
80
|
+
alias :use_logger :logger=
|
71
81
|
|
72
82
|
def get(date)
|
73
83
|
raise "Not implemented"
|
@@ -91,11 +101,45 @@ class GHAProvider
|
|
91
101
|
return self
|
92
102
|
end
|
93
103
|
|
104
|
+
def restore_checkpoint(from)
|
105
|
+
if @checkpoint_name && FileTest.exist?(@checkpoint_name)
|
106
|
+
# Note that this throws an exception if the file is not readable. This is the intended behavior.
|
107
|
+
# As opposed to that, failing to save the checkpoint information just results in a warning on the log.
|
108
|
+
loaded_from = Marshal.load(File.read(@checkpoint_name))
|
109
|
+
raise "The loaded checkpoint (#{loaded_from}) occurs before the current from date (#{from})" if loaded_from < from
|
110
|
+
|
111
|
+
@logger.info("Valid checkpoint loaded. Restored execution from #{loaded_from}.")
|
112
|
+
|
113
|
+
return loaded_from
|
114
|
+
else
|
115
|
+
return from
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
def update_checkpoint(current_time)
|
120
|
+
if @checkpoint_name
|
121
|
+
begin
|
122
|
+
File.open(@checkpoint_name, "wb") do |f|
|
123
|
+
f.write(Marshal.dump(current_time))
|
124
|
+
end
|
125
|
+
rescue
|
126
|
+
@logger.warn(
|
127
|
+
"Unable to save the checkpoint at the specified location (#{File.expand_path(@checkpoint_name)})."
|
128
|
+
)
|
129
|
+
end
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
94
133
|
def each(from = Time.gm(2015, 1, 1), to = Time.now)
|
95
134
|
exceptions = []
|
96
135
|
|
136
|
+
from = restore_checkpoint(from)
|
137
|
+
|
97
138
|
self.each_time(from, to) do |current_time|
|
98
139
|
events = []
|
140
|
+
|
141
|
+
update_checkpoint(current_time)
|
142
|
+
|
99
143
|
begin
|
100
144
|
events = self.get(current_time)
|
101
145
|
rescue GHAException => e
|
@@ -131,6 +175,8 @@ class GHAProvider
|
|
131
175
|
GC.start
|
132
176
|
end
|
133
177
|
|
178
|
+
update_checkpoint(to)
|
179
|
+
|
134
180
|
return exceptions
|
135
181
|
end
|
136
182
|
|
@@ -142,13 +188,25 @@ class OnlineGHAProvider < GHAProvider
|
|
142
188
|
def initialize(max_retries = 3, proactive = false, proactive_pool_size = 10)
|
143
189
|
super()
|
144
190
|
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
@pool = Thread.pool(proactive_pool_size)
|
191
|
+
self.max_retries(max_retries)
|
192
|
+
self.proactive(proactive_pool_size) if proactive
|
193
|
+
|
149
194
|
@cache = Cache.new
|
150
195
|
end
|
151
196
|
|
197
|
+
def max_retries(n)
|
198
|
+
@max_retries = n
|
199
|
+
|
200
|
+
return self
|
201
|
+
end
|
202
|
+
|
203
|
+
def proactive(pool_size = 10)
|
204
|
+
@proactive = true
|
205
|
+
@pool = Thread.pool(pool_size)
|
206
|
+
|
207
|
+
return self
|
208
|
+
end
|
209
|
+
|
152
210
|
def get(current_time)
|
153
211
|
@max_retries.times do
|
154
212
|
begin
|
@@ -214,10 +272,11 @@ class OnlineGHAProvider < GHAProvider
|
|
214
272
|
|
215
273
|
def each(from = Time.gm(2015, 1, 1), to = Time.now)
|
216
274
|
if @proactive
|
275
|
+
real_from = restore_checkpoint(from)
|
217
276
|
any_ready = Thread.promise
|
218
277
|
|
219
278
|
@logger.info("Proactively scheduling download tasks...")
|
220
|
-
self.each_time(from, to) do |current_time|
|
279
|
+
self.each_time(real_from, to) do |current_time|
|
221
280
|
@pool.process(current_time) do |current_time|
|
222
281
|
cache(current_time)
|
223
282
|
any_ready << true
|
data/lib/gh-archive/events.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gh-archive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.7'
|
4
|
+
version: '0.11'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Simone Scalabrino
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-08-
|
11
|
+
date: 2021-08-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: code-assertions
|
@@ -63,7 +63,7 @@ homepage: https://github.com/intersimone999/gh-archive
|
|
63
63
|
licenses:
|
64
64
|
- GPL-3.0-only
|
65
65
|
metadata: {}
|
66
|
-
post_install_message:
|
66
|
+
post_install_message:
|
67
67
|
rdoc_options: []
|
68
68
|
require_paths:
|
69
69
|
- lib
|
@@ -78,8 +78,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
78
78
|
- !ruby/object:Gem::Version
|
79
79
|
version: '0'
|
80
80
|
requirements: []
|
81
|
-
rubygems_version: 3.2.
|
82
|
-
signing_key:
|
81
|
+
rubygems_version: 3.2.22
|
82
|
+
signing_key:
|
83
83
|
specification_version: 4
|
84
84
|
summary: GitHub Archive mining utility
|
85
85
|
test_files: []
|