gh-archive 0.4 → 0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/gh-archive.rb +36 -39
- metadata +22 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 011777addb798b172d58ffaac2b509ecf85288ee90cd28726c6303d14d39db1b
|
4
|
+
data.tar.gz: d8714b155567039e5de81f5ae36473c291f0af86701afaebf4527ab962dca240
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: db6a72c3e6e31490c0a3b574ee0edf8f8995434f7ba32b6eb93c4ff35b3a8b0bd3e35c85ea207cb000b399fa71b524578067dc923752d5951f156b0f0d21df23
|
7
|
+
data.tar.gz: a0bac6036c2147e0bd933209f458cb272f6a20d669f2616fc4aa2e6b0a257354e704bcf48f3d658241d81d10d09a94ae07c899223862177fbe5315b9719b4874
|
data/lib/gh-archive.rb
CHANGED
@@ -4,6 +4,8 @@ require 'open-uri'
|
|
4
4
|
require 'zlib'
|
5
5
|
require 'logger'
|
6
6
|
require 'tmpdir'
|
7
|
+
require 'thread/pool'
|
8
|
+
require 'thread/promise'
|
7
9
|
|
8
10
|
module GHAUtils
|
9
11
|
def get_gha_filename(date)
|
@@ -108,6 +110,7 @@ class OnlineGHAProvider < GHAProvider
|
|
108
110
|
@max_retries = max_retries
|
109
111
|
@proactive = proactive
|
110
112
|
@proactive_pool_size = proactive_pool_size
|
113
|
+
@pool = Thread.pool(proactive_pool_size)
|
111
114
|
@cache = Cache.new
|
112
115
|
end
|
113
116
|
|
@@ -116,14 +119,23 @@ class OnlineGHAProvider < GHAProvider
|
|
116
119
|
begin
|
117
120
|
filename = self.get_gha_filename(current_time)
|
118
121
|
|
119
|
-
if @
|
120
|
-
|
122
|
+
if @proactive
|
123
|
+
@logger.info("Waiting for cache to have #{current_time}...") unless @cache.has?(filename)
|
124
|
+
|
125
|
+
while !@cache.has?(filename)
|
126
|
+
sleep 1
|
127
|
+
end
|
128
|
+
|
129
|
+
return @cache.get(filename)
|
121
130
|
else
|
122
131
|
URI.open("http://data.gharchive.org/#{filename}") do |gz|
|
123
|
-
# Save to cache
|
124
132
|
return self.read_gha_file(gz)
|
125
133
|
end
|
126
134
|
end
|
135
|
+
rescue Errno::ECONNRESET
|
136
|
+
next
|
137
|
+
rescue Zlib::GzipFile::Error
|
138
|
+
raise $!
|
127
139
|
rescue
|
128
140
|
@logger.warn($!)
|
129
141
|
end
|
@@ -133,59 +145,56 @@ class OnlineGHAProvider < GHAProvider
|
|
133
145
|
end
|
134
146
|
|
135
147
|
def cache(current_time)
|
148
|
+
@logger.info("Full cache. Waiting for some free slot...") if @cache.full?
|
149
|
+
while @cache.full?
|
150
|
+
sleep 1
|
151
|
+
end
|
136
152
|
@max_retries.times do
|
137
153
|
begin
|
138
154
|
filename = self.get_gha_filename(current_time)
|
139
|
-
|
140
155
|
URI.open("http://data.gharchive.org/#{filename}") do |gz|
|
141
156
|
content = self.read_gha_file(gz)
|
142
157
|
@cache.put(filename, content)
|
143
158
|
return
|
144
159
|
end
|
160
|
+
rescue Errno::ECONNRESET
|
161
|
+
next
|
162
|
+
rescue Zlib::GzipFile::Error
|
163
|
+
raise $!
|
145
164
|
rescue
|
146
|
-
|
165
|
+
@logger.warn($!)
|
147
166
|
end
|
148
167
|
end
|
149
168
|
end
|
150
169
|
|
151
170
|
def each(from = Time.gm(2015, 1, 1), to = Time.now)
|
152
171
|
if @proactive
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
pool << Thread.start do
|
163
|
-
self.cache(current_date)
|
164
|
-
@logger.info("Proactively cached #{current_date}. Cache size: #{@cache.size}")
|
165
|
-
end
|
166
|
-
|
167
|
-
pool.delete_if { |t| !t.alive? }
|
172
|
+
any_ready = Thread.promise
|
173
|
+
|
174
|
+
@logger.info("Proactively scheduling download tasks...")
|
175
|
+
self.each_date(from, to) do |current_date|
|
176
|
+
@pool.process(current_date) do |current_date|
|
177
|
+
cache(current_date)
|
178
|
+
any_ready << true
|
179
|
+
@logger.info("Proactively cached #{current_date}. Cache size: #{@cache.size}")
|
168
180
|
end
|
169
181
|
end
|
182
|
+
|
183
|
+
~any_ready
|
184
|
+
@logger.info("Download tasks successfully scheduled!")
|
170
185
|
end
|
171
186
|
|
172
187
|
super
|
173
188
|
end
|
174
189
|
|
175
190
|
class Cache
|
176
|
-
def initialize(
|
191
|
+
def initialize(max_size = 10)
|
177
192
|
@cache = {}
|
178
193
|
@max_size = max_size
|
179
|
-
@folder = folder
|
180
194
|
@mutex = Mutex.new
|
181
195
|
end
|
182
196
|
|
183
197
|
def put(name, content)
|
184
|
-
#filename = "#@folder/#{name}"
|
185
|
-
#File.open(filename, 'w') do |f|
|
186
|
-
#f << content
|
187
|
-
#end
|
188
|
-
|
189
198
|
@mutex.synchronize do
|
190
199
|
@cache[name] = content
|
191
200
|
end
|
@@ -195,18 +204,6 @@ class OnlineGHAProvider < GHAProvider
|
|
195
204
|
@mutex.synchronize do
|
196
205
|
return @cache.delete(name)
|
197
206
|
end
|
198
|
-
ensure
|
199
|
-
#self.unload(name)
|
200
|
-
end
|
201
|
-
|
202
|
-
def unload(name)
|
203
|
-
File.unlink(@cache[name])
|
204
|
-
|
205
|
-
@mutex.synchronize do
|
206
|
-
@cache.delete(name)
|
207
|
-
end
|
208
|
-
|
209
|
-
return true
|
210
207
|
end
|
211
208
|
|
212
209
|
def size
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gh-archive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.
|
4
|
+
version: '0.5'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Simone Scalabrino
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-08-
|
11
|
+
date: 2021-08-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: code-assertions
|
@@ -30,6 +30,26 @@ dependencies:
|
|
30
30
|
- - ">="
|
31
31
|
- !ruby/object:Gem::Version
|
32
32
|
version: 1.1.2
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: thread
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - "~>"
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: 0.2.2
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: 0.2.2
|
43
|
+
type: :runtime
|
44
|
+
prerelease: false
|
45
|
+
version_requirements: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - "~>"
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: 0.2.2
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: 0.2.2
|
33
53
|
description: Download and analyze the GitHub events stored at GitHub archive
|
34
54
|
email: s.scalabrino9@gmail.com
|
35
55
|
executables: []
|