gh-archive 0.4 → 0.8
- checksums.yaml +4 -4
- data/lib/gh-archive.rb +121 -67
- data/lib/gh-archive/entities.rb +312 -0
- data/lib/gh-archive/events.rb +405 -0
- metadata +28 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: a4a9b89fb02620499e8b51f5c5e4ba34d0f3bc8b8f3ae4e2e69cba1e027bdb49
+  data.tar.gz: 6b707eb1bcb37b8a9b03ce36a2d4304e6760def0cc139ffad748a1596046f82f
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: '0975e354e028e768fb5bc4c17c19cddbc44b706394f60d8e22a64537a34342965bb98fa00365c329c30d189729dadba271ec6b101ff86593affef7e1d34848b3'
+  data.tar.gz: 95935fb3c27841760a68696c832e02108940f9eef535f5ed89afea9613a7988c622f2649f3feb77b22706250a30cfba30b5f36f2ada617f532e86a0426e052ce
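Both digests changed because 0.8 is a fresh build. Note that these sums cover the metadata.gz and data.tar.gz members inside the .gem tar archive, not the .gem file itself; a minimal Ruby sketch to recompute the SHA256 values (the local filename gh-archive-0.8.gem is an assumption):

    require 'digest'
    require 'rubygems/package'

    # A .gem is a tar archive; checksums.yaml digests two of its members.
    File.open('gh-archive-0.8.gem', 'rb') do |gem|
        Gem::Package::TarReader.new(gem) do |tar|
            tar.each do |entry|
                next unless %w[metadata.gz data.tar.gz].include?(entry.full_name)
                puts "#{entry.full_name}: #{Digest::SHA256.hexdigest(entry.read)}"
            end
        end
    end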
data/lib/gh-archive.rb
CHANGED
@@ -4,6 +4,10 @@ require 'open-uri'
 require 'zlib'
 require 'logger'
 require 'tmpdir'
+require 'thread/pool'
+require 'thread/promise'
+
+require_relative File.expand_path('../gh-archive/events', __FILE__)
 
 module GHAUtils
     def get_gha_filename(date)
@@ -12,14 +16,19 @@ module GHAUtils
 
     def read_gha_file_content(gz)
         gzip = Zlib::GzipReader.new(gz)
-
-
-
-        return content
+        return gzip.read
+    ensure
+        gzip.close if gzip
     end
 
-    def read_gha_file(
-
+    def read_gha_file(file)
+        if file.path.end_with?(".json")
+            content = file.read
+        elsif file.path.end_with?(".gz") || file.path.start_with?("/tmp/open-uri")
+            content = read_gha_file_content(file)
+        else
+            raise "Invalid file extension for #{file.path}: expected `.json.gz` or `json`,"
+        end
 
         result = []
         content.lines.each do |line|
@@ -29,11 +38,11 @@ module GHAUtils
         return result
     end
 
-    def
-
-        while
-            yield
-
+    def each_time(from, to)
+        current_time = from
+        while current_time < to
+            yield current_time
+            current_time += 3600
         end
     end
 end
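The rewritten helper now has a clear contract: each_time yields one Time per hour in [from, to), stepping by 3600 seconds. A standalone sketch of the same logic:

    # Hourly iteration, as implemented by GHAUtils#each_time above.
    def each_time(from, to)
        current_time = from
        while current_time < to
            yield current_time
            current_time += 3600    # one hour, in seconds
        end
    end

    each_time(Time.gm(2015, 1, 1), Time.gm(2015, 1, 1, 3)) { |t| puts t }
    # Prints 2015-01-01 00:00, 01:00 and 02:00 UTC; `to` itself is excluded.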
@@ -46,6 +55,14 @@ class GHAProvider
 
         @includes = {}
         @excludes = {}
+
+        @use_json = true
+    end
+
+    def parse_events
+        @use_json = false
+
+        return self
     end
 
     def logger=(logger)
@@ -61,6 +78,8 @@ class GHAProvider
             @includes[key.to_s] = [] unless @includes[key.to_s]
             @includes[key.to_s] << value
         end
+
+        return self
     end
 
     def exclude(**args)
@@ -68,16 +87,23 @@ class GHAProvider
             @excludes[key.to_s] = [] unless @excludes[key.to_s]
             @excludes[key.to_s] << value
         end
+
+        return self
     end
 
     def each(from = Time.gm(2015, 1, 1), to = Time.now)
-
+        exceptions = []
+
+        self.each_time(from, to) do |current_time|
             events = []
             begin
-                events = self.get(
-
-
-
+                events = self.get(current_time)
+            rescue GHAException => e
+                @logger.warn(e.message)
+                next
+            rescue => e
+                @logger.error("An exception occurred for #{current_time}: #{e.message}")
+                exceptions << e
                 next
             end
 
@@ -92,12 +118,23 @@ class GHAProvider
                 end
                 next if skip
 
-
+                if @use_json
+                    yield event, current_time
+                else
+                    yield GHArchive::Event.parse(event), current_time
+                end
             end
 
+            @logger.info("Scanned #{current_time}")
+
             events.clear
             GC.start
         end
+
+        return exceptions
+    end
+
+    class GHAException < Exception
     end
 end
 
@@ -108,6 +145,7 @@ class OnlineGHAProvider < GHAProvider
         @max_retries = max_retries
         @proactive = proactive
         @proactive_pool_size = proactive_pool_size
+        @pool = Thread.pool(proactive_pool_size)
         @cache = Cache.new
     end
 
@@ -116,76 +154,92 @@ class OnlineGHAProvider < GHAProvider
             begin
                 filename = self.get_gha_filename(current_time)
 
-                if @
-
+                if @proactive
+                    @logger.info("Waiting for cache to have #{current_time}...") unless @cache.has?(filename)
+
+                    while !@cache.has?(filename)
+                        sleep 1
+                    end
+
+                    return @cache.get(filename)
                 else
                     URI.open("http://data.gharchive.org/#{filename}") do |gz|
-                        # Save to cache
                         return self.read_gha_file(gz)
                     end
                 end
-            rescue
-                @logger.warn(
+            rescue Errno::ECONNRESET => e
+                @logger.warn("A server error temporary prevented the download of #{current_time}: " + e.message)
+                next
+            rescue OpenURI::HTTPError => e
+                code = e.io.status[0]
+                if code.start_with?("5")
+                    @logger.warn("A server error temporary prevented the download of #{current_time}: " + e.message)
+                    next
+                else
+                    raise e
+                end
             end
         end
 
-        raise DownloadArchiveException, "Exceeded maximum number of tentative downloads."
+        raise DownloadArchiveException, "Exceeded maximum number of tentative downloads for #{current_time}."
     end
 
     def cache(current_time)
+        @logger.info("Full cache. Waiting for some free slot...") if @cache.full?
+        while @cache.full?
+            sleep 1
+        end
         @max_retries.times do
             begin
                 filename = self.get_gha_filename(current_time)
-
                 URI.open("http://data.gharchive.org/#{filename}") do |gz|
                     content = self.read_gha_file(gz)
                     @cache.put(filename, content)
                     return
                 end
-            rescue
-
+            rescue Errno::ECONNRESET => e
+                @logger.warn("A server error temporary prevented the download of #{current_time}: " + e.message)
+                next
+            rescue OpenURI::HTTPError => e
+                code = e.io.status[0]
+                if code.start_with?("5")
+                    @logger.warn("A server error temporary prevented the download of #{current_time}: " + e.message)
+                    next
+                else
+                    raise e
+                end
             end
         end
     end
 
     def each(from = Time.gm(2015, 1, 1), to = Time.now)
         if @proactive
-
-
-
-
-
-
-
-
-
-            pool << Thread.start do
-                self.cache(current_date)
-                @logger.info("Proactively cached #{current_date}. Cache size: #{@cache.size}")
-            end
-
-            pool.delete_if { |t| !t.alive? }
+            any_ready = Thread.promise
+
+            @logger.info("Proactively scheduling download tasks...")
+            self.each_time(from, to) do |current_time|
+                @pool.process(current_time) do |current_time|
+                    cache(current_time)
+                    any_ready << true
+                    @logger.info("Proactively cached #{current_time}. Cache size: #{@cache.size}")
                 end
             end
+
+            ~any_ready
+            @logger.info("Download tasks successfully scheduled!")
         end
 
         super
     end
 
     class Cache
-        def initialize(
+        def initialize(max_size = 10)
            @cache = {}
            @max_size = max_size
-            @folder = folder
            @mutex = Mutex.new
        end
 
        def put(name, content)
-            #filename = "#@folder/#{name}"
-            #File.open(filename, 'w') do |f|
-            #f << content
-            #end
-
            @mutex.synchronize do
                @cache[name] = content
            end
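The proactive path leans on the thread gem (declared as a new runtime dependency in the metadata below): Thread.pool builds a fixed-size worker pool, pool.process enqueues a task and forwards its arguments to the block, a promise is delivered with << and ~promise blocks until a value arrives. A minimal sketch of that pattern outside the provider:

    require 'thread/pool'
    require 'thread/promise'

    pool  = Thread.pool(2)          # two worker threads
    ready = Thread.promise

    5.times do |i|
        pool.process(i) do |n|      # arguments are forwarded to the block
            sleep 0.1               # stand-in for downloading hour n
            ready << true           # deliver once any task completes
        end
    end

    ~ready                          # block until the first delivery
    puts 'at least one archive is cached'
    pool.shutdown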
@@ -195,18 +249,6 @@ class OnlineGHAProvider < GHAProvider
            @mutex.synchronize do
                return @cache.delete(name)
            end
-        ensure
-            #self.unload(name)
-        end
-
-        def unload(name)
-            File.unlink(@cache[name])
-
-            @mutex.synchronize do
-                @cache.delete(name)
-            end
-
-            return true
        end
 
        def size
@@ -224,7 +266,7 @@ class OnlineGHAProvider < GHAProvider
         end
     end
 
-    class DownloadArchiveException <
+    class DownloadArchiveException < GHAProvider::GHAException
     end
 end
 
@@ -237,8 +279,20 @@ class FolderGHAProvider < GHAProvider
 
     def get(current_time)
         filename = self.get_gha_filename(current_time)
-        File.
-
+        complete_filename = File.join(@folder, filename)
+        mode = "rb"
+
+        unless FileTest.exist?(complete_filename)
+            complete_filename = complete_filename.sub(".gz", "")
+            mode = "r"
+        end
+
+        unless FileTest.exist?(complete_filename)
+            raise GHAException.new("Cannot find any file (neither `.json.gz` nor `.json`) for #{current_time}")
+        end
+
+        File.open(complete_filename, mode) do |file|
+            return self.read_gha_file(file)
         end
     end
 end
@@ -267,17 +321,17 @@ class GHADownloader
 
     def download(from = Time.gm(2015, 1, 1), to = Time.now)
         archive = []
-        self.
-        filename = self.get_gha_filename(
+        self.each_time(from, to) do |current_time|
+            filename = self.get_gha_filename(current_time)
             out_filename = filename.clone
             out_filename.gsub!(".json.gz", ".json") if @decompress
 
             target_file = File.join(@folder, out_filename)
             if FileTest.exist?(target_file)
-                @logger.info("Skipping existing file for #{
+                @logger.info("Skipping existing file for #{current_time}")
                 next
             else
-                @logger.info("Downloading file for #{
+                @logger.info("Downloading file for #{current_time}")
             end
 
             File.open(target_file, 'w') do |f|
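Taken together, the 0.8 provider API chains configuration (include, exclude and the new parse_events all return self), yields GHArchive event objects instead of raw JSON hashes when parse_events is enabled, and each now returns the non-fatal exceptions it collected instead of aborting. A usage sketch; the OnlineGHAProvider constructor arguments (max_retries, proactive, proactive_pool_size) are inferred from the initializer above and should be treated as an assumption:

    require 'gh-archive'

    provider = OnlineGHAProvider.new(3, true, 10)
    provider.include(type: 'PushEvent').parse_events

    exceptions = provider.each(Time.gm(2021, 1, 1), Time.gm(2021, 1, 2)) do |event, time|
        puts "#{time}: #{event.repo.name}"    # event is a GHArchive::PushEvent here
    end
    warn "#{exceptions.size} hour(s) could not be processed" unless exceptions.empty?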
data/lib/gh-archive/entities.rb
ADDED
@@ -0,0 +1,312 @@
+require 'time'
+
+module GHArchive
+    Repository = Struct.new(:id, :name, :url)
+    CommitAuthor = Struct.new(:email, :name)
+
+    class Entity
+        def initialize(payload)
+            @payload = payload
+        end
+    end
+
+    class Commit < Entity
+        def sha
+            @payload['sha']
+        end
+
+        def author
+            CommitAuthor.new(
+                @payload['author']['email'],
+                @payload['author']['name']
+            )
+        end
+
+        def message
+            @payload['message']
+        end
+
+        def distinct
+            @payload['distinct']
+        end
+
+        def url
+            @payload['url']
+        end
+    end
+
+    class User < Entity
+        def id
+            @payload['id']
+        end
+
+        def url
+            @payload['url']
+        end
+
+        def type
+            @payload['type']
+        end
+
+        def login
+            @payload['login']
+        end
+
+        def gravatar_id
+            @payload['gravatar_id']
+        end
+
+        def avatar_url
+            @payload['avatar_url']
+        end
+
+        def site_admin
+            @payload['site_admin']
+        end
+    end
+
+    class BasicIssue < Entity
+        def url
+            @payload['url']
+        end
+
+        def id
+            @payload['id']
+        end
+
+        def number
+            @payload['number']
+        end
+
+        def state
+            @payload['state']
+        end
+
+        def locked
+            @payload['locked']
+        end
+
+        def title
+            @payload['title']
+        end
+
+        def body
+            @payload['body']
+        end
+
+        def user
+            User.new(@payload['user']) rescue nil
+        end
+
+        def created_at
+            Time.parse(@payload['created_at'])
+        end
+
+        def updated_at
+            Time.parse(@payload['updated_at']) rescue nil
+        end
+
+        def closed_at
+            Time.parse(@payload['closed_at']) rescue nil
+        end
+    end
+
+    class PullRequest < BasicIssue
+        def merged_at
+            Time.parse(@payload['merged_at']) rescue nil
+        end
+
+        def merge_commit_sha
+            @payload['merge_commit_sha']
+        end
+
+        def merged
+            @payload['merged']
+        end
+
+        def mergeable
+            @payload['mergeable']
+        end
+
+        def mergeable_state
+            @payload['mergeable_state']
+        end
+
+        def merged_by
+            @payload['merged_by']
+        end
+
+        def comments
+            @payload['comments']
+        end
+
+        def review_comments
+            @payload['review_comments']
+        end
+
+        def commits
+            @payload['commits']
+        end
+
+        def additions
+            @payload['additions']
+        end
+
+        def deletions
+            @payload['deletions']
+        end
+
+        def changed_files
+            @payload['changed_files']
+        end
+
+        def head
+            @payload['head']
+        end
+
+        def base
+            @payload['base']
+        end
+    end
+
+    class Issue < BasicIssue
+        def labels
+            @payload['labels']
+        end
+    end
+
+    class BasicComment < Entity
+        def url
+            @payload['url']
+        end
+
+        def id
+            @payload['id']
+        end
+
+        def user
+            User.new(@payload['user']) rescue nil
+        end
+
+        def created_at
+            Time.parse(@payload['created_at'])
+        end
+
+        def updated_at
+            Time.parse(@payload['updated_at']) rescue nil
+        end
+
+        def body
+            @payload['body']
+        end
+    end
+
+    class PullRequestComment < BasicComment
+        def diff_hunk
+            @payload['diff_hunk']
+        end
+
+        def path
+            @payload['path']
+        end
+
+        def position
+            @payload['position']
+        end
+
+        def original_position
+            @payload['original_position']
+        end
+
+        def commit_id
+            @payload['commit_id']
+        end
+
+        def original_commit_id
+            @payload['original_commit_id']
+        end
+    end
+
+    class IssueComment < BasicComment
+    end
+
+    class Release < Entity
+        def url
+            @payload['url']
+        end
+
+        def id
+            @payload['id']
+        end
+
+        def tag_name
+            @payload['tag_name']
+        end
+
+        def target_commitish
+            @payload['target_commitish']
+        end
+
+        def name
+            @payload['name']
+        end
+
+        def draft
+            @payload['draft']
+        end
+
+        def author
+            User.new(@payload['author'])
+        end
+
+        def prerelease
+            @payload['prerelease']
+        end
+
+        def created_at
+            Time.parse(@payload['created_at'])
+        end
+
+        def published_at
+            Time.parse(@payload['published_at'])
+        end
+
+        def assets
+            @payload['assets']
+        end
+
+        def tarball_url
+            @payload['tarball_url']
+        end
+
+        def zipball_url
+            @payload['zipball_url']
+        end
+
+        def body
+            @payload['body']
+        end
+    end
+
+    class Page < Entity
+        def name
+            @payload['page_name']
+        end
+
+        def title
+            @payload['title']
+        end
+
+        def summary
+            @payload['summary']
+        end
+
+        def action
+            @payload['action']
+        end
+
+        def sha
+            @payload['sha']
+        end
+    end
+end
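Every entity is a thin read-only wrapper over a payload hash: accessors that may be missing are guarded with rescue nil, and timestamp fields are parsed into Time objects on access. A small sketch with a hand-built payload (the hash literal is illustrative, not real archive data):

    require 'gh-archive/entities'

    commit = GHArchive::Commit.new(
        'sha'     => 'abc123',
        'author'  => { 'email' => 'dev@example.com', 'name' => 'Dev' },
        'message' => 'Fix the parser'
    )
    puts commit.sha            # => "abc123"
    puts commit.author.name    # => "Dev" (a CommitAuthor struct)
    puts commit.message        # => "Fix the parser"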
data/lib/gh-archive/events.rb
ADDED
@@ -0,0 +1,405 @@
+require 'time'
+require_relative File.expand_path('../entities', __FILE__)
+
+module GHArchive
+    class Event
+        def self.parse(json)
+            IMPLEMENTATIONS.each do |event_class|
+                return event_class.new(json) if event_class.fits?(json)
+            end
+
+            return Event.new(json)
+        end
+
+        def initialize(json)
+            @json = json.freeze
+            @payload = json['payload']
+        end
+
+        def public?
+            @json['public']
+        end
+
+        def created_at
+            Time.parse(@json['created_at'])
+        end
+        alias :time :created_at
+
+        def actor
+            User.new(@json['actor'])
+        end
+
+        def repo
+            Repository.new(
+                @json['repo']['id'],
+                @json['repo']['name'],
+                @json['repo']['url']
+            )
+        end
+
+        def json
+            @json
+        end
+    end
+
+    class PushEvent < Event
+        def self.fits?(json)
+            json['type'] == "PushEvent"
+        end
+
+        def push_id
+            @payload['push_id']
+        end
+
+        def size
+            @payload['size']
+        end
+
+        def distinct_size
+            @payload['distinct_size']
+        end
+
+        def head
+            @payload['head']
+        end
+
+        def before
+            @payload['before']
+        end
+
+        def commits
+            @payload['commits'].map { |c| Commit.new(c) }
+        end
+    end
+
+    class CommitCommentEvent < Event
+        def self.fits?(json)
+            return json['type'] == "CommitCommentEvent"
+        end
+
+        def comment_id
+            @payload['comment']['id']
+        end
+
+        def comment_url
+            @payload['comment']['url']
+        end
+
+        def comment_user
+            User.new(@payload['comment']['author'])
+        end
+
+        def comment_position
+            @payload['comment']['position']
+        end
+
+        def comment_line
+            @payload['comment']['line']
+        end
+
+        def comment_path
+            @payload['comment']['path']
+        end
+
+        def comment_commit_id
+            @payload['comment']['commit_id']
+        end
+
+        def comment_body
+            @payload['comment']['body']
+        end
+
+        def comment_created_at
+            Time.parse(@payload['comment']['created_at'])
+        end
+
+        def comment_updated_at
+            Time.parse(@payload['comment']['updated_at'])
+        end
+    end
+
+    class PullRequestEvent < Event
+        def self.fits?(json)
+            return json['type'] == "PullRequestEvent"
+        end
+
+        def action
+            @payload['action']
+        end
+
+        def number
+            @payload['number']
+        end
+
+        def pull_request
+            PullRequest.new(@payload['pull_request'])
+        end
+    end
+
+    class PullRequestReviewCommentEvent < Event
+        def self.fits?(json)
+            return json['type'] == "PullRequestReviewCommentEvent"
+        end
+
+        def action
+            @payload['action']
+        end
+
+        def number
+            @payload['number']
+        end
+
+        def pull_request
+            PullRequest.new(@payload['pull_request'])
+        end
+
+        def comment
+            PullRequestComment.new(@payload['comment'])
+        end
+    end
+
+    class IssuesEvent < Event
+        def self.fits?(json)
+            return json['type'] == "IssuesEvent"
+        end
+
+        def action
+            @payload['action']
+        end
+
+        def issue
+            Issue.new(@payload['issue'])
+        end
+    end
+
+    class IssueCommentEvent < Event
+        def self.fits?(json)
+            return json['type'] == "IssueCommentEvent"
+        end
+
+        def action
+            @payload['action']
+        end
+
+        def issue
+            Issue.new(@payload['issue'])
+        end
+    end
+
+    class CreateEvent < Event
+        def self.fits?(json)
+            return json['type'] == "CreateEvent"
+        end
+
+        def ref
+            @payload['ref']
+        end
+
+        def ref_type
+            @payload['ref_type']
+        end
+
+        def master_branch
+            @payload['master_branch']
+        end
+
+        def description
+            @payload['description']
+        end
+
+        def pusher_type
+            @payload['pusher_type']
+        end
+    end
+
+    class ForkEvent < Event
+        def self.fits?(json)
+            return json['type'] == "ForkEvent"
+        end
+
+        def forkee_id
+            @payload['forkee']['id']
+        end
+
+        def forkee_name
+            @payload['forkee']['name']
+        end
+
+        def forkee_full_name
+            @payload['forkee']['full_name']
+        end
+
+        def forkee_owner
+            User.new(@payload['forkee']['owner'])
+        end
+
+        def forkee_private
+            @payload['forkee']['private']
+        end
+
+        def forkee_description
+            @payload['forkee']['description']
+        end
+
+        def forkee_fork
+            @payload['forkee']['fork']
+        end
+
+        def forkee_created_at
+            Time.parse(@payload['forkee']['created_at'])
+        end
+
+        def forkee_updated_at
+            Time.parse(@payload['forkee']['updated_at'])
+        end
+
+        def forkee_pushed_at
+            Time.parse(@payload['forkee']['pushed_at'])
+        end
+
+        def forkee_urls
+            {
+                'git' => @payload['forkee']['git_url'],
+                'ssh' => @payload['forkee']['ssh_url'],
+                'clone' => @payload['forkee']['clone_url'],
+                'svn' => @payload['forkee']['svn_url']
+            }
+        end
+
+        def forkee_homepage
+            Time.parse(@payload['forkee']['homepage'])
+        end
+
+        def forkee_size
+            Time.parse(@payload['forkee']['size'])
+        end
+
+        def forkee_stargazers_count
+            Time.parse(@payload['forkee']['stargazers_count'])
+        end
+
+        def forkee_watchers_count
+            Time.parse(@payload['forkee']['watchers_count'])
+        end
+
+        def forkee_language
+            Time.parse(@payload['forkee']['language'])
+        end
+
+        def forkee_has_issues
+            Time.parse(@payload['forkee']['has_issues'])
+        end
+
+        def forkee_has_downloads
+            Time.parse(@payload['forkee']['has_downloads'])
+        end
+
+        def forkee_has_wiki
+            Time.parse(@payload['forkee']['has_wiki'])
+        end
+
+        def forkee_has_pages
+            Time.parse(@payload['forkee']['has_pages'])
+        end
+
+        def forkee_forks_count
+            Time.parse(@payload['forkee']['forks_count'])
+        end
+
+        def forkee_mirror_url
+            Time.parse(@payload['forkee']['mirror_url'])
+        end
+
+        def forkee_open_issues_count
+            Time.parse(@payload['forkee']['open_issues_count'])
+        end
+
+        def forkee_watchers
+            Time.parse(@payload['forkee']['watchers'])
+        end
+
+        def forkee_default_branch
+            Time.parse(@payload['forkee']['default_branch'])
+        end
+
+        def forkee_public
+            Time.parse(@payload['forkee']['public'])
+        end
+    end
+
+    class PublicEvent < Event
+        def self.fits?(json)
+            return json['type'] == "PublicEvent"
+        end
+    end
+
+    class WatchEvent < Event
+        def self.fits?(json)
+            return json['type'] == "WatchEvent"
+        end
+
+        def action
+            @payload['action']
+        end
+    end
+
+    class DeleteEvent < Event
+        def self.fits?(json)
+            return json['type'] == "DeleteEvent"
+        end
+
+        def ref
+            @payload['ref']
+        end
+
+        def ref_type
+            @payload['ref_type']
+        end
+
+        def pusher_type
+            @payload['pusher_type']
+        end
+    end
+
+    class ReleaseEvent < Event
+        def self.fits?(json)
+            return json['type'] == "ReleaseEvent"
+        end
+
+        def action
+            @payload['action']
+        end
+
+        def release
+            Release.new(@payload['release'])
+        end
+    end
+
+    class MemberEvent < Event
+        def self.fits?(json)
+            return json['type'] == "MemberEvent"
+        end
+
+        def action
+            @payload['action']
+        end
+
+        def member
+            User.new(@payload['member'])
+        end
+    end
+
+    class GollumEvent < Event
+        def self.fits?(json)
+            return json['type'] == "GollumEvent"
+        end
+
+        def pages
+            @payload[pages].map { |p| Page.new(p) }
+        end
+    end
+
+    class Event
+        IMPLEMENTATIONS = ObjectSpace.each_object(Class).select { |klass| klass < self }
+    end
+end
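Event.parse dispatches on the record's type field. The IMPLEMENTATIONS list is built once at load time with ObjectSpace.each_object(Class), which is why Event is reopened at the very end of the file: by then every subclass has been defined and is picked up automatically, and unknown types fall back to a plain Event. A dispatch sketch with a minimal hand-built record (illustrative data):

    require 'gh-archive/events'

    json = {
        'type'       => 'WatchEvent',
        'public'     => true,
        'created_at' => '2021-08-14T12:00:00Z',
        'payload'    => { 'action' => 'started' }
    }

    event = GHArchive::Event.parse(json)
    puts event.class     # => GHArchive::WatchEvent
    puts event.action    # => "started"
    puts event.time      # => 2021-08-14 12:00:00 UTC (alias of created_at)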
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: gh-archive
 version: !ruby/object:Gem::Version
-  version: '0.4'
+  version: '0.8'
 platform: ruby
 authors:
 - Simone Scalabrino
-autorequire: 
+autorequire:
 bindir: bin
 cert_chain: []
-date: 2021-08-
+date: 2021-08-14 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: code-assertions
@@ -30,6 +30,26 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: 1.1.2
+- !ruby/object:Gem::Dependency
+  name: thread
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.2.2
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 0.2.2
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.2.2
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 0.2.2
 description: Download and analyze the GitHub events stored at GitHub archive
 email: s.scalabrino9@gmail.com
 executables: []
@@ -37,11 +57,13 @@ extensions: []
 extra_rdoc_files: []
 files:
 - lib/gh-archive.rb
+- lib/gh-archive/entities.rb
+- lib/gh-archive/events.rb
 homepage: https://github.com/intersimone999/gh-archive
 licenses:
 - GPL-3.0-only
 metadata: {}
-post_install_message: 
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -56,8 +78,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 - !ruby/object:Gem::Version
   version: '0'
 requirements: []
-rubygems_version: 3.2.
-signing_key: 
+rubygems_version: 3.2.22
+signing_key:
 specification_version: 4
 summary: GitHub Archive mining utility
 test_files: []
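The new runtime dependency on the thread gem (~> 0.2.2, >= 0.2.2) is resolved automatically by RubyGems; picking up this release needs only the usual declaration:

    # Gemfile
    gem 'gh-archive', '~> 0.8'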