gh-archive 0.4 → 0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/gh-archive.rb +121 -67
- data/lib/gh-archive/entities.rb +312 -0
- data/lib/gh-archive/events.rb +405 -0
- metadata +28 -6
checksums.yaml
CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: a4a9b89fb02620499e8b51f5c5e4ba34d0f3bc8b8f3ae4e2e69cba1e027bdb49
+  data.tar.gz: 6b707eb1bcb37b8a9b03ce36a2d4304e6760def0cc139ffad748a1596046f82f
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: '0975e354e028e768fb5bc4c17c19cddbc44b706394f60d8e22a64537a34342965bb98fa00365c329c30d189729dadba271ec6b101ff86593affef7e1d34848b3'
+  data.tar.gz: 95935fb3c27841760a68696c832e02108940f9eef535f5ed89afea9613a7988c622f2649f3feb77b22706250a30cfba30b5f36f2ada617f532e86a0426e052ce
data/lib/gh-archive.rb
CHANGED

@@ -4,6 +4,10 @@ require 'open-uri'
 require 'zlib'
 require 'logger'
 require 'tmpdir'
+require 'thread/pool'
+require 'thread/promise'
+
+require_relative File.expand_path('../gh-archive/events', __FILE__)
 
 module GHAUtils
     def get_gha_filename(date)
@@ -12,14 +16,19 @@ module GHAUtils
 
     def read_gha_file_content(gz)
         gzip = Zlib::GzipReader.new(gz)
-
-
-
-        return content
+        return gzip.read
+    ensure
+        gzip.close if gzip
     end
 
-    def read_gha_file(
-
+    def read_gha_file(file)
+        if file.path.end_with?(".json")
+            content = file.read
+        elsif file.path.end_with?(".gz") || file.path.start_with?("/tmp/open-uri")
+            content = read_gha_file_content(file)
+        else
+            raise "Invalid file extension for #{file.path}: expected `.json.gz` or `json`,"
+        end
 
         result = []
         content.lines.each do |line|
@@ -29,11 +38,11 @@ module GHAUtils
         return result
     end
 
-    def
-
-        while
-            yield
-
+    def each_time(from, to)
+        current_time = from
+        while current_time < to
+            yield current_time
+            current_time += 3600
         end
     end
 end
@@ -46,6 +55,14 @@ class GHAProvider
 
         @includes = {}
         @excludes = {}
+
+        @use_json = true
+    end
+
+    def parse_events
+        @use_json = false
+
+        return self
     end
 
     def logger=(logger)
@@ -61,6 +78,8 @@ class GHAProvider
             @includes[key.to_s] = [] unless @includes[key.to_s]
             @includes[key.to_s] << value
         end
+
+        return self
     end
 
     def exclude(**args)
@@ -68,16 +87,23 @@ class GHAProvider
             @excludes[key.to_s] = [] unless @excludes[key.to_s]
             @excludes[key.to_s] << value
         end
+
+        return self
     end
 
     def each(from = Time.gm(2015, 1, 1), to = Time.now)
-
+        exceptions = []
+
+        self.each_time(from, to) do |current_time|
             events = []
             begin
-                events = self.get(
-
-
-
+                events = self.get(current_time)
+            rescue GHAException => e
+                @logger.warn(e.message)
+                next
+            rescue => e
+                @logger.error("An exception occurred for #{current_time}: #{e.message}")
+                exceptions << e
                 next
             end
 
@@ -92,12 +118,23 @@ class GHAProvider
                 end
                 next if skip
 
-
+                if @use_json
+                    yield event, current_time
+                else
+                    yield GHArchive::Event.parse(event), current_time
+                end
             end
 
+            @logger.info("Scanned #{current_time}")
+
             events.clear
             GC.start
         end
+
+        return exceptions
+    end
+
+    class GHAException < Exception
     end
 end
 
@@ -108,6 +145,7 @@ class OnlineGHAProvider < GHAProvider
         @max_retries = max_retries
         @proactive = proactive
         @proactive_pool_size = proactive_pool_size
+        @pool = Thread.pool(proactive_pool_size)
         @cache = Cache.new
     end
 
@@ -116,76 +154,92 @@ class OnlineGHAProvider < GHAProvider
             begin
                 filename = self.get_gha_filename(current_time)
 
-                if @
-
+                if @proactive
+                    @logger.info("Waiting for cache to have #{current_time}...") unless @cache.has?(filename)
+
+                    while !@cache.has?(filename)
+                        sleep 1
+                    end
+
+                    return @cache.get(filename)
                 else
                     URI.open("http://data.gharchive.org/#{filename}") do |gz|
-                        # Save to cache
                         return self.read_gha_file(gz)
                     end
                 end
-            rescue
-                @logger.warn(
+            rescue Errno::ECONNRESET => e
+                @logger.warn("A server error temporary prevented the download of #{current_time}: " + e.message)
+                next
+            rescue OpenURI::HTTPError => e
+                code = e.io.status[0]
+                if code.start_with?("5")
+                    @logger.warn("A server error temporary prevented the download of #{current_time}: " + e.message)
+                    next
+                else
+                    raise e
+                end
             end
         end
 
-        raise DownloadArchiveException, "Exceeded maximum number of tentative downloads."
+        raise DownloadArchiveException, "Exceeded maximum number of tentative downloads for #{current_time}."
     end
 
     def cache(current_time)
+        @logger.info("Full cache. Waiting for some free slot...") if @cache.full?
+        while @cache.full?
+            sleep 1
+        end
         @max_retries.times do
             begin
                 filename = self.get_gha_filename(current_time)
-
                 URI.open("http://data.gharchive.org/#{filename}") do |gz|
                     content = self.read_gha_file(gz)
                     @cache.put(filename, content)
                     return
                 end
-            rescue
-
+            rescue Errno::ECONNRESET => e
+                @logger.warn("A server error temporary prevented the download of #{current_time}: " + e.message)
+                next
+            rescue OpenURI::HTTPError => e
+                code = e.io.status[0]
+                if code.start_with?("5")
+                    @logger.warn("A server error temporary prevented the download of #{current_time}: " + e.message)
+                    next
+                else
+                    raise e
+                end
             end
         end
    end
 
    def each(from = Time.gm(2015, 1, 1), to = Time.now)
        if @proactive
-
-
-
-
-
-
-
-
-
-            pool << Thread.start do
-                self.cache(current_date)
-                @logger.info("Proactively cached #{current_date}. Cache size: #{@cache.size}")
-            end
-
-            pool.delete_if { |t| !t.alive? }
+            any_ready = Thread.promise
+
+            @logger.info("Proactively scheduling download tasks...")
+            self.each_time(from, to) do |current_time|
+                @pool.process(current_time) do |current_time|
+                    cache(current_time)
+                    any_ready << true
+                    @logger.info("Proactively cached #{current_time}. Cache size: #{@cache.size}")
                 end
             end
+
+            ~any_ready
+            @logger.info("Download tasks successfully scheduled!")
        end
 
        super
    end
 
    class Cache
-        def initialize(
+        def initialize(max_size = 10)
            @cache = {}
            @max_size = max_size
-            @folder = folder
            @mutex = Mutex.new
        end
 
        def put(name, content)
-            #filename = "#@folder/#{name}"
-            #File.open(filename, 'w') do |f|
-            #f << content
-            #end
-
            @mutex.synchronize do
                @cache[name] = content
            end
@@ -195,18 +249,6 @@ class OnlineGHAProvider < GHAProvider
            @mutex.synchronize do
                return @cache.delete(name)
            end
-        ensure
-            #self.unload(name)
-        end
-
-        def unload(name)
-            File.unlink(@cache[name])
-
-            @mutex.synchronize do
-                @cache.delete(name)
-            end
-
-            return true
        end
 
        def size
@@ -224,7 +266,7 @@ class OnlineGHAProvider < GHAProvider
        end
    end
 
-    class DownloadArchiveException <
+    class DownloadArchiveException < GHAProvider::GHAException
    end
 end
 
@@ -237,8 +279,20 @@ class FolderGHAProvider < GHAProvider
 
    def get(current_time)
        filename = self.get_gha_filename(current_time)
-        File.
-
+        complete_filename = File.join(@folder, filename)
+        mode = "rb"
+
+        unless FileTest.exist?(complete_filename)
+            complete_filename = complete_filename.sub(".gz", "")
+            mode = "r"
+        end
+
+        unless FileTest.exist?(complete_filename)
+            raise GHAException.new("Cannot find any file (neither `.json.gz` nor `.json`) for #{current_time}")
+        end
+
+        File.open(complete_filename, mode) do |file|
+            return self.read_gha_file(file)
        end
    end
 end
@@ -267,17 +321,17 @@ class GHADownloader
 
    def download(from = Time.gm(2015, 1, 1), to = Time.now)
        archive = []
-        self.
-            filename = self.get_gha_filename(
+        self.each_time(from, to) do |current_time|
+            filename = self.get_gha_filename(current_time)
            out_filename = filename.clone
            out_filename.gsub!(".json.gz", ".json") if @decompress
 
            target_file = File.join(@folder, out_filename)
            if FileTest.exist?(target_file)
-                @logger.info("Skipping existing file for #{
+                @logger.info("Skipping existing file for #{current_time}")
                next
            else
-                @logger.info("Downloading file for #{
+                @logger.info("Downloading file for #{current_time}")
            end
 
            File.open(target_file, 'w') do |f|
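Taken together, these changes rework the provider API: filters chain, events can be parsed into typed objects, and each hour is reported as it is scanned. Below is a minimal usage sketch based only on the methods visible in this diff; it assumes OnlineGHAProvider.new supplies defaults for max_retries, proactive, and proactive_pool_size (the constructor defaults are not shown here).

require 'gh-archive'

provider = OnlineGHAProvider.new          # constructor defaults assumed
provider.logger = Logger.new(STDOUT)

# include and parse_events now return self, so calls can be chained;
# with parse_events, each yields GHArchive::Event subclasses instead
# of raw JSON hashes.
provider.include(type: 'PushEvent').parse_events

# each now also yields the hour being scanned and returns the list of
# non-fatal exceptions it collected instead of aborting on the first one.
exceptions = provider.each(Time.gm(2015, 1, 1), Time.gm(2015, 1, 2)) do |event, time|
    puts "#{time}: #{event.repo.name} (#{event.commits.size} commits)"
end
warn "#{exceptions.size} hour(s) could not be processed" unless exceptions.empty?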
data/lib/gh-archive/entities.rb
ADDED

@@ -0,0 +1,312 @@
+require 'time'
+
+module GHArchive
+    Repository = Struct.new(:id, :name, :url)
+    CommitAuthor = Struct.new(:email, :name)
+
+    class Entity
+        def initialize(payload)
+            @payload = payload
+        end
+    end
+
+    class Commit < Entity
+        def sha
+            @payload['sha']
+        end
+
+        def author
+            CommitAuthor.new(
+                @payload['author']['email'],
+                @payload['author']['name']
+            )
+        end
+
+        def message
+            @payload['message']
+        end
+
+        def distinct
+            @payload['distinct']
+        end
+
+        def url
+            @payload['url']
+        end
+    end
+
+    class User < Entity
+        def id
+            @payload['id']
+        end
+
+        def url
+            @payload['url']
+        end
+
+        def type
+            @payload['type']
+        end
+
+        def login
+            @payload['login']
+        end
+
+        def gravatar_id
+            @payload['gravatar_id']
+        end
+
+        def avatar_url
+            @payload['avatar_url']
+        end
+
+        def site_admin
+            @payload['site_admin']
+        end
+    end
+
+    class BasicIssue < Entity
+        def url
+            @payload['url']
+        end
+
+        def id
+            @payload['id']
+        end
+
+        def number
+            @payload['number']
+        end
+
+        def state
+            @payload['state']
+        end
+
+        def locked
+            @payload['locked']
+        end
+
+        def title
+            @payload['title']
+        end
+
+        def body
+            @payload['body']
+        end
+
+        def user
+            User.new(@payload['user']) rescue nil
+        end
+
+        def created_at
+            Time.parse(@payload['created_at'])
+        end
+
+        def updated_at
+            Time.parse(@payload['updated_at']) rescue nil
+        end
+
+        def closed_at
+            Time.parse(@payload['closed_at']) rescue nil
+        end
+    end
+
+    class PullRequest < BasicIssue
+        def merged_at
+            Time.parse(@payload['merged_at']) rescue nil
+        end
+
+        def merge_commit_sha
+            @payload['merge_commit_sha']
+        end
+
+        def merged
+            @payload['merged']
+        end
+
+        def mergeable
+            @payload['mergeable']
+        end
+
+        def mergeable_state
+            @payload['mergeable_state']
+        end
+
+        def merged_by
+            @payload['merged_by']
+        end
+
+        def comments
+            @payload['comments']
+        end
+
+        def review_comments
+            @payload['review_comments']
+        end
+
+        def commits
+            @payload['commits']
+        end
+
+        def additions
+            @payload['additions']
+        end
+
+        def deletions
+            @payload['deletions']
+        end
+
+        def changed_files
+            @payload['changed_files']
+        end
+
+        def head
+            @payload['head']
+        end
+
+        def base
+            @payload['base']
+        end
+    end
+
+    class Issue < BasicIssue
+        def labels
+            @payload['labels']
+        end
+    end
+
+    class BasicComment < Entity
+        def url
+            @payload['url']
+        end
+
+        def id
+            @payload['id']
+        end
+
+        def user
+            User.new(@payload['user']) rescue nil
+        end
+
+        def created_at
+            Time.parse(@payload['created_at'])
+        end
+
+        def updated_at
+            Time.parse(@payload['updated_at']) rescue nil
+        end
+
+        def body
+            @payload['body']
+        end
+    end
+
+    class PullRequestComment < BasicComment
+        def diff_hunk
+            @payload['diff_hunk']
+        end
+
+        def path
+            @payload['path']
+        end
+
+        def position
+            @payload['position']
+        end
+
+        def original_position
+            @payload['original_position']
+        end
+
+        def commit_id
+            @payload['commit_id']
+        end
+
+        def original_commit_id
+            @payload['original_commit_id']
+        end
+    end
+
+    class IssueComment < BasicComment
+    end
+
+    class Release < Entity
+        def url
+            @payload['url']
+        end
+
+        def id
+            @payload['id']
+        end
+
+        def tag_name
+            @payload['tag_name']
+        end
+
+        def target_commitish
+            @payload['target_commitish']
+        end
+
+        def name
+            @payload['name']
+        end
+
+        def draft
+            @payload['draft']
+        end
+
+        def author
+            User.new(@payload['author'])
+        end
+
+        def prerelease
+            @payload['prerelease']
+        end
+
+        def created_at
+            Time.parse(@payload['created_at'])
+        end
+
+        def published_at
+            Time.parse(@payload['published_at'])
+        end
+
+        def assets
+            @payload['assets']
+        end
+
+        def tarball_url
+            @payload['tarball_url']
+        end
+
+        def zipball_url
+            @payload['zipball_url']
+        end
+
+        def body
+            @payload['body']
+        end
+    end
+
+    class Page < Entity
+        def name
+            @payload['page_name']
+        end
+
+        def title
+            @payload['title']
+        end
+
+        def summary
+            @payload['summary']
+        end
+
+        def action
+            @payload['action']
+        end
+
+        def sha
+            @payload['sha']
+        end
+    end
+end
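The entity classes above are thin, read-only wrappers around the raw JSON payload hashes: accessors simply index into the payload, and timestamp fields are parsed lazily with Time.parse. A short illustration (the payload fragment is hypothetical, but uses exactly the keys the accessors read):

require 'gh-archive/entities'

# Hypothetical commit payload in the shape the wrapper expects.
raw_commit = {
    'sha' => 'deadbeef',
    'author' => { 'email' => 'dev@example.com', 'name' => 'Dev' },
    'message' => 'Fix crash on empty input',
    'distinct' => true,
    'url' => 'https://api.github.com/repos/example/example/commits/deadbeef'
}

commit = GHArchive::Commit.new(raw_commit)
commit.sha          # => "deadbeef"
commit.author.name  # => "Dev" (a CommitAuthor struct)
commit.message      # => "Fix crash on empty input"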
data/lib/gh-archive/events.rb
ADDED

@@ -0,0 +1,405 @@
+require 'time'
+require_relative File.expand_path('../entities', __FILE__)
+
+module GHArchive
+    class Event
+        def self.parse(json)
+            IMPLEMENTATIONS.each do |event_class|
+                return event_class.new(json) if event_class.fits?(json)
+            end
+
+            return Event.new(json)
+        end
+
+        def initialize(json)
+            @json = json.freeze
+            @payload = json['payload']
+        end
+
+        def public?
+            @json['public']
+        end
+
+        def created_at
+            Time.parse(@json['created_at'])
+        end
+        alias :time :created_at
+
+        def actor
+            User.new(@json['actor'])
+        end
+
+        def repo
+            Repository.new(
+                @json['repo']['id'],
+                @json['repo']['name'],
+                @json['repo']['url']
+            )
+        end
+
+        def json
+            @json
+        end
+    end
+
+    class PushEvent < Event
+        def self.fits?(json)
+            json['type'] == "PushEvent"
+        end
+
+        def push_id
+            @payload['push_id']
+        end
+
+        def size
+            @payload['size']
+        end
+
+        def distinct_size
+            @payload['distinct_size']
+        end
+
+        def head
+            @payload['head']
+        end
+
+        def before
+            @payload['before']
+        end
+
+        def commits
+            @payload['commits'].map { |c| Commit.new(c) }
+        end
+    end
+
+    class CommitCommentEvent < Event
+        def self.fits?(json)
+            return json['type'] == "CommitCommentEvent"
+        end
+
+        def comment_id
+            @payload['comment']['id']
+        end
+
+        def comment_url
+            @payload['comment']['url']
+        end
+
+        def comment_user
+            User.new(@payload['comment']['author'])
+        end
+
+        def comment_position
+            @payload['comment']['position']
+        end
+
+        def comment_line
+            @payload['comment']['line']
+        end
+
+        def comment_path
+            @payload['comment']['path']
+        end
+
+        def comment_commit_id
+            @payload['comment']['commit_id']
+        end
+
+        def comment_body
+            @payload['comment']['body']
+        end
+
+        def comment_created_at
+            Time.parse(@payload['comment']['created_at'])
+        end
+
+        def comment_updated_at
+            Time.parse(@payload['comment']['updated_at'])
+        end
+    end
+
+    class PullRequestEvent < Event
+        def self.fits?(json)
+            return json['type'] == "PullRequestEvent"
+        end
+
+        def action
+            @payload['action']
+        end
+
+        def number
+            @payload['number']
+        end
+
+        def pull_request
+            PullRequest.new(@payload['pull_request'])
+        end
+    end
+
+    class PullRequestReviewCommentEvent < Event
+        def self.fits?(json)
+            return json['type'] == "PullRequestReviewCommentEvent"
+        end
+
+        def action
+            @payload['action']
+        end
+
+        def number
+            @payload['number']
+        end
+
+        def pull_request
+            PullRequest.new(@payload['pull_request'])
+        end
+
+        def comment
+            PullRequestComment.new(@payload['comment'])
+        end
+    end
+
+    class IssuesEvent < Event
+        def self.fits?(json)
+            return json['type'] == "IssuesEvent"
+        end
+
+        def action
+            @payload['action']
+        end
+
+        def issue
+            Issue.new(@payload['issue'])
+        end
+    end
+
+    class IssueCommentEvent < Event
+        def self.fits?(json)
+            return json['type'] == "IssueCommentEvent"
+        end
+
+        def action
+            @payload['action']
+        end
+
+        def issue
+            Issue.new(@payload['issue'])
+        end
+    end
+
+    class CreateEvent < Event
+        def self.fits?(json)
+            return json['type'] == "CreateEvent"
+        end
+
+        def ref
+            @payload['ref']
+        end
+
+        def ref_type
+            @payload['ref_type']
+        end
+
+        def master_branch
+            @payload['master_branch']
+        end
+
+        def description
+            @payload['description']
+        end
+
+        def pusher_type
+            @payload['pusher_type']
+        end
+    end
+
+    class ForkEvent < Event
+        def self.fits?(json)
+            return json['type'] == "ForkEvent"
+        end
+
+        def forkee_id
+            @payload['forkee']['id']
+        end
+
+        def forkee_name
+            @payload['forkee']['name']
+        end
+
+        def forkee_full_name
+            @payload['forkee']['full_name']
+        end
+
+        def forkee_owner
+            User.new(@payload['forkee']['owner'])
+        end
+
+        def forkee_private
+            @payload['forkee']['private']
+        end
+
+        def forkee_description
+            @payload['forkee']['description']
+        end
+
+        def forkee_fork
+            @payload['forkee']['fork']
+        end
+
+        def forkee_created_at
+            Time.parse(@payload['forkee']['created_at'])
+        end
+
+        def forkee_updated_at
+            Time.parse(@payload['forkee']['updated_at'])
+        end
+
+        def forkee_pushed_at
+            Time.parse(@payload['forkee']['pushed_at'])
+        end
+
+        def forkee_urls
+            {
+                'git' => @payload['forkee']['git_url'],
+                'ssh' => @payload['forkee']['ssh_url'],
+                'clone' => @payload['forkee']['clone_url'],
+                'svn' => @payload['forkee']['svn_url']
+            }
+        end
+
+        def forkee_homepage
+            Time.parse(@payload['forkee']['homepage'])
+        end
+
+        def forkee_size
+            Time.parse(@payload['forkee']['size'])
+        end
+
+        def forkee_stargazers_count
+            Time.parse(@payload['forkee']['stargazers_count'])
+        end
+
+        def forkee_watchers_count
+            Time.parse(@payload['forkee']['watchers_count'])
+        end
+
+        def forkee_language
+            Time.parse(@payload['forkee']['language'])
+        end
+
+        def forkee_has_issues
+            Time.parse(@payload['forkee']['has_issues'])
+        end
+
+        def forkee_has_downloads
+            Time.parse(@payload['forkee']['has_downloads'])
+        end
+
+        def forkee_has_wiki
+            Time.parse(@payload['forkee']['has_wiki'])
+        end
+
+        def forkee_has_pages
+            Time.parse(@payload['forkee']['has_pages'])
+        end
+
+        def forkee_forks_count
+            Time.parse(@payload['forkee']['forks_count'])
+        end
+
+        def forkee_mirror_url
+            Time.parse(@payload['forkee']['mirror_url'])
+        end
+
+        def forkee_open_issues_count
+            Time.parse(@payload['forkee']['open_issues_count'])
+        end
+
+        def forkee_watchers
+            Time.parse(@payload['forkee']['watchers'])
+        end
+
+        def forkee_default_branch
+            Time.parse(@payload['forkee']['default_branch'])
+        end
+
+        def forkee_public
+            Time.parse(@payload['forkee']['public'])
+        end
+    end
+
+    class PublicEvent < Event
+        def self.fits?(json)
+            return json['type'] == "PublicEvent"
+        end
+    end
+
+    class WatchEvent < Event
+        def self.fits?(json)
+            return json['type'] == "WatchEvent"
+        end
+
+        def action
+            @payload['action']
+        end
+    end
+
+    class DeleteEvent < Event
+        def self.fits?(json)
+            return json['type'] == "DeleteEvent"
+        end
+
+        def ref
+            @payload['ref']
+        end
+
+        def ref_type
+            @payload['ref_type']
+        end
+
+        def pusher_type
+            @payload['pusher_type']
+        end
+    end
+
+    class ReleaseEvent < Event
+        def self.fits?(json)
+            return json['type'] == "ReleaseEvent"
+        end
+
+        def action
+            @payload['action']
+        end
+
+        def release
+            Release.new(@payload['release'])
+        end
+    end
+
+    class MemberEvent < Event
+        def self.fits?(json)
+            return json['type'] == "MemberEvent"
+        end
+
+        def action
+            @payload['action']
+        end
+
+        def member
+            User.new(@payload['member'])
+        end
+    end
+
+    class GollumEvent < Event
+        def self.fits?(json)
+            return json['type'] == "GollumEvent"
+        end
+
+        def pages
+            @payload[pages].map { |p| Page.new(p) }
+        end
+    end
+
+    class Event
+        IMPLEMENTATIONS = ObjectSpace.each_object(Class).select { |klass| klass < self }
+    end
+end
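Event.parse implements a simple registry-based dispatch: IMPLEMENTATIONS is built once, in the reopened Event class at the bottom of the file (after every subclass has been defined), by filtering ObjectSpace for classes that inherit from Event. parse returns an instance of the first subclass whose fits? accepts the JSON, falling back to a plain Event for unrecognized types. A sketch with a hypothetical minimal event hash:

require 'gh-archive/events'

# Hypothetical minimal event in GitHub Archive shape.
json = {
    'type' => 'WatchEvent',
    'public' => true,
    'created_at' => '2015-01-01T00:00:00Z',
    'payload' => { 'action' => 'started' }
}

event = GHArchive::Event.parse(json)
event.class       # => GHArchive::WatchEvent (selected via WatchEvent.fits?)
event.action      # => "started"
event.created_at  # => 2015-01-01 00:00:00 UTC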
metadata
CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: gh-archive
 version: !ruby/object:Gem::Version
-  version: '0.4'
+  version: '0.8'
 platform: ruby
 authors:
 - Simone Scalabrino
-autorequire:
+autorequire:
 bindir: bin
 cert_chain: []
-date: 2021-08-
+date: 2021-08-14 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: code-assertions
@@ -30,6 +30,26 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: 1.1.2
+- !ruby/object:Gem::Dependency
+  name: thread
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.2.2
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 0.2.2
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.2.2
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 0.2.2
 description: Download and analyze the GitHub events stored at GitHub archive
 email: s.scalabrino9@gmail.com
 executables: []
@@ -37,11 +57,13 @@ extensions: []
 extra_rdoc_files: []
 files:
 - lib/gh-archive.rb
+- lib/gh-archive/entities.rb
+- lib/gh-archive/events.rb
 homepage: https://github.com/intersimone999/gh-archive
 licenses:
 - GPL-3.0-only
 metadata: {}
-post_install_message:
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -56,8 +78,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-rubygems_version: 3.2.
-signing_key:
+rubygems_version: 3.2.22
+signing_key:
 specification_version: 4
 summary: GitHub Archive mining utility
 test_files: []
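For consumers, the gemspec changes amount to the version bump and a new runtime dependency on the thread gem (~> 0.2.2, >= 0.2.2), which backs the proactive download pool. A Gemfile entry like the following pulls everything in:

gem 'gh-archive', '0.8'   # the thread dependency is installed automatically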