gh-archive 0.4 → 0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4a7b3d8b97242ec787b56b409e4a81823473adc2b2915d5466e38a29015564a9
4
- data.tar.gz: 6c50ded9b466fedb0ea1ca22dfcf8bdf9ec65a83b4666a429122a7345d0ebdea
3
+ metadata.gz: a4a9b89fb02620499e8b51f5c5e4ba34d0f3bc8b8f3ae4e2e69cba1e027bdb49
4
+ data.tar.gz: 6b707eb1bcb37b8a9b03ce36a2d4304e6760def0cc139ffad748a1596046f82f
5
5
  SHA512:
6
- metadata.gz: 79bdb3c1649ff4fc86d711ce309e11055eb68720b8bf1d87f7e31b1a3a1586949cdd8d623630d446f7ec04288a2b0db7c598ca8bcf90c8a0c6c36edf6884805b
7
- data.tar.gz: b790981c3d3becd6e46cece1e4bc5fa4cc6b7a25ac5c0bdc9fbbf6ab3ea6cb188f0d5abf48036426fec82e2fd1543ba85928f32df3c6915d56d2300eb35a43ce
6
+ metadata.gz: '0975e354e028e768fb5bc4c17c19cddbc44b706394f60d8e22a64537a34342965bb98fa00365c329c30d189729dadba271ec6b101ff86593affef7e1d34848b3'
7
+ data.tar.gz: 95935fb3c27841760a68696c832e02108940f9eef535f5ed89afea9613a7988c622f2649f3feb77b22706250a30cfba30b5f36f2ada617f532e86a0426e052ce
data/lib/gh-archive.rb CHANGED
@@ -4,6 +4,10 @@ require 'open-uri'
4
4
  require 'zlib'
5
5
  require 'logger'
6
6
  require 'tmpdir'
7
+ require 'thread/pool'
8
+ require 'thread/promise'
9
+
10
+ require_relative File.expand_path('../gh-archive/events', __FILE__)
7
11
 
8
12
  module GHAUtils
9
13
  def get_gha_filename(date)
@@ -12,14 +16,19 @@ module GHAUtils
12
16
 
13
17
  def read_gha_file_content(gz)
14
18
  gzip = Zlib::GzipReader.new(gz)
15
- content = gzip.read
16
- gzip.close
17
-
18
- return content
19
+ return gzip.read
20
+ ensure
21
+ gzip.close if gzip
19
22
  end
20
23
 
21
- def read_gha_file(gz)
22
- content = read_gha_file_content(gz)
24
+ def read_gha_file(file)
25
+ if file.path.end_with?(".json")
26
+ content = file.read
27
+ elsif file.path.end_with?(".gz") || file.path.start_with?("/tmp/open-uri")
28
+ content = read_gha_file_content(file)
29
+ else
30
+ raise "Invalid file extension for #{file.path}: expected `.json.gz` or `json`,"
31
+ end
23
32
 
24
33
  result = []
25
34
  content.lines.each do |line|
@@ -29,11 +38,11 @@ module GHAUtils
29
38
  return result
30
39
  end
31
40
 
32
- def each_date(from, to)
33
- current_date = from
34
- while current_date < to
35
- yield current_date
36
- current_date += 3600
41
+ def each_time(from, to)
42
+ current_time = from
43
+ while current_time < to
44
+ yield current_time
45
+ current_time += 3600
37
46
  end
38
47
  end
39
48
  end
@@ -46,6 +55,14 @@ class GHAProvider
46
55
 
47
56
  @includes = {}
48
57
  @excludes = {}
58
+
59
+ @use_json = true
60
+ end
61
+
62
+ def parse_events
63
+ @use_json = false
64
+
65
+ return self
49
66
  end
50
67
 
51
68
  def logger=(logger)
@@ -61,6 +78,8 @@ class GHAProvider
61
78
  @includes[key.to_s] = [] unless @includes[key.to_s]
62
79
  @includes[key.to_s] << value
63
80
  end
81
+
82
+ return self
64
83
  end
65
84
 
66
85
  def exclude(**args)
@@ -68,16 +87,23 @@ class GHAProvider
68
87
  @excludes[key.to_s] = [] unless @excludes[key.to_s]
69
88
  @excludes[key.to_s] << value
70
89
  end
90
+
91
+ return self
71
92
  end
72
93
 
73
94
  def each(from = Time.gm(2015, 1, 1), to = Time.now)
74
- self.each_date(from, to) do |current_date|
95
+ exceptions = []
96
+
97
+ self.each_time(from, to) do |current_time|
75
98
  events = []
76
99
  begin
77
- events = self.get(current_date)
78
- @logger.info("Scanned #{current_date}")
79
- rescue
80
- @logger.error($!)
100
+ events = self.get(current_time)
101
+ rescue GHAException => e
102
+ @logger.warn(e.message)
103
+ next
104
+ rescue => e
105
+ @logger.error("An exception occurred for #{current_time}: #{e.message}")
106
+ exceptions << e
81
107
  next
82
108
  end
83
109
 
@@ -92,12 +118,23 @@ class GHAProvider
92
118
  end
93
119
  next if skip
94
120
 
95
- yield event, current_date
121
+ if @use_json
122
+ yield event, current_time
123
+ else
124
+ yield GHArchive::Event.parse(event), current_time
125
+ end
96
126
  end
97
127
 
128
+ @logger.info("Scanned #{current_time}")
129
+
98
130
  events.clear
99
131
  GC.start
100
132
  end
133
+
134
+ return exceptions
135
+ end
136
+
137
+ class GHAException < Exception
101
138
  end
102
139
  end
103
140
 
@@ -108,6 +145,7 @@ class OnlineGHAProvider < GHAProvider
108
145
  @max_retries = max_retries
109
146
  @proactive = proactive
110
147
  @proactive_pool_size = proactive_pool_size
148
+ @pool = Thread.pool(proactive_pool_size)
111
149
  @cache = Cache.new
112
150
  end
113
151
 
@@ -116,76 +154,92 @@ class OnlineGHAProvider < GHAProvider
116
154
  begin
117
155
  filename = self.get_gha_filename(current_time)
118
156
 
119
- if @cache.has?(filename)
120
- result = @cache.get(filename)
157
+ if @proactive
158
+ @logger.info("Waiting for cache to have #{current_time}...") unless @cache.has?(filename)
159
+
160
+ while !@cache.has?(filename)
161
+ sleep 1
162
+ end
163
+
164
+ return @cache.get(filename)
121
165
  else
122
166
  URI.open("http://data.gharchive.org/#{filename}") do |gz|
123
- # Save to cache
124
167
  return self.read_gha_file(gz)
125
168
  end
126
169
  end
127
- rescue
128
- @logger.warn($!)
170
+ rescue Errno::ECONNRESET => e
171
+ @logger.warn("A server error temporary prevented the download of #{current_time}: " + e.message)
172
+ next
173
+ rescue OpenURI::HTTPError => e
174
+ code = e.io.status[0]
175
+ if code.start_with?("5")
176
+ @logger.warn("A server error temporary prevented the download of #{current_time}: " + e.message)
177
+ next
178
+ else
179
+ raise e
180
+ end
129
181
  end
130
182
  end
131
183
 
132
- raise DownloadArchiveException, "Exceeded maximum number of tentative downloads."
184
+ raise DownloadArchiveException, "Exceeded maximum number of tentative downloads for #{current_time}."
133
185
  end
134
186
 
135
187
  def cache(current_time)
188
+ @logger.info("Full cache. Waiting for some free slot...") if @cache.full?
189
+ while @cache.full?
190
+ sleep 1
191
+ end
136
192
  @max_retries.times do
137
193
  begin
138
194
  filename = self.get_gha_filename(current_time)
139
-
140
195
  URI.open("http://data.gharchive.org/#{filename}") do |gz|
141
196
  content = self.read_gha_file(gz)
142
197
  @cache.put(filename, content)
143
198
  return
144
199
  end
145
- rescue
146
- p $!
200
+ rescue Errno::ECONNRESET => e
201
+ @logger.warn("A server error temporary prevented the download of #{current_time}: " + e.message)
202
+ next
203
+ rescue OpenURI::HTTPError => e
204
+ code = e.io.status[0]
205
+ if code.start_with?("5")
206
+ @logger.warn("A server error temporary prevented the download of #{current_time}: " + e.message)
207
+ next
208
+ else
209
+ raise e
210
+ end
147
211
  end
148
212
  end
149
213
  end
150
214
 
151
215
  def each(from = Time.gm(2015, 1, 1), to = Time.now)
152
216
  if @proactive
153
- @logger.info("Proactive download thread started")
154
- Thread.start do
155
- pool = []
156
- self.each_date(from, to) do |current_date|
157
- while pool.size > @proactive_pool_size || @cache.full?
158
- pool.delete_if { |t| !t.alive? }
159
- sleep 0.1
160
- end
161
-
162
- pool << Thread.start do
163
- self.cache(current_date)
164
- @logger.info("Proactively cached #{current_date}. Cache size: #{@cache.size}")
165
- end
166
-
167
- pool.delete_if { |t| !t.alive? }
217
+ any_ready = Thread.promise
218
+
219
+ @logger.info("Proactively scheduling download tasks...")
220
+ self.each_time(from, to) do |current_time|
221
+ @pool.process(current_time) do |current_time|
222
+ cache(current_time)
223
+ any_ready << true
224
+ @logger.info("Proactively cached #{current_time}. Cache size: #{@cache.size}")
168
225
  end
169
226
  end
227
+
228
+ ~any_ready
229
+ @logger.info("Download tasks successfully scheduled!")
170
230
  end
171
231
 
172
232
  super
173
233
  end
174
234
 
175
235
  class Cache
176
- def initialize(folder = Dir.mktmpdir, max_size = 100)
236
+ def initialize(max_size = 10)
177
237
  @cache = {}
178
238
  @max_size = max_size
179
- @folder = folder
180
239
  @mutex = Mutex.new
181
240
  end
182
241
 
183
242
  def put(name, content)
184
- #filename = "#@folder/#{name}"
185
- #File.open(filename, 'w') do |f|
186
- #f << content
187
- #end
188
-
189
243
  @mutex.synchronize do
190
244
  @cache[name] = content
191
245
  end
@@ -195,18 +249,6 @@ class OnlineGHAProvider < GHAProvider
195
249
  @mutex.synchronize do
196
250
  return @cache.delete(name)
197
251
  end
198
- ensure
199
- #self.unload(name)
200
- end
201
-
202
- def unload(name)
203
- File.unlink(@cache[name])
204
-
205
- @mutex.synchronize do
206
- @cache.delete(name)
207
- end
208
-
209
- return true
210
252
  end
211
253
 
212
254
  def size
@@ -224,7 +266,7 @@ class OnlineGHAProvider < GHAProvider
224
266
  end
225
267
  end
226
268
 
227
- class DownloadArchiveException < Exception
269
+ class DownloadArchiveException < GHAProvider::GHAException
228
270
  end
229
271
  end
230
272
 
@@ -237,8 +279,20 @@ class FolderGHAProvider < GHAProvider
237
279
 
238
280
  def get(current_time)
239
281
  filename = self.get_gha_filename(current_time)
240
- File.open(File.join(@folder, filename), "rb") do |gz|
241
- return self.read_gha_file(gz)
282
+ complete_filename = File.join(@folder, filename)
283
+ mode = "rb"
284
+
285
+ unless FileTest.exist?(complete_filename)
286
+ complete_filename = complete_filename.sub(".gz", "")
287
+ mode = "r"
288
+ end
289
+
290
+ unless FileTest.exist?(complete_filename)
291
+ raise GHAException.new("Cannot find any file (neither `.json.gz` nor `.json`) for #{current_time}")
292
+ end
293
+
294
+ File.open(complete_filename, mode) do |file|
295
+ return self.read_gha_file(file)
242
296
  end
243
297
  end
244
298
  end
@@ -267,17 +321,17 @@ class GHADownloader
267
321
 
268
322
  def download(from = Time.gm(2015, 1, 1), to = Time.now)
269
323
  archive = []
270
- self.each_date(from, to) do |current_date|
271
- filename = self.get_gha_filename(current_date)
324
+ self.each_time(from, to) do |current_time|
325
+ filename = self.get_gha_filename(current_time)
272
326
  out_filename = filename.clone
273
327
  out_filename.gsub!(".json.gz", ".json") if @decompress
274
328
 
275
329
  target_file = File.join(@folder, out_filename)
276
330
  if FileTest.exist?(target_file)
277
- @logger.info("Skipping existing file for #{current_date}")
331
+ @logger.info("Skipping existing file for #{current_time}")
278
332
  next
279
333
  else
280
- @logger.info("Downloading file for #{current_date}")
334
+ @logger.info("Downloading file for #{current_time}")
281
335
  end
282
336
 
283
337
  File.open(target_file, 'w') do |f|
@@ -0,0 +1,312 @@
1
+ require 'time'
2
+
3
# Read-only wrappers around the JSON fragments found inside GitHub Archive
# event payloads. Each entity keeps the raw Hash it was built from and exposes
# its fields through plain reader methods.
module GHArchive
  # Repository an event refers to.
  Repository = Struct.new(:id, :name, :url)
  # Author recorded on a commit.
  CommitAuthor = Struct.new(:email, :name)

  # Base class of every entity: stores the raw payload Hash.
  class Entity
    # @param payload [Hash] JSON fragment (string keys) describing the entity
    def initialize(payload)
      @payload = payload
    end

    private

    # Parses the timestamp stored under +key+.
    #
    # @return [Time, nil] nil when the field is missing or cannot be parsed
    def optional_time(key)
      value = @payload[key]
      return nil if value.nil?

      Time.parse(value)
    rescue ArgumentError, TypeError
      # Time.parse raises ArgumentError for unparsable text and TypeError for
      # non-String values; both mean "no usable timestamp".
      nil
    end

    # Wraps the user stored under +key+.
    #
    # @return [User, nil] nil when the payload carries no such user
    def optional_user(key)
      value = @payload[key]
      value.nil? ? nil : User.new(value)
    end
  end

  # A commit listed in a push.
  class Commit < Entity
    def sha
      @payload['sha']
    end

    # @return [CommitAuthor] e-mail and name of the commit author
    def author
      CommitAuthor.new(
        @payload['author']['email'],
        @payload['author']['name']
      )
    end

    def message
      @payload['message']
    end

    def distinct
      @payload['distinct']
    end

    def url
      @payload['url']
    end
  end

  # A GitHub account (actor, owner, member, ...).
  class User < Entity
    def id
      @payload['id']
    end

    def url
      @payload['url']
    end

    def type
      @payload['type']
    end

    def login
      @payload['login']
    end

    def gravatar_id
      @payload['gravatar_id']
    end

    def avatar_url
      @payload['avatar_url']
    end

    def site_admin
      @payload['site_admin']
    end
  end

  # Fields shared by issues and pull requests.
  class BasicIssue < Entity
    def url
      @payload['url']
    end

    def id
      @payload['id']
    end

    def number
      @payload['number']
    end

    def state
      @payload['state']
    end

    def locked
      @payload['locked']
    end

    def title
      @payload['title']
    end

    def body
      @payload['body']
    end

    # @return [User, nil] the author, or nil when the payload has no user
    def user
      optional_user('user')
    end

    # @return [Time] creation time; raises when the field is absent
    def created_at
      Time.parse(@payload['created_at'])
    end

    # @return [Time, nil]
    def updated_at
      optional_time('updated_at')
    end

    # @return [Time, nil] nil while the issue is still open
    def closed_at
      optional_time('closed_at')
    end
  end

  # A pull request as embedded in pull-request events.
  class PullRequest < BasicIssue
    # @return [Time, nil] nil when the pull request has not been merged
    def merged_at
      optional_time('merged_at')
    end

    def merge_commit_sha
      @payload['merge_commit_sha']
    end

    def merged
      @payload['merged']
    end

    def mergeable
      @payload['mergeable']
    end

    def mergeable_state
      @payload['mergeable_state']
    end

    # Returned as the raw payload value (not wrapped in a User).
    def merged_by
      @payload['merged_by']
    end

    def comments
      @payload['comments']
    end

    def review_comments
      @payload['review_comments']
    end

    def commits
      @payload['commits']
    end

    def additions
      @payload['additions']
    end

    def deletions
      @payload['deletions']
    end

    def changed_files
      @payload['changed_files']
    end

    def head
      @payload['head']
    end

    def base
      @payload['base']
    end
  end

  # An issue as embedded in issue events.
  class Issue < BasicIssue
    def labels
      @payload['labels']
    end
  end

  # Fields shared by every kind of comment.
  class BasicComment < Entity
    def url
      @payload['url']
    end

    def id
      @payload['id']
    end

    # @return [User, nil] the commenter, or nil when the payload has no user
    def user
      optional_user('user')
    end

    # @return [Time] creation time; raises when the field is absent
    def created_at
      Time.parse(@payload['created_at'])
    end

    # @return [Time, nil]
    def updated_at
      optional_time('updated_at')
    end

    def body
      @payload['body']
    end
  end

  # A review comment attached to a pull-request diff.
  class PullRequestComment < BasicComment
    def diff_hunk
      @payload['diff_hunk']
    end

    def path
      @payload['path']
    end

    def position
      @payload['position']
    end

    def original_position
      @payload['original_position']
    end

    def commit_id
      @payload['commit_id']
    end

    def original_commit_id
      @payload['original_commit_id']
    end
  end

  # A comment on an issue; adds nothing to BasicComment.
  class IssueComment < BasicComment
  end

  # A release as embedded in release events.
  class Release < Entity
    def url
      @payload['url']
    end

    def id
      @payload['id']
    end

    def tag_name
      @payload['tag_name']
    end

    def target_commitish
      @payload['target_commitish']
    end

    def name
      @payload['name']
    end

    def draft
      @payload['draft']
    end

    def author
      User.new(@payload['author'])
    end

    def prerelease
      @payload['prerelease']
    end

    def created_at
      Time.parse(@payload['created_at'])
    end

    def published_at
      Time.parse(@payload['published_at'])
    end

    def assets
      @payload['assets']
    end

    def tarball_url
      @payload['tarball_url']
    end

    def zipball_url
      @payload['zipball_url']
    end

    def body
      @payload['body']
    end
  end

  # A wiki page touched by a Gollum event.
  class Page < Entity
    # The payload stores the page name under 'page_name'.
    def name
      @payload['page_name']
    end

    def title
      @payload['title']
    end

    def summary
      @payload['summary']
    end

    def action
      @payload['action']
    end

    def sha
      @payload['sha']
    end
  end
end
@@ -0,0 +1,405 @@
1
+ require 'time'
2
+ require_relative File.expand_path('../entities', __FILE__)
3
+
4
# Typed wrappers around the events published by GitHub Archive. Use
# GHArchive::Event.parse to turn one decoded JSON record into the matching
# Event subclass.
module GHArchive
  # Generic event. Exposes the fields common to every event type and serves as
  # the fallback for types that have no dedicated subclass.
  class Event
    # Builds the most specific event object for +json+.
    #
    # @param json [Hash] one decoded GitHub Archive record (string keys)
    # @return [Event] an instance of the first subclass whose +fits?+ accepts
    #   the record, or a plain Event when none does
    def self.parse(json)
      event_class = IMPLEMENTATIONS.find { |candidate| candidate.fits?(json) } || Event
      event_class.new(json)
    end

    # @param json [Hash] decoded record; note that it gets frozen
    def initialize(json)
      @json = json.freeze
      @payload = json['payload']
    end

    def public?
      @json['public']
    end

    # @return [Time] when the event was created
    def created_at
      Time.parse(@json['created_at'])
    end
    alias :time :created_at

    # @return [User] the account that triggered the event
    def actor
      User.new(@json['actor'])
    end

    # @return [Repository] the repository the event refers to
    def repo
      repository = @json['repo']
      Repository.new(repository['id'], repository['name'], repository['url'])
    end

    # @return [Hash] the raw (frozen) record
    def json
      @json
    end
  end

  # Commits pushed to a branch or tag.
  class PushEvent < Event
    def self.fits?(json)
      json['type'] == "PushEvent"
    end

    def push_id
      @payload['push_id']
    end

    def size
      @payload['size']
    end

    def distinct_size
      @payload['distinct_size']
    end

    def head
      @payload['head']
    end

    def before
      @payload['before']
    end

    # @return [Array<Commit>]
    def commits
      @payload['commits'].map { |c| Commit.new(c) }
    end
  end

  # A comment left on a commit.
  class CommitCommentEvent < Event
    def self.fits?(json)
      json['type'] == "CommitCommentEvent"
    end

    def comment_id
      comment['id']
    end

    def comment_url
      comment['url']
    end

    # @return [User] the commenter
    def comment_user
      # GitHub publishes the commenter under 'user'; 'author' is kept as a
      # fallback for callers/records relying on the previous lookup.
      User.new(comment['user'] || comment['author'])
    end

    def comment_position
      comment['position']
    end

    def comment_line
      comment['line']
    end

    def comment_path
      comment['path']
    end

    def comment_commit_id
      comment['commit_id']
    end

    def comment_body
      comment['body']
    end

    def comment_created_at
      Time.parse(comment['created_at'])
    end

    def comment_updated_at
      Time.parse(comment['updated_at'])
    end

    private

    # Raw comment Hash of the payload.
    def comment
      @payload['comment']
    end
  end

  # Activity on a pull request (opened, closed, ...).
  class PullRequestEvent < Event
    def self.fits?(json)
      json['type'] == "PullRequestEvent"
    end

    def action
      @payload['action']
    end

    def number
      @payload['number']
    end

    # @return [PullRequest]
    def pull_request
      PullRequest.new(@payload['pull_request'])
    end
  end

  # Activity on a review comment of a pull request.
  class PullRequestReviewCommentEvent < Event
    def self.fits?(json)
      json['type'] == "PullRequestReviewCommentEvent"
    end

    def action
      @payload['action']
    end

    def number
      @payload['number']
    end

    # @return [PullRequest]
    def pull_request
      PullRequest.new(@payload['pull_request'])
    end

    # @return [PullRequestComment]
    def comment
      PullRequestComment.new(@payload['comment'])
    end
  end

  # Activity on an issue.
  class IssuesEvent < Event
    def self.fits?(json)
      json['type'] == "IssuesEvent"
    end

    def action
      @payload['action']
    end

    # @return [Issue]
    def issue
      Issue.new(@payload['issue'])
    end
  end

  # Activity on an issue comment.
  class IssueCommentEvent < Event
    def self.fits?(json)
      json['type'] == "IssueCommentEvent"
    end

    def action
      @payload['action']
    end

    # @return [Issue] the issue the comment belongs to
    def issue
      Issue.new(@payload['issue'])
    end
  end

  # Creation of a repository, branch or tag.
  class CreateEvent < Event
    def self.fits?(json)
      json['type'] == "CreateEvent"
    end

    def ref
      @payload['ref']
    end

    def ref_type
      @payload['ref_type']
    end

    def master_branch
      @payload['master_branch']
    end

    def description
      @payload['description']
    end

    def pusher_type
      @payload['pusher_type']
    end
  end

  # A repository being forked. Every +forkee_*+ reader exposes one field of
  # the newly created fork.
  class ForkEvent < Event
    def self.fits?(json)
      json['type'] == "ForkEvent"
    end

    def forkee_id
      forkee['id']
    end

    def forkee_name
      forkee['name']
    end

    def forkee_full_name
      forkee['full_name']
    end

    # @return [User]
    def forkee_owner
      User.new(forkee['owner'])
    end

    def forkee_private
      forkee['private']
    end

    def forkee_description
      forkee['description']
    end

    def forkee_fork
      forkee['fork']
    end

    # @return [Time]
    def forkee_created_at
      Time.parse(forkee['created_at'])
    end

    # @return [Time]
    def forkee_updated_at
      Time.parse(forkee['updated_at'])
    end

    # @return [Time]
    def forkee_pushed_at
      Time.parse(forkee['pushed_at'])
    end

    # @return [Hash{String => String}] URLs keyed by 'git', 'ssh', 'clone', 'svn'
    def forkee_urls
      {
        'git' => forkee['git_url'],
        'ssh' => forkee['ssh_url'],
        'clone' => forkee['clone_url'],
        'svn' => forkee['svn_url']
      }
    end

    # The readers below return the payload value untouched: none of these
    # fields is a timestamp, so running them through Time.parse only raised.

    def forkee_homepage
      forkee['homepage']
    end

    def forkee_size
      forkee['size']
    end

    def forkee_stargazers_count
      forkee['stargazers_count']
    end

    def forkee_watchers_count
      forkee['watchers_count']
    end

    def forkee_language
      forkee['language']
    end

    def forkee_has_issues
      forkee['has_issues']
    end

    def forkee_has_downloads
      forkee['has_downloads']
    end

    def forkee_has_wiki
      forkee['has_wiki']
    end

    def forkee_has_pages
      forkee['has_pages']
    end

    def forkee_forks_count
      forkee['forks_count']
    end

    def forkee_mirror_url
      forkee['mirror_url']
    end

    def forkee_open_issues_count
      forkee['open_issues_count']
    end

    def forkee_watchers
      forkee['watchers']
    end

    def forkee_default_branch
      forkee['default_branch']
    end

    def forkee_public
      forkee['public']
    end

    private

    # Raw Hash describing the fork.
    def forkee
      @payload['forkee']
    end
  end

  # A private repository made public.
  class PublicEvent < Event
    def self.fits?(json)
      json['type'] == "PublicEvent"
    end
  end

  # A repository being starred.
  class WatchEvent < Event
    def self.fits?(json)
      json['type'] == "WatchEvent"
    end

    def action
      @payload['action']
    end
  end

  # Deletion of a branch or tag.
  class DeleteEvent < Event
    def self.fits?(json)
      json['type'] == "DeleteEvent"
    end

    def ref
      @payload['ref']
    end

    def ref_type
      @payload['ref_type']
    end

    def pusher_type
      @payload['pusher_type']
    end
  end

  # Activity on a release.
  class ReleaseEvent < Event
    def self.fits?(json)
      json['type'] == "ReleaseEvent"
    end

    def action
      @payload['action']
    end

    # @return [Release]
    def release
      Release.new(@payload['release'])
    end
  end

  # Activity on repository collaborators.
  class MemberEvent < Event
    def self.fits?(json)
      json['type'] == "MemberEvent"
    end

    def action
      @payload['action']
    end

    # @return [User]
    def member
      User.new(@payload['member'])
    end
  end

  # Wiki pages created or updated.
  class GollumEvent < Event
    def self.fits?(json)
      json['type'] == "GollumEvent"
    end

    # @return [Array<Page>]
    def pages
      # The key must be the string 'pages'; a bare `pages` would call this
      # method again and recurse until the stack overflows.
      @payload['pages'].map { |p| Page.new(p) }
    end
  end

  class Event
    # Every Event subclass loaded so far. Computed here, after all subclasses
    # above have been defined, so Event.parse can dispatch to them.
    IMPLEMENTATIONS = ObjectSpace.each_object(Class).select { |klass| klass < self }
  end
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gh-archive
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.4'
4
+ version: '0.8'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Simone Scalabrino
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-08-11 00:00:00.000000000 Z
11
+ date: 2021-08-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: code-assertions
@@ -30,6 +30,26 @@ dependencies:
30
30
  - - ">="
31
31
  - !ruby/object:Gem::Version
32
32
  version: 1.1.2
33
+ - !ruby/object:Gem::Dependency
34
+ name: thread
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: 0.2.2
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: 0.2.2
43
+ type: :runtime
44
+ prerelease: false
45
+ version_requirements: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - "~>"
48
+ - !ruby/object:Gem::Version
49
+ version: 0.2.2
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: 0.2.2
33
53
  description: Download and analyze the GitHub events stored at GitHub archive
34
54
  email: s.scalabrino9@gmail.com
35
55
  executables: []
@@ -37,11 +57,13 @@ extensions: []
37
57
  extra_rdoc_files: []
38
58
  files:
39
59
  - lib/gh-archive.rb
60
+ - lib/gh-archive/entities.rb
61
+ - lib/gh-archive/events.rb
40
62
  homepage: https://github.com/intersimone999/gh-archive
41
63
  licenses:
42
64
  - GPL-3.0-only
43
65
  metadata: {}
44
- post_install_message:
66
+ post_install_message:
45
67
  rdoc_options: []
46
68
  require_paths:
47
69
  - lib
@@ -56,8 +78,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
56
78
  - !ruby/object:Gem::Version
57
79
  version: '0'
58
80
  requirements: []
59
- rubygems_version: 3.2.21
60
- signing_key:
81
+ rubygems_version: 3.2.22
82
+ signing_key:
61
83
  specification_version: 4
62
84
  summary: GitHub Archive mining utility
63
85
  test_files: []