ghtorrent 0.5 → 0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +16 -1
- data/README.md +6 -1
- data/bin/ght-data-retrieval +2 -162
- data/bin/ght-get-more-commits +6 -0
- data/bin/ght-load +1 -224
- data/bin/ght-mirror-events +2 -147
- data/bin/ght-process-event +35 -0
- data/bin/ght-retrieve-repo +6 -0
- data/bin/ght-rm-dupl +2 -130
- data/lib/ghtorrent.rb +10 -0
- data/lib/ghtorrent/adapters/base_adapter.rb +1 -1
- data/lib/ghtorrent/adapters/mongo_persister.rb +12 -1
- data/lib/ghtorrent/api_client.rb +47 -13
- data/lib/ghtorrent/bson_orderedhash.rb +2 -1
- data/lib/ghtorrent/command.rb +18 -0
- data/lib/ghtorrent/commands/ght_data_retrieval.rb +218 -0
- data/lib/ghtorrent/commands/ght_get_more_commits.rb +116 -0
- data/lib/ghtorrent/commands/ght_load.rb +227 -0
- data/lib/ghtorrent/commands/ght_mirror_events.rb +147 -0
- data/lib/ghtorrent/commands/ght_retrieve_repo.rb +118 -0
- data/lib/ghtorrent/commands/ght_rm_dupl.rb +132 -0
- data/lib/ghtorrent/ghtorrent.rb +401 -89
- data/lib/ghtorrent/hash.rb +1 -1
- data/lib/ghtorrent/migrations/011_add_issues.rb +74 -0
- data/lib/ghtorrent/retriever.rb +88 -16
- data/lib/ghtorrent/settings.rb +6 -1
- data/lib/version.rb +1 -1
- metadata +36 -26
data/lib/ghtorrent/hash.rb
CHANGED
data/lib/ghtorrent/migrations/011_add_issues.rb
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
require 'sequel'
|
2
|
+
|
3
|
+
require 'ghtorrent/migrations/mysql_defaults'
|
4
|
+
|
5
|
+
Sequel.migration do
|
6
|
+
up do
|
7
|
+
|
8
|
+
puts("Adding table issues")
|
9
|
+
create_table :issues do
|
10
|
+
primary_key :id
|
11
|
+
foreign_key :repo_id, :projects
|
12
|
+
foreign_key :reporter_id, :users, :null => true
|
13
|
+
foreign_key :assignee_id, :users, :null => true
|
14
|
+
Long :issue_id, :null => false
|
15
|
+
TrueClass :pull_request, :null => false
|
16
|
+
foreign_key :pull_request_id, :pull_requests, :null => true
|
17
|
+
DateTime :created_at, :null => false, :default=>Sequel::CURRENT_TIMESTAMP
|
18
|
+
String :ext_ref_id, :null => false, :size => 24, :default => "0"
|
19
|
+
end
|
20
|
+
|
21
|
+
puts("Adding issue history")
|
22
|
+
create_table :issue_events do
|
23
|
+
Long :event_id, :null => false
|
24
|
+
foreign_key :issue_id, :issues, :null => false
|
25
|
+
foreign_key :actor_id, :users, :null => false
|
26
|
+
String :action, :null => false
|
27
|
+
String :action_specific, :null => true, :size => 50
|
28
|
+
DateTime :created_at, :null => false, :default=>Sequel::CURRENT_TIMESTAMP
|
29
|
+
String :ext_ref_id, :null => false, :size => 24, :default => "0"
|
30
|
+
check(:action=>%w[closed reopened subscribed merged referenced mentioned assigned])
|
31
|
+
primary_key(:event_id, :issue_id)
|
32
|
+
end
|
33
|
+
|
34
|
+
puts("Adding table issue comments")
|
35
|
+
create_table :issue_comments do
|
36
|
+
foreign_key :issue_id, :issues, :null => false
|
37
|
+
foreign_key :user_id, :users, :null => false
|
38
|
+
Long :comment_id, :null => false
|
39
|
+
DateTime :created_at, :null => false, :default=>Sequel::CURRENT_TIMESTAMP
|
40
|
+
String :ext_ref_id, :null => false, :size => 24, :default => "0"
|
41
|
+
end
|
42
|
+
|
43
|
+
puts("Adding table repo labels")
|
44
|
+
create_table :repo_labels do
|
45
|
+
primary_key :id
|
46
|
+
foreign_key :repo_id, :projects
|
47
|
+
String :name, :size => 24, :null => false
|
48
|
+
String :ext_ref_id, :null => false, :size => 24, :default => "0"
|
49
|
+
end
|
50
|
+
|
51
|
+
puts("Adding table issue labels")
|
52
|
+
create_table :issue_labels do
|
53
|
+
foreign_key :label_id, :repo_labels
|
54
|
+
foreign_key :repo_id, :projects
|
55
|
+
String :ext_ref_id, :null => false, :size => 24, :default => "0"
|
56
|
+
end
|
57
|
+
|
58
|
+
puts("Adding table repo milestones")
|
59
|
+
create_table :repo_milestones do
|
60
|
+
primary_key :id
|
61
|
+
foreign_key :repo_id, :projects
|
62
|
+
String :name, :size => 24, :null => false
|
63
|
+
String :ext_ref_id, :null => false, :size => 24, :default => "0"
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
down do
|
68
|
+
drop_table :repo_milestones
|
69
|
+
drop_table :repo_labels
|
70
|
+
drop_table :issue_comments
|
71
|
+
drop_table :issue_events
|
72
|
+
drop_table :issues
|
73
|
+
end
|
74
|
+
end
|
data/lib/ghtorrent/retriever.rb
CHANGED
@@ -13,11 +13,11 @@ module GHTorrent
|
|
13
13
|
include GHTorrent::APIClient
|
14
14
|
|
15
15
|
def ext_uniq
|
16
|
-
raise Exception("Unimplemented")
|
16
|
+
raise Exception.new("Unimplemented")
|
17
17
|
end
|
18
18
|
|
19
19
|
def persister
|
20
|
-
raise Exception("Unimplemented")
|
20
|
+
raise Exception.new("Unimplemented")
|
21
21
|
end
|
22
22
|
|
23
23
|
def retrieve_user_byusername(user)
|
@@ -120,11 +120,11 @@ module GHTorrent
|
|
120
120
|
|
121
121
|
# Retrieve up to 30 * +:mirror_commit_pages_new_repo+ commits
|
122
122
|
# starting from the provided +sha+
|
123
|
-
def retrieve_commits(repo, sha, user)
|
124
|
-
last_sha = if sha
|
123
|
+
def retrieve_commits(repo, sha, user, num_pages = config(:mirror_commit_pages_new_repo))
|
124
|
+
last_sha = if sha == "head" then "master" else sha end
|
125
125
|
|
126
|
-
url = ghurl "repos/#{user}/#{repo}/commits?
|
127
|
-
commits = paged_api_request(url,
|
126
|
+
url = ghurl "repos/#{user}/#{repo}/commits?sha=#{last_sha}"
|
127
|
+
commits = paged_api_request(url, num_pages)
|
128
128
|
|
129
129
|
commits.map do |c|
|
130
130
|
retrieve_commit(repo, c['sha'], user)
|
@@ -251,7 +251,7 @@ module GHTorrent
|
|
251
251
|
# Retrieve all watchers for a repository
|
252
252
|
def retrieve_watchers(user, repo)
|
253
253
|
repo_bound_items(user, repo, :watchers,
|
254
|
-
"repos/#{user}/#{repo}/
|
254
|
+
"repos/#{user}/#{repo}/stargazers",
|
255
255
|
{'repo' => repo, 'owner' => user},
|
256
256
|
'login')
|
257
257
|
end
|
@@ -259,7 +259,7 @@ module GHTorrent
|
|
259
259
|
# Retrieve a single watcher for a repositry
|
260
260
|
def retrieve_watcher(user, repo, watcher)
|
261
261
|
repo_bound_item(user, repo, watcher, :watchers,
|
262
|
-
"repos/#{user}/#{repo}/
|
262
|
+
"repos/#{user}/#{repo}/stargazers",
|
263
263
|
{'repo' => repo, 'owner' => user},
|
264
264
|
'login')
|
265
265
|
end
|
@@ -371,17 +371,69 @@ module GHTorrent
|
|
371
371
|
end
|
372
372
|
|
373
373
|
def retrieve_issues(user, repo)
|
374
|
+
open = "repos/#{user}/#{repo}/issues"
|
375
|
+
closed = "repos/#{user}/#{repo}/issues?state=closed"
|
374
376
|
repo_bound_items(user, repo, :issues,
|
375
|
-
|
377
|
+
[open, closed],
|
376
378
|
{'repo' => repo, 'owner' => user},
|
377
|
-
'
|
379
|
+
'number')
|
378
380
|
end
|
379
381
|
|
380
382
|
def retrieve_issue(user, repo, issue_id)
|
383
|
+
open = "repos/#{user}/#{repo}/issues"
|
384
|
+
closed = "repos/#{user}/#{repo}/issues?state=closed"
|
381
385
|
repo_bound_item(user, repo, issue_id, :issues,
|
382
|
-
|
386
|
+
[open, closed],
|
383
387
|
{'repo' => repo, 'owner' => user},
|
384
|
-
'
|
388
|
+
'number')
|
389
|
+
end
|
390
|
+
|
391
|
+
def retrieve_issue_events(owner, repo, issue_id)
|
392
|
+
url = ghurl "repos/#{owner}/#{repo}/issues/#{issue_id}/events"
|
393
|
+
retrieved_events = paged_api_request url
|
394
|
+
|
395
|
+
retrieved_events.each { |x|
|
396
|
+
x['owner'] = owner
|
397
|
+
x['repo'] = repo
|
398
|
+
x['issue_id'] = issue_id
|
399
|
+
|
400
|
+
if persister.find(:issue_events, {'owner' => owner,
|
401
|
+
'repo' => repo,
|
402
|
+
'issue_id' => issue_id,
|
403
|
+
'id' => x['id']}).empty?
|
404
|
+
info "Retriever: Added issue event #{owner}/#{repo} #{issue_id}->#{x['id']}"
|
405
|
+
persister.store(:issue_events, x)
|
406
|
+
end
|
407
|
+
}
|
408
|
+
persister.find(:issue_events, {'owner' => owner, 'repo' => repo,
|
409
|
+
'issue_id' => issue_id})
|
410
|
+
end
|
411
|
+
|
412
|
+
def retrieve_issue_event(owner, repo, issue_id, event_id)
|
413
|
+
event = persister.find(:issue_events, {'repo' => repo,
|
414
|
+
'owner' => owner,
|
415
|
+
'issue_id' => issue_id,
|
416
|
+
'id' => event_id}).first
|
417
|
+
if event.nil?
|
418
|
+
r = api_request(ghurl "repos/#{owner}/#{repo}/issues/events/#{event_id}")
|
419
|
+
|
420
|
+
if r.empty?
|
421
|
+
warn "Retriever: Issue event #{owner}/#{repo} #{issue_id}->#{event_id} deleted"
|
422
|
+
return
|
423
|
+
end
|
424
|
+
|
425
|
+
r['repo'] = repo
|
426
|
+
r['owner'] = owner
|
427
|
+
r['issue_id'] = issue_id
|
428
|
+
persister.store(:issue_events, r)
|
429
|
+
info "Retriever: Added issue event #{owner}/#{repo} #{issue_id}->#{event_id}"
|
430
|
+
persister.find(:issue_events, {'repo' => repo, 'owner' => owner,
|
431
|
+
'issue_id' => issue_id,
|
432
|
+
'id' => event_id}).first
|
433
|
+
else
|
434
|
+
debug "Retriever: Issue event #{owner}/#{repo} #{issue_id}->#{event_id} exists"
|
435
|
+
event
|
436
|
+
end
|
385
437
|
end
|
386
438
|
|
387
439
|
def retrieve_issue_comments(owner, repo, issue_id)
|
@@ -410,16 +462,16 @@ module GHTorrent
|
|
410
462
|
'issue_id' => issue_id,
|
411
463
|
'id' => comment_id}).first
|
412
464
|
if comment.nil?
|
413
|
-
r = api_request(ghurl "repos/#{owner}/#{repo}/issues
|
465
|
+
r = api_request(ghurl "repos/#{owner}/#{repo}/issues/comments/#{comment_id}")
|
414
466
|
|
415
467
|
if r.empty?
|
416
|
-
|
468
|
+
warn "Retriever: Issue comment #{owner}/#{repo} #{issue_id}->#{comment_id} deleted"
|
417
469
|
return
|
418
470
|
end
|
419
471
|
|
420
472
|
r['repo'] = repo
|
421
473
|
r['owner'] = owner
|
422
|
-
|
474
|
+
r['issue_id'] = issue_id
|
423
475
|
persister.store(:issue_comments, r)
|
424
476
|
info "Retriever: Added issue comment #{owner}/#{repo} #{issue_id}->#{comment_id}"
|
425
477
|
persister.find(:issue_comments, {'repo' => repo, 'owner' => owner,
|
@@ -427,7 +479,7 @@ module GHTorrent
|
|
427
479
|
'id' => comment_id}).first
|
428
480
|
r
|
429
481
|
else
|
430
|
-
debug "Retriever:
|
482
|
+
debug "Retriever: Issue comment #{owner}/#{repo} #{issue_id}->#{comment_id} exists"
|
431
483
|
comment
|
432
484
|
end
|
433
485
|
end
|
@@ -437,6 +489,26 @@ module GHTorrent
|
|
437
489
|
api_request "https://api.github.com/events"
|
438
490
|
end
|
439
491
|
|
492
|
+
# Get all events for the specified repo
|
493
|
+
def get_repo_events(owner, repo)
|
494
|
+
url = ghurl("repos/#{owner}/#{repo}/events")
|
495
|
+
r = paged_api_request(url)
|
496
|
+
|
497
|
+
r.each do |e|
|
498
|
+
if get_event(e['id']).empty?
|
499
|
+
info "Retriever: Already got event #{owner}/#{repo} -> #{e['id']}"
|
500
|
+
else
|
501
|
+
@persister.store(:events, e)
|
502
|
+
info "Retriever: Added event #{owner}/#{repo} -> #{e['id']}"
|
503
|
+
end
|
504
|
+
end
|
505
|
+
end
|
506
|
+
|
507
|
+
# Get a specific event by +id+.
|
508
|
+
def get_event(id)
|
509
|
+
persister.find(:events, {'id' => id})
|
510
|
+
end
|
511
|
+
|
440
512
|
private
|
441
513
|
|
442
514
|
def repo_bound_items(user, repo, entity, urls, selector, descriminator,
|
data/lib/ghtorrent/settings.rb
CHANGED
@@ -30,6 +30,11 @@ module GHTorrent
|
|
30
30
|
:cache_dir => "mirror.cache_dir",
|
31
31
|
:cache_stale_age => "mirror.cache_stale_age",
|
32
32
|
|
33
|
+
:github_username => "mirror.username",
|
34
|
+
:github_passwd => "mirror.passwd",
|
35
|
+
|
36
|
+
:respect_api_ratelimit => "mirror.respect_api_ratelimit",
|
37
|
+
|
33
38
|
:attach_ip => "mirror.attach_ip"
|
34
39
|
}
|
35
40
|
|
@@ -48,7 +53,7 @@ module GHTorrent
|
|
48
53
|
end
|
49
54
|
|
50
55
|
def settings
|
51
|
-
raise Exception("Unimplemented")
|
56
|
+
raise Exception.new("Unimplemented")
|
52
57
|
end
|
53
58
|
|
54
59
|
end
|
data/lib/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ghtorrent
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.
|
4
|
+
version: '0.6'
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2012-08
|
13
|
+
date: 2012-11-08 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: amqp
|
@@ -148,6 +148,8 @@ executables:
|
|
148
148
|
- ght-mirror-events
|
149
149
|
- ght-load
|
150
150
|
- ght-rm-dupl
|
151
|
+
- ght-process-event
|
152
|
+
- ght-get-more-commits
|
151
153
|
extensions: []
|
152
154
|
extra_rdoc_files: []
|
153
155
|
files:
|
@@ -158,6 +160,12 @@ files:
|
|
158
160
|
- lib/ghtorrent/bson_orderedhash.rb
|
159
161
|
- lib/ghtorrent/cache.rb
|
160
162
|
- lib/ghtorrent/command.rb
|
163
|
+
- lib/ghtorrent/commands/ght_data_retrieval.rb
|
164
|
+
- lib/ghtorrent/commands/ght_get_more_commits.rb
|
165
|
+
- lib/ghtorrent/commands/ght_load.rb
|
166
|
+
- lib/ghtorrent/commands/ght_mirror_events.rb
|
167
|
+
- lib/ghtorrent/commands/ght_retrieve_repo.rb
|
168
|
+
- lib/ghtorrent/commands/ght_rm_dupl.rb
|
161
169
|
- lib/ghtorrent/gh_torrent_exception.rb
|
162
170
|
- lib/ghtorrent/ghtorrent.rb
|
163
171
|
- lib/ghtorrent/hash.rb
|
@@ -172,6 +180,7 @@ files:
|
|
172
180
|
- lib/ghtorrent/migrations/008_add_project_unq.rb
|
173
181
|
- lib/ghtorrent/migrations/009_add_project_commit.rb
|
174
182
|
- lib/ghtorrent/migrations/010_add_forks.rb
|
183
|
+
- lib/ghtorrent/migrations/011_add_issues.rb
|
175
184
|
- lib/ghtorrent/migrations/mysql_defaults.rb
|
176
185
|
- lib/ghtorrent/persister.rb
|
177
186
|
- lib/ghtorrent/retriever.rb
|
@@ -181,9 +190,12 @@ files:
|
|
181
190
|
- lib/ghtorrent.rb
|
182
191
|
- lib/version.rb
|
183
192
|
- bin/ght-data-retrieval
|
193
|
+
- bin/ght-get-more-commits
|
184
194
|
- bin/ght-load
|
185
195
|
- bin/ght-mirror-events
|
186
196
|
- bin/ght-periodic-dump
|
197
|
+
- bin/ght-process-event
|
198
|
+
- bin/ght-retrieve-repo
|
187
199
|
- bin/ght-rm-dupl
|
188
200
|
- bin/ght-torrent-index
|
189
201
|
- CHANGELOG
|
@@ -196,30 +208,28 @@ files:
|
|
196
208
|
homepage: https://github.com/gousiosg/github-mirror
|
197
209
|
licenses: []
|
198
210
|
post_install_message: !binary |-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
WxtbMzJtVmVyc2lvbiAwLjUbWzBtXSBOZXZlciByZXRyaWV2ZSBhcnJheXMg
|
222
|
-
b2YgcmVzdWx0cyBmcm9tIE1vbmdvREIK
|
211
|
+
WxtbMzJtVmVyc2lvbiAwLjYbWzBtXSBTdXBwb3J0IHJldHJpZXZhbCBvZiBp
|
212
|
+
c3N1ZXMsIGlzc3VlIGV2ZW50cyBhbmQgaXNzdWUgaGlzdG9yeQpbG1szMm1W
|
213
|
+
ZXJzaW9uIDAuNhtbMG1dIFN1cHBvcnQgZm9yIHNldHRpbmcgdXNlcm5hbWUv
|
214
|
+
cGFzc3dvcmQgZm9yIHBlcmZvcm1pbmcgcmVxdWVzdHMKWxtbMzJtVmVyc2lv
|
215
|
+
biAwLjYbWzBtXSBSZXNwZWN0IGJ5IGRlZmF1bHQgR2l0aHViJ3MgeC1yYXRl
|
216
|
+
bGltaXQtcmVtYWluaW5nIGhlYWRlcgpbG1szMm1WZXJzaW9uIDAuNhtbMG1d
|
217
|
+
IFNlbGVjdGl2ZSBwcm9jZXNzaW5nIG9mIGV2ZW50cyBmb3IgdXNlci1zcGVj
|
218
|
+
aWZpZWQgcmVwb3MKWxtbMzJtVmVyc2lvbiAwLjYbWzBtXSBOZXcgdG9vbCAo
|
219
|
+
Z2h0LWdldC1tb3JlLWNvbW1pdHMpIHRvIHJldHJpZXZlIGFsbCBjb21taXRz
|
220
|
+
IGZvciBhIHJlcG9zaXRvcnkKWxtbMzJtVmVyc2lvbiAwLjYbWzBtXSBOZXcg
|
221
|
+
dG9vbCAoZ2h0LXByb2Nlc3MtZXZlbnRzKSB0byBwcm9jZXNzIGp1c3Qgb25l
|
222
|
+
IGV2ZW50IGJ5IGlkClsbWzMybVZlcnNpb24gMC42G1swbV0gUmV0cmlldmUg
|
223
|
+
MTAwIGl0ZW1zIGF0IG9uY2UgYnkgZGVmYXVsdCBvbiBtdWx0aXBhZ2UgcmVx
|
224
|
+
dWVzdHMKWxtbMzJtVmVyc2lvbiAwLjYbWzBtXSBSZW5hbWUgd2F0Y2hlcnMg
|
225
|
+
LT4gc3RhcmdhemVycywgYXMgcGVyIEdpdGh1YiBBUEkgY2hhbmdlClsbWzMy
|
226
|
+
bVZlcnNpb24gMC42G1swbV0gRml4ZXMgdG8gYnVncyB0aGF0IHBlcm1pdHRl
|
227
|
+
ZCBlZmZpY2llbnQgcHJvY2Vzc2luZyBvZiBtdWx0aXBhZ2UgcmVxdWVzdHMK
|
228
|
+
WxtbMzJtVmVyc2lvbiAwLjYbWzBtXSBTZXZlcmFsIGZpeGVzIG9uIGhvdyBw
|
229
|
+
dWxsIHJlcXVlc3RzIGFyZSBiZWluZyBwcm9jZXNzZWQKWxtbMzJtVmVyc2lv
|
230
|
+
biAwLjYbWzBtXSBVc2VycyB3aXRoIGludmFsaWQgZ2l0IHNldHVwcyBhcmUg
|
231
|
+
bm93IGFsbG93ZWQKWxtbMzJtVmVyc2lvbiAwLjYbWzBtXSBDb21wYXRpYmls
|
232
|
+
aXR5IHdpdGggUnVieSAxLjggcmVzdG9yZWQK
|
223
233
|
rdoc_options:
|
224
234
|
- --charset=UTF-8
|
225
235
|
require_paths:
|