ghtorrent 0.5 → 0.6
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +16 -1
- data/README.md +6 -1
- data/bin/ght-data-retrieval +2 -162
- data/bin/ght-get-more-commits +6 -0
- data/bin/ght-load +1 -224
- data/bin/ght-mirror-events +2 -147
- data/bin/ght-process-event +35 -0
- data/bin/ght-retrieve-repo +6 -0
- data/bin/ght-rm-dupl +2 -130
- data/lib/ghtorrent.rb +10 -0
- data/lib/ghtorrent/adapters/base_adapter.rb +1 -1
- data/lib/ghtorrent/adapters/mongo_persister.rb +12 -1
- data/lib/ghtorrent/api_client.rb +47 -13
- data/lib/ghtorrent/bson_orderedhash.rb +2 -1
- data/lib/ghtorrent/command.rb +18 -0
- data/lib/ghtorrent/commands/ght_data_retrieval.rb +218 -0
- data/lib/ghtorrent/commands/ght_get_more_commits.rb +116 -0
- data/lib/ghtorrent/commands/ght_load.rb +227 -0
- data/lib/ghtorrent/commands/ght_mirror_events.rb +147 -0
- data/lib/ghtorrent/commands/ght_retrieve_repo.rb +118 -0
- data/lib/ghtorrent/commands/ght_rm_dupl.rb +132 -0
- data/lib/ghtorrent/ghtorrent.rb +401 -89
- data/lib/ghtorrent/hash.rb +1 -1
- data/lib/ghtorrent/migrations/011_add_issues.rb +74 -0
- data/lib/ghtorrent/retriever.rb +88 -16
- data/lib/ghtorrent/settings.rb +6 -1
- data/lib/version.rb +1 -1
- metadata +36 -26
data/lib/ghtorrent/hash.rb
CHANGED
data/lib/ghtorrent/migrations/011_add_issues.rb
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
require 'sequel'
|
2
|
+
|
3
|
+
require 'ghtorrent/migrations/mysql_defaults'
|
4
|
+
|
5
|
+
Sequel.migration do
|
6
|
+
up do
|
7
|
+
|
8
|
+
puts("Adding table issues")
|
9
|
+
create_table :issues do
|
10
|
+
primary_key :id
|
11
|
+
foreign_key :repo_id, :projects
|
12
|
+
foreign_key :reporter_id, :users, :null => true
|
13
|
+
foreign_key :assignee_id, :users, :null => true
|
14
|
+
Long :issue_id, :null => false
|
15
|
+
TrueClass :pull_request, :null => false
|
16
|
+
foreign_key :pull_request_id, :pull_requests, :null => true
|
17
|
+
DateTime :created_at, :null => false, :default=>Sequel::CURRENT_TIMESTAMP
|
18
|
+
String :ext_ref_id, :null => false, :size => 24, :default => "0"
|
19
|
+
end
|
20
|
+
|
21
|
+
puts("Adding issue history")
|
22
|
+
create_table :issue_events do
|
23
|
+
Long :event_id, :null => false
|
24
|
+
foreign_key :issue_id, :issues, :null => false
|
25
|
+
foreign_key :actor_id, :users, :null => false
|
26
|
+
String :action, :null => false
|
27
|
+
String :action_specific, :null => true, :size => 50
|
28
|
+
DateTime :created_at, :null => false, :default=>Sequel::CURRENT_TIMESTAMP
|
29
|
+
String :ext_ref_id, :null => false, :size => 24, :default => "0"
|
30
|
+
check(:action=>%w[closed reopened subscribed merged referenced mentioned assigned])
|
31
|
+
primary_key(:event_id, :issue_id)
|
32
|
+
end
|
33
|
+
|
34
|
+
puts("Adding table issue comments")
|
35
|
+
create_table :issue_comments do
|
36
|
+
foreign_key :issue_id, :issues, :null => false
|
37
|
+
foreign_key :user_id, :users, :null => false
|
38
|
+
Long :comment_id, :null => false
|
39
|
+
DateTime :created_at, :null => false, :default=>Sequel::CURRENT_TIMESTAMP
|
40
|
+
String :ext_ref_id, :null => false, :size => 24, :default => "0"
|
41
|
+
end
|
42
|
+
|
43
|
+
puts("Adding table repo labels")
|
44
|
+
create_table :repo_labels do
|
45
|
+
primary_key :id
|
46
|
+
foreign_key :repo_id, :projects
|
47
|
+
String :name, :size => 24, :null => false
|
48
|
+
String :ext_ref_id, :null => false, :size => 24, :default => "0"
|
49
|
+
end
|
50
|
+
|
51
|
+
puts("Adding table issue labels")
|
52
|
+
create_table :issue_labels do
|
53
|
+
foreign_key :label_id, :repo_labels
|
54
|
+
foreign_key :repo_id, :projects
|
55
|
+
String :ext_ref_id, :null => false, :size => 24, :default => "0"
|
56
|
+
end
|
57
|
+
|
58
|
+
puts("Adding table repo milestones")
|
59
|
+
create_table :repo_milestones do
|
60
|
+
primary_key :id
|
61
|
+
foreign_key :repo_id, :projects
|
62
|
+
String :name, :size => 24, :null => false
|
63
|
+
String :ext_ref_id, :null => false, :size => 24, :default => "0"
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
down do
|
68
|
+
drop_table :repo_milestones
|
69
|
+
drop_table :repo_labels
|
70
|
+
drop_table :issue_comments
|
71
|
+
drop_table :issue_events
|
72
|
+
drop_table :issues
|
73
|
+
end
|
74
|
+
end
|
data/lib/ghtorrent/retriever.rb
CHANGED
@@ -13,11 +13,11 @@ module GHTorrent
|
|
13
13
|
include GHTorrent::APIClient
|
14
14
|
|
15
15
|
def ext_uniq
|
16
|
-
raise Exception("Unimplemented")
|
16
|
+
raise Exception.new("Unimplemented")
|
17
17
|
end
|
18
18
|
|
19
19
|
def persister
|
20
|
-
raise Exception("Unimplemented")
|
20
|
+
raise Exception.new("Unimplemented")
|
21
21
|
end
|
22
22
|
|
23
23
|
def retrieve_user_byusername(user)
|
@@ -120,11 +120,11 @@ module GHTorrent
|
|
120
120
|
|
121
121
|
# Retrieve up to 30 * +:mirror_commit_pages_new_repo+ commits
|
122
122
|
# starting from the provided +sha+
|
123
|
-
def retrieve_commits(repo, sha, user)
|
124
|
-
last_sha = if sha
|
123
|
+
def retrieve_commits(repo, sha, user, num_pages = config(:mirror_commit_pages_new_repo))
|
124
|
+
last_sha = if sha == "head" then "master" else sha end
|
125
125
|
|
126
|
-
url = ghurl "repos/#{user}/#{repo}/commits?
|
127
|
-
commits = paged_api_request(url,
|
126
|
+
url = ghurl "repos/#{user}/#{repo}/commits?sha=#{last_sha}"
|
127
|
+
commits = paged_api_request(url, num_pages)
|
128
128
|
|
129
129
|
commits.map do |c|
|
130
130
|
retrieve_commit(repo, c['sha'], user)
|
@@ -251,7 +251,7 @@ module GHTorrent
|
|
251
251
|
# Retrieve all watchers for a repository
|
252
252
|
def retrieve_watchers(user, repo)
|
253
253
|
repo_bound_items(user, repo, :watchers,
|
254
|
-
"repos/#{user}/#{repo}/watchers",
|
254
|
+
"repos/#{user}/#{repo}/stargazers",
|
255
255
|
{'repo' => repo, 'owner' => user},
|
256
256
|
'login')
|
257
257
|
end
|
@@ -259,7 +259,7 @@ module GHTorrent
|
|
259
259
|
# Retrieve a single watcher for a repository
|
260
260
|
def retrieve_watcher(user, repo, watcher)
|
261
261
|
repo_bound_item(user, repo, watcher, :watchers,
|
262
|
-
"repos/#{user}/#{repo}/watchers",
|
262
|
+
"repos/#{user}/#{repo}/stargazers",
|
263
263
|
{'repo' => repo, 'owner' => user},
|
264
264
|
'login')
|
265
265
|
end
|
@@ -371,17 +371,69 @@ module GHTorrent
|
|
371
371
|
end
|
372
372
|
|
373
373
|
def retrieve_issues(user, repo)
|
374
|
+
open = "repos/#{user}/#{repo}/issues"
|
375
|
+
closed = "repos/#{user}/#{repo}/issues?state=closed"
|
374
376
|
repo_bound_items(user, repo, :issues,
|
375
|
-
|
377
|
+
[open, closed],
|
376
378
|
{'repo' => repo, 'owner' => user},
|
377
|
-
'
|
379
|
+
'number')
|
378
380
|
end
|
379
381
|
|
380
382
|
def retrieve_issue(user, repo, issue_id)
|
383
|
+
open = "repos/#{user}/#{repo}/issues"
|
384
|
+
closed = "repos/#{user}/#{repo}/issues?state=closed"
|
381
385
|
repo_bound_item(user, repo, issue_id, :issues,
|
382
|
-
|
386
|
+
[open, closed],
|
383
387
|
{'repo' => repo, 'owner' => user},
|
384
|
-
'
|
388
|
+
'number')
|
389
|
+
end
|
390
|
+
|
391
|
+
def retrieve_issue_events(owner, repo, issue_id)
|
392
|
+
url = ghurl "repos/#{owner}/#{repo}/issues/#{issue_id}/events"
|
393
|
+
retrieved_events = paged_api_request url
|
394
|
+
|
395
|
+
retrieved_events.each { |x|
|
396
|
+
x['owner'] = owner
|
397
|
+
x['repo'] = repo
|
398
|
+
x['issue_id'] = issue_id
|
399
|
+
|
400
|
+
if persister.find(:issue_events, {'owner' => owner,
|
401
|
+
'repo' => repo,
|
402
|
+
'issue_id' => issue_id,
|
403
|
+
'id' => x['id']}).empty?
|
404
|
+
info "Retriever: Added issue event #{owner}/#{repo} #{issue_id}->#{x['id']}"
|
405
|
+
persister.store(:issue_events, x)
|
406
|
+
end
|
407
|
+
}
|
408
|
+
persister.find(:issue_events, {'owner' => owner, 'repo' => repo,
|
409
|
+
'issue_id' => issue_id})
|
410
|
+
end
|
411
|
+
|
412
|
+
def retrieve_issue_event(owner, repo, issue_id, event_id)
|
413
|
+
event = persister.find(:issue_events, {'repo' => repo,
|
414
|
+
'owner' => owner,
|
415
|
+
'issue_id' => issue_id,
|
416
|
+
'id' => event_id}).first
|
417
|
+
if event.nil?
|
418
|
+
r = api_request(ghurl "repos/#{owner}/#{repo}/issues/events/#{event_id}")
|
419
|
+
|
420
|
+
if r.empty?
|
421
|
+
warn "Retriever: Issue event #{owner}/#{repo} #{issue_id}->#{event_id} deleted"
|
422
|
+
return
|
423
|
+
end
|
424
|
+
|
425
|
+
r['repo'] = repo
|
426
|
+
r['owner'] = owner
|
427
|
+
r['issue_id'] = issue_id
|
428
|
+
persister.store(:issue_events, r)
|
429
|
+
info "Retriever: Added issue event #{owner}/#{repo} #{issue_id}->#{event_id}"
|
430
|
+
persister.find(:issue_events, {'repo' => repo, 'owner' => owner,
|
431
|
+
'issue_id' => issue_id,
|
432
|
+
'id' => event_id}).first
|
433
|
+
else
|
434
|
+
debug "Retriever: Issue event #{owner}/#{repo} #{issue_id}->#{event_id} exists"
|
435
|
+
event
|
436
|
+
end
|
385
437
|
end
|
386
438
|
|
387
439
|
def retrieve_issue_comments(owner, repo, issue_id)
|
@@ -410,16 +462,16 @@ module GHTorrent
|
|
410
462
|
'issue_id' => issue_id,
|
411
463
|
'id' => comment_id}).first
|
412
464
|
if comment.nil?
|
413
|
-
r = api_request(ghurl "repos/#{owner}/#{repo}/issues
|
465
|
+
r = api_request(ghurl "repos/#{owner}/#{repo}/issues/comments/#{comment_id}")
|
414
466
|
|
415
467
|
if r.empty?
|
416
|
-
|
468
|
+
warn "Retriever: Issue comment #{owner}/#{repo} #{issue_id}->#{comment_id} deleted"
|
417
469
|
return
|
418
470
|
end
|
419
471
|
|
420
472
|
r['repo'] = repo
|
421
473
|
r['owner'] = owner
|
422
|
-
|
474
|
+
r['issue_id'] = issue_id
|
423
475
|
persister.store(:issue_comments, r)
|
424
476
|
info "Retriever: Added issue comment #{owner}/#{repo} #{issue_id}->#{comment_id}"
|
425
477
|
persister.find(:issue_comments, {'repo' => repo, 'owner' => owner,
|
@@ -427,7 +479,7 @@ module GHTorrent
|
|
427
479
|
'id' => comment_id}).first
|
428
480
|
r
|
429
481
|
else
|
430
|
-
debug "Retriever:
|
482
|
+
debug "Retriever: Issue comment #{owner}/#{repo} #{issue_id}->#{comment_id} exists"
|
431
483
|
comment
|
432
484
|
end
|
433
485
|
end
|
@@ -437,6 +489,26 @@ module GHTorrent
|
|
437
489
|
api_request "https://api.github.com/events"
|
438
490
|
end
|
439
491
|
|
492
|
+
# Get all events for the specified repo
|
493
|
+
def get_repo_events(owner, repo)
|
494
|
+
url = ghurl("repos/#{owner}/#{repo}/events")
|
495
|
+
r = paged_api_request(url)
|
496
|
+
|
497
|
+
r.each do |e|
|
498
|
+
if get_event(e['id']).empty?
|
499
|
+
info "Retriever: Already got event #{owner}/#{repo} -> #{e['id']}"
|
500
|
+
else
|
501
|
+
@persister.store(:events, e)
|
502
|
+
info "Retriever: Added event #{owner}/#{repo} -> #{e['id']}"
|
503
|
+
end
|
504
|
+
end
|
505
|
+
end
|
506
|
+
|
507
|
+
# Get a specific event by +id+.
|
508
|
+
def get_event(id)
|
509
|
+
persister.find(:events, {'id' => id})
|
510
|
+
end
|
511
|
+
|
440
512
|
private
|
441
513
|
|
442
514
|
def repo_bound_items(user, repo, entity, urls, selector, descriminator,
|
data/lib/ghtorrent/settings.rb
CHANGED
@@ -30,6 +30,11 @@ module GHTorrent
|
|
30
30
|
:cache_dir => "mirror.cache_dir",
|
31
31
|
:cache_stale_age => "mirror.cache_stale_age",
|
32
32
|
|
33
|
+
:github_username => "mirror.username",
|
34
|
+
:github_passwd => "mirror.passwd",
|
35
|
+
|
36
|
+
:respect_api_ratelimit => "mirror.respect_api_ratelimit",
|
37
|
+
|
33
38
|
:attach_ip => "mirror.attach_ip"
|
34
39
|
}
|
35
40
|
|
@@ -48,7 +53,7 @@ module GHTorrent
|
|
48
53
|
end
|
49
54
|
|
50
55
|
def settings
|
51
|
-
raise Exception("Unimplemented")
|
56
|
+
raise Exception.new("Unimplemented")
|
52
57
|
end
|
53
58
|
|
54
59
|
end
|
data/lib/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ghtorrent
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.5'
|
4
|
+
version: '0.6'
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2012-08
|
13
|
+
date: 2012-11-08 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: amqp
|
@@ -148,6 +148,8 @@ executables:
|
|
148
148
|
- ght-mirror-events
|
149
149
|
- ght-load
|
150
150
|
- ght-rm-dupl
|
151
|
+
- ght-process-event
|
152
|
+
- ght-get-more-commits
|
151
153
|
extensions: []
|
152
154
|
extra_rdoc_files: []
|
153
155
|
files:
|
@@ -158,6 +160,12 @@ files:
|
|
158
160
|
- lib/ghtorrent/bson_orderedhash.rb
|
159
161
|
- lib/ghtorrent/cache.rb
|
160
162
|
- lib/ghtorrent/command.rb
|
163
|
+
- lib/ghtorrent/commands/ght_data_retrieval.rb
|
164
|
+
- lib/ghtorrent/commands/ght_get_more_commits.rb
|
165
|
+
- lib/ghtorrent/commands/ght_load.rb
|
166
|
+
- lib/ghtorrent/commands/ght_mirror_events.rb
|
167
|
+
- lib/ghtorrent/commands/ght_retrieve_repo.rb
|
168
|
+
- lib/ghtorrent/commands/ght_rm_dupl.rb
|
161
169
|
- lib/ghtorrent/gh_torrent_exception.rb
|
162
170
|
- lib/ghtorrent/ghtorrent.rb
|
163
171
|
- lib/ghtorrent/hash.rb
|
@@ -172,6 +180,7 @@ files:
|
|
172
180
|
- lib/ghtorrent/migrations/008_add_project_unq.rb
|
173
181
|
- lib/ghtorrent/migrations/009_add_project_commit.rb
|
174
182
|
- lib/ghtorrent/migrations/010_add_forks.rb
|
183
|
+
- lib/ghtorrent/migrations/011_add_issues.rb
|
175
184
|
- lib/ghtorrent/migrations/mysql_defaults.rb
|
176
185
|
- lib/ghtorrent/persister.rb
|
177
186
|
- lib/ghtorrent/retriever.rb
|
@@ -181,9 +190,12 @@ files:
|
|
181
190
|
- lib/ghtorrent.rb
|
182
191
|
- lib/version.rb
|
183
192
|
- bin/ght-data-retrieval
|
193
|
+
- bin/ght-get-more-commits
|
184
194
|
- bin/ght-load
|
185
195
|
- bin/ght-mirror-events
|
186
196
|
- bin/ght-periodic-dump
|
197
|
+
- bin/ght-process-event
|
198
|
+
- bin/ght-retrieve-repo
|
187
199
|
- bin/ght-rm-dupl
|
188
200
|
- bin/ght-torrent-index
|
189
201
|
- CHANGELOG
|
@@ -196,30 +208,28 @@ files:
|
|
196
208
|
homepage: https://github.com/gousiosg/github-mirror
|
197
209
|
licenses: []
|
198
210
|
post_install_message: !binary |-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
WxtbMzJtVmVyc2lvbiAwLjUbWzBtXSBOZXZlciByZXRyaWV2ZSBhcnJheXMg
|
222
|
-
b2YgcmVzdWx0cyBmcm9tIE1vbmdvREIK
|
211
|
+
WxtbMzJtVmVyc2lvbiAwLjYbWzBtXSBTdXBwb3J0IHJldHJpZXZhbCBvZiBp
|
212
|
+
c3N1ZXMsIGlzc3VlIGV2ZW50cyBhbmQgaXNzdWUgaGlzdG9yeQpbG1szMm1W
|
213
|
+
ZXJzaW9uIDAuNhtbMG1dIFN1cHBvcnQgZm9yIHNldHRpbmcgdXNlcm5hbWUv
|
214
|
+
cGFzc3dvcmQgZm9yIHBlcmZvcm1pbmcgcmVxdWVzdHMKWxtbMzJtVmVyc2lv
|
215
|
+
biAwLjYbWzBtXSBSZXNwZWN0IGJ5IGRlZmF1bHQgR2l0aHViJ3MgeC1yYXRl
|
216
|
+
bGltaXQtcmVtYWluaW5nIGhlYWRlcgpbG1szMm1WZXJzaW9uIDAuNhtbMG1d
|
217
|
+
IFNlbGVjdGl2ZSBwcm9jZXNzaW5nIG9mIGV2ZW50cyBmb3IgdXNlci1zcGVj
|
218
|
+
aWZpZWQgcmVwb3MKWxtbMzJtVmVyc2lvbiAwLjYbWzBtXSBOZXcgdG9vbCAo
|
219
|
+
Z2h0LWdldC1tb3JlLWNvbW1pdHMpIHRvIHJldHJpZXZlIGFsbCBjb21taXRz
|
220
|
+
IGZvciBhIHJlcG9zaXRvcnkKWxtbMzJtVmVyc2lvbiAwLjYbWzBtXSBOZXcg
|
221
|
+
dG9vbCAoZ2h0LXByb2Nlc3MtZXZlbnRzKSB0byBwcm9jZXNzIGp1c3Qgb25l
|
222
|
+
IGV2ZW50IGJ5IGlkClsbWzMybVZlcnNpb24gMC42G1swbV0gUmV0cmlldmUg
|
223
|
+
MTAwIGl0ZW1zIGF0IG9uY2UgYnkgZGVmYXVsdCBvbiBtdWx0aXBhZ2UgcmVx
|
224
|
+
dWVzdHMKWxtbMzJtVmVyc2lvbiAwLjYbWzBtXSBSZW5hbWUgd2F0Y2hlcnMg
|
225
|
+
LT4gc3RhcmdhemVycywgYXMgcGVyIEdpdGh1YiBBUEkgY2hhbmdlClsbWzMy
|
226
|
+
bVZlcnNpb24gMC42G1swbV0gRml4ZXMgdG8gYnVncyB0aGF0IHBlcm1pdHRl
|
227
|
+
ZCBlZmZpY2llbnQgcHJvY2Vzc2luZyBvZiBtdWx0aXBhZ2UgcmVxdWVzdHMK
|
228
|
+
WxtbMzJtVmVyc2lvbiAwLjYbWzBtXSBTZXZlcmFsIGZpeGVzIG9uIGhvdyBw
|
229
|
+
dWxsIHJlcXVlc3RzIGFyZSBiZWluZyBwcm9jZXNzZWQKWxtbMzJtVmVyc2lv
|
230
|
+
biAwLjYbWzBtXSBVc2VycyB3aXRoIGludmFsaWQgZ2l0IHNldHVwcyBhcmUg
|
231
|
+
bm93IGFsbG93ZWQKWxtbMzJtVmVyc2lvbiAwLjYbWzBtXSBDb21wYXRpYmls
|
232
|
+
aXR5IHdpdGggUnVieSAxLjggcmVzdG9yZWQK
|
223
233
|
rdoc_options:
|
224
234
|
- --charset=UTF-8
|
225
235
|
require_paths:
|