ghtorrent 0.6 → 0.7.1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +12 -0
- data/Gemfile +1 -11
- data/Gemfile.lock +27 -29
- data/README.md +10 -14
- data/bin/ght-mirror-events +0 -0
- data/bin/ght-process-event +0 -0
- data/bin/ght-retrieve-repo +0 -0
- data/bin/ght-retrieve-user +6 -0
- data/lib/ghtorrent.rb +1 -0
- data/lib/ghtorrent/adapters/base_adapter.rb +6 -0
- data/lib/ghtorrent/adapters/mongo_persister.rb +8 -0
- data/lib/ghtorrent/api_client.rb +8 -29
- data/lib/ghtorrent/command.rb +1 -3
- data/lib/ghtorrent/commands/ght_data_retrieval.rb +5 -10
- data/lib/ghtorrent/commands/ght_get_more_commits.rb +28 -17
- data/lib/ghtorrent/commands/ght_load.rb +2 -2
- data/lib/ghtorrent/commands/ght_retrieve_repo.rb +45 -15
- data/lib/ghtorrent/commands/ght_retrieve_user.rb +72 -0
- data/lib/ghtorrent/ghtorrent.rb +288 -209
- data/lib/ghtorrent/migrations/012_add_forks_to_projects.rb +31 -0
- data/lib/ghtorrent/migrations/013_add_merged_to_pullreqs.rb +39 -0
- data/lib/ghtorrent/migrations/014_add_deleted_to_projects.rb +21 -0
- data/lib/ghtorrent/retriever.rb +90 -25
- data/lib/ghtorrent/settings.rb +44 -6
- data/lib/version.rb +2 -2
- metadata +52 -84
- data/bin/ght-periodic-dump +0 -130
- data/bin/ght-torrent-index +0 -150
- data/test/callstack_test.rb +0 -67
@@ -122,11 +122,11 @@ Loads object ids from a collection to a queue for further processing.
|
|
122
122
|
connection.close { EventMachine.stop }
|
123
123
|
}
|
124
124
|
|
125
|
-
# Read next
|
125
|
+
# Read next 100000 items and queue them
|
126
126
|
read_and_publish = Proc.new {
|
127
127
|
|
128
128
|
to_read = if options.number == -1
|
129
|
-
|
129
|
+
100000
|
130
130
|
else
|
131
131
|
if options.number - num_read - 1 <= 0
|
132
132
|
-1
|
@@ -46,31 +46,44 @@ An efficient way to get all data for a single repo
|
|
46
46
|
end
|
47
47
|
|
48
48
|
def go
|
49
|
+
self.settings = override_config(settings, :mirror_history_pages_back, -1)
|
49
50
|
user_entry = ght.transaction{ght.ensure_user(ARGV[0], false, false)}
|
50
51
|
|
51
52
|
if user_entry.nil?
|
52
|
-
Trollop::die "Cannot find user #{
|
53
|
+
Trollop::die "Cannot find user #{ARGV[0]}"
|
53
54
|
end
|
54
55
|
|
55
56
|
user = user_entry[:login]
|
56
57
|
|
57
|
-
repo_entry = ght.transaction{ght.ensure_repo(ARGV[0], ARGV[1], false, false,
|
58
|
+
repo_entry = ght.transaction{ght.ensure_repo(ARGV[0], ARGV[1], false, false,
|
59
|
+
false, false)}
|
58
60
|
|
59
61
|
if repo_entry.nil?
|
60
|
-
Trollop::die "Cannot find repository #{
|
62
|
+
Trollop::die "Cannot find repository #{ARGV[0]}/#{ARGV[1]}"
|
61
63
|
end
|
62
64
|
|
63
65
|
repo = repo_entry[:name]
|
64
66
|
|
65
|
-
|
66
|
-
ensure_issues ensure_project_members ensure_watchers).each {|x|
|
67
|
+
def send_message(function, user, repo)
|
67
68
|
begin
|
68
|
-
ght.send(
|
69
|
+
ght.send(function, user, repo, refresh = true)
|
69
70
|
rescue Exception => e
|
70
71
|
puts STDERR, e.message
|
71
72
|
puts STDERR, e.backtrace
|
72
73
|
end
|
73
|
-
|
74
|
+
end
|
75
|
+
|
76
|
+
functions = %w(ensure_commits ensure_forks ensure_pull_requests
|
77
|
+
ensure_issues ensure_project_members ensure_watchers)
|
78
|
+
|
79
|
+
if ARGV[2].nil?
|
80
|
+
functions.each do |x|
|
81
|
+
send_message(x, user, repo)
|
82
|
+
end
|
83
|
+
else
|
84
|
+
Trollop::die("Not a valid function: #{ARGV[2]}") unless functions.include? ARGV[2]
|
85
|
+
send_message(ARGV[2], user, repo)
|
86
|
+
end
|
74
87
|
end
|
75
88
|
end
|
76
89
|
|
@@ -79,40 +92,57 @@ end
|
|
79
92
|
class TransactedGHTorrent < GHTorrent::Mirror
|
80
93
|
|
81
94
|
def ensure_commit(repo, sha, user, comments = true)
|
82
|
-
|
95
|
+
check_transaction do
|
83
96
|
super(repo, sha, user, comments)
|
84
97
|
end
|
85
98
|
end
|
86
99
|
|
87
|
-
def ensure_fork(owner, repo, fork_id
|
88
|
-
|
89
|
-
super(owner, repo, fork_id
|
100
|
+
def ensure_fork(owner, repo, fork_id)
|
101
|
+
check_transaction do
|
102
|
+
super(owner, repo, fork_id)
|
90
103
|
end
|
91
104
|
end
|
92
105
|
|
93
106
|
def ensure_pull_request(owner, repo, pullreq_id,
|
94
107
|
comments = true, commits = true,
|
95
108
|
state = nil, created_at = nil)
|
96
|
-
|
109
|
+
check_transaction do
|
97
110
|
super(owner, repo, pullreq_id, comments, commits, state, created_at)
|
98
111
|
end
|
99
112
|
end
|
100
113
|
|
101
114
|
def ensure_issue(owner, repo, issue_id, events = true, comments = true)
|
102
|
-
|
115
|
+
check_transaction do
|
103
116
|
super(owner, repo, issue_id, events, comments)
|
104
117
|
end
|
105
118
|
end
|
106
119
|
|
107
120
|
def ensure_project_member(owner, repo, new_member, date_added)
|
108
|
-
|
121
|
+
check_transaction do
|
109
122
|
super(owner, repo, new_member, date_added)
|
110
123
|
end
|
111
124
|
end
|
112
125
|
|
113
126
|
def ensure_watcher(owner, repo, watcher, date_added = nil)
|
114
|
-
|
127
|
+
check_transaction do
|
115
128
|
super(owner, repo, watcher, date_added)
|
116
129
|
end
|
117
130
|
end
|
131
|
+
|
132
|
+
def check_transaction(&block)
|
133
|
+
begin
|
134
|
+
if @db.in_transaction?
|
135
|
+
debug "Transaction already started"
|
136
|
+
yield block
|
137
|
+
else
|
138
|
+
transaction do
|
139
|
+
yield block
|
140
|
+
end
|
141
|
+
end
|
142
|
+
rescue Exception => e
|
143
|
+
puts STDERR, e.message
|
144
|
+
puts STDERR, e.backtrace
|
145
|
+
end
|
146
|
+
end
|
147
|
+
|
118
148
|
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
|
3
|
+
require 'ghtorrent/ghtorrent'
|
4
|
+
require 'ghtorrent/settings'
|
5
|
+
require 'ghtorrent/logging'
|
6
|
+
require 'ghtorrent/command'
|
7
|
+
require 'ghtorrent/retriever'
|
8
|
+
require 'ghtorrent/commands/ght_retrieve_repo'
|
9
|
+
|
10
|
+
class GHTRetrieveUser < GHTRetrieveRepo
|
11
|
+
|
12
|
+
def prepare_options(options)
|
13
|
+
options.banner <<-BANNER
|
14
|
+
An efficient way to get all data for a single user
|
15
|
+
|
16
|
+
#{command_name} [options] user
|
17
|
+
|
18
|
+
BANNER
|
19
|
+
end
|
20
|
+
|
21
|
+
def validate
|
22
|
+
super
|
23
|
+
Trollop::die "One argument are required" unless args[0] && !args[0].empty?
|
24
|
+
end
|
25
|
+
|
26
|
+
def go
|
27
|
+
self.settings = override_config(settings, :mirror_history_pages_back, -1)
|
28
|
+
user_entry = ght.transaction{ght.ensure_user(ARGV[0], false, false)}
|
29
|
+
|
30
|
+
if user_entry.nil?
|
31
|
+
Trollop::die "Cannot find user #{ARGV[0]}"
|
32
|
+
end
|
33
|
+
|
34
|
+
user = user_entry[:login]
|
35
|
+
|
36
|
+
def send_message(function, user)
|
37
|
+
begin
|
38
|
+
ght.send(function, user)
|
39
|
+
rescue Exception => e
|
40
|
+
puts STDERR, e.message
|
41
|
+
puts STDERR, e.backtrace
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
functions = %w(ensure_user_followers ensure_orgs)
|
46
|
+
|
47
|
+
if ARGV[2].nil?
|
48
|
+
functions.each do |x|
|
49
|
+
send_message(x, user)
|
50
|
+
end
|
51
|
+
else
|
52
|
+
Trollop::die("Not a valid function: #{ARGV[2]}") unless functions.include? ARGV[2]
|
53
|
+
send_message(ARGV[2], user)
|
54
|
+
end
|
55
|
+
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
class TransactedGHTorrent < GHTorrent::Mirror
|
60
|
+
|
61
|
+
def ensure_user_followers(user)
|
62
|
+
check_transaction do
|
63
|
+
super(user)
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
def ensure_orgs(user)
|
68
|
+
check_transaction do
|
69
|
+
super(user)
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
data/lib/ghtorrent/ghtorrent.rb
CHANGED
@@ -24,6 +24,8 @@ module GHTorrent
|
|
24
24
|
|
25
25
|
# Get a connection to the database
|
26
26
|
def get_db
|
27
|
+
return @db unless @db.nil?
|
28
|
+
|
27
29
|
Sequel.single_threaded = true
|
28
30
|
@db = Sequel.connect(config(:sql_url), :encoding => 'utf8')
|
29
31
|
#@db.loggers << @logger
|
@@ -54,7 +56,6 @@ module GHTorrent
|
|
54
56
|
end
|
55
57
|
|
56
58
|
transaction do
|
57
|
-
ensure_user(user, true, true)
|
58
59
|
ensure_commit(repo, sha, user)
|
59
60
|
end
|
60
61
|
end
|
@@ -78,10 +79,9 @@ module GHTorrent
|
|
78
79
|
# [user] The login of the repository owner
|
79
80
|
# [repo] The name of the repository
|
80
81
|
# [comment_id] The id of the commit comment to retrieve
|
81
|
-
|
82
|
-
def get_commit_comment(user, repo, comment_id, date_added)
|
82
|
+
def get_commit_comment(user, repo, comment_id)
|
83
83
|
transaction do
|
84
|
-
ensure_commit_comment(user, repo, comment_id
|
84
|
+
ensure_commit_comment(user, repo, comment_id)
|
85
85
|
end
|
86
86
|
end
|
87
87
|
|
@@ -106,8 +106,6 @@ module GHTorrent
|
|
106
106
|
# [date_added] The timestamp that the add event took place
|
107
107
|
def get_follower(follower, followed, date_added)
|
108
108
|
transaction do
|
109
|
-
ensure_user(follower, true, true)
|
110
|
-
ensure_user(followed, true, true)
|
111
109
|
ensure_user_follower(followed, follower, date_added)
|
112
110
|
end
|
113
111
|
end
|
@@ -130,10 +128,9 @@ module GHTorrent
|
|
130
128
|
# [owner] The login of the repository owner
|
131
129
|
# [repo] The name of the repository
|
132
130
|
# [fork_id] The fork item id
|
133
|
-
|
134
|
-
def get_fork(owner, repo, fork_id, date_added)
|
131
|
+
def get_fork(owner, repo, fork_id)
|
135
132
|
transaction do
|
136
|
-
ensure_fork(owner, repo, fork_id
|
133
|
+
ensure_fork(owner, repo, fork_id)
|
137
134
|
end
|
138
135
|
end
|
139
136
|
|
@@ -144,9 +141,9 @@ module GHTorrent
|
|
144
141
|
# [repo] The name of the repository
|
145
142
|
# [fork_id] The fork item id
|
146
143
|
# [date_added] The timestamp that the add event took place
|
147
|
-
def get_pullreq_comment(owner, repo, pullreq_id, comment_id
|
144
|
+
def get_pullreq_comment(owner, repo, pullreq_id, comment_id)
|
148
145
|
transaction do
|
149
|
-
ensure_pullreq_comment(owner, repo, pullreq_id, comment_id
|
146
|
+
ensure_pullreq_comment(owner, repo, pullreq_id, comment_id)
|
150
147
|
end
|
151
148
|
end
|
152
149
|
|
@@ -158,9 +155,9 @@ module GHTorrent
|
|
158
155
|
# [issue_id] The issue id
|
159
156
|
# [action] The action that took place for the issue
|
160
157
|
# [date_added] The timestamp that the add event took place
|
161
|
-
def get_issue(owner, repo, issue_id
|
158
|
+
def get_issue(owner, repo, issue_id)
|
162
159
|
transaction do
|
163
|
-
ensure_issue(owner, repo, issue_id
|
160
|
+
ensure_issue(owner, repo, issue_id)
|
164
161
|
end
|
165
162
|
end
|
166
163
|
|
@@ -181,7 +178,7 @@ module GHTorrent
|
|
181
178
|
# Make sure a commit exists
|
182
179
|
#
|
183
180
|
def ensure_commit(repo, sha, user, comments = true)
|
184
|
-
ensure_repo(user, repo)
|
181
|
+
ensure_repo(user, repo, false, false, false, false)
|
185
182
|
c = retrieve_commit(repo, sha, user)
|
186
183
|
|
187
184
|
if c.nil?
|
@@ -208,8 +205,9 @@ module GHTorrent
|
|
208
205
|
# [sha] The first commit to start retrieving from. If nil, then the
|
209
206
|
# earliest stored commit will be used instead.
|
210
207
|
# [num_pages] The number of commit pages to retrieve
|
211
|
-
def ensure_commits(user, repo, sha = nil,
|
212
|
-
num_pages = config(:mirror_commit_pages_new_repo)
|
208
|
+
def ensure_commits(user, repo, refresh = false, sha = nil,
|
209
|
+
num_pages = config(:mirror_commit_pages_new_repo)
|
210
|
+
)
|
213
211
|
userid = @db[:users].filter(:login => user).first[:id]
|
214
212
|
repoid = @db[:projects].filter(:owner_id => userid,
|
215
213
|
:name => repo).first[:id]
|
@@ -236,32 +234,37 @@ module GHTorrent
|
|
236
234
|
# in the database.
|
237
235
|
def ensure_parents(commit)
|
238
236
|
commits = @db[:commits]
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
end
|
249
|
-
|
250
|
-
if parent.nil?
|
237
|
+
parents = @db[:commit_parents]
|
238
|
+
commit['parents'].map do |p|
|
239
|
+
url = p['url'].split(/\//)
|
240
|
+
this = commits.first(:sha => commit['sha'])
|
241
|
+
parent = commits.first(:sha => url[7])
|
242
|
+
|
243
|
+
if parent.nil?
|
244
|
+
c = retrieve_commit(url[5], url[7], url[4])
|
245
|
+
if c.nil?
|
251
246
|
warn "GHTorrent: Could not retrieve #{url[4]}/#{url[5]} -> #{url[7]}, parent to commit #{this[:sha]}"
|
252
|
-
|
247
|
+
next
|
253
248
|
end
|
249
|
+
parent = store_commit(c, url[5], url[4])
|
250
|
+
end
|
254
251
|
|
255
|
-
|
256
|
-
|
252
|
+
if parent.nil?
|
253
|
+
warn "GHTorrent: Could not retrieve #{url[4]}/#{url[5]} -> #{url[7]}, parent to commit #{this[:sha]}"
|
254
|
+
next
|
255
|
+
end
|
257
256
|
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
257
|
+
if parents.first(:commit_id => this[:id],
|
258
|
+
:parent_id => parent[:id]).nil?
|
259
|
+
|
260
|
+
parents.insert(:commit_id => this[:id],
|
261
|
+
:parent_id => parent[:id])
|
262
|
+
info "GHTorrent: Added parent #{parent[:sha]} to commit #{this[:sha]}"
|
263
|
+
else
|
264
|
+
debug "GHTorrent: Parent #{parent[:sha]} for commit #{this[:sha]} exists"
|
264
265
|
end
|
266
|
+
parents.first(:commit_id => this[:id], :parent_id => parent[:id])
|
267
|
+
end
|
265
268
|
end
|
266
269
|
|
267
270
|
##
|
@@ -271,23 +274,27 @@ module GHTorrent
|
|
271
274
|
# [repo] The repo receiving the commit
|
272
275
|
# [sha] The commit SHA
|
273
276
|
def ensure_repo_commit(user, repo, sha)
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
+
project = ensure_repo(user, repo, false, false, false, false)
|
278
|
+
|
279
|
+
if project.nil?
|
280
|
+
warn "GHTorrent: Repo #{user}/#{repo} does not exist"
|
281
|
+
return
|
282
|
+
end
|
283
|
+
|
277
284
|
commitid = @db[:commits].first(:sha => sha)[:id]
|
278
285
|
|
279
|
-
exists = @db[:project_commits].first(:project_id =>
|
286
|
+
exists = @db[:project_commits].first(:project_id => project[:id],
|
280
287
|
:commit_id => commitid)
|
281
288
|
if exists.nil?
|
282
289
|
@db[:project_commits].insert(
|
283
|
-
:project_id =>
|
290
|
+
:project_id => project[:id],
|
284
291
|
:commit_id => commitid
|
285
292
|
)
|
286
|
-
info "GHTorrent:
|
287
|
-
@db[:project_commits].first(:project_id =>
|
293
|
+
info "GHTorrent: Associating commit #{sha} with #{user}/#{repo}"
|
294
|
+
@db[:project_commits].first(:project_id => project[:id],
|
288
295
|
:commit_id => commitid)
|
289
296
|
else
|
290
|
-
debug "GHTorrent: Commit #{user}/#{repo}
|
297
|
+
debug "GHTorrent: Commit #{sha} already associated with #{user}/#{repo}"
|
291
298
|
exists
|
292
299
|
end
|
293
300
|
end
|
@@ -333,6 +340,17 @@ module GHTorrent
|
|
333
340
|
return users.first(:login => byemail[:login])
|
334
341
|
end
|
335
342
|
|
343
|
+
# This means that the user's login has been associated with a
|
344
|
+
# Github user by the time the commit was done (and hence Github was
|
345
|
+
# able to associate the commit to an account), but afterwards the
|
346
|
+
# user has deleted his account (before GHTorrent processed it).
|
347
|
+
# In the absence of something better to do, try to find the user by email
|
348
|
+
# and return a "fake" user entry.
|
349
|
+
if added.nil?
|
350
|
+
warn "GHTorrent: User account for user #{login} deleted from Github"
|
351
|
+
return ensure_user("#{name}<#{email}>", false, false)
|
352
|
+
end
|
353
|
+
|
336
354
|
if byemail.nil?
|
337
355
|
users.filter(:login => login).update(:name => name) if added[:name].nil?
|
338
356
|
users.filter(:login => login).update(:email => email) if added[:email].nil?
|
@@ -373,10 +391,12 @@ module GHTorrent
|
|
373
391
|
def ensure_user(user, followers, orgs)
|
374
392
|
# Github only supports alphanumerics and dashes in its usernames.
|
375
393
|
# All other symbols are treated as emails.
|
376
|
-
if not user.match(/^[
|
394
|
+
if not user.match(/^[\w\-]*$/)
|
377
395
|
begin
|
378
396
|
name, email = user.split("<")
|
379
397
|
email = email.split(">")[0]
|
398
|
+
name = name.strip unless name.nil?
|
399
|
+
email = email.strip unless email.nil?
|
380
400
|
rescue Exception
|
381
401
|
raise new GHTorrentException.new("Not a valid email address: #{user}")
|
382
402
|
end
|
@@ -384,7 +404,7 @@ module GHTorrent
|
|
384
404
|
unless is_valid_email(email)
|
385
405
|
warn("GHTorrent: Extracted email(#{email}) not valid for user #{user}")
|
386
406
|
end
|
387
|
-
u = ensure_user_byemail(email
|
407
|
+
u = ensure_user_byemail(email, name)
|
388
408
|
else
|
389
409
|
u = ensure_user_byuname(user)
|
390
410
|
ensure_user_followers(user) if followers
|
@@ -446,9 +466,8 @@ module GHTorrent
|
|
446
466
|
#
|
447
467
|
# ==Parameters:
|
448
468
|
# [user] The user login to find followers by
|
449
|
-
def ensure_user_followers(followed
|
469
|
+
def ensure_user_followers(followed)
|
450
470
|
curuser = ensure_user(followed, false, false)
|
451
|
-
time = curuser[:created_at]
|
452
471
|
followers = @db.from(:followers, :users).\
|
453
472
|
where(:followers__follower_id => :users__id).
|
454
473
|
where(:followers__user_id => curuser[:id]).select(:login).all
|
@@ -459,12 +478,12 @@ module GHTorrent
|
|
459
478
|
else
|
460
479
|
acc
|
461
480
|
end
|
462
|
-
end.map { |x| ensure_user_follower(followed, x['login']
|
481
|
+
end.map { |x| ensure_user_follower(followed, x['login']) }
|
463
482
|
end
|
464
483
|
|
465
484
|
##
|
466
485
|
# Make sure that a user follows another one
|
467
|
-
def ensure_user_follower(followed, follower, date_added)
|
486
|
+
def ensure_user_follower(followed, follower, date_added = nil)
|
468
487
|
follower_user = ensure_user(follower, false, false)
|
469
488
|
followed_user = ensure_user(followed, false, false)
|
470
489
|
|
@@ -474,14 +493,17 @@ module GHTorrent
|
|
474
493
|
end
|
475
494
|
|
476
495
|
followers = @db[:followers]
|
477
|
-
|
478
|
-
|
496
|
+
follower_id = follower_user[:id]
|
497
|
+
followed_id = followed_user[:id]
|
479
498
|
|
480
499
|
follower_exists = followers.first(:user_id => followed_id,
|
481
500
|
:follower_id => follower_id)
|
482
|
-
|
483
501
|
if follower_exists.nil?
|
484
|
-
added = if date_added.nil?
|
502
|
+
added = if date_added.nil?
|
503
|
+
max(follower_user[:created_at], followed_user[:created_at])
|
504
|
+
else
|
505
|
+
date_added
|
506
|
+
end
|
485
507
|
retrieved = retrieve_user_follower(followed, follower)
|
486
508
|
|
487
509
|
if retrieved.nil?
|
@@ -495,13 +517,17 @@ module GHTorrent
|
|
495
517
|
:ext_ref_id => retrieved[@ext_uniq])
|
496
518
|
info "GHTorrent: User #{follower} follows #{followed}"
|
497
519
|
else
|
498
|
-
|
499
|
-
|
500
|
-
|
520
|
+
debug "GHTorrent: Follower #{follower} exists for user #{followed}"
|
521
|
+
end
|
522
|
+
|
523
|
+
unless date_added.nil?
|
524
|
+
followers.filter(:user_id => followed_id,
|
525
|
+
:follower_id => follower_id)\
|
501
526
|
.update(:created_at => date(date_added))
|
502
|
-
|
503
|
-
end
|
527
|
+
debug "GHTorrent: Updating follower #{followed} -> #{follower}, created_at -> #{date(date_added)}"
|
504
528
|
end
|
529
|
+
|
530
|
+
followers.first(:user_id => followed_id, :follower_id => follower_id)
|
505
531
|
end
|
506
532
|
|
507
533
|
##
|
@@ -529,19 +555,29 @@ module GHTorrent
|
|
529
555
|
:name => name,
|
530
556
|
:login => login,
|
531
557
|
:created_at => Time.now,
|
532
|
-
:ext_ref_id => ""
|
533
|
-
)
|
558
|
+
:ext_ref_id => "")
|
534
559
|
info "GHTorrent: Added fake user #{login} -> #{email}"
|
535
560
|
users.first(:login => login)
|
536
561
|
else
|
537
|
-
users.
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
562
|
+
in_db = users.first(:login => u['login'])
|
563
|
+
if in_db.nil?
|
564
|
+
users.insert(:login => u['login'],
|
565
|
+
:name => u['name'],
|
566
|
+
:company => u['company'],
|
567
|
+
:email => u['email'],
|
568
|
+
:location => u['location'],
|
569
|
+
:created_at => date(u['created_at']),
|
570
|
+
:ext_ref_id => u[@ext_uniq])
|
571
|
+
info "GHTorrent: Found #{email} through search API query"
|
572
|
+
else
|
573
|
+
in_db.update(:name => u['name'],
|
574
|
+
:company => u['company'],
|
575
|
+
:email => u['email'],
|
576
|
+
:location => u['location'],
|
577
|
+
:created_at => date(u['created_at']),
|
578
|
+
:ext_ref_id => u[@ext_uniq])
|
579
|
+
info "GHTorrent: User with email #{email} exists with username #{u['login']}"
|
580
|
+
end
|
545
581
|
users.first(:login => u['login'])
|
546
582
|
end
|
547
583
|
else
|
@@ -560,10 +596,17 @@ module GHTorrent
|
|
560
596
|
# == Returns:
|
561
597
|
# If the repo can be retrieved, it is returned as a Hash. Otherwise,
|
562
598
|
# the result is nil
|
563
|
-
def ensure_repo(user, repo, commits = true, project_members = true,
|
599
|
+
def ensure_repo(user, repo, commits = true, project_members = true,
|
600
|
+
watchers = true, forks = true)
|
564
601
|
|
565
602
|
repos = @db[:projects]
|
566
603
|
curuser = ensure_user(user, false, false)
|
604
|
+
|
605
|
+
if curuser.nil?
|
606
|
+
warn "Cannot find user #{user}"
|
607
|
+
return
|
608
|
+
end
|
609
|
+
|
567
610
|
currepo = repos.first(:owner_id => curuser[:id], :name => repo)
|
568
611
|
|
569
612
|
if currepo.nil?
|
@@ -575,17 +618,29 @@ module GHTorrent
|
|
575
618
|
end
|
576
619
|
|
577
620
|
repos.insert(:url => r['url'],
|
578
|
-
:owner_id =>
|
621
|
+
:owner_id => curuser[:id],
|
579
622
|
:name => r['name'],
|
580
623
|
:description => r['description'],
|
581
624
|
:language => r['language'],
|
582
625
|
:created_at => date(r['created_at']),
|
583
626
|
:ext_ref_id => r[@ext_uniq])
|
584
627
|
|
585
|
-
|
628
|
+
unless r['parent'].nil?
|
629
|
+
parent_owner = r['parent']['owner']['login']
|
630
|
+
parent_repo = r['parent']['name']
|
631
|
+
|
632
|
+
parent = ensure_repo(parent_owner, parent_repo, false, false, false, false)
|
633
|
+
|
634
|
+
repos.filter(:owner_id => curuser[:id], :name => repo).update(:forked_from => parent[:id])
|
635
|
+
|
636
|
+
info "Repo #{user}/#{repo} is a fork from #{parent_owner}/#{parent_repo}"
|
637
|
+
end
|
638
|
+
|
639
|
+
info "GHTorrent: New repo #{user}/#{repo}"
|
586
640
|
ensure_commits(user, repo) if commits
|
587
641
|
ensure_project_members(user, repo) if project_members
|
588
642
|
ensure_watchers(user, repo) if watchers
|
643
|
+
ensure_forks(user, repo) if forks
|
589
644
|
repos.first(:owner_id => curuser[:id], :name => repo)
|
590
645
|
else
|
591
646
|
debug "GHTorrent: Repo #{user}/#{repo} exists"
|
@@ -595,8 +650,8 @@ module GHTorrent
|
|
595
650
|
|
596
651
|
##
|
597
652
|
# Make sure that a project has all the registered members defined
|
598
|
-
def ensure_project_members(user, repo)
|
599
|
-
currepo = ensure_repo(user, repo,
|
653
|
+
def ensure_project_members(user, repo, refresh = false)
|
654
|
+
currepo = ensure_repo(user, repo, false, false, false, false)
|
600
655
|
time = currepo[:created_at]
|
601
656
|
|
602
657
|
project_members = @db.from(:project_members, :users).\
|
@@ -616,7 +671,7 @@ module GHTorrent
|
|
616
671
|
# Make sure that a project member exists in a project
|
617
672
|
def ensure_project_member(owner, repo, new_member, date_added)
|
618
673
|
pr_members = @db[:project_members]
|
619
|
-
project = ensure_repo(owner, repo,
|
674
|
+
project = ensure_repo(owner, repo, false, false, false, false)
|
620
675
|
new_user = ensure_user(new_member, false, false)
|
621
676
|
|
622
677
|
if project.nil? or new_user.nil?
|
@@ -627,7 +682,11 @@ module GHTorrent
|
|
627
682
|
:repo_id => project[:id])
|
628
683
|
|
629
684
|
if memb_exist.nil?
|
630
|
-
added = if date_added.nil?
|
685
|
+
added = if date_added.nil?
|
686
|
+
max(project[:created_at], new_user[:created_at])
|
687
|
+
else
|
688
|
+
date_added
|
689
|
+
end
|
631
690
|
retrieved = retrieve_repo_collaborator(owner, repo, new_member)
|
632
691
|
|
633
692
|
if retrieved.nil?
|
@@ -644,12 +703,13 @@ module GHTorrent
|
|
644
703
|
info "GHTorrent: Added project member #{repo} -> #{new_member}"
|
645
704
|
else
|
646
705
|
debug "GHTorrent: Project member #{repo} -> #{new_member} exists"
|
647
|
-
|
648
|
-
|
649
|
-
|
706
|
+
end
|
707
|
+
|
708
|
+
unless date_added.nil?
|
709
|
+
pr_members.filter(:user_id => new_user[:id],
|
710
|
+
:repo_id => project[:id])\
|
650
711
|
.update(:created_at => date(date_added))
|
651
|
-
|
652
|
-
end
|
712
|
+
info "GHTorrent: Updating project member #{repo} -> #{new_member}, created_at -> #{date(date_added)}"
|
653
713
|
end
|
654
714
|
end
|
655
715
|
|
@@ -734,7 +794,7 @@ module GHTorrent
|
|
734
794
|
end
|
735
795
|
end
|
736
796
|
|
737
|
-
not_saved.map{|x| ensure_commit_comment(user, repo, x['id']
|
797
|
+
not_saved.map{|x| ensure_commit_comment(user, repo, x['id'])}
|
738
798
|
end
|
739
799
|
|
740
800
|
##
|
@@ -745,7 +805,7 @@ module GHTorrent
|
|
745
805
|
# [repo] The repository containing the commit whose comment will be retrieved
|
746
806
|
# [id] The comment id to retrieve
|
747
807
|
# [created_at] The timestamp that the comment was made.
|
748
|
-
def ensure_commit_comment(user, repo, id
|
808
|
+
def ensure_commit_comment(user, repo, id)
|
749
809
|
stored_comment = @db[:commit_comments].first(:comment_id => id)
|
750
810
|
|
751
811
|
if stored_comment.nil?
|
@@ -770,21 +830,15 @@ module GHTorrent
|
|
770
830
|
)
|
771
831
|
info "GHTorrent: Added commit comment #{commit[:sha]} -> #{user[:login]}"
|
772
832
|
else
|
773
|
-
unless created_at.nil?
|
774
|
-
@db[:commit_comments].filter(:comment_id => id)\
|
775
|
-
.update(:created_at => date(created_at))
|
776
|
-
info "GHTorrent: Updating comment #{user}/#{repo} -> #{id}"
|
777
|
-
end
|
778
833
|
info "GHTorrent: Commit comment #{id} exists"
|
779
834
|
end
|
780
835
|
@db[:commit_comments].first(:comment_id => id)
|
781
836
|
end
|
782
837
|
|
783
838
|
##
|
784
|
-
# Make sure that
|
785
|
-
def ensure_watchers(owner, repo)
|
786
|
-
currepo = ensure_repo(owner, repo,
|
787
|
-
time = currepo[:created_at]
|
839
|
+
# Make sure that all watchers exist for a repository
|
840
|
+
def ensure_watchers(owner, repo, refresh = false)
|
841
|
+
currepo = ensure_repo(owner, repo, false, false, false, false)
|
788
842
|
|
789
843
|
if currepo.nil?
|
790
844
|
warn "Could not retrieve watchers for #{owner}/#{repo}"
|
@@ -803,13 +857,13 @@ module GHTorrent
|
|
803
857
|
else
|
804
858
|
acc
|
805
859
|
end
|
806
|
-
end.map { |x| ensure_watcher(owner, repo, x['login'],
|
860
|
+
end.map { |x| ensure_watcher(owner, repo, x['login'], nil) }
|
807
861
|
end
|
808
862
|
|
809
863
|
##
|
810
|
-
# Make sure that a
|
864
|
+
# Make sure that a watcher/stargazer exists for a repository
|
811
865
|
def ensure_watcher(owner, repo, watcher, date_added = nil)
|
812
|
-
project = ensure_repo(owner, repo, false, false, false)
|
866
|
+
project = ensure_repo(owner, repo, false, false, false, false)
|
813
867
|
new_watcher = ensure_user(watcher, false, false)
|
814
868
|
|
815
869
|
if new_watcher.nil? or project.nil?
|
@@ -818,11 +872,15 @@ module GHTorrent
|
|
818
872
|
end
|
819
873
|
|
820
874
|
watchers = @db[:watchers]
|
821
|
-
|
822
|
-
|
875
|
+
watcher_exist = watchers.first(:user_id => new_watcher[:id],
|
876
|
+
:repo_id => project[:id])
|
823
877
|
|
824
|
-
if
|
825
|
-
added = if date_added.nil?
|
878
|
+
if watcher_exist.nil?
|
879
|
+
added = if date_added.nil?
|
880
|
+
max(project[:created_at], new_watcher[:created_at])
|
881
|
+
else
|
882
|
+
date_added
|
883
|
+
end
|
826
884
|
retrieved = retrieve_watcher(owner, repo, watcher)
|
827
885
|
|
828
886
|
if retrieved.nil?
|
@@ -839,33 +897,42 @@ module GHTorrent
|
|
839
897
|
info "GHTorrent: Added watcher #{owner}/#{repo} -> #{watcher}"
|
840
898
|
else
|
841
899
|
debug "GHTorrent: Watcher #{owner}/#{repo} -> #{watcher} exists"
|
842
|
-
|
843
|
-
|
844
|
-
|
900
|
+
end
|
901
|
+
|
902
|
+
unless date_added.nil?
|
903
|
+
watchers.filter(:user_id => new_watcher[:id],
|
904
|
+
:repo_id => project[:id])\
|
845
905
|
.update(:created_at => date(date_added))
|
846
|
-
|
847
|
-
end
|
906
|
+
info "GHTorrent: Updating watcher #{owner}/#{repo} -> #{watcher}, created_at -> #{date_added}"
|
848
907
|
end
|
908
|
+
|
909
|
+
watchers.first(:user_id => new_watcher[:id],
|
910
|
+
:repo_id => project[:id])
|
849
911
|
end
|
850
912
|
|
851
913
|
##
|
852
914
|
# Process all pull requests
|
853
|
-
def ensure_pull_requests(owner, repo)
|
854
|
-
currepo = ensure_repo(owner, repo, false, false, false)
|
915
|
+
def ensure_pull_requests(owner, repo, refresh = false)
|
916
|
+
currepo = ensure_repo(owner, repo, false, false, false, false)
|
855
917
|
if currepo.nil?
|
856
918
|
warn "Could not retrieve pull requests from #{owner}/#{repo}"
|
857
919
|
return
|
858
920
|
end
|
859
921
|
|
860
|
-
|
922
|
+
raw_pull_reqs = if refresh
|
923
|
+
retrieve_pull_requests(owner, repo, refresh = true)
|
924
|
+
else
|
925
|
+
pull_reqs = @db[:pull_requests].filter(:base_repo_id => currepo[:id]).all
|
926
|
+
retrieve_pull_requests(owner, repo).reduce([]) do |acc, x|
|
927
|
+
if pull_reqs.find { |y| y[:pullreq_id] == x['number'] }.nil?
|
928
|
+
acc << x
|
929
|
+
else
|
930
|
+
acc
|
931
|
+
end
|
932
|
+
end
|
933
|
+
end
|
861
934
|
|
862
|
-
|
863
|
-
if pull_reqs.find { |y| y[:pullreq_id] == x['number'] }.nil?
|
864
|
-
acc << x
|
865
|
-
else
|
866
|
-
acc
|
867
|
-
end
|
868
|
-
end.map { |x| ensure_pull_request(owner, repo, x['number']) }
|
935
|
+
raw_pull_reqs.map { |x| ensure_pull_request(owner, repo, x['number']) }
|
869
936
|
end
|
870
937
|
|
871
938
|
##
|
@@ -875,7 +942,7 @@ module GHTorrent
|
|
875
942
|
state = nil, created_at = nil)
|
876
943
|
pulls_reqs = @db[:pull_requests]
|
877
944
|
|
878
|
-
project = ensure_repo(owner, repo, false, false, false)
|
945
|
+
project = ensure_repo(owner, repo, false, false, false, false)
|
879
946
|
|
880
947
|
if project.nil?
|
881
948
|
return
|
@@ -885,15 +952,13 @@ module GHTorrent
|
|
885
952
|
def add_history(id, ts, unq, act)
|
886
953
|
pull_req_history = @db[:pull_request_history]
|
887
954
|
entry = pull_req_history.first(:pull_request_id => id,
|
888
|
-
:
|
955
|
+
:created_at => ts, :action => act)
|
889
956
|
if entry.nil?
|
890
957
|
pull_req_history.insert(:pull_request_id => id, :created_at => ts,
|
891
958
|
:ext_ref_id => unq, :action => act)
|
892
|
-
info "GHTorrent: New pull request (#{id}) history entry (#{act})"
|
959
|
+
info "GHTorrent: New pull request (#{id}) history entry (#{act}) timestamp #{ts}"
|
893
960
|
else
|
894
|
-
|
895
|
-
:action => act).update(:created_at => ts)
|
896
|
-
info "GHTorrent: Updating pull request (#{id}) history entry (#{act}) timestamp #{ts}"
|
961
|
+
info "GHTorrent: Pull request (#{id}) history entry (#{act}) timestamp #{ts} exists"
|
897
962
|
end
|
898
963
|
end
|
899
964
|
|
@@ -939,7 +1004,7 @@ module GHTorrent
|
|
939
1004
|
|
940
1005
|
base_repo = ensure_repo(retrieved['base']['repo']['owner']['login'],
|
941
1006
|
retrieved['base']['repo']['name'],
|
942
|
-
false, false, false)
|
1007
|
+
false, false, false, false)
|
943
1008
|
|
944
1009
|
base_commit = ensure_commit(retrieved['base']['repo']['name'],
|
945
1010
|
retrieved['base']['sha'],
|
@@ -955,7 +1020,7 @@ module GHTorrent
|
|
955
1020
|
head_repo = if has_head_repo(retrieved)
|
956
1021
|
ensure_repo(retrieved['head']['repo']['owner']['login'],
|
957
1022
|
retrieved['head']['repo']['name'],
|
958
|
-
false, false, false)
|
1023
|
+
false, false, false, false)
|
959
1024
|
end
|
960
1025
|
|
961
1026
|
head_commit = if not head_repo.nil?
|
@@ -967,7 +1032,12 @@ module GHTorrent
|
|
967
1032
|
|
968
1033
|
pull_req_user = ensure_user(retrieved['user']['login'], false, false)
|
969
1034
|
|
970
|
-
merged = if retrieved['merged_at'].nil? then
|
1035
|
+
merged = if retrieved['merged_at'].nil? then
|
1036
|
+
# Check if the pr's commits are in the repository
|
1037
|
+
false
|
1038
|
+
else
|
1039
|
+
true
|
1040
|
+
end
|
971
1041
|
closed = if retrieved['closed_at'].nil? then false else true end
|
972
1042
|
|
973
1043
|
pull_req = pulls_reqs.first(:base_repo_id => project[:id],
|
@@ -980,10 +1050,11 @@ module GHTorrent
|
|
980
1050
|
:base_commit_id => base_commit[:id],
|
981
1051
|
:user_id => pull_req_user[:id],
|
982
1052
|
:pullreq_id => pullreq_id,
|
983
|
-
:intra_branch => is_intra_branch(retrieved)
|
1053
|
+
:intra_branch => is_intra_branch(retrieved),
|
1054
|
+
:merged => merged
|
984
1055
|
)
|
985
1056
|
|
986
|
-
info log_msg(retrieved)
|
1057
|
+
info log_msg(retrieved) + " was added"
|
987
1058
|
else
|
988
1059
|
debug log_msg(retrieved) + " exists"
|
989
1060
|
end
|
@@ -1001,22 +1072,21 @@ module GHTorrent
|
|
1001
1072
|
state) unless state.nil?
|
1002
1073
|
|
1003
1074
|
ensure_pull_request_commits(owner, repo, pullreq_id) if commits
|
1004
|
-
ensure_pullreq_comments(owner, repo, pullreq_id
|
1075
|
+
ensure_pullreq_comments(owner, repo, pullreq_id) if comments
|
1005
1076
|
|
1006
1077
|
pulls_reqs.first(:base_repo_id => project[:id],
|
1007
1078
|
:pullreq_id => pullreq_id)
|
1008
1079
|
end
|
1009
1080
|
|
1010
|
-
def ensure_pullreq_comments(owner, repo, pullreq_id
|
1011
|
-
currepo = ensure_repo(owner, repo,
|
1012
|
-
time = if created_at.nil? then currepo[:created_at] else Time.now() end
|
1081
|
+
def ensure_pullreq_comments(owner, repo, pullreq_id)
|
1082
|
+
currepo = ensure_repo(owner, repo, false, false, false, false)
|
1013
1083
|
|
1014
1084
|
if currepo.nil?
|
1015
1085
|
warn "GHTorrent: Could not find repository #{owner}/#{repo}"
|
1016
1086
|
return
|
1017
1087
|
end
|
1018
1088
|
|
1019
|
-
pull_req = ensure_pull_request(owner, repo, pullreq_id, false,
|
1089
|
+
pull_req = ensure_pull_request(owner, repo, pullreq_id, false, false)
|
1020
1090
|
|
1021
1091
|
if pull_req.nil?
|
1022
1092
|
warn "Could not retrieve pull req #{owner}/#{repo} -> #{pullreq_id}"
|
@@ -1032,12 +1102,16 @@ module GHTorrent
|
|
1032
1102
|
acc
|
1033
1103
|
end
|
1034
1104
|
end.map { |x|
|
1035
|
-
ensure_pullreq_comment(owner, repo, pullreq_id, x['id']
|
1105
|
+
ensure_pullreq_comment(owner, repo, pullreq_id, x['id'])
|
1036
1106
|
}
|
1037
1107
|
end
|
1038
1108
|
|
1039
|
-
def ensure_pullreq_comment(owner, repo, pullreq_id, comment_id
|
1040
|
-
|
1109
|
+
def ensure_pullreq_comment(owner, repo, pullreq_id, comment_id)
|
1110
|
+
# Commit retrieval is set to false to ensure that no duplicate work
|
1111
|
+
# is done on retrieving a pull request. This has the side effect that
|
1112
|
+
# commits might not be retrieved if a pullreqcomment event gets processed
|
1113
|
+
# before the pullreq event, until the pullreq event has been processed
|
1114
|
+
pull_req = ensure_pull_request(owner, repo, pullreq_id, false, false)
|
1041
1115
|
|
1042
1116
|
if pull_req.nil?
|
1043
1117
|
warn "GHTorrent: Could not retrieve pull req #{owner}/#{repo} -> #{pullreq_id}"
|
@@ -1084,12 +1158,21 @@ module GHTorrent
|
|
1084
1158
|
end
|
1085
1159
|
|
1086
1160
|
def ensure_pull_request_commits(owner, repo, pullreq_id)
|
1161
|
+
pullreq = ensure_pull_request(owner, repo, pullreq_id, false, false)
|
1162
|
+
|
1163
|
+
if pullreq.nil?
|
1164
|
+
warn "GHTorrent: Pull request #{pullreq_id} does not exist for #{owner}/#{repo}"
|
1165
|
+
return
|
1166
|
+
end
|
1167
|
+
|
1087
1168
|
retrieve_pull_req_commits(owner, repo, pullreq_id).reduce([]){|acc, c|
|
1088
|
-
|
1169
|
+
next if c.nil?
|
1170
|
+
head_repo_owner = c['url'].split(/\//)[4]
|
1171
|
+
head_repo_name = c['url'].split(/\//)[5]
|
1172
|
+
x = ensure_commit(head_repo_name, c['sha'], head_repo_owner, true)
|
1089
1173
|
acc << x if not x.nil?
|
1090
1174
|
acc
|
1091
1175
|
}.map { |c|
|
1092
|
-
pullreq = ensure_pull_request(owner, repo, pullreq_id, false, false)
|
1093
1176
|
exists = @db[:pull_request_commits].first(:pull_request_id => pullreq[:id],
|
1094
1177
|
:commit_id => c[:id])
|
1095
1178
|
if exists.nil?
|
@@ -1110,18 +1193,17 @@ module GHTorrent
|
|
1110
1193
|
# ==Parameters:
|
1111
1194
|
# [owner] The user to which the project belongs
|
1112
1195
|
# [repo] The repository/project to find forks for
|
1113
|
-
def ensure_forks(owner, repo)
|
1114
|
-
currepo = ensure_repo(owner, repo, false, false, false)
|
1196
|
+
def ensure_forks(owner, repo, refresh = false)
|
1197
|
+
currepo = ensure_repo(owner, repo, false, false, false, false)
|
1115
1198
|
|
1116
1199
|
if currepo.nil?
|
1117
1200
|
warn "Could not retrieve forks for #{owner}/#{repo}"
|
1118
1201
|
return
|
1119
1202
|
end
|
1120
1203
|
|
1121
|
-
existing_forks = @db.from(:
|
1122
|
-
where(:forks__forked_project_id => :projects__id). \
|
1204
|
+
existing_forks = @db.from(:projects, :users).\
|
1123
1205
|
where(:users__id => :projects__owner_id). \
|
1124
|
-
where(:
|
1206
|
+
where(:projects__forked_from => currepo[:id]).select(:projects__name, :login).all
|
1125
1207
|
|
1126
1208
|
retrieve_forks(owner, repo).reduce([]) do |acc, x|
|
1127
1209
|
if existing_forks.find {|y|
|
@@ -1138,65 +1220,49 @@ module GHTorrent
|
|
1138
1220
|
|
1139
1221
|
##
|
1140
1222
|
# Make sure that a fork is retrieved for a project
|
1141
|
-
def ensure_fork(owner, repo, fork_id
|
1142
|
-
|
1143
|
-
forks = @db[:forks]
|
1144
|
-
forked = ensure_repo(owner, repo, false, false, false)
|
1145
|
-
fork_exists = forks.first(:fork_id => fork_id)
|
1146
|
-
|
1147
|
-
if fork_exists.nil?
|
1148
|
-
retrieved = retrieve_fork(owner, repo, fork_id)
|
1149
|
-
added = if date_added.nil? then retrieved['created_at'] else date_added end
|
1150
|
-
|
1151
|
-
if retrieved.nil?
|
1152
|
-
warn "GHTorrent: Fork #{fork_id} does not exist for #{owner}/#{repo}"
|
1153
|
-
return
|
1154
|
-
end
|
1223
|
+
def ensure_fork(owner, repo, fork_id)
|
1224
|
+
fork = retrieve_fork(owner, repo, fork_id)
|
1155
1225
|
|
1156
|
-
|
1157
|
-
|
1226
|
+
if fork.nil?
|
1227
|
+
warn "GHTorrent: Fork #{fork_id} does not exist for #{owner}/#{repo}"
|
1228
|
+
return
|
1229
|
+
end
|
1158
1230
|
|
1159
|
-
|
1231
|
+
fork_owner = fork['full_name'].split(/\//)[0]
|
1232
|
+
fork_name = fork['full_name'].split(/\//)[1]
|
1160
1233
|
|
1161
|
-
|
1162
|
-
warn "Could not add fork #{fork_id}"
|
1163
|
-
return
|
1164
|
-
end
|
1234
|
+
r = ensure_repo(fork_owner, fork_name, false, false, false, false)
|
1165
1235
|
|
1166
|
-
|
1167
|
-
|
1168
|
-
:fork_id => fork_id,
|
1169
|
-
:created_at => added,
|
1170
|
-
:ext_ref_id => retrieved[@ext_uniq])
|
1171
|
-
info "GHTorrent: Added #{forked_repo_owner}/#{forked_repo_name} as fork of #{owner}/#{repo}"
|
1236
|
+
if r.nil?
|
1237
|
+
warn "GHTorrent: Failed to add #{fork_owner}/#{fork_name} as fork of #{owner}/#{repo}"
|
1172
1238
|
else
|
1173
|
-
|
1174
|
-
unless date_added.nil?
|
1175
|
-
forks.filter(:fork_id => fork_id)\
|
1176
|
-
.update(:created_at => date(date_added))
|
1177
|
-
debug "GHTorrent: Updating fork #{owner}/#{repo} (#{fork_id})"
|
1178
|
-
end
|
1239
|
+
info "GHTorrent: Added #{fork_owner}/#{fork_name} as fork of #{owner}/#{repo}"
|
1179
1240
|
end
|
1180
1241
|
end
|
1181
1242
|
|
1182
1243
|
##
|
1183
1244
|
# Make sure all issues exist for a project
|
1184
|
-
def ensure_issues(owner, repo)
|
1185
|
-
currepo = ensure_repo(owner, repo, false, false, false)
|
1245
|
+
def ensure_issues(owner, repo, refresh = false)
|
1246
|
+
currepo = ensure_repo(owner, repo, false, false, false, false)
|
1186
1247
|
if currepo.nil?
|
1187
1248
|
warn "GHTorrent: Could not retrieve issues for #{owner}/#{repo}"
|
1188
1249
|
return
|
1189
1250
|
end
|
1190
1251
|
|
1191
|
-
|
1252
|
+
raw_issues = if refresh
|
1253
|
+
retrieve_issues(owner, repo, refresh = true)
|
1254
|
+
else
|
1255
|
+
issues = @db[:issues].filter(:repo_id => currepo[:id]).all
|
1256
|
+
retrieve_issues(owner, repo).reduce([]) do |acc, x|
|
1257
|
+
if issues.find { |y| y[:issue_id] == x['number'] }.nil?
|
1258
|
+
acc << x
|
1259
|
+
else
|
1260
|
+
acc
|
1261
|
+
end
|
1262
|
+
end
|
1263
|
+
end
|
1192
1264
|
|
1193
|
-
|
1194
|
-
if issues.find { |y| y[:issue_id] == x['number'] }.nil?
|
1195
|
-
acc << x
|
1196
|
-
else
|
1197
|
-
acc
|
1198
|
-
end
|
1199
|
-
end.map { |x| ensure_issue(owner, repo, x['number']) }
|
1265
|
+
raw_issues.map { |x| ensure_issue(owner, repo, x['number']) }
|
1200
1266
|
end
|
1201
1267
|
|
1202
1268
|
##
|
@@ -1204,7 +1270,7 @@ module GHTorrent
|
|
1204
1270
|
def ensure_issue(owner, repo, issue_id, events = true, comments = true)
|
1205
1271
|
|
1206
1272
|
issues = @db[:issues]
|
1207
|
-
repository = ensure_repo(owner, repo, false, false, false)
|
1273
|
+
repository = ensure_repo(owner, repo, false, false, false, false)
|
1208
1274
|
|
1209
1275
|
if repo.nil?
|
1210
1276
|
warn "Cannot find repo #{owner}/#{repo}"
|
@@ -1214,24 +1280,27 @@ module GHTorrent
|
|
1214
1280
|
cur_issue = issues.first(:issue_id => issue_id,
|
1215
1281
|
:repo_id => repository[:id])
|
1216
1282
|
|
1217
|
-
|
1218
|
-
retrieved = retrieve_issue(owner, repo, issue_id)
|
1283
|
+
retrieved = retrieve_issue(owner, repo, issue_id)
|
1219
1284
|
|
1220
|
-
|
1221
|
-
|
1222
|
-
|
1223
|
-
|
1285
|
+
if retrieved.nil?
|
1286
|
+
warn "GHTorrent: Issue #{issue_id} does not exist for #{owner}/#{repo}"
|
1287
|
+
return
|
1288
|
+
end
|
1289
|
+
|
1290
|
+
# Pull requests and issues share the same issue_id
|
1291
|
+
pull_req = unless retrieved['pull_request'].nil? or
|
1292
|
+
retrieved['pull_request']['patch_url'].nil?
|
1293
|
+
info "GHTorrent: Issue #{owner}/#{repo}->#{issue_id} is a pull request"
|
1294
|
+
ensure_pull_request(owner, repo, issue_id)
|
1295
|
+
end
|
1296
|
+
|
1297
|
+
if cur_issue.nil?
|
1224
1298
|
|
1225
1299
|
reporter = ensure_user(retrieved['user']['login'], false, false)
|
1226
1300
|
assignee = unless retrieved['assignee'].nil?
|
1227
1301
|
ensure_user(retrieved['assignee']['login'], false, false)
|
1228
1302
|
end
|
1229
1303
|
|
1230
|
-
# Pull requests and issues share the same issue_id
|
1231
|
-
pull_req = unless retrieved['pull_request'].nil? or retrieved['pull_request']['patch_url'].nil?
|
1232
|
-
ensure_pull_request(owner, repo, issue_id)
|
1233
|
-
end
|
1234
|
-
|
1235
1304
|
issues.insert(:repo_id => repository[:id],
|
1236
1305
|
:assignee_id => unless assignee.nil? then assignee[:id] end,
|
1237
1306
|
:reporter_id => reporter[:id],
|
@@ -1241,23 +1310,26 @@ module GHTorrent
|
|
1241
1310
|
:created_at => date(retrieved['created_at']),
|
1242
1311
|
:ext_ref_id => retrieved[@ext_uniq])
|
1243
1312
|
|
1244
|
-
ensure_issue_events(owner, repo, issue_id) if events
|
1245
|
-
ensure_issue_comments(owner, repo, issue_id) if comments and retrieved['comments'] > 0
|
1246
|
-
|
1247
1313
|
info "GHTorrent: Added issue #{owner}/#{repo} -> #{issue_id}"
|
1248
|
-
issues.first(:issue_id => issue_id,
|
1249
|
-
:repo_id => repository[:id])
|
1250
1314
|
else
|
1251
1315
|
info "GHTorrent: Issue #{owner}/#{repo}->#{issue_id} exists"
|
1252
|
-
cur_issue
|
1316
|
+
if cur_issue[:pull_request] == false and not pull_req.nil?
|
1317
|
+
info "GHTorrent: Updating issue #{owner}/#{repo}->#{issue_id} as pull request"
|
1318
|
+
issues.filter(:issue_id => issue_id, :repo_id => repository[:id]).update(
|
1319
|
+
:pull_request => true,
|
1320
|
+
:pull_request_id => pull_req[:id])
|
1321
|
+
end
|
1253
1322
|
end
|
1323
|
+
ensure_issue_events(owner, repo, issue_id) if events
|
1324
|
+
ensure_issue_comments(owner, repo, issue_id) if comments
|
1325
|
+
issues.first(:issue_id => issue_id,
|
1326
|
+
:repo_id => repository[:id])
|
1254
1327
|
end
|
1255
1328
|
|
1256
1329
|
##
|
1257
1330
|
# Retrieve and process all events for an issue
|
1258
1331
|
def ensure_issue_events(owner, repo, issue_id)
|
1259
|
-
currepo = ensure_repo(owner, repo,
|
1260
|
-
#time = if created_at.nil? then currepo[:created_at] else Time.now() end
|
1332
|
+
currepo = ensure_repo(owner, repo, false, false, false, false)
|
1261
1333
|
|
1262
1334
|
if currepo.nil?
|
1263
1335
|
warn "GHTorrent: Could not find repository #{owner}/#{repo}"
|
@@ -1361,7 +1433,7 @@ module GHTorrent
|
|
1361
1433
|
##
|
1362
1434
|
# Retrieve and process all comments for an issue
|
1363
1435
|
def ensure_issue_comments(owner, repo, issue_id)
|
1364
|
-
currepo = ensure_repo(owner, repo,
|
1436
|
+
currepo = ensure_repo(owner, repo, false, false, false, false)
|
1365
1437
|
|
1366
1438
|
if currepo.nil?
|
1367
1439
|
warn "GHTorrent: Could not find repository #{owner}/#{repo}"
|
@@ -1390,7 +1462,7 @@ module GHTorrent
|
|
1390
1462
|
##
|
1391
1463
|
# Retrieve and process +comment_id+ for an +issue_id+
|
1392
1464
|
def ensure_issue_comment(owner, repo, issue_id, comment_id)
|
1393
|
-
issue = ensure_issue(owner, repo, issue_id)
|
1465
|
+
issue = ensure_issue(owner, repo, issue_id, false, false)
|
1394
1466
|
|
1395
1467
|
if issue.nil?
|
1396
1468
|
warn "GHTorrent: Could not retrieve issue #{owner}/#{repo} -> #{issue_id}"
|
@@ -1467,14 +1539,13 @@ module GHTorrent
|
|
1467
1539
|
repository = ensure_repo(user, repo, false, false, false)
|
1468
1540
|
|
1469
1541
|
if repository.nil?
|
1470
|
-
warn "
|
1471
|
-
return
|
1542
|
+
warn "GHTorrent: repository #{user}/#{repo} deleted"
|
1472
1543
|
end
|
1473
1544
|
|
1474
1545
|
commits.insert(:sha => c['sha'],
|
1475
1546
|
:author_id => author[:id],
|
1476
1547
|
:committer_id => commiter[:id],
|
1477
|
-
:project_id => repository[:id],
|
1548
|
+
:project_id => if repository.nil? then nil else repository[:id] end ,
|
1478
1549
|
:created_at => date(c['commit']['author']['date']),
|
1479
1550
|
:ext_ref_id => c[@ext_uniq]
|
1480
1551
|
)
|
@@ -1513,6 +1584,14 @@ module GHTorrent
|
|
1513
1584
|
def is_valid_email(email)
|
1514
1585
|
email =~ /^[a-zA-Z][\w\.-]*[a-zA-Z0-9]@[a-zA-Z0-9][\w\.-]*[a-zA-Z0-9]\.[a-zA-Z][a-zA-Z\.]*[a-zA-Z]$/
|
1515
1586
|
end
|
1587
|
+
|
1588
|
+
def max(a, b)
|
1589
|
+
if a >= b
|
1590
|
+
a
|
1591
|
+
else
|
1592
|
+
b
|
1593
|
+
end
|
1594
|
+
end
|
1516
1595
|
end
|
1517
1596
|
end
|
1518
1597
|
|