ghtorrent 0.6 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +12 -0
- data/Gemfile +1 -11
- data/Gemfile.lock +27 -29
- data/README.md +10 -14
- data/bin/ght-mirror-events +0 -0
- data/bin/ght-process-event +0 -0
- data/bin/ght-retrieve-repo +0 -0
- data/bin/ght-retrieve-user +6 -0
- data/lib/ghtorrent.rb +1 -0
- data/lib/ghtorrent/adapters/base_adapter.rb +6 -0
- data/lib/ghtorrent/adapters/mongo_persister.rb +8 -0
- data/lib/ghtorrent/api_client.rb +8 -29
- data/lib/ghtorrent/command.rb +1 -3
- data/lib/ghtorrent/commands/ght_data_retrieval.rb +5 -10
- data/lib/ghtorrent/commands/ght_get_more_commits.rb +28 -17
- data/lib/ghtorrent/commands/ght_load.rb +2 -2
- data/lib/ghtorrent/commands/ght_retrieve_repo.rb +45 -15
- data/lib/ghtorrent/commands/ght_retrieve_user.rb +72 -0
- data/lib/ghtorrent/ghtorrent.rb +288 -209
- data/lib/ghtorrent/migrations/012_add_forks_to_projects.rb +31 -0
- data/lib/ghtorrent/migrations/013_add_merged_to_pullreqs.rb +39 -0
- data/lib/ghtorrent/migrations/014_add_deleted_to_projects.rb +21 -0
- data/lib/ghtorrent/retriever.rb +90 -25
- data/lib/ghtorrent/settings.rb +44 -6
- data/lib/version.rb +2 -2
- metadata +52 -84
- data/bin/ght-periodic-dump +0 -130
- data/bin/ght-torrent-index +0 -150
- data/test/callstack_test.rb +0 -67
@@ -122,11 +122,11 @@ Loads object ids from a collection to a queue for further processing.
|
|
122
122
|
connection.close { EventMachine.stop }
|
123
123
|
}
|
124
124
|
|
125
|
-
# Read next
|
125
|
+
# Read next 100000 items and queue them
|
126
126
|
read_and_publish = Proc.new {
|
127
127
|
|
128
128
|
to_read = if options.number == -1
|
129
|
-
|
129
|
+
100000
|
130
130
|
else
|
131
131
|
if options.number - num_read - 1 <= 0
|
132
132
|
-1
|
@@ -46,31 +46,44 @@ An efficient way to get all data for a single repo
|
|
46
46
|
end
|
47
47
|
|
48
48
|
def go
|
49
|
+
self.settings = override_config(settings, :mirror_history_pages_back, -1)
|
49
50
|
user_entry = ght.transaction{ght.ensure_user(ARGV[0], false, false)}
|
50
51
|
|
51
52
|
if user_entry.nil?
|
52
|
-
Trollop::die "Cannot find user #{
|
53
|
+
Trollop::die "Cannot find user #{ARGV[0]}"
|
53
54
|
end
|
54
55
|
|
55
56
|
user = user_entry[:login]
|
56
57
|
|
57
|
-
repo_entry = ght.transaction{ght.ensure_repo(ARGV[0], ARGV[1], false, false,
|
58
|
+
repo_entry = ght.transaction{ght.ensure_repo(ARGV[0], ARGV[1], false, false,
|
59
|
+
false, false)}
|
58
60
|
|
59
61
|
if repo_entry.nil?
|
60
|
-
Trollop::die "Cannot find repository #{
|
62
|
+
Trollop::die "Cannot find repository #{ARGV[0]}/#{ARGV[1]}"
|
61
63
|
end
|
62
64
|
|
63
65
|
repo = repo_entry[:name]
|
64
66
|
|
65
|
-
|
66
|
-
ensure_issues ensure_project_members ensure_watchers).each {|x|
|
67
|
+
def send_message(function, user, repo)
|
67
68
|
begin
|
68
|
-
ght.send(
|
69
|
+
ght.send(function, user, repo, refresh = true)
|
69
70
|
rescue Exception => e
|
70
71
|
puts STDERR, e.message
|
71
72
|
puts STDERR, e.backtrace
|
72
73
|
end
|
73
|
-
|
74
|
+
end
|
75
|
+
|
76
|
+
functions = %w(ensure_commits ensure_forks ensure_pull_requests
|
77
|
+
ensure_issues ensure_project_members ensure_watchers)
|
78
|
+
|
79
|
+
if ARGV[2].nil?
|
80
|
+
functions.each do |x|
|
81
|
+
send_message(x, user, repo)
|
82
|
+
end
|
83
|
+
else
|
84
|
+
Trollop::die("Not a valid function: #{ARGV[2]}") unless functions.include? ARGV[2]
|
85
|
+
send_message(ARGV[2], user, repo)
|
86
|
+
end
|
74
87
|
end
|
75
88
|
end
|
76
89
|
|
@@ -79,40 +92,57 @@ end
|
|
79
92
|
class TransactedGHTorrent < GHTorrent::Mirror
|
80
93
|
|
81
94
|
def ensure_commit(repo, sha, user, comments = true)
|
82
|
-
|
95
|
+
check_transaction do
|
83
96
|
super(repo, sha, user, comments)
|
84
97
|
end
|
85
98
|
end
|
86
99
|
|
87
|
-
def ensure_fork(owner, repo, fork_id
|
88
|
-
|
89
|
-
super(owner, repo, fork_id
|
100
|
+
def ensure_fork(owner, repo, fork_id)
|
101
|
+
check_transaction do
|
102
|
+
super(owner, repo, fork_id)
|
90
103
|
end
|
91
104
|
end
|
92
105
|
|
93
106
|
def ensure_pull_request(owner, repo, pullreq_id,
|
94
107
|
comments = true, commits = true,
|
95
108
|
state = nil, created_at = nil)
|
96
|
-
|
109
|
+
check_transaction do
|
97
110
|
super(owner, repo, pullreq_id, comments, commits, state, created_at)
|
98
111
|
end
|
99
112
|
end
|
100
113
|
|
101
114
|
def ensure_issue(owner, repo, issue_id, events = true, comments = true)
|
102
|
-
|
115
|
+
check_transaction do
|
103
116
|
super(owner, repo, issue_id, events, comments)
|
104
117
|
end
|
105
118
|
end
|
106
119
|
|
107
120
|
def ensure_project_member(owner, repo, new_member, date_added)
|
108
|
-
|
121
|
+
check_transaction do
|
109
122
|
super(owner, repo, new_member, date_added)
|
110
123
|
end
|
111
124
|
end
|
112
125
|
|
113
126
|
def ensure_watcher(owner, repo, watcher, date_added = nil)
|
114
|
-
|
127
|
+
check_transaction do
|
115
128
|
super(owner, repo, watcher, date_added)
|
116
129
|
end
|
117
130
|
end
|
131
|
+
|
132
|
+
def check_transaction(&block)
|
133
|
+
begin
|
134
|
+
if @db.in_transaction?
|
135
|
+
debug "Transaction already started"
|
136
|
+
yield block
|
137
|
+
else
|
138
|
+
transaction do
|
139
|
+
yield block
|
140
|
+
end
|
141
|
+
end
|
142
|
+
rescue Exception => e
|
143
|
+
puts STDERR, e.message
|
144
|
+
puts STDERR, e.backtrace
|
145
|
+
end
|
146
|
+
end
|
147
|
+
|
118
148
|
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
|
3
|
+
require 'ghtorrent/ghtorrent'
|
4
|
+
require 'ghtorrent/settings'
|
5
|
+
require 'ghtorrent/logging'
|
6
|
+
require 'ghtorrent/command'
|
7
|
+
require 'ghtorrent/retriever'
|
8
|
+
require 'ghtorrent/commands/ght_retrieve_repo'
|
9
|
+
|
10
|
+
class GHTRetrieveUser < GHTRetrieveRepo
|
11
|
+
|
12
|
+
def prepare_options(options)
|
13
|
+
options.banner <<-BANNER
|
14
|
+
An efficient way to get all data for a single user
|
15
|
+
|
16
|
+
#{command_name} [options] user
|
17
|
+
|
18
|
+
BANNER
|
19
|
+
end
|
20
|
+
|
21
|
+
def validate
|
22
|
+
super
|
23
|
+
Trollop::die "One argument are required" unless args[0] && !args[0].empty?
|
24
|
+
end
|
25
|
+
|
26
|
+
def go
|
27
|
+
self.settings = override_config(settings, :mirror_history_pages_back, -1)
|
28
|
+
user_entry = ght.transaction{ght.ensure_user(ARGV[0], false, false)}
|
29
|
+
|
30
|
+
if user_entry.nil?
|
31
|
+
Trollop::die "Cannot find user #{ARGV[0]}"
|
32
|
+
end
|
33
|
+
|
34
|
+
user = user_entry[:login]
|
35
|
+
|
36
|
+
def send_message(function, user)
|
37
|
+
begin
|
38
|
+
ght.send(function, user)
|
39
|
+
rescue Exception => e
|
40
|
+
puts STDERR, e.message
|
41
|
+
puts STDERR, e.backtrace
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
functions = %w(ensure_user_followers ensure_orgs)
|
46
|
+
|
47
|
+
if ARGV[2].nil?
|
48
|
+
functions.each do |x|
|
49
|
+
send_message(x, user)
|
50
|
+
end
|
51
|
+
else
|
52
|
+
Trollop::die("Not a valid function: #{ARGV[2]}") unless functions.include? ARGV[2]
|
53
|
+
send_message(ARGV[2], user)
|
54
|
+
end
|
55
|
+
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
class TransactedGHTorrent < GHTorrent::Mirror
|
60
|
+
|
61
|
+
def ensure_user_followers(user)
|
62
|
+
check_transaction do
|
63
|
+
super(user)
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
def ensure_orgs(user)
|
68
|
+
check_transaction do
|
69
|
+
super(user)
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
data/lib/ghtorrent/ghtorrent.rb
CHANGED
@@ -24,6 +24,8 @@ module GHTorrent
|
|
24
24
|
|
25
25
|
# Get a connection to the database
|
26
26
|
def get_db
|
27
|
+
return @db unless @db.nil?
|
28
|
+
|
27
29
|
Sequel.single_threaded = true
|
28
30
|
@db = Sequel.connect(config(:sql_url), :encoding => 'utf8')
|
29
31
|
#@db.loggers << @logger
|
@@ -54,7 +56,6 @@ module GHTorrent
|
|
54
56
|
end
|
55
57
|
|
56
58
|
transaction do
|
57
|
-
ensure_user(user, true, true)
|
58
59
|
ensure_commit(repo, sha, user)
|
59
60
|
end
|
60
61
|
end
|
@@ -78,10 +79,9 @@ module GHTorrent
|
|
78
79
|
# [user] The login of the repository owner
|
79
80
|
# [repo] The name of the repository
|
80
81
|
# [comment_id] The id of the commit comment to retrieve
|
81
|
-
|
82
|
-
def get_commit_comment(user, repo, comment_id, date_added)
|
82
|
+
def get_commit_comment(user, repo, comment_id)
|
83
83
|
transaction do
|
84
|
-
ensure_commit_comment(user, repo, comment_id
|
84
|
+
ensure_commit_comment(user, repo, comment_id)
|
85
85
|
end
|
86
86
|
end
|
87
87
|
|
@@ -106,8 +106,6 @@ module GHTorrent
|
|
106
106
|
# [date_added] The timestamp that the add event took place
|
107
107
|
def get_follower(follower, followed, date_added)
|
108
108
|
transaction do
|
109
|
-
ensure_user(follower, true, true)
|
110
|
-
ensure_user(followed, true, true)
|
111
109
|
ensure_user_follower(followed, follower, date_added)
|
112
110
|
end
|
113
111
|
end
|
@@ -130,10 +128,9 @@ module GHTorrent
|
|
130
128
|
# [owner] The login of the repository owner
|
131
129
|
# [repo] The name of the repository
|
132
130
|
# [fork_id] The fork item id
|
133
|
-
|
134
|
-
def get_fork(owner, repo, fork_id, date_added)
|
131
|
+
def get_fork(owner, repo, fork_id)
|
135
132
|
transaction do
|
136
|
-
ensure_fork(owner, repo, fork_id
|
133
|
+
ensure_fork(owner, repo, fork_id)
|
137
134
|
end
|
138
135
|
end
|
139
136
|
|
@@ -144,9 +141,9 @@ module GHTorrent
|
|
144
141
|
# [repo] The name of the repository
|
145
142
|
# [fork_id] The fork item id
|
146
143
|
# [date_added] The timestamp that the add event took place
|
147
|
-
def get_pullreq_comment(owner, repo, pullreq_id, comment_id
|
144
|
+
def get_pullreq_comment(owner, repo, pullreq_id, comment_id)
|
148
145
|
transaction do
|
149
|
-
ensure_pullreq_comment(owner, repo, pullreq_id, comment_id
|
146
|
+
ensure_pullreq_comment(owner, repo, pullreq_id, comment_id)
|
150
147
|
end
|
151
148
|
end
|
152
149
|
|
@@ -158,9 +155,9 @@ module GHTorrent
|
|
158
155
|
# [issue_id] The issue id
|
159
156
|
# [action] The action that took place for the issue
|
160
157
|
# [date_added] The timestamp that the add event took place
|
161
|
-
def get_issue(owner, repo, issue_id
|
158
|
+
def get_issue(owner, repo, issue_id)
|
162
159
|
transaction do
|
163
|
-
ensure_issue(owner, repo, issue_id
|
160
|
+
ensure_issue(owner, repo, issue_id)
|
164
161
|
end
|
165
162
|
end
|
166
163
|
|
@@ -181,7 +178,7 @@ module GHTorrent
|
|
181
178
|
# Make sure a commit exists
|
182
179
|
#
|
183
180
|
def ensure_commit(repo, sha, user, comments = true)
|
184
|
-
ensure_repo(user, repo)
|
181
|
+
ensure_repo(user, repo, false, false, false, false)
|
185
182
|
c = retrieve_commit(repo, sha, user)
|
186
183
|
|
187
184
|
if c.nil?
|
@@ -208,8 +205,9 @@ module GHTorrent
|
|
208
205
|
# [sha] The first commit to start retrieving from. If nil, then the
|
209
206
|
# earliest stored commit will be used instead.
|
210
207
|
# [num_pages] The number of commit pages to retrieve
|
211
|
-
def ensure_commits(user, repo, sha = nil,
|
212
|
-
num_pages = config(:mirror_commit_pages_new_repo)
|
208
|
+
def ensure_commits(user, repo, refresh = false, sha = nil,
|
209
|
+
num_pages = config(:mirror_commit_pages_new_repo)
|
210
|
+
)
|
213
211
|
userid = @db[:users].filter(:login => user).first[:id]
|
214
212
|
repoid = @db[:projects].filter(:owner_id => userid,
|
215
213
|
:name => repo).first[:id]
|
@@ -236,32 +234,37 @@ module GHTorrent
|
|
236
234
|
# in the database.
|
237
235
|
def ensure_parents(commit)
|
238
236
|
commits = @db[:commits]
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
end
|
249
|
-
|
250
|
-
if parent.nil?
|
237
|
+
parents = @db[:commit_parents]
|
238
|
+
commit['parents'].map do |p|
|
239
|
+
url = p['url'].split(/\//)
|
240
|
+
this = commits.first(:sha => commit['sha'])
|
241
|
+
parent = commits.first(:sha => url[7])
|
242
|
+
|
243
|
+
if parent.nil?
|
244
|
+
c = retrieve_commit(url[5], url[7], url[4])
|
245
|
+
if c.nil?
|
251
246
|
warn "GHTorrent: Could not retrieve #{url[4]}/#{url[5]} -> #{url[7]}, parent to commit #{this[:sha]}"
|
252
|
-
|
247
|
+
next
|
253
248
|
end
|
249
|
+
parent = store_commit(c, url[5], url[4])
|
250
|
+
end
|
254
251
|
|
255
|
-
|
256
|
-
|
252
|
+
if parent.nil?
|
253
|
+
warn "GHTorrent: Could not retrieve #{url[4]}/#{url[5]} -> #{url[7]}, parent to commit #{this[:sha]}"
|
254
|
+
next
|
255
|
+
end
|
257
256
|
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
257
|
+
if parents.first(:commit_id => this[:id],
|
258
|
+
:parent_id => parent[:id]).nil?
|
259
|
+
|
260
|
+
parents.insert(:commit_id => this[:id],
|
261
|
+
:parent_id => parent[:id])
|
262
|
+
info "GHTorrent: Added parent #{parent[:sha]} to commit #{this[:sha]}"
|
263
|
+
else
|
264
|
+
debug "GHTorrent: Parent #{parent[:sha]} for commit #{this[:sha]} exists"
|
264
265
|
end
|
266
|
+
parents.first(:commit_id => this[:id], :parent_id => parent[:id])
|
267
|
+
end
|
265
268
|
end
|
266
269
|
|
267
270
|
##
|
@@ -271,23 +274,27 @@ module GHTorrent
|
|
271
274
|
# [repo] The repo receiving the commit
|
272
275
|
# [sha] The commit SHA
|
273
276
|
def ensure_repo_commit(user, repo, sha)
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
+
project = ensure_repo(user, repo, false, false, false, false)
|
278
|
+
|
279
|
+
if project.nil?
|
280
|
+
warn "GHTorrent: Repo #{user}/#{repo} does not exist"
|
281
|
+
return
|
282
|
+
end
|
283
|
+
|
277
284
|
commitid = @db[:commits].first(:sha => sha)[:id]
|
278
285
|
|
279
|
-
exists = @db[:project_commits].first(:project_id =>
|
286
|
+
exists = @db[:project_commits].first(:project_id => project[:id],
|
280
287
|
:commit_id => commitid)
|
281
288
|
if exists.nil?
|
282
289
|
@db[:project_commits].insert(
|
283
|
-
:project_id =>
|
290
|
+
:project_id => project[:id],
|
284
291
|
:commit_id => commitid
|
285
292
|
)
|
286
|
-
info "GHTorrent:
|
287
|
-
@db[:project_commits].first(:project_id =>
|
293
|
+
info "GHTorrent: Associating commit #{sha} with #{user}/#{repo}"
|
294
|
+
@db[:project_commits].first(:project_id => project[:id],
|
288
295
|
:commit_id => commitid)
|
289
296
|
else
|
290
|
-
debug "GHTorrent: Commit #{user}/#{repo}
|
297
|
+
debug "GHTorrent: Commit #{sha} already associated with #{user}/#{repo}"
|
291
298
|
exists
|
292
299
|
end
|
293
300
|
end
|
@@ -333,6 +340,17 @@ module GHTorrent
|
|
333
340
|
return users.first(:login => byemail[:login])
|
334
341
|
end
|
335
342
|
|
343
|
+
# This means that the user's login has been associated with a
|
344
|
+
# Github user by the time the commit was done (and hence Github was
|
345
|
+
# able to associate the commit to an account), but afterwards the
|
346
|
+
# user has deleted his account (before GHTorrent processed it).
|
347
|
+
# In the absence of something better to do, try to find the user by email
|
348
|
+
# and return a "fake" user entry.
|
349
|
+
if added.nil?
|
350
|
+
warn "GHTorrent: User account for user #{login} deleted from Github"
|
351
|
+
return ensure_user("#{name}<#{email}>", false, false)
|
352
|
+
end
|
353
|
+
|
336
354
|
if byemail.nil?
|
337
355
|
users.filter(:login => login).update(:name => name) if added[:name].nil?
|
338
356
|
users.filter(:login => login).update(:email => email) if added[:email].nil?
|
@@ -373,10 +391,12 @@ module GHTorrent
|
|
373
391
|
def ensure_user(user, followers, orgs)
|
374
392
|
# Github only supports alphanumerics and dashes in its usernames.
|
375
393
|
# All other symbols are treated as emails.
|
376
|
-
if not user.match(/^[
|
394
|
+
if not user.match(/^[\w\-]*$/)
|
377
395
|
begin
|
378
396
|
name, email = user.split("<")
|
379
397
|
email = email.split(">")[0]
|
398
|
+
name = name.strip unless name.nil?
|
399
|
+
email = email.strip unless email.nil?
|
380
400
|
rescue Exception
|
381
401
|
raise new GHTorrentException.new("Not a valid email address: #{user}")
|
382
402
|
end
|
@@ -384,7 +404,7 @@ module GHTorrent
|
|
384
404
|
unless is_valid_email(email)
|
385
405
|
warn("GHTorrent: Extracted email(#{email}) not valid for user #{user}")
|
386
406
|
end
|
387
|
-
u = ensure_user_byemail(email
|
407
|
+
u = ensure_user_byemail(email, name)
|
388
408
|
else
|
389
409
|
u = ensure_user_byuname(user)
|
390
410
|
ensure_user_followers(user) if followers
|
@@ -446,9 +466,8 @@ module GHTorrent
|
|
446
466
|
#
|
447
467
|
# ==Parameters:
|
448
468
|
# [user] The user login to find followers by
|
449
|
-
def ensure_user_followers(followed
|
469
|
+
def ensure_user_followers(followed)
|
450
470
|
curuser = ensure_user(followed, false, false)
|
451
|
-
time = curuser[:created_at]
|
452
471
|
followers = @db.from(:followers, :users).\
|
453
472
|
where(:followers__follower_id => :users__id).
|
454
473
|
where(:followers__user_id => curuser[:id]).select(:login).all
|
@@ -459,12 +478,12 @@ module GHTorrent
|
|
459
478
|
else
|
460
479
|
acc
|
461
480
|
end
|
462
|
-
end.map { |x| ensure_user_follower(followed, x['login']
|
481
|
+
end.map { |x| ensure_user_follower(followed, x['login']) }
|
463
482
|
end
|
464
483
|
|
465
484
|
##
|
466
485
|
# Make sure that a user follows another one
|
467
|
-
def ensure_user_follower(followed, follower, date_added)
|
486
|
+
def ensure_user_follower(followed, follower, date_added = nil)
|
468
487
|
follower_user = ensure_user(follower, false, false)
|
469
488
|
followed_user = ensure_user(followed, false, false)
|
470
489
|
|
@@ -474,14 +493,17 @@ module GHTorrent
|
|
474
493
|
end
|
475
494
|
|
476
495
|
followers = @db[:followers]
|
477
|
-
|
478
|
-
|
496
|
+
follower_id = follower_user[:id]
|
497
|
+
followed_id = followed_user[:id]
|
479
498
|
|
480
499
|
follower_exists = followers.first(:user_id => followed_id,
|
481
500
|
:follower_id => follower_id)
|
482
|
-
|
483
501
|
if follower_exists.nil?
|
484
|
-
added = if date_added.nil?
|
502
|
+
added = if date_added.nil?
|
503
|
+
max(follower_user[:created_at], followed_user[:created_at])
|
504
|
+
else
|
505
|
+
date_added
|
506
|
+
end
|
485
507
|
retrieved = retrieve_user_follower(followed, follower)
|
486
508
|
|
487
509
|
if retrieved.nil?
|
@@ -495,13 +517,17 @@ module GHTorrent
|
|
495
517
|
:ext_ref_id => retrieved[@ext_uniq])
|
496
518
|
info "GHTorrent: User #{follower} follows #{followed}"
|
497
519
|
else
|
498
|
-
|
499
|
-
|
500
|
-
|
520
|
+
debug "GHTorrent: Follower #{follower} exists for user #{followed}"
|
521
|
+
end
|
522
|
+
|
523
|
+
unless date_added.nil?
|
524
|
+
followers.filter(:user_id => followed_id,
|
525
|
+
:follower_id => follower_id)\
|
501
526
|
.update(:created_at => date(date_added))
|
502
|
-
|
503
|
-
end
|
527
|
+
debug "GHTorrent: Updating follower #{followed} -> #{follower}, created_at -> #{date(date_added)}"
|
504
528
|
end
|
529
|
+
|
530
|
+
followers.first(:user_id => followed_id, :follower_id => follower_id)
|
505
531
|
end
|
506
532
|
|
507
533
|
##
|
@@ -529,19 +555,29 @@ module GHTorrent
|
|
529
555
|
:name => name,
|
530
556
|
:login => login,
|
531
557
|
:created_at => Time.now,
|
532
|
-
:ext_ref_id => ""
|
533
|
-
)
|
558
|
+
:ext_ref_id => "")
|
534
559
|
info "GHTorrent: Added fake user #{login} -> #{email}"
|
535
560
|
users.first(:login => login)
|
536
561
|
else
|
537
|
-
users.
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
562
|
+
in_db = users.first(:login => u['login'])
|
563
|
+
if in_db.nil?
|
564
|
+
users.insert(:login => u['login'],
|
565
|
+
:name => u['name'],
|
566
|
+
:company => u['company'],
|
567
|
+
:email => u['email'],
|
568
|
+
:location => u['location'],
|
569
|
+
:created_at => date(u['created_at']),
|
570
|
+
:ext_ref_id => u[@ext_uniq])
|
571
|
+
info "GHTorrent: Found #{email} through search API query"
|
572
|
+
else
|
573
|
+
in_db.update(:name => u['name'],
|
574
|
+
:company => u['company'],
|
575
|
+
:email => u['email'],
|
576
|
+
:location => u['location'],
|
577
|
+
:created_at => date(u['created_at']),
|
578
|
+
:ext_ref_id => u[@ext_uniq])
|
579
|
+
info "GHTorrent: User with email #{email} exists with username #{u['login']}"
|
580
|
+
end
|
545
581
|
users.first(:login => u['login'])
|
546
582
|
end
|
547
583
|
else
|
@@ -560,10 +596,17 @@ module GHTorrent
|
|
560
596
|
# == Returns:
|
561
597
|
# If the repo can be retrieved, it is returned as a Hash. Otherwise,
|
562
598
|
# the result is nil
|
563
|
-
def ensure_repo(user, repo, commits = true, project_members = true,
|
599
|
+
def ensure_repo(user, repo, commits = true, project_members = true,
|
600
|
+
watchers = true, forks = true)
|
564
601
|
|
565
602
|
repos = @db[:projects]
|
566
603
|
curuser = ensure_user(user, false, false)
|
604
|
+
|
605
|
+
if curuser.nil?
|
606
|
+
warn "Cannot find user #{user}"
|
607
|
+
return
|
608
|
+
end
|
609
|
+
|
567
610
|
currepo = repos.first(:owner_id => curuser[:id], :name => repo)
|
568
611
|
|
569
612
|
if currepo.nil?
|
@@ -575,17 +618,29 @@ module GHTorrent
|
|
575
618
|
end
|
576
619
|
|
577
620
|
repos.insert(:url => r['url'],
|
578
|
-
:owner_id =>
|
621
|
+
:owner_id => curuser[:id],
|
579
622
|
:name => r['name'],
|
580
623
|
:description => r['description'],
|
581
624
|
:language => r['language'],
|
582
625
|
:created_at => date(r['created_at']),
|
583
626
|
:ext_ref_id => r[@ext_uniq])
|
584
627
|
|
585
|
-
|
628
|
+
unless r['parent'].nil?
|
629
|
+
parent_owner = r['parent']['owner']['login']
|
630
|
+
parent_repo = r['parent']['name']
|
631
|
+
|
632
|
+
parent = ensure_repo(parent_owner, parent_repo, false, false, false, false)
|
633
|
+
|
634
|
+
repos.filter(:owner_id => curuser[:id], :name => repo).update(:forked_from => parent[:id])
|
635
|
+
|
636
|
+
info "Repo #{user}/#{repo} is a fork from #{parent_owner}/#{parent_repo}"
|
637
|
+
end
|
638
|
+
|
639
|
+
info "GHTorrent: New repo #{user}/#{repo}"
|
586
640
|
ensure_commits(user, repo) if commits
|
587
641
|
ensure_project_members(user, repo) if project_members
|
588
642
|
ensure_watchers(user, repo) if watchers
|
643
|
+
ensure_forks(user, repo) if forks
|
589
644
|
repos.first(:owner_id => curuser[:id], :name => repo)
|
590
645
|
else
|
591
646
|
debug "GHTorrent: Repo #{user}/#{repo} exists"
|
@@ -595,8 +650,8 @@ module GHTorrent
|
|
595
650
|
|
596
651
|
##
|
597
652
|
# Make sure that a project has all the registered members defined
|
598
|
-
def ensure_project_members(user, repo)
|
599
|
-
currepo = ensure_repo(user, repo,
|
653
|
+
def ensure_project_members(user, repo, refresh = false)
|
654
|
+
currepo = ensure_repo(user, repo, false, false, false, false)
|
600
655
|
time = currepo[:created_at]
|
601
656
|
|
602
657
|
project_members = @db.from(:project_members, :users).\
|
@@ -616,7 +671,7 @@ module GHTorrent
|
|
616
671
|
# Make sure that a project member exists in a project
|
617
672
|
def ensure_project_member(owner, repo, new_member, date_added)
|
618
673
|
pr_members = @db[:project_members]
|
619
|
-
project = ensure_repo(owner, repo,
|
674
|
+
project = ensure_repo(owner, repo, false, false, false, false)
|
620
675
|
new_user = ensure_user(new_member, false, false)
|
621
676
|
|
622
677
|
if project.nil? or new_user.nil?
|
@@ -627,7 +682,11 @@ module GHTorrent
|
|
627
682
|
:repo_id => project[:id])
|
628
683
|
|
629
684
|
if memb_exist.nil?
|
630
|
-
added = if date_added.nil?
|
685
|
+
added = if date_added.nil?
|
686
|
+
max(project[:created_at], new_user[:created_at])
|
687
|
+
else
|
688
|
+
date_added
|
689
|
+
end
|
631
690
|
retrieved = retrieve_repo_collaborator(owner, repo, new_member)
|
632
691
|
|
633
692
|
if retrieved.nil?
|
@@ -644,12 +703,13 @@ module GHTorrent
|
|
644
703
|
info "GHTorrent: Added project member #{repo} -> #{new_member}"
|
645
704
|
else
|
646
705
|
debug "GHTorrent: Project member #{repo} -> #{new_member} exists"
|
647
|
-
|
648
|
-
|
649
|
-
|
706
|
+
end
|
707
|
+
|
708
|
+
unless date_added.nil?
|
709
|
+
pr_members.filter(:user_id => new_user[:id],
|
710
|
+
:repo_id => project[:id])\
|
650
711
|
.update(:created_at => date(date_added))
|
651
|
-
|
652
|
-
end
|
712
|
+
info "GHTorrent: Updating project member #{repo} -> #{new_member}, created_at -> #{date(date_added)}"
|
653
713
|
end
|
654
714
|
end
|
655
715
|
|
@@ -734,7 +794,7 @@ module GHTorrent
|
|
734
794
|
end
|
735
795
|
end
|
736
796
|
|
737
|
-
not_saved.map{|x| ensure_commit_comment(user, repo, x['id']
|
797
|
+
not_saved.map{|x| ensure_commit_comment(user, repo, x['id'])}
|
738
798
|
end
|
739
799
|
|
740
800
|
##
|
@@ -745,7 +805,7 @@ module GHTorrent
|
|
745
805
|
# [repo] The repository containing the commit whose comment will be retrieved
|
746
806
|
# [id] The comment id to retrieve
|
747
807
|
# [created_at] The timestamp that the comment was made.
|
748
|
-
def ensure_commit_comment(user, repo, id
|
808
|
+
def ensure_commit_comment(user, repo, id)
|
749
809
|
stored_comment = @db[:commit_comments].first(:comment_id => id)
|
750
810
|
|
751
811
|
if stored_comment.nil?
|
@@ -770,21 +830,15 @@ module GHTorrent
|
|
770
830
|
)
|
771
831
|
info "GHTorrent: Added commit comment #{commit[:sha]} -> #{user[:login]}"
|
772
832
|
else
|
773
|
-
unless created_at.nil?
|
774
|
-
@db[:commit_comments].filter(:comment_id => id)\
|
775
|
-
.update(:created_at => date(created_at))
|
776
|
-
info "GHTorrent: Updating comment #{user}/#{repo} -> #{id}"
|
777
|
-
end
|
778
833
|
info "GHTorrent: Commit comment #{id} exists"
|
779
834
|
end
|
780
835
|
@db[:commit_comments].first(:comment_id => id)
|
781
836
|
end
|
782
837
|
|
783
838
|
##
|
784
|
-
# Make sure that
|
785
|
-
def ensure_watchers(owner, repo)
|
786
|
-
currepo = ensure_repo(owner, repo,
|
787
|
-
time = currepo[:created_at]
|
839
|
+
# Make sure that all watchers exist for a repository
|
840
|
+
def ensure_watchers(owner, repo, refresh = false)
|
841
|
+
currepo = ensure_repo(owner, repo, false, false, false, false)
|
788
842
|
|
789
843
|
if currepo.nil?
|
790
844
|
warn "Could not retrieve watchers for #{owner}/#{repo}"
|
@@ -803,13 +857,13 @@ module GHTorrent
|
|
803
857
|
else
|
804
858
|
acc
|
805
859
|
end
|
806
|
-
end.map { |x| ensure_watcher(owner, repo, x['login'],
|
860
|
+
end.map { |x| ensure_watcher(owner, repo, x['login'], nil) }
|
807
861
|
end
|
808
862
|
|
809
863
|
##
|
810
|
-
# Make sure that a
|
864
|
+
# Make sure that a watcher/stargazer exists for a repository
|
811
865
|
def ensure_watcher(owner, repo, watcher, date_added = nil)
|
812
|
-
project = ensure_repo(owner, repo, false, false, false)
|
866
|
+
project = ensure_repo(owner, repo, false, false, false, false)
|
813
867
|
new_watcher = ensure_user(watcher, false, false)
|
814
868
|
|
815
869
|
if new_watcher.nil? or project.nil?
|
@@ -818,11 +872,15 @@ module GHTorrent
|
|
818
872
|
end
|
819
873
|
|
820
874
|
watchers = @db[:watchers]
|
821
|
-
|
822
|
-
|
875
|
+
watcher_exist = watchers.first(:user_id => new_watcher[:id],
|
876
|
+
:repo_id => project[:id])
|
823
877
|
|
824
|
-
if
|
825
|
-
added = if date_added.nil?
|
878
|
+
if watcher_exist.nil?
|
879
|
+
added = if date_added.nil?
|
880
|
+
max(project[:created_at], new_watcher[:created_at])
|
881
|
+
else
|
882
|
+
date_added
|
883
|
+
end
|
826
884
|
retrieved = retrieve_watcher(owner, repo, watcher)
|
827
885
|
|
828
886
|
if retrieved.nil?
|
@@ -839,33 +897,42 @@ module GHTorrent
|
|
839
897
|
info "GHTorrent: Added watcher #{owner}/#{repo} -> #{watcher}"
|
840
898
|
else
|
841
899
|
debug "GHTorrent: Watcher #{owner}/#{repo} -> #{watcher} exists"
|
842
|
-
|
843
|
-
|
844
|
-
|
900
|
+
end
|
901
|
+
|
902
|
+
unless date_added.nil?
|
903
|
+
watchers.filter(:user_id => new_watcher[:id],
|
904
|
+
:repo_id => project[:id])\
|
845
905
|
.update(:created_at => date(date_added))
|
846
|
-
|
847
|
-
end
|
906
|
+
info "GHTorrent: Updating watcher #{owner}/#{repo} -> #{watcher}, created_at -> #{date_added}"
|
848
907
|
end
|
908
|
+
|
909
|
+
watchers.first(:user_id => new_watcher[:id],
|
910
|
+
:repo_id => project[:id])
|
849
911
|
end
|
850
912
|
|
851
913
|
##
|
852
914
|
# Process all pull requests
|
853
|
-
def ensure_pull_requests(owner, repo)
|
854
|
-
currepo = ensure_repo(owner, repo, false, false, false)
|
915
|
+
def ensure_pull_requests(owner, repo, refresh = false)
|
916
|
+
currepo = ensure_repo(owner, repo, false, false, false, false)
|
855
917
|
if currepo.nil?
|
856
918
|
warn "Could not retrieve pull requests from #{owner}/#{repo}"
|
857
919
|
return
|
858
920
|
end
|
859
921
|
|
860
|
-
|
922
|
+
raw_pull_reqs = if refresh
|
923
|
+
retrieve_pull_requests(owner, repo, refresh = true)
|
924
|
+
else
|
925
|
+
pull_reqs = @db[:pull_requests].filter(:base_repo_id => currepo[:id]).all
|
926
|
+
retrieve_pull_requests(owner, repo).reduce([]) do |acc, x|
|
927
|
+
if pull_reqs.find { |y| y[:pullreq_id] == x['number'] }.nil?
|
928
|
+
acc << x
|
929
|
+
else
|
930
|
+
acc
|
931
|
+
end
|
932
|
+
end
|
933
|
+
end
|
861
934
|
|
862
|
-
|
863
|
-
if pull_reqs.find { |y| y[:pullreq_id] == x['number'] }.nil?
|
864
|
-
acc << x
|
865
|
-
else
|
866
|
-
acc
|
867
|
-
end
|
868
|
-
end.map { |x| ensure_pull_request(owner, repo, x['number']) }
|
935
|
+
raw_pull_reqs.map { |x| ensure_pull_request(owner, repo, x['number']) }
|
869
936
|
end
|
870
937
|
|
871
938
|
##
|
@@ -875,7 +942,7 @@ module GHTorrent
|
|
875
942
|
state = nil, created_at = nil)
|
876
943
|
pulls_reqs = @db[:pull_requests]
|
877
944
|
|
878
|
-
project = ensure_repo(owner, repo, false, false, false)
|
945
|
+
project = ensure_repo(owner, repo, false, false, false, false)
|
879
946
|
|
880
947
|
if project.nil?
|
881
948
|
return
|
@@ -885,15 +952,13 @@ module GHTorrent
|
|
885
952
|
def add_history(id, ts, unq, act)
|
886
953
|
pull_req_history = @db[:pull_request_history]
|
887
954
|
entry = pull_req_history.first(:pull_request_id => id,
|
888
|
-
:
|
955
|
+
:created_at => ts, :action => act)
|
889
956
|
if entry.nil?
|
890
957
|
pull_req_history.insert(:pull_request_id => id, :created_at => ts,
|
891
958
|
:ext_ref_id => unq, :action => act)
|
892
|
-
info "GHTorrent: New pull request (#{id}) history entry (#{act})"
|
959
|
+
info "GHTorrent: New pull request (#{id}) history entry (#{act}) timestamp #{ts}"
|
893
960
|
else
|
894
|
-
|
895
|
-
:action => act).update(:created_at => ts)
|
896
|
-
info "GHTorrent: Updating pull request (#{id}) history entry (#{act}) timestamp #{ts}"
|
961
|
+
info "GHTorrent: Pull request (#{id}) history entry (#{act}) timestamp #{ts} exists"
|
897
962
|
end
|
898
963
|
end
|
899
964
|
|
@@ -939,7 +1004,7 @@ module GHTorrent
|
|
939
1004
|
|
940
1005
|
base_repo = ensure_repo(retrieved['base']['repo']['owner']['login'],
|
941
1006
|
retrieved['base']['repo']['name'],
|
942
|
-
false, false, false)
|
1007
|
+
false, false, false, false)
|
943
1008
|
|
944
1009
|
base_commit = ensure_commit(retrieved['base']['repo']['name'],
|
945
1010
|
retrieved['base']['sha'],
|
@@ -955,7 +1020,7 @@ module GHTorrent
|
|
955
1020
|
head_repo = if has_head_repo(retrieved)
|
956
1021
|
ensure_repo(retrieved['head']['repo']['owner']['login'],
|
957
1022
|
retrieved['head']['repo']['name'],
|
958
|
-
false, false, false)
|
1023
|
+
false, false, false, false)
|
959
1024
|
end
|
960
1025
|
|
961
1026
|
head_commit = if not head_repo.nil?
|
@@ -967,7 +1032,12 @@ module GHTorrent
|
|
967
1032
|
|
968
1033
|
pull_req_user = ensure_user(retrieved['user']['login'], false, false)
|
969
1034
|
|
970
|
-
merged = if retrieved['merged_at'].nil? then
|
1035
|
+
merged = if retrieved['merged_at'].nil? then
|
1036
|
+
# Check if the pr's commits are in the repository
|
1037
|
+
false
|
1038
|
+
else
|
1039
|
+
true
|
1040
|
+
end
|
971
1041
|
closed = if retrieved['closed_at'].nil? then false else true end
|
972
1042
|
|
973
1043
|
pull_req = pulls_reqs.first(:base_repo_id => project[:id],
|
@@ -980,10 +1050,11 @@ module GHTorrent
|
|
980
1050
|
:base_commit_id => base_commit[:id],
|
981
1051
|
:user_id => pull_req_user[:id],
|
982
1052
|
:pullreq_id => pullreq_id,
|
983
|
-
:intra_branch => is_intra_branch(retrieved)
|
1053
|
+
:intra_branch => is_intra_branch(retrieved),
|
1054
|
+
:merged => merged
|
984
1055
|
)
|
985
1056
|
|
986
|
-
info log_msg(retrieved)
|
1057
|
+
info log_msg(retrieved) + " was added"
|
987
1058
|
else
|
988
1059
|
debug log_msg(retrieved) + " exists"
|
989
1060
|
end
|
@@ -1001,22 +1072,21 @@ module GHTorrent
|
|
1001
1072
|
state) unless state.nil?
|
1002
1073
|
|
1003
1074
|
ensure_pull_request_commits(owner, repo, pullreq_id) if commits
|
1004
|
-
ensure_pullreq_comments(owner, repo, pullreq_id
|
1075
|
+
ensure_pullreq_comments(owner, repo, pullreq_id) if comments
|
1005
1076
|
|
1006
1077
|
pulls_reqs.first(:base_repo_id => project[:id],
|
1007
1078
|
:pullreq_id => pullreq_id)
|
1008
1079
|
end
|
1009
1080
|
|
1010
|
-
def ensure_pullreq_comments(owner, repo, pullreq_id
|
1011
|
-
currepo = ensure_repo(owner, repo,
|
1012
|
-
time = if created_at.nil? then currepo[:created_at] else Time.now() end
|
1081
|
+
def ensure_pullreq_comments(owner, repo, pullreq_id)
|
1082
|
+
currepo = ensure_repo(owner, repo, false, false, false, false)
|
1013
1083
|
|
1014
1084
|
if currepo.nil?
|
1015
1085
|
warn "GHTorrent: Could not find repository #{owner}/#{repo}"
|
1016
1086
|
return
|
1017
1087
|
end
|
1018
1088
|
|
1019
|
-
pull_req = ensure_pull_request(owner, repo, pullreq_id, false,
|
1089
|
+
pull_req = ensure_pull_request(owner, repo, pullreq_id, false, false)
|
1020
1090
|
|
1021
1091
|
if pull_req.nil?
|
1022
1092
|
warn "Could not retrieve pull req #{owner}/#{repo} -> #{pullreq_id}"
|
@@ -1032,12 +1102,16 @@ module GHTorrent
|
|
1032
1102
|
acc
|
1033
1103
|
end
|
1034
1104
|
end.map { |x|
|
1035
|
-
ensure_pullreq_comment(owner, repo, pullreq_id, x['id']
|
1105
|
+
ensure_pullreq_comment(owner, repo, pullreq_id, x['id'])
|
1036
1106
|
}
|
1037
1107
|
end
|
1038
1108
|
|
1039
|
-
def ensure_pullreq_comment(owner, repo, pullreq_id, comment_id
|
1040
|
-
|
1109
|
+
def ensure_pullreq_comment(owner, repo, pullreq_id, comment_id)
|
1110
|
+
# Commit retrieval is set to false to ensure that no duplicate work
|
1111
|
+
# is done on retrieving a pull request. This has the side effect that
|
1112
|
+
# commits might not be retrieved if a pullreqcomment event gets processed
|
1113
|
+
# before the pullreq event, until the pullreq event has been processed
|
1114
|
+
pull_req = ensure_pull_request(owner, repo, pullreq_id, false, false)
|
1041
1115
|
|
1042
1116
|
if pull_req.nil?
|
1043
1117
|
warn "GHTorrent: Could not retrieve pull req #{owner}/#{repo} -> #{pullreq_id}"
|
@@ -1084,12 +1158,21 @@ module GHTorrent
|
|
1084
1158
|
end
|
1085
1159
|
|
1086
1160
|
def ensure_pull_request_commits(owner, repo, pullreq_id)
|
1161
|
+
pullreq = ensure_pull_request(owner, repo, pullreq_id, false, false)
|
1162
|
+
|
1163
|
+
if pullreq.nil?
|
1164
|
+
warn "GHTorrent: Pull request #{pullreq_id} does not exist for #{owner}/#{repo}"
|
1165
|
+
return
|
1166
|
+
end
|
1167
|
+
|
1087
1168
|
retrieve_pull_req_commits(owner, repo, pullreq_id).reduce([]){|acc, c|
|
1088
|
-
|
1169
|
+
next if c.nil?
|
1170
|
+
head_repo_owner = c['url'].split(/\//)[4]
|
1171
|
+
head_repo_name = c['url'].split(/\//)[5]
|
1172
|
+
x = ensure_commit(head_repo_name, c['sha'], head_repo_owner, true)
|
1089
1173
|
acc << x if not x.nil?
|
1090
1174
|
acc
|
1091
1175
|
}.map { |c|
|
1092
|
-
pullreq = ensure_pull_request(owner, repo, pullreq_id, false, false)
|
1093
1176
|
exists = @db[:pull_request_commits].first(:pull_request_id => pullreq[:id],
|
1094
1177
|
:commit_id => c[:id])
|
1095
1178
|
if exists.nil?
|
@@ -1110,18 +1193,17 @@ module GHTorrent
|
|
1110
1193
|
# ==Parameters:
|
1111
1194
|
# [owner] The user to which the project belongs
|
1112
1195
|
# [repo] The repository/project to find forks for
|
1113
|
-
def ensure_forks(owner, repo)
|
1114
|
-
currepo = ensure_repo(owner, repo, false, false, false)
|
1196
|
+
def ensure_forks(owner, repo, refresh = false)
|
1197
|
+
currepo = ensure_repo(owner, repo, false, false, false, false)
|
1115
1198
|
|
1116
1199
|
if currepo.nil?
|
1117
1200
|
warn "Could not retrieve forks for #{owner}/#{repo}"
|
1118
1201
|
return
|
1119
1202
|
end
|
1120
1203
|
|
1121
|
-
existing_forks = @db.from(:
|
1122
|
-
where(:forks__forked_project_id => :projects__id). \
|
1204
|
+
existing_forks = @db.from(:projects, :users).\
|
1123
1205
|
where(:users__id => :projects__owner_id). \
|
1124
|
-
where(:
|
1206
|
+
where(:projects__forked_from => currepo[:id]).select(:projects__name, :login).all
|
1125
1207
|
|
1126
1208
|
retrieve_forks(owner, repo).reduce([]) do |acc, x|
|
1127
1209
|
if existing_forks.find {|y|
|
@@ -1138,65 +1220,49 @@ module GHTorrent
|
|
1138
1220
|
|
1139
1221
|
##
|
1140
1222
|
# Make sure that a fork is retrieved for a project
|
1141
|
-
def ensure_fork(owner, repo, fork_id
|
1142
|
-
|
1143
|
-
forks = @db[:forks]
|
1144
|
-
forked = ensure_repo(owner, repo, false, false, false)
|
1145
|
-
fork_exists = forks.first(:fork_id => fork_id)
|
1146
|
-
|
1147
|
-
if fork_exists.nil?
|
1148
|
-
retrieved = retrieve_fork(owner, repo, fork_id)
|
1149
|
-
added = if date_added.nil? then retrieved['created_at'] else date_added end
|
1150
|
-
|
1151
|
-
if retrieved.nil?
|
1152
|
-
warn "GHTorrent: Fork #{fork_id} does not exist for #{owner}/#{repo}"
|
1153
|
-
return
|
1154
|
-
end
|
1223
|
+
def ensure_fork(owner, repo, fork_id)
|
1224
|
+
fork = retrieve_fork(owner, repo, fork_id)
|
1155
1225
|
|
1156
|
-
|
1157
|
-
|
1226
|
+
if fork.nil?
|
1227
|
+
warn "GHTorrent: Fork #{fork_id} does not exist for #{owner}/#{repo}"
|
1228
|
+
return
|
1229
|
+
end
|
1158
1230
|
|
1159
|
-
|
1231
|
+
fork_owner = fork['full_name'].split(/\//)[0]
|
1232
|
+
fork_name = fork['full_name'].split(/\//)[1]
|
1160
1233
|
|
1161
|
-
|
1162
|
-
warn "Could not add fork #{fork_id}"
|
1163
|
-
return
|
1164
|
-
end
|
1234
|
+
r = ensure_repo(fork_owner, fork_name, false, false, false, false)
|
1165
1235
|
|
1166
|
-
|
1167
|
-
|
1168
|
-
:fork_id => fork_id,
|
1169
|
-
:created_at => added,
|
1170
|
-
:ext_ref_id => retrieved[@ext_uniq])
|
1171
|
-
info "GHTorrent: Added #{forked_repo_owner}/#{forked_repo_name} as fork of #{owner}/#{repo}"
|
1236
|
+
if r.nil?
|
1237
|
+
warn "GHTorrent: Failed to add #{fork_owner}/#{fork_name} as fork of #{owner}/#{repo}"
|
1172
1238
|
else
|
1173
|
-
|
1174
|
-
unless date_added.nil?
|
1175
|
-
forks.filter(:fork_id => fork_id)\
|
1176
|
-
.update(:created_at => date(date_added))
|
1177
|
-
debug "GHTorrent: Updating fork #{owner}/#{repo} (#{fork_id})"
|
1178
|
-
end
|
1239
|
+
info "GHTorrent: Added #{fork_owner}/#{fork_name} as fork of #{owner}/#{repo}"
|
1179
1240
|
end
|
1180
1241
|
end
|
1181
1242
|
|
1182
1243
|
##
|
1183
1244
|
# Make sure all issues exist for a project
|
1184
|
-
def ensure_issues(owner, repo)
|
1185
|
-
currepo = ensure_repo(owner, repo, false, false, false)
|
1245
|
+
def ensure_issues(owner, repo, refresh = false)
|
1246
|
+
currepo = ensure_repo(owner, repo, false, false, false, false)
|
1186
1247
|
if currepo.nil?
|
1187
1248
|
warn "GHTorrent: Could not retrieve issues for #{owner}/#{repo}"
|
1188
1249
|
return
|
1189
1250
|
end
|
1190
1251
|
|
1191
|
-
|
1252
|
+
raw_issues = if refresh
|
1253
|
+
retrieve_issues(owner, repo, refresh = true)
|
1254
|
+
else
|
1255
|
+
issues = @db[:issues].filter(:repo_id => currepo[:id]).all
|
1256
|
+
retrieve_issues(owner, repo).reduce([]) do |acc, x|
|
1257
|
+
if issues.find { |y| y[:issue_id] == x['number'] }.nil?
|
1258
|
+
acc << x
|
1259
|
+
else
|
1260
|
+
acc
|
1261
|
+
end
|
1262
|
+
end
|
1263
|
+
end
|
1192
1264
|
|
1193
|
-
|
1194
|
-
if issues.find { |y| y[:issue_id] == x['number'] }.nil?
|
1195
|
-
acc << x
|
1196
|
-
else
|
1197
|
-
acc
|
1198
|
-
end
|
1199
|
-
end.map { |x| ensure_issue(owner, repo, x['number']) }
|
1265
|
+
raw_issues.map { |x| ensure_issue(owner, repo, x['number']) }
|
1200
1266
|
end
|
1201
1267
|
|
1202
1268
|
##
|
@@ -1204,7 +1270,7 @@ module GHTorrent
|
|
1204
1270
|
def ensure_issue(owner, repo, issue_id, events = true, comments = true)
|
1205
1271
|
|
1206
1272
|
issues = @db[:issues]
|
1207
|
-
repository = ensure_repo(owner, repo, false, false, false)
|
1273
|
+
repository = ensure_repo(owner, repo, false, false, false, false)
|
1208
1274
|
|
1209
1275
|
if repo.nil?
|
1210
1276
|
warn "Cannot find repo #{owner}/#{repo}"
|
@@ -1214,24 +1280,27 @@ module GHTorrent
|
|
1214
1280
|
cur_issue = issues.first(:issue_id => issue_id,
|
1215
1281
|
:repo_id => repository[:id])
|
1216
1282
|
|
1217
|
-
|
1218
|
-
retrieved = retrieve_issue(owner, repo, issue_id)
|
1283
|
+
retrieved = retrieve_issue(owner, repo, issue_id)
|
1219
1284
|
|
1220
|
-
|
1221
|
-
|
1222
|
-
|
1223
|
-
|
1285
|
+
if retrieved.nil?
|
1286
|
+
warn "GHTorrent: Issue #{issue_id} does not exist for #{owner}/#{repo}"
|
1287
|
+
return
|
1288
|
+
end
|
1289
|
+
|
1290
|
+
# Pull requests and issues share the same issue_id
|
1291
|
+
pull_req = unless retrieved['pull_request'].nil? or
|
1292
|
+
retrieved['pull_request']['patch_url'].nil?
|
1293
|
+
info "GHTorrent: Issue #{owner}/#{repo}->#{issue_id} is a pull request"
|
1294
|
+
ensure_pull_request(owner, repo, issue_id)
|
1295
|
+
end
|
1296
|
+
|
1297
|
+
if cur_issue.nil?
|
1224
1298
|
|
1225
1299
|
reporter = ensure_user(retrieved['user']['login'], false, false)
|
1226
1300
|
assignee = unless retrieved['assignee'].nil?
|
1227
1301
|
ensure_user(retrieved['assignee']['login'], false, false)
|
1228
1302
|
end
|
1229
1303
|
|
1230
|
-
# Pull requests and issues share the same issue_id
|
1231
|
-
pull_req = unless retrieved['pull_request'].nil? or retrieved['pull_request']['patch_url'].nil?
|
1232
|
-
ensure_pull_request(owner, repo, issue_id)
|
1233
|
-
end
|
1234
|
-
|
1235
1304
|
issues.insert(:repo_id => repository[:id],
|
1236
1305
|
:assignee_id => unless assignee.nil? then assignee[:id] end,
|
1237
1306
|
:reporter_id => reporter[:id],
|
@@ -1241,23 +1310,26 @@ module GHTorrent
|
|
1241
1310
|
:created_at => date(retrieved['created_at']),
|
1242
1311
|
:ext_ref_id => retrieved[@ext_uniq])
|
1243
1312
|
|
1244
|
-
ensure_issue_events(owner, repo, issue_id) if events
|
1245
|
-
ensure_issue_comments(owner, repo, issue_id) if comments and retrieved['comments'] > 0
|
1246
|
-
|
1247
1313
|
info "GHTorrent: Added issue #{owner}/#{repo} -> #{issue_id}"
|
1248
|
-
issues.first(:issue_id => issue_id,
|
1249
|
-
:repo_id => repository[:id])
|
1250
1314
|
else
|
1251
1315
|
info "GHTorrent: Issue #{owner}/#{repo}->#{issue_id} exists"
|
1252
|
-
cur_issue
|
1316
|
+
if cur_issue[:pull_request] == false and not pull_req.nil?
|
1317
|
+
info "GHTorrent: Updating issue #{owner}/#{repo}->#{issue_id} as pull request"
|
1318
|
+
issues.filter(:issue_id => issue_id, :repo_id => repository[:id]).update(
|
1319
|
+
:pull_request => true,
|
1320
|
+
:pull_request_id => pull_req[:id])
|
1321
|
+
end
|
1253
1322
|
end
|
1323
|
+
ensure_issue_events(owner, repo, issue_id) if events
|
1324
|
+
ensure_issue_comments(owner, repo, issue_id) if comments
|
1325
|
+
issues.first(:issue_id => issue_id,
|
1326
|
+
:repo_id => repository[:id])
|
1254
1327
|
end
|
1255
1328
|
|
1256
1329
|
##
|
1257
1330
|
# Retrieve and process all events for an issue
|
1258
1331
|
def ensure_issue_events(owner, repo, issue_id)
|
1259
|
-
currepo = ensure_repo(owner, repo,
|
1260
|
-
#time = if created_at.nil? then currepo[:created_at] else Time.now() end
|
1332
|
+
currepo = ensure_repo(owner, repo, false, false, false, false)
|
1261
1333
|
|
1262
1334
|
if currepo.nil?
|
1263
1335
|
warn "GHTorrent: Could not find repository #{owner}/#{repo}"
|
@@ -1361,7 +1433,7 @@ module GHTorrent
|
|
1361
1433
|
##
|
1362
1434
|
# Retrieve and process all comments for an issue
|
1363
1435
|
def ensure_issue_comments(owner, repo, issue_id)
|
1364
|
-
currepo = ensure_repo(owner, repo,
|
1436
|
+
currepo = ensure_repo(owner, repo, false, false, false, false)
|
1365
1437
|
|
1366
1438
|
if currepo.nil?
|
1367
1439
|
warn "GHTorrent: Could not find repository #{owner}/#{repo}"
|
@@ -1390,7 +1462,7 @@ module GHTorrent
|
|
1390
1462
|
##
|
1391
1463
|
# Retrieve and process +comment_id+ for an +issue_id+
|
1392
1464
|
def ensure_issue_comment(owner, repo, issue_id, comment_id)
|
1393
|
-
issue = ensure_issue(owner, repo, issue_id)
|
1465
|
+
issue = ensure_issue(owner, repo, issue_id, false, false)
|
1394
1466
|
|
1395
1467
|
if issue.nil?
|
1396
1468
|
warn "GHTorrent: Could not retrieve issue #{owner}/#{repo} -> #{issue_id}"
|
@@ -1467,14 +1539,13 @@ module GHTorrent
|
|
1467
1539
|
repository = ensure_repo(user, repo, false, false, false)
|
1468
1540
|
|
1469
1541
|
if repository.nil?
|
1470
|
-
warn "
|
1471
|
-
return
|
1542
|
+
warn "GHTorrent: repository #{user}/#{repo} deleted"
|
1472
1543
|
end
|
1473
1544
|
|
1474
1545
|
commits.insert(:sha => c['sha'],
|
1475
1546
|
:author_id => author[:id],
|
1476
1547
|
:committer_id => commiter[:id],
|
1477
|
-
:project_id => repository[:id],
|
1548
|
+
:project_id => if repository.nil? then nil else repository[:id] end ,
|
1478
1549
|
:created_at => date(c['commit']['author']['date']),
|
1479
1550
|
:ext_ref_id => c[@ext_uniq]
|
1480
1551
|
)
|
@@ -1513,6 +1584,14 @@ module GHTorrent
|
|
1513
1584
|
def is_valid_email(email)
|
1514
1585
|
email =~ /^[a-zA-Z][\w\.-]*[a-zA-Z0-9]@[a-zA-Z0-9][\w\.-]*[a-zA-Z0-9]\.[a-zA-Z][a-zA-Z\.]*[a-zA-Z]$/
|
1515
1586
|
end
|
1587
|
+
|
1588
|
+
def max(a, b)
|
1589
|
+
if a >= b
|
1590
|
+
a
|
1591
|
+
else
|
1592
|
+
b
|
1593
|
+
end
|
1594
|
+
end
|
1516
1595
|
end
|
1517
1596
|
end
|
1518
1597
|
|