ghtorrent 0.4 → 0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +24 -0
- data/Gemfile +17 -0
- data/Gemfile.lock +40 -0
- data/README.md +23 -22
- data/bin/ght-data-retrieval +66 -24
- data/bin/ght-load +41 -19
- data/bin/ght-mirror-events +13 -16
- data/bin/ght-rm-dupl +119 -77
- data/lib/ghtorrent.rb +14 -4
- data/lib/ghtorrent/adapters/base_adapter.rb +17 -5
- data/lib/ghtorrent/adapters/mongo_persister.rb +122 -56
- data/lib/ghtorrent/api_client.rb +151 -16
- data/lib/ghtorrent/bson_orderedhash.rb +23 -0
- data/lib/ghtorrent/cache.rb +97 -0
- data/lib/ghtorrent/command.rb +43 -25
- data/lib/ghtorrent/gh_torrent_exception.rb +6 -0
- data/lib/ghtorrent/ghtorrent.rb +615 -164
- data/lib/ghtorrent/hash.rb +11 -0
- data/lib/ghtorrent/logging.rb +11 -7
- data/lib/ghtorrent/migrations/001_init_schema.rb +3 -3
- data/lib/ghtorrent/migrations/002_add_external_ref_ids.rb +2 -0
- data/lib/ghtorrent/migrations/003_add_orgs.rb +4 -1
- data/lib/ghtorrent/migrations/004_add_commit_comments.rb +4 -2
- data/lib/ghtorrent/migrations/005_add_repo_collaborators.rb +2 -0
- data/lib/ghtorrent/migrations/006_add_watchers.rb +2 -0
- data/lib/ghtorrent/migrations/007_add_pull_requests.rb +64 -0
- data/lib/ghtorrent/migrations/008_add_project_unq.rb +23 -0
- data/lib/ghtorrent/migrations/009_add_project_commit.rb +27 -0
- data/lib/ghtorrent/migrations/010_add_forks.rb +28 -0
- data/lib/ghtorrent/migrations/mysql_defaults.rb +6 -0
- data/lib/ghtorrent/persister.rb +3 -0
- data/lib/ghtorrent/retriever.rb +298 -102
- data/lib/ghtorrent/settings.rb +20 -1
- data/lib/ghtorrent/time.rb +5 -0
- data/lib/ghtorrent/utils.rb +22 -4
- data/lib/version.rb +5 -0
- metadata +173 -145
- data/lib/ghtorrent/call_stack.rb +0 -91
data/lib/ghtorrent/ghtorrent.rb
CHANGED
@@ -1,5 +1,11 @@
|
|
1
1
|
require 'sequel'
|
2
2
|
|
3
|
+
require 'ghtorrent/time'
|
4
|
+
require 'ghtorrent/logging'
|
5
|
+
require 'ghtorrent/settings'
|
6
|
+
require 'ghtorrent/retriever'
|
7
|
+
require 'ghtorrent/persister'
|
8
|
+
|
3
9
|
module GHTorrent
|
4
10
|
class Mirror
|
5
11
|
|
@@ -8,32 +14,34 @@ module GHTorrent
|
|
8
14
|
include GHTorrent::Retriever
|
9
15
|
include GHTorrent::Persister
|
10
16
|
|
11
|
-
attr_reader :settings, :persister
|
12
|
-
|
13
|
-
def initialize(configuration)
|
17
|
+
attr_reader :settings, :persister, :ext_uniq, :logger
|
14
18
|
|
15
|
-
|
16
|
-
|
19
|
+
def initialize(settings)
|
20
|
+
@settings = settings
|
17
21
|
@ext_uniq = config(:uniq_id)
|
18
22
|
@logger = Logger.new(STDOUT)
|
19
|
-
@persister = connect(:mongo, @settings)
|
20
|
-
get_db
|
21
23
|
end
|
22
24
|
|
23
25
|
# db related functions
|
24
26
|
def get_db
|
25
|
-
|
26
|
-
@db = Sequel.connect(config(:sql_url))
|
27
|
-
|
27
|
+
Sequel.single_threaded = true
|
28
|
+
@db = Sequel.connect(config(:sql_url), :encoding => 'utf8')
|
29
|
+
#@db.loggers << @logger
|
28
30
|
if @db.tables.empty?
|
29
31
|
dir = File.join(File.dirname(__FILE__), 'migrations')
|
30
32
|
puts "Database empty, running migrations from #{dir}"
|
31
33
|
Sequel.extension :migration
|
32
34
|
Sequel::Migrator.apply(@db, dir)
|
33
35
|
end
|
36
|
+
|
34
37
|
@db
|
35
38
|
end
|
36
39
|
|
40
|
+
def persister
|
41
|
+
@persister ||= connect(:mongo, @settings)
|
42
|
+
@persister
|
43
|
+
end
|
44
|
+
|
37
45
|
##
|
38
46
|
# Ensure that a user exists, or fetch its latest state from Github
|
39
47
|
# ==Parameters:
|
@@ -46,7 +54,7 @@ module GHTorrent
|
|
46
54
|
end
|
47
55
|
|
48
56
|
transaction do
|
49
|
-
|
57
|
+
ensure_user(user, true, true)
|
50
58
|
ensure_commit(repo, sha, user)
|
51
59
|
end
|
52
60
|
end
|
@@ -60,7 +68,6 @@ module GHTorrent
|
|
60
68
|
# [date_added] The timestamp that the add event took place
|
61
69
|
def get_project_member(owner, repo, new_member, date_added)
|
62
70
|
transaction do
|
63
|
-
ensure_repo(owner, repo)
|
64
71
|
ensure_project_member(owner, repo, new_member, date_added)
|
65
72
|
end
|
66
73
|
end
|
@@ -74,7 +81,6 @@ module GHTorrent
|
|
74
81
|
# [date_added] The timestamp that the add event took place
|
75
82
|
def get_commit_comment(user, repo, comment_id, date_added)
|
76
83
|
transaction do
|
77
|
-
ensure_repo(user, repo)
|
78
84
|
ensure_commit_comment(user, repo, comment_id, date_added)
|
79
85
|
end
|
80
86
|
end
|
@@ -88,7 +94,6 @@ module GHTorrent
|
|
88
94
|
# [date_added] The timestamp that the add event took place
|
89
95
|
def get_watcher(owner, repo, watcher, date_added)
|
90
96
|
transaction do
|
91
|
-
ensure_repo(owner, repo)
|
92
97
|
ensure_watcher(owner, repo, watcher, date_added)
|
93
98
|
end
|
94
99
|
end
|
@@ -101,20 +106,84 @@ module GHTorrent
|
|
101
106
|
# [date_added] The timestamp that the add event took place
|
102
107
|
def get_follower(follower, followed, date_added)
|
103
108
|
transaction do
|
104
|
-
ensure_user(follower,
|
105
|
-
ensure_user(followed,
|
106
|
-
|
109
|
+
ensure_user(follower, true, true)
|
110
|
+
ensure_user(followed, true, true)
|
111
|
+
ensure_user_follower(followed, follower, date_added)
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
##
|
116
|
+
# Get a pull request and record the changes it affects
|
117
|
+
# ==Parameters:
|
118
|
+
# [owner] The owner of the repository to which the pullreq will be applied
|
119
|
+
# [repo] The repository to which the pullreq will be applied
|
120
|
+
# [pullreq_id] The ID of the pull request relative to the repository
|
121
|
+
def get_pull_request(owner, repo, pullreq_id, state, created_at)
|
122
|
+
transaction do
|
123
|
+
ensure_pull_request(owner, repo, pullreq_id, true, true, state, created_at)
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
##
|
128
|
+
# Retrieve details about a project fork (including the forked project)
|
129
|
+
# ==Parameters:
|
130
|
+
# [owner] The login of the repository owner
|
131
|
+
# [repo] The name of the repository
|
132
|
+
# [fork_id] The fork item id
|
133
|
+
# [date_added] The timestamp that the add event took place
|
134
|
+
def get_fork(owner, repo, fork_id, date_added)
|
135
|
+
transaction do
|
136
|
+
ensure_fork(owner, repo, fork_id, date_added)
|
107
137
|
end
|
108
138
|
end
|
109
139
|
|
140
|
+
##
|
141
|
+
# Retrieve a pull request review comment
|
142
|
+
# ==Parameters:
|
143
|
+
# [owner] The login of the repository owner
|
144
|
+
# [repo] The name of the repository
|
145
|
+
# [pullreq_id] The ID of the pull request that the comment belongs to
|
146
|
+
# [created_at] The timestamp that the comment event took place
|
147
|
+
def get_pullreq_comment(owner, repo, pullreq_id, comment_id, created_at)
|
148
|
+
transaction do
|
149
|
+
ensure_pullreq_comment(owner, repo, pullreq_id, comment_id, created_at)
|
150
|
+
end
|
151
|
+
end
|
152
|
+
|
153
|
+
##
|
154
|
+
# Retrieve an issue comment (not implemented yet; raises when called)
|
155
|
+
# ==Parameters:
|
156
|
+
# [owner] The login of the repository owner
|
157
|
+
# [repo] The name of the repository
|
158
|
+
# [fork_id] The fork item id
|
159
|
+
# [date_added] The timestamp that the add event took place
|
160
|
+
def get_issue_comment(owner, repo, issue_id, comment_id, created_at)
|
161
|
+
transaction do
|
162
|
+
raise "Not implemented"
|
163
|
+
#ensure_pullreq_comment(owner, repo, pullreq_id, comment_id, created_at)
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
|
110
168
|
##
|
111
169
|
# Make sure a commit exists
|
112
170
|
#
|
113
171
|
def ensure_commit(repo, sha, user, comments = true)
|
172
|
+
ensure_repo(user, repo)
|
114
173
|
c = retrieve_commit(repo, sha, user)
|
174
|
+
|
175
|
+
if c.nil?
|
176
|
+
warn "GHTorrent: Commit #{user}/#{repo} -> #{sha} does not exist"
|
177
|
+
return
|
178
|
+
end
|
179
|
+
|
115
180
|
stored = store_commit(c, repo, user)
|
116
181
|
ensure_parents(c)
|
117
|
-
|
182
|
+
if not c['commit']['comment_count'].nil? \
|
183
|
+
and c['commit']['comment_count'] > 0
|
184
|
+
ensure_commit_comments(user, repo, sha) if comments
|
185
|
+
end
|
186
|
+
ensure_repo_commit(user, repo, sha)
|
118
187
|
stored
|
119
188
|
end
|
120
189
|
|
@@ -162,13 +231,41 @@ module GHTorrent
|
|
162
231
|
|
163
232
|
parents.insert(:commit_id => this[:id],
|
164
233
|
:parent_id => parent[:id])
|
165
|
-
info "Added parent #{parent[:sha]} to commit #{this[:sha]}"
|
234
|
+
info "GHTorrent: Added parent #{parent[:sha]} to commit #{this[:sha]}"
|
166
235
|
else
|
167
|
-
|
236
|
+
debug "GHTorrent: Parent #{parent[:sha]} for commit #{this[:sha]} exists"
|
168
237
|
end
|
169
238
|
end
|
170
239
|
end
|
171
240
|
|
241
|
+
##
|
242
|
+
# Make sure that a commit has been associated with the provided repo
|
243
|
+
# ==Parameters:
|
244
|
+
# [user] The user that owns the repo this commit has been submitted to
|
245
|
+
# [repo] The repo receiving the commit
|
246
|
+
# [sha] The commit SHA
|
247
|
+
def ensure_repo_commit(user, repo, sha)
|
248
|
+
userid = @db[:users].first(:login => user)[:id]
|
249
|
+
projectid = @db[:projects].first(:owner_id => userid,
|
250
|
+
:name => repo)[:id]
|
251
|
+
commitid = @db[:commits].first(:sha => sha)[:id]
|
252
|
+
|
253
|
+
exists = @db[:project_commits].first(:project_id => projectid,
|
254
|
+
:commit_id => commitid)
|
255
|
+
if exists.nil?
|
256
|
+
@db[:project_commits].insert(
|
257
|
+
:project_id => projectid,
|
258
|
+
:commit_id => commitid
|
259
|
+
)
|
260
|
+
info "GHTorrent: Added commit #{user}/#{repo} -> #{sha}"
|
261
|
+
@db[:project_commits].first(:project_id => projectid,
|
262
|
+
:commit_id => commitid)
|
263
|
+
else
|
264
|
+
debug "GHTorrent: Commit #{user}/#{repo} -> #{sha} exists"
|
265
|
+
exists
|
266
|
+
end
|
267
|
+
end
|
268
|
+
|
172
269
|
##
|
173
270
|
# Add (or update) an entry for a commit author. This method uses information
|
174
271
|
# in the JSON object returned by Github to add (or update) a user in the
|
@@ -193,15 +290,14 @@ module GHTorrent
|
|
193
290
|
login = githubuser['login'] unless githubuser.nil?
|
194
291
|
|
195
292
|
if login.nil?
|
196
|
-
ensure_user("#{name}<#{email}>",
|
293
|
+
ensure_user("#{name}<#{email}>", false, false)
|
197
294
|
else
|
198
295
|
dbuser = users.first(:login => login)
|
199
296
|
byemail = users.first(:email => email)
|
200
297
|
if dbuser.nil?
|
201
298
|
# We do not have the user in the database yet. Add him
|
202
|
-
added = ensure_user(login,
|
299
|
+
added = ensure_user(login, false, false)
|
203
300
|
if byemail.nil?
|
204
|
-
#
|
205
301
|
users.filter(:login => login).update(:name => name) if added[:name].nil?
|
206
302
|
users.filter(:login => login).update(:email => email) if added[:email].nil?
|
207
303
|
else
|
@@ -215,8 +311,6 @@ module GHTorrent
|
|
215
311
|
:login => login,
|
216
312
|
:company => added['company'],
|
217
313
|
:location => added['location'],
|
218
|
-
:hireable => added['hireable'],
|
219
|
-
:bio => added['bio'],
|
220
314
|
:created_at => added['created_at']
|
221
315
|
)
|
222
316
|
end
|
@@ -259,7 +353,6 @@ module GHTorrent
|
|
259
353
|
return u
|
260
354
|
end
|
261
355
|
|
262
|
-
|
263
356
|
##
|
264
357
|
# Ensure that a user exists, or fetch its latest state from Github
|
265
358
|
# ==Parameters:
|
@@ -275,6 +368,12 @@ module GHTorrent
|
|
275
368
|
|
276
369
|
if usr.nil?
|
277
370
|
u = retrieve_user_byusername(user)
|
371
|
+
|
372
|
+
if u.nil?
|
373
|
+
warn "GHTorrent: User #{user} does not exist"
|
374
|
+
return
|
375
|
+
end
|
376
|
+
|
278
377
|
email = unless u['email'].nil?
|
279
378
|
if u['email'].strip == "" then
|
280
379
|
nil
|
@@ -283,49 +382,16 @@ module GHTorrent
|
|
283
382
|
end
|
284
383
|
end
|
285
384
|
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
:bio => u['bio'],
|
295
|
-
:location => u['location'],
|
296
|
-
:type => user_type(u['type']),
|
297
|
-
:created_at => date(u['created_at']),
|
298
|
-
:ext_ref_id => u[@ext_uniq]
|
299
|
-
)
|
300
|
-
info "GHTorrent: Updating user #{user} (email #{email})"
|
301
|
-
else
|
302
|
-
users.insert(:login => u['login'],
|
303
|
-
:name => u['name'],
|
304
|
-
:company => u['company'],
|
305
|
-
:email => email,
|
306
|
-
:hireable => boolean(u['hirable']),
|
307
|
-
:bio => u['bio'],
|
308
|
-
:location => u['location'],
|
309
|
-
:type => user_type(u['type']),
|
310
|
-
:created_at => date(u['created_at']),
|
311
|
-
:ext_ref_id => u[@ext_uniq])
|
312
|
-
|
313
|
-
info "GHTorrent: New user #{user}"
|
314
|
-
end
|
315
|
-
else
|
316
|
-
users.insert(:login => u['login'],
|
317
|
-
:name => u['name'],
|
318
|
-
:company => u['company'],
|
319
|
-
:email => email,
|
320
|
-
:hireable => boolean(u['hirable']),
|
321
|
-
:bio => u['bio'],
|
322
|
-
:location => u['location'],
|
323
|
-
:type => user_type(u['type']),
|
324
|
-
:created_at => date(u['created_at']),
|
325
|
-
:ext_ref_id => u[@ext_uniq])
|
385
|
+
users.insert(:login => u['login'],
|
386
|
+
:name => u['name'],
|
387
|
+
:company => u['company'],
|
388
|
+
:email => email,
|
389
|
+
:location => u['location'],
|
390
|
+
:type => user_type(u['type']),
|
391
|
+
:created_at => date(u['created_at']),
|
392
|
+
:ext_ref_id => u[@ext_uniq])
|
326
393
|
|
327
|
-
|
328
|
-
end
|
394
|
+
info "GHTorrent: New user #{user}"
|
329
395
|
users.first(:login => user)
|
330
396
|
else
|
331
397
|
debug "GHTorrent: User #{user} exists"
|
@@ -340,37 +406,62 @@ module GHTorrent
|
|
340
406
|
#
|
341
407
|
# ==Parameters:
|
342
408
|
# [user] The user login to find followers by
|
343
|
-
def ensure_user_followers(
|
409
|
+
def ensure_user_followers(followed, date_added = nil)
|
410
|
+
curuser = ensure_user(followed, false, false)
|
411
|
+
time = curuser[:created_at]
|
412
|
+
followers = @db.from(:followers, :users).\
|
413
|
+
where(:followers__follower_id => :users__id).
|
414
|
+
where(:followers__user_id => curuser[:id]).select(:login).all
|
415
|
+
|
416
|
+
retrieve_user_followers(followed).reduce([]) do |acc, x|
|
417
|
+
if followers.find {|y| y[:login] == x['login']}.nil?
|
418
|
+
acc << x
|
419
|
+
else
|
420
|
+
acc
|
421
|
+
end
|
422
|
+
end.map { |x| ensure_user_follower(followed, x['login'], time) }
|
423
|
+
end
|
424
|
+
|
425
|
+
##
|
426
|
+
# Make sure that a user follows another one
|
427
|
+
def ensure_user_follower(followed, follower, date_added)
|
428
|
+
follower_user = ensure_user(follower, false, false)
|
429
|
+
followed_user = ensure_user(followed, false, false)
|
430
|
+
|
431
|
+
if followed_user.nil? or follower_user.nil?
|
432
|
+
warn "Could not add follower #{follower} to #{followed}"
|
433
|
+
return
|
434
|
+
end
|
435
|
+
|
344
436
|
followers = @db[:followers]
|
345
|
-
|
437
|
+
followed_id = follower_user[:id]
|
438
|
+
follower_id = followed_user[:id]
|
346
439
|
|
347
|
-
|
348
|
-
|
349
|
-
follower = f['login']
|
350
|
-
ensure_user(user, false, false)
|
351
|
-
ensure_user(follower, false, false)
|
440
|
+
follower_exists = followers.first(:user_id => followed_id,
|
441
|
+
:follower_id => follower_id)
|
352
442
|
|
353
|
-
|
443
|
+
if follower_exists.nil?
|
444
|
+
added = if date_added.nil? then Time.now else date_added end
|
445
|
+
retrieved = retrieve_user_follower(followed, follower)
|
354
446
|
|
447
|
+
if retrieved.nil?
|
448
|
+
warn "Follower #{follower} does not exist for user #{followed}"
|
449
|
+
return
|
450
|
+
end
|
355
451
|
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
:follower_id => followerid).\
|
368
|
-
update(:created_at => date(date_added))
|
369
|
-
info "GHTorrent: Updated follower #{follower} -> #{user}"
|
370
|
-
end
|
371
|
-
debug "GHTorrent: User #{follower} already follows #{user}"
|
452
|
+
followers.insert(:user_id => followed_id,
|
453
|
+
:follower_id => follower_id,
|
454
|
+
:created_at => added,
|
455
|
+
:ext_ref_id => retrieved[@ext_uniq])
|
456
|
+
info "GHTorrent: User #{follower} follows #{followed}"
|
457
|
+
else
|
458
|
+
unless date_added.nil?
|
459
|
+
followers.filter(:user_id => followed_id,
|
460
|
+
:follower_id => follower_id)\
|
461
|
+
.update(:created_at => date(date_added))
|
462
|
+
debug "GHTorrent: Updating follower #{followed} -> #{follower}"
|
372
463
|
end
|
373
|
-
|
464
|
+
end
|
374
465
|
end
|
375
466
|
|
376
467
|
##
|
@@ -379,8 +470,7 @@ module GHTorrent
|
|
379
470
|
#
|
380
471
|
# ==Parameters:
|
381
472
|
# [email] The email to lookup the user by
|
382
|
-
# [
|
383
|
-
# [followers] If true, the user's followers will be retrieved
|
473
|
+
# [name] The user's name
|
384
474
|
# == Returns:
|
385
475
|
# If the user can be retrieved, it is returned as a Hash. Otherwise,
|
386
476
|
# the result is nil
|
@@ -392,27 +482,27 @@ module GHTorrent
|
|
392
482
|
|
393
483
|
u = retrieve_user_byemail(email, name)
|
394
484
|
|
395
|
-
if u.nil? or u['
|
485
|
+
if u.nil? or u['login'].nil?
|
396
486
|
debug "GHTorrent: Cannot find #{email} through search API query"
|
487
|
+
login = (0...8).map { 65.+(rand(25)).chr }.join
|
397
488
|
users.insert(:email => email,
|
398
489
|
:name => name,
|
399
|
-
:login =>
|
490
|
+
:login => login,
|
400
491
|
:created_at => Time.now,
|
401
492
|
:ext_ref_id => ""
|
402
493
|
)
|
403
|
-
|
494
|
+
info "GHTorrent: Added fake user #{login} -> #{email}"
|
495
|
+
users.first(:login => login)
|
404
496
|
else
|
405
|
-
users.insert(:login => u['
|
406
|
-
:name => u['
|
407
|
-
:company => u['
|
408
|
-
:email => u['
|
409
|
-
:
|
410
|
-
:
|
411
|
-
:location => u['user']['location'],
|
412
|
-
:created_at => date(u['user']['created_at']),
|
497
|
+
users.insert(:login => u['login'],
|
498
|
+
:name => u['name'],
|
499
|
+
:company => u['company'],
|
500
|
+
:email => u['email'],
|
501
|
+
:location => u['location'],
|
502
|
+
:created_at => date(u['created_at']),
|
413
503
|
:ext_ref_id => u[@ext_uniq])
|
414
|
-
|
415
|
-
users.first(:
|
504
|
+
info "GHTorrent: Found #{email} through search API query"
|
505
|
+
users.first(:login => u['login'])
|
416
506
|
end
|
417
507
|
else
|
418
508
|
debug "GHTorrent: User with email #{email} exists"
|
@@ -430,15 +520,21 @@ module GHTorrent
|
|
430
520
|
# == Returns:
|
431
521
|
# If the repo can be retrieved, it is returned as a Hash. Otherwise,
|
432
522
|
# the result is nil
|
433
|
-
def ensure_repo(user, repo)
|
523
|
+
def ensure_repo(user, repo, commits = true, project_members = true, watchers = true)
|
434
524
|
|
435
|
-
ensure_user(user,
|
525
|
+
ensure_user(user, false, false)
|
436
526
|
repos = @db[:projects]
|
437
527
|
curuser = @db[:users].first(:login => user)
|
438
528
|
currepo = repos.first(:owner_id => curuser[:id], :name => repo)
|
439
529
|
|
440
530
|
if currepo.nil?
|
441
531
|
r = retrieve_repo(user, repo)
|
532
|
+
|
533
|
+
if r.nil?
|
534
|
+
warn "Repo #{user}/#{repo} does not exist"
|
535
|
+
return
|
536
|
+
end
|
537
|
+
|
442
538
|
repos.insert(:url => r['url'],
|
443
539
|
:owner_id => @db[:users].filter(:login => user).first[:id],
|
444
540
|
:name => r['name'],
|
@@ -448,9 +544,9 @@ module GHTorrent
|
|
448
544
|
:ext_ref_id => r[@ext_uniq])
|
449
545
|
|
450
546
|
info "GHTorrent: New repo #{repo}"
|
451
|
-
ensure_commits(user, repo)
|
452
|
-
ensure_project_members(user, repo)
|
453
|
-
ensure_watchers(user, repo)
|
547
|
+
ensure_commits(user, repo) if commits
|
548
|
+
ensure_project_members(user, repo) if project_members
|
549
|
+
ensure_watchers(user, repo) if watchers
|
454
550
|
repos.first(:owner_id => curuser[:id], :name => repo)
|
455
551
|
else
|
456
552
|
debug "GHTorrent: Repo #{repo} exists"
|
@@ -461,27 +557,32 @@ module GHTorrent
|
|
461
557
|
##
|
462
558
|
# Make sure that a project has all the registered members defined
|
463
559
|
def ensure_project_members(user, repo)
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
560
|
+
currepo = ensure_repo(user, repo, true, false, true)
|
561
|
+
time = currepo[:created_at]
|
562
|
+
|
563
|
+
project_members = @db.from(:project_members, :users).\
|
564
|
+
where(:project_members__user_id => :users__id).\
|
565
|
+
where(:project_members__repo_id => currepo[:id]).select(:login).all
|
468
566
|
|
469
567
|
retrieve_repo_collaborators(user, repo).reduce([]) do |acc, x|
|
470
|
-
if project_members.find {
|
568
|
+
if project_members.find {|y| y[:login] == x['login']}.nil?
|
471
569
|
acc << x
|
472
570
|
else
|
473
571
|
acc
|
474
572
|
end
|
475
|
-
end.map { |x| ensure_project_member(user, repo, x['login'],
|
573
|
+
end.map { |x| ensure_project_member(user, repo, x['login'], time) }
|
476
574
|
end
|
477
575
|
|
478
576
|
##
|
479
577
|
# Make sure that a project member exists in a project
|
480
578
|
def ensure_project_member(owner, repo, new_member, date_added)
|
481
579
|
pr_members = @db[:project_members]
|
580
|
+
project = ensure_repo(owner, repo, true, false, true)
|
482
581
|
new_user = ensure_user(new_member, false, false)
|
483
|
-
|
484
|
-
project
|
582
|
+
|
583
|
+
if project.nil? or new_user.nil?
|
584
|
+
return
|
585
|
+
end
|
485
586
|
|
486
587
|
memb_exist = pr_members.first(:user_id => new_user[:id],
|
487
588
|
:repo_id => project[:id])
|
@@ -489,6 +590,12 @@ module GHTorrent
|
|
489
590
|
if memb_exist.nil?
|
490
591
|
added = if date_added.nil? then Time.now else date_added end
|
491
592
|
retrieved = retrieve_repo_collaborator(owner, repo, new_member)
|
593
|
+
|
594
|
+
if retrieved.nil?
|
595
|
+
warn "Project member #{new_member} does not exist in #{owner}/#{repo}"
|
596
|
+
return
|
597
|
+
end
|
598
|
+
|
492
599
|
pr_members.insert(
|
493
600
|
:user_id => new_user[:id],
|
494
601
|
:repo_id => project[:id],
|
@@ -513,7 +620,6 @@ module GHTorrent
|
|
513
620
|
# [user] The login name of the user to check the organizations for
|
514
621
|
#
|
515
622
|
def ensure_orgs(user)
|
516
|
-
usr = @db[:users].first(:login => user)
|
517
623
|
retrieve_orgs(user).map{|o| ensure_participation(user, o['login'])}
|
518
624
|
end
|
519
625
|
|
@@ -525,8 +631,8 @@ module GHTorrent
|
|
525
631
|
# [org] The login name of the organization to check whether the user
|
526
632
|
# belongs in
|
527
633
|
#
|
528
|
-
def ensure_participation(user, organization)
|
529
|
-
org = ensure_org(organization)
|
634
|
+
def ensure_participation(user, organization, members = true)
|
635
|
+
org = ensure_org(organization, members)
|
530
636
|
usr = ensure_user(user, false, false)
|
531
637
|
|
532
638
|
org_members = @db[:organization_members]
|
@@ -550,14 +656,21 @@ module GHTorrent
|
|
550
656
|
# ==Parameters:
|
551
657
|
# [organization] The login name of the organization
|
552
658
|
#
|
553
|
-
def ensure_org(organization)
|
554
|
-
org = @db[:users].
|
659
|
+
def ensure_org(organization, members)
|
660
|
+
org = @db[:users].first(:login => organization, :type => 'org')
|
555
661
|
|
556
662
|
if org.nil?
|
557
|
-
ensure_user(
|
663
|
+
org = ensure_user(organization, false, false)
|
664
|
+
if members
|
665
|
+
retrieve_org_members(organization).map { |x|
|
666
|
+
ensure_participation(ensure_user(x['login'], false, false)[:login],
|
667
|
+
organization, false)
|
668
|
+
}
|
669
|
+
end
|
670
|
+
org
|
558
671
|
else
|
559
672
|
debug "GHTorrent: Organization #{organization} exists"
|
560
|
-
org
|
673
|
+
org
|
561
674
|
end
|
562
675
|
end
|
563
676
|
|
@@ -572,7 +685,6 @@ module GHTorrent
|
|
572
685
|
commit_id = @db[:commits].first(:sha => sha)[:id]
|
573
686
|
stored_comments = @db[:commit_comments].filter(:commit_id => commit_id)
|
574
687
|
commit_comments = retrieve_commit_comments(user, repo, sha)
|
575
|
-
#user_id = @db[:users].first(:login => user)[:id]
|
576
688
|
|
577
689
|
not_saved = commit_comments.reduce([]) do |acc, x|
|
578
690
|
if stored_comments.find{|y| y[:comment_id] == x['id']}.nil?
|
@@ -600,16 +712,16 @@ module GHTorrent
|
|
600
712
|
retrieved = retrieve_commit_comment(user, repo, id)
|
601
713
|
|
602
714
|
if retrieved.nil?
|
603
|
-
|
715
|
+
warn "GHTorrent: Commit comment #{id} deleted"
|
604
716
|
return
|
605
717
|
end
|
606
718
|
|
607
|
-
commit = ensure_commit(repo, retrieved['commit_id'], user,
|
719
|
+
commit = ensure_commit(repo, retrieved['commit_id'], user, false)
|
608
720
|
user = ensure_user(user, false, false)
|
609
721
|
@db[:commit_comments].insert(
|
610
722
|
:commit_id => commit[:id],
|
611
723
|
:user_id => user[:id],
|
612
|
-
:body => retrieved['body'],
|
724
|
+
:body => retrieved['body'][0..255],
|
613
725
|
:line => retrieved['line'],
|
614
726
|
:position => retrieved['position'],
|
615
727
|
:comment_id => retrieved['id'],
|
@@ -617,45 +729,67 @@ module GHTorrent
|
|
617
729
|
:created_at => date(retrieved['created_at'])
|
618
730
|
)
|
619
731
|
info "GHTorrent: Added commit comment #{commit[:sha]} -> #{user[:login]}"
|
620
|
-
@db[:commit_comments].first(:comment_id => id)
|
621
732
|
else
|
733
|
+
unless created_at.nil?
|
734
|
+
@db[:commit_comments].filter(:comment_id => id)\
|
735
|
+
.update(:created_at => date(created_at))
|
736
|
+
info "GHTorrent: Updating comment #{user}/#{repo} -> #{id}"
|
737
|
+
end
|
622
738
|
info "GHTorrent: Commit comment #{id} exists"
|
623
|
-
stored_comment
|
624
739
|
end
|
740
|
+
@db[:commit_comments].first(:comment_id => id)
|
625
741
|
end
|
626
742
|
|
627
743
|
##
|
628
744
|
# Make sure that all watchers of a repository exist
|
629
745
|
def ensure_watchers(owner, repo)
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
746
|
+
currepo = ensure_repo(owner, repo, true, true, false)
|
747
|
+
time = currepo[:created_at]
|
748
|
+
|
749
|
+
if currepo.nil?
|
750
|
+
warn "Could not retrieve watchers for #{owner}/#{repo}"
|
751
|
+
return
|
752
|
+
end
|
753
|
+
|
754
|
+
watchers = @db.from(:watchers, :users).\
|
755
|
+
where(:watchers__user_id => :users__id).\
|
756
|
+
where(:watchers__repo_id => currepo[:id]).select(:login).all
|
635
757
|
|
636
758
|
retrieve_watchers(owner, repo).reduce([]) do |acc, x|
|
637
|
-
if watchers.find { |y|
|
759
|
+
if watchers.find { |y|
|
760
|
+
y[:login] == x['login']
|
761
|
+
}.nil?
|
638
762
|
acc << x
|
639
763
|
else
|
640
764
|
acc
|
641
765
|
end
|
642
|
-
end.map { |x| ensure_watcher(owner, repo, x['login']) }
|
766
|
+
end.map { |x| ensure_watcher(owner, repo, x['login'], time) }
|
643
767
|
end
|
644
768
|
|
645
769
|
##
|
646
770
|
# Make sure that a watcher exists for a project
|
647
771
|
def ensure_watcher(owner, repo, watcher, date_added = nil)
|
648
|
-
|
772
|
+
project = ensure_repo(owner, repo, false, false, false)
|
649
773
|
new_watcher = ensure_user(watcher, false, false)
|
650
|
-
owner_id = @db[:users].first(:login => owner)[:id]
|
651
|
-
project = @db[:projects].first(:owner_id => owner_id, :name => repo)
|
652
774
|
|
775
|
+
if new_watcher.nil? or project.nil?
|
776
|
+
warn "GHTorrent: Watcher #{watcher} does not exist"
|
777
|
+
return
|
778
|
+
end
|
779
|
+
|
780
|
+
watchers = @db[:watchers]
|
653
781
|
memb_exist = watchers.first(:user_id => new_watcher[:id],
|
654
|
-
|
782
|
+
:repo_id => project[:id])
|
655
783
|
|
656
784
|
if memb_exist.nil?
|
657
785
|
added = if date_added.nil? then Time.now else date_added end
|
658
786
|
retrieved = retrieve_watcher(owner, repo, watcher)
|
787
|
+
|
788
|
+
if retrieved.nil?
|
789
|
+
warn "Watcher #{watcher} no longer watches #{owner}/#{repo}"
|
790
|
+
return
|
791
|
+
end
|
792
|
+
|
659
793
|
watchers.insert(
|
660
794
|
:user_id => new_watcher[:id],
|
661
795
|
:repo_id => project[:id],
|
@@ -673,6 +807,313 @@ module GHTorrent
|
|
673
807
|
end
|
674
808
|
end
|
675
809
|
|
810
|
+
##
|
811
|
+
# Process all pull requests
|
812
|
+
def ensure_pull_requests(owner, repo)
|
813
|
+
currepo = ensure_repo(owner, repo, false, false, false)
|
814
|
+
if currepo.nil?
|
815
|
+
warn "Could not retrieve pull requests from #{owner}/#{repo}"
|
816
|
+
return
|
817
|
+
end
|
818
|
+
|
819
|
+
pull_reqs = @db[:pull_requests].filter(:base_repo_id => currepo[:id])
|
820
|
+
|
821
|
+
retrieve_pull_requests(owner, repo).reduce([]) do |acc, x|
|
822
|
+
if pull_reqs.find { |y| y[:pullreq_id] == x['number'] }.nil?
|
823
|
+
acc << x
|
824
|
+
else
|
825
|
+
acc
|
826
|
+
end
|
827
|
+
end.map { |x| ensure_pull_request(owner, repo, x['number']) }
|
828
|
+
end
|
829
|
+
|
830
|
+
##
|
831
|
+
# Process a pull request
|
832
|
+
def ensure_pull_request(owner, repo, pullreq_id,
|
833
|
+
comments = true, commits = true,
|
834
|
+
state = nil, created_at = nil)
|
835
|
+
pulls_reqs = @db[:pull_requests]
|
836
|
+
pull_req_history = @db[:pull_request_history]
|
837
|
+
|
838
|
+
project = ensure_repo(owner, repo, false, false, false)
|
839
|
+
|
840
|
+
if project.nil?
|
841
|
+
return
|
842
|
+
end
|
843
|
+
|
844
|
+
# Adds a pull request history event
|
845
|
+
add_history = Proc.new do |id, ts, unq, act|
|
846
|
+
|
847
|
+
entry = pull_req_history.first(:pull_request_id => id,
|
848
|
+
:ext_ref_id => unq, :action => act)
|
849
|
+
if entry.nil?
|
850
|
+
pull_req_history.insert(:pull_request_id => id, :created_at => ts,
|
851
|
+
:ext_ref_id => unq, :action => act)
|
852
|
+
info "GHTorrent: New pull request (#{id}) history entry (#{act})"
|
853
|
+
else
|
854
|
+
pull_req_history.filter(:pull_request_id => id, :ext_ref_id => unq,
|
855
|
+
:action => act).update(:created_at => ts)
|
856
|
+
info "GHTorrent: Updating pull request (#{id}) history entry (#{act}) timestamp #{ts}"
|
857
|
+
end
|
858
|
+
end
|
859
|
+
|
860
|
+
# Checks whether a pull request concerns two branches of the same
|
861
|
+
# repository
|
862
|
+
is_intra_branch = Proc.new do |req|
|
863
|
+
req['head']['repo'].nil?
|
864
|
+
end
|
865
|
+
|
866
|
+
# Produces a log message
|
867
|
+
log_msg = Proc.new do |req|
|
868
|
+
head = if is_intra_branch.call(req)
|
869
|
+
req['base']['repo']['full_name']
|
870
|
+
else
|
871
|
+
req['head']['repo']['full_name']
|
872
|
+
end
|
873
|
+
|
874
|
+
<<-eos.gsub(/\s+/, " ").strip
|
875
|
+
GHTorrent: Pull request #{pullreq_id}
|
876
|
+
#{head} -> #{req['base']['repo']['full_name']}
|
877
|
+
eos
|
878
|
+
end
|
879
|
+
|
880
|
+
retrieved = retrieve_pull_request(owner, repo, pullreq_id)
|
881
|
+
|
882
|
+
if retrieved.nil?
|
883
|
+
warn "GHTorrent: Cannot retrieve pull request (#{owner}/#{repo} #{pullreq_id})"
|
884
|
+
return
|
885
|
+
end
|
886
|
+
|
887
|
+
base_repo = ensure_repo(retrieved['base']['repo']['owner']['login'],
|
888
|
+
retrieved['base']['repo']['name'],
|
889
|
+
false, false, false)
|
890
|
+
|
891
|
+
base_commit = ensure_commit(retrieved['base']['repo']['name'],
|
892
|
+
retrieved['base']['sha'],
|
893
|
+
retrieved['base']['repo']['owner']['login']
|
894
|
+
)
|
895
|
+
|
896
|
+
if is_intra_branch.call(retrieved)
|
897
|
+
head_repo = base_repo
|
898
|
+
head_commit =
|
899
|
+
warn "GHTorrent: Pull request is intra branch"
|
900
|
+
else
|
901
|
+
|
902
|
+
head_repo = ensure_repo(retrieved['head']['repo']['owner']['login'],
|
903
|
+
retrieved['head']['repo']['name'],
|
904
|
+
false, false, false)
|
905
|
+
|
906
|
+
head_commit = ensure_commit(retrieved['head']['repo']['name'],
|
907
|
+
retrieved['head']['sha'],
|
908
|
+
retrieved['head']['repo']['owner']['login'])
|
909
|
+
end
|
910
|
+
|
911
|
+
pull_req_user = ensure_user(retrieved['user']['login'], false, false)
|
912
|
+
|
913
|
+
merged = if retrieved['merged_at'].nil? then false else true end
|
914
|
+
closed = if retrieved['closed_at'].nil? then false else true end
|
915
|
+
|
916
|
+
pull_req = pulls_reqs.first(:base_repo_id => project[:id],
|
917
|
+
:pullreq_id => pullreq_id)
|
918
|
+
if pull_req.nil?
|
919
|
+
pulls_reqs.insert(
|
920
|
+
:head_repo_id => if not head_repo.nil? then head_repo[:id] end,
|
921
|
+
:base_repo_id => base_repo[:id],
|
922
|
+
:head_commit_id => if not head_commit.nil? then head_commit[:id] end,
|
923
|
+
:base_commit_id => base_commit[:id],
|
924
|
+
:user_id => pull_req_user[:id],
|
925
|
+
:pullreq_id => pullreq_id,
|
926
|
+
:intra_branch => is_intra_branch.call(retrieved)
|
927
|
+
)
|
928
|
+
|
929
|
+
info log_msg.call(retrieved)
|
930
|
+
else
|
931
|
+
debug log_msg.call(retrieved) + " exists"
|
932
|
+
end
|
933
|
+
|
934
|
+
pull_req = pulls_reqs.first(:base_repo_id => project[:id],
|
935
|
+
:pullreq_id => pullreq_id)
|
936
|
+
|
937
|
+
add_history.call(pull_req[:id], date(retrieved['created_at']),
|
938
|
+
retrieved[@ext_uniq], 'opened')
|
939
|
+
add_history.call(pull_req[:id], date(retrieved['merged_at']),
|
940
|
+
retrieved[@ext_uniq], 'merged') if merged
|
941
|
+
add_history.call(pull_req[:id], date(retrieved['closed_at']),
|
942
|
+
retrieved[@ext_uniq], 'closed') if closed
|
943
|
+
add_history.call(pull_req[:id], date(created_at), retrieved[@ext_uniq],
|
944
|
+
state) unless state.nil?
|
945
|
+
|
946
|
+
ensure_pull_request_commits(owner, repo, pullreq_id) if commits
|
947
|
+
ensure_pullreq_comments(owner, repo, pullreq_id, created_at) if comments
|
948
|
+
|
949
|
+
pulls_reqs.first(:base_repo_id => project[:id],
|
950
|
+
:pullreq_id => pullreq_id)
|
951
|
+
end
|
952
|
+
|
953
|
+
##
# Make sure that the comments of a pull request are stored.
#
# Comments that already have a row in the pull_request_comments table for
# this pull request are skipped; every other retrieved comment is handed to
# ensure_pullreq_comment.
#
# ==Parameters:
# [owner] The owner login, as passed to ensure_repo / ensure_pull_request
# [repo] The repository name
# [pullreq_id] The pull request identifier handed to the retriever
# [created_at] When nil, the repository's :created_at is forwarded to
#              ensure_pullreq_comment; otherwise the current time is
#
# ==Returns:
# An array holding one ensure_pullreq_comment result per comment that was
# not stored yet, or nil when the repository or the pull request could not
# be resolved.
def ensure_pullreq_comments(owner, repo, pullreq_id, created_at)
  currepo = ensure_repo(owner, repo, true, true, false)

  # The repository must be checked before any of its fields is read.
  if currepo.nil?
    warn "Could not retrieve repository #{owner}/#{repo}"
    return
  end

  # NOTE(review): a non-nil created_at is replaced by the current time rather
  # than being forwarded as-is -- kept unchanged, confirm this is intended.
  time = created_at.nil? ? currepo[:created_at] : Time.now

  pull_req = ensure_pull_request(owner, repo, pullreq_id, false, true)

  if pull_req.nil?
    warn "Could not retrieve pull req #{owner}/#{repo} -> #{pullreq_id}"
    return
  end

  stored_comments = @db[:pull_request_comments]

  # Rows are keyed by :pull_request_id, the same column that
  # ensure_pullreq_comment reads and writes.
  retrieve_pull_req_comments(owner, repo, pullreq_id).select do |x|
    stored_comments.first(:pull_request_id => pull_req[:id],
                          :comment_id => x['id']).nil?
  end.map do |x|
    ensure_pullreq_comment(owner, repo, pullreq_id, x['id'], time)
  end
end
|
981
|
+
|
982
|
+
##
# Make sure that a single pull request comment is stored.
#
# Nothing is written when a row for the pull request / comment pair already
# exists. Otherwise the comment is retrieved, its author and the commit it
# refers to are resolved, and a row is inserted.
#
# ==Parameters:
# [owner] The owner login, as passed to ensure_pull_request
# [repo] The repository name
# [pullreq_id] The pull request identifier handed to the retriever
# [comment_id] The identifier of the comment to store
# [created_at] Accepted for interface compatibility; not used by this method
#
# ==Returns:
# The result of the final debug call, or nil when the pull request, the
# comment or the commenter could not be retrieved.
def ensure_pullreq_comment(owner, repo, pullreq_id, comment_id, created_at)
  pull_req = ensure_pull_request(owner, repo, pullreq_id, false, true)

  if pull_req.nil?
    warn "Could not retrieve pull req #{owner}/#{repo} -> #{pullreq_id}"
    return
  end

  comments = @db[:pull_request_comments]
  exists = comments.first(:pull_request_id => pull_req[:id],
                          :comment_id => comment_id)

  if exists.nil?
    retrieved = retrieve_pull_req_comment(owner, repo, pullreq_id, comment_id)

    if retrieved.nil?
      warn "Could not retrieve comment #{comment_id} for pullreq #{owner}/#{repo} -> #{pullreq_id}"
      return
    end

    commenter = ensure_user(retrieved['user']['login'], false, false)

    # Without a commenter there is no user id to store, so stop here
    # instead of dereferencing nil further down.
    if commenter.nil?
      warn "Could not retrieve commenter #{retrieved['user']['login']} " \
           "for pullreq comment #{owner}/#{repo} -> #{pullreq_id}(#{comment_id})"
      return
    end

    commit = ensure_commit(repo, retrieved['original_commit_id'], owner)

    comments.insert(
      :pull_request_id => pull_req[:id],
      :user_id => commenter[:id],
      :comment_id => comment_id,
      :position => retrieved['original_position'],
      # Only the first 255 characters of the body are stored.
      :body => retrieved['body'][0..254],
      # The commit reference is left empty when the commit is unknown.
      :commit_id => (commit[:id] unless commit.nil?),
      :created_at => retrieved['created_at'],
      :ext_ref_id => retrieved[@ext_uniq]
    )
    debug "GHTorrent: Adding comment #{comment_id} for pullreq #{owner}/#{repo} -> #{pullreq_id}"
  else
    debug "GHTorrent: Updating comment #{comment_id} for pullreq #{owner}/#{repo} -> #{pullreq_id}"
  end
end
|
1025
|
+
|
1026
|
+
##
# Make sure that the commits of a pull request are stored and linked to it.
#
# Every retrieved commit is passed through ensure_commit; commits that could
# not be resolved are skipped. Each remaining commit gets a row in the
# pull_request_commits table unless one is already there.
#
# ==Parameters:
# [owner] The owner login, as passed to ensure_commit / ensure_pull_request
# [repo] The repository name
# [pullreq_id] The pull request identifier handed to the retriever
#
# ==Returns:
# An array with one entry per resolved commit (the result of the info call
# for new links, the existing row otherwise), or nil when the pull request
# could not be resolved.
def ensure_pull_request_commits(owner, repo, pullreq_id)
  commits = retrieve_pull_req_commits(owner, repo, pullreq_id).map { |c|
    ensure_commit(repo, c['sha'], owner, true)
  }.compact # drop commits that could not be resolved

  return [] if commits.empty?

  # The pull request is the same for every commit: resolve it once.
  pullreq = ensure_pull_request(owner, repo, pullreq_id, false, false)

  if pullreq.nil?
    warn "Could not retrieve pull req #{owner}/#{repo} -> #{pullreq_id}"
    return
  end

  links = @db[:pull_request_commits]

  commits.map do |c|
    exists = links.first(:pull_request_id => pullreq[:id],
                         :commit_id => c[:id])
    if exists.nil?
      links.insert(:pull_request_id => pullreq[:id],
                   :commit_id => c[:id])

      info "GHTorrent: Added commit #{c[:sha]} to pullreq #{owner}/#{repo} -> #{pullreq_id}"
    else
      debug "GHTorrent: Commit #{c[:sha]} exists in pullreq #{owner}/#{repo} -> #{pullreq_id}"
      exists
    end
  end
end
|
1044
|
+
|
1045
|
+
##
# Get all forks for a project.
#
# Forks that are already recorded for the project (matched on owner login
# and project name) are skipped; every other retrieved fork is handed to
# ensure_fork together with the project's :created_at value.
#
# ==Parameters:
# [owner] The user to which the project belongs
# [repo] The repository/project to find forks for
#
# ==Returns:
# An array with one ensure_fork result per fork that was not recorded yet,
# or nil when the project could not be resolved.
def ensure_forks(owner, repo)
  currepo = ensure_repo(owner, repo, false, false, false)

  # The project must be checked before any of its fields is read.
  if currepo.nil?
    warn "Could not retrieve forks for #{owner}/#{repo}"
    return
  end

  time = currepo[:created_at]

  # NOTE(review): :login is selected unqualified while only forks and
  # projects are in the FROM list -- confirm that it resolves to a column.
  existing_forks = @db.from(:forks, :projects).
    where(:forks__forked_project_id => :projects__id).
    where(:forks__forked_from_id => currepo[:id]).
    select(:name, :login).all

  retrieve_forks(owner, repo).reject do |x|
    existing_forks.any? do |y|
      y[:login] == x['owner']['login'] && y[:name] == x['name']
    end
  end.map { |x| ensure_fork(owner, repo, x['id'], time) }
end
|
1074
|
+
|
1075
|
+
##
# Make sure that a fork is retrieved for a project
#
# A fork that is already recorded only has its :created_at refreshed, and
# only when a date is supplied. An unknown fork is retrieved, its own
# repository is resolved through ensure_repo and a row linking it to the
# parent project is inserted.
#
# ==Parameters:
# [owner] The owner login of the parent project
# [repo] The name of the parent project
# [fork_id] The identifier of the fork
# [date_added] Timestamp to record; the current time is used for new forks
#              when this is nil
#
# ==Returns:
# The result of the final info/debug call, or nil when nothing was written.
def ensure_fork(owner, repo, fork_id, date_added = nil)
  fork_table = @db[:forks]
  parent = ensure_repo(owner, repo, false, false, false)

  unless fork_table.first(:fork_id => fork_id).nil?
    # Known fork: nothing to do unless a timestamp was handed in.
    return if date_added.nil?

    fork_table.filter(:fork_id => fork_id).
      update(:created_at => date(date_added))
    return debug("GHTorrent: Updating fork #{owner}/#{repo} (#{fork_id})")
  end

  added = date_added.nil? ? Time.now : date_added
  retrieved = retrieve_fork(owner, repo, fork_id)

  if retrieved.nil?
    warn "GHTorrent: Fork #{fork_id} does not exist for #{owner}/#{repo}"
    return
  end

  # full_name is split on "/" into the fork's owner and project name.
  forked_repo_owner, forked_repo_name = retrieved['full_name'].split(/\//)

  child = ensure_repo(forked_repo_owner, forked_repo_name)

  if parent.nil? || child.nil?
    warn "Could not add fork #{fork_id}"
    return
  end

  fork_table.insert(:forked_project_id => child[:id],
                    :forked_from_id => parent[:id],
                    :fork_id => fork_id,
                    :created_at => added,
                    :ext_ref_id => retrieved[@ext_uniq])
  info "GHTorrent: Added #{forked_repo_owner}/#{forked_repo_name} as fork of #{owner}/#{repo}"
end
|
1116
|
+
|
676
1117
|
private
|
677
1118
|
|
678
1119
|
# Store a commit contained in a hash. First check whether the commit exists.
|
@@ -684,21 +1125,24 @@ module GHTorrent
|
|
684
1125
|
author = commit_user(c['author'], c['commit']['author'])
|
685
1126
|
commiter = commit_user(c['committer'], c['commit']['committer'])
|
686
1127
|
|
687
|
-
|
688
|
-
|
689
|
-
|
1128
|
+
repository = ensure_repo(user, repo, false, false, false)
|
1129
|
+
|
1130
|
+
if repository.nil?
|
1131
|
+
warn "Could not store commit #{user}/#{repo} #{c['sha']}"
|
1132
|
+
return
|
1133
|
+
end
|
690
1134
|
|
691
1135
|
commits.insert(:sha => c['sha'],
|
692
1136
|
:author_id => author[:id],
|
693
1137
|
:committer_id => commiter[:id],
|
694
|
-
:project_id =>
|
1138
|
+
:project_id => repository[:id],
|
695
1139
|
:created_at => date(c['commit']['author']['date']),
|
696
1140
|
:ext_ref_id => c[@ext_uniq]
|
697
1141
|
)
|
1142
|
+
debug "GHTorrent: New commit #{user}/#{repo} -> #{c['sha']} "
|
698
1143
|
commits.first(:sha => c['sha'])
|
699
|
-
debug "GHTorrent: New commit #{repo} -> #{c['sha']} "
|
700
1144
|
else
|
701
|
-
debug "GHTorrent: Commit #{repo} -> #{c['sha']} exists"
|
1145
|
+
debug "GHTorrent: Commit #{user}/#{repo} -> #{c['sha']} exists"
|
702
1146
|
commit
|
703
1147
|
end
|
704
1148
|
end
|
@@ -706,12 +1150,28 @@ module GHTorrent
|
|
706
1150
|
# Run a block in a DB transaction. Exceptions trigger transaction rollback
# and are rethrown.
#
# The database connection and the persister are obtained on entry when they
# are not set yet, and both are released again when the method is left,
# whether the block succeeded or raised.
#
# ==Returns:
# The value of the supplied block.
def transaction(&block)
  @db ||= get_db
  @persister ||= persister

  start_time = Time.now
  begin
    result = nil
    @db.transaction(:rollback => :reraise, :isolation => :committed) do
      result = yield
    end
    total = Time.now.to_ms - start_time.to_ms
    debug "GHTorrent: Transaction committed (#{total} ms)"
    # Hand the block's value back instead of the logger's return value.
    result
  rescue Exception
    # Every failure is logged and immediately re-raised unchanged, which is
    # why the broad Exception class is rescued here.
    total = Time.now.to_ms - start_time.to_ms
    warn "GHTorrent: Transaction failed (#{total} ms)"
    raise
  ensure
    @db.disconnect
    @persister.close

    # Drop the references so that the next call acquires fresh ones.
    @db = nil
    @persister = nil
    GC.start
  end
end
|
716
1176
|
|
717
1177
|
##
|
@@ -742,15 +1202,6 @@ module GHTorrent
|
|
742
1202
|
email =~ /^[a-zA-Z][\w\.-]*[a-zA-Z0-9]@[a-zA-Z0-9][\w\.-]*[a-zA-Z0-9]\.[a-zA-Z][a-zA-Z\.]*[a-zA-Z]$/
|
743
1203
|
end
|
744
1204
|
end
|
745
|
-
# Base exception for all GHTorrent exceptions
|
746
|
-
class GHTorrentException < Exception
|
747
|
-
end
|
748
|
-
end
|
749
|
-
|
750
|
-
class Time
|
751
|
-
def to_ms
|
752
|
-
(self.to_f * 1000.0).to_i
|
753
|
-
end
|
754
1205
|
end
|
755
1206
|
|
756
1207
|
# vim: set sta sts=2 shiftwidth=2 sw=2 et ai :
|