ghtorrent 0.4 → 0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +24 -0
- data/Gemfile +17 -0
- data/Gemfile.lock +40 -0
- data/README.md +23 -22
- data/bin/ght-data-retrieval +66 -24
- data/bin/ght-load +41 -19
- data/bin/ght-mirror-events +13 -16
- data/bin/ght-rm-dupl +119 -77
- data/lib/ghtorrent.rb +14 -4
- data/lib/ghtorrent/adapters/base_adapter.rb +17 -5
- data/lib/ghtorrent/adapters/mongo_persister.rb +122 -56
- data/lib/ghtorrent/api_client.rb +151 -16
- data/lib/ghtorrent/bson_orderedhash.rb +23 -0
- data/lib/ghtorrent/cache.rb +97 -0
- data/lib/ghtorrent/command.rb +43 -25
- data/lib/ghtorrent/gh_torrent_exception.rb +6 -0
- data/lib/ghtorrent/ghtorrent.rb +615 -164
- data/lib/ghtorrent/hash.rb +11 -0
- data/lib/ghtorrent/logging.rb +11 -7
- data/lib/ghtorrent/migrations/001_init_schema.rb +3 -3
- data/lib/ghtorrent/migrations/002_add_external_ref_ids.rb +2 -0
- data/lib/ghtorrent/migrations/003_add_orgs.rb +4 -1
- data/lib/ghtorrent/migrations/004_add_commit_comments.rb +4 -2
- data/lib/ghtorrent/migrations/005_add_repo_collaborators.rb +2 -0
- data/lib/ghtorrent/migrations/006_add_watchers.rb +2 -0
- data/lib/ghtorrent/migrations/007_add_pull_requests.rb +64 -0
- data/lib/ghtorrent/migrations/008_add_project_unq.rb +23 -0
- data/lib/ghtorrent/migrations/009_add_project_commit.rb +27 -0
- data/lib/ghtorrent/migrations/010_add_forks.rb +28 -0
- data/lib/ghtorrent/migrations/mysql_defaults.rb +6 -0
- data/lib/ghtorrent/persister.rb +3 -0
- data/lib/ghtorrent/retriever.rb +298 -102
- data/lib/ghtorrent/settings.rb +20 -1
- data/lib/ghtorrent/time.rb +5 -0
- data/lib/ghtorrent/utils.rb +22 -4
- data/lib/version.rb +5 -0
- metadata +173 -145
- data/lib/ghtorrent/call_stack.rb +0 -91
data/lib/ghtorrent/ghtorrent.rb
CHANGED
@@ -1,5 +1,11 @@
|
|
1
1
|
require 'sequel'
|
2
2
|
|
3
|
+
require 'ghtorrent/time'
|
4
|
+
require 'ghtorrent/logging'
|
5
|
+
require 'ghtorrent/settings'
|
6
|
+
require 'ghtorrent/retriever'
|
7
|
+
require 'ghtorrent/persister'
|
8
|
+
|
3
9
|
module GHTorrent
|
4
10
|
class Mirror
|
5
11
|
|
@@ -8,32 +14,34 @@ module GHTorrent
|
|
8
14
|
include GHTorrent::Retriever
|
9
15
|
include GHTorrent::Persister
|
10
16
|
|
11
|
-
attr_reader :settings, :persister
|
12
|
-
|
13
|
-
def initialize(configuration)
|
17
|
+
attr_reader :settings, :persister, :ext_uniq, :logger
|
14
18
|
|
15
|
-
|
16
|
-
|
19
|
+
def initialize(settings)
|
20
|
+
@settings = settings
|
17
21
|
@ext_uniq = config(:uniq_id)
|
18
22
|
@logger = Logger.new(STDOUT)
|
19
|
-
@persister = connect(:mongo, @settings)
|
20
|
-
get_db
|
21
23
|
end
|
22
24
|
|
23
25
|
# db related functions
|
24
26
|
def get_db
|
25
|
-
|
26
|
-
@db = Sequel.connect(config(:sql_url))
|
27
|
-
|
27
|
+
Sequel.single_threaded = true
|
28
|
+
@db = Sequel.connect(config(:sql_url), :encoding => 'utf8')
|
29
|
+
#@db.loggers << @logger
|
28
30
|
if @db.tables.empty?
|
29
31
|
dir = File.join(File.dirname(__FILE__), 'migrations')
|
30
32
|
puts "Database empty, running migrations from #{dir}"
|
31
33
|
Sequel.extension :migration
|
32
34
|
Sequel::Migrator.apply(@db, dir)
|
33
35
|
end
|
36
|
+
|
34
37
|
@db
|
35
38
|
end
|
36
39
|
|
40
|
+
def persister
|
41
|
+
@persister ||= connect(:mongo, @settings)
|
42
|
+
@persister
|
43
|
+
end
|
44
|
+
|
37
45
|
##
|
38
46
|
# Ensure that a user exists, or fetch its latest state from Github
|
39
47
|
# ==Parameters:
|
@@ -46,7 +54,7 @@ module GHTorrent
|
|
46
54
|
end
|
47
55
|
|
48
56
|
transaction do
|
49
|
-
|
57
|
+
ensure_user(user, true, true)
|
50
58
|
ensure_commit(repo, sha, user)
|
51
59
|
end
|
52
60
|
end
|
@@ -60,7 +68,6 @@ module GHTorrent
|
|
60
68
|
# [date_added] The timestamp that the add event took place
|
61
69
|
def get_project_member(owner, repo, new_member, date_added)
|
62
70
|
transaction do
|
63
|
-
ensure_repo(owner, repo)
|
64
71
|
ensure_project_member(owner, repo, new_member, date_added)
|
65
72
|
end
|
66
73
|
end
|
@@ -74,7 +81,6 @@ module GHTorrent
|
|
74
81
|
# [date_added] The timestamp that the add event took place
|
75
82
|
def get_commit_comment(user, repo, comment_id, date_added)
|
76
83
|
transaction do
|
77
|
-
ensure_repo(user, repo)
|
78
84
|
ensure_commit_comment(user, repo, comment_id, date_added)
|
79
85
|
end
|
80
86
|
end
|
@@ -88,7 +94,6 @@ module GHTorrent
|
|
88
94
|
# [date_added] The timestamp that the add event took place
|
89
95
|
def get_watcher(owner, repo, watcher, date_added)
|
90
96
|
transaction do
|
91
|
-
ensure_repo(owner, repo)
|
92
97
|
ensure_watcher(owner, repo, watcher, date_added)
|
93
98
|
end
|
94
99
|
end
|
@@ -101,20 +106,84 @@ module GHTorrent
|
|
101
106
|
# [date_added] The timestamp that the add event took place
|
102
107
|
def get_follower(follower, followed, date_added)
|
103
108
|
transaction do
|
104
|
-
ensure_user(follower,
|
105
|
-
ensure_user(followed,
|
106
|
-
|
109
|
+
ensure_user(follower, true, true)
|
110
|
+
ensure_user(followed, true, true)
|
111
|
+
ensure_user_follower(followed, follower, date_added)
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
##
|
116
|
+
# Get a pull request and record the changes it affects
|
117
|
+
# ==Parameters:
|
118
|
+
# [owner] The owner of the repository to which the pullreq will be applied
|
119
|
+
# [repo] The repository to which the pullreq will be applied
|
120
|
+
# [pullreq_id] The ID of the pull request relative to the repository
|
121
|
+
def get_pull_request(owner, repo, pullreq_id, state, created_at)
|
122
|
+
transaction do
|
123
|
+
ensure_pull_request(owner, repo, pullreq_id, true, true, state, created_at)
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
##
|
128
|
+
# Retrieve details about a project fork (including the forked project)
|
129
|
+
# ==Parameters:
|
130
|
+
# [owner] The login of the repository owner
|
131
|
+
# [repo] The name of the repository
|
132
|
+
# [fork_id] The fork item id
|
133
|
+
# [date_added] The timestamp that the add event took place
|
134
|
+
def get_fork(owner, repo, fork_id, date_added)
|
135
|
+
transaction do
|
136
|
+
ensure_fork(owner, repo, fork_id, date_added)
|
107
137
|
end
|
108
138
|
end
|
109
139
|
|
140
|
+
##
|
141
|
+
# Retrieve a pull request review comment
|
142
|
+
# ==Parameters:
|
143
|
+
# [owner] The login of the repository owner
|
144
|
+
# [repo] The name of the repository
|
145
|
+
# [pullreq_id, comment_id] The ID of the pull request relative to the repository, and the ID of the comment
|
146
|
+
# [created_at] The timestamp that the add event took place
|
147
|
+
def get_pullreq_comment(owner, repo, pullreq_id, comment_id, created_at)
|
148
|
+
transaction do
|
149
|
+
ensure_pullreq_comment(owner, repo, pullreq_id, comment_id, created_at)
|
150
|
+
end
|
151
|
+
end
|
152
|
+
|
153
|
+
##
|
154
|
+
# Retrieve an issue comment (not implemented yet; currently raises an error)
|
155
|
+
# ==Parameters:
|
156
|
+
# [owner] The login of the repository owner
|
157
|
+
# [repo] The name of the repository
|
158
|
+
# [issue_id, comment_id] The ID of the issue and the ID of the comment
|
159
|
+
# [created_at] The timestamp that the add event took place
|
160
|
+
def get_issue_comment(owner, repo, issue_id, comment_id, created_at)
|
161
|
+
transaction do
|
162
|
+
raise "Not implemented"
|
163
|
+
#ensure_pullreq_comment(owner, repo, pullreq_id, comment_id, created_at)
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
|
110
168
|
##
|
111
169
|
# Make sure a commit exists
|
112
170
|
#
|
113
171
|
def ensure_commit(repo, sha, user, comments = true)
|
172
|
+
ensure_repo(user, repo)
|
114
173
|
c = retrieve_commit(repo, sha, user)
|
174
|
+
|
175
|
+
if c.nil?
|
176
|
+
warn "GHTorrent: Commit #{user}/#{repo} -> #{sha} does not exist"
|
177
|
+
return
|
178
|
+
end
|
179
|
+
|
115
180
|
stored = store_commit(c, repo, user)
|
116
181
|
ensure_parents(c)
|
117
|
-
|
182
|
+
if not c['commit']['comment_count'].nil? \
|
183
|
+
and c['commit']['comment_count'] > 0
|
184
|
+
ensure_commit_comments(user, repo, sha) if comments
|
185
|
+
end
|
186
|
+
ensure_repo_commit(user, repo, sha)
|
118
187
|
stored
|
119
188
|
end
|
120
189
|
|
@@ -162,13 +231,41 @@ module GHTorrent
|
|
162
231
|
|
163
232
|
parents.insert(:commit_id => this[:id],
|
164
233
|
:parent_id => parent[:id])
|
165
|
-
info "Added parent #{parent[:sha]} to commit #{this[:sha]}"
|
234
|
+
info "GHTorrent: Added parent #{parent[:sha]} to commit #{this[:sha]}"
|
166
235
|
else
|
167
|
-
|
236
|
+
debug "GHTorrent: Parent #{parent[:sha]} for commit #{this[:sha]} exists"
|
168
237
|
end
|
169
238
|
end
|
170
239
|
end
|
171
240
|
|
241
|
+
##
|
242
|
+
# Make sure that a commit has been associated with the provided repo
|
243
|
+
# ==Parameters:
|
244
|
+
# [user] The user that owns the repo this commit has been submitted to
|
245
|
+
# [repo] The repo receiving the commit
|
246
|
+
# [sha] The commit SHA
|
247
|
+
def ensure_repo_commit(user, repo, sha)
|
248
|
+
userid = @db[:users].first(:login => user)[:id]
|
249
|
+
projectid = @db[:projects].first(:owner_id => userid,
|
250
|
+
:name => repo)[:id]
|
251
|
+
commitid = @db[:commits].first(:sha => sha)[:id]
|
252
|
+
|
253
|
+
exists = @db[:project_commits].first(:project_id => projectid,
|
254
|
+
:commit_id => commitid)
|
255
|
+
if exists.nil?
|
256
|
+
@db[:project_commits].insert(
|
257
|
+
:project_id => projectid,
|
258
|
+
:commit_id => commitid
|
259
|
+
)
|
260
|
+
info "GHTorrent: Added commit #{user}/#{repo} -> #{sha}"
|
261
|
+
@db[:project_commits].first(:project_id => projectid,
|
262
|
+
:commit_id => commitid)
|
263
|
+
else
|
264
|
+
debug "GHTorrent: Commit #{user}/#{repo} -> #{sha} exists"
|
265
|
+
exists
|
266
|
+
end
|
267
|
+
end
|
268
|
+
|
172
269
|
##
|
173
270
|
# Add (or update) an entry for a commit author. This method uses information
|
174
271
|
# in the JSON object returned by Github to add (or update) a user in the
|
@@ -193,15 +290,14 @@ module GHTorrent
|
|
193
290
|
login = githubuser['login'] unless githubuser.nil?
|
194
291
|
|
195
292
|
if login.nil?
|
196
|
-
ensure_user("#{name}<#{email}>",
|
293
|
+
ensure_user("#{name}<#{email}>", false, false)
|
197
294
|
else
|
198
295
|
dbuser = users.first(:login => login)
|
199
296
|
byemail = users.first(:email => email)
|
200
297
|
if dbuser.nil?
|
201
298
|
# We do not have the user in the database yet. Add him
|
202
|
-
added = ensure_user(login,
|
299
|
+
added = ensure_user(login, false, false)
|
203
300
|
if byemail.nil?
|
204
|
-
#
|
205
301
|
users.filter(:login => login).update(:name => name) if added[:name].nil?
|
206
302
|
users.filter(:login => login).update(:email => email) if added[:email].nil?
|
207
303
|
else
|
@@ -215,8 +311,6 @@ module GHTorrent
|
|
215
311
|
:login => login,
|
216
312
|
:company => added['company'],
|
217
313
|
:location => added['location'],
|
218
|
-
:hireable => added['hireable'],
|
219
|
-
:bio => added['bio'],
|
220
314
|
:created_at => added['created_at']
|
221
315
|
)
|
222
316
|
end
|
@@ -259,7 +353,6 @@ module GHTorrent
|
|
259
353
|
return u
|
260
354
|
end
|
261
355
|
|
262
|
-
|
263
356
|
##
|
264
357
|
# Ensure that a user exists, or fetch its latest state from Github
|
265
358
|
# ==Parameters:
|
@@ -275,6 +368,12 @@ module GHTorrent
|
|
275
368
|
|
276
369
|
if usr.nil?
|
277
370
|
u = retrieve_user_byusername(user)
|
371
|
+
|
372
|
+
if u.nil?
|
373
|
+
warn "GHTorrent: User #{user} does not exist"
|
374
|
+
return
|
375
|
+
end
|
376
|
+
|
278
377
|
email = unless u['email'].nil?
|
279
378
|
if u['email'].strip == "" then
|
280
379
|
nil
|
@@ -283,49 +382,16 @@ module GHTorrent
|
|
283
382
|
end
|
284
383
|
end
|
285
384
|
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
:bio => u['bio'],
|
295
|
-
:location => u['location'],
|
296
|
-
:type => user_type(u['type']),
|
297
|
-
:created_at => date(u['created_at']),
|
298
|
-
:ext_ref_id => u[@ext_uniq]
|
299
|
-
)
|
300
|
-
info "GHTorrent: Updating user #{user} (email #{email})"
|
301
|
-
else
|
302
|
-
users.insert(:login => u['login'],
|
303
|
-
:name => u['name'],
|
304
|
-
:company => u['company'],
|
305
|
-
:email => email,
|
306
|
-
:hireable => boolean(u['hirable']),
|
307
|
-
:bio => u['bio'],
|
308
|
-
:location => u['location'],
|
309
|
-
:type => user_type(u['type']),
|
310
|
-
:created_at => date(u['created_at']),
|
311
|
-
:ext_ref_id => u[@ext_uniq])
|
312
|
-
|
313
|
-
info "GHTorrent: New user #{user}"
|
314
|
-
end
|
315
|
-
else
|
316
|
-
users.insert(:login => u['login'],
|
317
|
-
:name => u['name'],
|
318
|
-
:company => u['company'],
|
319
|
-
:email => email,
|
320
|
-
:hireable => boolean(u['hirable']),
|
321
|
-
:bio => u['bio'],
|
322
|
-
:location => u['location'],
|
323
|
-
:type => user_type(u['type']),
|
324
|
-
:created_at => date(u['created_at']),
|
325
|
-
:ext_ref_id => u[@ext_uniq])
|
385
|
+
users.insert(:login => u['login'],
|
386
|
+
:name => u['name'],
|
387
|
+
:company => u['company'],
|
388
|
+
:email => email,
|
389
|
+
:location => u['location'],
|
390
|
+
:type => user_type(u['type']),
|
391
|
+
:created_at => date(u['created_at']),
|
392
|
+
:ext_ref_id => u[@ext_uniq])
|
326
393
|
|
327
|
-
|
328
|
-
end
|
394
|
+
info "GHTorrent: New user #{user}"
|
329
395
|
users.first(:login => user)
|
330
396
|
else
|
331
397
|
debug "GHTorrent: User #{user} exists"
|
@@ -340,37 +406,62 @@ module GHTorrent
|
|
340
406
|
#
|
341
407
|
# ==Parameters:
|
342
408
|
# [user] The user login to find followers by
|
343
|
-
def ensure_user_followers(
|
409
|
+
def ensure_user_followers(followed, date_added = nil)
|
410
|
+
curuser = ensure_user(followed, false, false)
|
411
|
+
time = curuser[:created_at]
|
412
|
+
followers = @db.from(:followers, :users).\
|
413
|
+
where(:followers__follower_id => :users__id).
|
414
|
+
where(:followers__user_id => curuser[:id]).select(:login).all
|
415
|
+
|
416
|
+
retrieve_user_followers(followed).reduce([]) do |acc, x|
|
417
|
+
if followers.find {|y| y[:login] == x['login']}.nil?
|
418
|
+
acc << x
|
419
|
+
else
|
420
|
+
acc
|
421
|
+
end
|
422
|
+
end.map { |x| ensure_user_follower(followed, x['login'], time) }
|
423
|
+
end
|
424
|
+
|
425
|
+
##
|
426
|
+
# Make sure that a user follows another one
|
427
|
+
def ensure_user_follower(followed, follower, date_added)
|
428
|
+
follower_user = ensure_user(follower, false, false)
|
429
|
+
followed_user = ensure_user(followed, false, false)
|
430
|
+
|
431
|
+
if followed_user.nil? or follower_user.nil?
|
432
|
+
warn "Could not add follower #{follower} to #{followed}"
|
433
|
+
return
|
434
|
+
end
|
435
|
+
|
344
436
|
followers = @db[:followers]
|
345
|
-
|
437
|
+
followed_id = follower_user[:id]
|
438
|
+
follower_id = followed_user[:id]
|
346
439
|
|
347
|
-
|
348
|
-
|
349
|
-
follower = f['login']
|
350
|
-
ensure_user(user, false, false)
|
351
|
-
ensure_user(follower, false, false)
|
440
|
+
follower_exists = followers.first(:user_id => followed_id,
|
441
|
+
:follower_id => follower_id)
|
352
442
|
|
353
|
-
|
443
|
+
if follower_exists.nil?
|
444
|
+
added = if date_added.nil? then Time.now else date_added end
|
445
|
+
retrieved = retrieve_user_follower(followed, follower)
|
354
446
|
|
447
|
+
if retrieved.nil?
|
448
|
+
warn "Follower #{follower} does not exist for user #{followed}"
|
449
|
+
return
|
450
|
+
end
|
355
451
|
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
:follower_id => followerid).\
|
368
|
-
update(:created_at => date(date_added))
|
369
|
-
info "GHTorrent: Updated follower #{follower} -> #{user}"
|
370
|
-
end
|
371
|
-
debug "GHTorrent: User #{follower} already follows #{user}"
|
452
|
+
followers.insert(:user_id => followed_id,
|
453
|
+
:follower_id => follower_id,
|
454
|
+
:created_at => added,
|
455
|
+
:ext_ref_id => retrieved[@ext_uniq])
|
456
|
+
info "GHTorrent: User #{follower} follows #{followed}"
|
457
|
+
else
|
458
|
+
unless date_added.nil?
|
459
|
+
followers.filter(:user_id => followed_id,
|
460
|
+
:follower_id => follower_id)\
|
461
|
+
.update(:created_at => date(date_added))
|
462
|
+
debug "GHTorrent: Updating follower #{followed} -> #{follower}"
|
372
463
|
end
|
373
|
-
|
464
|
+
end
|
374
465
|
end
|
375
466
|
|
376
467
|
##
|
@@ -379,8 +470,7 @@ module GHTorrent
|
|
379
470
|
#
|
380
471
|
# ==Parameters:
|
381
472
|
# [email] The email to lookup the user by
|
382
|
-
# [
|
383
|
-
# [followers] If true, the user's followers will be retrieved
|
473
|
+
# [name] The user's name
|
384
474
|
# == Returns:
|
385
475
|
# If the user can be retrieved, it is returned as a Hash. Otherwise,
|
386
476
|
# the result is nil
|
@@ -392,27 +482,27 @@ module GHTorrent
|
|
392
482
|
|
393
483
|
u = retrieve_user_byemail(email, name)
|
394
484
|
|
395
|
-
if u.nil? or u['
|
485
|
+
if u.nil? or u['login'].nil?
|
396
486
|
debug "GHTorrent: Cannot find #{email} through search API query"
|
487
|
+
login = (0...8).map { 65.+(rand(25)).chr }.join
|
397
488
|
users.insert(:email => email,
|
398
489
|
:name => name,
|
399
|
-
:login =>
|
490
|
+
:login => login,
|
400
491
|
:created_at => Time.now,
|
401
492
|
:ext_ref_id => ""
|
402
493
|
)
|
403
|
-
|
494
|
+
info "GHTorrent: Added fake user #{login} -> #{email}"
|
495
|
+
users.first(:login => login)
|
404
496
|
else
|
405
|
-
users.insert(:login => u['
|
406
|
-
:name => u['
|
407
|
-
:company => u['
|
408
|
-
:email => u['
|
409
|
-
:
|
410
|
-
:
|
411
|
-
:location => u['user']['location'],
|
412
|
-
:created_at => date(u['user']['created_at']),
|
497
|
+
users.insert(:login => u['login'],
|
498
|
+
:name => u['name'],
|
499
|
+
:company => u['company'],
|
500
|
+
:email => u['email'],
|
501
|
+
:location => u['location'],
|
502
|
+
:created_at => date(u['created_at']),
|
413
503
|
:ext_ref_id => u[@ext_uniq])
|
414
|
-
|
415
|
-
users.first(:
|
504
|
+
info "GHTorrent: Found #{email} through search API query"
|
505
|
+
users.first(:login => u['login'])
|
416
506
|
end
|
417
507
|
else
|
418
508
|
debug "GHTorrent: User with email #{email} exists"
|
@@ -430,15 +520,21 @@ module GHTorrent
|
|
430
520
|
# == Returns:
|
431
521
|
# If the repo can be retrieved, it is returned as a Hash. Otherwise,
|
432
522
|
# the result is nil
|
433
|
-
def ensure_repo(user, repo)
|
523
|
+
def ensure_repo(user, repo, commits = true, project_members = true, watchers = true)
|
434
524
|
|
435
|
-
ensure_user(user,
|
525
|
+
ensure_user(user, false, false)
|
436
526
|
repos = @db[:projects]
|
437
527
|
curuser = @db[:users].first(:login => user)
|
438
528
|
currepo = repos.first(:owner_id => curuser[:id], :name => repo)
|
439
529
|
|
440
530
|
if currepo.nil?
|
441
531
|
r = retrieve_repo(user, repo)
|
532
|
+
|
533
|
+
if r.nil?
|
534
|
+
warn "Repo #{user}/#{repo} does not exist"
|
535
|
+
return
|
536
|
+
end
|
537
|
+
|
442
538
|
repos.insert(:url => r['url'],
|
443
539
|
:owner_id => @db[:users].filter(:login => user).first[:id],
|
444
540
|
:name => r['name'],
|
@@ -448,9 +544,9 @@ module GHTorrent
|
|
448
544
|
:ext_ref_id => r[@ext_uniq])
|
449
545
|
|
450
546
|
info "GHTorrent: New repo #{repo}"
|
451
|
-
ensure_commits(user, repo)
|
452
|
-
ensure_project_members(user, repo)
|
453
|
-
ensure_watchers(user, repo)
|
547
|
+
ensure_commits(user, repo) if commits
|
548
|
+
ensure_project_members(user, repo) if project_members
|
549
|
+
ensure_watchers(user, repo) if watchers
|
454
550
|
repos.first(:owner_id => curuser[:id], :name => repo)
|
455
551
|
else
|
456
552
|
debug "GHTorrent: Repo #{repo} exists"
|
@@ -461,27 +557,32 @@ module GHTorrent
|
|
461
557
|
##
|
462
558
|
# Make sure that a project has all the registered members defined
|
463
559
|
def ensure_project_members(user, repo)
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
560
|
+
currepo = ensure_repo(user, repo, true, false, true)
|
561
|
+
time = currepo[:created_at]
|
562
|
+
|
563
|
+
project_members = @db.from(:project_members, :users).\
|
564
|
+
where(:project_members__user_id => :users__id).\
|
565
|
+
where(:project_members__repo_id => currepo[:id]).select(:login).all
|
468
566
|
|
469
567
|
retrieve_repo_collaborators(user, repo).reduce([]) do |acc, x|
|
470
|
-
if project_members.find {
|
568
|
+
if project_members.find {|y| y[:login] == x['login']}.nil?
|
471
569
|
acc << x
|
472
570
|
else
|
473
571
|
acc
|
474
572
|
end
|
475
|
-
end.map { |x| ensure_project_member(user, repo, x['login'],
|
573
|
+
end.map { |x| ensure_project_member(user, repo, x['login'], time) }
|
476
574
|
end
|
477
575
|
|
478
576
|
##
|
479
577
|
# Make sure that a project member exists in a project
|
480
578
|
def ensure_project_member(owner, repo, new_member, date_added)
|
481
579
|
pr_members = @db[:project_members]
|
580
|
+
project = ensure_repo(owner, repo, true, false, true)
|
482
581
|
new_user = ensure_user(new_member, false, false)
|
483
|
-
|
484
|
-
project
|
582
|
+
|
583
|
+
if project.nil? or new_user.nil?
|
584
|
+
return
|
585
|
+
end
|
485
586
|
|
486
587
|
memb_exist = pr_members.first(:user_id => new_user[:id],
|
487
588
|
:repo_id => project[:id])
|
@@ -489,6 +590,12 @@ module GHTorrent
|
|
489
590
|
if memb_exist.nil?
|
490
591
|
added = if date_added.nil? then Time.now else date_added end
|
491
592
|
retrieved = retrieve_repo_collaborator(owner, repo, new_member)
|
593
|
+
|
594
|
+
if retrieved.nil?
|
595
|
+
warn "Project member #{new_member} does not exist in #{owner}/#{repo}"
|
596
|
+
return
|
597
|
+
end
|
598
|
+
|
492
599
|
pr_members.insert(
|
493
600
|
:user_id => new_user[:id],
|
494
601
|
:repo_id => project[:id],
|
@@ -513,7 +620,6 @@ module GHTorrent
|
|
513
620
|
# [user] The login name of the user to check the organizations for
|
514
621
|
#
|
515
622
|
def ensure_orgs(user)
|
516
|
-
usr = @db[:users].first(:login => user)
|
517
623
|
retrieve_orgs(user).map{|o| ensure_participation(user, o['login'])}
|
518
624
|
end
|
519
625
|
|
@@ -525,8 +631,8 @@ module GHTorrent
|
|
525
631
|
# [org] The login name of the organization to check whether the user
|
526
632
|
# belongs in
|
527
633
|
#
|
528
|
-
def ensure_participation(user, organization)
|
529
|
-
org = ensure_org(organization)
|
634
|
+
def ensure_participation(user, organization, members = true)
|
635
|
+
org = ensure_org(organization, members)
|
530
636
|
usr = ensure_user(user, false, false)
|
531
637
|
|
532
638
|
org_members = @db[:organization_members]
|
@@ -550,14 +656,21 @@ module GHTorrent
|
|
550
656
|
# ==Parameters:
|
551
657
|
# [organization] The login name of the organization
|
552
658
|
#
|
553
|
-
def ensure_org(organization)
|
554
|
-
org = @db[:users].
|
659
|
+
def ensure_org(organization, members)
|
660
|
+
org = @db[:users].first(:login => organization, :type => 'org')
|
555
661
|
|
556
662
|
if org.nil?
|
557
|
-
ensure_user(
|
663
|
+
org = ensure_user(organization, false, false)
|
664
|
+
if members
|
665
|
+
retrieve_org_members(organization).map { |x|
|
666
|
+
ensure_participation(ensure_user(x['login'], false, false)[:login],
|
667
|
+
organization, false)
|
668
|
+
}
|
669
|
+
end
|
670
|
+
org
|
558
671
|
else
|
559
672
|
debug "GHTorrent: Organization #{organization} exists"
|
560
|
-
org
|
673
|
+
org
|
561
674
|
end
|
562
675
|
end
|
563
676
|
|
@@ -572,7 +685,6 @@ module GHTorrent
|
|
572
685
|
commit_id = @db[:commits].first(:sha => sha)[:id]
|
573
686
|
stored_comments = @db[:commit_comments].filter(:commit_id => commit_id)
|
574
687
|
commit_comments = retrieve_commit_comments(user, repo, sha)
|
575
|
-
#user_id = @db[:users].first(:login => user)[:id]
|
576
688
|
|
577
689
|
not_saved = commit_comments.reduce([]) do |acc, x|
|
578
690
|
if stored_comments.find{|y| y[:comment_id] == x['id']}.nil?
|
@@ -600,16 +712,16 @@ module GHTorrent
|
|
600
712
|
retrieved = retrieve_commit_comment(user, repo, id)
|
601
713
|
|
602
714
|
if retrieved.nil?
|
603
|
-
|
715
|
+
warn "GHTorrent: Commit comment #{id} deleted"
|
604
716
|
return
|
605
717
|
end
|
606
718
|
|
607
|
-
commit = ensure_commit(repo, retrieved['commit_id'], user,
|
719
|
+
commit = ensure_commit(repo, retrieved['commit_id'], user, false)
|
608
720
|
user = ensure_user(user, false, false)
|
609
721
|
@db[:commit_comments].insert(
|
610
722
|
:commit_id => commit[:id],
|
611
723
|
:user_id => user[:id],
|
612
|
-
:body => retrieved['body'],
|
724
|
+
:body => retrieved['body'][0..255],
|
613
725
|
:line => retrieved['line'],
|
614
726
|
:position => retrieved['position'],
|
615
727
|
:comment_id => retrieved['id'],
|
@@ -617,45 +729,67 @@ module GHTorrent
|
|
617
729
|
:created_at => date(retrieved['created_at'])
|
618
730
|
)
|
619
731
|
info "GHTorrent: Added commit comment #{commit[:sha]} -> #{user[:login]}"
|
620
|
-
@db[:commit_comments].first(:comment_id => id)
|
621
732
|
else
|
733
|
+
unless created_at.nil?
|
734
|
+
@db[:commit_comments].filter(:comment_id => id)\
|
735
|
+
.update(:created_at => date(created_at))
|
736
|
+
info "GHTorrent: Updating comment #{user}/#{repo} -> #{id}"
|
737
|
+
end
|
622
738
|
info "GHTorrent: Commit comment #{id} exists"
|
623
|
-
stored_comment
|
624
739
|
end
|
740
|
+
@db[:commit_comments].first(:comment_id => id)
|
625
741
|
end
|
626
742
|
|
627
743
|
##
|
628
744
|
# Make sure that all watchers of a repository are registered
|
629
745
|
def ensure_watchers(owner, repo)
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
746
|
+
currepo = ensure_repo(owner, repo, true, true, false)
|
747
|
+
time = currepo[:created_at]
|
748
|
+
|
749
|
+
if currepo.nil?
|
750
|
+
warn "Could not retrieve watchers for #{owner}/#{repo}"
|
751
|
+
return
|
752
|
+
end
|
753
|
+
|
754
|
+
watchers = @db.from(:watchers, :users).\
|
755
|
+
where(:watchers__user_id => :users__id).\
|
756
|
+
where(:watchers__repo_id => currepo[:id]).select(:login).all
|
635
757
|
|
636
758
|
retrieve_watchers(owner, repo).reduce([]) do |acc, x|
|
637
|
-
if watchers.find { |y|
|
759
|
+
if watchers.find { |y|
|
760
|
+
y[:login] == x['login']
|
761
|
+
}.nil?
|
638
762
|
acc << x
|
639
763
|
else
|
640
764
|
acc
|
641
765
|
end
|
642
|
-
end.map { |x| ensure_watcher(owner, repo, x['login']) }
|
766
|
+
end.map { |x| ensure_watcher(owner, repo, x['login'], time) }
|
643
767
|
end
|
644
768
|
|
645
769
|
##
|
646
770
|
# Make sure that a watcher exists for a project
|
647
771
|
def ensure_watcher(owner, repo, watcher, date_added = nil)
|
648
|
-
|
772
|
+
project = ensure_repo(owner, repo, false, false, false)
|
649
773
|
new_watcher = ensure_user(watcher, false, false)
|
650
|
-
owner_id = @db[:users].first(:login => owner)[:id]
|
651
|
-
project = @db[:projects].first(:owner_id => owner_id, :name => repo)
|
652
774
|
|
775
|
+
if new_watcher.nil? or project.nil?
|
776
|
+
warn "GHTorrent: Watcher #{watcher} does not exist"
|
777
|
+
return
|
778
|
+
end
|
779
|
+
|
780
|
+
watchers = @db[:watchers]
|
653
781
|
memb_exist = watchers.first(:user_id => new_watcher[:id],
|
654
|
-
|
782
|
+
:repo_id => project[:id])
|
655
783
|
|
656
784
|
if memb_exist.nil?
|
657
785
|
added = if date_added.nil? then Time.now else date_added end
|
658
786
|
retrieved = retrieve_watcher(owner, repo, watcher)
|
787
|
+
|
788
|
+
if retrieved.nil?
|
789
|
+
warn "Watcher #{watcher} no longer watches #{owner}/#{repo}"
|
790
|
+
return
|
791
|
+
end
|
792
|
+
|
659
793
|
watchers.insert(
|
660
794
|
:user_id => new_watcher[:id],
|
661
795
|
:repo_id => project[:id],
|
@@ -673,6 +807,313 @@ module GHTorrent
|
|
673
807
|
end
|
674
808
|
end
|
675
809
|
|
810
|
+
##
|
811
|
+
# Process all pull requests
|
812
|
+
def ensure_pull_requests(owner, repo)
|
813
|
+
currepo = ensure_repo(owner, repo, false, false, false)
|
814
|
+
if currepo.nil?
|
815
|
+
warn "Could not retrieve pull requests from #{owner}/#{repo}"
|
816
|
+
return
|
817
|
+
end
|
818
|
+
|
819
|
+
pull_reqs = @db[:pull_requests].filter(:base_repo_id => currepo[:id])
|
820
|
+
|
821
|
+
retrieve_pull_requests(owner, repo).reduce([]) do |acc, x|
|
822
|
+
if pull_reqs.find { |y| y[:pullreq_id] == x['number'] }.nil?
|
823
|
+
acc << x
|
824
|
+
else
|
825
|
+
acc
|
826
|
+
end
|
827
|
+
end.map { |x| ensure_pull_request(owner, repo, x['number']) }
|
828
|
+
end
|
829
|
+
|
830
|
+
##
|
831
|
+
# Process a pull request
|
832
|
+
def ensure_pull_request(owner, repo, pullreq_id,
|
833
|
+
comments = true, commits = true,
|
834
|
+
state = nil, created_at = nil)
|
835
|
+
pulls_reqs = @db[:pull_requests]
|
836
|
+
pull_req_history = @db[:pull_request_history]
|
837
|
+
|
838
|
+
project = ensure_repo(owner, repo, false, false, false)
|
839
|
+
|
840
|
+
if project.nil?
|
841
|
+
return
|
842
|
+
end
|
843
|
+
|
844
|
+
# Adds a pull request history event
|
845
|
+
add_history = Proc.new do |id, ts, unq, act|
|
846
|
+
|
847
|
+
entry = pull_req_history.first(:pull_request_id => id,
|
848
|
+
:ext_ref_id => unq, :action => act)
|
849
|
+
if entry.nil?
|
850
|
+
pull_req_history.insert(:pull_request_id => id, :created_at => ts,
|
851
|
+
:ext_ref_id => unq, :action => act)
|
852
|
+
info "GHTorrent: New pull request (#{id}) history entry (#{act})"
|
853
|
+
else
|
854
|
+
pull_req_history.filter(:pull_request_id => id, :ext_ref_id => unq,
|
855
|
+
:action => act).update(:created_at => ts)
|
856
|
+
info "GHTorrent: Updating pull request (#{id}) history entry (#{act}) timestamp #{ts}"
|
857
|
+
end
|
858
|
+
end
|
859
|
+
|
860
|
+
# Checks whether a pull request concerns two branches of the same
|
861
|
+
# repository
|
862
|
+
is_intra_branch = Proc.new do |req|
|
863
|
+
req['head']['repo'].nil?
|
864
|
+
end
|
865
|
+
|
866
|
+
# Produces a log message
|
867
|
+
log_msg = Proc.new do |req|
|
868
|
+
head = if is_intra_branch.call(req)
|
869
|
+
req['base']['repo']['full_name']
|
870
|
+
else
|
871
|
+
req['head']['repo']['full_name']
|
872
|
+
end
|
873
|
+
|
874
|
+
<<-eos.gsub(/\s+/, " ").strip
|
875
|
+
GHTorrent: Pull request #{pullreq_id}
|
876
|
+
#{head} -> #{req['base']['repo']['full_name']}
|
877
|
+
eos
|
878
|
+
end
|
879
|
+
|
880
|
+
retrieved = retrieve_pull_request(owner, repo, pullreq_id)
|
881
|
+
|
882
|
+
if retrieved.nil?
|
883
|
+
warn "GHTorrent: Cannot retrieve pull request (#{owner}/#{repo} #{pullreq_id})"
|
884
|
+
return
|
885
|
+
end
|
886
|
+
|
887
|
+
base_repo = ensure_repo(retrieved['base']['repo']['owner']['login'],
|
888
|
+
retrieved['base']['repo']['name'],
|
889
|
+
false, false, false)
|
890
|
+
|
891
|
+
base_commit = ensure_commit(retrieved['base']['repo']['name'],
|
892
|
+
retrieved['base']['sha'],
|
893
|
+
retrieved['base']['repo']['owner']['login']
|
894
|
+
)
|
895
|
+
|
896
|
+
if is_intra_branch.call(retrieved)
|
897
|
+
head_repo = base_repo
|
898
|
+
head_commit =
|
899
|
+
warn "GHTorrent: Pull request is intra branch"
|
900
|
+
else
|
901
|
+
|
902
|
+
head_repo = ensure_repo(retrieved['head']['repo']['owner']['login'],
|
903
|
+
retrieved['head']['repo']['name'],
|
904
|
+
false, false, false)
|
905
|
+
|
906
|
+
head_commit = ensure_commit(retrieved['head']['repo']['name'],
|
907
|
+
retrieved['head']['sha'],
|
908
|
+
retrieved['head']['repo']['owner']['login'])
|
909
|
+
end
|
910
|
+
|
911
|
+
pull_req_user = ensure_user(retrieved['user']['login'], false, false)
|
912
|
+
|
913
|
+
merged = if retrieved['merged_at'].nil? then false else true end
|
914
|
+
closed = if retrieved['closed_at'].nil? then false else true end
|
915
|
+
|
916
|
+
pull_req = pulls_reqs.first(:base_repo_id => project[:id],
|
917
|
+
:pullreq_id => pullreq_id)
|
918
|
+
if pull_req.nil?
|
919
|
+
pulls_reqs.insert(
|
920
|
+
:head_repo_id => if not head_repo.nil? then head_repo[:id] end,
|
921
|
+
:base_repo_id => base_repo[:id],
|
922
|
+
:head_commit_id => if not head_commit.nil? then head_commit[:id] end,
|
923
|
+
:base_commit_id => base_commit[:id],
|
924
|
+
:user_id => pull_req_user[:id],
|
925
|
+
:pullreq_id => pullreq_id,
|
926
|
+
:intra_branch => is_intra_branch.call(retrieved)
|
927
|
+
)
|
928
|
+
|
929
|
+
info log_msg.call(retrieved)
|
930
|
+
else
|
931
|
+
debug log_msg.call(retrieved) + " exists"
|
932
|
+
end
|
933
|
+
|
934
|
+
pull_req = pulls_reqs.first(:base_repo_id => project[:id],
|
935
|
+
:pullreq_id => pullreq_id)
|
936
|
+
|
937
|
+
add_history.call(pull_req[:id], date(retrieved['created_at']),
|
938
|
+
retrieved[@ext_uniq], 'opened')
|
939
|
+
add_history.call(pull_req[:id], date(retrieved['merged_at']),
|
940
|
+
retrieved[@ext_uniq], 'merged') if merged
|
941
|
+
add_history.call(pull_req[:id], date(retrieved['closed_at']),
|
942
|
+
retrieved[@ext_uniq], 'closed') if closed
|
943
|
+
add_history.call(pull_req[:id], date(created_at), retrieved[@ext_uniq],
|
944
|
+
state) unless state.nil?
|
945
|
+
|
946
|
+
ensure_pull_request_commits(owner, repo, pullreq_id) if commits
|
947
|
+
ensure_pullreq_comments(owner, repo, pullreq_id, created_at) if comments
|
948
|
+
|
949
|
+
pulls_reqs.first(:base_repo_id => project[:id],
|
950
|
+
:pullreq_id => pullreq_id)
|
951
|
+
end
|
952
|
+
|
953
|
+
##
# Make sure that all comments of a pull request are stored.
#
# ==Parameters:
# [owner] The user to which the project belongs
# [repo] The repository/project that contains the pull request
# [pullreq_id] The identifier of the pull request within the project
# [created_at] When nil, the repository's :created_at value is handed to
#              ensure_pullreq_comment; otherwise the current time is used
#
# ==Returns:
# An array with the result of ensure_pullreq_comment for every comment that
# is not stored yet, or nil when the repository or the pull request could
# not be retrieved.
def ensure_pullreq_comments(owner, repo, pullreq_id, created_at)
  currepo = ensure_repo(owner, repo, true, true, false)

  if currepo.nil?
    warn "Could not retrieve repository #{owner}/#{repo}"
    return
  end

  # Read from currepo only after the nil check above.
  # NOTE(review): a non-nil created_at is replaced by Time.now rather than
  # being passed on -- confirm that this is the intended behaviour.
  time = created_at.nil? ? currepo[:created_at] : Time.now

  pull_req = ensure_pull_request(owner, repo, pullreq_id, false, true)

  if pull_req.nil?
    warn "Could not retrieve pull req #{owner}/#{repo} -> #{pullreq_id}"
    return
  end

  stored = @db[:pull_request_comments]

  retrieve_pull_req_comments(owner, repo, pullreq_id).select do |x|
    # Same column name (:pull_request_id) that ensure_pullreq_comment uses
    # when it looks up and inserts rows in this table.
    stored.first(:pull_request_id => pull_req[:id],
                 :comment_id => x['id']).nil?
  end.map do |x|
    ensure_pullreq_comment(owner, repo, pullreq_id, x['id'], time)
  end
end
|
981
|
+
|
982
|
+
##
# Make sure that a single pull request comment is stored.
#
# ==Parameters:
# [owner] The user to which the project belongs
# [repo] The repository/project that contains the pull request
# [pullreq_id] The identifier of the pull request within the project
# [comment_id] The identifier of the comment to store
# [created_at] Not used by this method; the stored timestamp is taken from
#              the retrieved comment
#
# Returns nil (after logging a warning) when the pull request, the comment
# or the commenter cannot be retrieved.
def ensure_pullreq_comment(owner, repo, pullreq_id, comment_id, created_at)
  pull_req = ensure_pull_request(owner, repo, pullreq_id, false, true)

  if pull_req.nil?
    warn "Could not retrieve pull req #{owner}/#{repo} -> #{pullreq_id}"
    return
  end

  comments = @db[:pull_request_comments]
  exists = comments.first(:pull_request_id => pull_req[:id],
                          :comment_id => comment_id)

  if exists.nil?
    retrieved = retrieve_pull_req_comment(owner, repo, pullreq_id, comment_id)

    if retrieved.nil?
      warn "Could not retrieve comment #{comment_id} for pullreq #{owner}/#{repo} -> #{pullreq_id}"
      return
    end

    login = retrieved['user']['login']
    commenter = ensure_user(login, false, false)

    if commenter.nil?
      warn "Could not retrieve commenter #{login} " \
           "for pullreq comment #{owner}/#{repo} -> #{pullreq_id}(#{comment_id})"
      # Without a commenter there is no :user_id to store, so stop here
      # instead of failing on commenter[:id] below.
      return
    end

    # The commit is optional: a missing one is stored as a nil :commit_id
    commit = ensure_commit(repo, retrieved['original_commit_id'], owner)

    comments.insert(
      :pull_request_id => pull_req[:id],
      :user_id => commenter[:id],
      :comment_id => comment_id,
      :position => retrieved['original_position'],
      # Only the first 255 characters of the body are stored
      :body => retrieved['body'][0..254],
      :commit_id => (commit[:id] unless commit.nil?),
      :created_at => retrieved['created_at'],
      :ext_ref_id => retrieved[@ext_uniq]
    )
    debug "GHTorrent: Adding comment #{comment_id} for pullreq #{owner}/#{repo} -> #{pullreq_id}"
  else
    # Nothing is written for an already stored comment; only a log line
    debug "GHTorrent: Updating comment #{comment_id} for pullreq #{owner}/#{repo} -> #{pullreq_id}"
  end
end
|
1025
|
+
|
1026
|
+
##
# Make sure that the commits of a pull request are stored and linked to it.
#
# ==Parameters:
# [owner] The user to which the project belongs
# [repo] The repository/project that contains the pull request
# [pullreq_id] The identifier of the pull request within the project
#
# ==Returns:
# One entry per stored commit: the existing link row, or the result of the
# log call for a newly added link. Returns nil when the pull request cannot
# be retrieved.
def ensure_pull_request_commits(owner, repo, pullreq_id)
  commits = retrieve_pull_req_commits(owner, repo, pullreq_id).map { |c|
    ensure_commit(repo, c['sha'], owner, true)
  }.compact # commits that could not be stored come back as nil; skip them

  return commits if commits.empty?

  # The pull request is the same for every commit: look it up once
  pullreq = ensure_pull_request(owner, repo, pullreq_id, false, false)

  if pullreq.nil?
    warn "Could not retrieve pull req #{owner}/#{repo} -> #{pullreq_id}"
    return
  end

  links = @db[:pull_request_commits]

  commits.map do |c|
    exists = links.first(:pull_request_id => pullreq[:id],
                         :commit_id => c[:id])
    if exists.nil?
      links.insert(:pull_request_id => pullreq[:id],
                   :commit_id => c[:id])

      info "GHTorrent: Added commit #{c[:sha]} to pullreq #{owner}/#{repo} -> #{pullreq_id}"
    else
      debug "GHTorrent: Commit #{c[:sha]} exists in pullreq #{owner}/#{repo} -> #{pullreq_id}"
      exists
    end
  end
end
|
1044
|
+
|
1045
|
+
##
# Get all forks for a project.
#
# ==Parameters:
# [owner] The user to which the project belongs
# [repo] The repository/project to find forks for
#
# ==Returns:
# An array with the result of ensure_fork for every retrieved fork that is
# not stored yet, or nil when the project itself cannot be retrieved.
def ensure_forks(owner, repo)
  currepo = ensure_repo(owner, repo, false, false, false)

  if currepo.nil?
    warn "Could not retrieve forks for #{owner}/#{repo}"
    return
  end

  # Read from currepo only after the nil check above
  time = currepo[:created_at]

  # NOTE(review): :login is selected while only :forks and :projects are
  # joined here -- confirm that one of these tables provides that column.
  existing_forks = @db.from(:forks, :projects).
    where(:forks__forked_project_id => :projects__id).
    where(:forks__forked_from_id => currepo[:id]).
    select(:name, :login).all

  retrieve_forks(owner, repo).select do |x|
    # Keep only forks without a matching owner/name pair in the database
    existing_forks.none? do |y|
      y[:login] == x['owner']['login'] && y[:name] == x['name']
    end
  end.map { |x| ensure_fork(owner, repo, x['id'], time) }
end
|
1074
|
+
|
1075
|
+
##
# Make sure that a fork is retrieved for a project
#
# ==Parameters:
# [owner] The user to which the forked project belongs
# [repo] The forked repository/project
# [fork_id] The identifier of the fork
# [date_added] Timestamp to record for the fork. For a new fork the current
#              time is used when nil; for a stored fork nil means that
#              nothing is updated
def ensure_fork(owner, repo, fork_id, date_added = nil)
  fork_table = @db[:forks]
  parent = ensure_repo(owner, repo, false, false, false)

  unless fork_table.first(:fork_id => fork_id).nil?
    # Stored fork: refresh its timestamp only when one was supplied
    return if date_added.nil?

    fork_table.filter(:fork_id => fork_id).
      update(:created_at => date(date_added))
    return debug("GHTorrent: Updating fork #{owner}/#{repo} (#{fork_id})")
  end

  added = date_added.nil? ? Time.now : date_added
  retrieved = retrieve_fork(owner, repo, fork_id)

  if retrieved.nil?
    warn "GHTorrent: Fork #{fork_id} does not exist for #{owner}/#{repo}"
    return
  end

  # 'full_name' carries both parts, separated by a slash
  forked_repo_owner, forked_repo_name = retrieved['full_name'].split(/\//)

  child = ensure_repo(forked_repo_owner, forked_repo_name)

  if parent.nil? || child.nil?
    warn "Could not add fork #{fork_id}"
    return
  end

  fork_table.insert(:forked_project_id => child[:id],
                    :forked_from_id => parent[:id],
                    :fork_id => fork_id,
                    :created_at => added,
                    :ext_ref_id => retrieved[@ext_uniq])
  info "GHTorrent: Added #{forked_repo_owner}/#{forked_repo_name} as fork of #{owner}/#{repo}"
end
|
1116
|
+
|
676
1117
|
private
|
677
1118
|
|
678
1119
|
# Store a commit contained in a hash. First check whether the commit exists.
|
@@ -684,21 +1125,24 @@ module GHTorrent
|
|
684
1125
|
author = commit_user(c['author'], c['commit']['author'])
|
685
1126
|
commiter = commit_user(c['committer'], c['commit']['committer'])
|
686
1127
|
|
687
|
-
|
688
|
-
|
689
|
-
|
1128
|
+
repository = ensure_repo(user, repo, false, false, false)
|
1129
|
+
|
1130
|
+
if repository.nil?
|
1131
|
+
warn "Could not store commit #{user}/#{repo} #{c['sha']}"
|
1132
|
+
return
|
1133
|
+
end
|
690
1134
|
|
691
1135
|
commits.insert(:sha => c['sha'],
|
692
1136
|
:author_id => author[:id],
|
693
1137
|
:committer_id => commiter[:id],
|
694
|
-
:project_id =>
|
1138
|
+
:project_id => repository[:id],
|
695
1139
|
:created_at => date(c['commit']['author']['date']),
|
696
1140
|
:ext_ref_id => c[@ext_uniq]
|
697
1141
|
)
|
1142
|
+
debug "GHTorrent: New commit #{user}/#{repo} -> #{c['sha']} "
|
698
1143
|
commits.first(:sha => c['sha'])
|
699
|
-
debug "GHTorrent: New commit #{repo} -> #{c['sha']} "
|
700
1144
|
else
|
701
|
-
debug "GHTorrent: Commit #{repo} -> #{c['sha']} exists"
|
1145
|
+
debug "GHTorrent: Commit #{user}/#{repo} -> #{c['sha']} exists"
|
702
1146
|
commit
|
703
1147
|
end
|
704
1148
|
end
|
@@ -706,12 +1150,28 @@ module GHTorrent
|
|
706
1150
|
# Run a block in a DB transaction. Exceptions trigger transaction rollback
# and are rethrown.
#
# A database connection and a persister are obtained when none are cached.
# Both are released again after the block has run, whether it succeeded or
# failed, so the next call starts with fresh ones.
def transaction(&block)
  @db ||= get_db
  @persister ||= persister

  start_time = Time.now
  begin
    @db.transaction(:rollback => :reraise, :isolation => :committed) do
      # The block object itself is passed along as the yielded argument
      yield block
    end
    total = Time.now.to_ms - start_time.to_ms
    debug "GHTorrent: Transaction committed (#{total} ms)"
  rescue Exception
    # Everything is re-raised unchanged; rescuing here only adds the log line
    total = Time.now.to_ms - start_time.to_ms
    warn "GHTorrent: Transaction failed (#{total} ms)"
    raise
  ensure
    # Nested ensure blocks: a failing disconnect must not prevent the
    # persister from being closed or the cached handles from being reset.
    begin
      @db.disconnect
    ensure
      begin
        @persister.close
      ensure
        @db = nil
        @persister = nil
        GC.start
      end
    end
  end
end
|
716
1176
|
|
717
1177
|
##
|
@@ -742,15 +1202,6 @@ module GHTorrent
|
|
742
1202
|
email =~ /^[a-zA-Z][\w\.-]*[a-zA-Z0-9]@[a-zA-Z0-9][\w\.-]*[a-zA-Z0-9]\.[a-zA-Z][a-zA-Z\.]*[a-zA-Z]$/
|
743
1203
|
end
|
744
1204
|
end
|
745
|
-
# Base exception for all GHTorrent exceptions
|
746
|
-
class GHTorrentException < Exception
|
747
|
-
end
|
748
|
-
end
|
749
|
-
|
750
|
-
class Time
|
751
|
-
def to_ms
|
752
|
-
(self.to_f * 1000.0).to_i
|
753
|
-
end
|
754
1205
|
end
|
755
1206
|
|
756
1207
|
# vim: set sta sts=2 shiftwidth=2 sw=2 et ai :
|