ghtorrent 0.6 → 0.7.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -122,11 +122,11 @@ Loads object ids from a collection to a queue for further processing.
122
122
  connection.close { EventMachine.stop }
123
123
  }
124
124
 
125
- # Read next 1000 items and queue them
125
+ # Read next 100000 items and queue them
126
126
  read_and_publish = Proc.new {
127
127
 
128
128
  to_read = if options.number == -1
129
- 1000
129
+ 100000
130
130
  else
131
131
  if options.number - num_read - 1 <= 0
132
132
  -1
@@ -46,31 +46,44 @@ An efficient way to get all data for a single repo
46
46
  end
47
47
 
48
48
  def go
49
+ self.settings = override_config(settings, :mirror_history_pages_back, -1)
49
50
  user_entry = ght.transaction{ght.ensure_user(ARGV[0], false, false)}
50
51
 
51
52
  if user_entry.nil?
52
- Trollop::die "Cannot find user #{owner}"
53
+ Trollop::die "Cannot find user #{ARGV[0]}"
53
54
  end
54
55
 
55
56
  user = user_entry[:login]
56
57
 
57
- repo_entry = ght.transaction{ght.ensure_repo(ARGV[0], ARGV[1], false, false, false)}
58
+ repo_entry = ght.transaction{ght.ensure_repo(ARGV[0], ARGV[1], false, false,
59
+ false, false)}
58
60
 
59
61
  if repo_entry.nil?
60
- Trollop::die "Cannot find repository #{owner}/#{ARGV[1]}"
62
+ Trollop::die "Cannot find repository #{ARGV[0]}/#{ARGV[1]}"
61
63
  end
62
64
 
63
65
  repo = repo_entry[:name]
64
66
 
65
- %w(ensure_commits ensure_forks ensure_pull_requests
66
- ensure_issues ensure_project_members ensure_watchers).each {|x|
67
+ def send_message(function, user, repo)
67
68
  begin
68
- ght.send(x, user, repo)
69
+ ght.send(function, user, repo, refresh = true)
69
70
  rescue Exception => e
70
71
  puts STDERR, e.message
71
72
  puts STDERR, e.backtrace
72
73
  end
73
- }
74
+ end
75
+
76
+ functions = %w(ensure_commits ensure_forks ensure_pull_requests
77
+ ensure_issues ensure_project_members ensure_watchers)
78
+
79
+ if ARGV[2].nil?
80
+ functions.each do |x|
81
+ send_message(x, user, repo)
82
+ end
83
+ else
84
+ Trollop::die("Not a valid function: #{ARGV[2]}") unless functions.include? ARGV[2]
85
+ send_message(ARGV[2], user, repo)
86
+ end
74
87
  end
75
88
  end
76
89
 
@@ -79,40 +92,57 @@ end
79
92
  class TransactedGHTorrent < GHTorrent::Mirror
80
93
 
81
94
  def ensure_commit(repo, sha, user, comments = true)
82
- transaction do
95
+ check_transaction do
83
96
  super(repo, sha, user, comments)
84
97
  end
85
98
  end
86
99
 
87
- def ensure_fork(owner, repo, fork_id, date_added = nil)
88
- transaction do
89
- super(owner, repo, fork_id, date_added)
100
+ def ensure_fork(owner, repo, fork_id)
101
+ check_transaction do
102
+ super(owner, repo, fork_id)
90
103
  end
91
104
  end
92
105
 
93
106
  def ensure_pull_request(owner, repo, pullreq_id,
94
107
  comments = true, commits = true,
95
108
  state = nil, created_at = nil)
96
- transaction do
109
+ check_transaction do
97
110
  super(owner, repo, pullreq_id, comments, commits, state, created_at)
98
111
  end
99
112
  end
100
113
 
101
114
  def ensure_issue(owner, repo, issue_id, events = true, comments = true)
102
- transaction do
115
+ check_transaction do
103
116
  super(owner, repo, issue_id, events, comments)
104
117
  end
105
118
  end
106
119
 
107
120
  def ensure_project_member(owner, repo, new_member, date_added)
108
- transaction do
121
+ check_transaction do
109
122
  super(owner, repo, new_member, date_added)
110
123
  end
111
124
  end
112
125
 
113
126
  def ensure_watcher(owner, repo, watcher, date_added = nil)
114
- transaction do
127
+ check_transaction do
115
128
  super(owner, repo, watcher, date_added)
116
129
  end
117
130
  end
131
+
132
+ def check_transaction(&block)
133
+ begin
134
+ if @db.in_transaction?
135
+ debug "Transaction already started"
136
+ yield block
137
+ else
138
+ transaction do
139
+ yield block
140
+ end
141
+ end
142
+ rescue Exception => e
143
+ puts STDERR, e.message
144
+ puts STDERR, e.backtrace
145
+ end
146
+ end
147
+
118
148
  end
@@ -0,0 +1,72 @@
1
+ require 'rubygems'
2
+
3
+ require 'ghtorrent/ghtorrent'
4
+ require 'ghtorrent/settings'
5
+ require 'ghtorrent/logging'
6
+ require 'ghtorrent/command'
7
+ require 'ghtorrent/retriever'
8
+ require 'ghtorrent/commands/ght_retrieve_repo'
9
+
10
+ class GHTRetrieveUser < GHTRetrieveRepo
11
+
12
+ def prepare_options(options)
13
+ options.banner <<-BANNER
14
+ An efficient way to get all data for a single user
15
+
16
+ #{command_name} [options] user
17
+
18
+ BANNER
19
+ end
20
+
21
+ def validate
22
+ super
23
+ Trollop::die "One argument are required" unless args[0] && !args[0].empty?
24
+ end
25
+
26
+ def go
27
+ self.settings = override_config(settings, :mirror_history_pages_back, -1)
28
+ user_entry = ght.transaction{ght.ensure_user(ARGV[0], false, false)}
29
+
30
+ if user_entry.nil?
31
+ Trollop::die "Cannot find user #{ARGV[0]}"
32
+ end
33
+
34
+ user = user_entry[:login]
35
+
36
+ def send_message(function, user)
37
+ begin
38
+ ght.send(function, user)
39
+ rescue Exception => e
40
+ puts STDERR, e.message
41
+ puts STDERR, e.backtrace
42
+ end
43
+ end
44
+
45
+ functions = %w(ensure_user_followers ensure_orgs)
46
+
47
+ if ARGV[2].nil?
48
+ functions.each do |x|
49
+ send_message(x, user)
50
+ end
51
+ else
52
+ Trollop::die("Not a valid function: #{ARGV[2]}") unless functions.include? ARGV[2]
53
+ send_message(ARGV[2], user)
54
+ end
55
+
56
+ end
57
+ end
58
+
59
+ class TransactedGHTorrent < GHTorrent::Mirror
60
+
61
+ def ensure_user_followers(user)
62
+ check_transaction do
63
+ super(user)
64
+ end
65
+ end
66
+
67
+ def ensure_orgs(user)
68
+ check_transaction do
69
+ super(user)
70
+ end
71
+ end
72
+ end
@@ -24,6 +24,8 @@ module GHTorrent
24
24
 
25
25
  # Get a connection to the database
26
26
  def get_db
27
+ return @db unless @db.nil?
28
+
27
29
  Sequel.single_threaded = true
28
30
  @db = Sequel.connect(config(:sql_url), :encoding => 'utf8')
29
31
  #@db.loggers << @logger
@@ -54,7 +56,6 @@ module GHTorrent
54
56
  end
55
57
 
56
58
  transaction do
57
- ensure_user(user, true, true)
58
59
  ensure_commit(repo, sha, user)
59
60
  end
60
61
  end
@@ -78,10 +79,9 @@ module GHTorrent
78
79
  # [user] The login of the repository owner
79
80
  # [repo] The name of the repository
80
81
  # [comment_id] The id of the commit comment to retrieve
81
- # [date_added] The timestamp that the add event took place
82
- def get_commit_comment(user, repo, comment_id, date_added)
82
+ def get_commit_comment(user, repo, comment_id)
83
83
  transaction do
84
- ensure_commit_comment(user, repo, comment_id, date_added)
84
+ ensure_commit_comment(user, repo, comment_id)
85
85
  end
86
86
  end
87
87
 
@@ -106,8 +106,6 @@ module GHTorrent
106
106
  # [date_added] The timestamp that the add event took place
107
107
  def get_follower(follower, followed, date_added)
108
108
  transaction do
109
- ensure_user(follower, true, true)
110
- ensure_user(followed, true, true)
111
109
  ensure_user_follower(followed, follower, date_added)
112
110
  end
113
111
  end
@@ -130,10 +128,9 @@ module GHTorrent
130
128
  # [owner] The login of the repository owner
131
129
  # [repo] The name of the repository
132
130
  # [fork_id] The fork item id
133
- # [date_added] The timestamp that the add event took place
134
- def get_fork(owner, repo, fork_id, date_added)
131
+ def get_fork(owner, repo, fork_id)
135
132
  transaction do
136
- ensure_fork(owner, repo, fork_id, date_added)
133
+ ensure_fork(owner, repo, fork_id)
137
134
  end
138
135
  end
139
136
 
@@ -144,9 +141,9 @@ module GHTorrent
144
141
  # [repo] The name of the repository
145
142
  # [pullreq_id] The pull request id
146
143
  # [date_added] The timestamp that the add event took place
147
- def get_pullreq_comment(owner, repo, pullreq_id, comment_id, created_at)
144
+ def get_pullreq_comment(owner, repo, pullreq_id, comment_id)
148
145
  transaction do
149
- ensure_pullreq_comment(owner, repo, pullreq_id, comment_id, created_at)
146
+ ensure_pullreq_comment(owner, repo, pullreq_id, comment_id)
150
147
  end
151
148
  end
152
149
 
@@ -158,9 +155,9 @@ module GHTorrent
158
155
  # [issue_id] The issue id
159
156
  # [action] The action that took place for the issue
160
157
  # [date_added] The timestamp that the add event took place
161
- def get_issue(owner, repo, issue_id, created_at)
158
+ def get_issue(owner, repo, issue_id)
162
159
  transaction do
163
- ensure_issue(owner, repo, issue_id, created_at)
160
+ ensure_issue(owner, repo, issue_id)
164
161
  end
165
162
  end
166
163
 
@@ -181,7 +178,7 @@ module GHTorrent
181
178
  # Make sure a commit exists
182
179
  #
183
180
  def ensure_commit(repo, sha, user, comments = true)
184
- ensure_repo(user, repo)
181
+ ensure_repo(user, repo, false, false, false, false)
185
182
  c = retrieve_commit(repo, sha, user)
186
183
 
187
184
  if c.nil?
@@ -208,8 +205,9 @@ module GHTorrent
208
205
  # [sha] The first commit to start retrieving from. If nil, then the
209
206
  # earliest stored commit will be used instead.
210
207
  # [num_pages] The number of commit pages to retrieve
211
- def ensure_commits(user, repo, sha = nil,
212
- num_pages = config(:mirror_commit_pages_new_repo))
208
+ def ensure_commits(user, repo, refresh = false, sha = nil,
209
+ num_pages = config(:mirror_commit_pages_new_repo)
210
+ )
213
211
  userid = @db[:users].filter(:login => user).first[:id]
214
212
  repoid = @db[:projects].filter(:owner_id => userid,
215
213
  :name => repo).first[:id]
@@ -236,32 +234,37 @@ module GHTorrent
236
234
  # in the database.
237
235
  def ensure_parents(commit)
238
236
  commits = @db[:commits]
239
- commit['parents'].each do |p|
240
- parents = @db[:commit_parents]
241
- url = p['url'].split(/\//)
242
- this = commits.first(:sha => commit['sha'])
243
- parent = commits.first(:sha => url[7])
244
-
245
- if parent.nil?
246
- store_commit(retrieve_commit(url[5], url[7], url[4]), url[5], url[4])
247
- parent = commits.first(:sha => url[7])
248
- end
249
-
250
- if parent.nil?
237
+ parents = @db[:commit_parents]
238
+ commit['parents'].map do |p|
239
+ url = p['url'].split(/\//)
240
+ this = commits.first(:sha => commit['sha'])
241
+ parent = commits.first(:sha => url[7])
242
+
243
+ if parent.nil?
244
+ c = retrieve_commit(url[5], url[7], url[4])
245
+ if c.nil?
251
246
  warn "GHTorrent: Could not retrieve #{url[4]}/#{url[5]} -> #{url[7]}, parent to commit #{this[:sha]}"
252
- return
247
+ next
253
248
  end
249
+ parent = store_commit(c, url[5], url[4])
250
+ end
254
251
 
255
- if parents.first(:commit_id => this[:id],
256
- :parent_id => parent[:id]).nil?
252
+ if parent.nil?
253
+ warn "GHTorrent: Could not retrieve #{url[4]}/#{url[5]} -> #{url[7]}, parent to commit #{this[:sha]}"
254
+ next
255
+ end
257
256
 
258
- parents.insert(:commit_id => this[:id],
259
- :parent_id => parent[:id])
260
- info "GHTorrent: Added parent #{parent[:sha]} to commit #{this[:sha]}"
261
- else
262
- debug "GHTorrent: Parent #{parent[:sha]} for commit #{this[:sha]} exists"
263
- end
257
+ if parents.first(:commit_id => this[:id],
258
+ :parent_id => parent[:id]).nil?
259
+
260
+ parents.insert(:commit_id => this[:id],
261
+ :parent_id => parent[:id])
262
+ info "GHTorrent: Added parent #{parent[:sha]} to commit #{this[:sha]}"
263
+ else
264
+ debug "GHTorrent: Parent #{parent[:sha]} for commit #{this[:sha]} exists"
264
265
  end
266
+ parents.first(:commit_id => this[:id], :parent_id => parent[:id])
267
+ end
265
268
  end
266
269
 
267
270
  ##
@@ -271,23 +274,27 @@ module GHTorrent
271
274
  # [repo] The repo receiving the commit
272
275
  # [sha] The commit SHA
273
276
  def ensure_repo_commit(user, repo, sha)
274
- userid = @db[:users].first(:login => user)[:id]
275
- projectid = @db[:projects].first(:owner_id => userid,
276
- :name => repo)[:id]
277
+ project = ensure_repo(user, repo, false, false, false, false)
278
+
279
+ if project.nil?
280
+ warn "GHTorrent: Repo #{user}/#{repo} does not exist"
281
+ return
282
+ end
283
+
277
284
  commitid = @db[:commits].first(:sha => sha)[:id]
278
285
 
279
- exists = @db[:project_commits].first(:project_id => projectid,
286
+ exists = @db[:project_commits].first(:project_id => project[:id],
280
287
  :commit_id => commitid)
281
288
  if exists.nil?
282
289
  @db[:project_commits].insert(
283
- :project_id => projectid,
290
+ :project_id => project[:id],
284
291
  :commit_id => commitid
285
292
  )
286
- info "GHTorrent: Added commit #{user}/#{repo} -> #{sha}"
287
- @db[:project_commits].first(:project_id => projectid,
293
+ info "GHTorrent: Associating commit #{sha} with #{user}/#{repo}"
294
+ @db[:project_commits].first(:project_id => project[:id],
288
295
  :commit_id => commitid)
289
296
  else
290
- debug "GHTorrent: Commit #{user}/#{repo} -> #{sha} exists"
297
+ debug "GHTorrent: Commit #{sha} already associated with #{user}/#{repo}"
291
298
  exists
292
299
  end
293
300
  end
@@ -333,6 +340,17 @@ module GHTorrent
333
340
  return users.first(:login => byemail[:login])
334
341
  end
335
342
 
343
+ # This means that the user's login has been associated with a
344
+ # Github user by the time the commit was done (and hence Github was
345
+ # able to associate the commit to an account), but afterwards the
346
+ # user has deleted his account (before GHTorrent processed it).
347
+ # On absense of something better to do, try to find the user by email
348
+ # and return a "fake" user entry.
349
+ if added.nil?
350
+ warn "GHTorrent: User account for user #{login} deleted from Github"
351
+ return ensure_user("#{name}<#{email}>", false, false)
352
+ end
353
+
336
354
  if byemail.nil?
337
355
  users.filter(:login => login).update(:name => name) if added[:name].nil?
338
356
  users.filter(:login => login).update(:email => email) if added[:email].nil?
@@ -373,10 +391,12 @@ module GHTorrent
373
391
  def ensure_user(user, followers, orgs)
374
392
  # Github only supports alphanumerics and dashes in its usernames.
375
393
  # All other symbols are treated as emails.
376
- if not user.match(/^[A-Za-z0-9\-]*$/)
394
+ if not user.match(/^[\w\-]*$/)
377
395
  begin
378
396
  name, email = user.split("<")
379
397
  email = email.split(">")[0]
398
+ name = name.strip unless name.nil?
399
+ email = email.strip unless email.nil?
380
400
  rescue Exception
381
401
  raise new GHTorrentException.new("Not a valid email address: #{user}")
382
402
  end
@@ -384,7 +404,7 @@ module GHTorrent
384
404
  unless is_valid_email(email)
385
405
  warn("GHTorrent: Extracted email(#{email}) not valid for user #{user}")
386
406
  end
387
- u = ensure_user_byemail(email.strip, name.strip)
407
+ u = ensure_user_byemail(email, name)
388
408
  else
389
409
  u = ensure_user_byuname(user)
390
410
  ensure_user_followers(user) if followers
@@ -446,9 +466,8 @@ module GHTorrent
446
466
  #
447
467
  # ==Parameters:
448
468
  # [user] The user login to find followers by
449
- def ensure_user_followers(followed, date_added = nil)
469
+ def ensure_user_followers(followed)
450
470
  curuser = ensure_user(followed, false, false)
451
- time = curuser[:created_at]
452
471
  followers = @db.from(:followers, :users).\
453
472
  where(:followers__follower_id => :users__id).
454
473
  where(:followers__user_id => curuser[:id]).select(:login).all
@@ -459,12 +478,12 @@ module GHTorrent
459
478
  else
460
479
  acc
461
480
  end
462
- end.map { |x| ensure_user_follower(followed, x['login'], time) }
481
+ end.map { |x| ensure_user_follower(followed, x['login']) }
463
482
  end
464
483
 
465
484
  ##
466
485
  # Make sure that a user follows another one
467
- def ensure_user_follower(followed, follower, date_added)
486
+ def ensure_user_follower(followed, follower, date_added = nil)
468
487
  follower_user = ensure_user(follower, false, false)
469
488
  followed_user = ensure_user(followed, false, false)
470
489
 
@@ -474,14 +493,17 @@ module GHTorrent
474
493
  end
475
494
 
476
495
  followers = @db[:followers]
477
- followed_id = follower_user[:id]
478
- follower_id = followed_user[:id]
496
+ follower_id = follower_user[:id]
497
+ followed_id = followed_user[:id]
479
498
 
480
499
  follower_exists = followers.first(:user_id => followed_id,
481
500
  :follower_id => follower_id)
482
-
483
501
  if follower_exists.nil?
484
- added = if date_added.nil? then Time.now else date_added end
502
+ added = if date_added.nil?
503
+ max(follower_user[:created_at], followed_user[:created_at])
504
+ else
505
+ date_added
506
+ end
485
507
  retrieved = retrieve_user_follower(followed, follower)
486
508
 
487
509
  if retrieved.nil?
@@ -495,13 +517,17 @@ module GHTorrent
495
517
  :ext_ref_id => retrieved[@ext_uniq])
496
518
  info "GHTorrent: User #{follower} follows #{followed}"
497
519
  else
498
- unless date_added.nil?
499
- followers.filter(:user_id => followed_id,
500
- :follower_id => follower_id)\
520
+ debug "GHTorrent: Follower #{follower} exists for user #{followed}"
521
+ end
522
+
523
+ unless date_added.nil?
524
+ followers.filter(:user_id => followed_id,
525
+ :follower_id => follower_id)\
501
526
  .update(:created_at => date(date_added))
502
- debug "GHTorrent: Updating follower #{followed} -> #{follower}"
503
- end
527
+ debug "GHTorrent: Updating follower #{followed} -> #{follower}, created_at -> #{date(date_added)}"
504
528
  end
529
+
530
+ followers.first(:user_id => followed_id, :follower_id => follower_id)
505
531
  end
506
532
 
507
533
  ##
@@ -529,19 +555,29 @@ module GHTorrent
529
555
  :name => name,
530
556
  :login => login,
531
557
  :created_at => Time.now,
532
- :ext_ref_id => ""
533
- )
558
+ :ext_ref_id => "")
534
559
  info "GHTorrent: Added fake user #{login} -> #{email}"
535
560
  users.first(:login => login)
536
561
  else
537
- users.insert(:login => u['login'],
538
- :name => u['name'],
539
- :company => u['company'],
540
- :email => u['email'],
541
- :location => u['location'],
542
- :created_at => date(u['created_at']),
543
- :ext_ref_id => u[@ext_uniq])
544
- info "GHTorrent: Found #{email} through search API query"
562
+ in_db = users.first(:login => u['login'])
563
+ if in_db.nil?
564
+ users.insert(:login => u['login'],
565
+ :name => u['name'],
566
+ :company => u['company'],
567
+ :email => u['email'],
568
+ :location => u['location'],
569
+ :created_at => date(u['created_at']),
570
+ :ext_ref_id => u[@ext_uniq])
571
+ info "GHTorrent: Found #{email} through search API query"
572
+ else
573
+ in_db.update(:name => u['name'],
574
+ :company => u['company'],
575
+ :email => u['email'],
576
+ :location => u['location'],
577
+ :created_at => date(u['created_at']),
578
+ :ext_ref_id => u[@ext_uniq])
579
+ info "GHTorrent: User with email #{email} exists with username #{u['login']}"
580
+ end
545
581
  users.first(:login => u['login'])
546
582
  end
547
583
  else
@@ -560,10 +596,17 @@ module GHTorrent
560
596
  # == Returns:
561
597
  # If the repo can be retrieved, it is returned as a Hash. Otherwise,
562
598
  # the result is nil
563
- def ensure_repo(user, repo, commits = true, project_members = true, watchers = true)
599
+ def ensure_repo(user, repo, commits = true, project_members = true,
600
+ watchers = true, forks = true)
564
601
 
565
602
  repos = @db[:projects]
566
603
  curuser = ensure_user(user, false, false)
604
+
605
+ if curuser.nil?
606
+ warn "Cannot find user #{user}"
607
+ return
608
+ end
609
+
567
610
  currepo = repos.first(:owner_id => curuser[:id], :name => repo)
568
611
 
569
612
  if currepo.nil?
@@ -575,17 +618,29 @@ module GHTorrent
575
618
  end
576
619
 
577
620
  repos.insert(:url => r['url'],
578
- :owner_id => @db[:users].filter(:login => user).first[:id],
621
+ :owner_id => curuser[:id],
579
622
  :name => r['name'],
580
623
  :description => r['description'],
581
624
  :language => r['language'],
582
625
  :created_at => date(r['created_at']),
583
626
  :ext_ref_id => r[@ext_uniq])
584
627
 
585
- info "GHTorrent: New repo #{repo}"
628
+ unless r['parent'].nil?
629
+ parent_owner = r['parent']['owner']['login']
630
+ parent_repo = r['parent']['name']
631
+
632
+ parent = ensure_repo(parent_owner, parent_repo, false, false, false, false)
633
+
634
+ repos.filter(:owner_id => curuser[:id], :name => repo).update(:forked_from => parent[:id])
635
+
636
+ info "Repo #{user}/#{repo} is a fork from #{parent_owner}/#{parent_repo}"
637
+ end
638
+
639
+ info "GHTorrent: New repo #{user}/#{repo}"
586
640
  ensure_commits(user, repo) if commits
587
641
  ensure_project_members(user, repo) if project_members
588
642
  ensure_watchers(user, repo) if watchers
643
+ ensure_forks(user, repo) if forks
589
644
  repos.first(:owner_id => curuser[:id], :name => repo)
590
645
  else
591
646
  debug "GHTorrent: Repo #{user}/#{repo} exists"
@@ -595,8 +650,8 @@ module GHTorrent
595
650
 
596
651
  ##
597
652
  # Make sure that a project has all the registered members defined
598
- def ensure_project_members(user, repo)
599
- currepo = ensure_repo(user, repo, true, false, true)
653
+ def ensure_project_members(user, repo, refresh = false)
654
+ currepo = ensure_repo(user, repo, false, false, false, false)
600
655
  time = currepo[:created_at]
601
656
 
602
657
  project_members = @db.from(:project_members, :users).\
@@ -616,7 +671,7 @@ module GHTorrent
616
671
  # Make sure that a project member exists in a project
617
672
  def ensure_project_member(owner, repo, new_member, date_added)
618
673
  pr_members = @db[:project_members]
619
- project = ensure_repo(owner, repo, true, false, true)
674
+ project = ensure_repo(owner, repo, false, false, false, false)
620
675
  new_user = ensure_user(new_member, false, false)
621
676
 
622
677
  if project.nil? or new_user.nil?
@@ -627,7 +682,11 @@ module GHTorrent
627
682
  :repo_id => project[:id])
628
683
 
629
684
  if memb_exist.nil?
630
- added = if date_added.nil? then Time.now else date_added end
685
+ added = if date_added.nil?
686
+ max(project[:created_at], new_user[:created_at])
687
+ else
688
+ date_added
689
+ end
631
690
  retrieved = retrieve_repo_collaborator(owner, repo, new_member)
632
691
 
633
692
  if retrieved.nil?
@@ -644,12 +703,13 @@ module GHTorrent
644
703
  info "GHTorrent: Added project member #{repo} -> #{new_member}"
645
704
  else
646
705
  debug "GHTorrent: Project member #{repo} -> #{new_member} exists"
647
- unless date_added.nil?
648
- pr_members.filter(:user_id => new_user[:id],
649
- :repo_id => project[:id])\
706
+ end
707
+
708
+ unless date_added.nil?
709
+ pr_members.filter(:user_id => new_user[:id],
710
+ :repo_id => project[:id])\
650
711
  .update(:created_at => date(date_added))
651
- info "GHTorrent: Updating project member #{repo} -> #{new_member}"
652
- end
712
+ info "GHTorrent: Updating project member #{repo} -> #{new_member}, created_at -> #{date(date_added)}"
653
713
  end
654
714
  end
655
715
 
@@ -734,7 +794,7 @@ module GHTorrent
734
794
  end
735
795
  end
736
796
 
737
- not_saved.map{|x| ensure_commit_comment(user, repo, x['id'], nil)}
797
+ not_saved.map{|x| ensure_commit_comment(user, repo, x['id'])}
738
798
  end
739
799
 
740
800
  ##
@@ -745,7 +805,7 @@ module GHTorrent
745
805
  # [repo] The repository containing the commit whose comment will be retrieved
746
806
  # [id] The comment id to retrieve
747
807
  # [created_at] The timestamp that the comment was made.
748
- def ensure_commit_comment(user, repo, id, created_at)
808
+ def ensure_commit_comment(user, repo, id)
749
809
  stored_comment = @db[:commit_comments].first(:comment_id => id)
750
810
 
751
811
  if stored_comment.nil?
@@ -770,21 +830,15 @@ module GHTorrent
770
830
  )
771
831
  info "GHTorrent: Added commit comment #{commit[:sha]} -> #{user[:login]}"
772
832
  else
773
- unless created_at.nil?
774
- @db[:commit_comments].filter(:comment_id => id)\
775
- .update(:created_at => date(created_at))
776
- info "GHTorrent: Updating comment #{user}/#{repo} -> #{id}"
777
- end
778
833
  info "GHTorrent: Commit comment #{id} exists"
779
834
  end
780
835
  @db[:commit_comments].first(:comment_id => id)
781
836
  end
782
837
 
783
838
  ##
784
- # Make sure that
785
- def ensure_watchers(owner, repo)
786
- currepo = ensure_repo(owner, repo, true, true, false)
787
- time = currepo[:created_at]
839
+ # Make sure that all watchers exist for a repository
840
+ def ensure_watchers(owner, repo, refresh = false)
841
+ currepo = ensure_repo(owner, repo, false, false, false, false)
788
842
 
789
843
  if currepo.nil?
790
844
  warn "Could not retrieve watchers for #{owner}/#{repo}"
@@ -803,13 +857,13 @@ module GHTorrent
803
857
  else
804
858
  acc
805
859
  end
806
- end.map { |x| ensure_watcher(owner, repo, x['login'], time) }
860
+ end.map { |x| ensure_watcher(owner, repo, x['login'], nil) }
807
861
  end
808
862
 
809
863
  ##
810
- # Make sure that a project member exists in a project
864
+ # Make sure that a watcher/stargazer exists for a repository
811
865
  def ensure_watcher(owner, repo, watcher, date_added = nil)
812
- project = ensure_repo(owner, repo, false, false, false)
866
+ project = ensure_repo(owner, repo, false, false, false, false)
813
867
  new_watcher = ensure_user(watcher, false, false)
814
868
 
815
869
  if new_watcher.nil? or project.nil?
@@ -818,11 +872,15 @@ module GHTorrent
818
872
  end
819
873
 
820
874
  watchers = @db[:watchers]
821
- memb_exist = watchers.first(:user_id => new_watcher[:id],
822
- :repo_id => project[:id])
875
+ watcher_exist = watchers.first(:user_id => new_watcher[:id],
876
+ :repo_id => project[:id])
823
877
 
824
- if memb_exist.nil?
825
- added = if date_added.nil? then Time.now else date_added end
878
+ if watcher_exist.nil?
879
+ added = if date_added.nil?
880
+ max(project[:created_at], new_watcher[:created_at])
881
+ else
882
+ date_added
883
+ end
826
884
  retrieved = retrieve_watcher(owner, repo, watcher)
827
885
 
828
886
  if retrieved.nil?
@@ -839,33 +897,42 @@ module GHTorrent
839
897
  info "GHTorrent: Added watcher #{owner}/#{repo} -> #{watcher}"
840
898
  else
841
899
  debug "GHTorrent: Watcher #{owner}/#{repo} -> #{watcher} exists"
842
- unless date_added.nil?
843
- watchers.filter(:user_id => new_watcher[:id],
844
- :repo_id => project[:id])\
900
+ end
901
+
902
+ unless date_added.nil?
903
+ watchers.filter(:user_id => new_watcher[:id],
904
+ :repo_id => project[:id])\
845
905
  .update(:created_at => date(date_added))
846
- info "GHTorrent: Updating watcher #{owner}/#{repo} -> #{watcher}"
847
- end
906
+ info "GHTorrent: Updating watcher #{owner}/#{repo} -> #{watcher}, created_at -> #{date_added}"
848
907
  end
908
+
909
+ watchers.first(:user_id => new_watcher[:id],
910
+ :repo_id => project[:id])
849
911
  end
850
912
 
851
913
  ##
852
914
  # Process all pull requests
853
- def ensure_pull_requests(owner, repo)
854
- currepo = ensure_repo(owner, repo, false, false, false)
915
+ def ensure_pull_requests(owner, repo, refresh = false)
916
+ currepo = ensure_repo(owner, repo, false, false, false, false)
855
917
  if currepo.nil?
856
918
  warn "Could not retrieve pull requests from #{owner}/#{repo}"
857
919
  return
858
920
  end
859
921
 
860
- pull_reqs = @db[:pull_requests].filter(:base_repo_id => currepo[:id]).all
922
+ raw_pull_reqs = if refresh
923
+ retrieve_pull_requests(owner, repo, refresh = true)
924
+ else
925
+ pull_reqs = @db[:pull_requests].filter(:base_repo_id => currepo[:id]).all
926
+ retrieve_pull_requests(owner, repo).reduce([]) do |acc, x|
927
+ if pull_reqs.find { |y| y[:pullreq_id] == x['number'] }.nil?
928
+ acc << x
929
+ else
930
+ acc
931
+ end
932
+ end
933
+ end
861
934
 
862
- retrieve_pull_requests(owner, repo).reduce([]) do |acc, x|
863
- if pull_reqs.find { |y| y[:pullreq_id] == x['number'] }.nil?
864
- acc << x
865
- else
866
- acc
867
- end
868
- end.map { |x| ensure_pull_request(owner, repo, x['number']) }
935
+ raw_pull_reqs.map { |x| ensure_pull_request(owner, repo, x['number']) }
869
936
  end
870
937
 
871
938
  ##
@@ -875,7 +942,7 @@ module GHTorrent
875
942
  state = nil, created_at = nil)
876
943
  pulls_reqs = @db[:pull_requests]
877
944
 
878
- project = ensure_repo(owner, repo, false, false, false)
945
+ project = ensure_repo(owner, repo, false, false, false, false)
879
946
 
880
947
  if project.nil?
881
948
  return
@@ -885,15 +952,13 @@ module GHTorrent
885
952
  def add_history(id, ts, unq, act)
886
953
  pull_req_history = @db[:pull_request_history]
887
954
  entry = pull_req_history.first(:pull_request_id => id,
888
- :ext_ref_id => unq, :action => act)
955
+ :created_at => ts, :action => act)
889
956
  if entry.nil?
890
957
  pull_req_history.insert(:pull_request_id => id, :created_at => ts,
891
958
  :ext_ref_id => unq, :action => act)
892
- info "GHTorrent: New pull request (#{id}) history entry (#{act})"
959
+ info "GHTorrent: New pull request (#{id}) history entry (#{act}) timestamp #{ts}"
893
960
  else
894
- pull_req_history.filter(:pull_request_id => id, :ext_ref_id => unq,
895
- :action => act).update(:created_at => ts)
896
- info "GHTorrent: Updating pull request (#{id}) history entry (#{act}) timestamp #{ts}"
961
+ info "GHTorrent: Pull request (#{id}) history entry (#{act}) timestamp #{ts} exists"
897
962
  end
898
963
  end
899
964
 
@@ -939,7 +1004,7 @@ module GHTorrent
939
1004
 
940
1005
  base_repo = ensure_repo(retrieved['base']['repo']['owner']['login'],
941
1006
  retrieved['base']['repo']['name'],
942
- false, false, false)
1007
+ false, false, false, false)
943
1008
 
944
1009
  base_commit = ensure_commit(retrieved['base']['repo']['name'],
945
1010
  retrieved['base']['sha'],
@@ -955,7 +1020,7 @@ module GHTorrent
955
1020
  head_repo = if has_head_repo(retrieved)
956
1021
  ensure_repo(retrieved['head']['repo']['owner']['login'],
957
1022
  retrieved['head']['repo']['name'],
958
- false, false, false)
1023
+ false, false, false, false)
959
1024
  end
960
1025
 
961
1026
  head_commit = if not head_repo.nil?
@@ -967,7 +1032,12 @@ module GHTorrent
967
1032
 
968
1033
  pull_req_user = ensure_user(retrieved['user']['login'], false, false)
969
1034
 
970
- merged = if retrieved['merged_at'].nil? then false else true end
1035
+ merged = if retrieved['merged_at'].nil? then
1036
+ # Check if the pr's commits are in the repository
1037
+ false
1038
+ else
1039
+ true
1040
+ end
971
1041
  closed = if retrieved['closed_at'].nil? then false else true end
972
1042
 
973
1043
  pull_req = pulls_reqs.first(:base_repo_id => project[:id],
@@ -980,10 +1050,11 @@ module GHTorrent
980
1050
  :base_commit_id => base_commit[:id],
981
1051
  :user_id => pull_req_user[:id],
982
1052
  :pullreq_id => pullreq_id,
983
- :intra_branch => is_intra_branch(retrieved)
1053
+ :intra_branch => is_intra_branch(retrieved),
1054
+ :merged => merged
984
1055
  )
985
1056
 
986
- info log_msg(retrieved)
1057
+ info log_msg(retrieved) + " was added"
987
1058
  else
988
1059
  debug log_msg(retrieved) + " exists"
989
1060
  end
@@ -1001,22 +1072,21 @@ module GHTorrent
1001
1072
  state) unless state.nil?
1002
1073
 
1003
1074
  ensure_pull_request_commits(owner, repo, pullreq_id) if commits
1004
- ensure_pullreq_comments(owner, repo, pullreq_id, created_at) if comments
1075
+ ensure_pullreq_comments(owner, repo, pullreq_id) if comments
1005
1076
 
1006
1077
  pulls_reqs.first(:base_repo_id => project[:id],
1007
1078
  :pullreq_id => pullreq_id)
1008
1079
  end
1009
1080
 
1010
- def ensure_pullreq_comments(owner, repo, pullreq_id, created_at)
1011
- currepo = ensure_repo(owner, repo, true, true, false)
1012
- time = if created_at.nil? then currepo[:created_at] else Time.now() end
1081
+ def ensure_pullreq_comments(owner, repo, pullreq_id)
1082
+ currepo = ensure_repo(owner, repo, false, false, false, false)
1013
1083
 
1014
1084
  if currepo.nil?
1015
1085
  warn "GHTorrent: Could not find repository #{owner}/#{repo}"
1016
1086
  return
1017
1087
  end
1018
1088
 
1019
- pull_req = ensure_pull_request(owner, repo, pullreq_id, false, true)
1089
+ pull_req = ensure_pull_request(owner, repo, pullreq_id, false, false)
1020
1090
 
1021
1091
  if pull_req.nil?
1022
1092
  warn "Could not retrieve pull req #{owner}/#{repo} -> #{pullreq_id}"
@@ -1032,12 +1102,16 @@ module GHTorrent
1032
1102
  acc
1033
1103
  end
1034
1104
  end.map { |x|
1035
- ensure_pullreq_comment(owner, repo, pullreq_id, x['id'], time)
1105
+ ensure_pullreq_comment(owner, repo, pullreq_id, x['id'])
1036
1106
  }
1037
1107
  end
1038
1108
 
1039
- def ensure_pullreq_comment(owner, repo, pullreq_id, comment_id, created_at)
1040
- pull_req = ensure_pull_request(owner, repo, pullreq_id, false, true)
1109
+ def ensure_pullreq_comment(owner, repo, pullreq_id, comment_id)
1110
+ # Commit retrieval is set to false to ensure that no duplicate work
1111
+ # is done on retrieving a pull request. This has the side effect that
1112
+ # commits might not be retrieved if a pullreqcomment event gets processed
1113
+ # before the pullreq event, until the pullreq event has been processed
1114
+ pull_req = ensure_pull_request(owner, repo, pullreq_id, false, false)
1041
1115
 
1042
1116
  if pull_req.nil?
1043
1117
  warn "GHTorrent: Could not retrieve pull req #{owner}/#{repo} -> #{pullreq_id}"
@@ -1084,12 +1158,21 @@ module GHTorrent
1084
1158
  end
1085
1159
 
1086
1160
  def ensure_pull_request_commits(owner, repo, pullreq_id)
1161
+ pullreq = ensure_pull_request(owner, repo, pullreq_id, false, false)
1162
+
1163
+ if pullreq.nil?
1164
+ warn "GHTorrent: Pull request #{pullreq_id} does not exist for #{owner}/#{repo}"
1165
+ return
1166
+ end
1167
+
1087
1168
  retrieve_pull_req_commits(owner, repo, pullreq_id).reduce([]){|acc, c|
1088
- x = ensure_commit(repo, c['sha'], owner, true)
1169
+ next if c.nil?
1170
+ head_repo_owner = c['url'].split(/\//)[4]
1171
+ head_repo_name = c['url'].split(/\//)[5]
1172
+ x = ensure_commit(head_repo_name, c['sha'], head_repo_owner, true)
1089
1173
  acc << x if not x.nil?
1090
1174
  acc
1091
1175
  }.map { |c|
1092
- pullreq = ensure_pull_request(owner, repo, pullreq_id, false, false)
1093
1176
  exists = @db[:pull_request_commits].first(:pull_request_id => pullreq[:id],
1094
1177
  :commit_id => c[:id])
1095
1178
  if exists.nil?
@@ -1110,18 +1193,17 @@ module GHTorrent
1110
1193
  # ==Parameters:
1111
1194
  # [owner] The user to which the project belongs
1112
1195
  # [repo] The repository/project to find forks for
1113
- def ensure_forks(owner, repo)
1114
- currepo = ensure_repo(owner, repo, false, false, false)
1196
+ def ensure_forks(owner, repo, refresh = false)
1197
+ currepo = ensure_repo(owner, repo, false, false, false, false)
1115
1198
 
1116
1199
  if currepo.nil?
1117
1200
  warn "Could not retrieve forks for #{owner}/#{repo}"
1118
1201
  return
1119
1202
  end
1120
1203
 
1121
- existing_forks = @db.from(:forks, :projects, :users).\
1122
- where(:forks__forked_project_id => :projects__id). \
1204
+ existing_forks = @db.from(:projects, :users).\
1123
1205
  where(:users__id => :projects__owner_id). \
1124
- where(:forks__forked_from_id => currepo[:id]).select(:projects__name, :login).all
1206
+ where(:projects__forked_from => currepo[:id]).select(:projects__name, :login).all
1125
1207
 
1126
1208
  retrieve_forks(owner, repo).reduce([]) do |acc, x|
1127
1209
  if existing_forks.find {|y|
@@ -1138,65 +1220,49 @@ module GHTorrent
1138
1220
 
1139
1221
  ##
1140
1222
  # Make sure that a fork is retrieved for a project
1141
- def ensure_fork(owner, repo, fork_id, date_added = nil)
1142
-
1143
- forks = @db[:forks]
1144
- forked = ensure_repo(owner, repo, false, false, false)
1145
- fork_exists = forks.first(:fork_id => fork_id)
1146
-
1147
- if fork_exists.nil?
1148
- retrieved = retrieve_fork(owner, repo, fork_id)
1149
- added = if date_added.nil? then retrieved['created_at'] else date_added end
1150
-
1151
- if retrieved.nil?
1152
- warn "GHTorrent: Fork #{fork_id} does not exist for #{owner}/#{repo}"
1153
- return
1154
- end
1223
+ def ensure_fork(owner, repo, fork_id)
1224
+ fork = retrieve_fork(owner, repo, fork_id)
1155
1225
 
1156
- forked_repo_owner = retrieved['full_name'].split(/\//)[0]
1157
- forked_repo_name = retrieved['full_name'].split(/\//)[1]
1226
+ if fork.nil?
1227
+ warn "GHTorrent: Fork #{fork_id} does not exist for #{owner}/#{repo}"
1228
+ return
1229
+ end
1158
1230
 
1159
- fork = ensure_repo(forked_repo_owner, forked_repo_name)
1231
+ fork_owner = fork['full_name'].split(/\//)[0]
1232
+ fork_name = fork['full_name'].split(/\//)[1]
1160
1233
 
1161
- if forked.nil? or fork.nil?
1162
- warn "Could not add fork #{fork_id}"
1163
- return
1164
- end
1234
+ r = ensure_repo(fork_owner, fork_name, false, false, false, false)
1165
1235
 
1166
- forks.insert(:forked_project_id => fork[:id],
1167
- :forked_from_id => forked[:id],
1168
- :fork_id => fork_id,
1169
- :created_at => added,
1170
- :ext_ref_id => retrieved[@ext_uniq])
1171
- info "GHTorrent: Added #{forked_repo_owner}/#{forked_repo_name} as fork of #{owner}/#{repo}"
1236
+ if r.nil?
1237
+ warn "GHTorrent: Failed to add #{fork_owner}/#{fork_name} as fork of #{owner}/#{repo}"
1172
1238
  else
1173
- debug "GHTorrent: Fork #{fork_id} exists as fork of #{owner}/#{repo}"
1174
- unless date_added.nil?
1175
- forks.filter(:fork_id => fork_id)\
1176
- .update(:created_at => date(date_added))
1177
- debug "GHTorrent: Updating fork #{owner}/#{repo} (#{fork_id})"
1178
- end
1239
+ info "GHTorrent: Added #{fork_owner}/#{fork_name} as fork of #{owner}/#{repo}"
1179
1240
  end
1180
1241
  end
1181
1242
 
1182
1243
  ##
1183
1244
  # Make sure all issues exist for a project
1184
- def ensure_issues(owner, repo)
1185
- currepo = ensure_repo(owner, repo, false, false, false)
1245
+ def ensure_issues(owner, repo, refresh = false)
1246
+ currepo = ensure_repo(owner, repo, false, false, false, false)
1186
1247
  if currepo.nil?
1187
1248
  warn "GHTorrent: Could not retrieve issues for #{owner}/#{repo}"
1188
1249
  return
1189
1250
  end
1190
1251
 
1191
- issues = @db[:issues].filter(:repo_id => currepo[:id]).all
1252
+ raw_issues = if refresh
1253
+ retrieve_issues(owner, repo, refresh = true)
1254
+ else
1255
+ issues = @db[:issues].filter(:repo_id => currepo[:id]).all
1256
+ retrieve_issues(owner, repo).reduce([]) do |acc, x|
1257
+ if issues.find { |y| y[:issue_id] == x['number'] }.nil?
1258
+ acc << x
1259
+ else
1260
+ acc
1261
+ end
1262
+ end
1263
+ end
1192
1264
 
1193
- retrieve_issues(owner, repo).reduce([]) do |acc, x|
1194
- if issues.find { |y| y[:issue_id] == x['number'] }.nil?
1195
- acc << x
1196
- else
1197
- acc
1198
- end
1199
- end.map { |x| ensure_issue(owner, repo, x['number']) }
1265
+ raw_issues.map { |x| ensure_issue(owner, repo, x['number']) }
1200
1266
  end
1201
1267
 
1202
1268
  ##
@@ -1204,7 +1270,7 @@ module GHTorrent
1204
1270
  def ensure_issue(owner, repo, issue_id, events = true, comments = true)
1205
1271
 
1206
1272
  issues = @db[:issues]
1207
- repository = ensure_repo(owner, repo, false, false, false)
1273
+ repository = ensure_repo(owner, repo, false, false, false, false)
1208
1274
 
1209
1275
  if repo.nil?
1210
1276
  warn "Cannot find repo #{owner}/#{repo}"
@@ -1214,24 +1280,27 @@ module GHTorrent
1214
1280
  cur_issue = issues.first(:issue_id => issue_id,
1215
1281
  :repo_id => repository[:id])
1216
1282
 
1217
- if cur_issue.nil?
1218
- retrieved = retrieve_issue(owner, repo, issue_id)
1283
+ retrieved = retrieve_issue(owner, repo, issue_id)
1219
1284
 
1220
- if retrieved.nil?
1221
- warn "GHTorrent: Issue #{issue_id} does not exist for #{owner}/#{repo}"
1222
- return
1223
- end
1285
+ if retrieved.nil?
1286
+ warn "GHTorrent: Issue #{issue_id} does not exist for #{owner}/#{repo}"
1287
+ return
1288
+ end
1289
+
1290
+ # Pull requests and issues share the same issue_id
1291
+ pull_req = unless retrieved['pull_request'].nil? or
1292
+ retrieved['pull_request']['patch_url'].nil?
1293
+ info "GHTorrent: Issue #{owner}/#{repo}->#{issue_id} is a pull request"
1294
+ ensure_pull_request(owner, repo, issue_id)
1295
+ end
1296
+
1297
+ if cur_issue.nil?
1224
1298
 
1225
1299
  reporter = ensure_user(retrieved['user']['login'], false, false)
1226
1300
  assignee = unless retrieved['assignee'].nil?
1227
1301
  ensure_user(retrieved['assignee']['login'], false, false)
1228
1302
  end
1229
1303
 
1230
- # Pull requests and issues share the same issue_id
1231
- pull_req = unless retrieved['pull_request'].nil? or retrieved['pull_request']['patch_url'].nil?
1232
- ensure_pull_request(owner, repo, issue_id)
1233
- end
1234
-
1235
1304
  issues.insert(:repo_id => repository[:id],
1236
1305
  :assignee_id => unless assignee.nil? then assignee[:id] end,
1237
1306
  :reporter_id => reporter[:id],
@@ -1241,23 +1310,26 @@ module GHTorrent
1241
1310
  :created_at => date(retrieved['created_at']),
1242
1311
  :ext_ref_id => retrieved[@ext_uniq])
1243
1312
 
1244
- ensure_issue_events(owner, repo, issue_id) if events
1245
- ensure_issue_comments(owner, repo, issue_id) if comments and retrieved['comments'] > 0
1246
-
1247
1313
  info "GHTorrent: Added issue #{owner}/#{repo} -> #{issue_id}"
1248
- issues.first(:issue_id => issue_id,
1249
- :repo_id => repository[:id])
1250
1314
  else
1251
1315
  info "GHTorrent: Issue #{owner}/#{repo}->#{issue_id} exists"
1252
- cur_issue
1316
+ if cur_issue[:pull_request] == false and not pull_req.nil?
1317
+ info "GHTorrent: Updating issue #{owner}/#{repo}->#{issue_id} as pull request"
1318
+ issues.filter(:issue_id => issue_id, :repo_id => repository[:id]).update(
1319
+ :pull_request => true,
1320
+ :pull_request_id => pull_req[:id])
1321
+ end
1253
1322
  end
1323
+ ensure_issue_events(owner, repo, issue_id) if events
1324
+ ensure_issue_comments(owner, repo, issue_id) if comments
1325
+ issues.first(:issue_id => issue_id,
1326
+ :repo_id => repository[:id])
1254
1327
  end
1255
1328
 
1256
1329
  ##
1257
1330
  # Retrieve and process all events for an issue
1258
1331
  def ensure_issue_events(owner, repo, issue_id)
1259
- currepo = ensure_repo(owner, repo, true, true, false)
1260
- #time = if created_at.nil? then currepo[:created_at] else Time.now() end
1332
+ currepo = ensure_repo(owner, repo, false, false, false, false)
1261
1333
 
1262
1334
  if currepo.nil?
1263
1335
  warn "GHTorrent: Could not find repository #{owner}/#{repo}"
@@ -1361,7 +1433,7 @@ module GHTorrent
1361
1433
  ##
1362
1434
  # Retrieve and process all comments for an issue
1363
1435
  def ensure_issue_comments(owner, repo, issue_id)
1364
- currepo = ensure_repo(owner, repo, true, true, false)
1436
+ currepo = ensure_repo(owner, repo, false, false, false, false)
1365
1437
 
1366
1438
  if currepo.nil?
1367
1439
  warn "GHTorrent: Could not find repository #{owner}/#{repo}"
@@ -1390,7 +1462,7 @@ module GHTorrent
1390
1462
  ##
1391
1463
  # Retrieve and process +comment_id+ for an +issue_id+
1392
1464
  def ensure_issue_comment(owner, repo, issue_id, comment_id)
1393
- issue = ensure_issue(owner, repo, issue_id)
1465
+ issue = ensure_issue(owner, repo, issue_id, false, false)
1394
1466
 
1395
1467
  if issue.nil?
1396
1468
  warn "GHTorrent: Could not retrieve issue #{owner}/#{repo} -> #{issue_id}"
@@ -1467,14 +1539,13 @@ module GHTorrent
1467
1539
  repository = ensure_repo(user, repo, false, false, false)
1468
1540
 
1469
1541
  if repository.nil?
1470
- warn "Could not store commit #{user}/#{repo} #{c['sha']}"
1471
- return
1542
+ warn "GHTorrent: repository #{user}/#{repo} deleted"
1472
1543
  end
1473
1544
 
1474
1545
  commits.insert(:sha => c['sha'],
1475
1546
  :author_id => author[:id],
1476
1547
  :committer_id => commiter[:id],
1477
- :project_id => repository[:id],
1548
+ :project_id => if repository.nil? then nil else repository[:id] end ,
1478
1549
  :created_at => date(c['commit']['author']['date']),
1479
1550
  :ext_ref_id => c[@ext_uniq]
1480
1551
  )
@@ -1513,6 +1584,14 @@ module GHTorrent
1513
1584
  def is_valid_email(email)
1514
1585
  email =~ /^[a-zA-Z][\w\.-]*[a-zA-Z0-9]@[a-zA-Z0-9][\w\.-]*[a-zA-Z0-9]\.[a-zA-Z][a-zA-Z\.]*[a-zA-Z]$/
1515
1586
  end
1587
+
1588
+ def max(a, b)
1589
+ if a >= b
1590
+ a
1591
+ else
1592
+ b
1593
+ end
1594
+ end
1516
1595
  end
1517
1596
  end
1518
1597