ghtorrent 0.6 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -122,11 +122,11 @@ Loads object ids from a collection to a queue for further processing.
122
122
  connection.close { EventMachine.stop }
123
123
  }
124
124
 
125
- # Read next 1000 items and queue them
125
+ # Read next 100000 items and queue them
126
126
  read_and_publish = Proc.new {
127
127
 
128
128
  to_read = if options.number == -1
129
- 1000
129
+ 100000
130
130
  else
131
131
  if options.number - num_read - 1 <= 0
132
132
  -1
@@ -46,31 +46,44 @@ An efficient way to get all data for a single repo
46
46
  end
47
47
 
48
48
  def go
49
+ self.settings = override_config(settings, :mirror_history_pages_back, -1)
49
50
  user_entry = ght.transaction{ght.ensure_user(ARGV[0], false, false)}
50
51
 
51
52
  if user_entry.nil?
52
- Trollop::die "Cannot find user #{owner}"
53
+ Trollop::die "Cannot find user #{ARGV[0]}"
53
54
  end
54
55
 
55
56
  user = user_entry[:login]
56
57
 
57
- repo_entry = ght.transaction{ght.ensure_repo(ARGV[0], ARGV[1], false, false, false)}
58
+ repo_entry = ght.transaction{ght.ensure_repo(ARGV[0], ARGV[1], false, false,
59
+ false, false)}
58
60
 
59
61
  if repo_entry.nil?
60
- Trollop::die "Cannot find repository #{owner}/#{ARGV[1]}"
62
+ Trollop::die "Cannot find repository #{ARGV[0]}/#{ARGV[1]}"
61
63
  end
62
64
 
63
65
  repo = repo_entry[:name]
64
66
 
65
- %w(ensure_commits ensure_forks ensure_pull_requests
66
- ensure_issues ensure_project_members ensure_watchers).each {|x|
67
+ def send_message(function, user, repo)
67
68
  begin
68
- ght.send(x, user, repo)
69
+ ght.send(function, user, repo, refresh = true)
69
70
  rescue Exception => e
70
71
  puts STDERR, e.message
71
72
  puts STDERR, e.backtrace
72
73
  end
73
- }
74
+ end
75
+
76
+ functions = %w(ensure_commits ensure_forks ensure_pull_requests
77
+ ensure_issues ensure_project_members ensure_watchers)
78
+
79
+ if ARGV[2].nil?
80
+ functions.each do |x|
81
+ send_message(x, user, repo)
82
+ end
83
+ else
84
+ Trollop::die("Not a valid function: #{ARGV[2]}") unless functions.include? ARGV[2]
85
+ send_message(ARGV[2], user, repo)
86
+ end
74
87
  end
75
88
  end
76
89
 
@@ -79,40 +92,57 @@ end
79
92
  class TransactedGHTorrent < GHTorrent::Mirror
80
93
 
81
94
  def ensure_commit(repo, sha, user, comments = true)
82
- transaction do
95
+ check_transaction do
83
96
  super(repo, sha, user, comments)
84
97
  end
85
98
  end
86
99
 
87
- def ensure_fork(owner, repo, fork_id, date_added = nil)
88
- transaction do
89
- super(owner, repo, fork_id, date_added)
100
+ def ensure_fork(owner, repo, fork_id)
101
+ check_transaction do
102
+ super(owner, repo, fork_id)
90
103
  end
91
104
  end
92
105
 
93
106
  def ensure_pull_request(owner, repo, pullreq_id,
94
107
  comments = true, commits = true,
95
108
  state = nil, created_at = nil)
96
- transaction do
109
+ check_transaction do
97
110
  super(owner, repo, pullreq_id, comments, commits, state, created_at)
98
111
  end
99
112
  end
100
113
 
101
114
  def ensure_issue(owner, repo, issue_id, events = true, comments = true)
102
- transaction do
115
+ check_transaction do
103
116
  super(owner, repo, issue_id, events, comments)
104
117
  end
105
118
  end
106
119
 
107
120
  def ensure_project_member(owner, repo, new_member, date_added)
108
- transaction do
121
+ check_transaction do
109
122
  super(owner, repo, new_member, date_added)
110
123
  end
111
124
  end
112
125
 
113
126
  def ensure_watcher(owner, repo, watcher, date_added = nil)
114
- transaction do
127
+ check_transaction do
115
128
  super(owner, repo, watcher, date_added)
116
129
  end
117
130
  end
131
+
132
+ def check_transaction(&block)
133
+ begin
134
+ if @db.in_transaction?
135
+ debug "Transaction already started"
136
+ yield block
137
+ else
138
+ transaction do
139
+ yield block
140
+ end
141
+ end
142
+ rescue Exception => e
143
+ puts STDERR, e.message
144
+ puts STDERR, e.backtrace
145
+ end
146
+ end
147
+
118
148
  end
@@ -0,0 +1,72 @@
1
+ require 'rubygems'
2
+
3
+ require 'ghtorrent/ghtorrent'
4
+ require 'ghtorrent/settings'
5
+ require 'ghtorrent/logging'
6
+ require 'ghtorrent/command'
7
+ require 'ghtorrent/retriever'
8
+ require 'ghtorrent/commands/ght_retrieve_repo'
9
+
10
+ class GHTRetrieveUser < GHTRetrieveRepo
11
+
12
+ def prepare_options(options)
13
+ options.banner <<-BANNER
14
+ An efficient way to get all data for a single user
15
+
16
+ #{command_name} [options] user
17
+
18
+ BANNER
19
+ end
20
+
21
+ def validate
22
+ super
23
+ Trollop::die "One argument are required" unless args[0] && !args[0].empty?
24
+ end
25
+
26
+ def go
27
+ self.settings = override_config(settings, :mirror_history_pages_back, -1)
28
+ user_entry = ght.transaction{ght.ensure_user(ARGV[0], false, false)}
29
+
30
+ if user_entry.nil?
31
+ Trollop::die "Cannot find user #{ARGV[0]}"
32
+ end
33
+
34
+ user = user_entry[:login]
35
+
36
+ def send_message(function, user)
37
+ begin
38
+ ght.send(function, user)
39
+ rescue Exception => e
40
+ puts STDERR, e.message
41
+ puts STDERR, e.backtrace
42
+ end
43
+ end
44
+
45
+ functions = %w(ensure_user_followers ensure_orgs)
46
+
47
+ if ARGV[2].nil?
48
+ functions.each do |x|
49
+ send_message(x, user)
50
+ end
51
+ else
52
+ Trollop::die("Not a valid function: #{ARGV[2]}") unless functions.include? ARGV[2]
53
+ send_message(ARGV[2], user)
54
+ end
55
+
56
+ end
57
+ end
58
+
59
+ class TransactedGHTorrent < GHTorrent::Mirror
60
+
61
+ def ensure_user_followers(user)
62
+ check_transaction do
63
+ super(user)
64
+ end
65
+ end
66
+
67
+ def ensure_orgs(user)
68
+ check_transaction do
69
+ super(user)
70
+ end
71
+ end
72
+ end
@@ -24,6 +24,8 @@ module GHTorrent
24
24
 
25
25
  # Get a connection to the database
26
26
  def get_db
27
+ return @db unless @db.nil?
28
+
27
29
  Sequel.single_threaded = true
28
30
  @db = Sequel.connect(config(:sql_url), :encoding => 'utf8')
29
31
  #@db.loggers << @logger
@@ -54,7 +56,6 @@ module GHTorrent
54
56
  end
55
57
 
56
58
  transaction do
57
- ensure_user(user, true, true)
58
59
  ensure_commit(repo, sha, user)
59
60
  end
60
61
  end
@@ -78,10 +79,9 @@ module GHTorrent
78
79
  # [user] The login of the repository owner
79
80
  # [repo] The name of the repository
80
81
  # [comment_id] The id of the commit comment to retrieve
81
- # [date_added] The timestamp that the add event took place
82
- def get_commit_comment(user, repo, comment_id, date_added)
82
+ def get_commit_comment(user, repo, comment_id)
83
83
  transaction do
84
- ensure_commit_comment(user, repo, comment_id, date_added)
84
+ ensure_commit_comment(user, repo, comment_id)
85
85
  end
86
86
  end
87
87
 
@@ -106,8 +106,6 @@ module GHTorrent
106
106
  # [date_added] The timestamp that the add event took place
107
107
  def get_follower(follower, followed, date_added)
108
108
  transaction do
109
- ensure_user(follower, true, true)
110
- ensure_user(followed, true, true)
111
109
  ensure_user_follower(followed, follower, date_added)
112
110
  end
113
111
  end
@@ -130,10 +128,9 @@ module GHTorrent
130
128
  # [owner] The login of the repository owner
131
129
  # [repo] The name of the repository
132
130
  # [fork_id] The fork item id
133
- # [date_added] The timestamp that the add event took place
134
- def get_fork(owner, repo, fork_id, date_added)
131
+ def get_fork(owner, repo, fork_id)
135
132
  transaction do
136
- ensure_fork(owner, repo, fork_id, date_added)
133
+ ensure_fork(owner, repo, fork_id)
137
134
  end
138
135
  end
139
136
 
@@ -144,9 +141,9 @@ module GHTorrent
144
141
  # [repo] The name of the repository
145
142
  # [fork_id] The fork item id
146
143
  # [date_added] The timestamp that the add event took place
147
- def get_pullreq_comment(owner, repo, pullreq_id, comment_id, created_at)
144
+ def get_pullreq_comment(owner, repo, pullreq_id, comment_id)
148
145
  transaction do
149
- ensure_pullreq_comment(owner, repo, pullreq_id, comment_id, created_at)
146
+ ensure_pullreq_comment(owner, repo, pullreq_id, comment_id)
150
147
  end
151
148
  end
152
149
 
@@ -158,9 +155,9 @@ module GHTorrent
158
155
  # [issue_id] The issue id
159
156
  # [action] The action that took place for the issue
160
157
  # [date_added] The timestamp that the add event took place
161
- def get_issue(owner, repo, issue_id, created_at)
158
+ def get_issue(owner, repo, issue_id)
162
159
  transaction do
163
- ensure_issue(owner, repo, issue_id, created_at)
160
+ ensure_issue(owner, repo, issue_id)
164
161
  end
165
162
  end
166
163
 
@@ -181,7 +178,7 @@ module GHTorrent
181
178
  # Make sure a commit exists
182
179
  #
183
180
  def ensure_commit(repo, sha, user, comments = true)
184
- ensure_repo(user, repo)
181
+ ensure_repo(user, repo, false, false, false, false)
185
182
  c = retrieve_commit(repo, sha, user)
186
183
 
187
184
  if c.nil?
@@ -208,8 +205,9 @@ module GHTorrent
208
205
  # [sha] The first commit to start retrieving from. If nil, then the
209
206
  # earliest stored commit will be used instead.
210
207
  # [num_pages] The number of commit pages to retrieve
211
- def ensure_commits(user, repo, sha = nil,
212
- num_pages = config(:mirror_commit_pages_new_repo))
208
+ def ensure_commits(user, repo, refresh = false, sha = nil,
209
+ num_pages = config(:mirror_commit_pages_new_repo)
210
+ )
213
211
  userid = @db[:users].filter(:login => user).first[:id]
214
212
  repoid = @db[:projects].filter(:owner_id => userid,
215
213
  :name => repo).first[:id]
@@ -236,32 +234,37 @@ module GHTorrent
236
234
  # in the database.
237
235
  def ensure_parents(commit)
238
236
  commits = @db[:commits]
239
- commit['parents'].each do |p|
240
- parents = @db[:commit_parents]
241
- url = p['url'].split(/\//)
242
- this = commits.first(:sha => commit['sha'])
243
- parent = commits.first(:sha => url[7])
244
-
245
- if parent.nil?
246
- store_commit(retrieve_commit(url[5], url[7], url[4]), url[5], url[4])
247
- parent = commits.first(:sha => url[7])
248
- end
249
-
250
- if parent.nil?
237
+ parents = @db[:commit_parents]
238
+ commit['parents'].map do |p|
239
+ url = p['url'].split(/\//)
240
+ this = commits.first(:sha => commit['sha'])
241
+ parent = commits.first(:sha => url[7])
242
+
243
+ if parent.nil?
244
+ c = retrieve_commit(url[5], url[7], url[4])
245
+ if c.nil?
251
246
  warn "GHTorrent: Could not retrieve #{url[4]}/#{url[5]} -> #{url[7]}, parent to commit #{this[:sha]}"
252
- return
247
+ next
253
248
  end
249
+ parent = store_commit(c, url[5], url[4])
250
+ end
254
251
 
255
- if parents.first(:commit_id => this[:id],
256
- :parent_id => parent[:id]).nil?
252
+ if parent.nil?
253
+ warn "GHTorrent: Could not retrieve #{url[4]}/#{url[5]} -> #{url[7]}, parent to commit #{this[:sha]}"
254
+ next
255
+ end
257
256
 
258
- parents.insert(:commit_id => this[:id],
259
- :parent_id => parent[:id])
260
- info "GHTorrent: Added parent #{parent[:sha]} to commit #{this[:sha]}"
261
- else
262
- debug "GHTorrent: Parent #{parent[:sha]} for commit #{this[:sha]} exists"
263
- end
257
+ if parents.first(:commit_id => this[:id],
258
+ :parent_id => parent[:id]).nil?
259
+
260
+ parents.insert(:commit_id => this[:id],
261
+ :parent_id => parent[:id])
262
+ info "GHTorrent: Added parent #{parent[:sha]} to commit #{this[:sha]}"
263
+ else
264
+ debug "GHTorrent: Parent #{parent[:sha]} for commit #{this[:sha]} exists"
264
265
  end
266
+ parents.first(:commit_id => this[:id], :parent_id => parent[:id])
267
+ end
265
268
  end
266
269
 
267
270
  ##
@@ -271,23 +274,27 @@ module GHTorrent
271
274
  # [repo] The repo receiving the commit
272
275
  # [sha] The commit SHA
273
276
  def ensure_repo_commit(user, repo, sha)
274
- userid = @db[:users].first(:login => user)[:id]
275
- projectid = @db[:projects].first(:owner_id => userid,
276
- :name => repo)[:id]
277
+ project = ensure_repo(user, repo, false, false, false, false)
278
+
279
+ if project.nil?
280
+ warn "GHTorrent: Repo #{user}/#{repo} does not exist"
281
+ return
282
+ end
283
+
277
284
  commitid = @db[:commits].first(:sha => sha)[:id]
278
285
 
279
- exists = @db[:project_commits].first(:project_id => projectid,
286
+ exists = @db[:project_commits].first(:project_id => project[:id],
280
287
  :commit_id => commitid)
281
288
  if exists.nil?
282
289
  @db[:project_commits].insert(
283
- :project_id => projectid,
290
+ :project_id => project[:id],
284
291
  :commit_id => commitid
285
292
  )
286
- info "GHTorrent: Added commit #{user}/#{repo} -> #{sha}"
287
- @db[:project_commits].first(:project_id => projectid,
293
+ info "GHTorrent: Associating commit #{sha} with #{user}/#{repo}"
294
+ @db[:project_commits].first(:project_id => project[:id],
288
295
  :commit_id => commitid)
289
296
  else
290
- debug "GHTorrent: Commit #{user}/#{repo} -> #{sha} exists"
297
+ debug "GHTorrent: Commit #{sha} already associated with #{user}/#{repo}"
291
298
  exists
292
299
  end
293
300
  end
@@ -333,6 +340,17 @@ module GHTorrent
333
340
  return users.first(:login => byemail[:login])
334
341
  end
335
342
 
343
+ # This means that the user's login has been associated with a
344
+ # Github user by the time the commit was done (and hence Github was
345
+ # able to associate the commit to an account), but afterwards the
346
+ # user has deleted his account (before GHTorrent processed it).
347
+ # In the absence of something better to do, try to find the user by email
348
+ # and return a "fake" user entry.
349
+ if added.nil?
350
+ warn "GHTorrent: User account for user #{login} deleted from Github"
351
+ return ensure_user("#{name}<#{email}>", false, false)
352
+ end
353
+
336
354
  if byemail.nil?
337
355
  users.filter(:login => login).update(:name => name) if added[:name].nil?
338
356
  users.filter(:login => login).update(:email => email) if added[:email].nil?
@@ -373,10 +391,12 @@ module GHTorrent
373
391
  def ensure_user(user, followers, orgs)
374
392
  # Github only supports alphanumerics and dashes in its usernames.
375
393
  # All other symbols are treated as emails.
376
- if not user.match(/^[A-Za-z0-9\-]*$/)
394
+ if not user.match(/^[\w\-]*$/)
377
395
  begin
378
396
  name, email = user.split("<")
379
397
  email = email.split(">")[0]
398
+ name = name.strip unless name.nil?
399
+ email = email.strip unless email.nil?
380
400
  rescue Exception
381
401
  raise new GHTorrentException.new("Not a valid email address: #{user}")
382
402
  end
@@ -384,7 +404,7 @@ module GHTorrent
384
404
  unless is_valid_email(email)
385
405
  warn("GHTorrent: Extracted email(#{email}) not valid for user #{user}")
386
406
  end
387
- u = ensure_user_byemail(email.strip, name.strip)
407
+ u = ensure_user_byemail(email, name)
388
408
  else
389
409
  u = ensure_user_byuname(user)
390
410
  ensure_user_followers(user) if followers
@@ -446,9 +466,8 @@ module GHTorrent
446
466
  #
447
467
  # ==Parameters:
448
468
  # [user] The user login to find followers by
449
- def ensure_user_followers(followed, date_added = nil)
469
+ def ensure_user_followers(followed)
450
470
  curuser = ensure_user(followed, false, false)
451
- time = curuser[:created_at]
452
471
  followers = @db.from(:followers, :users).\
453
472
  where(:followers__follower_id => :users__id).
454
473
  where(:followers__user_id => curuser[:id]).select(:login).all
@@ -459,12 +478,12 @@ module GHTorrent
459
478
  else
460
479
  acc
461
480
  end
462
- end.map { |x| ensure_user_follower(followed, x['login'], time) }
481
+ end.map { |x| ensure_user_follower(followed, x['login']) }
463
482
  end
464
483
 
465
484
  ##
466
485
  # Make sure that a user follows another one
467
- def ensure_user_follower(followed, follower, date_added)
486
+ def ensure_user_follower(followed, follower, date_added = nil)
468
487
  follower_user = ensure_user(follower, false, false)
469
488
  followed_user = ensure_user(followed, false, false)
470
489
 
@@ -474,14 +493,17 @@ module GHTorrent
474
493
  end
475
494
 
476
495
  followers = @db[:followers]
477
- followed_id = follower_user[:id]
478
- follower_id = followed_user[:id]
496
+ follower_id = follower_user[:id]
497
+ followed_id = followed_user[:id]
479
498
 
480
499
  follower_exists = followers.first(:user_id => followed_id,
481
500
  :follower_id => follower_id)
482
-
483
501
  if follower_exists.nil?
484
- added = if date_added.nil? then Time.now else date_added end
502
+ added = if date_added.nil?
503
+ max(follower_user[:created_at], followed_user[:created_at])
504
+ else
505
+ date_added
506
+ end
485
507
  retrieved = retrieve_user_follower(followed, follower)
486
508
 
487
509
  if retrieved.nil?
@@ -495,13 +517,17 @@ module GHTorrent
495
517
  :ext_ref_id => retrieved[@ext_uniq])
496
518
  info "GHTorrent: User #{follower} follows #{followed}"
497
519
  else
498
- unless date_added.nil?
499
- followers.filter(:user_id => followed_id,
500
- :follower_id => follower_id)\
520
+ debug "GHTorrent: Follower #{follower} exists for user #{followed}"
521
+ end
522
+
523
+ unless date_added.nil?
524
+ followers.filter(:user_id => followed_id,
525
+ :follower_id => follower_id)\
501
526
  .update(:created_at => date(date_added))
502
- debug "GHTorrent: Updating follower #{followed} -> #{follower}"
503
- end
527
+ debug "GHTorrent: Updating follower #{followed} -> #{follower}, created_at -> #{date(date_added)}"
504
528
  end
529
+
530
+ followers.first(:user_id => followed_id, :follower_id => follower_id)
505
531
  end
506
532
 
507
533
  ##
@@ -529,19 +555,29 @@ module GHTorrent
529
555
  :name => name,
530
556
  :login => login,
531
557
  :created_at => Time.now,
532
- :ext_ref_id => ""
533
- )
558
+ :ext_ref_id => "")
534
559
  info "GHTorrent: Added fake user #{login} -> #{email}"
535
560
  users.first(:login => login)
536
561
  else
537
- users.insert(:login => u['login'],
538
- :name => u['name'],
539
- :company => u['company'],
540
- :email => u['email'],
541
- :location => u['location'],
542
- :created_at => date(u['created_at']),
543
- :ext_ref_id => u[@ext_uniq])
544
- info "GHTorrent: Found #{email} through search API query"
562
+ in_db = users.first(:login => u['login'])
563
+ if in_db.nil?
564
+ users.insert(:login => u['login'],
565
+ :name => u['name'],
566
+ :company => u['company'],
567
+ :email => u['email'],
568
+ :location => u['location'],
569
+ :created_at => date(u['created_at']),
570
+ :ext_ref_id => u[@ext_uniq])
571
+ info "GHTorrent: Found #{email} through search API query"
572
+ else
573
+ in_db.update(:name => u['name'],
574
+ :company => u['company'],
575
+ :email => u['email'],
576
+ :location => u['location'],
577
+ :created_at => date(u['created_at']),
578
+ :ext_ref_id => u[@ext_uniq])
579
+ info "GHTorrent: User with email #{email} exists with username #{u['login']}"
580
+ end
545
581
  users.first(:login => u['login'])
546
582
  end
547
583
  else
@@ -560,10 +596,17 @@ module GHTorrent
560
596
  # == Returns:
561
597
  # If the repo can be retrieved, it is returned as a Hash. Otherwise,
562
598
  # the result is nil
563
- def ensure_repo(user, repo, commits = true, project_members = true, watchers = true)
599
+ def ensure_repo(user, repo, commits = true, project_members = true,
600
+ watchers = true, forks = true)
564
601
 
565
602
  repos = @db[:projects]
566
603
  curuser = ensure_user(user, false, false)
604
+
605
+ if curuser.nil?
606
+ warn "Cannot find user #{user}"
607
+ return
608
+ end
609
+
567
610
  currepo = repos.first(:owner_id => curuser[:id], :name => repo)
568
611
 
569
612
  if currepo.nil?
@@ -575,17 +618,29 @@ module GHTorrent
575
618
  end
576
619
 
577
620
  repos.insert(:url => r['url'],
578
- :owner_id => @db[:users].filter(:login => user).first[:id],
621
+ :owner_id => curuser[:id],
579
622
  :name => r['name'],
580
623
  :description => r['description'],
581
624
  :language => r['language'],
582
625
  :created_at => date(r['created_at']),
583
626
  :ext_ref_id => r[@ext_uniq])
584
627
 
585
- info "GHTorrent: New repo #{repo}"
628
+ unless r['parent'].nil?
629
+ parent_owner = r['parent']['owner']['login']
630
+ parent_repo = r['parent']['name']
631
+
632
+ parent = ensure_repo(parent_owner, parent_repo, false, false, false, false)
633
+
634
+ repos.filter(:owner_id => curuser[:id], :name => repo).update(:forked_from => parent[:id])
635
+
636
+ info "Repo #{user}/#{repo} is a fork from #{parent_owner}/#{parent_repo}"
637
+ end
638
+
639
+ info "GHTorrent: New repo #{user}/#{repo}"
586
640
  ensure_commits(user, repo) if commits
587
641
  ensure_project_members(user, repo) if project_members
588
642
  ensure_watchers(user, repo) if watchers
643
+ ensure_forks(user, repo) if forks
589
644
  repos.first(:owner_id => curuser[:id], :name => repo)
590
645
  else
591
646
  debug "GHTorrent: Repo #{user}/#{repo} exists"
@@ -595,8 +650,8 @@ module GHTorrent
595
650
 
596
651
  ##
597
652
  # Make sure that a project has all the registered members defined
598
- def ensure_project_members(user, repo)
599
- currepo = ensure_repo(user, repo, true, false, true)
653
+ def ensure_project_members(user, repo, refresh = false)
654
+ currepo = ensure_repo(user, repo, false, false, false, false)
600
655
  time = currepo[:created_at]
601
656
 
602
657
  project_members = @db.from(:project_members, :users).\
@@ -616,7 +671,7 @@ module GHTorrent
616
671
  # Make sure that a project member exists in a project
617
672
  def ensure_project_member(owner, repo, new_member, date_added)
618
673
  pr_members = @db[:project_members]
619
- project = ensure_repo(owner, repo, true, false, true)
674
+ project = ensure_repo(owner, repo, false, false, false, false)
620
675
  new_user = ensure_user(new_member, false, false)
621
676
 
622
677
  if project.nil? or new_user.nil?
@@ -627,7 +682,11 @@ module GHTorrent
627
682
  :repo_id => project[:id])
628
683
 
629
684
  if memb_exist.nil?
630
- added = if date_added.nil? then Time.now else date_added end
685
+ added = if date_added.nil?
686
+ max(project[:created_at], new_user[:created_at])
687
+ else
688
+ date_added
689
+ end
631
690
  retrieved = retrieve_repo_collaborator(owner, repo, new_member)
632
691
 
633
692
  if retrieved.nil?
@@ -644,12 +703,13 @@ module GHTorrent
644
703
  info "GHTorrent: Added project member #{repo} -> #{new_member}"
645
704
  else
646
705
  debug "GHTorrent: Project member #{repo} -> #{new_member} exists"
647
- unless date_added.nil?
648
- pr_members.filter(:user_id => new_user[:id],
649
- :repo_id => project[:id])\
706
+ end
707
+
708
+ unless date_added.nil?
709
+ pr_members.filter(:user_id => new_user[:id],
710
+ :repo_id => project[:id])\
650
711
  .update(:created_at => date(date_added))
651
- info "GHTorrent: Updating project member #{repo} -> #{new_member}"
652
- end
712
+ info "GHTorrent: Updating project member #{repo} -> #{new_member}, created_at -> #{date(date_added)}"
653
713
  end
654
714
  end
655
715
 
@@ -734,7 +794,7 @@ module GHTorrent
734
794
  end
735
795
  end
736
796
 
737
- not_saved.map{|x| ensure_commit_comment(user, repo, x['id'], nil)}
797
+ not_saved.map{|x| ensure_commit_comment(user, repo, x['id'])}
738
798
  end
739
799
 
740
800
  ##
@@ -745,7 +805,7 @@ module GHTorrent
745
805
  # [repo] The repository containing the commit whose comment will be retrieved
746
806
  # [id] The comment id to retrieve
747
807
  # [created_at] The timestamp that the comment was made.
748
- def ensure_commit_comment(user, repo, id, created_at)
808
+ def ensure_commit_comment(user, repo, id)
749
809
  stored_comment = @db[:commit_comments].first(:comment_id => id)
750
810
 
751
811
  if stored_comment.nil?
@@ -770,21 +830,15 @@ module GHTorrent
770
830
  )
771
831
  info "GHTorrent: Added commit comment #{commit[:sha]} -> #{user[:login]}"
772
832
  else
773
- unless created_at.nil?
774
- @db[:commit_comments].filter(:comment_id => id)\
775
- .update(:created_at => date(created_at))
776
- info "GHTorrent: Updating comment #{user}/#{repo} -> #{id}"
777
- end
778
833
  info "GHTorrent: Commit comment #{id} exists"
779
834
  end
780
835
  @db[:commit_comments].first(:comment_id => id)
781
836
  end
782
837
 
783
838
  ##
784
- # Make sure that
785
- def ensure_watchers(owner, repo)
786
- currepo = ensure_repo(owner, repo, true, true, false)
787
- time = currepo[:created_at]
839
+ # Make sure that all watchers exist for a repository
840
+ def ensure_watchers(owner, repo, refresh = false)
841
+ currepo = ensure_repo(owner, repo, false, false, false, false)
788
842
 
789
843
  if currepo.nil?
790
844
  warn "Could not retrieve watchers for #{owner}/#{repo}"
@@ -803,13 +857,13 @@ module GHTorrent
803
857
  else
804
858
  acc
805
859
  end
806
- end.map { |x| ensure_watcher(owner, repo, x['login'], time) }
860
+ end.map { |x| ensure_watcher(owner, repo, x['login'], nil) }
807
861
  end
808
862
 
809
863
  ##
810
- # Make sure that a project member exists in a project
864
+ # Make sure that a watcher/stargazer exists for a repository
811
865
  def ensure_watcher(owner, repo, watcher, date_added = nil)
812
- project = ensure_repo(owner, repo, false, false, false)
866
+ project = ensure_repo(owner, repo, false, false, false, false)
813
867
  new_watcher = ensure_user(watcher, false, false)
814
868
 
815
869
  if new_watcher.nil? or project.nil?
@@ -818,11 +872,15 @@ module GHTorrent
818
872
  end
819
873
 
820
874
  watchers = @db[:watchers]
821
- memb_exist = watchers.first(:user_id => new_watcher[:id],
822
- :repo_id => project[:id])
875
+ watcher_exist = watchers.first(:user_id => new_watcher[:id],
876
+ :repo_id => project[:id])
823
877
 
824
- if memb_exist.nil?
825
- added = if date_added.nil? then Time.now else date_added end
878
+ if watcher_exist.nil?
879
+ added = if date_added.nil?
880
+ max(project[:created_at], new_watcher[:created_at])
881
+ else
882
+ date_added
883
+ end
826
884
  retrieved = retrieve_watcher(owner, repo, watcher)
827
885
 
828
886
  if retrieved.nil?
@@ -839,33 +897,42 @@ module GHTorrent
839
897
  info "GHTorrent: Added watcher #{owner}/#{repo} -> #{watcher}"
840
898
  else
841
899
  debug "GHTorrent: Watcher #{owner}/#{repo} -> #{watcher} exists"
842
- unless date_added.nil?
843
- watchers.filter(:user_id => new_watcher[:id],
844
- :repo_id => project[:id])\
900
+ end
901
+
902
+ unless date_added.nil?
903
+ watchers.filter(:user_id => new_watcher[:id],
904
+ :repo_id => project[:id])\
845
905
  .update(:created_at => date(date_added))
846
- info "GHTorrent: Updating watcher #{owner}/#{repo} -> #{watcher}"
847
- end
906
+ info "GHTorrent: Updating watcher #{owner}/#{repo} -> #{watcher}, created_at -> #{date_added}"
848
907
  end
908
+
909
+ watchers.first(:user_id => new_watcher[:id],
910
+ :repo_id => project[:id])
849
911
  end
850
912
 
851
913
  ##
852
914
  # Process all pull requests
853
- def ensure_pull_requests(owner, repo)
854
- currepo = ensure_repo(owner, repo, false, false, false)
915
+ def ensure_pull_requests(owner, repo, refresh = false)
916
+ currepo = ensure_repo(owner, repo, false, false, false, false)
855
917
  if currepo.nil?
856
918
  warn "Could not retrieve pull requests from #{owner}/#{repo}"
857
919
  return
858
920
  end
859
921
 
860
- pull_reqs = @db[:pull_requests].filter(:base_repo_id => currepo[:id]).all
922
+ raw_pull_reqs = if refresh
923
+ retrieve_pull_requests(owner, repo, refresh = true)
924
+ else
925
+ pull_reqs = @db[:pull_requests].filter(:base_repo_id => currepo[:id]).all
926
+ retrieve_pull_requests(owner, repo).reduce([]) do |acc, x|
927
+ if pull_reqs.find { |y| y[:pullreq_id] == x['number'] }.nil?
928
+ acc << x
929
+ else
930
+ acc
931
+ end
932
+ end
933
+ end
861
934
 
862
- retrieve_pull_requests(owner, repo).reduce([]) do |acc, x|
863
- if pull_reqs.find { |y| y[:pullreq_id] == x['number'] }.nil?
864
- acc << x
865
- else
866
- acc
867
- end
868
- end.map { |x| ensure_pull_request(owner, repo, x['number']) }
935
+ raw_pull_reqs.map { |x| ensure_pull_request(owner, repo, x['number']) }
869
936
  end
870
937
 
871
938
  ##
@@ -875,7 +942,7 @@ module GHTorrent
875
942
  state = nil, created_at = nil)
876
943
  pulls_reqs = @db[:pull_requests]
877
944
 
878
- project = ensure_repo(owner, repo, false, false, false)
945
+ project = ensure_repo(owner, repo, false, false, false, false)
879
946
 
880
947
  if project.nil?
881
948
  return
@@ -885,15 +952,13 @@ module GHTorrent
885
952
  def add_history(id, ts, unq, act)
886
953
  pull_req_history = @db[:pull_request_history]
887
954
  entry = pull_req_history.first(:pull_request_id => id,
888
- :ext_ref_id => unq, :action => act)
955
+ :created_at => ts, :action => act)
889
956
  if entry.nil?
890
957
  pull_req_history.insert(:pull_request_id => id, :created_at => ts,
891
958
  :ext_ref_id => unq, :action => act)
892
- info "GHTorrent: New pull request (#{id}) history entry (#{act})"
959
+ info "GHTorrent: New pull request (#{id}) history entry (#{act}) timestamp #{ts}"
893
960
  else
894
- pull_req_history.filter(:pull_request_id => id, :ext_ref_id => unq,
895
- :action => act).update(:created_at => ts)
896
- info "GHTorrent: Updating pull request (#{id}) history entry (#{act}) timestamp #{ts}"
961
+ info "GHTorrent: Pull request (#{id}) history entry (#{act}) timestamp #{ts} exists"
897
962
  end
898
963
  end
899
964
 
@@ -939,7 +1004,7 @@ module GHTorrent
939
1004
 
940
1005
  base_repo = ensure_repo(retrieved['base']['repo']['owner']['login'],
941
1006
  retrieved['base']['repo']['name'],
942
- false, false, false)
1007
+ false, false, false, false)
943
1008
 
944
1009
  base_commit = ensure_commit(retrieved['base']['repo']['name'],
945
1010
  retrieved['base']['sha'],
@@ -955,7 +1020,7 @@ module GHTorrent
955
1020
  head_repo = if has_head_repo(retrieved)
956
1021
  ensure_repo(retrieved['head']['repo']['owner']['login'],
957
1022
  retrieved['head']['repo']['name'],
958
- false, false, false)
1023
+ false, false, false, false)
959
1024
  end
960
1025
 
961
1026
  head_commit = if not head_repo.nil?
@@ -967,7 +1032,12 @@ module GHTorrent
967
1032
 
968
1033
  pull_req_user = ensure_user(retrieved['user']['login'], false, false)
969
1034
 
970
- merged = if retrieved['merged_at'].nil? then false else true end
1035
+ merged = if retrieved['merged_at'].nil? then
1036
+ # Check if the pr's commits are in the repository
1037
+ false
1038
+ else
1039
+ true
1040
+ end
971
1041
  closed = if retrieved['closed_at'].nil? then false else true end
972
1042
 
973
1043
  pull_req = pulls_reqs.first(:base_repo_id => project[:id],
@@ -980,10 +1050,11 @@ module GHTorrent
980
1050
  :base_commit_id => base_commit[:id],
981
1051
  :user_id => pull_req_user[:id],
982
1052
  :pullreq_id => pullreq_id,
983
- :intra_branch => is_intra_branch(retrieved)
1053
+ :intra_branch => is_intra_branch(retrieved),
1054
+ :merged => merged
984
1055
  )
985
1056
 
986
- info log_msg(retrieved)
1057
+ info log_msg(retrieved) + " was added"
987
1058
  else
988
1059
  debug log_msg(retrieved) + " exists"
989
1060
  end
@@ -1001,22 +1072,21 @@ module GHTorrent
1001
1072
  state) unless state.nil?
1002
1073
 
1003
1074
  ensure_pull_request_commits(owner, repo, pullreq_id) if commits
1004
- ensure_pullreq_comments(owner, repo, pullreq_id, created_at) if comments
1075
+ ensure_pullreq_comments(owner, repo, pullreq_id) if comments
1005
1076
 
1006
1077
  pulls_reqs.first(:base_repo_id => project[:id],
1007
1078
  :pullreq_id => pullreq_id)
1008
1079
  end
1009
1080
 
1010
- def ensure_pullreq_comments(owner, repo, pullreq_id, created_at)
1011
- currepo = ensure_repo(owner, repo, true, true, false)
1012
- time = if created_at.nil? then currepo[:created_at] else Time.now() end
1081
+ def ensure_pullreq_comments(owner, repo, pullreq_id)
1082
+ currepo = ensure_repo(owner, repo, false, false, false, false)
1013
1083
 
1014
1084
  if currepo.nil?
1015
1085
  warn "GHTorrent: Could not find repository #{owner}/#{repo}"
1016
1086
  return
1017
1087
  end
1018
1088
 
1019
- pull_req = ensure_pull_request(owner, repo, pullreq_id, false, true)
1089
+ pull_req = ensure_pull_request(owner, repo, pullreq_id, false, false)
1020
1090
 
1021
1091
  if pull_req.nil?
1022
1092
  warn "Could not retrieve pull req #{owner}/#{repo} -> #{pullreq_id}"
@@ -1032,12 +1102,16 @@ module GHTorrent
1032
1102
  acc
1033
1103
  end
1034
1104
  end.map { |x|
1035
- ensure_pullreq_comment(owner, repo, pullreq_id, x['id'], time)
1105
+ ensure_pullreq_comment(owner, repo, pullreq_id, x['id'])
1036
1106
  }
1037
1107
  end
1038
1108
 
1039
- def ensure_pullreq_comment(owner, repo, pullreq_id, comment_id, created_at)
1040
- pull_req = ensure_pull_request(owner, repo, pullreq_id, false, true)
1109
+ def ensure_pullreq_comment(owner, repo, pullreq_id, comment_id)
1110
+ # Commit retrieval is set to false to ensure that no duplicate work
1111
+ # is done on retrieving a pull request. This has the side effect that
1112
+ # commits might not be retrieved if a pullreqcomment event gets processed
1113
+ # before the pullreq event, until the pullreq event has been processed
1114
+ pull_req = ensure_pull_request(owner, repo, pullreq_id, false, false)
1041
1115
 
1042
1116
  if pull_req.nil?
1043
1117
  warn "GHTorrent: Could not retrieve pull req #{owner}/#{repo} -> #{pullreq_id}"
@@ -1084,12 +1158,21 @@ module GHTorrent
1084
1158
  end
1085
1159
 
1086
1160
  def ensure_pull_request_commits(owner, repo, pullreq_id)
1161
+ pullreq = ensure_pull_request(owner, repo, pullreq_id, false, false)
1162
+
1163
+ if pullreq.nil?
1164
+ warn "GHTorrent: Pull request #{pullreq_id} does not exist for #{owner}/#{repo}"
1165
+ return
1166
+ end
1167
+
1087
1168
  retrieve_pull_req_commits(owner, repo, pullreq_id).reduce([]){|acc, c|
1088
- x = ensure_commit(repo, c['sha'], owner, true)
1169
+ next if c.nil?
1170
+ head_repo_owner = c['url'].split(/\//)[4]
1171
+ head_repo_name = c['url'].split(/\//)[5]
1172
+ x = ensure_commit(head_repo_name, c['sha'], head_repo_owner, true)
1089
1173
  acc << x if not x.nil?
1090
1174
  acc
1091
1175
  }.map { |c|
1092
- pullreq = ensure_pull_request(owner, repo, pullreq_id, false, false)
1093
1176
  exists = @db[:pull_request_commits].first(:pull_request_id => pullreq[:id],
1094
1177
  :commit_id => c[:id])
1095
1178
  if exists.nil?
@@ -1110,18 +1193,17 @@ module GHTorrent
1110
1193
  # ==Parameters:
1111
1194
  # [owner] The user to which the project belongs
1112
1195
  # [repo] The repository/project to find forks for
1113
- def ensure_forks(owner, repo)
1114
- currepo = ensure_repo(owner, repo, false, false, false)
1196
+ def ensure_forks(owner, repo, refresh = false)
1197
+ currepo = ensure_repo(owner, repo, false, false, false, false)
1115
1198
 
1116
1199
  if currepo.nil?
1117
1200
  warn "Could not retrieve forks for #{owner}/#{repo}"
1118
1201
  return
1119
1202
  end
1120
1203
 
1121
- existing_forks = @db.from(:forks, :projects, :users).\
1122
- where(:forks__forked_project_id => :projects__id). \
1204
+ existing_forks = @db.from(:projects, :users).\
1123
1205
  where(:users__id => :projects__owner_id). \
1124
- where(:forks__forked_from_id => currepo[:id]).select(:projects__name, :login).all
1206
+ where(:projects__forked_from => currepo[:id]).select(:projects__name, :login).all
1125
1207
 
1126
1208
  retrieve_forks(owner, repo).reduce([]) do |acc, x|
1127
1209
  if existing_forks.find {|y|
@@ -1138,65 +1220,49 @@ module GHTorrent
1138
1220
 
1139
1221
  ##
1140
1222
  # Make sure that a fork is retrieved for a project
1141
- def ensure_fork(owner, repo, fork_id, date_added = nil)
1142
-
1143
- forks = @db[:forks]
1144
- forked = ensure_repo(owner, repo, false, false, false)
1145
- fork_exists = forks.first(:fork_id => fork_id)
1146
-
1147
- if fork_exists.nil?
1148
- retrieved = retrieve_fork(owner, repo, fork_id)
1149
- added = if date_added.nil? then retrieved['created_at'] else date_added end
1150
-
1151
- if retrieved.nil?
1152
- warn "GHTorrent: Fork #{fork_id} does not exist for #{owner}/#{repo}"
1153
- return
1154
- end
1223
+ def ensure_fork(owner, repo, fork_id)
1224
+ fork = retrieve_fork(owner, repo, fork_id)
1155
1225
 
1156
- forked_repo_owner = retrieved['full_name'].split(/\//)[0]
1157
- forked_repo_name = retrieved['full_name'].split(/\//)[1]
1226
+ if fork.nil?
1227
+ warn "GHTorrent: Fork #{fork_id} does not exist for #{owner}/#{repo}"
1228
+ return
1229
+ end
1158
1230
 
1159
- fork = ensure_repo(forked_repo_owner, forked_repo_name)
1231
+ fork_owner = fork['full_name'].split(/\//)[0]
1232
+ fork_name = fork['full_name'].split(/\//)[1]
1160
1233
 
1161
- if forked.nil? or fork.nil?
1162
- warn "Could not add fork #{fork_id}"
1163
- return
1164
- end
1234
+ r = ensure_repo(fork_owner, fork_name, false, false, false, false)
1165
1235
 
1166
- forks.insert(:forked_project_id => fork[:id],
1167
- :forked_from_id => forked[:id],
1168
- :fork_id => fork_id,
1169
- :created_at => added,
1170
- :ext_ref_id => retrieved[@ext_uniq])
1171
- info "GHTorrent: Added #{forked_repo_owner}/#{forked_repo_name} as fork of #{owner}/#{repo}"
1236
+ if r.nil?
1237
+ warn "GHTorrent: Failed to add #{fork_owner}/#{fork_name} as fork of #{owner}/#{repo}"
1172
1238
  else
1173
- debug "GHTorrent: Fork #{fork_id} exists as fork of #{owner}/#{repo}"
1174
- unless date_added.nil?
1175
- forks.filter(:fork_id => fork_id)\
1176
- .update(:created_at => date(date_added))
1177
- debug "GHTorrent: Updating fork #{owner}/#{repo} (#{fork_id})"
1178
- end
1239
+ info "GHTorrent: Added #{fork_owner}/#{fork_name} as fork of #{owner}/#{repo}"
1179
1240
  end
1180
1241
  end
1181
1242
 
1182
1243
  ##
1183
1244
  # Make sure all issues exist for a project
1184
- def ensure_issues(owner, repo)
1185
- currepo = ensure_repo(owner, repo, false, false, false)
1245
+ def ensure_issues(owner, repo, refresh = false)
1246
+ currepo = ensure_repo(owner, repo, false, false, false, false)
1186
1247
  if currepo.nil?
1187
1248
  warn "GHTorrent: Could not retrieve issues for #{owner}/#{repo}"
1188
1249
  return
1189
1250
  end
1190
1251
 
1191
- issues = @db[:issues].filter(:repo_id => currepo[:id]).all
1252
+ raw_issues = if refresh
1253
+ retrieve_issues(owner, repo, refresh = true)
1254
+ else
1255
+ issues = @db[:issues].filter(:repo_id => currepo[:id]).all
1256
+ retrieve_issues(owner, repo).reduce([]) do |acc, x|
1257
+ if issues.find { |y| y[:issue_id] == x['number'] }.nil?
1258
+ acc << x
1259
+ else
1260
+ acc
1261
+ end
1262
+ end
1263
+ end
1192
1264
 
1193
- retrieve_issues(owner, repo).reduce([]) do |acc, x|
1194
- if issues.find { |y| y[:issue_id] == x['number'] }.nil?
1195
- acc << x
1196
- else
1197
- acc
1198
- end
1199
- end.map { |x| ensure_issue(owner, repo, x['number']) }
1265
+ raw_issues.map { |x| ensure_issue(owner, repo, x['number']) }
1200
1266
  end
1201
1267
 
1202
1268
  ##
@@ -1204,7 +1270,7 @@ module GHTorrent
1204
1270
  def ensure_issue(owner, repo, issue_id, events = true, comments = true)
1205
1271
 
1206
1272
  issues = @db[:issues]
1207
- repository = ensure_repo(owner, repo, false, false, false)
1273
+ repository = ensure_repo(owner, repo, false, false, false, false)
1208
1274
 
1209
1275
  if repo.nil?
1210
1276
  warn "Cannot find repo #{owner}/#{repo}"
@@ -1214,24 +1280,27 @@ module GHTorrent
1214
1280
  cur_issue = issues.first(:issue_id => issue_id,
1215
1281
  :repo_id => repository[:id])
1216
1282
 
1217
- if cur_issue.nil?
1218
- retrieved = retrieve_issue(owner, repo, issue_id)
1283
+ retrieved = retrieve_issue(owner, repo, issue_id)
1219
1284
 
1220
- if retrieved.nil?
1221
- warn "GHTorrent: Issue #{issue_id} does not exist for #{owner}/#{repo}"
1222
- return
1223
- end
1285
+ if retrieved.nil?
1286
+ warn "GHTorrent: Issue #{issue_id} does not exist for #{owner}/#{repo}"
1287
+ return
1288
+ end
1289
+
1290
+ # Pull requests and issues share the same issue_id
1291
+ pull_req = unless retrieved['pull_request'].nil? or
1292
+ retrieved['pull_request']['patch_url'].nil?
1293
+ info "GHTorrent: Issue #{owner}/#{repo}->#{issue_id} is a pull request"
1294
+ ensure_pull_request(owner, repo, issue_id)
1295
+ end
1296
+
1297
+ if cur_issue.nil?
1224
1298
 
1225
1299
  reporter = ensure_user(retrieved['user']['login'], false, false)
1226
1300
  assignee = unless retrieved['assignee'].nil?
1227
1301
  ensure_user(retrieved['assignee']['login'], false, false)
1228
1302
  end
1229
1303
 
1230
- # Pull requests and issues share the same issue_id
1231
- pull_req = unless retrieved['pull_request'].nil? or retrieved['pull_request']['patch_url'].nil?
1232
- ensure_pull_request(owner, repo, issue_id)
1233
- end
1234
-
1235
1304
  issues.insert(:repo_id => repository[:id],
1236
1305
  :assignee_id => unless assignee.nil? then assignee[:id] end,
1237
1306
  :reporter_id => reporter[:id],
@@ -1241,23 +1310,26 @@ module GHTorrent
1241
1310
  :created_at => date(retrieved['created_at']),
1242
1311
  :ext_ref_id => retrieved[@ext_uniq])
1243
1312
 
1244
- ensure_issue_events(owner, repo, issue_id) if events
1245
- ensure_issue_comments(owner, repo, issue_id) if comments and retrieved['comments'] > 0
1246
-
1247
1313
  info "GHTorrent: Added issue #{owner}/#{repo} -> #{issue_id}"
1248
- issues.first(:issue_id => issue_id,
1249
- :repo_id => repository[:id])
1250
1314
  else
1251
1315
  info "GHTorrent: Issue #{owner}/#{repo}->#{issue_id} exists"
1252
- cur_issue
1316
+ if cur_issue[:pull_request] == false and not pull_req.nil?
1317
+ info "GHTorrent: Updating issue #{owner}/#{repo}->#{issue_id} as pull request"
1318
+ issues.filter(:issue_id => issue_id, :repo_id => repository[:id]).update(
1319
+ :pull_request => true,
1320
+ :pull_request_id => pull_req[:id])
1321
+ end
1253
1322
  end
1323
+ ensure_issue_events(owner, repo, issue_id) if events
1324
+ ensure_issue_comments(owner, repo, issue_id) if comments
1325
+ issues.first(:issue_id => issue_id,
1326
+ :repo_id => repository[:id])
1254
1327
  end
1255
1328
 
1256
1329
  ##
1257
1330
  # Retrieve and process all events for an issue
1258
1331
  def ensure_issue_events(owner, repo, issue_id)
1259
- currepo = ensure_repo(owner, repo, true, true, false)
1260
- #time = if created_at.nil? then currepo[:created_at] else Time.now() end
1332
+ currepo = ensure_repo(owner, repo, false, false, false, false)
1261
1333
 
1262
1334
  if currepo.nil?
1263
1335
  warn "GHTorrent: Could not find repository #{owner}/#{repo}"
@@ -1361,7 +1433,7 @@ module GHTorrent
1361
1433
  ##
1362
1434
  # Retrieve and process all comments for an issue
1363
1435
  def ensure_issue_comments(owner, repo, issue_id)
1364
- currepo = ensure_repo(owner, repo, true, true, false)
1436
+ currepo = ensure_repo(owner, repo, false, false, false, false)
1365
1437
 
1366
1438
  if currepo.nil?
1367
1439
  warn "GHTorrent: Could not find repository #{owner}/#{repo}"
@@ -1390,7 +1462,7 @@ module GHTorrent
1390
1462
  ##
1391
1463
  # Retrieve and process +comment_id+ for an +issue_id+
1392
1464
  def ensure_issue_comment(owner, repo, issue_id, comment_id)
1393
- issue = ensure_issue(owner, repo, issue_id)
1465
+ issue = ensure_issue(owner, repo, issue_id, false, false)
1394
1466
 
1395
1467
  if issue.nil?
1396
1468
  warn "GHTorrent: Could not retrieve issue #{owner}/#{repo} -> #{issue_id}"
@@ -1467,14 +1539,13 @@ module GHTorrent
1467
1539
  repository = ensure_repo(user, repo, false, false, false)
1468
1540
 
1469
1541
  if repository.nil?
1470
- warn "Could not store commit #{user}/#{repo} #{c['sha']}"
1471
- return
1542
+ warn "GHTorrent: repository #{user}/#{repo} deleted"
1472
1543
  end
1473
1544
 
1474
1545
  commits.insert(:sha => c['sha'],
1475
1546
  :author_id => author[:id],
1476
1547
  :committer_id => commiter[:id],
1477
- :project_id => repository[:id],
1548
+ :project_id => if repository.nil? then nil else repository[:id] end ,
1478
1549
  :created_at => date(c['commit']['author']['date']),
1479
1550
  :ext_ref_id => c[@ext_uniq]
1480
1551
  )
@@ -1513,6 +1584,14 @@ module GHTorrent
1513
1584
  def is_valid_email(email)
1514
1585
  email =~ /^[a-zA-Z][\w\.-]*[a-zA-Z0-9]@[a-zA-Z0-9][\w\.-]*[a-zA-Z0-9]\.[a-zA-Z][a-zA-Z\.]*[a-zA-Z]$/
1515
1586
  end
1587
+
1588
+ def max(a, b)
1589
+ if a >= b
1590
+ a
1591
+ else
1592
+ b
1593
+ end
1594
+ end
1516
1595
  end
1517
1596
  end
1518
1597