ghtorrent 0.7.3 → 0.8

Sign up to get free protection for your applications and to get access to all the features.
@@ -57,9 +57,8 @@ class GHTMirrorEvents < GHTorrent::Command
57
57
  end
58
58
 
59
59
  stored.each do |e|
60
- msg = JSON.dump(e)
61
60
  key = "evt.%s" % e['type']
62
- exchange.publish msg, :persistent => true, :routing_key => key
61
+ exchange.publish e['id'], :persistent => true, :routing_key => key
63
62
  end
64
63
  return new, dupl
65
64
  rescue Exception => e
@@ -97,48 +96,21 @@ class GHTMirrorEvents < GHTorrent::Command
97
96
  exchange = channel.topic(config(:amqp_exchange), :durable => true,
98
97
  :auto_delete => false)
99
98
 
100
- # Initial delay for the retrieve event loop
101
- retrieval_delay = config(:mirror_pollevery)
102
-
103
99
  # Retrieve events
104
- retriever = EventMachine.add_periodic_timer(retrieval_delay) do
100
+ EventMachine.add_periodic_timer(5) do
105
101
  (new, dupl) = retrieve exchange
106
102
  dupl_msgs += dupl
107
103
  new_msgs += new
108
104
  end
109
105
 
110
106
  # Adjust event retrieval delay time to reduce load to Github
111
- EventMachine.add_periodic_timer(120) do
107
+ EventMachine.add_periodic_timer(12) do
112
108
  ratio = (dupl_msgs.to_f / (dupl_msgs + new_msgs).to_f)
113
109
 
114
110
  info("Stats: #{new_msgs} new, #{dupl_msgs} duplicate, ratio: #{ratio}")
115
111
 
116
- new_delay = if ratio >= 0 and ratio < 0.3 then
117
- -1
118
- elsif ratio >= 0.3 and ratio <= 0.5 then
119
- 0
120
- elsif ratio > 0.5 and ratio < 1 then
121
- +1
122
- end
123
-
124
112
  # Reset counters for new loop
125
113
  dupl_msgs = new_msgs = 0
126
-
127
- # Update the retrieval delay and restart the event retriever
128
- if new_delay != 0
129
-
130
- # Stop the retriever task and adjust retrieval delay
131
- retriever.cancel
132
- retrieval_delay = retrieval_delay + new_delay
133
- info("Setting event retrieval delay to #{retrieval_delay} secs")
134
-
135
- # Restart the retriever
136
- retriever = EventMachine.add_periodic_timer(retrieval_delay) do
137
- (new, dupl) = retrieve exchange
138
- dupl_msgs += dupl
139
- new_msgs += new
140
- end
141
- end
142
114
  end
143
115
  end
144
116
  end
@@ -55,8 +55,7 @@ An efficient way to get all data for a single repo
55
55
 
56
56
  user = user_entry[:login]
57
57
 
58
- repo_entry = ght.transaction{ght.ensure_repo(ARGV[0], ARGV[1], false, false,
59
- false, false)}
58
+ repo_entry = ght.transaction{ght.ensure_repo(ARGV[0], ARGV[1])}
60
59
 
61
60
  if repo_entry.nil?
62
61
  Trollop::die "Cannot find repository #{ARGV[0]}/#{ARGV[1]}"
@@ -74,7 +73,7 @@ An efficient way to get all data for a single repo
74
73
  end
75
74
 
76
75
  functions = %w(ensure_commits ensure_forks ensure_pull_requests
77
- ensure_issues ensure_project_members ensure_watchers)
76
+ ensure_issues ensure_project_members ensure_watchers ensure_labels)
78
77
 
79
78
  if ARGV[2].nil?
80
79
  functions.each do |x|
@@ -111,9 +110,9 @@ class TransactedGHTorrent < GHTorrent::Mirror
111
110
  end
112
111
  end
113
112
 
114
# Transaction-guarded ensure_issue: forwards all arguments, including the
# +labels+ flag, to the parent implementation from inside check_transaction.
def ensure_issue(owner, repo, issue_id, events = true, comments = true, labels = true)
  check_transaction { super(owner, repo, issue_id, events, comments, labels) }
end
119
118
 
@@ -129,10 +128,15 @@ class TransactedGHTorrent < GHTorrent::Mirror
129
128
  end
130
129
  end
131
130
 
131
# Transaction-guarded ensure_repo_label: hands the call to the parent
# implementation from inside check_transaction.
def ensure_repo_label(owner, repo, name)
  check_transaction { super(owner, repo, name) }
end
136
+
132
137
  def check_transaction(&block)
133
138
  begin
134
139
  if @db.in_transaction?
135
- debug "Transaction already started"
136
140
  yield block
137
141
  else
138
142
  transaction do
@@ -117,9 +117,9 @@ module GHTorrent
117
117
  # [owner] The owner of the repository to which the pullreq will be applied
118
118
  # [repo] The repository to which the pullreq will be applied
119
119
  # [pullreq_id] The ID of the pull request relative to the repository
120
# Runs ensure_pull_request inside a transaction with comments, commits and
# history all enabled; +state+, +actor+ and +created_at+ are passed through
# unchanged.
def get_pull_request(owner, repo, pullreq_id, state, actor, created_at)
  with_comments = with_commits = with_history = true

  transaction do
    ensure_pull_request(owner, repo, pullreq_id,
                        with_comments, with_commits, with_history,
                        state, actor, created_at)
  end
end
125
125
 
@@ -179,7 +179,7 @@ module GHTorrent
179
179
  # Make sure a commit exists
180
180
  #
181
181
  def ensure_commit(repo, sha, user, comments = true)
182
- ensure_repo(user, repo, false, false, false, false)
182
+ ensure_repo(user, repo)
183
183
  c = retrieve_commit(repo, sha, user)
184
184
 
185
185
  if c.nil?
@@ -275,7 +275,7 @@ module GHTorrent
275
275
  # [repo] The repo receiving the commit
276
276
  # [sha] The commit SHA
277
277
  def ensure_repo_commit(user, repo, sha)
278
- project = ensure_repo(user, repo, false, false, false, false)
278
+ project = ensure_repo(user, repo)
279
279
 
280
280
  if project.nil?
281
281
  warn "GHTorrent: Repo #{user}/#{repo} does not exist"
@@ -597,8 +597,8 @@ module GHTorrent
597
597
  # == Returns:
598
598
  # If the repo can be retrieved, it is returned as a Hash. Otherwise,
599
599
  # the result is nil
600
- def ensure_repo(user, repo, commits = true, project_members = true,
601
- watchers = true, forks = true)
600
+ def ensure_repo(user, repo, commits = false, project_members = false,
601
+ watchers = false, forks = false, labels = false)
602
602
 
603
603
  repos = @db[:projects]
604
604
  curuser = ensure_user(user, false, false)
@@ -630,7 +630,7 @@ module GHTorrent
630
630
  parent_owner = r['parent']['owner']['login']
631
631
  parent_repo = r['parent']['name']
632
632
 
633
- parent = ensure_repo(parent_owner, parent_repo, false, false, false, false)
633
+ parent = ensure_repo(parent_owner, parent_repo)
634
634
 
635
635
  repos.filter(:owner_id => curuser[:id], :name => repo).update(:forked_from => parent[:id])
636
636
 
@@ -642,6 +642,7 @@ module GHTorrent
642
642
  ensure_project_members(user, repo) if project_members
643
643
  ensure_watchers(user, repo) if watchers
644
644
  ensure_forks(user, repo) if forks
645
+ ensure_labels(user, repo) if labels
645
646
  repos.first(:owner_id => curuser[:id], :name => repo)
646
647
  else
647
648
  debug "GHTorrent: Repo #{user}/#{repo} exists"
@@ -652,7 +653,7 @@ module GHTorrent
652
653
  ##
653
654
  # Make sure that a project has all the registered members defined
654
655
  def ensure_project_members(user, repo, refresh = false)
655
- currepo = ensure_repo(user, repo, false, false, false, false)
656
+ currepo = ensure_repo(user, repo)
656
657
  time = currepo[:created_at]
657
658
 
658
659
  project_members = @db.from(:project_members, :users).\
@@ -672,7 +673,7 @@ module GHTorrent
672
673
  # Make sure that a project member exists in a project
673
674
  def ensure_project_member(owner, repo, new_member, date_added)
674
675
  pr_members = @db[:project_members]
675
- project = ensure_repo(owner, repo, false, false, false, false)
676
+ project = ensure_repo(owner, repo)
676
677
  new_user = ensure_user(new_member, false, false)
677
678
 
678
679
  if project.nil? or new_user.nil?
@@ -839,7 +840,7 @@ module GHTorrent
839
840
  ##
840
841
  # Make sure that all watchers exist for a repository
841
842
  def ensure_watchers(owner, repo, refresh = false)
842
- currepo = ensure_repo(owner, repo, false, false, false, false)
843
+ currepo = ensure_repo(owner, repo)
843
844
 
844
845
  if currepo.nil?
845
846
  warn "Could not retrieve watchers for #{owner}/#{repo}"
@@ -864,7 +865,7 @@ module GHTorrent
864
865
  ##
865
866
  # Make sure that a watcher/stargazer exists for a repository
866
867
  def ensure_watcher(owner, repo, watcher, date_added = nil)
867
- project = ensure_repo(owner, repo, false, false, false, false)
868
+ project = ensure_repo(owner, repo)
868
869
  new_watcher = ensure_user(watcher, false, false)
869
870
 
870
871
  if new_watcher.nil? or project.nil?
@@ -914,7 +915,7 @@ module GHTorrent
914
915
  ##
915
916
  # Process all pull requests
916
917
  def ensure_pull_requests(owner, repo, refresh = false)
917
- currepo = ensure_repo(owner, repo, false, false, false, false)
918
+ currepo = ensure_repo(owner, repo)
918
919
  if currepo.nil?
919
920
  warn "Could not retrieve pull requests from #{owner}/#{repo}"
920
921
  return
@@ -939,26 +940,32 @@ module GHTorrent
939
940
  ##
940
941
  # Process a pull request
941
942
  def ensure_pull_request(owner, repo, pullreq_id,
942
- comments = true, commits = true,
943
- state = nil, created_at = nil)
943
+ comments = true, commits = true, history = true,
944
+ state = nil, actor = nil, created_at = nil)
944
945
  pulls_reqs = @db[:pull_requests]
945
946
 
946
- project = ensure_repo(owner, repo, false, false, false, false)
947
+ project = ensure_repo(owner, repo)
947
948
 
948
949
  if project.nil?
949
950
  return
950
951
  end
951
952
 
952
953
  # Adds a pull request history event
953
# Records a pull request history event, or fills in the actor of an event
# that is already stored.
#
# [id]    Database id of the pull request the event belongs to
# [ts]    Timestamp of the event
# [unq]   External reference id stored with a newly inserted entry
# [act]   Name of the action (e.g. 'opened', 'merged', 'closed')
# [actor] Login of the user that performed the action
def add_history(id, ts, unq, act, actor)
  user = ensure_user(actor, false, false)
  actor_id = user.nil? ? nil : user[:id]
  pull_req_history = @db[:pull_request_history]

  # An entry for the same pull request and action whose timestamp lies
  # within 4 seconds of +ts+ is treated as the same event.
  matching = pull_req_history.filter(:pull_request_id => id,
                                     :created_at => (ts - 4)..(ts + 4),
                                     :action => act)

  if matching.first.nil?
    pull_req_history.insert(:pull_request_id => id,
                            :created_at => ts,
                            :ext_ref_id => unq,
                            :action => act,
                            :actor_id => actor_id)
    info "GHTorrent: New pull request (#{id}) event (#{act}) by (#{actor}) timestamp #{ts}"
  else
    # Dataset#first hands back a plain Hash, so calling update on it would
    # only change that in-memory copy. Issue the update on the dataset so
    # it reaches the database, and skip it when the actor could not be
    # resolved rather than dereferencing nil.
    matching.update(:actor_id => actor_id) unless actor_id.nil?
    info "GHTorrent: Pull request (#{id}) history entry (#{act}) timestamp #{ts} exists"
  end
end
@@ -1004,8 +1011,7 @@ module GHTorrent
1004
1011
  end
1005
1012
 
1006
1013
  base_repo = ensure_repo(retrieved['base']['repo']['owner']['login'],
1007
- retrieved['base']['repo']['name'],
1008
- false, false, false, false)
1014
+ retrieved['base']['repo']['name'])
1009
1015
 
1010
1016
  base_commit = ensure_commit(retrieved['base']['repo']['name'],
1011
1017
  retrieved['base']['sha'],
@@ -1020,8 +1026,7 @@ module GHTorrent
1020
1026
  else
1021
1027
  head_repo = if has_head_repo(retrieved)
1022
1028
  ensure_repo(retrieved['head']['repo']['owner']['login'],
1023
- retrieved['head']['repo']['name'],
1024
- false, false, false, false)
1029
+ retrieved['head']['repo']['name'])
1025
1030
  end
1026
1031
 
1027
1032
  head_commit = if not head_repo.nil?
@@ -1033,12 +1038,7 @@ module GHTorrent
1033
1038
 
1034
1039
  pull_req_user = ensure_user(retrieved['user']['login'], false, false)
1035
1040
 
1036
- merged = if retrieved['merged_at'].nil? then
1037
- # Check if the pr's commits are in the repository
1038
- false
1039
- else
1040
- true
1041
- end
1041
+ merged = if retrieved['merged_at'].nil? then false else true end
1042
1042
  closed = if retrieved['closed_at'].nil? then false else true end
1043
1043
 
1044
1044
  pull_req = pulls_reqs.first(:base_repo_id => project[:id],
@@ -1054,40 +1054,65 @@ module GHTorrent
1054
1054
  :intra_branch => is_intra_branch(retrieved),
1055
1055
  :merged => merged
1056
1056
  )
1057
-
1058
- info log_msg(retrieved) + " was added"
1057
+ info log_msg(retrieved) + ' was added'
1059
1058
  else
1060
- debug log_msg(retrieved) + " exists"
1059
+ debug log_msg(retrieved) + ' exists'
1061
1060
  end
1062
1061
 
1063
1062
  pull_req = pulls_reqs.first(:base_repo_id => project[:id],
1064
1063
  :pullreq_id => pullreq_id)
1065
1064
 
1066
- add_history(pull_req[:id], date(retrieved['created_at']),
1067
- retrieved[@ext_uniq], 'opened')
1068
- add_history(pull_req[:id], date(retrieved['merged_at']),
1069
- retrieved[@ext_uniq], 'merged') if merged
1070
- add_history(pull_req[:id], date(retrieved['closed_at']),
1071
- retrieved[@ext_uniq], 'closed') if closed
1072
- add_history(pull_req[:id], date(created_at), retrieved[@ext_uniq],
1073
- state) unless state.nil?
1065
+ # Add a fake (or not so fake) issue in the issues table to serve
1066
+ # as root for retrieving discussion comments for this pull request
1067
+ issues = @db[:issues]
1068
+ issue = issues.first(:pull_request_id => pull_req[:id])
1074
1069
 
1070
+ if issue.nil?
1071
+ issues.insert(:repo_id => base_repo[:id],
1072
+ :assignee_id => nil,
1073
+ :reporter_id => nil,
1074
+ :issue_id => pullreq_id,
1075
+ :pull_request => true,
1076
+ :pull_request_id => pull_req[:id],
1077
+ :created_at => date(retrieved['created_at']),
1078
+ :ext_ref_id => retrieved[@ext_uniq])
1079
+ debug 'Adding accompanying issue for ' + log_msg(retrieved)
1080
+ else
1081
+ debug 'Accompanying issue exists for ' + log_msg(retrieved)
1082
+ end
1083
+
1084
+ if history
1085
+ # Actions on pull requests
1086
+ actor = if actor.nil? then pull_req_user[:login] else actor end
1087
+ opener = pull_req_user[:login]
1088
+ add_history(pull_req[:id], date(retrieved['created_at']),
1089
+ retrieved[@ext_uniq], 'opened', opener)
1090
+ # There is an additional merged_by field for merged pull requests
1091
+ merger = if retrieved['merged_by'].nil? then actor else retrieved['merged_by']['login'] end
1092
+ add_history(pull_req[:id], date(retrieved['merged_at']),
1093
+ retrieved[@ext_uniq], 'merged', merger) if (merged && state != 'merged')
1094
+ closer = if merged then merger else actor end
1095
+ add_history(pull_req[:id], date(retrieved['closed_at']),
1096
+ retrieved[@ext_uniq], 'closed', closer) if (closed && state != 'closed')
1097
+ add_history(pull_req[:id], date(created_at), retrieved[@ext_uniq],
1098
+ state, actor) unless state.nil?
1099
+ end
1075
1100
  ensure_pull_request_commits(owner, repo, pullreq_id) if commits
1076
1101
  ensure_pullreq_comments(owner, repo, pullreq_id) if comments
1102
+ ensure_issue_comments(owner, repo, pullreq_id, pull_req[:id]) if comments
1077
1103
 
1078
- pulls_reqs.first(:base_repo_id => project[:id],
1079
- :pullreq_id => pullreq_id)
1104
+ pull_req
1080
1105
  end
1081
1106
 
1082
1107
  def ensure_pullreq_comments(owner, repo, pullreq_id)
1083
- currepo = ensure_repo(owner, repo, false, false, false, false)
1108
+ currepo = ensure_repo(owner, repo)
1084
1109
 
1085
1110
  if currepo.nil?
1086
1111
  warn "GHTorrent: Could not find repository #{owner}/#{repo}"
1087
1112
  return
1088
1113
  end
1089
1114
 
1090
- pull_req = ensure_pull_request(owner, repo, pullreq_id, false, false)
1115
+ pull_req = ensure_pull_request(owner, repo, pullreq_id, false, false, false)
1091
1116
 
1092
1117
  if pull_req.nil?
1093
1118
  warn "Could not retrieve pull req #{owner}/#{repo} -> #{pullreq_id}"
@@ -1096,7 +1121,7 @@ module GHTorrent
1096
1121
 
1097
1122
  retrieve_pull_req_comments(owner, repo, pullreq_id).reduce([]) do |acc, x|
1098
1123
 
1099
- if @db[:pull_request_comments].first(:pullreq_id => pull_req[:id],
1124
+ if @db[:pull_request_comments].first(:pull_request_id => pull_req[:id],
1100
1125
  :comment_id => x['id']).nil?
1101
1126
  acc << x
1102
1127
  else
@@ -1112,7 +1137,7 @@ module GHTorrent
1112
1137
  # is done on retrieving a pull request. This has the side effect that
1113
1138
  # commits might not be retrieved if a pullreqcomment event gets processed
1114
1139
  # before the pullreq event, until the pullreq event has been processed
1115
- pull_req = ensure_pull_request(owner, repo, pullreq_id, false, false)
1140
+ pull_req = ensure_pull_request(owner, repo, pullreq_id, false, false, false)
1116
1141
 
1117
1142
  if pull_req.nil?
1118
1143
  warn "GHTorrent: Could not retrieve pull req #{owner}/#{repo} -> #{pullreq_id}"
@@ -1159,7 +1184,7 @@ module GHTorrent
1159
1184
  end
1160
1185
 
1161
1186
  def ensure_pull_request_commits(owner, repo, pullreq_id)
1162
- pullreq = ensure_pull_request(owner, repo, pullreq_id, false, false)
1187
+ pullreq = ensure_pull_request(owner, repo, pullreq_id, false, false, false)
1163
1188
 
1164
1189
  if pullreq.nil?
1165
1190
  warn "GHTorrent: Pull request #{pullreq_id} does not exist for #{owner}/#{repo}"
@@ -1195,7 +1220,7 @@ module GHTorrent
1195
1220
  # [owner] The user to which the project belongs
1196
1221
  # [repo] The repository/project to find forks for
1197
1222
  def ensure_forks(owner, repo, refresh = false)
1198
- currepo = ensure_repo(owner, repo, false, false, false, false)
1223
+ currepo = ensure_repo(owner, repo)
1199
1224
 
1200
1225
  if currepo.nil?
1201
1226
  warn "Could not retrieve forks for #{owner}/#{repo}"
@@ -1232,7 +1257,7 @@ module GHTorrent
1232
1257
  fork_owner = fork['full_name'].split(/\//)[0]
1233
1258
  fork_name = fork['full_name'].split(/\//)[1]
1234
1259
 
1235
- r = ensure_repo(fork_owner, fork_name, false, false, false, false)
1260
+ r = ensure_repo(fork_owner, fork_name)
1236
1261
 
1237
1262
  if r.nil?
1238
1263
  warn "GHTorrent: Failed to add #{fork_owner}/#{fork_name} as fork of #{owner}/#{repo}"
@@ -1244,7 +1269,7 @@ module GHTorrent
1244
1269
  ##
1245
1270
  # Make sure all issues exist for a project
1246
1271
  def ensure_issues(owner, repo, refresh = false)
1247
- currepo = ensure_repo(owner, repo, false, false, false, false)
1272
+ currepo = ensure_repo(owner, repo)
1248
1273
  if currepo.nil?
1249
1274
  warn "GHTorrent: Could not retrieve issues for #{owner}/#{repo}"
1250
1275
  return
@@ -1268,10 +1293,11 @@ module GHTorrent
1268
1293
 
1269
1294
  ##
1270
1295
  # Make sure that the issue exists
1271
- def ensure_issue(owner, repo, issue_id, events = true, comments = true)
1296
+ def ensure_issue(owner, repo, issue_id, events = true,
1297
+ comments = true, labels = true)
1272
1298
 
1273
1299
  issues = @db[:issues]
1274
- repository = ensure_repo(owner, repo, false, false, false, false)
1300
+ repository = ensure_repo(owner, repo)
1275
1301
 
1276
1302
  if repo.nil?
1277
1303
  warn "Cannot find repo #{owner}/#{repo}"
@@ -1323,6 +1349,7 @@ module GHTorrent
1323
1349
  end
1324
1350
  ensure_issue_events(owner, repo, issue_id) if events
1325
1351
  ensure_issue_comments(owner, repo, issue_id) if comments
1352
+ ensure_issue_labels(owner, repo, issue_id) if labels
1326
1353
  issues.first(:issue_id => issue_id,
1327
1354
  :repo_id => repository[:id])
1328
1355
  end
@@ -1330,14 +1357,14 @@ module GHTorrent
1330
1357
  ##
1331
1358
  # Retrieve and process all events for an issue
1332
1359
  def ensure_issue_events(owner, repo, issue_id)
1333
- currepo = ensure_repo(owner, repo, false, false, false, false)
1360
+ currepo = ensure_repo(owner, repo)
1334
1361
 
1335
1362
  if currepo.nil?
1336
1363
  warn "GHTorrent: Could not find repository #{owner}/#{repo}"
1337
1364
  return
1338
1365
  end
1339
1366
 
1340
- issue = ensure_issue(owner, repo, issue_id, false, false)
1367
+ issue = ensure_issue(owner, repo, issue_id, false, false, false)
1341
1368
  if issue.nil?
1342
1369
  warn "Could not retrieve issue #{owner}/#{repo} -> #{issue_id}"
1343
1370
  return
@@ -1359,7 +1386,7 @@ module GHTorrent
1359
1386
  ##
1360
1387
  # Retrieve and process +event_id+ for an +issue_id+
1361
1388
  def ensure_issue_event(owner, repo, issue_id, event_id)
1362
- issue = ensure_issue(owner, repo, issue_id, false, false)
1389
+ issue = ensure_issue(owner, repo, issue_id, false, false, false)
1363
1390
 
1364
1391
  if issue.nil?
1365
1392
  warn "GHTorrent: Could not retrieve issue #{owner}/#{repo} -> #{issue_id}"
@@ -1394,7 +1421,7 @@ module GHTorrent
1394
1421
  if retrieved['event'] == "assigned"
1395
1422
 
1396
1423
  def update_assignee(owner, repo, issue, actor)
1397
- @db[:issues][:id => issue[:id]] = {:assignee_id => actor[:id]}
1424
+ @db[:issues].first(:id => issue[:id]).update(:assignee_id => actor[:id])
1398
1425
  info "Updating #{owner}/#{repo} -> #{issue[:id]} assignee to #{actor[:id]}"
1399
1426
  end
1400
1427
 
@@ -1432,16 +1459,24 @@ module GHTorrent
1432
1459
  end
1433
1460
 
1434
1461
  ##
1435
- # Retrieve and process all comments for an issue
1436
- def ensure_issue_comments(owner, repo, issue_id)
1437
- currepo = ensure_repo(owner, repo, false, false, false, false)
1462
+ # Retrieve and process all comments for an issue.
1463
+ # If pull_req_id is not nil this means that we are only retrieving
1464
+ # comments for the pull request discussion for projects that don't have
1465
+ # issues enabled
1466
+ def ensure_issue_comments(owner, repo, issue_id, pull_req_id = nil)
1467
+ currepo = ensure_repo(owner, repo)
1438
1468
 
1439
1469
  if currepo.nil?
1440
1470
  warn "GHTorrent: Could not find repository #{owner}/#{repo}"
1441
1471
  return
1442
1472
  end
1443
1473
 
1444
- issue = ensure_issue(owner, repo, issue_id, false, false)
1474
+ issue = if pull_req_id.nil?
1475
+ ensure_issue(owner, repo, issue_id, false, false, false)
1476
+ else
1477
+ @db[:issues].first(:pull_request_id => pull_req_id)
1478
+ end
1479
+
1445
1480
  if issue.nil?
1446
1481
  warn "Could not retrieve issue #{owner}/#{repo} -> #{issue_id}"
1447
1482
  return
@@ -1456,14 +1491,19 @@ module GHTorrent
1456
1491
  acc
1457
1492
  end
1458
1493
  end.map { |x|
1459
- ensure_issue_comment(owner, repo, issue_id, x['id'])
1494
+ ensure_issue_comment(owner, repo, issue_id, x['id'], pull_req_id)
1460
1495
  }
1461
1496
  end
1462
1497
 
1463
1498
  ##
1464
1499
  # Retrieve and process +comment_id+ for an +issue_id+
1465
- def ensure_issue_comment(owner, repo, issue_id, comment_id)
1466
- issue = ensure_issue(owner, repo, issue_id, false, false)
1500
+ def ensure_issue_comment(owner, repo, issue_id, comment_id,
1501
+ pull_req_id = nil)
1502
+ issue = if pull_req_id.nil?
1503
+ ensure_issue(owner, repo, issue_id, false, false, false)
1504
+ else
1505
+ @db[:issues].first(:pull_request_id => pull_req_id)
1506
+ end
1467
1507
 
1468
1508
  if issue.nil?
1469
1509
  warn "GHTorrent: Could not retrieve issue #{owner}/#{repo} -> #{issue_id}"
@@ -1502,6 +1542,117 @@ module GHTorrent
1502
1542
  end
1503
1543
  end
1504
1544
 
1545
+ ##
1546
+ # Retrieve repository issue labels
1547
# Makes sure that every label defined for a repository is stored locally.
#
# [owner]   The user to which the repository belongs
# [repo]    The repository whose labels are processed
# [refresh] Passed through to retrieve_repo_labels
#
# == Returns:
# The results of ensure_repo_label for each label that was not stored yet,
# or nil if the repository cannot be retrieved.
def ensure_labels(owner, repo, refresh = false)
  currepo = ensure_repo(owner, repo)

  # ensure_repo yields nil when the repo cannot be retrieved; stop here, as
  # the other ensure_* methods do, instead of failing on currepo[:id]
  if currepo.nil?
    warn "Could not retrieve labels for #{owner}/#{repo}"
    return
  end

  known_names = @db[:repo_labels].filter(:repo_id => currepo[:id]).all.
                  map { |stored| stored[:name] }

  retrieve_repo_labels(owner, repo, refresh).
    reject { |x| known_names.include?(x['name']) }.
    map { |x| ensure_repo_label(owner, repo, x['name']) }
end
1560
+
1561
+ ##
1562
+ # Retrieve a single repository issue label by name
1563
# Looks up the label +name+ of +owner+/+repo+ in the repo_labels table and
# retrieves and stores it when it is not there yet.
#
# == Returns:
# The stored label row, or nil when the repository or the label cannot be
# retrieved.
def ensure_repo_label(owner, repo, name)
  currepo = ensure_repo(owner, repo)

  if currepo.nil?
    warn "GHTorrent: Repo #{owner}/#{repo} does not exist"
    return
  end

  repo_labels = @db[:repo_labels]
  lookup = { :repo_id => currepo[:id], :name => name }

  stored = repo_labels.first(lookup)
  return stored unless stored.nil?

  retrieved = retrieve_repo_label(owner, repo, name)

  if retrieved.nil?
    warn "GHTorrent: Repo label #{owner}/#{repo} -> #{name} does not exist"
    return
  end

  repo_labels.insert(lookup.merge(:ext_ref_id => retrieved[@ext_uniq]))

  info "GHTorrent: Added repo label #{owner}/#{repo} -> #{name}"
  repo_labels.first(lookup)
end
1593
+
1594
+ ##
1595
+ # Ensure that all labels have been assigned to the issue
1596
# Assigns to the issue every retrieved label that is not yet linked to it.
#
# == Returns:
# The results of ensure_issue_label for each newly linked label, or nil
# when the issue cannot be retrieved.
def ensure_issue_labels(owner, repo, issue_id)
  issue = ensure_issue(owner, repo, issue_id, false, false, false)

  if issue.nil?
    warn "GHTorrent: Issue #{owner}/#{repo} -> #{issue_id} does not exist"
    return
  end

  # Names of the labels already linked to this issue
  linked = @db.from(:issue_labels, :repo_labels).
             where(:issue_labels__label_id => :repo_labels__id).
             where(:issue_labels__issue_id => issue[:id]).
             select(:repo_labels__name).all

  unlinked = retrieve_issue_labels(owner, repo, issue_id).select do |remote|
    linked.none? { |row| row[:name] == remote['name'] }
  end

  unlinked.map do |remote|
    ensure_issue_label(owner, repo, issue[:issue_id], remote['name'])
  end
end
1619
+
1620
+ ##
1621
+ # Ensure that a specific label has been assigned to the issue
1622
# Links the label +name+ to the issue in the issue_labels table unless the
# link is already stored.
#
# == Returns:
# The issue_labels row, or nil when the issue or the label cannot be
# retrieved.
def ensure_issue_label(owner, repo, issue_id, name)
  issue = ensure_issue(owner, repo, issue_id, false, false, false)

  if issue.nil?
    warn "GHTorrent: Issue #{owner}/#{repo} -> #{issue_id} does not exist"
    return
  end

  label = ensure_repo_label(owner, repo, name)

  if label.nil?
    warn "GHTorrent: Label #{owner}/#{repo} -> #{name} does not exist"
    return
  end

  issue_labels = @db[:issue_labels]
  link = { :label_id => label[:id], :issue_id => issue[:id] }

  stored = issue_labels.first(link)
  return stored unless stored.nil?

  issue_labels.insert(link)
  info "GHTorrent: Added issue label #{name} to issue #{owner}/#{repo} -> #{issue_id}"
  issue_labels.first(link)
end
1655
+
1505
1656
  # Run a block in a DB transaction. Exceptions trigger transaction rollback
1506
1657
  # and are rethrown.
1507
1658
  def transaction(&block)
@@ -1537,7 +1688,7 @@ module GHTorrent
1537
1688
  author = commit_user(c['author'], c['commit']['author'])
1538
1689
  commiter = commit_user(c['committer'], c['commit']['committer'])
1539
1690
 
1540
- repository = ensure_repo(user, repo, false, false, false)
1691
+ repository = ensure_repo(user, repo)
1541
1692
 
1542
1693
  if repository.nil?
1543
1694
  warn "GHTorrent: repository #{user}/#{repo} deleted"