ghtorrent 0.7.3 → 0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -57,9 +57,8 @@ class GHTMirrorEvents < GHTorrent::Command
57
57
  end
58
58
 
59
59
  stored.each do |e|
60
- msg = JSON.dump(e)
61
60
  key = "evt.%s" % e['type']
62
- exchange.publish msg, :persistent => true, :routing_key => key
61
+ exchange.publish e['id'], :persistent => true, :routing_key => key
63
62
  end
64
63
  return new, dupl
65
64
  rescue Exception => e
@@ -97,48 +96,21 @@ class GHTMirrorEvents < GHTorrent::Command
97
96
  exchange = channel.topic(config(:amqp_exchange), :durable => true,
98
97
  :auto_delete => false)
99
98
 
100
- # Initial delay for the retrieve event loop
101
- retrieval_delay = config(:mirror_pollevery)
102
-
103
99
  # Retrieve events
104
- retriever = EventMachine.add_periodic_timer(retrieval_delay) do
100
+ EventMachine.add_periodic_timer(5) do
105
101
  (new, dupl) = retrieve exchange
106
102
  dupl_msgs += dupl
107
103
  new_msgs += new
108
104
  end
109
105
 
110
106
  # Adjust event retrieval delay time to reduce load to Github
111
- EventMachine.add_periodic_timer(120) do
107
+ EventMachine.add_periodic_timer(12) do
112
108
  ratio = (dupl_msgs.to_f / (dupl_msgs + new_msgs).to_f)
113
109
 
114
110
  info("Stats: #{new_msgs} new, #{dupl_msgs} duplicate, ratio: #{ratio}")
115
111
 
116
- new_delay = if ratio >= 0 and ratio < 0.3 then
117
- -1
118
- elsif ratio >= 0.3 and ratio <= 0.5 then
119
- 0
120
- elsif ratio > 0.5 and ratio < 1 then
121
- +1
122
- end
123
-
124
112
  # Reset counters for new loop
125
113
  dupl_msgs = new_msgs = 0
126
-
127
- # Update the retrieval delay and restart the event retriever
128
- if new_delay != 0
129
-
130
- # Stop the retriever task and adjust retrieval delay
131
- retriever.cancel
132
- retrieval_delay = retrieval_delay + new_delay
133
- info("Setting event retrieval delay to #{retrieval_delay} secs")
134
-
135
- # Restart the retriever
136
- retriever = EventMachine.add_periodic_timer(retrieval_delay) do
137
- (new, dupl) = retrieve exchange
138
- dupl_msgs += dupl
139
- new_msgs += new
140
- end
141
- end
142
114
  end
143
115
  end
144
116
  end
@@ -55,8 +55,7 @@ An efficient way to get all data for a single repo
55
55
 
56
56
  user = user_entry[:login]
57
57
 
58
- repo_entry = ght.transaction{ght.ensure_repo(ARGV[0], ARGV[1], false, false,
59
- false, false)}
58
+ repo_entry = ght.transaction{ght.ensure_repo(ARGV[0], ARGV[1])}
60
59
 
61
60
  if repo_entry.nil?
62
61
  Trollop::die "Cannot find repository #{ARGV[0]}/#{ARGV[1]}"
@@ -74,7 +73,7 @@ An efficient way to get all data for a single repo
74
73
  end
75
74
 
76
75
  functions = %w(ensure_commits ensure_forks ensure_pull_requests
77
- ensure_issues ensure_project_members ensure_watchers)
76
+ ensure_issues ensure_project_members ensure_watchers ensure_labels)
78
77
 
79
78
  if ARGV[2].nil?
80
79
  functions.each do |x|
@@ -111,9 +110,9 @@ class TransactedGHTorrent < GHTorrent::Mirror
111
110
  end
112
111
  end
113
112
 
114
def ensure_issue(owner, repo, issue_id, events = true, comments = true, labels = true)
  # Hand every argument, including the labels flag, to the inherited
  # implementation, running it through check_transaction.
  check_transaction { super(owner, repo, issue_id, events, comments, labels) }
end
119
118
 
@@ -129,10 +128,15 @@ class TransactedGHTorrent < GHTorrent::Mirror
129
128
  end
130
129
  end
131
130
 
131
def ensure_repo_label(owner, repo, name)
  # Run the inherited label lookup/creation through check_transaction.
  check_transaction { super(owner, repo, name) }
end
136
+
132
137
  def check_transaction(&block)
133
138
  begin
134
139
  if @db.in_transaction?
135
- debug "Transaction already started"
136
140
  yield block
137
141
  else
138
142
  transaction do
@@ -117,9 +117,9 @@ module GHTorrent
117
117
  # [owner] The owner of the repository to which the pullreq will be applied
118
118
  # [repo] The repository to which the pullreq will be applied
119
119
  # [pullreq_id] The ID of the pull request relative to the repository
120
def get_pull_request(owner, repo, pullreq_id, state, actor, created_at)
  # The three +true+ flags ask ensure_pull_request for comments, commits
  # and history; state, actor and created_at are passed on unchanged.
  transaction { ensure_pull_request(owner, repo, pullreq_id, true, true, true, state, actor, created_at) }
end
125
125
 
@@ -179,7 +179,7 @@ module GHTorrent
179
179
  # Make sure a commit exists
180
180
  #
181
181
  def ensure_commit(repo, sha, user, comments = true)
182
- ensure_repo(user, repo, false, false, false, false)
182
+ ensure_repo(user, repo)
183
183
  c = retrieve_commit(repo, sha, user)
184
184
 
185
185
  if c.nil?
@@ -275,7 +275,7 @@ module GHTorrent
275
275
  # [repo] The repo receiving the commit
276
276
  # [sha] The commit SHA
277
277
  def ensure_repo_commit(user, repo, sha)
278
- project = ensure_repo(user, repo, false, false, false, false)
278
+ project = ensure_repo(user, repo)
279
279
 
280
280
  if project.nil?
281
281
  warn "GHTorrent: Repo #{user}/#{repo} does not exist"
@@ -597,8 +597,8 @@ module GHTorrent
597
597
  # == Returns:
598
598
  # If the repo can be retrieved, it is returned as a Hash. Otherwise,
599
599
  # the result is nil
600
- def ensure_repo(user, repo, commits = true, project_members = true,
601
- watchers = true, forks = true)
600
+ def ensure_repo(user, repo, commits = false, project_members = false,
601
+ watchers = false, forks = false, labels = false)
602
602
 
603
603
  repos = @db[:projects]
604
604
  curuser = ensure_user(user, false, false)
@@ -630,7 +630,7 @@ module GHTorrent
630
630
  parent_owner = r['parent']['owner']['login']
631
631
  parent_repo = r['parent']['name']
632
632
 
633
- parent = ensure_repo(parent_owner, parent_repo, false, false, false, false)
633
+ parent = ensure_repo(parent_owner, parent_repo)
634
634
 
635
635
  repos.filter(:owner_id => curuser[:id], :name => repo).update(:forked_from => parent[:id])
636
636
 
@@ -642,6 +642,7 @@ module GHTorrent
642
642
  ensure_project_members(user, repo) if project_members
643
643
  ensure_watchers(user, repo) if watchers
644
644
  ensure_forks(user, repo) if forks
645
+ ensure_labels(user, repo) if labels
645
646
  repos.first(:owner_id => curuser[:id], :name => repo)
646
647
  else
647
648
  debug "GHTorrent: Repo #{user}/#{repo} exists"
@@ -652,7 +653,7 @@ module GHTorrent
652
653
  ##
653
654
  # Make sure that a project has all the registered members defined
654
655
  def ensure_project_members(user, repo, refresh = false)
655
- currepo = ensure_repo(user, repo, false, false, false, false)
656
+ currepo = ensure_repo(user, repo)
656
657
  time = currepo[:created_at]
657
658
 
658
659
  project_members = @db.from(:project_members, :users).\
@@ -672,7 +673,7 @@ module GHTorrent
672
673
  # Make sure that a project member exists in a project
673
674
  def ensure_project_member(owner, repo, new_member, date_added)
674
675
  pr_members = @db[:project_members]
675
- project = ensure_repo(owner, repo, false, false, false, false)
676
+ project = ensure_repo(owner, repo)
676
677
  new_user = ensure_user(new_member, false, false)
677
678
 
678
679
  if project.nil? or new_user.nil?
@@ -839,7 +840,7 @@ module GHTorrent
839
840
  ##
840
841
  # Make sure that all watchers exist for a repository
841
842
  def ensure_watchers(owner, repo, refresh = false)
842
- currepo = ensure_repo(owner, repo, false, false, false, false)
843
+ currepo = ensure_repo(owner, repo)
843
844
 
844
845
  if currepo.nil?
845
846
  warn "Could not retrieve watchers for #{owner}/#{repo}"
@@ -864,7 +865,7 @@ module GHTorrent
864
865
  ##
865
866
  # Make sure that a watcher/stargazer exists for a repository
866
867
  def ensure_watcher(owner, repo, watcher, date_added = nil)
867
- project = ensure_repo(owner, repo, false, false, false, false)
868
+ project = ensure_repo(owner, repo)
868
869
  new_watcher = ensure_user(watcher, false, false)
869
870
 
870
871
  if new_watcher.nil? or project.nil?
@@ -914,7 +915,7 @@ module GHTorrent
914
915
  ##
915
916
  # Process all pull requests
916
917
  def ensure_pull_requests(owner, repo, refresh = false)
917
- currepo = ensure_repo(owner, repo, false, false, false, false)
918
+ currepo = ensure_repo(owner, repo)
918
919
  if currepo.nil?
919
920
  warn "Could not retrieve pull requests from #{owner}/#{repo}"
920
921
  return
@@ -939,26 +940,32 @@ module GHTorrent
939
940
  ##
940
941
  # Process a pull request
941
942
  def ensure_pull_request(owner, repo, pullreq_id,
942
- comments = true, commits = true,
943
- state = nil, created_at = nil)
943
+ comments = true, commits = true, history = true,
944
+ state = nil, actor = nil, created_at = nil)
944
945
  pulls_reqs = @db[:pull_requests]
945
946
 
946
- project = ensure_repo(owner, repo, false, false, false, false)
947
+ project = ensure_repo(owner, repo)
947
948
 
948
949
  if project.nil?
949
950
  return
950
951
  end
951
952
 
952
953
  # Adds a pull request history event
953
def add_history(id, ts, unq, act, actor)
  # Records that +actor+ performed +act+ on pull request +id+ at +ts+.
  #
  # [id]    Value stored in / matched against :pull_request_id
  # [ts]    Event timestamp; must support +/- 4 (used to build a window)
  # [unq]   Value stored in :ext_ref_id
  # [act]   Action name stored in :action
  # [actor] Login handed to ensure_user to resolve :actor_id
  user = ensure_user(actor, false, false)
  actor_id = user.nil? ? nil : user[:id]

  pull_req_history = @db[:pull_request_history]

  # An entry counts as the same event when its timestamp lies within
  # 4 units of +ts+ for the same pull request and action.
  same_event = {:pull_request_id => id,
                :created_at => (ts - 4)..(ts + 4),
                :action => act}

  if pull_req_history.first(same_event).nil?
    pull_req_history.insert(:pull_request_id => id,
                            :created_at => ts,
                            :ext_ref_id => unq,
                            :action => act,
                            :actor_id => actor_id)
    info "GHTorrent: New pull request (#{id}) event (#{act}) by (#{actor}) timestamp #{ts}"
  else
    # The row returned by #first is a plain Hash, so calling #update on it
    # would only change the in-memory copy. Go through the dataset to
    # persist the actor, and leave the stored actor alone when the user
    # could not be resolved.
    pull_req_history.filter(same_event).update(:actor_id => actor_id) unless actor_id.nil?
    info "GHTorrent: Pull request (#{id}) history entry (#{act}) timestamp #{ts} exists"
  end
end
@@ -1004,8 +1011,7 @@ module GHTorrent
1004
1011
  end
1005
1012
 
1006
1013
  base_repo = ensure_repo(retrieved['base']['repo']['owner']['login'],
1007
- retrieved['base']['repo']['name'],
1008
- false, false, false, false)
1014
+ retrieved['base']['repo']['name'])
1009
1015
 
1010
1016
  base_commit = ensure_commit(retrieved['base']['repo']['name'],
1011
1017
  retrieved['base']['sha'],
@@ -1020,8 +1026,7 @@ module GHTorrent
1020
1026
  else
1021
1027
  head_repo = if has_head_repo(retrieved)
1022
1028
  ensure_repo(retrieved['head']['repo']['owner']['login'],
1023
- retrieved['head']['repo']['name'],
1024
- false, false, false, false)
1029
+ retrieved['head']['repo']['name'])
1025
1030
  end
1026
1031
 
1027
1032
  head_commit = if not head_repo.nil?
@@ -1033,12 +1038,7 @@ module GHTorrent
1033
1038
 
1034
1039
  pull_req_user = ensure_user(retrieved['user']['login'], false, false)
1035
1040
 
1036
- merged = if retrieved['merged_at'].nil? then
1037
- # Check if the pr's commits are in the repository
1038
- false
1039
- else
1040
- true
1041
- end
1041
+ merged = if retrieved['merged_at'].nil? then false else true end
1042
1042
  closed = if retrieved['closed_at'].nil? then false else true end
1043
1043
 
1044
1044
  pull_req = pulls_reqs.first(:base_repo_id => project[:id],
@@ -1054,40 +1054,65 @@ module GHTorrent
1054
1054
  :intra_branch => is_intra_branch(retrieved),
1055
1055
  :merged => merged
1056
1056
  )
1057
-
1058
- info log_msg(retrieved) + " was added"
1057
+ info log_msg(retrieved) + ' was added'
1059
1058
  else
1060
- debug log_msg(retrieved) + " exists"
1059
+ debug log_msg(retrieved) + ' exists'
1061
1060
  end
1062
1061
 
1063
1062
  pull_req = pulls_reqs.first(:base_repo_id => project[:id],
1064
1063
  :pullreq_id => pullreq_id)
1065
1064
 
1066
- add_history(pull_req[:id], date(retrieved['created_at']),
1067
- retrieved[@ext_uniq], 'opened')
1068
- add_history(pull_req[:id], date(retrieved['merged_at']),
1069
- retrieved[@ext_uniq], 'merged') if merged
1070
- add_history(pull_req[:id], date(retrieved['closed_at']),
1071
- retrieved[@ext_uniq], 'closed') if closed
1072
- add_history(pull_req[:id], date(created_at), retrieved[@ext_uniq],
1073
- state) unless state.nil?
1065
+ # Add a fake (or not so fake) issue in the issues table to serve
1066
+ # as root for retrieving discussion comments for this pull request
1067
+ issues = @db[:issues]
1068
+ issue = issues.first(:pull_request_id => pull_req[:id])
1074
1069
 
1070
+ if issue.nil?
1071
+ issues.insert(:repo_id => base_repo[:id],
1072
+ :assignee_id => nil,
1073
+ :reporter_id => nil,
1074
+ :issue_id => pullreq_id,
1075
+ :pull_request => true,
1076
+ :pull_request_id => pull_req[:id],
1077
+ :created_at => date(retrieved['created_at']),
1078
+ :ext_ref_id => retrieved[@ext_uniq])
1079
+ debug 'Adding accompanying issue for ' + log_msg(retrieved)
1080
+ else
1081
+ debug 'Accompanying issue exists for ' + log_msg(retrieved)
1082
+ end
1083
+
1084
+ if history
1085
+ # Actions on pull requests
1086
+ actor = if actor.nil? then pull_req_user[:login] else actor end
1087
+ opener = pull_req_user[:login]
1088
+ add_history(pull_req[:id], date(retrieved['created_at']),
1089
+ retrieved[@ext_uniq], 'opened', opener)
1090
+ # There is an additional merged_by field for merged pull requests
1091
+ merger = if retrieved['merged_by'].nil? then actor else retrieved['merged_by']['login'] end
1092
+ add_history(pull_req[:id], date(retrieved['merged_at']),
1093
+ retrieved[@ext_uniq], 'merged', merger) if (merged && state != 'merged')
1094
+ closer = if merged then merger else actor end
1095
+ add_history(pull_req[:id], date(retrieved['closed_at']),
1096
+ retrieved[@ext_uniq], 'closed', closer) if (closed && state != 'closed')
1097
+ add_history(pull_req[:id], date(created_at), retrieved[@ext_uniq],
1098
+ state, actor) unless state.nil?
1099
+ end
1075
1100
  ensure_pull_request_commits(owner, repo, pullreq_id) if commits
1076
1101
  ensure_pullreq_comments(owner, repo, pullreq_id) if comments
1102
+ ensure_issue_comments(owner, repo, pullreq_id, pull_req[:id]) if comments
1077
1103
 
1078
- pulls_reqs.first(:base_repo_id => project[:id],
1079
- :pullreq_id => pullreq_id)
1104
+ pull_req
1080
1105
  end
1081
1106
 
1082
1107
  def ensure_pullreq_comments(owner, repo, pullreq_id)
1083
- currepo = ensure_repo(owner, repo, false, false, false, false)
1108
+ currepo = ensure_repo(owner, repo)
1084
1109
 
1085
1110
  if currepo.nil?
1086
1111
  warn "GHTorrent: Could not find repository #{owner}/#{repo}"
1087
1112
  return
1088
1113
  end
1089
1114
 
1090
- pull_req = ensure_pull_request(owner, repo, pullreq_id, false, false)
1115
+ pull_req = ensure_pull_request(owner, repo, pullreq_id, false, false, false)
1091
1116
 
1092
1117
  if pull_req.nil?
1093
1118
  warn "Could not retrieve pull req #{owner}/#{repo} -> #{pullreq_id}"
@@ -1096,7 +1121,7 @@ module GHTorrent
1096
1121
 
1097
1122
  retrieve_pull_req_comments(owner, repo, pullreq_id).reduce([]) do |acc, x|
1098
1123
 
1099
- if @db[:pull_request_comments].first(:pullreq_id => pull_req[:id],
1124
+ if @db[:pull_request_comments].first(:pull_request_id => pull_req[:id],
1100
1125
  :comment_id => x['id']).nil?
1101
1126
  acc << x
1102
1127
  else
@@ -1112,7 +1137,7 @@ module GHTorrent
1112
1137
  # is done on retrieving a pull request. This has the side effect that
1113
1138
  # commits might not be retrieved if a pullreqcomment event gets processed
1114
1139
  # before the pullreq event, until the pullreq event has been processed
1115
- pull_req = ensure_pull_request(owner, repo, pullreq_id, false, false)
1140
+ pull_req = ensure_pull_request(owner, repo, pullreq_id, false, false, false)
1116
1141
 
1117
1142
  if pull_req.nil?
1118
1143
  warn "GHTorrent: Could not retrieve pull req #{owner}/#{repo} -> #{pullreq_id}"
@@ -1159,7 +1184,7 @@ module GHTorrent
1159
1184
  end
1160
1185
 
1161
1186
  def ensure_pull_request_commits(owner, repo, pullreq_id)
1162
- pullreq = ensure_pull_request(owner, repo, pullreq_id, false, false)
1187
+ pullreq = ensure_pull_request(owner, repo, pullreq_id, false, false, false)
1163
1188
 
1164
1189
  if pullreq.nil?
1165
1190
  warn "GHTorrent: Pull request #{pullreq_id} does not exist for #{owner}/#{repo}"
@@ -1195,7 +1220,7 @@ module GHTorrent
1195
1220
  # [owner] The user to which the project belongs
1196
1221
  # [repo] The repository/project to find forks for
1197
1222
  def ensure_forks(owner, repo, refresh = false)
1198
- currepo = ensure_repo(owner, repo, false, false, false, false)
1223
+ currepo = ensure_repo(owner, repo)
1199
1224
 
1200
1225
  if currepo.nil?
1201
1226
  warn "Could not retrieve forks for #{owner}/#{repo}"
@@ -1232,7 +1257,7 @@ module GHTorrent
1232
1257
  fork_owner = fork['full_name'].split(/\//)[0]
1233
1258
  fork_name = fork['full_name'].split(/\//)[1]
1234
1259
 
1235
- r = ensure_repo(fork_owner, fork_name, false, false, false, false)
1260
+ r = ensure_repo(fork_owner, fork_name)
1236
1261
 
1237
1262
  if r.nil?
1238
1263
  warn "GHTorrent: Failed to add #{fork_owner}/#{fork_name} as fork of #{owner}/#{repo}"
@@ -1244,7 +1269,7 @@ module GHTorrent
1244
1269
  ##
1245
1270
  # Make sure all issues exist for a project
1246
1271
  def ensure_issues(owner, repo, refresh = false)
1247
- currepo = ensure_repo(owner, repo, false, false, false, false)
1272
+ currepo = ensure_repo(owner, repo)
1248
1273
  if currepo.nil?
1249
1274
  warn "GHTorrent: Could not retrieve issues for #{owner}/#{repo}"
1250
1275
  return
@@ -1268,10 +1293,11 @@ module GHTorrent
1268
1293
 
1269
1294
  ##
1270
1295
  # Make sure that the issue exists
1271
- def ensure_issue(owner, repo, issue_id, events = true, comments = true)
1296
+ def ensure_issue(owner, repo, issue_id, events = true,
1297
+ comments = true, labels = true)
1272
1298
 
1273
1299
  issues = @db[:issues]
1274
- repository = ensure_repo(owner, repo, false, false, false, false)
1300
+ repository = ensure_repo(owner, repo)
1275
1301
 
1276
1302
  if repo.nil?
1277
1303
  warn "Cannot find repo #{owner}/#{repo}"
@@ -1323,6 +1349,7 @@ module GHTorrent
1323
1349
  end
1324
1350
  ensure_issue_events(owner, repo, issue_id) if events
1325
1351
  ensure_issue_comments(owner, repo, issue_id) if comments
1352
+ ensure_issue_labels(owner, repo, issue_id) if labels
1326
1353
  issues.first(:issue_id => issue_id,
1327
1354
  :repo_id => repository[:id])
1328
1355
  end
@@ -1330,14 +1357,14 @@ module GHTorrent
1330
1357
  ##
1331
1358
  # Retrieve and process all events for an issue
1332
1359
  def ensure_issue_events(owner, repo, issue_id)
1333
- currepo = ensure_repo(owner, repo, false, false, false, false)
1360
+ currepo = ensure_repo(owner, repo)
1334
1361
 
1335
1362
  if currepo.nil?
1336
1363
  warn "GHTorrent: Could not find repository #{owner}/#{repo}"
1337
1364
  return
1338
1365
  end
1339
1366
 
1340
- issue = ensure_issue(owner, repo, issue_id, false, false)
1367
+ issue = ensure_issue(owner, repo, issue_id, false, false, false)
1341
1368
  if issue.nil?
1342
1369
  warn "Could not retrieve issue #{owner}/#{repo} -> #{issue_id}"
1343
1370
  return
@@ -1359,7 +1386,7 @@ module GHTorrent
1359
1386
  ##
1360
1387
  # Retrieve and process +event_id+ for an +issue_id+
1361
1388
  def ensure_issue_event(owner, repo, issue_id, event_id)
1362
- issue = ensure_issue(owner, repo, issue_id, false, false)
1389
+ issue = ensure_issue(owner, repo, issue_id, false, false, false)
1363
1390
 
1364
1391
  if issue.nil?
1365
1392
  warn "GHTorrent: Could not retrieve issue #{owner}/#{repo} -> #{issue_id}"
@@ -1394,7 +1421,7 @@ module GHTorrent
1394
1421
  if retrieved['event'] == "assigned"
1395
1422
 
1396
1423
  def update_assignee(owner, repo, issue, actor)
1397
- @db[:issues][:id => issue[:id]] = {:assignee_id => actor[:id]}
1424
+ @db[:issues].first(:id => issue[:id]).update(:assignee_id => actor[:id])
1398
1425
  info "Updating #{owner}/#{repo} -> #{issue[:id]} assignee to #{actor[:id]}"
1399
1426
  end
1400
1427
 
@@ -1432,16 +1459,24 @@ module GHTorrent
1432
1459
  end
1433
1460
 
1434
1461
  ##
1435
- # Retrieve and process all comments for an issue
1436
- def ensure_issue_comments(owner, repo, issue_id)
1437
- currepo = ensure_repo(owner, repo, false, false, false, false)
1462
+ # Retrieve and process all comments for an issue.
1463
+ # If pull_req_id is not nil this means that we are only retrieving
1464
+ # comments for the pull request discussion for projects that don't have
1465
+ # issues enabled
1466
+ def ensure_issue_comments(owner, repo, issue_id, pull_req_id = nil)
1467
+ currepo = ensure_repo(owner, repo)
1438
1468
 
1439
1469
  if currepo.nil?
1440
1470
  warn "GHTorrent: Could not find repository #{owner}/#{repo}"
1441
1471
  return
1442
1472
  end
1443
1473
 
1444
- issue = ensure_issue(owner, repo, issue_id, false, false)
1474
+ issue = if pull_req_id.nil?
1475
+ ensure_issue(owner, repo, issue_id, false, false, false)
1476
+ else
1477
+ @db[:issues].first(:pull_request_id => pull_req_id)
1478
+ end
1479
+
1445
1480
  if issue.nil?
1446
1481
  warn "Could not retrieve issue #{owner}/#{repo} -> #{issue_id}"
1447
1482
  return
@@ -1456,14 +1491,19 @@ module GHTorrent
1456
1491
  acc
1457
1492
  end
1458
1493
  end.map { |x|
1459
- ensure_issue_comment(owner, repo, issue_id, x['id'])
1494
+ ensure_issue_comment(owner, repo, issue_id, x['id'], pull_req_id)
1460
1495
  }
1461
1496
  end
1462
1497
 
1463
1498
  ##
1464
1499
  # Retrieve and process +comment_id+ for an +issue_id+
1465
- def ensure_issue_comment(owner, repo, issue_id, comment_id)
1466
- issue = ensure_issue(owner, repo, issue_id, false, false)
1500
+ def ensure_issue_comment(owner, repo, issue_id, comment_id,
1501
+ pull_req_id = nil)
1502
+ issue = if pull_req_id.nil?
1503
+ ensure_issue(owner, repo, issue_id, false, false, false)
1504
+ else
1505
+ @db[:issues].first(:pull_request_id => pull_req_id)
1506
+ end
1467
1507
 
1468
1508
  if issue.nil?
1469
1509
  warn "GHTorrent: Could not retrieve issue #{owner}/#{repo} -> #{issue_id}"
@@ -1502,6 +1542,117 @@ module GHTorrent
1502
1542
  end
1503
1543
  end
1504
1544
 
1545
+ ##
1546
+ # Retrieve repository issue labels
1547
def ensure_labels(owner, repo, refresh = false)
  # Makes sure every label reported by retrieve_repo_labels for
  # +owner+/+repo+ has a row in repo_labels.
  #
  # [owner]   The user to which the repository belongs
  # [repo]    The repository whose labels are processed
  # [refresh] Passed through to retrieve_repo_labels
  #
  # Returns the results of ensure_repo_label for the labels that were
  # not stored yet, or nil when the repository cannot be resolved.
  currepo = ensure_repo(owner, repo)

  if currepo.nil?
    warn "Could not retrieve labels for #{owner}/#{repo}"
    return
  end

  known = @db[:repo_labels].filter(:repo_id => currepo[:id]).all.map { |l| l[:name] }

  retrieve_repo_labels(owner, repo, refresh).
    reject { |x| known.include?(x['name']) }.
    map { |x| ensure_repo_label(owner, repo, x['name']) }
end
1560
+
1561
+ ##
1562
+ # Retrieve a single repository issue label by name
1563
def ensure_repo_label(owner, repo, name)
  # Returns the repo_labels row for +name+ in +owner+/+repo+, inserting it
  # first when missing. Returns nil when the repository cannot be resolved
  # or retrieve_repo_label yields nothing for +name+.
  project = ensure_repo(owner, repo)

  if project.nil?
    warn "GHTorrent: Repo #{owner}/#{repo} does not exist"
    return
  end

  labels = @db[:repo_labels]
  key = {:repo_id => project[:id], :name => name}

  existing = labels.first(key)
  return existing unless existing.nil?

  fetched = retrieve_repo_label(owner, repo, name)

  if fetched.nil?
    warn "GHTorrent: Repo label #{owner}/#{repo} -> #{name} does not exist"
    return
  end

  labels.insert(key.merge(:ext_ref_id => fetched[@ext_uniq]))

  info "GHTorrent: Added repo label #{owner}/#{repo} -> #{name}"
  labels.first(key)
end
1593
+
1594
+ ##
1595
+ # Ensure that all labels have been assigned to the issue
1596
def ensure_issue_labels(owner, repo, issue_id)
  # Attaches to the issue every label returned by retrieve_issue_labels
  # that is not linked to it yet. Returns the ensure_issue_label results
  # for those labels, or nil when the issue cannot be resolved.
  issue = ensure_issue(owner, repo, issue_id, false, false, false)

  if issue.nil?
    warn "GHTorrent: Issue #{owner}/#{repo} -> #{issue_id} does not exist"
    return
  end

  # Names of the labels already linked to this issue
  assigned = @db.from(:issue_labels, :repo_labels).
    where(:issue_labels__label_id => :repo_labels__id).
    where(:issue_labels__issue_id => issue[:id]).
    select(:repo_labels__name).all

  missing = retrieve_issue_labels(owner, repo, issue_id).reject do |lbl|
    assigned.any? { |row| row[:name] == lbl['name'] }
  end

  missing.map { |lbl| ensure_issue_label(owner, repo, issue[:issue_id], lbl['name']) }
end
1619
+
1620
+ ##
1621
+ # Ensure that a specific label has been assigned to the issue
1622
def ensure_issue_label(owner, repo, issue_id, name)
  # Links label +name+ to the issue, creating the issue_labels row when it
  # is missing. Returns that row, or nil when either the issue or the
  # label cannot be resolved.
  issue = ensure_issue(owner, repo, issue_id, false, false, false)

  if issue.nil?
    warn "GHTorrent: Issue #{owner}/#{repo} -> #{issue_id} does not exist"
    return
  end

  label = ensure_repo_label(owner, repo, name)

  if label.nil?
    warn "GHTorrent: Label #{owner}/#{repo} -> #{name} does not exist"
    return
  end

  links = @db[:issue_labels]
  link = {:label_id => label[:id], :issue_id => issue[:id]}

  found = links.first(link)
  return found unless found.nil?

  links.insert(link)
  info "GHTorrent: Added issue label #{name} to issue #{owner}/#{repo} -> #{issue_id}"
  links.first(link)
end
1655
+
1505
1656
  # Run a block in a DB transaction. Exceptions trigger transaction rollback
1506
1657
  # and are rethrown.
1507
1658
  def transaction(&block)
@@ -1537,7 +1688,7 @@ module GHTorrent
1537
1688
  author = commit_user(c['author'], c['commit']['author'])
1538
1689
  commiter = commit_user(c['committer'], c['commit']['committer'])
1539
1690
 
1540
- repository = ensure_repo(user, repo, false, false, false)
1691
+ repository = ensure_repo(user, repo)
1541
1692
 
1542
1693
  if repository.nil?
1543
1694
  warn "GHTorrent: repository #{user}/#{repo} deleted"