ghtorrent 0.7.3 → 0.8
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +10 -0
- data/Gemfile.lock +1 -1
- data/lib/ghtorrent/adapters/base_adapter.rb +2 -1
- data/lib/ghtorrent/adapters/mongo_persister.rb +40 -52
- data/lib/ghtorrent/api_client.rb +22 -18
- data/lib/ghtorrent/command.rb +0 -1
- data/lib/ghtorrent/commands/ght_data_retrieval.rb +16 -5
- data/lib/ghtorrent/commands/ght_load.rb +35 -100
- data/lib/ghtorrent/commands/ght_mirror_events.rb +3 -31
- data/lib/ghtorrent/commands/ght_retrieve_repo.rb +10 -6
- data/lib/ghtorrent/ghtorrent.rb +216 -65
- data/lib/ghtorrent/migrations/015_fix_table_issue_labels.rb +24 -0
- data/lib/ghtorrent/migrations/016_add_actor_pull_request_history.rb +22 -0
- data/lib/ghtorrent/retriever.rb +25 -2
- data/lib/ghtorrent/settings.rb +0 -2
- data/lib/version.rb +1 -1
- metadata +18 -4
@@ -57,9 +57,8 @@ class GHTMirrorEvents < GHTorrent::Command
|
|
57
57
|
end
|
58
58
|
|
59
59
|
stored.each do |e|
|
60
|
-
msg = JSON.dump(e)
|
61
60
|
key = "evt.%s" % e['type']
|
62
|
-
exchange.publish
|
61
|
+
exchange.publish e['id'], :persistent => true, :routing_key => key
|
63
62
|
end
|
64
63
|
return new, dupl
|
65
64
|
rescue Exception => e
|
@@ -97,48 +96,21 @@ class GHTMirrorEvents < GHTorrent::Command
|
|
97
96
|
exchange = channel.topic(config(:amqp_exchange), :durable => true,
|
98
97
|
:auto_delete => false)
|
99
98
|
|
100
|
-
# Initial delay for the retrieve event loop
|
101
|
-
retrieval_delay = config(:mirror_pollevery)
|
102
|
-
|
103
99
|
# Retrieve events
|
104
|
-
|
100
|
+
EventMachine.add_periodic_timer(5) do
|
105
101
|
(new, dupl) = retrieve exchange
|
106
102
|
dupl_msgs += dupl
|
107
103
|
new_msgs += new
|
108
104
|
end
|
109
105
|
|
110
106
|
# Adjust event retrieval delay time to reduce load to Github
|
111
|
-
EventMachine.add_periodic_timer(
|
107
|
+
EventMachine.add_periodic_timer(12) do
|
112
108
|
ratio = (dupl_msgs.to_f / (dupl_msgs + new_msgs).to_f)
|
113
109
|
|
114
110
|
info("Stats: #{new_msgs} new, #{dupl_msgs} duplicate, ratio: #{ratio}")
|
115
111
|
|
116
|
-
new_delay = if ratio >= 0 and ratio < 0.3 then
|
117
|
-
-1
|
118
|
-
elsif ratio >= 0.3 and ratio <= 0.5 then
|
119
|
-
0
|
120
|
-
elsif ratio > 0.5 and ratio < 1 then
|
121
|
-
+1
|
122
|
-
end
|
123
|
-
|
124
112
|
# Reset counters for new loop
|
125
113
|
dupl_msgs = new_msgs = 0
|
126
|
-
|
127
|
-
# Update the retrieval delay and restart the event retriever
|
128
|
-
if new_delay != 0
|
129
|
-
|
130
|
-
# Stop the retriever task and adjust retrieval delay
|
131
|
-
retriever.cancel
|
132
|
-
retrieval_delay = retrieval_delay + new_delay
|
133
|
-
info("Setting event retrieval delay to #{retrieval_delay} secs")
|
134
|
-
|
135
|
-
# Restart the retriever
|
136
|
-
retriever = EventMachine.add_periodic_timer(retrieval_delay) do
|
137
|
-
(new, dupl) = retrieve exchange
|
138
|
-
dupl_msgs += dupl
|
139
|
-
new_msgs += new
|
140
|
-
end
|
141
|
-
end
|
142
114
|
end
|
143
115
|
end
|
144
116
|
end
|
@@ -55,8 +55,7 @@ An efficient way to get all data for a single repo
|
|
55
55
|
|
56
56
|
user = user_entry[:login]
|
57
57
|
|
58
|
-
repo_entry = ght.transaction{ght.ensure_repo(ARGV[0], ARGV[1],
|
59
|
-
false, false)}
|
58
|
+
repo_entry = ght.transaction{ght.ensure_repo(ARGV[0], ARGV[1])}
|
60
59
|
|
61
60
|
if repo_entry.nil?
|
62
61
|
Trollop::die "Cannot find repository #{ARGV[0]}/#{ARGV[1]}"
|
@@ -74,7 +73,7 @@ An efficient way to get all data for a single repo
|
|
74
73
|
end
|
75
74
|
|
76
75
|
functions = %w(ensure_commits ensure_forks ensure_pull_requests
|
77
|
-
ensure_issues ensure_project_members ensure_watchers)
|
76
|
+
ensure_issues ensure_project_members ensure_watchers ensure_labels)
|
78
77
|
|
79
78
|
if ARGV[2].nil?
|
80
79
|
functions.each do |x|
|
@@ -111,9 +110,9 @@ class TransactedGHTorrent < GHTorrent::Mirror
|
|
111
110
|
end
|
112
111
|
end
|
113
112
|
|
114
|
-
def ensure_issue(owner, repo, issue_id, events = true, comments = true)
|
113
|
+
def ensure_issue(owner, repo, issue_id, events = true, comments = true, labels = true)
|
115
114
|
check_transaction do
|
116
|
-
super(owner, repo, issue_id, events, comments)
|
115
|
+
super(owner, repo, issue_id, events, comments, labels)
|
117
116
|
end
|
118
117
|
end
|
119
118
|
|
@@ -129,10 +128,15 @@ class TransactedGHTorrent < GHTorrent::Mirror
|
|
129
128
|
end
|
130
129
|
end
|
131
130
|
|
131
|
+
def ensure_repo_label(owner, repo, name)
|
132
|
+
check_transaction do
|
133
|
+
super(owner, repo, name)
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
132
137
|
def check_transaction(&block)
|
133
138
|
begin
|
134
139
|
if @db.in_transaction?
|
135
|
-
debug "Transaction already started"
|
136
140
|
yield block
|
137
141
|
else
|
138
142
|
transaction do
|
data/lib/ghtorrent/ghtorrent.rb
CHANGED
@@ -117,9 +117,9 @@ module GHTorrent
|
|
117
117
|
# [owner] The owner of the repository to which the pullreq will be applied
|
118
118
|
# [repo] The repository to which the pullreq will be applied
|
119
119
|
# [pullreq_id] The ID of the pull request relative to the repository
|
120
|
-
def get_pull_request(owner, repo, pullreq_id, state, created_at)
|
120
|
+
def get_pull_request(owner, repo, pullreq_id, state, actor, created_at)
|
121
121
|
transaction do
|
122
|
-
ensure_pull_request(owner, repo, pullreq_id, true, true, state, created_at)
|
122
|
+
ensure_pull_request(owner, repo, pullreq_id, true, true, true, state, actor, created_at)
|
123
123
|
end
|
124
124
|
end
|
125
125
|
|
@@ -179,7 +179,7 @@ module GHTorrent
|
|
179
179
|
# Make sure a commit exists
|
180
180
|
#
|
181
181
|
def ensure_commit(repo, sha, user, comments = true)
|
182
|
-
ensure_repo(user, repo
|
182
|
+
ensure_repo(user, repo)
|
183
183
|
c = retrieve_commit(repo, sha, user)
|
184
184
|
|
185
185
|
if c.nil?
|
@@ -275,7 +275,7 @@ module GHTorrent
|
|
275
275
|
# [repo] The repo receiving the commit
|
276
276
|
# [sha] The commit SHA
|
277
277
|
def ensure_repo_commit(user, repo, sha)
|
278
|
-
project = ensure_repo(user, repo
|
278
|
+
project = ensure_repo(user, repo)
|
279
279
|
|
280
280
|
if project.nil?
|
281
281
|
warn "GHTorrent: Repo #{user}/#{repo} does not exist"
|
@@ -597,8 +597,8 @@ module GHTorrent
|
|
597
597
|
# == Returns:
|
598
598
|
# If the repo can be retrieved, it is returned as a Hash. Otherwise,
|
599
599
|
# the result is nil
|
600
|
-
def ensure_repo(user, repo, commits =
|
601
|
-
watchers =
|
600
|
+
def ensure_repo(user, repo, commits = false, project_members = false,
|
601
|
+
watchers = false, forks = false, labels = false)
|
602
602
|
|
603
603
|
repos = @db[:projects]
|
604
604
|
curuser = ensure_user(user, false, false)
|
@@ -630,7 +630,7 @@ module GHTorrent
|
|
630
630
|
parent_owner = r['parent']['owner']['login']
|
631
631
|
parent_repo = r['parent']['name']
|
632
632
|
|
633
|
-
parent = ensure_repo(parent_owner, parent_repo
|
633
|
+
parent = ensure_repo(parent_owner, parent_repo)
|
634
634
|
|
635
635
|
repos.filter(:owner_id => curuser[:id], :name => repo).update(:forked_from => parent[:id])
|
636
636
|
|
@@ -642,6 +642,7 @@ module GHTorrent
|
|
642
642
|
ensure_project_members(user, repo) if project_members
|
643
643
|
ensure_watchers(user, repo) if watchers
|
644
644
|
ensure_forks(user, repo) if forks
|
645
|
+
ensure_labels(user, repo) if labels
|
645
646
|
repos.first(:owner_id => curuser[:id], :name => repo)
|
646
647
|
else
|
647
648
|
debug "GHTorrent: Repo #{user}/#{repo} exists"
|
@@ -652,7 +653,7 @@ module GHTorrent
|
|
652
653
|
##
|
653
654
|
# Make sure that a project has all the registered members defined
|
654
655
|
def ensure_project_members(user, repo, refresh = false)
|
655
|
-
currepo = ensure_repo(user, repo
|
656
|
+
currepo = ensure_repo(user, repo)
|
656
657
|
time = currepo[:created_at]
|
657
658
|
|
658
659
|
project_members = @db.from(:project_members, :users).\
|
@@ -672,7 +673,7 @@ module GHTorrent
|
|
672
673
|
# Make sure that a project member exists in a project
|
673
674
|
def ensure_project_member(owner, repo, new_member, date_added)
|
674
675
|
pr_members = @db[:project_members]
|
675
|
-
project = ensure_repo(owner, repo
|
676
|
+
project = ensure_repo(owner, repo)
|
676
677
|
new_user = ensure_user(new_member, false, false)
|
677
678
|
|
678
679
|
if project.nil? or new_user.nil?
|
@@ -839,7 +840,7 @@ module GHTorrent
|
|
839
840
|
##
|
840
841
|
# Make sure that all watchers exist for a repository
|
841
842
|
def ensure_watchers(owner, repo, refresh = false)
|
842
|
-
currepo = ensure_repo(owner, repo
|
843
|
+
currepo = ensure_repo(owner, repo)
|
843
844
|
|
844
845
|
if currepo.nil?
|
845
846
|
warn "Could not retrieve watchers for #{owner}/#{repo}"
|
@@ -864,7 +865,7 @@ module GHTorrent
|
|
864
865
|
##
|
865
866
|
# Make sure that a watcher/stargazer exists for a repository
|
866
867
|
def ensure_watcher(owner, repo, watcher, date_added = nil)
|
867
|
-
project = ensure_repo(owner, repo
|
868
|
+
project = ensure_repo(owner, repo)
|
868
869
|
new_watcher = ensure_user(watcher, false, false)
|
869
870
|
|
870
871
|
if new_watcher.nil? or project.nil?
|
@@ -914,7 +915,7 @@ module GHTorrent
|
|
914
915
|
##
|
915
916
|
# Process all pull requests
|
916
917
|
def ensure_pull_requests(owner, repo, refresh = false)
|
917
|
-
currepo = ensure_repo(owner, repo
|
918
|
+
currepo = ensure_repo(owner, repo)
|
918
919
|
if currepo.nil?
|
919
920
|
warn "Could not retrieve pull requests from #{owner}/#{repo}"
|
920
921
|
return
|
@@ -939,26 +940,32 @@ module GHTorrent
|
|
939
940
|
##
|
940
941
|
# Process a pull request
|
941
942
|
def ensure_pull_request(owner, repo, pullreq_id,
|
942
|
-
comments = true, commits = true,
|
943
|
-
state = nil, created_at = nil)
|
943
|
+
comments = true, commits = true, history = true,
|
944
|
+
state = nil, actor = nil, created_at = nil)
|
944
945
|
pulls_reqs = @db[:pull_requests]
|
945
946
|
|
946
|
-
project = ensure_repo(owner, repo
|
947
|
+
project = ensure_repo(owner, repo)
|
947
948
|
|
948
949
|
if project.nil?
|
949
950
|
return
|
950
951
|
end
|
951
952
|
|
952
953
|
# Adds a pull request history event
|
953
|
-
def add_history(id, ts, unq, act)
|
954
|
+
def add_history(id, ts, unq, act, actor)
|
955
|
+
user = ensure_user(actor, false, false)
|
954
956
|
pull_req_history = @db[:pull_request_history]
|
955
957
|
entry = pull_req_history.first(:pull_request_id => id,
|
956
|
-
:created_at => ts
|
958
|
+
:created_at => (ts - 4)..(ts + 4),
|
959
|
+
:action => act)
|
957
960
|
if entry.nil?
|
958
|
-
pull_req_history.insert(:pull_request_id => id,
|
959
|
-
:
|
960
|
-
|
961
|
+
pull_req_history.insert(:pull_request_id => id,
|
962
|
+
:created_at => ts,
|
963
|
+
:ext_ref_id => unq,
|
964
|
+
:action => act,
|
965
|
+
:actor_id => unless user.nil? then user[:id] end)
|
966
|
+
info "GHTorrent: New pull request (#{id}) event (#{act}) by (#{actor}) timestamp #{ts}"
|
961
967
|
else
|
968
|
+
entry.update(:actor_id => user[:id])
|
962
969
|
info "GHTorrent: Pull request (#{id}) history entry (#{act}) timestamp #{ts} exists"
|
963
970
|
end
|
964
971
|
end
|
@@ -1004,8 +1011,7 @@ module GHTorrent
|
|
1004
1011
|
end
|
1005
1012
|
|
1006
1013
|
base_repo = ensure_repo(retrieved['base']['repo']['owner']['login'],
|
1007
|
-
retrieved['base']['repo']['name'],
|
1008
|
-
false, false, false, false)
|
1014
|
+
retrieved['base']['repo']['name'])
|
1009
1015
|
|
1010
1016
|
base_commit = ensure_commit(retrieved['base']['repo']['name'],
|
1011
1017
|
retrieved['base']['sha'],
|
@@ -1020,8 +1026,7 @@ module GHTorrent
|
|
1020
1026
|
else
|
1021
1027
|
head_repo = if has_head_repo(retrieved)
|
1022
1028
|
ensure_repo(retrieved['head']['repo']['owner']['login'],
|
1023
|
-
retrieved['head']['repo']['name'],
|
1024
|
-
false, false, false, false)
|
1029
|
+
retrieved['head']['repo']['name'])
|
1025
1030
|
end
|
1026
1031
|
|
1027
1032
|
head_commit = if not head_repo.nil?
|
@@ -1033,12 +1038,7 @@ module GHTorrent
|
|
1033
1038
|
|
1034
1039
|
pull_req_user = ensure_user(retrieved['user']['login'], false, false)
|
1035
1040
|
|
1036
|
-
merged = if retrieved['merged_at'].nil? then
|
1037
|
-
# Check if the pr's commits are in the repository
|
1038
|
-
false
|
1039
|
-
else
|
1040
|
-
true
|
1041
|
-
end
|
1041
|
+
merged = if retrieved['merged_at'].nil? then false else true end
|
1042
1042
|
closed = if retrieved['closed_at'].nil? then false else true end
|
1043
1043
|
|
1044
1044
|
pull_req = pulls_reqs.first(:base_repo_id => project[:id],
|
@@ -1054,40 +1054,65 @@ module GHTorrent
|
|
1054
1054
|
:intra_branch => is_intra_branch(retrieved),
|
1055
1055
|
:merged => merged
|
1056
1056
|
)
|
1057
|
-
|
1058
|
-
info log_msg(retrieved) + " was added"
|
1057
|
+
info log_msg(retrieved) + ' was added'
|
1059
1058
|
else
|
1060
|
-
debug log_msg(retrieved) +
|
1059
|
+
debug log_msg(retrieved) + ' exists'
|
1061
1060
|
end
|
1062
1061
|
|
1063
1062
|
pull_req = pulls_reqs.first(:base_repo_id => project[:id],
|
1064
1063
|
:pullreq_id => pullreq_id)
|
1065
1064
|
|
1066
|
-
|
1067
|
-
|
1068
|
-
|
1069
|
-
|
1070
|
-
add_history(pull_req[:id], date(retrieved['closed_at']),
|
1071
|
-
retrieved[@ext_uniq], 'closed') if closed
|
1072
|
-
add_history(pull_req[:id], date(created_at), retrieved[@ext_uniq],
|
1073
|
-
state) unless state.nil?
|
1065
|
+
# Add a fake (or not so fake) issue in the issues table to serve
|
1066
|
+
# as root for retrieving discussion comments for this pull request
|
1067
|
+
issues = @db[:issues]
|
1068
|
+
issue = issues.first(:pull_request_id => pull_req[:id])
|
1074
1069
|
|
1070
|
+
if issue.nil?
|
1071
|
+
issues.insert(:repo_id => base_repo[:id],
|
1072
|
+
:assignee_id => nil,
|
1073
|
+
:reporter_id => nil,
|
1074
|
+
:issue_id => pullreq_id,
|
1075
|
+
:pull_request => true,
|
1076
|
+
:pull_request_id => pull_req[:id],
|
1077
|
+
:created_at => date(retrieved['created_at']),
|
1078
|
+
:ext_ref_id => retrieved[@ext_uniq])
|
1079
|
+
debug 'Adding accompanying issue for ' + log_msg(retrieved)
|
1080
|
+
else
|
1081
|
+
debug 'Accompanying issue exists for ' + log_msg(retrieved)
|
1082
|
+
end
|
1083
|
+
|
1084
|
+
if history
|
1085
|
+
# Actions on pull requests
|
1086
|
+
actor = if actor.nil? then pull_req_user[:login] else actor end
|
1087
|
+
opener = pull_req_user[:login]
|
1088
|
+
add_history(pull_req[:id], date(retrieved['created_at']),
|
1089
|
+
retrieved[@ext_uniq], 'opened', opener)
|
1090
|
+
# There is an additional merged_by field for merged pull requests
|
1091
|
+
merger = if retrieved['merged_by'].nil? then actor else retrieved['merged_by']['login'] end
|
1092
|
+
add_history(pull_req[:id], date(retrieved['merged_at']),
|
1093
|
+
retrieved[@ext_uniq], 'merged', merger) if (merged && state != 'merged')
|
1094
|
+
closer = if merged then merger else actor end
|
1095
|
+
add_history(pull_req[:id], date(retrieved['closed_at']),
|
1096
|
+
retrieved[@ext_uniq], 'closed', closer) if (closed && state != 'closed')
|
1097
|
+
add_history(pull_req[:id], date(created_at), retrieved[@ext_uniq],
|
1098
|
+
state, actor) unless state.nil?
|
1099
|
+
end
|
1075
1100
|
ensure_pull_request_commits(owner, repo, pullreq_id) if commits
|
1076
1101
|
ensure_pullreq_comments(owner, repo, pullreq_id) if comments
|
1102
|
+
ensure_issue_comments(owner, repo, pullreq_id, pull_req[:id]) if comments
|
1077
1103
|
|
1078
|
-
|
1079
|
-
:pullreq_id => pullreq_id)
|
1104
|
+
pull_req
|
1080
1105
|
end
|
1081
1106
|
|
1082
1107
|
def ensure_pullreq_comments(owner, repo, pullreq_id)
|
1083
|
-
currepo = ensure_repo(owner, repo
|
1108
|
+
currepo = ensure_repo(owner, repo)
|
1084
1109
|
|
1085
1110
|
if currepo.nil?
|
1086
1111
|
warn "GHTorrent: Could not find repository #{owner}/#{repo}"
|
1087
1112
|
return
|
1088
1113
|
end
|
1089
1114
|
|
1090
|
-
pull_req = ensure_pull_request(owner, repo, pullreq_id, false, false)
|
1115
|
+
pull_req = ensure_pull_request(owner, repo, pullreq_id, false, false, false)
|
1091
1116
|
|
1092
1117
|
if pull_req.nil?
|
1093
1118
|
warn "Could not retrieve pull req #{owner}/#{repo} -> #{pullreq_id}"
|
@@ -1096,7 +1121,7 @@ module GHTorrent
|
|
1096
1121
|
|
1097
1122
|
retrieve_pull_req_comments(owner, repo, pullreq_id).reduce([]) do |acc, x|
|
1098
1123
|
|
1099
|
-
if @db[:pull_request_comments].first(:
|
1124
|
+
if @db[:pull_request_comments].first(:pull_request_id => pull_req[:id],
|
1100
1125
|
:comment_id => x['id']).nil?
|
1101
1126
|
acc << x
|
1102
1127
|
else
|
@@ -1112,7 +1137,7 @@ module GHTorrent
|
|
1112
1137
|
# is done on retrieving a pull request. This has the side effect that
|
1113
1138
|
# commits might not be retrieved if a pullreqcomment event gets processed
|
1114
1139
|
# before the pullreq event, until the pullreq event has been processed
|
1115
|
-
pull_req = ensure_pull_request(owner, repo, pullreq_id, false, false)
|
1140
|
+
pull_req = ensure_pull_request(owner, repo, pullreq_id, false, false, false)
|
1116
1141
|
|
1117
1142
|
if pull_req.nil?
|
1118
1143
|
warn "GHTorrent: Could not retrieve pull req #{owner}/#{repo} -> #{pullreq_id}"
|
@@ -1159,7 +1184,7 @@ module GHTorrent
|
|
1159
1184
|
end
|
1160
1185
|
|
1161
1186
|
def ensure_pull_request_commits(owner, repo, pullreq_id)
|
1162
|
-
pullreq = ensure_pull_request(owner, repo, pullreq_id, false, false)
|
1187
|
+
pullreq = ensure_pull_request(owner, repo, pullreq_id, false, false, false)
|
1163
1188
|
|
1164
1189
|
if pullreq.nil?
|
1165
1190
|
warn "GHTorrent: Pull request #{pullreq_id} does not exist for #{owner}/#{repo}"
|
@@ -1195,7 +1220,7 @@ module GHTorrent
|
|
1195
1220
|
# [owner] The user to which the project belongs
|
1196
1221
|
# [repo] The repository/project to find forks for
|
1197
1222
|
def ensure_forks(owner, repo, refresh = false)
|
1198
|
-
currepo = ensure_repo(owner, repo
|
1223
|
+
currepo = ensure_repo(owner, repo)
|
1199
1224
|
|
1200
1225
|
if currepo.nil?
|
1201
1226
|
warn "Could not retrieve forks for #{owner}/#{repo}"
|
@@ -1232,7 +1257,7 @@ module GHTorrent
|
|
1232
1257
|
fork_owner = fork['full_name'].split(/\//)[0]
|
1233
1258
|
fork_name = fork['full_name'].split(/\//)[1]
|
1234
1259
|
|
1235
|
-
r = ensure_repo(fork_owner, fork_name
|
1260
|
+
r = ensure_repo(fork_owner, fork_name)
|
1236
1261
|
|
1237
1262
|
if r.nil?
|
1238
1263
|
warn "GHTorrent: Failed to add #{fork_owner}/#{fork_name} as fork of #{owner}/#{repo}"
|
@@ -1244,7 +1269,7 @@ module GHTorrent
|
|
1244
1269
|
##
|
1245
1270
|
# Make sure all issues exist for a project
|
1246
1271
|
def ensure_issues(owner, repo, refresh = false)
|
1247
|
-
currepo = ensure_repo(owner, repo
|
1272
|
+
currepo = ensure_repo(owner, repo)
|
1248
1273
|
if currepo.nil?
|
1249
1274
|
warn "GHTorrent: Could not retrieve issues for #{owner}/#{repo}"
|
1250
1275
|
return
|
@@ -1268,10 +1293,11 @@ module GHTorrent
|
|
1268
1293
|
|
1269
1294
|
##
|
1270
1295
|
# Make sure that the issue exists
|
1271
|
-
def ensure_issue(owner, repo, issue_id, events = true,
|
1296
|
+
def ensure_issue(owner, repo, issue_id, events = true,
|
1297
|
+
comments = true, labels = true)
|
1272
1298
|
|
1273
1299
|
issues = @db[:issues]
|
1274
|
-
repository = ensure_repo(owner, repo
|
1300
|
+
repository = ensure_repo(owner, repo)
|
1275
1301
|
|
1276
1302
|
if repo.nil?
|
1277
1303
|
warn "Cannot find repo #{owner}/#{repo}"
|
@@ -1323,6 +1349,7 @@ module GHTorrent
|
|
1323
1349
|
end
|
1324
1350
|
ensure_issue_events(owner, repo, issue_id) if events
|
1325
1351
|
ensure_issue_comments(owner, repo, issue_id) if comments
|
1352
|
+
ensure_issue_labels(owner, repo, issue_id) if labels
|
1326
1353
|
issues.first(:issue_id => issue_id,
|
1327
1354
|
:repo_id => repository[:id])
|
1328
1355
|
end
|
@@ -1330,14 +1357,14 @@ module GHTorrent
|
|
1330
1357
|
##
|
1331
1358
|
# Retrieve and process all events for an issue
|
1332
1359
|
def ensure_issue_events(owner, repo, issue_id)
|
1333
|
-
currepo = ensure_repo(owner, repo
|
1360
|
+
currepo = ensure_repo(owner, repo)
|
1334
1361
|
|
1335
1362
|
if currepo.nil?
|
1336
1363
|
warn "GHTorrent: Could not find repository #{owner}/#{repo}"
|
1337
1364
|
return
|
1338
1365
|
end
|
1339
1366
|
|
1340
|
-
issue = ensure_issue(owner, repo, issue_id, false, false)
|
1367
|
+
issue = ensure_issue(owner, repo, issue_id, false, false, false)
|
1341
1368
|
if issue.nil?
|
1342
1369
|
warn "Could not retrieve issue #{owner}/#{repo} -> #{issue_id}"
|
1343
1370
|
return
|
@@ -1359,7 +1386,7 @@ module GHTorrent
|
|
1359
1386
|
##
|
1360
1387
|
# Retrieve and process +event_id+ for an +issue_id+
|
1361
1388
|
def ensure_issue_event(owner, repo, issue_id, event_id)
|
1362
|
-
issue = ensure_issue(owner, repo, issue_id, false, false)
|
1389
|
+
issue = ensure_issue(owner, repo, issue_id, false, false, false)
|
1363
1390
|
|
1364
1391
|
if issue.nil?
|
1365
1392
|
warn "GHTorrent: Could not retrieve issue #{owner}/#{repo} -> #{issue_id}"
|
@@ -1394,7 +1421,7 @@ module GHTorrent
|
|
1394
1421
|
if retrieved['event'] == "assigned"
|
1395
1422
|
|
1396
1423
|
def update_assignee(owner, repo, issue, actor)
|
1397
|
-
@db[:issues]
|
1424
|
+
@db[:issues].first(:id => issue[:id]).update(:assignee_id => actor[:id])
|
1398
1425
|
info "Updating #{owner}/#{repo} -> #{issue[:id]} assignee to #{actor[:id]}"
|
1399
1426
|
end
|
1400
1427
|
|
@@ -1432,16 +1459,24 @@ module GHTorrent
|
|
1432
1459
|
end
|
1433
1460
|
|
1434
1461
|
##
|
1435
|
-
# Retrieve and process all comments for an issue
|
1436
|
-
|
1437
|
-
|
1462
|
+
# Retrieve and process all comments for an issue.
|
1463
|
+
# If pull_req_id is not nil this means that we are only retrieving
|
1464
|
+
# comments for the pull request discussion for projects that don't have
|
1465
|
+
# issues enabled
|
1466
|
+
def ensure_issue_comments(owner, repo, issue_id, pull_req_id = nil)
|
1467
|
+
currepo = ensure_repo(owner, repo)
|
1438
1468
|
|
1439
1469
|
if currepo.nil?
|
1440
1470
|
warn "GHTorrent: Could not find repository #{owner}/#{repo}"
|
1441
1471
|
return
|
1442
1472
|
end
|
1443
1473
|
|
1444
|
-
issue =
|
1474
|
+
issue = if pull_req_id.nil?
|
1475
|
+
ensure_issue(owner, repo, issue_id, false, false, false)
|
1476
|
+
else
|
1477
|
+
@db[:issues].first(:pull_request_id => pull_req_id)
|
1478
|
+
end
|
1479
|
+
|
1445
1480
|
if issue.nil?
|
1446
1481
|
warn "Could not retrieve issue #{owner}/#{repo} -> #{issue_id}"
|
1447
1482
|
return
|
@@ -1456,14 +1491,19 @@ module GHTorrent
|
|
1456
1491
|
acc
|
1457
1492
|
end
|
1458
1493
|
end.map { |x|
|
1459
|
-
ensure_issue_comment(owner, repo, issue_id, x['id'])
|
1494
|
+
ensure_issue_comment(owner, repo, issue_id, x['id'], pull_req_id)
|
1460
1495
|
}
|
1461
1496
|
end
|
1462
1497
|
|
1463
1498
|
##
|
1464
1499
|
# Retrieve and process +comment_id+ for an +issue_id+
|
1465
|
-
def ensure_issue_comment(owner, repo, issue_id, comment_id
|
1466
|
-
|
1500
|
+
def ensure_issue_comment(owner, repo, issue_id, comment_id,
|
1501
|
+
pull_req_id = nil)
|
1502
|
+
issue = if pull_req_id.nil?
|
1503
|
+
ensure_issue(owner, repo, issue_id, false, false, false)
|
1504
|
+
else
|
1505
|
+
@db[:issues].first(:pull_request_id => pull_req_id)
|
1506
|
+
end
|
1467
1507
|
|
1468
1508
|
if issue.nil?
|
1469
1509
|
warn "GHTorrent: Could not retrieve issue #{owner}/#{repo} -> #{issue_id}"
|
@@ -1502,6 +1542,117 @@ module GHTorrent
|
|
1502
1542
|
end
|
1503
1543
|
end
|
1504
1544
|
|
1545
|
+
##
|
1546
|
+
# Retrieve repository issue labels
|
1547
|
+
def ensure_labels(owner, repo, refresh = false)
|
1548
|
+
currepo = ensure_repo(owner, repo)
|
1549
|
+
|
1550
|
+
repo_labels = @db[:repo_labels].filter(:repo_id => currepo[:id]).all
|
1551
|
+
|
1552
|
+
retrieve_repo_labels(owner, repo, refresh).reduce([]) do |acc, x|
|
1553
|
+
if repo_labels.find {|y| y[:name] == x['name']}.nil?
|
1554
|
+
acc << x
|
1555
|
+
else
|
1556
|
+
acc
|
1557
|
+
end
|
1558
|
+
end.map { |x| ensure_repo_label(owner, repo, x['name']) }
|
1559
|
+
end
|
1560
|
+
|
1561
|
+
##
|
1562
|
+
# Retrieve a single repository issue label by name
|
1563
|
+
def ensure_repo_label(owner, repo, name)
|
1564
|
+
currepo = ensure_repo(owner, repo)
|
1565
|
+
|
1566
|
+
if currepo.nil?
|
1567
|
+
warn "GHTorrent: Repo #{owner}/#{repo} does not exist"
|
1568
|
+
return
|
1569
|
+
end
|
1570
|
+
|
1571
|
+
label = @db[:repo_labels].first(:repo_id => currepo[:id], :name => name)
|
1572
|
+
|
1573
|
+
if label.nil?
|
1574
|
+
retrieved = retrieve_repo_label(owner, repo, name)
|
1575
|
+
|
1576
|
+
if retrieved.nil?
|
1577
|
+
warn "GHTorrent: Repo label #{owner}/#{repo} -> #{name} does not exist"
|
1578
|
+
return
|
1579
|
+
end
|
1580
|
+
|
1581
|
+
@db[:repo_labels].insert(
|
1582
|
+
:repo_id => currepo[:id],
|
1583
|
+
:name => name,
|
1584
|
+
:ext_ref_id => retrieved[@ext_uniq]
|
1585
|
+
)
|
1586
|
+
|
1587
|
+
info "GHTorrent: Added repo label #{owner}/#{repo} -> #{name}"
|
1588
|
+
@db[:repo_labels].first(:repo_id => currepo[:id], :name => name)
|
1589
|
+
else
|
1590
|
+
label
|
1591
|
+
end
|
1592
|
+
end
|
1593
|
+
|
1594
|
+
##
|
1595
|
+
# Ensure that all labels have been assigned to the issue
|
1596
|
+
def ensure_issue_labels(owner, repo, issue_id)
|
1597
|
+
|
1598
|
+
issue = ensure_issue(owner, repo, issue_id, false, false, false)
|
1599
|
+
|
1600
|
+
if issue.nil?
|
1601
|
+
warn "GHTorrent: Issue #{owner}/#{repo} -> #{issue_id} does not exist"
|
1602
|
+
return
|
1603
|
+
end
|
1604
|
+
|
1605
|
+
issue_labels = @db.from(:issue_labels, :repo_labels)\
|
1606
|
+
.where(:issue_labels__label_id => :repo_labels__id)\
|
1607
|
+
.where(:issue_labels__issue_id => issue[:id])\
|
1608
|
+
.select(:repo_labels__name).all
|
1609
|
+
|
1610
|
+
retrieve_issue_labels(owner, repo, issue_id).reduce([]) do |acc, x|
|
1611
|
+
if issue_labels.find {|y| y[:name] == x['name']}.nil?
|
1612
|
+
acc << x
|
1613
|
+
else
|
1614
|
+
acc
|
1615
|
+
end
|
1616
|
+
end.map { |x| ensure_issue_label(owner, repo, issue[:issue_id], x['name']) }
|
1617
|
+
|
1618
|
+
end
|
1619
|
+
|
1620
|
+
##
|
1621
|
+
# Ensure that a specific label has been assigned to the issue
|
1622
|
+
def ensure_issue_label(owner, repo, issue_id, name)
|
1623
|
+
|
1624
|
+
issue = ensure_issue(owner, repo, issue_id, false, false, false)
|
1625
|
+
|
1626
|
+
if issue.nil?
|
1627
|
+
warn "GHTorrent: Issue #{owner}/#{repo} -> #{issue_id} does not exist"
|
1628
|
+
return
|
1629
|
+
end
|
1630
|
+
|
1631
|
+
label = ensure_repo_label(owner, repo, name)
|
1632
|
+
|
1633
|
+
if label.nil?
|
1634
|
+
warn "GHTorrent: Label #{owner}/#{repo} -> #{name} does not exist"
|
1635
|
+
return
|
1636
|
+
end
|
1637
|
+
|
1638
|
+
issue_lbl = @db[:issue_labels].first(:label_id => label[:id],
|
1639
|
+
:issue_id => issue[:id])
|
1640
|
+
|
1641
|
+
if issue_lbl.nil?
|
1642
|
+
|
1643
|
+
@db[:issue_labels].insert(
|
1644
|
+
:label_id => label[:id],
|
1645
|
+
:issue_id => issue[:id],
|
1646
|
+
)
|
1647
|
+
info "GHTorrent: Added issue label #{name} to issue #{owner}/#{repo} -> #{issue_id}"
|
1648
|
+
@db[:issue_labels].first(:label_id => label[:id],
|
1649
|
+
:issue_id => issue[:id])
|
1650
|
+
else
|
1651
|
+
issue_lbl
|
1652
|
+
end
|
1653
|
+
|
1654
|
+
end
|
1655
|
+
|
1505
1656
|
# Run a block in a DB transaction. Exceptions trigger transaction rollback
|
1506
1657
|
# and are rethrown.
|
1507
1658
|
def transaction(&block)
|
@@ -1537,7 +1688,7 @@ module GHTorrent
|
|
1537
1688
|
author = commit_user(c['author'], c['commit']['author'])
|
1538
1689
|
commiter = commit_user(c['committer'], c['commit']['committer'])
|
1539
1690
|
|
1540
|
-
repository = ensure_repo(user, repo
|
1691
|
+
repository = ensure_repo(user, repo)
|
1541
1692
|
|
1542
1693
|
if repository.nil?
|
1543
1694
|
warn "GHTorrent: repository #{user}/#{repo} deleted"
|