ghtorrent 0.7.3 → 0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +10 -0
- data/Gemfile.lock +1 -1
- data/lib/ghtorrent/adapters/base_adapter.rb +2 -1
- data/lib/ghtorrent/adapters/mongo_persister.rb +40 -52
- data/lib/ghtorrent/api_client.rb +22 -18
- data/lib/ghtorrent/command.rb +0 -1
- data/lib/ghtorrent/commands/ght_data_retrieval.rb +16 -5
- data/lib/ghtorrent/commands/ght_load.rb +35 -100
- data/lib/ghtorrent/commands/ght_mirror_events.rb +3 -31
- data/lib/ghtorrent/commands/ght_retrieve_repo.rb +10 -6
- data/lib/ghtorrent/ghtorrent.rb +216 -65
- data/lib/ghtorrent/migrations/015_fix_table_issue_labels.rb +24 -0
- data/lib/ghtorrent/migrations/016_add_actor_pull_request_history.rb +22 -0
- data/lib/ghtorrent/retriever.rb +25 -2
- data/lib/ghtorrent/settings.rb +0 -2
- data/lib/version.rb +1 -1
- metadata +18 -4
@@ -57,9 +57,8 @@ class GHTMirrorEvents < GHTorrent::Command
|
|
57
57
|
end
|
58
58
|
|
59
59
|
stored.each do |e|
|
60
|
-
msg = JSON.dump(e)
|
61
60
|
key = "evt.%s" % e['type']
|
62
|
-
exchange.publish
|
61
|
+
exchange.publish e['id'], :persistent => true, :routing_key => key
|
63
62
|
end
|
64
63
|
return new, dupl
|
65
64
|
rescue Exception => e
|
@@ -97,48 +96,21 @@ class GHTMirrorEvents < GHTorrent::Command
|
|
97
96
|
exchange = channel.topic(config(:amqp_exchange), :durable => true,
|
98
97
|
:auto_delete => false)
|
99
98
|
|
100
|
-
# Initial delay for the retrieve event loop
|
101
|
-
retrieval_delay = config(:mirror_pollevery)
|
102
|
-
|
103
99
|
# Retrieve events
|
104
|
-
|
100
|
+
EventMachine.add_periodic_timer(5) do
|
105
101
|
(new, dupl) = retrieve exchange
|
106
102
|
dupl_msgs += dupl
|
107
103
|
new_msgs += new
|
108
104
|
end
|
109
105
|
|
110
106
|
# Adjust event retrieval delay time to reduce load to Github
|
111
|
-
EventMachine.add_periodic_timer(
|
107
|
+
EventMachine.add_periodic_timer(12) do
|
112
108
|
ratio = (dupl_msgs.to_f / (dupl_msgs + new_msgs).to_f)
|
113
109
|
|
114
110
|
info("Stats: #{new_msgs} new, #{dupl_msgs} duplicate, ratio: #{ratio}")
|
115
111
|
|
116
|
-
new_delay = if ratio >= 0 and ratio < 0.3 then
|
117
|
-
-1
|
118
|
-
elsif ratio >= 0.3 and ratio <= 0.5 then
|
119
|
-
0
|
120
|
-
elsif ratio > 0.5 and ratio < 1 then
|
121
|
-
+1
|
122
|
-
end
|
123
|
-
|
124
112
|
# Reset counters for new loop
|
125
113
|
dupl_msgs = new_msgs = 0
|
126
|
-
|
127
|
-
# Update the retrieval delay and restart the event retriever
|
128
|
-
if new_delay != 0
|
129
|
-
|
130
|
-
# Stop the retriever task and adjust retrieval delay
|
131
|
-
retriever.cancel
|
132
|
-
retrieval_delay = retrieval_delay + new_delay
|
133
|
-
info("Setting event retrieval delay to #{retrieval_delay} secs")
|
134
|
-
|
135
|
-
# Restart the retriever
|
136
|
-
retriever = EventMachine.add_periodic_timer(retrieval_delay) do
|
137
|
-
(new, dupl) = retrieve exchange
|
138
|
-
dupl_msgs += dupl
|
139
|
-
new_msgs += new
|
140
|
-
end
|
141
|
-
end
|
142
114
|
end
|
143
115
|
end
|
144
116
|
end
|
@@ -55,8 +55,7 @@ An efficient way to get all data for a single repo
|
|
55
55
|
|
56
56
|
user = user_entry[:login]
|
57
57
|
|
58
|
-
repo_entry = ght.transaction{ght.ensure_repo(ARGV[0], ARGV[1]
|
59
|
-
false, false)}
|
58
|
+
repo_entry = ght.transaction{ght.ensure_repo(ARGV[0], ARGV[1])}
|
60
59
|
|
61
60
|
if repo_entry.nil?
|
62
61
|
Trollop::die "Cannot find repository #{ARGV[0]}/#{ARGV[1]}"
|
@@ -74,7 +73,7 @@ An efficient way to get all data for a single repo
|
|
74
73
|
end
|
75
74
|
|
76
75
|
functions = %w(ensure_commits ensure_forks ensure_pull_requests
|
77
|
-
ensure_issues ensure_project_members ensure_watchers)
|
76
|
+
ensure_issues ensure_project_members ensure_watchers ensure_labels)
|
78
77
|
|
79
78
|
if ARGV[2].nil?
|
80
79
|
functions.each do |x|
|
@@ -111,9 +110,9 @@ class TransactedGHTorrent < GHTorrent::Mirror
|
|
111
110
|
end
|
112
111
|
end
|
113
112
|
|
114
|
-
def ensure_issue(owner, repo, issue_id, events = true, comments = true)
|
113
|
+
def ensure_issue(owner, repo, issue_id, events = true, comments = true, labels = true)
|
115
114
|
check_transaction do
|
116
|
-
super(owner, repo, issue_id, events, comments)
|
115
|
+
super(owner, repo, issue_id, events, comments, labels)
|
117
116
|
end
|
118
117
|
end
|
119
118
|
|
@@ -129,10 +128,15 @@ class TransactedGHTorrent < GHTorrent::Mirror
|
|
129
128
|
end
|
130
129
|
end
|
131
130
|
|
131
|
+
def ensure_repo_label(owner, repo, name)
|
132
|
+
check_transaction do
|
133
|
+
super(owner, repo, name)
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
132
137
|
def check_transaction(&block)
|
133
138
|
begin
|
134
139
|
if @db.in_transaction?
|
135
|
-
debug "Transaction already started"
|
136
140
|
yield block
|
137
141
|
else
|
138
142
|
transaction do
|
data/lib/ghtorrent/ghtorrent.rb
CHANGED
@@ -117,9 +117,9 @@ module GHTorrent
|
|
117
117
|
# [owner] The owner of the repository to which the pullreq will be applied
|
118
118
|
# [repo] The repository to which the pullreq will be applied
|
119
119
|
# [pullreq_id] The ID of the pull request relative to the repository
|
120
|
-
def get_pull_request(owner, repo, pullreq_id, state, created_at)
|
120
|
+
def get_pull_request(owner, repo, pullreq_id, state, actor, created_at)
|
121
121
|
transaction do
|
122
|
-
ensure_pull_request(owner, repo, pullreq_id, true, true, state, created_at)
|
122
|
+
ensure_pull_request(owner, repo, pullreq_id, true, true, true, state, actor, created_at)
|
123
123
|
end
|
124
124
|
end
|
125
125
|
|
@@ -179,7 +179,7 @@ module GHTorrent
|
|
179
179
|
# Make sure a commit exists
|
180
180
|
#
|
181
181
|
def ensure_commit(repo, sha, user, comments = true)
|
182
|
-
ensure_repo(user, repo
|
182
|
+
ensure_repo(user, repo)
|
183
183
|
c = retrieve_commit(repo, sha, user)
|
184
184
|
|
185
185
|
if c.nil?
|
@@ -275,7 +275,7 @@ module GHTorrent
|
|
275
275
|
# [repo] The repo receiving the commit
|
276
276
|
# [sha] The commit SHA
|
277
277
|
def ensure_repo_commit(user, repo, sha)
|
278
|
-
project = ensure_repo(user, repo
|
278
|
+
project = ensure_repo(user, repo)
|
279
279
|
|
280
280
|
if project.nil?
|
281
281
|
warn "GHTorrent: Repo #{user}/#{repo} does not exist"
|
@@ -597,8 +597,8 @@ module GHTorrent
|
|
597
597
|
# == Returns:
|
598
598
|
# If the repo can be retrieved, it is returned as a Hash. Otherwise,
|
599
599
|
# the result is nil
|
600
|
-
def ensure_repo(user, repo, commits =
|
601
|
-
watchers =
|
600
|
+
def ensure_repo(user, repo, commits = false, project_members = false,
|
601
|
+
watchers = false, forks = false, labels = false)
|
602
602
|
|
603
603
|
repos = @db[:projects]
|
604
604
|
curuser = ensure_user(user, false, false)
|
@@ -630,7 +630,7 @@ module GHTorrent
|
|
630
630
|
parent_owner = r['parent']['owner']['login']
|
631
631
|
parent_repo = r['parent']['name']
|
632
632
|
|
633
|
-
parent = ensure_repo(parent_owner, parent_repo
|
633
|
+
parent = ensure_repo(parent_owner, parent_repo)
|
634
634
|
|
635
635
|
repos.filter(:owner_id => curuser[:id], :name => repo).update(:forked_from => parent[:id])
|
636
636
|
|
@@ -642,6 +642,7 @@ module GHTorrent
|
|
642
642
|
ensure_project_members(user, repo) if project_members
|
643
643
|
ensure_watchers(user, repo) if watchers
|
644
644
|
ensure_forks(user, repo) if forks
|
645
|
+
ensure_labels(user, repo) if labels
|
645
646
|
repos.first(:owner_id => curuser[:id], :name => repo)
|
646
647
|
else
|
647
648
|
debug "GHTorrent: Repo #{user}/#{repo} exists"
|
@@ -652,7 +653,7 @@ module GHTorrent
|
|
652
653
|
##
|
653
654
|
# Make sure that a project has all the registered members defined
|
654
655
|
def ensure_project_members(user, repo, refresh = false)
|
655
|
-
currepo = ensure_repo(user, repo
|
656
|
+
currepo = ensure_repo(user, repo)
|
656
657
|
time = currepo[:created_at]
|
657
658
|
|
658
659
|
project_members = @db.from(:project_members, :users).\
|
@@ -672,7 +673,7 @@ module GHTorrent
|
|
672
673
|
# Make sure that a project member exists in a project
|
673
674
|
def ensure_project_member(owner, repo, new_member, date_added)
|
674
675
|
pr_members = @db[:project_members]
|
675
|
-
project = ensure_repo(owner, repo
|
676
|
+
project = ensure_repo(owner, repo)
|
676
677
|
new_user = ensure_user(new_member, false, false)
|
677
678
|
|
678
679
|
if project.nil? or new_user.nil?
|
@@ -839,7 +840,7 @@ module GHTorrent
|
|
839
840
|
##
|
840
841
|
# Make sure that all watchers exist for a repository
|
841
842
|
def ensure_watchers(owner, repo, refresh = false)
|
842
|
-
currepo = ensure_repo(owner, repo
|
843
|
+
currepo = ensure_repo(owner, repo)
|
843
844
|
|
844
845
|
if currepo.nil?
|
845
846
|
warn "Could not retrieve watchers for #{owner}/#{repo}"
|
@@ -864,7 +865,7 @@ module GHTorrent
|
|
864
865
|
##
|
865
866
|
# Make sure that a watcher/stargazer exists for a repository
|
866
867
|
def ensure_watcher(owner, repo, watcher, date_added = nil)
|
867
|
-
project = ensure_repo(owner, repo
|
868
|
+
project = ensure_repo(owner, repo)
|
868
869
|
new_watcher = ensure_user(watcher, false, false)
|
869
870
|
|
870
871
|
if new_watcher.nil? or project.nil?
|
@@ -914,7 +915,7 @@ module GHTorrent
|
|
914
915
|
##
|
915
916
|
# Process all pull requests
|
916
917
|
def ensure_pull_requests(owner, repo, refresh = false)
|
917
|
-
currepo = ensure_repo(owner, repo
|
918
|
+
currepo = ensure_repo(owner, repo)
|
918
919
|
if currepo.nil?
|
919
920
|
warn "Could not retrieve pull requests from #{owner}/#{repo}"
|
920
921
|
return
|
@@ -939,26 +940,32 @@ module GHTorrent
|
|
939
940
|
##
|
940
941
|
# Process a pull request
|
941
942
|
def ensure_pull_request(owner, repo, pullreq_id,
|
942
|
-
comments = true, commits = true,
|
943
|
-
state = nil, created_at = nil)
|
943
|
+
comments = true, commits = true, history = true,
|
944
|
+
state = nil, actor = nil, created_at = nil)
|
944
945
|
pulls_reqs = @db[:pull_requests]
|
945
946
|
|
946
|
-
project = ensure_repo(owner, repo
|
947
|
+
project = ensure_repo(owner, repo)
|
947
948
|
|
948
949
|
if project.nil?
|
949
950
|
return
|
950
951
|
end
|
951
952
|
|
952
953
|
# Adds a pull request history event
|
953
|
-
def add_history(id, ts, unq, act)
|
954
|
+
def add_history(id, ts, unq, act, actor)
|
955
|
+
user = ensure_user(actor, false, false)
|
954
956
|
pull_req_history = @db[:pull_request_history]
|
955
957
|
entry = pull_req_history.first(:pull_request_id => id,
|
956
|
-
:created_at => ts
|
958
|
+
:created_at => (ts - 4)..(ts + 4),
|
959
|
+
:action => act)
|
957
960
|
if entry.nil?
|
958
|
-
pull_req_history.insert(:pull_request_id => id,
|
959
|
-
:
|
960
|
-
|
961
|
+
pull_req_history.insert(:pull_request_id => id,
|
962
|
+
:created_at => ts,
|
963
|
+
:ext_ref_id => unq,
|
964
|
+
:action => act,
|
965
|
+
:actor_id => unless user.nil? then user[:id] end)
|
966
|
+
info "GHTorrent: New pull request (#{id}) event (#{act}) by (#{actor}) timestamp #{ts}"
|
961
967
|
else
|
968
|
+
entry.update(:actor_id => user[:id])
|
962
969
|
info "GHTorrent: Pull request (#{id}) history entry (#{act}) timestamp #{ts} exists"
|
963
970
|
end
|
964
971
|
end
|
@@ -1004,8 +1011,7 @@ module GHTorrent
|
|
1004
1011
|
end
|
1005
1012
|
|
1006
1013
|
base_repo = ensure_repo(retrieved['base']['repo']['owner']['login'],
|
1007
|
-
retrieved['base']['repo']['name']
|
1008
|
-
false, false, false, false)
|
1014
|
+
retrieved['base']['repo']['name'])
|
1009
1015
|
|
1010
1016
|
base_commit = ensure_commit(retrieved['base']['repo']['name'],
|
1011
1017
|
retrieved['base']['sha'],
|
@@ -1020,8 +1026,7 @@ module GHTorrent
|
|
1020
1026
|
else
|
1021
1027
|
head_repo = if has_head_repo(retrieved)
|
1022
1028
|
ensure_repo(retrieved['head']['repo']['owner']['login'],
|
1023
|
-
retrieved['head']['repo']['name']
|
1024
|
-
false, false, false, false)
|
1029
|
+
retrieved['head']['repo']['name'])
|
1025
1030
|
end
|
1026
1031
|
|
1027
1032
|
head_commit = if not head_repo.nil?
|
@@ -1033,12 +1038,7 @@ module GHTorrent
|
|
1033
1038
|
|
1034
1039
|
pull_req_user = ensure_user(retrieved['user']['login'], false, false)
|
1035
1040
|
|
1036
|
-
merged = if retrieved['merged_at'].nil? then
|
1037
|
-
# Check if the pr's commits are in the repository
|
1038
|
-
false
|
1039
|
-
else
|
1040
|
-
true
|
1041
|
-
end
|
1041
|
+
merged = if retrieved['merged_at'].nil? then false else true end
|
1042
1042
|
closed = if retrieved['closed_at'].nil? then false else true end
|
1043
1043
|
|
1044
1044
|
pull_req = pulls_reqs.first(:base_repo_id => project[:id],
|
@@ -1054,40 +1054,65 @@ module GHTorrent
|
|
1054
1054
|
:intra_branch => is_intra_branch(retrieved),
|
1055
1055
|
:merged => merged
|
1056
1056
|
)
|
1057
|
-
|
1058
|
-
info log_msg(retrieved) + " was added"
|
1057
|
+
info log_msg(retrieved) + ' was added'
|
1059
1058
|
else
|
1060
|
-
debug log_msg(retrieved) +
|
1059
|
+
debug log_msg(retrieved) + ' exists'
|
1061
1060
|
end
|
1062
1061
|
|
1063
1062
|
pull_req = pulls_reqs.first(:base_repo_id => project[:id],
|
1064
1063
|
:pullreq_id => pullreq_id)
|
1065
1064
|
|
1066
|
-
|
1067
|
-
|
1068
|
-
|
1069
|
-
|
1070
|
-
add_history(pull_req[:id], date(retrieved['closed_at']),
|
1071
|
-
retrieved[@ext_uniq], 'closed') if closed
|
1072
|
-
add_history(pull_req[:id], date(created_at), retrieved[@ext_uniq],
|
1073
|
-
state) unless state.nil?
|
1065
|
+
# Add a fake (or not so fake) issue in the issues table to serve
|
1066
|
+
# as root for retrieving discussion comments for this pull request
|
1067
|
+
issues = @db[:issues]
|
1068
|
+
issue = issues.first(:pull_request_id => pull_req[:id])
|
1074
1069
|
|
1070
|
+
if issue.nil?
|
1071
|
+
issues.insert(:repo_id => base_repo[:id],
|
1072
|
+
:assignee_id => nil,
|
1073
|
+
:reporter_id => nil,
|
1074
|
+
:issue_id => pullreq_id,
|
1075
|
+
:pull_request => true,
|
1076
|
+
:pull_request_id => pull_req[:id],
|
1077
|
+
:created_at => date(retrieved['created_at']),
|
1078
|
+
:ext_ref_id => retrieved[@ext_uniq])
|
1079
|
+
debug 'Adding accompanying issue for ' + log_msg(retrieved)
|
1080
|
+
else
|
1081
|
+
debug 'Accompanying issue exists for ' + log_msg(retrieved)
|
1082
|
+
end
|
1083
|
+
|
1084
|
+
if history
|
1085
|
+
# Actions on pull requests
|
1086
|
+
actor = if actor.nil? then pull_req_user[:login] else actor end
|
1087
|
+
opener = pull_req_user[:login]
|
1088
|
+
add_history(pull_req[:id], date(retrieved['created_at']),
|
1089
|
+
retrieved[@ext_uniq], 'opened', opener)
|
1090
|
+
# There is an additional merged_by field for merged pull requests
|
1091
|
+
merger = if retrieved['merged_by'].nil? then actor else retrieved['merged_by']['login'] end
|
1092
|
+
add_history(pull_req[:id], date(retrieved['merged_at']),
|
1093
|
+
retrieved[@ext_uniq], 'merged', merger) if (merged && state != 'merged')
|
1094
|
+
closer = if merged then merger else actor end
|
1095
|
+
add_history(pull_req[:id], date(retrieved['closed_at']),
|
1096
|
+
retrieved[@ext_uniq], 'closed', closer) if (closed && state != 'closed')
|
1097
|
+
add_history(pull_req[:id], date(created_at), retrieved[@ext_uniq],
|
1098
|
+
state, actor) unless state.nil?
|
1099
|
+
end
|
1075
1100
|
ensure_pull_request_commits(owner, repo, pullreq_id) if commits
|
1076
1101
|
ensure_pullreq_comments(owner, repo, pullreq_id) if comments
|
1102
|
+
ensure_issue_comments(owner, repo, pullreq_id, pull_req[:id]) if comments
|
1077
1103
|
|
1078
|
-
|
1079
|
-
:pullreq_id => pullreq_id)
|
1104
|
+
pull_req
|
1080
1105
|
end
|
1081
1106
|
|
1082
1107
|
def ensure_pullreq_comments(owner, repo, pullreq_id)
|
1083
|
-
currepo = ensure_repo(owner, repo
|
1108
|
+
currepo = ensure_repo(owner, repo)
|
1084
1109
|
|
1085
1110
|
if currepo.nil?
|
1086
1111
|
warn "GHTorrent: Could not find repository #{owner}/#{repo}"
|
1087
1112
|
return
|
1088
1113
|
end
|
1089
1114
|
|
1090
|
-
pull_req = ensure_pull_request(owner, repo, pullreq_id, false, false)
|
1115
|
+
pull_req = ensure_pull_request(owner, repo, pullreq_id, false, false, false)
|
1091
1116
|
|
1092
1117
|
if pull_req.nil?
|
1093
1118
|
warn "Could not retrieve pull req #{owner}/#{repo} -> #{pullreq_id}"
|
@@ -1096,7 +1121,7 @@ module GHTorrent
|
|
1096
1121
|
|
1097
1122
|
retrieve_pull_req_comments(owner, repo, pullreq_id).reduce([]) do |acc, x|
|
1098
1123
|
|
1099
|
-
if @db[:pull_request_comments].first(:
|
1124
|
+
if @db[:pull_request_comments].first(:pull_request_id => pull_req[:id],
|
1100
1125
|
:comment_id => x['id']).nil?
|
1101
1126
|
acc << x
|
1102
1127
|
else
|
@@ -1112,7 +1137,7 @@ module GHTorrent
|
|
1112
1137
|
# is done on retrieving a pull request. This has the side effect that
|
1113
1138
|
# commits might not be retrieved if a pullreqcomment event gets processed
|
1114
1139
|
# before the pullreq event, until the pullreq event has been processed
|
1115
|
-
pull_req = ensure_pull_request(owner, repo, pullreq_id, false, false)
|
1140
|
+
pull_req = ensure_pull_request(owner, repo, pullreq_id, false, false, false)
|
1116
1141
|
|
1117
1142
|
if pull_req.nil?
|
1118
1143
|
warn "GHTorrent: Could not retrieve pull req #{owner}/#{repo} -> #{pullreq_id}"
|
@@ -1159,7 +1184,7 @@ module GHTorrent
|
|
1159
1184
|
end
|
1160
1185
|
|
1161
1186
|
def ensure_pull_request_commits(owner, repo, pullreq_id)
|
1162
|
-
pullreq = ensure_pull_request(owner, repo, pullreq_id, false, false)
|
1187
|
+
pullreq = ensure_pull_request(owner, repo, pullreq_id, false, false, false)
|
1163
1188
|
|
1164
1189
|
if pullreq.nil?
|
1165
1190
|
warn "GHTorrent: Pull request #{pullreq_id} does not exist for #{owner}/#{repo}"
|
@@ -1195,7 +1220,7 @@ module GHTorrent
|
|
1195
1220
|
# [owner] The user to which the project belongs
|
1196
1221
|
# [repo] The repository/project to find forks for
|
1197
1222
|
def ensure_forks(owner, repo, refresh = false)
|
1198
|
-
currepo = ensure_repo(owner, repo
|
1223
|
+
currepo = ensure_repo(owner, repo)
|
1199
1224
|
|
1200
1225
|
if currepo.nil?
|
1201
1226
|
warn "Could not retrieve forks for #{owner}/#{repo}"
|
@@ -1232,7 +1257,7 @@ module GHTorrent
|
|
1232
1257
|
fork_owner = fork['full_name'].split(/\//)[0]
|
1233
1258
|
fork_name = fork['full_name'].split(/\//)[1]
|
1234
1259
|
|
1235
|
-
r = ensure_repo(fork_owner, fork_name
|
1260
|
+
r = ensure_repo(fork_owner, fork_name)
|
1236
1261
|
|
1237
1262
|
if r.nil?
|
1238
1263
|
warn "GHTorrent: Failed to add #{fork_owner}/#{fork_name} as fork of #{owner}/#{repo}"
|
@@ -1244,7 +1269,7 @@ module GHTorrent
|
|
1244
1269
|
##
|
1245
1270
|
# Make sure all issues exist for a project
|
1246
1271
|
def ensure_issues(owner, repo, refresh = false)
|
1247
|
-
currepo = ensure_repo(owner, repo
|
1272
|
+
currepo = ensure_repo(owner, repo)
|
1248
1273
|
if currepo.nil?
|
1249
1274
|
warn "GHTorrent: Could not retrieve issues for #{owner}/#{repo}"
|
1250
1275
|
return
|
@@ -1268,10 +1293,11 @@ module GHTorrent
|
|
1268
1293
|
|
1269
1294
|
##
|
1270
1295
|
# Make sure that the issue exists
|
1271
|
-
def ensure_issue(owner, repo, issue_id, events = true,
|
1296
|
+
def ensure_issue(owner, repo, issue_id, events = true,
|
1297
|
+
comments = true, labels = true)
|
1272
1298
|
|
1273
1299
|
issues = @db[:issues]
|
1274
|
-
repository = ensure_repo(owner, repo
|
1300
|
+
repository = ensure_repo(owner, repo)
|
1275
1301
|
|
1276
1302
|
if repo.nil?
|
1277
1303
|
warn "Cannot find repo #{owner}/#{repo}"
|
@@ -1323,6 +1349,7 @@ module GHTorrent
|
|
1323
1349
|
end
|
1324
1350
|
ensure_issue_events(owner, repo, issue_id) if events
|
1325
1351
|
ensure_issue_comments(owner, repo, issue_id) if comments
|
1352
|
+
ensure_issue_labels(owner, repo, issue_id) if labels
|
1326
1353
|
issues.first(:issue_id => issue_id,
|
1327
1354
|
:repo_id => repository[:id])
|
1328
1355
|
end
|
@@ -1330,14 +1357,14 @@ module GHTorrent
|
|
1330
1357
|
##
|
1331
1358
|
# Retrieve and process all events for an issue
|
1332
1359
|
def ensure_issue_events(owner, repo, issue_id)
|
1333
|
-
currepo = ensure_repo(owner, repo
|
1360
|
+
currepo = ensure_repo(owner, repo)
|
1334
1361
|
|
1335
1362
|
if currepo.nil?
|
1336
1363
|
warn "GHTorrent: Could not find repository #{owner}/#{repo}"
|
1337
1364
|
return
|
1338
1365
|
end
|
1339
1366
|
|
1340
|
-
issue = ensure_issue(owner, repo, issue_id, false, false)
|
1367
|
+
issue = ensure_issue(owner, repo, issue_id, false, false, false)
|
1341
1368
|
if issue.nil?
|
1342
1369
|
warn "Could not retrieve issue #{owner}/#{repo} -> #{issue_id}"
|
1343
1370
|
return
|
@@ -1359,7 +1386,7 @@ module GHTorrent
|
|
1359
1386
|
##
|
1360
1387
|
# Retrieve and process +event_id+ for an +issue_id+
|
1361
1388
|
def ensure_issue_event(owner, repo, issue_id, event_id)
|
1362
|
-
issue = ensure_issue(owner, repo, issue_id, false, false)
|
1389
|
+
issue = ensure_issue(owner, repo, issue_id, false, false, false)
|
1363
1390
|
|
1364
1391
|
if issue.nil?
|
1365
1392
|
warn "GHTorrent: Could not retrieve issue #{owner}/#{repo} -> #{issue_id}"
|
@@ -1394,7 +1421,7 @@ module GHTorrent
|
|
1394
1421
|
if retrieved['event'] == "assigned"
|
1395
1422
|
|
1396
1423
|
def update_assignee(owner, repo, issue, actor)
|
1397
|
-
@db[:issues]
|
1424
|
+
@db[:issues].first(:id => issue[:id]).update(:assignee_id => actor[:id])
|
1398
1425
|
info "Updating #{owner}/#{repo} -> #{issue[:id]} assignee to #{actor[:id]}"
|
1399
1426
|
end
|
1400
1427
|
|
@@ -1432,16 +1459,24 @@ module GHTorrent
|
|
1432
1459
|
end
|
1433
1460
|
|
1434
1461
|
##
|
1435
|
-
# Retrieve and process all comments for an issue
|
1436
|
-
|
1437
|
-
|
1462
|
+
# Retrieve and process all comments for an issue.
|
1463
|
+
# If pull_req_id is not nil this means that we are only retrieving
|
1464
|
+
# comments for the pull request discussion for projects that don't have
|
1465
|
+
# issues enabled
|
1466
|
+
def ensure_issue_comments(owner, repo, issue_id, pull_req_id = nil)
|
1467
|
+
currepo = ensure_repo(owner, repo)
|
1438
1468
|
|
1439
1469
|
if currepo.nil?
|
1440
1470
|
warn "GHTorrent: Could not find repository #{owner}/#{repo}"
|
1441
1471
|
return
|
1442
1472
|
end
|
1443
1473
|
|
1444
|
-
issue =
|
1474
|
+
issue = if pull_req_id.nil?
|
1475
|
+
ensure_issue(owner, repo, issue_id, false, false, false)
|
1476
|
+
else
|
1477
|
+
@db[:issues].first(:pull_request_id => pull_req_id)
|
1478
|
+
end
|
1479
|
+
|
1445
1480
|
if issue.nil?
|
1446
1481
|
warn "Could not retrieve issue #{owner}/#{repo} -> #{issue_id}"
|
1447
1482
|
return
|
@@ -1456,14 +1491,19 @@ module GHTorrent
|
|
1456
1491
|
acc
|
1457
1492
|
end
|
1458
1493
|
end.map { |x|
|
1459
|
-
ensure_issue_comment(owner, repo, issue_id, x['id'])
|
1494
|
+
ensure_issue_comment(owner, repo, issue_id, x['id'], pull_req_id)
|
1460
1495
|
}
|
1461
1496
|
end
|
1462
1497
|
|
1463
1498
|
##
|
1464
1499
|
# Retrieve and process +comment_id+ for an +issue_id+
|
1465
|
-
def ensure_issue_comment(owner, repo, issue_id, comment_id
|
1466
|
-
|
1500
|
+
def ensure_issue_comment(owner, repo, issue_id, comment_id,
|
1501
|
+
pull_req_id = nil)
|
1502
|
+
issue = if pull_req_id.nil?
|
1503
|
+
ensure_issue(owner, repo, issue_id, false, false, false)
|
1504
|
+
else
|
1505
|
+
@db[:issues].first(:pull_request_id => pull_req_id)
|
1506
|
+
end
|
1467
1507
|
|
1468
1508
|
if issue.nil?
|
1469
1509
|
warn "GHTorrent: Could not retrieve issue #{owner}/#{repo} -> #{issue_id}"
|
@@ -1502,6 +1542,117 @@ module GHTorrent
|
|
1502
1542
|
end
|
1503
1543
|
end
|
1504
1544
|
|
1545
|
+
##
|
1546
|
+
# Retrieve repository issue labels
|
1547
|
+
def ensure_labels(owner, repo, refresh = false)
|
1548
|
+
currepo = ensure_repo(owner, repo)
|
1549
|
+
|
1550
|
+
repo_labels = @db[:repo_labels].filter(:repo_id => currepo[:id]).all
|
1551
|
+
|
1552
|
+
retrieve_repo_labels(owner, repo, refresh).reduce([]) do |acc, x|
|
1553
|
+
if repo_labels.find {|y| y[:name] == x['name']}.nil?
|
1554
|
+
acc << x
|
1555
|
+
else
|
1556
|
+
acc
|
1557
|
+
end
|
1558
|
+
end.map { |x| ensure_repo_label(owner, repo, x['name']) }
|
1559
|
+
end
|
1560
|
+
|
1561
|
+
##
|
1562
|
+
# Retrieve a single repository issue label by name
|
1563
|
+
def ensure_repo_label(owner, repo, name)
|
1564
|
+
currepo = ensure_repo(owner, repo)
|
1565
|
+
|
1566
|
+
if currepo.nil?
|
1567
|
+
warn "GHTorrent: Repo #{owner}/#{repo} does not exist"
|
1568
|
+
return
|
1569
|
+
end
|
1570
|
+
|
1571
|
+
label = @db[:repo_labels].first(:repo_id => currepo[:id], :name => name)
|
1572
|
+
|
1573
|
+
if label.nil?
|
1574
|
+
retrieved = retrieve_repo_label(owner, repo, name)
|
1575
|
+
|
1576
|
+
if retrieved.nil?
|
1577
|
+
warn "GHTorrent: Repo label #{owner}/#{repo} -> #{name} does not exist"
|
1578
|
+
return
|
1579
|
+
end
|
1580
|
+
|
1581
|
+
@db[:repo_labels].insert(
|
1582
|
+
:repo_id => currepo[:id],
|
1583
|
+
:name => name,
|
1584
|
+
:ext_ref_id => retrieved[@ext_uniq]
|
1585
|
+
)
|
1586
|
+
|
1587
|
+
info "GHTorrent: Added repo label #{owner}/#{repo} -> #{name}"
|
1588
|
+
@db[:repo_labels].first(:repo_id => currepo[:id], :name => name)
|
1589
|
+
else
|
1590
|
+
label
|
1591
|
+
end
|
1592
|
+
end
|
1593
|
+
|
1594
|
+
##
|
1595
|
+
# Ensure that all labels have been assigned to the issue
|
1596
|
+
def ensure_issue_labels(owner, repo, issue_id)
|
1597
|
+
|
1598
|
+
issue = ensure_issue(owner, repo, issue_id, false, false, false)
|
1599
|
+
|
1600
|
+
if issue.nil?
|
1601
|
+
warn "GHTorrent: Issue #{owner}/#{repo} -> #{issue_id} does not exist"
|
1602
|
+
return
|
1603
|
+
end
|
1604
|
+
|
1605
|
+
issue_labels = @db.from(:issue_labels, :repo_labels)\
|
1606
|
+
.where(:issue_labels__label_id => :repo_labels__id)\
|
1607
|
+
.where(:issue_labels__issue_id => issue[:id])\
|
1608
|
+
.select(:repo_labels__name).all
|
1609
|
+
|
1610
|
+
retrieve_issue_labels(owner, repo, issue_id).reduce([]) do |acc, x|
|
1611
|
+
if issue_labels.find {|y| y[:name] == x['name']}.nil?
|
1612
|
+
acc << x
|
1613
|
+
else
|
1614
|
+
acc
|
1615
|
+
end
|
1616
|
+
end.map { |x| ensure_issue_label(owner, repo, issue[:issue_id], x['name']) }
|
1617
|
+
|
1618
|
+
end
|
1619
|
+
|
1620
|
+
##
|
1621
|
+
# Ensure that a specific label has been assigned to the issue
|
1622
|
+
def ensure_issue_label(owner, repo, issue_id, name)
|
1623
|
+
|
1624
|
+
issue = ensure_issue(owner, repo, issue_id, false, false, false)
|
1625
|
+
|
1626
|
+
if issue.nil?
|
1627
|
+
warn "GHTorrent: Issue #{owner}/#{repo} -> #{issue_id} does not exist"
|
1628
|
+
return
|
1629
|
+
end
|
1630
|
+
|
1631
|
+
label = ensure_repo_label(owner, repo, name)
|
1632
|
+
|
1633
|
+
if label.nil?
|
1634
|
+
warn "GHTorrent: Label #{owner}/#{repo} -> #{name} does not exist"
|
1635
|
+
return
|
1636
|
+
end
|
1637
|
+
|
1638
|
+
issue_lbl = @db[:issue_labels].first(:label_id => label[:id],
|
1639
|
+
:issue_id => issue[:id])
|
1640
|
+
|
1641
|
+
if issue_lbl.nil?
|
1642
|
+
|
1643
|
+
@db[:issue_labels].insert(
|
1644
|
+
:label_id => label[:id],
|
1645
|
+
:issue_id => issue[:id],
|
1646
|
+
)
|
1647
|
+
info "GHTorrent: Added issue label #{name} to issue #{owner}/#{repo} -> #{issue_id}"
|
1648
|
+
@db[:issue_labels].first(:label_id => label[:id],
|
1649
|
+
:issue_id => issue[:id])
|
1650
|
+
else
|
1651
|
+
issue_lbl
|
1652
|
+
end
|
1653
|
+
|
1654
|
+
end
|
1655
|
+
|
1505
1656
|
# Run a block in a DB transaction. Exceptions trigger transaction rollback
|
1506
1657
|
# and are rethrown.
|
1507
1658
|
def transaction(&block)
|
@@ -1537,7 +1688,7 @@ module GHTorrent
|
|
1537
1688
|
author = commit_user(c['author'], c['commit']['author'])
|
1538
1689
|
commiter = commit_user(c['committer'], c['commit']['committer'])
|
1539
1690
|
|
1540
|
-
repository = ensure_repo(user, repo
|
1691
|
+
repository = ensure_repo(user, repo)
|
1541
1692
|
|
1542
1693
|
if repository.nil?
|
1543
1694
|
warn "GHTorrent: repository #{user}/#{repo} deleted"
|