ghtorrent 0.8 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG +9 -0
- data/Gemfile +1 -1
- data/Gemfile.lock +32 -20
- data/README.md +50 -34
- data/Rakefile +3 -3
- data/bin/ght-retrieve-dependents +6 -0
- data/bin/ght-retrieve-repos +6 -0
- data/lib/ghtorrent.rb +3 -0
- data/lib/ghtorrent/adapters/mongo_persister.rb +3 -3
- data/lib/ghtorrent/api_client.rb +6 -4
- data/lib/ghtorrent/command.rb +2 -28
- data/lib/ghtorrent/commands/ght_data_retrieval.rb +3 -2
- data/lib/ghtorrent/commands/ght_load.rb +7 -5
- data/lib/ghtorrent/commands/ght_retrieve_dependents.rb +84 -0
- data/lib/ghtorrent/commands/ght_retrieve_repo.rb +1 -70
- data/lib/ghtorrent/commands/ght_retrieve_repos.rb +206 -0
- data/lib/ghtorrent/commands/ght_retrieve_user.rb +9 -2
- data/lib/ghtorrent/ghtorrent.rb +103 -82
- data/lib/ghtorrent/logging.rb +2 -1
- data/lib/ghtorrent/migrations/015_fix_table_issue_labels.rb +17 -5
- data/lib/ghtorrent/migrations/016_add_actor_pull_request_history.rb +1 -1
- data/lib/ghtorrent/retriever.rb +8 -17
- data/lib/ghtorrent/settings.rb +9 -5
- data/lib/ghtorrent/transacted_ghtorrent.rb +91 -0
- data/lib/version.rb +1 -1
- data/spec/api_client_spec.rb +42 -0
- data/spec/spec_helper.rb +21 -0
- metadata +46 -52
@@ -0,0 +1,84 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'amqp'
|
3
|
+
require 'json'
|
4
|
+
require 'pp'
|
5
|
+
|
6
|
+
require 'ghtorrent/ghtorrent'
|
7
|
+
require 'ghtorrent/settings'
|
8
|
+
require 'ghtorrent/logging'
|
9
|
+
require 'ghtorrent/command'
|
10
|
+
|
11
|
+
# Command-line entry point that takes an entity type (commit, issue or
# pull_request) plus its identifying fields from ARGV and asks the GHTorrent
# mirror to ensure that entity, passing `true` for the optional flags.
class GHTRetrieveDependents < GHTorrent::Command

  include GHTorrent::Settings
  include GHTorrent::Logging
  include GHTorrent::Persister

  # Number of entity-id fields that must follow the entity name on the
  # command line, keyed by entity type.
  REQ_ARGS = {
    :commit => 3,
    :issue => 3,
    :pull_request => 3
  }

  # Installs the usage banner on the option parser.
  def prepare_options(options)
    options.banner <<-BANNER
Recursively retrieve all dependent entities for a specific entity
#{command_name} [options] entity entity-id

#{command_name} entity is one of (in parenthesis the entity-id fields):
commit (owner repo sha)
issue (owner repo issue_id)
pull_request (owner repo pullreq_id)
#{command_name}
    BANNER

  end

  # Logging is delegated to the mirror's logger.
  def logger
    ghtorrent.logger
  end

  # Memoised persister obtained via connect(:mongo, settings).
  def persister
    @persister ||= connect(:mongo, settings)
  end

  # Memoised GHTorrent::Mirror built from @settings.
  def ghtorrent
    @gh ||= GHTorrent::Mirror.new(@settings)
  end

  # Memoised result of ghtorrent.get_db.
  def db
    @db ||= ghtorrent.get_db
  end

  # Validates ARGV and dispatches to the matching ensure_* call.
  def go
    # Called only for its memoising effect (ghtorrent.get_db) before any
    # argument handling takes place.
    db

    type = REQ_ARGS.keys.find { |known| known.to_s == ARGV[0] }
    Trollop::die("Don't know how to handle #{ARGV[0]}") if type.nil?

    supplied = ARGV.size - 1
    if supplied != REQ_ARGS[type]
      Trollop::die("#{ARGV[0]} requires #{REQ_ARGS[type]} arguments")
    end

    first, second, third = ARGV[1], ARGV[2], ARGV[3]

    if type == :commit
      # ensure_commit receives the fields reordered: second, third, first.
      ghtorrent.ensure_commit(second, third, first, true)
    elsif type == :issue
      ghtorrent.ensure_issue(first, second, third, true, true, true)
    elsif type == :pull_request
      ghtorrent.ensure_pull_request(first, second, third, true, true, true)
    end

  end
end
|
83
|
+
|
84
|
+
# vim: set sta sts=2 shiftwidth=2 sw=2 et ai :
|
@@ -64,12 +64,7 @@ An efficient way to get all data for a single repo
|
|
64
64
|
repo = repo_entry[:name]
|
65
65
|
|
66
66
|
def send_message(function, user, repo)
|
67
|
-
|
68
|
-
ght.send(function, user, repo, refresh = true)
|
69
|
-
rescue Exception => e
|
70
|
-
puts STDERR, e.message
|
71
|
-
puts STDERR, e.backtrace
|
72
|
-
end
|
67
|
+
ght.send(function, user, repo, refresh = true)
|
73
68
|
end
|
74
69
|
|
75
70
|
functions = %w(ensure_commits ensure_forks ensure_pull_requests
|
@@ -86,67 +81,3 @@ An efficient way to get all data for a single repo
|
|
86
81
|
end
|
87
82
|
end
|
88
83
|
|
89
|
-
# A version of the GHTorrent class that creates a transaction per processed
|
90
|
-
# item
|
91
|
-
class TransactedGHTorrent < GHTorrent::Mirror
|
92
|
-
|
93
|
-
def ensure_commit(repo, sha, user, comments = true)
|
94
|
-
check_transaction do
|
95
|
-
super(repo, sha, user, comments)
|
96
|
-
end
|
97
|
-
end
|
98
|
-
|
99
|
-
def ensure_fork(owner, repo, fork_id)
|
100
|
-
check_transaction do
|
101
|
-
super(owner, repo, fork_id)
|
102
|
-
end
|
103
|
-
end
|
104
|
-
|
105
|
-
def ensure_pull_request(owner, repo, pullreq_id,
|
106
|
-
comments = true, commits = true,
|
107
|
-
state = nil, created_at = nil)
|
108
|
-
check_transaction do
|
109
|
-
super(owner, repo, pullreq_id, comments, commits, state, created_at)
|
110
|
-
end
|
111
|
-
end
|
112
|
-
|
113
|
-
def ensure_issue(owner, repo, issue_id, events = true, comments = true, labels = true)
|
114
|
-
check_transaction do
|
115
|
-
super(owner, repo, issue_id, events, comments, labels)
|
116
|
-
end
|
117
|
-
end
|
118
|
-
|
119
|
-
def ensure_project_member(owner, repo, new_member, date_added)
|
120
|
-
check_transaction do
|
121
|
-
super(owner, repo, new_member, date_added)
|
122
|
-
end
|
123
|
-
end
|
124
|
-
|
125
|
-
def ensure_watcher(owner, repo, watcher, date_added = nil)
|
126
|
-
check_transaction do
|
127
|
-
super(owner, repo, watcher, date_added)
|
128
|
-
end
|
129
|
-
end
|
130
|
-
|
131
|
-
def ensure_repo_label(owner, repo, name)
|
132
|
-
check_transaction do
|
133
|
-
super(owner, repo, name)
|
134
|
-
end
|
135
|
-
end
|
136
|
-
|
137
|
-
def check_transaction(&block)
|
138
|
-
begin
|
139
|
-
if @db.in_transaction?
|
140
|
-
yield block
|
141
|
-
else
|
142
|
-
transaction do
|
143
|
-
yield block
|
144
|
-
end
|
145
|
-
end
|
146
|
-
rescue Exception => e
|
147
|
-
puts STDERR, e.message
|
148
|
-
puts STDERR, e.backtrace
|
149
|
-
end
|
150
|
-
end
|
151
|
-
|
152
|
-
end
|
@@ -0,0 +1,206 @@
|
|
1
|
+
require 'ghtorrent/ghtorrent'
|
2
|
+
require 'ghtorrent/settings'
|
3
|
+
require 'ghtorrent/logging'
|
4
|
+
require 'ghtorrent/command'
|
5
|
+
require 'ghtorrent/retriever'
|
6
|
+
|
7
|
+
# Command that reads a mapping file (IP UNAME PASSWD NUM_PROCS per line),
# builds one settings object per requested process and forks a
# GHTRepoRetriever child for each of them, then waits for all children.
class GHTRetrieveRepos < GHTorrent::Command

  include GHTorrent::Settings
  include GHTorrent::Logging

  # Memoised logger writing to STDOUT.
  def logger
    @logger ||= Logger.new(STDOUT)
  end

  # Installs the usage banner and the --queue option on the option parser.
  def prepare_options(options)
    options.banner <<-BANNER
Retrieve data for multiple repos in parallel. To work, it requires
a mapping file formatted as follows:

IP UNAME PASSWD NUM_PROCS where

IP = address to use for outgoing requests (use 0.0.0.0 on non-multihomed hosts)
UNAME = Github user name to use for outgoing requests
PASSWD = Github password to use for outgoing requests
NUM_PROCS = Number of processes to spawn for this IP/UNAME combination

Values in the config.yaml file set with the -c command are overriden.

#{command_name} [options] mapping-file

    BANNER
    options.opt :queue, 'Queue to retrieve project names from',
                :short => 'q', :default => 'retrieve-repo', :type => :string

  end

  # Runs the parent validation, then aborts when no mapping file was given.
  def validate
    super
    Trollop::die 'Argument mapping-file is required' if args[0].nil?
  end

  # Parses the mapping file, forks one child per configuration and blocks
  # until every child has exited (or the parent is interrupted).
  def go
    # File.readlines opens, reads and closes the file in one call, so no
    # handle is left open in the parent (and inherited by the children).
    mapping = File.readlines(ARGV[0]).reject { |line| line.start_with?('#') }

    configs = mapping.map do |line|
      # Split on any whitespace run so tabs or repeated spaces between
      # fields do not produce empty fields.
      ip, name, passwd, instances = line.strip.split
      # A missing or non-numeric NUM_PROCS becomes 0 and yields no configs.
      (1..instances.to_i).map { child_config(ip, name, passwd) }
    end.flatten

    children = configs.map do |config|
      pid = Process::fork

      if pid.nil?
        # Child process: run a retriever until it finishes or is told to stop.
        retriever = GHTRepoRetriever.new(config, options[:queue])

        Signal.trap('TERM') {
          retriever.stop
        }

        retriever.run
        exit
      else
        debug "Parent #{Process.pid} forked child #{pid}"
        pid
      end
    end

    debug 'Waiting for children'
    begin
      children.each do |pid|
        debug "Waiting for child #{pid}"
        Process.waitpid(pid, 0)
        debug "Child #{pid} exited"
      end
    rescue Interrupt
      debug 'Stopping'
    end
  end

  private

  # Builds the settings for one child: a clone of this command's settings
  # with the outgoing IP, the Github credentials and the two page limits
  # overridden.
  def child_config(ip, name, passwd)
    cfg = self.settings.clone
    cfg = override_config(cfg, :attach_ip, ip)
    cfg = override_config(cfg, :github_username, name)
    cfg = override_config(cfg, :github_passwd, passwd)
    cfg = override_config(cfg, :mirror_history_pages_back, 1000)
    override_config(cfg, :mirror_commit_pages_new_repo, 1000)
  end
end
|
90
|
+
|
91
|
+
# Worker that subscribes to an AMQP queue of "owner repo" messages and, for
# each message, ensures the user and repository and then runs a fixed list
# of ensure_* retrieval steps against a TransactedGhtorrent instance.
class GHTRepoRetriever

  include GHTorrent::Settings
  include GHTorrent::Retriever
  include GHTorrent::Persister

  # Retrieval steps executed, in this order, for every repository.
  REPO_FUNCTIONS = %w(ensure_commits ensure_forks ensure_pull_requests
                      ensure_issues ensure_project_members ensure_watchers ensure_labels)

  # [config] Settings object used for this worker
  # [queue]  Name of the AMQP queue to consume from
  def initialize(config, queue)
    @config = config
    @queue = queue
    @stop = false
  end

  # Logging is delegated to the mirror's logger.
  def logger
    ght.logger
  end

  # Memoised persister obtained via connect(:mongo, settings).
  def persister
    @persister ||= connect(:mongo, settings)
  end

  # Memoised value of the :uniq_id configuration entry.
  def ext_uniq
    @ext_uniq ||= config(:uniq_id)
  end

  # Memoised TransactedGhtorrent built from this worker's configuration.
  def ght
    @ght ||= TransactedGhtorrent.new(@config)
  end

  # The settings of this worker are the configuration it was created with.
  def settings
    @config
  end

  # Invokes one retrieval step on the mirror with refresh disabled.
  # Defined once at class level (rather than inside the message handler) so
  # it is not re-defined for every message taken off the queue.
  def send_message(function, user, repo)
    ght.send(function, user, repo, false)
  end

  # Connects to AMQP, binds the queue to the configured topic exchange and
  # processes messages until the stop flag is set.
  def run
    AMQP.start(:host => config(:amqp_host),
               :port => config(:amqp_port),
               :username => config(:amqp_username),
               :password => config(:amqp_password)) do |connection|

      # The second block argument is unused; it is named _settings so it
      # does not shadow the #settings method.
      connection.on_tcp_connection_loss do |conn, _settings|
        warn 'AMQP: Network failure. Trying to reconnect...'
        conn.reconnect(false, 2)
      end

      channel = AMQP::Channel.new(connection)
      channel.auto_recovery = true
      channel.prefetch(1)

      channel.on_error do |ch, channel_close|
        warn 'AMQP: Channel closed. Should reconnect by itself'
        raise channel_close.reply_text
      end

      exchange = channel.topic(config(:amqp_exchange), :durable => true,
                               :auto_delete => false)

      queue = channel.queue(@queue, {:durable => true}).bind(exchange)

      queue.subscribe(:ack => true) do |headers, msg|
        owner, repo = msg.split(/ /)
        user_entry = ght.transaction { ght.ensure_user(owner, false, false) }

        if user_entry.nil?
          warn("Cannot find user #{owner}")
          headers.ack
          next
        end

        repo_entry = ght.transaction { ght.ensure_repo(owner, repo) }

        if repo_entry.nil?
          warn("Cannot find repository #{owner}/#{repo}")
          headers.ack
          next
        end

        debug("Retrieving repo #{owner}/#{repo}")
        run_repo_functions(owner, repo)

        headers.ack
        debug("Finished processing #{owner}/#{repo}")
        if @stop
          connection.disconnect{AMQP.stop { EM.stop }}
        end
      end
    end
  end

  # Sets the stop flag; the message handler checks it between retrieval
  # steps and after acknowledging the current message.
  def stop
    warn('Stop flag set, waiting for operations to finish')
    @stop = true
  end

  private

  # Runs every retrieval step for one repository. A failing step is logged
  # and the remaining steps still run; an Interrupt sets the stop flag.
  def run_repo_functions(owner, repo)
    REPO_FUNCTIONS.each do |function|
      begin
        send_message(function, owner, repo)
      rescue Interrupt
        stop
      rescue StandardError => e
        # Only StandardError is treated as a per-step failure, so SystemExit
        # and other non-standard exceptions are no longer swallowed here.
        warn("Error processing #{function} for #{owner}/#{repo}: #{e.message}")
      end

      # Checked after every step, including failed ones, so a stop request
      # is honoured promptly.
      break if @stop
    end
  end
end
|
206
|
+
|
@@ -42,7 +42,7 @@ An efficient way to get all data for a single user
|
|
42
42
|
end
|
43
43
|
end
|
44
44
|
|
45
|
-
functions = %w(ensure_user_followers ensure_orgs)
|
45
|
+
functions = %w(ensure_user_followers ensure_orgs ensure_org)
|
46
46
|
|
47
47
|
if ARGV[2].nil?
|
48
48
|
functions.each do |x|
|
@@ -69,4 +69,11 @@ class TransactedGHTorrent < GHTorrent::Mirror
|
|
69
69
|
super(user)
|
70
70
|
end
|
71
71
|
end
|
72
|
-
|
72
|
+
|
73
|
+
def ensure_org(user, members = true)
|
74
|
+
check_transaction do
|
75
|
+
super(user, members)
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
end
|
data/lib/ghtorrent/ghtorrent.rb
CHANGED
@@ -80,9 +80,9 @@ module GHTorrent
|
|
80
80
|
# [user] The login of the repository owner
|
81
81
|
# [repo] The name of the repository
|
82
82
|
# [comment_id] The id of the commit comment to retrieve
|
83
|
-
def get_commit_comment(user, repo, comment_id)
|
83
|
+
def get_commit_comment(user, repo, sha, comment_id)
|
84
84
|
transaction do
|
85
|
-
ensure_commit_comment(user, repo, comment_id)
|
85
|
+
ensure_commit_comment(user, repo, sha, comment_id)
|
86
86
|
end
|
87
87
|
end
|
88
88
|
|
@@ -226,8 +226,8 @@ module GHTorrent
|
|
226
226
|
end
|
227
227
|
|
228
228
|
commits.map do |c|
|
229
|
-
ensure_commit(repo, c['sha'], user)
|
230
|
-
end
|
229
|
+
save{ensure_commit(repo, c['sha'], user)}
|
230
|
+
end.select{|x| !x.nil?}
|
231
231
|
end
|
232
232
|
|
233
233
|
##
|
@@ -237,35 +237,37 @@ module GHTorrent
|
|
237
237
|
commits = @db[:commits]
|
238
238
|
parents = @db[:commit_parents]
|
239
239
|
commit['parents'].map do |p|
|
240
|
-
|
241
|
-
|
242
|
-
|
240
|
+
save do
|
241
|
+
url = p['url'].split(/\//)
|
242
|
+
this = commits.first(:sha => commit['sha'])
|
243
|
+
parent = commits.first(:sha => url[7])
|
244
|
+
|
245
|
+
if parent.nil?
|
246
|
+
c = retrieve_commit(url[5], url[7], url[4])
|
247
|
+
if c.nil?
|
248
|
+
warn "GHTorrent: Could not retrieve #{url[4]}/#{url[5]} -> #{url[7]}, parent to commit #{this[:sha]}"
|
249
|
+
next
|
250
|
+
end
|
251
|
+
parent = store_commit(c, url[5], url[4])
|
252
|
+
end
|
243
253
|
|
244
|
-
|
245
|
-
c = retrieve_commit(url[5], url[7], url[4])
|
246
|
-
if c.nil?
|
254
|
+
if parent.nil?
|
247
255
|
warn "GHTorrent: Could not retrieve #{url[4]}/#{url[5]} -> #{url[7]}, parent to commit #{this[:sha]}"
|
248
256
|
next
|
249
257
|
end
|
250
|
-
parent = store_commit(c, url[5], url[4])
|
251
|
-
end
|
252
258
|
|
253
|
-
|
254
|
-
|
255
|
-
next
|
256
|
-
end
|
259
|
+
if parents.first(:commit_id => this[:id],
|
260
|
+
:parent_id => parent[:id]).nil?
|
257
261
|
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
debug "GHTorrent: Parent #{parent[:sha]} for commit #{this[:sha]} exists"
|
262
|
+
parents.insert(:commit_id => this[:id],
|
263
|
+
:parent_id => parent[:id])
|
264
|
+
info "GHTorrent: Added parent #{parent[:sha]} to commit #{this[:sha]}"
|
265
|
+
else
|
266
|
+
debug "GHTorrent: Parent #{parent[:sha]} for commit #{this[:sha]} exists"
|
267
|
+
end
|
268
|
+
parents.first(:commit_id => this[:id], :parent_id => parent[:id])
|
266
269
|
end
|
267
|
-
|
268
|
-
end
|
270
|
+
end.select{|x| !x.nil?}
|
269
271
|
end
|
270
272
|
|
271
273
|
##
|
@@ -479,7 +481,7 @@ module GHTorrent
|
|
479
481
|
else
|
480
482
|
acc
|
481
483
|
end
|
482
|
-
end.map { |x| ensure_user_follower(followed, x['login']) }
|
484
|
+
end.map { |x| save{ensure_user_follower(followed, x['login']) }}.select{|x| !x.nil?}
|
483
485
|
end
|
484
486
|
|
485
487
|
##
|
@@ -666,7 +668,7 @@ module GHTorrent
|
|
666
668
|
else
|
667
669
|
acc
|
668
670
|
end
|
669
|
-
end.map { |x| ensure_project_member(user, repo, x['login'], time) }
|
671
|
+
end.map { |x| save{ensure_project_member(user, repo, x['login'], time) }}.select{|x| !x.nil?}
|
670
672
|
end
|
671
673
|
|
672
674
|
##
|
@@ -722,7 +724,7 @@ module GHTorrent
|
|
722
724
|
# [user] The login name of the user to check the organizations for
|
723
725
|
#
|
724
726
|
def ensure_orgs(user)
|
725
|
-
retrieve_orgs(user).map{|o| ensure_participation(user, o['login'])}
|
727
|
+
retrieve_orgs(user).map{|o| save{ensure_participation(user, o['login'])}}.select{|x| !x.nil?}
|
726
728
|
end
|
727
729
|
|
728
730
|
##
|
@@ -735,6 +737,12 @@ module GHTorrent
|
|
735
737
|
#
|
736
738
|
def ensure_participation(user, organization, members = true)
|
737
739
|
org = ensure_org(organization, members)
|
740
|
+
|
741
|
+
if org.nil?
|
742
|
+
warn "Organization #{organization} does not exit"
|
743
|
+
return
|
744
|
+
end
|
745
|
+
|
738
746
|
usr = ensure_user(user, false, false)
|
739
747
|
|
740
748
|
org_members = @db[:organization_members]
|
@@ -758,22 +766,26 @@ module GHTorrent
|
|
758
766
|
# ==Parameters:
|
759
767
|
# [organization] The login name of the organization
|
760
768
|
#
|
761
|
-
def ensure_org(organization, members)
|
769
|
+
def ensure_org(organization, members = true)
|
762
770
|
org = @db[:users].first(:login => organization, :type => 'org')
|
763
771
|
|
764
772
|
if org.nil?
|
765
773
|
org = ensure_user(organization, false, false)
|
774
|
+
|
775
|
+
# Not an organization, don't go ahead
|
776
|
+
if org[:type] != 'ORG'
|
777
|
+
warn "GHTorrent: Account #{organization} is not an organization"
|
778
|
+
return nil
|
779
|
+
end
|
780
|
+
|
766
781
|
if members
|
767
|
-
|
768
|
-
|
769
|
-
|
770
|
-
|
782
|
+
retrieve_org_members(organization).map do |x|
|
783
|
+
ensure_participation(ensure_user(x['login'], false, false)[:login],
|
784
|
+
organization, false)
|
785
|
+
end
|
771
786
|
end
|
772
|
-
org
|
773
|
-
else
|
774
|
-
debug "GHTorrent: Organization #{organization} exists"
|
775
|
-
org
|
776
787
|
end
|
788
|
+
org
|
777
789
|
end
|
778
790
|
|
779
791
|
##
|
@@ -796,30 +808,23 @@ module GHTorrent
|
|
796
808
|
end
|
797
809
|
end
|
798
810
|
|
799
|
-
not_saved.map{|x| ensure_commit_comment(user, repo, x['id'])}
|
811
|
+
not_saved.map{|x| save{ensure_commit_comment(user, repo, sha, x['id'])}}.select{|x| !x.nil?}
|
800
812
|
end
|
801
813
|
|
802
|
-
|
803
|
-
|
804
|
-
|
805
|
-
# ==Parameters:
|
806
|
-
# [user] The login name of the organization
|
807
|
-
# [repo] The repository containing the commit whose comment will be retrieved
|
808
|
-
# [id] The comment id to retrieve
|
809
|
-
# [created_at] The timestamp that the comment was made.
|
810
|
-
def ensure_commit_comment(user, repo, id)
|
811
|
-
stored_comment = @db[:commit_comments].first(:comment_id => id)
|
814
|
+
|
815
|
+
def ensure_commit_comment(owner, repo, sha, comment_id)
|
816
|
+
stored_comment = @db[:commit_comments].first(:comment_id => comment_id)
|
812
817
|
|
813
818
|
if stored_comment.nil?
|
814
|
-
retrieved = retrieve_commit_comment(
|
819
|
+
retrieved = retrieve_commit_comment(owner, repo, sha, comment_id)
|
815
820
|
|
816
821
|
if retrieved.nil?
|
817
|
-
warn "GHTorrent: Commit comment #{id} deleted"
|
822
|
+
warn "GHTorrent: Commit comment #{sha}->#{id} deleted"
|
818
823
|
return
|
819
824
|
end
|
820
825
|
|
821
|
-
commit = ensure_commit(repo,
|
822
|
-
user = ensure_user(user, false, false)
|
826
|
+
commit = ensure_commit(repo, sha, owner, false)
|
827
|
+
user = ensure_user(retrieved['user']['login'], false, false)
|
823
828
|
@db[:commit_comments].insert(
|
824
829
|
:commit_id => commit[:id],
|
825
830
|
:user_id => user[:id],
|
@@ -830,11 +835,11 @@ module GHTorrent
|
|
830
835
|
:ext_ref_id => retrieved[@ext_uniq],
|
831
836
|
:created_at => date(retrieved['created_at'])
|
832
837
|
)
|
833
|
-
info "GHTorrent: Added commit comment #{
|
838
|
+
info "GHTorrent: Added commit comment #{sha} -> #{retrieved['id']} by #{user[:login]}"
|
834
839
|
else
|
835
|
-
info "GHTorrent: Commit comment #{id} exists"
|
840
|
+
info "GHTorrent: Commit comment #{sha} -> #{id} exists"
|
836
841
|
end
|
837
|
-
@db[:commit_comments].first(:comment_id =>
|
842
|
+
@db[:commit_comments].first(:comment_id => comment_id)
|
838
843
|
end
|
839
844
|
|
840
845
|
##
|
@@ -859,7 +864,7 @@ module GHTorrent
|
|
859
864
|
else
|
860
865
|
acc
|
861
866
|
end
|
862
|
-
end.map { |x| ensure_watcher(owner, repo, x['login'], nil) }
|
867
|
+
end.map { |x| save{ensure_watcher(owner, repo, x['login'], nil) }}.select{|x| !x.nil?}
|
863
868
|
end
|
864
869
|
|
865
870
|
##
|
@@ -934,7 +939,7 @@ module GHTorrent
|
|
934
939
|
end
|
935
940
|
end
|
936
941
|
|
937
|
-
raw_pull_reqs.map { |x| ensure_pull_request(owner, repo, x['number']) }
|
942
|
+
raw_pull_reqs.map { |x| save { ensure_pull_request(owner, repo, x['number']) } }.select { |x| !x.nil? }
|
938
943
|
end
|
939
944
|
|
940
945
|
##
|
@@ -966,7 +971,7 @@ module GHTorrent
|
|
966
971
|
info "GHTorrent: New pull request (#{id}) event (#{act}) by (#{actor}) timestamp #{ts}"
|
967
972
|
else
|
968
973
|
entry.update(:actor_id => user[:id])
|
969
|
-
info "GHTorrent: Pull request (#{id}) history entry (#{act}) timestamp #{ts} exists"
|
974
|
+
info "GHTorrent: Pull request (#{id}) history entry (#{act}) by (#{actor}) timestamp #{ts} exists"
|
970
975
|
end
|
971
976
|
end
|
972
977
|
|
@@ -1128,8 +1133,8 @@ module GHTorrent
|
|
1128
1133
|
acc
|
1129
1134
|
end
|
1130
1135
|
end.map { |x|
|
1131
|
-
ensure_pullreq_comment(owner, repo, pullreq_id, x['id'])
|
1132
|
-
}
|
1136
|
+
save{ensure_pullreq_comment(owner, repo, pullreq_id, x['id'])}
|
1137
|
+
}.select{|x| !x.nil?}
|
1133
1138
|
end
|
1134
1139
|
|
1135
1140
|
def ensure_pullreq_comment(owner, repo, pullreq_id, comment_id)
|
@@ -1191,26 +1196,28 @@ module GHTorrent
|
|
1191
1196
|
return
|
1192
1197
|
end
|
1193
1198
|
|
1194
|
-
retrieve_pull_req_commits(owner, repo, pullreq_id).reduce([]){|acc, c|
|
1199
|
+
retrieve_pull_req_commits(owner, repo, pullreq_id).reduce([]) { |acc, c|
|
1195
1200
|
next if c.nil?
|
1196
1201
|
head_repo_owner = c['url'].split(/\//)[4]
|
1197
1202
|
head_repo_name = c['url'].split(/\//)[5]
|
1198
1203
|
x = ensure_commit(head_repo_name, c['sha'], head_repo_owner, true)
|
1199
1204
|
acc << x if not x.nil?
|
1200
1205
|
acc
|
1201
|
-
}.map
|
1202
|
-
|
1203
|
-
|
1204
|
-
|
1205
|
-
|
1206
|
-
|
1207
|
-
|
1208
|
-
|
1209
|
-
|
1210
|
-
|
1211
|
-
|
1206
|
+
}.map do |c|
|
1207
|
+
save do
|
1208
|
+
exists = @db[:pull_request_commits].first(:pull_request_id => pullreq[:id],
|
1209
|
+
:commit_id => c[:id])
|
1210
|
+
if exists.nil?
|
1211
|
+
@db[:pull_request_commits].insert(:pull_request_id => pullreq[:id],
|
1212
|
+
:commit_id => c[:id])
|
1213
|
+
|
1214
|
+
info "GHTorrent: Added commit #{c[:sha]} to pullreq #{owner}/#{repo} -> #{pullreq_id}"
|
1215
|
+
else
|
1216
|
+
debug "GHTorrent: Commit #{c[:sha]} exists in pullreq #{owner}/#{repo} -> #{pullreq_id}"
|
1217
|
+
exists
|
1218
|
+
end
|
1212
1219
|
end
|
1213
|
-
}
|
1220
|
+
end.select{|x| !x.nil?}
|
1214
1221
|
end
|
1215
1222
|
|
1216
1223
|
##
|
@@ -1241,7 +1248,7 @@ module GHTorrent
|
|
1241
1248
|
else
|
1242
1249
|
acc
|
1243
1250
|
end
|
1244
|
-
end.map { |x| ensure_fork(owner, repo, x['id']) }
|
1251
|
+
end.map { |x| save{ensure_fork(owner, repo, x['id']) }}.select{|x| !x.nil?}
|
1245
1252
|
end
|
1246
1253
|
|
1247
1254
|
##
|
@@ -1288,7 +1295,7 @@ module GHTorrent
|
|
1288
1295
|
end
|
1289
1296
|
end
|
1290
1297
|
|
1291
|
-
raw_issues.map { |x| ensure_issue(owner, repo, x['number']) }
|
1298
|
+
raw_issues.map { |x| save { ensure_issue(owner, repo, x['number']) } }.select { |x| !x.nil? }
|
1292
1299
|
end
|
1293
1300
|
|
1294
1301
|
##
|
@@ -1379,8 +1386,8 @@ module GHTorrent
|
|
1379
1386
|
acc
|
1380
1387
|
end
|
1381
1388
|
end.map { |x|
|
1382
|
-
ensure_issue_event(owner, repo, issue_id, x['id'])
|
1383
|
-
}
|
1389
|
+
save{ensure_issue_event(owner, repo, issue_id, x['id'])}
|
1390
|
+
}.select{|x| !x.nil?}
|
1384
1391
|
end
|
1385
1392
|
|
1386
1393
|
##
|
@@ -1491,8 +1498,8 @@ module GHTorrent
|
|
1491
1498
|
acc
|
1492
1499
|
end
|
1493
1500
|
end.map { |x|
|
1494
|
-
ensure_issue_comment(owner, repo, issue_id, x['id'], pull_req_id)
|
1495
|
-
}
|
1501
|
+
save{ensure_issue_comment(owner, repo, issue_id, x['id'], pull_req_id)}
|
1502
|
+
}.select{|x| !x.nil?}
|
1496
1503
|
end
|
1497
1504
|
|
1498
1505
|
##
|
@@ -1555,7 +1562,7 @@ module GHTorrent
|
|
1555
1562
|
else
|
1556
1563
|
acc
|
1557
1564
|
end
|
1558
|
-
end.map { |x| ensure_repo_label(owner, repo, x['name']) }
|
1565
|
+
end.map { |x| save { ensure_repo_label(owner, repo, x['name']) } }.select { |x| !x.nil? }
|
1559
1566
|
end
|
1560
1567
|
|
1561
1568
|
##
|
@@ -1613,7 +1620,7 @@ module GHTorrent
|
|
1613
1620
|
else
|
1614
1621
|
acc
|
1615
1622
|
end
|
1616
|
-
end.map { |x| ensure_issue_label(owner, repo, issue[:issue_id], x['name']) }
|
1623
|
+
end.map { |x| save{ensure_issue_label(owner, repo, issue[:issue_id], x['name']) }}.select{|x| !x.nil?}
|
1617
1624
|
|
1618
1625
|
end
|
1619
1626
|
|
@@ -1677,6 +1684,20 @@ module GHTorrent
|
|
1677
1684
|
end
|
1678
1685
|
end
|
1679
1686
|
|
1687
|
+
# Runs the given block and returns its value.
#
# When the :rescue_loops setting is true (string 'true' or boolean true),
# a StandardError raised by the block is logged to @logger (message, then
# backtrace) and nil is returned instead, so callers can drop failed items
# with a nil filter. Interrupt, SystemExit and other non-StandardError
# exceptions always propagate, so a stop request is not swallowed.
#
# When the setting is anything else, every exception propagates unchanged.
def save(&block)
  # to_s accepts both the string 'true' and a boolean true from the config.
  return yield unless config(:rescue_loops).to_s == 'true'

  begin
    yield
  rescue StandardError => e
    @logger.error e.message
    @logger.error e.backtrace.join("\n")
    nil
  end
end
|
1700
|
+
|
1680
1701
|
private
|
1681
1702
|
|
1682
1703
|
# Store a commit contained in a hash. First check whether the commit exists.
|