ghtorrent 0.8 → 0.8.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG +9 -0
- data/Gemfile +1 -1
- data/Gemfile.lock +32 -20
- data/README.md +50 -34
- data/Rakefile +3 -3
- data/bin/ght-retrieve-dependents +6 -0
- data/bin/ght-retrieve-repos +6 -0
- data/lib/ghtorrent.rb +3 -0
- data/lib/ghtorrent/adapters/mongo_persister.rb +3 -3
- data/lib/ghtorrent/api_client.rb +6 -4
- data/lib/ghtorrent/command.rb +2 -28
- data/lib/ghtorrent/commands/ght_data_retrieval.rb +3 -2
- data/lib/ghtorrent/commands/ght_load.rb +7 -5
- data/lib/ghtorrent/commands/ght_retrieve_dependents.rb +84 -0
- data/lib/ghtorrent/commands/ght_retrieve_repo.rb +1 -70
- data/lib/ghtorrent/commands/ght_retrieve_repos.rb +206 -0
- data/lib/ghtorrent/commands/ght_retrieve_user.rb +9 -2
- data/lib/ghtorrent/ghtorrent.rb +103 -82
- data/lib/ghtorrent/logging.rb +2 -1
- data/lib/ghtorrent/migrations/015_fix_table_issue_labels.rb +17 -5
- data/lib/ghtorrent/migrations/016_add_actor_pull_request_history.rb +1 -1
- data/lib/ghtorrent/retriever.rb +8 -17
- data/lib/ghtorrent/settings.rb +9 -5
- data/lib/ghtorrent/transacted_ghtorrent.rb +91 -0
- data/lib/version.rb +1 -1
- data/spec/api_client_spec.rb +42 -0
- data/spec/spec_helper.rb +21 -0
- metadata +46 -52
@@ -0,0 +1,84 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'amqp'
|
3
|
+
require 'json'
|
4
|
+
require 'pp'
|
5
|
+
|
6
|
+
require 'ghtorrent/ghtorrent'
|
7
|
+
require 'ghtorrent/settings'
|
8
|
+
require 'ghtorrent/logging'
|
9
|
+
require 'ghtorrent/command'
|
10
|
+
|
11
|
+
# Command that retrieves one entity (a commit, an issue or a pull request)
# through GHTorrent::Mirror, asking the mirror to also process the items
# attached to it (the boolean flags passed to the ensure_* calls below).
#
# Usage: <command> [options] entity owner repo entity-id
class GHTRetrieveDependents < GHTorrent::Command

  include GHTorrent::Settings
  include GHTorrent::Logging
  include GHTorrent::Persister

  # Number of entity-id fields that must follow each supported entity name
  # on the command line.
  REQ_ARGS = {
    :commit => 3,
    :issue => 3,
    :pull_request => 3
  }.freeze

  # Installs the usage banner on the option parser.
  #
  # [options] The option parser object handed in by GHTorrent::Command
  def prepare_options(options)
    options.banner <<-BANNER
Recursively retrieve all dependent entities for a specific entity
#{command_name} [options] entity entity-id

#{command_name} entity is one of (in parenthesis the entity-id fields):
commit (owner repo sha)
issue (owner repo issue_id)
pull_request (owner repo pullreq_id)
#{command_name}
    BANNER
  end

  # Logging goes through the mirror's logger.
  def logger
    ghtorrent.logger
  end

  # Lazily created connection to the Mongo persister.
  def persister
    @persister ||= connect(:mongo, settings)
  end

  # Lazily created GHTorrent::Mirror built from this command's settings.
  def ghtorrent
    @gh ||= GHTorrent::Mirror.new(@settings)
  end

  # Lazily fetched database handle of the mirror.
  def db
    @db ||= ghtorrent.get_db
  end

  # Entry point: validates the positional arguments and dispatches to the
  # matching ensure_* method of the mirror. Exits through Trollop::die when
  # the entity name is missing/unknown or the argument count is wrong.
  def go
    # Touch the database handle up front, before any retrieval starts.
    db

    # A missing entity name (ARGV[0] == nil) matches nothing and is reported
    # the same way as an unknown one.
    type = REQ_ARGS.keys.find { |known| known.to_s == ARGV[0] }
    Trollop::die("Don't know how to handle #{ARGV[0]}") if type.nil?

    unless ARGV.size - 1 == REQ_ARGS[type]
      Trollop::die("#{ARGV[0]} requires #{REQ_ARGS[type]} arguments")
    end

    owner, repo, entity_id = ARGV[1..3]

    case type
    when :commit
      # ensure_commit takes its arguments in (repo, sha, user) order.
      ghtorrent.ensure_commit(repo, entity_id, owner, true)
    when :issue
      ghtorrent.ensure_issue(owner, repo, entity_id, true, true, true)
    when :pull_request
      # Only the comments/commits flags are passed here.
      # NOTE(review): in this release the positional parameter that follows
      # them is the pull request state, so a third `true` would be taken as
      # a state value rather than as a flag -- confirm against Mirror.
      ghtorrent.ensure_pull_request(owner, repo, entity_id, true, true)
    end
  end
end
|
83
|
+
|
84
|
+
# vim: set sta sts=2 shiftwidth=2 sw=2 et ai :
|
@@ -64,12 +64,7 @@ An efficient way to get all data for a single repo
|
|
64
64
|
repo = repo_entry[:name]
|
65
65
|
|
66
66
|
def send_message(function, user, repo)
|
67
|
-
|
68
|
-
ght.send(function, user, repo, refresh = true)
|
69
|
-
rescue Exception => e
|
70
|
-
puts STDERR, e.message
|
71
|
-
puts STDERR, e.backtrace
|
72
|
-
end
|
67
|
+
ght.send(function, user, repo, refresh = true)
|
73
68
|
end
|
74
69
|
|
75
70
|
functions = %w(ensure_commits ensure_forks ensure_pull_requests
|
@@ -86,67 +81,3 @@ An efficient way to get all data for a single repo
|
|
86
81
|
end
|
87
82
|
end
|
88
83
|
|
89
|
-
# A version of the GHTorrent class that creates a transaction per processed
|
90
|
-
# item
|
91
|
-
class TransactedGHTorrent < GHTorrent::Mirror
|
92
|
-
|
93
|
-
def ensure_commit(repo, sha, user, comments = true)
|
94
|
-
check_transaction do
|
95
|
-
super(repo, sha, user, comments)
|
96
|
-
end
|
97
|
-
end
|
98
|
-
|
99
|
-
def ensure_fork(owner, repo, fork_id)
|
100
|
-
check_transaction do
|
101
|
-
super(owner, repo, fork_id)
|
102
|
-
end
|
103
|
-
end
|
104
|
-
|
105
|
-
def ensure_pull_request(owner, repo, pullreq_id,
|
106
|
-
comments = true, commits = true,
|
107
|
-
state = nil, created_at = nil)
|
108
|
-
check_transaction do
|
109
|
-
super(owner, repo, pullreq_id, comments, commits, state, created_at)
|
110
|
-
end
|
111
|
-
end
|
112
|
-
|
113
|
-
def ensure_issue(owner, repo, issue_id, events = true, comments = true, labels = true)
|
114
|
-
check_transaction do
|
115
|
-
super(owner, repo, issue_id, events, comments, labels)
|
116
|
-
end
|
117
|
-
end
|
118
|
-
|
119
|
-
def ensure_project_member(owner, repo, new_member, date_added)
|
120
|
-
check_transaction do
|
121
|
-
super(owner, repo, new_member, date_added)
|
122
|
-
end
|
123
|
-
end
|
124
|
-
|
125
|
-
def ensure_watcher(owner, repo, watcher, date_added = nil)
|
126
|
-
check_transaction do
|
127
|
-
super(owner, repo, watcher, date_added)
|
128
|
-
end
|
129
|
-
end
|
130
|
-
|
131
|
-
def ensure_repo_label(owner, repo, name)
|
132
|
-
check_transaction do
|
133
|
-
super(owner, repo, name)
|
134
|
-
end
|
135
|
-
end
|
136
|
-
|
137
|
-
def check_transaction(&block)
|
138
|
-
begin
|
139
|
-
if @db.in_transaction?
|
140
|
-
yield block
|
141
|
-
else
|
142
|
-
transaction do
|
143
|
-
yield block
|
144
|
-
end
|
145
|
-
end
|
146
|
-
rescue Exception => e
|
147
|
-
puts STDERR, e.message
|
148
|
-
puts STDERR, e.backtrace
|
149
|
-
end
|
150
|
-
end
|
151
|
-
|
152
|
-
end
|
@@ -0,0 +1,206 @@
|
|
1
|
+
require 'ghtorrent/ghtorrent'
|
2
|
+
require 'ghtorrent/settings'
|
3
|
+
require 'ghtorrent/logging'
|
4
|
+
require 'ghtorrent/command'
|
5
|
+
require 'ghtorrent/retriever'
|
6
|
+
|
7
|
+
# Command that forks a set of worker processes, each running a
# GHTRepoRetriever with its own outgoing IP / Github credentials, and then
# waits for all of them to exit.
class GHTRetrieveRepos < GHTorrent::Command

  include GHTorrent::Settings
  include GHTorrent::Logging

  # Logger of the parent process; writes to standard output.
  def logger
    @logger ||= Logger.new(STDOUT)
  end

  # Installs the usage banner and the --queue option.
  #
  # [options] The option parser object handed in by GHTorrent::Command
  def prepare_options(options)
    options.banner <<-BANNER
Retrieve data for multiple repos in parallel. To work, it requires
a mapping file formatted as follows:

IP UNAME PASSWD NUM_PROCS where

IP = address to use for outgoing requests (use 0.0.0.0 on non-multihomed hosts)
UNAME = Github user name to use for outgoing requests
PASSWD = Github password to use for outgoing requests
NUM_PROCS = Number of processes to spawn for this IP/UNAME combination

Values in the config.yaml file set with the -c command are overriden.

#{command_name} [options] mapping-file

    BANNER
    options.opt :queue, 'Queue to retrieve project names from',
                :short => 'q', :default => 'retrieve-repo', :type => :string
  end

  # Aborts through Trollop::die unless a mapping file was given.
  def validate
    super
    Trollop::die 'Argument mapping-file is required' if args[0].nil?
  end

  # Entry point: builds one configuration per requested worker, forks a
  # child for each and blocks until every child has exited.
  def go
    children = read_configs(ARGV[0]).map { |config| spawn_retriever(config) }
    wait_for(children)
  end

  private

  # Parses the mapping file into a flat list of per-worker configurations.
  # Each non-comment line contributes NUM_PROCS copies of the base settings
  # with the IP, user name and password overridden.
  #
  # [mapping_file] Path of the mapping file
  def read_configs(mapping_file)
    # File.readlines closes the file once it has been read.
    File.readlines(mapping_file).map do |line|
      entry = line.strip
      next if entry.empty? || entry.start_with?('#')

      # Split on any run of whitespace so that tabs or repeated spaces
      # between the fields do not shift the columns.
      ip, name, passwd, instances = entry.split(/\s+/)

      (1..instances.to_i).map do |_instance|
        newcfg = self.settings.clone
        newcfg = override_config(newcfg, :attach_ip, ip)
        newcfg = override_config(newcfg, :github_username, name)
        newcfg = override_config(newcfg, :github_passwd, passwd)
        newcfg = override_config(newcfg, :mirror_history_pages_back, 1000)
        newcfg = override_config(newcfg, :mirror_commit_pages_new_repo, 1000)
        newcfg
      end
    end.flatten.compact
  end

  # Forks one worker. In the child a GHTRepoRetriever is run until it
  # returns and the child then exits; in the parent the child's pid is
  # returned.
  #
  # [config] The configuration the worker should use
  def spawn_retriever(config)
    pid = Process::fork

    if pid.nil?
      retriever = GHTRepoRetriever.new(config, options[:queue])

      # TERM asks the retriever to stop instead of killing it outright.
      Signal.trap('TERM') {
        retriever.stop
      }

      retriever.run
      exit
    else
      debug "Parent #{Process.pid} forked child #{pid}"
      pid
    end
  end

  # Blocks until all the given child processes have exited; an Interrupt
  # ends the wait early.
  #
  # [children] Array of child pids
  def wait_for(children)
    debug 'Waiting for children'
    begin
      children.each do |pid|
        debug "Waiting for child #{pid}"
        Process.waitpid(pid, 0)
        debug "Child #{pid} exited"
      end
    rescue Interrupt
      debug 'Stopping'
    end
  end
end
|
90
|
+
|
91
|
+
# Worker that consumes "owner repo" messages from an AMQP queue and, for
# each message, runs the repository-level ensure_* functions of a
# TransactedGHTorrent mirror.
class GHTRepoRetriever

  include GHTorrent::Settings
  include GHTorrent::Retriever
  include GHTorrent::Persister

  # Mirror functions run, in this order, for every repository received.
  RETRIEVAL_FUNCTIONS = %w(ensure_commits ensure_forks ensure_pull_requests
    ensure_issues ensure_project_members ensure_watchers ensure_labels).freeze

  # [config] The settings this worker should use
  # [queue]  Name of the AMQP queue to read repository names from
  def initialize(config, queue)
    @config = config
    @queue = queue
    @stop = false
  end

  # Logging goes through the mirror's logger.
  def logger
    ght.logger
  end

  # Lazily created connection to the Mongo persister.
  def persister
    @persister ||= connect(:mongo, settings)
  end

  # Lazily read value of the :uniq_id configuration key.
  def ext_uniq
    @ext_uniq ||= config(:uniq_id)
  end

  # Lazily created mirror. The class is spelled TransactedGHTorrent; any
  # other capitalisation raises NameError on first use.
  def ght
    @ght ||= TransactedGHTorrent.new(@config)
  end

  # The settings are the configuration handed to the constructor.
  def settings
    @config
  end

  # Connects to AMQP, subscribes to the queue and processes messages until
  # the stop flag is set; the connection is then closed after the message
  # in progress has been acknowledged.
  def run
    AMQP.start(:host => config(:amqp_host),
               :port => config(:amqp_port),
               :username => config(:amqp_username),
               :password => config(:amqp_password)) do |connection|

      # The second block parameter is unused; it is named with a leading
      # underscore so it does not shadow the #settings method.
      connection.on_tcp_connection_loss do |conn, _settings|
        warn 'AMQP: Network failure. Trying to reconnect...'
        conn.reconnect(false, 2)
      end

      channel = AMQP::Channel.new(connection)
      channel.auto_recovery = true
      channel.prefetch(1)

      channel.on_error do |ch, channel_close|
        warn 'AMQP: Channel closed. Should reconnect by itself'
        raise channel_close.reply_text
      end

      exchange = channel.topic(config(:amqp_exchange), :durable => true,
                               :auto_delete => false)

      queue = channel.queue(@queue, {:durable => true}).bind(exchange)

      queue.subscribe(:ack => true) do |headers, msg|
        owner, repo = msg.split(/ /)
        user_entry = ght.transaction { ght.ensure_user(owner, false, false) }

        if user_entry.nil?
          warn("Cannot find user #{owner}")
          headers.ack
          next
        end

        repo_entry = ght.transaction { ght.ensure_repo(owner, repo) }

        if repo_entry.nil?
          warn("Cannot find repository #{owner}/#{repo}")
          headers.ack
          next
        end

        debug("Retrieving repo #{owner}/#{repo}")
        retrieve_all(owner, repo)

        headers.ack
        debug("Finished processing #{owner}/#{repo}")
        if @stop
          connection.disconnect{AMQP.stop { EM.stop }}
        end
      end
    end
  end

  # Invokes one mirror function for a repository, with its fourth
  # (refresh) argument set to false.
  #
  # [function] Name of the mirror method to call
  # [user]     The login of the repository owner
  # [repo]     The name of the repository
  def send_message(function, user, repo)
    ght.send(function, user, repo, false)
  end

  # Sets the stop flag; the run loop checks it between retrieval functions
  # and after each message.
  def stop
    warn('Stop flag set, waiting for operations to finish')
    @stop = true
  end

  private

  # Runs every retrieval function for one repository. A failing function is
  # logged and skipped so the remaining ones still run; an Interrupt sets
  # the stop flag, which ends the loop.
  def retrieve_all(owner, repo)
    RETRIEVAL_FUNCTIONS.each do |function|
      begin
        send_message(function, owner, repo)
      rescue Interrupt
        stop
      rescue Exception => e
        # Deliberately best-effort, but keep the reason in the log.
        warn("Error processing #{function} for #{owner}/#{repo}: #{e.message}")
        next
      end

      break if @stop
    end
  end
end
|
206
|
+
|
@@ -42,7 +42,7 @@ An efficient way to get all data for a single user
|
|
42
42
|
end
|
43
43
|
end
|
44
44
|
|
45
|
-
functions = %w(ensure_user_followers ensure_orgs)
|
45
|
+
functions = %w(ensure_user_followers ensure_orgs ensure_org)
|
46
46
|
|
47
47
|
if ARGV[2].nil?
|
48
48
|
functions.each do |x|
|
@@ -69,4 +69,11 @@ class TransactedGHTorrent < GHTorrent::Mirror
|
|
69
69
|
super(user)
|
70
70
|
end
|
71
71
|
end
|
72
|
-
|
72
|
+
|
73
|
+
# Same contract as the parent class's ensure_org, with the call to the
# parent implementation made from inside a check_transaction block.
#
# [user]    Passed through unchanged to the parent implementation
# [members] Passed through unchanged to the parent implementation
def ensure_org(user, members = true)
  check_transaction { super(user, members) }
end
|
78
|
+
|
79
|
+
end
|
data/lib/ghtorrent/ghtorrent.rb
CHANGED
@@ -80,9 +80,9 @@ module GHTorrent
|
|
80
80
|
# [user] The login of the repository owner
|
81
81
|
# [repo] The name of the repository
|
82
82
|
# [comment_id] The id of the commit comment to retrieve
|
83
|
-
def get_commit_comment(user, repo, comment_id)
|
83
|
+
def get_commit_comment(user, repo, sha, comment_id)
|
84
84
|
transaction do
|
85
|
-
ensure_commit_comment(user, repo, comment_id)
|
85
|
+
ensure_commit_comment(user, repo, sha, comment_id)
|
86
86
|
end
|
87
87
|
end
|
88
88
|
|
@@ -226,8 +226,8 @@ module GHTorrent
|
|
226
226
|
end
|
227
227
|
|
228
228
|
commits.map do |c|
|
229
|
-
ensure_commit(repo, c['sha'], user)
|
230
|
-
end
|
229
|
+
save{ensure_commit(repo, c['sha'], user)}
|
230
|
+
end.select{|x| !x.nil?}
|
231
231
|
end
|
232
232
|
|
233
233
|
##
|
@@ -237,35 +237,37 @@ module GHTorrent
|
|
237
237
|
commits = @db[:commits]
|
238
238
|
parents = @db[:commit_parents]
|
239
239
|
commit['parents'].map do |p|
|
240
|
-
|
241
|
-
|
242
|
-
|
240
|
+
save do
|
241
|
+
url = p['url'].split(/\//)
|
242
|
+
this = commits.first(:sha => commit['sha'])
|
243
|
+
parent = commits.first(:sha => url[7])
|
244
|
+
|
245
|
+
if parent.nil?
|
246
|
+
c = retrieve_commit(url[5], url[7], url[4])
|
247
|
+
if c.nil?
|
248
|
+
warn "GHTorrent: Could not retrieve #{url[4]}/#{url[5]} -> #{url[7]}, parent to commit #{this[:sha]}"
|
249
|
+
next
|
250
|
+
end
|
251
|
+
parent = store_commit(c, url[5], url[4])
|
252
|
+
end
|
243
253
|
|
244
|
-
|
245
|
-
c = retrieve_commit(url[5], url[7], url[4])
|
246
|
-
if c.nil?
|
254
|
+
if parent.nil?
|
247
255
|
warn "GHTorrent: Could not retrieve #{url[4]}/#{url[5]} -> #{url[7]}, parent to commit #{this[:sha]}"
|
248
256
|
next
|
249
257
|
end
|
250
|
-
parent = store_commit(c, url[5], url[4])
|
251
|
-
end
|
252
258
|
|
253
|
-
|
254
|
-
|
255
|
-
next
|
256
|
-
end
|
259
|
+
if parents.first(:commit_id => this[:id],
|
260
|
+
:parent_id => parent[:id]).nil?
|
257
261
|
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
debug "GHTorrent: Parent #{parent[:sha]} for commit #{this[:sha]} exists"
|
262
|
+
parents.insert(:commit_id => this[:id],
|
263
|
+
:parent_id => parent[:id])
|
264
|
+
info "GHTorrent: Added parent #{parent[:sha]} to commit #{this[:sha]}"
|
265
|
+
else
|
266
|
+
debug "GHTorrent: Parent #{parent[:sha]} for commit #{this[:sha]} exists"
|
267
|
+
end
|
268
|
+
parents.first(:commit_id => this[:id], :parent_id => parent[:id])
|
266
269
|
end
|
267
|
-
|
268
|
-
end
|
270
|
+
end.select{|x| !x.nil?}
|
269
271
|
end
|
270
272
|
|
271
273
|
##
|
@@ -479,7 +481,7 @@ module GHTorrent
|
|
479
481
|
else
|
480
482
|
acc
|
481
483
|
end
|
482
|
-
end.map { |x| ensure_user_follower(followed, x['login']) }
|
484
|
+
end.map { |x| save{ensure_user_follower(followed, x['login']) }}.select{|x| !x.nil?}
|
483
485
|
end
|
484
486
|
|
485
487
|
##
|
@@ -666,7 +668,7 @@ module GHTorrent
|
|
666
668
|
else
|
667
669
|
acc
|
668
670
|
end
|
669
|
-
end.map { |x| ensure_project_member(user, repo, x['login'], time) }
|
671
|
+
end.map { |x| save{ensure_project_member(user, repo, x['login'], time) }}.select{|x| !x.nil?}
|
670
672
|
end
|
671
673
|
|
672
674
|
##
|
@@ -722,7 +724,7 @@ module GHTorrent
|
|
722
724
|
# [user] The login name of the user to check the organizations for
|
723
725
|
#
|
724
726
|
def ensure_orgs(user)
|
725
|
-
retrieve_orgs(user).map{|o| ensure_participation(user, o['login'])}
|
727
|
+
retrieve_orgs(user).map{|o| save{ensure_participation(user, o['login'])}}.select{|x| !x.nil?}
|
726
728
|
end
|
727
729
|
|
728
730
|
##
|
@@ -735,6 +737,12 @@ module GHTorrent
|
|
735
737
|
#
|
736
738
|
def ensure_participation(user, organization, members = true)
|
737
739
|
org = ensure_org(organization, members)
|
740
|
+
|
741
|
+
if org.nil?
|
742
|
+
warn "Organization #{organization} does not exit"
|
743
|
+
return
|
744
|
+
end
|
745
|
+
|
738
746
|
usr = ensure_user(user, false, false)
|
739
747
|
|
740
748
|
org_members = @db[:organization_members]
|
@@ -758,22 +766,26 @@ module GHTorrent
|
|
758
766
|
# ==Parameters:
|
759
767
|
# [organization] The login name of the organization
|
760
768
|
#
|
761
|
-
def ensure_org(organization, members)
|
769
|
+
def ensure_org(organization, members = true)
|
762
770
|
org = @db[:users].first(:login => organization, :type => 'org')
|
763
771
|
|
764
772
|
if org.nil?
|
765
773
|
org = ensure_user(organization, false, false)
|
774
|
+
|
775
|
+
# Not an organization, don't go ahead
|
776
|
+
if org[:type] != 'ORG'
|
777
|
+
warn "GHTorrent: Account #{organization} is not an organization"
|
778
|
+
return nil
|
779
|
+
end
|
780
|
+
|
766
781
|
if members
|
767
|
-
|
768
|
-
|
769
|
-
|
770
|
-
|
782
|
+
retrieve_org_members(organization).map do |x|
|
783
|
+
ensure_participation(ensure_user(x['login'], false, false)[:login],
|
784
|
+
organization, false)
|
785
|
+
end
|
771
786
|
end
|
772
|
-
org
|
773
|
-
else
|
774
|
-
debug "GHTorrent: Organization #{organization} exists"
|
775
|
-
org
|
776
787
|
end
|
788
|
+
org
|
777
789
|
end
|
778
790
|
|
779
791
|
##
|
@@ -796,30 +808,23 @@ module GHTorrent
|
|
796
808
|
end
|
797
809
|
end
|
798
810
|
|
799
|
-
not_saved.map{|x| ensure_commit_comment(user, repo, x['id'])}
|
811
|
+
not_saved.map{|x| save{ensure_commit_comment(user, repo, sha, x['id'])}}.select{|x| !x.nil?}
|
800
812
|
end
|
801
813
|
|
802
|
-
|
803
|
-
|
804
|
-
|
805
|
-
# ==Parameters:
|
806
|
-
# [user] The login name of the organization
|
807
|
-
# [repo] The repository containing the commit whose comment will be retrieved
|
808
|
-
# [id] The comment id to retrieve
|
809
|
-
# [created_at] The timestamp that the comment was made.
|
810
|
-
def ensure_commit_comment(user, repo, id)
|
811
|
-
stored_comment = @db[:commit_comments].first(:comment_id => id)
|
814
|
+
|
815
|
+
def ensure_commit_comment(owner, repo, sha, comment_id)
|
816
|
+
stored_comment = @db[:commit_comments].first(:comment_id => comment_id)
|
812
817
|
|
813
818
|
if stored_comment.nil?
|
814
|
-
retrieved = retrieve_commit_comment(
|
819
|
+
retrieved = retrieve_commit_comment(owner, repo, sha, comment_id)
|
815
820
|
|
816
821
|
if retrieved.nil?
|
817
|
-
warn "GHTorrent: Commit comment #{id} deleted"
|
822
|
+
warn "GHTorrent: Commit comment #{sha}->#{id} deleted"
|
818
823
|
return
|
819
824
|
end
|
820
825
|
|
821
|
-
commit = ensure_commit(repo,
|
822
|
-
user = ensure_user(user, false, false)
|
826
|
+
commit = ensure_commit(repo, sha, owner, false)
|
827
|
+
user = ensure_user(retrieved['user']['login'], false, false)
|
823
828
|
@db[:commit_comments].insert(
|
824
829
|
:commit_id => commit[:id],
|
825
830
|
:user_id => user[:id],
|
@@ -830,11 +835,11 @@ module GHTorrent
|
|
830
835
|
:ext_ref_id => retrieved[@ext_uniq],
|
831
836
|
:created_at => date(retrieved['created_at'])
|
832
837
|
)
|
833
|
-
info "GHTorrent: Added commit comment #{
|
838
|
+
info "GHTorrent: Added commit comment #{sha} -> #{retrieved['id']} by #{user[:login]}"
|
834
839
|
else
|
835
|
-
info "GHTorrent: Commit comment #{id} exists"
|
840
|
+
info "GHTorrent: Commit comment #{sha} -> #{id} exists"
|
836
841
|
end
|
837
|
-
@db[:commit_comments].first(:comment_id =>
|
842
|
+
@db[:commit_comments].first(:comment_id => comment_id)
|
838
843
|
end
|
839
844
|
|
840
845
|
##
|
@@ -859,7 +864,7 @@ module GHTorrent
|
|
859
864
|
else
|
860
865
|
acc
|
861
866
|
end
|
862
|
-
end.map { |x| ensure_watcher(owner, repo, x['login'], nil) }
|
867
|
+
end.map { |x| save{ensure_watcher(owner, repo, x['login'], nil) }}.select{|x| !x.nil?}
|
863
868
|
end
|
864
869
|
|
865
870
|
##
|
@@ -934,7 +939,7 @@ module GHTorrent
|
|
934
939
|
end
|
935
940
|
end
|
936
941
|
|
937
|
-
raw_pull_reqs.map { |x| ensure_pull_request(owner, repo, x['number']) }
|
942
|
+
raw_pull_reqs.map { |x| save { ensure_pull_request(owner, repo, x['number']) } }.select { |x| !x.nil? }
|
938
943
|
end
|
939
944
|
|
940
945
|
##
|
@@ -966,7 +971,7 @@ module GHTorrent
|
|
966
971
|
info "GHTorrent: New pull request (#{id}) event (#{act}) by (#{actor}) timestamp #{ts}"
|
967
972
|
else
|
968
973
|
entry.update(:actor_id => user[:id])
|
969
|
-
info "GHTorrent: Pull request (#{id}) history entry (#{act}) timestamp #{ts} exists"
|
974
|
+
info "GHTorrent: Pull request (#{id}) history entry (#{act}) by (#{actor}) timestamp #{ts} exists"
|
970
975
|
end
|
971
976
|
end
|
972
977
|
|
@@ -1128,8 +1133,8 @@ module GHTorrent
|
|
1128
1133
|
acc
|
1129
1134
|
end
|
1130
1135
|
end.map { |x|
|
1131
|
-
ensure_pullreq_comment(owner, repo, pullreq_id, x['id'])
|
1132
|
-
}
|
1136
|
+
save{ensure_pullreq_comment(owner, repo, pullreq_id, x['id'])}
|
1137
|
+
}.select{|x| !x.nil?}
|
1133
1138
|
end
|
1134
1139
|
|
1135
1140
|
def ensure_pullreq_comment(owner, repo, pullreq_id, comment_id)
|
@@ -1191,26 +1196,28 @@ module GHTorrent
|
|
1191
1196
|
return
|
1192
1197
|
end
|
1193
1198
|
|
1194
|
-
retrieve_pull_req_commits(owner, repo, pullreq_id).reduce([]){|acc, c|
|
1199
|
+
retrieve_pull_req_commits(owner, repo, pullreq_id).reduce([]) { |acc, c|
|
1195
1200
|
next if c.nil?
|
1196
1201
|
head_repo_owner = c['url'].split(/\//)[4]
|
1197
1202
|
head_repo_name = c['url'].split(/\//)[5]
|
1198
1203
|
x = ensure_commit(head_repo_name, c['sha'], head_repo_owner, true)
|
1199
1204
|
acc << x if not x.nil?
|
1200
1205
|
acc
|
1201
|
-
}.map
|
1202
|
-
|
1203
|
-
|
1204
|
-
|
1205
|
-
|
1206
|
-
|
1207
|
-
|
1208
|
-
|
1209
|
-
|
1210
|
-
|
1211
|
-
|
1206
|
+
}.map do |c|
|
1207
|
+
save do
|
1208
|
+
exists = @db[:pull_request_commits].first(:pull_request_id => pullreq[:id],
|
1209
|
+
:commit_id => c[:id])
|
1210
|
+
if exists.nil?
|
1211
|
+
@db[:pull_request_commits].insert(:pull_request_id => pullreq[:id],
|
1212
|
+
:commit_id => c[:id])
|
1213
|
+
|
1214
|
+
info "GHTorrent: Added commit #{c[:sha]} to pullreq #{owner}/#{repo} -> #{pullreq_id}"
|
1215
|
+
else
|
1216
|
+
debug "GHTorrent: Commit #{c[:sha]} exists in pullreq #{owner}/#{repo} -> #{pullreq_id}"
|
1217
|
+
exists
|
1218
|
+
end
|
1212
1219
|
end
|
1213
|
-
}
|
1220
|
+
end.select{|x| !x.nil?}
|
1214
1221
|
end
|
1215
1222
|
|
1216
1223
|
##
|
@@ -1241,7 +1248,7 @@ module GHTorrent
|
|
1241
1248
|
else
|
1242
1249
|
acc
|
1243
1250
|
end
|
1244
|
-
end.map { |x| ensure_fork(owner, repo, x['id']) }
|
1251
|
+
end.map { |x| save{ensure_fork(owner, repo, x['id']) }}.select{|x| !x.nil?}
|
1245
1252
|
end
|
1246
1253
|
|
1247
1254
|
##
|
@@ -1288,7 +1295,7 @@ module GHTorrent
|
|
1288
1295
|
end
|
1289
1296
|
end
|
1290
1297
|
|
1291
|
-
raw_issues.map { |x| ensure_issue(owner, repo, x['number']) }
|
1298
|
+
raw_issues.map { |x| save { ensure_issue(owner, repo, x['number']) } }.select { |x| !x.nil? }
|
1292
1299
|
end
|
1293
1300
|
|
1294
1301
|
##
|
@@ -1379,8 +1386,8 @@ module GHTorrent
|
|
1379
1386
|
acc
|
1380
1387
|
end
|
1381
1388
|
end.map { |x|
|
1382
|
-
ensure_issue_event(owner, repo, issue_id, x['id'])
|
1383
|
-
}
|
1389
|
+
save{ensure_issue_event(owner, repo, issue_id, x['id'])}
|
1390
|
+
}.select{|x| !x.nil?}
|
1384
1391
|
end
|
1385
1392
|
|
1386
1393
|
##
|
@@ -1491,8 +1498,8 @@ module GHTorrent
|
|
1491
1498
|
acc
|
1492
1499
|
end
|
1493
1500
|
end.map { |x|
|
1494
|
-
ensure_issue_comment(owner, repo, issue_id, x['id'], pull_req_id)
|
1495
|
-
}
|
1501
|
+
save{ensure_issue_comment(owner, repo, issue_id, x['id'], pull_req_id)}
|
1502
|
+
}.select{|x| !x.nil?}
|
1496
1503
|
end
|
1497
1504
|
|
1498
1505
|
##
|
@@ -1555,7 +1562,7 @@ module GHTorrent
|
|
1555
1562
|
else
|
1556
1563
|
acc
|
1557
1564
|
end
|
1558
|
-
end.map { |x| ensure_repo_label(owner, repo, x['name']) }
|
1565
|
+
end.map { |x| save { ensure_repo_label(owner, repo, x['name']) } }.select { |x| !x.nil? }
|
1559
1566
|
end
|
1560
1567
|
|
1561
1568
|
##
|
@@ -1613,7 +1620,7 @@ module GHTorrent
|
|
1613
1620
|
else
|
1614
1621
|
acc
|
1615
1622
|
end
|
1616
|
-
end.map { |x| ensure_issue_label(owner, repo, issue[:issue_id], x['name']) }
|
1623
|
+
end.map { |x| save{ensure_issue_label(owner, repo, issue[:issue_id], x['name']) }}.select{|x| !x.nil?}
|
1617
1624
|
|
1618
1625
|
end
|
1619
1626
|
|
@@ -1677,6 +1684,20 @@ module GHTorrent
|
|
1677
1684
|
end
|
1678
1685
|
end
|
1679
1686
|
|
1687
|
+
# Runs the given block, optionally shielding the caller from its failures.
#
# When the :rescue_loops configuration value is enabled (either the string
# 'true' or the boolean true), a StandardError raised by the block is
# written to @logger (message, then backtrace) and nil is returned in place
# of the block's value. Otherwise the block runs unprotected and any error
# propagates to the caller.
#
# Only StandardError is intercepted: Interrupt, SystemExit and other
# non-standard exceptions always propagate, so a caller's own
# `rescue Interrupt` keeps working and the process can still be stopped.
#
# Returns the block's value, or nil when an error was rescued.
def save(&block)
  return yield unless [true, 'true'].include?(config(:rescue_loops))

  begin
    yield
  rescue StandardError => e
    @logger.error e.message
    @logger.error e.backtrace.join("\n")
    nil
  end
end
|
1700
|
+
|
1680
1701
|
private
|
1681
1702
|
|
1682
1703
|
# Store a commit contained in a hash. First check whether the commit exists.
|