ghtorrent 0.5 → 0.6
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +16 -1
- data/README.md +6 -1
- data/bin/ght-data-retrieval +2 -162
- data/bin/ght-get-more-commits +6 -0
- data/bin/ght-load +1 -224
- data/bin/ght-mirror-events +2 -147
- data/bin/ght-process-event +35 -0
- data/bin/ght-retrieve-repo +6 -0
- data/bin/ght-rm-dupl +2 -130
- data/lib/ghtorrent.rb +10 -0
- data/lib/ghtorrent/adapters/base_adapter.rb +1 -1
- data/lib/ghtorrent/adapters/mongo_persister.rb +12 -1
- data/lib/ghtorrent/api_client.rb +47 -13
- data/lib/ghtorrent/bson_orderedhash.rb +2 -1
- data/lib/ghtorrent/command.rb +18 -0
- data/lib/ghtorrent/commands/ght_data_retrieval.rb +218 -0
- data/lib/ghtorrent/commands/ght_get_more_commits.rb +116 -0
- data/lib/ghtorrent/commands/ght_load.rb +227 -0
- data/lib/ghtorrent/commands/ght_mirror_events.rb +147 -0
- data/lib/ghtorrent/commands/ght_retrieve_repo.rb +118 -0
- data/lib/ghtorrent/commands/ght_rm_dupl.rb +132 -0
- data/lib/ghtorrent/ghtorrent.rb +401 -89
- data/lib/ghtorrent/hash.rb +1 -1
- data/lib/ghtorrent/migrations/011_add_issues.rb +74 -0
- data/lib/ghtorrent/retriever.rb +88 -16
- data/lib/ghtorrent/settings.rb +6 -1
- data/lib/version.rb +1 -1
- metadata +36 -26
@@ -0,0 +1,118 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
|
3
|
+
require 'ghtorrent/ghtorrent'
|
4
|
+
require 'ghtorrent/settings'
|
5
|
+
require 'ghtorrent/logging'
|
6
|
+
require 'ghtorrent/command'
|
7
|
+
require 'ghtorrent/retriever'
|
8
|
+
|
9
|
+
# Command that retrieves the data of one repository: it makes sure the owner
# and the repository exist and then runs the commit, fork, pull request,
# issue, project member and watcher retrieval steps for it.
class GHTRetrieveRepo < GHTorrent::Command

  include GHTorrent::Settings
  include GHTorrent::Retriever
  include GHTorrent::Persister

  # Sets the usage banner shown for this command.
  def prepare_options(options)
    options.banner <<-BANNER
An efficient way to get all data for a single repo

#{command_name} [options] owner repo

    BANNER
  end

  # Aborts unless both the owner and the repo arguments were supplied.
  def validate
    super
    missing = [args[0], args[1]].any? { |arg| arg.nil? || arg.empty? }
    Trollop::die "Two arguments are required" if missing
  end

  # Logger of the underlying TransactedGHTorrent instance.
  def logger
    ght.logger
  end

  # Lazily created Mongo persister.
  def persister
    @persister ||= connect(:mongo, settings)
  end

  # Lazily read value of the :uniq_id configuration entry.
  def ext_uniq
    @ext_uniq ||= config(:uniq_id)
  end

  # Lazily created mirror that wraps each processed item in a transaction.
  def ght
    @ght ||= TransactedGHTorrent.new(settings)
  end

  # Runs the retrieval for the owner/repo pair given on the command line.
  # A failure in one retrieval step is reported on STDERR and does not stop
  # the remaining steps.
  def go
    owner, repo_name = ARGV[0], ARGV[1]

    user_entry = ght.transaction { ght.ensure_user(owner, false, false) }
    Trollop::die "Cannot find user #{owner}" if user_entry.nil?

    user = user_entry[:login]

    repo_entry = ght.transaction do
      ght.ensure_repo(owner, repo_name, false, false, false)
    end
    Trollop::die "Cannot find repository #{owner}/#{repo_name}" if repo_entry.nil?

    repo = repo_entry[:name]

    %w(ensure_commits ensure_forks ensure_pull_requests
       ensure_issues ensure_project_members ensure_watchers).each do |step|
      begin
        ght.send(step, user, repo)
      rescue StandardError => e
        # StandardError (not Exception) so that Ctrl-C and exit still work.
        STDERR.puts e.message
        STDERR.puts e.backtrace
      end
    end
  end
end
|
76
|
+
|
77
|
+
# GHTorrent::Mirror variant in which every per-item ensure_* call listed
# below runs the inherited implementation inside its own +transaction+ block.
class TransactedGHTorrent < GHTorrent::Mirror

  # Transaction-wrapped version of the inherited ensure_commit.
  def ensure_commit(repo, sha, user, comments = true)
    transaction { super(repo, sha, user, comments) }
  end

  # Transaction-wrapped version of the inherited ensure_fork.
  def ensure_fork(owner, repo, fork_id, date_added = nil)
    transaction { super(owner, repo, fork_id, date_added) }
  end

  # Transaction-wrapped version of the inherited ensure_pull_request.
  def ensure_pull_request(owner, repo, pullreq_id,
                          comments = true, commits = true,
                          state = nil, created_at = nil)
    transaction {
      super(owner, repo, pullreq_id, comments, commits, state, created_at)
    }
  end

  # Transaction-wrapped version of the inherited ensure_issue.
  def ensure_issue(owner, repo, issue_id, events = true, comments = true)
    transaction { super(owner, repo, issue_id, events, comments) }
  end

  # Transaction-wrapped version of the inherited ensure_project_member.
  def ensure_project_member(owner, repo, new_member, date_added)
    transaction { super(owner, repo, new_member, date_added) }
  end

  # Transaction-wrapped version of the inherited ensure_watcher.
  def ensure_watcher(owner, repo, watcher, date_added = nil)
    transaction { super(owner, repo, watcher, date_added) }
  end
end
|
@@ -0,0 +1,132 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'mongo'
|
3
|
+
|
4
|
+
require 'ghtorrent/settings'
|
5
|
+
require 'ghtorrent/logging'
|
6
|
+
require 'ghtorrent/command'
|
7
|
+
require 'ghtorrent/persister'
|
8
|
+
|
9
|
+
# Command that deletes duplicate documents from the "commits" or "events"
# MongoDB collection, keeping one document per unique key value.
class GHRMDupl < GHTorrent::Command

  include GHTorrent::Settings
  include GHTorrent::Persister

  # Maps each supported collection to the field that identifies an entry
  # (:unq) and to its underlying Mongo collection object (:col).
  def col_info()
    {
      :commits => {
        :unq => "sha",
        :col => persister.get_underlying_connection.collection(:commits.to_s),
      },
      :events => {
        :unq => "id",
        :col => persister.get_underlying_connection.collection(:events.to_s),
      }
    }
  end

  # Lazily created Mongo persister.
  def persister
    @persister ||= connect(:mongo, @settings)
  end

  # Sets the usage banner and declares the --earliest and --snapshot options.
  def prepare_options(options)
    options.banner <<-BANNER
Removes duplicate entries from collections (currently, commits and events)

#{command_name} [options] collection

#{command_name} options:
    BANNER

    options.opt :earliest, 'Seconds since epoch of earliest item to load',
                :short => 'e', :default => 0, :type => :int
    options.opt :snapshot, 'Perform clean up every x records',
                :short => 's', :default => -1, :type => :int
  end

  # Aborts unless a collection name was supplied.
  def validate
    super
    Trollop::die "no collection specified" unless args[0] && !args[0].empty?
  end

  # Deletes all but the last recorded _id for every key in +data+ that has
  # more than one _id. Returns the number of documents actually deleted.
  #
  # [data] Hash mapping a unique key value to the array of _ids seen for it
  # [col]  The collection to delete from
  def remove_duplicates(data, col)
    removed = 0
    data.each_value do |ids|
      next unless ids.size > 1
      ids[0..-2].each { |id| removed += 1 if delete_by_id(col, id) }
    end
    removed
  end

  # Removes the document with the given _id from +col+. Returns true on
  # success and false (after printing a message) on Mongo::OperationFailure.
  def delete_by_id(col, id)
    col.remove({'_id' => id})
    true
  rescue Mongo::OperationFailure
    puts "Cannot remove record with id #{id} from #{col.name}"
    false
  end

  # Scans the chosen collection from the --earliest timestamp on, deletes
  # entries whose unique key is empty and removes duplicates, optionally
  # flushing the in-memory index every --snapshot records.
  def go
    collection = case ARGV[0]
                 when "commits" then :commits
                 when "events" then :events
                 else
                   # Stop here: continuing with a nil collection would crash
                   # on the col_info lookup below.
                   Trollop::die "Not a known collection name: #{ARGV[0]}"
                 end

    from = {'_id' => {'$gte' => BSON::ObjectId.from_time(Time.at(options[:earliest]))}}

    snapshot = options[:snapshot]

    puts "Deleting duplicates from collection #{collection}"
    puts "Deleting duplicates after #{Time.at(options[:earliest])}"
    puts "Perform clean up every #{snapshot} records"

    # Various counters to report stats
    processed = total_processed = removed = 0

    # Look the collection description up once instead of on every record
    target = col_info[collection]
    col = target[:col]
    unq = target[:unq]

    data = Hash.new { |h, k| h[k] = [] }

    # The following code needs to save intermediate results to cope
    # with large datasets
    col.find(from, :fields => unq).each do |r|
      _id = r["_id"]
      # read_value is defined outside this class; presumably it extracts the
      # field named by unq from the record -- TODO confirm
      key = read_value(r, unq)

      # If entries cannot be parsed, remove them
      if key.empty?
        puts "Deleting unknown entry #{_id}"
        removed += 1 if delete_by_id(col, _id)
      else
        data[key] << _id
      end

      processed += 1
      total_processed += 1

      print "\rProcessed #{processed} records"

      # Calculate duplicates, save intermediate result
      if snapshot > 0 && processed >= snapshot
        puts "\nLoaded #{data.size} values, cleaning"
        removed += remove_duplicates(data, col)
        data = Hash.new { |h, k| h[k] = [] }
        processed = 0
      end
    end

    removed += remove_duplicates(data, col)

    puts "\nProcessed #{total_processed}, deleted #{removed} duplicates"
  end
end
|
131
|
+
|
132
|
+
# vim: set sta sts=2 shiftwidth=2 sw=2 et ai :
|
data/lib/ghtorrent/ghtorrent.rb
CHANGED
@@ -22,7 +22,7 @@ module GHTorrent
|
|
22
22
|
@logger = Logger.new(STDOUT)
|
23
23
|
end
|
24
24
|
|
25
|
-
#
|
25
|
+
# Get a connection to the database
|
26
26
|
def get_db
|
27
27
|
Sequel.single_threaded = true
|
28
28
|
@db = Sequel.connect(config(:sql_url), :encoding => 'utf8')
|
@@ -151,19 +151,31 @@ module GHTorrent
|
|
151
151
|
end
|
152
152
|
|
153
153
|
##
# Retrieve an issue, running +ensure_issue+ inside a transaction
# ==Parameters:
#  [owner]  The login of the repository owner
#  [repo]  The name of the repository
#  [issue_id]  The issue identifier, passed on to +ensure_issue+
#  [created_at]  Accepted for backwards compatibility; not used
def get_issue(owner, repo, issue_id, created_at)
  transaction do
    # created_at is deliberately not forwarded: the fourth positional
    # parameter of ensure_issue is the +events+ flag, not a timestamp.
    ensure_issue(owner, repo, issue_id)
  end
end
|
166
166
|
|
167
|
+
##
# Retrieve an issue comment, running +ensure_issue_comment+ in a transaction
# ==Parameters:
#  [owner]  The login of the repository owner
#  [repo]  The name of the repository
#  [issue_id]  The identifier of the issue the comment belongs to
#  [comment_id]  The issue comment unique identifier
def get_issue_comment(owner, repo, issue_id, comment_id)
  transaction { ensure_issue_comment(owner, repo, issue_id, comment_id) }
end
|
167
179
|
|
168
180
|
##
|
169
181
|
# Make sure a commit exists
|
@@ -188,21 +200,30 @@ module GHTorrent
|
|
188
200
|
end
|
189
201
|
|
190
202
|
##
|
191
|
-
#
|
192
|
-
#
|
203
|
+
# Retrieve commits for a repository, starting from +sha+
|
204
|
+
# and going back to 30 * +num_pages+ commit log entries.
|
193
205
|
# ==Parameters:
|
194
206
|
# [user] The user to whom the repo belongs.
|
195
207
|
# [repo] The repo to look for commits into.
|
196
|
-
|
208
|
+
# [sha] The first commit to start retrieving from. If nil, then the
|
209
|
+
# earliest stored commit will be used instead.
|
210
|
+
# [num_pages] The number of commit pages to retrieve
|
211
|
+
def ensure_commits(user, repo, sha = nil,
|
212
|
+
num_pages = config(:mirror_commit_pages_new_repo))
|
197
213
|
userid = @db[:users].filter(:login => user).first[:id]
|
198
214
|
repoid = @db[:projects].filter(:owner_id => userid,
|
199
215
|
:name => repo).first[:id]
|
200
216
|
|
201
|
-
latest =
|
217
|
+
latest = if sha.nil?
|
218
|
+
@db[:commits].filter(:project_id => repoid).order(:created_at).last
|
219
|
+
else
|
220
|
+
sha
|
221
|
+
end
|
222
|
+
|
202
223
|
commits = if latest.nil?
|
203
|
-
retrieve_commits(repo,
|
224
|
+
retrieve_commits(repo, "head", user, num_pages)
|
204
225
|
else
|
205
|
-
retrieve_commits(repo, latest[:sha], user)
|
226
|
+
retrieve_commits(repo, latest[:sha], user, num_pages)
|
206
227
|
end
|
207
228
|
|
208
229
|
commits.map do |c|
|
@@ -226,6 +247,11 @@ module GHTorrent
|
|
226
247
|
parent = commits.first(:sha => url[7])
|
227
248
|
end
|
228
249
|
|
250
|
+
if parent.nil?
|
251
|
+
warn "GHTorrent: Could not retrieve #{url[4]}/#{url[5]} -> #{url[7]}, parent to commit #{this[:sha]}"
|
252
|
+
return
|
253
|
+
end
|
254
|
+
|
229
255
|
if parents.first(:commit_id => this[:id],
|
230
256
|
:parent_id => parent[:id]).nil?
|
231
257
|
|
@@ -297,6 +323,16 @@ module GHTorrent
|
|
297
323
|
if dbuser.nil?
|
298
324
|
# We do not have the user in the database yet. Add him
|
299
325
|
added = ensure_user(login, false, false)
|
326
|
+
|
327
|
+
# A commit user can be found by email but not
|
328
|
+
# by the user name he used to commit. This probably means that the
|
329
|
+
# user has changed his user name. Treat the user's by-email
|
330
|
+
# description as valid.
|
331
|
+
if added.nil? and not byemail.nil?
|
332
|
+
warn "GHTorrent: Found user #{byemail[:login]} with same email #{email} as non existing user #{login}. Assigning user #{login} to #{byemail[:login]}"
|
333
|
+
return users.first(:login => byemail[:login])
|
334
|
+
end
|
335
|
+
|
300
336
|
if byemail.nil?
|
301
337
|
users.filter(:login => login).update(:name => name) if added[:name].nil?
|
302
338
|
users.filter(:login => login).update(:email => email) if added[:email].nil?
|
@@ -342,7 +378,11 @@ module GHTorrent
|
|
342
378
|
name, email = user.split("<")
|
343
379
|
email = email.split(">")[0]
|
344
380
|
rescue Exception
|
345
|
-
raise new GHTorrentException("Not a valid email address: #{user}")
|
381
|
+
raise new GHTorrentException.new("Not a valid email address: #{user}")
|
382
|
+
end
|
383
|
+
|
384
|
+
unless is_valid_email(email)
|
385
|
+
warn("GHTorrent: Extracted email(#{email}) not valid for user #{user}")
|
346
386
|
end
|
347
387
|
u = ensure_user_byemail(email.strip, name.strip)
|
348
388
|
else
|
@@ -522,9 +562,8 @@ module GHTorrent
|
|
522
562
|
# the result is nil
|
523
563
|
def ensure_repo(user, repo, commits = true, project_members = true, watchers = true)
|
524
564
|
|
525
|
-
ensure_user(user, false, false)
|
526
565
|
repos = @db[:projects]
|
527
|
-
curuser =
|
566
|
+
curuser = ensure_user(user, false, false)
|
528
567
|
currepo = repos.first(:owner_id => curuser[:id], :name => repo)
|
529
568
|
|
530
569
|
if currepo.nil?
|
@@ -549,7 +588,7 @@ module GHTorrent
|
|
549
588
|
ensure_watchers(user, repo) if watchers
|
550
589
|
repos.first(:owner_id => curuser[:id], :name => repo)
|
551
590
|
else
|
552
|
-
debug "GHTorrent: Repo #{repo} exists"
|
591
|
+
debug "GHTorrent: Repo #{user}/#{repo} exists"
|
553
592
|
currepo
|
554
593
|
end
|
555
594
|
end
|
@@ -604,11 +643,12 @@ module GHTorrent
|
|
604
643
|
)
|
605
644
|
info "GHTorrent: Added project member #{repo} -> #{new_member}"
|
606
645
|
else
|
646
|
+
debug "GHTorrent: Project member #{repo} -> #{new_member} exists"
|
607
647
|
unless date_added.nil?
|
608
648
|
pr_members.filter(:user_id => new_user[:id],
|
609
649
|
:repo_id => project[:id])\
|
610
650
|
.update(:created_at => date(date_added))
|
611
|
-
info "GHTorrent: Updating
|
651
|
+
info "GHTorrent: Updating project member #{repo} -> #{new_member}"
|
612
652
|
end
|
613
653
|
end
|
614
654
|
end
|
@@ -796,13 +836,14 @@ module GHTorrent
|
|
796
836
|
:created_at => date(added),
|
797
837
|
:ext_ref_id => retrieved[@ext_uniq]
|
798
838
|
)
|
799
|
-
info "GHTorrent: Added watcher #{repo} -> #{watcher}"
|
839
|
+
info "GHTorrent: Added watcher #{owner}/#{repo} -> #{watcher}"
|
800
840
|
else
|
841
|
+
debug "GHTorrent: Watcher #{owner}/#{repo} -> #{watcher} exists"
|
801
842
|
unless date_added.nil?
|
802
843
|
watchers.filter(:user_id => new_watcher[:id],
|
803
844
|
:repo_id => project[:id])\
|
804
845
|
.update(:created_at => date(date_added))
|
805
|
-
info "GHTorrent: Updating
|
846
|
+
info "GHTorrent: Updating watcher #{owner}/#{repo} -> #{watcher}"
|
806
847
|
end
|
807
848
|
end
|
808
849
|
end
|
@@ -816,7 +857,7 @@ module GHTorrent
|
|
816
857
|
return
|
817
858
|
end
|
818
859
|
|
819
|
-
pull_reqs = @db[:pull_requests].filter(:base_repo_id => currepo[:id])
|
860
|
+
pull_reqs = @db[:pull_requests].filter(:base_repo_id => currepo[:id]).all
|
820
861
|
|
821
862
|
retrieve_pull_requests(owner, repo).reduce([]) do |acc, x|
|
822
863
|
if pull_reqs.find { |y| y[:pullreq_id] == x['number'] }.nil?
|
@@ -833,7 +874,6 @@ module GHTorrent
|
|
833
874
|
comments = true, commits = true,
|
834
875
|
state = nil, created_at = nil)
|
835
876
|
pulls_reqs = @db[:pull_requests]
|
836
|
-
pull_req_history = @db[:pull_request_history]
|
837
877
|
|
838
878
|
project = ensure_repo(owner, repo, false, false, false)
|
839
879
|
|
@@ -842,8 +882,8 @@ module GHTorrent
|
|
842
882
|
end
|
843
883
|
|
844
884
|
# Adds a pull request history event
|
845
|
-
add_history
|
846
|
-
|
885
|
+
def add_history(id, ts, unq, act)
|
886
|
+
pull_req_history = @db[:pull_request_history]
|
847
887
|
entry = pull_req_history.first(:pull_request_id => id,
|
848
888
|
:ext_ref_id => unq, :action => act)
|
849
889
|
if entry.nil?
|
@@ -859,20 +899,33 @@ module GHTorrent
|
|
859
899
|
|
860
900
|
# Tells whether the head and base of a pull request are in the same
# repository: same owner login and same full name. Returns false when the
# pull request has no head repo.
def is_intra_branch(req)
  return false unless has_head_repo(req)

  req['head']['repo']['owner']['login'] == req['base']['repo']['owner']['login'] &&
    req['head']['repo']['full_name'] == req['base']['repo']['full_name']
end
|
913
|
+
|
914
|
+
# True when the 'repo' entry under the pull request's 'head' is not nil
def has_head_repo(req)
  !req['head']['repo'].nil?
end
|
865
918
|
|
866
919
|
# Builds the one-line log text for a pull request: its number followed by
# "head -> base" repository full names. A placeholder stands in for the head
# when the pull request has no head repo.
def log_msg(req)
  head = has_head_repo(req) ? req['head']['repo']['full_name'] : "(head deleted)"

  text = "GHTorrent: Pull request #{req['number']}\n" \
         "#{head} -> #{req['base']['repo']['full_name']}\n"

  # Collapse every run of whitespace into one space, as a single line
  text.gsub(/\s+/, " ").strip
end
|
@@ -890,22 +943,26 @@ module GHTorrent
|
|
890
943
|
|
891
944
|
base_commit = ensure_commit(retrieved['base']['repo']['name'],
|
892
945
|
retrieved['base']['sha'],
|
893
|
-
retrieved['base']['repo']['owner']['login']
|
894
|
-
)
|
946
|
+
retrieved['base']['repo']['owner']['login'])
|
895
947
|
|
896
|
-
if is_intra_branch
|
948
|
+
if is_intra_branch(retrieved)
|
897
949
|
head_repo = base_repo
|
898
|
-
head_commit =
|
899
|
-
warn "GHTorrent: Pull request is intra branch"
|
900
|
-
else
|
901
|
-
|
902
|
-
head_repo = ensure_repo(retrieved['head']['repo']['owner']['login'],
|
903
|
-
retrieved['head']['repo']['name'],
|
904
|
-
false, false, false)
|
905
|
-
|
906
|
-
head_commit = ensure_commit(retrieved['head']['repo']['name'],
|
950
|
+
head_commit = ensure_commit(retrieved['base']['repo']['name'],
|
907
951
|
retrieved['head']['sha'],
|
908
|
-
retrieved['
|
952
|
+
retrieved['base']['repo']['owner']['login'])
|
953
|
+
info log_msg(retrieved) + " is intra branch"
|
954
|
+
else
|
955
|
+
head_repo = if has_head_repo(retrieved)
|
956
|
+
ensure_repo(retrieved['head']['repo']['owner']['login'],
|
957
|
+
retrieved['head']['repo']['name'],
|
958
|
+
false, false, false)
|
959
|
+
end
|
960
|
+
|
961
|
+
head_commit = if not head_repo.nil?
|
962
|
+
ensure_commit(retrieved['head']['repo']['name'],
|
963
|
+
retrieved['head']['sha'],
|
964
|
+
retrieved['head']['repo']['owner']['login'])
|
965
|
+
end
|
909
966
|
end
|
910
967
|
|
911
968
|
pull_req_user = ensure_user(retrieved['user']['login'], false, false)
|
@@ -923,24 +980,24 @@ module GHTorrent
|
|
923
980
|
:base_commit_id => base_commit[:id],
|
924
981
|
:user_id => pull_req_user[:id],
|
925
982
|
:pullreq_id => pullreq_id,
|
926
|
-
:intra_branch => is_intra_branch
|
983
|
+
:intra_branch => is_intra_branch(retrieved)
|
927
984
|
)
|
928
985
|
|
929
|
-
info log_msg
|
986
|
+
info log_msg(retrieved)
|
930
987
|
else
|
931
|
-
debug log_msg
|
988
|
+
debug log_msg(retrieved) + " exists"
|
932
989
|
end
|
933
990
|
|
934
991
|
pull_req = pulls_reqs.first(:base_repo_id => project[:id],
|
935
992
|
:pullreq_id => pullreq_id)
|
936
993
|
|
937
|
-
add_history
|
994
|
+
add_history(pull_req[:id], date(retrieved['created_at']),
|
938
995
|
retrieved[@ext_uniq], 'opened')
|
939
|
-
add_history
|
996
|
+
add_history(pull_req[:id], date(retrieved['merged_at']),
|
940
997
|
retrieved[@ext_uniq], 'merged') if merged
|
941
|
-
add_history
|
998
|
+
add_history(pull_req[:id], date(retrieved['closed_at']),
|
942
999
|
retrieved[@ext_uniq], 'closed') if closed
|
943
|
-
add_history
|
1000
|
+
add_history(pull_req[:id], date(created_at), retrieved[@ext_uniq],
|
944
1001
|
state) unless state.nil?
|
945
1002
|
|
946
1003
|
ensure_pull_request_commits(owner, repo, pullreq_id) if commits
|
@@ -955,7 +1012,7 @@ module GHTorrent
|
|
955
1012
|
time = if created_at.nil? then currepo[:created_at] else Time.now() end
|
956
1013
|
|
957
1014
|
if currepo.nil?
|
958
|
-
warn "Could not repository #{owner}/#{repo}"
|
1015
|
+
warn "GHTorrent: Could not find repository #{owner}/#{repo}"
|
959
1016
|
return
|
960
1017
|
end
|
961
1018
|
|
@@ -983,7 +1040,7 @@ module GHTorrent
|
|
983
1040
|
pull_req = ensure_pull_request(owner, repo, pullreq_id, false, true)
|
984
1041
|
|
985
1042
|
if pull_req.nil?
|
986
|
-
warn "Could not retrieve pull req #{owner}/#{repo} -> #{pullreq_id}"
|
1043
|
+
warn "GHTorrent: Could not retrieve pull req #{owner}/#{repo} -> #{pullreq_id}"
|
987
1044
|
return
|
988
1045
|
end
|
989
1046
|
|
@@ -994,7 +1051,7 @@ module GHTorrent
|
|
994
1051
|
retrieved = retrieve_pull_req_comment(owner, repo, pullreq_id, comment_id)
|
995
1052
|
|
996
1053
|
if retrieved.nil?
|
997
|
-
warn "Could not retrieve comment #{comment_id} for pullreq #{owner}/#{repo} -> #{pullreq_id}"
|
1054
|
+
warn "GHTorrent: Could not retrieve comment #{comment_id} for pullreq #{owner}/#{repo} -> #{pullreq_id}"
|
998
1055
|
return
|
999
1056
|
end
|
1000
1057
|
|
@@ -1018,14 +1075,19 @@ module GHTorrent
|
|
1018
1075
|
:ext_ref_id => retrieved[@ext_uniq]
|
1019
1076
|
)
|
1020
1077
|
debug "GHTorrent: Adding comment #{comment_id} for pullreq #{owner}/#{repo} -> #{pullreq_id}"
|
1078
|
+
@db[:pull_request_comments].first(:pull_request_id => pull_req[:id],
|
1079
|
+
:comment_id => comment_id)
|
1021
1080
|
else
|
1022
|
-
debug "GHTorrent:
|
1081
|
+
debug "GHTorrent: Comment #{comment_id} for pullreq #{owner}/#{repo} -> #{pullreq_id} exists"
|
1082
|
+
exists
|
1023
1083
|
end
|
1024
1084
|
end
|
1025
1085
|
|
1026
1086
|
def ensure_pull_request_commits(owner, repo, pullreq_id)
|
1027
|
-
retrieve_pull_req_commits(owner, repo, pullreq_id).
|
1028
|
-
ensure_commit(repo, c['sha'], owner, true)
|
1087
|
+
retrieve_pull_req_commits(owner, repo, pullreq_id).reduce([]){|acc, c|
|
1088
|
+
x = ensure_commit(repo, c['sha'], owner, true)
|
1089
|
+
acc << x if not x.nil?
|
1090
|
+
acc
|
1029
1091
|
}.map { |c|
|
1030
1092
|
pullreq = ensure_pull_request(owner, repo, pullreq_id, false, false)
|
1031
1093
|
exists = @db[:pull_request_commits].first(:pull_request_id => pullreq[:id],
|
@@ -1050,26 +1112,28 @@ module GHTorrent
|
|
1050
1112
|
# [repo] The repository/project to find forks for
|
1051
1113
|
def ensure_forks(owner, repo)
  # Calls ensure_fork for every fork returned by retrieve_forks that is not
  # already recorded for this repository; returns the ensure_fork results,
  # or nil when the repository cannot be found.
  currepo = ensure_repo(owner, repo, false, false, false)

  if currepo.nil?
    warn "GHTorrent: Could not retrieve forks for #{owner}/#{repo}"
    return
  end

  # Name and owner login of every project already stored as a fork of currepo
  existing_forks = @db.from(:forks, :projects, :users).
      where(:forks__forked_project_id => :projects__id).
      where(:users__id => :projects__owner_id).
      where(:forks__forked_from_id => currepo[:id]).
      select(:projects__name, :login).all

  new_forks = retrieve_forks(owner, repo).reject do |candidate|
    # Split once per retrieved fork rather than once per comparison
    forked_repo_owner, forked_repo_name = candidate['full_name'].split('/')
    existing_forks.any? do |known|
      known[:login] == forked_repo_owner && known[:name] == forked_repo_name
    end
  end

  new_forks.map { |candidate| ensure_fork(owner, repo, candidate['id']) }
end
|
1074
1138
|
|
1075
1139
|
##
|
@@ -1081,8 +1145,8 @@ module GHTorrent
|
|
1081
1145
|
fork_exists = forks.first(:fork_id => fork_id)
|
1082
1146
|
|
1083
1147
|
if fork_exists.nil?
|
1084
|
-
added = if date_added.nil? then Time.now else date_added end
|
1085
1148
|
retrieved = retrieve_fork(owner, repo, fork_id)
|
1149
|
+
added = if date_added.nil? then retrieved['created_at'] else date_added end
|
1086
1150
|
|
1087
1151
|
if retrieved.nil?
|
1088
1152
|
warn "GHTorrent: Fork #{fork_id} does not exist for #{owner}/#{repo}"
|
@@ -1106,6 +1170,7 @@ module GHTorrent
|
|
1106
1170
|
:ext_ref_id => retrieved[@ext_uniq])
|
1107
1171
|
info "GHTorrent: Added #{forked_repo_owner}/#{forked_repo_name} as fork of #{owner}/#{repo}"
|
1108
1172
|
else
|
1173
|
+
debug "GHTorrent: Fork #{fork_id} exists as fork of #{owner}/#{repo}"
|
1109
1174
|
unless date_added.nil?
|
1110
1175
|
forks.filter(:fork_id => fork_id)\
|
1111
1176
|
.update(:created_at => date(date_added))
|
@@ -1114,36 +1179,253 @@ module GHTorrent
|
|
1114
1179
|
end
|
1115
1180
|
end
|
1116
1181
|
|
1117
|
-
|
1182
|
+
##
# Make sure all issues exist for a project: every retrieved issue whose
# number is not yet stored for the repository is passed to +ensure_issue+
def ensure_issues(owner, repo)
  currepo = ensure_repo(owner, repo, false, false, false)
  if currepo.nil?
    warn "GHTorrent: Could not retrieve issues for #{owner}/#{repo}"
    return
  end

  stored = @db[:issues].filter(:repo_id => currepo[:id]).all

  missing = retrieve_issues(owner, repo).select do |candidate|
    stored.none? { |known| known[:issue_id] == candidate['number'] }
  end

  missing.map { |candidate| ensure_issue(owner, repo, candidate['number']) }
end
|
1127
1201
|
|
1128
|
-
|
1202
|
+
##
# Make sure that the issue exists
# ==Parameters:
#  [owner]  The login of the repository owner
#  [repo]  The name of the repository
#  [issue_id]  The issue identifier used to look the issue up
#  [events]  Whether to also process the issue's events when it is added
#  [comments]  Whether to also process the issue's comments when it is added
# ==Returns:
# The stored issue entry, or nil if the repository or the issue
# cannot be found
def ensure_issue(owner, repo, issue_id, events = true, comments = true)

  issues = @db[:issues]
  repository = ensure_repo(owner, repo, false, false, false)

  # Check the looked-up entry, not the repo name string
  if repository.nil?
    warn "GHTorrent: Cannot find repo #{owner}/#{repo}"
    return
  end

  cur_issue = issues.first(:issue_id => issue_id,
                           :repo_id => repository[:id])

  unless cur_issue.nil?
    debug "GHTorrent: Issue #{owner}/#{repo}->#{issue_id} exists"
    return cur_issue
  end

  retrieved = retrieve_issue(owner, repo, issue_id)

  if retrieved.nil?
    warn "GHTorrent: Issue #{issue_id} does not exist for #{owner}/#{repo}"
    return
  end

  reporter = ensure_user(retrieved['user']['login'], false, false)
  assignee = unless retrieved['assignee'].nil?
               ensure_user(retrieved['assignee']['login'], false, false)
             end

  # Pull requests and issues share the same issue_id
  is_pull_req = !retrieved['pull_request'].nil? &&
                !retrieved['pull_request']['patch_url'].nil?
  pull_req = ensure_pull_request(owner, repo, issue_id) if is_pull_req

  issues.insert(:repo_id => repository[:id],
                :assignee_id => (assignee[:id] unless assignee.nil?),
                :reporter_id => reporter[:id],
                :issue_id => issue_id,
                :pull_request => !pull_req.nil?,
                :pull_request_id => (pull_req[:id] unless pull_req.nil?),
                :created_at => date(retrieved['created_at']),
                :ext_ref_id => retrieved[@ext_uniq])

  ensure_issue_events(owner, repo, issue_id) if events
  ensure_issue_comments(owner, repo, issue_id) if comments && retrieved['comments'] > 0

  info "GHTorrent: Added issue #{owner}/#{repo} -> #{issue_id}"
  issues.first(:issue_id => issue_id,
               :repo_id => repository[:id])
end
|
1255
|
+
|
1256
|
+
##
# Retrieve and process all events for an issue: every retrieved event not
# yet stored for the issue is passed to +ensure_issue_event+
def ensure_issue_events(owner, repo, issue_id)
  currepo = ensure_repo(owner, repo, true, true, false)

  if currepo.nil?
    warn "GHTorrent: Could not find repository #{owner}/#{repo}"
    return
  end

  issue = ensure_issue(owner, repo, issue_id, false, false)

  if issue.nil?
    warn "Could not retrieve issue #{owner}/#{repo} -> #{issue_id}"
    return
  end

  unseen = retrieve_issue_events(owner, repo, issue_id).select do |event|
    @db[:issue_events].first(:issue_id => issue[:id],
                             :event_id => event['id']).nil?
  end

  unseen.map do |event|
    ensure_issue_event(owner, repo, issue_id, event['id'])
  end
end
|
1285
|
+
|
1286
|
+
##
# Retrieve and process +event_id+ for an +issue_id+
# ==Returns:
# The stored issue event entry, or nil when the issue or the event cannot
# be retrieved or the event carries no actor
def ensure_issue_event(owner, repo, issue_id, event_id)
  issue = ensure_issue(owner, repo, issue_id, false, false)

  if issue.nil?
    warn "GHTorrent: Could not retrieve issue #{owner}/#{repo} -> #{issue_id}"
    return
  end

  issue_event_str = "#{owner}/#{repo} -> #{issue_id}/#{event_id}"

  curevent = @db[:issue_events].first(:issue_id => issue[:id],
                                      :event_id => event_id)

  unless curevent.nil?
    debug "GHTorrent: Issue event #{issue_event_str} exists"
    return curevent
  end

  retrieved = retrieve_issue_event(owner, repo, issue_id, event_id)

  if retrieved.nil?
    warn "GHTorrent: Could not retrieve issue event #{issue_event_str}"
    return
  elsif retrieved['actor'].nil?
    warn "GHTorrent: Issue event #{issue_event_str} does not contain an actor"
    return
  end

  actor = ensure_user(retrieved['actor']['login'], false, false)

  # Only these event types store the commit id as their extra payload
  action_specific = case retrieved['event']
                    when "referenced", "merged", "closed"
                      retrieved['commit_id']
                    end

  # The actor lookup may fail; without an actor there is nothing to assign
  if retrieved['event'] == "assigned" && !actor.nil?
    # With an assignee already set, only a newer "assigned" event may
    # replace it; the most recent stored one is the reference point.
    latest = unless issue[:assignee_id].nil?
               @db[:issue_events].
                   filter(:issue_id => issue[:id], :action => "assigned").
                   order(Sequel.desc(:created_at)).first
             end

    if latest.nil? || date(latest[:created_at]) < date(retrieved['created_at'])
      @db[:issues].filter(:id => issue[:id]).update(:assignee_id => actor[:id])
      info "GHTorrent: Updating #{owner}/#{repo} -> #{issue[:id]} assignee to #{actor[:id]}"
    end
  end

  @db[:issue_events].insert(
      :event_id => event_id,
      :issue_id => issue[:id],
      :actor_id => (actor[:id] unless actor.nil?),
      :action => retrieved['event'],
      :action_specific => action_specific,
      :created_at => date(retrieved['created_at']),
      :ext_ref_id => retrieved[@ext_uniq]
  )

  info "GHTorrent: Added issue event #{issue_event_str}"
  @db[:issue_events].first(:issue_id => issue[:id],
                           :event_id => event_id)
end
|
1360
|
+
|
1361
|
+
##
|
1362
|
+
# Retrieve and process all comments for an issue
|
1363
|
+
# Retrieve the comments of issue +issue_id+ in +owner+/+repo+ and process
# every comment that has no row yet in the +issue_comments+ table.
#
# Returns an array holding the result of ensure_issue_comment for each
# comment that was not stored before. Returns nil (after logging a warning)
# when either the repository or the issue cannot be resolved.
def ensure_issue_comments(owner, repo, issue_id)
  currepo = ensure_repo(owner, repo, true, true, false)

  if currepo.nil?
    warn "GHTorrent: Could not find repository #{owner}/#{repo}"
    return
  end

  issue = ensure_issue(owner, repo, issue_id, false, false)
  if issue.nil?
    warn "GHTorrent: Could not retrieve issue #{owner}/#{repo} -> #{issue_id}"
    return
  end

  # Keep only the comments for which no row exists for this issue yet.
  missing = retrieve_issue_comments(owner, repo, issue_id).select do |comment|
    @db[:issue_comments].first(:issue_id => issue[:id],
                               :comment_id => comment['id']).nil?
  end

  missing.map do |comment|
    ensure_issue_comment(owner, repo, issue_id, comment['id'])
  end
end
|
1389
|
+
|
1390
|
+
##
|
1391
|
+
# Retrieve and process +comment_id+ for an +issue_id+
|
1392
|
+
# Make sure comment +comment_id+ of issue +issue_id+ in +owner+/+repo+ has a
# row in the +issue_comments+ table.
#
# Returns the stored row (the existing one, or the one just inserted).
# Returns nil, after logging a warning, when the issue or the comment
# cannot be retrieved.
def ensure_issue_comment(owner, repo, issue_id, comment_id)
  # Pass false, false exactly like the other callers of ensure_issue in this
  # file. NOTE(review): presumably these flags turn off processing of the
  # issue's events and comments -- confirm against ensure_issue.
  issue = ensure_issue(owner, repo, issue_id, false, false)

  if issue.nil?
    warn "GHTorrent: Could not retrieve issue #{owner}/#{repo} -> #{issue_id}"
    return
  end

  issue_comment_str = "#{owner}/#{repo} -> #{issue_id}/#{comment_id}"

  comments = @db[:issue_comments]
  curcomment = comments.first(:issue_id => issue[:id],
                              :comment_id => comment_id)

  unless curcomment.nil?
    debug "GHTorrent: Issue comment #{issue_comment_str} exists"
    return curcomment
  end

  retrieved = retrieve_issue_comment(owner, repo, issue_id, comment_id)

  if retrieved.nil?
    warn "GHTorrent: Could not retrieve issue comment #{issue_comment_str}"
    return
  end

  # The retrieved comment may carry no user; store it with a nil user_id
  # instead of raising NoMethodError on the nested lookup.
  author = retrieved['user']
  user = author.nil? ? nil : ensure_user(author['login'], false, false)

  comments.insert(
    :comment_id => comment_id,
    :issue_id => issue[:id],
    :user_id => user.nil? ? nil : user[:id],
    :created_at => date(retrieved['created_at']),
    :ext_ref_id => retrieved[@ext_uniq]
  )

  info "GHTorrent: Added issue comment #{issue_comment_str}"
  comments.first(:issue_id => issue[:id],
                 :comment_id => comment_id)
end
|
1149
1431
|
|
@@ -1153,27 +1435,57 @@ module GHTorrent
|
|
1153
1435
|
@db ||= get_db
|
1154
1436
|
@persister ||= persister
|
1155
1437
|
|
1438
|
+
result = nil
|
1156
1439
|
start_time = Time.now
|
1157
1440
|
begin
|
1158
1441
|
@db.transaction(:rollback => :reraise, :isolation => :committed) do
|
1159
|
-
yield block
|
1442
|
+
result = yield block
|
1160
1443
|
end
|
1161
1444
|
total = Time.now.to_ms - start_time.to_ms
|
1162
1445
|
debug "GHTorrent: Transaction committed (#{total} ms)"
|
1446
|
+
result
|
1163
1447
|
rescue Exception => e
|
1164
1448
|
total = Time.now.to_ms - start_time.to_ms
|
1165
1449
|
warn "GHTorrent: Transaction failed (#{total} ms)"
|
1166
1450
|
raise e
|
1167
1451
|
ensure
|
1168
|
-
@db.disconnect
|
1169
|
-
@persister.close
|
1170
|
-
|
1171
|
-
@db = nil
|
1172
|
-
@persister = nil
|
1173
1452
|
GC.start
|
1174
1453
|
end
|
1175
1454
|
end
|
1176
1455
|
|
1456
|
+
private
|
1457
|
+
|
1458
|
+
# Store a commit contained in a hash. First check whether the commit exists.
|
1459
|
+
# Persist the commit described by hash +c+ for +user+/+repo+ unless a row
# with the same sha is already present in the +commits+ table.
#
# Returns the commit row (pre-existing or freshly inserted). Returns nil,
# after logging a warning, when ensure_repo yields no repository.
def store_commit(c, repo, user)
  table = @db[:commits]
  known = table.first(:sha => c['sha'])

  # Nothing to do when the sha is already stored.
  unless known.nil?
    debug "GHTorrent: Commit #{user}/#{repo} -> #{c['sha']} exists"
    return known
  end

  git_meta = c['commit']
  author_row = commit_user(c['author'], git_meta['author'])
  committer_row = commit_user(c['committer'], git_meta['committer'])

  project = ensure_repo(user, repo, false, false, false)

  if project.nil?
    warn "Could not store commit #{user}/#{repo} #{c['sha']}"
    return
  end

  table.insert(
    :sha => c['sha'],
    :author_id => author_row[:id],
    :committer_id => committer_row[:id],
    :project_id => project[:id],
    :created_at => date(c['commit']['author']['date']),
    :ext_ref_id => c[@ext_uniq]
  )

  debug "GHTorrent: New commit #{user}/#{repo} -> #{c['sha']} "
  table.first(:sha => c['sha'])
end
|
1488
|
+
|
1177
1489
|
##
|
1178
1490
|
# Convert a string value to boolean, the SQL way
|
1179
1491
|
def boolean(arg)
|