ghtorrent 0.5 → 0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +16 -1
- data/README.md +6 -1
- data/bin/ght-data-retrieval +2 -162
- data/bin/ght-get-more-commits +6 -0
- data/bin/ght-load +1 -224
- data/bin/ght-mirror-events +2 -147
- data/bin/ght-process-event +35 -0
- data/bin/ght-retrieve-repo +6 -0
- data/bin/ght-rm-dupl +2 -130
- data/lib/ghtorrent.rb +10 -0
- data/lib/ghtorrent/adapters/base_adapter.rb +1 -1
- data/lib/ghtorrent/adapters/mongo_persister.rb +12 -1
- data/lib/ghtorrent/api_client.rb +47 -13
- data/lib/ghtorrent/bson_orderedhash.rb +2 -1
- data/lib/ghtorrent/command.rb +18 -0
- data/lib/ghtorrent/commands/ght_data_retrieval.rb +218 -0
- data/lib/ghtorrent/commands/ght_get_more_commits.rb +116 -0
- data/lib/ghtorrent/commands/ght_load.rb +227 -0
- data/lib/ghtorrent/commands/ght_mirror_events.rb +147 -0
- data/lib/ghtorrent/commands/ght_retrieve_repo.rb +118 -0
- data/lib/ghtorrent/commands/ght_rm_dupl.rb +132 -0
- data/lib/ghtorrent/ghtorrent.rb +401 -89
- data/lib/ghtorrent/hash.rb +1 -1
- data/lib/ghtorrent/migrations/011_add_issues.rb +74 -0
- data/lib/ghtorrent/retriever.rb +88 -16
- data/lib/ghtorrent/settings.rb +6 -1
- data/lib/version.rb +1 -1
- metadata +36 -26
@@ -0,0 +1,118 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
|
3
|
+
require 'ghtorrent/ghtorrent'
|
4
|
+
require 'ghtorrent/settings'
|
5
|
+
require 'ghtorrent/logging'
|
6
|
+
require 'ghtorrent/command'
|
7
|
+
require 'ghtorrent/retriever'
|
8
|
+
|
9
|
+
class GHTRetrieveRepo < GHTorrent::Command
|
10
|
+
|
11
|
+
include GHTorrent::Settings
|
12
|
+
include GHTorrent::Retriever
|
13
|
+
include GHTorrent::Persister
|
14
|
+
|
15
|
+
def prepare_options(options)
|
16
|
+
options.banner <<-BANNER
|
17
|
+
An efficient way to get all data for a single repo
|
18
|
+
|
19
|
+
#{command_name} [options] owner repo
|
20
|
+
|
21
|
+
BANNER
|
22
|
+
end
|
23
|
+
|
24
|
+
def validate
|
25
|
+
super
|
26
|
+
Trollop::die "Two arguments are required" unless args[0] && !args[0].empty?
|
27
|
+
end
|
28
|
+
|
29
|
+
def logger
|
30
|
+
ght.logger
|
31
|
+
end
|
32
|
+
|
33
|
+
def persister
|
34
|
+
@persister ||= connect(:mongo, settings)
|
35
|
+
@persister
|
36
|
+
end
|
37
|
+
|
38
|
+
def ext_uniq
|
39
|
+
@ext_uniq ||= config(:uniq_id)
|
40
|
+
@ext_uniq
|
41
|
+
end
|
42
|
+
|
43
|
+
def ght
|
44
|
+
@ght ||= TransactedGHTorrent.new(settings)
|
45
|
+
@ght
|
46
|
+
end
|
47
|
+
|
48
|
+
def go
|
49
|
+
user_entry = ght.transaction{ght.ensure_user(ARGV[0], false, false)}
|
50
|
+
|
51
|
+
if user_entry.nil?
|
52
|
+
Trollop::die "Cannot find user #{owner}"
|
53
|
+
end
|
54
|
+
|
55
|
+
user = user_entry[:login]
|
56
|
+
|
57
|
+
repo_entry = ght.transaction{ght.ensure_repo(ARGV[0], ARGV[1], false, false, false)}
|
58
|
+
|
59
|
+
if repo_entry.nil?
|
60
|
+
Trollop::die "Cannot find repository #{owner}/#{ARGV[1]}"
|
61
|
+
end
|
62
|
+
|
63
|
+
repo = repo_entry[:name]
|
64
|
+
|
65
|
+
%w(ensure_commits ensure_forks ensure_pull_requests
|
66
|
+
ensure_issues ensure_project_members ensure_watchers).each {|x|
|
67
|
+
begin
|
68
|
+
ght.send(x, user, repo)
|
69
|
+
rescue Exception => e
|
70
|
+
puts STDERR, e.message
|
71
|
+
puts STDERR, e.backtrace
|
72
|
+
end
|
73
|
+
}
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
# A version of the GHTorrent class that creates a transaction per processed
|
78
|
+
# item
|
79
|
+
class TransactedGHTorrent < GHTorrent::Mirror
|
80
|
+
|
81
|
+
def ensure_commit(repo, sha, user, comments = true)
|
82
|
+
transaction do
|
83
|
+
super(repo, sha, user, comments)
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
def ensure_fork(owner, repo, fork_id, date_added = nil)
|
88
|
+
transaction do
|
89
|
+
super(owner, repo, fork_id, date_added)
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
def ensure_pull_request(owner, repo, pullreq_id,
|
94
|
+
comments = true, commits = true,
|
95
|
+
state = nil, created_at = nil)
|
96
|
+
transaction do
|
97
|
+
super(owner, repo, pullreq_id, comments, commits, state, created_at)
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
def ensure_issue(owner, repo, issue_id, events = true, comments = true)
|
102
|
+
transaction do
|
103
|
+
super(owner, repo, issue_id, events, comments)
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
def ensure_project_member(owner, repo, new_member, date_added)
|
108
|
+
transaction do
|
109
|
+
super(owner, repo, new_member, date_added)
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
def ensure_watcher(owner, repo, watcher, date_added = nil)
|
114
|
+
transaction do
|
115
|
+
super(owner, repo, watcher, date_added)
|
116
|
+
end
|
117
|
+
end
|
118
|
+
end
|
@@ -0,0 +1,132 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'mongo'
|
3
|
+
|
4
|
+
require 'ghtorrent/settings'
|
5
|
+
require 'ghtorrent/logging'
|
6
|
+
require 'ghtorrent/command'
|
7
|
+
require 'ghtorrent/persister'
|
8
|
+
|
9
|
+
class GHRMDupl < GHTorrent::Command
|
10
|
+
|
11
|
+
include GHTorrent::Settings
|
12
|
+
include GHTorrent::Persister
|
13
|
+
|
14
|
+
def col_info()
|
15
|
+
{
|
16
|
+
:commits => {
|
17
|
+
:unq => "sha",
|
18
|
+
:col => persister.get_underlying_connection.collection(:commits.to_s),
|
19
|
+
},
|
20
|
+
:events => {
|
21
|
+
:unq => "id",
|
22
|
+
:col => persister.get_underlying_connection.collection(:events.to_s),
|
23
|
+
}
|
24
|
+
}
|
25
|
+
end
|
26
|
+
|
27
|
+
def persister
|
28
|
+
@persister ||= connect(:mongo, @settings)
|
29
|
+
@persister
|
30
|
+
end
|
31
|
+
|
32
|
+
def prepare_options(options)
|
33
|
+
options.banner <<-BANNER
|
34
|
+
Removes duplicate entries from collections (currently, commits and events)
|
35
|
+
|
36
|
+
#{command_name} [options] collection
|
37
|
+
|
38
|
+
#{command_name} options:
|
39
|
+
BANNER
|
40
|
+
|
41
|
+
options.opt :earliest, 'Seconds since epoch of earliest item to load',
|
42
|
+
:short => 'e', :default => 0, :type => :int
|
43
|
+
options.opt :snapshot, 'Perform clean up every x records',
|
44
|
+
:short => 's', :default => -1, :type => :int
|
45
|
+
end
|
46
|
+
|
47
|
+
def validate
|
48
|
+
super
|
49
|
+
Trollop::die "no collection specified" unless args[0] && !args[0].empty?
|
50
|
+
end
|
51
|
+
|
52
|
+
# Delete duplicate records from MongoDB, keeping only
|
53
|
+
# the last entry recorded for each key (e.g. each commit).
|
54
|
+
def remove_duplicates(data, col)
|
55
|
+
removed = 0
|
56
|
+
data.select { |k, v| v.size > 1 }.each do |k, v|
|
57
|
+
v.slice(0..(v.size - 2)).map do |x|
|
58
|
+
removed += 1 if delete_by_id col, x
|
59
|
+
end
|
60
|
+
end
|
61
|
+
removed
|
62
|
+
end
|
63
|
+
|
64
|
+
def delete_by_id(col, id)
|
65
|
+
begin
|
66
|
+
col.remove({'_id' => id})
|
67
|
+
true
|
68
|
+
rescue Mongo::OperationFailure
|
69
|
+
puts "Cannot remove record with id #{id} from #{col.name}"
|
70
|
+
false
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
def go
|
75
|
+
collection = case ARGV[0]
|
76
|
+
when "commits" then
|
77
|
+
:commits
|
78
|
+
when "events" then
|
79
|
+
:events
|
80
|
+
else
|
81
|
+
puts "Not a known collection name: #{ARGV[0]}\n"
|
82
|
+
end
|
83
|
+
|
84
|
+
from = {'_id' => {'$gte' => BSON::ObjectId.from_time(Time.at(options[:earliest]))}}
|
85
|
+
|
86
|
+
snapshot = options[:snapshot]
|
87
|
+
|
88
|
+
puts "Deleting duplicates from collection #{collection}"
|
89
|
+
puts "Deleting duplicates after #{Time.at(options[:earliest])}"
|
90
|
+
puts "Perform clean up every #{snapshot} records"
|
91
|
+
|
92
|
+
# Various counters to report stats
|
93
|
+
processed = total_processed = removed = 0
|
94
|
+
|
95
|
+
data = Hash.new
|
96
|
+
|
97
|
+
# The following code needs to save intermediate results to cope
|
98
|
+
# with large datasets
|
99
|
+
col_info[collection][:col].find(from, :fields => col_info[collection][:unq]).each do |r|
|
100
|
+
_id = r["_id"]
|
101
|
+
commit = read_value(r, col_info[collection][:unq])
|
102
|
+
|
103
|
+
# If entries cannot be parsed, remove them
|
104
|
+
if commit.empty?
|
105
|
+
puts "Deleting unknown entry #{_id}"
|
106
|
+
removed += 1 if delete_by_id col_info[collection][:col], _id
|
107
|
+
else
|
108
|
+
data[commit] = [] if data[commit].nil?
|
109
|
+
data[commit] << _id
|
110
|
+
end
|
111
|
+
|
112
|
+
processed += 1
|
113
|
+
total_processed += 1
|
114
|
+
|
115
|
+
print "\rProcessed #{processed} records"
|
116
|
+
|
117
|
+
# Calculate duplicates, save intermediate result
|
118
|
+
if snapshot > 0 and processed > snapshot
|
119
|
+
puts "\nLoaded #{data.size} values, cleaning"
|
120
|
+
removed += remove_duplicates data, col_info[collection][:col]
|
121
|
+
data = Hash.new
|
122
|
+
processed = 0
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
removed += remove_duplicates data, col_info[collection][:col]
|
127
|
+
|
128
|
+
puts "\nProcessed #{total_processed}, deleted #{removed} duplicates"
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
132
|
+
# vim: set sta sts=2 shiftwidth=2 sw=2 et ai :
|
data/lib/ghtorrent/ghtorrent.rb
CHANGED
@@ -22,7 +22,7 @@ module GHTorrent
|
|
22
22
|
@logger = Logger.new(STDOUT)
|
23
23
|
end
|
24
24
|
|
25
|
-
#
|
25
|
+
# Get a connection to the database
|
26
26
|
def get_db
|
27
27
|
Sequel.single_threaded = true
|
28
28
|
@db = Sequel.connect(config(:sql_url), :encoding => 'utf8')
|
@@ -151,19 +151,31 @@ module GHTorrent
|
|
151
151
|
end
|
152
152
|
|
153
153
|
##
|
154
|
-
# Retrieve
|
154
|
+
# Retrieve an issue
|
155
155
|
# ==Parameters:
|
156
156
|
# [owner] The login of the repository owner
|
157
157
|
# [repo] The name of the repository
|
158
|
-
# [
|
158
|
+
# [issue_id] The id of the issue
|
159
|
+
# [action] The action that took place for the issue
|
159
160
|
# [date_added] The timestamp that the add event took place
|
160
|
-
def
|
161
|
+
def get_issue(owner, repo, issue_id, created_at)
|
161
162
|
transaction do
|
162
|
-
|
163
|
-
#ensure_pullreq_comment(owner, repo, pullreq_id, comment_id, created_at)
|
163
|
+
ensure_issue(owner, repo, issue_id, created_at)
|
164
164
|
end
|
165
165
|
end
|
166
166
|
|
167
|
+
##
|
168
|
+
# Retrieve an issue comment
|
169
|
+
# ==Parameters:
|
170
|
+
# [owner] The login of the repository owner
|
171
|
+
# [repo] The name of the repository
|
172
|
+
# [issue_id] The id of the issue
|
173
|
+
# [comment_id] The issue comment unique identifier
|
174
|
+
def get_issue_comment(owner, repo, issue_id, comment_id)
|
175
|
+
transaction do
|
176
|
+
ensure_issue_comment(owner, repo, issue_id, comment_id)
|
177
|
+
end
|
178
|
+
end
|
167
179
|
|
168
180
|
##
|
169
181
|
# Make sure a commit exists
|
@@ -188,21 +200,30 @@ module GHTorrent
|
|
188
200
|
end
|
189
201
|
|
190
202
|
##
|
191
|
-
#
|
192
|
-
#
|
203
|
+
# Retrieve commits for a repository, starting from +sha+
|
204
|
+
# and going back to 30 * +num_pages+ commit log entries.
|
193
205
|
# ==Parameters:
|
194
206
|
# [user] The user to whom the repo belongs.
|
195
207
|
# [repo] The repo to look for commits into.
|
196
|
-
|
208
|
+
# [sha] The first commit to start retrieving from. If nil, then the
|
209
|
+
# earliest stored commit will be used instead.
|
210
|
+
# [num_pages] The number of commit pages to retrieve
|
211
|
+
def ensure_commits(user, repo, sha = nil,
|
212
|
+
num_pages = config(:mirror_commit_pages_new_repo))
|
197
213
|
userid = @db[:users].filter(:login => user).first[:id]
|
198
214
|
repoid = @db[:projects].filter(:owner_id => userid,
|
199
215
|
:name => repo).first[:id]
|
200
216
|
|
201
|
-
latest =
|
217
|
+
latest = if sha.nil?
|
218
|
+
@db[:commits].filter(:project_id => repoid).order(:created_at).last
|
219
|
+
else
|
220
|
+
sha
|
221
|
+
end
|
222
|
+
|
202
223
|
commits = if latest.nil?
|
203
|
-
retrieve_commits(repo,
|
224
|
+
retrieve_commits(repo, "head", user, num_pages)
|
204
225
|
else
|
205
|
-
retrieve_commits(repo, latest[:sha], user)
|
226
|
+
retrieve_commits(repo, latest[:sha], user, num_pages)
|
206
227
|
end
|
207
228
|
|
208
229
|
commits.map do |c|
|
@@ -226,6 +247,11 @@ module GHTorrent
|
|
226
247
|
parent = commits.first(:sha => url[7])
|
227
248
|
end
|
228
249
|
|
250
|
+
if parent.nil?
|
251
|
+
warn "GHTorrent: Could not retrieve #{url[4]}/#{url[5]} -> #{url[7]}, parent to commit #{this[:sha]}"
|
252
|
+
return
|
253
|
+
end
|
254
|
+
|
229
255
|
if parents.first(:commit_id => this[:id],
|
230
256
|
:parent_id => parent[:id]).nil?
|
231
257
|
|
@@ -297,6 +323,16 @@ module GHTorrent
|
|
297
323
|
if dbuser.nil?
|
298
324
|
# We do not have the user in the database yet. Add him
|
299
325
|
added = ensure_user(login, false, false)
|
326
|
+
|
327
|
+
# A commit user can be found by email but not
|
328
|
+
# by the user name he used to commit. This probably means that the
|
329
|
+
# user has changed his user name. Treat the user's by-email
|
330
|
+
# description as valid.
|
331
|
+
if added.nil? and not byemail.nil?
|
332
|
+
warn "GHTorrent: Found user #{byemail[:login]} with same email #{email} as non existing user #{login}. Assigning user #{login} to #{byemail[:login]}"
|
333
|
+
return users.first(:login => byemail[:login])
|
334
|
+
end
|
335
|
+
|
300
336
|
if byemail.nil?
|
301
337
|
users.filter(:login => login).update(:name => name) if added[:name].nil?
|
302
338
|
users.filter(:login => login).update(:email => email) if added[:email].nil?
|
@@ -342,7 +378,11 @@ module GHTorrent
|
|
342
378
|
name, email = user.split("<")
|
343
379
|
email = email.split(">")[0]
|
344
380
|
rescue Exception
|
345
|
-
raise new GHTorrentException("Not a valid email address: #{user}")
|
381
|
+
raise new GHTorrentException.new("Not a valid email address: #{user}")
|
382
|
+
end
|
383
|
+
|
384
|
+
unless is_valid_email(email)
|
385
|
+
warn("GHTorrent: Extracted email(#{email}) not valid for user #{user}")
|
346
386
|
end
|
347
387
|
u = ensure_user_byemail(email.strip, name.strip)
|
348
388
|
else
|
@@ -522,9 +562,8 @@ module GHTorrent
|
|
522
562
|
# the result is nil
|
523
563
|
def ensure_repo(user, repo, commits = true, project_members = true, watchers = true)
|
524
564
|
|
525
|
-
ensure_user(user, false, false)
|
526
565
|
repos = @db[:projects]
|
527
|
-
curuser =
|
566
|
+
curuser = ensure_user(user, false, false)
|
528
567
|
currepo = repos.first(:owner_id => curuser[:id], :name => repo)
|
529
568
|
|
530
569
|
if currepo.nil?
|
@@ -549,7 +588,7 @@ module GHTorrent
|
|
549
588
|
ensure_watchers(user, repo) if watchers
|
550
589
|
repos.first(:owner_id => curuser[:id], :name => repo)
|
551
590
|
else
|
552
|
-
debug "GHTorrent: Repo #{repo} exists"
|
591
|
+
debug "GHTorrent: Repo #{user}/#{repo} exists"
|
553
592
|
currepo
|
554
593
|
end
|
555
594
|
end
|
@@ -604,11 +643,12 @@ module GHTorrent
|
|
604
643
|
)
|
605
644
|
info "GHTorrent: Added project member #{repo} -> #{new_member}"
|
606
645
|
else
|
646
|
+
debug "GHTorrent: Project member #{repo} -> #{new_member} exists"
|
607
647
|
unless date_added.nil?
|
608
648
|
pr_members.filter(:user_id => new_user[:id],
|
609
649
|
:repo_id => project[:id])\
|
610
650
|
.update(:created_at => date(date_added))
|
611
|
-
info "GHTorrent: Updating
|
651
|
+
info "GHTorrent: Updating project member #{repo} -> #{new_member}"
|
612
652
|
end
|
613
653
|
end
|
614
654
|
end
|
@@ -796,13 +836,14 @@ module GHTorrent
|
|
796
836
|
:created_at => date(added),
|
797
837
|
:ext_ref_id => retrieved[@ext_uniq]
|
798
838
|
)
|
799
|
-
info "GHTorrent: Added watcher #{repo} -> #{watcher}"
|
839
|
+
info "GHTorrent: Added watcher #{owner}/#{repo} -> #{watcher}"
|
800
840
|
else
|
841
|
+
debug "GHTorrent: Watcher #{owner}/#{repo} -> #{watcher} exists"
|
801
842
|
unless date_added.nil?
|
802
843
|
watchers.filter(:user_id => new_watcher[:id],
|
803
844
|
:repo_id => project[:id])\
|
804
845
|
.update(:created_at => date(date_added))
|
805
|
-
info "GHTorrent: Updating
|
846
|
+
info "GHTorrent: Updating watcher #{owner}/#{repo} -> #{watcher}"
|
806
847
|
end
|
807
848
|
end
|
808
849
|
end
|
@@ -816,7 +857,7 @@ module GHTorrent
|
|
816
857
|
return
|
817
858
|
end
|
818
859
|
|
819
|
-
pull_reqs = @db[:pull_requests].filter(:base_repo_id => currepo[:id])
|
860
|
+
pull_reqs = @db[:pull_requests].filter(:base_repo_id => currepo[:id]).all
|
820
861
|
|
821
862
|
retrieve_pull_requests(owner, repo).reduce([]) do |acc, x|
|
822
863
|
if pull_reqs.find { |y| y[:pullreq_id] == x['number'] }.nil?
|
@@ -833,7 +874,6 @@ module GHTorrent
|
|
833
874
|
comments = true, commits = true,
|
834
875
|
state = nil, created_at = nil)
|
835
876
|
pulls_reqs = @db[:pull_requests]
|
836
|
-
pull_req_history = @db[:pull_request_history]
|
837
877
|
|
838
878
|
project = ensure_repo(owner, repo, false, false, false)
|
839
879
|
|
@@ -842,8 +882,8 @@ module GHTorrent
|
|
842
882
|
end
|
843
883
|
|
844
884
|
# Adds a pull request history event
|
845
|
-
add_history
|
846
|
-
|
885
|
+
def add_history(id, ts, unq, act)
|
886
|
+
pull_req_history = @db[:pull_request_history]
|
847
887
|
entry = pull_req_history.first(:pull_request_id => id,
|
848
888
|
:ext_ref_id => unq, :action => act)
|
849
889
|
if entry.nil?
|
@@ -859,20 +899,33 @@ module GHTorrent
|
|
859
899
|
|
860
900
|
# Checks whether a pull request concerns two branches of the same
|
861
901
|
# repository
|
862
|
-
is_intra_branch
|
863
|
-
req
|
902
|
+
def is_intra_branch(req)
|
903
|
+
return false unless has_head_repo(req)
|
904
|
+
|
905
|
+
if req['head']['repo']['owner']['login'] ==
|
906
|
+
req['base']['repo']['owner']['login'] and
|
907
|
+
req['head']['repo']['full_name'] == req['base']['repo']['full_name']
|
908
|
+
true
|
909
|
+
else
|
910
|
+
false
|
911
|
+
end
|
912
|
+
end
|
913
|
+
|
914
|
+
# Checks if the pull request has a head repo specified
|
915
|
+
def has_head_repo(req)
|
916
|
+
not req['head']['repo'].nil?
|
864
917
|
end
|
865
918
|
|
866
919
|
# Produces a log message
|
867
|
-
log_msg
|
868
|
-
head = if
|
869
|
-
req['base']['repo']['full_name']
|
870
|
-
else
|
920
|
+
def log_msg(req)
|
921
|
+
head = if has_head_repo(req)
|
871
922
|
req['head']['repo']['full_name']
|
923
|
+
else
|
924
|
+
"(head deleted)"
|
872
925
|
end
|
873
926
|
|
874
927
|
<<-eos.gsub(/\s+/, " ").strip
|
875
|
-
GHTorrent: Pull request #{
|
928
|
+
GHTorrent: Pull request #{req['number']}
|
876
929
|
#{head} -> #{req['base']['repo']['full_name']}
|
877
930
|
eos
|
878
931
|
end
|
@@ -890,22 +943,26 @@ module GHTorrent
|
|
890
943
|
|
891
944
|
base_commit = ensure_commit(retrieved['base']['repo']['name'],
|
892
945
|
retrieved['base']['sha'],
|
893
|
-
retrieved['base']['repo']['owner']['login']
|
894
|
-
)
|
946
|
+
retrieved['base']['repo']['owner']['login'])
|
895
947
|
|
896
|
-
if is_intra_branch
|
948
|
+
if is_intra_branch(retrieved)
|
897
949
|
head_repo = base_repo
|
898
|
-
head_commit =
|
899
|
-
warn "GHTorrent: Pull request is intra branch"
|
900
|
-
else
|
901
|
-
|
902
|
-
head_repo = ensure_repo(retrieved['head']['repo']['owner']['login'],
|
903
|
-
retrieved['head']['repo']['name'],
|
904
|
-
false, false, false)
|
905
|
-
|
906
|
-
head_commit = ensure_commit(retrieved['head']['repo']['name'],
|
950
|
+
head_commit = ensure_commit(retrieved['base']['repo']['name'],
|
907
951
|
retrieved['head']['sha'],
|
908
|
-
retrieved['
|
952
|
+
retrieved['base']['repo']['owner']['login'])
|
953
|
+
info log_msg(retrieved) + " is intra branch"
|
954
|
+
else
|
955
|
+
head_repo = if has_head_repo(retrieved)
|
956
|
+
ensure_repo(retrieved['head']['repo']['owner']['login'],
|
957
|
+
retrieved['head']['repo']['name'],
|
958
|
+
false, false, false)
|
959
|
+
end
|
960
|
+
|
961
|
+
head_commit = if not head_repo.nil?
|
962
|
+
ensure_commit(retrieved['head']['repo']['name'],
|
963
|
+
retrieved['head']['sha'],
|
964
|
+
retrieved['head']['repo']['owner']['login'])
|
965
|
+
end
|
909
966
|
end
|
910
967
|
|
911
968
|
pull_req_user = ensure_user(retrieved['user']['login'], false, false)
|
@@ -923,24 +980,24 @@ module GHTorrent
|
|
923
980
|
:base_commit_id => base_commit[:id],
|
924
981
|
:user_id => pull_req_user[:id],
|
925
982
|
:pullreq_id => pullreq_id,
|
926
|
-
:intra_branch => is_intra_branch
|
983
|
+
:intra_branch => is_intra_branch(retrieved)
|
927
984
|
)
|
928
985
|
|
929
|
-
info log_msg
|
986
|
+
info log_msg(retrieved)
|
930
987
|
else
|
931
|
-
debug log_msg
|
988
|
+
debug log_msg(retrieved) + " exists"
|
932
989
|
end
|
933
990
|
|
934
991
|
pull_req = pulls_reqs.first(:base_repo_id => project[:id],
|
935
992
|
:pullreq_id => pullreq_id)
|
936
993
|
|
937
|
-
add_history
|
994
|
+
add_history(pull_req[:id], date(retrieved['created_at']),
|
938
995
|
retrieved[@ext_uniq], 'opened')
|
939
|
-
add_history
|
996
|
+
add_history(pull_req[:id], date(retrieved['merged_at']),
|
940
997
|
retrieved[@ext_uniq], 'merged') if merged
|
941
|
-
add_history
|
998
|
+
add_history(pull_req[:id], date(retrieved['closed_at']),
|
942
999
|
retrieved[@ext_uniq], 'closed') if closed
|
943
|
-
add_history
|
1000
|
+
add_history(pull_req[:id], date(created_at), retrieved[@ext_uniq],
|
944
1001
|
state) unless state.nil?
|
945
1002
|
|
946
1003
|
ensure_pull_request_commits(owner, repo, pullreq_id) if commits
|
@@ -955,7 +1012,7 @@ module GHTorrent
|
|
955
1012
|
time = if created_at.nil? then currepo[:created_at] else Time.now() end
|
956
1013
|
|
957
1014
|
if currepo.nil?
|
958
|
-
warn "Could not repository #{owner}/#{repo}"
|
1015
|
+
warn "GHTorrent: Could not find repository #{owner}/#{repo}"
|
959
1016
|
return
|
960
1017
|
end
|
961
1018
|
|
@@ -983,7 +1040,7 @@ module GHTorrent
|
|
983
1040
|
pull_req = ensure_pull_request(owner, repo, pullreq_id, false, true)
|
984
1041
|
|
985
1042
|
if pull_req.nil?
|
986
|
-
warn "Could not retrieve pull req #{owner}/#{repo} -> #{pullreq_id}"
|
1043
|
+
warn "GHTorrent: Could not retrieve pull req #{owner}/#{repo} -> #{pullreq_id}"
|
987
1044
|
return
|
988
1045
|
end
|
989
1046
|
|
@@ -994,7 +1051,7 @@ module GHTorrent
|
|
994
1051
|
retrieved = retrieve_pull_req_comment(owner, repo, pullreq_id, comment_id)
|
995
1052
|
|
996
1053
|
if retrieved.nil?
|
997
|
-
warn "Could not retrieve comment #{comment_id} for pullreq #{owner}/#{repo} -> #{pullreq_id}"
|
1054
|
+
warn "GHTorrent: Could not retrieve comment #{comment_id} for pullreq #{owner}/#{repo} -> #{pullreq_id}"
|
998
1055
|
return
|
999
1056
|
end
|
1000
1057
|
|
@@ -1018,14 +1075,19 @@ module GHTorrent
|
|
1018
1075
|
:ext_ref_id => retrieved[@ext_uniq]
|
1019
1076
|
)
|
1020
1077
|
debug "GHTorrent: Adding comment #{comment_id} for pullreq #{owner}/#{repo} -> #{pullreq_id}"
|
1078
|
+
@db[:pull_request_comments].first(:pull_request_id => pull_req[:id],
|
1079
|
+
:comment_id => comment_id)
|
1021
1080
|
else
|
1022
|
-
debug "GHTorrent:
|
1081
|
+
debug "GHTorrent: Comment #{comment_id} for pullreq #{owner}/#{repo} -> #{pullreq_id} exists"
|
1082
|
+
exists
|
1023
1083
|
end
|
1024
1084
|
end
|
1025
1085
|
|
1026
1086
|
def ensure_pull_request_commits(owner, repo, pullreq_id)
|
1027
|
-
retrieve_pull_req_commits(owner, repo, pullreq_id).
|
1028
|
-
ensure_commit(repo, c['sha'], owner, true)
|
1087
|
+
retrieve_pull_req_commits(owner, repo, pullreq_id).reduce([]){|acc, c|
|
1088
|
+
x = ensure_commit(repo, c['sha'], owner, true)
|
1089
|
+
acc << x if not x.nil?
|
1090
|
+
acc
|
1029
1091
|
}.map { |c|
|
1030
1092
|
pullreq = ensure_pull_request(owner, repo, pullreq_id, false, false)
|
1031
1093
|
exists = @db[:pull_request_commits].first(:pull_request_id => pullreq[:id],
|
@@ -1050,26 +1112,28 @@ module GHTorrent
|
|
1050
1112
|
# [repo] The repository/project to find forks for
|
1051
1113
|
def ensure_forks(owner, repo)
|
1052
1114
|
currepo = ensure_repo(owner, repo, false, false, false)
|
1053
|
-
time = currepo[:created_at]
|
1054
1115
|
|
1055
1116
|
if currepo.nil?
|
1056
1117
|
warn "Could not retrieve forks for #{owner}/#{repo}"
|
1057
1118
|
return
|
1058
1119
|
end
|
1059
1120
|
|
1060
|
-
existing_forks = @db.from(:forks, :projects).\
|
1121
|
+
existing_forks = @db.from(:forks, :projects, :users).\
|
1061
1122
|
where(:forks__forked_project_id => :projects__id). \
|
1062
|
-
where(:
|
1123
|
+
where(:users__id => :projects__owner_id). \
|
1124
|
+
where(:forks__forked_from_id => currepo[:id]).select(:projects__name, :login).all
|
1063
1125
|
|
1064
1126
|
retrieve_forks(owner, repo).reduce([]) do |acc, x|
|
1065
1127
|
if existing_forks.find {|y|
|
1066
|
-
|
1128
|
+
forked_repo_owner = x['full_name'].split(/\//)[0]
|
1129
|
+
forked_repo_name = x['full_name'].split(/\//)[1]
|
1130
|
+
y[:login] == forked_repo_owner && y[:name] == forked_repo_name
|
1067
1131
|
}.nil?
|
1068
1132
|
acc << x
|
1069
1133
|
else
|
1070
1134
|
acc
|
1071
1135
|
end
|
1072
|
-
end.map { |x| ensure_fork(owner, repo, x['id']
|
1136
|
+
end.map { |x| ensure_fork(owner, repo, x['id']) }
|
1073
1137
|
end
|
1074
1138
|
|
1075
1139
|
##
|
@@ -1081,8 +1145,8 @@ module GHTorrent
|
|
1081
1145
|
fork_exists = forks.first(:fork_id => fork_id)
|
1082
1146
|
|
1083
1147
|
if fork_exists.nil?
|
1084
|
-
added = if date_added.nil? then Time.now else date_added end
|
1085
1148
|
retrieved = retrieve_fork(owner, repo, fork_id)
|
1149
|
+
added = if date_added.nil? then retrieved['created_at'] else date_added end
|
1086
1150
|
|
1087
1151
|
if retrieved.nil?
|
1088
1152
|
warn "GHTorrent: Fork #{fork_id} does not exist for #{owner}/#{repo}"
|
@@ -1106,6 +1170,7 @@ module GHTorrent
|
|
1106
1170
|
:ext_ref_id => retrieved[@ext_uniq])
|
1107
1171
|
info "GHTorrent: Added #{forked_repo_owner}/#{forked_repo_name} as fork of #{owner}/#{repo}"
|
1108
1172
|
else
|
1173
|
+
debug "GHTorrent: Fork #{fork_id} exists as fork of #{owner}/#{repo}"
|
1109
1174
|
unless date_added.nil?
|
1110
1175
|
forks.filter(:fork_id => fork_id)\
|
1111
1176
|
.update(:created_at => date(date_added))
|
@@ -1114,36 +1179,253 @@ module GHTorrent
|
|
1114
1179
|
end
|
1115
1180
|
end
|
1116
1181
|
|
1117
|
-
|
1182
|
+
##
|
1183
|
+
# Make sure all issues exist for a project
|
1184
|
+
def ensure_issues(owner, repo)
|
1185
|
+
currepo = ensure_repo(owner, repo, false, false, false)
|
1186
|
+
if currepo.nil?
|
1187
|
+
warn "GHTorrent: Could not retrieve issues for #{owner}/#{repo}"
|
1188
|
+
return
|
1189
|
+
end
|
1118
1190
|
|
1119
|
-
|
1120
|
-
def store_commit(c, repo, user)
|
1121
|
-
commits = @db[:commits]
|
1122
|
-
commit = commits.first(:sha => c['sha'])
|
1191
|
+
issues = @db[:issues].filter(:repo_id => currepo[:id]).all
|
1123
1192
|
|
1124
|
-
|
1125
|
-
|
1126
|
-
|
1193
|
+
retrieve_issues(owner, repo).reduce([]) do |acc, x|
|
1194
|
+
if issues.find { |y| y[:issue_id] == x['number'] }.nil?
|
1195
|
+
acc << x
|
1196
|
+
else
|
1197
|
+
acc
|
1198
|
+
end
|
1199
|
+
end.map { |x| ensure_issue(owner, repo, x['number']) }
|
1200
|
+
end
|
1127
1201
|
|
1128
|
-
|
1202
|
+
##
|
1203
|
+
# Make sure that the issue exists
|
1204
|
+
def ensure_issue(owner, repo, issue_id, events = true, comments = true)
|
1129
1205
|
|
1130
|
-
|
1131
|
-
|
1206
|
+
issues = @db[:issues]
|
1207
|
+
repository = ensure_repo(owner, repo, false, false, false)
|
1208
|
+
|
1209
|
+
if repo.nil?
|
1210
|
+
warn "Cannot find repo #{owner}/#{repo}"
|
1211
|
+
return
|
1212
|
+
end
|
1213
|
+
|
1214
|
+
cur_issue = issues.first(:issue_id => issue_id,
|
1215
|
+
:repo_id => repository[:id])
|
1216
|
+
|
1217
|
+
if cur_issue.nil?
|
1218
|
+
retrieved = retrieve_issue(owner, repo, issue_id)
|
1219
|
+
|
1220
|
+
if retrieved.nil?
|
1221
|
+
warn "GHTorrent: Issue #{issue_id} does not exist for #{owner}/#{repo}"
|
1132
1222
|
return
|
1133
1223
|
end
|
1134
1224
|
|
1135
|
-
|
1136
|
-
|
1137
|
-
|
1138
|
-
|
1139
|
-
|
1140
|
-
|
1225
|
+
reporter = ensure_user(retrieved['user']['login'], false, false)
|
1226
|
+
assignee = unless retrieved['assignee'].nil?
|
1227
|
+
ensure_user(retrieved['assignee']['login'], false, false)
|
1228
|
+
end
|
1229
|
+
|
1230
|
+
# Pull requests and issues share the same issue_id
|
1231
|
+
pull_req = unless retrieved['pull_request'].nil? or retrieved['pull_request']['patch_url'].nil?
|
1232
|
+
ensure_pull_request(owner, repo, issue_id)
|
1233
|
+
end
|
1234
|
+
|
1235
|
+
issues.insert(:repo_id => repository[:id],
|
1236
|
+
:assignee_id => unless assignee.nil? then assignee[:id] end,
|
1237
|
+
:reporter_id => reporter[:id],
|
1238
|
+
:issue_id => issue_id,
|
1239
|
+
:pull_request => if pull_req.nil? then false else true end,
|
1240
|
+
:pull_request_id => unless pull_req.nil? then pull_req[:id] end,
|
1241
|
+
:created_at => date(retrieved['created_at']),
|
1242
|
+
:ext_ref_id => retrieved[@ext_uniq])
|
1243
|
+
|
1244
|
+
ensure_issue_events(owner, repo, issue_id) if events
|
1245
|
+
ensure_issue_comments(owner, repo, issue_id) if comments and retrieved['comments'] > 0
|
1246
|
+
|
1247
|
+
info "GHTorrent: Added issue #{owner}/#{repo} -> #{issue_id}"
|
1248
|
+
issues.first(:issue_id => issue_id,
|
1249
|
+
:repo_id => repository[:id])
|
1250
|
+
else
|
1251
|
+
info "GHTorrent: Issue #{owner}/#{repo}->#{issue_id} exists"
|
1252
|
+
cur_issue
|
1253
|
+
end
|
1254
|
+
end
|
1255
|
+
|
1256
|
+
##
|
1257
|
+
# Retrieve and process all events for an issue
|
1258
|
+
def ensure_issue_events(owner, repo, issue_id)
|
1259
|
+
currepo = ensure_repo(owner, repo, true, true, false)
|
1260
|
+
#time = if created_at.nil? then currepo[:created_at] else Time.now() end
|
1261
|
+
|
1262
|
+
if currepo.nil?
|
1263
|
+
warn "GHTorrent: Could not find repository #{owner}/#{repo}"
|
1264
|
+
return
|
1265
|
+
end
|
1266
|
+
|
1267
|
+
issue = ensure_issue(owner, repo, issue_id, false, false)
|
1268
|
+
if issue.nil?
|
1269
|
+
warn "Could not retrieve issue #{owner}/#{repo} -> #{issue_id}"
|
1270
|
+
return
|
1271
|
+
end
|
1272
|
+
|
1273
|
+
retrieve_issue_events(owner, repo, issue_id).reduce([]) do |acc, x|
|
1274
|
+
|
1275
|
+
if @db[:issue_events].first(:issue_id => issue[:id],
|
1276
|
+
:event_id => x['id']).nil?
|
1277
|
+
acc << x
|
1278
|
+
else
|
1279
|
+
acc
|
1280
|
+
end
|
1281
|
+
end.map { |x|
|
1282
|
+
ensure_issue_event(owner, repo, issue_id, x['id'])
|
1283
|
+
}
|
1284
|
+
end
|
1285
|
+
|
1286
|
+
##
|
1287
|
+
# Retrieve and process +event_id+ for an +issue_id+
|
1288
|
+
def ensure_issue_event(owner, repo, issue_id, event_id)
|
1289
|
+
issue = ensure_issue(owner, repo, issue_id, false, false)
|
1290
|
+
|
1291
|
+
if issue.nil?
|
1292
|
+
warn "GHTorrent: Could not retrieve issue #{owner}/#{repo} -> #{issue_id}"
|
1293
|
+
return
|
1294
|
+
end
|
1295
|
+
|
1296
|
+
issue_event_str = "#{owner}/#{repo} -> #{issue_id}/#{event_id}"
|
1297
|
+
|
1298
|
+
curevent = @db[:issue_events].first(:issue_id => issue[:id],
|
1299
|
+
:event_id => event_id)
|
1300
|
+
if curevent.nil?
|
1301
|
+
|
1302
|
+
retrieved = retrieve_issue_event(owner, repo, issue_id, event_id)
|
1303
|
+
|
1304
|
+
if retrieved.nil?
|
1305
|
+
warn "GHTorrent: Could not retrieve issue event #{issue_event_str}"
|
1306
|
+
return
|
1307
|
+
elsif retrieved['actor'].nil?
|
1308
|
+
warn "GHTorrent: Issue event #{issue_event_str} does not contain an actor"
|
1309
|
+
return
|
1310
|
+
end
|
1311
|
+
|
1312
|
+
actor = ensure_user(retrieved['actor']['login'], false, false)
|
1313
|
+
|
1314
|
+
action_specific = case retrieved['event']
|
1315
|
+
when "referenced" then retrieved['commit_id']
|
1316
|
+
when "merged" then retrieved['commit_id']
|
1317
|
+
when "closed" then retrieved['commit_id']
|
1318
|
+
else nil
|
1319
|
+
end
|
1320
|
+
|
1321
|
+
if retrieved['event'] == "assigned"
|
1322
|
+
|
1323
|
+
def update_assignee(owner, repo, issue, actor)
|
1324
|
+
@db[:issues][:id => issue[:id]] = {:assignee_id => actor[:id]}
|
1325
|
+
info "Updating #{owner}/#{repo} -> #{issue[:id]} assignee to #{actor[:id]}"
|
1326
|
+
end
|
1327
|
+
|
1328
|
+
if issue[:assignee_id].nil? then
|
1329
|
+
update_assignee(owner, repo, issue, actor)
|
1330
|
+
else
|
1331
|
+
existing = @db[:issue_events].\
|
1332
|
+
filter(:issue_id => issue[:id],:action => "assigned").\
|
1333
|
+
order(Sequel.desc(:created_at)).first
|
1334
|
+
if existing.nil?
|
1335
|
+
update_assignee(owner, repo, issue, actor)
|
1336
|
+
elsif date(existing[:created_at]) < date(retrieved['created_at'])
|
1337
|
+
update_assignee(owner, repo, issue, actor)
|
1338
|
+
end
|
1339
|
+
end
|
1340
|
+
end
|
1341
|
+
|
1342
|
+
@db[:issue_events].insert(
|
1343
|
+
:event_id => event_id,
|
1344
|
+
:issue_id => issue[:id],
|
1345
|
+
:actor_id => unless actor.nil? then actor[:id] end,
|
1346
|
+
:action => retrieved['event'],
|
1347
|
+
:action_specific => action_specific,
|
1348
|
+
:created_at => date(retrieved['created_at']),
|
1349
|
+
:ext_ref_id => retrieved[@ext_uniq]
|
1141
1350
|
)
|
1142
|
-
|
1143
|
-
|
1351
|
+
|
1352
|
+
info "GHTorrent: Added issue event #{issue_event_str}"
|
1353
|
+
@db[:issue_events].first(:issue_id => issue[:id],
|
1354
|
+
:event_id => event_id)
|
1144
1355
|
else
|
1145
|
-
debug "GHTorrent:
|
1146
|
-
|
1356
|
+
debug "GHTorrent: Issue event #{issue_event_str} exists"
|
1357
|
+
curevent
|
1358
|
+
end
|
1359
|
+
end
|
1360
|
+
|
1361
|
+
##
|
1362
|
+
# Retrieve and process all comments for an issue
|
1363
|
+
def ensure_issue_comments(owner, repo, issue_id)
  # Ensures the repository and the issue, then processes every retrieved
  # comment of the issue that has no row in issue_comments yet. Returns the
  # results of ensure_issue_comment for those comments, or nil (after a
  # warning) when the repository or the issue is unavailable.
  if ensure_repo(owner, repo, true, true, false).nil?
    warn "GHTorrent: Could not find repository #{owner}/#{repo}"
    return
  end

  issue = ensure_issue(owner, repo, issue_id, false, false)
  if issue.nil?
    warn "Could not retrieve issue #{owner}/#{repo} -> #{issue_id}"
    return
  end

  # Keep only the comments that are not stored for this issue.
  missing = retrieve_issue_comments(owner, repo, issue_id).select do |comment|
    @db[:issue_comments].first(:issue_id => issue[:id],
                               :comment_id => comment['id']).nil?
  end

  missing.map do |comment|
    ensure_issue_comment(owner, repo, issue_id, comment['id'])
  end
end
|
1389
|
+
|
1390
|
+
##
|
1391
|
+
# Retrieve and process +comment_id+ for an +issue_id+
|
1392
|
+
def ensure_issue_comment(owner, repo, issue_id, comment_id)
  # Stores comment +comment_id+ of issue +issue_id+ in the issue_comments
  # table unless a matching row already exists, and returns that row.
  #
  # Returns nil, after logging a warning, when the issue cannot be ensured,
  # when the comment cannot be retrieved, or when the retrieved comment
  # carries no user.
  #
  # NOTE(review): the sibling methods call ensure_issue with two trailing
  # +false+ arguments; this call relies on the defaults. Confirm against
  # ensure_issue that this is intended.
  issue = ensure_issue(owner, repo, issue_id)

  if issue.nil?
    warn "GHTorrent: Could not retrieve issue #{owner}/#{repo} -> #{issue_id}"
    return
  end

  issue_comment_str = "#{owner}/#{repo} -> #{issue_id}/#{comment_id}"

  curcomment = @db[:issue_comments].first(:issue_id => issue[:id],
                                          :comment_id => comment_id)
  unless curcomment.nil?
    debug "GHTorrent: Issue comment #{issue_comment_str} exists"
    return curcomment
  end

  retrieved = retrieve_issue_comment(owner, repo, issue_id, comment_id)

  if retrieved.nil?
    warn "GHTorrent: Could not retrieve issue comment #{issue_comment_str}"
    return
  elsif retrieved['user'].nil?
    # Same handling as an issue event without an actor: warn and give up
    # instead of failing on retrieved['user']['login'].
    warn "GHTorrent: Issue comment #{issue_comment_str} does not contain a user"
    return
  end

  # May be nil; the insert below stores a nil user_id in that case.
  user = ensure_user(retrieved['user']['login'], false, false)

  @db[:issue_comments].insert(
    :comment_id => comment_id,
    :issue_id => issue[:id],
    :user_id => user.nil? ? nil : user[:id],
    :created_at => date(retrieved['created_at']),
    :ext_ref_id => retrieved[@ext_uniq]
  )

  info "GHTorrent: Added issue comment #{issue_comment_str}"
  @db[:issue_comments].first(:issue_id => issue[:id],
                             :comment_id => comment_id)
end
|
1149
1431
|
|
@@ -1153,27 +1435,57 @@ module GHTorrent
|
|
1153
1435
|
@db ||= get_db
|
1154
1436
|
@persister ||= persister
|
1155
1437
|
|
1438
|
+
result = nil
|
1156
1439
|
start_time = Time.now
|
1157
1440
|
begin
|
1158
1441
|
@db.transaction(:rollback => :reraise, :isolation => :committed) do
|
1159
|
-
yield block
|
1442
|
+
result = yield block
|
1160
1443
|
end
|
1161
1444
|
total = Time.now.to_ms - start_time.to_ms
|
1162
1445
|
debug "GHTorrent: Transaction committed (#{total} ms)"
|
1446
|
+
result
|
1163
1447
|
rescue Exception => e
|
1164
1448
|
total = Time.now.to_ms - start_time.to_ms
|
1165
1449
|
warn "GHTorrent: Transaction failed (#{total} ms)"
|
1166
1450
|
raise e
|
1167
1451
|
ensure
|
1168
|
-
@db.disconnect
|
1169
|
-
@persister.close
|
1170
|
-
|
1171
|
-
@db = nil
|
1172
|
-
@persister = nil
|
1173
1452
|
GC.start
|
1174
1453
|
end
|
1175
1454
|
end
|
1176
1455
|
|
1456
|
+
private
|
1457
|
+
|
1458
|
+
# Store a commit contained in a hash. First check whether the commit exists.
|
1459
|
+
def store_commit(c, repo, user)
  # Returns the commits row whose :sha matches c['sha'], inserting it first
  # when it is not stored yet. Returns nil (after a warning) when the
  # repository +user+/+repo+ cannot be ensured.
  sha = c['sha']
  commits = @db[:commits]
  existing = commits.first(:sha => sha)

  unless existing.nil?
    debug "GHTorrent: Commit #{user}/#{repo} -> #{sha} exists"
    return existing
  end

  # Author and committer are resolved before the repository check.
  author = commit_user(c['author'], c['commit']['author'])
  committer = commit_user(c['committer'], c['commit']['committer'])

  repository = ensure_repo(user, repo, false, false, false)
  if repository.nil?
    warn "Could not store commit #{user}/#{repo} #{sha}"
    return
  end

  commits.insert(
    :sha => sha,
    :author_id => author[:id],
    :committer_id => committer[:id],
    :project_id => repository[:id],
    :created_at => date(c['commit']['author']['date']),
    :ext_ref_id => c[@ext_uniq]
  )
  debug "GHTorrent: New commit #{user}/#{repo} -> #{sha} "
  commits.first(:sha => sha)
end
|
1488
|
+
|
1177
1489
|
##
|
1178
1490
|
# Convert a string value to boolean, the SQL way
|
1179
1491
|
def boolean(arg)
|