ghtorrent 0.2 → 0.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -26,7 +26,10 @@
26
26
  # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
27
  # POSSIBILITY OF SUCH DAMAGE.
28
28
 
29
+ require 'rubygems'
29
30
  require 'trollop'
31
+ require 'daemons'
32
+ require 'etc'
30
33
 
31
34
  # Base class for all GHTorrent command line utilities. Provides basic command
32
35
  # line argument parsing and command bootstrapping support. The order of
@@ -38,7 +41,7 @@ require 'trollop'
38
41
  module GHTorrent
39
42
  class Command
40
43
 
41
- attr_reader :args, :options
44
+ attr_reader :args, :options, :name
42
45
 
43
46
  # Specify the run method for subclasses.
44
47
  class << self
@@ -47,6 +50,29 @@ module GHTorrent
47
50
  command.process_options
48
51
  command.validate
49
52
 
53
+ if command.options[:daemon]
54
+ if Process.uid == 0
55
+ # Daemonize as a proper system daemon
56
+ Daemons.daemonize(:app_name => File.basename($0),
57
+ :dir_mode => :system,
58
+ :log_dir => "/var/log",
59
+ :backtrace => true,
60
+ :log_output => true)
61
+ STDERR.puts "Became a daemon"
62
+ # Change effective user id for the process
63
+ unless command.options[:user].nil?
64
+ Process.euid = Etc.getpwnam(command.options[:user]).uid
65
+ end
66
+ else
67
+ # Daemonize, but output in current directory
68
+ Daemons.daemonize(:app_name => File.basename($0),
69
+ :dir_mode => :normal,
70
+ :dir => Dir.getwd,
71
+ :backtrace => true,
72
+ :log_output => true)
73
+ end
74
+ end
75
+
50
76
  begin
51
77
  command.go
52
78
  rescue => e
@@ -63,6 +89,7 @@ module GHTorrent
63
89
 
64
90
  def initialize(args)
65
91
  @args = args
92
+ @name = self.class.name
66
93
  end
67
94
 
68
95
  # Specify and parse supported command line options.
@@ -79,6 +106,9 @@ Standard options:
79
106
  opt :config, 'config.yaml file location', :short => 'c',
80
107
  :default => 'config.yaml'
81
108
  opt :verbose, 'verbose mode', :short => 'v'
109
+ opt :daemon, 'run as daemon', :short => 'd'
110
+ opt :user, 'run as the specified user (only when started as root)',
111
+ :short => 'u', :type => String
82
112
  end
83
113
 
84
114
  @args = @args.dup
@@ -103,11 +133,22 @@ Standard options:
103
133
  unless (file_exists?("config.yaml") or file_exists?("/etc/ghtorrent/config.yaml"))
104
134
  Trollop::die "No config file in default locations (., /etc/ghtorrent)
105
135
  you need to specify the #{:config} parameter. Read the
106
- documnetation on how to create a config.yaml file."
136
+ documentation on how to create a config.yaml file."
107
137
  end
108
138
  else
109
139
  Trollop::die "Cannot find file #{options[:config]}" unless file_exists?(options[:config])
110
140
  end
141
+
142
+ unless @options[:user].nil?
143
+ if not Process.uid == 0
144
+ Trollop::die "Option --user (-u) cannot be specified by normal users"
145
+ end
146
+ begin
147
+ Etc.getpwnam(@options[:user])
148
+ rescue ArgumentError
149
+ Trollop::die "No such user: #{@options[:user]}"
150
+ end
151
+ end
111
152
  end
112
153
 
113
154
  # Name of the command that is currently being executed.
@@ -78,39 +78,71 @@ module GHTorrent
78
78
  return
79
79
  end
80
80
 
81
+ transaction do
82
+ ensure_repo(user, repo)
83
+ ensure_commit(repo, sha, user)
84
+ end
85
+ end
86
+
87
+ ##
88
+ # Make sure a commit exists
89
+ def ensure_commit(repo, sha, user)
90
+ c = retrieve_commit(repo, sha, user)
91
+ store_commit(c, repo, user)
92
+ ensure_commit_comments(user, repo, sha)
93
+ ensure_parents(c)
94
+ c
95
+ end
96
+
97
+ ##
98
+ # Get as many commits for a repository as allowed by Github
99
+ #
100
+ # ==Parameters:
101
+ # [user] The user to whom the repo belongs.
102
+ # [repo] The repo to look for commits into.
103
+ def ensure_commits(user, repo)
104
+ userid = @db[:users].filter(:login => user).first[:id]
105
+ repoid = @db[:projects].filter(:owner_id => userid,
106
+ :name => repo).first[:id]
107
+
108
+ latest = @db[:commits].filter(:project_id => repoid).order(:created_at).last
109
+ commits = if latest.nil?
110
+ retrieve_commits(repo, nil, user)
111
+ else
112
+ retrieve_commits(repo, latest[:sha], user)
113
+ end
114
+
115
+ commits.map do |c|
116
+ ensure_commit(repo, c['sha'], user)
117
+ end
118
+ end
119
+
120
+ ##
121
+ # Get the parents for a specific commit. The commit must be first stored
122
+ # in the database.
123
+ def ensure_parents(commit)
81
124
  commits = @db[:commits]
82
- commit = commits.first(:sha => sha)
125
+ commit['parents'].each do |p|
126
+ parents = @db[:commit_parents]
127
+ url = p['url'].split(/\//)
128
+ this = commits.first(:sha => commit['sha'])
129
+ parent = commits.first(:sha => url[7])
130
+
131
+ if parent.nil?
132
+ store_commit(retrieve_commit(url[5], url[7], url[4]), url[5], url[4])
133
+ parent = commits.first(:sha => url[7])
134
+ end
83
135
 
84
- if commit.nil?
85
- @db.transaction(:rollback => :reraise) do
86
- ensure_repo(user, repo)
87
- c = retrieve_commit(repo, sha, user)
88
-
89
- author = commit_user(c['author'], c['commit']['author'])
90
- commiter = commit_user(c['committer'], c['commit']['committer'])
91
-
92
- commits.insert(:sha => sha,
93
- :author_id => author[:id],
94
- :committer_id => commiter[:id],
95
- :created_at => date(c['commit']['author']['date']),
96
- :ext_ref_id => c[@ext_uniq]
97
- )
136
+ if parents.first(:commit_id => this[:id],
137
+ :parent_id => parent[:id]).nil?
98
138
 
99
- #c['parents'].each do |p|
100
- # url = p['url'].split(/\//)
101
- # get_commit url[4], url[5], url[7]
102
- #
103
- # commit = commits.first(:sha => sha)
104
- # parent = commits.first(:sha => url[7])
105
- # @db[:commit_parents].insert(:commit_id => commit[:id],
106
- # :parent_id => parent[:id])
107
- # @log.info "Added parent #{parent[:sha]} to commit #{sha}"
108
- #end
139
+ parents.insert(:commit_id => this[:id],
140
+ :parent_id => parent[:id])
141
+ info "Added parent #{parent[:sha]} to commit #{this[:sha]}"
142
+ else
143
+ info "Parent #{parent[:sha]} for commit #{this[:sha]} exists"
144
+ end
109
145
  end
110
- debug "GHTorrent: Transaction committed"
111
- else
112
- debug "GHTorrent: Commit #{sha} exists"
113
- end
114
146
  end
115
147
 
116
148
  ##
@@ -120,10 +152,8 @@ module GHTorrent
120
152
  # Resolution of how
121
153
  #
122
154
  # ==Parameters:
123
- # githubuser::
124
- # A hash containing the user's Github login
125
- # commituser::
126
- # A hash containing the Git commit's user name and email
155
+ # [githubuser] A hash containing the user's Github login
156
+ # [commituser] A hash containing the Git commit's user name and email
127
157
  # == Returns:
128
158
  # The (added/modified) user entry as a Hash.
129
159
  def commit_user(githubuser, commituser)
@@ -139,13 +169,13 @@ module GHTorrent
139
169
  login = githubuser['login'] unless githubuser.nil?
140
170
 
141
171
  if login.nil?
142
- ensure_user("#{name}<#{email}>", true)
172
+ ensure_user("#{name}<#{email}>", true, false)
143
173
  else
144
174
  dbuser = users.first(:login => login)
145
175
  byemail = users.first(:email => email)
146
176
  if dbuser.nil?
147
177
  # We do not have the user in the database yet. Add him
148
- added = ensure_user(login, true)
178
+ added = ensure_user(login, true, false)
149
179
  if byemail.nil?
150
180
  #
151
181
  users.filter(:login => login).update(:name => name) if added[:name].nil?
@@ -177,28 +207,31 @@ module GHTorrent
177
207
  ##
178
208
  # Ensure that a user exists, or fetch its latest state from Github
179
209
  # ==Parameters:
180
- # user::
181
- # The full email address in RFC 822 format
182
- # or a login name to lookup the user by
183
- # followers::
184
- # A boolean value indicating whether to retrieve the user's followers
185
- # == Returns:
210
+ # [user] The full email address in RFC 822 format or a login name to lookup
211
+ # the user by
212
+ # [followers] A boolean value indicating whether to retrieve the user's
213
+ # followers
214
+ # [orgs] A boolean value indicating whether to retrieve the organizations
215
+ # the user participates into
216
+ # ==Returns:
186
217
  # If the user can be retrieved, it is returned as a Hash. Otherwise,
187
218
  # the result is nil
188
- def ensure_user(user, followers)
219
+ def ensure_user(user, followers, orgs)
189
220
  # Github only supports alpa-nums and dashes in its usernames.
190
221
  # All other sympbols are treated as emails.
191
- u = if not user.match(/^[A-Za-z0-9\-]*$/)
192
- begin
193
- name, email = user.split("<")
194
- email = email.split(">")[0]
195
- rescue Exception
196
- raise new GHTorrentException("Not a valid email address: #{user}")
197
- end
198
- ensure_user_byemail(email.strip, name.strip, followers)
199
- else
200
- ensure_user_byuname(user, followers)
201
- end
222
+ if not user.match(/^[A-Za-z0-9\-]*$/)
223
+ begin
224
+ name, email = user.split("<")
225
+ email = email.split(">")[0]
226
+ rescue Exception
227
+ raise new GHTorrentException("Not a valid email address: #{user}")
228
+ end
229
+ u = ensure_user_byemail(email.strip, name.strip)
230
+ else
231
+ u = ensure_user_byuname(user)
232
+ ensure_user_followers(user) if followers
233
+ ensure_orgs(user) if orgs
234
+ end
202
235
  return u
203
236
  end
204
237
 
@@ -211,7 +244,7 @@ module GHTorrent
211
244
  # == Returns:
212
245
  # If the user can be retrieved, it is returned as a Hash. Otherwise,
213
246
  # the result is nil
214
- def ensure_user_byuname(user, followers)
247
+ def ensure_user_byuname(user)
215
248
  users = @db[:users]
216
249
  usr = users.first(:login => user)
217
250
 
@@ -232,14 +265,12 @@ module GHTorrent
232
265
  :hireable => boolean(u['hirable']),
233
266
  :bio => u['bio'],
234
267
  :location => u['location'],
268
+ :type => user_type(u['type']),
235
269
  :created_at => date(u['created_at']),
236
270
  :ext_ref_id => u[@ext_uniq])
237
271
 
238
272
  info "GHTorrent: New user #{user}"
239
273
 
240
- # Get the user's followers
241
- ensure_user_followers(user) if followers
242
-
243
274
  users.first(:login => user)
244
275
  else
245
276
  debug "GHTorrent: User #{user} exists"
@@ -256,11 +287,11 @@ module GHTorrent
256
287
  # [user] The user login to find followers by
257
288
  def ensure_user_followers(user, ts = Time.now)
258
289
 
259
- followers = retrieve_new_user_followers(user)
290
+ followers = retrieve_user_followers(user)
260
291
  followers.each { |f|
261
292
  follower = f['login']
262
- ensure_user(user, false)
263
- ensure_user(follower, false)
293
+ ensure_user(user, false, false)
294
+ ensure_user(follower, false, false)
264
295
 
265
296
  userid = @db[:users].select(:id).first(:login => user)[:id]
266
297
  followerid = @db[:users].select(:id).first(:login => follower)[:id]
@@ -284,13 +315,13 @@ module GHTorrent
284
315
  # Github API v2 if unsuccessful.
285
316
  #
286
317
  # ==Parameters:
287
- # user::
288
- # The email to lookup the user by
289
- #
318
+ # [email] The email to lookup the user by
319
+ # [name] The user's name
320
+ # [followers] If true, the user's followers will be retrieved
290
321
  # == Returns:
291
322
  # If the user can be retrieved, it is returned as a Hash. Otherwise,
292
323
  # the result is nil
293
- def ensure_user_byemail(email, name, followers)
324
+ def ensure_user_byemail(email, name)
294
325
  users = @db[:users]
295
326
  usr = users.first(:email => email)
296
327
 
@@ -318,7 +349,6 @@ module GHTorrent
318
349
  :created_at => date(u['user']['created_at']),
319
350
  :ext_ref_id => u[@ext_uniq])
320
351
  debug "GHTorrent: Found #{email} through API v2 query"
321
- ensure_user_followers(user) if followers
322
352
  users.first(:email => email)
323
353
  end
324
354
  else
@@ -334,11 +364,12 @@ module GHTorrent
334
364
  # [user] The email or login name to which this repo belongs
335
365
  # [repo] The repo name
336
366
  #
337
- # == Returns: If the repo can be retrieved, it is returned as a Hash.
338
- # Otherwise, the result is nil
367
+ # == Returns:
368
+ # If the repo can be retrieved, it is returned as a Hash. Otherwise,
369
+ # the result is nil
339
370
  def ensure_repo(user, repo)
340
371
 
341
- ensure_user(user, false)
372
+ ensure_user(user, true, true)
342
373
  repos = @db[:projects]
343
374
  currepo = repos.first(:name => repo)
344
375
 
@@ -353,6 +384,7 @@ module GHTorrent
353
384
  :ext_ref_id => r[@ext_uniq])
354
385
 
355
386
  info "GHTorrent: New repo #{repo}"
387
+ ensure_commits(user, repo)
356
388
  repos.first(:name => repo)
357
389
  else
358
390
  debug "GHTorrent: Repo #{repo} exists"
@@ -360,7 +392,165 @@ module GHTorrent
360
392
  end
361
393
  end
362
394
 
363
- private
395
+ ##
396
+ # Make sure that the organizations the user participates into exist
397
+ #
398
+ # ==Parameters:
399
+ # [user] The login name of the user to check the organizations for
400
+ #
401
+ def ensure_orgs(user)
402
+ usr = @db[:users].first(:login => user)
403
+ retrieve_orgs(user).map{|o| ensure_participation(user, o['login'])}
404
+ end
405
+
406
+ ##
407
+ # Make sure that a user belongs to the provided organization
408
+ #
409
+ # ==Parameters:
410
+ # [user] The login name of the user to check the organizations for
411
+ # [org] The login name of the organization to check whether the user
412
+ # belongs in
413
+ #
414
+ def ensure_participation(user, organization)
415
+ org = ensure_org(organization)
416
+ usr = ensure_user(user, false, false)
417
+
418
+ org_members = @db[:organization_members]
419
+ participates = org_members.first(:user_id => usr[:id], :org_id => org[:id])
420
+
421
+ if participates.nil?
422
+ org_members.insert(:user_id => usr[:id],
423
+ :org_id => org[:id])
424
+ info "GHTorrent: Added participation #{organization} -> #{user}"
425
+ org_members.first(:user_id => usr[:id], :org_id => org[:id])
426
+ else
427
+ debug "GHTorrent: Participation #{organization} -> #{user} exists"
428
+ participates
429
+ end
430
+
431
+ end
432
+
433
+ ##
434
+ # Make sure that an organization exists
435
+ #
436
+ # ==Parameters:
437
+ # [org] The login name of the organization
438
+ #
439
+ def ensure_org(organization)
440
+ org = @db[:users].find(:login => organization, :type => 'org')
441
+
442
+ if org.nil?
443
+ ensure_user(org, false, false)
444
+ else
445
+ debug "GHTorrent: Organization #{organization} exists"
446
+ org.first
447
+ end
448
+ end
449
+
450
+
451
+ ##
452
+ # Get all comments for a commit
453
+ #
454
+ # ==Parameters:
455
+ # [user] The login name of the user that owns the repo
456
+ def ensure_commit_comments(user, repo, sha)
457
+ commit_id = @db[:commits].first(:sha => sha)
458
+ stored_comments = @db[:commit_comments].find(:commit_id => commit_id)
459
+ commit_commets = retrieve_commit_comments(user, repo, sha)
460
+ user_id = @db[:users].first(:login => user)[:id]
461
+
462
+ not_saved = commit_commets.reduce([]) do |acc, x|
463
+ if stored_comments.find{|y| y[:comment_id] == x['comment_id']}.nil?
464
+ acc << x
465
+ else
466
+ acc
467
+ end
468
+ end
469
+
470
+ not_saved.each do |c|
471
+ @db[:commit_comments].insert(
472
+ :commit_id => commit_id,
473
+ :user_id => user_id,
474
+ :body => c['body'],
475
+ :line => c['line'],
476
+ :position => c['position'],
477
+ :comment_id => c['id'],
478
+ :ext_ref_id => c['ext_ref_id'],
479
+ :created_at => date(c['created_at'])
480
+ )
481
+ info "GHTorrent: Added commit comment #{sha} -> #{user}"
482
+ end
483
+ end
484
+
485
+ ##
486
+ # Get a specific comment
487
+ #
488
+ # ==Parameters:
489
+ # [user] The login name of the user that owns the repo
490
+ def ensure_commit_comment(user, repo, id)
491
+ stored_comment = @db[:commit_comments].first(:comment_id => id)
492
+
493
+ if stored_comment.nil?
494
+ retrieved = retrieve_commit_comment(user, repo, id)
495
+ commit = ensure_commit(repo, retrieved['commit_id'], user)
496
+ user = ensure_user(user, false, false)
497
+ @db[:commit_comments].insert(
498
+ :commit_id => commit[:id],
499
+ :user_id => user[:id],
500
+ :body => retrieved['body'],
501
+ :line => retrieved['line'],
502
+ :position => retrieved['position'],
503
+ :comment_id => retrieved['id'],
504
+ :ext_ref_id => retrieved['ext_ref_id'],
505
+ :created_at => date(retrieved['created_at'])
506
+ )
507
+ @db[:commit_comments].first(:comment_id => id)
508
+ info "GHTorrent: Added commit comment #{commit[:sha]} -> #{user}"
509
+ else
510
+ info "GHTorrent: Commit comment #{id} exists"
511
+ stored_comment
512
+ end
513
+ end
514
+
515
+ private
516
+
517
+ # Store a commit contained in a hash. First check whether the commit exists.
518
+ def store_commit(c, repo, user)
519
+ commits = @db[:commits]
520
+ commit = commits.first(:sha => c['sha'])
521
+
522
+ if commit.nil?
523
+ author = commit_user(c['author'], c['commit']['author'])
524
+ commiter = commit_user(c['committer'], c['commit']['committer'])
525
+
526
+
527
+ userid = @db[:users].filter(:login => user).first[:id]
528
+ repoid = @db[:projects].filter(:owner_id => userid,
529
+ :name => repo).first[:id]
530
+
531
+ commits.insert(:sha => c['sha'],
532
+ :author_id => author[:id],
533
+ :committer_id => commiter[:id],
534
+ :project_id => repoid,
535
+ :created_at => date(c['commit']['author']['date']),
536
+ :ext_ref_id => c[@ext_uniq]
537
+ )
538
+ debug "GHTorrent: New commit #{repo} -> #{c['sha']} "
539
+ else
540
+ debug "GHTorrent: Commit #{repo} -> #{c['sha']} exists"
541
+ end
542
+ end
543
+
544
+ # Run a block in a DB transaction. Exceptions trigger transaction rollback
545
+ # and are rethrown.
546
+ def transaction(&block)
547
+ start_time = Time.now
548
+ @db.transaction(:rollback => :reraise, :isolation => :committed) do
549
+ yield block
550
+ end
551
+ total = Time.now.to_ms - start_time.to_ms
552
+ debug "GHTorrent: Transaction committed (#{total} ms)"
553
+ end
364
554
 
365
555
  ##
366
556
  # Convert a string value to boolean, the SQL way
@@ -379,7 +569,7 @@ module GHTorrent
379
569
  # - yyyy-mm-ddThh:mm:ssZ
380
570
  # - yyyy/mm/dd hh:mm:ss {+/-}hhmm
381
571
  def date(arg)
382
- Time.parse(arg).to_i
572
+ Time.parse(arg)#.to_i
383
573
  end
384
574
 
385
575
  def is_valid_email(email)
@@ -388,9 +578,13 @@ module GHTorrent
388
578
  end
389
579
  # Base exception for all GHTorrent exceptions
390
580
  class GHTorrentException < Exception
391
-
392
581
  end
582
+ end
393
583
 
584
+ class Time
585
+ def to_ms
586
+ (self.to_f * 1000.0).to_i
587
+ end
394
588
  end
395
589
 
396
590
  # vim: set sta sts=2 shiftwidth=2 sw=2 et ai :