ghtorrent 0.2 → 0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -26,7 +26,10 @@
26
26
  # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
27
  # POSSIBILITY OF SUCH DAMAGE.
28
28
 
29
+ require 'rubygems'
29
30
  require 'trollop'
31
+ require 'daemons'
32
+ require 'etc'
30
33
 
31
34
  # Base class for all GHTorrent command line utilities. Provides basic command
32
35
  # line argument parsing and command bootstrapping support. The order of
@@ -38,7 +41,7 @@ require 'trollop'
38
41
  module GHTorrent
39
42
  class Command
40
43
 
41
- attr_reader :args, :options
44
+ attr_reader :args, :options, :name
42
45
 
43
46
  # Specify the run method for subclasses.
44
47
  class << self
@@ -47,6 +50,29 @@ module GHTorrent
47
50
  command.process_options
48
51
  command.validate
49
52
 
53
+ if command.options[:daemon]
54
+ if Process.uid == 0
55
+ # Daemonize as a proper system daemon
56
+ Daemons.daemonize(:app_name => File.basename($0),
57
+ :dir_mode => :system,
58
+ :log_dir => "/var/log",
59
+ :backtrace => true,
60
+ :log_output => true)
61
+ STDERR.puts "Became a daemon"
62
+ # Change effective user id for the process
63
+ unless command.options[:user].nil?
64
+ Process.euid = Etc.getpwnam(command.options[:user]).uid
65
+ end
66
+ else
67
+ # Daemonize, but output in current directory
68
+ Daemons.daemonize(:app_name => File.basename($0),
69
+ :dir_mode => :normal,
70
+ :dir => Dir.getwd,
71
+ :backtrace => true,
72
+ :log_output => true)
73
+ end
74
+ end
75
+
50
76
  begin
51
77
  command.go
52
78
  rescue => e
@@ -63,6 +89,7 @@ module GHTorrent
63
89
 
64
90
  def initialize(args)
65
91
  @args = args
92
+ @name = self.class.name
66
93
  end
67
94
 
68
95
  # Specify and parse supported command line options.
@@ -79,6 +106,9 @@ Standard options:
79
106
  opt :config, 'config.yaml file location', :short => 'c',
80
107
  :default => 'config.yaml'
81
108
  opt :verbose, 'verbose mode', :short => 'v'
109
+ opt :daemon, 'run as daemon', :short => 'd'
110
+ opt :user, 'run as the specified user (only when started as root)',
111
+ :short => 'u', :type => String
82
112
  end
83
113
 
84
114
  @args = @args.dup
@@ -103,11 +133,22 @@ Standard options:
103
133
  unless (file_exists?("config.yaml") or file_exists?("/etc/ghtorrent/config.yaml"))
104
134
  Trollop::die "No config file in default locations (., /etc/ghtorrent)
105
135
  you need to specify the #{:config} parameter. Read the
106
- documnetation on how to create a config.yaml file."
136
+ documentation on how to create a config.yaml file."
107
137
  end
108
138
  else
109
139
  Trollop::die "Cannot find file #{options[:config]}" unless file_exists?(options[:config])
110
140
  end
141
+
142
+ unless @options[:user].nil?
143
+ if not Process.uid == 0
144
+ Trollop::die "Option --user (-u) cannot be specified by normal users"
145
+ end
146
+ begin
147
+ Etc.getpwnam(@options[:user])
148
+ rescue ArgumentError
149
+ Trollop::die "No such user: #{@options[:user]}"
150
+ end
151
+ end
111
152
  end
112
153
 
113
154
  # Name of the command that is currently being executed.
@@ -78,39 +78,71 @@ module GHTorrent
78
78
  return
79
79
  end
80
80
 
81
+ transaction do
82
+ ensure_repo(user, repo)
83
+ ensure_commit(repo, sha, user)
84
+ end
85
+ end
86
+
87
+ ##
88
+ # Make sure a commit exists
89
+ def ensure_commit(repo, sha, user)
90
+ c = retrieve_commit(repo, sha, user)
91
+ store_commit(c, repo, user)
92
+ ensure_commit_comments(user, repo, sha)
93
+ ensure_parents(c)
94
+ c
95
+ end
96
+
97
+ ##
98
+ # Get as many commits for a repository as allowed by Github
99
+ #
100
+ # ==Parameters:
101
+ # [user] The user to whom the repo belongs.
102
+ # [repo] The repo to look for commits into.
103
+ def ensure_commits(user, repo)
104
+ userid = @db[:users].filter(:login => user).first[:id]
105
+ repoid = @db[:projects].filter(:owner_id => userid,
106
+ :name => repo).first[:id]
107
+
108
+ latest = @db[:commits].filter(:project_id => repoid).order(:created_at).last
109
+ commits = if latest.nil?
110
+ retrieve_commits(repo, nil, user)
111
+ else
112
+ retrieve_commits(repo, latest[:sha], user)
113
+ end
114
+
115
+ commits.map do |c|
116
+ ensure_commit(repo, c['sha'], user)
117
+ end
118
+ end
119
+
120
+ ##
121
+ # Get the parents for a specific commit. The commit must be first stored
122
+ # in the database.
123
+ def ensure_parents(commit)
81
124
  commits = @db[:commits]
82
- commit = commits.first(:sha => sha)
125
+ commit['parents'].each do |p|
126
+ parents = @db[:commit_parents]
127
+ url = p['url'].split(/\//)
128
+ this = commits.first(:sha => commit['sha'])
129
+ parent = commits.first(:sha => url[7])
130
+
131
+ if parent.nil?
132
+ store_commit(retrieve_commit(url[5], url[7], url[4]), url[5], url[4])
133
+ parent = commits.first(:sha => url[7])
134
+ end
83
135
 
84
- if commit.nil?
85
- @db.transaction(:rollback => :reraise) do
86
- ensure_repo(user, repo)
87
- c = retrieve_commit(repo, sha, user)
88
-
89
- author = commit_user(c['author'], c['commit']['author'])
90
- commiter = commit_user(c['committer'], c['commit']['committer'])
91
-
92
- commits.insert(:sha => sha,
93
- :author_id => author[:id],
94
- :committer_id => commiter[:id],
95
- :created_at => date(c['commit']['author']['date']),
96
- :ext_ref_id => c[@ext_uniq]
97
- )
136
+ if parents.first(:commit_id => this[:id],
137
+ :parent_id => parent[:id]).nil?
98
138
 
99
- #c['parents'].each do |p|
100
- # url = p['url'].split(/\//)
101
- # get_commit url[4], url[5], url[7]
102
- #
103
- # commit = commits.first(:sha => sha)
104
- # parent = commits.first(:sha => url[7])
105
- # @db[:commit_parents].insert(:commit_id => commit[:id],
106
- # :parent_id => parent[:id])
107
- # @log.info "Added parent #{parent[:sha]} to commit #{sha}"
108
- #end
139
+ parents.insert(:commit_id => this[:id],
140
+ :parent_id => parent[:id])
141
+ info "Added parent #{parent[:sha]} to commit #{this[:sha]}"
142
+ else
143
+ info "Parent #{parent[:sha]} for commit #{this[:sha]} exists"
144
+ end
109
145
  end
110
- debug "GHTorrent: Transaction committed"
111
- else
112
- debug "GHTorrent: Commit #{sha} exists"
113
- end
114
146
  end
115
147
 
116
148
  ##
@@ -120,10 +152,8 @@ module GHTorrent
120
152
  # Resolution of how
121
153
  #
122
154
  # ==Parameters:
123
- # githubuser::
124
- # A hash containing the user's Github login
125
- # commituser::
126
- # A hash containing the Git commit's user name and email
155
+ # [githubuser] A hash containing the user's Github login
156
+ # [commituser] A hash containing the Git commit's user name and email
127
157
  # == Returns:
128
158
  # The (added/modified) user entry as a Hash.
129
159
  def commit_user(githubuser, commituser)
@@ -139,13 +169,13 @@ module GHTorrent
139
169
  login = githubuser['login'] unless githubuser.nil?
140
170
 
141
171
  if login.nil?
142
- ensure_user("#{name}<#{email}>", true)
172
+ ensure_user("#{name}<#{email}>", true, false)
143
173
  else
144
174
  dbuser = users.first(:login => login)
145
175
  byemail = users.first(:email => email)
146
176
  if dbuser.nil?
147
177
  # We do not have the user in the database yet. Add him
148
- added = ensure_user(login, true)
178
+ added = ensure_user(login, true, false)
149
179
  if byemail.nil?
150
180
  #
151
181
  users.filter(:login => login).update(:name => name) if added[:name].nil?
@@ -177,28 +207,31 @@ module GHTorrent
177
207
  ##
178
208
  # Ensure that a user exists, or fetch its latest state from Github
179
209
  # ==Parameters:
180
- # user::
181
- # The full email address in RFC 822 format
182
- # or a login name to lookup the user by
183
- # followers::
184
- # A boolean value indicating whether to retrieve the user's followers
185
- # == Returns:
210
+ # [user] The full email address in RFC 822 format or a login name to lookup
211
+ # the user by
212
+ # [followers] A boolean value indicating whether to retrieve the user's
213
+ # followers
214
+ # [orgs] A boolean value indicating whether to retrieve the organizations
215
+ # the user participates in
216
+ # ==Returns:
186
217
  # If the user can be retrieved, it is returned as a Hash. Otherwise,
187
218
  # the result is nil
188
- def ensure_user(user, followers)
219
+ def ensure_user(user, followers, orgs)
189
220
  # Github only supports alphanumerics and dashes in its usernames.
190
221
  # All other symbols are treated as emails.
191
- u = if not user.match(/^[A-Za-z0-9\-]*$/)
192
- begin
193
- name, email = user.split("<")
194
- email = email.split(">")[0]
195
- rescue Exception
196
- raise new GHTorrentException("Not a valid email address: #{user}")
197
- end
198
- ensure_user_byemail(email.strip, name.strip, followers)
199
- else
200
- ensure_user_byuname(user, followers)
201
- end
222
+ if not user.match(/^[A-Za-z0-9\-]*$/)
223
+ begin
224
+ name, email = user.split("<")
225
+ email = email.split(">")[0]
226
+ rescue Exception
227
+ raise new GHTorrentException("Not a valid email address: #{user}")
228
+ end
229
+ u = ensure_user_byemail(email.strip, name.strip)
230
+ else
231
+ u = ensure_user_byuname(user)
232
+ ensure_user_followers(user) if followers
233
+ ensure_orgs(user) if orgs
234
+ end
202
235
  return u
203
236
  end
204
237
 
@@ -211,7 +244,7 @@ module GHTorrent
211
244
  # == Returns:
212
245
  # If the user can be retrieved, it is returned as a Hash. Otherwise,
213
246
  # the result is nil
214
- def ensure_user_byuname(user, followers)
247
+ def ensure_user_byuname(user)
215
248
  users = @db[:users]
216
249
  usr = users.first(:login => user)
217
250
 
@@ -232,14 +265,12 @@ module GHTorrent
232
265
  :hireable => boolean(u['hirable']),
233
266
  :bio => u['bio'],
234
267
  :location => u['location'],
268
+ :type => user_type(u['type']),
235
269
  :created_at => date(u['created_at']),
236
270
  :ext_ref_id => u[@ext_uniq])
237
271
 
238
272
  info "GHTorrent: New user #{user}"
239
273
 
240
- # Get the user's followers
241
- ensure_user_followers(user) if followers
242
-
243
274
  users.first(:login => user)
244
275
  else
245
276
  debug "GHTorrent: User #{user} exists"
@@ -256,11 +287,11 @@ module GHTorrent
256
287
  # [user] The user login to find followers by
257
288
  def ensure_user_followers(user, ts = Time.now)
258
289
 
259
- followers = retrieve_new_user_followers(user)
290
+ followers = retrieve_user_followers(user)
260
291
  followers.each { |f|
261
292
  follower = f['login']
262
- ensure_user(user, false)
263
- ensure_user(follower, false)
293
+ ensure_user(user, false, false)
294
+ ensure_user(follower, false, false)
264
295
 
265
296
  userid = @db[:users].select(:id).first(:login => user)[:id]
266
297
  followerid = @db[:users].select(:id).first(:login => follower)[:id]
@@ -284,13 +315,13 @@ module GHTorrent
284
315
  # Github API v2 if unsuccessful.
285
316
  #
286
317
  # ==Parameters:
287
- # user::
288
- # The email to lookup the user by
289
- #
318
+ # [email] The email to lookup the user by
319
+ # [name] The user's name
320
+ # [followers] If true, the user's followers will be retrieved
290
321
  # == Returns:
291
322
  # If the user can be retrieved, it is returned as a Hash. Otherwise,
292
323
  # the result is nil
293
- def ensure_user_byemail(email, name, followers)
324
+ def ensure_user_byemail(email, name)
294
325
  users = @db[:users]
295
326
  usr = users.first(:email => email)
296
327
 
@@ -318,7 +349,6 @@ module GHTorrent
318
349
  :created_at => date(u['user']['created_at']),
319
350
  :ext_ref_id => u[@ext_uniq])
320
351
  debug "GHTorrent: Found #{email} through API v2 query"
321
- ensure_user_followers(user) if followers
322
352
  users.first(:email => email)
323
353
  end
324
354
  else
@@ -334,11 +364,12 @@ module GHTorrent
334
364
  # [user] The email or login name to which this repo belongs
335
365
  # [repo] The repo name
336
366
  #
337
- # == Returns: If the repo can be retrieved, it is returned as a Hash.
338
- # Otherwise, the result is nil
367
+ # == Returns:
368
+ # If the repo can be retrieved, it is returned as a Hash. Otherwise,
369
+ # the result is nil
339
370
  def ensure_repo(user, repo)
340
371
 
341
- ensure_user(user, false)
372
+ ensure_user(user, true, true)
342
373
  repos = @db[:projects]
343
374
  currepo = repos.first(:name => repo)
344
375
 
@@ -353,6 +384,7 @@ module GHTorrent
353
384
  :ext_ref_id => r[@ext_uniq])
354
385
 
355
386
  info "GHTorrent: New repo #{repo}"
387
+ ensure_commits(user, repo)
356
388
  repos.first(:name => repo)
357
389
  else
358
390
  debug "GHTorrent: Repo #{repo} exists"
@@ -360,7 +392,165 @@ module GHTorrent
360
392
  end
361
393
  end
362
394
 
363
- private
395
+ ##
396
+ # Make sure that the organizations the user participates in exist
397
+ #
398
+ # ==Parameters:
399
+ # [user] The login name of the user to check the organizations for
400
+ #
401
+ def ensure_orgs(user)
402
+ usr = @db[:users].first(:login => user)
403
+ retrieve_orgs(user).map{|o| ensure_participation(user, o['login'])}
404
+ end
405
+
406
+ ##
407
+ # Make sure that a user belongs to the provided organization
408
+ #
409
+ # ==Parameters:
410
+ # [user] The login name of the user to check the organizations for
411
+ # [organization] The login name of the organization to check whether the user
412
+ # belongs in
413
+ #
414
+ def ensure_participation(user, organization)
415
+ org = ensure_org(organization)
416
+ usr = ensure_user(user, false, false)
417
+
418
+ org_members = @db[:organization_members]
419
+ participates = org_members.first(:user_id => usr[:id], :org_id => org[:id])
420
+
421
+ if participates.nil?
422
+ org_members.insert(:user_id => usr[:id],
423
+ :org_id => org[:id])
424
+ info "GHTorrent: Added participation #{organization} -> #{user}"
425
+ org_members.first(:user_id => usr[:id], :org_id => org[:id])
426
+ else
427
+ debug "GHTorrent: Participation #{organization} -> #{user} exists"
428
+ participates
429
+ end
430
+
431
+ end
432
+
433
+ ##
434
+ # Make sure that an organization exists
435
+ #
436
+ # ==Parameters:
437
+ # [organization] The login name of the organization
438
+ #
439
+ def ensure_org(organization)
440
+ org = @db[:users].find(:login => organization, :type => 'org')
441
+
442
+ if org.nil?
443
+ ensure_user(org, false, false)
444
+ else
445
+ debug "GHTorrent: Organization #{organization} exists"
446
+ org.first
447
+ end
448
+ end
449
+
450
+
451
+ ##
452
+ # Get all comments for a commit
453
+ #
454
+ # ==Parameters:
455
+ # [user] The login name of the user to whom the repo belongs
456
+ def ensure_commit_comments(user, repo, sha)
457
+ commit_id = @db[:commits].first(:sha => sha)
458
+ stored_comments = @db[:commit_comments].find(:commit_id => commit_id)
459
+ commit_commets = retrieve_commit_comments(user, repo, sha)
460
+ user_id = @db[:users].first(:login => user)[:id]
461
+
462
+ not_saved = commit_commets.reduce([]) do |acc, x|
463
+ if stored_comments.find{|y| y[:comment_id] == x['comment_id']}.nil?
464
+ acc << x
465
+ else
466
+ acc
467
+ end
468
+ end
469
+
470
+ not_saved.each do |c|
471
+ @db[:commit_comments].insert(
472
+ :commit_id => commit_id,
473
+ :user_id => user_id,
474
+ :body => c['body'],
475
+ :line => c['line'],
476
+ :position => c['position'],
477
+ :comment_id => c['id'],
478
+ :ext_ref_id => c['ext_ref_id'],
479
+ :created_at => date(c['created_at'])
480
+ )
481
+ info "GHTorrent: Added commit comment #{sha} -> #{user}"
482
+ end
483
+ end
484
+
485
+ ##
486
+ # Get a specific comment
487
+ #
488
+ # ==Parameters:
489
+ # [user] The login name of the user to whom the repo belongs
490
+ def ensure_commit_comment(user, repo, id)
491
+ stored_comment = @db[:commit_comments].first(:comment_id => id)
492
+
493
+ if stored_comment.nil?
494
+ retrieved = retrieve_commit_comment(user, repo, id)
495
+ commit = ensure_commit(repo, retrieved['commit_id'], user)
496
+ user = ensure_user(user, false, false)
497
+ @db[:commit_comments].insert(
498
+ :commit_id => commit[:id],
499
+ :user_id => user[:id],
500
+ :body => retrieved['body'],
501
+ :line => retrieved['line'],
502
+ :position => retrieved['position'],
503
+ :comment_id => retrieved['id'],
504
+ :ext_ref_id => retrieved['ext_ref_id'],
505
+ :created_at => date(retrieved['created_at'])
506
+ )
507
+ @db[:commit_comments].first(:comment_id => id)
508
+ info "GHTorrent: Added commit comment #{commit[:sha]} -> #{user}"
509
+ else
510
+ info "GHTorrent: Commit comment #{id} exists"
511
+ stored_comment
512
+ end
513
+ end
514
+
515
+ private
516
+
517
+ # Store a commit contained in a hash. First check whether the commit exists.
518
+ def store_commit(c, repo, user)
519
+ commits = @db[:commits]
520
+ commit = commits.first(:sha => c['sha'])
521
+
522
+ if commit.nil?
523
+ author = commit_user(c['author'], c['commit']['author'])
524
+ commiter = commit_user(c['committer'], c['commit']['committer'])
525
+
526
+
527
+ userid = @db[:users].filter(:login => user).first[:id]
528
+ repoid = @db[:projects].filter(:owner_id => userid,
529
+ :name => repo).first[:id]
530
+
531
+ commits.insert(:sha => c['sha'],
532
+ :author_id => author[:id],
533
+ :committer_id => commiter[:id],
534
+ :project_id => repoid,
535
+ :created_at => date(c['commit']['author']['date']),
536
+ :ext_ref_id => c[@ext_uniq]
537
+ )
538
+ debug "GHTorrent: New commit #{repo} -> #{c['sha']} "
539
+ else
540
+ debug "GHTorrent: Commit #{repo} -> #{c['sha']} exists"
541
+ end
542
+ end
543
+
544
+ # Run a block in a DB transaction. Exceptions trigger transaction rollback
545
+ # and are rethrown.
546
+ def transaction(&block)
547
+ start_time = Time.now
548
+ @db.transaction(:rollback => :reraise, :isolation => :committed) do
549
+ yield block
550
+ end
551
+ total = Time.now.to_ms - start_time.to_ms
552
+ debug "GHTorrent: Transaction committed (#{total} ms)"
553
+ end
364
554
 
365
555
  ##
366
556
  # Convert a string value to boolean, the SQL way
@@ -379,7 +569,7 @@ module GHTorrent
379
569
  # - yyyy-mm-ddThh:mm:ssZ
380
570
  # - yyyy/mm/dd hh:mm:ss {+/-}hhmm
381
571
  def date(arg)
382
- Time.parse(arg).to_i
572
+ Time.parse(arg)#.to_i
383
573
  end
384
574
 
385
575
  def is_valid_email(email)
@@ -388,9 +578,13 @@ module GHTorrent
388
578
  end
389
579
  # Base exception for all GHTorrent exceptions
390
580
  class GHTorrentException < Exception
391
-
392
581
  end
582
+ end
393
583
 
584
+ class Time
585
+ def to_ms
586
+ (self.to_f * 1000.0).to_i
587
+ end
394
588
  end
395
589
 
396
590
  # vim: set sta sts=2 shiftwidth=2 sw=2 et ai :