ghtorrent 0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +132 -0
- data/Rakefile +20 -0
- data/bin/ght-data-retrieval +119 -0
- data/bin/ght-load +242 -0
- data/bin/ght-mirror-events +154 -0
- data/bin/ght-periodic-dump +92 -0
- data/bin/ght-rm-dupl +124 -0
- data/bin/ght-torrent-index +180 -0
- data/lib/ghtorrent.rb +22 -0
- data/lib/ghtorrent/adapters/base_adapter.rb +91 -0
- data/lib/ghtorrent/adapters/mongo_persister.rb +126 -0
- data/lib/ghtorrent/adapters/noop_persister.rb +58 -0
- data/lib/ghtorrent/api_client.rb +106 -0
- data/lib/ghtorrent/call_stack.rb +119 -0
- data/lib/ghtorrent/command.rb +136 -0
- data/lib/ghtorrent/ghtorrent.rb +396 -0
- data/lib/ghtorrent/logging.rb +69 -0
- data/lib/ghtorrent/migrations/001_init_schema.rb +60 -0
- data/lib/ghtorrent/migrations/002_add_followers_created_at.rb +15 -0
- data/lib/ghtorrent/migrations/003_add_external_ref_ids.rb +40 -0
- data/lib/ghtorrent/persister.rb +48 -0
- data/lib/ghtorrent/retriever.rb +148 -0
- data/lib/ghtorrent/settings.rb +63 -0
- data/lib/ghtorrent/utils.rb +58 -0
- data/test/callstack_test.rb +67 -0
- metadata +181 -0
@@ -0,0 +1,136 @@
|
|
1
|
+
# Copyright 2012 Georgios Gousios <gousiosg@gmail.com>
|
2
|
+
#
|
3
|
+
# Redistribution and use in source and binary forms, with or
|
4
|
+
# without modification, are permitted provided that the following
|
5
|
+
# conditions are met:
|
6
|
+
#
|
7
|
+
# 1. Redistributions of source code must retain the above
|
8
|
+
# copyright notice, this list of conditions and the following
|
9
|
+
# disclaimer.
|
10
|
+
#
|
11
|
+
# 2. Redistributions in binary form must reproduce the above
|
12
|
+
# copyright notice, this list of conditions and the following
|
13
|
+
# disclaimer in the documentation and/or other materials
|
14
|
+
# provided with the distribution.
|
15
|
+
#
|
16
|
+
# THIS SOFTWARE IS PROVIDED BY BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
17
|
+
# AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
18
|
+
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
19
|
+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
|
20
|
+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
21
|
+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
22
|
+
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
23
|
+
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
24
|
+
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
25
|
+
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
26
|
+
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
27
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
28
|
+
|
29
|
+
require 'trollop'
|
30
|
+
|
31
|
+
# Base class for all GHTorrent command line utilities. Provides basic command
|
32
|
+
# line argument parsing and command bootstrapping support. The order of
|
33
|
+
# initialization is the following:
|
34
|
+
# prepare_options
|
35
|
+
# validate
|
36
|
+
# go
|
37
|
+
|
38
|
+
module GHTorrent
|
39
|
+
# Base class for GHTorrent command line tools.
#
# A command is driven through Command.run, which instantiates the class and
# then calls, in order, process_options (which in turn hands the option
# parser to the prepare_options hook), validate and go. Subclasses customise
# a command by overriding prepare_options, validate and go.
class Command

  attr_reader :args, :options

  class << self
    # Build a command from +args+ (ARGV by default), parse and validate its
    # options and execute it.
    #
    # Any StandardError raised by #go is printed on STDERR, followed by the
    # full backtrace when the verbose option is set or by its first frame
    # otherwise, and the process then exits with status 1.
    def run(args = ARGV)
      command = new(args)
      command.process_options
      command.validate

      begin
        command.go
      rescue => e
        STDERR.puts e.message
        # An exception may carry no backtrace at all; normalise to an array
        # so that reporting the error can never fail itself.
        trace = Array(e.backtrace)
        # Options are read with [] everywhere else in this class; do the same
        # here instead of depending on the hash answering to #verbose.
        if command.options[:verbose]
          STDERR.puts trace.join("\n")
        elsif !trace.empty?
          STDERR.puts trace.first
        end
        exit 1
      end
    end
  end

  # args:: the command line arguments to parse
  def initialize(args)
    @args = args
  end

  # Specify and parse supported command line options.
  #
  # The parsed options are stored in @options. Afterwards @args is replaced
  # by a copy of itself and the global ARGV is emptied.
  def process_options
    command = self
    @options = Trollop::options(@args) do
      # Within this block +self+ is whatever Trollop evaluates the block
      # against (presumably its parser), hence the captured +command+.
      command.prepare_options(self)

      banner <<-END
Standard options:
      END

      opt :config, 'config.yaml file location', :short => 'c',
          :default => 'config.yaml'
      opt :verbose, 'verbose mode', :short => 'v'
    end

    @args = @args.dup
    ARGV.clear
  end

  # Get the version of the project, as the raw contents of the VERSION file
  # located two directories above this file.
  def version
    File.read(File.join(File.dirname(__FILE__), '..', '..', 'VERSION'))
  end

  # This method should be overridden by subclasses in order to specify,
  # using trollop, the supported command line options.
  #
  # options:: the object the option declarations are made on
  def prepare_options(options)
  end

  # Examine the validity of the provided options in the context of the
  # executed command. Subclasses can also call super to also invoke the checks
  # provided by this class.
  #
  # Terminates through Trollop::die when no usable configuration file is found.
  def validate
    if options[:config].nil?
      # process_options declares a default for :config, so this branch is
      # only taken when the options were produced without that default.
      unless file_exists?('config.yaml') || file_exists?('/etc/ghtorrent/config.yaml')
        Trollop::die('No config file in default locations (., /etc/ghtorrent). ' \
                     'You need to specify the --config parameter. Read the ' \
                     'documentation on how to create a config.yaml file.')
      end
    elsif !file_exists?(options[:config])
      Trollop::die("Cannot find file #{options[:config]}")
    end
  end

  # Name of the command that is currently being executed.
  def command_name
    File.basename($0)
  end

  # The actual command code. Does nothing here; subclasses override it.
  def go
  end

  private

  # Tell whether +file+ exists. Arguments that are not usable as a path
  # (nil, for instance) yield false rather than an error.
  def file_exists?(file)
    File.exist?(file)
  rescue StandardError
    false
  end

end
|
134
|
+
|
135
|
+
end
|
136
|
+
# vim: set sta sts=2 shiftwidth=2 sw=2 et ai :
|
@@ -0,0 +1,396 @@
|
|
1
|
+
# Copyright 2012 Georgios Gousios <gousiosg@gmail.com>
|
2
|
+
#
|
3
|
+
# Redistribution and use in source and binary forms, with or
|
4
|
+
# without modification, are permitted provided that the following
|
5
|
+
# conditions are met:
|
6
|
+
#
|
7
|
+
# 1. Redistributions of source code must retain the above
|
8
|
+
# copyright notice, this list of conditions and the following
|
9
|
+
# disclaimer.
|
10
|
+
#
|
11
|
+
# 2. Redistributions in binary form must reproduce the above
|
12
|
+
# copyright notice, this list of conditions and the following
|
13
|
+
# disclaimer in the documentation and/or other materials
|
14
|
+
# provided with the distribution.
|
15
|
+
#
|
16
|
+
# THIS SOFTWARE IS PROVIDED BY BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
17
|
+
# AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
18
|
+
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
19
|
+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
|
20
|
+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
21
|
+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
22
|
+
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
23
|
+
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
24
|
+
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
25
|
+
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
26
|
+
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
27
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
28
|
+
|
29
|
+
require 'sequel'
|
30
|
+
|
31
|
+
module GHTorrent
|
32
|
+
class Mirror
|
33
|
+
|
34
|
+
include GHTorrent::Logging
|
35
|
+
include GHTorrent::Settings
|
36
|
+
include GHTorrent::Retriever
|
37
|
+
include GHTorrent::Persister
|
38
|
+
|
39
|
+
attr_reader :settings, :persister
|
40
|
+
|
41
|
+
# Set up a mirror from a YAML configuration file.
#
# The parsed settings are kept in @settings and forwarded to the next
# initialize in the ancestor chain. After that the :uniq_id setting, a
# logger writing to STDOUT and the :mongo persister are stored, and finally
# the SQL database is opened through get_db.
#
# configuration:: path of the YAML file holding the settings
def initialize(configuration)
  @settings = YAML.load_file(configuration)
  super(@settings)

  @ext_uniq = config(:uniq_id)
  @logger = Logger.new(STDOUT)
  @persister = connect(:mongo, @settings)

  get_db
end
|
50
|
+
|
51
|
+
# db related functions

# Connect to the SQL database named by the :sql_url setting and remember the
# connection in @db. A database that contains no tables is first populated
# by applying the migrations found in the 'migrations' directory next to
# this file.
#
# == Returns:
#  The object returned by Sequel.connect
def get_db
  @db = Sequel.connect(config(:sql_url))
  return @db unless @db.tables.empty?

  migrations_dir = File.join(File.dirname(__FILE__), 'migrations')
  puts "Database empty, running migrations from #{migrations_dir}"
  Sequel.extension :migration
  Sequel::Migrator.apply(@db, migrations_dir)

  @db
end
|
64
|
+
|
65
|
+
##
# Make sure that a commit is recorded in the commits table. A commit that is
# not there yet is retrieved from Github and inserted, together with its
# repository, author and committer, inside a single database transaction.
#
# ==Parameters:
#  user::
#    The email or login name to which the repository belongs
#  repo::
#    The repository name
#  sha::
#    The commit SHA: exactly 40 lowercase hexadecimal digits
#
# == Returns:
#  nil when +sha+ is malformed (the commit is then ignored and an error is
#  logged). Otherwise the result of the final debug log call.
def get_commit(user, repo, sha)

  # Anchor both ends of the whole string. The pattern used to anchor the end
  # only, so any text that merely ended in 40 hex digits was accepted.
  unless sha.match(/\A[a-f0-9]{40}\z/)
    error "GHTorrent: Ignoring commit #{sha}"
    return
  end

  commits = @db[:commits]

  if commits.first(:sha => sha).nil?
    @db.transaction(:rollback => :reraise) do
      ensure_repo(user, repo)
      c = retrieve_commit(repo, sha, user)

      author = commit_user(c['author'], c['commit']['author'])
      committer = commit_user(c['committer'], c['commit']['committer'])

      commits.insert(:sha => sha,
                     :author_id => author[:id],
                     :committer_id => committer[:id],
                     :created_at => date(c['commit']['author']['date']),
                     :ext_ref_id => c[@ext_uniq]
      )

      # Linking a commit to its parents is currently disabled:
      #c['parents'].each do |p|
      #  url = p['url'].split(/\//)
      #  get_commit url[4], url[5], url[7]
      #
      #  commit = commits.first(:sha => sha)
      #  parent = commits.first(:sha => url[7])
      #  @db[:commit_parents].insert(:commit_id => commit[:id],
      #                              :parent_id => parent[:id])
      #  @log.info "Added parent #{parent[:sha]} to commit #{sha}"
      #end
    end
    debug "GHTorrent: Transaction committed"
  else
    debug "GHTorrent: Commit #{sha} exists"
  end
end
|
115
|
+
|
116
|
+
##
# Add (or update) an entry for a commit author. This method uses information
# in the JSON object returned by Github to add (or update) a user in the
# metadata database with a full user entry (both Git and Github details).
# When Github reports no login for the commit, the user is resolved from the
# Git name and email alone.
#
# ==Parameters:
#  githubuser::
#    A hash containing the user's Github login. May be nil.
#  commituser::
#    A hash containing the Git commit's user name and email
#
# == Returns:
#  The (added/modified) user entry as a Hash.
#
# == Raises:
#  GHTorrentException when +commituser+ is nil
def commit_user(githubuser, commituser)

  raise GHTorrentException, "git user is null" if commituser.nil?

  users = @db[:users]

  name = commituser['name']
  email = commituser['email'] #if is_valid_email(commituser['email'])
  # Github user can be null when the commit email has not been associated
  # with any account in Github.
  login = githubuser['login'] unless githubuser.nil?

  return ensure_user("#{name}<#{email}>", true) if login.nil?

  dbuser = users.first(:login => login)
  byemail = users.first(:email => email)

  if dbuser.nil?
    # We do not have the user in the database yet. Add him
    added = ensure_user(login, true)

    if byemail.nil?
      # Complete the new entry with the Git details it lacks
      users.filter(:login => login).update(:name => name) if added[:name].nil?
      users.filter(:login => login).update(:email => email) if added[:email].nil?
    else
      # There is a previous entry for the user, currently identified by
      # email. This means that the user has updated his account and now
      # Github is able to associate his commits with his git credentials.
      # As the previous entry might have already associated records, just
      # delete the new one and update the existing with any extra data.
      users.filter(:login => login).delete
      # +added+ is a database row and is read with symbol keys everywhere
      # else in this method; string keys silently produced nil for every
      # column copied below.
      users.filter(:email => email).update(
        :login => login,
        :company => added[:company],
        :location => added[:location],
        :hireable => added[:hireable],
        :bio => added[:bio],
        :created_at => added[:created_at]
      )
    end
  else
    users.filter(:login => login).update(:name => name) if dbuser[:name].nil?
    users.filter(:login => login).update(:email => email) if dbuser[:email].nil?
  end

  users.first(:login => login)
end
|
176
|
+
|
177
|
+
##
# Ensure that a user exists, or fetch its latest state from Github
# ==Parameters:
#  user::
#     The full email address in RFC 822 format ("Name <email>")
#     or a login name to lookup the user by
#  followers::
#     A boolean value indicating whether to retrieve the user's followers
# == Returns:
#  The result of ensure_user_byuname when +user+ looks like a login name,
#  of ensure_user_byemail otherwise.
# == Raises:
#  GHTorrentException when +user+ is not a login name and no email can be
#  extracted from it
def ensure_user(user, followers)
  # Github only supports alphanumerics and dashes in its usernames.
  # Anything containing other symbols is treated as an email. The anchors
  # cover the whole string, not just a single line of it.
  return ensure_user_byuname(user, followers) if user.match(/\A[A-Za-z0-9\-]*\z/)

  name, email = user.split("<")
  email = email.split(">")[0] unless email.nil?

  if name.nil? || email.nil?
    raise GHTorrentException, "Not a valid email address: #{user}"
  end

  ensure_user_byemail(email.strip, name.strip, followers)
end
|
204
|
+
|
205
|
+
##
# Ensure that a user exists, or fetch its latest state from Github
# ==Parameters:
#  user::
#     The login name to lookup the user by
#  followers::
#     When true, the followers of a newly added user are retrieved as well
#
# == Returns:
#  The user's entry in the users table, as a Hash.
def ensure_user_byuname(user, followers)
  users = @db[:users]
  usr = users.first(:login => user)

  unless usr.nil?
    debug "GHTorrent: User #{user} exists"
    return usr
  end

  u = retrieve_user_byusername(user)

  # A missing or blank email is stored as NULL
  email = u['email'].to_s.strip
  email = nil if email.empty?

  # The key used to be misspelt as 'hirable', so the stored value was always
  # 0. The value is passed in string form, which boolean understands
  # whether it arrives as a boolean or as text.
  hireable = u['hireable']

  users.insert(:login => u['login'],
               :name => u['name'],
               :company => u['company'],
               :email => email,
               :hireable => boolean(hireable.nil? ? nil : hireable.to_s),
               :bio => u['bio'],
               :location => u['location'],
               :created_at => date(u['created_at']),
               :ext_ref_id => u[@ext_uniq])

  info "GHTorrent: New user #{user}"

  # Get the user's followers
  ensure_user_followers(user) if followers

  users.first(:login => user)
end
|
249
|
+
|
250
|
+
##
# Record every follower retrieved for a user in the followers table. Since
# we do not know when the actual follow event took place, the created_at
# field is set to +ts+, which defaults to the time of the method call.
#
# ==Parameters:
#  [user] The user login to find followers by
#  [ts]   The timestamp to store for newly recorded follow relations
def ensure_user_followers(user, ts = Time.now)

  retrieve_new_user_followers(user).each do |f|
    follower_login = f['login']
    ensure_user(user, false)
    ensure_user(follower_login, false)

    user_id = @db[:users].select(:id).first(:login => user)[:id]
    follower_id = @db[:users].select(:id).first(:login => follower_login)[:id]
    follow_rows = @db[:followers]

    already_known = follow_rows.first(:user_id => user_id,
                                      :follower_id => follower_id)

    if already_known.nil?
      follow_rows.insert(:user_id => user_id,
                         :follower_id => follower_id,
                         :created_at => ts,
                         :ext_ref_id => f[@ext_uniq]
      )
      info "GHTorrent: User #{follower_login} follows #{user}"
    else
      info "User #{follower_login} already follows #{user}"
    end
  end
end
|
281
|
+
|
282
|
+
##
# Try to retrieve a user by email. Search the DB first, fall back to
# Github API v2 if unsuccessful. A user that Github does not know either is
# still recorded, under a random 8 letter placeholder login.
#
# ==Parameters:
#  email::
#    The email to lookup the user by
#  name::
#    The user's name as found in the commit
#  followers::
#    When true, the followers of a user found through Github are retrieved
#
# == Returns:
#  The first entry of the users table with the given email, as a Hash.
def ensure_user_byemail(email, name, followers)
  users = @db[:users]
  usr = users.first(:email => email)

  unless usr.nil?
    debug "GHTorrent: User with email #{email} exists"
    return usr
  end

  u = retrieve_user_byemail(email, name)

  if u.nil? || u['user'].nil? || u['user']['login'].nil?
    debug "GHTorrent: Cannot find #{email} through API v2 query"
    users.insert(:email => email,
                 :name => name,
                 # Eight random letters from the full A-Z range; rand(25)
                 # could never produce a 'Z'.
                 :login => (0...8).map { (65 + rand(26)).chr }.join,
                 :created_at => Time.now,
                 :ext_ref_id => ""
    )
  else
    users.insert(:login => u['user']['login'],
                 :name => u['user']['name'],
                 :company => u['user']['company'],
                 :email => u['user']['email'],
                 :hireable => nil,
                 :bio => nil,
                 :location => u['user']['location'],
                 :created_at => date(u['user']['created_at']),
                 :ext_ref_id => u[@ext_uniq])
    debug "GHTorrent: Found #{email} through API v2 query"
    # Followers are looked up by login. This call used to reference +user+,
    # a name that does not exist in this method.
    ensure_user_followers(u['user']['login']) if followers
  end

  users.first(:email => email)
end
|
329
|
+
|
330
|
+
##
# Ensure that a repo exists, or fetch its latest state from Github
#
# ==Parameters:
#  [user]  The email or login name to which this repo belongs
#  [repo]  The repo name
#
# == Returns:
#  The repo's entry in the projects table, as a Hash.
def ensure_repo(user, repo)

  owner = ensure_user(user, false)
  repos = @db[:projects]

  # A repository is identified by its owner as well as its name. Matching on
  # the name alone made equally named repositories of different owners
  # (forks, for instance) indistinguishable.
  key = { :name => repo, :owner_id => owner[:id] }
  currepo = repos.first(key)

  unless currepo.nil?
    debug "GHTorrent: Repo #{repo} exists"
    return currepo
  end

  r = retrieve_repo(user, repo)
  repos.insert(:url => r['url'],
               :owner_id => owner[:id],
               :name => r['name'],
               :description => r['description'],
               :language => r['language'],
               :created_at => date(r['created_at']),
               :ext_ref_id => r[@ext_uniq])

  info "GHTorrent: New repo #{repo}"
  repos.first(key)
end
|
362
|
+
|
363
|
+
private
|
364
|
+
|
365
|
+
##
# Convert a boolean value, or its string form, to boolean the SQL way.
#
# == Returns:
#  1 for true or 'true', 0 for false, 'false' or nil, and nil for any other
#  argument.
def boolean(arg)
  case arg
  when true, 'true'
    1
  when false, 'false', nil
    0
  end
end
|
377
|
+
|
378
|
+
# Convert a date string to an integer number of seconds since the Unix epoch.
#
# Dates returned by Github are formatted as:
# - yyyy-mm-ddThh:mm:ssZ
# - yyyy/mm/dd hh:mm:ss {+/-}hhmm
def date(arg)
  parsed = Time.parse(arg)
  parsed.to_i
end
|
384
|
+
|
385
|
+
# Check whether +email+ looks like an email address.
#
# The pattern is anchored to the whole string. With line anchors, a
# multi-line value passed as long as any one of its lines was an address.
#
# == Returns:
#  0 (the match position) when +email+ is accepted, nil otherwise.
def is_valid_email(email)
  email =~ /\A[a-zA-Z][\w\.-]*[a-zA-Z0-9]@[a-zA-Z0-9][\w\.-]*[a-zA-Z0-9]\.[a-zA-Z][a-zA-Z\.]*[a-zA-Z]\z/
end
|
388
|
+
end
|
389
|
+
# Base exception for all GHTorrent exceptions.
#
# Derives from StandardError so that a plain +rescue+ (or <tt>rescue => e</tt>)
# handles it. Direct subclasses of Exception are skipped by such clauses.
class GHTorrentException < StandardError

end
|
393
|
+
|
394
|
+
end
|
395
|
+
|
396
|
+
# vim: set sta sts=2 shiftwidth=2 sw=2 et ai :
|