ghtorrent 0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +132 -0
- data/Rakefile +20 -0
- data/bin/ght-data-retrieval +119 -0
- data/bin/ght-load +242 -0
- data/bin/ght-mirror-events +154 -0
- data/bin/ght-periodic-dump +92 -0
- data/bin/ght-rm-dupl +124 -0
- data/bin/ght-torrent-index +180 -0
- data/lib/ghtorrent.rb +22 -0
- data/lib/ghtorrent/adapters/base_adapter.rb +91 -0
- data/lib/ghtorrent/adapters/mongo_persister.rb +126 -0
- data/lib/ghtorrent/adapters/noop_persister.rb +58 -0
- data/lib/ghtorrent/api_client.rb +106 -0
- data/lib/ghtorrent/call_stack.rb +119 -0
- data/lib/ghtorrent/command.rb +136 -0
- data/lib/ghtorrent/ghtorrent.rb +396 -0
- data/lib/ghtorrent/logging.rb +69 -0
- data/lib/ghtorrent/migrations/001_init_schema.rb +60 -0
- data/lib/ghtorrent/migrations/002_add_followers_created_at.rb +15 -0
- data/lib/ghtorrent/migrations/003_add_external_ref_ids.rb +40 -0
- data/lib/ghtorrent/persister.rb +48 -0
- data/lib/ghtorrent/retriever.rb +148 -0
- data/lib/ghtorrent/settings.rb +63 -0
- data/lib/ghtorrent/utils.rb +58 -0
- data/test/callstack_test.rb +67 -0
- metadata +181 -0
@@ -0,0 +1,69 @@
|
|
1
|
+
# Copyright 2012 Georgios Gousios <gousiosg@gmail.com>
|
2
|
+
#
|
3
|
+
# Redistribution and use in source and binary forms, with or
|
4
|
+
# without modification, are permitted provided that the following
|
5
|
+
# conditions are met:
|
6
|
+
#
|
7
|
+
# 1. Redistributions of source code must retain the above
|
8
|
+
# copyright notice, this list of conditions and the following
|
9
|
+
# disclaimer.
|
10
|
+
#
|
11
|
+
# 2. Redistributions in binary form must reproduce the above
|
12
|
+
# copyright notice, this list of conditions and the following
|
13
|
+
# disclaimer in the documentation and/or other materials
|
14
|
+
# provided with the distribution.
|
15
|
+
#
|
16
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
17
|
+
# AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
18
|
+
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
19
|
+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
|
20
|
+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
21
|
+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
22
|
+
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
23
|
+
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
24
|
+
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
25
|
+
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
26
|
+
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
27
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
28
|
+
|
29
|
+
require 'logger'
|
30
|
+
|
31
|
+
module GHTorrent
  # Mixin that gives the including object level-based logging helpers
  # (+warn+, +info+, +debug+). All helpers delegate to the logger held in the
  # +@logger+ instance variable of the including object; this module never
  # assigns +@logger+ itself. When +@logger+ is unset (nil/false) every
  # helper is a silent no-op.
  #
  # Note that +warn+ defined here shadows Kernel#warn for including objects.
  module Logging

    # Numeric debug severity: Logger::DEBUG when the stdlib Logger class is
    # loaded, 0 otherwise.
    DEBUG_LEVEL = defined?(Logger) ? Logger::DEBUG : 0

    # Log +msg+ at warn severity.
    def warn(msg)
      log(:warn, msg)
    end

    # Log +msg+ at info severity.
    def info(msg)
      log(:info, msg)
    end

    # Log +msg+ at debug severity.
    def debug(msg)
      log(:debug, msg)
    end

    private

    # Log a message at the given level.
    #
    # +level+ is one of :fatal, :error, :warn, :info or :debug; any other
    # value is logged at debug severity. Does nothing (and returns nil) when
    # no logger has been configured.
    def log(level, msg)
      return unless @logger

      case level
      when :fatal then @logger.fatal(msg)
      when :error then @logger.error(msg)
      when :warn  then @logger.warn(msg)
      when :info  then @logger.info(msg)
      else
        # :debug and every unrecognised level end up at debug severity.
        @logger.debug(msg)
      end
    end
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
require 'sequel'
|
2
|
+
|
3
|
+
# Initial schema: creates the users, projects, commits, commit_parents and
# followers tables, and removes them again on rollback.
Sequel.migration do
  up do
    puts("Creating table users")
    create_table :users do
      primary_key :id
      String :login, :unique => true, :null => false
      String :name
      String :company, :null => true
      String :location, :null => true
      String :email, :null => true, :unique => true
      TrueClass :hireable, :null => true
      String :bio, :null => true
      Time :created_at, :null => false
    end

    puts("Creating table projects")
    create_table :projects do
      primary_key :id
      String :url
      foreign_key :owner_id, :users
      String :name, :null => false
      String :description
      String :language
      Time :created_at, :null => false
    end

    puts("Creating table commits")
    create_table :commits do
      primary_key :id
      String :sha, :size => 40, :unique => true
      foreign_key :author_id, :users
      foreign_key :committer_id, :users
      Time :created_at, :null => false
    end

    # Join table linking a commit to each of its parent commits.
    puts("Creating table commit_parents")
    create_table :commit_parents do
      foreign_key :commit_id, :commits, :null => false
      foreign_key :parent_id, :commits, :null => false
      primary_key [:commit_id, :parent_id]
    end

    # Join table linking a user to each of the users following them.
    puts("Creating table followers")
    create_table :followers do
      foreign_key :user_id, :users, :null => false
      foreign_key :follower_id, :users, :null => false
      primary_key [:user_id, :follower_id]
    end
  end

  down do
    # Drop in reverse dependency order: a table must go before any table its
    # foreign keys point to, otherwise databases that enforce foreign keys
    # reject the drop (followers/commits/projects -> users,
    # commit_parents -> commits).
    drop_table :followers
    drop_table :commit_parents
    drop_table :commits
    drop_table :projects
    drop_table :users
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'sequel'
|
2
|
+
|
3
|
+
# Adds a non-null, 24-character +ext_ref_id+ string column (default "0") to
# the users, projects, commits and followers tables; rollback removes it.
# NOTE(review): 24 characters presumably matches the id of the corresponding
# externally stored document - confirm against the code that fills it in.
Sequel.migration do
  # Tables that receive the external reference column. Shared by +up+ and
  # +down+ so the two directions cannot drift apart.
  tables = [:users, :projects, :commits, :followers]

  up do
    tables.each do |table|
      alter_table table do
        add_column :ext_ref_id, String, :null => false, :size => 24, :default => "0"
      end
    end
  end

  down do
    tables.each do |table|
      alter_table table do
        drop_column :ext_ref_id
      end
    end
  end
end
|
40
|
+
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# Copyright 2012 Georgios Gousios <gousiosg@gmail.com>
|
2
|
+
#
|
3
|
+
# Redistribution and use in source and binary forms, with or
|
4
|
+
# without modification, are permitted provided that the following
|
5
|
+
# conditions are met:
|
6
|
+
#
|
7
|
+
# 1. Redistributions of source code must retain the above
|
8
|
+
# copyright notice, this list of conditions and the following
|
9
|
+
# disclaimer.
|
10
|
+
#
|
11
|
+
# 2. Redistributions in binary form must reproduce the above
|
12
|
+
# copyright notice, this list of conditions and the following
|
13
|
+
# disclaimer in the documentation and/or other materials
|
14
|
+
# provided with the distribution.
|
15
|
+
#
|
16
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
17
|
+
# AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
18
|
+
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
19
|
+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
|
20
|
+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
21
|
+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
22
|
+
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
23
|
+
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
24
|
+
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
25
|
+
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
26
|
+
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
27
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
28
|
+
|
29
|
+
module GHTorrent

  # Mixin exposing a factory method for persistence adapters. +connect+ is an
  # instance method, so it is available on objects that include this module.
  module Persister

    # Maps the adapter name accepted by +connect+ to the class implementing it.
    ADAPTERS = {
      :mongo => GHTorrent::MongoPersister,
      :noop  => GHTorrent::NoopPersister
    }

    # Factory method for retrieving persistence connections.
    # The +settings+ argument is a fully parsed YAML document
    # passed on to adapters. The available adapters are :mongo and :noop.
    #
    # Returns a new instance of the adapter class, constructed with +settings+.
    # Raises ArgumentError when +adapter+ is not a key of ADAPTERS (previously
    # this surfaced as a NoMethodError on nil).
    def connect(adapter, settings)
      driver = ADAPTERS[adapter]

      if driver.nil?
        raise ArgumentError,
              "Unknown persister adapter #{adapter.inspect}; " \
              "available adapters: #{ADAPTERS.keys.inspect}"
      end

      driver.new(settings)
    end

  end
end
|
@@ -0,0 +1,148 @@
|
|
1
|
+
# Copyright 2012 Georgios Gousios <gousiosg@gmail.com>
|
2
|
+
#
|
3
|
+
# Redistribution and use in source and binary forms, with or
|
4
|
+
# without modification, are permitted provided that the following
|
5
|
+
# conditions are met:
|
6
|
+
#
|
7
|
+
# 1. Redistributions of source code must retain the above
|
8
|
+
# copyright notice, this list of conditions and the following
|
9
|
+
# disclaimer.
|
10
|
+
#
|
11
|
+
# 2. Redistributions in binary form must reproduce the above
|
12
|
+
# copyright notice, this list of conditions and the following
|
13
|
+
# disclaimer in the documentation and/or other materials
|
14
|
+
# provided with the distribution.
|
15
|
+
#
|
16
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
17
|
+
# AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
18
|
+
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
19
|
+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
|
20
|
+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
21
|
+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
22
|
+
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
23
|
+
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
24
|
+
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
25
|
+
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
26
|
+
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
27
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
28
|
+
|
29
|
+
module GHTorrent
  # Mixin that looks entities (users, followers, commits, repos) up in the
  # persister first and only calls the remote API for entries that are not
  # stored yet, storing whatever it fetches.
  #
  # The methods below read +@persister+, which is not assigned anywhere in
  # this module; the including object is expected to set it. The +info+ and
  # +debug+ helpers are likewise expected to be available on the includer.
  module Retriever

    include GHTorrent::APIClient
    include GHTorrent::Settings

    # Keeps the parsed +settings+ and caches the name of the unique-id field
    # (config key :uniq_id) under which persister ids are attached to records.
    def initialize(settings)
      super(settings)
      @settings = settings
      @uniq = config(:uniq_id)
    end

    # Return the record for the user with login +user+, fetching and storing
    # it when it is not persisted yet.
    #
    # Raises GHTorrentException when the API request returns nil.
    def retrieve_user_byusername(user)
      stored_user = @persister.find(:users, {'login' => user})

      unless stored_user.empty?
        debug "Retriever: Already got user #{user}"
        return stored_user.first
      end

      url = ghurl "users/#{user}"
      u = api_request(url)

      # raise, not throw: throw is the catch/throw non-local exit and would
      # surface as an uncaught-throw error instead of this exception.
      raise GHTorrentException, "Cannot find user #{user}" if u.nil?

      unq = @persister.store(:users, u)
      u[@uniq] = unq
      info "Retriever: New user #{user}"
      u
    end

    # Try Github API v2 user search by email. This is optional info, so
    # it may not return any data.
    # http://develop.github.com/p/users.html
    #
    # +name+ is accepted but not used. The result is returned as-is and is
    # not stored.
    # NOTE(review): +email+ is interpolated into the URL without escaping.
    def retrieve_user_byemail(email, name)
      url = ghurl_v2("user/email/#{email}")
      api_request(url)
    end

    # Fetch the followers of +user+ from the API and store those that are not
    # persisted yet. Each fetched entry gets a 'follows' key set to +user+
    # before it is compared and stored.
    #
    # Returns the persisted followers of +user+, re-read after the update.
    def retrieve_new_user_followers(user)
      stored_followers = @persister.find(:followers, {'follows' => user})

      followers = paged_api_request(ghurl "users/#{user}/followers")
      followers.each do |x|
        x['follows'] = user

        exists = stored_followers.any? do |f|
          f['follows'] == user && f['login'] == x['login']
        end

        unless exists
          @persister.store(:followers, x)
          info "Retriever: Added follower #{user} -> #{x['login']}"
        else
          debug "Retriever: Follower #{user} -> #{x['login']} exists"
        end
      end

      @persister.find(:followers, {'follows' => user})
    end

    # Return the record for commit +sha+ of repository +user+/+repo+,
    # fetching and storing it when it is not persisted yet.
    #
    # Raises GHTorrentException when the API request returns nil.
    def retrieve_commit(repo, sha, user)
      commit = @persister.find(:commits, {'sha' => "#{sha}"})

      unless commit.empty?
        debug "Retriever: Already got commit #{repo} -> #{sha}"
        return commit.first
      end

      url = ghurl "repos/#{user}/#{repo}/commits/#{sha}"
      c = api_request(url)

      if c.nil?
        raise GHTorrentException, "Cannot find commit #{user}/#{repo}/#{sha}"
      end

      unq = @persister.store(:commits, c)
      info "Retriever: New commit #{repo} -> #{sha}"
      c[@uniq] = unq
      c
    end

    # Return the record for repository +user+/+repo+, fetching and storing it
    # when it is not persisted yet.
    #
    # Raises GHTorrentException when the API request returns nil.
    def retrieve_repo(user, repo)
      stored_repo = @persister.find(:repos, {'owner.login' => user,
                                             'name' => repo })

      unless stored_repo.empty?
        debug "Retriever: Already got repo #{user} -> #{repo}"
        return stored_repo.first
      end

      url = ghurl "repos/#{user}/#{repo}"
      r = api_request(url)

      raise GHTorrentException, "Cannot find repo #{user}/#{repo}" if r.nil?

      unq = @persister.store(:repos, r)
      info "Retriever: New repo #{user} -> #{repo}"
      r[@uniq] = unq
      r
    end

    # Get current Github events
    # NOTE(review): this URL is hard-coded and does not go through
    # config(:mirror_urlbase) like the other requests - confirm intended.
    def get_events
      api_request "https://api.github.com/events"
    end

    private

    # Build a full URL by appending +path+ to the :mirror_urlbase setting.
    def ghurl(path)
      config(:mirror_urlbase) + path
    end

    # Build a full URL by appending +path+ to the :mirror_urlbase_v2 setting.
    def ghurl_v2(path)
      config(:mirror_urlbase_v2) + path
    end
  end
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# Copyright 2012 Georgios Gousios <gousiosg@gmail.com>
|
2
|
+
#
|
3
|
+
# Redistribution and use in source and binary forms, with or
|
4
|
+
# without modification, are permitted provided that the following
|
5
|
+
# conditions are met:
|
6
|
+
#
|
7
|
+
# 1. Redistributions of source code must retain the above
|
8
|
+
# copyright notice, this list of conditions and the following
|
9
|
+
# disclaimer.
|
10
|
+
#
|
11
|
+
# 2. Redistributions in binary form must reproduce the above
|
12
|
+
# copyright notice, this list of conditions and the following
|
13
|
+
# disclaimer in the documentation and/or other materials
|
14
|
+
# provided with the distribution.
|
15
|
+
#
|
16
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
17
|
+
# AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
18
|
+
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
19
|
+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
|
20
|
+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
21
|
+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
22
|
+
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
23
|
+
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
24
|
+
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
25
|
+
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
26
|
+
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
27
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
28
|
+
|
29
|
+
require 'yaml'
|
30
|
+
|
31
|
+
module GHTorrent
  # Mixin that maps symbolic configuration keys to dotted paths and reads the
  # corresponding values from the includer's settings.
  #
  # +config+ calls a +settings+ method that is not defined in this module;
  # the including object is expected to provide it. +read_value+ is also not
  # defined here (presumably supplied by GHTorrent::Utils - confirm).
  module Settings

    include GHTorrent::Utils

    # Symbolic key => dotted path of the value inside the settings document.
    # Deliberately left unfrozen: +merge+ adds entries at runtime.
    CONFIGKEYS = {
      :amqp_host => "amqp.host",
      :amqp_port => "amqp.port",
      :amqp_username => "amqp.username",
      :amqp_password => "amqp.password",
      :amqp_exchange => "amqp.exchange",

      :sql_url => "sql.url",

      :mirror_urlbase => "mirror.urlbase",
      :mirror_urlbase_v2 => "mirror.urlbase_v2",
      :mirror_reqrate => "mirror.reqrate",
      :mirror_pollevery => "mirror.pollevery",
      :mirror_persister => "mirror.persister",

      :uniq_id => "uniq_id"
    }

    # Return the configured value for the symbolic +key+, looked up through
    # the path registered for it in CONFIGKEYS.
    def config(key)
      read_value(settings, CONFIGKEYS[key])
    end

    # Register additional key => path pairs from +more_keys+ in CONFIGKEYS,
    # overwriting entries that already exist. The table is a shared constant,
    # so the additions are visible to every object using this module.
    def merge(more_keys)
      more_keys.each {|k,v| CONFIGKEYS[k] = v}
    end

  end
end
|