ghtorrent 0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,69 @@
1
+ # Copyright 2012 Georgios Gousios <gousiosg@gmail.com>
2
+ #
3
+ # Redistribution and use in source and binary forms, with or
4
+ # without modification, are permitted provided that the following
5
+ # conditions are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above
8
+ # copyright notice, this list of conditions and the following
9
+ # disclaimer.
10
+ #
11
+ # 2. Redistributions in binary form must reproduce the above
12
+ # copyright notice, this list of conditions and the following
13
+ # disclaimer in the documentation and/or other materials
14
+ # provided with the distribution.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17
+ # AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18
+ # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
+ # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
20
+ # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
+ # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
+ # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
+ # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
+ # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
+ # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
+ # POSSIBILITY OF SUCH DAMAGE.
28
+
29
+ require 'logger'
30
+
31
module GHTorrent
  # Mixin with leveled logging helpers. All helpers delegate to the object
  # held in @logger and silently do nothing while @logger is unset.
  module Logging

    # Numeric debug severity: Logger::DEBUG when Logger is loaded, 0 otherwise.
    DEBUG_LEVEL = defined?(Logger) ? Logger::DEBUG : 0

    # Log +msg+ at debug severity.
    def debug(msg)
      log(:debug, msg)
    end

    # Log +msg+ at info severity.
    def info(msg)
      log(:info, msg)
    end

    # Log +msg+ at warn severity.
    def warn(msg)
      log(:warn, msg)
    end

    private

    # Forward +msg+ to @logger through the method named by +level+.
    # Any level other than :fatal, :error, :warn or :info is logged
    # through debug. Returns nil without logging when @logger is unset.
    def log(level, msg)
      return unless @logger

      named_levels = [:fatal, :error, :warn, :info]
      severity = named_levels.include?(level) ? level : :debug
      @logger.public_send(severity, msg)
    end
  end
end
@@ -0,0 +1,60 @@
1
+ require 'sequel'
2
+
3
# Initial relational schema: users, projects, commits, commit parent links
# and follower links.
Sequel.migration do
  # Tables are created parents first, so every foreign key points at a
  # table that already exists.
  up do
    puts("Creating table users")
    create_table :users do
      primary_key :id
      String :login, :unique => true, :null => false
      String :name
      String :company, :null => true
      String :location, :null => true
      String :email, :null => true, :unique => true
      TrueClass :hireable, :null => true
      String :bio, :null => true
      Time :created_at, :null => false
    end

    puts("Creating table projects")
    create_table :projects do
      primary_key :id
      String :url
      foreign_key :owner_id, :users
      String :name, :null => false
      String :description
      String :language
      Time :created_at, :null => false
    end

    puts("Creating table commits")
    create_table :commits do
      primary_key :id
      String :sha, :size => 40, :unique => true
      foreign_key :author_id, :users
      foreign_key :committer_id, :users
      Time :created_at, :null => false
    end

    puts("Creating table commit_parents")
    create_table :commit_parents do
      foreign_key :commit_id, :commits, :null => false
      foreign_key :parent_id, :commits, :null => false
      primary_key [:commit_id, :parent_id]
    end

    puts("Creating table followers")
    create_table :followers do
      foreign_key :user_id, :users, :null => false
      foreign_key :follower_id, :users, :null => false
      primary_key [:user_id, :follower_id]
    end
  end

  # Drop in reverse order of creation: a table can only be removed once no
  # remaining table holds a foreign key to it (users is referenced by
  # projects, commits and followers; commits by commit_parents).
  down do
    drop_table :followers
    drop_table :commit_parents
    drop_table :commits
    drop_table :projects
    drop_table :users
  end
end
@@ -0,0 +1,15 @@
1
+ require 'sequel'
2
+
3
# Adds a creation timestamp to the followers table.
Sequel.migration do
  up do
    alter_table :followers do
      # The type is the Ruby class Time (Sequel's generic timestamp type),
      # matching the other created_at columns in the schema. A Symbol such
      # as :Time would be sent to the database verbatim as a type name.
      # Time.now is evaluated once, when the migration runs, and becomes the
      # column's constant default, which also fills rows that already exist.
      add_column :created_at, Time, :null => false, :default => Time.now
    end
  end

  down do
    alter_table :followers do
      drop_column :created_at
    end
  end
end
@@ -0,0 +1,40 @@
1
+ require 'sequel'
2
+
3
# Adds an ext_ref_id column (24 characters, non-null, default "0") to the
# users, projects, commits and followers tables.
Sequel.migration do
  # Tables receiving the column, in the order they are altered.
  tables = [:users, :projects, :commits, :followers]

  up do
    tables.each do |table|
      alter_table(table) do
        add_column :ext_ref_id, String, :null => false, :size => 24, :default => "0"
      end
    end
  end

  down do
    tables.each do |table|
      alter_table(table) do
        drop_column :ext_ref_id
      end
    end
  end
end
40
+
@@ -0,0 +1,48 @@
1
+ # Copyright 2012 Georgios Gousios <gousiosg@gmail.com>
2
+ #
3
+ # Redistribution and use in source and binary forms, with or
4
+ # without modification, are permitted provided that the following
5
+ # conditions are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above
8
+ # copyright notice, this list of conditions and the following
9
+ # disclaimer.
10
+ #
11
+ # 2. Redistributions in binary form must reproduce the above
12
+ # copyright notice, this list of conditions and the following
13
+ # disclaimer in the documentation and/or other materials
14
+ # provided with the distribution.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17
+ # AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18
+ # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
+ # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
20
+ # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
+ # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
+ # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
+ # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
+ # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
+ # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
+ # POSSIBILITY OF SUCH DAMAGE.
28
+
29
module GHTorrent

  # Factory mixin that maps an adapter name to a persister class and
  # instantiates it.
  module Persister

    # Adapter name => persister class.
    ADAPTERS = {
      :mongo => GHTorrent::MongoPersister,
      :noop => GHTorrent::NoopPersister
    }

    # Factory method for retrieving persistence connections.
    # The +settings+ argument is a fully parsed YAML document
    # passed on to adapters. The available +adapter+ values are :mongo and
    # :noop; the name may be given as a Symbol or as a String (for example
    # when it is read straight from a settings file).
    #
    # Returns a new instance of the selected persister class.
    # Raises ArgumentError when +adapter+ does not name a known adapter.
    def connect(adapter, settings)
      # Normalize "mongo" to :mongo; leave nil and other objects untouched so
      # they fall through to the explicit error below.
      key = adapter.respond_to?(:to_sym) ? adapter.to_sym : adapter
      driver = ADAPTERS[key]

      if driver.nil?
        raise ArgumentError,
              "Unknown persister adapter #{adapter.inspect}, expected one of #{ADAPTERS.keys.inspect}"
      end

      driver.new(settings)
    end

  end
end
@@ -0,0 +1,148 @@
1
+ # Copyright 2012 Georgios Gousios <gousiosg@gmail.com>
2
+ #
3
+ # Redistribution and use in source and binary forms, with or
4
+ # without modification, are permitted provided that the following
5
+ # conditions are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above
8
+ # copyright notice, this list of conditions and the following
9
+ # disclaimer.
10
+ #
11
+ # 2. Redistributions in binary form must reproduce the above
12
+ # copyright notice, this list of conditions and the following
13
+ # disclaimer in the documentation and/or other materials
14
+ # provided with the distribution.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17
+ # AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18
+ # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
+ # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
20
+ # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
+ # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
+ # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
+ # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
+ # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
+ # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
+ # POSSIBILITY OF SUCH DAMAGE.
28
+
29
module GHTorrent
  # Mixin that retrieves Github entities through the API and stores them via
  # @persister, returning the stored copy when one already exists.
  #
  # NOTE(review): @persister is read but never assigned in this module; the
  # including class is presumably expected to set it -- confirm.
  module Retriever

    include GHTorrent::APIClient
    include GHTorrent::Settings

    # Keeps +settings+ and caches the configured unique-id field name, which
    # is used to tag freshly stored entities with the persister's id.
    def initialize(settings)
      super(settings)
      @settings = settings
      @uniq = config(:uniq_id)
    end

    # Return the user with login +user+, fetching and storing it when it is
    # not stored yet.
    # Raises GHTorrentException when the API returns nothing for the user.
    def retrieve_user_byusername(user)
      stored_user = @persister.find(:users, {'login' => user})
      if stored_user.empty?
        url = ghurl "users/#{user}"
        u = api_request(url)

        # raise (not throw): throw is for catch/throw control flow and would
        # surface as an "uncaught throw" error instead of this exception.
        raise GHTorrentException, "Cannot find user #{user}" if u.nil?

        unq = @persister.store(:users, u)
        u[@uniq] = unq
        info "Retriever: New user #{user}"
        u
      else
        debug "Retriever: Already got user #{user}"
        stored_user.first
      end
    end

    # Try Github API v2 user search by email. This is optional info, so
    # it may not return any data. The +name+ argument is currently unused.
    # http://develop.github.com/p/users.html
    def retrieve_user_byemail(email, name)
      url = ghurl_v2("user/email/#{email}")
      api_request(url)
    end

    # Fetch the followers of +user+ from the API and store those not stored
    # yet. Returns the stored followers of +user+ after the update.
    def retrieve_new_user_followers(user)
      stored_followers = @persister.find(:followers, {'follows' => user})

      # Index the already stored logins once, instead of scanning the stored
      # list again for every follower returned by the API.
      known_logins = {}
      stored_followers.each do |f|
        known_logins[f['login']] = true if f['follows'] == user
      end

      followers = paged_api_request(ghurl "users/#{user}/followers")
      followers.each do |x|
        x['follows'] = user

        if known_logins.key?(x['login'])
          debug "Retriever: Follower #{user} -> #{x['login']} exists"
        else
          @persister.store(:followers, x)
          info "Retriever: Added follower #{user} -> #{x['login']}"
        end
      end

      @persister.find(:followers, {'follows' => user})
    end

    # Return commit +sha+ of repository +user+/+repo+, fetching and storing
    # it when it is not stored yet.
    # Raises GHTorrentException when the API returns nothing for the commit.
    def retrieve_commit(repo, sha, user)
      commit = @persister.find(:commits, {'sha' => "#{sha}"})

      if commit.empty?
        url = ghurl "repos/#{user}/#{repo}/commits/#{sha}"
        c = api_request(url)

        raise GHTorrentException, "Cannot find commit #{user}/#{repo}/#{sha}" if c.nil?

        unq = @persister.store(:commits, c)
        info "Retriever: New commit #{repo} -> #{sha}"
        c[@uniq] = unq
        c
      else
        debug "Retriever: Already got commit #{repo} -> #{sha}"
        commit.first
      end
    end

    # Return repository +user+/+repo+, fetching and storing it when it is not
    # stored yet.
    # Raises GHTorrentException when the API returns nothing for the repo.
    def retrieve_repo(user, repo)
      stored_repo = @persister.find(:repos, {'owner.login' => user,
                                             'name' => repo })
      if stored_repo.empty?
        url = ghurl "repos/#{user}/#{repo}"
        r = api_request(url)

        raise GHTorrentException, "Cannot find repo #{user}/#{repo}" if r.nil?

        unq = @persister.store(:repos, r)
        info "Retriever: New repo #{user} -> #{repo}"
        r[@uniq] = unq
        r
      else
        debug "Retriever: Already got repo #{user} -> #{repo}"
        stored_repo.first
      end
    end

    # Get current Github events. The URL is fixed and does not go through the
    # configured mirror base.
    def get_events
      api_request "https://api.github.com/events"
    end

    private

    # Build an API URL by appending +path+ to the configured mirror base.
    def ghurl(path)
      config(:mirror_urlbase) + path
    end

    # Build an API v2 URL by appending +path+ to the configured v2 base.
    def ghurl_v2(path)
      config(:mirror_urlbase_v2) + path
    end
  end
end
@@ -0,0 +1,63 @@
1
+ # Copyright 2012 Georgios Gousios <gousiosg@gmail.com>
2
+ #
3
+ # Redistribution and use in source and binary forms, with or
4
+ # without modification, are permitted provided that the following
5
+ # conditions are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above
8
+ # copyright notice, this list of conditions and the following
9
+ # disclaimer.
10
+ #
11
+ # 2. Redistributions in binary form must reproduce the above
12
+ # copyright notice, this list of conditions and the following
13
+ # disclaimer in the documentation and/or other materials
14
+ # provided with the distribution.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17
+ # AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18
+ # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
+ # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
20
+ # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
+ # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
+ # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
+ # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
+ # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
+ # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
+ # POSSIBILITY OF SUCH DAMAGE.
28
+
29
+ require 'yaml'
30
+
31
module GHTorrent
  # Mixin translating symbolic configuration keys into dotted setting paths
  # and reading their values through read_value.
  module Settings

    include GHTorrent::Utils

    # Symbolic key => dotted path inside the settings document.
    # Deliberately left mutable: #merge registers additional keys here.
    CONFIGKEYS = {
      :amqp_host         => "amqp.host",
      :amqp_port         => "amqp.port",
      :amqp_username     => "amqp.username",
      :amqp_password     => "amqp.password",
      :amqp_exchange     => "amqp.exchange",

      :sql_url           => "sql.url",

      :mirror_urlbase    => "mirror.urlbase",
      :mirror_urlbase_v2 => "mirror.urlbase_v2",
      :mirror_reqrate    => "mirror.reqrate",
      :mirror_pollevery  => "mirror.pollevery",
      :mirror_persister  => "mirror.persister",

      :uniq_id           => "uniq_id"
    }

    # Look up the value for the symbolic +key+ by passing its registered path,
    # together with the includer's +settings+, to read_value.
    def config(key)
      source = settings
      read_value(source, CONFIGKEYS[key])
    end

    # Register every key => path pair of +more_keys+ in CONFIGKEYS,
    # overwriting existing entries. Returns +more_keys+.
    def merge(more_keys)
      more_keys.each do |name, path|
        CONFIGKEYS.store(name, path)
      end
    end

  end
end