ghtorrent 0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,69 @@
1
+ # Copyright 2012 Georgios Gousios <gousiosg@gmail.com>
2
+ #
3
+ # Redistribution and use in source and binary forms, with or
4
+ # without modification, are permitted provided that the following
5
+ # conditions are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above
8
+ # copyright notice, this list of conditions and the following
9
+ # disclaimer.
10
+ #
11
+ # 2. Redistributions in binary form must reproduce the above
12
+ # copyright notice, this list of conditions and the following
13
+ # disclaimer in the documentation and/or other materials
14
+ # provided with the distribution.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17
+ # AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18
+ # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
+ # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
20
+ # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
+ # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
+ # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
+ # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
+ # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
+ # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
+ # POSSIBILITY OF SUCH DAMAGE.
28
+
29
+ require 'logger'
30
+
31
module GHTorrent
  # Mixin that adds level-specific logging helpers to the including object.
  # Every message is forwarded to the object held in @logger; when @logger
  # is not set the helpers do nothing and return nil.
  module Logging

    DEBUG_LEVEL = defined?(Logger) ? Logger::DEBUG : 0

    # Log +msg+ at the warn level.
    def warn(msg)
      log(:warn, msg)
    end

    # Log +msg+ at the info level.
    def info(msg)
      log(:info, msg)
    end

    # Log +msg+ at the debug level.
    def debug(msg)
      log(:debug, msg)
    end

    private

    # Forward +msg+ to @logger at the requested +level+. Any level other
    # than :fatal, :error, :warn or :info is logged at the debug level.
    def log(level, msg)
      return unless @logger

      case level
      when :fatal then @logger.fatal(msg)
      when :error then @logger.error(msg)
      when :warn  then @logger.warn(msg)
      when :info  then @logger.info(msg)
      else             @logger.debug(msg)
      end
    end
  end
end
@@ -0,0 +1,60 @@
1
+ require 'sequel'
2
+
3
# Initial schema: creates the users, projects, commits, commit_parents and
# followers tables, and removes them again on rollback.
Sequel.migration do
  up do
    puts("Creating table users")
    create_table :users do
      primary_key :id
      String :login, :unique => true, :null => false
      String :name
      String :company, :null => true
      String :location, :null => true
      String :email, :null => true, :unique => true
      TrueClass :hireable, :null => true
      String :bio, :null => true
      Time :created_at, :null => false
    end

    puts("Creating table projects")
    create_table :projects do
      primary_key :id
      String :url
      foreign_key :owner_id, :users
      String :name, :null => false
      String :description
      String :language
      Time :created_at, :null => false
    end

    puts("Creating table commits")
    create_table :commits do
      primary_key :id
      String :sha, :size => 40, :unique => true
      foreign_key :author_id, :users
      foreign_key :committer_id, :users
      Time :created_at, :null => false
    end

    puts("Creating table commit_parents")
    create_table :commit_parents do
      foreign_key :commit_id, :commits, :null => false
      foreign_key :parent_id, :commits, :null => false
      primary_key [:commit_id, :parent_id]
    end

    puts("Creating table followers")
    create_table :followers do
      foreign_key :user_id, :users, :null => false
      foreign_key :follower_id, :users, :null => false
      primary_key [:user_id, :follower_id]
    end
  end

  # Drop in the reverse order of creation: tables that hold foreign keys
  # (followers, commit_parents, commits, projects) must go before the
  # tables they reference (commits, users), otherwise the drop is rejected
  # by databases that enforce referential integrity.
  down do
    drop_table :followers
    drop_table :commit_parents
    drop_table :commits
    drop_table :projects
    drop_table :users
  end
end
@@ -0,0 +1,15 @@
1
+ require 'sequel'
2
+
3
# Adds a created_at timestamp column to the followers table.
Sequel.migration do
  up do
    alter_table :followers do
      # Use the generic Time class (as the created_at columns of the other
      # tables do) rather than the symbol :Time, which Sequel would pass
      # through verbatim as a database-specific type name.
      # NOTE(review): Time.now is evaluated once, when the migration runs,
      # so it acts as a fixed default for rows inserted without a value.
      add_column :created_at, Time, :null => false, :default => Time.now
    end
  end

  down do
    alter_table :followers do
      drop_column :created_at
    end
  end
end
@@ -0,0 +1,40 @@
1
+ require 'sequel'
2
+
3
# Adds (and on rollback removes) an ext_ref_id column on the users,
# projects, commits and followers tables.
Sequel.migration do
  # Tables that receive the column, in the order they are altered.
  tables = [:users, :projects, :commits, :followers]

  up do
    tables.each do |table|
      alter_table(table) do
        add_column :ext_ref_id, String, :null => false, :size => 24, :default => "0"
      end
    end
  end

  down do
    tables.each do |table|
      alter_table(table) do
        drop_column :ext_ref_id
      end
    end
  end
end
40
+
@@ -0,0 +1,48 @@
1
+ # Copyright 2012 Georgios Gousios <gousiosg@gmail.com>
2
+ #
3
+ # Redistribution and use in source and binary forms, with or
4
+ # without modification, are permitted provided that the following
5
+ # conditions are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above
8
+ # copyright notice, this list of conditions and the following
9
+ # disclaimer.
10
+ #
11
+ # 2. Redistributions in binary form must reproduce the above
12
+ # copyright notice, this list of conditions and the following
13
+ # disclaimer in the documentation and/or other materials
14
+ # provided with the distribution.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17
+ # AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18
+ # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
+ # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
20
+ # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
+ # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
+ # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
+ # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
+ # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
+ # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
+ # POSSIBILITY OF SUCH DAMAGE.
28
+
29
module GHTorrent

  # Factory mixin for obtaining persistence adapters by symbolic name.
  module Persister

    # Adapter name => class instantiated by #connect.
    ADAPTERS = {
      :mongo => GHTorrent::MongoPersister,
      :noop => GHTorrent::NoopPersister
    }

    # Factory method for retrieving persistence connections.
    # The +settings+ argument is a fully parsed YAML document
    # passed on to adapters. The available +adapter+ values are
    # :mongo and :noop.
    #
    # Returns a new instance of the adapter class, built with +settings+.
    # Raises ArgumentError when +adapter+ is not a key of ADAPTERS, instead
    # of failing later with a NoMethodError on nil.
    def connect(adapter, settings)
      driver = ADAPTERS[adapter]

      if driver.nil?
        known = ADAPTERS.keys.map { |k| k.inspect }.join(', ')
        raise ArgumentError,
              "Unknown persister adapter #{adapter.inspect} (available: #{known})"
      end

      driver.new(settings)
    end

  end
end
@@ -0,0 +1,148 @@
1
+ # Copyright 2012 Georgios Gousios <gousiosg@gmail.com>
2
+ #
3
+ # Redistribution and use in source and binary forms, with or
4
+ # without modification, are permitted provided that the following
5
+ # conditions are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above
8
+ # copyright notice, this list of conditions and the following
9
+ # disclaimer.
10
+ #
11
+ # 2. Redistributions in binary form must reproduce the above
12
+ # copyright notice, this list of conditions and the following
13
+ # disclaimer in the documentation and/or other materials
14
+ # provided with the distribution.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17
+ # AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18
+ # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
+ # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
20
+ # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
+ # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
+ # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
+ # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
+ # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
+ # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
+ # POSSIBILITY OF SUCH DAMAGE.
28
+
29
module GHTorrent
  # Retrieval helpers that look an entity up through @persister first and
  # fall back to an API request (storing the result) when nothing is found.
  #
  # NOTE(review): @persister is read but never assigned in this module; the
  # including class is presumably expected to set it. api_request and
  # paged_api_request are presumably supplied by GHTorrent::APIClient.
  module Retriever

    include GHTorrent::APIClient
    include GHTorrent::Settings

    # Keeps a reference to +settings+ and caches the configured :uniq_id
    # value, which is used as the key under which the persister's store
    # result is attached to freshly retrieved entities.
    def initialize(settings)
      super(settings)
      @settings = settings
      @uniq = config(:uniq_id)
    end

    # Return the record for the user with login +user+.
    #
    # A stored record is returned when one exists; otherwise the user is
    # requested from the API, stored, and returned with the result of the
    # store call under the @uniq key.
    # Raises GHTorrentException when the API request yields nil.
    def retrieve_user_byusername(user)
      stored_user = @persister.find(:users, {'login' => user})

      unless stored_user.empty?
        debug "Retriever: Already got user #{user}"
        return stored_user.first
      end

      url = ghurl "users/#{user}"
      u = api_request(url)

      # raise (not throw): throw is catch/throw control flow and would
      # surface as an "uncaught throw" error instead of this exception.
      if u.nil?
        raise GHTorrentException.new("Cannot find user #{user}")
      end

      unq = @persister.store(:users, u)
      u[@uniq] = unq
      info "Retriever: New user #{user}"
      u
    end

    # Try Github API v2 user search by email. This is optional info, so
    # it may not return any data.
    # http://develop.github.com/p/users.html
    #
    # The +name+ argument is accepted but not used.
    def retrieve_user_byemail(email, name)
      url = ghurl_v2("user/email/#{email}")
      api_request(url)
    end

    # Request the followers of +user+ from the API and store those that
    # are not stored yet. Returns the result of a fresh persister lookup of
    # the followers of +user+, performed after the new ones were stored.
    def retrieve_new_user_followers(user)
      stored_followers = @persister.find(:followers, {'follows' => user})

      followers = paged_api_request(ghurl("users/#{user}/followers"))
      followers.each do |x|
        # Tag the API entry with the followed user before comparing/storing.
        x['follows'] = user

        exists = stored_followers.any? { |f|
          f['follows'] == user && f['login'] == x['login']
        }

        if exists
          debug "Retriever: Follower #{user} -> #{x['login']} exists"
        else
          @persister.store(:followers, x)
          info "Retriever: Added follower #{user} -> #{x['login']}"
        end
      end

      @persister.find(:followers, {'follows' => user})
    end

    # Return the commit +sha+ of repository +repo+ owned by +user+.
    #
    # A stored record is returned when one exists; otherwise the commit is
    # requested from the API, stored, and returned with the result of the
    # store call under the @uniq key.
    # Raises GHTorrentException when the API request yields nil.
    def retrieve_commit(repo, sha, user)
      commit = @persister.find(:commits, {'sha' => "#{sha}"})

      unless commit.empty?
        debug "Retriever: Already got commit #{repo} -> #{sha}"
        return commit.first
      end

      url = ghurl "repos/#{user}/#{repo}/commits/#{sha}"
      c = api_request(url)

      if c.nil?
        raise GHTorrentException.new("Cannot find commit #{user}/#{repo}/#{sha}")
      end

      unq = @persister.store(:commits, c)
      info "Retriever: New commit #{repo} -> #{sha}"
      c[@uniq] = unq
      c
    end

    # Return the repository +repo+ owned by +user+.
    #
    # A stored record is returned when one exists; otherwise the repository
    # is requested from the API, stored, and returned with the result of
    # the store call under the @uniq key.
    # Raises GHTorrentException when the API request yields nil.
    def retrieve_repo(user, repo)
      stored_repo = @persister.find(:repos, {'owner.login' => user,
                                             'name' => repo })

      unless stored_repo.empty?
        debug "Retriever: Already got repo #{user} -> #{repo}"
        return stored_repo.first
      end

      url = ghurl "repos/#{user}/#{repo}"
      r = api_request(url)

      if r.nil?
        raise GHTorrentException.new("Cannot find repo #{user}/#{repo}")
      end

      unq = @persister.store(:repos, r)
      info "Retriever: New repo #{user} -> #{repo}"
      r[@uniq] = unq
      r
    end

    # Get current Github events.
    # NOTE(review): the URL is fixed here and does not go through the
    # configured mirror base used by the other requests - confirm intent.
    def get_events
      api_request "https://api.github.com/events"
    end

    private

    # Build a request URL by appending +path+ to the :mirror_urlbase setting.
    def ghurl(path)
      config(:mirror_urlbase) + path
    end

    # Build a request URL by appending +path+ to the :mirror_urlbase_v2
    # setting.
    def ghurl_v2(path)
      config(:mirror_urlbase_v2) + path
    end
  end
end
@@ -0,0 +1,63 @@
1
+ # Copyright 2012 Georgios Gousios <gousiosg@gmail.com>
2
+ #
3
+ # Redistribution and use in source and binary forms, with or
4
+ # without modification, are permitted provided that the following
5
+ # conditions are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above
8
+ # copyright notice, this list of conditions and the following
9
+ # disclaimer.
10
+ #
11
+ # 2. Redistributions in binary form must reproduce the above
12
+ # copyright notice, this list of conditions and the following
13
+ # disclaimer in the documentation and/or other materials
14
+ # provided with the distribution.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17
+ # AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18
+ # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
+ # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
20
+ # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
+ # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
+ # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
+ # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
+ # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
+ # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
+ # POSSIBILITY OF SUCH DAMAGE.
28
+
29
+ require 'yaml'
30
+
31
module GHTorrent
  # Mixin that maps symbolic configuration names to the key strings used
  # to look values up in the settings document.
  #
  # NOTE(review): #config calls +settings+ and +read_value+, neither of
  # which is defined in this module; read_value presumably comes from
  # GHTorrent::Utils and settings from the including object - confirm.
  module Settings

    include GHTorrent::Utils

    # Symbolic name => key string handed to read_value.
    CONFIGKEYS = {
      :amqp_host         => "amqp.host",
      :amqp_port         => "amqp.port",
      :amqp_username     => "amqp.username",
      :amqp_password     => "amqp.password",
      :amqp_exchange     => "amqp.exchange",

      :sql_url           => "sql.url",

      :mirror_urlbase    => "mirror.urlbase",
      :mirror_urlbase_v2 => "mirror.urlbase_v2",
      :mirror_reqrate    => "mirror.reqrate",
      :mirror_pollevery  => "mirror.pollevery",
      :mirror_persister  => "mirror.persister",

      :uniq_id           => "uniq_id"
    }

    # Look up the value registered for the symbolic +key+ by passing the
    # settings object and the key string from CONFIGKEYS to read_value.
    def config(key)
      document = settings
      path = CONFIGKEYS[key]
      read_value(document, path)
    end

    # Register every name/key-string pair of +more_keys+ in CONFIGKEYS,
    # overwriting entries that already exist. Returns +more_keys+.
    def merge(more_keys)
      more_keys.each do |name, path|
        CONFIGKEYS[name] = path
      end
    end

  end
end