ghtorrent 0.2

Sign up to get free protection for your applications and to get access to all the features.
data/lib/ghtorrent.rb ADDED
@@ -0,0 +1,22 @@
1
+ #require 'ghtorrent-old/ghtorrent-old'
2
+
3
# Top-level namespace shared by every GHTorrent component.
module GHTorrent
  # Version of the library, expressed as a Float.
  VERSION = 0.2
end
6
+
7
+ require 'ghtorrent/command'
8
+
9
+ require 'ghtorrent/utils'
10
+ require 'ghtorrent/logging'
11
+ require 'ghtorrent/settings'
12
+ require 'ghtorrent/api_client'
13
+ require 'ghtorrent/call_stack'
14
+
15
+ require 'ghtorrent/adapters/base_adapter'
16
+ require 'ghtorrent/adapters/mongo_persister'
17
+ require 'ghtorrent/adapters/noop_persister'
18
+
19
+ require 'ghtorrent/persister'
20
+ require 'ghtorrent/retriever'
21
+
22
+ require 'ghtorrent/ghtorrent'
@@ -0,0 +1,91 @@
1
+ # Copyright 2012 Georgios Gousios <gousiosg@gmail.com>
2
+ #
3
+ # Redistribution and use in source and binary forms, with or
4
+ # without modification, are permitted provided that the following
5
+ # conditions are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above
8
+ # copyright notice, this list of conditions and the following
9
+ # disclaimer.
10
+ #
11
+ # 2. Redistributions in binary form must reproduce the above
12
+ # copyright notice, this list of conditions and the following
13
+ # disclaimer in the documentation and/or other materials
14
+ # provided with the distribution.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17
+ # AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18
+ # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
+ # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
20
+ # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
+ # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
+ # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
+ # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
+ # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
+ # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
+ # POSSIBILITY OF SUCH DAMAGE.
28
+
29
module GHTorrent

  # Base class for persistence adapters. Each public method only validates
  # the +entity+ argument against ENTITIES; concrete adapters override the
  # method, call +super+ to get that validation and then do the actual work.
  class BaseAdapter

    # Names of the entities an adapter can be asked to store or query.
    ENTITIES = [:users, :commits, :followers, :repos, :events]

    # Stores +data+ into +entity+. Returns a unique key for the stored entry.
    #
    # The base implementation stores nothing and returns nil. It raises
    # GHTorrentException when +entity+ is not listed in ENTITIES.
    def store(entity, data = {})
      ensure_known_entity(entity)
    end

    # Retrieves rows from +entity+ matching the provided +query+.
    # The +query+
    # is performed on the Github API JSON results. For example, given the
    # following JSON object format:
    #
    #   {
    #     commit: {
    #       sha: "23fa34aa442456"
    #     }
    #     author: {
    #       name: {
    #         real_name: "foo"
    #         given_name: "bar"
    #       }
    #     }
    #     created_at: "1980-12-30T22:25:25"
    #   }
    #
    # to query for matching +sha+, pass to +query+
    #
    #   {'commit.sha' => 'a_value'}
    #
    # to query for real_name's matching an argument, pass to +query+
    #
    #   {'author.name.real_name' => 'a_value'}
    #
    # to query for both a specific sha and a specific creation time
    #
    #   {'commit.sha' => 'a_value', 'created_at' => 'other_value'}
    #
    # The persister adapter must translate the query to the underlying data
    # storage engine query capabilities.
    #
    # The results are returned as an array of hierarchical maps, one for each
    # matching JSON object.
    #
    # The base implementation performs no lookup and returns nil. It raises
    # GHTorrentException when +entity+ is not listed in ENTITIES.
    def find(entity, query = {})
      ensure_known_entity(entity)
    end

    # Find the record identified by +id+ in +entity+
    #
    # The base implementation performs no lookup and returns nil. It raises
    # GHTorrentException when +entity+ is not listed in ENTITIES.
    def find_by_ext_ref_id(entity, id)
      ensure_known_entity(entity)
    end

    private

    # Raises GHTorrentException unless +entity+ is one of ENTITIES; returns
    # nil otherwise. The exception is raised with +raise+: +throw+ belongs
    # to catch/throw and would surface as an "uncaught throw" error instead
    # of the intended exception.
    def ensure_known_entity(entity)
      unless ENTITIES.include?(entity)
        raise GHTorrentException.new("Persister: Entity #{entity} not known")
      end
    end
  end
end
@@ -0,0 +1,126 @@
1
+ # Copyright 2012 Georgios Gousios <gousiosg@gmail.com>
2
+ #
3
+ # Redistribution and use in source and binary forms, with or
4
+ # without modification, are permitted provided that the following
5
+ # conditions are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above
8
+ # copyright notice, this list of conditions and the following
9
+ # disclaimer.
10
+ #
11
+ # 2. Redistributions in binary form must reproduce the above
12
+ # copyright notice, this list of conditions and the following
13
+ # disclaimer in the documentation and/or other materials
14
+ # provided with the distribution.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17
+ # AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18
+ # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
+ # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
20
+ # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
+ # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
+ # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
+ # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
+ # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
+ # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
+ # POSSIBILITY OF SUCH DAMAGE.
28
+
29
+ require 'mongo'
30
+
31
module GHTorrent

  # Persistence adapter that keeps every entity in its own collection of a
  # MongoDB database. Host, port and database name are read through
  # +config+ (presumably supplied by GHTorrent::Settings -- confirm).
  class MongoPersister < GHTorrent::BaseAdapter

    include GHTorrent::Settings
    include GHTorrent::Logging

    # Configuration keys understood by this adapter, mapped to their
    # dotted names in the settings document.
    LOCALCONFIG = {
      :mongo_host => "mongo.host",
      :mongo_port => "mongo.port",
      :mongo_db => "mongo.db",
      :mongo_username => "mongo.username",
      :mongo_passwd => "mongo.password"
    }

    attr_reader :settings

    # Builds the adapter from +set+, a parsed YAML settings document:
    # opens the connection, selects the configured database and resolves
    # one collection per supported entity.
    def initialize(set)
      merge LOCALCONFIG

      @settings = set
      @uniq = config(:uniq_id)

      connection = Mongo::Connection.new(config(:mongo_host),
                                         config(:mongo_port))
      @mongo = connection.db(config(:mongo_db))

      @enttodb = {}
      [:users, :commits, :repos, :followers, :events].each do |entity|
        @enttodb[entity] = get_collection(entity.to_s)
      end
    end

    # Inserts +data+ into the collection backing +entity+ and returns the
    # value produced by the insert, converted to a String.
    def store(entity, data = {})
      super
      collection_for(entity).insert(data).to_s
    end

    # Runs +query+ against the collection backing +entity+. Every returned
    # row gets its '_id' copied (as a String) under the configured unique
    # id key and is then converted with +to_h+.
    def find(entity, query = {})
      super

      rows = collection_for(entity).find(query).to_a
      rows.map do |row|
        row[@uniq] = row['_id'].to_s
        row.to_h
      end
    end

    # Find the record identified by +id+ in +entity+.
    # Not supported by this adapter: always raises NotImplementedError
    # once +entity+ has been validated by the base class.
    def find_by_ext_ref_id(entity, id)
      super
      raise NotImplementedError
    end

    private

    # Returns the collection registered for +entity+, raising
    # GHTorrentException when there is none.
    def collection_for(entity)
      collection = @enttodb[entity]
      return collection unless collection.nil?

      raise GHTorrentException.new("Mongo: Entity #{entity} not supported")
    end

    # Looks up the collection named +col+ in the selected database.
    def get_collection(col)
      @mongo.collection(col.to_s)
    end

  end
end
111
+
112
class BSON::OrderedHash

  # Convert a BSON result to a +Hash+.
  # Values that are themselves exactly a BSON::OrderedHash are converted
  # recursively; every other value is copied over untouched.
  def to_h
    plain = {}
    each do |key, value|
      plain[key] = value.instance_of?(BSON::OrderedHash) ? value.to_h : value
    end
    plain
  end
end
@@ -0,0 +1,58 @@
1
+ # Copyright 2012 Georgios Gousios <gousiosg@gmail.com>
2
+ #
3
+ # Redistribution and use in source and binary forms, with or
4
+ # without modification, are permitted provided that the following
5
+ # conditions are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above
8
+ # copyright notice, this list of conditions and the following
9
+ # disclaimer.
10
+ #
11
+ # 2. Redistributions in binary form must reproduce the above
12
+ # copyright notice, this list of conditions and the following
13
+ # disclaimer in the documentation and/or other materials
14
+ # provided with the distribution.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17
+ # AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18
+ # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
+ # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
20
+ # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
+ # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
+ # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
+ # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
+ # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
+ # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
+ # POSSIBILITY OF SUCH DAMAGE.
28
+
29
module GHTorrent

  # Persister adapter that does not store any data. Each operation still
  # goes through the base-class entity validation (via +super+) and then
  # returns a fixed placeholder value.
  class NoopPersister < BaseAdapter

    # Accepts the same single settings argument as MongoPersister so both
    # adapters can be constructed the same way; +settings+ is optional and
    # simply handed to #init.
    def initialize(settings = nil)
      init(settings)
    end

    # Initialisation hook kept for backward compatibility. Does nothing.
    def init(settings)
    end

    # Validates +entity+ and discards +data+. Always returns 0.
    def store(entity, data = {})
      super
      #Nothing to see here
      0
    end

    # Validates +entity+ and ignores +query+. Always returns an empty array.
    def find(entity, query = {})
      super
      #Nothing to see here
      []
    end

    # Validates +entity+ and ignores +id+. Always returns nil.
    def find_by_ext_ref_id(entity, id)
      super
      nil
    end

    # Always returns 0.
    def get_id
      0
    end
  end
end
@@ -0,0 +1,106 @@
1
+ # Copyright 2012 Georgios Gousios <gousiosg@gmail.com>
2
+ #
3
+ # Redistribution and use in source and binary forms, with or
4
+ # without modification, are permitted provided that the following
5
+ # conditions are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above
8
+ # copyright notice, this list of conditions and the following
9
+ # disclaimer.
10
+ #
11
+ # 2. Redistributions in binary form must reproduce the above
12
+ # copyright notice, this list of conditions and the following
13
+ # disclaimer in the documentation and/or other materials
14
+ # provided with the distribution.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17
+ # AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18
+ # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
+ # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
20
+ # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
+ # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
+ # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
+ # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
+ # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
+ # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
+ # POSSIBILITY OF SUCH DAMAGE.
28
+
29
+ require 'net/http'
30
+ require 'set'
31
+ require 'open-uri'
32
+ require 'json'
33
+
34
module GHTorrent
  # Mixin that performs rate-limited HTTP GET requests and parses the
  # responses as JSON.
  #
  # NOTE(review): #api_request_raw reads @settings['mirror']['reqrate'],
  # but nothing in this module assigns @settings; the including class
  # presumably does -- confirm.
  module APIClient
    include GHTorrent::Logging
    include GHTorrent::Settings

    # Resets the request counter and starts a new rate-limiting window.
    # +settings+ is accepted but not stored by this method.
    def initialize(settings)
      @num_api_calls = 0
      @ts = Time.now().tv_sec()
    end

    # Requests "+url+?page=N" for N = 1, 2, ... and concatenates the parsed
    # pages into a single array. Stops after +pages+ pages (-1 means up to
    # 1000000), or earlier at the first page that is empty or could not be
    # retrieved (nil).
    def paged_api_request(url, pages = -1)
      pg = pages == -1 ? 1000000 : pages
      result = Array.new

      (1..pg).each do |x|
        data = api_request("#{url}?page=#{x}")
        # api_request returns nil for the HTTP errors handled in
        # api_request_raw; treat that like an empty page, not a crash.
        break if data.nil? || data.empty?
        result.concat(data)
      end
      result
    end

    # Fetches +url+ and returns the body parsed as JSON, or nil when
    # #api_request_raw returned nil.
    def api_request(url)
      result = api_request_raw(url)
      result.nil? ? nil : JSON.parse(result)
    end

    # Fetches +url+ and returns the raw response body as a String.
    #
    # At most @settings['mirror']['reqrate'] requests are issued per
    # 60-second window; once the limit is hit the call sleeps until the
    # window ends. Returns nil (after writing the status text to STDERR)
    # for HTTP 400, 401, 403, 404 and 422; any other OpenURI::HTTPError is
    # re-raised.
    def api_request_raw(url)
      #Rate limiting to avoid error requests
      # Read the clock once so the wait below cannot turn negative if the
      # window expires between the check and the computation.
      elapsed = Time.now().tv_sec() - @ts
      if elapsed < 60
        if @num_api_calls >= @settings['mirror']['reqrate'].to_i
          wait = 60 - elapsed
          debug "APIClient: Sleeping for #{wait}"
          sleep(wait)
          @num_api_calls = 0
          @ts = Time.now().tv_sec()
        end
      else
        debug "APIClient: Tick, num_calls = #{@num_api_calls}, zeroing"
        @num_api_calls = 0
        @ts = Time.now().tv_sec()
      end

      @num_api_calls += 1
      debug "APIClient: Request: #{url} (num_calls = #{@num_api_calls})"
      begin
        # Newer open-uri exposes URI.open and Kernel#open no longer fetches
        # URLs; older versions only patch Kernel.open. The block form
        # closes the underlying IO once the body has been read.
        opener = URI.respond_to?(:open) ? URI : Kernel
        opener.open(url) { |io| io.read }
      rescue OpenURI::HTTPError => e
        case e.io.status[0].to_i
        # The following indicate valid Github return codes
        when 400, # Bad request
             401, # Unauthorized
             403, # Forbidden
             404, # Not found
             422  # Unprocessable entity
          STDERR.puts "#{url}: #{e.io.status[1]}"
          return nil
        else # Server error or HTTP conditions that Github does not report
          raise
        end
      end
    end
  end
end
@@ -0,0 +1,119 @@
1
+ # Copyright 2012 Georgios Gousios <gousiosg@gmail.com>
2
+ #
3
+ # Redistribution and use in source and binary forms, with or
4
+ # without modification, are permitted provided that the following
5
+ # conditions are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above
8
+ # copyright notice, this list of conditions and the following
9
+ # disclaimer.
10
+ #
11
+ # 2. Redistributions in binary form must reproduce the above
12
+ # copyright notice, this list of conditions and the following
13
+ # disclaimer in the documentation and/or other materials
14
+ # provided with the distribution.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17
+ # AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18
+ # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
+ # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
20
+ # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
+ # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
+ # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
+ # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
+ # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
+ # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
+ # POSSIBILITY OF SUCH DAMAGE.
28
+
29
module GHTorrent
  # A named stack whose contents are periodically written to the file
  # called +name+, so that pending items survive a restart. Only one
  # instance exists per name: CallStack.new returns the already
  # registered stack when the name is known.
  class CallStack

    # Registry of the stacks created so far, keyed by name.
    @@callstacks = Hash.new

    attr_reader :name

    # Returns the stack registered under +args[0]+, creating and
    # registering it first if necessary. The remaining arguments are
    # passed to #initialize.
    def self.new(*args)
      name = args[0]
      return @@callstacks[name] if @@callstacks.has_key?(name)

      stack = allocate
      stack.__send__(:initialize, *args)
      @@callstacks[name] = stack
    end

    # Creates the stack called +name+. If a file with that name exists its
    # lines are imported as the initial contents. A background thread then
    # writes the stack to that file every +sync_every+ seconds while the
    # stack is non-empty. At interpreter exit the file is removed if the
    # stack has been drained.
    def initialize(name, sync_every = 5)
      @stack = Array.new
      @name = name
      @sync = sync_every

      import_entries if File.exist?(name)

      flusher = start_flusher

      # The finalizer is built by a class method so that its proc does not
      # close over +self+. The registry and the flusher thread both keep
      # the stack reachable, so in practice it runs at interpreter
      # shutdown.
      ObjectSpace.define_finalizer(
        self, CallStack.__send__(:finalizer, @name, @stack, flusher))

      at_exit { cleanup }
    end

    # Pushes +item+ on top of the stack.
    def push(item)
      @stack.push(item)
    end

    # Removes and returns the top item, or nil when the stack is empty.
    def pop()
      @stack.pop
    end

    # Removes every item from the stack and returns the now empty
    # underlying array. Despite its name this is not a predicate.
    def empty
      @stack.delete_if { |x| true }
    end

    private

    # Loads the persisted entries from the backing file. The " \n" suffix
    # appended by #flush is stripped so that items do not grow on every
    # restart.
    #
    # NOTE(review): lines are pushed in reverse file order while #flush
    # writes bottom-to-top, so a reloaded stack pops in the opposite order
    # from the one that was flushed. Kept as-is; confirm it is intended.
    def import_entries
      puts "File #{@name} exists, importing stack..."
      lines = File.open(@name, "r") { |f| f.readlines }
      lines.reverse.each { |line| @stack.push(line.chomp.chomp(" ")) }
      puts "\n#{lines.size} entries read"
    end

    # Starts and returns the thread that periodically persists the stack.
    def start_flusher
      Thread.new {
        while true
          begin
            flush unless @stack.empty?
          rescue
            puts "flusher thread failed for #{@name}"
          end
          # Sleep outside the begin block so a failing flush cannot turn
          # this loop into a busy spin.
          sleep(@sync)
        end
      }
    end

    # Rewrites the backing file with one line per item. A copy of the
    # stack is written so that pushes and pops made meanwhile do not
    # disturb the iteration; the block form closes the file on errors too.
    def flush
      snapshot = @stack.dup
      File.open(@name, "w+") do |f|
        snapshot.each { |l| f.write("#{l} \n") }
        f.fsync
      end
    end

    # Removes the backing file if the stack is empty; otherwise reports
    # how many items are still pending.
    def cleanup
      CallStack.__send__(:remove_if_drained, @name, @stack)
    end

    # Builds the finalizer proc for the stack called +name+: it
    # unregisters the stack, kills its +flusher+ thread and runs the same
    # cleanup as #cleanup.
    def self.finalizer(name, stack, flusher)
      proc {
        puts "Finalizer: Cleaning up #{name}"
        @@callstacks.delete(name)
        flusher.kill
        remove_if_drained(name, stack)
      }
    end
    private_class_method :finalizer

    # Deletes the file +name+ when +stack+ is empty and the file exists;
    # otherwise prints the number of items left in +stack+.
    def self.remove_if_drained(name, stack)
      if stack.empty?
        if File.exist?(name)
          puts "removing stack #{name}"
          File.delete(name)
        end
      else
        puts "stack #{name} contains #{stack.size} items"
      end
    end
    private_class_method :remove_if_drained
  end
end