ghtorrent 0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/ghtorrent.rb ADDED
@@ -0,0 +1,22 @@
1
+ #require 'ghtorrent-old/ghtorrent-old'
2
+
3
# Top-level namespace of the GHTorrent library; this file only declares the
# version constant here.
+ module GHTorrent
4
# Release number of the library.
# NOTE(review): this is a Float, not the String that version constants
# conventionally are; a Float cannot express versions such as "0.2.1" or
# distinguish "0.10" from "0.1". Confirm how callers use it before changing.
+ VERSION = 0.2
5
+ end
6
+
7
+ require 'ghtorrent/command'
8
+
9
+ require 'ghtorrent/utils'
10
+ require 'ghtorrent/logging'
11
+ require 'ghtorrent/settings'
12
+ require 'ghtorrent/api_client'
13
+ require 'ghtorrent/call_stack'
14
+
15
+ require 'ghtorrent/adapters/base_adapter'
16
+ require 'ghtorrent/adapters/mongo_persister'
17
+ require 'ghtorrent/adapters/noop_persister'
18
+
19
+ require 'ghtorrent/persister'
20
+ require 'ghtorrent/retriever'
21
+
22
+ require 'ghtorrent/ghtorrent'
@@ -0,0 +1,91 @@
1
+ # Copyright 2012 Georgios Gousios <gousiosg@gmail.com>
2
+ #
3
+ # Redistribution and use in source and binary forms, with or
4
+ # without modification, are permitted provided that the following
5
+ # conditions are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above
8
+ # copyright notice, this list of conditions and the following
9
+ # disclaimer.
10
+ #
11
+ # 2. Redistributions in binary form must reproduce the above
12
+ # copyright notice, this list of conditions and the following
13
+ # disclaimer in the documentation and/or other materials
14
+ # provided with the distribution.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17
+ # AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18
+ # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
+ # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
20
+ # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
+ # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
+ # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
+ # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
+ # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
+ # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
+ # POSSIBILITY OF SUCH DAMAGE.
28
+
29
+ module GHTorrent
30
+
31
# Base class for persistence adapters. It declares the operations an adapter
# offers (+store+, +find+, +find_by_ext_ref_id+) and implements the one thing
# they share: rejecting entities that are not listed in +ENTITIES+. Concrete
# adapters override the methods, call +super+ to get the validation and then
# do the actual work.
class BaseAdapter

  # Names of the entities an adapter can be asked to handle.
  ENTITIES = [:users, :commits, :followers, :repos, :events].freeze

  # Stores +data+ into +entity+. Returns a unique key for the stored entry.
  #
  # This base implementation only validates +entity+ and returns +nil+.
  # Raises GHTorrentException if +entity+ is not one of +ENTITIES+.
  def store(entity, data = {})
    ensure_known_entity(entity)
  end

  # Retrieves rows from +entity+ matching the provided +query+.
  # The +query+ is performed on the Github API JSON results. For example,
  # given the following JSON object format:
  #
  #   {
  #     commit: {
  #       sha: "23fa34aa442456"
  #     }
  #     author: {
  #       name: {
  #         real_name: "foo"
  #         given_name: "bar"
  #       }
  #     }
  #     created_at: "1980-12-30T22:25:25"
  #   }
  #
  # to query for matching +sha+, pass to +query+
  #
  #   {'commit.sha' => 'a_value'}
  #
  # to query for real_name's matching an argument, pass to +query+
  #
  #   {'author.name.real_name' => 'a_value'}
  #
  # to query for both a specific sha and a specific creation time
  #
  #   {'commit.sha' => 'a_value', 'created_at' => 'other_value'}
  #
  # The persister adapter must translate the query to the underlying data
  # storage engine query capabilities.
  #
  # The results are returned as an array of hierarchical maps, one for each
  # matching JSON object.
  #
  # This base implementation only validates +entity+ and returns +nil+.
  # Raises GHTorrentException if +entity+ is not one of +ENTITIES+.
  def find(entity, query = {})
    ensure_known_entity(entity)
  end

  # Find the record identified by +id+ in +entity+.
  #
  # This base implementation only validates +entity+ and returns +nil+.
  # Raises GHTorrentException if +entity+ is not one of +ENTITIES+.
  def find_by_ext_ref_id(entity, id)
    ensure_known_entity(entity)
  end

  private

  # Raises GHTorrentException unless +entity+ is listed in +ENTITIES+;
  # returns +nil+ otherwise.
  #
  # The exception is raised, not thrown: +throw+ belongs to Ruby's
  # catch/throw mechanism and would surface as an "uncaught throw" error
  # instead of the GHTorrentException callers are meant to rescue.
  def ensure_known_entity(entity)
    unless ENTITIES.include?(entity)
      raise GHTorrentException.new("Persister: Entity #{entity} not known")
    end
  end
end
91
+ end
@@ -0,0 +1,126 @@
1
+ # Copyright 2012 Georgios Gousios <gousiosg@gmail.com>
2
+ #
3
+ # Redistribution and use in source and binary forms, with or
4
+ # without modification, are permitted provided that the following
5
+ # conditions are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above
8
+ # copyright notice, this list of conditions and the following
9
+ # disclaimer.
10
+ #
11
+ # 2. Redistributions in binary form must reproduce the above
12
+ # copyright notice, this list of conditions and the following
13
+ # disclaimer in the documentation and/or other materials
14
+ # provided with the distribution.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17
+ # AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18
+ # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
+ # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
20
+ # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
+ # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
+ # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
+ # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
+ # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
+ # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
+ # POSSIBILITY OF SUCH DAMAGE.
28
+
29
+ require 'mongo'
30
+
31
+ module GHTorrent
32
+
33
# Persistence adapter that keeps every entity in its own collection of a
# configurable MongoDB database.
class MongoPersister < GHTorrent::BaseAdapter

  include GHTorrent::Settings
  include GHTorrent::Logging

  # Configuration options understood by this adapter, mapped to the names
  # they are looked up under.
  LOCALCONFIG = {
    :mongo_host     => "mongo.host",
    :mongo_port     => "mongo.port",
    :mongo_db       => "mongo.db",
    :mongo_username => "mongo.username",
    :mongo_passwd   => "mongo.password"
  }

  attr_reader :settings

  # Creates a new instance of the MongoDB persistence adapter.
  # +set+ is expected to be a parsed YAML settings document.
  #
  # Opens a connection to the configured host/port, selects the configured
  # database and resolves one collection per supported entity.
  def initialize(set)
    merge LOCALCONFIG

    @settings = set
    @uniq = config(:uniq_id)

    connection = Mongo::Connection.new(config(:mongo_host), config(:mongo_port))
    @mongo = connection.db(config(:mongo_db))

    @enttodb = {}
    [:users, :commits, :repos, :followers, :events].each do |entity|
      @enttodb[entity] = get_collection(entity.to_s)
    end
  end

  # Inserts +data+ into the collection backing +entity+ and returns the
  # result of the insert converted to a String.
  def store(entity, data = {})
    super
    collection_for(entity).insert(data).to_s
  end

  # Runs +query+ against the collection backing +entity+. Every returned
  # document is given an extra field, named by the +uniq_id+ configuration
  # value, holding the String form of its '_id', and is converted with +to_h+.
  def find(entity, query = {})
    super

    collection_for(entity).find(query).to_a.map do |row|
      row[@uniq] = row['_id'].to_s
      row.to_h
    end
  end

  # Find the record identified by +id+ in +entity+.
  # Not available in this adapter: always raises NotImplementedError once
  # the entity has been validated.
  def find_by_ext_ref_id(entity, id)
    super
    raise NotImplementedError
  end

  private

  # Looks up the collection registered for +entity+, raising
  # GHTorrentException when there is none.
  def collection_for(entity)
    collection = @enttodb[entity]
    if collection.nil?
      raise GHTorrentException.new("Mongo: Entity #{entity} not supported")
    end
    collection
  end

  # Returns the collection called +col+ from the connected database.
  def get_collection(col)
    @mongo.collection(col.to_s)
  end

end
110
+ end
111
+
112
# Extension of the BSON driver's ordered hash with a conversion to plain
# Ruby hashes.
class BSON::OrderedHash

  # Convert a BSON result to a +Hash+.
  #
  # The conversion is recursive: values that are themselves ordered hashes
  # are converted, and so are ordered hashes found inside (possibly nested)
  # arrays, so that no BSON::OrderedHash remains anywhere in the result.
  # All other values are copied over untouched.
  def to_h
    # Recursive converter for a single value; it refers to itself in order
    # to walk nested arrays.
    plain = lambda do |value|
      case value
      when BSON::OrderedHash then value.to_h
      when Array then value.map { |item| plain.call(item) }
      else value
      end
    end

    inject({}) do |acc, (key, value)|
      acc[key] = plain.call(value)
      acc
    end
  end
end
@@ -0,0 +1,58 @@
1
+ # Copyright 2012 Georgios Gousios <gousiosg@gmail.com>
2
+ #
3
+ # Redistribution and use in source and binary forms, with or
4
+ # without modification, are permitted provided that the following
5
+ # conditions are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above
8
+ # copyright notice, this list of conditions and the following
9
+ # disclaimer.
10
+ #
11
+ # 2. Redistributions in binary form must reproduce the above
12
+ # copyright notice, this list of conditions and the following
13
+ # disclaimer in the documentation and/or other materials
14
+ # provided with the distribution.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17
+ # AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18
+ # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
+ # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
20
+ # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
+ # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
+ # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
+ # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
+ # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
+ # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
+ # POSSIBILITY OF SUCH DAMAGE.
28
+
29
+ module GHTorrent
30
+
31
# Persister adapter that does not store any data. Every operation still
# validates its +entity+ through the base class, then returns a fixed
# placeholder value.
class NoopPersister < BaseAdapter

  # Creates the adapter. +settings+ is optional and unused; it is accepted
  # so that this adapter can be constructed with a settings argument, the
  # same way MongoPersister is, as well as with no argument at all.
  def initialize(settings = nil)
    init(settings)
  end

  # Initialisation hook; there is nothing to set up for this adapter.
  def init(settings)
  end

  # Discards +data+ and returns 0.
  def store(entity, data = {})
    super
    #Nothing to see here
    0
  end

  # Ignores +query+ and returns an empty array.
  def find(entity, query = {})
    super
    #Nothing to see here
    []
  end

  # Never finds anything: returns +nil+.
  def find_by_ext_ref_id(entity, id)
    super
    nil
  end

  # Returns 0.
  def get_id
    0
  end
end
58
+ end
@@ -0,0 +1,106 @@
1
+ # Copyright 2012 Georgios Gousios <gousiosg@gmail.com>
2
+ #
3
+ # Redistribution and use in source and binary forms, with or
4
+ # without modification, are permitted provided that the following
5
+ # conditions are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above
8
+ # copyright notice, this list of conditions and the following
9
+ # disclaimer.
10
+ #
11
+ # 2. Redistributions in binary form must reproduce the above
12
+ # copyright notice, this list of conditions and the following
13
+ # disclaimer in the documentation and/or other materials
14
+ # provided with the distribution.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17
+ # AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18
+ # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
+ # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
20
+ # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
+ # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
+ # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
+ # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
+ # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
+ # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
+ # POSSIBILITY OF SUCH DAMAGE.
28
+
29
+ require 'net/http'
30
+ require 'set'
31
+ require 'open-uri'
32
+ require 'json'
33
+
34
+ module GHTorrent
35
# Mixin with helpers for querying a JSON-over-HTTP API. Requests are
# rate-limited: at most <tt>@settings['mirror']['reqrate']</tt> requests are
# issued per 60-second window.
module APIClient
  include GHTorrent::Logging
  include GHTorrent::Settings

  # Resets the rate-limiting state (request counter and window start).
  #
  # +settings+ is remembered in <tt>@settings</tt>, which the rate limiter
  # reads, unless the including class has already assigned that variable.
  def initialize(settings)
    @settings ||= settings
    @num_api_calls = 0
    @ts = Time.now.tv_sec
  end

  # Fetches consecutive pages of +url+ (page numbers start at 1) and returns
  # the concatenation of their parsed contents.
  #
  # +pages+ limits the number of pages requested; -1 means "until the API
  # runs out of data". Paging also stops at the first page that is empty or
  # that could not be retrieved (+api_request+ returned +nil+).
  def paged_api_request(url, pages = -1)
    limit = pages == -1 ? 1000000 : pages
    # Do not start a second query string if the URL already carries one.
    separator = url.include?('?') ? '&' : '?'
    result = []

    (1..limit).each do |page|
      data = api_request("#{url}#{separator}page=#{page}")
      break if data.nil? || data.empty?
      result += data
    end
    result
  end

  # Requests +url+ and returns the response parsed as JSON, or +nil+ when
  # the raw request returned +nil+.
  def api_request(url)
    body = api_request_raw(url)
    body.nil? ? nil : JSON.parse(body)
  end

  # Requests +url+, honouring the rate limit, and returns the raw response
  # body.
  #
  # Returns +nil+ (after reporting on STDERR) when the server answers with
  # one of the client-error codes 400, 401, 403, 404 or 422. Any other
  # HTTP error is re-raised as OpenURI::HTTPError.
  def api_request_raw(url)
    #Rate limiting to avoid error requests
    now = Time.now.tv_sec
    if now - @ts < 60
      if @num_api_calls >= @settings['mirror']['reqrate'].to_i
        # Quota for this window is used up: wait until the window closes.
        pause = 60 - (now - @ts)
        debug "APIClient: Sleeping for #{pause}"
        sleep(pause)
        @num_api_calls = 0
        @ts = Time.now.tv_sec
      end
    else
      debug "APIClient: Tick, num_calls = #{@num_api_calls}, zeroing"
      @num_api_calls = 0
      @ts = now
    end

    @num_api_calls += 1
    debug "APIClient: Request: #{url} (num_calls = #{@num_api_calls})"
    begin
      # Go through URI rather than Kernel#open: Kernel#open treats a leading
      # "|" as a command to run and no longer opens URLs on current Rubies.
      URI.parse(url).read
    rescue OpenURI::HTTPError => e
      case e.io.status[0].to_i
      # The following indicate valid Github return codes
      when 400, # Bad request
           401, # Unauthorized
           403, # Forbidden
           404, # Not found
           422  # Unprocessable entity
        STDERR.puts "#{url}: #{e.io.status[1]}"
        nil
      else # Server error or HTTP conditions that Github does not report
        raise e
      end
    end
  end
end
106
+ end
@@ -0,0 +1,119 @@
1
+ # Copyright 2012 Georgios Gousios <gousiosg@gmail.com>
2
+ #
3
+ # Redistribution and use in source and binary forms, with or
4
+ # without modification, are permitted provided that the following
5
+ # conditions are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above
8
+ # copyright notice, this list of conditions and the following
9
+ # disclaimer.
10
+ #
11
+ # 2. Redistributions in binary form must reproduce the above
12
+ # copyright notice, this list of conditions and the following
13
+ # disclaimer in the documentation and/or other materials
14
+ # provided with the distribution.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17
+ # AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18
+ # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
+ # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
20
+ # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
+ # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23
+ # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24
+ # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
+ # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26
+ # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
+ # POSSIBILITY OF SUCH DAMAGE.
28
+
29
+ module GHTorrent
30
# A stack whose contents are periodically written to a file named after the
# stack, so that they can be imported again by a later run. Only one
# instance exists per name: asking for an existing name returns the
# instance created earlier.
class CallStack

  # Live stacks, keyed by name. Note that this registry (and the +at_exit+
  # hook installed by +initialize+) keeps every stack reachable.
  @@callstacks = Hash.new

  attr_reader :name

  # Returns the stack registered under the first argument, creating and
  # registering it when there is none yet. Arguments are those of
  # +initialize+.
  def self.new(*args)
    name = args[0]
    return @@callstacks[name] if @@callstacks.has_key?(name)

    stack = allocate
    stack.__send__(:initialize, *args)
    @@callstacks[name] = stack
  end

  # Starts the background thread that rewrites the file +name+ with the
  # contents of +stack+ every +interval+ seconds, for as long as the stack
  # is not empty. Returns the thread.
  #
  # Defined at class level so the thread does not close over a CallStack
  # instance.
  def self.start_flusher(name, stack, interval)
    Thread.new {
      while true
        begin
          unless stack.empty?
            # Write a snapshot: the owner may push/pop while we are writing.
            snapshot = stack.dup
            File.open(name, "w+") { |file|
              snapshot.each { |item| file.write("#{item} \n") }
              file.fsync
            }
          end
        rescue
          puts "flusher thread failed for #{name}"
        end
        # Sleep outside the rescue so a failing write is retried after the
        # usual interval instead of in a tight loop.
        sleep(interval)
      end
    }
  end
  private_class_method :start_flusher

  # Builds the finalizer for a stack: it unregisters +name+, terminates
  # +flusher+ and tidies up the backing file.
  #
  # Defined at class level because a finalizer must not reference the
  # object it is attached to.
  def self.finalizer_for(name, stack, flusher)
    proc { |_object_id|
      puts "Finalizer: Cleaning up #{name}"
      @@callstacks.delete(name)
      flusher.kill
      remove_or_report(name, stack)
    }
  end
  private_class_method :finalizer_for

  # Deletes the file +name+ when +stack+ is empty; otherwise reports how
  # many items are left and keeps the file.
  def self.remove_or_report(name, stack)
    if stack.empty?
      if File.exist?(name)
        puts "removing stack #{name}"
        File.delete(name)
      end
    else
      puts "stack #{name} contains #{stack.size} items"
    end
  end
  private_class_method :remove_or_report

  # Creates the stack called +name+. If a file of that name exists, its
  # lines are imported as the initial contents. The stack is written back
  # to that file every +sync_every+ seconds while it is not empty, and the
  # file is removed at exit if the stack is empty by then.
  def initialize(name, sync_every = 5)
    @stack = Array.new
    @name = name
    @sync = sync_every

    import_stack if File.exist?(name)

    flusher = self.class.__send__(:start_flusher, @name, @stack, @sync)
    ObjectSpace.define_finalizer(
      self, self.class.__send__(:finalizer_for, @name, @stack, flusher))

    at_exit { cleanup }
  end

  # Puts +item+ on top of the stack.
  def push(item)
    @stack.push(item)
  end

  # Removes and returns the top item, or +nil+ when the stack is empty.
  def pop()
    @stack.pop
  end

  # Removes every item from the stack (this is not an emptiness test).
  def empty
    @stack.clear
  end

  private

  # Loads the lines of the backing file into the stack. The trailing
  # whitespace added when the file was written is stripped, and lines that
  # are blank afterwards are skipped.
  def import_stack
    puts "File #{@name} exists, importing stack..."
    lines = File.open(@name, "r") { |file| file.readlines }

    # NOTE(review): the file is written bottom-to-top, so pushing its lines
    # in reverse restores the stack upside down. Kept as found because
    # callers may rely on it -- confirm the intended order.
    imported = 0
    lines.reverse.each do |line|
      item = line.rstrip
      next if item.empty?
      @stack.push(item)
      imported += 1
    end
    puts "\n#{imported} entries read"
  end

  # Removes the backing file if the stack is empty, otherwise reports the
  # number of items left in it.
  def cleanup
    self.class.__send__(:remove_or_report, @name, @stack)
  end
end
119
+ end