ghtorrent 0.5 → 0.6
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +16 -1
- data/README.md +6 -1
- data/bin/ght-data-retrieval +2 -162
- data/bin/ght-get-more-commits +6 -0
- data/bin/ght-load +1 -224
- data/bin/ght-mirror-events +2 -147
- data/bin/ght-process-event +35 -0
- data/bin/ght-retrieve-repo +6 -0
- data/bin/ght-rm-dupl +2 -130
- data/lib/ghtorrent.rb +10 -0
- data/lib/ghtorrent/adapters/base_adapter.rb +1 -1
- data/lib/ghtorrent/adapters/mongo_persister.rb +12 -1
- data/lib/ghtorrent/api_client.rb +47 -13
- data/lib/ghtorrent/bson_orderedhash.rb +2 -1
- data/lib/ghtorrent/command.rb +18 -0
- data/lib/ghtorrent/commands/ght_data_retrieval.rb +218 -0
- data/lib/ghtorrent/commands/ght_get_more_commits.rb +116 -0
- data/lib/ghtorrent/commands/ght_load.rb +227 -0
- data/lib/ghtorrent/commands/ght_mirror_events.rb +147 -0
- data/lib/ghtorrent/commands/ght_retrieve_repo.rb +118 -0
- data/lib/ghtorrent/commands/ght_rm_dupl.rb +132 -0
- data/lib/ghtorrent/ghtorrent.rb +401 -89
- data/lib/ghtorrent/hash.rb +1 -1
- data/lib/ghtorrent/migrations/011_add_issues.rb +74 -0
- data/lib/ghtorrent/retriever.rb +88 -16
- data/lib/ghtorrent/settings.rb +6 -1
- data/lib/version.rb +1 -1
- metadata +36 -26
data/CHANGELOG
CHANGED
@@ -1,6 +1,21 @@
|
|
1
|
+
= Version 0.6
|
2
|
+
|
3
|
+
* Support retrieval of issues, issue events and issue history
|
4
|
+
* Support for setting username/password for performing requests
|
5
|
+
* Respect Github's x-ratelimit-remaining header by default
|
6
|
+
* Selective processing of events for user-specified repos
|
7
|
+
* New tool (ght-get-more-commits) to retrieve all commits for a repository
|
8
|
+
* New tool (ght-process-event) to process just one event by id
|
9
|
+
* Retrieve 100 items at once by default on multipage requests
|
10
|
+
* Rename watchers -> stargazers, as per Github API change
|
11
|
+
* Fixes to bugs that prevented efficient processing of multipage requests
|
12
|
+
* Several fixes on how pull requests are being processed
|
13
|
+
* Users with invalid git setups are now allowed
|
14
|
+
* Compatibility with Ruby 1.8 restored
|
15
|
+
|
1
16
|
= Version 0.5
|
2
17
|
|
3
|
-
|
18
|
+
* Generic methods for retrieving items that are bound to repositories
|
4
19
|
* Processing of pull requests with commits, comments and history
|
5
20
|
* Processing of project forks
|
6
21
|
* New tool (ght-load) to filter and load events to the queue
|
data/README.md
CHANGED
@@ -129,9 +129,14 @@ please consider citing the following paper:
|
|
129
129
|
|
130
130
|
> Georgios Gousios and Diomidis Spinellis, "GHTorrent: GitHub’s data from a firehose," in _MSR '12: Proceedings of the 9th Working Conference on Mining Software Repositories_, June 2–3, 2012. Zurich, Switzerland.
|
131
131
|
|
132
|
+
See also the following presentation:
|
133
|
+
|
134
|
+
<iframe src="http://www.slideshare.net/slideshow/embed_code/13184524?rel=0" width="342" height="291" frameborder="0" marginwidth="0" marginheight="0" scrolling="no" style="border:1px solid #CCC;border-width:1px 1px 0;margin-bottom:5px" allowfullscreen></iframe>
|
135
|
+
<div style="margin-bottom:5px"> <strong> <a href="http://www.slideshare.net/gousiosg/ghtorrent-githubs-data-from-a-firehose-13184524" title="GHTorrent: Github's Data from a Firehose" target="_blank">GHTorrent: Github's Data from a Firehose</a> </strong> </div>
|
136
|
+
|
132
137
|
#### Authors
|
133
138
|
|
134
|
-
Georgios Gousios <gousiosg@gmail.com>
|
139
|
+
[Georgios Gousios](http://istlab.dmst.aueb.gr/~george) <gousiosg@gmail.com>
|
135
140
|
|
136
141
|
[Diomidis Spinellis](http://www.dmst.aueb.gr/dds) <dds@aueb.gr>
|
137
142
|
|
data/bin/ght-data-retrieval
CHANGED
@@ -1,166 +1,6 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
3
|
require 'rubygems'
|
4
|
-
require '
|
5
|
-
require 'json'
|
6
|
-
require 'pp'
|
4
|
+
require 'ghtorrent'
|
7
5
|
|
8
|
-
|
9
|
-
require 'ghtorrent/settings'
|
10
|
-
require 'ghtorrent/logging'
|
11
|
-
require 'ghtorrent/command'
|
12
|
-
|
13
|
-
class GHTDataRetrieval < GHTorrent::Command
|
14
|
-
|
15
|
-
include GHTorrent::Settings
|
16
|
-
include GHTorrent::Logging
|
17
|
-
|
18
|
-
def parse(msg)
|
19
|
-
JSON.parse(msg)
|
20
|
-
end
|
21
|
-
|
22
|
-
def PushEvent(data)
|
23
|
-
data['payload']['commits'].each do |c|
|
24
|
-
url = c['url'].split(/\//)
|
25
|
-
|
26
|
-
@gh.get_commit url[4], url[5], url[7]
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
def WatchEvent(data)
|
31
|
-
owner = data['repo']['name'].split(/\//)[0]
|
32
|
-
repo = data['repo']['name'].split(/\//)[1]
|
33
|
-
watcher = data['actor']['login']
|
34
|
-
created_at = data['created_at']
|
35
|
-
|
36
|
-
@gh.get_watcher owner, repo, watcher, created_at
|
37
|
-
end
|
38
|
-
|
39
|
-
def FollowEvent(data)
|
40
|
-
follower = data['actor']['login']
|
41
|
-
followed = data['payload']['target']['login']
|
42
|
-
created_at = data['created_at']
|
43
|
-
|
44
|
-
@gh.get_follower(follower, followed, created_at)
|
45
|
-
end
|
46
|
-
|
47
|
-
def MemberEvent(data)
|
48
|
-
owner = data['actor']['login']
|
49
|
-
repo = data['repo']['name'].split(/\//)[1]
|
50
|
-
new_member = data['payload']['member']['login']
|
51
|
-
created_at = data['created_at']
|
52
|
-
|
53
|
-
@gh.get_project_member(owner, repo, new_member, created_at)
|
54
|
-
end
|
55
|
-
|
56
|
-
def CommitCommentEvent(data)
|
57
|
-
user = data['actor']['login']
|
58
|
-
repo = data['repo']['name'].split(/\//)[1]
|
59
|
-
id = data['payload']['comment']['id']
|
60
|
-
created_at = data['created_at']
|
61
|
-
|
62
|
-
@gh.get_commit_comment(user, repo, id, created_at)
|
63
|
-
end
|
64
|
-
|
65
|
-
def PullRequestEvent(data)
|
66
|
-
owner = data['payload']['pull_request']['base']['repo']['owner']['login']
|
67
|
-
repo = data['payload']['pull_request']['base']['repo']['name']
|
68
|
-
pullreq_id = data['payload']['number']
|
69
|
-
action = data['payload']['action']
|
70
|
-
created_at = data['created_at']
|
71
|
-
|
72
|
-
@gh.get_pull_request(owner, repo, pullreq_id, action, created_at)
|
73
|
-
end
|
74
|
-
|
75
|
-
def ForkEvent(data)
|
76
|
-
owner = data['repo']['name'].split(/\//)[0]
|
77
|
-
repo = data['repo']['name'].split(/\//)[1]
|
78
|
-
fork_id = data['payload']['forkee']['id']
|
79
|
-
created_at = data['created_at']
|
80
|
-
|
81
|
-
@gh.get_fork(owner, repo, fork_id, created_at)
|
82
|
-
end
|
83
|
-
|
84
|
-
def PullRequestReviewCommentEvent(data)
|
85
|
-
owner = data['repo']['name'].split(/\//)[0]
|
86
|
-
repo = data['repo']['name'].split(/\//)[1]
|
87
|
-
comment_id = data['payload']['comment']['id']
|
88
|
-
pullreq_id = data['payload']['comment']['_links']['pull_request']['href'].split(/\//)[-1]
|
89
|
-
created_at = data['created_at']
|
90
|
-
|
91
|
-
@gh.get_pullreq_comment(owner, repo, pullreq_id, comment_id, created_at)
|
92
|
-
end
|
93
|
-
|
94
|
-
def IssueCommentEvent(data)
|
95
|
-
owner = data['repo']['name'].split(/\//)[0]
|
96
|
-
repo = data['repo']['name'].split(/\//)[1]
|
97
|
-
pullreq_id = data['payload']['forkee']['id']
|
98
|
-
created_at = data['created_at']
|
99
|
-
|
100
|
-
@gh.get_issue_comment(owner, repo, issue_id, comment_id, created_at)
|
101
|
-
end
|
102
|
-
|
103
|
-
def handlers
|
104
|
-
%w(PushEvent WatchEvent FollowEvent MemberEvent CommitCommentEvent PullRequestEvent ForkEvent PullRequestReviewCommentEvent)
|
105
|
-
#%w(PullRequestReviewCommentEvent)
|
106
|
-
end
|
107
|
-
|
108
|
-
def logger
|
109
|
-
@gh.logger
|
110
|
-
end
|
111
|
-
|
112
|
-
def go
|
113
|
-
@gh = GHTorrent::Mirror.new(@settings)
|
114
|
-
|
115
|
-
# Graceful exit
|
116
|
-
Signal.trap('INT') {
|
117
|
-
info "GHTDataRetrieval: Received SIGINT, exiting"
|
118
|
-
AMQP.stop { EM.stop }
|
119
|
-
}
|
120
|
-
Signal.trap('TERM') {
|
121
|
-
info "GHTDataRetrieval: Received SIGTERM, exiting"
|
122
|
-
AMQP.stop { EM.stop }
|
123
|
-
}
|
124
|
-
|
125
|
-
AMQP.start(:host => config(:amqp_host),
|
126
|
-
:port => config(:amqp_port),
|
127
|
-
:username => config(:amqp_username),
|
128
|
-
:password => config(:amqp_password)) do |connection|
|
129
|
-
|
130
|
-
channel = AMQP::Channel.new(connection, :prefetch => config(:amqp_prefetch))
|
131
|
-
exchange = channel.topic(config(:amqp_exchange), :durable => true,
|
132
|
-
:auto_delete => false)
|
133
|
-
|
134
|
-
handlers.each { |h|
|
135
|
-
queue = channel.queue("#{h}s", {:durable => true})\
|
136
|
-
.bind(exchange, :routing_key => "evt.#{h}")
|
137
|
-
|
138
|
-
info "GHTDataRetrieval: Binding handler #{h} to routing key evt.#{h}"
|
139
|
-
|
140
|
-
queue.subscribe(:ack => true) do |headers, msg|
|
141
|
-
begin
|
142
|
-
data = parse(msg)
|
143
|
-
info "GHTDataRetrieval: Processing event: #{data['type']}-#{data['id']}"
|
144
|
-
send(h, data)
|
145
|
-
headers.ack
|
146
|
-
info "GHTDataRetrieval: Processed event: #{data['type']}-#{data['id']}"
|
147
|
-
rescue Exception => e
|
148
|
-
# Give a message a chance to be reprocessed
|
149
|
-
if headers.redelivered?
|
150
|
-
data = parse(msg)
|
151
|
-
warn "GHTDataRetrieval: Could not process event: #{data['type']}-#{data['id']}"
|
152
|
-
headers.reject(:requeue => false)
|
153
|
-
else
|
154
|
-
headers.reject(:requeue => true)
|
155
|
-
end
|
156
|
-
|
157
|
-
STDERR.puts e
|
158
|
-
STDERR.puts e.backtrace.join("\n")
|
159
|
-
end
|
160
|
-
end
|
161
|
-
}
|
162
|
-
end
|
163
|
-
end
|
164
|
-
end
|
165
|
-
|
166
|
-
GHTDataRetrieval.run
|
6
|
+
GHTDataRetrieval.run(ARGV)
|
data/bin/ght-load
CHANGED
@@ -1,230 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
3
|
require 'rubygems'
|
4
|
-
require '
|
5
|
-
require 'amqp'
|
6
|
-
require 'set'
|
7
|
-
require 'eventmachine'
|
8
|
-
require 'pp'
|
9
|
-
require "amqp/extensions/rabbitmq"
|
10
|
-
|
11
|
-
require 'ghtorrent/settings'
|
12
|
-
require 'ghtorrent/logging'
|
13
|
-
require 'ghtorrent/persister'
|
14
|
-
require 'ghtorrent/command'
|
15
|
-
require 'ghtorrent/bson_orderedhash'
|
16
|
-
|
17
|
-
class GHTLoad < GHTorrent::Command
|
18
|
-
|
19
|
-
include GHTorrent::Settings
|
20
|
-
include GHTorrent::Persister
|
21
|
-
|
22
|
-
def col_info()
|
23
|
-
{
|
24
|
-
:commits => {
|
25
|
-
:name => "commits",
|
26
|
-
:payload => "commit.id",
|
27
|
-
:unq => "commit.id",
|
28
|
-
:col => persister.get_underlying_connection.collection(:commits.to_s),
|
29
|
-
:routekey => "commit.%s"
|
30
|
-
},
|
31
|
-
:events => {
|
32
|
-
:name => "events",
|
33
|
-
:payload => "",
|
34
|
-
:unq => "type",
|
35
|
-
:col => persister.get_underlying_connection.collection(:events.to_s),
|
36
|
-
:routekey => "evt.%s"
|
37
|
-
}
|
38
|
-
}
|
39
|
-
end
|
40
|
-
|
41
|
-
def persister
|
42
|
-
@persister ||= connect(:mongo, @settings)
|
43
|
-
@persister
|
44
|
-
end
|
45
|
-
|
46
|
-
def prepare_options(options)
|
47
|
-
options.banner <<-BANNER
|
48
|
-
Loads object ids from a collection to a queue for further processing.
|
49
|
-
|
50
|
-
#{command_name} [options] collection
|
51
|
-
|
52
|
-
#{command_name} options:
|
53
|
-
BANNER
|
54
|
-
|
55
|
-
options.opt :earliest, 'Seconds since epoch of earliest item to load',
|
56
|
-
:short => 'e', :default => 0, :type => :int
|
57
|
-
options.opt :number, 'Number of items to load (-1 means all)',
|
58
|
-
:short => 'n', :type => :int, :default => -1
|
59
|
-
options.opt :filter,
|
60
|
-
'Filter items by regexp on item attributes: item.attr=regexp',
|
61
|
-
:short => 'f', :type => String, :multi => true
|
62
|
-
end
|
63
|
-
|
64
|
-
def validate
|
65
|
-
super
|
66
|
-
Trollop::die "no collection specified" unless args[0] && !args[0].empty?
|
67
|
-
filter = options[:filter]
|
68
|
-
case
|
69
|
-
when filter.is_a?(Array)
|
70
|
-
options[:filter].each { |x|
|
71
|
-
Trollop::die "not a valid filter #{x}" unless is_filter_valid?(x)
|
72
|
-
}
|
73
|
-
when filter == []
|
74
|
-
# Noop
|
75
|
-
else
|
76
|
-
Trollop::die "A filter can only be a string"
|
77
|
-
end
|
78
|
-
end
|
79
|
-
|
80
|
-
def go
|
81
|
-
# Message tags await publisher ack
|
82
|
-
awaiting_ack = SortedSet.new
|
83
|
-
|
84
|
-
# Num events read
|
85
|
-
num_read = 0
|
86
|
-
|
87
|
-
collection = case args[0]
|
88
|
-
when "events"
|
89
|
-
:events
|
90
|
-
when "commits"
|
91
|
-
:commits
|
92
|
-
end
|
93
|
-
|
94
|
-
puts "Loading from collection #{collection}"
|
95
|
-
puts "Loading items after #{Time.at(options[:earliest])}" if options[:verbose]
|
96
|
-
puts "Loading #{options[:number]} items" if options[:verbose] && options[:number] != -1
|
97
|
-
|
98
|
-
what = case
|
99
|
-
when options[:filter].is_a?(Array)
|
100
|
-
options[:filter].reduce({}) { |acc,x|
|
101
|
-
(k,r) = x.split(/=/)
|
102
|
-
acc[k] = Regexp.new(r)
|
103
|
-
acc
|
104
|
-
}
|
105
|
-
when filter == []
|
106
|
-
{}
|
107
|
-
end
|
108
|
-
|
109
|
-
from = {'_id' => {'$gte' => BSON::ObjectId.from_time(Time.at(options[:earliest]))}}
|
110
|
-
|
111
|
-
(puts "Mongo filter:"; pp what.merge(from)) if options[:verbose]
|
112
|
-
|
113
|
-
AMQP.start(:host => config(:amqp_host),
|
114
|
-
:port => config(:amqp_port),
|
115
|
-
:username => config(:amqp_username),
|
116
|
-
:password => config(:amqp_password)) do |connection|
|
117
|
-
|
118
|
-
channel = AMQP::Channel.new(connection)
|
119
|
-
exchange = channel.topic(config(:amqp_exchange),
|
120
|
-
:durable => true, :auto_delete => false)
|
121
|
-
|
122
|
-
# What to do when the user hits Ctrl+c
|
123
|
-
show_stopper = Proc.new {
|
124
|
-
connection.close { EventMachine.stop }
|
125
|
-
}
|
126
|
-
|
127
|
-
# Read next 1000 items and queue them
|
128
|
-
read_and_publish = Proc.new {
|
129
|
-
|
130
|
-
to_read = if options.number == -1
|
131
|
-
1000
|
132
|
-
else
|
133
|
-
if options.number - num_read - 1 <= 0
|
134
|
-
-1
|
135
|
-
else
|
136
|
-
options.number - num_read - 1
|
137
|
-
end
|
138
|
-
end
|
139
|
-
|
140
|
-
read = 0
|
141
|
-
col_info[collection][:col].find(what.merge(from),
|
142
|
-
:skip => num_read,
|
143
|
-
:limit => to_read).each do |e|
|
144
|
-
|
145
|
-
payload = read_value(e, col_info[collection][:payload])
|
146
|
-
payload = if payload.class == BSON::OrderedHash
|
147
|
-
payload.delete "_id" # Inserted by MongoDB on event insert
|
148
|
-
payload.to_json
|
149
|
-
end
|
150
|
-
read += 1
|
151
|
-
unq = read_value(e, col_info[collection][:unq])
|
152
|
-
if unq.class != String or unq.nil? then
|
153
|
-
throw Exception("Unique value can only be a String")
|
154
|
-
end
|
155
|
-
|
156
|
-
key = col_info[collection][:routekey] % unq
|
157
|
-
|
158
|
-
exchange.publish payload, :persistent => true, :routing_key => key
|
159
|
-
|
160
|
-
num_read += 1
|
161
|
-
puts("Publish id = #{payload[unq]} (#{num_read} total)") if options.verbose
|
162
|
-
awaiting_ack << num_read
|
163
|
-
end
|
164
|
-
|
165
|
-
# Nothing new in the DB and no msgs waiting ack
|
166
|
-
if (read == 0 and awaiting_ack.size == 0) or to_read == -1
|
167
|
-
puts("Finished reading, exiting")
|
168
|
-
show_stopper.call
|
169
|
-
end
|
170
|
-
}
|
171
|
-
|
172
|
-
# Remove acknowledged or failed msg tags from the queue
|
173
|
-
# Trigger more messages to be read when ack msg queue size drops to zero
|
174
|
-
publisher_event = Proc.new { |ack|
|
175
|
-
if ack.multiple then
|
176
|
-
awaiting_ack.delete_if { |x| x <= ack.delivery_tag }
|
177
|
-
else
|
178
|
-
awaiting_ack.delete ack.delivery_tag
|
179
|
-
end
|
180
|
-
|
181
|
-
if awaiting_ack.size == 0
|
182
|
-
puts("ACKS.size= #{awaiting_ack.size}") if options.verbose
|
183
|
-
EventMachine.next_tick do
|
184
|
-
read_and_publish.call
|
185
|
-
end
|
186
|
-
end
|
187
|
-
}
|
188
|
-
|
189
|
-
# Await publisher confirms
|
190
|
-
channel.confirm_select
|
191
|
-
|
192
|
-
# Callback when confirms have arrived
|
193
|
-
channel.on_ack do |ack|
|
194
|
-
puts "ACK: tag=#{ack.delivery_tag}, mul=#{ack.multiple}" if options.verbose
|
195
|
-
publisher_event.call(ack)
|
196
|
-
end
|
197
|
-
|
198
|
-
# Callback when confirms failed.
|
199
|
-
channel.on_nack do |nack|
|
200
|
-
puts "NACK: tag=#{nack.delivery_tag}, mul=#{nack.multiple}" if options.verbose
|
201
|
-
publisher_event.call(nack)
|
202
|
-
end
|
203
|
-
|
204
|
-
# Signal handlers
|
205
|
-
Signal.trap('INT', show_stopper)
|
206
|
-
Signal.trap('TERM', show_stopper)
|
207
|
-
|
208
|
-
# Trigger start processing
|
209
|
-
EventMachine.add_timer(0.1) do
|
210
|
-
read_and_publish.call
|
211
|
-
end
|
212
|
-
end
|
213
|
-
end
|
214
|
-
|
215
|
-
private
|
216
|
-
|
217
|
-
def is_filter_valid?(filter)
|
218
|
-
(k, r) = filter.split(/=/)
|
219
|
-
return false if r.nil?
|
220
|
-
begin
|
221
|
-
Regexp.new(r)
|
222
|
-
true
|
223
|
-
rescue
|
224
|
-
false
|
225
|
-
end
|
226
|
-
end
|
227
|
-
end
|
4
|
+
require 'ghtorrent'
|
228
5
|
|
229
6
|
GHTLoad.run
|
230
7
|
|
data/bin/ght-mirror-events
CHANGED
@@ -1,151 +1,6 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
3
|
require 'rubygems'
|
4
|
-
require '
|
5
|
-
require 'amqp'
|
6
|
-
require 'eventmachine'
|
7
|
-
require 'json'
|
8
|
-
require 'logger'
|
4
|
+
require 'ghtorrent'
|
9
5
|
|
10
|
-
|
11
|
-
require 'ghtorrent/settings'
|
12
|
-
require 'ghtorrent/logging'
|
13
|
-
require 'ghtorrent/persister'
|
14
|
-
require 'ghtorrent/command'
|
15
|
-
|
16
|
-
class GHTMirrorEvents < GHTorrent::Command
|
17
|
-
|
18
|
-
include GHTorrent::Settings
|
19
|
-
include GHTorrent::Logging
|
20
|
-
include GHTorrent::Persister
|
21
|
-
include GHTorrent::APIClient
|
22
|
-
|
23
|
-
def logger
|
24
|
-
@logger
|
25
|
-
end
|
26
|
-
|
27
|
-
def store_count(events)
|
28
|
-
stored = Array.new
|
29
|
-
new = dupl = 0
|
30
|
-
events.each do |e|
|
31
|
-
if @persister.find(:events, {'id' => e['id']}).empty?
|
32
|
-
stored << e
|
33
|
-
new += 1
|
34
|
-
@persister.store(:events, e)
|
35
|
-
info "Added #{e['id']}"
|
36
|
-
else
|
37
|
-
info "Already got #{e['id']}"
|
38
|
-
dupl += 1
|
39
|
-
end
|
40
|
-
end
|
41
|
-
return new, dupl, stored
|
42
|
-
end
|
43
|
-
|
44
|
-
# Retrieve events from Github, store them in the DB
|
45
|
-
def retrieve(exchange)
|
46
|
-
begin
|
47
|
-
new = dupl = 0
|
48
|
-
events = api_request "https://api.github.com/events", false
|
49
|
-
(new, dupl, stored) = store_count events
|
50
|
-
|
51
|
-
# This means that first page cannot contain all new events. Go
|
52
|
-
# up to 10 pages back to find all new events not contained in first page.
|
53
|
-
if dupl == 0
|
54
|
-
events = paged_api_request "https://api.github.com/events", 10
|
55
|
-
(new1, dupl1, stored1) = store_count events
|
56
|
-
stored = stored | stored1
|
57
|
-
new = new + new1
|
58
|
-
new
|
59
|
-
end
|
60
|
-
|
61
|
-
stored.each do |e|
|
62
|
-
msg = JSON.dump(e)
|
63
|
-
key = "evt.%s" % e['type']
|
64
|
-
exchange.publish msg, :persistent => true, :routing_key => key
|
65
|
-
end
|
66
|
-
return new, dupl
|
67
|
-
rescue Exception => e
|
68
|
-
STDERR.puts e.message
|
69
|
-
STDERR.puts e.backtrace
|
70
|
-
end
|
71
|
-
end
|
72
|
-
|
73
|
-
def go
|
74
|
-
@persister = connect(:mongo, @settings)
|
75
|
-
@logger = Logger.new(STDOUT)
|
76
|
-
|
77
|
-
# Graceful exit
|
78
|
-
Signal.trap('INT') {
|
79
|
-
info "Received SIGINT, exiting"
|
80
|
-
AMQP.stop { EM.stop }
|
81
|
-
}
|
82
|
-
Signal.trap('TERM') {
|
83
|
-
info "Received SIGTERM, exiting"
|
84
|
-
AMQP.stop { EM.stop }
|
85
|
-
}
|
86
|
-
|
87
|
-
# The event loop
|
88
|
-
AMQP.start(:host => config(:amqp_host),
|
89
|
-
:port => config(:amqp_port),
|
90
|
-
:username => config(:amqp_username),
|
91
|
-
:password => config(:amqp_password)) do |connection|
|
92
|
-
|
93
|
-
# Statistics used to recalibrate event delays
|
94
|
-
dupl_msgs = new_msgs = 1
|
95
|
-
|
96
|
-
debug "connected to rabbit"
|
97
|
-
|
98
|
-
channel = AMQP::Channel.new(connection)
|
99
|
-
exchange = channel.topic(config(:amqp_exchange), :durable => true,
|
100
|
-
:auto_delete => false)
|
101
|
-
|
102
|
-
# Initial delay for the retrieve event loop
|
103
|
-
retrieval_delay = config(:mirror_pollevery)
|
104
|
-
|
105
|
-
# Retrieve events
|
106
|
-
retriever = EventMachine.add_periodic_timer(retrieval_delay) do
|
107
|
-
(new, dupl) = retrieve exchange
|
108
|
-
dupl_msgs += dupl
|
109
|
-
new_msgs += new
|
110
|
-
end
|
111
|
-
|
112
|
-
# Adjust event retrieval delay time to reduce load to Github
|
113
|
-
EventMachine.add_periodic_timer(120) do
|
114
|
-
ratio = (dupl_msgs.to_f / (dupl_msgs + new_msgs).to_f)
|
115
|
-
|
116
|
-
info("Stats: #{new_msgs} new, #{dupl_msgs} duplicate, ratio: #{ratio}")
|
117
|
-
|
118
|
-
new_delay = if ratio >= 0 and ratio < 0.3 then
|
119
|
-
-1
|
120
|
-
elsif ratio >= 0.3 and ratio <= 0.5 then
|
121
|
-
0
|
122
|
-
elsif ratio > 0.5 and ratio < 1 then
|
123
|
-
+1
|
124
|
-
end
|
125
|
-
|
126
|
-
# Reset counters for new loop
|
127
|
-
dupl_msgs = new_msgs = 0
|
128
|
-
|
129
|
-
# Update the retrieval delay and restart the event retriever
|
130
|
-
if new_delay != 0
|
131
|
-
|
132
|
-
# Stop the retriever task and adjust retrieval delay
|
133
|
-
retriever.cancel
|
134
|
-
retrieval_delay = retrieval_delay + new_delay
|
135
|
-
info("Setting event retrieval delay to #{retrieval_delay} secs")
|
136
|
-
|
137
|
-
# Restart the retriever
|
138
|
-
retriever = EventMachine.add_periodic_timer(retrieval_delay) do
|
139
|
-
(new, dupl) = retrieve exchange
|
140
|
-
dupl_msgs += dupl
|
141
|
-
new_msgs += new
|
142
|
-
end
|
143
|
-
end
|
144
|
-
end
|
145
|
-
end
|
146
|
-
end
|
147
|
-
end
|
148
|
-
|
149
|
-
GHTMirrorEvents.run
|
150
|
-
|
151
|
-
# vim: set sta sts=2 shiftwidth=2 sw=2 et ai :
|
6
|
+
GHTMirrorEvents.run(ARGV)
|