ghtorrent 0.5 → 0.6
This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only.
- data/CHANGELOG +16 -1
- data/README.md +6 -1
- data/bin/ght-data-retrieval +2 -162
- data/bin/ght-get-more-commits +6 -0
- data/bin/ght-load +1 -224
- data/bin/ght-mirror-events +2 -147
- data/bin/ght-process-event +35 -0
- data/bin/ght-retrieve-repo +6 -0
- data/bin/ght-rm-dupl +2 -130
- data/lib/ghtorrent.rb +10 -0
- data/lib/ghtorrent/adapters/base_adapter.rb +1 -1
- data/lib/ghtorrent/adapters/mongo_persister.rb +12 -1
- data/lib/ghtorrent/api_client.rb +47 -13
- data/lib/ghtorrent/bson_orderedhash.rb +2 -1
- data/lib/ghtorrent/command.rb +18 -0
- data/lib/ghtorrent/commands/ght_data_retrieval.rb +218 -0
- data/lib/ghtorrent/commands/ght_get_more_commits.rb +116 -0
- data/lib/ghtorrent/commands/ght_load.rb +227 -0
- data/lib/ghtorrent/commands/ght_mirror_events.rb +147 -0
- data/lib/ghtorrent/commands/ght_retrieve_repo.rb +118 -0
- data/lib/ghtorrent/commands/ght_rm_dupl.rb +132 -0
- data/lib/ghtorrent/ghtorrent.rb +401 -89
- data/lib/ghtorrent/hash.rb +1 -1
- data/lib/ghtorrent/migrations/011_add_issues.rb +74 -0
- data/lib/ghtorrent/retriever.rb +88 -16
- data/lib/ghtorrent/settings.rb +6 -1
- data/lib/version.rb +1 -1
- metadata +36 -26
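
The dominant change in this release is structural: logic that previously lived in the `bin/` executables has moved into reusable command classes under `lib/ghtorrent/commands/`, leaving the executables as thin wrappers (compare the large deletions in `bin/` with the additions under `commands/` in the list above). Judging from the removed scripts reproduced below, a `GHTorrent::Command` subclass follows roughly this shape. This is a sketch of the pattern only, not one of the actual 0.6 classes; the class name and banner text are made up:

```ruby
require 'rubygems'
require 'ghtorrent'

# Sketch of the command pattern visible in the removed bin/ scripts:
# subclasses override prepare_options/validate/go, and the script
# entry point is Class.run(ARGV).
class MyCommand < GHTorrent::Command   # hypothetical command
  def prepare_options(options)
    options.banner 'usage: my-command [options]'  # hypothetical banner
  end

  def validate
    super  # command-specific argument checks would go here
  end

  def go
    puts 'the actual work happens here'
  end
end

MyCommand.run(ARGV)
```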
data/CHANGELOG
CHANGED
@@ -1,6 +1,21 @@
+= Version 0.6
+
+* Support retrieval of issues, issue events and issue history
+* Support for setting username/password for performing requests
+* Respect by default Github's x-ratelimit-remaining header
+* Selective processing of events for user-specified repos
+* New tool (ght-get-more-commits) to retrieve all commits for a repository
+* New tool (ght-process-events) to process just one event by id
+* Retrieve 100 items at once by default on multipage requests
+* Rename watchers -> stargazers, as per Github API change
+* Fixes to bugs that prevented efficient processing of multipage requests
+* Several fixes on how pull requests are being processed
+* Users with invalid git setups are now allowed
+* Compatibility with Ruby 1.8 restored
+
 = Version 0.5
 
-
+* Generic methods for retrieving items that are bound to repositories
 * Processing of pull requests with commits, comments and history
 * Processing of project forks
 * New tool (ght-load) to filter and load events to the queue
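
Of the entries above, the rate-limit change is the most operationally significant: the client now respects GitHub's `x-ratelimit-remaining` response header by default. The actual handling lives in the updated `lib/ghtorrent/api_client.rb`, which this diff does not expand; the following is only a minimal sketch of the general technique, with a hypothetical helper name, threshold and sleep interval:

```ruby
require 'net/http'
require 'uri'

# Minimal sketch: inspect GitHub's x-ratelimit-remaining header on each
# response and back off before the quota is exhausted. Helper name,
# threshold and interval are illustrative, not GHTorrent's values.
def rate_limited_get(url)
  resp = Net::HTTP.get_response(URI(url))
  remaining = resp['x-ratelimit-remaining'].to_i  # 0 if header is missing
  sleep 60 if remaining < 10  # wait for the quota window to advance
  resp.body
end

puts rate_limited_get('https://api.github.com/events')
```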
data/README.md
CHANGED
@@ -129,9 +129,14 @@ please consider citing the following paper:
 
 > Georgios Gousios and Diomidis Spinellis, "GHTorrent: GitHub’s data from a firehose," in _MSR '12: Proceedings of the 9th Working Conference on Mining Software Repositories_, June 2–3, 2012. Zurich, Switzerland.
 
+See also the following presentation:
+
+<iframe src="http://www.slideshare.net/slideshow/embed_code/13184524?rel=0" width="342" height="291" frameborder="0" marginwidth="0" marginheight="0" scrolling="no" style="border:1px solid #CCC;border-width:1px 1px 0;margin-bottom:5px" allowfullscreen/>
+<div style="margin-bottom:5px"> <strong> <a href="http://www.slideshare.net/gousiosg/ghtorrent-githubs-data-from-a-firehose-13184524" title="GHTorrent: Github's Data from a Firehose" target="_blank">GHTorrent: Github's Data from a Firehose</a> </strong> </div>
+
 #### Authors
 
-Georgios Gousios <gousiosg@gmail.com>
+[Georgios Gousios](http://istlab.dmst.aueb.gr/~george) <gousiosg@gmail.com>
 
 [Diomidis Spinellis](http://www.dmst.aueb.gr/dds) <dds@aueb.gr>
 
data/bin/ght-data-retrieval
CHANGED
@@ -1,166 +1,6 @@
 #!/usr/bin/env ruby
 
 require 'rubygems'
-require '
-require 'json'
-require 'pp'
+require 'ghtorrent'
 
-
-require 'ghtorrent/settings'
-require 'ghtorrent/logging'
-require 'ghtorrent/command'
-
-class GHTDataRetrieval < GHTorrent::Command
-
-  include GHTorrent::Settings
-  include GHTorrent::Logging
-
-  def parse(msg)
-    JSON.parse(msg)
-  end
-
-  def PushEvent(data)
-    data['payload']['commits'].each do |c|
-      url = c['url'].split(/\//)
-
-      @gh.get_commit url[4], url[5], url[7]
-    end
-  end
-
-  def WatchEvent(data)
-    owner = data['repo']['name'].split(/\//)[0]
-    repo = data['repo']['name'].split(/\//)[1]
-    watcher = data['actor']['login']
-    created_at = data['created_at']
-
-    @gh.get_watcher owner, repo, watcher, created_at
-  end
-
-  def FollowEvent(data)
-    follower = data['actor']['login']
-    followed = data['payload']['target']['login']
-    created_at = data['created_at']
-
-    @gh.get_follower(follower, followed, created_at)
-  end
-
-  def MemberEvent(data)
-    owner = data['actor']['login']
-    repo = data['repo']['name'].split(/\//)[1]
-    new_member = data['payload']['member']['login']
-    created_at = data['created_at']
-
-    @gh.get_project_member(owner, repo, new_member, created_at)
-  end
-
-  def CommitCommentEvent(data)
-    user = data['actor']['login']
-    repo = data['repo']['name'].split(/\//)[1]
-    id = data['payload']['comment']['id']
-    created_at = data['created_at']
-
-    @gh.get_commit_comment(user, repo, id, created_at)
-  end
-
-  def PullRequestEvent(data)
-    owner = data['payload']['pull_request']['base']['repo']['owner']['login']
-    repo = data['payload']['pull_request']['base']['repo']['name']
-    pullreq_id = data['payload']['number']
-    action = data['payload']['action']
-    created_at = data['created_at']
-
-    @gh.get_pull_request(owner, repo, pullreq_id, action, created_at)
-  end
-
-  def ForkEvent(data)
-    owner = data['repo']['name'].split(/\//)[0]
-    repo = data['repo']['name'].split(/\//)[1]
-    fork_id = data['payload']['forkee']['id']
-    created_at = data['created_at']
-
-    @gh.get_fork(owner, repo, fork_id, created_at)
-  end
-
-  def PullRequestReviewCommentEvent(data)
-    owner = data['repo']['name'].split(/\//)[0]
-    repo = data['repo']['name'].split(/\//)[1]
-    comment_id = data['payload']['comment']['id']
-    pullreq_id = data['payload']['comment']['_links']['pull_request']['href'].split(/\//)[-1]
-    created_at = data['created_at']
-
-    @gh.get_pullreq_comment(owner, repo, pullreq_id, comment_id, created_at)
-  end
-
-  def IssueCommentEvent(data)
-    owner = data['repo']['name'].split(/\//)[0]
-    repo = data['repo']['name'].split(/\//)[1]
-    pullreq_id = data['payload']['forkee']['id']
-    created_at = data['created_at']
-
-    @gh.get_issue_comment(owner, repo, issue_id, comment_id, created_at)
-  end
-
-  def handlers
-    %w(PushEvent WatchEvent FollowEvent MemberEvent CommitCommentEvent PullRequestEvent ForkEvent PullRequestReviewCommentEvent)
-    #%w(PullRequestReviewCommentEvent)
-  end
-
-  def logger
-    @gh.logger
-  end
-
-  def go
-    @gh = GHTorrent::Mirror.new(@settings)
-
-    # Graceful exit
-    Signal.trap('INT') {
-      info "GHTDataRetrieval: Received SIGINT, exiting"
-      AMQP.stop { EM.stop }
-    }
-    Signal.trap('TERM') {
-      info "GHTDataRetrieval: Received SIGTERM, exiting"
-      AMQP.stop { EM.stop }
-    }
-
-    AMQP.start(:host => config(:amqp_host),
-               :port => config(:amqp_port),
-               :username => config(:amqp_username),
-               :password => config(:amqp_password)) do |connection|
-
-      channel = AMQP::Channel.new(connection, :prefetch => config(:amqp_prefetch))
-      exchange = channel.topic(config(:amqp_exchange), :durable => true,
-                               :auto_delete => false)
-
-      handlers.each { |h|
-        queue = channel.queue("#{h}s", {:durable => true})\
-                       .bind(exchange, :routing_key => "evt.#{h}")
-
-        info "GHTDataRetrieval: Binding handler #{h} to routing key evt.#{h}"
-
-        queue.subscribe(:ack => true) do |headers, msg|
-          begin
-            data = parse(msg)
-            info "GHTDataRetrieval: Processing event: #{data['type']}-#{data['id']}"
-            send(h, data)
-            headers.ack
-            info "GHTDataRetrieval: Processed event: #{data['type']}-#{data['id']}"
-          rescue Exception => e
-            # Give a message a chance to be reprocessed
-            if headers.redelivered?
-              data = parse(msg)
-              warn "GHTDataRetrieval: Could not process event: #{data['type']}-#{data['id']}"
-              headers.reject(:requeue => false)
-            else
-              headers.reject(:requeue => true)
-            end
-
-            STDERR.puts e
-            STDERR.puts e.backtrace.join("\n")
-          end
-        end
-      }
-    end
-  end
-end
-
-GHTDataRetrieval.run
+GHTDataRetrieval.run(ARGV)
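
Note the dispatch idiom in the removed consumer: each GitHub event type maps to a handler method of the same name, selected at runtime with `send(h, data)`. The consumer now lives in `lib/ghtorrent/commands/ght_data_retrieval.rb` (not expanded in this diff), but the idiom is easy to isolate, as in this self-contained sketch with a hypothetical dispatcher class:

```ruby
require 'json'

# Condensed sketch of the dispatch pattern above: handler methods are
# named after GitHub event types and selected dynamically with send().
class EventDispatcher   # hypothetical class, not part of GHTorrent
  def PushEvent(data)
    puts "push by #{data['actor']['login']}"
  end

  def dispatch(msg)
    data = JSON.parse(msg)
    # Only dispatch events we actually have a handler for
    send(data['type'], data) if respond_to?(data['type'])
  end
end

EventDispatcher.new.dispatch('{"type":"PushEvent","actor":{"login":"gousiosg"}}')
```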
data/bin/ght-load
CHANGED
@@ -1,230 +1,7 @@
 #!/usr/bin/env ruby
 
 require 'rubygems'
-require '
-require 'amqp'
-require 'set'
-require 'eventmachine'
-require 'pp'
-require "amqp/extensions/rabbitmq"
-
-require 'ghtorrent/settings'
-require 'ghtorrent/logging'
-require 'ghtorrent/persister'
-require 'ghtorrent/command'
-require 'ghtorrent/bson_orderedhash'
-
-class GHTLoad < GHTorrent::Command
-
-  include GHTorrent::Settings
-  include GHTorrent::Persister
-
-  def col_info()
-    {
-      :commits => {
-        :name => "commits",
-        :payload => "commit.id",
-        :unq => "commit.id",
-        :col => persister.get_underlying_connection.collection(:commits.to_s),
-        :routekey => "commit.%s"
-      },
-      :events => {
-        :name => "events",
-        :payload => "",
-        :unq => "type",
-        :col => persister.get_underlying_connection.collection(:events.to_s),
-        :routekey => "evt.%s"
-      }
-    }
-  end
-
-  def persister
-    @persister ||= connect(:mongo, @settings)
-    @persister
-  end
-
-  def prepare_options(options)
-    options.banner <<-BANNER
-Loads object ids from a collection to a queue for further processing.
-
-#{command_name} [options] collection
-
-#{command_name} options:
-BANNER
-
-    options.opt :earliest, 'Seconds since epoch of earliest item to load',
-                :short => 'e', :default => 0, :type => :int
-    options.opt :number, 'Number of items to load (-1 means all)',
-                :short => 'n', :type => :int, :default => -1
-    options.opt :filter,
-                'Filter items by regexp on item attributes: item.attr=regexp',
-                :short => 'f', :type => String, :multi => true
-  end
-
-  def validate
-    super
-    Trollop::die "no collection specified" unless args[0] && !args[0].empty?
-    filter = options[:filter]
-    case
-    when filter.is_a?(Array)
-      options[:filter].each { |x|
-        Trollop::die "not a valid filter #{x}" unless is_filter_valid?(x)
-      }
-    when filter == []
-      # Noop
-    else
-      Trollop::die "A filter can only be a string"
-    end
-  end
-
-  def go
-    # Message tags await publisher ack
-    awaiting_ack = SortedSet.new
-
-    # Num events read
-    num_read = 0
-
-    collection = case args[0]
-                 when "events"
-                   :events
-                 when "commits"
-                   :commits
-                 end
-
-    puts "Loading from collection #{collection}"
-    puts "Loading items after #{Time.at(options[:earliest])}" if options[:verbose]
-    puts "Loading #{options[:number]} items" if options[:verbose] && options[:number] != -1
-
-    what = case
-           when options[:filter].is_a?(Array)
-             options[:filter].reduce({}) { |acc,x|
-               (k,r) = x.split(/=/)
-               acc[k] = Regexp.new(r)
-               acc
-             }
-           when filter == []
-             {}
-           end
-
-    from = {'_id' => {'$gte' => BSON::ObjectId.from_time(Time.at(options[:earliest]))}}
-
-    (puts "Mongo filter:"; pp what.merge(from)) if options[:verbose]
-
-    AMQP.start(:host => config(:amqp_host),
-               :port => config(:amqp_port),
-               :username => config(:amqp_username),
-               :password => config(:amqp_password)) do |connection|
-
-      channel = AMQP::Channel.new(connection)
-      exchange = channel.topic(config(:amqp_exchange),
-                               :durable => true, :auto_delete => false)
-
-      # What to do when the user hits Ctrl+c
-      show_stopper = Proc.new {
-        connection.close { EventMachine.stop }
-      }
-
-      # Read next 1000 items and queue them
-      read_and_publish = Proc.new {
-
-        to_read = if options.number == -1
-                    1000
-                  else
-                    if options.number - num_read - 1 <= 0
-                      -1
-                    else
-                      options.number - num_read - 1
-                    end
-                  end
-
-        read = 0
-        col_info[collection][:col].find(what.merge(from),
-                                        :skip => num_read,
-                                        :limit => to_read).each do |e|
-
-          payload = read_value(e, col_info[collection][:payload])
-          payload = if payload.class == BSON::OrderedHash
-                      payload.delete "_id" # Inserted by MongoDB on event insert
-                      payload.to_json
-                    end
-          read += 1
-          unq = read_value(e, col_info[collection][:unq])
-          if unq.class != String or unq.nil? then
-            throw Exception("Unique value can only be a String")
-          end
-
-          key = col_info[collection][:routekey] % unq
-
-          exchange.publish payload, :persistent => true, :routing_key => key
-
-          num_read += 1
-          puts("Publish id = #{payload[unq]} (#{num_read} total)") if options.verbose
-          awaiting_ack << num_read
-        end
-
-        # Nothing new in the DB and no msgs waiting ack
-        if (read == 0 and awaiting_ack.size == 0) or to_read == -1
-          puts("Finished reading, exiting")
-          show_stopper.call
-        end
-      }
-
-      # Remove acknowledged or failed msg tags from the queue
-      # Trigger more messages to be read when ack msg queue size drops to zero
-      publisher_event = Proc.new { |ack|
-        if ack.multiple then
-          awaiting_ack.delete_if { |x| x <= ack.delivery_tag }
-        else
-          awaiting_ack.delete ack.delivery_tag
-        end
-
-        if awaiting_ack.size == 0
-          puts("ACKS.size= #{awaiting_ack.size}") if options.verbose
-          EventMachine.next_tick do
-            read_and_publish.call
-          end
-        end
-      }
-
-      # Await publisher confirms
-      channel.confirm_select
-
-      # Callback when confirms have arrived
-      channel.on_ack do |ack|
-        puts "ACK: tag=#{ack.delivery_tag}, mul=#{ack.multiple}" if options.verbose
-        publisher_event.call(ack)
-      end
-
-      # Callback when confirms failed.
-      channel.on_nack do |nack|
-        puts "NACK: tag=#{nack.delivery_tag}, mul=#{nack.multiple}" if options.verbose
-        publisher_event.call(nack)
-      end
-
-      # Signal handlers
-      Signal.trap('INT', show_stopper)
-      Signal.trap('TERM', show_stopper)
-
-      # Trigger start processing
-      EventMachine.add_timer(0.1) do
-        read_and_publish.call
-      end
-    end
-  end
-
-  private
-
-  def is_filter_valid?(filter)
-    (k, r) = filter.split(/=/)
-    return false if r.nil?
-    begin
-      Regexp.new(r)
-      true
-    rescue
-      false
-    end
-  end
-end
+require 'ghtorrent'
 
 GHTLoad.run
 
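
The most reusable piece of the removed loader is its filter mechanism: each `-f item.attr=regexp` option is validated by `is_filter_valid?` and folded into a MongoDB query document whose values are regular expressions. A standalone sketch of that parsing step (the function name and sample filters are illustrative):

```ruby
# Sketch of the attr=regexp filter parsing above: each filter becomes a
# field => Regexp pair, usable directly in a MongoDB query document.
def parse_filters(filters)
  filters.reduce({}) do |acc, f|
    key, rx = f.split(/=/, 2)
    raise ArgumentError, "not a valid filter #{f}" if rx.nil?
    acc[key] = Regexp.new(rx)
    acc
  end
end

# e.g. the equivalent of: ght-load -f 'repo.name=^rails' -f 'type=PushEvent' events
p parse_filters(['repo.name=^rails', 'type=PushEvent'])
# => {"repo.name"=>/^rails/, "type"=>/PushEvent/}
```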
data/bin/ght-mirror-events
CHANGED
@@ -1,151 +1,6 @@
 #!/usr/bin/env ruby
 
 require 'rubygems'
-require '
-require 'amqp'
-require 'eventmachine'
-require 'json'
-require 'logger'
+require 'ghtorrent'
 
-
-require 'ghtorrent/settings'
-require 'ghtorrent/logging'
-require 'ghtorrent/persister'
-require 'ghtorrent/command'
-
-class GHTMirrorEvents < GHTorrent::Command
-
-  include GHTorrent::Settings
-  include GHTorrent::Logging
-  include GHTorrent::Persister
-  include GHTorrent::APIClient
-
-  def logger
-    @logger
-  end
-
-  def store_count(events)
-    stored = Array.new
-    new = dupl = 0
-    events.each do |e|
-      if @persister.find(:events, {'id' => e['id']}).empty?
-        stored << e
-        new += 1
-        @persister.store(:events, e)
-        info "Added #{e['id']}"
-      else
-        info "Already got #{e['id']}"
-        dupl += 1
-      end
-    end
-    return new, dupl, stored
-  end
-
-  # Retrieve events from Github, store them in the DB
-  def retrieve(exchange)
-    begin
-      new = dupl = 0
-      events = api_request "https://api.github.com/events", false
-      (new, dupl, stored) = store_count events
-
-      # This means that first page cannot contain all new events. Go
-      # up to 10 pages back to find all new events not contained in first page.
-      if dupl == 0
-        events = paged_api_request "https://api.github.com/events", 10
-        (new1, dupl1, stored1) = store_count events
-        stored = stored | stored1
-        new = new + new1
-        new
-      end
-
-      stored.each do |e|
-        msg = JSON.dump(e)
-        key = "evt.%s" % e['type']
-        exchange.publish msg, :persistent => true, :routing_key => key
-      end
-      return new, dupl
-    rescue Exception => e
-      STDERR.puts e.message
-      STDERR.puts e.backtrace
-    end
-  end
-
-  def go
-    @persister = connect(:mongo, @settings)
-    @logger = Logger.new(STDOUT)
-
-    # Graceful exit
-    Signal.trap('INT') {
-      info "Received SIGINT, exiting"
-      AMQP.stop { EM.stop }
-    }
-    Signal.trap('TERM') {
-      info "Received SIGTERM, exiting"
-      AMQP.stop { EM.stop }
-    }
-
-    # The event loop
-    AMQP.start(:host => config(:amqp_host),
-               :port => config(:amqp_port),
-               :username => config(:amqp_username),
-               :password => config(:amqp_password)) do |connection|
-
-      # Statistics used to recalibrate event delays
-      dupl_msgs = new_msgs = 1
-
-      debug "connected to rabbit"
-
-      channel = AMQP::Channel.new(connection)
-      exchange = channel.topic(config(:amqp_exchange), :durable => true,
-                               :auto_delete => false)
-
-      # Initial delay for the retrieve event loop
-      retrieval_delay = config(:mirror_pollevery)
-
-      # Retrieve events
-      retriever = EventMachine.add_periodic_timer(retrieval_delay) do
-        (new, dupl) = retrieve exchange
-        dupl_msgs += dupl
-        new_msgs += new
-      end
-
-      # Adjust event retrieval delay time to reduce load to Github
-      EventMachine.add_periodic_timer(120) do
-        ratio = (dupl_msgs.to_f / (dupl_msgs + new_msgs).to_f)
-
-        info("Stats: #{new_msgs} new, #{dupl_msgs} duplicate, ratio: #{ratio}")
-
-        new_delay = if ratio >= 0 and ratio < 0.3 then
-                      -1
-                    elsif ratio >= 0.3 and ratio <= 0.5 then
-                      0
-                    elsif ratio > 0.5 and ratio < 1 then
-                      +1
-                    end
-
-        # Reset counters for new loop
-        dupl_msgs = new_msgs = 0
-
-        # Update the retrieval delay and restart the event retriever
-        if new_delay != 0
-
-          # Stop the retriever task and adjust retrieval delay
-          retriever.cancel
-          retrieval_delay = retrieval_delay + new_delay
-          info("Setting event retrieval delay to #{retrieval_delay} secs")
-
-          # Restart the retriever
-          retriever = EventMachine.add_periodic_timer(retrieval_delay) do
-            (new, dupl) = retrieve exchange
-            dupl_msgs += dupl
-            new_msgs += new
-          end
-        end
-      end
-    end
-  end
-end
-
-GHTMirrorEvents.run
-
-# vim: set sta sts=2 shiftwidth=2 sw=2 et ai :
+GHTMirrorEvents.run(ARGV)
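
The removed mirror contains a small self-calibration loop worth spelling out: every 120 seconds it computes the ratio of duplicate to total events fetched and nudges the polling delay by one second in the appropriate direction, so the mirror polls just fast enough to see each event about once. A condensed sketch of that decision (the cutoffs match the removed code; the function name is illustrative, and the ratio == 1 edge case is folded into the back-off branch):

```ruby
# Condensed version of the delay-recalibration logic above: many
# duplicates mean we poll too often; few mean we may be missing events.
def delay_adjustment(new_msgs, dupl_msgs)
  ratio = dupl_msgs.to_f / (dupl_msgs + new_msgs)
  if ratio < 0.3 then -1     # mostly new events: poll more often
  elsif ratio <= 0.5 then 0  # balanced: keep the current delay
  else 1                     # mostly duplicates: back off
  end
end

p delay_adjustment(90, 10)  # => -1 (speed up polling)
p delay_adjustment(10, 90)  # =>  1 (slow down polling)
```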