ghtorrent 0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +132 -0
- data/Rakefile +20 -0
- data/bin/ght-data-retrieval +119 -0
- data/bin/ght-load +242 -0
- data/bin/ght-mirror-events +154 -0
- data/bin/ght-periodic-dump +92 -0
- data/bin/ght-rm-dupl +124 -0
- data/bin/ght-torrent-index +180 -0
- data/lib/ghtorrent.rb +22 -0
- data/lib/ghtorrent/adapters/base_adapter.rb +91 -0
- data/lib/ghtorrent/adapters/mongo_persister.rb +126 -0
- data/lib/ghtorrent/adapters/noop_persister.rb +58 -0
- data/lib/ghtorrent/api_client.rb +106 -0
- data/lib/ghtorrent/call_stack.rb +119 -0
- data/lib/ghtorrent/command.rb +136 -0
- data/lib/ghtorrent/ghtorrent.rb +396 -0
- data/lib/ghtorrent/logging.rb +69 -0
- data/lib/ghtorrent/migrations/001_init_schema.rb +60 -0
- data/lib/ghtorrent/migrations/002_add_followers_created_at.rb +15 -0
- data/lib/ghtorrent/migrations/003_add_external_ref_ids.rb +40 -0
- data/lib/ghtorrent/persister.rb +48 -0
- data/lib/ghtorrent/retriever.rb +148 -0
- data/lib/ghtorrent/settings.rb +63 -0
- data/lib/ghtorrent/utils.rb +58 -0
- data/test/callstack_test.rb +67 -0
- metadata +181 -0
data/README.md
ADDED
@@ -0,0 +1,132 @@
|
|
1
|
+
github-mirror: Mirror and process the Github event stream
|
2
|
+
=========================================================
|
3
|
+
|
4
|
+
A collection of scripts used to mirror the Github event stream, for
|
5
|
+
research purposes. The scripts are distributed as a Gem (`ghtorrent`),
|
6
|
+
but they can also be run by checking out this repository.
|
7
|
+
|
8
|
+
GHTorrent relies on the following software to work:
|
9
|
+
|
10
|
+
* MongoDB > 2.0
|
11
|
+
* RabbitMQ >= 2.7
|
12
|
+
* An SQL database compatible with [Sequel](http://sequel.rubyforge.org/rdoc/files/doc/opening_databases_rdoc.html). GHTorrent is tested with SQLite and MySQL,
|
13
|
+
so your mileage may vary if you are using other databases.
|
14
|
+
|
15
|
+
GHTorrent is written in Ruby (tested with 1.8 and JRuby). To install
|
16
|
+
it as a Gem do:
|
17
|
+
|
18
|
+
<code>
|
19
|
+
sudo gem install ghtorrent
|
20
|
+
</code>
|
21
|
+
|
22
|
+
#### Configuring
|
23
|
+
|
24
|
+
Copy the contents of the
|
25
|
+
[config.yaml.tmpl](https://github.com/gousiosg/github-mirror/blob/master/config.yaml.tmpl)
|
26
|
+
file to a file in your home directory. All provided scripts accept the `-c`
|
27
|
+
option, which you can use to pass the location of the configuration file as
|
28
|
+
a parameter.
|
29
|
+
|
30
|
+
Edit the MongoDB and AMQP
|
31
|
+
configuration options accordingly. The scripts require accounts with permissions
|
32
|
+
to create queues and exchanges in the AMQP queue, collections
|
33
|
+
in MongoDB and tables in the selected SQL database, respectively.
|
34
|
+
|
35
|
+
To prepare MongoDB:
|
36
|
+
|
37
|
+
<pre>
|
38
|
+
$ mongo admin
|
39
|
+
> db.addUser('github', 'github')
|
40
|
+
> use github
|
41
|
+
> db.addUser('github', 'github')
|
42
|
+
</pre>
|
43
|
+
|
44
|
+
To prepare RabbitMQ:
|
45
|
+
|
46
|
+
<pre>
|
47
|
+
$ rabbitmqctl add_user github
|
48
|
+
$ rabbitmqctl set_permissions -p / github ".*" ".*" ".*"
|
49
|
+
|
50
|
+
# The following will enable the RabbitMQ web admin for the github user
|
51
|
+
# Not necessary to have, but good to debug and diagnose problems
|
52
|
+
$ rabbitmq-plugins enable rabbitmq_management
|
53
|
+
$ rabbitmqctl set_user_tags github administrator
|
54
|
+
</pre>
|
55
|
+
|
56
|
+
To prepare MySQL:
|
57
|
+
|
58
|
+
<pre>
|
59
|
+
$ mysql -u root -p
|
60
|
+
mysql> create user 'github'@'localhost' identified by 'github';
|
61
|
+
mysql> create database github;
|
62
|
+
mysql> GRANT ALL PRIVILEGES ON github.* to github@'localhost';
|
63
|
+
mysql> flush privileges;
|
64
|
+
</pre>
|
65
|
+
|
66
|
+
You can find more information on how to set up a cluster of machines
|
67
|
+
to retrieve data in parallel on the [Wiki](https://github.com/gousiosg/github-mirror/wiki/Setting-up-a-mirroring-cluster).
|
68
|
+
|
69
|
+
### Running
|
70
|
+
|
71
|
+
To retrieve data with GHTorrent
|
72
|
+
|
73
|
+
* `ght-mirror-events` periodically polls Github's event
|
74
|
+
queue (`https://api.github.com/events`), stores all new events in the `events`
|
75
|
+
collection in MongoDB and posts them to the `github` exchange in RabbitMQ.
|
76
|
+
|
77
|
+
* `ght-data-retrieval` creates queues that route posted events to processor
|
78
|
+
functions, which in turn use the appropriate Github API call to retrieve the
|
79
|
+
linked contents, extract metadata to store in the SQL database and store the
|
80
|
+
retrieved data in the appropriate collection in Mongo, to avoid further
|
81
|
+
API calls. Data in the SQL database contain pointers (the MongoDB key)
|
82
|
+
to the "raw" data in MongoDB.
|
83
|
+
|
84
|
+
Both scripts can be run concurrently on more than one host, for resilience and
|
85
|
+
performance reasons. To catch up with Github's event stream, it is enough to
|
86
|
+
run `ght-mirror-events` on one host. To collect all data pointed by each event,
|
87
|
+
one instance of `ght-data-retrieval` is not enough. Both scripts employ
|
88
|
+
throttling mechanisms to keep API usage within the limits imposed by Github
|
89
|
+
(currently 5000 reqs/hr).
|
90
|
+
|
91
|
+
#### Data
|
92
|
+
|
93
|
+
You can find torrents for retrieving data on the
|
94
|
+
[Available Torrents](https://github.com/gousiosg/github-mirror/wiki/Available-Torrents) page. You need two sets of data:
|
95
|
+
|
96
|
+
* Raw events: Github's [event stream](https://api.github.com/events). These
|
97
|
+
are the roots for mirroring operations. The `ght-data-retrieval` crawler starts
|
98
|
+
from an event and goes deep into the rabbit hole.
|
99
|
+
* SQL dumps+Linked data: Data dumps from the SQL database and the corresponding
|
100
|
+
MongoDB entities.
|
101
|
+
|
102
|
+
|
103
|
+
*At the moment, GHTorrent is in the process of redesigning its data storage
|
104
|
+
schema. Consequently, it does not distribute SQL dumps or linked raw data.
|
105
|
+
The distribution service will come back shortly.*
|
106
|
+
|
107
|
+
#### Reporting bugs
|
108
|
+
|
109
|
+
Please use the [Issue
|
110
|
+
Tracker](https://github.com/gousiosg/github-mirror/issues) for reporting bugs
|
111
|
+
and feature requests.
|
112
|
+
|
113
|
+
Patches, bug fixes etc are welcome. Please fork the repository and create
|
114
|
+
a pull request when done fixing/implementing the new feature.
|
115
|
+
|
116
|
+
#### Citation information
|
117
|
+
|
118
|
+
If you find GHTorrent and the accompanying datasets useful in your research,
|
119
|
+
please consider citing the following paper:
|
120
|
+
|
121
|
+
> Georgios Gousios and Diomidis Spinellis, "GHTorrent: GitHub’s data from a firehose," in _MSR '12: Proceedings of the 9th Working Conference on Mining Software Repositories_, June 2–3, 2012. Zurich, Switzerland.
|
122
|
+
|
123
|
+
#### Authors
|
124
|
+
|
125
|
+
Georgios Gousios <gousiosg@gmail.com>
|
126
|
+
|
127
|
+
Diomidis Spinellis
|
128
|
+
|
129
|
+
#### License
|
130
|
+
|
131
|
+
[2-clause BSD](http://www.opensource.org/licenses/bsd-license.php)
|
132
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'rake'
|
2
|
+
require 'rake/testtask'
|
3
|
+
require 'rake/rdoctask'
|
4
|
+
|
5
|
+
# Running plain `rake` runs the :test task and then the :rdoc task.
task :default => [:test, :rdoc]

desc "Run basic tests"
Rake::TestTask.new(:test) do |t|
  # Every file under test/ whose name ends in _test.rb is part of the suite.
  t.pattern = 'test/*_test.rb'
  t.verbose = true
  t.warning = true
end

desc "Run Rdoc"
Rake::RDocTask.new(:rdoc) do |rd|
  # rd.main = "README.doc"
  rd.rdoc_files.include("lib/**/*.rb")
  rd.options << "-d"
  # The switch and its value must be separate arguments. A single
  # "-x migrations" element is parsed as the pattern " migrations"
  # (with a leading space), which matches no path and excludes nothing.
  rd.options << "-x" << "migrations"
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Copyright 2012 Georgios Gousios <gousiosg@gmail.com>
|
4
|
+
#
|
5
|
+
# Redistribution and use in source and binary forms, with or
|
6
|
+
# without modification, are permitted provided that the following
|
7
|
+
# conditions are met:
|
8
|
+
#
|
9
|
+
# 1. Redistributions of source code must retain the above
|
10
|
+
# copyright notice, this list of conditions and the following
|
11
|
+
# disclaimer.
|
12
|
+
#
|
13
|
+
# 2. Redistributions in binary form must reproduce the above
|
14
|
+
# copyright notice, this list of conditions and the following
|
15
|
+
# disclaimer in the documentation and/or other materials
|
16
|
+
# provided with the distribution.
|
17
|
+
#
|
18
|
+
# THIS SOFTWARE IS PROVIDED BY BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
19
|
+
# AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
20
|
+
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
21
|
+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
|
22
|
+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
23
|
+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
24
|
+
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
25
|
+
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
26
|
+
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
27
|
+
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
28
|
+
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
29
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
30
|
+
|
31
|
+
require 'rubygems'
|
32
|
+
require 'amqp'
|
33
|
+
require 'json'
|
34
|
+
require 'ghtorrent'
|
35
|
+
require 'pp'
|
36
|
+
|
37
|
+
class GHTDataRetrieval < GHTorrent::Command
|
38
|
+
|
39
|
+
include GHTorrent::Settings
|
40
|
+
|
41
|
+
attr_reader :settings
|
42
|
+
|
43
|
+
# Decodes a raw message body into Ruby objects.
#
# msg - String holding a JSON document.
#
# Returns the result of JSON.parse for that document.
def parse(msg)
  decoded = JSON.parse(msg)
  decoded
end
|
46
|
+
|
47
|
+
# Handler for PushEvent messages: requests every commit listed in the
# event payload through the mirror object held in @gh.
#
# evt - String with the JSON encoded event.
#
# The commit URL is split on "/" and segments 4, 5 and 7 are handed to
# @gh.get_commit (presumably owner, repository and SHA -- confirm against
# the URL format of the event source).
def PushEvent(evt)
  event = parse(evt)
  commits = event['payload']['commits']
  commits.each do |commit|
    segments = commit['url'].split(/\//)
    @gh.get_commit(segments[4], segments[5], segments[7])
  end
end
|
54
|
+
|
55
|
+
# Handler for WatchEvent messages. Retrieval of the watched repositories
# is currently switched off, so the handler only extracts the actor.
#
# evt - String with the JSON encoded event.
#
# Returns the login of the event's actor.
def WatchEvent(evt)
  actor = parse(evt)['actor']
  login = actor['login']
  #@gh.get_watched user, evt
  login
end
|
60
|
+
|
61
|
+
# Handler for FollowEvent messages. Retrieval of followers and followed
# users is currently switched off, so the handler only extracts the two
# logins involved.
#
# evt - String with the JSON encoded event.
#
# Returns the login of the user being followed (payload.target.login).
def FollowEvent(evt)
  event = parse(evt)

  # Still read so that an event without an actor fails as before.
  _follower = event['actor']['login']
  #@gh.get_followed user

  target = event['payload']['target']
  #@gh.get_followers followed
  target['login']
end
|
69
|
+
|
70
|
+
# Names of the event types this command processes. Each name is also the
# name of the handler method that receives messages of that type.
#
# Returns an Array of Strings.
def handlers
  ['PushEvent', 'WatchEvent', 'FollowEvent']
end
|
73
|
+
|
74
|
+
# Connects to the AMQP broker, binds one durable queue per handler name
# and dispatches every delivered message to the method of the same name.
# (Presumably invoked through GHTorrent::Command.run -- confirm there.)
#
# A message whose handler raises is rejected: it is requeued the first
# time and dropped if it had already been redelivered.
def go
  @gh = GHTorrent::Mirror.new(options[:config])
  @settings = @gh.settings

  # Graceful exit
  %w(INT TERM).each do |sig|
    Signal.trap(sig) { AMQP.stop { EM.stop } }
  end

  broker = {
    :host     => config(:amqp_host),
    :port     => config(:amqp_port),
    :username => config(:amqp_username),
    :password => config(:amqp_password)
  }

  AMQP.start(broker) do |connection|
    channel  = AMQP::Channel.new(connection, :prefetch => 5)
    exchange = channel.topic(config(:amqp_exchange),
                             :durable => true, :auto_delete => false)

    handlers.each do |h|
      routing_key = "evt.#{h}"
      queue = channel.queue("#{h}s", {:durable => true})
      queue = queue.bind(exchange, :routing_key => routing_key)

      puts "Binding handler #{h} to routing key evt.#{h}"

      queue.subscribe(:ack => true) do |headers, msg|
        begin
          send(h, msg)
          headers.ack
        rescue Exception => e
          # Give a message a chance to be reprocessed
          requeue = headers.redelivered? ? false : true
          headers.reject(:requeue => requeue)

          #pp JSON.parse(msg)
          STDERR.puts e
          STDERR.puts e.backtrace.join("\n")
        end
      end
    end
  end
end
|
117
|
+
end
|
118
|
+
|
119
|
+
GHTDataRetrieval.run
|
data/bin/ght-load
ADDED
@@ -0,0 +1,242 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# Loads items from Mongo to the queue for further processing
|
4
|
+
#
|
5
|
+
#
|
6
|
+
# Copyright 2012 Georgios Gousios <gousiosg@gmail.com>
|
7
|
+
#
|
8
|
+
# Redistribution and use in source and binary forms, with or
|
9
|
+
# without modification, are permitted provided that the following
|
10
|
+
# conditions are met:
|
11
|
+
#
|
12
|
+
# 1. Redistributions of source code must retain the above
|
13
|
+
# copyright notice, this list of conditions and the following
|
14
|
+
# disclaimer.
|
15
|
+
#
|
16
|
+
# 2. Redistributions in binary form must reproduce the above
|
17
|
+
# copyright notice, this list of conditions and the following
|
18
|
+
# disclaimer in the documentation and/or other materials
|
19
|
+
# provided with the distribution.
|
20
|
+
#
|
21
|
+
# THIS SOFTWARE IS PROVIDED BY BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
22
|
+
# AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
23
|
+
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
24
|
+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
|
25
|
+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
26
|
+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
27
|
+
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
28
|
+
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
29
|
+
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
30
|
+
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
31
|
+
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
32
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
33
|
+
|
34
|
+
require 'rubygems'
|
35
|
+
require 'ghtorrent-old'
|
36
|
+
require 'mongo'
|
37
|
+
require 'amqp'
|
38
|
+
require 'set'
|
39
|
+
require 'eventmachine'
|
40
|
+
require 'optparse'
|
41
|
+
require 'ostruct'
|
42
|
+
require 'pp'
|
43
|
+
require "amqp/extensions/rabbitmq"
|
44
|
+
|
45
|
+
class GHTLoad < GHTorrent::Command
|
46
|
+
|
47
|
+
# Describes the collections that can be loaded onto the queue.
#
# Returns a Hash keyed by collection symbol. Each entry holds:
#   :name     - collection name
#   :payload  - field path read with GH.read_value to build the message body
#   :unq      - field path read with GH.read_value and substituted into :routekey
#   :col      - collection handle obtained from GH
#   :routekey - format string for the AMQP routing key
def col_info()
  commits = {
    :name     => "commits",
    :payload  => "commit.id",
    :unq      => "commit.id",
    :col      => GH.commits_col,
    :routekey => "commit.%s"
  }

  events = {
    :name     => "events",
    :payload  => "",
    :unq      => "type",
    :col      => GH.events_col,
    :routekey => "evt.%s"
  }

  {:commits => commits, :events => events}
end
|
65
|
+
|
66
|
+
# Registers the banner and the command specific switches on the given
# option parser.
#
# options - parser object responding to #banner and #opt.
#
# Switches added:
#   -e / --earliest  Integer, defaults to 0
#   -f / --filter    String, may be given several times
def prepare_options(options)
  usage = <<-BANNER
Loads object ids from a collection to a queue for further processing.

#{command_name} [options] collection

#{command_name} options:
  BANNER
  options.banner usage

  earliest_spec = {:short => 'e', :default => 0, :type => :int}
  options.opt :earliest, 'Seconds since epoch of earliest item to load',
              earliest_spec

  filter_spec = {:short => 'f', :type => String, :multi => true}
  options.opt :filter,
              'Filter items by regexp on item attributes: item.attr=regexp',
              filter_spec
end
|
81
|
+
|
82
|
+
# Checks the command line after the superclass has run its own checks.
#
# Aborts through Trollop::die when
# * no collection argument was given,
# * the collection is not one of the names `go` knows how to load, or
# * a filter is not of the form attr=regexp with a compilable regexp.
def validate
  super
  Trollop::die "no collection specified" unless args[0] && !args[0].empty?

  # `go` only maps these two names; anything else would fail there with a
  # nil lookup, so reject it here with a readable message.
  unless %w(events commits).include?(args[0])
    Trollop::die "unknown collection #{args[0]}"
  end

  filter = options[:filter]
  # An empty Array (no filter given) passes this test and simply has
  # nothing to iterate over below.
  Trollop::die "A filter can only be a string" unless filter.is_a?(Array)

  filter.each do |x|
    Trollop::die "not a valid filter #{x}" unless is_filter_valid?(x)
  end
end
|
97
|
+
|
98
|
+
# Reads documents from the selected MongoDB collection in batches of up to
# 1000 and publishes one persistent message per document to the configured
# AMQP topic exchange. The next batch is read only after the broker has
# confirmed (ack or nack) every message of the previous batch. The reactor
# stops when a read returns nothing and no confirmation is outstanding, or
# when the process receives INT or TERM.
#
# Raises ArgumentError when the collection argument is not supported and
# RuntimeError when a document's routing value is not a String.
def go
  @gh = GHTorrent::Mirror.new(options[:config])
  @settings = @gh.settings

  GH.init(options[:config])
  # Message tags await publisher ack
  awaiting_ack = SortedSet.new

  # Num events read
  num_read = 0

  collection = case args[0]
                 when "events"
                   :events
                 when "commits"
                   :commits
               end
  # Fail with a clear message instead of a nil lookup in col_info below.
  raise ArgumentError, "Unknown collection: #{args[0]}" if collection.nil?

  puts "Loading from collection #{collection}"
  puts "Loading items after #{Time.at(options[:earliest])}" if options[:verbose]

  # Every filter has the form attr=regexp. Only the first "=" separates the
  # two parts, so the regexp itself may contain "=".
  what = Array(options[:filter]).reduce({}) { |acc, x|
    (k, r) = x.split(/=/, 2)
    acc[k] = Regexp.new(r)
    acc
  }

  from = {'_id' => {'$gte' => BSON::ObjectId.from_time(Time.at(options[:earliest]))}}

  (puts "Mongo filter:"; pp what.merge(from)) if options[:verbose]

  AMQP.start(:host => GH.settings['amqp']['host'],
             :port => GH.settings['amqp']['port'],
             :username => GH.settings['amqp']['username'],
             :password => GH.settings['amqp']['password']) do |connection|

    channel = AMQP::Channel.new(connection)
    exchange = channel.topic(GH.settings['amqp']['exchange'],
                             :durable => true, :auto_delete => false)

    # What to do when the user hits Ctrl+c
    show_stopper = Proc.new {
      connection.close { EventMachine.stop }
    }

    # Read next 1000 items and queue them
    read_and_publish = Proc.new {

      read = 0
      col_info[collection][:col].find(what.merge(from),
                                      :skip => num_read,
                                      :limit => 1000).each do |e|

        payload = GH.read_value(e, col_info[collection][:payload])
        # Whole documents are serialised to JSON. Any other value (for
        # example a commit id) is published as is rather than being
        # replaced with nil.
        if payload.class == BSON::OrderedHash
          payload.delete "_id" # Inserted by MongoDB on event insert
          payload = payload.to_json
        end
        read += 1

        unq = GH.read_value(e, col_info[collection][:unq])
        # raise, not throw: throw is for catch/throw control flow and
        # Exception(...) is not a callable method.
        raise "Unique value can only be a String" unless unq.is_a?(String)

        key = col_info[collection][:routekey] % unq

        exchange.publish payload, :persistent => true, :routing_key => key

        num_read += 1
        puts("Publish id = #{unq} (#{num_read} total)") if options[:verbose]
        # The running count is stored as the tag that the broker's
        # confirmation is matched against in publisher_event.
        awaiting_ack << num_read
      end

      # Nothing new in the DB and no msgs waiting ack
      if read == 0 && awaiting_ack.size == 0
        puts("Finished reading, exiting")
        show_stopper.call
      end
    }

    # Remove acknowledged or failed msg tags from the queue
    # Trigger more messages to be read when ack msg queue size drops to zero
    publisher_event = Proc.new { |ack|
      if ack.multiple then
        awaiting_ack.delete_if { |x| x <= ack.delivery_tag }
      else
        awaiting_ack.delete ack.delivery_tag
      end

      if awaiting_ack.size == 0
        puts("ACKS.size= #{awaiting_ack.size}") if options[:verbose]
        EventMachine.next_tick do
          read_and_publish.call
        end
      end
    }

    # Await publisher confirms
    channel.confirm_select

    # Callback when confirms have arrived
    channel.on_ack do |ack|
      puts "ACK: tag=#{ack.delivery_tag}, mul=#{ack.multiple}" if options[:verbose]
      publisher_event.call(ack)
    end

    # Callback when confirms failed.
    channel.on_nack do |nack|
      puts "NACK: tag=#{nack.delivery_tag}, mul=#{nack.multiple}" if options[:verbose]
      publisher_event.call(nack)
    end

    # Signal handlers
    Signal.trap('INT', show_stopper)
    Signal.trap('TERM', show_stopper)

    # Trigger start processing
    EventMachine.add_timer(0.1) do
      read_and_publish.call
    end
  end
end
|
225
|
+
|
226
|
+
private
|
227
|
+
|
228
|
+
# Tells whether a filter given on the command line is usable.
#
# filter - String of the form attr=regexp. Only the first "=" separates
#          the attribute from the pattern, so the pattern may itself
#          contain "=".
#
# Returns true when a non-empty pattern is present and compiles as a
# Regexp, false otherwise.
def is_filter_valid?(filter)
  (_attr, pattern) = filter.split(/=/, 2)
  return false if pattern.nil? || pattern.empty?

  begin
    Regexp.new(pattern)
    true
  rescue RegexpError
    false
  end
end
|
238
|
+
end
|
239
|
+
|
240
|
+
GHTLoad.run
|
241
|
+
|
242
|
+
#vim: set filetype=ruby expandtab tabstop=2 shiftwidth=2 autoindent smartindent:
|