ghtorrent 0.4 → 0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +24 -0
- data/Gemfile +17 -0
- data/Gemfile.lock +40 -0
- data/README.md +23 -22
- data/bin/ght-data-retrieval +66 -24
- data/bin/ght-load +41 -19
- data/bin/ght-mirror-events +13 -16
- data/bin/ght-rm-dupl +119 -77
- data/lib/ghtorrent.rb +14 -4
- data/lib/ghtorrent/adapters/base_adapter.rb +17 -5
- data/lib/ghtorrent/adapters/mongo_persister.rb +122 -56
- data/lib/ghtorrent/api_client.rb +151 -16
- data/lib/ghtorrent/bson_orderedhash.rb +23 -0
- data/lib/ghtorrent/cache.rb +97 -0
- data/lib/ghtorrent/command.rb +43 -25
- data/lib/ghtorrent/gh_torrent_exception.rb +6 -0
- data/lib/ghtorrent/ghtorrent.rb +615 -164
- data/lib/ghtorrent/hash.rb +11 -0
- data/lib/ghtorrent/logging.rb +11 -7
- data/lib/ghtorrent/migrations/001_init_schema.rb +3 -3
- data/lib/ghtorrent/migrations/002_add_external_ref_ids.rb +2 -0
- data/lib/ghtorrent/migrations/003_add_orgs.rb +4 -1
- data/lib/ghtorrent/migrations/004_add_commit_comments.rb +4 -2
- data/lib/ghtorrent/migrations/005_add_repo_collaborators.rb +2 -0
- data/lib/ghtorrent/migrations/006_add_watchers.rb +2 -0
- data/lib/ghtorrent/migrations/007_add_pull_requests.rb +64 -0
- data/lib/ghtorrent/migrations/008_add_project_unq.rb +23 -0
- data/lib/ghtorrent/migrations/009_add_project_commit.rb +27 -0
- data/lib/ghtorrent/migrations/010_add_forks.rb +28 -0
- data/lib/ghtorrent/migrations/mysql_defaults.rb +6 -0
- data/lib/ghtorrent/persister.rb +3 -0
- data/lib/ghtorrent/retriever.rb +298 -102
- data/lib/ghtorrent/settings.rb +20 -1
- data/lib/ghtorrent/time.rb +5 -0
- data/lib/ghtorrent/utils.rb +22 -4
- data/lib/version.rb +5 -0
- metadata +173 -145
- data/lib/ghtorrent/call_stack.rb +0 -91
data/CHANGELOG
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
= Version 0.5
|
2
|
+
|
3
|
+
* Generic methods for retrieving items that are bound to repositories
|
4
|
+
* Processing of pull requests with commits, comments and history
|
5
|
+
* Processing of project forks
|
6
|
+
* New tool (ght-load) to filter and load events to the queue
|
7
|
+
* New tool (ght-rm-dupl) to delete duplicate entries from collections (events & commits supported)
|
8
|
+
* Project-wide caching of request results for multi-page requests
|
9
|
+
* Better logging in various places
|
10
|
+
* Better defaults for MySQL (UTF8 + InnoDB tables)
|
11
|
+
* Commits are now separated from projects. Project forks can share commits.
|
12
|
+
* Support for setting the IP address to use for retrieval on multi-homed hosts
|
13
|
+
* Compatibility with Ruby 1.9 (now default) and JRuby
|
14
|
+
* Proper modularization, following the cake design pattern
|
15
|
+
* Never retrieve arrays of results from MongoDB
|
16
|
+
|
17
|
+
= Version 0.4
|
18
|
+
|
19
|
+
* Implement support for retrieving watches and project members
|
20
|
+
* Support for processing FollowEvents, WatchEvents, CommitCommentEvents, MemberEvents
|
21
|
+
* Projects are exclusively associated to users
|
22
|
+
* Remove dependence on Github API v2
|
23
|
+
* Remove license headers
|
24
|
+
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
GEM
|
2
|
+
remote: https://rubygems.org/
|
3
|
+
specs:
|
4
|
+
amq-client (0.9.4)
|
5
|
+
amq-protocol (>= 0.9.4)
|
6
|
+
eventmachine
|
7
|
+
amq-protocol (0.9.4)
|
8
|
+
amqp (0.9.7)
|
9
|
+
amq-client (~> 0.9.4)
|
10
|
+
amq-protocol (>= 0.9.4)
|
11
|
+
eventmachine
|
12
|
+
bson (1.6.4)
|
13
|
+
bson (1.6.4-java)
|
14
|
+
bson_ext (1.6.4)
|
15
|
+
bson (~> 1.6.4)
|
16
|
+
daemons (1.1.8)
|
17
|
+
eventmachine (0.12.10)
|
18
|
+
eventmachine (0.12.10-java)
|
19
|
+
jdbc-mysql (5.1.13)
|
20
|
+
json (1.7.3)
|
21
|
+
mongo (1.6.4)
|
22
|
+
bson (~> 1.6.4)
|
23
|
+
sequel (3.37.0)
|
24
|
+
sqlite3 (1.3.6)
|
25
|
+
trollop (1.16.2)
|
26
|
+
|
27
|
+
PLATFORMS
|
28
|
+
java
|
29
|
+
ruby
|
30
|
+
|
31
|
+
DEPENDENCIES
|
32
|
+
amqp
|
33
|
+
bson_ext
|
34
|
+
daemons
|
35
|
+
jdbc-mysql
|
36
|
+
json
|
37
|
+
mongo
|
38
|
+
sequel
|
39
|
+
sqlite3
|
40
|
+
trollop
|
data/README.md
CHANGED
@@ -10,18 +10,18 @@ GHTorrent relies on the following software to work:
|
|
10
10
|
* MongoDB > 2.0
|
11
11
|
* RabbitMQ >= 2.7
|
12
12
|
* An SQL database compatible with [Sequel](http://sequel.rubyforge.org/rdoc/files/doc/opening_databases_rdoc.html).
|
13
|
-
GHTorrent is tested with
|
13
|
+
GHTorrent is tested mainly with MySQL, so your mileage may vary if you are using other databases.
|
14
14
|
|
15
|
-
GHTorrent is written in Ruby (tested with 1.
|
15
|
+
GHTorrent is written in Ruby (tested with 1.9 and JRuby). To install
|
16
16
|
it as a Gem do:
|
17
17
|
|
18
18
|
<code>
|
19
19
|
sudo gem install ghtorrent
|
20
20
|
</code>
|
21
21
|
|
22
|
-
Depending on which SQL database you want to use, install the appropriate
|
23
|
-
GHTorrent already installs the `sqlite3` gem (if it fails,
|
24
|
-
package for `sqlite3` for your system).
|
22
|
+
Depending on which SQL database you want to use, install the appropriate
|
23
|
+
dependency gem. GHTorrent already installs the `sqlite3` gem (if it fails,
|
24
|
+
install the development package for `sqlite3` for your system).
|
25
25
|
|
26
26
|
<code>
|
27
27
|
sudo gem install mysql2 #or postgres
|
@@ -35,10 +35,10 @@ file to a file in your home directory. All provided scripts accept the `-c`
|
|
35
35
|
option, which you can use to pass the location of the configuration file as
|
36
36
|
a parameter.
|
37
37
|
|
38
|
-
Edit the MongoDB and AMQP
|
39
|
-
|
40
|
-
|
41
|
-
|
38
|
+
Edit the MongoDB and AMQP configuration options accordingly. The scripts
|
39
|
+
require accounts with permissions to create queues and exchanges in the AMQP
|
40
|
+
queue, collections in MongoDB and tables in the selected SQL database,
|
41
|
+
respectively.
|
42
42
|
|
43
43
|
To prepare MongoDB:
|
44
44
|
|
@@ -76,25 +76,26 @@ to retrieve data in parallel on the [Wiki](https://github.com/gousiosg/github-mi
|
|
76
76
|
|
77
77
|
### Running
|
78
78
|
|
79
|
-
To retrieve data with GHTorrent
|
79
|
+
To retrieve data with GHTorrent:
|
80
80
|
|
81
81
|
* `ght-mirror-events.rb` periodically polls Github's event
|
82
|
-
queue (`https://api.github.com/events`), stores all new events in the
|
83
|
-
collection in MongoDB and posts them to the `github` exchange in
|
82
|
+
queue (`https://api.github.com/events`), stores all new events in the
|
83
|
+
`events` collection in MongoDB and posts them to the `github` exchange in
|
84
|
+
RabbitMQ.
|
84
85
|
|
85
86
|
* `ght-data_retrieval.rb` creates queues that route posted events to processor
|
86
87
|
functions, which in turn use the appropriate Github API call to retrieve the
|
87
88
|
linked contents, extract metadata to store in the SQL database and store the
|
88
|
-
retrieved data in the appropriate collection in Mongo, to avoid further
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
Both scripts can be run concurrently on more than one hosts, for resilience
|
93
|
-
performance reasons. To catch up with Github's event stream, it is
|
94
|
-
run `
|
95
|
-
one instance of `
|
96
|
-
throttling mechanisms to keep API usage whithin the
|
97
|
-
(currently 5000 reqs/hr).
|
89
|
+
retrieved data in the appropriate collection in Mongo, to avoid further API
|
90
|
+
calls. Data in the SQL database contain pointers (the MongoDB key) to the
|
91
|
+
"raw" data in MongoDB.
|
92
|
+
|
93
|
+
Both scripts can be run concurrently on more than one host, for resilience
|
94
|
+
and performance reasons. To catch up with Github's event stream, it is
|
95
|
+
usually enough to run `ght-mirror-events` on one host. To collect all data
|
96
|
+
pointed to by each event, one instance of `ght-data-retrieval` is not enough.
|
97
|
+
Both scripts employ throttling mechanisms to keep API usage within the
|
98
|
+
limits imposed by Github (currently 5000 reqs/hr).
|
98
99
|
|
99
100
|
#### Data
|
100
101
|
|
data/bin/ght-data-retrieval
CHANGED
@@ -1,30 +1,33 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
1
3
|
require 'rubygems'
|
2
4
|
require 'amqp'
|
3
5
|
require 'json'
|
4
|
-
require 'ghtorrent'
|
5
6
|
require 'pp'
|
6
7
|
|
8
|
+
require 'ghtorrent/ghtorrent'
|
9
|
+
require 'ghtorrent/settings'
|
10
|
+
require 'ghtorrent/logging'
|
11
|
+
require 'ghtorrent/command'
|
12
|
+
|
7
13
|
class GHTDataRetrieval < GHTorrent::Command
|
8
14
|
|
9
15
|
include GHTorrent::Settings
|
10
16
|
include GHTorrent::Logging
|
11
17
|
|
12
|
-
attr_reader :settings, :name
|
13
|
-
|
14
18
|
def parse(msg)
|
15
19
|
JSON.parse(msg)
|
16
20
|
end
|
17
21
|
|
18
|
-
def PushEvent(
|
19
|
-
data = parse evt
|
22
|
+
def PushEvent(data)
|
20
23
|
data['payload']['commits'].each do |c|
|
21
24
|
url = c['url'].split(/\//)
|
25
|
+
|
22
26
|
@gh.get_commit url[4], url[5], url[7]
|
23
27
|
end
|
24
28
|
end
|
25
29
|
|
26
|
-
def WatchEvent(
|
27
|
-
data = parse evt
|
30
|
+
def WatchEvent(data)
|
28
31
|
owner = data['repo']['name'].split(/\//)[0]
|
29
32
|
repo = data['repo']['name'].split(/\//)[1]
|
30
33
|
watcher = data['actor']['login']
|
@@ -33,8 +36,7 @@ class GHTDataRetrieval < GHTorrent::Command
|
|
33
36
|
@gh.get_watcher owner, repo, watcher, created_at
|
34
37
|
end
|
35
38
|
|
36
|
-
def FollowEvent(
|
37
|
-
data = parse evt
|
39
|
+
def FollowEvent(data)
|
38
40
|
follower = data['actor']['login']
|
39
41
|
followed = data['payload']['target']['login']
|
40
42
|
created_at = data['created_at']
|
@@ -42,8 +44,7 @@ class GHTDataRetrieval < GHTorrent::Command
|
|
42
44
|
@gh.get_follower(follower, followed, created_at)
|
43
45
|
end
|
44
46
|
|
45
|
-
def MemberEvent(
|
46
|
-
data = parse evt
|
47
|
+
def MemberEvent(data)
|
47
48
|
owner = data['actor']['login']
|
48
49
|
repo = data['repo']['name'].split(/\//)[1]
|
49
50
|
new_member = data['payload']['member']['login']
|
@@ -52,8 +53,7 @@ class GHTDataRetrieval < GHTorrent::Command
|
|
52
53
|
@gh.get_project_member(owner, repo, new_member, created_at)
|
53
54
|
end
|
54
55
|
|
55
|
-
def CommitCommentEvent(
|
56
|
-
data = parse evt
|
56
|
+
def CommitCommentEvent(data)
|
57
57
|
user = data['actor']['login']
|
58
58
|
repo = data['repo']['name'].split(/\//)[1]
|
59
59
|
id = data['payload']['comment']['id']
|
@@ -62,25 +62,63 @@ class GHTDataRetrieval < GHTorrent::Command
|
|
62
62
|
@gh.get_commit_comment(user, repo, id, created_at)
|
63
63
|
end
|
64
64
|
|
65
|
+
def PullRequestEvent(data)
|
66
|
+
owner = data['payload']['pull_request']['base']['repo']['owner']['login']
|
67
|
+
repo = data['payload']['pull_request']['base']['repo']['name']
|
68
|
+
pullreq_id = data['payload']['number']
|
69
|
+
action = data['payload']['action']
|
70
|
+
created_at = data['created_at']
|
71
|
+
|
72
|
+
@gh.get_pull_request(owner, repo, pullreq_id, action, created_at)
|
73
|
+
end
|
74
|
+
|
75
|
+
def ForkEvent(data)
|
76
|
+
owner = data['repo']['name'].split(/\//)[0]
|
77
|
+
repo = data['repo']['name'].split(/\//)[1]
|
78
|
+
fork_id = data['payload']['forkee']['id']
|
79
|
+
created_at = data['created_at']
|
80
|
+
|
81
|
+
@gh.get_fork(owner, repo, fork_id, created_at)
|
82
|
+
end
|
83
|
+
|
84
|
+
def PullRequestReviewCommentEvent(data)
|
85
|
+
owner = data['repo']['name'].split(/\//)[0]
|
86
|
+
repo = data['repo']['name'].split(/\//)[1]
|
87
|
+
comment_id = data['payload']['comment']['id']
|
88
|
+
pullreq_id = data['payload']['comment']['_links']['pull_request']['href'].split(/\//)[-1]
|
89
|
+
created_at = data['created_at']
|
90
|
+
|
91
|
+
@gh.get_pullreq_comment(owner, repo, pullreq_id, comment_id, created_at)
|
92
|
+
end
|
93
|
+
|
94
|
+
def IssueCommentEvent(data)
|
95
|
+
owner = data['repo']['name'].split(/\//)[0]
|
96
|
+
repo = data['repo']['name'].split(/\//)[1]
|
97
|
+
pullreq_id = data['payload']['forkee']['id']
|
98
|
+
created_at = data['created_at']
|
99
|
+
|
100
|
+
@gh.get_issue_comment(owner, repo, issue_id, comment_id, created_at)
|
101
|
+
end
|
102
|
+
|
65
103
|
def handlers
|
66
|
-
%w(PushEvent WatchEvent FollowEvent MemberEvent CommitCommentEvent)
|
104
|
+
%w(PushEvent WatchEvent FollowEvent MemberEvent CommitCommentEvent PullRequestEvent ForkEvent PullRequestReviewCommentEvent)
|
105
|
+
#%w(PullRequestReviewCommentEvent)
|
67
106
|
end
|
68
107
|
|
69
|
-
def
|
70
|
-
|
108
|
+
def logger
|
109
|
+
@gh.logger
|
71
110
|
end
|
72
111
|
|
73
112
|
def go
|
74
|
-
@gh = GHTorrent::Mirror.new(
|
75
|
-
@settings = @gh.settings
|
113
|
+
@gh = GHTorrent::Mirror.new(@settings)
|
76
114
|
|
77
115
|
# Graceful exit
|
78
116
|
Signal.trap('INT') {
|
79
|
-
info "Received SIGINT, exiting"
|
117
|
+
info "GHTDataRetrieval: Received SIGINT, exiting"
|
80
118
|
AMQP.stop { EM.stop }
|
81
119
|
}
|
82
120
|
Signal.trap('TERM') {
|
83
|
-
info "Received SIGTERM, exiting"
|
121
|
+
info "GHTDataRetrieval: Received SIGTERM, exiting"
|
84
122
|
AMQP.stop { EM.stop }
|
85
123
|
}
|
86
124
|
|
@@ -89,7 +127,7 @@ class GHTDataRetrieval < GHTorrent::Command
|
|
89
127
|
:username => config(:amqp_username),
|
90
128
|
:password => config(:amqp_password)) do |connection|
|
91
129
|
|
92
|
-
channel = AMQP::Channel.new(connection, :prefetch =>
|
130
|
+
channel = AMQP::Channel.new(connection, :prefetch => config(:amqp_prefetch))
|
93
131
|
exchange = channel.topic(config(:amqp_exchange), :durable => true,
|
94
132
|
:auto_delete => false)
|
95
133
|
|
@@ -97,21 +135,25 @@ class GHTDataRetrieval < GHTorrent::Command
|
|
97
135
|
queue = channel.queue("#{h}s", {:durable => true})\
|
98
136
|
.bind(exchange, :routing_key => "evt.#{h}")
|
99
137
|
|
100
|
-
info "Binding handler #{h} to routing key evt.#{h}"
|
138
|
+
info "GHTDataRetrieval: Binding handler #{h} to routing key evt.#{h}"
|
101
139
|
|
102
140
|
queue.subscribe(:ack => true) do |headers, msg|
|
103
141
|
begin
|
104
|
-
|
142
|
+
data = parse(msg)
|
143
|
+
info "GHTDataRetrieval: Processing event: #{data['type']}-#{data['id']}"
|
144
|
+
send(h, data)
|
105
145
|
headers.ack
|
146
|
+
info "GHTDataRetrieval: Processed event: #{data['type']}-#{data['id']}"
|
106
147
|
rescue Exception => e
|
107
148
|
# Give a message a chance to be reprocessed
|
108
149
|
if headers.redelivered?
|
150
|
+
data = parse(msg)
|
151
|
+
warn "GHTDataRetrieval: Could not process event: #{data['type']}-#{data['id']}"
|
109
152
|
headers.reject(:requeue => false)
|
110
153
|
else
|
111
154
|
headers.reject(:requeue => true)
|
112
155
|
end
|
113
156
|
|
114
|
-
#pp JSON.parse(msg)
|
115
157
|
STDERR.puts e
|
116
158
|
STDERR.puts e.backtrace.join("\n")
|
117
159
|
end
|
data/bin/ght-load
CHANGED
@@ -1,35 +1,48 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
1
3
|
require 'rubygems'
|
2
|
-
require 'ghtorrent-old'
|
3
4
|
require 'mongo'
|
4
5
|
require 'amqp'
|
5
6
|
require 'set'
|
6
7
|
require 'eventmachine'
|
7
|
-
require 'optparse'
|
8
|
-
require 'ostruct'
|
9
8
|
require 'pp'
|
10
9
|
require "amqp/extensions/rabbitmq"
|
11
10
|
|
11
|
+
require 'ghtorrent/settings'
|
12
|
+
require 'ghtorrent/logging'
|
13
|
+
require 'ghtorrent/persister'
|
14
|
+
require 'ghtorrent/command'
|
15
|
+
require 'ghtorrent/bson_orderedhash'
|
16
|
+
|
12
17
|
class GHTLoad < GHTorrent::Command
|
13
18
|
|
19
|
+
include GHTorrent::Settings
|
20
|
+
include GHTorrent::Persister
|
21
|
+
|
14
22
|
def col_info()
|
15
23
|
{
|
16
24
|
:commits => {
|
17
25
|
:name => "commits",
|
18
26
|
:payload => "commit.id",
|
19
27
|
:unq => "commit.id",
|
20
|
-
:col =>
|
28
|
+
:col => persister.get_underlying_connection.collection(:commits.to_s),
|
21
29
|
:routekey => "commit.%s"
|
22
30
|
},
|
23
31
|
:events => {
|
24
32
|
:name => "events",
|
25
33
|
:payload => "",
|
26
34
|
:unq => "type",
|
27
|
-
:col =>
|
35
|
+
:col => persister.get_underlying_connection.collection(:events.to_s),
|
28
36
|
:routekey => "evt.%s"
|
29
37
|
}
|
30
38
|
}
|
31
39
|
end
|
32
40
|
|
41
|
+
def persister
|
42
|
+
@persister ||= connect(:mongo, @settings)
|
43
|
+
@persister
|
44
|
+
end
|
45
|
+
|
33
46
|
def prepare_options(options)
|
34
47
|
options.banner <<-BANNER
|
35
48
|
Loads object ids from a collection to a queue for further processing.
|
@@ -41,6 +54,8 @@ Loads object ids from a collection to a queue for further processing.
|
|
41
54
|
|
42
55
|
options.opt :earliest, 'Seconds since epoch of earliest item to load',
|
43
56
|
:short => 'e', :default => 0, :type => :int
|
57
|
+
options.opt :number, 'Number of items to load (-1 means all)',
|
58
|
+
:short => 'n', :type => :int, :default => -1
|
44
59
|
options.opt :filter,
|
45
60
|
'Filter items by regexp on item attributes: item.attr=regexp',
|
46
61
|
:short => 'f', :type => String, :multi => true
|
@@ -63,10 +78,6 @@ Loads object ids from a collection to a queue for further processing.
|
|
63
78
|
end
|
64
79
|
|
65
80
|
def go
|
66
|
-
@gh = GHTorrent::Mirror.new(options[:config])
|
67
|
-
@settings = @gh.settings
|
68
|
-
|
69
|
-
GH.init(options[:config])
|
70
81
|
# Message tags await publisher ack
|
71
82
|
awaiting_ack = SortedSet.new
|
72
83
|
|
@@ -82,6 +93,7 @@ Loads object ids from a collection to a queue for further processing.
|
|
82
93
|
|
83
94
|
puts "Loading from collection #{collection}"
|
84
95
|
puts "Loading items after #{Time.at(options[:earliest])}" if options[:verbose]
|
96
|
+
puts "Loading #{options[:number]} items" if options[:verbose] && options[:number] != -1
|
85
97
|
|
86
98
|
what = case
|
87
99
|
when options[:filter].is_a?(Array)
|
@@ -98,13 +110,13 @@ Loads object ids from a collection to a queue for further processing.
|
|
98
110
|
|
99
111
|
(puts "Mongo filter:"; pp what.merge(from)) if options[:verbose]
|
100
112
|
|
101
|
-
AMQP.start(:host =>
|
102
|
-
:port =>
|
103
|
-
:username =>
|
104
|
-
:password =>
|
113
|
+
AMQP.start(:host => config(:amqp_host),
|
114
|
+
:port => config(:amqp_port),
|
115
|
+
:username => config(:amqp_username),
|
116
|
+
:password => config(:amqp_password)) do |connection|
|
105
117
|
|
106
118
|
channel = AMQP::Channel.new(connection)
|
107
|
-
exchange = channel.topic(
|
119
|
+
exchange = channel.topic(config(:amqp_exchange),
|
108
120
|
:durable => true, :auto_delete => false)
|
109
121
|
|
110
122
|
# What to do when the user hits Ctrl+c
|
@@ -115,18 +127,28 @@ Loads object ids from a collection to a queue for further processing.
|
|
115
127
|
# Read next 1000 items and queue them
|
116
128
|
read_and_publish = Proc.new {
|
117
129
|
|
130
|
+
to_read = if options.number == -1
|
131
|
+
1000
|
132
|
+
else
|
133
|
+
if options.number - num_read - 1 <= 0
|
134
|
+
-1
|
135
|
+
else
|
136
|
+
options.number - num_read - 1
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
118
140
|
read = 0
|
119
141
|
col_info[collection][:col].find(what.merge(from),
|
120
142
|
:skip => num_read,
|
121
|
-
:limit =>
|
143
|
+
:limit => to_read).each do |e|
|
122
144
|
|
123
|
-
payload =
|
145
|
+
payload = read_value(e, col_info[collection][:payload])
|
124
146
|
payload = if payload.class == BSON::OrderedHash
|
125
147
|
payload.delete "_id" # Inserted by MongoDB on event insert
|
126
148
|
payload.to_json
|
127
149
|
end
|
128
150
|
read += 1
|
129
|
-
unq =
|
151
|
+
unq = read_value(e, col_info[collection][:unq])
|
130
152
|
if unq.class != String or unq.nil? then
|
131
153
|
throw Exception("Unique value can only be a String")
|
132
154
|
end
|
@@ -136,12 +158,12 @@ Loads object ids from a collection to a queue for further processing.
|
|
136
158
|
exchange.publish payload, :persistent => true, :routing_key => key
|
137
159
|
|
138
160
|
num_read += 1
|
139
|
-
puts("Publish id = #{unq} (#{num_read} total)") if options.verbose
|
161
|
+
puts("Publish id = #{payload[unq]} (#{num_read} total)") if options.verbose
|
140
162
|
awaiting_ack << num_read
|
141
163
|
end
|
142
164
|
|
143
165
|
# Nothing new in the DB and no msgs waiting ack
|
144
|
-
if read == 0 and awaiting_ack.size == 0
|
166
|
+
if (read == 0 and awaiting_ack.size == 0) or to_read == -1
|
145
167
|
puts("Finished reading, exiting")
|
146
168
|
show_stopper.call
|
147
169
|
end
|