disbatch 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +27 -0
- data/README +0 -0
- data/bin/disbatchd +74 -0
- data/disbatch_specification.txt +205 -0
- data/doc/Disbatch/NoNodeError.html +128 -0
- data/doc/Disbatch/Node.html +931 -0
- data/doc/Disbatch/Plugin/Hello.html +253 -0
- data/doc/Disbatch/Plugin.html +482 -0
- data/doc/Disbatch/Plugins.html +397 -0
- data/doc/Disbatch/Queue/Task.html +1671 -0
- data/doc/Disbatch/Queue.html +1043 -0
- data/doc/Disbatch.html +602 -0
- data/doc/Mongo.html +236 -0
- data/doc/_index.html +186 -0
- data/doc/class_list.html +47 -0
- data/doc/css/common.css +1 -0
- data/doc/css/full_list.css +55 -0
- data/doc/css/style.css +322 -0
- data/doc/file.README.html +66 -0
- data/doc/file_list.html +49 -0
- data/doc/frames.html +13 -0
- data/doc/index.html +66 -0
- data/doc/js/app.js +205 -0
- data/doc/js/full_list.js +167 -0
- data/doc/js/jquery.js +16 -0
- data/doc/method_list.html +446 -0
- data/doc/top-level-namespace.html +103 -0
- data/lib/disbatch/errors.rb +6 -0
- data/lib/disbatch/node.rb +105 -0
- data/lib/disbatch/plugin/hello.rb +33 -0
- data/lib/disbatch/plugin.rb +40 -0
- data/lib/disbatch/queue/task.rb +142 -0
- data/lib/disbatch/queue.rb +101 -0
- data/lib/disbatch.rb +61 -0
- metadata +147 -0
data/LICENSE
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
Copyright (c) 2011, Matthew Berg <mberg@synacor.com>
|
2
|
+
All rights reserved.
|
3
|
+
|
4
|
+
Redistribution and use in source and binary forms, with or without
|
5
|
+
modification, are permitted provided that the following conditions are met:
|
6
|
+
|
7
|
+
* Redistributions of source code must retain the above copyright notice,
|
8
|
+
this list of conditions and the following disclaimer.
|
9
|
+
|
10
|
+
* Redistributions in binary form must reproduce the above copyright
|
11
|
+
notice, this list of conditions and the following disclaimer in the
|
12
|
+
documentation and/or other materials provided with the distribution.
|
13
|
+
|
14
|
+
* The names of its contributors may not be used to endorse or promote
|
15
|
+
products derived from this software without specific prior written
|
16
|
+
permission.
|
17
|
+
|
18
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
19
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
20
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
21
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
22
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
23
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
24
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
25
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
26
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
27
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/README
ADDED
File without changes
|
data/bin/disbatchd
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
#!/usr/bin/env ruby
#
# disbatchd -- Disbatch execution node daemon.
#
# Registers this node, loads the queue plugins, then polls every known queue
# on an EventMachine periodic timer and runs pending tasks in worker threads.
# Each queue gets its own ThreadGroup, and a queue is only given new workers
# while that group holds fewer than `max` threads.

require 'rubygems'

require 'disbatch'
require 'eventmachine'
require 'json'
require 'trollop'

opts = Trollop.options do
  opt 'config', 'the disbatchd config file', :default => '/etc/disbatch/disbatchd.conf'
  opt 'max', 'maximum number of threads per queue', :default => 10
  opt 'plugins', 'path(s) to dispatch plugins', :multi => true
  opt 'force', 'force node registration'
end

config_file = opts['config']

# File.exist? is used because the File.exists? alias was removed in Ruby 3.2.
config = File.exist?(config_file) ? JSON.parse(File.read(config_file)) : {}

# Hash#merge lets keys read from the config file override the parsed options.
config = opts.merge(config)

force = config['force'] || false
max   = config['max']

# `force` is handed straight to the registration call.
node = Disbatch.node.register(force)

# Array() turns a missing or nil 'plugins' entry into an empty list, so the
# user-supplied plugin paths are simply skipped instead of raising.
Array(config['plugins']).each { |dir| Disbatch::Plugin.init_all(dir) }

# Always load the plugins that ship alongside this executable.
bundled_plugins = File.join(File.dirname(File.expand_path(__FILE__)),
                            '..', 'lib', 'disbatch', 'plugin', '**', '*.rb')
Disbatch::Plugin.init_all(bundled_plugins)

# Release the node before exiting on any of the usual termination signals.
%w[TERM INT QUIT].each do |signal|
  trap(signal) { node.release; exit }
end

EventMachine.run do
  queues = Disbatch::Queue.get_all

  queues.each do |queue|
    # Skip queues whose plugin lookup returns nothing on this node.
    next unless Disbatch::Plugin[queue.plugin]

    puts "Adding #{queue.plugin} runner for #{queue.id}"

    tg = ThreadGroup.new

    EventMachine.add_periodic_timer(0.1) do
      # Only look at the queue length when there is thread budget left.
      active = tg.list.length
      next unless active < max

      pending = queue.length
      next unless pending > 0

      # Start one worker per pending task, capped at the remaining budget.
      [pending, max - active].min.times do
        worker = Thread.new do
          task = queue.pop

          # An assigned local is always `defined?`, so only the nil and
          # params checks carry information here.
          unless task && task.params
            puts 'Huh? Got invalid task back.'
            next
          end

          begin
            task.execute!
          rescue => e
            puts 'Damn. Execution failed.'
            # Report the cause as well, so a failure can be diagnosed.
            warn "#{e.class}: #{e.message}"
            task.release
          end
        end

        tg.add(worker)
      end
    end
  end
end
|
@@ -0,0 +1,205 @@
|
|
1
|
+
Disbatch Execution Node Specification 1.0
|
2
|
+
Matt Busigin <mbusigin@gmail.com>
|
3
|
+
May, 2011
|
4
|
+
|
5
|
+
Disbatch is a multi-platform, multi-language, elastic, distributed batch
|
6
|
+
processing framework. This document outlines the protocol which a Disbatch
|
7
|
+
Execution Node (DEN) must follow to correctly operate with Disbatch Command
|
8
|
+
Nodes (DCN), as well as other DENs operating under the same
|
9
|
+
network/database.
|
10
|
+
|
11
|
+
OVERVIEW
|
12
|
+
|
13
|
+
Disbatch is a multi-platform, multi-language elastic distributed batch
|
14
|
+
processing framework. It heavily leverages MongoDB and JSON. The core
|
15
|
+
components are MongoDB, N-number of DENs (Disbatch Execution Nodes), and
|
16
|
+
potentially one or more DCNs (Disbatch Command Nodes).
|
17
|
+
|
18
|
+
Execution is split among queues, which are of homogeneous type. Each queue
|
19
|
+
class is implemented as a plugin on the node. Queues whose requisite class is
|
20
|
+
not implemented on a node are ignored by that node.
|
21
|
+
|
22
|
+
MECHANICS
|
23
|
+
|
24
|
+
Each DEN is partitioned into queues. Each queue contains a pool of threads,
|
25
|
+
which are limited by the 'maxthreads' queue parameter on a per-node basis.
|
26
|
+
|
27
|
+
On DEN startup, the node registers itself, pulls the list of queues from the
|
28
|
+
MongoDB, which are created one-document per queue in the 'queues' collection.
|
29
|
+
|
30
|
+
TASK LIFECYCLE
|
31
|
+
|
32
|
+
A task is first created, generally on a DCN, either individually, or as a
|
33
|
+
result of a batch operation. Individual Tasks are saved to the 'tasks'
|
34
|
+
collection, with a reference back to the queue (by ObjectId) it sits under.
|
35
|
+
All tasks have a number of attributes in common: id, ctime, mtime, queue,
|
36
|
+
node, parameters, stdout, stderr, status.
|
37
|
+
|
38
|
+
The mechanism for creating the tasks is up to the implementor of the DCN, or
|
39
|
+
even through some other mechanism. It is their job to create tasks to this
|
40
|
+
specification so that DENs may execute them, and a DCN can provide an
|
41
|
+
interface into the data.
|
42
|
+
|
43
|
+
A task is created, initialised with parameters, its status set to Created,
|
44
|
+
and its node set to Unclaimed. Upon each Scheduling Interval, every DEN will
|
45
|
+
seek out these Created and Unclaimed tasks, putting them into a Pending and
|
46
|
+
Claimed state up until the per-node maximum thread threshold (Maxthreads) is
|
47
|
+
saturated.
|
48
|
+
|
49
|
+
When the DEN is ready, the Task is actually put into the Running status. The
|
50
|
+
plugin itself may report back status at any interval it likes, and it may
|
51
|
+
alter the task object any way it likes, but it is the responsibility of the
|
52
|
+
DEN to assure the status is up to date after it is complete, whether the
|
53
|
+
plugin has adjusted the status correctly or not.
|
54
|
+
|
55
|
+
DISBATCH EXECUTION NODE RESPONSIBILITIES
|
56
|
+
|
57
|
+
REQUIREMENTS
|
58
|
+
|
59
|
+
Each DEN:
|
60
|
+
|
61
|
+
MUST have a unique Node ID
|
62
|
+
|
63
|
+
MUST register itself via the DEN Node Registration Protocol
|
64
|
+
|
65
|
+
MUST execute tasks via the DEN Task Execution Protocol
|
66
|
+
|
67
|
+
NODE ID
|
68
|
+
|
69
|
+
Node ID should be short (1-8 characters) and follow the standard C identifier methodology:
|
70
|
+
|
71
|
+
[a-zA-Z][a-zA-Z_0-9]*
|
72
|
+
|
73
|
+
|
74
|
+
STARTUP & INITIALISATION
|
75
|
+
|
76
|
+
Before a DEN starts processing tasks, it is obligated to clean up any tasks
|
77
|
+
that were not put into a completed state.
|
78
|
+
|
79
|
+
|
80
|
+
DEN NODE REGISTRATION PROTOCOL
|
81
|
+
|
82
|
+
Upon startup, each node must register itself to the Nodes collection. The
|
83
|
+
following elements must be included:
|
84
|
+
|
85
|
+
_id: ObjectId() // Node ID
|
86
|
+
heartbeat: {...} // Heartbeat object (see
|
87
|
+
below)
|
88
|
+
version: "r-disbatch 1.21" // Engine version string
|
89
|
+
spec_version: "1.0" // DEN Specification
|
90
|
+
queuetypes: [ { class: "Synacor::Migration::IMAP2IMAP",
|
91
|
+
version: '1.1a' }, .. ] // Array of plugins enabled
|
92
|
+
|
93
|
+
|
94
|
+
|
95
|
+
HEARTBEAT OBJECT
|
96
|
+
|
97
|
+
The heartbeat object includes status updates on both the engine and any active queues.
|
98
|
+
|
99
|
+
datetime: ISODate("2011-05-12T06:00:07Z") // Heartbeat timestamp
|
100
|
+
pid: 1234 // System PID
|
101
|
+
queues: [...] // Queue heartbeat objects (see
|
102
|
+
below)
|
103
|
+
|
104
|
+
QUEUE HEARTBEAT OBJECT
|
105
|
+
|
106
|
+
All of this data is on a per-queue basis.
|
107
|
+
|
108
|
+
class: 'Synacor::Migration::IMAP2IMAP' // Plugin class
|
109
|
+
queue: ObjectId() // Queue reference
|
110
|
+
threads: 50 // Thread count
|
111
|
+
|
112
|
+
|
113
|
+
|
114
|
+
FULL NODE JSON OBJECT EXAMPLE
|
115
|
+
|
116
|
+
{
|
117
|
+
_id: 'mig01',
|
118
|
+
version: 'Disbatch.pl v2.0.2',
|
119
|
+
den_version: '1.0',
|
120
|
+
queuetypes: [
|
121
|
+
{
|
122
|
+
class: 'Synacor::Migration::IMAP2IMAP',
|
123
|
+
version: '1.1a'
|
124
|
+
},
|
125
|
+
{
|
126
|
+
class: 'Synacor::Migration::Zimbra::UserImport',
|
127
|
+
version: '0.9'
|
128
|
+
}
|
129
|
+
],
|
130
|
+
heartbeat: {
|
131
|
+
datetime: ISODate("2011-05-12T06:00:07Z"),
|
132
|
+
|
133
|
+
pid: 34182,
|
134
|
+
queues: [
|
135
|
+
{
|
136
|
+
class: 'Synacor::Migration::IMAP2IMAP',
|
137
|
+
queue: ObjectId(),
|
138
|
+
threads: 50,
|
139
|
+
idle: 15,
|
140
|
+
processing: 35
|
141
|
+
}
|
142
|
+
]
|
143
|
+
}
|
144
|
+
}
|
145
|
+
|
146
|
+
|
147
|
+
QUEUE DOCUMENT SPECIFICATION
|
148
|
+
|
149
|
+
The DENs will read from the "queues" collection, and can expect the following
|
150
|
+
format:
|
151
|
+
|
152
|
+
{
|
153
|
+
_id: 'insight1', // Queue ID
|
154
|
+
ctime: ISODate("2011-05-12T06:00:07Z"),// Create time
|
155
|
+
class: 'Synacor::Migration::IMAP2IMAP',// Class
|
156
|
+
description: 'Insight Primary Migration', // Description
|
157
|
+
maxthreads: 50, // Maximum per-node concurrency
|
158
|
+
|
159
|
+
nodes_pin: [ 'mig01', 'mig02' ], // Pin queue to these nodes (and nowhere else)
|
160
|
+
nodes_ignore: [ 'mig03', 'mig04' ] // OR: ignore on these nodes
|
161
|
+
}
|
162
|
+
|
163
|
+
The "nodes_pin" and "nodes_ignore" attributes are mutually exclusive. If
|
164
|
+
using pin, it is exclusive to the specified nodes. If using ignore, it will
|
165
|
+
run on all nodes except for those specified. If both are erroneously
|
166
|
+
provided, nodes_pin takes precedence over nodes_ignore.
|
167
|
+
|
168
|
+
Queue IDs can either be automatically generated, or provided as a uniquely
|
169
|
+
named user parameter.
|
170
|
+
|
171
|
+
|
172
|
+
TASK DOCUMENT SPECIFICATION
|
173
|
+
|
174
|
+
Each task the DEN pulls will be in the following format:
|
175
|
+
|
176
|
+
{
|
177
|
+
_id: ObjectId('238d892efadee'),
|
178
|
+
queue: 'insight1',
|
179
|
+
ctime: ISODate("2011-05-12T06:00:07Z"),// Create time
|
180
|
+
mtime: ISODate("2011-05-12T06:00:07Z"),// Modify time
|
181
|
+
parameters: { username: 'foo@foo.com', host: 'md08.foo.com' }, // Parameters
|
182
|
+
stdout: '', // Your task output goes here
|
183
|
+
stderr: '', // Your task errors go here
|
184
|
+
status: -2, // Task status code (described below)
|
185
|
+
node: -1
|
186
|
+
}
|
187
|
+
|
188
|
+
TASK STATUS CODE
|
189
|
+
|
190
|
+
-4: Blocked
|
191
|
+
-3: Terminated
|
192
|
+
-2: Created
|
193
|
+
-1: Claimed
|
194
|
+
0: Running
|
195
|
+
1: Concluded
|
196
|
+
|
197
|
+
|
198
|
+
TASK LIFECYCLE
|
199
|
+
|
200
|
+
Each task is initialised with its node as -1 (unclaimed) and status as -2 (created).
|
201
|
+
|
202
|
+
DENs can claim tasks from a queue in one of two ways:
|
203
|
+
|
204
|
+
1. findAndModify()
|
205
|
+
where: queue: <queue> and n
|
@@ -0,0 +1,128 @@
|
|
1
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
2
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
3
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
4
|
+
<head>
|
5
|
+
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
6
|
+
<title>
|
7
|
+
Exception: Disbatch::NoNodeError
|
8
|
+
|
9
|
+
— Documentation by YARD 0.7.3
|
10
|
+
|
11
|
+
</title>
|
12
|
+
|
13
|
+
<link rel="stylesheet" href="../css/style.css" type="text/css" media="screen" charset="utf-8" />
|
14
|
+
|
15
|
+
<link rel="stylesheet" href="../css/common.css" type="text/css" media="screen" charset="utf-8" />
|
16
|
+
|
17
|
+
<script type="text/javascript" charset="utf-8">
|
18
|
+
relpath = '..';
|
19
|
+
if (relpath != '') relpath += '/';
|
20
|
+
</script>
|
21
|
+
|
22
|
+
<script type="text/javascript" charset="utf-8" src="../js/jquery.js"></script>
|
23
|
+
|
24
|
+
<script type="text/javascript" charset="utf-8" src="../js/app.js"></script>
|
25
|
+
|
26
|
+
|
27
|
+
</head>
|
28
|
+
<body>
|
29
|
+
<script type="text/javascript" charset="utf-8">
|
30
|
+
if (window.top.frames.main) document.body.className = 'frames';
|
31
|
+
</script>
|
32
|
+
|
33
|
+
<div id="header">
|
34
|
+
<div id="menu">
|
35
|
+
|
36
|
+
<a href="../_index.html">Index (N)</a> »
|
37
|
+
<span class='title'><span class='object_link'><a href="../Disbatch.html" title="Disbatch (module)">Disbatch</a></span></span>
|
38
|
+
»
|
39
|
+
<span class="title">NoNodeError</span>
|
40
|
+
|
41
|
+
|
42
|
+
<div class="noframes"><span class="title">(</span><a href="." target="_top">no frames</a><span class="title">)</span></div>
|
43
|
+
</div>
|
44
|
+
|
45
|
+
<div id="search">
|
46
|
+
|
47
|
+
<a id="class_list_link" href="#">Class List</a>
|
48
|
+
|
49
|
+
<a id="method_list_link" href="#">Method List</a>
|
50
|
+
|
51
|
+
<a id="file_list_link" href="#">File List</a>
|
52
|
+
|
53
|
+
</div>
|
54
|
+
<div class="clear"></div>
|
55
|
+
</div>
|
56
|
+
|
57
|
+
<iframe id="search_frame"></iframe>
|
58
|
+
|
59
|
+
<div id="content"><h1>Exception: Disbatch::NoNodeError
|
60
|
+
|
61
|
+
|
62
|
+
|
63
|
+
</h1>
|
64
|
+
|
65
|
+
<dl class="box">
|
66
|
+
|
67
|
+
<dt class="r1">Inherits:</dt>
|
68
|
+
<dd class="r1">
|
69
|
+
<span class="inheritName">RuntimeError</span>
|
70
|
+
|
71
|
+
<ul class="fullTree">
|
72
|
+
<li>Object</li>
|
73
|
+
|
74
|
+
<li class="next">RuntimeError</li>
|
75
|
+
|
76
|
+
<li class="next">Disbatch::NoNodeError</li>
|
77
|
+
|
78
|
+
</ul>
|
79
|
+
<a href="#" class="inheritanceTree">show all</a>
|
80
|
+
|
81
|
+
</dd>
|
82
|
+
|
83
|
+
|
84
|
+
|
85
|
+
|
86
|
+
|
87
|
+
|
88
|
+
|
89
|
+
|
90
|
+
|
91
|
+
<dt class="r2 last">Defined in:</dt>
|
92
|
+
<dd class="r2 last">lib/disbatch/errors.rb</dd>
|
93
|
+
|
94
|
+
</dl>
|
95
|
+
<div class="clear"></div>
|
96
|
+
|
97
|
+
<h2>Overview</h2><div class="docstring">
|
98
|
+
<div class="discussion">
|
99
|
+
<p>
|
100
|
+
Raised when attempting to open an invalid node
|
101
|
+
</p>
|
102
|
+
|
103
|
+
|
104
|
+
</div>
|
105
|
+
</div>
|
106
|
+
<div class="tags">
|
107
|
+
|
108
|
+
|
109
|
+
</div>
|
110
|
+
|
111
|
+
|
112
|
+
|
113
|
+
|
114
|
+
|
115
|
+
|
116
|
+
|
117
|
+
|
118
|
+
|
119
|
+
</div>
|
120
|
+
|
121
|
+
<div id="footer">
|
122
|
+
Generated on Mon Oct 24 16:54:00 2011 by
|
123
|
+
<a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
|
124
|
+
0.7.3 (ruby-1.8.7).
|
125
|
+
</div>
|
126
|
+
|
127
|
+
</body>
|
128
|
+
</html>
|