disbatch 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +27 -0
- data/README +0 -0
- data/bin/disbatchd +74 -0
- data/disbatch_specification.txt +205 -0
- data/doc/Disbatch/NoNodeError.html +128 -0
- data/doc/Disbatch/Node.html +931 -0
- data/doc/Disbatch/Plugin/Hello.html +253 -0
- data/doc/Disbatch/Plugin.html +482 -0
- data/doc/Disbatch/Plugins.html +397 -0
- data/doc/Disbatch/Queue/Task.html +1671 -0
- data/doc/Disbatch/Queue.html +1043 -0
- data/doc/Disbatch.html +602 -0
- data/doc/Mongo.html +236 -0
- data/doc/_index.html +186 -0
- data/doc/class_list.html +47 -0
- data/doc/css/common.css +1 -0
- data/doc/css/full_list.css +55 -0
- data/doc/css/style.css +322 -0
- data/doc/file.README.html +66 -0
- data/doc/file_list.html +49 -0
- data/doc/frames.html +13 -0
- data/doc/index.html +66 -0
- data/doc/js/app.js +205 -0
- data/doc/js/full_list.js +167 -0
- data/doc/js/jquery.js +16 -0
- data/doc/method_list.html +446 -0
- data/doc/top-level-namespace.html +103 -0
- data/lib/disbatch/errors.rb +6 -0
- data/lib/disbatch/node.rb +105 -0
- data/lib/disbatch/plugin/hello.rb +33 -0
- data/lib/disbatch/plugin.rb +40 -0
- data/lib/disbatch/queue/task.rb +142 -0
- data/lib/disbatch/queue.rb +101 -0
- data/lib/disbatch.rb +61 -0
- metadata +147 -0
data/LICENSE
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
Copyright (c) 2011, Matthew Berg <mberg@synacor.com>
|
2
|
+
All rights reserved.
|
3
|
+
|
4
|
+
Redistribution and use in source and binary forms, with or without
|
5
|
+
modification, are permitted provided that the following conditions are met:
|
6
|
+
|
7
|
+
* Redistributions of source code must retain the above copyright notice,
|
8
|
+
this list of conditions and the following disclaimer.
|
9
|
+
|
10
|
+
* Redistributions in binary form must reproduce the above copyright
|
11
|
+
notice, this list of conditions and the following disclaimer in the
|
12
|
+
documentation and/or other materials provided with the distribution.
|
13
|
+
|
14
|
+
* The names of its contributors may not be used to endorse or promote
|
15
|
+
products derived from this software without specific prior written
|
16
|
+
permission.
|
17
|
+
|
18
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
19
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
20
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
21
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
22
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
23
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
24
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
25
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
26
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
27
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/README
ADDED
File without changes
|
data/bin/disbatchd
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
#!/usr/bin/env ruby
#
# disbatchd -- registers this host as a Disbatch node, loads the queue
# plugins, and then runs an EventMachine loop that polls every known queue
# and executes pending tasks in per-queue worker threads.

require 'rubygems'

require 'disbatch'
require 'eventmachine'
require 'json'
require 'trollop'

opts = Trollop.options do
  opt 'config', 'the disbatchd config file', :default => '/etc/disbatch/disbatchd.conf'
  opt 'max', 'maximum number of threads per queue', :default => 10
  opt 'plugins', 'path(s) to dispatch plugins', :multi => true
  opt 'force', 'force node registration'
end

# Load the JSON config file when it exists. File.exist? is used because
# File.exists? is deprecated and was removed in Ruby 3.2.
config_file = opts['config']
config = File.exist?(config_file) ? JSON.parse(File.read(config_file)) : {}

# Hash#merge lets the argument win on duplicate keys, so values from the
# config file take precedence over the command-line options and defaults.
config = opts.merge(config)
force = config['force'] || false

node = Disbatch.node.register(force)
max = config['max']

# Load plugins from any user-supplied locations first, then the bundled ones.
# The 'plugins' entry is only iterated when it is actually a collection; the
# key can be present without holding a list.
# NOTE(review): confirm what Trollop stores for an unset :multi option.
plugin_dirs = config['plugins']
plugin_dirs.each { |dir| Disbatch::Plugin.init_all(dir) } if plugin_dirs.respond_to?(:each)
Disbatch::Plugin.init_all(File.dirname(File.expand_path(__FILE__)) + '/../lib/disbatch/plugin/**/*.rb')

# Release the node registration before exiting on any termination signal.
%w[TERM INT QUIT].each do |signal|
  trap(signal) { node.release; exit }
end

EventMachine::run do
  queues = Disbatch::Queue.get_all

  queues.each do |queue|
    # Skip queues whose plugin is not available on this node.
    next unless Disbatch::Plugin[queue.plugin]

    puts "Adding #{queue.plugin} runner for #{queue.id}"

    # One thread group per queue; its size is the number of tracked workers.
    tg = ThreadGroup.new

    EventMachine::add_periodic_timer(0.1) do
      # Nothing to do on this tick if the pool is full or the queue is empty.
      next unless ((nt = tg.list.length) < max && (np = queue.length) > 0)

      # Free worker slots remaining for this queue.
      ft = max - nt

      # Start one worker per pending task, capped at the number of free slots.
      [np, ft].min.times do
        t = Thread.new do
          task = queue.pop

          unless task && task.params
            puts 'Huh? Got invalid task back.'
            next
          end

          begin
            task.execute!
          rescue => e
            puts 'Damn. Execution failed.'
            # Report the cause on stderr so that failures can be diagnosed.
            $stderr.puts "#{e.class}: #{e.message}"
            task.release
          end
        end

        tg.add(t)
      end
    end
  end
end
|
@@ -0,0 +1,205 @@
|
|
1
|
+
Disbatch Execution Node Specification 1.0
|
2
|
+
Matt Busigin <mbusigin@gmail.com>
|
3
|
+
May, 2011
|
4
|
+
|
5
|
+
Disbatch is a multi-platform, multi-language, elastic, distributed batch
|
6
|
+
processing framework. This document outlines the protocol which a Disbatch
|
7
|
+
Execution Node (DEN) must follow to correctly operate with Disbatch Command
|
8
|
+
Nodes (DCN), as well as other DENs operating under the same
|
9
|
+
network/database.
|
10
|
+
|
11
|
+
OVERVIEW
|
12
|
+
|
13
|
+
Disbatch is a multi-platform, multi-language elastic distributed batch
|
14
|
+
processing framework. It heavily leverages MongoDB and JSON. The core
|
15
|
+
components are MongoDB, N-number of DENs (Disbatch Execution Nodes), and
|
16
|
+
potentially one or more DCNs (Disbatch Command Nodes).
|
17
|
+
|
18
|
+
Execution is split among queues, which are of homogenous type. Each queue
|
19
|
+
class is implemented as a plugin on the node. A node ignores any queue
|
20
|
+
whose requisite plugin class it does not implement.
|
21
|
+
|
22
|
+
MECHANICS
|
23
|
+
|
24
|
+
Each DEN is partitioned into queues. Each queue contains a pool of threads,
|
25
|
+
which are limited by the 'maxthreads' queue parameter on a per-node basis.
|
26
|
+
|
27
|
+
On DEN startup, the node registers itself and pulls the list of queues from
|
28
|
+
MongoDB; queues are stored as one document per queue in the 'queues' collection.
|
29
|
+
|
30
|
+
TASK LIFECYCLE
|
31
|
+
|
32
|
+
A task is first created, generally on a DCN, either individually, or as a
|
33
|
+
result of a batch operation. Individual Tasks are saved to the 'tasks'
|
34
|
+
collection, with a reference back to the queue (by ObjectId) it sits under.
|
35
|
+
All tasks have a number of attributes in common: id, ctime, mtime, queue,
|
36
|
+
node, parameters, stdout, stderr, status.
|
37
|
+
|
38
|
+
The mechanism for creating the tasks is up to the implementor of the DCN, or
|
39
|
+
even through some other mechanism. It is their job to create tasks to this
|
40
|
+
specification so that DENs may execute them, and a DCN can provide an
|
41
|
+
interface into the data.
|
42
|
+
|
43
|
+
A task is created, initialised with parameters, its status set to Created,
|
44
|
+
and its node set to Unclaimed. Upon each Scheduling Interval, every DEN will
|
45
|
+
seek out these Created and Unclaimed tasks, putting them into a Pending and
|
46
|
+
Claimed state up until the per-node maximum thread threshold (Maxthreads) is
|
47
|
+
saturated.
|
48
|
+
|
49
|
+
When the DEN is ready, the Task is actually put into the Running status. The
|
50
|
+
plugin itself may report back status at any interval it likes, and it may
|
51
|
+
alter the task object any way it likes, but it is the responsibility of the
|
52
|
+
DEN to ensure the status is up to date after the task is complete, whether the
|
53
|
+
plugin has adjusted the status correctly or not.
|
54
|
+
|
55
|
+
DISBATCH EXECUTION NODE RESPONSIBILITIES
|
56
|
+
|
57
|
+
REQUIREMENTS
|
58
|
+
|
59
|
+
Each DEN:
|
60
|
+
|
61
|
+
MUST have a unique Node ID
|
62
|
+
|
63
|
+
MUST register itself via the DEN Node Registration Protocol
|
64
|
+
|
65
|
+
MUST execute tasks via the DEN Task Execution Protocol
|
66
|
+
|
67
|
+
NODE ID
|
68
|
+
|
69
|
+
The Node ID should be short (1-8 characters) and follow the standard C identifier methodology:
|
70
|
+
|
71
|
+
[a-zA-Z][a-zA-Z_0-9]*
|
72
|
+
|
73
|
+
|
74
|
+
STARTUP & INITIALISATION
|
75
|
+
|
76
|
+
Before a DEN starts processing tasks, it is obligated to clean up any tasks
|
77
|
+
that were not put into a completed state.
|
78
|
+
|
79
|
+
|
80
|
+
DEN NODE REGISTRATION PROTOCOL
|
81
|
+
|
82
|
+
Upon startup, each node must register itself to the Nodes collection. The
|
83
|
+
following elements must be included:
|
84
|
+
|
85
|
+
_id: ObjectId() // Node ID,
|
86
|
+
heartbeat: {...} // Heartbeat object (see
|
87
|
+
below)
|
88
|
+
version: "r-disbatch 1.21" // Engine version string
|
89
|
+
spec_version: "1.0" // DEN Specification
|
90
|
+
queuetypes: [ { class: "Synacor::Migration::IMAP2IMAP",
|
91
|
+
version: '1.1a' }, .. ] // Array of plugins enabled
|
92
|
+
|
93
|
+
|
94
|
+
|
95
|
+
HEARTBEAT OBJECT
|
96
|
+
|
97
|
+
The heartbeat object includes status updates on both the engine and any active queues.
|
98
|
+
|
99
|
+
datetime: ISODate("2011-05-12T06:00:07Z") // Heartbeat timestamp
|
100
|
+
pid: 1234 // System PID
|
101
|
+
queues: [...] // Queue heartbeat objects (see
|
102
|
+
below)
|
103
|
+
|
104
|
+
QUEUE HEARTBEAT OBJECT
|
105
|
+
|
106
|
+
All of this data is on a per-queue basis.
|
107
|
+
|
108
|
+
class: 'Synacor::Migration::IMAP2IMAP' // Plugin class
|
109
|
+
queue: ObjectId() // Queue reference
|
110
|
+
threads: 50 // Thread count
|
111
|
+
|
112
|
+
|
113
|
+
|
114
|
+
FULL NODE JSON OBJECT EXAMPLE
|
115
|
+
|
116
|
+
{
|
117
|
+
_id: 'mig01',
|
118
|
+
version: 'Disbatch.pl v2.0.2',
|
119
|
+
den_version: '1.0',
|
120
|
+
queuetypes: [
|
121
|
+
{
|
122
|
+
class: 'Synacor::Migration::IMAP2IMAP',
|
123
|
+
version: '1.1a'
|
124
|
+
},
|
125
|
+
{
|
126
|
+
class: 'Synacor::Migration::Zimbra::UserImport',
|
127
|
+
version: '0.9'
|
128
|
+
}
|
129
|
+
],
|
130
|
+
heartbeat: {
|
131
|
+
datetime: ISODate("2011-05-12T06:00:07Z"),
|
132
|
+
|
133
|
+
pid: 34182,
|
134
|
+
queues: [
|
135
|
+
{
|
136
|
+
class: 'Synacor::Migration::IMAP2IMAP',
|
137
|
+
queue: ObjectId(),
|
138
|
+
threads: 50,
|
139
|
+
idle: 15,
|
140
|
+
processing: 35
|
141
|
+
}
|
142
|
+
]
|
143
|
+
}
|
144
|
+
}
|
145
|
+
|
146
|
+
|
147
|
+
QUEUE DOCUMENT SPECIFICATION
|
148
|
+
|
149
|
+
The DENs will read from the "queues" collection, and can expect the following
|
150
|
+
format:
|
151
|
+
|
152
|
+
{
|
153
|
+
_id: 'insight1' // Queue ID,
|
154
|
+
ctime: ISODate("2011-05-12T06:00:07Z"),// Create time
|
155
|
+
class: 'Synacor::Migration::IMAP2IMAP',// Class
|
156
|
+
description: 'Insight Primary Migration', // Description
|
157
|
+
maxthreads: 50, // Maximum per-node concurrency
|
158
|
+
|
159
|
+
nodes_pin: [ 'mig01', 'mig02' ], // Pin queue to these nodes (and nowhere else)
|
160
|
+
nodes_ignore: [ 'mig03', 'mig04' ] // OR: ignore on these nodes
|
161
|
+
}
|
162
|
+
|
163
|
+
The "nodes_pin" and "nodes_ignore" attributes are mutually exclusive. If
|
164
|
+
using pin, it is exclusive to the specified nodes. If using ignore, it will
|
165
|
+
run on all nodes except for those specified. If both are erroneously
|
166
|
+
provided, nodes_pin takes precedence over nodes_ignore.
|
167
|
+
|
168
|
+
Queue IDs can either be automatically generated, or provided as a uniquely
|
169
|
+
named user parameter.
|
170
|
+
|
171
|
+
|
172
|
+
TASK DOCUMENT SPECIFICATION
|
173
|
+
|
174
|
+
Each task the DEN pulls will be in the following format:
|
175
|
+
|
176
|
+
{
|
177
|
+
_id: ObjectId('238d892efadee'),
|
178
|
+
queue: 'insight1',
|
179
|
+
ctime: ISODate("2011-05-12T06:00:07Z"),// Create time
|
180
|
+
mtime: ISODate("2011-05-12T06:00:07Z"),// Modify time
|
181
|
+
parameters: { username: 'foo@foo.com', host: 'md08.foo.com' }, // Parameters
|
182
|
+
stdout: '', // Your task output goes here
|
183
|
+
stderr: '', // Your task errors go here
|
184
|
+
status: -2, // Task status code (described below)
|
185
|
+
node: -1
|
186
|
+
}
|
187
|
+
|
188
|
+
TASK STATUS CODE
|
189
|
+
|
190
|
+
-4: Blocked
|
191
|
+
-3: Terminated
|
192
|
+
-2: Created
|
193
|
+
-1: Claimed
|
194
|
+
0: Running
|
195
|
+
1: Concluded
|
196
|
+
|
197
|
+
|
198
|
+
TASK LIFECYCLE
|
199
|
+
|
200
|
+
Each task is initialised with its node as -1 (unclaimed) and status as -2 (created).
|
201
|
+
|
202
|
+
DENs can claim tasks from a queue in one of two ways:
|
203
|
+
|
204
|
+
1. findAndModify()
|
205
|
+
where: queue: <queue> and n
|
@@ -0,0 +1,128 @@
|
|
1
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
2
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
3
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
4
|
+
<head>
|
5
|
+
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
6
|
+
<title>
|
7
|
+
Exception: Disbatch::NoNodeError
|
8
|
+
|
9
|
+
— Documentation by YARD 0.7.3
|
10
|
+
|
11
|
+
</title>
|
12
|
+
|
13
|
+
<link rel="stylesheet" href="../css/style.css" type="text/css" media="screen" charset="utf-8" />
|
14
|
+
|
15
|
+
<link rel="stylesheet" href="../css/common.css" type="text/css" media="screen" charset="utf-8" />
|
16
|
+
|
17
|
+
<script type="text/javascript" charset="utf-8">
|
18
|
+
relpath = '..';
|
19
|
+
if (relpath != '') relpath += '/';
|
20
|
+
</script>
|
21
|
+
|
22
|
+
<script type="text/javascript" charset="utf-8" src="../js/jquery.js"></script>
|
23
|
+
|
24
|
+
<script type="text/javascript" charset="utf-8" src="../js/app.js"></script>
|
25
|
+
|
26
|
+
|
27
|
+
</head>
|
28
|
+
<body>
|
29
|
+
<script type="text/javascript" charset="utf-8">
|
30
|
+
if (window.top.frames.main) document.body.className = 'frames';
|
31
|
+
</script>
|
32
|
+
|
33
|
+
<div id="header">
|
34
|
+
<div id="menu">
|
35
|
+
|
36
|
+
<a href="../_index.html">Index (N)</a> »
|
37
|
+
<span class='title'><span class='object_link'><a href="../Disbatch.html" title="Disbatch (module)">Disbatch</a></span></span>
|
38
|
+
»
|
39
|
+
<span class="title">NoNodeError</span>
|
40
|
+
|
41
|
+
|
42
|
+
<div class="noframes"><span class="title">(</span><a href="." target="_top">no frames</a><span class="title">)</span></div>
|
43
|
+
</div>
|
44
|
+
|
45
|
+
<div id="search">
|
46
|
+
|
47
|
+
<a id="class_list_link" href="#">Class List</a>
|
48
|
+
|
49
|
+
<a id="method_list_link" href="#">Method List</a>
|
50
|
+
|
51
|
+
<a id="file_list_link" href="#">File List</a>
|
52
|
+
|
53
|
+
</div>
|
54
|
+
<div class="clear"></div>
|
55
|
+
</div>
|
56
|
+
|
57
|
+
<iframe id="search_frame"></iframe>
|
58
|
+
|
59
|
+
<div id="content"><h1>Exception: Disbatch::NoNodeError
|
60
|
+
|
61
|
+
|
62
|
+
|
63
|
+
</h1>
|
64
|
+
|
65
|
+
<dl class="box">
|
66
|
+
|
67
|
+
<dt class="r1">Inherits:</dt>
|
68
|
+
<dd class="r1">
|
69
|
+
<span class="inheritName">RuntimeError</span>
|
70
|
+
|
71
|
+
<ul class="fullTree">
|
72
|
+
<li>Object</li>
|
73
|
+
|
74
|
+
<li class="next">RuntimeError</li>
|
75
|
+
|
76
|
+
<li class="next">Disbatch::NoNodeError</li>
|
77
|
+
|
78
|
+
</ul>
|
79
|
+
<a href="#" class="inheritanceTree">show all</a>
|
80
|
+
|
81
|
+
</dd>
|
82
|
+
|
83
|
+
|
84
|
+
|
85
|
+
|
86
|
+
|
87
|
+
|
88
|
+
|
89
|
+
|
90
|
+
|
91
|
+
<dt class="r2 last">Defined in:</dt>
|
92
|
+
<dd class="r2 last">lib/disbatch/errors.rb</dd>
|
93
|
+
|
94
|
+
</dl>
|
95
|
+
<div class="clear"></div>
|
96
|
+
|
97
|
+
<h2>Overview</h2><div class="docstring">
|
98
|
+
<div class="discussion">
|
99
|
+
<p>
|
100
|
+
Raised when attempting to open an invalid node
|
101
|
+
</p>
|
102
|
+
|
103
|
+
|
104
|
+
</div>
|
105
|
+
</div>
|
106
|
+
<div class="tags">
|
107
|
+
|
108
|
+
|
109
|
+
</div>
|
110
|
+
|
111
|
+
|
112
|
+
|
113
|
+
|
114
|
+
|
115
|
+
|
116
|
+
|
117
|
+
|
118
|
+
|
119
|
+
</div>
|
120
|
+
|
121
|
+
<div id="footer">
|
122
|
+
Generated on Mon Oct 24 16:54:00 2011 by
|
123
|
+
<a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
|
124
|
+
0.7.3 (ruby-1.8.7).
|
125
|
+
</div>
|
126
|
+
|
127
|
+
</body>
|
128
|
+
</html>
|