wukong-storm 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +2 -0
- data/.rspec +3 -0
- data/Gemfile +8 -0
- data/README.md +31 -0
- data/Rakefile +7 -0
- data/bin/wu-storm +51 -0
- data/lib/wukong-storm/configuration.rb +676 -0
- data/lib/wukong-storm/runner.rb +45 -0
- data/lib/wukong-storm/version.rb +5 -0
- data/lib/wukong-storm.rb +3 -0
- data/spec/spec_helper.rb +6 -0
- data/spec/support/examples.rb +27 -0
- data/spec/wu_storm_spec.rb +54 -0
- data/wu-storm.gemspec +22 -0
- metadata +86 -0
data/.gitignore
ADDED
data/.rspec
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Wukong Storm
|
|
2
|
+
|
|
3
|
+
## Usage
|
|
4
|
+
|
|
5
|
+
The Wukong Storm plugin is very basic at the moment. It functions entirely over STDIN and STDOUT. Taken from the `wu-storm` executable:
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
usage: wu-storm PROCESSOR|FLOW [...--param=value...]
|
|
9
|
+
|
|
10
|
+
wu-storm is a commandline tool for running Wukong processors and flows in
|
|
11
|
+
a storm or trident topology.
|
|
12
|
+
|
|
13
|
+
wu-storm operates over STDIN and STDOUT and has a one-to-one message guarantee.
|
|
14
|
+
For example, when using an identity processor, wu-storm, given an event 'foo', will return
|
|
15
|
+
'foo|'. The '|' character is the specified End-Of-File delimiter.
|
|
16
|
+
|
|
17
|
+
If there is ever a suppressed error in processing, or a skipped record for any reason,
|
|
18
|
+
wu-storm will still respond with a '|', signifying an empty return event.
|
|
19
|
+
|
|
20
|
+
If there are multiple messages that have resulted from a single event, wu-storm will return
|
|
21
|
+
them newline separated, followed by the delimiter, e.g. 'foo\nbar\nbaz|'.
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
Params:
|
|
25
|
+
-t, --delimiter=String The EOF specifier when returning events [Default: |]
|
|
26
|
+
-r, --run=String Name of the processor or dataflow to use. Defaults to basename of the given path
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## TODO
|
|
30
|
+
|
|
31
|
+
The configuration file lists __all__ of the options for Storm. They are slowly being translated into real Configliere options.
|
data/Rakefile
ADDED
data/bin/wu-storm
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
require 'wukong-storm'
|
|
3
|
+
require 'configliere'
|
|
4
|
+
|
|
5
|
+
Settings.use(:commandline)
|
|
6
|
+
Settings.define :run, description: 'Name of the processor or dataflow to use. Defaults to basename of the given path', flag: 'r'
|
|
7
|
+
Settings.define :delimiter, description: 'The EOF specifier when returning events', default: '|', flag: 't'
|
|
8
|
+
|
|
9
|
+
# One-line usage banner: the invoked script's basename followed by the expected arguments.
def Settings.usage; program = File.basename($PROGRAM_NAME); "usage: #{program} PROCESSOR|FLOW [...--param=value...]"; end
|
|
10
|
+
|
|
11
|
+
Settings.description = <<'EOF'
|
|
12
|
+
wu-storm is a commandline tool for running Wukong processors and flows in
|
|
13
|
+
a storm or trident topology.
|
|
14
|
+
|
|
15
|
+
wu-storm operates over STDIN and STDOUT and has a one-to-one message guarantee.
|
|
16
|
+
For example, when using an identity processor, wu-storm, given an event 'foo', will return
|
|
17
|
+
'foo|'. The '|' character is the specified End-Of-File delimiter.
|
|
18
|
+
|
|
19
|
+
If there is ever a suppressed error in pricessing, or a skipped record for any reason,
|
|
20
|
+
wu-storm will still respond with a '|', signifying an empty return event.
|
|
21
|
+
|
|
22
|
+
If there are multiple messages that have resulted from a single event, wu-storm will return
|
|
23
|
+
them newline separated, followed by the delimite, e.g. 'foo\nbar\nbaz|'.
|
|
24
|
+
EOF
|
|
25
|
+
|
|
26
|
+
Settings.resolve!
|
|
27
|
+
|
|
28
|
+
runnable = Settings.rest.first
|
|
29
|
+
|
|
30
|
+
case
|
|
31
|
+
when runnable.nil?
|
|
32
|
+
Settings.dump_help
|
|
33
|
+
exit(1)
|
|
34
|
+
when Wukong.registry.registered?(runnable.to_sym)
|
|
35
|
+
processor = runnable
|
|
36
|
+
when File.exist?(runnable)
|
|
37
|
+
load runnable
|
|
38
|
+
processor = Settings.run || File.basename(runnable, '.rb')
|
|
39
|
+
else
|
|
40
|
+
Settings.dump_help
|
|
41
|
+
exit(1)
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
begin
|
|
45
|
+
EM.run do
|
|
46
|
+
Wu::StormRunner.start(processor.to_sym, Settings)
|
|
47
|
+
end
|
|
48
|
+
rescue Wu::Error => e
|
|
49
|
+
$stderr.puts e.message
|
|
50
|
+
exit(1)
|
|
51
|
+
end
|
|
@@ -0,0 +1,676 @@
|
|
|
1
|
+
module Wukong
|
|
2
|
+
module Storm
|
|
3
|
+
|
|
4
|
+
Configuration = Configliere::Param.new unless defined? Configuration
|
|
5
|
+
|
|
6
|
+
Configuration.define :zookeepers_servers, description: 'storm.zookeeper.servers'
|
|
7
|
+
Configuration.define :zookeepers_port, description: 'storm.zookeeper.port'
|
|
8
|
+
Configuration.define :local_dir, description: 'storm.local.dir'
|
|
9
|
+
Configuration.define :scheduler, description: 'storm.scheduler'
|
|
10
|
+
Configuration.define :cluster_mode, description: 'storm.cluster.mode'
|
|
11
|
+
Configuration.define :local_hostname, description: 'storm.local.hostname'
|
|
12
|
+
|
|
13
|
+
/**
|
|
14
|
+
* Whether or not to use ZeroMQ for messaging in local mode. If this is set
|
|
15
|
+
* to false, then Storm will use a pure-Java messaging system. The purpose
|
|
16
|
+
* of this flag is to make it easy to run Storm in local mode by eliminating
|
|
17
|
+
* the need for native dependencies, which can be difficult to install.
|
|
18
|
+
*
|
|
19
|
+
* Defaults to false.
|
|
20
|
+
*/
|
|
21
|
+
public static String STORM_LOCAL_MODE_ZMQ = "storm.local.mode.zmq";
|
|
22
|
+
|
|
23
|
+
/**
|
|
24
|
+
* The root location at which Storm stores data in ZooKeeper.
|
|
25
|
+
*/
|
|
26
|
+
public static String STORM_ZOOKEEPER_ROOT = "storm.zookeeper.root";
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* The session timeout for clients to ZooKeeper.
|
|
30
|
+
*/
|
|
31
|
+
public static String STORM_ZOOKEEPER_SESSION_TIMEOUT = "storm.zookeeper.session.timeout";
|
|
32
|
+
|
|
33
|
+
/**
|
|
34
|
+
* The connection timeout for clients to ZooKeeper.
|
|
35
|
+
*/
|
|
36
|
+
public static String STORM_ZOOKEEPER_CONNECTION_TIMEOUT = "storm.zookeeper.connection.timeout";
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
/**
|
|
40
|
+
* The number of times to retry a Zookeeper operation.
|
|
41
|
+
*/
|
|
42
|
+
public static String STORM_ZOOKEEPER_RETRY_TIMES="storm.zookeeper.retry.times";
|
|
43
|
+
|
|
44
|
+
/**
|
|
45
|
+
* The interval between retries of a Zookeeper operation.
|
|
46
|
+
*/
|
|
47
|
+
public static String STORM_ZOOKEEPER_RETRY_INTERVAL="storm.zookeeper.retry.interval";
|
|
48
|
+
|
|
49
|
+
/**
|
|
50
|
+
* The Zookeeper authentication scheme to use, e.g. "digest". Defaults to no authentication.
|
|
51
|
+
*/
|
|
52
|
+
public static String STORM_ZOOKEEPER_AUTH_SCHEME="storm.zookeeper.auth.scheme";
|
|
53
|
+
|
|
54
|
+
/**
|
|
55
|
+
* A string representing the payload for Zookeeper authentication. It gets serialized using UTF-8 encoding during authentication.
|
|
56
|
+
*/
|
|
57
|
+
public static String STORM_ZOOKEEPER_AUTH_PAYLOAD="storm.zookeeper.auth.payload";
|
|
58
|
+
|
|
59
|
+
/**
|
|
60
|
+
* The id assigned to a running topology. The id is the storm name with a unique nonce appended.
|
|
61
|
+
*/
|
|
62
|
+
public static String STORM_ID = "storm.id";
|
|
63
|
+
|
|
64
|
+
/**
|
|
65
|
+
* The host that the master server is running on.
|
|
66
|
+
*/
|
|
67
|
+
public static String NIMBUS_HOST = "nimbus.host";
|
|
68
|
+
|
|
69
|
+
/**
|
|
70
|
+
* Which port the Thrift interface of Nimbus should run on. Clients should
|
|
71
|
+
* connect to this port to upload jars and submit topologies.
|
|
72
|
+
*/
|
|
73
|
+
public static String NIMBUS_THRIFT_PORT = "nimbus.thrift.port";
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
/**
|
|
77
|
+
* This parameter is used by the storm-deploy project to configure the
|
|
78
|
+
* jvm options for the nimbus daemon.
|
|
79
|
+
*/
|
|
80
|
+
public static String NIMBUS_CHILDOPTS = "nimbus.childopts";
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
/**
|
|
84
|
+
* How long without heartbeating a task can go before nimbus will consider the
|
|
85
|
+
* task dead and reassign it to another location.
|
|
86
|
+
*/
|
|
87
|
+
public static String NIMBUS_TASK_TIMEOUT_SECS = "nimbus.task.timeout.secs";
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
/**
|
|
91
|
+
* How often nimbus should wake up to check heartbeats and do reassignments. Note
|
|
92
|
+
* that if a machine ever goes down Nimbus will immediately wake up and take action.
|
|
93
|
+
* This parameter is for checking for failures when there's no explicit event like that
|
|
94
|
+
* occurring.
|
|
95
|
+
*/
|
|
96
|
+
public static String NIMBUS_MONITOR_FREQ_SECS = "nimbus.monitor.freq.secs";
|
|
97
|
+
|
|
98
|
+
/**
|
|
99
|
+
* How often nimbus should wake the cleanup thread to clean the inbox.
|
|
100
|
+
* @see NIMBUS_INBOX_JAR_EXPIRATION_SECS
|
|
101
|
+
*/
|
|
102
|
+
public static String NIMBUS_CLEANUP_INBOX_FREQ_SECS = "nimbus.cleanup.inbox.freq.secs";
|
|
103
|
+
|
|
104
|
+
/**
|
|
105
|
+
* The length of time a jar file lives in the inbox before being deleted by the cleanup thread.
|
|
106
|
+
*
|
|
107
|
+
* Probably keep this value greater than or equal to NIMBUS_CLEANUP_INBOX_JAR_EXPIRATION_SECS.
|
|
108
|
+
* Note that the time it takes to delete an inbox jar file is going to be somewhat more than
|
|
109
|
+
* NIMBUS_CLEANUP_INBOX_JAR_EXPIRATION_SECS (depending on how often NIMBUS_CLEANUP_FREQ_SECS
|
|
110
|
+
* is set to).
|
|
111
|
+
* @see NIMBUS_CLEANUP_INBOX_FREQ_SECS
|
|
112
|
+
*/
|
|
113
|
+
public static String NIMBUS_INBOX_JAR_EXPIRATION_SECS = "nimbus.inbox.jar.expiration.secs";
|
|
114
|
+
|
|
115
|
+
/**
|
|
116
|
+
* How long a supervisor can go without heartbeating before nimbus considers it dead
|
|
117
|
+
* and stops assigning new work to it.
|
|
118
|
+
*/
|
|
119
|
+
public static String NIMBUS_SUPERVISOR_TIMEOUT_SECS = "nimbus.supervisor.timeout.secs";
|
|
120
|
+
|
|
121
|
+
/**
|
|
122
|
+
* A special timeout used when a task is initially launched. During launch, this is the timeout
|
|
123
|
+
* used until the first heartbeat, overriding nimbus.task.timeout.secs.
|
|
124
|
+
*
|
|
125
|
+
* <p>A separate timeout exists for launch because there can be quite a bit of overhead
|
|
126
|
+
* to launching new JVM's and configuring them.</p>
|
|
127
|
+
*/
|
|
128
|
+
public static String NIMBUS_TASK_LAUNCH_SECS = "nimbus.task.launch.secs";
|
|
129
|
+
|
|
130
|
+
/**
|
|
131
|
+
* Whether or not nimbus should reassign tasks if it detects that a task goes down.
|
|
132
|
+
* Defaults to true, and it's not recommended to change this value.
|
|
133
|
+
*/
|
|
134
|
+
public static String NIMBUS_REASSIGN = "nimbus.reassign";
|
|
135
|
+
|
|
136
|
+
/**
|
|
137
|
+
* During upload/download with the master, how long an upload or download connection is idle
|
|
138
|
+
* before nimbus considers it dead and drops the connection.
|
|
139
|
+
*/
|
|
140
|
+
public static String NIMBUS_FILE_COPY_EXPIRATION_SECS = "nimbus.file.copy.expiration.secs";
|
|
141
|
+
|
|
142
|
+
/**
|
|
143
|
+
* A custom class that implements ITopologyValidator that is run whenever a
|
|
144
|
+
* topology is submitted. Can be used to provide business-specific logic for
|
|
145
|
+
* whether topologies are allowed to run or not.
|
|
146
|
+
*/
|
|
147
|
+
public static String NIMBUS_TOPOLOGY_VALIDATOR = "nimbus.topology.validator";
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
/**
|
|
151
|
+
* Storm UI binds to this port.
|
|
152
|
+
*/
|
|
153
|
+
public static String UI_PORT = "ui.port";
|
|
154
|
+
|
|
155
|
+
/**
|
|
156
|
+
* Childopts for Storm UI Java process.
|
|
157
|
+
*/
|
|
158
|
+
public static String UI_CHILDOPTS = "ui.childopts";
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
/**
|
|
162
|
+
* List of DRPC servers so that the DRPCSpout knows who to talk to.
|
|
163
|
+
*/
|
|
164
|
+
public static String DRPC_SERVERS = "drpc.servers";
|
|
165
|
+
|
|
166
|
+
/**
|
|
167
|
+
* This port is used by Storm DRPC for receiving DRPC requests from clients.
|
|
168
|
+
*/
|
|
169
|
+
public static String DRPC_PORT = "drpc.port";
|
|
170
|
+
|
|
171
|
+
/**
|
|
172
|
+
* This port on Storm DRPC is used by DRPC topologies to receive function invocations and send results back.
|
|
173
|
+
*/
|
|
174
|
+
public static String DRPC_INVOCATIONS_PORT = "drpc.invocations.port";
|
|
175
|
+
|
|
176
|
+
/**
|
|
177
|
+
* The timeout on DRPC requests within the DRPC server. Defaults to 10 minutes. Note that requests can also
|
|
178
|
+
* timeout based on the socket timeout on the DRPC client, and separately based on the topology message
|
|
179
|
+
* timeout for the topology implementing the DRPC function.
|
|
180
|
+
*/
|
|
181
|
+
public static String DRPC_REQUEST_TIMEOUT_SECS = "drpc.request.timeout.secs";
|
|
182
|
+
|
|
183
|
+
/**
|
|
184
|
+
* The metadata configured on the supervisor.
|
|
185
|
+
*/
|
|
186
|
+
public static String SUPERVISOR_SCHEDULER_META = "supervisor.scheduler.meta";
|
|
187
|
+
/**
|
|
188
|
+
* A list of ports that can run workers on this supervisor. Each worker uses one port, and
|
|
189
|
+
* the supervisor will only run one worker per port. Use this configuration to tune
|
|
190
|
+
* how many workers run on each machine.
|
|
191
|
+
*/
|
|
192
|
+
public static String SUPERVISOR_SLOTS_PORTS = "supervisor.slots.ports";
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
/**
|
|
197
|
+
* This parameter is used by the storm-deploy project to configure the
|
|
198
|
+
* jvm options for the supervisor daemon.
|
|
199
|
+
*/
|
|
200
|
+
public static String SUPERVISOR_CHILDOPTS = "supervisor.childopts";
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
/**
|
|
204
|
+
* How long a worker can go without heartbeating before the supervisor tries to
|
|
205
|
+
* restart the worker process.
|
|
206
|
+
*/
|
|
207
|
+
public static String SUPERVISOR_WORKER_TIMEOUT_SECS = "supervisor.worker.timeout.secs";
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
/**
|
|
211
|
+
* How long a worker can go without heartbeating during the initial launch before
|
|
212
|
+
* the supervisor tries to restart the worker process. This value overrides
|
|
213
|
+
* supervisor.worker.timeout.secs during launch because there is additional
|
|
214
|
+
* overhead to starting and configuring the JVM on launch.
|
|
215
|
+
*/
|
|
216
|
+
public static String SUPERVISOR_WORKER_START_TIMEOUT_SECS = "supervisor.worker.start.timeout.secs";
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
/**
|
|
220
|
+
* Whether or not the supervisor should launch workers assigned to it. Defaults
|
|
221
|
+
* to true -- and you should probably never change this value. This configuration
|
|
222
|
+
* is used in the Storm unit tests.
|
|
223
|
+
*/
|
|
224
|
+
public static String SUPERVISOR_ENABLE = "supervisor.enable";
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
/**
|
|
228
|
+
* how often the supervisor sends a heartbeat to the master.
|
|
229
|
+
*/
|
|
230
|
+
public static String SUPERVISOR_HEARTBEAT_FREQUENCY_SECS = "supervisor.heartbeat.frequency.secs";
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
/**
|
|
234
|
+
* How often the supervisor checks the worker heartbeats to see if any of them
|
|
235
|
+
* need to be restarted.
|
|
236
|
+
*/
|
|
237
|
+
public static String SUPERVISOR_MONITOR_FREQUENCY_SECS = "supervisor.monitor.frequency.secs";
|
|
238
|
+
|
|
239
|
+
/**
|
|
240
|
+
* The jvm opts provided to workers launched by this supervisor. All "%ID%" substrings are replaced
|
|
241
|
+
* with an identifier for this worker.
|
|
242
|
+
*/
|
|
243
|
+
public static String WORKER_CHILDOPTS = "worker.childopts";
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
/**
|
|
247
|
+
* How often this worker should heartbeat to the supervisor.
|
|
248
|
+
*/
|
|
249
|
+
public static String WORKER_HEARTBEAT_FREQUENCY_SECS = "worker.heartbeat.frequency.secs";
|
|
250
|
+
|
|
251
|
+
/**
|
|
252
|
+
* How often a task should heartbeat its status to the master.
|
|
253
|
+
*/
|
|
254
|
+
public static String TASK_HEARTBEAT_FREQUENCY_SECS = "task.heartbeat.frequency.secs";
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
/**
|
|
258
|
+
* How often a task should sync its connections with other tasks (if a task is
|
|
259
|
+
* reassigned, the other tasks sending messages to it need to refresh their connections).
|
|
260
|
+
* In general though, when a reassignment happens other tasks will be notified
|
|
261
|
+
* almost immediately. This configuration is here just in case that notification doesn't
|
|
262
|
+
* come through.
|
|
263
|
+
*/
|
|
264
|
+
public static String TASK_REFRESH_POLL_SECS = "task.refresh.poll.secs";
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
/**
|
|
269
|
+
* True if Storm should timeout messages or not. Defaults to true. This is meant to be used
|
|
270
|
+
* in unit tests to prevent tuples from being accidentally timed out during the test.
|
|
271
|
+
*/
|
|
272
|
+
public static String TOPOLOGY_ENABLE_MESSAGE_TIMEOUTS = "topology.enable.message.timeouts";
|
|
273
|
+
|
|
274
|
+
/**
|
|
275
|
+
* When set to true, Storm will log every message that's emitted.
|
|
276
|
+
*/
|
|
277
|
+
public static String TOPOLOGY_DEBUG = "topology.debug";
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
/**
|
|
281
|
+
* Whether or not the master should optimize topologies by running multiple
|
|
282
|
+
* tasks in a single thread where appropriate.
|
|
283
|
+
*/
|
|
284
|
+
public static String TOPOLOGY_OPTIMIZE = "topology.optimize";
|
|
285
|
+
|
|
286
|
+
/**
|
|
287
|
+
* How many processes should be spawned around the cluster to execute this
|
|
288
|
+
* topology. Each process will execute some number of tasks as threads within
|
|
289
|
+
* them. This parameter should be used in conjunction with the parallelism hints
|
|
290
|
+
* on each component in the topology to tune the performance of a topology.
|
|
291
|
+
*/
|
|
292
|
+
public static String TOPOLOGY_WORKERS = "topology.workers";
|
|
293
|
+
|
|
294
|
+
/**
|
|
295
|
+
* How many instances to create for a spout/bolt. A task runs on a thread with zero or more
|
|
296
|
+
* other tasks for the same spout/bolt. The number of tasks for a spout/bolt is always
|
|
297
|
+
* the same throughout the lifetime of a topology, but the number of executors (threads) for
|
|
298
|
+
* a spout/bolt can change over time. This allows a topology to scale to more or less resources
|
|
299
|
+
* without redeploying the topology or violating the constraints of Storm (such as a fields grouping
|
|
300
|
+
* guaranteeing that the same value goes to the same task).
|
|
301
|
+
*/
|
|
302
|
+
public static String TOPOLOGY_TASKS = "topology.tasks";
|
|
303
|
+
|
|
304
|
+
/**
|
|
305
|
+
* How many executors to spawn for ackers.
|
|
306
|
+
*
|
|
307
|
+
* <p>If this is set to 0, then Storm will immediately ack tuples as soon
|
|
308
|
+
* as they come off the spout, effectively disabling reliability.</p>
|
|
309
|
+
*/
|
|
310
|
+
public static String TOPOLOGY_ACKER_EXECUTORS = "topology.acker.executors";
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
/**
|
|
314
|
+
* The maximum amount of time given to the topology to fully process a message
|
|
315
|
+
* emitted by a spout. If the message is not acked within this time frame, Storm
|
|
316
|
+
* will fail the message on the spout. Some spout implementations will then replay
|
|
317
|
+
* the message at a later time.
|
|
318
|
+
*/
|
|
319
|
+
public static String TOPOLOGY_MESSAGE_TIMEOUT_SECS = "topology.message.timeout.secs";
|
|
320
|
+
|
|
321
|
+
/**
|
|
322
|
+
* A list of serialization registrations for Kryo ( http://code.google.com/p/kryo/ ),
|
|
323
|
+
* the underlying serialization framework for Storm. A serialization can either
|
|
324
|
+
* be the name of a class (in which case Kryo will automatically create a serializer for the class
|
|
325
|
+
* that saves all the object's fields), or an implementation of com.esotericsoftware.kryo.Serializer.
|
|
326
|
+
*
|
|
327
|
+
* See Kryo's documentation for more information about writing custom serializers.
|
|
328
|
+
*/
|
|
329
|
+
public static String TOPOLOGY_KRYO_REGISTER = "topology.kryo.register";
|
|
330
|
+
|
|
331
|
+
/**
|
|
332
|
+
* A list of classes that customize storm's kryo instance during start-up.
|
|
333
|
+
* Each listed class name must implement IKryoDecorator. During start-up the
|
|
334
|
+
* listed class is instantiated with 0 arguments, then its 'decorate' method
|
|
335
|
+
* is called with storm's kryo instance as the only argument.
|
|
336
|
+
*/
|
|
337
|
+
public static String TOPOLOGY_KRYO_DECORATORS = "topology.kryo.decorators";
|
|
338
|
+
|
|
339
|
+
/**
|
|
340
|
+
* Class that specifies how to create a Kryo instance for serialization. Storm will then apply
|
|
341
|
+
* topology.kryo.register and topology.kryo.decorators on top of this. The default implementation
|
|
342
|
+
* implements topology.fall.back.on.java.serialization and turns references off.
|
|
343
|
+
*/
|
|
344
|
+
public static String TOPOLOGY_KRYO_FACTORY = "topology.kryo.factory";
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
/**
|
|
348
|
+
* Whether or not Storm should skip the loading of kryo registrations for which it
|
|
349
|
+
* does not know the class or have the serializer implementation. Otherwise, the task will
|
|
350
|
+
* fail to load and will throw an error at runtime. The use case of this is if you want to
|
|
351
|
+
* declare your serializations on the storm.yaml files on the cluster rather than every single
|
|
352
|
+
* time you submit a topology. Different applications may use different serializations and so
|
|
353
|
+
* a single application may not have the code for the other serializers used by other apps.
|
|
354
|
+
* By setting this config to true, Storm will ignore that it doesn't have those other serializations
|
|
355
|
+
* rather than throw an error.
|
|
356
|
+
*/
|
|
357
|
+
public static String TOPOLOGY_SKIP_MISSING_KRYO_REGISTRATIONS= "topology.skip.missing.kryo.registrations";
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
/**
|
|
361
|
+
* The maximum parallelism allowed for a component in this topology. This configuration is
|
|
362
|
+
* typically used in testing to limit the number of threads spawned in local mode.
|
|
363
|
+
*/
|
|
364
|
+
public static String TOPOLOGY_MAX_TASK_PARALLELISM="topology.max.task.parallelism";
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
/**
|
|
368
|
+
* The maximum number of tuples that can be pending on a spout task at any given time.
|
|
369
|
+
* This config applies to individual tasks, not to spouts or topologies as a whole.
|
|
370
|
+
*
|
|
371
|
+
* A pending tuple is one that has been emitted from a spout but has not been acked or failed yet.
|
|
372
|
+
* Note that this config parameter has no effect for unreliable spouts that don't tag
|
|
373
|
+
* their tuples with a message id.
|
|
374
|
+
*/
|
|
375
|
+
public static String TOPOLOGY_MAX_SPOUT_PENDING="topology.max.spout.pending";
|
|
376
|
+
|
|
377
|
+
/**
|
|
378
|
+
* A class that implements a strategy for what to do when a spout needs to wait. Waiting is
|
|
379
|
+
* triggered in one of two conditions:
|
|
380
|
+
*
|
|
381
|
+
* 1. nextTuple emits no tuples
|
|
382
|
+
* 2. The spout has hit maxSpoutPending and can't emit any more tuples
|
|
383
|
+
*/
|
|
384
|
+
public static String TOPOLOGY_SPOUT_WAIT_STRATEGY="topology.spout.wait.strategy";
|
|
385
|
+
|
|
386
|
+
/**
|
|
387
|
+
* The amount of milliseconds the SleepEmptyEmitStrategy should sleep for.
|
|
388
|
+
*/
|
|
389
|
+
public static String TOPOLOGY_SLEEP_SPOUT_WAIT_STRATEGY_TIME_MS="topology.sleep.spout.wait.strategy.time.ms";
|
|
390
|
+
|
|
391
|
+
/**
|
|
392
|
+
* The maximum amount of time a component gives a source of state to synchronize before it requests
|
|
393
|
+
* synchronization again.
|
|
394
|
+
*/
|
|
395
|
+
public static String TOPOLOGY_STATE_SYNCHRONIZATION_TIMEOUT_SECS="topology.state.synchronization.timeout.secs";
|
|
396
|
+
|
|
397
|
+
/**
|
|
398
|
+
* The percentage of tuples to sample to produce stats for a task.
|
|
399
|
+
*/
|
|
400
|
+
public static String TOPOLOGY_STATS_SAMPLE_RATE="topology.stats.sample.rate";
|
|
401
|
+
|
|
402
|
+
/**
|
|
403
|
+
* Whether or not to use Java serialization in a topology.
|
|
404
|
+
*/
|
|
405
|
+
public static String TOPOLOGY_FALL_BACK_ON_JAVA_SERIALIZATION="topology.fall.back.on.java.serialization";
|
|
406
|
+
|
|
407
|
+
/**
|
|
408
|
+
* Topology-specific options for the worker child process. This is used in addition to WORKER_CHILDOPTS.
|
|
409
|
+
*/
|
|
410
|
+
public static String TOPOLOGY_WORKER_CHILDOPTS="topology.worker.childopts";
|
|
411
|
+
|
|
412
|
+
/**
|
|
413
|
+
* This config is available for TransactionalSpouts, and contains the id ( a String) for
|
|
414
|
+
* the transactional topology. This id is used to store the state of the transactional
|
|
415
|
+
* topology in Zookeeper.
|
|
416
|
+
*/
|
|
417
|
+
public static String TOPOLOGY_TRANSACTIONAL_ID="topology.transactional.id";
|
|
418
|
+
|
|
419
|
+
/**
|
|
420
|
+
* A list of task hooks that are automatically added to every spout and bolt in the topology. An example
|
|
421
|
+
* of when you'd do this is to add a hook that integrates with your internal
|
|
422
|
+
* monitoring system. These hooks are instantiated using the zero-arg constructor.
|
|
423
|
+
*/
|
|
424
|
+
public static String TOPOLOGY_AUTO_TASK_HOOKS="topology.auto.task.hooks";
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
/**
|
|
428
|
+
* The size of the Disruptor receive queue for each executor. Must be a power of 2.
|
|
429
|
+
*/
|
|
430
|
+
public static String TOPOLOGY_EXECUTOR_RECEIVE_BUFFER_SIZE="topology.executor.receive.buffer.size";
|
|
431
|
+
|
|
432
|
+
/**
|
|
433
|
+
* The maximum number of messages to batch from the thread receiving off the network to the
|
|
434
|
+
* executor queues. Must be a power of 2.
|
|
435
|
+
*/
|
|
436
|
+
public static String TOPOLOGY_RECEIVER_BUFFER_SIZE="topology.receiver.buffer.size";
|
|
437
|
+
|
|
438
|
+
/**
|
|
439
|
+
* The size of the Disruptor send queue for each executor. Must be a power of 2.
|
|
440
|
+
*/
|
|
441
|
+
public static String TOPOLOGY_EXECUTOR_SEND_BUFFER_SIZE="topology.executor.send.buffer.size";
|
|
442
|
+
|
|
443
|
+
/**
|
|
444
|
+
* The size of the Disruptor transfer queue for each worker.
|
|
445
|
+
*/
|
|
446
|
+
public static String TOPOLOGY_TRANSFER_BUFFER_SIZE="topology.transfer.buffer.size";
|
|
447
|
+
|
|
448
|
+
/**
|
|
449
|
+
* How often a tick tuple from the "__system" component and "__tick" stream should be sent
|
|
450
|
+
* to tasks. Meant to be used as a component-specific configuration.
|
|
451
|
+
*/
|
|
452
|
+
public static String TOPOLOGY_TICK_TUPLE_FREQ_SECS="topology.tick.tuple.freq.secs";
|
|
453
|
+
|
|
454
|
+
|
|
455
|
+
/**
|
|
456
|
+
* Configure the wait strategy used for internal queuing. Can be used to tradeoff latency
|
|
457
|
+
* vs. throughput
|
|
458
|
+
*/
|
|
459
|
+
public static String TOPOLOGY_DISRUPTOR_WAIT_STRATEGY="topology.disruptor.wait.strategy";
|
|
460
|
+
|
|
461
|
+
/**
|
|
462
|
+
* The size of the shared thread pool for worker tasks to make use of. The thread pool can be accessed
|
|
463
|
+
* via the TopologyContext.
|
|
464
|
+
*/
|
|
465
|
+
public static String TOPOLOGY_WORKER_SHARED_THREAD_POOL_SIZE="topology.worker.shared.thread.pool.size";
|
|
466
|
+
|
|
467
|
+
/**
|
|
468
|
+
* The interval in seconds to use for determining whether to throttle errors reported to Zookeeper. For example,
|
|
469
|
+
* an interval of 10 seconds with topology.max.error.report.per.interval set to 5 will only allow 5 errors to be
|
|
470
|
+
* reported to Zookeeper per task for every 10 second interval of time.
|
|
471
|
+
*/
|
|
472
|
+
public static String TOPOLOGY_ERROR_THROTTLE_INTERVAL_SECS="topology.error.throttle.interval.secs";
|
|
473
|
+
|
|
474
|
+
/**
|
|
475
|
+
* See doc for TOPOLOGY_ERROR_THROTTLE_INTERVAL_SECS
|
|
476
|
+
*/
|
|
477
|
+
public static String TOPOLOGY_MAX_ERROR_REPORT_PER_INTERVAL="topology.max.error.report.per.interval";
|
|
478
|
+
|
|
479
|
+
|
|
480
|
+
/**
|
|
481
|
+
* How often a batch can be emitted in a Trident topology.
|
|
482
|
+
*/
|
|
483
|
+
public static String TOPOLOGY_TRIDENT_BATCH_EMIT_INTERVAL_MILLIS="topology.trident.batch.emit.interval.millis";
|
|
484
|
+
|
|
485
|
+
/**
|
|
486
|
+
* Name of the topology. This config is automatically set by Storm when the topology is submitted.
|
|
487
|
+
*/
|
|
488
|
+
public static String TOPOLOGY_NAME="topology.name";
|
|
489
|
+
|
|
490
|
+
/**
|
|
491
|
+
* The root directory in ZooKeeper for metadata about TransactionalSpouts.
|
|
492
|
+
*/
|
|
493
|
+
public static String TRANSACTIONAL_ZOOKEEPER_ROOT="transactional.zookeeper.root";
|
|
494
|
+
|
|
495
|
+
/**
|
|
496
|
+
* The list of zookeeper servers in which to keep the transactional state. If null (which is default),
|
|
497
|
+
* will use storm.zookeeper.servers
|
|
498
|
+
*/
|
|
499
|
+
public static String TRANSACTIONAL_ZOOKEEPER_SERVERS="transactional.zookeeper.servers";
|
|
500
|
+
|
|
501
|
+
/**
|
|
502
|
+
* The port to use to connect to the transactional zookeeper servers. If null (which is default),
|
|
503
|
+
* will use storm.zookeeper.port
|
|
504
|
+
*/
|
|
505
|
+
public static String TRANSACTIONAL_ZOOKEEPER_PORT="transactional.zookeeper.port";
|
|
506
|
+
|
|
507
|
+
/**
|
|
508
|
+
* The number of threads that should be used by the zeromq context in each worker process.
|
|
509
|
+
*/
|
|
510
|
+
public static String ZMQ_THREADS = "zmq.threads";
|
|
511
|
+
|
|
512
|
+
/**
|
|
513
|
+
* How long a connection should retry sending messages to a target host when
|
|
514
|
+
* the connection is closed. This is an advanced configuration and can almost
|
|
515
|
+
* certainly be ignored.
|
|
516
|
+
*/
|
|
517
|
+
public static String ZMQ_LINGER_MILLIS = "zmq.linger.millis";
|
|
518
|
+
|
|
519
|
+
/**
|
|
520
|
+
* The high water for the ZeroMQ push sockets used for networking. Use this config to prevent buffer explosion
|
|
521
|
+
* on the networking layer.
|
|
522
|
+
*/
|
|
523
|
+
public static String ZMQ_HWM = "zmq.hwm";
|
|
524
|
+
|
|
525
|
+
/**
|
|
526
|
+
* This value is passed to spawned JVMs (e.g., Nimbus, Supervisor, and Workers)
|
|
527
|
+
* for the java.library.path value. java.library.path tells the JVM where
|
|
528
|
+
* to look for native libraries. It is necessary to set this config correctly since
|
|
529
|
+
* Storm uses the ZeroMQ and JZMQ native libs.
|
|
530
|
+
*/
|
|
531
|
+
public static String JAVA_LIBRARY_PATH = "java.library.path";
|
|
532
|
+
|
|
533
|
+
/**
|
|
534
|
+
* The path to use as the zookeeper dir when running a zookeeper server via
|
|
535
|
+
* "storm dev-zookeeper". This zookeeper instance is only intended for development;
|
|
536
|
+
* it is not a production grade zookeeper setup.
|
|
537
|
+
*/
|
|
538
|
+
public static String DEV_ZOOKEEPER_PATH = "dev.zookeeper.path";
|
|
539
|
+
|
|
540
|
+
public static void setDebug(Map conf, boolean isOn) {
|
|
541
|
+
conf.put(Config.TOPOLOGY_DEBUG, isOn);
|
|
542
|
+
}
|
|
543
|
+
|
|
544
|
+
public void setDebug(boolean isOn) {
|
|
545
|
+
setDebug(this, isOn);
|
|
546
|
+
}
|
|
547
|
+
|
|
548
|
+
@Deprecated
|
|
549
|
+
public void setOptimize(boolean isOn) {
|
|
550
|
+
put(Config.TOPOLOGY_OPTIMIZE, isOn);
|
|
551
|
+
}
|
|
552
|
+
|
|
553
|
+
public static void setNumWorkers(Map conf, int workers) {
|
|
554
|
+
conf.put(Config.TOPOLOGY_WORKERS, workers);
|
|
555
|
+
}
|
|
556
|
+
|
|
557
|
+
public void setNumWorkers(int workers) {
|
|
558
|
+
setNumWorkers(this, workers);
|
|
559
|
+
}
|
|
560
|
+
|
|
561
|
+
public static void setNumAckers(Map conf, int numExecutors) {
|
|
562
|
+
conf.put(Config.TOPOLOGY_ACKER_EXECUTORS, numExecutors);
|
|
563
|
+
}
|
|
564
|
+
|
|
565
|
+
public void setNumAckers(int numExecutors) {
|
|
566
|
+
setNumAckers(this, numExecutors);
|
|
567
|
+
}
|
|
568
|
+
|
|
569
|
+
public static void setMessageTimeoutSecs(Map conf, int secs) {
|
|
570
|
+
conf.put(Config.TOPOLOGY_MESSAGE_TIMEOUT_SECS, secs);
|
|
571
|
+
}
|
|
572
|
+
|
|
573
|
+
public void setMessageTimeoutSecs(int secs) {
|
|
574
|
+
setMessageTimeoutSecs(this, secs);
|
|
575
|
+
}
|
|
576
|
+
|
|
577
|
+
public static void registerSerialization(Map conf, Class klass) {
|
|
578
|
+
getRegisteredSerializations(conf).add(klass.getName());
|
|
579
|
+
}
|
|
580
|
+
|
|
581
|
+
public void registerSerialization(Class klass) {
|
|
582
|
+
registerSerialization(this, klass);
|
|
583
|
+
}
|
|
584
|
+
|
|
585
|
+
public static void registerSerialization(Map conf, Class klass, Class<? extends Serializer> serializerClass) {
|
|
586
|
+
Map<String, String> register = new HashMap<String, String>();
|
|
587
|
+
register.put(klass.getName(), serializerClass.getName());
|
|
588
|
+
getRegisteredSerializations(conf).add(register);
|
|
589
|
+
}
|
|
590
|
+
|
|
591
|
+
public void registerSerialization(Class klass, Class<? extends Serializer> serializerClass) {
|
|
592
|
+
registerSerialization(this, klass, serializerClass);
|
|
593
|
+
}
|
|
594
|
+
|
|
595
|
+
public static void registerDecorator(Map conf, Class<? extends IKryoDecorator> klass) {
|
|
596
|
+
getRegisteredDecorators(conf).add(klass.getName());
|
|
597
|
+
}
|
|
598
|
+
|
|
599
|
+
public void registerDecorator(Class<? extends IKryoDecorator> klass) {
|
|
600
|
+
registerDecorator(this, klass);
|
|
601
|
+
}
|
|
602
|
+
|
|
603
|
+
public static void setKryoFactory(Map conf, Class<? extends IKryoFactory> klass) {
|
|
604
|
+
conf.put(Config.TOPOLOGY_KRYO_FACTORY, klass.getName());
|
|
605
|
+
}
|
|
606
|
+
|
|
607
|
+
public void setKryoFactory(Class<? extends IKryoFactory> klass) {
|
|
608
|
+
setKryoFactory(this, klass);
|
|
609
|
+
}
|
|
610
|
+
|
|
611
|
+
public static void setSkipMissingKryoRegistrations(Map conf, boolean skip) {
|
|
612
|
+
conf.put(Config.TOPOLOGY_SKIP_MISSING_KRYO_REGISTRATIONS, skip);
|
|
613
|
+
}
|
|
614
|
+
|
|
615
|
+
public void setSkipMissingKryoRegistrations(boolean skip) {
|
|
616
|
+
setSkipMissingKryoRegistrations(this, skip);
|
|
617
|
+
}
|
|
618
|
+
|
|
619
|
+
public static void setMaxTaskParallelism(Map conf, int max) {
|
|
620
|
+
conf.put(Config.TOPOLOGY_MAX_TASK_PARALLELISM, max);
|
|
621
|
+
}
|
|
622
|
+
|
|
623
|
+
public void setMaxTaskParallelism(int max) {
|
|
624
|
+
setMaxTaskParallelism(this, max);
|
|
625
|
+
}
|
|
626
|
+
|
|
627
|
+
public static void setMaxSpoutPending(Map conf, int max) {
|
|
628
|
+
conf.put(Config.TOPOLOGY_MAX_SPOUT_PENDING, max);
|
|
629
|
+
}
|
|
630
|
+
|
|
631
|
+
public void setMaxSpoutPending(int max) {
|
|
632
|
+
setMaxSpoutPending(this, max);
|
|
633
|
+
}
|
|
634
|
+
|
|
635
|
+
public static void setStatsSampleRate(Map conf, double rate) {
|
|
636
|
+
conf.put(Config.TOPOLOGY_STATS_SAMPLE_RATE, rate);
|
|
637
|
+
}
|
|
638
|
+
|
|
639
|
+
public void setStatsSampleRate(double rate) {
|
|
640
|
+
setStatsSampleRate(this, rate);
|
|
641
|
+
}
|
|
642
|
+
|
|
643
|
+
public static void setFallBackOnJavaSerialization(Map conf, boolean fallback) {
|
|
644
|
+
conf.put(Config.TOPOLOGY_FALL_BACK_ON_JAVA_SERIALIZATION, fallback);
|
|
645
|
+
}
|
|
646
|
+
|
|
647
|
+
public void setFallBackOnJavaSerialization(boolean fallback) {
|
|
648
|
+
setFallBackOnJavaSerialization(this, fallback);
|
|
649
|
+
}
|
|
650
|
+
|
|
651
|
+
private static List getRegisteredSerializations(Map conf) {
|
|
652
|
+
List ret;
|
|
653
|
+
if(!conf.containsKey(Config.TOPOLOGY_KRYO_REGISTER)) {
|
|
654
|
+
ret = new ArrayList();
|
|
655
|
+
} else {
|
|
656
|
+
ret = new ArrayList((List) conf.get(Config.TOPOLOGY_KRYO_REGISTER));
|
|
657
|
+
}
|
|
658
|
+
conf.put(Config.TOPOLOGY_KRYO_REGISTER, ret);
|
|
659
|
+
return ret;
|
|
660
|
+
}
|
|
661
|
+
|
|
662
|
+
private static List getRegisteredDecorators(Map conf) {
|
|
663
|
+
List ret;
|
|
664
|
+
if(!conf.containsKey(Config.TOPOLOGY_KRYO_DECORATORS)) {
|
|
665
|
+
ret = new ArrayList();
|
|
666
|
+
} else {
|
|
667
|
+
ret = new ArrayList((List) conf.get(Config.TOPOLOGY_KRYO_DECORATORS));
|
|
668
|
+
}
|
|
669
|
+
conf.put(Config.TOPOLOGY_KRYO_DECORATORS, ret);
|
|
670
|
+
return ret;
|
|
671
|
+
}
|
|
672
|
+
}
|
|
673
|
+
|
|
674
|
+
|
|
675
|
+
end
|
|
676
|
+
end
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
module Wukong
  # EventMachine connection that reads newline-delimited events, pushes each
  # one through a Wukong dataflow and writes the results to STDOUT.
  #
  # For every line received, exactly one response is written: all records the
  # dataflow emitted for that line, joined with "\n" and followed by
  # `settings.delimiter`. A line that yields no records still produces the
  # bare delimiter.
  #
  # NOTE(review): `construct_dataflow`, `setup_dataflow` and `driver` are not
  # defined in this class; presumably they come from DriverMethods -- confirm.
  class StormRunner < EM::P::LineAndTextProtocol
    include DriverMethods

    attr_accessor :dataflow, :settings

    # Attaches a runner to $stdin. Must be called inside a running reactor.
    #
    # @param label    identifier handed to `construct_dataflow`
    # @param settings configuration object; it must respond to `delimiter`
    #   (see #send_messages), so the `{}` default only works if the caller's
    #   Hash type provides that accessor
    def self.start(label, settings = {})
      EM.attach($stdin, self, label, settings)
    end

    # Bare `super` forwards both arguments to the protocol's own initializer.
    def initialize(label, settings)
      super
      @settings = settings
      @dataflow = construct_dataflow(label, settings)
      @messages = []
    end

    # EventMachine callback invoked once the connection object is ready.
    def post_init
      setup_dataflow
    end

    # Handles one input line: runs it through the dataflow, then flushes the
    # collected output. Any StandardError is reported on STDERR (message only)
    # and stops the reactor.
    def receive_line(line)
      driver.send_through_dataflow(line)
      send_messages
    rescue => e
      $stderr.puts e.message
      EM.stop
    end

    # Writes the buffered records plus the delimiter, flushes STDOUT so the
    # consumer sees the response immediately, and resets the buffer.
    def send_messages
      $stdout.write(@messages.join("\n") + settings.delimiter)
      $stdout.flush
      @messages.clear
    end

    # Called when the input is closed. `super` lets LineAndTextProtocol hand
    # any buffered, unterminated final line to #receive_line before shutdown;
    # without it, input that does not end in a newline loses its last event.
    # The reactor is stopped even if that final flush fails.
    def unbind
      super
    ensure
      EM.stop
    end

    # No setup work is needed for this runner.
    def setup; end

    # Collects each record handed to the runner so #send_messages can emit
    # them together.
    def process(record)
      @messages << record
    end

    # No teardown work is needed for this runner.
    def stop; end
  end
end
|
data/lib/wukong-storm.rb
ADDED
data/spec/spec_helper.rb
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# Identity processor: every incoming record is emitted unchanged.
Wukong.processor(:simple) do
  def process(record)
    yield(record)
  end
end
|
|
6
|
+
|
|
7
|
+
# Sink processor: accepts every record and never yields, so nothing is emitted.
Wukong.processor(:skipped) do
  def process(record); end
end
|
|
12
|
+
|
|
13
|
+
# Duplicating processor: emits each incoming record three times.
Wukong.processor(:multi) do
  def process(record)
    3.times do
      yield record
    end
  end
end
|
|
18
|
+
|
|
19
|
+
# Emits a sentence built from the record's 'foo' entry; the record must
# support lookup by the string key 'foo'.
Wukong.processor(:test_example) do
  def process(record)
    raised = record['foo']
    yield "I raised the #{raised}"
  end
end
|
|
24
|
+
|
|
25
|
+
# Dataflow that pipes the output of `from_json` into the :test_example
# processor.
# NOTE(review): `from_json` is not defined in this file; presumably a stock
# Wukong processor that parses JSON input -- confirm.
Wukong.dataflow(:flow) do
  from_json | test_example
end
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
# Processor that ignores every record it is given. None of the examples
# visible in this spec select it via --run.
Wu.processor(:test) do
  def process(record); end
end
|
|
10
|
+
|
|
11
|
+
# Integration examples for the `wu-storm` executable. Each context runs the
# command with the example definitions file and a `--run` target, feeds it
# input, and checks the exit status and the delimited STDOUT.
# NOTE(review): `command`, `exit_with`, `have_stdout` and `have_stderr` are not
# defined here; presumably they are spec helpers loaded via spec_helper.
describe 'wu-storm' do
  # Absolute path to the example processors and dataflow used below.
  let(:examples) { File.expand_path('../support/examples.rb', __FILE__) }

  context 'without any arguments' do
    subject { command 'wu-storm' }
    it { should exit_with(:non_zero) }
    it { should have_stderr('usage: wu-storm') }
  end

  context 'with a simple processor' do
    let(:input) { 'one event' }
    subject { command('wu-storm', examples, '--run=simple') < input }
    it { should exit_with(0) }
    it { should have_stdout('one event|') }
  end

  # A record that produces no output must still be answered with the bare
  # delimiter.
  context 'with a skipped processor' do
    let(:input) { 'never see this' }
    subject { command('wu-storm', examples, '--run=skipped') < input }
    it { should exit_with(0) }
    it { should have_stdout('|') }
  end

  # Several outputs for one event are newline-separated, with one delimiter.
  context 'with a duplicating processor' do
    let(:input) { 'foo' }
    subject { command('wu-storm', examples, '--run=multi') < input }
    it { should exit_with(0) }
    it { should have_stdout("foo\nfoo\nfoo|") }
  end

  context 'with a flow' do
    let(:input) { '{"foo":"bar"}' }
    subject { command('wu-storm', examples, '--run=flow') < input }
    it { should exit_with(0) }
    it { should have_stdout('I raised the bar|') }
  end

  # The command-line arguments match the single-event case; what varies is the
  # number of input lines, each of which gets its own delimited response.
  context 'with multiple input lines' do
    let(:input) { "foo\nbar\nbaz" }
    subject { command('wu-storm', examples, '--run=simple') < input }
    it { should exit_with(0) }
    it { should have_stdout('foo|bar|baz|') }
  end
end
|
data/wu-storm.gemspec
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
|
2
|
+
require File.expand_path('../lib/wukong-storm/version', __FILE__)
|
|
3
|
+
|
|
4
|
+
# Gem specification for wukong-storm. The version constant is loaded from
# lib/wukong-storm/version by the require above.
Gem::Specification.new do |gem|
  gem.name        = 'wukong-storm'
  gem.homepage    = 'https://github.com/infochimps-labs/wukong-storm'
  gem.licenses    = ["Apache 2.0"]
  gem.email       = 'coders@infochimps.org'
  gem.authors     = ['Infochimps', 'Travis Dempsey']
  gem.version     = Wukong::Storm::VERSION

  gem.summary     = 'Storm processing for Ruby'
  # The long description must not be empty: it is what registries display for
  # the gem. The wording follows the README usage text.
  gem.description = 'wu-storm is a commandline tool for running Wukong processors and flows ' \
                    'in a Storm or Trident topology. It operates over STDIN and STDOUT and ' \
                    'answers every input event with a delimiter-terminated response.'

  # The file list comes from git, so the gem must be built inside a checkout.
  gem.files         = `git ls-files`.split("\n")
  gem.executables   = ['wu-storm']
  gem.test_files    = gem.files.grep(/^spec/)
  gem.require_paths = ['lib']

  # Pinned to an exact prerelease of wukong.
  gem.add_dependency('wukong', '3.0.0.pre3')
end
|
metadata
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: wukong-storm
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.0.1
|
|
5
|
+
prerelease:
|
|
6
|
+
platform: ruby
|
|
7
|
+
authors:
|
|
8
|
+
- Infochimps
|
|
9
|
+
- Travis Dempsey
|
|
10
|
+
autorequire:
|
|
11
|
+
bindir: bin
|
|
12
|
+
cert_chain: []
|
|
13
|
+
date: 2012-12-17 00:00:00.000000000 Z
|
|
14
|
+
dependencies:
|
|
15
|
+
- !ruby/object:Gem::Dependency
|
|
16
|
+
name: wukong
|
|
17
|
+
requirement: !ruby/object:Gem::Requirement
|
|
18
|
+
none: false
|
|
19
|
+
requirements:
|
|
20
|
+
- - '='
|
|
21
|
+
- !ruby/object:Gem::Version
|
|
22
|
+
version: 3.0.0.pre3
|
|
23
|
+
type: :runtime
|
|
24
|
+
prerelease: false
|
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
26
|
+
none: false
|
|
27
|
+
requirements:
|
|
28
|
+
- - '='
|
|
29
|
+
- !ruby/object:Gem::Version
|
|
30
|
+
version: 3.0.0.pre3
|
|
31
|
+
description: ''
|
|
32
|
+
email: coders@infochimps.org
|
|
33
|
+
executables:
|
|
34
|
+
- wu-storm
|
|
35
|
+
extensions: []
|
|
36
|
+
extra_rdoc_files: []
|
|
37
|
+
files:
|
|
38
|
+
- .gitignore
|
|
39
|
+
- .rspec
|
|
40
|
+
- Gemfile
|
|
41
|
+
- README.md
|
|
42
|
+
- Rakefile
|
|
43
|
+
- bin/wu-storm
|
|
44
|
+
- lib/wukong-storm.rb
|
|
45
|
+
- lib/wukong-storm/configuration.rb
|
|
46
|
+
- lib/wukong-storm/runner.rb
|
|
47
|
+
- lib/wukong-storm/version.rb
|
|
48
|
+
- spec/spec_helper.rb
|
|
49
|
+
- spec/support/examples.rb
|
|
50
|
+
- spec/wu_storm_spec.rb
|
|
51
|
+
- wu-storm.gemspec
|
|
52
|
+
homepage: https://github.com/infochimps-labs/wukong-storm
|
|
53
|
+
licenses:
|
|
54
|
+
- Apache 2.0
|
|
55
|
+
post_install_message:
|
|
56
|
+
rdoc_options: []
|
|
57
|
+
require_paths:
|
|
58
|
+
- lib
|
|
59
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
60
|
+
none: false
|
|
61
|
+
requirements:
|
|
62
|
+
- - ! '>='
|
|
63
|
+
- !ruby/object:Gem::Version
|
|
64
|
+
version: '0'
|
|
65
|
+
segments:
|
|
66
|
+
- 0
|
|
67
|
+
hash: 1144670354774271812
|
|
68
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
69
|
+
none: false
|
|
70
|
+
requirements:
|
|
71
|
+
- - ! '>='
|
|
72
|
+
- !ruby/object:Gem::Version
|
|
73
|
+
version: '0'
|
|
74
|
+
segments:
|
|
75
|
+
- 0
|
|
76
|
+
hash: 1144670354774271812
|
|
77
|
+
requirements: []
|
|
78
|
+
rubyforge_project:
|
|
79
|
+
rubygems_version: 1.8.24
|
|
80
|
+
signing_key:
|
|
81
|
+
specification_version: 3
|
|
82
|
+
summary: Storm processing for Ruby
|
|
83
|
+
test_files:
|
|
84
|
+
- spec/spec_helper.rb
|
|
85
|
+
- spec/support/examples.rb
|
|
86
|
+
- spec/wu_storm_spec.rb
|