wukong-storm 0.0.1

data/.gitignore ADDED
@@ -0,0 +1,2 @@
+ pkg/
+ Gemfile.lock
data/.rspec ADDED
@@ -0,0 +1,3 @@
+ --format documentation
+ --color
+ --drb
data/Gemfile ADDED
@@ -0,0 +1,8 @@
+ source :rubygems
+
+ gemspec
+
+ group :development do
+   gem 'rake', '~> 0.9'
+   gem 'rspec', '~> 2'
+ end
data/README.md ADDED
@@ -0,0 +1,31 @@
+ # Wukong Storm
+
+ ## Usage
+
+ The Wukong Storm plugin is very basic at the moment. It functions entirely over STDIN and STDOUT. The following help text is taken from the `wu-storm` executable:
+
+ ```
+ usage: wu-storm PROCESSOR|FLOW [...--param=value...]
+
+ wu-storm is a command-line tool for running Wukong processors and flows in
+ a Storm or Trident topology.
+
+ wu-storm operates over STDIN and STDOUT and has a one-to-one message guarantee.
+ For example, when using an identity processor, wu-storm, given an event 'foo', will return
+ 'foo|'. The '|' character is the specified End-Of-File delimiter.
+
+ If there is ever a suppressed error in processing, or a skipped record for any reason,
+ wu-storm will still respond with a '|', signifying an empty return event.
+
+ If there are multiple messages that have resulted from a single event, wu-storm will return
+ them newline separated, followed by the delimiter, e.g. 'foo\nbar\nbaz|'.
+
+
+ Params:
+   -t, --delimiter=String   The EOF specifier when returning events [Default: |]
+   -r, --run=String         Name of the processor or dataflow to use. Defaults to basename of the given path
+ ```
+
+ ## TODO
+
+ The configuration file currently lists __all__ of Storm's options; these are slowly being translated into real Configliere options.
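What one of those translations might look like, following the `Configuration.define` pattern already used at the top of `lib/wukong-storm/configuration.rb` (a minimal sketch; the `:nimbus_host` and `:nimbus_thrift_port` options below are illustrative and not yet part of the gem):

```ruby
require 'configliere'

module Wukong
  module Storm
    Configuration = Configliere::Param.new unless defined? Configuration

    # Hypothetical translations of two of the Java constants listed further
    # down in configuration.rb; names and types here are assumptions.
    Configuration.define :nimbus_host,        description: 'nimbus.host'
    Configuration.define :nimbus_thrift_port, description: 'nimbus.thrift.port', type: Integer
  end
end
```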
data/Rakefile ADDED
@@ -0,0 +1,7 @@
+ require 'bundler'
+ Bundler::GemHelper.install_tasks
+
+ require 'rspec/core/rake_task'
+ RSpec::Core::RakeTask.new(:rspec)
+
+ task :default => [:rspec]
data/bin/wu-storm ADDED
@@ -0,0 +1,51 @@
+ #!/usr/bin/env ruby
+ require 'wukong-storm'
+ require 'configliere'
+
+ Settings.use(:commandline)
+ Settings.define :run, description: 'Name of the processor or dataflow to use. Defaults to basename of the given path', flag: 'r'
+ Settings.define :delimiter, description: 'The EOF specifier when returning events', default: '|', flag: 't'
+
+ def Settings.usage() "usage: #{File.basename($0)} PROCESSOR|FLOW [...--param=value...]" ; end
+
+ Settings.description = <<'EOF'
+ wu-storm is a command-line tool for running Wukong processors and flows in
+ a Storm or Trident topology.
+
+ wu-storm operates over STDIN and STDOUT and has a one-to-one message guarantee.
+ For example, when using an identity processor, wu-storm, given an event 'foo', will return
+ 'foo|'. The '|' character is the specified End-Of-File delimiter.
+
+ If there is ever a suppressed error in processing, or a skipped record for any reason,
+ wu-storm will still respond with a '|', signifying an empty return event.
+
+ If there are multiple messages that have resulted from a single event, wu-storm will return
+ them newline separated, followed by the delimiter, e.g. 'foo\nbar\nbaz|'.
+ EOF
+
+ Settings.resolve!
+
+ runnable = Settings.rest.first
+
+ case
+ when runnable.nil?
+   Settings.dump_help
+   exit(1)
+ when Wukong.registry.registered?(runnable.to_sym)
+   processor = runnable
+ when File.exist?(runnable)
+   load runnable
+   processor = Settings.run || File.basename(runnable, '.rb')
+ else
+   Settings.dump_help
+   exit(1)
+ end
+
+ begin
+   EM.run do
+     Wu::StormRunner.start(processor.to_sym, Settings)
+   end
+ rescue Wu::Error => e
+   $stderr.puts e.message
+   exit(1)
+ end
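The one-to-one message guarantee described above is easiest to see end-to-end. Below is a minimal sketch (not part of the gem) of driving `wu-storm` from another Ruby process, assuming the executable is installed on the `PATH` and using the `:simple` identity processor defined in `spec/support/examples.rb` later in this diff:

```ruby
# Feed one event to wu-storm and read back the reply, which is terminated
# by the default '|' delimiter. Assumes `wu-storm` is on the PATH.
IO.popen(['wu-storm', 'spec/support/examples.rb', '--run=simple'], 'r+') do |storm|
  storm.puts 'foo'                               # send a single event
  reply = ''
  reply << storm.readchar until reply.end_with?('|')
  puts reply                                     # => "foo|"
end
```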
data/lib/wukong-storm/configuration.rb ADDED
@@ -0,0 +1,676 @@
1
+ module Wukong
2
+ module Storm
3
+
4
+ Configuration = Configliere::Param.new unless defined? Configuration
5
+
6
+ Configuration.define :zookeepers_servers, description: 'storm.zookeeper.servers'
7
+ Configuration.define :zookeepers_port, description: 'storm.zookeeper.port'
8
+ Configuration.define :local_dir, description: 'storm.local.dir'
9
+ Configuration.define :scheduler, description: 'storm.scheduler'
10
+ Configuration.define :cluster_mode, description: 'storm.cluster.mode'
11
+ Configuration.define :local_hostname, description: 'storm.local.hostname'
12
+
13
+ /**
14
+ * Whether or not to use ZeroMQ for messaging in local mode. If this is set
15
+ * to false, then Storm will use a pure-Java messaging system. The purpose
16
+ * of this flag is to make it easy to run Storm in local mode by eliminating
17
+ * the need for native dependencies, which can be difficult to install.
18
+ *
19
+ * Defaults to false.
20
+ */
21
+ public static String STORM_LOCAL_MODE_ZMQ = "storm.local.mode.zmq";
22
+
23
+ /**
24
+ * The root location at which Storm stores data in ZooKeeper.
25
+ */
26
+ public static String STORM_ZOOKEEPER_ROOT = "storm.zookeeper.root";
27
+
28
+ /**
29
+ * The session timeout for clients to ZooKeeper.
30
+ */
31
+ public static String STORM_ZOOKEEPER_SESSION_TIMEOUT = "storm.zookeeper.session.timeout";
32
+
33
+ /**
34
+ * The connection timeout for clients to ZooKeeper.
35
+ */
36
+ public static String STORM_ZOOKEEPER_CONNECTION_TIMEOUT = "storm.zookeeper.connection.timeout";
37
+
38
+
39
+ /**
40
+ * The number of times to retry a Zookeeper operation.
41
+ */
42
+ public static String STORM_ZOOKEEPER_RETRY_TIMES="storm.zookeeper.retry.times";
43
+
44
+ /**
45
+ * The interval between retries of a Zookeeper operation.
46
+ */
47
+ public static String STORM_ZOOKEEPER_RETRY_INTERVAL="storm.zookeeper.retry.interval";
48
+
49
+ /**
50
+ * The Zookeeper authentication scheme to use, e.g. "digest". Defaults to no authentication.
51
+ */
52
+ public static String STORM_ZOOKEEPER_AUTH_SCHEME="storm.zookeeper.auth.scheme";
53
+
54
+ /**
55
+ * A string representing the payload for Zookeeper authentication. It gets serialized using UTF-8 encoding during authentication.
56
+ */
57
+ public static String STORM_ZOOKEEPER_AUTH_PAYLOAD="storm.zookeeper.auth.payload";
58
+
59
+ /**
60
+ * The id assigned to a running topology. The id is the storm name with a unique nonce appended.
61
+ */
62
+ public static String STORM_ID = "storm.id";
63
+
64
+ /**
65
+ * The host that the master server is running on.
66
+ */
67
+ public static String NIMBUS_HOST = "nimbus.host";
68
+
69
+ /**
70
+ * Which port the Thrift interface of Nimbus should run on. Clients should
71
+ * connect to this port to upload jars and submit topologies.
72
+ */
73
+ public static String NIMBUS_THRIFT_PORT = "nimbus.thrift.port";
74
+
75
+
76
+ /**
77
+ * This parameter is used by the storm-deploy project to configure the
78
+ * jvm options for the nimbus daemon.
79
+ */
80
+ public static String NIMBUS_CHILDOPTS = "nimbus.childopts";
81
+
82
+
83
+ /**
84
+ * How long without heartbeating a task can go before nimbus will consider the
85
+ * task dead and reassign it to another location.
86
+ */
87
+ public static String NIMBUS_TASK_TIMEOUT_SECS = "nimbus.task.timeout.secs";
88
+
89
+
90
+ /**
91
+ * How often nimbus should wake up to check heartbeats and do reassignments. Note
92
+ * that if a machine ever goes down Nimbus will immediately wake up and take action.
93
+ * This parameter is for checking for failures when there's no explicit event like that
94
+ * occuring.
95
+ */
96
+ public static String NIMBUS_MONITOR_FREQ_SECS = "nimbus.monitor.freq.secs";
97
+
98
+ /**
99
+ * How often nimbus should wake the cleanup thread to clean the inbox.
100
+ * @see NIMBUS_INBOX_JAR_EXPIRATION_SECS
101
+ */
102
+ public static String NIMBUS_CLEANUP_INBOX_FREQ_SECS = "nimbus.cleanup.inbox.freq.secs";
103
+
104
+ /**
105
+ * The length of time a jar file lives in the inbox before being deleted by the cleanup thread.
106
+ *
107
+ * Probably keep this value greater than or equal to NIMBUS_CLEANUP_INBOX_JAR_EXPIRATION_SECS.
108
+ * Note that the time it takes to delete an inbox jar file is going to be somewhat more than
109
+ * NIMBUS_CLEANUP_INBOX_JAR_EXPIRATION_SECS (depending on how often NIMBUS_CLEANUP_FREQ_SECS
110
+ * is set to).
111
+ * @see NIMBUS_CLEANUP_FREQ_SECS
112
+ */
113
+ public static String NIMBUS_INBOX_JAR_EXPIRATION_SECS = "nimbus.inbox.jar.expiration.secs";
114
+
115
+ /**
116
+ * How long before a supervisor can go without heartbeating before nimbus considers it dead
117
+ * and stops assigning new work to it.
118
+ */
119
+ public static String NIMBUS_SUPERVISOR_TIMEOUT_SECS = "nimbus.supervisor.timeout.secs";
120
+
121
+ /**
122
+ * A special timeout used when a task is initially launched. During launch, this is the timeout
123
+ * used until the first heartbeat, overriding nimbus.task.timeout.secs.
124
+ *
125
+ * <p>A separate timeout exists for launch because there can be quite a bit of overhead
126
+ * to launching new JVM's and configuring them.</p>
127
+ */
128
+ public static String NIMBUS_TASK_LAUNCH_SECS = "nimbus.task.launch.secs";
129
+
130
+ /**
131
+ * Whether or not nimbus should reassign tasks if it detects that a task goes down.
132
+ * Defaults to true, and it's not recommended to change this value.
133
+ */
134
+ public static String NIMBUS_REASSIGN = "nimbus.reassign";
135
+
136
+ /**
137
+ * During upload/download with the master, how long an upload or download connection is idle
138
+ * before nimbus considers it dead and drops the connection.
139
+ */
140
+ public static String NIMBUS_FILE_COPY_EXPIRATION_SECS = "nimbus.file.copy.expiration.secs";
141
+
142
+ /**
143
+ * A custom class that implements ITopologyValidator that is run whenever a
144
+ * topology is submitted. Can be used to provide business-specific logic for
145
+ * whether topologies are allowed to run or not.
146
+ */
147
+ public static String NIMBUS_TOPOLOGY_VALIDATOR = "nimbus.topology.validator";
148
+
149
+
150
+ /**
151
+ * Storm UI binds to this port.
152
+ */
153
+ public static String UI_PORT = "ui.port";
154
+
155
+ /**
156
+ * Childopts for Storm UI Java process.
157
+ */
158
+ public static String UI_CHILDOPTS = "ui.childopts";
159
+
160
+
161
+ /**
162
+ * List of DRPC servers so that the DRPCSpout knows who to talk to.
163
+ */
164
+ public static String DRPC_SERVERS = "drpc.servers";
165
+
166
+ /**
167
+ * This port is used by Storm DRPC for receiving DPRC requests from clients.
168
+ */
169
+ public static String DRPC_PORT = "drpc.port";
170
+
171
+ /**
172
+ * This port on Storm DRPC is used by DRPC topologies to receive function invocations and send results back.
173
+ */
174
+ public static String DRPC_INVOCATIONS_PORT = "drpc.invocations.port";
175
+
176
+ /**
177
+ * The timeout on DRPC requests within the DRPC server. Defaults to 10 minutes. Note that requests can also
178
+ * timeout based on the socket timeout on the DRPC client, and separately based on the topology message
179
+ * timeout for the topology implementing the DRPC function.
180
+ */
181
+ public static String DRPC_REQUEST_TIMEOUT_SECS = "drpc.request.timeout.secs";
182
+
183
+ /**
184
+ * the metadata configed on the supervisor
185
+ */
186
+ public static String SUPERVISOR_SCHEDULER_META = "supervisor.scheduler.meta";
187
+ /**
188
+ * A list of ports that can run workers on this supervisor. Each worker uses one port, and
189
+ * the supervisor will only run one worker per port. Use this configuration to tune
190
+ * how many workers run on each machine.
191
+ */
192
+ public static String SUPERVISOR_SLOTS_PORTS = "supervisor.slots.ports";
193
+
194
+
195
+
196
+ /**
197
+ * This parameter is used by the storm-deploy project to configure the
198
+ * jvm options for the supervisor daemon.
199
+ */
200
+ public static String SUPERVISOR_CHILDOPTS = "supervisor.childopts";
201
+
202
+
203
+ /**
204
+ * How long a worker can go without heartbeating before the supervisor tries to
205
+ * restart the worker process.
206
+ */
207
+ public static String SUPERVISOR_WORKER_TIMEOUT_SECS = "supervisor.worker.timeout.secs";
208
+
209
+
210
+ /**
211
+ * How long a worker can go without heartbeating during the initial launch before
212
+ * the supervisor tries to restart the worker process. This value override
213
+ * supervisor.worker.timeout.secs during launch because there is additional
214
+ * overhead to starting and configuring the JVM on launch.
215
+ */
216
+ public static String SUPERVISOR_WORKER_START_TIMEOUT_SECS = "supervisor.worker.start.timeout.secs";
217
+
218
+
219
+ /**
220
+ * Whether or not the supervisor should launch workers assigned to it. Defaults
221
+ * to true -- and you should probably never change this value. This configuration
222
+ * is used in the Storm unit tests.
223
+ */
224
+ public static String SUPERVISOR_ENABLE = "supervisor.enable";
225
+
226
+
227
+ /**
228
+ * how often the supervisor sends a heartbeat to the master.
229
+ */
230
+ public static String SUPERVISOR_HEARTBEAT_FREQUENCY_SECS = "supervisor.heartbeat.frequency.secs";
231
+
232
+
233
+ /**
234
+ * How often the supervisor checks the worker heartbeats to see if any of them
235
+ * need to be restarted.
236
+ */
237
+ public static String SUPERVISOR_MONITOR_FREQUENCY_SECS = "supervisor.monitor.frequency.secs";
238
+
239
+ /**
240
+ * The jvm opts provided to workers launched by this supervisor. All "%ID%" substrings are replaced
241
+ * with an identifier for this worker.
242
+ */
243
+ public static String WORKER_CHILDOPTS = "worker.childopts";
244
+
245
+
246
+ /**
247
+ * How often this worker should heartbeat to the supervisor.
248
+ */
249
+ public static String WORKER_HEARTBEAT_FREQUENCY_SECS = "worker.heartbeat.frequency.secs";
250
+
251
+ /**
252
+ * How often a task should heartbeat its status to the master.
253
+ */
254
+ public static String TASK_HEARTBEAT_FREQUENCY_SECS = "task.heartbeat.frequency.secs";
255
+
256
+
257
+ /**
258
+ * How often a task should sync its connections with other tasks (if a task is
259
+ * reassigned, the other tasks sending messages to it need to refresh their connections).
260
+ * In general though, when a reassignment happens other tasks will be notified
261
+ * almost immediately. This configuration is here just in case that notification doesn't
262
+ * come through.
263
+ */
264
+ public static String TASK_REFRESH_POLL_SECS = "task.refresh.poll.secs";
265
+
266
+
267
+
268
+ /**
269
+ * True if Storm should timeout messages or not. Defaults to true. This is meant to be used
270
+ * in unit tests to prevent tuples from being accidentally timed out during the test.
271
+ */
272
+ public static String TOPOLOGY_ENABLE_MESSAGE_TIMEOUTS = "topology.enable.message.timeouts";
273
+
274
+ /**
275
+ * When set to true, Storm will log every message that's emitted.
276
+ */
277
+ public static String TOPOLOGY_DEBUG = "topology.debug";
278
+
279
+
280
+ /**
281
+ * Whether or not the master should optimize topologies by running multiple
282
+ * tasks in a single thread where appropriate.
283
+ */
284
+ public static String TOPOLOGY_OPTIMIZE = "topology.optimize";
285
+
286
+ /**
287
+ * How many processes should be spawned around the cluster to execute this
288
+ * topology. Each process will execute some number of tasks as threads within
289
+ * them. This parameter should be used in conjunction with the parallelism hints
290
+ * on each component in the topology to tune the performance of a topology.
291
+ */
292
+ public static String TOPOLOGY_WORKERS = "topology.workers";
293
+
294
+ /**
295
+ * How many instances to create for a spout/bolt. A task runs on a thread with zero or more
296
+ * other tasks for the same spout/bolt. The number of tasks for a spout/bolt is always
297
+ * the same throughout the lifetime of a topology, but the number of executors (threads) for
298
+ * a spout/bolt can change over time. This allows a topology to scale to more or less resources
299
+ * without redeploying the topology or violating the constraints of Storm (such as a fields grouping
300
+ * guaranteeing that the same value goes to the same task).
301
+ */
302
+ public static String TOPOLOGY_TASKS = "topology.tasks";
303
+
304
+ /**
305
+ * How many executors to spawn for ackers.
306
+ *
307
+ * <p>If this is set to 0, then Storm will immediately ack tuples as soon
308
+ * as they come off the spout, effectively disabling reliability.</p>
309
+ */
310
+ public static String TOPOLOGY_ACKER_EXECUTORS = "topology.acker.executors";
311
+
312
+
313
+ /**
314
+ * The maximum amount of time given to the topology to fully process a message
315
+ * emitted by a spout. If the message is not acked within this time frame, Storm
316
+ * will fail the message on the spout. Some spouts implementations will then replay
317
+ * the message at a later time.
318
+ */
319
+ public static String TOPOLOGY_MESSAGE_TIMEOUT_SECS = "topology.message.timeout.secs";
320
+
321
+ /**
322
+ * A list of serialization registrations for Kryo ( http://code.google.com/p/kryo/ ),
323
+ * the underlying serialization framework for Storm. A serialization can either
324
+ * be the name of a class (in which case Kryo will automatically create a serializer for the class
325
+ * that saves all the object's fields), or an implementation of com.esotericsoftware.kryo.Serializer.
326
+ *
327
+ * See Kryo's documentation for more information about writing custom serializers.
328
+ */
329
+ public static String TOPOLOGY_KRYO_REGISTER = "topology.kryo.register";
330
+
331
+ /**
332
+ * A list of classes that customize storm's kryo instance during start-up.
333
+ * Each listed class name must implement IKryoDecorator. During start-up the
334
+ * listed class is instantiated with 0 arguments, then its 'decorate' method
335
+ * is called with storm's kryo instance as the only argument.
336
+ */
337
+ public static String TOPOLOGY_KRYO_DECORATORS = "topology.kryo.decorators";
338
+
339
+ /**
340
+ * Class that specifies how to create a Kryo instance for serialization. Storm will then apply
341
+ * topology.kryo.register and topology.kryo.decorators on top of this. The default implementation
342
+ * implements topology.fall.back.on.java.serialization and turns references off.
343
+ */
344
+ public static String TOPOLOGY_KRYO_FACTORY = "topology.kryo.factory";
345
+
346
+
347
+ /**
348
+ * Whether or not Storm should skip the loading of kryo registrations for which it
349
+ * does not know the class or have the serializer implementation. Otherwise, the task will
350
+ * fail to load and will throw an error at runtime. The use case of this is if you want to
351
+ * declare your serializations on the storm.yaml files on the cluster rather than every single
352
+ * time you submit a topology. Different applications may use different serializations and so
353
+ * a single application may not have the code for the other serializers used by other apps.
354
+ * By setting this config to true, Storm will ignore that it doesn't have those other serializations
355
+ * rather than throw an error.
356
+ */
357
+ public static String TOPOLOGY_SKIP_MISSING_KRYO_REGISTRATIONS= "topology.skip.missing.kryo.registrations";
358
+
359
+
360
+ /**
361
+ * The maximum parallelism allowed for a component in this topology. This configuration is
362
+ * typically used in testing to limit the number of threads spawned in local mode.
363
+ */
364
+ public static String TOPOLOGY_MAX_TASK_PARALLELISM="topology.max.task.parallelism";
365
+
366
+
367
+ /**
368
+ * The maximum number of tuples that can be pending on a spout task at any given time.
369
+ * This config applies to individual tasks, not to spouts or topologies as a whole.
370
+ *
371
+ * A pending tuple is one that has been emitted from a spout but has not been acked or failed yet.
372
+ * Note that this config parameter has no effect for unreliable spouts that don't tag
373
+ * their tuples with a message id.
374
+ */
375
+ public static String TOPOLOGY_MAX_SPOUT_PENDING="topology.max.spout.pending";
376
+
377
+ /**
378
+ * A class that implements a strategy for what to do when a spout needs to wait. Waiting is
379
+ * triggered in one of two conditions:
380
+ *
381
+ * 1. nextTuple emits no tuples
382
+ * 2. The spout has hit maxSpoutPending and can't emit any more tuples
383
+ */
384
+ public static String TOPOLOGY_SPOUT_WAIT_STRATEGY="topology.spout.wait.strategy";
385
+
386
+ /**
387
+ * The amount of milliseconds the SleepEmptyEmitStrategy should sleep for.
388
+ */
389
+ public static String TOPOLOGY_SLEEP_SPOUT_WAIT_STRATEGY_TIME_MS="topology.sleep.spout.wait.strategy.time.ms";
390
+
391
+ /**
392
+ * The maximum amount of time a component gives a source of state to synchronize before it requests
393
+ * synchronization again.
394
+ */
395
+ public static String TOPOLOGY_STATE_SYNCHRONIZATION_TIMEOUT_SECS="topology.state.synchronization.timeout.secs";
396
+
397
+ /**
398
+ * The percentage of tuples to sample to produce stats for a task.
399
+ */
400
+ public static String TOPOLOGY_STATS_SAMPLE_RATE="topology.stats.sample.rate";
401
+
402
+ /**
403
+ * Whether or not to use Java serialization in a topology.
404
+ */
405
+ public static String TOPOLOGY_FALL_BACK_ON_JAVA_SERIALIZATION="topology.fall.back.on.java.serialization";
406
+
407
+ /**
408
+ * Topology-specific options for the worker child process. This is used in addition to WORKER_CHILDOPTS.
409
+ */
410
+ public static String TOPOLOGY_WORKER_CHILDOPTS="topology.worker.childopts";
411
+
412
+ /**
413
+ * This config is available for TransactionalSpouts, and contains the id ( a String) for
414
+ * the transactional topology. This id is used to store the state of the transactional
415
+ * topology in Zookeeper.
416
+ */
417
+ public static String TOPOLOGY_TRANSACTIONAL_ID="topology.transactional.id";
418
+
419
+ /**
420
+ * A list of task hooks that are automatically added to every spout and bolt in the topology. An example
421
+ * of when you'd do this is to add a hook that integrates with your internal
422
+ * monitoring system. These hooks are instantiated using the zero-arg constructor.
423
+ */
424
+ public static String TOPOLOGY_AUTO_TASK_HOOKS="topology.auto.task.hooks";
425
+
426
+
427
+ /**
428
+ * The size of the Disruptor receive queue for each executor. Must be a power of 2.
429
+ */
430
+ public static String TOPOLOGY_EXECUTOR_RECEIVE_BUFFER_SIZE="topology.executor.receive.buffer.size";
431
+
432
+ /**
433
+ * The maximum number of messages to batch from the thread receiving off the network to the
434
+ * executor queues. Must be a power of 2.
435
+ */
436
+ public static String TOPOLOGY_RECEIVER_BUFFER_SIZE="topology.receiver.buffer.size";
437
+
438
+ /**
439
+ * The size of the Disruptor send queue for each executor. Must be a power of 2.
440
+ */
441
+ public static String TOPOLOGY_EXECUTOR_SEND_BUFFER_SIZE="topology.executor.send.buffer.size";
442
+
443
+ /**
444
+ * The size of the Disruptor transfer queue for each worker.
445
+ */
446
+ public static String TOPOLOGY_TRANSFER_BUFFER_SIZE="topology.transfer.buffer.size";
447
+
448
+ /**
449
+ * How often a tick tuple from the "__system" component and "__tick" stream should be sent
450
+ * to tasks. Meant to be used as a component-specific configuration.
451
+ */
452
+ public static String TOPOLOGY_TICK_TUPLE_FREQ_SECS="topology.tick.tuple.freq.secs";
453
+
454
+
455
+ /**
456
+ * Configure the wait strategy used for internal queuing. Can be used to tradeoff latency
457
+ * vs. throughput
458
+ */
459
+ public static String TOPOLOGY_DISRUPTOR_WAIT_STRATEGY="topology.disruptor.wait.strategy";
460
+
461
+ /**
462
+ * The size of the shared thread pool for worker tasks to make use of. The thread pool can be accessed
463
+ * via the TopologyContext.
464
+ */
465
+ public static String TOPOLOGY_WORKER_SHARED_THREAD_POOL_SIZE="topology.worker.shared.thread.pool.size";
466
+
467
+ /**
468
+ * The interval in seconds to use for determining whether to throttle error reported to Zookeeper. For example,
469
+ * an interval of 10 seconds with topology.max.error.report.per.interval set to 5 will only allow 5 errors to be
470
+ * reported to Zookeeper per task for every 10 second interval of time.
471
+ */
472
+ public static String TOPOLOGY_ERROR_THROTTLE_INTERVAL_SECS="topology.error.throttle.interval.secs";
473
+
474
+ /**
475
+ * See doc for TOPOLOGY_ERROR_THROTTLE_INTERVAL_SECS
476
+ */
477
+ public static String TOPOLOGY_MAX_ERROR_REPORT_PER_INTERVAL="topology.max.error.report.per.interval";
478
+
479
+
480
+ /**
481
+ * How often a batch can be emitted in a Trident topology.
482
+ */
483
+ public static String TOPOLOGY_TRIDENT_BATCH_EMIT_INTERVAL_MILLIS="topology.trident.batch.emit.interval.millis";
484
+
485
+ /**
486
+ * Name of the topology. This config is automatically set by Storm when the topology is submitted.
487
+ */
488
+ public static String TOPOLOGY_NAME="topology.name";
489
+
490
+ /**
491
+ * The root directory in ZooKeeper for metadata about TransactionalSpouts.
492
+ */
493
+ public static String TRANSACTIONAL_ZOOKEEPER_ROOT="transactional.zookeeper.root";
494
+
495
+ /**
496
+ * The list of zookeeper servers in which to keep the transactional state. If null (which is default),
497
+ * will use storm.zookeeper.servers
498
+ */
499
+ public static String TRANSACTIONAL_ZOOKEEPER_SERVERS="transactional.zookeeper.servers";
500
+
501
+ /**
502
+ * The port to use to connect to the transactional zookeeper servers. If null (which is default),
503
+ * will use storm.zookeeper.port
504
+ */
505
+ public static String TRANSACTIONAL_ZOOKEEPER_PORT="transactional.zookeeper.port";
506
+
507
+ /**
508
+ * The number of threads that should be used by the zeromq context in each worker process.
509
+ */
510
+ public static String ZMQ_THREADS = "zmq.threads";
511
+
512
+ /**
513
+ * How long a connection should retry sending messages to a target host when
514
+ * the connection is closed. This is an advanced configuration and can almost
515
+ * certainly be ignored.
516
+ */
517
+ public static String ZMQ_LINGER_MILLIS = "zmq.linger.millis";
518
+
519
+ /**
520
+ * The high water for the ZeroMQ push sockets used for networking. Use this config to prevent buffer explosion
521
+ * on the networking layer.
522
+ */
523
+ public static String ZMQ_HWM = "zmq.hwm";
524
+
525
+ /**
526
+ * This value is passed to spawned JVMs (e.g., Nimbus, Supervisor, and Workers)
527
+ * for the java.library.path value. java.library.path tells the JVM where
528
+ * to look for native libraries. It is necessary to set this config correctly since
529
+ * Storm uses the ZeroMQ and JZMQ native libs.
530
+ */
531
+ public static String JAVA_LIBRARY_PATH = "java.library.path";
532
+
533
+ /**
534
+ * The path to use as the zookeeper dir when running a zookeeper server via
535
+ * "storm dev-zookeeper". This zookeeper instance is only intended for development;
536
+ * it is not a production grade zookeeper setup.
537
+ */
538
+ public static String DEV_ZOOKEEPER_PATH = "dev.zookeeper.path";
539
+
540
+ public static void setDebug(Map conf, boolean isOn) {
541
+ conf.put(Config.TOPOLOGY_DEBUG, isOn);
542
+ }
543
+
544
+ public void setDebug(boolean isOn) {
545
+ setDebug(this, isOn);
546
+ }
547
+
548
+ @Deprecated
549
+ public void setOptimize(boolean isOn) {
550
+ put(Config.TOPOLOGY_OPTIMIZE, isOn);
551
+ }
552
+
553
+ public static void setNumWorkers(Map conf, int workers) {
554
+ conf.put(Config.TOPOLOGY_WORKERS, workers);
555
+ }
556
+
557
+ public void setNumWorkers(int workers) {
558
+ setNumWorkers(this, workers);
559
+ }
560
+
561
+ public static void setNumAckers(Map conf, int numExecutors) {
562
+ conf.put(Config.TOPOLOGY_ACKER_EXECUTORS, numExecutors);
563
+ }
564
+
565
+ public void setNumAckers(int numExecutors) {
566
+ setNumAckers(this, numExecutors);
567
+ }
568
+
569
+ public static void setMessageTimeoutSecs(Map conf, int secs) {
570
+ conf.put(Config.TOPOLOGY_MESSAGE_TIMEOUT_SECS, secs);
571
+ }
572
+
573
+ public void setMessageTimeoutSecs(int secs) {
574
+ setMessageTimeoutSecs(this, secs);
575
+ }
576
+
577
+ public static void registerSerialization(Map conf, Class klass) {
578
+ getRegisteredSerializations(conf).add(klass.getName());
579
+ }
580
+
581
+ public void registerSerialization(Class klass) {
582
+ registerSerialization(this, klass);
583
+ }
584
+
585
+ public static void registerSerialization(Map conf, Class klass, Class<? extends Serializer> serializerClass) {
586
+ Map<String, String> register = new HashMap<String, String>();
587
+ register.put(klass.getName(), serializerClass.getName());
588
+ getRegisteredSerializations(conf).add(register);
589
+ }
590
+
591
+ public void registerSerialization(Class klass, Class<? extends Serializer> serializerClass) {
592
+ registerSerialization(this, klass, serializerClass);
593
+ }
594
+
595
+ public static void registerDecorator(Map conf, Class<? extends IKryoDecorator> klass) {
596
+ getRegisteredDecorators(conf).add(klass.getName());
597
+ }
598
+
599
+ public void registerDecorator(Class<? extends IKryoDecorator> klass) {
600
+ registerDecorator(this, klass);
601
+ }
602
+
603
+ public static void setKryoFactory(Map conf, Class<? extends IKryoFactory> klass) {
604
+ conf.put(Config.TOPOLOGY_KRYO_FACTORY, klass.getName());
605
+ }
606
+
607
+ public void setKryoFactory(Class<? extends IKryoFactory> klass) {
608
+ setKryoFactory(this, klass);
609
+ }
610
+
611
+ public static void setSkipMissingKryoRegistrations(Map conf, boolean skip) {
612
+ conf.put(Config.TOPOLOGY_SKIP_MISSING_KRYO_REGISTRATIONS, skip);
613
+ }
614
+
615
+ public void setSkipMissingKryoRegistrations(boolean skip) {
616
+ setSkipMissingKryoRegistrations(this, skip);
617
+ }
618
+
619
+ public static void setMaxTaskParallelism(Map conf, int max) {
620
+ conf.put(Config.TOPOLOGY_MAX_TASK_PARALLELISM, max);
621
+ }
622
+
623
+ public void setMaxTaskParallelism(int max) {
624
+ setMaxTaskParallelism(this, max);
625
+ }
626
+
627
+ public static void setMaxSpoutPending(Map conf, int max) {
628
+ conf.put(Config.TOPOLOGY_MAX_SPOUT_PENDING, max);
629
+ }
630
+
631
+ public void setMaxSpoutPending(int max) {
632
+ setMaxSpoutPending(this, max);
633
+ }
634
+
635
+ public static void setStatsSampleRate(Map conf, double rate) {
636
+ conf.put(Config.TOPOLOGY_STATS_SAMPLE_RATE, rate);
637
+ }
638
+
639
+ public void setStatsSampleRate(double rate) {
640
+ setStatsSampleRate(this, rate);
641
+ }
642
+
643
+ public static void setFallBackOnJavaSerialization(Map conf, boolean fallback) {
644
+ conf.put(Config.TOPOLOGY_FALL_BACK_ON_JAVA_SERIALIZATION, fallback);
645
+ }
646
+
647
+ public void setFallBackOnJavaSerialization(boolean fallback) {
648
+ setFallBackOnJavaSerialization(this, fallback);
649
+ }
650
+
651
+ private static List getRegisteredSerializations(Map conf) {
652
+ List ret;
653
+ if(!conf.containsKey(Config.TOPOLOGY_KRYO_REGISTER)) {
654
+ ret = new ArrayList();
655
+ } else {
656
+ ret = new ArrayList((List) conf.get(Config.TOPOLOGY_KRYO_REGISTER));
657
+ }
658
+ conf.put(Config.TOPOLOGY_KRYO_REGISTER, ret);
659
+ return ret;
660
+ }
661
+
662
+ private static List getRegisteredDecorators(Map conf) {
663
+ List ret;
664
+ if(!conf.containsKey(Config.TOPOLOGY_KRYO_DECORATORS)) {
665
+ ret = new ArrayList();
666
+ } else {
667
+ ret = new ArrayList((List) conf.get(Config.TOPOLOGY_KRYO_DECORATORS));
668
+ }
669
+ conf.put(Config.TOPOLOGY_KRYO_DECORATORS, ret);
670
+ return ret;
671
+ }
672
+ }
673
+
674
+
675
+ end
676
+ end
data/lib/wukong-storm/runner.rb ADDED
@@ -0,0 +1,45 @@
+ module Wukong
+   class StormRunner < EM::P::LineAndTextProtocol
+     include DriverMethods
+
+     attr_accessor :dataflow, :settings
+
+     def self.start(label, settings = {})
+       EM.attach($stdin, self, label, settings)
+     end
+
+     def initialize(label, settings)
+       super
+       @settings = settings
+       @dataflow = construct_dataflow(label, settings)
+       @messages = []
+     end
+
+     def post_init
+       setup_dataflow
+     end
+
+     def receive_line line
+       driver.send_through_dataflow(line)
+       send_messages
+     rescue => e
+       $stderr.puts e.message
+       EM.stop
+     end
+
+     def send_messages
+       $stdout.write(@messages.join("\n") + settings.delimiter)
+       $stdout.flush
+       @messages.clear
+     end
+
+     def unbind
+       EM.stop
+     end
+
+     def setup() ; end
+     def process(record) @messages << record ; end
+     def stop() ; end
+
+   end
+ end
data/lib/wukong-storm/version.rb ADDED
@@ -0,0 +1,5 @@
+ module Wukong
+   module Storm
+     VERSION = '0.0.1'
+   end
+ end
data/lib/wukong-storm.rb ADDED
@@ -0,0 +1,3 @@
+ require 'wukong'
+ require 'wukong-storm/runner'
+ # require 'wukong-storm/configuration'
data/spec/spec_helper.rb ADDED
@@ -0,0 +1,6 @@
+ require 'wukong-storm'
+ require 'wukong/spec_helpers'
+
+ RSpec.configure do |config|
+   include Wukong::SpecHelpers
+ end
data/spec/support/examples.rb ADDED
@@ -0,0 +1,27 @@
+ Wukong.processor(:simple) do
+   def process(record)
+     yield record
+   end
+ end
+
+ Wukong.processor(:skipped) do
+   def process(record)
+     # skip records
+   end
+ end
+
+ Wukong.processor(:multi) do
+   def process(record)
+     3.times{ yield record }
+   end
+ end
+
+ Wukong.processor(:test_example) do
+   def process(record)
+     yield "I raised the #{record['foo']}"
+   end
+ end
+
+ Wukong.dataflow(:flow) do
+   from_json | test_example
+ end
data/spec/wu_storm_spec.rb ADDED
@@ -0,0 +1,54 @@
+ require 'spec_helper'
+
+ Wu.processor(:test) do
+
+   def process(record)
+     # do nothing
+   end
+
+ end
+
+ describe 'wu-storm' do
+   let(:examples) { File.expand_path('../support/examples.rb', __FILE__) }
+
+   context 'without any arguments' do
+     subject { command 'wu-storm' }
+     it { should exit_with(:non_zero) }
+     it { should have_stderr('usage: wu-storm') }
+   end
+
+   context 'with a simple processor' do
+     let(:input) { 'one event' }
+     subject { command('wu-storm', examples, '--run=simple') < input }
+     it { should exit_with(0) }
+     it { should have_stdout('one event|') }
+   end
+
+   context 'with a skipped processor' do
+     let(:input) { 'never see this' }
+     subject { command('wu-storm', examples, '--run=skipped') < input }
+     it { should exit_with(0) }
+     it { should have_stdout('|') }
+   end
+
+   context 'with a duplicating processor' do
+     let(:input) { 'foo' }
+     subject { command('wu-storm', examples, '--run=multi') < input }
+     it { should exit_with(0) }
+     it { should have_stdout("foo\nfoo\nfoo|") }
+   end
+
+   context 'with a flow' do
+     let(:input) { '{"foo":"bar"}' }
+     subject { command('wu-storm', examples, '--run=flow') < input }
+     it { should exit_with(0) }
+     it { should have_stdout('I raised the bar|') }
+   end
+
+   context 'with multiple arguments' do
+     let(:input) { "foo\nbar\nbaz" }
+     subject { command('wu-storm', examples, '--run=simple') < input }
+     it { should exit_with(0) }
+     it { should have_stdout('foo|bar|baz|') }
+   end
+ end
data/wu-storm.gemspec ADDED
@@ -0,0 +1,22 @@
+ # -*- encoding: utf-8 -*-
+ require File.expand_path('../lib/wukong-storm/version', __FILE__)
+
+ Gem::Specification.new do |gem|
+   gem.name          = 'wukong-storm'
+   gem.homepage      = 'https://github.com/infochimps-labs/wukong-storm'
+   gem.licenses      = ["Apache 2.0"]
+   gem.email         = 'coders@infochimps.org'
+   gem.authors       = ['Infochimps', 'Travis Dempsey']
+   gem.version       = Wukong::Storm::VERSION
+
+   gem.summary       = 'Storm processing for Ruby'
+   gem.description   = <<-EOF
+   EOF
+
+   gem.files         = `git ls-files`.split("\n")
+   gem.executables   = ['wu-storm']
+   gem.test_files    = gem.files.grep(/^spec/)
+   gem.require_paths = ['lib']
+
+   gem.add_dependency('wukong', '3.0.0.pre3')
+ end
metadata ADDED
@@ -0,0 +1,86 @@
+ --- !ruby/object:Gem::Specification
+ name: wukong-storm
+ version: !ruby/object:Gem::Version
+   version: 0.0.1
+   prerelease:
+ platform: ruby
+ authors:
+ - Infochimps
+ - Travis Dempsey
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2012-12-17 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: wukong
+   requirement: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - '='
+       - !ruby/object:Gem::Version
+         version: 3.0.0.pre3
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - '='
+       - !ruby/object:Gem::Version
+         version: 3.0.0.pre3
+ description: ''
+ email: coders@infochimps.org
+ executables:
+ - wu-storm
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - .gitignore
+ - .rspec
+ - Gemfile
+ - README.md
+ - Rakefile
+ - bin/wu-storm
+ - lib/wukong-storm.rb
+ - lib/wukong-storm/configuration.rb
+ - lib/wukong-storm/runner.rb
+ - lib/wukong-storm/version.rb
+ - spec/spec_helper.rb
+ - spec/support/examples.rb
+ - spec/wu_storm_spec.rb
+ - wu-storm.gemspec
+ homepage: https://github.com/infochimps-labs/wukong-storm
+ licenses:
+ - Apache 2.0
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   none: false
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+       segments:
+       - 0
+       hash: 1144670354774271812
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   none: false
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+       segments:
+       - 0
+       hash: 1144670354774271812
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 1.8.24
+ signing_key:
+ specification_version: 3
+ summary: Storm processing for Ruby
+ test_files:
+ - spec/spec_helper.rb
+ - spec/support/examples.rb
+ - spec/wu_storm_spec.rb