wukong-storm 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,2 @@
1
+ pkg/
2
+ Gemfile.lock
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --drb
data/Gemfile ADDED
@@ -0,0 +1,8 @@
1
+ source :rubygems
2
+
3
+ gemspec
4
+
5
+ group :development do
6
+ gem 'rake', '~> 0.9'
7
+ gem 'rspec', '~> 2'
8
+ end
data/README.md ADDED
@@ -0,0 +1,31 @@
1
+ # Wukong Storm
2
+
3
+ ## Usage
4
+
5
+ The Wukong Storm plugin is very basic at the moment. It functions entirely over STDIN and STDOUT. Taken from the `wu-storm` executable:
6
+
7
+ ```
8
+ usage: wu-storm PROCESSOR|FLOW [...--param=value...]
9
+
10
+ wu-storm is a commandline tool for running Wukong processors and flows in
11
+ a storm or trident topology.
12
+
13
+ wu-storm operates over STDIN and STDOUT and has a one-to-one message guarantee.
14
+ For example, when using an identity processor, wu-storm, given an event 'foo', will return
15
+ 'foo|'. The '|' character is the specified End-Of-File delimiter.
16
+
17
+ If there is ever a suppressed error in processing, or a skipped record for any reason,
18
+ wu-storm will still respond with a '|', signifying an empty return event.
19
+
20
+ If there are multiple messages that have resulted from a single event, wu-storm will return
21
+ them newline separated, followed by the delimiter, e.g. 'foo\nbar\nbaz|'.
22
+
23
+
24
+ Params:
25
+ -t, --delimiter=String The EOF specifier when returning events [Default: |]
26
+ -r, --run=String Name of the processor or dataflow to use. Defaults to basename of the given path
27
+ ```
28
+
29
+ ## TODO
30
+
31
+ The configuration file has __all__ of the options for storm listed. Slowly translating into real Configliere options.
data/Rakefile ADDED
@@ -0,0 +1,7 @@
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
3
+
4
+ require 'rspec/core/rake_task'
5
+ RSpec::Core::RakeTask.new(:rspec)
6
+
7
+ task :default => [:rspec]
data/bin/wu-storm ADDED
@@ -0,0 +1,51 @@
1
+ #!/usr/bin/env ruby
2
+ require 'wukong-storm'
3
+ require 'configliere'
4
+
5
+ Settings.use(:commandline)
6
+ Settings.define :run, description: 'Name of the processor or dataflow to use. Defaults to basename of the given path', flag: 'r'
7
+ Settings.define :delimiter, description: 'The EOF specifier when returning events', default: '|', flag: 't'
8
+
9
+ def Settings.usage() "usage: #{File.basename($0)} PROCESSOR|FLOW [...--param=value...]" ; end
10
+
11
+ Settings.description = <<'EOF'
12
+ wu-storm is a commandline tool for running Wukong processors and flows in
13
+ a storm or trident topology.
14
+
15
+ wu-storm operates over STDIN and STDOUT and has a one-to-one message guarantee.
16
+ For example, when using an identity processor, wu-storm, given an event 'foo', will return
17
+ 'foo|'. The '|' character is the specified End-Of-File delimiter.
18
+
19
+ If there is ever a suppressed error in pricessing, or a skipped record for any reason,
20
+ wu-storm will still respond with a '|', signifying an empty return event.
21
+
22
+ If there are multiple messages that have resulted from a single event, wu-storm will return
23
+ them newline separated, followed by the delimite, e.g. 'foo\nbar\nbaz|'.
24
+ EOF
25
+
26
+ Settings.resolve!
27
+
28
+ runnable = Settings.rest.first
29
+
30
+ case
31
+ when runnable.nil?
32
+ Settings.dump_help
33
+ exit(1)
34
+ when Wukong.registry.registered?(runnable.to_sym)
35
+ processor = runnable
36
+ when File.exist?(runnable)
37
+ load runnable
38
+ processor = Settings.run || File.basename(runnable, '.rb')
39
+ else
40
+ Settings.dump_help
41
+ exit(1)
42
+ end
43
+
44
+ begin
45
+ EM.run do
46
+ Wu::StormRunner.start(processor.to_sym, Settings)
47
+ end
48
+ rescue Wu::Error => e
49
+ $stderr.puts e.message
50
+ exit(1)
51
+ end
@@ -0,0 +1,676 @@
1
+ module Wukong
2
+ module Storm
3
+
4
+ Configuration = Configliere::Param.new unless defined? Configuration
5
+
6
+ Configuration.define :zookeepers_servers, description: 'storm.zookeeper.servers'
7
+ Configuration.define :zookeepers_port, description: 'storm.zookeeper.port'
8
+ Configuration.define :local_dir, description: 'storm.local.dir'
9
+ Configuration.define :scheduler, description: 'storm.scheduler'
10
+ Configuration.define :cluster_mode, description: 'storm.cluster.mode'
11
+ Configuration.define :local_hostname, description: 'storm.local.hostname'
12
+
13
+ /**
14
+ * Whether or not to use ZeroMQ for messaging in local mode. If this is set
15
+ * to false, then Storm will use a pure-Java messaging system. The purpose
16
+ * of this flag is to make it easy to run Storm in local mode by eliminating
17
+ * the need for native dependencies, which can be difficult to install.
18
+ *
19
+ * Defaults to false.
20
+ */
21
+ public static String STORM_LOCAL_MODE_ZMQ = "storm.local.mode.zmq";
22
+
23
+ /**
24
+ * The root location at which Storm stores data in ZooKeeper.
25
+ */
26
+ public static String STORM_ZOOKEEPER_ROOT = "storm.zookeeper.root";
27
+
28
+ /**
29
+ * The session timeout for clients to ZooKeeper.
30
+ */
31
+ public static String STORM_ZOOKEEPER_SESSION_TIMEOUT = "storm.zookeeper.session.timeout";
32
+
33
+ /**
34
+ * The connection timeout for clients to ZooKeeper.
35
+ */
36
+ public static String STORM_ZOOKEEPER_CONNECTION_TIMEOUT = "storm.zookeeper.connection.timeout";
37
+
38
+
39
+ /**
40
+ * The number of times to retry a Zookeeper operation.
41
+ */
42
+ public static String STORM_ZOOKEEPER_RETRY_TIMES="storm.zookeeper.retry.times";
43
+
44
+ /**
45
+ * The interval between retries of a Zookeeper operation.
46
+ */
47
+ public static String STORM_ZOOKEEPER_RETRY_INTERVAL="storm.zookeeper.retry.interval";
48
+
49
+ /**
50
+ * The Zookeeper authentication scheme to use, e.g. "digest". Defaults to no authentication.
51
+ */
52
+ public static String STORM_ZOOKEEPER_AUTH_SCHEME="storm.zookeeper.auth.scheme";
53
+
54
+ /**
55
+ * A string representing the payload for Zookeeper authentication. It gets serialized using UTF-8 encoding during authentication.
56
+ */
57
+ public static String STORM_ZOOKEEPER_AUTH_PAYLOAD="storm.zookeeper.auth.payload";
58
+
59
+ /**
60
+ * The id assigned to a running topology. The id is the storm name with a unique nonce appended.
61
+ */
62
+ public static String STORM_ID = "storm.id";
63
+
64
+ /**
65
+ * The host that the master server is running on.
66
+ */
67
+ public static String NIMBUS_HOST = "nimbus.host";
68
+
69
+ /**
70
+ * Which port the Thrift interface of Nimbus should run on. Clients should
71
+ * connect to this port to upload jars and submit topologies.
72
+ */
73
+ public static String NIMBUS_THRIFT_PORT = "nimbus.thrift.port";
74
+
75
+
76
+ /**
77
+ * This parameter is used by the storm-deploy project to configure the
78
+ * jvm options for the nimbus daemon.
79
+ */
80
+ public static String NIMBUS_CHILDOPTS = "nimbus.childopts";
81
+
82
+
83
+ /**
84
+ * How long without heartbeating a task can go before nimbus will consider the
85
+ * task dead and reassign it to another location.
86
+ */
87
+ public static String NIMBUS_TASK_TIMEOUT_SECS = "nimbus.task.timeout.secs";
88
+
89
+
90
+ /**
91
+ * How often nimbus should wake up to check heartbeats and do reassignments. Note
92
+ * that if a machine ever goes down Nimbus will immediately wake up and take action.
93
+ * This parameter is for checking for failures when there's no explicit event like that
94
+ * occuring.
95
+ */
96
+ public static String NIMBUS_MONITOR_FREQ_SECS = "nimbus.monitor.freq.secs";
97
+
98
+ /**
99
+ * How often nimbus should wake the cleanup thread to clean the inbox.
100
+ * @see NIMBUS_INBOX_JAR_EXPIRATION_SECS
101
+ */
102
+ public static String NIMBUS_CLEANUP_INBOX_FREQ_SECS = "nimbus.cleanup.inbox.freq.secs";
103
+
104
+ /**
105
+ * The length of time a jar file lives in the inbox before being deleted by the cleanup thread.
106
+ *
107
+ * Probably keep this value greater than or equal to NIMBUS_CLEANUP_INBOX_JAR_EXPIRATION_SECS.
108
+ * Note that the time it takes to delete an inbox jar file is going to be somewhat more than
109
+ * NIMBUS_CLEANUP_INBOX_JAR_EXPIRATION_SECS (depending on how often NIMBUS_CLEANUP_FREQ_SECS
110
+ * is set to).
111
+ * @see NIMBUS_CLEANUP_FREQ_SECS
112
+ */
113
+ public static String NIMBUS_INBOX_JAR_EXPIRATION_SECS = "nimbus.inbox.jar.expiration.secs";
114
+
115
+ /**
116
+ * How long before a supervisor can go without heartbeating before nimbus considers it dead
117
+ * and stops assigning new work to it.
118
+ */
119
+ public static String NIMBUS_SUPERVISOR_TIMEOUT_SECS = "nimbus.supervisor.timeout.secs";
120
+
121
+ /**
122
+ * A special timeout used when a task is initially launched. During launch, this is the timeout
123
+ * used until the first heartbeat, overriding nimbus.task.timeout.secs.
124
+ *
125
+ * <p>A separate timeout exists for launch because there can be quite a bit of overhead
126
+ * to launching new JVM's and configuring them.</p>
127
+ */
128
+ public static String NIMBUS_TASK_LAUNCH_SECS = "nimbus.task.launch.secs";
129
+
130
+ /**
131
+ * Whether or not nimbus should reassign tasks if it detects that a task goes down.
132
+ * Defaults to true, and it's not recommended to change this value.
133
+ */
134
+ public static String NIMBUS_REASSIGN = "nimbus.reassign";
135
+
136
+ /**
137
+ * During upload/download with the master, how long an upload or download connection is idle
138
+ * before nimbus considers it dead and drops the connection.
139
+ */
140
+ public static String NIMBUS_FILE_COPY_EXPIRATION_SECS = "nimbus.file.copy.expiration.secs";
141
+
142
+ /**
143
+ * A custom class that implements ITopologyValidator that is run whenever a
144
+ * topology is submitted. Can be used to provide business-specific logic for
145
+ * whether topologies are allowed to run or not.
146
+ */
147
+ public static String NIMBUS_TOPOLOGY_VALIDATOR = "nimbus.topology.validator";
148
+
149
+
150
+ /**
151
+ * Storm UI binds to this port.
152
+ */
153
+ public static String UI_PORT = "ui.port";
154
+
155
+ /**
156
+ * Childopts for Storm UI Java process.
157
+ */
158
+ public static String UI_CHILDOPTS = "ui.childopts";
159
+
160
+
161
+ /**
162
+ * List of DRPC servers so that the DRPCSpout knows who to talk to.
163
+ */
164
+ public static String DRPC_SERVERS = "drpc.servers";
165
+
166
+ /**
167
+ * This port is used by Storm DRPC for receiving DPRC requests from clients.
168
+ */
169
+ public static String DRPC_PORT = "drpc.port";
170
+
171
+ /**
172
+ * This port on Storm DRPC is used by DRPC topologies to receive function invocations and send results back.
173
+ */
174
+ public static String DRPC_INVOCATIONS_PORT = "drpc.invocations.port";
175
+
176
+ /**
177
+ * The timeout on DRPC requests within the DRPC server. Defaults to 10 minutes. Note that requests can also
178
+ * timeout based on the socket timeout on the DRPC client, and separately based on the topology message
179
+ * timeout for the topology implementing the DRPC function.
180
+ */
181
+ public static String DRPC_REQUEST_TIMEOUT_SECS = "drpc.request.timeout.secs";
182
+
183
+ /**
184
+ * the metadata configured on the supervisor
185
+ */
186
+ public static String SUPERVISOR_SCHEDULER_META = "supervisor.scheduler.meta";
187
+ /**
188
+ * A list of ports that can run workers on this supervisor. Each worker uses one port, and
189
+ * the supervisor will only run one worker per port. Use this configuration to tune
190
+ * how many workers run on each machine.
191
+ */
192
+ public static String SUPERVISOR_SLOTS_PORTS = "supervisor.slots.ports";
193
+
194
+
195
+
196
+ /**
197
+ * This parameter is used by the storm-deploy project to configure the
198
+ * jvm options for the supervisor daemon.
199
+ */
200
+ public static String SUPERVISOR_CHILDOPTS = "supervisor.childopts";
201
+
202
+
203
+ /**
204
+ * How long a worker can go without heartbeating before the supervisor tries to
205
+ * restart the worker process.
206
+ */
207
+ public static String SUPERVISOR_WORKER_TIMEOUT_SECS = "supervisor.worker.timeout.secs";
208
+
209
+
210
+ /**
211
+ * How long a worker can go without heartbeating during the initial launch before
212
+ * the supervisor tries to restart the worker process. This value override
213
+ * supervisor.worker.timeout.secs during launch because there is additional
214
+ * overhead to starting and configuring the JVM on launch.
215
+ */
216
+ public static String SUPERVISOR_WORKER_START_TIMEOUT_SECS = "supervisor.worker.start.timeout.secs";
217
+
218
+
219
+ /**
220
+ * Whether or not the supervisor should launch workers assigned to it. Defaults
221
+ * to true -- and you should probably never change this value. This configuration
222
+ * is used in the Storm unit tests.
223
+ */
224
+ public static String SUPERVISOR_ENABLE = "supervisor.enable";
225
+
226
+
227
+ /**
228
+ * how often the supervisor sends a heartbeat to the master.
229
+ */
230
+ public static String SUPERVISOR_HEARTBEAT_FREQUENCY_SECS = "supervisor.heartbeat.frequency.secs";
231
+
232
+
233
+ /**
234
+ * How often the supervisor checks the worker heartbeats to see if any of them
235
+ * need to be restarted.
236
+ */
237
+ public static String SUPERVISOR_MONITOR_FREQUENCY_SECS = "supervisor.monitor.frequency.secs";
238
+
239
+ /**
240
+ * The jvm opts provided to workers launched by this supervisor. All "%ID%" substrings are replaced
241
+ * with an identifier for this worker.
242
+ */
243
+ public static String WORKER_CHILDOPTS = "worker.childopts";
244
+
245
+
246
+ /**
247
+ * How often this worker should heartbeat to the supervisor.
248
+ */
249
+ public static String WORKER_HEARTBEAT_FREQUENCY_SECS = "worker.heartbeat.frequency.secs";
250
+
251
+ /**
252
+ * How often a task should heartbeat its status to the master.
253
+ */
254
+ public static String TASK_HEARTBEAT_FREQUENCY_SECS = "task.heartbeat.frequency.secs";
255
+
256
+
257
+ /**
258
+ * How often a task should sync its connections with other tasks (if a task is
259
+ * reassigned, the other tasks sending messages to it need to refresh their connections).
260
+ * In general though, when a reassignment happens other tasks will be notified
261
+ * almost immediately. This configuration is here just in case that notification doesn't
262
+ * come through.
263
+ */
264
+ public static String TASK_REFRESH_POLL_SECS = "task.refresh.poll.secs";
265
+
266
+
267
+
268
+ /**
269
+ * True if Storm should timeout messages or not. Defaults to true. This is meant to be used
270
+ * in unit tests to prevent tuples from being accidentally timed out during the test.
271
+ */
272
+ public static String TOPOLOGY_ENABLE_MESSAGE_TIMEOUTS = "topology.enable.message.timeouts";
273
+
274
+ /**
275
+ * When set to true, Storm will log every message that's emitted.
276
+ */
277
+ public static String TOPOLOGY_DEBUG = "topology.debug";
278
+
279
+
280
+ /**
281
+ * Whether or not the master should optimize topologies by running multiple
282
+ * tasks in a single thread where appropriate.
283
+ */
284
+ public static String TOPOLOGY_OPTIMIZE = "topology.optimize";
285
+
286
+ /**
287
+ * How many processes should be spawned around the cluster to execute this
288
+ * topology. Each process will execute some number of tasks as threads within
289
+ * them. This parameter should be used in conjunction with the parallelism hints
290
+ * on each component in the topology to tune the performance of a topology.
291
+ */
292
+ public static String TOPOLOGY_WORKERS = "topology.workers";
293
+
294
+ /**
295
+ * How many instances to create for a spout/bolt. A task runs on a thread with zero or more
296
+ * other tasks for the same spout/bolt. The number of tasks for a spout/bolt is always
297
+ * the same throughout the lifetime of a topology, but the number of executors (threads) for
298
+ * a spout/bolt can change over time. This allows a topology to scale to more or less resources
299
+ * without redeploying the topology or violating the constraints of Storm (such as a fields grouping
300
+ * guaranteeing that the same value goes to the same task).
301
+ */
302
+ public static String TOPOLOGY_TASKS = "topology.tasks";
303
+
304
+ /**
305
+ * How many executors to spawn for ackers.
306
+ *
307
+ * <p>If this is set to 0, then Storm will immediately ack tuples as soon
308
+ * as they come off the spout, effectively disabling reliability.</p>
309
+ */
310
+ public static String TOPOLOGY_ACKER_EXECUTORS = "topology.acker.executors";
311
+
312
+
313
+ /**
314
+ * The maximum amount of time given to the topology to fully process a message
315
+ * emitted by a spout. If the message is not acked within this time frame, Storm
316
+ * will fail the message on the spout. Some spouts implementations will then replay
317
+ * the message at a later time.
318
+ */
319
+ public static String TOPOLOGY_MESSAGE_TIMEOUT_SECS = "topology.message.timeout.secs";
320
+
321
+ /**
322
+ * A list of serialization registrations for Kryo ( http://code.google.com/p/kryo/ ),
323
+ * the underlying serialization framework for Storm. A serialization can either
324
+ * be the name of a class (in which case Kryo will automatically create a serializer for the class
325
+ * that saves all the object's fields), or an implementation of com.esotericsoftware.kryo.Serializer.
326
+ *
327
+ * See Kryo's documentation for more information about writing custom serializers.
328
+ */
329
+ public static String TOPOLOGY_KRYO_REGISTER = "topology.kryo.register";
330
+
331
+ /**
332
+ * A list of classes that customize storm's kryo instance during start-up.
333
+ * Each listed class name must implement IKryoDecorator. During start-up the
334
+ * listed class is instantiated with 0 arguments, then its 'decorate' method
335
+ * is called with storm's kryo instance as the only argument.
336
+ */
337
+ public static String TOPOLOGY_KRYO_DECORATORS = "topology.kryo.decorators";
338
+
339
+ /**
340
+ * Class that specifies how to create a Kryo instance for serialization. Storm will then apply
341
+ * topology.kryo.register and topology.kryo.decorators on top of this. The default implementation
342
+ * implements topology.fall.back.on.java.serialization and turns references off.
343
+ */
344
+ public static String TOPOLOGY_KRYO_FACTORY = "topology.kryo.factory";
345
+
346
+
347
+ /**
348
+ * Whether or not Storm should skip the loading of kryo registrations for which it
349
+ * does not know the class or have the serializer implementation. Otherwise, the task will
350
+ * fail to load and will throw an error at runtime. The use case of this is if you want to
351
+ * declare your serializations on the storm.yaml files on the cluster rather than every single
352
+ * time you submit a topology. Different applications may use different serializations and so
353
+ * a single application may not have the code for the other serializers used by other apps.
354
+ * By setting this config to true, Storm will ignore that it doesn't have those other serializations
355
+ * rather than throw an error.
356
+ */
357
+ public static String TOPOLOGY_SKIP_MISSING_KRYO_REGISTRATIONS= "topology.skip.missing.kryo.registrations";
358
+
359
+
360
+ /**
361
+ * The maximum parallelism allowed for a component in this topology. This configuration is
362
+ * typically used in testing to limit the number of threads spawned in local mode.
363
+ */
364
+ public static String TOPOLOGY_MAX_TASK_PARALLELISM="topology.max.task.parallelism";
365
+
366
+
367
+ /**
368
+ * The maximum number of tuples that can be pending on a spout task at any given time.
369
+ * This config applies to individual tasks, not to spouts or topologies as a whole.
370
+ *
371
+ * A pending tuple is one that has been emitted from a spout but has not been acked or failed yet.
372
+ * Note that this config parameter has no effect for unreliable spouts that don't tag
373
+ * their tuples with a message id.
374
+ */
375
+ public static String TOPOLOGY_MAX_SPOUT_PENDING="topology.max.spout.pending";
376
+
377
+ /**
378
+ * A class that implements a strategy for what to do when a spout needs to wait. Waiting is
379
+ * triggered in one of two conditions:
380
+ *
381
+ * 1. nextTuple emits no tuples
382
+ * 2. The spout has hit maxSpoutPending and can't emit any more tuples
383
+ */
384
+ public static String TOPOLOGY_SPOUT_WAIT_STRATEGY="topology.spout.wait.strategy";
385
+
386
+ /**
387
+ * The amount of milliseconds the SleepEmptyEmitStrategy should sleep for.
388
+ */
389
+ public static String TOPOLOGY_SLEEP_SPOUT_WAIT_STRATEGY_TIME_MS="topology.sleep.spout.wait.strategy.time.ms";
390
+
391
+ /**
392
+ * The maximum amount of time a component gives a source of state to synchronize before it requests
393
+ * synchronization again.
394
+ */
395
+ public static String TOPOLOGY_STATE_SYNCHRONIZATION_TIMEOUT_SECS="topology.state.synchronization.timeout.secs";
396
+
397
+ /**
398
+ * The percentage of tuples to sample to produce stats for a task.
399
+ */
400
+ public static String TOPOLOGY_STATS_SAMPLE_RATE="topology.stats.sample.rate";
401
+
402
+ /**
403
+ * Whether or not to use Java serialization in a topology.
404
+ */
405
+ public static String TOPOLOGY_FALL_BACK_ON_JAVA_SERIALIZATION="topology.fall.back.on.java.serialization";
406
+
407
+ /**
408
+ * Topology-specific options for the worker child process. This is used in addition to WORKER_CHILDOPTS.
409
+ */
410
+ public static String TOPOLOGY_WORKER_CHILDOPTS="topology.worker.childopts";
411
+
412
+ /**
413
+ * This config is available for TransactionalSpouts, and contains the id ( a String) for
414
+ * the transactional topology. This id is used to store the state of the transactional
415
+ * topology in Zookeeper.
416
+ */
417
+ public static String TOPOLOGY_TRANSACTIONAL_ID="topology.transactional.id";
418
+
419
+ /**
420
+ * A list of task hooks that are automatically added to every spout and bolt in the topology. An example
421
+ * of when you'd do this is to add a hook that integrates with your internal
422
+ * monitoring system. These hooks are instantiated using the zero-arg constructor.
423
+ */
424
+ public static String TOPOLOGY_AUTO_TASK_HOOKS="topology.auto.task.hooks";
425
+
426
+
427
+ /**
428
+ * The size of the Disruptor receive queue for each executor. Must be a power of 2.
429
+ */
430
+ public static String TOPOLOGY_EXECUTOR_RECEIVE_BUFFER_SIZE="topology.executor.receive.buffer.size";
431
+
432
+ /**
433
+ * The maximum number of messages to batch from the thread receiving off the network to the
434
+ * executor queues. Must be a power of 2.
435
+ */
436
+ public static String TOPOLOGY_RECEIVER_BUFFER_SIZE="topology.receiver.buffer.size";
437
+
438
+ /**
439
+ * The size of the Disruptor send queue for each executor. Must be a power of 2.
440
+ */
441
+ public static String TOPOLOGY_EXECUTOR_SEND_BUFFER_SIZE="topology.executor.send.buffer.size";
442
+
443
+ /**
444
+ * The size of the Disruptor transfer queue for each worker.
445
+ */
446
+ public static String TOPOLOGY_TRANSFER_BUFFER_SIZE="topology.transfer.buffer.size";
447
+
448
+ /**
449
+ * How often a tick tuple from the "__system" component and "__tick" stream should be sent
450
+ * to tasks. Meant to be used as a component-specific configuration.
451
+ */
452
+ public static String TOPOLOGY_TICK_TUPLE_FREQ_SECS="topology.tick.tuple.freq.secs";
453
+
454
+
455
+ /**
456
+ * Configure the wait strategy used for internal queuing. Can be used to tradeoff latency
457
+ * vs. throughput
458
+ */
459
+ public static String TOPOLOGY_DISRUPTOR_WAIT_STRATEGY="topology.disruptor.wait.strategy";
460
+
461
+ /**
462
+ * The size of the shared thread pool for worker tasks to make use of. The thread pool can be accessed
463
+ * via the TopologyContext.
464
+ */
465
+ public static String TOPOLOGY_WORKER_SHARED_THREAD_POOL_SIZE="topology.worker.shared.thread.pool.size";
466
+
467
+ /**
468
+ * The interval in seconds to use for determining whether to throttle error reported to Zookeeper. For example,
469
+ * an interval of 10 seconds with topology.max.error.report.per.interval set to 5 will only allow 5 errors to be
470
+ * reported to Zookeeper per task for every 10 second interval of time.
471
+ */
472
+ public static String TOPOLOGY_ERROR_THROTTLE_INTERVAL_SECS="topology.error.throttle.interval.secs";
473
+
474
+ /**
475
+ * See doc for TOPOLOGY_ERROR_THROTTLE_INTERVAL_SECS
476
+ */
477
+ public static String TOPOLOGY_MAX_ERROR_REPORT_PER_INTERVAL="topology.max.error.report.per.interval";
478
+
479
+
480
+ /**
481
+ * How often a batch can be emitted in a Trident topology.
482
+ */
483
+ public static String TOPOLOGY_TRIDENT_BATCH_EMIT_INTERVAL_MILLIS="topology.trident.batch.emit.interval.millis";
484
+
485
+ /**
486
+ * Name of the topology. This config is automatically set by Storm when the topology is submitted.
487
+ */
488
+ public static String TOPOLOGY_NAME="topology.name";
489
+
490
+ /**
491
+ * The root directory in ZooKeeper for metadata about TransactionalSpouts.
492
+ */
493
+ public static String TRANSACTIONAL_ZOOKEEPER_ROOT="transactional.zookeeper.root";
494
+
495
+ /**
496
+ * The list of zookeeper servers in which to keep the transactional state. If null (which is default),
497
+ * will use storm.zookeeper.servers
498
+ */
499
+ public static String TRANSACTIONAL_ZOOKEEPER_SERVERS="transactional.zookeeper.servers";
500
+
501
+ /**
502
+ * The port to use to connect to the transactional zookeeper servers. If null (which is default),
503
+ * will use storm.zookeeper.port
504
+ */
505
+ public static String TRANSACTIONAL_ZOOKEEPER_PORT="transactional.zookeeper.port";
506
+
507
+ /**
508
+ * The number of threads that should be used by the zeromq context in each worker process.
509
+ */
510
+ public static String ZMQ_THREADS = "zmq.threads";
511
+
512
+ /**
513
+ * How long a connection should retry sending messages to a target host when
514
+ * the connection is closed. This is an advanced configuration and can almost
515
+ * certainly be ignored.
516
+ */
517
+ public static String ZMQ_LINGER_MILLIS = "zmq.linger.millis";
518
+
519
+ /**
520
+ * The high water for the ZeroMQ push sockets used for networking. Use this config to prevent buffer explosion
521
+ * on the networking layer.
522
+ */
523
+ public static String ZMQ_HWM = "zmq.hwm";
524
+
525
+ /**
526
+ * This value is passed to spawned JVMs (e.g., Nimbus, Supervisor, and Workers)
527
+ * for the java.library.path value. java.library.path tells the JVM where
528
+ * to look for native libraries. It is necessary to set this config correctly since
529
+ * Storm uses the ZeroMQ and JZMQ native libs.
530
+ */
531
+ public static String JAVA_LIBRARY_PATH = "java.library.path";
532
+
533
+ /**
534
+ * The path to use as the zookeeper dir when running a zookeeper server via
535
+ * "storm dev-zookeeper". This zookeeper instance is only intended for development;
536
+ * it is not a production grade zookeeper setup.
537
+ */
538
+ public static String DEV_ZOOKEEPER_PATH = "dev.zookeeper.path";
539
+
540
+ public static void setDebug(Map conf, boolean isOn) {
541
+ conf.put(Config.TOPOLOGY_DEBUG, isOn);
542
+ }
543
+
544
+ public void setDebug(boolean isOn) {
545
+ setDebug(this, isOn);
546
+ }
547
+
548
+ @Deprecated
549
+ public void setOptimize(boolean isOn) {
550
+ put(Config.TOPOLOGY_OPTIMIZE, isOn);
551
+ }
552
+
553
+ public static void setNumWorkers(Map conf, int workers) {
554
+ conf.put(Config.TOPOLOGY_WORKERS, workers);
555
+ }
556
+
557
+ public void setNumWorkers(int workers) {
558
+ setNumWorkers(this, workers);
559
+ }
560
+
561
+ public static void setNumAckers(Map conf, int numExecutors) {
562
+ conf.put(Config.TOPOLOGY_ACKER_EXECUTORS, numExecutors);
563
+ }
564
+
565
+ public void setNumAckers(int numExecutors) {
566
+ setNumAckers(this, numExecutors);
567
+ }
568
+
569
+ public static void setMessageTimeoutSecs(Map conf, int secs) {
570
+ conf.put(Config.TOPOLOGY_MESSAGE_TIMEOUT_SECS, secs);
571
+ }
572
+
573
+ public void setMessageTimeoutSecs(int secs) {
574
+ setMessageTimeoutSecs(this, secs);
575
+ }
576
+
577
+ public static void registerSerialization(Map conf, Class klass) {
578
+ getRegisteredSerializations(conf).add(klass.getName());
579
+ }
580
+
581
+ public void registerSerialization(Class klass) {
582
+ registerSerialization(this, klass);
583
+ }
584
+
585
+ public static void registerSerialization(Map conf, Class klass, Class<? extends Serializer> serializerClass) {
586
+ Map<String, String> register = new HashMap<String, String>();
587
+ register.put(klass.getName(), serializerClass.getName());
588
+ getRegisteredSerializations(conf).add(register);
589
+ }
590
+
591
+ public void registerSerialization(Class klass, Class<? extends Serializer> serializerClass) {
592
+ registerSerialization(this, klass, serializerClass);
593
+ }
594
+
595
+ public static void registerDecorator(Map conf, Class<? extends IKryoDecorator> klass) {
596
+ getRegisteredDecorators(conf).add(klass.getName());
597
+ }
598
+
599
+ public void registerDecorator(Class<? extends IKryoDecorator> klass) {
600
+ registerDecorator(this, klass);
601
+ }
602
+
603
+ public static void setKryoFactory(Map conf, Class<? extends IKryoFactory> klass) {
604
+ conf.put(Config.TOPOLOGY_KRYO_FACTORY, klass.getName());
605
+ }
606
+
607
+ public void setKryoFactory(Class<? extends IKryoFactory> klass) {
608
+ setKryoFactory(this, klass);
609
+ }
610
+
611
+ public static void setSkipMissingKryoRegistrations(Map conf, boolean skip) {
612
+ conf.put(Config.TOPOLOGY_SKIP_MISSING_KRYO_REGISTRATIONS, skip);
613
+ }
614
+
615
+ public void setSkipMissingKryoRegistrations(boolean skip) {
616
+ setSkipMissingKryoRegistrations(this, skip);
617
+ }
618
+
619
+ public static void setMaxTaskParallelism(Map conf, int max) {
620
+ conf.put(Config.TOPOLOGY_MAX_TASK_PARALLELISM, max);
621
+ }
622
+
623
+ public void setMaxTaskParallelism(int max) {
624
+ setMaxTaskParallelism(this, max);
625
+ }
626
+
627
+ public static void setMaxSpoutPending(Map conf, int max) {
628
+ conf.put(Config.TOPOLOGY_MAX_SPOUT_PENDING, max);
629
+ }
630
+
631
+ public void setMaxSpoutPending(int max) {
632
+ setMaxSpoutPending(this, max);
633
+ }
634
+
635
+ public static void setStatsSampleRate(Map conf, double rate) {
636
+ conf.put(Config.TOPOLOGY_STATS_SAMPLE_RATE, rate);
637
+ }
638
+
639
+ public void setStatsSampleRate(double rate) {
640
+ setStatsSampleRate(this, rate);
641
+ }
642
+
643
+ public static void setFallBackOnJavaSerialization(Map conf, boolean fallback) {
644
+ conf.put(Config.TOPOLOGY_FALL_BACK_ON_JAVA_SERIALIZATION, fallback);
645
+ }
646
+
647
+ public void setFallBackOnJavaSerialization(boolean fallback) {
648
+ setFallBackOnJavaSerialization(this, fallback);
649
+ }
650
+
651
+ private static List getRegisteredSerializations(Map conf) {
652
+ List ret;
653
+ if(!conf.containsKey(Config.TOPOLOGY_KRYO_REGISTER)) {
654
+ ret = new ArrayList();
655
+ } else {
656
+ ret = new ArrayList((List) conf.get(Config.TOPOLOGY_KRYO_REGISTER));
657
+ }
658
+ conf.put(Config.TOPOLOGY_KRYO_REGISTER, ret);
659
+ return ret;
660
+ }
661
+
662
+ private static List getRegisteredDecorators(Map conf) {
663
+ List ret;
664
+ if(!conf.containsKey(Config.TOPOLOGY_KRYO_DECORATORS)) {
665
+ ret = new ArrayList();
666
+ } else {
667
+ ret = new ArrayList((List) conf.get(Config.TOPOLOGY_KRYO_DECORATORS));
668
+ }
669
+ conf.put(Config.TOPOLOGY_KRYO_DECORATORS, ret);
670
+ return ret;
671
+ }
672
+ }
673
+
674
+
675
+ end
676
+ end
@@ -0,0 +1,45 @@
1
module Wukong

  # EventMachine protocol handler that connects STDIN/STDOUT to a Wukong
  # dataflow so it can serve as a Storm/Trident shell component.  Each
  # input line is pushed through the dataflow and answered with the
  # resulting records, newline-separated and terminated by the delimiter
  # taken from +settings+.
  class StormRunner < EM::P::LineAndTextProtocol
    include DriverMethods

    attr_accessor :dataflow, :settings

    # Attach a new runner to $stdin inside the EventMachine reactor.
    #
    # @param label    [String] name of the processor or dataflow to run
    # @param settings [Hash]   runner settings; must respond to #delimiter
    def self.start(label, settings = {})
      EM.attach($stdin, self, label, settings)
    end

    def initialize(label, settings)
      super
      @settings = settings
      @dataflow = construct_dataflow(label, settings)
      @messages = []
    end

    # EventMachine hook: wire up the dataflow once attached.
    def post_init
      setup_dataflow
    end

    # Run a single input line through the dataflow and reply immediately.
    # Any raised error is logged to $stderr and shuts the reactor down.
    def receive_line(line)
      driver.send_through_dataflow(line)
      send_messages
    rescue => error
      $stderr.puts(error.message)
      EM.stop
    end

    # Flush everything buffered by #process.  Records are joined with
    # newlines and always followed by the delimiter, so an event that
    # produced no output still answers with a bare delimiter.
    def send_messages
      reply = @messages.join("\n") + settings.delimiter
      $stdout.write(reply)
      $stdout.flush
      @messages.clear
    end

    # EventMachine hook: stop the reactor when STDIN closes.
    def unbind
      EM.stop
    end

    # Driver callbacks: no per-run setup/teardown; records emitted by the
    # dataflow are buffered until the next send_messages.
    def setup
    end

    def process(record)
      @messages << record
    end

    def stop
    end

  end
end
@@ -0,0 +1,5 @@
1
module Wukong
  module Storm
    # Gem version string, consumed by wu-storm.gemspec.  Frozen so shared
    # references cannot mutate the constant in place.
    VERSION = '0.0.1'.freeze
  end
end
@@ -0,0 +1,3 @@
1
+ require 'wukong'
2
+ require 'wukong-storm/runner'
3
+ # require 'wukong-storm/configuration'
@@ -0,0 +1,6 @@
1
require 'wukong-storm'
require 'wukong/spec_helpers'

RSpec.configure do |config|
  # Was a bare `include`, which runs against the top-level `main` object and
  # therefore mixes the helpers into Object globally.  Scoping through the
  # configuration object confines them to RSpec example groups.
  config.include Wukong::SpecHelpers
end
@@ -0,0 +1,27 @@
1
# Identity processor: emits every record exactly as received.
Wukong.processor(:simple) do
  def process(record)
    yield record
  end
end
6
+
7
# Sink processor: consumes records without ever emitting, exercising the
# empty-reply (bare delimiter) path of wu-storm.
Wukong.processor(:skipped) do
  def process(_record)
    # intentionally emits nothing
  end
end
12
+
13
# Fan-out processor: emits each incoming record three times.
Wukong.processor(:multi) do
  def process(record)
    3.times do
      yield record
    end
  end
end
18
+
19
# Emits a sentence built from the record's 'foo' field; used by the :flow
# dataflow spec together with from_json.
Wukong.processor(:test_example) do
  def process(record)
    yield "I raised the #{record['foo']}"
  end
end
24
+
25
# Dataflow: parse each line as JSON, then run it through :test_example.
Wukong.dataflow(:flow) do
  from_json | test_example
end
@@ -0,0 +1,54 @@
1
+ require 'spec_helper'
2
+
3
# A do-nothing processor available to any spec that only needs a named
# processor to exist.
# NOTE(review): defined via `Wu.` while support/examples.rb uses `Wukong.`
# -- presumably Wu is an alias for Wukong; confirm against the wukong gem.
Wu.processor(:test) do

  def process(record)
    # deliberately swallow every record
  end

end
10
+
11
describe 'wu-storm' do

  # Path to the example processors/flows loaded by each invocation.
  let(:examples) { File.expand_path('../support/examples.rb', __FILE__) }

  # With no processor/flow named, the executable must refuse to run and
  # print its usage banner.
  context 'without any arguments' do
    subject { command 'wu-storm' }
    it { should exit_with(:non_zero) }
    it { should have_stderr('usage: wu-storm') }
  end

  # Identity processor: event comes back unchanged, delimiter-terminated.
  context 'with a simple processor' do
    let(:input)  { 'one event' }
    subject      { command('wu-storm', examples, '--run=simple') < input }
    it { should exit_with(0) }
    it { should have_stdout('one event|') }
  end

  # A processor that emits nothing still answers with a bare delimiter.
  context 'with a skipped processor' do
    let(:input)  { 'never see this' }
    subject      { command('wu-storm', examples, '--run=skipped') < input }
    it { should exit_with(0) }
    it { should have_stdout('|') }
  end

  # Multiple emissions for one event are newline-joined before the delimiter.
  context 'with a duplicating processor' do
    let(:input)  { 'foo' }
    subject      { command('wu-storm', examples, '--run=multi') < input }
    it { should exit_with(0) }
    it { should have_stdout("foo\nfoo\nfoo|") }
  end

  # A full dataflow (JSON parse + processor) works end to end.
  context 'with a flow' do
    let(:input)  { '{"foo":"bar"}' }
    subject      { command('wu-storm', examples, '--run=flow') < input }
    it { should exit_with(0) }
    it { should have_stdout('I raised the bar|') }
  end

  # Each input line gets its own delimiter-terminated reply.
  context 'with multiple arguments' do
    let(:input)  { "foo\nbar\nbaz" }
    subject      { command('wu-storm', examples, '--run=simple') < input }
    it { should exit_with(0) }
    it { should have_stdout('foo|bar|baz|') }
  end
end
data/wu-storm.gemspec ADDED
@@ -0,0 +1,22 @@
1
# -*- encoding: utf-8 -*-
require File.expand_path('../lib/wukong-storm/version', __FILE__)

Gem::Specification.new do |gem|
  gem.name     = 'wukong-storm'
  gem.homepage = 'https://github.com/infochimps-labs/wukong-storm'
  gem.licenses = ['Apache 2.0']
  gem.email    = 'coders@infochimps.org'
  gem.authors  = ['Infochimps', 'Travis Dempsey']
  gem.version  = Wukong::Storm::VERSION

  gem.summary     = 'Storm processing for Ruby'
  # Previously an empty heredoc: `gem build` warns about a missing
  # description and the registry listing shows nothing.
  gem.description = <<-EOF
    wukong-storm provides the wu-storm executable, which runs Wukong
    processors and dataflows over STDIN/STDOUT as components inside a
    Storm or Trident topology.
  EOF

  gem.files         = `git ls-files`.split("\n")
  gem.executables   = ['wu-storm']
  gem.test_files    = gem.files.grep(/^spec/)
  gem.require_paths = ['lib']

  gem.add_dependency('wukong', '3.0.0.pre3')
end
metadata ADDED
@@ -0,0 +1,86 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: wukong-storm
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Infochimps
9
+ - Travis Dempsey
10
+ autorequire:
11
+ bindir: bin
12
+ cert_chain: []
13
+ date: 2012-12-17 00:00:00.000000000 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: wukong
17
+ requirement: !ruby/object:Gem::Requirement
18
+ none: false
19
+ requirements:
20
+ - - '='
21
+ - !ruby/object:Gem::Version
22
+ version: 3.0.0.pre3
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ none: false
27
+ requirements:
28
+ - - '='
29
+ - !ruby/object:Gem::Version
30
+ version: 3.0.0.pre3
31
+ description: ''
32
+ email: coders@infochimps.org
33
+ executables:
34
+ - wu-storm
35
+ extensions: []
36
+ extra_rdoc_files: []
37
+ files:
38
+ - .gitignore
39
+ - .rspec
40
+ - Gemfile
41
+ - README.md
42
+ - Rakefile
43
+ - bin/wu-storm
44
+ - lib/wukong-storm.rb
45
+ - lib/wukong-storm/configuration.rb
46
+ - lib/wukong-storm/runner.rb
47
+ - lib/wukong-storm/version.rb
48
+ - spec/spec_helper.rb
49
+ - spec/support/examples.rb
50
+ - spec/wu_storm_spec.rb
51
+ - wu-storm.gemspec
52
+ homepage: https://github.com/infochimps-labs/wukong-storm
53
+ licenses:
54
+ - Apache 2.0
55
+ post_install_message:
56
+ rdoc_options: []
57
+ require_paths:
58
+ - lib
59
+ required_ruby_version: !ruby/object:Gem::Requirement
60
+ none: false
61
+ requirements:
62
+ - - ! '>='
63
+ - !ruby/object:Gem::Version
64
+ version: '0'
65
+ segments:
66
+ - 0
67
+ hash: 1144670354774271812
68
+ required_rubygems_version: !ruby/object:Gem::Requirement
69
+ none: false
70
+ requirements:
71
+ - - ! '>='
72
+ - !ruby/object:Gem::Version
73
+ version: '0'
74
+ segments:
75
+ - 0
76
+ hash: 1144670354774271812
77
+ requirements: []
78
+ rubyforge_project:
79
+ rubygems_version: 1.8.24
80
+ signing_key:
81
+ specification_version: 3
82
+ summary: Storm processing for Ruby
83
+ test_files:
84
+ - spec/spec_helper.rb
85
+ - spec/support/examples.rb
86
+ - spec/wu_storm_spec.rb