wukong-storm 0.1.1 → 0.2.0
- data/.gitignore +1 -0
- data/.rspec +1 -2
- data/Gemfile +1 -1
- data/README.md +174 -18
- data/bin/wu-bolt +4 -0
- data/lib/wukong-storm.rb +50 -10
- data/lib/wukong-storm/bolt_driver.rb +81 -0
- data/lib/wukong-storm/bolt_runner.rb +44 -0
- data/lib/wukong-storm/storm_invocation.rb +386 -0
- data/lib/wukong-storm/storm_runner.rb +123 -0
- data/lib/wukong-storm/version.rb +1 -1
- data/lib/wukong-storm/wukong-storm.jar +0 -0
- data/pom.xml +111 -0
- data/spec/spec_helper.rb +13 -1
- data/spec/wukong-storm/bolt_driver_spec.rb +46 -0
- data/spec/wukong-storm/storm_invocation_spec.rb +204 -0
- data/spec/wukong-storm/storm_runner_spec.rb +76 -0
- data/spec/{wu_storm_spec.rb → wukong-storm/wu-bolt_spec.rb} +14 -14
- data/spec/wukong-storm/wu-storm_spec.rb +17 -0
- data/spec/wukong-storm_spec.rb +5 -0
- data/src/main/java/com/infochimps/wukong/storm/Builder.java +53 -0
- data/src/main/java/com/infochimps/wukong/storm/DataflowBuilder.java +74 -0
- data/src/main/java/com/infochimps/wukong/storm/SpoutBuilder.java +237 -0
- data/src/main/java/com/infochimps/wukong/storm/StateBuilder.java +46 -0
- data/src/main/java/com/infochimps/wukong/storm/TopologyBuilder.java +130 -0
- data/src/main/java/com/infochimps/wukong/storm/TopologySubmitter.java +181 -0
- data/wukong-storm.gemspec +3 -2
- metadata +49 -11
- data/lib/wukong-storm/driver.rb +0 -58
- data/lib/wukong-storm/runner.rb +0 -40
data/lib/wukong-storm/storm_runner.rb ADDED
@@ -0,0 +1,123 @@
+require_relative("storm_invocation")
+require 'kafka'
+
+module Wukong
+  module Storm
+
+    # Implements the runner for wu-storm.
+    class StormRunner < Wukong::Local::LocalRunner
+
+      # The default port Kafka is assumed to be running on.
+      DEFAULT_KAFKA_PORT = 9092
+
+      include Logging
+      include StormInvocation
+
+      usage "DATAFLOW|PROCESSOR"
+
+      description <<-EOF.gsub(/^ {8}/,'')
+        wu-storm is a commandline tool for launching dynamically assembled
+        parametrized Storm topologies that embed Wukong dataflows. The
+        overall "shape" of the launched topology is
+
+          spout -> wukong dataflow -> state
+
+        The default spout and state will read and write to Kafka topics.
+        Additional spouts and states are also available.
+
+        Here's an example which launches `my_flow` as a Storm topology reading
+        from the Kafka topic `raw` and writing to the Kafka topic `clean`:
+
+          $ wu-storm my_flow --input=raw --output=clean
+
+        Here's an example which launches `my_flow` as a Storm topology reading
+        from the S3 bucket `example-data` and the path `raw` and writing to
+        the Kafka topic `clean`:
+
+          $ wu-storm my_flow --input=s3://example-data/raw --output=clean
+
+        There are several options which apply to any topology like --name,
+        --nimbus_host, --zookeeper_hosts, --ackers, --parallelism, &c.
+
+        Some options only make sense when reading from Kafka like
+        --kafka_batch, --kafka_partitions, or --input_parallelism.
+
+        Other options only make sense when reading from S3 like --aws_key,
+        --aws_secret, and --aws_region.
+
+        Options like --from_beginning, --from_end, and --offset have general
+        applicability but are interpreted differently based on the spout type.
+
+        For a complete list of options try `wu storm --help`.
+      EOF
+
+      def kill_first?
+        settings[:rm]
+      end
+
+      def validate
+        begin
+          super()
+        rescue => e
+          raise e if dataflow
+          raise Error.new("Must provide a processor or dataflow to run, via either the --run option or as the first argument, or provide an explicit --bolt_command") unless settings[:bolt_command]
+        end
+        raise Error.new("An explicit --input URI is required to launch a dataflow") if settings[:input].nil? || settings[:input].empty?
+        raise Error.new("An explicit --output URI is required to launch a dataflow") if settings[:output].nil? || settings[:output].empty?
+
+        if kafka_input? || kafka_output?
+          raise Error.new("Must provide a list of comma-separated Kafka hosts") if settings[:kafka_hosts].nil? || settings[:kafka_hosts].empty?
+        end
+
+        if s3_input?
+          raise Error.new("Must provide an S3 bucket and path") if input_uri.path.nil? || input_uri.path.empty?
+          raise Error.new("Must provide an AWS access key (settings[:aws_key])") if settings[:aws_key].nil? || settings[:aws_key].empty?
+          raise Error.new("Must provide an AWS secret key (settings[:aws_secret])") if settings[:aws_secret].nil? || settings[:aws_secret].empty?
+          raise Error.new("Invalid AWS region: <#{settings[:aws_region]}>") unless s3_endpoint
+        end
+        true
+      end
+
+      def run
+        log_topology_structure_and_settings
+        setup_run
+        if kill_first?
+          log.debug("Killing topology <#{topology_name}> and waiting <#{settings[:wait]}> seconds...")
+          execute_command(storm_kill_commandline)
+          sleep settings[:wait].to_i unless settings[:dry_run]
+        end
+        execute_command!(storm_launch_commandline)
+        raise Error.new("Failed to launch topology #{topology_name}!") unless settings[:dry_run] || $?.success?
+      end
+
+      protected
+
+      def log_topology_structure_and_settings
+        log.info("Using Zookeeper at <#{settings[:zookeeper_hosts]}>")
+
+        log.info("Reading from Kafka <#{settings[:kafka_hosts]}/#{settings[:input]}>") if kafka_input?
+        log.info("Reading from filesystem at <#{settings[:input]}>") if blob_input?
+        log.info("Writing to Kafka <#{settings[:kafka_hosts]}/#{settings[:output]}>") if kafka_output?
+
+        log.info("Dry run:") if settings[:dry_run]
+      end
+
+      def setup_run
+        return unless kafka_input?
+        topic      = settings[:input]
+        host, port = kafka_host_and_port
+        log.info("Ensuring input topic <#{topic}> exists on Kafka broker <#{host}:#{port}>")
+        Kafka::Producer.new(host: host, port: port, topic: topic).push([]) unless settings[:dry_run]
+      end
+
+      def kafka_host_and_port
+        kafka_host = settings[:kafka_hosts].to_s.split(/ *, */).first
+        raise Error.new("Could not construct a Kafka host from <#{settings[:kafka_hosts]}>") unless kafka_host
+        parts = kafka_host.split(':')
+        raise Error.new("Badly formed Kafka host <#{kafka_host}>. Must be in the format HOST[:PORT]") if parts.size > 2
+        [parts[0], (parts[1] || DEFAULT_KAFKA_PORT).to_i]
+      end
+
+    end
+  end
+end
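A quick illustration of the host-parsing rule in kafka_host_and_port above: only the first entry of the comma-separated --kafka_hosts setting is used, and a missing port falls back to DEFAULT_KAFKA_PORT. A standalone Ruby sketch mirroring that logic (parse_kafka_host is a hypothetical name, not the gem's code):

    DEFAULT_KAFKA_PORT = 9092

    # Mirrors StormRunner#kafka_host_and_port: take the first host from a
    # comma-separated list and split off an optional ":PORT" suffix.
    def parse_kafka_host(kafka_hosts)
      kafka_host = kafka_hosts.to_s.split(/ *, */).first
      raise ArgumentError, "Could not construct a Kafka host from <#{kafka_hosts}>" unless kafka_host
      parts = kafka_host.split(':')
      raise ArgumentError, "Badly formed Kafka host <#{kafka_host}>" if parts.size > 2
      [parts[0], (parts[1] || DEFAULT_KAFKA_PORT).to_i]
    end

    parse_kafka_host('k1.example.com:9093,k2.example.com') #=> ["k1.example.com", 9093]
    parse_kafka_host('localhost')                          #=> ["localhost", 9092]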
data/lib/wukong-storm/version.rb CHANGED

data/lib/wukong-storm/wukong-storm.jar CHANGED
Binary file
data/pom.xml ADDED
@@ -0,0 +1,111 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <groupId>com.infochimps.wukong</groupId>
+  <artifactId>wukong-storm</artifactId>
+  <packaging>jar</packaging>
+  <version>1.4.0-SNAPSHOT</version>
+  <name>wukong-storm</name>
+  <url>http://github.com/infochimps-labs/wukong-storm</url>
+
+  <parent>
+    <groupId>com.infochimps</groupId>
+    <artifactId>parent-pom</artifactId>
+    <version>1.0.0-SNAPSHOT</version>
+  </parent>
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-assembly-plugin</artifactId>
+        <version>2.4</version>
+        <configuration>
+          <descriptorRefs>
+            <descriptorRef>jar-with-dependencies</descriptorRef>
+          </descriptorRefs>
+        </configuration>
+        <executions>
+          <execution>
+            <id>dist-assembly</id>
+            <phase>package</phase>
+            <goals>
+              <goal>single</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+    <sourceDirectory>src</sourceDirectory>
+  </build>
+
+
+  <repositories>
+    <repository>
+      <id>Sonatype-public</id>
+      <name>SnakeYAML repository</name>
+      <url>http://oss.sonatype.org/content/groups/public/</url>
+    </repository>
+    <!-- Infochimps Repositories -->
+    <repository>
+      <id>infochimps.releases</id>
+      <name>Infochimps Internal Repository</name>
+      <url>https://s3.amazonaws.com/artifacts.chimpy.us/maven-s3p/releases</url>
+    </repository>
+    <repository>
+      <id>infochimps.snapshots</id>
+      <name>Infochimps Internal Repository</name>
+      <url>https://s3.amazonaws.com/artifacts.chimpy.us/maven-s3p/snapshots</url>
+      <snapshots>
+        <enabled>true</enabled>
+        <updatePolicy>always</updatePolicy>
+      </snapshots>
+    </repository>
+  </repositories>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.kafka</groupId>
+      <artifactId>kafka-core</artifactId>
+      <version>0.7.1-incubating</version>
+    </dependency>
+
+    <dependency>
+      <groupId>storm</groupId>
+      <artifactId>storm</artifactId>
+      <version>0.9.0</version>
+      <scope>provided</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <version>3.8.1</version>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>storm</groupId>
+      <artifactId>storm-kafka</artifactId>
+      <version>0.9.0-wip16a-scala292</version>
+    </dependency>
+
+    <dependency>
+      <groupId>com.fasterxml.jackson.core</groupId>
+      <artifactId>jackson-core</artifactId>
+      <version>2.1.2</version>
+    </dependency>
+    <dependency>
+      <groupId>com.fasterxml.jackson.datatype</groupId>
+      <artifactId>jackson-datatype-json-org</artifactId>
+      <version>2.1.2</version>
+    </dependency>
+
+    <dependency>
+      <groupId>com.infochimps</groupId>
+      <artifactId>storm-util</artifactId>
+      <version>1.7.0-SNAPSHOT</version>
+    </dependency>
+
+  </dependencies>
+</project>
data/spec/spec_helper.rb CHANGED
@@ -15,7 +15,19 @@ RSpec.configure do |config|
   end
 
   def storm_runner *args, &block
-    runner(Wukong::Storm::StormRunner, 'wu-storm', *args)
+    runner(Wukong::Storm::StormRunner, 'wu-storm', *args) do
+      stub(:execute_command)
+      instance_eval(&block) if block_given?
+    end
   end
+
+  def wu_storm *args
+    command('wu-storm', *args)
+  end
+
+  def wu_bolt *args
+    command('wu-bolt', *args)
+  end
+
 
 end
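The new wu_storm and wu_bolt helpers are thin wrappers around the shared command helper, so specs can shell out to either executable; for example (hypothetical calls, shown only to illustrate the delegation):

    wu_storm('--help')  # equivalent to command('wu-storm', '--help')
    wu_bolt('--help')   # equivalent to command('wu-bolt', '--help')

Note also that storm_runner now stubs execute_command on the runner it builds, so specs that exercise the kill path no longer invoke the real storm executable.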
data/spec/wukong-storm/bolt_driver_spec.rb ADDED
@@ -0,0 +1,46 @@
+require 'spec_helper'
+
+describe Wukong::Storm::BoltDriver do
+
+  let(:settings) do
+    Configliere::Param.new.tap do |s|
+      Wukong::Storm.configure(s, 'wu-bolt')
+    end
+  end
+
+  let(:driver) { Wukong::Storm::BoltDriver.new(:bogus_event_machine_inserted_arg, :identity, settings) }
+
+  describe "setting up a dataflow" do
+    context "#post_init hook from EventMachine" do
+      after { driver.post_init }
+      it "should not sync $stdout" do
+        $stdout.should_not_receive(:sync)
+      end
+    end
+  end
+
+  describe "driving a dataflow" do
+    context "#receive_line hook from EventMachine" do
+      let(:line) { "hello" }
+      before { $stdout.stub(:write) }
+      after  { driver.receive_line(line) }
+      it "passes the line to the #send_through_dataflow method" do
+        driver.should_receive(:send_through_dataflow).with(line)
+      end
+      it "calls the #write_output method" do
+        driver.should_receive(:write_output)
+      end
+      it "writes each output record" do
+        $stdout.should_receive(:write).with(line)
+      end
+      it "writes the batch delimiter" do
+        $stdout.should_receive(:write).with('X')
+      end
+      it "writes newlines after each output record and after the batch delimiter" do
+        $stdout.should_receive(:write).with("\n").exactly(2).times
+      end
+
+    end
+  end
+
+end
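Read together, these expectations pin down wu-bolt's output framing: each output record is written followed by a newline, then the batch delimiter (here 'X') and a final newline terminate the batch. A standalone sketch of that framing, assuming a configurable delimiter (write_batch is a hypothetical helper, not the gem's code):

    # Emit one batch in the framing the spec above asserts: every record,
    # then the delimiter, each followed by a newline.
    def write_batch(records, delimiter = 'X')
      records.each do |record|
        $stdout.write(record)
        $stdout.write("\n")
      end
      $stdout.write(delimiter)
      $stdout.write("\n")
    end

    write_batch(['hello'])  # prints "hello" then "X", each on its own line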
data/spec/wukong-storm/storm_invocation_spec.rb ADDED
@@ -0,0 +1,204 @@
+require 'fileutils'
+require 'spec_helper'
+
+describe Wukong::Storm::StormInvocation do
+
+  before do
+    @producer = double("Kafka::Producer", push: true)
+    Kafka::Producer.stub(:new).and_return(@producer)
+  end
+
+  context "without any options" do
+    subject { storm_runner('identity', '--input=foo', '--output=bar') }
+
+    its(:topology_name) { should == 'identity' }
+    its(:dataflow_name) { should == 'identity' }
+
+    its(:kafka_input?)  { should be_true }
+    its(:kafka_output?) { should be_true }
+
+    its(:storm_runner) { should == 'storm' }
+
+    its(:storm_launch_commandline) { should match(/jar .*storm.*.jar/) }
+    its(:storm_launch_commandline) { should match(/com\.infochimps\..*TopologySubmitter/) }
+
+    its(:storm_kill_commandline) { should match(/storm kill identity -w 300/) }
+  end
+
+  describe "Storm runner options" do
+    context "--storm_runner" do
+      subject { storm_runner('identity', '--input=foo', '--output=bar', '--storm_runner=/opt/bin/storm') }
+      its(:storm_runner) { should == '/opt/bin/storm' }
+    end
+    context "--storm_home" do
+      around do |example|
+        FileUtils.mkdir_p File.join(pwd, 'bin')
+        FileUtils.touch File.join(pwd, 'bin', 'storm')
+        example.run
+        FileUtils.rm_r File.join(pwd, 'bin')
+      end
+      let(:pwd){ File.expand_path('../..', __FILE__) }
+      subject { storm_runner('identity', '--input=foo', '--output=bar', "--storm_home=#{pwd}") }
+      its(:storm_runner) { should == File.join(pwd, 'bin/storm') }
+    end
+  end
+
+  describe "native Storm options" do
+    context "when setting --ackers" do
+      subject { storm_runner('identity', '--input=foo', '--output=bar', '--ackers=10') }
+      its(:storm_launch_commandline) { should match(/topology.acker.executors=10/) }
+    end
+    context "when setting --wait" do
+      subject { storm_runner('identity', '--input=foo', '--output=bar', '--wait=1') }
+      its(:storm_kill_commandline) { should match(/-w 1/) }
+    end
+  end
+
+  describe "services options" do
+    context "by default" do
+      subject { storm_runner('identity', '--input=foo', '--output=bar') }
+      its(:storm_launch_commandline) { should match(/\wukong\.kafka\.hosts.*localhost/) }
+      its(:storm_launch_commandline) { should match(/\wukong\.zookeeper\.hosts.*localhost/) }
+    end
+    context "when setting --kafka_hosts" do
+      subject { storm_runner('identity', '--input=foo', '--output=bar', '--kafka_hosts=k1.example.com:9092,k2.example.com:9093') }
+      its(:storm_launch_commandline) { should match(/\wukong\.kafka\.hosts.*k1.example.com:9092,k2.example.com:9093/) }
+    end
+    context "when setting --zookeeper_hosts" do
+      subject { storm_runner('identity', '--input=foo', '--output=bar', '--zookeeper_hosts=z1.example.com:2181,z2.example.com:3181') }
+      its(:storm_launch_commandline) { should match(/\wukong\.zookeeper\.hosts.*z1.example.com:2181,z2.example.com:3181/) }
+    end
+  end
+
+  describe "topology options" do
+    context "by default" do
+      subject { storm_runner('identity', '--input=foo', '--output=bar') }
+      its(:topology_name) { should == 'identity' }
+      its(:dataflow_name) { should == 'identity' }
+      its(:storm_launch_commandline) { should match(/\wukong\.topology.*identity/) }
+    end
+    context "when setting --name" do
+      subject { storm_runner('identity', '--input=foo', '--output=bar', '--name=myFlow') }
+      its(:topology_name) { should == 'myFlow' }
+      its(:dataflow_name) { should == 'identity' }
+      its(:storm_launch_commandline) { should match(/\wukong\.topology.*myFlow/) }
+    end
+  end
+
+  describe "spout options" do
+    context "when reading from Kafka" do
+      subject { storm_runner('identity', '--input=foo', '--output=bar') }
+      its(:storm_launch_commandline) { should match(/\wukong\.input\.type.*kafka/) }
+      its(:storm_launch_commandline) { should match(/\wukong\.input\.kafka\.topic.*foo/) }
+      its(:storm_launch_commandline) { should match(/\wukong\.input\.kafka\.partitions.*1/) }
+      its(:storm_launch_commandline) { should match(/\wukong\.input\.kafka\.batch.*1048576/) }
+      its(:storm_launch_commandline) { should match(/\wukong\.input\.parallelism.*1/) }
+      context "when setting --kafka_partitions" do
+        subject { storm_runner('identity', '--input=foo', '--output=bar', '--kafka_partitions=10') }
+        its(:storm_launch_commandline) { should match(/\wukong\.input\.kafka\.partitions.*10/) }
+      end
+      context "when setting --kafka_batch" do
+        subject { storm_runner('identity', '--input=foo', '--output=bar', '--kafka_batch=100') }
+        its(:storm_launch_commandline) { should match(/\wukong\.input\.kafka\.batch.*10/) }
+      end
+      context "when setting --input_parallelism" do
+        subject { storm_runner('identity', '--input=foo', '--output=bar', '--input_parallelism=10') }
+        its(:storm_launch_commandline) { should match(/\wukong\.input\.parallelism.*10/) }
+      end
+      context "when setting --from_beginning" do
+        subject { storm_runner('identity', '--input=foo', '--output=bar', '--from_beginning') }
+        its(:storm_launch_commandline) { should match(/\wukong\.input\.kafka\.offset.*-2/) }
+      end
+      context "when setting --from_end" do
+        subject { storm_runner('identity', '--input=foo', '--output=bar', '--from_end') }
+        its(:storm_launch_commandline) { should match(/\wukong\.input\.kafka\.offset.*-1/) }
+      end
+      context "when setting --offset" do
+        subject { storm_runner('identity', '--input=foo', '--output=bar', '--offset=1234') }
+        its(:storm_launch_commandline) { should match(/\wukong\.input\.kafka\.offset.*1234/) }
+      end
+    end
+    context "when reading from a filesystem" do
+      context "when reading from a local filesystem" do
+        subject { storm_runner('identity', '--input=file:///foo/bar', '--output=baz') }
+        its(:storm_launch_commandline) { should match(/\wukong\.input\.type.*blob/) }
+        its(:storm_launch_commandline) { should match(/\wukong\.input\.blob\.type.*file/) }
+        its(:storm_launch_commandline) { should match(/\wukong\.input\.blob\.path.*\/foo\/bar/) }
+        its(:storm_launch_commandline) { should match(/\wukong\.input\.blob\.start.*RESUME/) }
+        context "when setting --from_beginning" do
+          subject { storm_runner('identity', '--input=file:///foo/bar', '--output=baz', '--from_beginning') }
+          its(:storm_launch_commandline) { should match(/\wukong\.input\.blob\.start.*EARLIEST/) }
+        end
+        context "when setting --from_end" do
+          subject { storm_runner('identity', '--input=file:///foo/bar', '--output=baz', '--from_end') }
+          its(:storm_launch_commandline) { should match(/\wukong\.input\.blob\.start.*LATEST/) }
+        end
+        context "when setting --offset" do
+          subject { storm_runner('identity', '--input=file:///foo/bar', '--output=baz', '--offset=bing-1') }
+          its(:storm_launch_commandline) { should match(/\wukong\.input\.blob\.start.*EXPLICIT/) }
+          its(:storm_launch_commandline) { should match(/\wukong\.input\.blob\.marker.*bing-1/) }
+        end
+      end
+      context "when reading from S3" do
+        subject { storm_runner('identity', '--input=s3://foo/bar', '--output=baz', '--aws_key=key', '--aws_secret=secret') }
+        its(:storm_launch_commandline) { should match(/\wukong\.input\.type.*blob/) }
+        its(:storm_launch_commandline) { should match(/\wukong\.input\.blob\.type.*s3/) }
+        its(:storm_launch_commandline) { should match(/\wukong\.input\.blob\.aws_key.*key/) }
+        its(:storm_launch_commandline) { should match(/\wukong\.input\.blob\.aws_secret.*secret/) }
+        its(:storm_launch_commandline) { should match(/\wukong\.input\.blob\.s3_endpoint.*s3\.amazonaws\.com/) }
+        its(:storm_launch_commandline) { should match(/\wukong\.input\.blob\.s3_bucket.*foo/) }
+        its(:storm_launch_commandline) { should match(/\wukong\.input\.blob\.path.*bar/) }
+        its(:storm_launch_commandline) { should match(/\wukong\.input\.blob\.start.*RESUME/) }
+        context "when setting --from_beginning" do
+          subject { storm_runner('identity', '--input=s3://foo/bar', '--output=baz', '--aws_key=key', '--aws_secret=secret', '--from_beginning') }
+          its(:storm_launch_commandline) { should match(/\wukong\.input\.blob\.start.*EARLIEST/) }
+        end
+        context "when setting --from_end" do
+          subject { storm_runner('identity', '--input=s3://foo/bar', '--output=baz', '--aws_key=key', '--aws_secret=secret', '--from_end') }
+          its(:storm_launch_commandline) { should match(/\wukong\.input\.blob\.start.*LATEST/) }
+        end
+        context "when setting --offset" do
+          subject { storm_runner('identity', '--input=s3://foo/bar', '--output=baz', '--aws_key=key', '--aws_secret=secret', '--offset=bing-1') }
+          its(:storm_launch_commandline) { should match(/\wukong\.input\.blob\.start.*EXPLICIT/) }
+          its(:storm_launch_commandline) { should match(/\wukong\.input\.blob\.marker.*bing-1/) }
+        end
+        context "when setting --aws_region" do
+          subject { storm_runner('identity', '--input=s3://foo/bar', '--output=baz', '--aws_key=key', '--aws_secret=secret', '--aws_region=us-west-1') }
+          its(:storm_launch_commandline) { should match(/\wukong\.input\.blob\.s3_endpoint.*s3-us-west-1\.amazonaws\.com/) }
+        end
+      end
+    end
+  end
+
+  describe "dataflow options" do
+    subject { storm_runner('identity', '--input=foo', '--output=bar') }
+    its(:storm_launch_commandline) { should match(/wukong\.directory.*#{Dir.pwd}/) }
+    its(:storm_launch_commandline) { should match(/wukong\.dataflow.*identity/) }
+    its(:storm_launch_commandline) { should match(/wukong\.command.*wu-bolt.*identity/) }
+    its(:storm_launch_commandline) { should match(/wukong\.parallelism.*1/) }
+    context "when setting --environment" do
+      subject { storm_runner('identity', '--input=foo', '--output=bar', '--environment=production') }
+      its(:storm_launch_commandline) { should match(/wukong\.environment.*production/) }
+    end
+    context "when setting --command_prefix" do
+      subject { storm_runner('identity', '--input=foo', '--output=bar', '--command_prefix="bundle exec"') }
+      its(:storm_launch_commandline) { should match(/wukong\.command.*bundle.*exec.*wu-bolt.*identity/) }
+    end
+    context "when setting --bolt_command" do
+      subject { storm_runner('identity', '--input=foo', '--output=bar', '--bolt_command="uniq -c"') }
+      its(:storm_launch_commandline) { should match(/wukong\.command.*uniq.*-c/) }
+    end
+    context "when setting --parallelism" do
+      subject { storm_runner('identity', '--input=foo', '--output=bar', '--parallelism=10') }
+      its(:storm_launch_commandline) { should match(/wukong\.parallelism.*10/) }
+    end
+  end
+
+  describe "state options" do
+    context "when writing to Kafka" do
+      subject { storm_runner('identity', '--input=foo', '--output=bar') }
+      its(:storm_launch_commandline) { should match(/wukong\.output\.kafka\.topic.*bar/) }
+    end
+  end
+
+end
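The spout specs above also document how the generic offset flags are translated per spout type: for the Kafka spout, --from_beginning and --from_end become the Kafka sentinel offsets -2 (earliest) and -1 (latest); for blob/S3 spouts they select the EARLIEST or LATEST start state, with --offset forcing an EXPLICIT marker and the default being RESUME. A minimal Ruby sketch of that mapping, inferred from the expected commandlines (hypothetical helpers, not the gem's implementation):

    # Kafka spout: translate flags into Kafka's special offset values.
    def kafka_offset(settings)
      return -2 if settings[:from_beginning]  # earliest available offset
      return -1 if settings[:from_end]        # latest offset
      settings[:offset]                       # explicit offset, else nil
    end

    # Blob/S3 spout: translate the same flags into a start state.
    def blob_start(settings)
      return 'EARLIEST' if settings[:from_beginning]
      return 'LATEST'   if settings[:from_end]
      settings[:offset] ? 'EXPLICIT' : 'RESUME'
    end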