wukong-storm 0.1.1 → 0.2.0

src/main/java/com/infochimps/wukong/storm/StateBuilder.java ADDED
@@ -0,0 +1,46 @@
+ package com.infochimps.wukong.storm;
+
+ import org.apache.log4j.Logger;
+ import com.infochimps.storm.trident.KafkaState;
+
+ public class StateBuilder extends Builder {
+
+     static Logger LOG = Logger.getLogger(StateBuilder.class);
+
+     public KafkaState.Factory state() {
+         return new KafkaState.Factory(kafkaOutputTopic(), zookeeperHosts());
+     }
+
+     public KafkaState.Updater updater() {
+         return new KafkaState.Updater();
+     }
+
+     @Override
+     public Boolean valid() {
+         if (kafkaOutputTopic() == null) {
+             LOG.error("Must set a Kafka output topic using the " + KAFKA_OUTPUT_TOPIC + " property");
+             return false;
+         }
+         return true;
+     }
+
+     @Override
+     public void logInfo() {
+         LOG.info("STATE: Writing to Kafka topic <" + kafkaOutputTopic() + ">");
+     }
+
+     public static String usage() {
+         String s = "STATE OPTIONS\n"
+             + "\n"
+             + "The only available state is Kafka, which has the following options:\n"
+             + "\n"
+             + " " + String.format("%10s", KAFKA_OUTPUT_TOPIC) + " The Kafka output topic (Required)\n";
+         return s;
+     }
+
+     public static String KAFKA_OUTPUT_TOPIC = "wukong.output.kafka.topic";
+     public String kafkaOutputTopic() {
+         return prop(KAFKA_OUTPUT_TOPIC);
+     }
+
+ }
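StateBuilder pulls all of its configuration from Java system properties through the Builder base class, which is not part of this diff. The following is a minimal sketch, not code from the gem, showing how the class could be exercised on its own; it assumes Builder has a no-arg constructor and that zookeeperHosts() falls back to a default, and the class name StateBuilderSketch plus the topic value "wukong-output" are made up for illustration.

package com.infochimps.wukong.storm;

import com.infochimps.storm.trident.KafkaState;

public class StateBuilderSketch {
    public static void main(String[] args) {
        // The only required setting is the Kafka output topic property.
        System.setProperty(StateBuilder.KAFKA_OUTPUT_TOPIC, "wukong-output");

        StateBuilder builder = new StateBuilder();
        if (!builder.valid()) {            // logs an error if the topic is missing
            System.err.println(StateBuilder.usage());
            return;
        }
        builder.logInfo();                 // "STATE: Writing to Kafka topic <wukong-output>"

        // The two objects that TopologyBuilder wires into partitionPersist():
        KafkaState.Factory factory = builder.state();
        KafkaState.Updater updater = builder.updater();
    }
}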
src/main/java/com/infochimps/wukong/storm/TopologyBuilder.java ADDED
@@ -0,0 +1,130 @@
+ package com.infochimps.wukong.storm;
+
+ import java.util.Map;
+ import java.util.HashMap;
+ import java.util.Arrays;
+
+ import org.apache.log4j.Logger;
+
+ import backtype.storm.generated.StormTopology;
+ import backtype.storm.tuple.Fields;
+
+ import storm.trident.Stream;
+ import storm.trident.TridentTopology;
+
+ import storm.trident.operation.BaseFunction;
+ import storm.trident.operation.TridentCollector;
+ import backtype.storm.tuple.Values;
+ import storm.trident.tuple.TridentTuple;
+
+ public class TopologyBuilder extends Builder {
+
+     private static class CombineMetadata extends BaseFunction {
+         @Override
+         public void execute(TridentTuple tuple, TridentCollector collector) {
+             String content = tuple.getStringByField("content");
+             String metadata = tuple.getStringByField("metadata");
+             Integer lineNumber = tuple.getIntegerByField("linenumber");
+             LOG.debug(String.format("%s\t%s\t%s", metadata, content, lineNumber));
+             collector.emit(new Values(String.format("%s\t%s\t%s", metadata, content, lineNumber)));
+         }
+     }
+
+     private SpoutBuilder spoutBuilder;
+     private DataflowBuilder dataflowBuilder;
+     private StateBuilder stateBuilder;
+
+     static Logger LOG = Logger.getLogger(TopologyBuilder.class);
+
+     public TopologyBuilder() {
+         this.spoutBuilder = new SpoutBuilder();
+         this.dataflowBuilder = new DataflowBuilder(spoutBuilder);
+         this.stateBuilder = new StateBuilder();
+     }
+
+     @Override
+     public Boolean valid() {
+         if (topologyName() == null) {
+             LOG.error("Must set a topology name using the " + TOPOLOGY_NAME + " property");
+             return false;
+         }
+         if (!spoutBuilder.valid()) { return false; }
+         if (!dataflowBuilder.valid()) { return false; }
+         if (!stateBuilder.valid()) { return false; }
+         return true;
+     }
+
+     @Override
+     public void logInfo() {
+         LOG.info("\n");
+         spoutBuilder.logInfo();
+         dataflowBuilder.logInfo();
+         stateBuilder.logInfo();
+     }
+
+     public StormTopology topology() {
+         TridentTopology top = new TridentTopology();
+
+         Stream spoutOutput = top.newStream(topologyName(), spoutBuilder.spout())
+             .parallelismHint(spoutBuilder.inputParallelism());
+
+         Stream possiblyShuffledSpoutOutput;
+         if (needToShuffleSpoutOutput()) {
+             possiblyShuffledSpoutOutput = spoutOutput.shuffle();
+         } else {
+             possiblyShuffledSpoutOutput = spoutOutput;
+         }
+
+         Stream dataflowInput;
+         if (spoutBuilder.isBlobSpout()) {
+             dataflowInput = possiblyShuffledSpoutOutput.each(new Fields("content", "metadata", "linenumber"), new CombineMetadata(), new Fields("str"));
+         } else {
+             dataflowInput = possiblyShuffledSpoutOutput;
+         }
+
+         Stream dataflowOutput = dataflowInput.each(new Fields("str"), dataflowBuilder.dataflow(), new Fields("_wukong"))
+             .parallelismHint(dataflowBuilder.dataflowParallelism());
+
+         dataflowOutput.partitionPersist(stateBuilder.state(), new Fields("_wukong"), stateBuilder.updater());
+
+         return top.build();
+     }
+
+     public static String usage() {
+         String s = "\n"
+             + "Dynamically assemble and launch a parametrized Storm topology that\n"
+             + "embeds Wukong dataflow(s). The current overall \"shape\" of the\n"
+             + "topology is\n"
+             + "\n"
+             + " spout -> wukong dataflow -> state\n"
+             + "\n"
+             + "The available spouts read from Kafka or S3. The only available state\n"
+             + "is Kafka.\n"
+             + "\n"
+             + "TOPOLOGY OPTIONS\n"
+             + "\n"
+             + "The following options can be used for any topology:\n"
+             + "\n"
+             + " " + String.format("%10s", TOPOLOGY_NAME) + " Name of the Storm topology that will be launched (Required)\n"
+             + " " + String.format("%10s", KAFKA_HOSTS) + " Comma-separated list of Kafka host (and optional port) pairs (Default: " + DEFAULT_KAFKA_HOSTS + ")\n"
+             + " " + String.format("%10s", ZOOKEEPER_HOSTS) + " Comma-separated list of Zookeeper host (and optional port) pairs (Default: " + DEFAULT_ZOOKEEPER_HOSTS + ")\n"
+             + "\n"
+             + SpoutBuilder.usage()
+             + "\n"
+             + DataflowBuilder.usage()
+             + "\n"
+             + StateBuilder.usage()
+             + "\n";
+         return s;
+     }
+
+     private Boolean needToShuffleSpoutOutput() {
+         return (dataflowBuilder.dataflowParallelism() > spoutBuilder.inputParallelism());
+     }
+
+     public static String TOPOLOGY_NAME = "wukong.topology";
+     public String topologyName() {
+         return prop(TOPOLOGY_NAME);
+     }
+
+ }
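topology() assembles the Trident flow as spout → optional shuffle → Wukong dataflow → partitionPersist into the Kafka state, shuffling only when the dataflow's parallelism exceeds the spout's. Below is a rough sketch, not part of the gem, of running the assembled topology on an in-process cluster for local testing; it assumes the spout, dataflow, and state properties required by the sub-builders have already been set as system properties (only the topology name is shown), and LocalRunSketch is a hypothetical class name.

package com.infochimps.wukong.storm;

import backtype.storm.Config;
import backtype.storm.LocalCluster;

public class LocalRunSketch {
    public static void main(String[] args) {
        System.setProperty(TopologyBuilder.TOPOLOGY_NAME, "local-test");
        // ...the spout, dataflow, and state properties would also need to be set here...

        TopologyBuilder builder = new TopologyBuilder();
        if (!builder.valid()) {
            System.err.println(TopologyBuilder.usage());
            System.exit(1);
        }
        builder.logInfo();

        Config config = new Config();
        config.setDebug(true);

        // Run in-process instead of going through StormSubmitter.
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology(builder.topologyName(), config, builder.topology());
    }
}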
src/main/java/com/infochimps/wukong/storm/TopologySubmitter.java ADDED
@@ -0,0 +1,181 @@
+ package com.infochimps.wukong.storm;
+
+ import java.io.File;
+
+ import org.apache.log4j.Logger;
+
+ import backtype.storm.Config;
+ import backtype.storm.StormSubmitter;
+ import backtype.storm.generated.AlreadyAliveException;
+ import backtype.storm.generated.InvalidTopologyException;
+
+ import com.infochimps.wukong.storm.TopologyBuilder;
+
+ public class TopologySubmitter {
+
+     private static Logger LOG = Logger.getLogger(TopologySubmitter.class);
+
+     private TopologyBuilder builder;
+     private Config config;
+
+     public static void main(String[] args) throws Exception {
+         setPropertiesFromArgsBecauseStupidlyHard(args);
+         TopologySubmitter submitter = new TopologySubmitter();
+         submitter.setConfig();
+         submitter.validate();
+         submitter.submit();
+         System.exit(0);
+     }
+
+     public static void setPropertiesFromArgsBecauseStupidlyHard(String[] args) {
+         int numArgs = args.length;
+         int argIndex = 0;
+         boolean isOption = false;
+         while (argIndex < numArgs) {
+             String arg = args[argIndex];
+             if (isOption) {
+                 setPropertyFromArgBecauseStupidlyHard(arg);
+                 isOption = false;
+             } else {
+                 if (arg.matches("-D.+")) {
+                     setPropertyFromArgBecauseStupidlyHard(arg.substring(2));
+                 } else if (arg.matches("-D")) {
+                     isOption = true;
+                 } else {
+                     LOG.error("Malformed option: " + arg);
+                 }
+             }
+             argIndex += 1;
+         }
+     }
+
+     private static void setPropertyFromArgBecauseStupidlyHard(String arg) {
+         String[] parts = arg.split("=");
+         if (parts.length >= 2) {
+             String key = parts[0];
+             String value = arg.substring(key.length() + 1);
+             System.setProperty(key, value);
+         } else {
+             LOG.error("Invalid property: " + arg);
+         }
+     }
+
+     private String prop(String key, String defaultValue) {
+         if (System.getProperty(key) == null) {
+             System.setProperty(key, defaultValue);
+         }
+         return prop(key);
+     }
+
+     private String prop(String key) {
+         return System.getProperty(key);
+     }
+
+     public TopologySubmitter() {
+         this.builder = new TopologyBuilder();
+         this.config = new Config();
+     }
+
+     private void validate() {
+         if (!builder.valid()) {
+             System.out.println(usage());
+             System.exit(1);
+         }
+     }
+
+     public String usage() {
+         return "usage: storm jar " + fullyQualifiedClassPath() + " -DOPTION=VALUE ..." + TopologyBuilder.usage();
+     }
+
+     public File fullyQualifiedClassPath() {
+         return new File(TopologySubmitter.class.getProtectionDomain().getCodeSource().getLocation().getPath());
+     }
+
+     public void setConfig() {
+         setDebug();
+         setMaxSpoutPending();
+         setMaxTaskParallelism();
+         setMessageTimeoutSecs();
+         setNumAckers();
+         setNumWorkers();
+         setOptimize();
+         setStatsSampleRate();
+     }
+
+     public void submit() {
+         try {
+             builder.logInfo();
+             StormSubmitter.submitTopology(builder.topologyName(), config, builder.topology());
+         } catch (AlreadyAliveException e) {
+             LOG.error("Topology " + builder.topologyName() + " is already running", e);
+             System.exit(2);
+         } catch (InvalidTopologyException e) {
+             LOG.error("Topology " + builder.topologyName() + " is invalid", e);
+             System.exit(3);
+         }
+     }
+
+     public void setDebug() {
+         String value = prop(Config.TOPOLOGY_DEBUG);
+         if (value != null) {
+             LOG.info("Setting " + Config.TOPOLOGY_DEBUG + " to " + value);
+             config.setDebug(Boolean.parseBoolean(value));
+         }
+     }
+
+     public void setMaxSpoutPending() {
+         String value = prop(Config.TOPOLOGY_MAX_SPOUT_PENDING);
+         if (value != null) {
+             LOG.info("Setting " + Config.TOPOLOGY_MAX_SPOUT_PENDING + " to " + value);
+             config.setMaxSpoutPending(Integer.parseInt(value));
+         }
+     }
+
+     public void setMaxTaskParallelism() {
+         String value = prop(Config.TOPOLOGY_MAX_TASK_PARALLELISM);
+         if (value != null) {
+             LOG.info("Setting " + Config.TOPOLOGY_MAX_TASK_PARALLELISM + " to " + value);
+             config.setMaxTaskParallelism(Integer.parseInt(value));
+         }
+     }
+
+     public void setMessageTimeoutSecs() {
+         String value = prop(Config.TOPOLOGY_MESSAGE_TIMEOUT_SECS);
+         if (value != null) {
+             LOG.info("Setting " + Config.TOPOLOGY_MESSAGE_TIMEOUT_SECS + " to " + value);
+             config.setMessageTimeoutSecs(Integer.parseInt(value));
+         }
+     }
+
+     public void setNumAckers() {
+         String value = prop(Config.TOPOLOGY_ACKER_EXECUTORS);
+         if (value != null) {
+             LOG.info("Setting " + Config.TOPOLOGY_ACKER_EXECUTORS + " to " + value);
+             config.setNumAckers(Integer.parseInt(value));
+         }
+     }
+
+     public void setNumWorkers() {
+         String value = prop(Config.TOPOLOGY_WORKERS);
+         if (value != null) {
+             LOG.info("Setting " + Config.TOPOLOGY_WORKERS + " to " + value);
+             config.setNumWorkers(Integer.parseInt(value));
+         }
+     }
+
+     public void setOptimize() {
+         String value = prop(Config.TOPOLOGY_OPTIMIZE);
+         if (value != null) {
+             LOG.info("Setting " + Config.TOPOLOGY_OPTIMIZE + " to " + value);
+             config.put(Config.TOPOLOGY_OPTIMIZE, Boolean.parseBoolean(value));
+         }
+     }
+
+     public void setStatsSampleRate() {
+         String value = prop(Config.TOPOLOGY_STATS_SAMPLE_RATE);
+         if (value != null) {
+             LOG.info("Setting " + Config.TOPOLOGY_STATS_SAMPLE_RATE + " to " + value);
+             config.setStatsSampleRate(Double.parseDouble(value));
+         }
+     }
+ }
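setPropertiesFromArgsBecauseStupidlyHard() accepts both the attached -Dkey=value form and the detached "-D key=value" form, and splits only on the first '=', so values that themselves contain '=' survive intact. A small sketch of that behavior follows; wukong.topology and wukong.output.kafka.topic are the real property keys defined by the builders above, while some.hypothetical.key and ArgParsingSketch are, as the names say, made up for illustration.

import com.infochimps.wukong.storm.TopologySubmitter;

public class ArgParsingSketch {
    public static void main(String[] args) {
        TopologySubmitter.setPropertiesFromArgsBecauseStupidlyHard(new String[] {
            "-Dwukong.topology=my-topology",             // attached form
            "-D", "wukong.output.kafka.topic=events",    // detached form
            "-Dsome.hypothetical.key=a=b"                // value containing '=' is preserved
        });

        // Each option lands in the JVM system properties:
        System.out.println(System.getProperty("wukong.topology"));           // my-topology
        System.out.println(System.getProperty("wukong.output.kafka.topic")); // events
        System.out.println(System.getProperty("some.hypothetical.key"));     // a=b
    }
}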
data/wukong-storm.gemspec CHANGED
@@ -14,9 +14,10 @@ Gem::Specification.new do |gem|
   EOF
 
   gem.files = `git ls-files`.split("\n")
-  gem.executables = ['wu-storm']
+  gem.executables = ['wu-storm', 'wu-bolt']
   gem.test_files = gem.files.grep(/^spec/)
   gem.require_paths = ['lib']
 
-  gem.add_dependency('wukong', '3.0.1')
+  gem.add_dependency('wukong', '4.0.0')
+  gem.add_dependency('kafka-rb')
 end
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: wukong-storm
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.2.0
 prerelease:
 platform: ruby
 authors:
@@ -10,7 +10,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-03-07 00:00:00.000000000 Z
+date: 2014-03-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: wukong
@@ -19,7 +19,7 @@ dependencies:
     requirements:
     - - '='
       - !ruby/object:Gem::Version
-        version: 3.0.1
+        version: 4.0.0
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
@@ -27,11 +27,28 @@ dependencies:
     requirements:
     - - '='
       - !ruby/object:Gem::Version
-        version: 3.0.1
+        version: 4.0.0
+- !ruby/object:Gem::Dependency
+  name: kafka-rb
+  requirement: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
 description: ''
 email: coders@infochimps.org
 executables:
 - wu-storm
+- wu-bolt
 extensions: []
 extra_rdoc_files: []
 files:
@@ -42,15 +59,31 @@ files:
 - LICENSE.md
 - README.md
 - Rakefile
+- bin/wu-bolt
 - bin/wu-storm
 - lib/wukong-storm.rb
-- lib/wukong-storm/driver.rb
-- lib/wukong-storm/runner.rb
+- lib/wukong-storm/bolt_driver.rb
+- lib/wukong-storm/bolt_runner.rb
+- lib/wukong-storm/storm_invocation.rb
+- lib/wukong-storm/storm_runner.rb
 - lib/wukong-storm/storm_settings.java
 - lib/wukong-storm/version.rb
+- lib/wukong-storm/wukong-storm.jar
+- pom.xml
 - spec/spec_helper.rb
 - spec/support/examples.rb
-- spec/wu_storm_spec.rb
+- spec/wukong-storm/bolt_driver_spec.rb
+- spec/wukong-storm/storm_invocation_spec.rb
+- spec/wukong-storm/storm_runner_spec.rb
+- spec/wukong-storm/wu-bolt_spec.rb
+- spec/wukong-storm/wu-storm_spec.rb
+- spec/wukong-storm_spec.rb
+- src/main/java/com/infochimps/wukong/storm/Builder.java
+- src/main/java/com/infochimps/wukong/storm/DataflowBuilder.java
+- src/main/java/com/infochimps/wukong/storm/SpoutBuilder.java
+- src/main/java/com/infochimps/wukong/storm/StateBuilder.java
+- src/main/java/com/infochimps/wukong/storm/TopologyBuilder.java
+- src/main/java/com/infochimps/wukong/storm/TopologySubmitter.java
 - wukong-storm.gemspec
 homepage: https://github.com/infochimps-labs/wukong-storm
 licenses:
@@ -67,7 +100,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
       version: '0'
       segments:
       - 0
-      hash: -3852445642211148218
+      hash: -2207093139671910492
 required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
   requirements:
@@ -76,15 +109,20 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
       segments:
       - 0
-      hash: -3852445642211148218
+      hash: -2207093139671910492
 requirements: []
 rubyforge_project:
-rubygems_version: 1.8.24
+rubygems_version: 1.8.23
 signing_key:
 specification_version: 3
 summary: Storm processing for Ruby
 test_files:
 - spec/spec_helper.rb
 - spec/support/examples.rb
-- spec/wu_storm_spec.rb
+- spec/wukong-storm/bolt_driver_spec.rb
+- spec/wukong-storm/storm_invocation_spec.rb
+- spec/wukong-storm/storm_runner_spec.rb
+- spec/wukong-storm/wu-bolt_spec.rb
+- spec/wukong-storm/wu-storm_spec.rb
+- spec/wukong-storm_spec.rb
 has_rdoc: