kb-redstorm 0.6.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. data/CHANGELOG.md +74 -0
  2. data/LICENSE.md +13 -0
  3. data/README.md +375 -0
  4. data/Rakefile +11 -0
  5. data/bin/redstorm +15 -0
  6. data/examples/native/Gemfile +2 -0
  7. data/examples/native/cluster_word_count_topology.rb +25 -0
  8. data/examples/native/exclamation_bolt.rb +21 -0
  9. data/examples/native/local_exclamation_topology.rb +31 -0
  10. data/examples/native/local_exclamation_topology2.rb +48 -0
  11. data/examples/native/local_redis_word_count_topology.rb +69 -0
  12. data/examples/native/local_word_count_topology.rb +27 -0
  13. data/examples/native/random_sentence_spout.rb +30 -0
  14. data/examples/native/split_sentence_bolt.rb +20 -0
  15. data/examples/native/word_count_bolt.rb +26 -0
  16. data/examples/shell/resources/splitsentence.py +9 -0
  17. data/examples/shell/resources/storm.py +206 -0
  18. data/examples/shell/shell_topology.rb +41 -0
  19. data/examples/simple/exclamation_bolt.rb +10 -0
  20. data/examples/simple/exclamation_topology.rb +45 -0
  21. data/examples/simple/exclamation_topology2.rb +45 -0
  22. data/examples/simple/kafka_topology.rb +55 -0
  23. data/examples/simple/random_sentence_spout.rb +21 -0
  24. data/examples/simple/redis_word_count_topology.rb +61 -0
  25. data/examples/simple/ruby_version_topology.rb +32 -0
  26. data/examples/simple/split_sentence_bolt.rb +33 -0
  27. data/examples/simple/word_count_bolt.rb +19 -0
  28. data/examples/simple/word_count_topology.rb +38 -0
  29. data/ivy/settings.xml +11 -0
  30. data/lib/red_storm.rb +9 -0
  31. data/lib/red_storm/application.rb +85 -0
  32. data/lib/red_storm/configuration.rb +16 -0
  33. data/lib/red_storm/configurator.rb +26 -0
  34. data/lib/red_storm/environment.rb +41 -0
  35. data/lib/red_storm/loggable.rb +15 -0
  36. data/lib/red_storm/proxy/batch_spout.rb +71 -0
  37. data/lib/red_storm/proxy/bolt.rb +63 -0
  38. data/lib/red_storm/proxy/proxy_function.rb +48 -0
  39. data/lib/red_storm/proxy/spout.rb +87 -0
  40. data/lib/red_storm/simple_bolt.rb +135 -0
  41. data/lib/red_storm/simple_drpc_topology.rb +87 -0
  42. data/lib/red_storm/simple_spout.rb +184 -0
  43. data/lib/red_storm/simple_topology.rb +209 -0
  44. data/lib/red_storm/topology_launcher.rb +54 -0
  45. data/lib/red_storm/version.rb +3 -0
  46. data/lib/tasks/red_storm.rake +272 -0
  47. data/src/main/redstorm/storm/jruby/JRubyBatchSpout.java +89 -0
  48. data/src/main/redstorm/storm/jruby/JRubyBolt.java +88 -0
  49. data/src/main/redstorm/storm/jruby/JRubyProxyFunction.java +59 -0
  50. data/src/main/redstorm/storm/jruby/JRubyShellBolt.java +26 -0
  51. data/src/main/redstorm/storm/jruby/JRubyShellSpout.java +26 -0
  52. data/src/main/redstorm/storm/jruby/JRubySpout.java +107 -0
  53. metadata +134 -0
@@ -0,0 +1,41 @@
1
+ require 'red_storm'
2
+ require 'thread'
3
+
4
+ java_import 'redstorm.storm.jruby.JRubyShellBolt'
5
+
6
# Spout for the shell topology example. On init it seeds an in-memory queue
# with a single sentence; each on_send call returns the next queued item, or
# nil once the queue has been drained.
class SimpleSpout < RedStorm::SimpleSpout
  on_init do
    @q = Queue.new
    @q.push("the quick red fox")
  end

  on_send do
    # pause up front rather than blocking forever inside @q.pop, which would
    # prevent the local cluster.shutdown from going through
    sleep(1)
    @q.empty? ? nil : @q.pop
  end
end
18
+
19
# Topology that feeds SimpleSpout into a JRubyShellBolt constructed with the
# ["python", "splitsentence.py"] command.
class ShellTopology < RedStorm::SimpleTopology
  spout(SimpleSpout) { output_fields :string }

  bolt JRubyShellBolt, ["python", "splitsentence.py"] do
    output_fields "word"
    source SimpleSpout, :shuffle
  end

  configure do |_env|
    debug true
  end

  on_submit do |env|
    # only the local environment is shut down from here, after a 10 second run
    if env == :local
      sleep(10)
      cluster.shutdown
    end
  end
end
41
+
@@ -0,0 +1,10 @@
1
+ require 'red_storm'
2
+
3
module RedStorm
  module Examples
    # Bolt whose receive block returns the first string field of the incoming
    # tuple with "!!!" appended. Declared with the :ack and :anchor options on.
    class ExclamationBolt < RedStorm::SimpleBolt
      output_fields :word

      on_receive :ack => true, :anchor => true do |tuple|
        word = tuple.getString(0)
        word + "!!!"
      end
    end
  end
end
@@ -0,0 +1,45 @@
1
+ java_import 'backtype.storm.testing.TestWordSpout'
2
+
3
+ require 'examples/simple/exclamation_bolt'
4
+
5
+ # this example topology uses the Storm TestWordSpout and our own JRuby ExclamationBolt
6
+
7
module RedStorm
  module Examples
    # Chains the Storm TestWordSpout through two ExclamationBolt declarations;
    # the second one is registered under the :ExclamationBolt2 id and takes the
    # first one as its source.
    class ExclamationTopology < RedStorm::SimpleTopology
      spout(TestWordSpout, :parallelism => 5) { debug true }

      bolt ExclamationBolt, :parallelism => 2 do
        source TestWordSpout, :shuffle
        # max_task_parallelism 1
      end

      bolt ExclamationBolt, :id => :ExclamationBolt2, :parallelism => 2 do
        source ExclamationBolt, :shuffle
        # max_task_parallelism 1
        debug true
      end

      configure do |env|
        debug false
        set "topology.worker.childopts", "-Djruby.compat.version=RUBY1_9"

        # environment specific settings
        if env == :local
          max_task_parallelism 40
        elsif env == :cluster
          num_workers 20
          max_spout_pending 1000
        end
      end

      on_submit do |env|
        # in local mode let the topology run for 5 seconds, then stop the cluster
        case env
        when :local
          sleep(5)
          cluster.shutdown
        end
      end
    end
  end
end
@@ -0,0 +1,45 @@
1
+ java_import 'backtype.storm.testing.TestWordSpout'
2
+ require 'red_storm'
3
+
4
+ # this example topology uses the Storm TestWordSpout and our own JRuby ExclamationBolt
5
+ # and a locally defined ExclamationBolt
6
+
7
module RedStorm
  module Examples
    # Bolt defined alongside the topology: its receive block returns the first
    # string field of the tuple wrapped in "!" on both sides.
    class ExclamationBolt < RedStorm::SimpleBolt
      output_fields :word

      on_receive :ack => true, :anchor => true do |tuple|
        "!#{tuple.getString(0)}!"
      end
    end

    # Storm TestWordSpout feeding two ExclamationBolt declarations in sequence.
    class ExclamationTopology2 < RedStorm::SimpleTopology
      spout TestWordSpout, :parallelism => 10

      bolt(ExclamationBolt, :parallelism => 3) { source TestWordSpout, :shuffle }

      bolt(ExclamationBolt, :id => :ExclamationBolt2, :parallelism => 2) { source ExclamationBolt, :shuffle }

      configure do |env|
        debug true
        set "topology.worker.childopts", "-Djruby.compat.version=RUBY1_9"

        # environment specific settings
        if env == :local
          max_task_parallelism 3
        elsif env == :cluster
          num_workers 20
          max_spout_pending 1000
        end
      end

      on_submit do |env|
        # in local mode let the topology run for 5 seconds, then stop the cluster
        case env
        when :local
          sleep(5)
          cluster.shutdown
        end
      end
    end
  end
end
@@ -0,0 +1,55 @@
1
+ require 'red_storm'
2
+ java_import 'storm.kafka.KafkaConfig'
3
+ java_import 'storm.kafka.SpoutConfig'
4
+ java_import 'storm.kafka.StringScheme'
5
+ java_import 'storm.kafka.KafkaSpout'
6
+
7
+ # the KafkaTopology obviously requires a Kafka server running, you can adjust the
8
+ # host and port below.
9
+ #
10
+ # custom dependencies are required for the Kafka and Scala jars. put the following
11
+ # dependencies in the "Dependencies" file in the root of your RedStorm project:
12
+ #
13
+ # {
14
+ # :storm_artifacts => [
15
+ # "storm:storm:0.8.1, transitive=true",
16
+ # ],
17
+ # :topology_artifacts => [
18
+ # "org.jruby:jruby-complete:1.6.8, transitive=false",
19
+ # "org.scala-lang:scala-library:2.8.0, transitive=false",
20
+ # "storm:kafka:0.7.0-incubating, transitive=false",
21
+ # "storm:storm-kafka:0.8.0-wip4, transitive=false",
22
+ # ],
23
+ # }
24
+
25
# Topology that reads strings from a KafkaSpout and splits them into words.
class KafkaTopology < RedStorm::SimpleTopology
  zk_hosts = KafkaConfig::ZkHosts.new("localhost:2181", "/brokers")
  topic = "words"              # topic to read from
  offsets_root = "/kafkastorm" # Zookeeper root path to store the consumer offsets
  consumer_id = "discovery"    # Zookeeper consumer id to store the consumer offsets

  kafka_config = SpoutConfig.new(zk_hosts, topic, offsets_root, consumer_id)
  kafka_config.scheme = StringScheme.new

  # Splits the tuple's first string field on whitespace and returns one
  # single-element array per word.
  class SplitStringBolt < RedStorm::SimpleBolt
    on_receive do |tuple|
      tuple.getString(0).split.map { |word| [word] }
    end
  end

  spout KafkaSpout, [kafka_config]

  bolt SplitStringBolt do
    output_fields :word
    source KafkaSpout, :shuffle
  end

  configure do |_env|
    debug true
  end

  on_submit do |env|
    # in local mode let the topology run for 10 seconds, then stop the cluster
    case env
    when :local
      sleep(10)
      cluster.shutdown
    end
  end
end
@@ -0,0 +1,21 @@
1
+ require 'red_storm'
2
+
3
module RedStorm
  module Examples
    # Spout whose send block returns one of five fixed sentences, picked with
    # rand on every call.
    class RandomSentenceSpout < RedStorm::SimpleSpout
      output_fields :word

      on_send do
        pick = rand(@sentences.length)
        @sentences[pick]
      end

      on_init do
        @sentences = [
          "the cow jumped over the moon",
          "an apple a day keeps the doctor away",
          "four score and seven years ago",
          "snow white and the seven dwarfs",
          "i am at two with nature"
        ]
      end
    end
  end
end
@@ -0,0 +1,61 @@
1
+ require 'rubygems'
2
+ require 'red_storm'
3
+
4
+ require 'redis'
5
+ require 'thread'
6
+ require 'examples/simple/word_count_bolt'
7
+
8
module RedStorm
  module Examples

    # RedisWordSpout reads the Redis list "test" on localhost:6379 from a
    # background thread and buffers each popped item in an in-memory queue.
    # The send block returns the next buffered item as a string, or nil when
    # the buffer is empty.
    class RedisWordSpout < RedStorm::SimpleSpout
      output_fields :word

      on_send do
        @q.size > 0 ? @q.pop.to_s : nil
      end

      on_init do
        @q = Queue.new
        @redis_reader = detach_redis_reader
      end

      private

      # Starts and returns the reader thread. The thread loops forever on a
      # BLPOP of "test" with a timeout argument of 0 and pushes the second
      # element of every reply onto @q.
      def detach_redis_reader
        Thread.new do
          Thread.current.abort_on_exception = true

          redis = Redis.new(:host => "localhost", :port => 6379)
          loop do
            data = redis.blpop("test", 0)
            @q << data[1] if data
          end
        end
      end
    end

    # Word count over the items produced by RedisWordSpout.
    class RedisWordCountTopology < RedStorm::SimpleTopology
      spout RedisWordSpout

      bolt(WordCountBolt, :parallelism => 3) { source RedisWordSpout, :fields => ["word"] }

      configure do |env|
        debug true
        set "topology.worker.childopts", "-Djruby.compat.version=RUBY1_9"

        # environment specific settings
        if env == :local
          max_task_parallelism 3
        elsif env == :cluster
          max_task_parallelism 5
          num_workers 20
          max_spout_pending 1000
        end
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
+ require 'red_storm'
2
+
3
+ # this example topology only prints the Ruby version string. No tuple is emitted.
4
+
5
module RedStorm
  module Examples
    # Spout that logs RUBY_VERSION when initialized; its send block is empty.
    class VersionSpout < RedStorm::SimpleSpout
      output_fields :dummy

      on_init do
        log.info("***************** RUBY_VERSION=#{RUBY_VERSION}")
      end

      on_send { nil }
    end

    # Topology made of the VersionSpout alone.
    class RubyVersionTopology < RedStorm::SimpleTopology
      spout VersionSpout

      configure do |_env|
        debug true

        # force the JRuby version property for this topology. this will only affect remote cluster execution
        # for local execution use the --1.8|--1.9 switch when launching
        # set "topology.worker.childopts", "-Djruby.compat.version=RUBY1_9"
      end

      on_submit do |env|
        # in local mode let the topology run for 5 seconds, then stop the cluster
        case env
        when :local
          sleep(5)
          cluster.shutdown
        end
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
+ require 'red_storm'
2
+
3
module RedStorm
  module Examples
    # Bolt whose receive block splits the tuple's first string field on single
    # spaces and returns one single-element array per word.
    class SplitSentenceBolt < RedStorm::SimpleBolt
      output_fields :word

      # block declaration style using auto-emit (default)
      #
      on_receive do |tuple|
        words = tuple.getString(0).split(' ')
        words.map { |word| [word] }
      end

      # block declaration style no auto-emit
      #
      # on_receive :emit => false do |tuple|
      #   tuple.getString(0).split(' ').each{|w| unanchored_emit(w)}
      # end

      # alternate declaration style using on_receive method
      #
      # on_receive :emit => true
      # def on_receive(tuple)
      #   tuple.getString(0).split(' ').map{|w| [w]}
      # end

      # alternate declaration style using any specific method
      #
      # on_receive :my_method, :emit => true
      # def my_method(tuple)
      #   tuple.getString(0).split(' ').map{|w| [w]}
      # end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ require 'red_storm'
2
+
3
module RedStorm
  module Examples
    # Bolt keeping a per-instance running count of every word it receives.
    # The receive block returns the pair [word, count so far].
    class WordCountBolt < RedStorm::SimpleBolt
      output_fields :word, :count

      # counters start at zero for words not seen yet
      on_init { @counts = Hash.new(0) }

      # block declaration style using auto-emit (default)
      #
      on_receive do |tuple|
        word = tuple.getValue(0).to_s
        seen = (@counts[word] += 1)

        [word, seen]
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
+ require 'examples/simple/random_sentence_spout'
2
+ require 'examples/simple/split_sentence_bolt'
3
+ require 'examples/simple/word_count_bolt'
4
+
5
module RedStorm
  module Examples
    # Word count pipeline: RandomSentenceSpout -> SplitSentenceBolt -> WordCountBolt.
    class WordCountTopology < RedStorm::SimpleTopology
      spout RandomSentenceSpout, :parallelism => 5

      bolt(SplitSentenceBolt, :parallelism => 8) { source RandomSentenceSpout, :shuffle }

      bolt(WordCountBolt, :parallelism => 12) { source SplitSentenceBolt, :fields => ["word"] }

      configure :word_count do |env|
        debug true
        set "topology.worker.childopts", "-Djruby.compat.version=RUBY1_9"

        # environment specific settings
        if env == :local
          max_task_parallelism 3
        elsif env == :cluster
          num_workers 20
          max_spout_pending 1000
        end
      end

      on_submit do |env|
        # in local mode let the topology run for 5 seconds, then stop the cluster
        case env
        when :local
          sleep(5)
          cluster.shutdown
        end
      end
    end
  end
end
data/ivy/settings.xml ADDED
@@ -0,0 +1,11 @@
1
<ivysettings>
  <!-- All dependencies are resolved through the "repositories" chain below. -->
  <settings defaultResolver="repositories"/>
  <resolvers>
    <chain name="repositories">
      <ibiblio name="ibiblio" m2compatible="true"/>
      <!-- Maven Central rejects plain-HTTP requests, so the roots use HTTPS. -->
      <ibiblio name="maven2" root="https://repo.maven.apache.org/maven2/" m2compatible="true"/>
      <!-- NOTE(review): "sonatype" uses the same root as "maven2"; confirm whether a Sonatype repository URL was intended. -->
      <ibiblio name="sonatype" root="https://repo.maven.apache.org/maven2/" m2compatible="true"/>
      <ibiblio name="clojars" root="https://clojars.org/repo/" m2compatible="true"/>
    </chain>
  </resolvers>
</ivysettings>
data/lib/red_storm.rb ADDED
@@ -0,0 +1,9 @@
1
+ require 'rubygems'
2
+
3
+ require 'red_storm/environment'
4
+ require 'red_storm/version'
5
+ require 'red_storm/configuration'
6
+ require 'red_storm/simple_bolt'
7
+ require 'red_storm/simple_spout'
8
+ require 'red_storm/simple_topology'
9
+ require 'red_storm/simple_drpc_topology'
@@ -0,0 +1,85 @@
1
require 'shellwords'

require 'red_storm/version'
require 'red_storm/environment'
3
+
4
# Build output locations, all rooted in the directory redstorm is invoked from.
CWD = Dir.pwd
TARGET_DIR                     = "#{CWD}/target"
TARGET_LIB_DIR                 = "#{TARGET_DIR}/lib"
TARGET_SRC_DIR                 = "#{TARGET_DIR}/src"
TARGET_GEM_DIR                 = "#{TARGET_DIR}/gems/gems"
TARGET_SPECS_DIR               = "#{TARGET_DIR}/gems/specifications"
TARGET_CLASSES_DIR             = "#{TARGET_DIR}/classes"
TARGET_DEPENDENCY_DIR          = "#{TARGET_DIR}/dependency"
TARGET_DEPENDENCY_UNPACKED_DIR = "#{TARGET_DIR}/dependency-unpacked"
TARGET_CLUSTER_JAR             = "#{TARGET_DIR}/cluster-topology.jar"

# Locations under RedStorm::REDSTORM_HOME.
REDSTORM_JAVA_SRC_DIR = "#{RedStorm::REDSTORM_HOME}/src/main"
REDSTORM_LIB_DIR      = "#{RedStorm::REDSTORM_HOME}/lib"

# Examples: source under REDSTORM_HOME, destination under the working directory.
SRC_EXAMPLES = "#{RedStorm::REDSTORM_HOME}/examples"
DST_EXAMPLES = "#{CWD}/examples"

# Ivy configuration: default settings under REDSTORM_HOME, custom files under
# the working directory.
SRC_IVY_DIR          = "#{RedStorm::REDSTORM_HOME}/ivy"
DST_IVY_DIR          = "#{CWD}/ivy"
CUSTOM_DEPENDENCIES  = "#{CWD}/Dependencies"
DEFAULT_IVY_SETTINGS = "#{SRC_IVY_DIR}/settings.xml"
CUSTOM_IVY_SETTINGS  = "#{DST_IVY_DIR}/settings.xml"
26
+
27
+
28
module RedStorm

  # Command line front end: builds the java/storm launch commands and
  # dispatches the redstorm sub-commands to the rake tasks in TASKS_FILE.
  class Application
    TASKS_FILE = "#{RedStorm::REDSTORM_HOME}/lib/tasks/red_storm.rake"

    # Builds the command line that runs a topology in local mode.
    #
    # @param class_file [String] path of the topology class file
    # @param ruby_mode [Object, nil] handed to RedStorm.jruby_mode_token
    # @return [String] the java command line
    def self.local_storm_command(class_file, ruby_mode = nil)
      src_dir = File.expand_path(File.dirname(class_file))
      # class_file is shell-escaped so that paths containing spaces or shell
      # metacharacters reach the launcher as a single argument
      "java -Djruby.compat.version=#{RedStorm.jruby_mode_token(ruby_mode)} -cp \"#{TARGET_CLASSES_DIR}:#{TARGET_DEPENDENCY_DIR}/storm/default/*:#{TARGET_DEPENDENCY_DIR}/topology/default/*:#{src_dir}/\" redstorm.TopologyLauncher local #{Shellwords.escape(class_file)}"
    end

    # Builds the command line that submits a topology through "storm jar".
    #
    # @param class_file [String] path of the topology class file
    # @param ruby_mode [Object, nil] handed to RedStorm.jruby_mode_token
    # @return [String] the storm command line
    def self.cluster_storm_command(class_file, ruby_mode = nil)
      # both paths are shell-escaped; ordinary paths are left unchanged
      "storm jar #{Shellwords.escape(TARGET_CLUSTER_JAR)} -Djruby.compat.version=#{RedStorm.jruby_mode_token(ruby_mode)} redstorm.TopologyLauncher cluster #{Shellwords.escape(class_file)}"
    end

    # Prints the usage text and exits with status 1.
    def self.usage
      puts("usage: redstorm version")
      puts(" redstorm install")
      puts(" redstorm deps")
      puts(" redstorm build")
      puts(" redstorm examples")
      puts(" redstorm bundle [BUNDLER_GROUP]")
      puts(" redstorm jar DIR1, [DIR2, ...]")
      puts(" redstorm local [--1.8|--1.9] TOPOLOGY_CLASS_PATH")
      puts(" redstorm cluster [--1.8|--1.9] TOPOLOGY_CLASS_PATH")
      exit(1)
    end

    # Entry point: interprets the command line arguments and exits once the
    # matching command has run. Anything unrecognized ends in the usage text.
    #
    # @param args [Array<String>] command line arguments; modified in place
    def self.run(args)
      if args.size > 0
        if args[0] == "version"
          puts("RedStorm v#{VERSION}")
          exit
        elsif ["install", "examples", "jar", "bundle", "deps", "build"].include?(args[0])
          load(TASKS_FILE)
          # the remaining arguments are passed to the task as one ":"-joined string
          Rake::Task[args.shift].invoke(args.join(":"))
          exit
        elsif args.size >= 2 && ["local", "cluster"].include?(args[0])
          env = args.delete_at(0)
          version = args.delete("--1.8") || args.delete("--1.9")
          # exactly one argument, the topology class file, must remain
          if args.size == 1
            file = args[0]
            load(TASKS_FILE)
            Rake::Task['launch'].invoke(env, version, file)
            exit
          end
        end
      end
      usage
    end

    # Runs a command with stderr redirected into stdout.
    #
    # @param command [String] the command to run
    # @return [Array] [true/false success flag, combined output read from the command]
    def self.subshell(command)
      out = IO.popen(command, {:err => [:child, :out]}) {|io| io.read}
      [!!$?.success?, out]
    end

  end

end