kb-redstorm 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. data/CHANGELOG.md +74 -0
  2. data/LICENSE.md +13 -0
  3. data/README.md +375 -0
  4. data/Rakefile +11 -0
  5. data/bin/redstorm +15 -0
  6. data/examples/native/Gemfile +2 -0
  7. data/examples/native/cluster_word_count_topology.rb +25 -0
  8. data/examples/native/exclamation_bolt.rb +21 -0
  9. data/examples/native/local_exclamation_topology.rb +31 -0
  10. data/examples/native/local_exclamation_topology2.rb +48 -0
  11. data/examples/native/local_redis_word_count_topology.rb +69 -0
  12. data/examples/native/local_word_count_topology.rb +27 -0
  13. data/examples/native/random_sentence_spout.rb +30 -0
  14. data/examples/native/split_sentence_bolt.rb +20 -0
  15. data/examples/native/word_count_bolt.rb +26 -0
  16. data/examples/shell/resources/splitsentence.py +9 -0
  17. data/examples/shell/resources/storm.py +206 -0
  18. data/examples/shell/shell_topology.rb +41 -0
  19. data/examples/simple/exclamation_bolt.rb +10 -0
  20. data/examples/simple/exclamation_topology.rb +45 -0
  21. data/examples/simple/exclamation_topology2.rb +45 -0
  22. data/examples/simple/kafka_topology.rb +55 -0
  23. data/examples/simple/random_sentence_spout.rb +21 -0
  24. data/examples/simple/redis_word_count_topology.rb +61 -0
  25. data/examples/simple/ruby_version_topology.rb +32 -0
  26. data/examples/simple/split_sentence_bolt.rb +33 -0
  27. data/examples/simple/word_count_bolt.rb +19 -0
  28. data/examples/simple/word_count_topology.rb +38 -0
  29. data/ivy/settings.xml +11 -0
  30. data/lib/red_storm.rb +9 -0
  31. data/lib/red_storm/application.rb +85 -0
  32. data/lib/red_storm/configuration.rb +16 -0
  33. data/lib/red_storm/configurator.rb +26 -0
  34. data/lib/red_storm/environment.rb +41 -0
  35. data/lib/red_storm/loggable.rb +15 -0
  36. data/lib/red_storm/proxy/batch_spout.rb +71 -0
  37. data/lib/red_storm/proxy/bolt.rb +63 -0
  38. data/lib/red_storm/proxy/proxy_function.rb +48 -0
  39. data/lib/red_storm/proxy/spout.rb +87 -0
  40. data/lib/red_storm/simple_bolt.rb +135 -0
  41. data/lib/red_storm/simple_drpc_topology.rb +87 -0
  42. data/lib/red_storm/simple_spout.rb +184 -0
  43. data/lib/red_storm/simple_topology.rb +209 -0
  44. data/lib/red_storm/topology_launcher.rb +54 -0
  45. data/lib/red_storm/version.rb +3 -0
  46. data/lib/tasks/red_storm.rake +272 -0
  47. data/src/main/redstorm/storm/jruby/JRubyBatchSpout.java +89 -0
  48. data/src/main/redstorm/storm/jruby/JRubyBolt.java +88 -0
  49. data/src/main/redstorm/storm/jruby/JRubyProxyFunction.java +59 -0
  50. data/src/main/redstorm/storm/jruby/JRubyShellBolt.java +26 -0
  51. data/src/main/redstorm/storm/jruby/JRubyShellSpout.java +26 -0
  52. data/src/main/redstorm/storm/jruby/JRubySpout.java +107 -0
  53. metadata +134 -0
@@ -0,0 +1,41 @@
1
+ require 'red_storm'
2
+ require 'thread'
3
+
4
+ java_import 'redstorm.storm.jruby.JRubyShellBolt'
5
+
6
+ class SimpleSpout < RedStorm::SimpleSpout
7
+ on_init do
8
+ @q = Queue.new
9
+ @q << "the quick red fox"
10
+ end
11
+
12
+ on_send do
13
+ # avoid putting the thread to sleep endlessly on @q.pop which will prevent local cluster.shutdown
14
+ sleep(1)
15
+ @q.pop unless @q.empty?
16
+ end
17
+ end
18
+
19
+ class ShellTopology < RedStorm::SimpleTopology
20
+ spout SimpleSpout do
21
+ output_fields :string
22
+ end
23
+
24
+ bolt JRubyShellBolt, ["python", "splitsentence.py"] do
25
+ output_fields "word"
26
+ source SimpleSpout, :shuffle
27
+ end
28
+
29
+ configure do |env|
30
+ debug true
31
+ end
32
+
33
+ on_submit do |env|
34
+ case env
35
+ when :local
36
+ sleep(10)
37
+ cluster.shutdown
38
+ end
39
+ end
40
+ end
41
+
@@ -0,0 +1,10 @@
1
+ require 'red_storm'
2
+
3
+ module RedStorm
4
+ module Examples
5
+ class ExclamationBolt < RedStorm::SimpleBolt
6
+ output_fields :word
7
+ on_receive(:ack => true, :anchor => true) {|tuple| tuple.getString(0) + "!!!"}
8
+ end
9
+ end
10
+ end
@@ -0,0 +1,45 @@
1
+ java_import 'backtype.storm.testing.TestWordSpout'
2
+
3
+ require 'examples/simple/exclamation_bolt'
4
+
5
+ # this example topology uses the Storm TestWordSpout and our own JRuby ExclamationBolt
6
+
7
+ module RedStorm
8
+ module Examples
9
+ class ExclamationTopology < RedStorm::SimpleTopology
10
+ spout TestWordSpout, :parallelism => 5 do
11
+ debug true
12
+ end
13
+
14
+ bolt ExclamationBolt, :parallelism => 2 do
15
+ source TestWordSpout, :shuffle
16
+ # max_task_parallelism 1
17
+ end
18
+
19
+ bolt ExclamationBolt, :id => :ExclamationBolt2, :parallelism => 2 do
20
+ source ExclamationBolt, :shuffle
21
+ # max_task_parallelism 1
22
+ debug true
23
+ end
24
+
25
+ configure do |env|
26
+ debug false
27
+ set "topology.worker.childopts", "-Djruby.compat.version=RUBY1_9"
28
+ case env
29
+ when :local
30
+ max_task_parallelism 40
31
+ when :cluster
32
+ num_workers 20
33
+ max_spout_pending(1000);
34
+ end
35
+ end
36
+
37
+ on_submit do |env|
38
+ if env == :local
39
+ sleep(5)
40
+ cluster.shutdown
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end
@@ -0,0 +1,45 @@
1
+ java_import 'backtype.storm.testing.TestWordSpout'
2
+ require 'red_storm'
3
+
4
+ # this example topology uses the Storm TestWordSpout and our own JRuby ExclamationBolt
5
+ # and a locally defined ExclamationBolt
6
+
7
+ module RedStorm
8
+ module Examples
9
+ class ExclamationBolt < RedStorm::SimpleBolt
10
+ output_fields :word
11
+ on_receive(:ack => true, :anchor => true) {|tuple| "!#{tuple.getString(0)}!"}
12
+ end
13
+
14
+ class ExclamationTopology2 < RedStorm::SimpleTopology
15
+ spout TestWordSpout, :parallelism => 10
16
+
17
+ bolt ExclamationBolt, :parallelism => 3 do
18
+ source TestWordSpout, :shuffle
19
+ end
20
+
21
+ bolt ExclamationBolt, :id => :ExclamationBolt2, :parallelism => 2 do
22
+ source ExclamationBolt, :shuffle
23
+ end
24
+
25
+ configure do |env|
26
+ debug true
27
+ set "topology.worker.childopts", "-Djruby.compat.version=RUBY1_9"
28
+ case env
29
+ when :local
30
+ max_task_parallelism 3
31
+ when :cluster
32
+ num_workers 20
33
+ max_spout_pending(1000);
34
+ end
35
+ end
36
+
37
+ on_submit do |env|
38
+ if env == :local
39
+ sleep(5)
40
+ cluster.shutdown
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end
@@ -0,0 +1,55 @@
1
+ require 'red_storm'
2
+ java_import 'storm.kafka.KafkaConfig'
3
+ java_import 'storm.kafka.SpoutConfig'
4
+ java_import 'storm.kafka.StringScheme'
5
+ java_import 'storm.kafka.KafkaSpout'
6
+
7
+ # the KafkaTopology obviously requires a Kafka server running, you can ajust the
8
+ # host and port below.
9
+ #
10
+ # custom dependencies are required for the Kafka and Scala jars. put the following
11
+ # dependencies in the "Dependencies" file in the root of your RedStorm project:
12
+ #
13
+ # {
14
+ # :storm_artifacts => [
15
+ # "storm:storm:0.8.1, transitive=true",
16
+ # ],
17
+ # :topology_artifacts => [
18
+ # "org.jruby:jruby-complete:1.6.8, transitive=false",
19
+ # "org.scala-lang:scala-library:2.8.0, transitive=false",
20
+ # "storm:kafka:0.7.0-incubating, transitive=false",
21
+ # "storm:storm-kafka:0.8.0-wip4, transitive=false",
22
+ # ],
23
+ # }
24
+
25
+ class KafkaTopology < RedStorm::SimpleTopology
26
+ spout_config = SpoutConfig.new(
27
+ KafkaConfig::ZkHosts.new("localhost:2181", "/brokers"),
28
+ "words", # topic to read from
29
+ "/kafkastorm", # Zookeeper root path to store the consumer offsets
30
+ "discovery" # Zookeeper consumer id to store the consumer offsets
31
+ )
32
+ spout_config.scheme = StringScheme.new
33
+
34
+ class SplitStringBolt < RedStorm::SimpleBolt
35
+ on_receive {|tuple| tuple.getString(0).split.map{|w| [w]}}
36
+ end
37
+
38
+ spout KafkaSpout, [spout_config]
39
+
40
+ bolt SplitStringBolt do
41
+ output_fields :word
42
+ source KafkaSpout, :shuffle
43
+ end
44
+
45
+ configure do |env|
46
+ debug true
47
+ end
48
+
49
+ on_submit do |env|
50
+ if env == :local
51
+ sleep(10)
52
+ cluster.shutdown
53
+ end
54
+ end
55
+ end
@@ -0,0 +1,21 @@
1
+ require 'red_storm'
2
+
3
+ module RedStorm
4
+ module Examples
5
+ class RandomSentenceSpout < RedStorm::SimpleSpout
6
+ output_fields :word
7
+
8
+ on_send {@sentences[rand(@sentences.length)]}
9
+
10
+ on_init do
11
+ @sentences = [
12
+ "the cow jumped over the moon",
13
+ "an apple a day keeps the doctor away",
14
+ "four score and seven years ago",
15
+ "snow white and the seven dwarfs",
16
+ "i am at two with nature"
17
+ ]
18
+ end
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,61 @@
1
+ require 'rubygems'
2
+ require 'red_storm'
3
+
4
+ require 'redis'
5
+ require 'thread'
6
+ require 'examples/simple/word_count_bolt'
7
+
8
+ module RedStorm
9
+ module Examples
10
+
11
+ # RedisWordSpout reads the Redis queue "test" on localhost:6379
12
+ # and emits each word item popped from the queue.
13
+
14
+ class RedisWordSpout < RedStorm::SimpleSpout
15
+ output_fields :word
16
+
17
+ on_send {@q.pop.to_s if @q.size > 0}
18
+
19
+ on_init do
20
+ @q = Queue.new
21
+ @redis_reader = detach_redis_reader
22
+ end
23
+
24
+ private
25
+
26
+ def detach_redis_reader
27
+ Thread.new do
28
+ Thread.current.abort_on_exception = true
29
+
30
+ redis = Redis.new(:host => "localhost", :port => 6379)
31
+ loop do
32
+ if data = redis.blpop("test", 0)
33
+ @q << data[1]
34
+ end
35
+ end
36
+ end
37
+ end
38
+ end
39
+
40
+ class RedisWordCountTopology < RedStorm::SimpleTopology
41
+ spout RedisWordSpout
42
+
43
+ bolt WordCountBolt, :parallelism => 3 do
44
+ source RedisWordSpout, :fields => ["word"]
45
+ end
46
+
47
+ configure do |env|
48
+ debug true
49
+ set "topology.worker.childopts", "-Djruby.compat.version=RUBY1_9"
50
+ case env
51
+ when :local
52
+ max_task_parallelism 3
53
+ when :cluster
54
+ max_task_parallelism 5
55
+ num_workers 20
56
+ max_spout_pending(1000);
57
+ end
58
+ end
59
+ end
60
+ end
61
+ end
@@ -0,0 +1,32 @@
1
+ require 'red_storm'
2
+
3
+ # this example topology only prints the Ruby version string. No tuple is emitted.
4
+
5
+ module RedStorm
6
+ module Examples
7
+ class VersionSpout < RedStorm::SimpleSpout
8
+ output_fields :dummy
9
+ on_init {log.info("***************** RUBY_VERSION=#{RUBY_VERSION}")}
10
+ on_send {}
11
+ end
12
+
13
+ class RubyVersionTopology < RedStorm::SimpleTopology
14
+ spout VersionSpout
15
+
16
+ configure do |env|
17
+ debug true
18
+
19
+ # force the JRuby version property for this topology. this will only affect remote cluster execution
20
+ # for local execution use the --1.8|--1.9 switch when launching
21
+ # set "topology.worker.childopts", "-Djruby.compat.version=RUBY1_9"
22
+ end
23
+
24
+ on_submit do |env|
25
+ if env == :local
26
+ sleep(5)
27
+ cluster.shutdown
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,33 @@
1
+ require 'red_storm'
2
+
3
+ module RedStorm
4
+ module Examples
5
+ class SplitSentenceBolt < RedStorm::SimpleBolt
6
+ output_fields :word
7
+
8
+ # block declaration style using auto-emit (default)
9
+ #
10
+ on_receive {|tuple| tuple.getString(0).split(' ').map{|w| [w]}}
11
+
12
+ # block declaration style without auto-emit
13
+ #
14
+ # on_receive :emit => false do |tuple|
15
+ # tuple.getString(0).split(' ').each{|w| unanchored_emit(w)}
16
+ # end
17
+
18
+ # alternate declaration style using on_receive method
19
+ #
20
+ # on_receive :emit => true
21
+ # def on_receive(tuple)
22
+ # tuple.getString(0).split(' ').map{|w| [w]}
23
+ # end
24
+
25
+ # alternate declaration style using any specific method
26
+ #
27
+ # on_receive :my_method, :emit => true
28
+ # def my_method(tuple)
29
+ # tuple.getString(0).split(' ').map{|w| [w]}
30
+ # end
31
+ end
32
+ end
33
+ end
@@ -0,0 +1,19 @@
1
+ require 'red_storm'
2
+
3
+ module RedStorm
4
+ module Examples
5
+ class WordCountBolt < RedStorm::SimpleBolt
6
+ output_fields :word, :count
7
+ on_init {@counts = Hash.new{|h, k| h[k] = 0}}
8
+
9
+ # block declaration style using auto-emit (default)
10
+ #
11
+ on_receive do |tuple|
12
+ word = tuple.getValue(0).to_s
13
+ @counts[word] += 1
14
+
15
+ [word, @counts[word]]
16
+ end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,38 @@
1
+ require 'examples/simple/random_sentence_spout'
2
+ require 'examples/simple/split_sentence_bolt'
3
+ require 'examples/simple/word_count_bolt'
4
+
5
+ module RedStorm
6
+ module Examples
7
+ class WordCountTopology < SimpleTopology
8
+ spout RandomSentenceSpout, :parallelism => 5
9
+
10
+ bolt SplitSentenceBolt, :parallelism => 8 do
11
+ source RandomSentenceSpout, :shuffle
12
+ end
13
+
14
+ bolt WordCountBolt, :parallelism => 12 do
15
+ source SplitSentenceBolt, :fields => ["word"]
16
+ end
17
+
18
+ configure :word_count do |env|
19
+ debug true
20
+ set "topology.worker.childopts", "-Djruby.compat.version=RUBY1_9"
21
+ case env
22
+ when :local
23
+ max_task_parallelism 3
24
+ when :cluster
25
+ num_workers 20
26
+ max_spout_pending(1000);
27
+ end
28
+ end
29
+
30
+ on_submit do |env|
31
+ if env == :local
32
+ sleep(5)
33
+ cluster.shutdown
34
+ end
35
+ end
36
+ end
37
+ end
38
+ end
data/ivy/settings.xml ADDED
@@ -0,0 +1,11 @@
1
+ <ivysettings>
2
+ <settings defaultResolver="repositories"/>
3
+ <resolvers>
4
+ <chain name="repositories">
5
+ <ibiblio name="ibiblio" m2compatible="true"/>
6
+ <ibiblio name="maven2" root="http://repo.maven.apache.org/maven2/" m2compatible="true"/>
7
+ <ibiblio name="sonatype" root="http://repo.maven.apache.org/maven2/" m2compatible="true"/>
8
+ <ibiblio name="clojars" root="http://clojars.org/repo/" m2compatible="true"/>
9
+ </chain>
10
+ </resolvers>
11
+ </ivysettings>
data/lib/red_storm.rb ADDED
@@ -0,0 +1,9 @@
1
+ require 'rubygems'
2
+
3
+ require 'red_storm/environment'
4
+ require 'red_storm/version'
5
+ require 'red_storm/configuration'
6
+ require 'red_storm/simple_bolt'
7
+ require 'red_storm/simple_spout'
8
+ require 'red_storm/simple_topology'
9
+ require 'red_storm/simple_drpc_topology'
@@ -0,0 +1,85 @@
1
+ require 'red_storm/version'
2
+ require 'red_storm/environment'
3
+
4
+ CWD = Dir.pwd
5
+ TARGET_DIR = "#{CWD}/target"
6
+ TARGET_LIB_DIR = "#{TARGET_DIR}/lib"
7
+ TARGET_SRC_DIR = "#{TARGET_DIR}/src"
8
+ TARGET_GEM_DIR = "#{TARGET_DIR}/gems/gems"
9
+ TARGET_SPECS_DIR = "#{TARGET_DIR}/gems/specifications"
10
+ TARGET_CLASSES_DIR = "#{TARGET_DIR}/classes"
11
+ TARGET_DEPENDENCY_DIR = "#{TARGET_DIR}/dependency"
12
+ TARGET_DEPENDENCY_UNPACKED_DIR = "#{TARGET_DIR}/dependency-unpacked"
13
+ TARGET_CLUSTER_JAR = "#{TARGET_DIR}/cluster-topology.jar"
14
+
15
+ REDSTORM_JAVA_SRC_DIR = "#{RedStorm::REDSTORM_HOME}/src/main"
16
+ REDSTORM_LIB_DIR = "#{RedStorm::REDSTORM_HOME}/lib"
17
+
18
+ SRC_EXAMPLES = "#{RedStorm::REDSTORM_HOME}/examples"
19
+ DST_EXAMPLES = "#{CWD}/examples"
20
+
21
+ SRC_IVY_DIR = "#{RedStorm::REDSTORM_HOME}/ivy"
22
+ DST_IVY_DIR = "#{CWD}/ivy"
23
+ CUSTOM_DEPENDENCIES = "#{CWD}/Dependencies"
24
+ DEFAULT_IVY_SETTINGS = "#{SRC_IVY_DIR}/settings.xml"
25
+ CUSTOM_IVY_SETTINGS = "#{DST_IVY_DIR}/settings.xml"
26
+
27
+
28
+ module RedStorm
29
+
30
+ class Application
31
+ TASKS_FILE = "#{RedStorm::REDSTORM_HOME}/lib/tasks/red_storm.rake"
32
+
33
+ def self.local_storm_command(class_file, ruby_mode = nil)
34
+ src_dir = File.expand_path(File.dirname(class_file))
35
+ "java -Djruby.compat.version=#{RedStorm.jruby_mode_token(ruby_mode)} -cp \"#{TARGET_CLASSES_DIR}:#{TARGET_DEPENDENCY_DIR}/storm/default/*:#{TARGET_DEPENDENCY_DIR}/topology/default/*:#{src_dir}/\" redstorm.TopologyLauncher local #{class_file}"
36
+ end
37
+
38
+ def self.cluster_storm_command(class_file, ruby_mode = nil)
39
+ "storm jar #{TARGET_CLUSTER_JAR} -Djruby.compat.version=#{RedStorm.jruby_mode_token(ruby_mode)} redstorm.TopologyLauncher cluster #{class_file}"
40
+ end
41
+
42
+ def self.usage
43
+ puts("usage: redstorm version")
44
+ puts(" redstorm install")
45
+ puts(" redstorm deps")
46
+ puts(" redstorm build")
47
+ puts(" redstorm examples")
48
+ puts(" redstorm bundle [BUNDLER_GROUP]")
49
+ puts(" redstorm jar DIR1, [DIR2, ...]")
50
+ puts(" redstorm local [--1.8|--1.9] TOPOLOGY_CLASS_PATH")
51
+ puts(" redstorm cluster [--1.8|--1.9] TOPOLOGY_CLASS_PATH")
52
+ exit(1)
53
+ end
54
+
55
+ def self.run(args)
56
+ if args.size > 0
57
+ if args[0] == "version"
58
+ puts("RedStorm v#{VERSION}")
59
+ exit
60
+ elsif ["install", "examples", "jar", "bundle", "deps", "build"].include?(args[0])
61
+ load(TASKS_FILE)
62
+ Rake::Task[args.shift].invoke(args.join(":"))
63
+ exit
64
+ elsif args.size >= 2 && ["local", "cluster"].include?(args[0])
65
+ env = args.delete_at(0)
66
+ version = args.delete("--1.8") || args.delete("--1.9")
67
+ if args.size == 1
68
+ file = args[0]
69
+ load(TASKS_FILE)
70
+ Rake::Task['launch'].invoke(env, version, file)
71
+ exit
72
+ end
73
+ end
74
+ end
75
+ usage
76
+ end
77
+
78
+ def self.subshell(command)
79
+ out = IO.popen(command, {:err => [:child, :out]}) {|io| io.read}
80
+ [!!$?.success?, out]
81
+ end
82
+
83
+ end
84
+
85
+ end