redstorm 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. data/CHANGELOG.md +7 -0
  2. data/README.md +363 -32
  3. data/Rakefile +10 -125
  4. data/bin/redstorm +1 -0
  5. data/examples/{cluster_word_count_topology.rb → native/cluster_word_count_topology.rb} +4 -4
  6. data/examples/{exclamation_bolt.rb → native/exclamation_bolt.rb} +0 -0
  7. data/examples/{local_exclamation_topology.rb → native/local_exclamation_topology.rb} +2 -2
  8. data/examples/{local_exclamation_topology2.rb → native/local_exclamation_topology2.rb} +1 -1
  9. data/examples/{local_redis_word_count_topology.rb → native/local_redis_word_count_topology.rb} +2 -2
  10. data/examples/{local_word_count_topology.rb → native/local_word_count_topology.rb} +4 -4
  11. data/examples/{random_sentence_spout.rb → native/random_sentence_spout.rb} +0 -0
  12. data/examples/{split_sentence_bolt.rb → native/split_sentence_bolt.rb} +0 -0
  13. data/examples/{word_count_bolt.rb → native/word_count_bolt.rb} +0 -0
  14. data/examples/simple/exclamation_bolt.rb +6 -0
  15. data/examples/simple/exclamation_topology.rb +36 -0
  16. data/examples/simple/exclamation_topology2.rb +41 -0
  17. data/examples/simple/random_sentence_spout.rb +18 -0
  18. data/examples/simple/redis_word_count_topology.rb +54 -0
  19. data/examples/simple/split_sentence_bolt.rb +29 -0
  20. data/examples/simple/word_count_bolt.rb +15 -0
  21. data/examples/simple/word_count_topology.rb +34 -0
  22. data/lib/red_storm.rb +3 -0
  23. data/lib/red_storm/application.rb +20 -13
  24. data/lib/red_storm/simple_bolt.rb +106 -0
  25. data/lib/red_storm/simple_spout.rb +136 -0
  26. data/lib/red_storm/simple_topology.rb +191 -0
  27. data/lib/red_storm/topology_launcher.rb +10 -7
  28. data/lib/red_storm/version.rb +1 -1
  29. data/lib/tasks/red_storm.rake +151 -0
  30. data/pom.xml +1 -1
  31. metadata +24 -12
data/Rakefile CHANGED
@@ -1,129 +1,14 @@
1
- require 'ant'
1
+ load 'lib/tasks/red_storm.rake'
2
2
 
3
- begin
4
- # will work from gem, since lib dir is in gem require_paths
5
- require 'red_storm'
6
- rescue LoadError
7
- # will work within RedStorm dev project
8
- $:.unshift './lib'
9
- require 'red_storm'
10
- end
11
-
12
- CWD = Dir.pwd
13
- TARGET_DIR = "#{CWD}/target"
14
- TARGET_SRC_DIR = "#{TARGET_DIR}/src"
15
- TARGET_CLASSES_DIR = "#{TARGET_DIR}/classes"
16
- TARGET_DEPENDENCY_DIR = "#{TARGET_DIR}/dependency"
17
- TARGET_DEPENDENCY_UNPACKED_DIR = "#{TARGET_DIR}/dependency-unpacked"
18
- TARGET_CLUSTER_JAR = "#{TARGET_DIR}/cluster-topology.jar"
19
-
20
- JAVA_SRC_DIR = "#{RedStorm::REDSTORM_HOME}/src/main"
21
- JRUBY_SRC_DIR = "#{RedStorm::REDSTORM_HOME}/lib/red_storm"
22
-
23
- SRC_EXAMPLES = "#{RedStorm::REDSTORM_HOME}/examples"
24
- DST_EXAMPLES = "#{CWD}/examples"
25
-
26
- task :default => [:clean, :build]
27
-
28
- task :launch, :class_file do |t, args|
29
- gem_home = ENV["GEM_HOME"].to_s.empty? ? " -Djruby.gem.home=`gem env home`" : ""
30
- puts("launching java -cp \"#{TARGET_CLASSES_DIR}:#{TARGET_DEPENDENCY_DIR}/*\"#{gem_home} redstorm.TopologyLauncher #{args[:class_file]}")
31
- system("java -cp \"#{TARGET_CLASSES_DIR}:#{TARGET_DEPENDENCY_DIR}/*\"#{gem_home} redstorm.TopologyLauncher #{args[:class_file]}")
32
- end
33
-
34
- task :clean do
35
- ant.delete :dir => TARGET_DIR
36
- end
37
-
38
- task :clean_jar do
39
- ant.delete :dir => "#{TARGET_DIR}/cluster-topology.jar"
40
- end
41
-
42
- task :setup do
43
- ant.mkdir :dir => TARGET_DIR
44
- ant.mkdir :dir => TARGET_CLASSES_DIR
45
- ant.mkdir :dir => TARGET_SRC_DIR
46
- ant.path :id => 'classpath' do
47
- fileset :dir => TARGET_DEPENDENCY_DIR
48
- fileset :dir => TARGET_CLASSES_DIR
49
- end
50
- end
51
-
52
- task :install => [:deps, :build] do
53
- puts("\nRedStorm install completed. All dependencies installed in #{TARGET_DIR}")
54
- end
55
-
56
- task :unpack do
57
- system("rmvn dependency:unpack -f #{RedStorm::REDSTORM_HOME}/pom.xml -DoutputDirectory=#{TARGET_DEPENDENCY_UNPACKED_DIR}")
58
- end
3
+ task :default => :spec
59
4
 
60
- task :jar => [:unpack, :clean_jar] do
61
- ant.jar :destfile => TARGET_CLUSTER_JAR do
62
- fileset :dir => TARGET_CLASSES_DIR
63
- fileset :dir => TARGET_DEPENDENCY_UNPACKED_DIR
64
- fileset :dir => CWD do
65
- exclude :name => "target/**/*"
66
- end
67
- manifest do
68
- attribute :name => "Main-Class", :value => "redstorm.TopologyLauncher"
69
- end
70
- end
71
- puts("\nRedStorm jar completed. Generated jar file #{TARGET_CLUSTER_JAR}")
72
- end
73
-
74
- task :examples do
75
- if File.identical?(SRC_EXAMPLES, DST_EXAMPLES)
76
- STDERR.puts("error: cannot copy examples into itself")
77
- exit(1)
78
- end
79
- if File.exist?(DST_EXAMPLES)
80
- STDERR.puts("error: directory #{DST_EXAMPLES} already exists")
81
- exit(1)
5
+ begin
6
+ require 'rspec/core/rake_task'
7
+ desc "run specs"
8
+ task :spec do
9
+ system("ruby -v")
10
+ RSpec::Core::RakeTask.new
82
11
  end
83
-
84
- puts("copying examples into #{DST_EXAMPLES}")
85
- system("mkdir #{DST_EXAMPLES}")
86
- system("cp -r #{SRC_EXAMPLES}/* #{DST_EXAMPLES}")
87
- puts("\nRedStorm examples completed. All examples copied in #{DST_EXAMPLES}")
88
- end
89
-
90
- task :deps do
91
- system("rmvn dependency:copy-dependencies -f #{RedStorm::REDSTORM_HOME}/pom.xml -DoutputDirectory=#{TARGET_DEPENDENCY_DIR}")
92
- end
93
-
94
- task :build => :setup do
95
- # compile the JRuby proxy classes to Java
96
- build_jruby("#{JRUBY_SRC_DIR}/proxy")
97
-
98
- # compile the generated Java proxy classes
99
- build_java_dir("#{TARGET_SRC_DIR}")
100
-
101
- # generate the JRuby topology launcher
102
- build_jruby("#{JRUBY_SRC_DIR}/topology_launcher.rb")
103
-
104
- # compile the JRuby proxy classes
105
- build_java_dir("#{JAVA_SRC_DIR}")
106
-
107
- # compile the JRuby proxy classes
108
- build_java_dir("#{TARGET_SRC_DIR}")
109
- end
110
-
111
- def build_java_dir(source_folder)
112
- puts("\n--> Compiling Java")
113
- ant.javac(
114
- :srcdir => source_folder,
115
- :destdir => TARGET_CLASSES_DIR,
116
- :classpathref => 'classpath',
117
- :source => "1.6",
118
- :target => "1.6",
119
- :debug => "yes",
120
- :includeantruntime => "no",
121
- :verbose => false,
122
- :listfiles => true
123
- )
124
- end
125
-
126
- def build_jruby(source_path)
127
- puts("\n--> Compiling JRuby")
128
- system("cd #{RedStorm::REDSTORM_HOME}; jrubyc -t #{TARGET_SRC_DIR} --verbose --java -c \"#{TARGET_DEPENDENCY_DIR}/storm-0.5.3.jar\" -c \"#{TARGET_CLASSES_DIR}\" #{source_path}")
12
+ rescue NameError, LoadError => e
13
+ puts e
129
14
  end
@@ -1,6 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require 'rubygems'
4
+ require 'rake'
4
5
 
5
6
  begin
6
7
  # will work from gem, since lib dir is in gem require_paths
@@ -1,9 +1,9 @@
1
- require 'examples/random_sentence_spout'
2
- require 'examples/split_sentence_bolt'
3
- require 'examples/word_count_bolt'
1
+ require 'examples/native/random_sentence_spout'
2
+ require 'examples/native/split_sentence_bolt'
3
+ require 'examples/native/word_count_bolt'
4
4
 
5
5
  class ClusterWordCountTopology
6
- def start(base_class_path)
6
+ def start(base_class_path, env)
7
7
  builder = TopologyBuilder.new
8
8
  builder.setSpout(1, JRubySpout.new(base_class_path, "RandomSentenceSpout"), 5)
9
9
  builder.setBolt(2, JRubyBolt.new(base_class_path, "SplitSentenceBolt"), 4).shuffleGrouping(1)
@@ -1,10 +1,10 @@
1
1
  java_import 'backtype.storm.testing.TestWordSpout'
2
- require 'examples/exclamation_bolt'
2
+ require 'examples/native/exclamation_bolt'
3
3
 
4
4
  # this example topology uses the Storm TestWordSpout and our own JRuby ExclamationBolt
5
5
 
6
6
  class LocalExclamationTopology
7
- def start(base_class_path)
7
+ def start(base_class_path, env)
8
8
  builder = TopologyBuilder.new
9
9
 
10
10
  builder.setSpout(1, TestWordSpout.new, 10)
@@ -18,7 +18,7 @@ end
18
18
  # this example topology uses the Storm TestWordSpout and our own JRuby ExclamationBolt
19
19
 
20
20
  class LocalExclamationTopology2
21
- def start(base_class_path)
21
+ def start(base_class_path, env)
22
22
  builder = TopologyBuilder.new
23
23
 
24
24
  builder.setSpout(1, TestWordSpout.new, 10)
@@ -1,6 +1,6 @@
1
1
  require 'redis'
2
2
  require 'thread'
3
- require 'examples/word_count_bolt'
3
+ require 'examples/native/word_count_bolt'
4
4
 
5
5
  # RedisWordSpout reads the Redis queue "test" on localhost:6379
6
6
  # and emits each word items pop'ed from the queue.
@@ -41,7 +41,7 @@ class RedisWordSpout
41
41
  end
42
42
 
43
43
  class LocalRedisWordCountTopology
44
- def start(base_class_path)
44
+ def start(base_class_path, env)
45
45
  builder = TopologyBuilder.new
46
46
  builder.setSpout(1, JRubySpout.new(base_class_path, "RedisWordSpout"), 1)
47
47
  builder.setBolt(2, JRubyBolt.new(base_class_path, "WordCountBolt"), 3).fieldsGrouping(1, Fields.new("word"))
@@ -1,9 +1,9 @@
1
- require 'examples/random_sentence_spout'
2
- require 'examples/split_sentence_bolt'
3
- require 'examples/word_count_bolt'
1
+ require 'examples/native/random_sentence_spout'
2
+ require 'examples/native/split_sentence_bolt'
3
+ require 'examples/native/word_count_bolt'
4
4
 
5
5
  class LocalWordCountTopology
6
- def start(base_class_path)
6
+ def start(base_class_path, env)
7
7
  builder = TopologyBuilder.new
8
8
  builder.setSpout(1, JRubySpout.new(base_class_path, "RandomSentenceSpout"), 5)
9
9
  builder.setBolt(2, JRubyBolt.new(base_class_path, "SplitSentenceBolt"), 8).shuffleGrouping(1)
@@ -0,0 +1,6 @@
1
+ require 'red_storm'
2
+
3
+ class ExclamationBolt < RedStorm::SimpleBolt
4
+ output_fields :word
5
+ on_receive (:ack => true, :anchor => true) {|tuple| tuple.getString(0) + "!!!"}
6
+ end
@@ -0,0 +1,36 @@
1
+ java_import 'backtype.storm.testing.TestWordSpout'
2
+
3
+ require 'examples/simple/exclamation_bolt'
4
+
5
+ # this example topology uses the Storm TestWordSpout and our own JRuby ExclamationBolt
6
+
7
+ class ExclamationTopology < RedStorm::SimpleTopology
8
+ spout TestWordSpout, :parallelism => 10
9
+
10
+ bolt ExclamationBolt, :parallelism => 3 do
11
+ source TestWordSpout, :shuffle
12
+ end
13
+
14
+ bolt ExclamationBolt, :id => :ignore, :parallelism => 2 do
15
+ source ExclamationBolt, :shuffle
16
+ end
17
+
18
+ configure do |env|
19
+ case env
20
+ when :local
21
+ debug true
22
+ max_task_parallelism 3
23
+ when :cluster
24
+ debug true
25
+ num_workers 20
26
+ max_spout_pending(1000);
27
+ end
28
+ end
29
+
30
+ on_submit do |env|
31
+ if env == :local
32
+ sleep(5)
33
+ cluster.shutdown
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,41 @@
1
+ java_import 'backtype.storm.testing.TestWordSpout'
2
+ require 'red_storm'
3
+
4
+ # this example topology uses the Storm TestWordSpout and our own JRuby ExclamationBolt
5
+ # and a locally defined ExclamationBolt
6
+
7
+ class ExclamationBolt < RedStorm::SimpleBolt
8
+ output_fields :word
9
+ on_receive(:ack => true, :anchor => true) {|tuple| "!#{tuple.getString(0)}!"}
10
+ end
11
+
12
+ class ExclamationTopology2 < RedStorm::SimpleTopology
13
+ spout TestWordSpout, :parallelism => 10
14
+
15
+ bolt ExclamationBolt, :parallelism => 3 do
16
+ source TestWordSpout, :shuffle
17
+ end
18
+
19
+ bolt ExclamationBolt, :id => :ignore, :parallelism => 2 do
20
+ source ExclamationBolt, :shuffle
21
+ end
22
+
23
+ configure do |env|
24
+ case env
25
+ when :local
26
+ debug true
27
+ max_task_parallelism 3
28
+ when :cluster
29
+ debug true
30
+ num_workers 20
31
+ max_spout_pending(1000);
32
+ end
33
+ end
34
+
35
+ on_submit do |env|
36
+ if env == :local
37
+ sleep(5)
38
+ cluster.shutdown
39
+ end
40
+ end
41
+ end
@@ -0,0 +1,18 @@
1
+ require 'red_storm'
2
+
3
+ class RandomSentenceSpout < RedStorm::SimpleSpout
4
+ set :is_distributed => true
5
+ output_fields :word
6
+
7
+ on_send {@sentences[rand(@sentences.length)]}
8
+
9
+ on_init do
10
+ @sentences = [
11
+ "the cow jumped over the moon",
12
+ "an apple a day keeps the doctor away",
13
+ "four score and seven years ago",
14
+ "snow white and the seven dwarfs",
15
+ "i am at two with nature"
16
+ ]
17
+ end
18
+ end
@@ -0,0 +1,54 @@
1
+ require 'redis'
2
+ require 'thread'
3
+ require 'red_storm'
4
+
5
+ require 'examples/simple/word_count_bolt'
6
+
7
+ # RedisWordSpout reads the Redis queue "test" on localhost:6379
8
+ # and emits each word items pop'ed from the queue.
9
+
10
+ class RedisWordSpout < RedStorm::SimpleSpout
11
+ output_fields :word
12
+
13
+ on_send {@q.pop if @q.size > 0}
14
+
15
+ on_init do
16
+ @q = Queue.new
17
+ @redis_reader = detach_redis_reader
18
+ end
19
+
20
+ private
21
+
22
+ def detach_redis_reader
23
+ Thread.new do
24
+ Thread.current.abort_on_exception = true
25
+
26
+ redis = Redis.new(:host => "localhost", :port => 6379)
27
+ loop do
28
+ if data = redis.blpop("test", 0)
29
+ @q << data[1]
30
+ end
31
+ end
32
+ end
33
+ end
34
+ end
35
+
36
+ class RedisWordCountTopology < RedStorm::SimpleTopology
37
+ spout RedisWordSpout
38
+
39
+ bolt WordCountBolt, :parallelism => 3 do
40
+ source RedisWordSpout, :fields => ["word"]
41
+ end
42
+
43
+ configure do |env|
44
+ case env
45
+ when :local
46
+ debug true
47
+ max_task_parallelism 3
48
+ when :cluster
49
+ debug true
50
+ num_workers 20
51
+ max_spout_pending(1000);
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,29 @@
1
+ require 'red_storm'
2
+
3
+ class SplitSentenceBolt < RedStorm::SimpleBolt
4
+ output_fields :word
5
+
6
+ # block declaration style using auto-emit (default)
7
+ #
8
+ on_receive {|tuple| tuple.getString(0).split(' ').map{|w| [w]}}
9
+
10
+ # block declaration style no auto-emit
11
+ #
12
+ # on_receive :emit => false do |tuple|
13
+ # tuple.getString(0).split(' ').each{|w| unanchored_emit(w)}
14
+ # end
15
+
16
+ # alternate declaration style using on_receive method
17
+ #
18
+ # on_receive :emit => true
19
+ # def on_receive(tuple)
20
+ # tuple.getString(0).split(' ').map{|w| [w]}
21
+ # end
22
+
23
+ # alternate declaration style using any specific method
24
+ #
25
+ # on_receive :my_method, :emit => true
26
+ # def my_method(tuple)
27
+ # tuple.getString(0).split(' ').map{|w| [w]}
28
+ # end
29
+ end
@@ -0,0 +1,15 @@
1
+ require 'red_storm'
2
+
3
+ class WordCountBolt < RedStorm::SimpleBolt
4
+ output_fields :word, :count
5
+ on_init {@counts = Hash.new{|h, k| h[k] = 0}}
6
+
7
+ # block declaration style using auto-emit (default)
8
+ #
9
+ on_receive do |tuple|
10
+ word = tuple.getString(0)
11
+ @counts[word] += 1
12
+
13
+ [word, @counts[word]]
14
+ end
15
+ end
@@ -0,0 +1,34 @@
1
+ require 'examples/simple/random_sentence_spout'
2
+ require 'examples/simple/split_sentence_bolt'
3
+ require 'examples/simple/word_count_bolt'
4
+
5
+ class WordCountTopology < RedStorm::SimpleTopology
6
+ spout RandomSentenceSpout, :parallelism => 5
7
+
8
+ bolt SplitSentenceBolt, :parallelism => 8 do
9
+ source RandomSentenceSpout, :shuffle
10
+ end
11
+
12
+ bolt WordCountBolt, :parallelism => 12 do
13
+ source SplitSentenceBolt, :fields => ["word"]
14
+ end
15
+
16
+ configure :word_count do |env|
17
+ case env
18
+ when :local
19
+ debug true
20
+ max_task_parallelism 3
21
+ when :cluster
22
+ debug true
23
+ num_workers 20
24
+ max_spout_pending(1000);
25
+ end
26
+ end
27
+
28
+ on_submit do |env|
29
+ if env == :local
30
+ sleep(5)
31
+ cluster.shutdown
32
+ end
33
+ end
34
+ end