rflow 1.0.0a1 → 1.0.0a2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rspec +1 -0
  4. data/Gemfile +0 -1
  5. data/NOTES +0 -13
  6. data/README.md +6 -1
  7. data/bin/rflow +2 -9
  8. data/example/basic_config.rb +1 -33
  9. data/example/basic_extensions.rb +0 -98
  10. data/example/http_config.rb +2 -3
  11. data/example/http_extensions.rb +6 -63
  12. data/lib/rflow.rb +31 -39
  13. data/lib/rflow/child_process.rb +112 -0
  14. data/lib/rflow/component.rb +77 -148
  15. data/lib/rflow/component/port.rb +38 -41
  16. data/lib/rflow/components.rb +4 -8
  17. data/lib/rflow/components/clock.rb +49 -0
  18. data/lib/rflow/components/integer.rb +39 -0
  19. data/lib/rflow/components/raw.rb +10 -6
  20. data/lib/rflow/components/replicate.rb +20 -0
  21. data/lib/rflow/components/ruby_proc_filter.rb +27 -0
  22. data/lib/rflow/configuration.rb +105 -184
  23. data/lib/rflow/configuration/component.rb +1 -4
  24. data/lib/rflow/configuration/connection.rb +11 -16
  25. data/lib/rflow/configuration/port.rb +3 -5
  26. data/lib/rflow/configuration/ruby_dsl.rb +105 -119
  27. data/lib/rflow/configuration/setting.rb +19 -25
  28. data/lib/rflow/configuration/shard.rb +1 -3
  29. data/lib/rflow/connection.rb +47 -10
  30. data/lib/rflow/connections.rb +0 -1
  31. data/lib/rflow/connections/zmq_connection.rb +34 -38
  32. data/lib/rflow/daemon_process.rb +155 -0
  33. data/lib/rflow/logger.rb +41 -25
  34. data/lib/rflow/master.rb +23 -105
  35. data/lib/rflow/message.rb +78 -108
  36. data/lib/rflow/pid_file.rb +37 -37
  37. data/lib/rflow/shard.rb +33 -100
  38. data/lib/rflow/version.rb +2 -2
  39. data/rflow.gemspec +2 -2
  40. data/schema/tick.avsc +10 -0
  41. data/spec/fixtures/config_ints.rb +4 -40
  42. data/spec/fixtures/config_shards.rb +1 -2
  43. data/spec/fixtures/extensions_ints.rb +0 -98
  44. data/spec/rflow/component/port_spec.rb +61 -0
  45. data/spec/rflow/components/clock_spec.rb +72 -0
  46. data/spec/rflow/configuration/ruby_dsl_spec.rb +150 -0
  47. data/spec/rflow/configuration_spec.rb +54 -0
  48. data/spec/rflow/forward_to_input_port_spec.rb +48 -0
  49. data/spec/rflow/forward_to_output_port_spec.rb +40 -0
  50. data/spec/rflow/logger_spec.rb +48 -0
  51. data/spec/rflow/message/data/raw_spec.rb +29 -0
  52. data/spec/rflow/message/data_spec.rb +58 -0
  53. data/spec/rflow/message_spec.rb +154 -0
  54. data/spec/rflow_spec.rb +94 -124
  55. data/spec/spec_helper.rb +8 -12
  56. metadata +46 -22
  57. data/lib/rflow/components/raw/extensions.rb +0 -18
  58. data/lib/rflow/port.rb +0 -4
  59. data/lib/rflow/util.rb +0 -19
  60. data/spec/rflow_component_port_spec.rb +0 -58
  61. data/spec/rflow_configuration_ruby_dsl_spec.rb +0 -148
  62. data/spec/rflow_configuration_spec.rb +0 -73
  63. data/spec/rflow_message_data_raw.rb +0 -26
  64. data/spec/rflow_message_data_spec.rb +0 -60
  65. data/spec/rflow_message_spec.rb +0 -182
  66. data/spec/schema_spec.rb +0 -28
  67. data/temp.rb +0 -295
data/lib/rflow/shard.rb CHANGED
@@ -1,148 +1,81 @@
1
- class RFlow
1
+ require 'rflow/child_process'
2
2
 
3
+ class RFlow
3
4
  # An object implementation shared between two processes. The parent
4
5
  # process will instantiate, configure, and run! a shard, at which
5
6
  # point the parent will have access to the shard object and be able
6
7
  # to monitor the underlying processes. The child implementation,
7
- # running in a separate process, will not return from run!, but
8
- # start an Eventmachine reactor, connect the components, and not
9
- # return
8
+ # running in a separate process, will not return from spawn!, but
9
+ # start an EventMachine reactor.
10
10
  class Shard
11
-
12
- # An internal class that represents an instance of the running
13
- # shard, i.e. a process
14
- class Worker
15
-
16
- attr_accessor :shard, :index, :name, :pid
17
- attr_accessor :components
18
- attr_accessor :worker_read, :master_write
19
-
20
- def initialize(shard, index=1)
11
+ class Worker < ChildProcess
12
+ def initialize(shard, index = 1)
13
+ super("#{shard.name}-#{index}", 'Worker')
21
14
  @shard = shard
22
- @index = index
23
- @name = "#{shard.name}-#{index}"
24
-
25
- # Set up the IPC pipes
26
- @worker_read, @master_write = IO.pipe
27
- [@worker_read, @master_write].each do |io|
28
- io.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC)
29
- end
30
15
 
31
- @components = shard.config.components.map do |component_config|
32
- Component.build(component_config)
33
- end
16
+ # build at initialize time to fail fast
17
+ @components = shard.config.components.map {|config| Component.build(config) }
34
18
  end
35
19
 
36
- def handle_signals
37
- ['SIGTERM', 'SIGINT', 'SIGQUIT'].each do |signal|
38
- Signal.trap signal do
39
- Thread.new { shutdown(signal) }.join
40
- end
20
+ def run_process
21
+ EM.run do
22
+ # TODO: Monitor the master
23
+ configure_components!
24
+ connect_components!
25
+ # TODO: need to do proper node synchronization for ZMQ to remove sleep
26
+ sleep 1
27
+ run_components!
41
28
  end
42
29
 
43
- ['SIGUSR1'].each do |signal|
44
- Signal.trap signal do
45
- Thread.new do
46
- RFlow.logger.reopen
47
- end.join
48
- end
49
- end
50
-
51
- # Toggle log level on USR2
52
- ['SIGUSR2'].each do |signal|
53
- Signal.trap signal do
54
- Thread.new do
55
- RFlow.logger.toggle_log_level
56
- end.join
57
- end
58
- end
30
+ RFlow.logger.info "Shutting down worker after EM stopped"
59
31
  end
60
32
 
61
- # Launch another process to execute the shard. The parent
62
- # process retains the original worker object (with pid and IPC
63
- # pipe) to allow for process management
64
- def launch
65
- @pid = Process.fork do
66
- @master_write.close
67
-
68
- handle_signals
69
-
70
- $0 += " #{name}"
71
- Log4r::NDC.push name
72
-
73
- RFlow.logger.info "Worker started"
74
- EM.run do
75
- # TODO: Monitor the master
76
-
77
- connect_components!
78
- # TODO: need to do proper node synchronization for ZMQ to
79
- # remove sleep
80
- sleep 1
81
- run_components!
82
- end
83
-
84
- RFlow.logger.info "Shutting down worker after EM stopped"
33
+ def configure_components!
34
+ RFlow.logger.debug "Configuring components"
35
+ @components.zip(@shard.config.components.map(&:options)).each do |(component, config)|
36
+ RFlow.logger.debug "Configuring component '#{component.name}' (#{component.uuid})"
37
+ component.configure! config
85
38
  end
86
-
87
- @worker_read.close
88
- self
89
39
  end
90
40
 
91
- # Send a command to each component to tell them to connect their
92
- # ports via their connections
93
41
  def connect_components!
94
42
  RFlow.logger.debug "Connecting components"
95
- components.each do |component|
43
+ @components.each do |component|
96
44
  RFlow.logger.debug "Connecting component '#{component.name}' (#{component.uuid})"
97
45
  component.connect!
98
46
  end
99
47
  end
100
48
 
101
- # Start each component running
102
49
  def run_components!
103
50
  RFlow.logger.debug "Running components"
104
- components.each do |component|
51
+ @components.each do |component|
105
52
  RFlow.logger.debug "Running component '#{component.name}' (#{component.uuid})"
106
53
  component.run!
107
54
  end
108
55
  end
109
- end
110
56
 
57
+ def shutdown!(signal)
58
+ EM.stop_event_loop
59
+ super
60
+ end
61
+ end
111
62
 
112
- attr_reader :config, :uuid, :name, :count
113
- attr_accessor :workers
114
-
63
+ attr_reader :config, :name, :count, :workers
115
64
 
116
65
  def initialize(config)
117
66
  @config = config
118
67
  @uuid = config.uuid
119
68
  @name = config.name
120
69
  @count = config.count
121
-
122
- @workers = count.times.map do |i|
123
- Worker.new(self, i+1)
124
- end
70
+ @workers = count.times.map {|i| Worker.new(self, i+1) }
125
71
  end
126
72
 
127
-
128
73
  def run!
129
74
  RFlow.logger.debug "Running shard #{name} with #{count} workers"
130
- workers.each do |worker|
131
- worker.launch
132
- end
75
+ workers.each(&:spawn!)
133
76
 
134
77
  RFlow.logger.debug "#{count} workers started for #{name}: #{workers.map { |w| "#{w.name} (#{w.pid})" }.join(", ")}"
135
78
  workers
136
79
  end
137
-
138
-
139
- # TODO: Implement
140
- def shutdown!
141
- end
142
-
143
-
144
- # TODO: Implement
145
- def cleanup!
146
- end
147
80
  end
148
81
  end
data/lib/rflow/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  class RFlow
2
- VERSION = "1.0.0a1"
3
- end # class RFlow
2
+ VERSION = "1.0.0a2"
3
+ end
data/rflow.gemspec CHANGED
@@ -1,4 +1,3 @@
1
- # -*- encoding: utf-8 -*-
2
1
  lib = File.expand_path('../lib', __FILE__)
3
2
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
3
  require "rflow/version"
@@ -30,7 +29,8 @@ Gem::Specification.new do |s|
30
29
  s.add_dependency "em-zeromq", "~> 0.4.2"
31
30
 
32
31
  s.add_development_dependency "bundler", "~> 1.5"
33
- s.add_development_dependency "rspec", "~> 2.6"
32
+ s.add_development_dependency "rspec", "~> 2.99"
33
+ s.add_development_dependency "rspec-collection_matchers", "~> 0.0.4"
34
34
  s.add_development_dependency "rake", ">= 0.8.7"
35
35
  s.add_development_dependency "yard", "~> 0.8.7"
36
36
  end
data/schema/tick.avsc ADDED
@@ -0,0 +1,10 @@
1
+ {
2
+ "type": "record",
3
+ "name": "Tick",
4
+ "namespace": "org.rflow.message.clock",
5
+ "aliases": [],
6
+ "fields": [
7
+ {"name": "name", "type": ["string", "null"]},
8
+ {"name": "timestamp", "type": ["long", "null"]}
9
+ ]
10
+ }
data/spec/fixtures/config_ints.rb CHANGED
@@ -1,61 +1,25 @@
1
- # Meat of the config file. Stuff above this should probably be in
2
- # separate gems and/or files that are brought in at runtime.
3
1
  RFlow::Configuration::RubyDSL.configure do |config|
4
2
  # Configure the settings, which include paths for various files, log
5
3
  # levels, and component specific stuffs
6
- config.setting('rflow.log_level', 'DEBUG')
4
+ config.setting('rflow.log_level', 'FATAL')
7
5
  config.setting('rflow.application_directory_path', '../tmp')
8
-
9
6
  config.setting('rflow.application_name', 'testapp')
10
7
 
11
- # Add schemas to the list of available. Not convinced this is necessary
12
- # config.schema('schemaname', 'schematype', 'schemadata')
13
-
14
8
  # Instantiate components
15
- # config.component 'generate_ints1', 'RFlow::Components::GenerateIntegerSequence', 'start' => 0, 'finish' => 10, 'step' => 3, 'interval_seconds' => 1
16
- # config.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
17
- # config.component 'filter', 'RFlow::Components::RubyProcFilter', 'filter_proc_string' => 'lambda {|message| true}'
18
- # config.component 'replicate', 'RFlow::Components::Replicate'
19
- # config.component 'output1', 'RFlow::Components::FileOutput', 'output_file_path' => '/tmp/crap1'
20
- # config.component 'output2', 'RFlow::Components::FileOutput', 'output_file_path' => '/tmp/crap2'
21
-
22
- # Hook components together
23
- # config.connect 'generate_ints#out' => 'filter#in'
24
- # config.connect 'filter#filtered' => 'replicate#in'
25
- # config.connect 'replicate#out[0]' => 'simple#in'
26
- # config.connect 'replicate#out[one]' => 'complex#in'
27
- # config.connect 'simple#out' => 'output#in'
28
- # config.connect 'complex#out' => 'output#in'
29
-
30
- # config.connect 'generate_ints1#out' => 'filter#in'
31
- # config.connect 'generate_ints2#out' => 'filter#in'
32
- # config.connect 'filter#filtered' => 'replicate#in'
33
- # config.connect 'replicate#out[1]' => 'output1#in'
34
- # config.connect 'replicate#out[2]' => 'output2#in'
35
- # Some tests that should fail
36
- # output should not have an 'out' ports
37
- # config.connect 'output#out' => 'simple#in'
38
-
39
9
  config.component 'generate_ints', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
10
+ config.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
40
11
  config.component 'output', 'RFlow::Components::FileOutput', 'output_file_path' => '../tmp/out'
41
12
  config.component 'output2', 'RFlow::Components::FileOutput', 'output_file_path' => '../tmp/out2'
42
13
  config.component 'output_even', 'RFlow::Components::FileOutput', 'output_file_path' => '../tmp/out_even'
43
14
  config.component 'output_odd', 'RFlow::Components::FileOutput', 'output_file_path' => '../tmp/out_odd'
44
15
  config.component 'output_even_odd', 'RFlow::Components::FileOutput', 'output_file_path' => '../tmp/out_even_odd'
16
+ config.component 'output_even_odd2', 'RFlow::Components::FileOutput', 'output_file_path' => '../tmp/out_even_odd2'
45
17
 
18
+ # Hook components together
46
19
  config.connect 'generate_ints#out' => 'output#in'
47
20
  config.connect 'generate_ints#out' => 'output2#in'
48
21
  config.connect 'generate_ints#even_odd_out[even]' => 'output_even#in'
49
22
  config.connect 'generate_ints#even_odd_out[odd]' => 'output_odd#in'
50
23
  config.connect 'generate_ints#even_odd_out' => 'output_even_odd#in'
51
-
52
-
53
-
54
- config.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
55
- config.component 'output_even_odd2', 'RFlow::Components::FileOutput', 'output_file_path' => '../tmp/out_even_odd2'
56
-
57
24
  config.connect 'generate_ints2#even_odd_out' => 'output_even_odd2#in'
58
-
59
25
  end
60
-
61
-
data/spec/fixtures/config_shards.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  RFlow::Configuration::RubyDSL.configure do |config|
2
- config.setting('rflow.log_level', 'DEBUG')
2
+ config.setting('rflow.log_level', 'FATAL')
3
3
  config.setting('rflow.application_directory_path', '.')
4
4
  config.setting('rflow.application_name', 'shardapp')
5
5
 
@@ -26,5 +26,4 @@ RFlow::Configuration::RubyDSL.configure do |config|
26
26
  config.connect 'filter#filtered' => 'replicate#in'
27
27
  config.connect 'replicate#out' => 'output1#in'
28
28
  config.connect 'replicate#out' => 'output2#in'
29
-
30
29
  end
data/spec/fixtures/extensions_ints.rb CHANGED
@@ -1,9 +1,6 @@
1
- # This will/should bring in available components and their schemas
2
1
  require 'rflow/components'
3
2
  require 'rflow/message'
4
3
 
5
- #RFlow::Configuration.add_available_data_schema RFlow::Message::Data::AvroSchema.new('Integer', long_integer_schema)
6
-
7
4
  # Example of creating and registering a data extension
8
5
  module SimpleDataExtension
9
6
  # Use this to default/verify the data in data_object
@@ -15,92 +12,6 @@ module SimpleDataExtension
15
12
  end
16
13
  RFlow::Configuration.add_available_data_extension('RFlow::Message::Data::Integer', SimpleDataExtension)
17
14
 
18
-
19
-
20
- # Example of creating and registering a new schema
21
- long_integer_schema = '{"type": "long"}'
22
- RFlow::Configuration.add_available_data_type('RFlow::Message::Data::Integer', 'avro', long_integer_schema)
23
-
24
-
25
- class RFlow::Components::GenerateIntegerSequence < RFlow::Component
26
- output_port :out
27
- output_port :even_odd_out
28
-
29
- def configure!(config)
30
- @start = config['start'].to_i
31
- @finish = config['finish'].to_i
32
- @step = config['step'] ? config['step'].to_i : 1
33
- # If interval seconds is not given, it will default to 0
34
- @interval_seconds = config['interval_seconds'].to_i
35
- end
36
-
37
- # Note that this uses the timer (sometimes with 0 interval) so as
38
- # not to block the reactor
39
- def run!
40
- timer = EM::PeriodicTimer.new(@interval_seconds) do
41
- message = RFlow::Message.new('RFlow::Message::Data::Integer')
42
- message.data.data_object = @start
43
- out.send_message message
44
- if @start % 2 == 0
45
- even_odd_out['even'].send_message message
46
- else
47
- even_odd_out['odd'].send_message message
48
- end
49
-
50
- @start += @step
51
- timer.cancel if @start > @finish
52
- end
53
- end
54
-
55
- end
56
-
57
- class RFlow::Components::Replicate < RFlow::Component
58
- input_port :in
59
- output_port :out
60
- output_port :errored
61
-
62
- def process_message(input_port, input_port_key, connection, message)
63
- puts "Processing message in Replicate"
64
- out.each do |connections|
65
- puts "Replicating"
66
- begin
67
- connections.send_message message
68
- rescue Exception => e
69
- puts "Exception #{e.message}"
70
- errored.send_message message
71
- end
72
- end
73
- end
74
- end
75
-
76
- puts "Before RubyProcFilter"
77
- class RFlow::Components::RubyProcFilter < RFlow::Component
78
- input_port :in
79
- output_port :filtered
80
- output_port :dropped
81
- output_port :errored
82
-
83
-
84
- def configure!(config)
85
- @filter_proc = eval("lambda {|message| #{config['filter_proc_string']} }")
86
- end
87
-
88
- def process_message(input_port, input_port_key, connection, message)
89
- puts "Processing message in RubyProcFilter"
90
- begin
91
- if @filter_proc.call(message)
92
- filtered.send_message message
93
- else
94
- dropped.send_message message
95
- end
96
- rescue Exception => e
97
- puts "Attempting to send message to errored #{e.message}"
98
- errored.send_message message
99
- end
100
- end
101
- end
102
-
103
- puts "Before FileOutput"
104
15
  class RFlow::Components::FileOutput < RFlow::Component
105
16
  attr_accessor :output_file_path, :output_file
106
17
  input_port :in
@@ -110,23 +21,16 @@ class RFlow::Components::FileOutput < RFlow::Component
110
21
  self.output_file = File.new output_file_path, 'w+'
111
22
  end
112
23
 
113
- #def run!; end
114
-
115
24
  def process_message(input_port, input_port_key, connection, message)
116
- puts "About to output to a file #{output_file_path}"
117
25
  output_file.puts message.data.data_object.inspect
118
26
  output_file.flush
119
27
  end
120
28
 
121
-
122
29
  def cleanup
123
30
  output_file.close
124
31
  end
125
-
126
32
  end
127
33
 
128
- # TODO: Ensure that all the following methods work as they are
129
- # supposed to. This is the interface that I'm adhering to
130
34
  class SimpleComponent < RFlow::Component
131
35
  input_port :in
132
36
  output_port :out
@@ -137,5 +41,3 @@ class SimpleComponent < RFlow::Component
137
41
  def shutdown!; end
138
42
  def cleanup!; end
139
43
  end
140
-
141
-
data/spec/rflow/component/port_spec.rb ADDED
@@ -0,0 +1,61 @@
1
+ require 'spec_helper'
2
+
3
+ class RFlow
4
+ class Component
5
+ describe Port do
6
+ it "should not be connected" do
7
+ described_class.new.should_not be_connected
8
+ end
9
+ end
10
+
11
+ describe HashPort do
12
+ it "should not be connected" do
13
+ config = double('Port Config')
14
+ config.stub(:name).and_return('port')
15
+ config.stub(:uuid).and_return('1')
16
+
17
+ described_class.new(config).should_not be_connected
18
+ end
19
+ end
20
+
21
+ describe InputPort do
22
+ context "#connect!" do
23
+ it "should be connected" do
24
+ connection = double('connection')
25
+ connection.should_receive(:connect_input!)
26
+
27
+ config = double('Port Config')
28
+ config.stub(:name).and_return('port')
29
+ config.stub(:uuid).and_return('1')
30
+
31
+ described_class.new(config).tap do |port|
32
+ port.add_connection(nil, connection)
33
+ port.should_not be_connected
34
+ port.connect!
35
+ port.should be_connected
36
+ end
37
+ end
38
+ end
39
+ end
40
+
41
+ describe OutputPort do
42
+ context "#connect!" do
43
+ it "should be connected" do
44
+ connection = double('connection')
45
+ connection.should_receive(:connect_output!)
46
+
47
+ port_config = double('Port Config')
48
+ port_config.stub(:name).and_return('port')
49
+ port_config.stub(:uuid).and_return('1')
50
+
51
+ described_class.new(port_config).tap do |port|
52
+ port.add_connection(nil, connection)
53
+ port.should_not be_connected
54
+ port.connect!
55
+ port.should be_connected
56
+ end
57
+ end
58
+ end
59
+ end
60
+ end
61
+ end