rflow 1.0.0a1 → 1.0.0a2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rspec +1 -0
  4. data/Gemfile +0 -1
  5. data/NOTES +0 -13
  6. data/README.md +6 -1
  7. data/bin/rflow +2 -9
  8. data/example/basic_config.rb +1 -33
  9. data/example/basic_extensions.rb +0 -98
  10. data/example/http_config.rb +2 -3
  11. data/example/http_extensions.rb +6 -63
  12. data/lib/rflow.rb +31 -39
  13. data/lib/rflow/child_process.rb +112 -0
  14. data/lib/rflow/component.rb +77 -148
  15. data/lib/rflow/component/port.rb +38 -41
  16. data/lib/rflow/components.rb +4 -8
  17. data/lib/rflow/components/clock.rb +49 -0
  18. data/lib/rflow/components/integer.rb +39 -0
  19. data/lib/rflow/components/raw.rb +10 -6
  20. data/lib/rflow/components/replicate.rb +20 -0
  21. data/lib/rflow/components/ruby_proc_filter.rb +27 -0
  22. data/lib/rflow/configuration.rb +105 -184
  23. data/lib/rflow/configuration/component.rb +1 -4
  24. data/lib/rflow/configuration/connection.rb +11 -16
  25. data/lib/rflow/configuration/port.rb +3 -5
  26. data/lib/rflow/configuration/ruby_dsl.rb +105 -119
  27. data/lib/rflow/configuration/setting.rb +19 -25
  28. data/lib/rflow/configuration/shard.rb +1 -3
  29. data/lib/rflow/connection.rb +47 -10
  30. data/lib/rflow/connections.rb +0 -1
  31. data/lib/rflow/connections/zmq_connection.rb +34 -38
  32. data/lib/rflow/daemon_process.rb +155 -0
  33. data/lib/rflow/logger.rb +41 -25
  34. data/lib/rflow/master.rb +23 -105
  35. data/lib/rflow/message.rb +78 -108
  36. data/lib/rflow/pid_file.rb +37 -37
  37. data/lib/rflow/shard.rb +33 -100
  38. data/lib/rflow/version.rb +2 -2
  39. data/rflow.gemspec +2 -2
  40. data/schema/tick.avsc +10 -0
  41. data/spec/fixtures/config_ints.rb +4 -40
  42. data/spec/fixtures/config_shards.rb +1 -2
  43. data/spec/fixtures/extensions_ints.rb +0 -98
  44. data/spec/rflow/component/port_spec.rb +61 -0
  45. data/spec/rflow/components/clock_spec.rb +72 -0
  46. data/spec/rflow/configuration/ruby_dsl_spec.rb +150 -0
  47. data/spec/rflow/configuration_spec.rb +54 -0
  48. data/spec/rflow/forward_to_input_port_spec.rb +48 -0
  49. data/spec/rflow/forward_to_output_port_spec.rb +40 -0
  50. data/spec/rflow/logger_spec.rb +48 -0
  51. data/spec/rflow/message/data/raw_spec.rb +29 -0
  52. data/spec/rflow/message/data_spec.rb +58 -0
  53. data/spec/rflow/message_spec.rb +154 -0
  54. data/spec/rflow_spec.rb +94 -124
  55. data/spec/spec_helper.rb +8 -12
  56. metadata +46 -22
  57. data/lib/rflow/components/raw/extensions.rb +0 -18
  58. data/lib/rflow/port.rb +0 -4
  59. data/lib/rflow/util.rb +0 -19
  60. data/spec/rflow_component_port_spec.rb +0 -58
  61. data/spec/rflow_configuration_ruby_dsl_spec.rb +0 -148
  62. data/spec/rflow_configuration_spec.rb +0 -73
  63. data/spec/rflow_message_data_raw.rb +0 -26
  64. data/spec/rflow_message_data_spec.rb +0 -60
  65. data/spec/rflow_message_spec.rb +0 -182
  66. data/spec/schema_spec.rb +0 -28
  67. data/temp.rb +0 -295
data/lib/rflow/shard.rb CHANGED
@@ -1,148 +1,81 @@
1
- class RFlow
1
+ require 'rflow/child_process'
2
2
 
3
+ class RFlow
3
4
  # An object implementation shared between two processes. The parent
4
5
  # process will instantiate, configure, and run! a shard, at which
5
6
  # point the parent will have access to the shard object and be able
6
7
  # to monitor the underlying processes. The child implementation,
7
- # running in a separate process, will not return from run!, but
8
- # start an Eventmachine reactor, connect the components, and not
9
- # return
8
+ # running in a separate process, will not return from spawn!, but
9
+ # start an EventMachine reactor.
10
10
  class Shard
11
-
12
- # An internal class that represents an instance of the running
13
- # shard, i.e. a process
14
- class Worker
15
-
16
- attr_accessor :shard, :index, :name, :pid
17
- attr_accessor :components
18
- attr_accessor :worker_read, :master_write
19
-
20
- def initialize(shard, index=1)
11
+ class Worker < ChildProcess
12
+ def initialize(shard, index = 1)
13
+ super("#{shard.name}-#{index}", 'Worker')
21
14
  @shard = shard
22
- @index = index
23
- @name = "#{shard.name}-#{index}"
24
-
25
- # Set up the IPC pipes
26
- @worker_read, @master_write = IO.pipe
27
- [@worker_read, @master_write].each do |io|
28
- io.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC)
29
- end
30
15
 
31
- @components = shard.config.components.map do |component_config|
32
- Component.build(component_config)
33
- end
16
+ # build at initialize time to fail fast
17
+ @components = shard.config.components.map {|config| Component.build(config) }
34
18
  end
35
19
 
36
- def handle_signals
37
- ['SIGTERM', 'SIGINT', 'SIGQUIT'].each do |signal|
38
- Signal.trap signal do
39
- Thread.new { shutdown(signal) }.join
40
- end
20
+ def run_process
21
+ EM.run do
22
+ # TODO: Monitor the master
23
+ configure_components!
24
+ connect_components!
25
+ # TODO: need to do proper node synchronization for ZMQ to remove sleep
26
+ sleep 1
27
+ run_components!
41
28
  end
42
29
 
43
- ['SIGUSR1'].each do |signal|
44
- Signal.trap signal do
45
- Thread.new do
46
- RFlow.logger.reopen
47
- end.join
48
- end
49
- end
50
-
51
- # Toggle log level on USR2
52
- ['SIGUSR2'].each do |signal|
53
- Signal.trap signal do
54
- Thread.new do
55
- RFlow.logger.toggle_log_level
56
- end.join
57
- end
58
- end
30
+ RFlow.logger.info "Shutting down worker after EM stopped"
59
31
  end
60
32
 
61
- # Launch another process to execute the shard. The parent
62
- # process retains the original worker object (with pid and IPC
63
- # pipe) to allow for process management
64
- def launch
65
- @pid = Process.fork do
66
- @master_write.close
67
-
68
- handle_signals
69
-
70
- $0 += " #{name}"
71
- Log4r::NDC.push name
72
-
73
- RFlow.logger.info "Worker started"
74
- EM.run do
75
- # TODO: Monitor the master
76
-
77
- connect_components!
78
- # TODO: need to do proper node synchronization for ZMQ to
79
- # remove sleep
80
- sleep 1
81
- run_components!
82
- end
83
-
84
- RFlow.logger.info "Shutting down worker after EM stopped"
33
+ def configure_components!
34
+ RFlow.logger.debug "Configuring components"
35
+ @components.zip(@shard.config.components.map(&:options)).each do |(component, config)|
36
+ RFlow.logger.debug "Configuring component '#{component.name}' (#{component.uuid})"
37
+ component.configure! config
85
38
  end
86
-
87
- @worker_read.close
88
- self
89
39
  end
90
40
 
91
- # Send a command to each component to tell them to connect their
92
- # ports via their connections
93
41
  def connect_components!
94
42
  RFlow.logger.debug "Connecting components"
95
- components.each do |component|
43
+ @components.each do |component|
96
44
  RFlow.logger.debug "Connecting component '#{component.name}' (#{component.uuid})"
97
45
  component.connect!
98
46
  end
99
47
  end
100
48
 
101
- # Start each component running
102
49
  def run_components!
103
50
  RFlow.logger.debug "Running components"
104
- components.each do |component|
51
+ @components.each do |component|
105
52
  RFlow.logger.debug "Running component '#{component.name}' (#{component.uuid})"
106
53
  component.run!
107
54
  end
108
55
  end
109
- end
110
56
 
57
+ def shutdown!(signal)
58
+ EM.stop_event_loop
59
+ super
60
+ end
61
+ end
111
62
 
112
- attr_reader :config, :uuid, :name, :count
113
- attr_accessor :workers
114
-
63
+ attr_reader :config, :name, :count, :workers
115
64
 
116
65
  def initialize(config)
117
66
  @config = config
118
67
  @uuid = config.uuid
119
68
  @name = config.name
120
69
  @count = config.count
121
-
122
- @workers = count.times.map do |i|
123
- Worker.new(self, i+1)
124
- end
70
+ @workers = count.times.map {|i| Worker.new(self, i+1) }
125
71
  end
126
72
 
127
-
128
73
  def run!
129
74
  RFlow.logger.debug "Running shard #{name} with #{count} workers"
130
- workers.each do |worker|
131
- worker.launch
132
- end
75
+ workers.each(&:spawn!)
133
76
 
134
77
  RFlow.logger.debug "#{count} workers started for #{name}: #{workers.map { |w| "#{w.name} (#{w.pid})" }.join(", ")}"
135
78
  workers
136
79
  end
137
-
138
-
139
- # TODO: Implement
140
- def shutdown!
141
- end
142
-
143
-
144
- # TODO: Implement
145
- def cleanup!
146
- end
147
80
  end
148
81
  end
data/lib/rflow/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  class RFlow
2
- VERSION = "1.0.0a1"
3
- end # class RFlow
2
+ VERSION = "1.0.0a2"
3
+ end
data/rflow.gemspec CHANGED
@@ -1,4 +1,3 @@
1
- # -*- encoding: utf-8 -*-
2
1
  lib = File.expand_path('../lib', __FILE__)
3
2
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
3
  require "rflow/version"
@@ -30,7 +29,8 @@ Gem::Specification.new do |s|
30
29
  s.add_dependency "em-zeromq", "~> 0.4.2"
31
30
 
32
31
  s.add_development_dependency "bundler", "~> 1.5"
33
- s.add_development_dependency "rspec", "~> 2.6"
32
+ s.add_development_dependency "rspec", "~> 2.99"
33
+ s.add_development_dependency "rspec-collection_matchers", "~> 0.0.4"
34
34
  s.add_development_dependency "rake", ">= 0.8.7"
35
35
  s.add_development_dependency "yard", "~> 0.8.7"
36
36
  end
data/schema/tick.avsc ADDED
@@ -0,0 +1,10 @@
1
+ {
2
+ "type": "record",
3
+ "name": "Tick",
4
+ "namespace": "org.rflow.message.clock",
5
+ "aliases": [],
6
+ "fields": [
7
+ {"name": "name", "type": ["string", "null"]},
8
+ {"name": "timestamp", "type": ["long", "null"]}
9
+ ]
10
+ }
data/spec/fixtures/config_ints.rb CHANGED
@@ -1,61 +1,25 @@
1
- # Meat of the config file. Stuff above this should probably be in
2
- # separate gems and/or files that are brought in at runtime.
3
1
  RFlow::Configuration::RubyDSL.configure do |config|
4
2
  # Configure the settings, which include paths for various files, log
5
3
  # levels, and component specific stuffs
6
- config.setting('rflow.log_level', 'DEBUG')
4
+ config.setting('rflow.log_level', 'FATAL')
7
5
  config.setting('rflow.application_directory_path', '../tmp')
8
-
9
6
  config.setting('rflow.application_name', 'testapp')
10
7
 
11
- # Add schemas to the list of available. Not convinced this is necessary
12
- # config.schema('schemaname', 'schematype', 'schemadata')
13
-
14
8
  # Instantiate components
15
- # config.component 'generate_ints1', 'RFlow::Components::GenerateIntegerSequence', 'start' => 0, 'finish' => 10, 'step' => 3, 'interval_seconds' => 1
16
- # config.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
17
- # config.component 'filter', 'RFlow::Components::RubyProcFilter', 'filter_proc_string' => 'lambda {|message| true}'
18
- # config.component 'replicate', 'RFlow::Components::Replicate'
19
- # config.component 'output1', 'RFlow::Components::FileOutput', 'output_file_path' => '/tmp/crap1'
20
- # config.component 'output2', 'RFlow::Components::FileOutput', 'output_file_path' => '/tmp/crap2'
21
-
22
- # Hook components together
23
- # config.connect 'generate_ints#out' => 'filter#in'
24
- # config.connect 'filter#filtered' => 'replicate#in'
25
- # config.connect 'replicate#out[0]' => 'simple#in'
26
- # config.connect 'replicate#out[one]' => 'complex#in'
27
- # config.connect 'simple#out' => 'output#in'
28
- # config.connect 'complex#out' => 'output#in'
29
-
30
- # config.connect 'generate_ints1#out' => 'filter#in'
31
- # config.connect 'generate_ints2#out' => 'filter#in'
32
- # config.connect 'filter#filtered' => 'replicate#in'
33
- # config.connect 'replicate#out[1]' => 'output1#in'
34
- # config.connect 'replicate#out[2]' => 'output2#in'
35
- # Some tests that should fail
36
- # output should not have an 'out' ports
37
- # config.connect 'output#out' => 'simple#in'
38
-
39
9
  config.component 'generate_ints', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
10
+ config.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
40
11
  config.component 'output', 'RFlow::Components::FileOutput', 'output_file_path' => '../tmp/out'
41
12
  config.component 'output2', 'RFlow::Components::FileOutput', 'output_file_path' => '../tmp/out2'
42
13
  config.component 'output_even', 'RFlow::Components::FileOutput', 'output_file_path' => '../tmp/out_even'
43
14
  config.component 'output_odd', 'RFlow::Components::FileOutput', 'output_file_path' => '../tmp/out_odd'
44
15
  config.component 'output_even_odd', 'RFlow::Components::FileOutput', 'output_file_path' => '../tmp/out_even_odd'
16
+ config.component 'output_even_odd2', 'RFlow::Components::FileOutput', 'output_file_path' => '../tmp/out_even_odd2'
45
17
 
18
+ # Hook components together
46
19
  config.connect 'generate_ints#out' => 'output#in'
47
20
  config.connect 'generate_ints#out' => 'output2#in'
48
21
  config.connect 'generate_ints#even_odd_out[even]' => 'output_even#in'
49
22
  config.connect 'generate_ints#even_odd_out[odd]' => 'output_odd#in'
50
23
  config.connect 'generate_ints#even_odd_out' => 'output_even_odd#in'
51
-
52
-
53
-
54
- config.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
55
- config.component 'output_even_odd2', 'RFlow::Components::FileOutput', 'output_file_path' => '../tmp/out_even_odd2'
56
-
57
24
  config.connect 'generate_ints2#even_odd_out' => 'output_even_odd2#in'
58
-
59
25
  end
60
-
61
-
data/spec/fixtures/config_shards.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  RFlow::Configuration::RubyDSL.configure do |config|
2
- config.setting('rflow.log_level', 'DEBUG')
2
+ config.setting('rflow.log_level', 'FATAL')
3
3
  config.setting('rflow.application_directory_path', '.')
4
4
  config.setting('rflow.application_name', 'shardapp')
5
5
 
@@ -26,5 +26,4 @@ RFlow::Configuration::RubyDSL.configure do |config|
26
26
  config.connect 'filter#filtered' => 'replicate#in'
27
27
  config.connect 'replicate#out' => 'output1#in'
28
28
  config.connect 'replicate#out' => 'output2#in'
29
-
30
29
  end
data/spec/fixtures/extensions_ints.rb CHANGED
@@ -1,9 +1,6 @@
1
- # This will/should bring in available components and their schemas
2
1
  require 'rflow/components'
3
2
  require 'rflow/message'
4
3
 
5
- #RFlow::Configuration.add_available_data_schema RFlow::Message::Data::AvroSchema.new('Integer', long_integer_schema)
6
-
7
4
  # Example of creating and registering a data extension
8
5
  module SimpleDataExtension
9
6
  # Use this to default/verify the data in data_object
@@ -15,92 +12,6 @@ module SimpleDataExtension
15
12
  end
16
13
  RFlow::Configuration.add_available_data_extension('RFlow::Message::Data::Integer', SimpleDataExtension)
17
14
 
18
-
19
-
20
- # Example of creating and registering a new schema
21
- long_integer_schema = '{"type": "long"}'
22
- RFlow::Configuration.add_available_data_type('RFlow::Message::Data::Integer', 'avro', long_integer_schema)
23
-
24
-
25
- class RFlow::Components::GenerateIntegerSequence < RFlow::Component
26
- output_port :out
27
- output_port :even_odd_out
28
-
29
- def configure!(config)
30
- @start = config['start'].to_i
31
- @finish = config['finish'].to_i
32
- @step = config['step'] ? config['step'].to_i : 1
33
- # If interval seconds is not given, it will default to 0
34
- @interval_seconds = config['interval_seconds'].to_i
35
- end
36
-
37
- # Note that this uses the timer (sometimes with 0 interval) so as
38
- # not to block the reactor
39
- def run!
40
- timer = EM::PeriodicTimer.new(@interval_seconds) do
41
- message = RFlow::Message.new('RFlow::Message::Data::Integer')
42
- message.data.data_object = @start
43
- out.send_message message
44
- if @start % 2 == 0
45
- even_odd_out['even'].send_message message
46
- else
47
- even_odd_out['odd'].send_message message
48
- end
49
-
50
- @start += @step
51
- timer.cancel if @start > @finish
52
- end
53
- end
54
-
55
- end
56
-
57
- class RFlow::Components::Replicate < RFlow::Component
58
- input_port :in
59
- output_port :out
60
- output_port :errored
61
-
62
- def process_message(input_port, input_port_key, connection, message)
63
- puts "Processing message in Replicate"
64
- out.each do |connections|
65
- puts "Replicating"
66
- begin
67
- connections.send_message message
68
- rescue Exception => e
69
- puts "Exception #{e.message}"
70
- errored.send_message message
71
- end
72
- end
73
- end
74
- end
75
-
76
- puts "Before RubyProcFilter"
77
- class RFlow::Components::RubyProcFilter < RFlow::Component
78
- input_port :in
79
- output_port :filtered
80
- output_port :dropped
81
- output_port :errored
82
-
83
-
84
- def configure!(config)
85
- @filter_proc = eval("lambda {|message| #{config['filter_proc_string']} }")
86
- end
87
-
88
- def process_message(input_port, input_port_key, connection, message)
89
- puts "Processing message in RubyProcFilter"
90
- begin
91
- if @filter_proc.call(message)
92
- filtered.send_message message
93
- else
94
- dropped.send_message message
95
- end
96
- rescue Exception => e
97
- puts "Attempting to send message to errored #{e.message}"
98
- errored.send_message message
99
- end
100
- end
101
- end
102
-
103
- puts "Before FileOutput"
104
15
  class RFlow::Components::FileOutput < RFlow::Component
105
16
  attr_accessor :output_file_path, :output_file
106
17
  input_port :in
@@ -110,23 +21,16 @@ class RFlow::Components::FileOutput < RFlow::Component
110
21
  self.output_file = File.new output_file_path, 'w+'
111
22
  end
112
23
 
113
- #def run!; end
114
-
115
24
  def process_message(input_port, input_port_key, connection, message)
116
- puts "About to output to a file #{output_file_path}"
117
25
  output_file.puts message.data.data_object.inspect
118
26
  output_file.flush
119
27
  end
120
28
 
121
-
122
29
  def cleanup
123
30
  output_file.close
124
31
  end
125
-
126
32
  end
127
33
 
128
- # TODO: Ensure that all the following methods work as they are
129
- # supposed to. This is the interface that I'm adhering to
130
34
  class SimpleComponent < RFlow::Component
131
35
  input_port :in
132
36
  output_port :out
@@ -137,5 +41,3 @@ class SimpleComponent < RFlow::Component
137
41
  def shutdown!; end
138
42
  def cleanup!; end
139
43
  end
140
-
141
-
data/spec/rflow/component/port_spec.rb ADDED
@@ -0,0 +1,61 @@
1
+ require 'spec_helper'
2
+
3
+ class RFlow
4
+ class Component
5
+ describe Port do
6
+ it "should not be connected" do
7
+ described_class.new.should_not be_connected
8
+ end
9
+ end
10
+
11
+ describe HashPort do
12
+ it "should not be connected" do
13
+ config = double('Port Config')
14
+ config.stub(:name).and_return('port')
15
+ config.stub(:uuid).and_return('1')
16
+
17
+ described_class.new(config).should_not be_connected
18
+ end
19
+ end
20
+
21
+ describe InputPort do
22
+ context "#connect!" do
23
+ it "should be connected" do
24
+ connection = double('connection')
25
+ connection.should_receive(:connect_input!)
26
+
27
+ config = double('Port Config')
28
+ config.stub(:name).and_return('port')
29
+ config.stub(:uuid).and_return('1')
30
+
31
+ described_class.new(config).tap do |port|
32
+ port.add_connection(nil, connection)
33
+ port.should_not be_connected
34
+ port.connect!
35
+ port.should be_connected
36
+ end
37
+ end
38
+ end
39
+ end
40
+
41
+ describe OutputPort do
42
+ context "#connect!" do
43
+ it "should be connected" do
44
+ connection = double('connection')
45
+ connection.should_receive(:connect_output!)
46
+
47
+ port_config = double('Port Config')
48
+ port_config.stub(:name).and_return('port')
49
+ port_config.stub(:uuid).and_return('1')
50
+
51
+ described_class.new(port_config).tap do |port|
52
+ port.add_connection(nil, connection)
53
+ port.should_not be_connected
54
+ port.connect!
55
+ port.should be_connected
56
+ end
57
+ end
58
+ end
59
+ end
60
+ end
61
+ end