rflow 1.0.0a1 → 1.0.0a2

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rspec +1 -0
  4. data/Gemfile +0 -1
  5. data/NOTES +0 -13
  6. data/README.md +6 -1
  7. data/bin/rflow +2 -9
  8. data/example/basic_config.rb +1 -33
  9. data/example/basic_extensions.rb +0 -98
  10. data/example/http_config.rb +2 -3
  11. data/example/http_extensions.rb +6 -63
  12. data/lib/rflow.rb +31 -39
  13. data/lib/rflow/child_process.rb +112 -0
  14. data/lib/rflow/component.rb +77 -148
  15. data/lib/rflow/component/port.rb +38 -41
  16. data/lib/rflow/components.rb +4 -8
  17. data/lib/rflow/components/clock.rb +49 -0
  18. data/lib/rflow/components/integer.rb +39 -0
  19. data/lib/rflow/components/raw.rb +10 -6
  20. data/lib/rflow/components/replicate.rb +20 -0
  21. data/lib/rflow/components/ruby_proc_filter.rb +27 -0
  22. data/lib/rflow/configuration.rb +105 -184
  23. data/lib/rflow/configuration/component.rb +1 -4
  24. data/lib/rflow/configuration/connection.rb +11 -16
  25. data/lib/rflow/configuration/port.rb +3 -5
  26. data/lib/rflow/configuration/ruby_dsl.rb +105 -119
  27. data/lib/rflow/configuration/setting.rb +19 -25
  28. data/lib/rflow/configuration/shard.rb +1 -3
  29. data/lib/rflow/connection.rb +47 -10
  30. data/lib/rflow/connections.rb +0 -1
  31. data/lib/rflow/connections/zmq_connection.rb +34 -38
  32. data/lib/rflow/daemon_process.rb +155 -0
  33. data/lib/rflow/logger.rb +41 -25
  34. data/lib/rflow/master.rb +23 -105
  35. data/lib/rflow/message.rb +78 -108
  36. data/lib/rflow/pid_file.rb +37 -37
  37. data/lib/rflow/shard.rb +33 -100
  38. data/lib/rflow/version.rb +2 -2
  39. data/rflow.gemspec +2 -2
  40. data/schema/tick.avsc +10 -0
  41. data/spec/fixtures/config_ints.rb +4 -40
  42. data/spec/fixtures/config_shards.rb +1 -2
  43. data/spec/fixtures/extensions_ints.rb +0 -98
  44. data/spec/rflow/component/port_spec.rb +61 -0
  45. data/spec/rflow/components/clock_spec.rb +72 -0
  46. data/spec/rflow/configuration/ruby_dsl_spec.rb +150 -0
  47. data/spec/rflow/configuration_spec.rb +54 -0
  48. data/spec/rflow/forward_to_input_port_spec.rb +48 -0
  49. data/spec/rflow/forward_to_output_port_spec.rb +40 -0
  50. data/spec/rflow/logger_spec.rb +48 -0
  51. data/spec/rflow/message/data/raw_spec.rb +29 -0
  52. data/spec/rflow/message/data_spec.rb +58 -0
  53. data/spec/rflow/message_spec.rb +154 -0
  54. data/spec/rflow_spec.rb +94 -124
  55. data/spec/spec_helper.rb +8 -12
  56. metadata +46 -22
  57. data/lib/rflow/components/raw/extensions.rb +0 -18
  58. data/lib/rflow/port.rb +0 -4
  59. data/lib/rflow/util.rb +0 -19
  60. data/spec/rflow_component_port_spec.rb +0 -58
  61. data/spec/rflow_configuration_ruby_dsl_spec.rb +0 -148
  62. data/spec/rflow_configuration_spec.rb +0 -73
  63. data/spec/rflow_message_data_raw.rb +0 -26
  64. data/spec/rflow_message_data_spec.rb +0 -60
  65. data/spec/rflow_message_spec.rb +0 -182
  66. data/spec/schema_spec.rb +0 -28
  67. data/temp.rb +0 -295
@@ -3,8 +3,7 @@ require 'rflow/configuration/uuid_keyed'
3
3
 
4
4
  class RFlow
5
5
  class Configuration
6
-
7
- class Shard < ConfigDB
6
+ class Shard < ConfigurationItem
8
7
  include UUIDKeyed
9
8
  include ActiveModel::Validations
10
9
 
@@ -12,7 +11,6 @@ class RFlow
12
11
 
13
12
  has_many :components, :primary_key => 'uuid', :foreign_key => 'shard_uuid'
14
13
 
15
- validates_presence_of :name
16
14
  validates_uniqueness_of :name
17
15
  validates_numericality_of :count, :only_integer => true, :greater_than => 0
18
16
  end
@@ -2,7 +2,6 @@ require 'rflow/message'
2
2
 
3
3
  class RFlow
4
4
  class Connection
5
-
6
5
  class << self
7
6
  def build(config)
8
7
  case config.type
@@ -15,7 +14,9 @@ class RFlow
15
14
  end
16
15
 
17
16
  attr_accessor :config, :uuid, :name, :options
18
- attr_accessor :recv_callback
17
+ attr_writer :recv_callback
18
+ protected
19
+ attr_reader :recv_callback
19
20
 
20
21
  def initialize(config)
21
22
  @config = config
@@ -24,15 +25,13 @@ class RFlow
24
25
  @options = config.options
25
26
  end
26
27
 
27
-
28
28
  # Subclass and implement to be able to handle future 'recv'
29
29
  # methods. Will only be called in the context of a running
30
30
  # EventMachine reactor
31
31
  def connect_input!
32
- raise NotImplementedError, "Raw connections do not support connect_input. Please subclass and define a connect_output method."
32
+ raise NotImplementedError, "Raw connections do not support connect_input. Please subclass and define a connect_input method."
33
33
  end
34
34
 
35
-
36
35
  # Subclass and implement to be able to handle future 'send'
37
36
  # methods. Will only be called in the context of a running
38
37
  # EventMachine reactor
@@ -40,7 +39,6 @@ class RFlow
40
39
  raise NotImplementedError, "Raw connections do not support connect_output. Please subclass and define a connect_output method."
41
40
  end
42
41
 
43
-
44
42
  # Subclass and implement to handle outgoing messages. The message
45
43
  # will be a RFlow::Message object and the subclasses are expected
46
44
  # to marshal it up into something that will be unmarshalled on the
@@ -58,13 +56,52 @@ class RFlow
58
56
  def recv_callback
59
57
  @recv_callback ||= Proc.new {|message|}
60
58
  end
59
+ end
60
+
61
+ # Primarily for testing purposes. Captures whatever messages are sent on it.
62
+ class MessageCollectingConnection < Connection
63
+ attr_accessor :messages
64
+
65
+ def initialize
66
+ super(RFlow::Configuration::NullConfiguration.new)
67
+ @messages = []
68
+ end
69
+
70
+ def send_message(message)
71
+ @messages << message
72
+ end
73
+ end
61
74
 
62
- end # class Connection
75
+ # Manually shuffle messages in-process from one output port to another output
76
+ # port. Can be used to get a Facade pattern effect - to have one component
77
+ # contain other components within it, shuttling messages in and out without
78
+ # making the internal component visible to the larger RFlow network.
79
+ class ForwardToOutputPort < Connection
80
+ def initialize(receiver, port_name)
81
+ super(RFlow::Configuration::NullConfiguration.new)
82
+ @receiver = receiver
83
+ @port_name = port_name.to_sym
84
+ end
63
85
 
64
- class Disconnection < Connection
65
86
  def send_message(message)
66
- RFlow.logger.debug "Attempting to send without a connection, doing nothing"
87
+ @receiver.send(@port_name).send_message(message)
67
88
  end
68
89
  end
69
90
 
70
- end # class RFlow
91
+ # Manually shuffle messages in-process from one output port to another output
92
+ # port. Can be used to get a Facade pattern effect - to have one component
93
+ # contain other components within it, shuttling messages in and out without
94
+ # making the internal component visible to the larger RFlow network.
95
+ class ForwardToInputPort < Connection
96
+ def initialize(receiver, port_name, port_key)
97
+ super(RFlow::Configuration::NullConfiguration.new)
98
+ @receiver = receiver
99
+ @port_name = port_name.to_sym
100
+ @port_key = port_key
101
+ end
102
+
103
+ def send_message(message)
104
+ @receiver.process_message(@receiver.send(@port_name), @port_key, self, message)
105
+ end
106
+ end
107
+ end
@@ -1,2 +1 @@
1
- # Load the core connection types
2
1
  require 'rflow/connections/zmq_connection'
@@ -1,12 +1,10 @@
1
1
  require 'em-zeromq'
2
-
3
2
  require 'rflow/connection'
4
3
  require 'rflow/message'
5
4
 
6
5
  class RFlow
7
6
  module Connections
8
7
  class ZMQConnection < RFlow::Connection
9
-
10
8
  class << self
11
9
  attr_accessor :zmq_context
12
10
 
@@ -18,8 +16,7 @@ class RFlow
18
16
  EM::ZeroMQ::Context.new(1)
19
17
  end
20
18
 
21
- # Returns the current ZeroMQ context object or creates it if
22
- # it does not exist.
19
+ # Returns the current ZeroMQ context object or creates it if it does not exist.
23
20
  def zmq_context
24
21
  @zmq_context ||= create_zmq_context
25
22
  end
@@ -27,63 +24,42 @@ class RFlow
27
24
 
28
25
  def zmq_context; self.class.zmq_context; end
29
26
 
27
+ private
30
28
  attr_accessor :input_socket, :output_socket
31
29
 
30
+ public
32
31
  def initialize(config)
33
32
  super
34
33
  validate_options!
35
- # Cause the ZMQ context to be created before the reactor is running
36
- zmq_context
37
- end
38
-
39
-
40
- def validate_options!
41
- # TODO: Normalize/validate configuration
42
- missing_options = []
43
-
44
- ['input', 'output'].each do |direction_prefix|
45
- ['_socket_type', '_address', '_responsibility'].each do |option_suffix|
46
- option_name = "#{direction_prefix}#{option_suffix}"
47
- unless options.include? option_name
48
- missing_options << option_name
49
- end
50
- end
51
- end
52
-
53
- unless missing_options.empty?
54
- raise ArgumentError, "#{self.class.to_s}: configuration missing options: #{missing_options.join ', '}"
55
- end
56
-
57
- true
34
+ zmq_context # cause the ZMQ context to be created before the reactor is running
58
35
  end
59
36
 
60
-
61
37
  def connect_input!
62
38
  RFlow.logger.debug "Connecting input #{uuid} with #{options.find_all {|k, v| k.to_s =~ /input/}}"
63
39
  self.input_socket = zmq_context.socket(ZMQ.const_get(options['input_socket_type'].to_sym))
64
- input_socket.send(options['input_responsibility'].to_sym,
65
- options['input_address'])
40
+ input_socket.send(options['input_responsibility'].to_sym, options['input_address'])
66
41
 
67
42
  input_socket.on(:message) do |*message_parts|
68
- message = RFlow::Message.from_avro(message_parts.last.copy_out_string)
69
- RFlow.logger.debug "#{name}: Received message of type '#{message_parts.first.copy_out_string}'"
70
- message_parts.each { |part| part.close } # avoid memory leaks
71
- recv_callback.call(message)
43
+ begin
44
+ message = RFlow::Message.from_avro(message_parts.last.copy_out_string)
45
+ RFlow.logger.debug "#{name}: Received message of type '#{message_parts.first.copy_out_string}'"
46
+ message_parts.each(&:close) # avoid memory leaks
47
+ recv_callback.call(message)
48
+ rescue Exception => e
49
+ RFlow.logger.error "#{name}: Exception processing message of type '#{message.data_type_name}': #{e.message}, because: #{e.backtrace}"
50
+ end
72
51
  end
73
52
 
74
53
  input_socket
75
54
  end
76
55
 
77
-
78
56
  def connect_output!
79
57
  RFlow.logger.debug "Connecting output #{uuid} with #{options.find_all {|k, v| k.to_s =~ /output/}}"
80
58
  self.output_socket = zmq_context.socket(ZMQ.const_get(options['output_socket_type'].to_sym))
81
- output_socket.send(options['output_responsibility'].to_sym,
82
- options['output_address'].to_s)
59
+ output_socket.send(options['output_responsibility'].to_sym, options['output_address'].to_s)
83
60
  output_socket
84
61
  end
85
62
 
86
-
87
63
  # TODO: fix this tight loop of retries
88
64
  def send_message(message)
89
65
  RFlow.logger.debug "#{name}: Sending message of type '#{message.data_type_name.to_s}'"
@@ -97,6 +73,26 @@ class RFlow
97
73
  end
98
74
  end
99
75
 
76
+ private
77
+ def validate_options!
78
+ # TODO: Normalize/validate configuration
79
+ missing_options = []
80
+
81
+ ['input', 'output'].each do |direction_prefix|
82
+ ['_socket_type', '_address', '_responsibility'].each do |option_suffix|
83
+ option_name = "#{direction_prefix}#{option_suffix}"
84
+ unless options.include? option_name
85
+ missing_options << option_name
86
+ end
87
+ end
88
+ end
89
+
90
+ unless missing_options.empty?
91
+ raise ArgumentError, "#{self.class.to_s}: configuration missing options: #{missing_options.join ', '}"
92
+ end
93
+
94
+ true
95
+ end
100
96
  end
101
97
  end
102
98
  end
@@ -0,0 +1,155 @@
1
+ class RFlow
2
+ class DaemonProcess
3
+ def initialize(name, role = name)
4
+ @name = name
5
+ @role = role
6
+ end
7
+
8
+ def daemonize!
9
+ RFlow.logger.info "#{@name} daemonizing"
10
+ establish_daemon_pipe
11
+ drop_database_connections
12
+
13
+ parent = fork
14
+ if parent
15
+ exit_after_daemon_starts
16
+ else
17
+ daemonize_process
18
+ end
19
+ end
20
+
21
+ def run!
22
+ register_logging_context
23
+ update_process_name
24
+ handle_signals
25
+ spawn_subprocesses
26
+ signal_successful_start
27
+
28
+ RFlow.logger.info "#{@role} started"
29
+ run_process
30
+ ensure
31
+ unhandle_signals
32
+ end
33
+
34
+ def spawn_subprocesses; end
35
+ def subprocesses; []; end
36
+
37
+ def shutdown!(reason)
38
+ RFlow.logger.info "#{@name} shutting down due to #{reason}"
39
+ unhandle_signals
40
+ signal_subprocesses('QUIT')
41
+ RFlow.logger.info "#{@name} exiting"
42
+ end
43
+
44
+ private
45
+ def establish_daemon_pipe
46
+ @daemon_pipe_r, @daemon_pipe_w = IO.pipe
47
+ [@daemon_pipe_r, @daemon_pipe_w].each {|io| io.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC) }
48
+ end
49
+
50
+ # Holding database connections over the fork causes problems. Instead,
51
+ # let them be automatically restored after the fork.
52
+ def drop_database_connections
53
+ ::ActiveRecord::Base.clear_all_connections!
54
+ end
55
+
56
+ def exit_after_daemon_starts
57
+ @daemon_pipe_w.close
58
+
59
+ # Parent waits for a PID on the pipe indicating that the
60
+ # child successfully started.
61
+ child_pid = (@daemon_pipe_r.readpartial(16) rescue nil).to_i
62
+ @daemon_pipe_r.close
63
+ if child_pid > 1
64
+ RFlow.logger.info "#{@role} indicated successful daemonization"
65
+ exit 0
66
+ else
67
+ RFlow.logger.error "#{@role} failed to start"
68
+ exit! 1
69
+ end
70
+ end
71
+
72
+ def daemonize_process
73
+ @daemon_pipe_r.close
74
+ Process.daemon(true, true)
75
+ close_stdio_streams
76
+ end
77
+
78
+ def close_stdio_streams
79
+ $stdout.sync = $stderr.sync = true
80
+ [$stdin, $stdout, $stderr].each do |stream|
81
+ stream.binmode
82
+ begin; stream.reopen "/dev/null"; rescue ::Exception; end
83
+ end
84
+ end
85
+
86
+ def register_logging_context
87
+ # arrange for process's name to appear in log messages
88
+ Log4r::NDC.clear
89
+ Log4r::NDC.push @name
90
+ end
91
+
92
+ def clone_logging_context
93
+ Log4r::NDC.clone_stack
94
+ end
95
+
96
+ def apply_logging_context(context)
97
+ Log4r::NDC.inherit(context)
98
+ end
99
+
100
+ def update_process_name
101
+ # set the visible process name to match the process's name
102
+ $0 = @name
103
+ end
104
+
105
+ def handle_signals
106
+ ['SIGTERM', 'SIGINT', 'SIGQUIT', 'SIGCHLD'].each do |signal|
107
+ trap_signal(signal) do
108
+ shutdown! signal
109
+ exit! 0
110
+ end
111
+ end
112
+
113
+ trap_signal 'SIGUSR1' do
114
+ RFlow.logger.reopen
115
+ signal_subprocesses signal
116
+ end
117
+
118
+ trap_signal 'SIGUSR2' do
119
+ RFlow.logger.toggle_log_level
120
+ signal_subprocesses signal
121
+ end
122
+ end
123
+
124
+ def unhandle_signals
125
+ ['SIGTERM', 'SIGINT', 'SIGQUIT', 'SIGCHLD', 'SIGUSR1', 'SIGUSR2'].each do |signal|
126
+ Signal.trap signal, 'DEFAULT'
127
+ end
128
+ end
129
+
130
+ def trap_signal(signal)
131
+ # Log4r and traps don't mix, so we need to put it in another thread
132
+ context = clone_logging_context
133
+ Signal.trap signal do
134
+ Thread.new do
135
+ apply_logging_context context
136
+ yield
137
+ end.join
138
+ end
139
+ end
140
+
141
+ def signal_successful_start
142
+ if @daemon_pipe_w
143
+ @daemon_pipe_w.syswrite($$.to_s)
144
+ @daemon_pipe_w.close rescue nil
145
+ end
146
+ end
147
+
148
+ def signal_subprocesses(signal)
149
+ subprocesses.each do |p|
150
+ RFlow.logger.info "Signaling #{p.name} with #{signal}"
151
+ Process.kill(signal, p.pid)
152
+ end
153
+ end
154
+ end
155
+ end
data/lib/rflow/logger.rb CHANGED
@@ -9,37 +9,29 @@ class RFlow
9
9
  DATE_METHOD = 'xmlschema(6)'
10
10
  LOG_PATTERN_FORMATTER = PatternFormatter.new :pattern => LOG_PATTERN_FORMAT, :date_method => DATE_METHOD
11
11
 
12
- attr_accessor :config, :internal_logger
13
- attr_accessor :log_file_path, :log_level
12
+ private
13
+ attr_accessor :internal_logger
14
+ attr_accessor :log_file_path, :log_level, :log_name
15
+
16
+ public
17
+
18
+ # make sure Log4r is initialized; ignored if custom levels are already set
19
+ Log4r.define_levels(*Log4rConfig::LogLevels)
14
20
 
15
21
  # Delegate log methods to internal logger
16
- def_delegators :@internal_logger, :fatal, :error, :warn, :info, :debug
22
+ def_delegators :@internal_logger,
23
+ *Log4r::LNAMES.map(&:downcase).map(&:to_sym),
24
+ *Log4r::LNAMES.map(&:downcase).map {|n| "#{n}?".to_sym }
17
25
 
18
- def initialize(config, include_stdout=nil)
26
+ def initialize(config, include_stdout = false)
19
27
  @log_file_path = config['rflow.log_file_path']
20
28
  @log_level = config['rflow.log_level']
29
+ @log_name = (config['rflow.application_name'] || File.basename(log_file_path))
21
30
 
22
- log_name = (config['rflow.application_name'] rescue File.basename(log_file_path))
23
- @internal_logger = Log4r::Logger.new(log_name)
24
-
25
- internal_logger.level = LNAMES.index log_level
26
- internal_logger.trace = true
27
-
28
- begin
29
- internal_logger.add FileOutputter.new('rflow.log_file', :filename => log_file_path, :formatter => LOG_PATTERN_FORMATTER)
30
- rescue Exception => e
31
- error_message = "Log file '#{File.expand_path log_file_path}' problem: #{e.message}\b#{e.backtrace.join("\n")}"
32
- RFlow.logger.error error_message
33
- raise ArgumentError, error_message
34
- end
35
-
36
- if include_stdout
37
- internal_logger.add StdoutOutputter.new('rflow_stdout', :formatter => LOG_PATTERN_FORMATTER)
38
- end
39
-
40
- # RFlow.logger.info "Transitioning to running log file #{log_file_path} at level #{log_level}"
41
- Log4r::NDC.clear
42
- Log4r::NDC.push(log_name)
31
+ establish_internal_logger
32
+ hook_up_logfile
33
+ hook_up_stdout if include_stdout
34
+ register_logging_context
43
35
 
44
36
  internal_logger
45
37
  end
@@ -63,5 +55,29 @@ class RFlow
63
55
  internal_logger.level = LNAMES.index new_log_level
64
56
  end
65
57
 
58
+ private
59
+ def establish_internal_logger
60
+ @internal_logger = Log4r::Logger.new(log_name).tap do |logger|
61
+ logger.level = LNAMES.index log_level
62
+ logger.trace = true
63
+ end
64
+ end
65
+
66
+ def hook_up_logfile
67
+ begin
68
+ internal_logger.add FileOutputter.new('rflow.log_file', :filename => log_file_path, :formatter => LOG_PATTERN_FORMATTER)
69
+ rescue Exception => e
70
+ raise ArgumentError, "Log file '#{File.expand_path log_file_path}' problem: #{e.message}\b#{e.backtrace.join("\n")}"
71
+ end
72
+ end
73
+
74
+ def hook_up_stdout
75
+ internal_logger.add StdoutOutputter.new('rflow_stdout', :formatter => LOG_PATTERN_FORMATTER)
76
+ end
77
+
78
+ def register_logging_context
79
+ Log4r::NDC.clear
80
+ Log4r::NDC.push(log_name)
81
+ end
66
82
  end
67
83
  end