rflow 1.0.0a1 → 1.0.0a2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rspec +1 -0
  4. data/Gemfile +0 -1
  5. data/NOTES +0 -13
  6. data/README.md +6 -1
  7. data/bin/rflow +2 -9
  8. data/example/basic_config.rb +1 -33
  9. data/example/basic_extensions.rb +0 -98
  10. data/example/http_config.rb +2 -3
  11. data/example/http_extensions.rb +6 -63
  12. data/lib/rflow.rb +31 -39
  13. data/lib/rflow/child_process.rb +112 -0
  14. data/lib/rflow/component.rb +77 -148
  15. data/lib/rflow/component/port.rb +38 -41
  16. data/lib/rflow/components.rb +4 -8
  17. data/lib/rflow/components/clock.rb +49 -0
  18. data/lib/rflow/components/integer.rb +39 -0
  19. data/lib/rflow/components/raw.rb +10 -6
  20. data/lib/rflow/components/replicate.rb +20 -0
  21. data/lib/rflow/components/ruby_proc_filter.rb +27 -0
  22. data/lib/rflow/configuration.rb +105 -184
  23. data/lib/rflow/configuration/component.rb +1 -4
  24. data/lib/rflow/configuration/connection.rb +11 -16
  25. data/lib/rflow/configuration/port.rb +3 -5
  26. data/lib/rflow/configuration/ruby_dsl.rb +105 -119
  27. data/lib/rflow/configuration/setting.rb +19 -25
  28. data/lib/rflow/configuration/shard.rb +1 -3
  29. data/lib/rflow/connection.rb +47 -10
  30. data/lib/rflow/connections.rb +0 -1
  31. data/lib/rflow/connections/zmq_connection.rb +34 -38
  32. data/lib/rflow/daemon_process.rb +155 -0
  33. data/lib/rflow/logger.rb +41 -25
  34. data/lib/rflow/master.rb +23 -105
  35. data/lib/rflow/message.rb +78 -108
  36. data/lib/rflow/pid_file.rb +37 -37
  37. data/lib/rflow/shard.rb +33 -100
  38. data/lib/rflow/version.rb +2 -2
  39. data/rflow.gemspec +2 -2
  40. data/schema/tick.avsc +10 -0
  41. data/spec/fixtures/config_ints.rb +4 -40
  42. data/spec/fixtures/config_shards.rb +1 -2
  43. data/spec/fixtures/extensions_ints.rb +0 -98
  44. data/spec/rflow/component/port_spec.rb +61 -0
  45. data/spec/rflow/components/clock_spec.rb +72 -0
  46. data/spec/rflow/configuration/ruby_dsl_spec.rb +150 -0
  47. data/spec/rflow/configuration_spec.rb +54 -0
  48. data/spec/rflow/forward_to_input_port_spec.rb +48 -0
  49. data/spec/rflow/forward_to_output_port_spec.rb +40 -0
  50. data/spec/rflow/logger_spec.rb +48 -0
  51. data/spec/rflow/message/data/raw_spec.rb +29 -0
  52. data/spec/rflow/message/data_spec.rb +58 -0
  53. data/spec/rflow/message_spec.rb +154 -0
  54. data/spec/rflow_spec.rb +94 -124
  55. data/spec/spec_helper.rb +8 -12
  56. metadata +46 -22
  57. data/lib/rflow/components/raw/extensions.rb +0 -18
  58. data/lib/rflow/port.rb +0 -4
  59. data/lib/rflow/util.rb +0 -19
  60. data/spec/rflow_component_port_spec.rb +0 -58
  61. data/spec/rflow_configuration_ruby_dsl_spec.rb +0 -148
  62. data/spec/rflow_configuration_spec.rb +0 -73
  63. data/spec/rflow_message_data_raw.rb +0 -26
  64. data/spec/rflow_message_data_spec.rb +0 -60
  65. data/spec/rflow_message_spec.rb +0 -182
  66. data/spec/schema_spec.rb +0 -28
  67. data/temp.rb +0 -295
@@ -3,8 +3,7 @@ require 'rflow/configuration/uuid_keyed'
3
3
 
4
4
  class RFlow
5
5
  class Configuration
6
-
7
- class Shard < ConfigDB
6
+ class Shard < ConfigurationItem
8
7
  include UUIDKeyed
9
8
  include ActiveModel::Validations
10
9
 
@@ -12,7 +11,6 @@ class RFlow
12
11
 
13
12
  has_many :components, :primary_key => 'uuid', :foreign_key => 'shard_uuid'
14
13
 
15
- validates_presence_of :name
16
14
  validates_uniqueness_of :name
17
15
  validates_numericality_of :count, :only_integer => true, :greater_than => 0
18
16
  end
@@ -2,7 +2,6 @@ require 'rflow/message'
2
2
 
3
3
  class RFlow
4
4
  class Connection
5
-
6
5
  class << self
7
6
  def build(config)
8
7
  case config.type
@@ -15,7 +14,9 @@ class RFlow
15
14
  end
16
15
 
17
16
  attr_accessor :config, :uuid, :name, :options
18
- attr_accessor :recv_callback
17
+ attr_writer :recv_callback
18
+ protected
19
+ attr_reader :recv_callback
19
20
 
20
21
  def initialize(config)
21
22
  @config = config
@@ -24,15 +25,13 @@ class RFlow
24
25
  @options = config.options
25
26
  end
26
27
 
27
-
28
28
  # Subclass and implement to be able to handle future 'recv'
29
29
  # methods. Will only be called in the context of a running
30
30
  # EventMachine reactor
31
31
  def connect_input!
32
- raise NotImplementedError, "Raw connections do not support connect_input. Please subclass and define a connect_output method."
32
+ raise NotImplementedError, "Raw connections do not support connect_input. Please subclass and define a connect_input method."
33
33
  end
34
34
 
35
-
36
35
  # Subclass and implement to be able to handle future 'send'
37
36
  # methods. Will only be called in the context of a running
38
37
  # EventMachine reactor
@@ -40,7 +39,6 @@ class RFlow
40
39
  raise NotImplementedError, "Raw connections do not support connect_output. Please subclass and define a connect_output method."
41
40
  end
42
41
 
43
-
44
42
  # Subclass and implement to handle outgoing messages. The message
45
43
  # will be a RFlow::Message object and the subclasses are expected
46
44
  # to marshal it up into something that will be unmarshalled on the
@@ -58,13 +56,52 @@ class RFlow
58
56
  def recv_callback
59
57
  @recv_callback ||= Proc.new {|message|}
60
58
  end
59
+ end
60
+
61
+ # Primarily for testing purposes. Captures whatever messages are sent on it.
62
+ class MessageCollectingConnection < Connection
63
+ attr_accessor :messages
64
+
65
+ def initialize
66
+ super(RFlow::Configuration::NullConfiguration.new)
67
+ @messages = []
68
+ end
69
+
70
+ def send_message(message)
71
+ @messages << message
72
+ end
73
+ end
61
74
 
62
- end # class Connection
75
+ # Manually shuffle messages in-process from one output port to another output
76
+ # port. Can be used to get a Facade pattern effect - to have one component
77
+ # contain other components within it, shuttling messages in and out without
78
+ # making the internal component visible to the larger RFlow network.
79
+ class ForwardToOutputPort < Connection
80
+ def initialize(receiver, port_name)
81
+ super(RFlow::Configuration::NullConfiguration.new)
82
+ @receiver = receiver
83
+ @port_name = port_name.to_sym
84
+ end
63
85
 
64
- class Disconnection < Connection
65
86
  def send_message(message)
66
- RFlow.logger.debug "Attempting to send without a connection, doing nothing"
87
+ @receiver.send(@port_name).send_message(message)
67
88
  end
68
89
  end
69
90
 
70
- end # class RFlow
91
+ # Manually shuffle messages in-process from one output port to an input
92
+ # port. Can be used to get a Facade pattern effect - to have one component
93
+ # contain other components within it, shuttling messages in and out without
94
+ # making the internal component visible to the larger RFlow network.
95
+ class ForwardToInputPort < Connection
96
+ def initialize(receiver, port_name, port_key)
97
+ super(RFlow::Configuration::NullConfiguration.new)
98
+ @receiver = receiver
99
+ @port_name = port_name.to_sym
100
+ @port_key = port_key
101
+ end
102
+
103
+ def send_message(message)
104
+ @receiver.process_message(@receiver.send(@port_name), @port_key, self, message)
105
+ end
106
+ end
107
+ end
@@ -1,2 +1 @@
1
- # Load the core connection types
2
1
  require 'rflow/connections/zmq_connection'
@@ -1,12 +1,10 @@
1
1
  require 'em-zeromq'
2
-
3
2
  require 'rflow/connection'
4
3
  require 'rflow/message'
5
4
 
6
5
  class RFlow
7
6
  module Connections
8
7
  class ZMQConnection < RFlow::Connection
9
-
10
8
  class << self
11
9
  attr_accessor :zmq_context
12
10
 
@@ -18,8 +16,7 @@ class RFlow
18
16
  EM::ZeroMQ::Context.new(1)
19
17
  end
20
18
 
21
- # Returns the current ZeroMQ context object or creates it if
22
- # it does not exist.
19
+ # Returns the current ZeroMQ context object or creates it if it does not exist.
23
20
  def zmq_context
24
21
  @zmq_context ||= create_zmq_context
25
22
  end
@@ -27,63 +24,42 @@ class RFlow
27
24
 
28
25
  def zmq_context; self.class.zmq_context; end
29
26
 
27
+ private
30
28
  attr_accessor :input_socket, :output_socket
31
29
 
30
+ public
32
31
  def initialize(config)
33
32
  super
34
33
  validate_options!
35
- # Cause the ZMQ context to be created before the reactor is running
36
- zmq_context
37
- end
38
-
39
-
40
- def validate_options!
41
- # TODO: Normalize/validate configuration
42
- missing_options = []
43
-
44
- ['input', 'output'].each do |direction_prefix|
45
- ['_socket_type', '_address', '_responsibility'].each do |option_suffix|
46
- option_name = "#{direction_prefix}#{option_suffix}"
47
- unless options.include? option_name
48
- missing_options << option_name
49
- end
50
- end
51
- end
52
-
53
- unless missing_options.empty?
54
- raise ArgumentError, "#{self.class.to_s}: configuration missing options: #{missing_options.join ', '}"
55
- end
56
-
57
- true
34
+ zmq_context # cause the ZMQ context to be created before the reactor is running
58
35
  end
59
36
 
60
-
61
37
  def connect_input!
62
38
  RFlow.logger.debug "Connecting input #{uuid} with #{options.find_all {|k, v| k.to_s =~ /input/}}"
63
39
  self.input_socket = zmq_context.socket(ZMQ.const_get(options['input_socket_type'].to_sym))
64
- input_socket.send(options['input_responsibility'].to_sym,
65
- options['input_address'])
40
+ input_socket.send(options['input_responsibility'].to_sym, options['input_address'])
66
41
 
67
42
  input_socket.on(:message) do |*message_parts|
68
- message = RFlow::Message.from_avro(message_parts.last.copy_out_string)
69
- RFlow.logger.debug "#{name}: Received message of type '#{message_parts.first.copy_out_string}'"
70
- message_parts.each { |part| part.close } # avoid memory leaks
71
- recv_callback.call(message)
43
+ begin
44
+ message = RFlow::Message.from_avro(message_parts.last.copy_out_string)
45
+ RFlow.logger.debug "#{name}: Received message of type '#{message_parts.first.copy_out_string}'"
46
+ message_parts.each(&:close) # avoid memory leaks
47
+ recv_callback.call(message)
48
+ rescue Exception => e
49
+ RFlow.logger.error "#{name}: Exception processing message of type '#{message.data_type_name}': #{e.message}, because: #{e.backtrace}"
50
+ end
72
51
  end
73
52
 
74
53
  input_socket
75
54
  end
76
55
 
77
-
78
56
  def connect_output!
79
57
  RFlow.logger.debug "Connecting output #{uuid} with #{options.find_all {|k, v| k.to_s =~ /output/}}"
80
58
  self.output_socket = zmq_context.socket(ZMQ.const_get(options['output_socket_type'].to_sym))
81
- output_socket.send(options['output_responsibility'].to_sym,
82
- options['output_address'].to_s)
59
+ output_socket.send(options['output_responsibility'].to_sym, options['output_address'].to_s)
83
60
  output_socket
84
61
  end
85
62
 
86
-
87
63
  # TODO: fix this tight loop of retries
88
64
  def send_message(message)
89
65
  RFlow.logger.debug "#{name}: Sending message of type '#{message.data_type_name.to_s}'"
@@ -97,6 +73,26 @@ class RFlow
97
73
  end
98
74
  end
99
75
 
76
+ private
77
+ def validate_options!
78
+ # TODO: Normalize/validate configuration
79
+ missing_options = []
80
+
81
+ ['input', 'output'].each do |direction_prefix|
82
+ ['_socket_type', '_address', '_responsibility'].each do |option_suffix|
83
+ option_name = "#{direction_prefix}#{option_suffix}"
84
+ unless options.include? option_name
85
+ missing_options << option_name
86
+ end
87
+ end
88
+ end
89
+
90
+ unless missing_options.empty?
91
+ raise ArgumentError, "#{self.class.to_s}: configuration missing options: #{missing_options.join ', '}"
92
+ end
93
+
94
+ true
95
+ end
100
96
  end
101
97
  end
102
98
  end
@@ -0,0 +1,155 @@
1
+ class RFlow
2
+ class DaemonProcess
3
+ def initialize(name, role = name)
4
+ @name = name
5
+ @role = role
6
+ end
7
+
8
+ def daemonize!
9
+ RFlow.logger.info "#{@name} daemonizing"
10
+ establish_daemon_pipe
11
+ drop_database_connections
12
+
13
+ parent = fork
14
+ if parent
15
+ exit_after_daemon_starts
16
+ else
17
+ daemonize_process
18
+ end
19
+ end
20
+
21
+ def run!
22
+ register_logging_context
23
+ update_process_name
24
+ handle_signals
25
+ spawn_subprocesses
26
+ signal_successful_start
27
+
28
+ RFlow.logger.info "#{@role} started"
29
+ run_process
30
+ ensure
31
+ unhandle_signals
32
+ end
33
+
34
+ def spawn_subprocesses; end
35
+ def subprocesses; []; end
36
+
37
+ def shutdown!(reason)
38
+ RFlow.logger.info "#{@name} shutting down due to #{reason}"
39
+ unhandle_signals
40
+ signal_subprocesses('QUIT')
41
+ RFlow.logger.info "#{@name} exiting"
42
+ end
43
+
44
+ private
45
+ def establish_daemon_pipe
46
+ @daemon_pipe_r, @daemon_pipe_w = IO.pipe
47
+ [@daemon_pipe_r, @daemon_pipe_w].each {|io| io.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC) }
48
+ end
49
+
50
+ # Holding database connections over the fork causes problems. Instead,
51
+ # let them be automatically restored after the fork.
52
+ def drop_database_connections
53
+ ::ActiveRecord::Base.clear_all_connections!
54
+ end
55
+
56
+ def exit_after_daemon_starts
57
+ @daemon_pipe_w.close
58
+
59
+ # Parent waits for a PID on the pipe indicating that the
60
+ # child successfully started.
61
+ child_pid = (@daemon_pipe_r.readpartial(16) rescue nil).to_i
62
+ @daemon_pipe_r.close
63
+ if child_pid > 1
64
+ RFlow.logger.info "#{@role} indicated successful daemonization"
65
+ exit 0
66
+ else
67
+ RFlow.logger.error "#{@role} failed to start"
68
+ exit! 1
69
+ end
70
+ end
71
+
72
+ def daemonize_process
73
+ @daemon_pipe_r.close
74
+ Process.daemon(true, true)
75
+ close_stdio_streams
76
+ end
77
+
78
+ def close_stdio_streams
79
+ $stdout.sync = $stderr.sync = true
80
+ [$stdin, $stdout, $stderr].each do |stream|
81
+ stream.binmode
82
+ begin; stream.reopen "/dev/null"; rescue ::Exception; end
83
+ end
84
+ end
85
+
86
+ def register_logging_context
87
+ # arrange for process's name to appear in log messages
88
+ Log4r::NDC.clear
89
+ Log4r::NDC.push @name
90
+ end
91
+
92
+ def clone_logging_context
93
+ Log4r::NDC.clone_stack
94
+ end
95
+
96
+ def apply_logging_context(context)
97
+ Log4r::NDC.inherit(context)
98
+ end
99
+
100
+ def update_process_name
101
+ # set the visible process name to match the process's name
102
+ $0 = @name
103
+ end
104
+
105
+ def handle_signals
106
+ ['SIGTERM', 'SIGINT', 'SIGQUIT', 'SIGCHLD'].each do |signal|
107
+ trap_signal(signal) do
108
+ shutdown! signal
109
+ exit! 0
110
+ end
111
+ end
112
+
113
+ trap_signal 'SIGUSR1' do
114
+ RFlow.logger.reopen
115
+ signal_subprocesses signal
116
+ end
117
+
118
+ trap_signal 'SIGUSR2' do
119
+ RFlow.logger.toggle_log_level
120
+ signal_subprocesses signal
121
+ end
122
+ end
123
+
124
+ def unhandle_signals
125
+ ['SIGTERM', 'SIGINT', 'SIGQUIT', 'SIGCHLD', 'SIGUSR1', 'SIGUSR2'].each do |signal|
126
+ Signal.trap signal, 'DEFAULT'
127
+ end
128
+ end
129
+
130
+ def trap_signal(signal)
131
+ # Log4r and traps don't mix, so we need to put it in another thread
132
+ context = clone_logging_context
133
+ Signal.trap signal do
134
+ Thread.new do
135
+ apply_logging_context context
136
+ yield
137
+ end.join
138
+ end
139
+ end
140
+
141
+ def signal_successful_start
142
+ if @daemon_pipe_w
143
+ @daemon_pipe_w.syswrite($$.to_s)
144
+ @daemon_pipe_w.close rescue nil
145
+ end
146
+ end
147
+
148
+ def signal_subprocesses(signal)
149
+ subprocesses.each do |p|
150
+ RFlow.logger.info "Signaling #{p.name} with #{signal}"
151
+ Process.kill(signal, p.pid)
152
+ end
153
+ end
154
+ end
155
+ end
data/lib/rflow/logger.rb CHANGED
@@ -9,37 +9,29 @@ class RFlow
9
9
  DATE_METHOD = 'xmlschema(6)'
10
10
  LOG_PATTERN_FORMATTER = PatternFormatter.new :pattern => LOG_PATTERN_FORMAT, :date_method => DATE_METHOD
11
11
 
12
- attr_accessor :config, :internal_logger
13
- attr_accessor :log_file_path, :log_level
12
+ private
13
+ attr_accessor :internal_logger
14
+ attr_accessor :log_file_path, :log_level, :log_name
15
+
16
+ public
17
+
18
+ # make sure Log4r is initialized; ignored if custom levels are already set
19
+ Log4r.define_levels(*Log4rConfig::LogLevels)
14
20
 
15
21
  # Delegate log methods to internal logger
16
- def_delegators :@internal_logger, :fatal, :error, :warn, :info, :debug
22
+ def_delegators :@internal_logger,
23
+ *Log4r::LNAMES.map(&:downcase).map(&:to_sym),
24
+ *Log4r::LNAMES.map(&:downcase).map {|n| "#{n}?".to_sym }
17
25
 
18
- def initialize(config, include_stdout=nil)
26
+ def initialize(config, include_stdout = false)
19
27
  @log_file_path = config['rflow.log_file_path']
20
28
  @log_level = config['rflow.log_level']
29
+ @log_name = (config['rflow.application_name'] || File.basename(log_file_path))
21
30
 
22
- log_name = (config['rflow.application_name'] rescue File.basename(log_file_path))
23
- @internal_logger = Log4r::Logger.new(log_name)
24
-
25
- internal_logger.level = LNAMES.index log_level
26
- internal_logger.trace = true
27
-
28
- begin
29
- internal_logger.add FileOutputter.new('rflow.log_file', :filename => log_file_path, :formatter => LOG_PATTERN_FORMATTER)
30
- rescue Exception => e
31
- error_message = "Log file '#{File.expand_path log_file_path}' problem: #{e.message}\b#{e.backtrace.join("\n")}"
32
- RFlow.logger.error error_message
33
- raise ArgumentError, error_message
34
- end
35
-
36
- if include_stdout
37
- internal_logger.add StdoutOutputter.new('rflow_stdout', :formatter => LOG_PATTERN_FORMATTER)
38
- end
39
-
40
- # RFlow.logger.info "Transitioning to running log file #{log_file_path} at level #{log_level}"
41
- Log4r::NDC.clear
42
- Log4r::NDC.push(log_name)
31
+ establish_internal_logger
32
+ hook_up_logfile
33
+ hook_up_stdout if include_stdout
34
+ register_logging_context
43
35
 
44
36
  internal_logger
45
37
  end
@@ -63,5 +55,29 @@ class RFlow
63
55
  internal_logger.level = LNAMES.index new_log_level
64
56
  end
65
57
 
58
+ private
59
+ def establish_internal_logger
60
+ @internal_logger = Log4r::Logger.new(log_name).tap do |logger|
61
+ logger.level = LNAMES.index log_level
62
+ logger.trace = true
63
+ end
64
+ end
65
+
66
+ def hook_up_logfile
67
+ begin
68
+ internal_logger.add FileOutputter.new('rflow.log_file', :filename => log_file_path, :formatter => LOG_PATTERN_FORMATTER)
69
+ rescue Exception => e
70
+ raise ArgumentError, "Log file '#{File.expand_path log_file_path}' problem: #{e.message}\b#{e.backtrace.join("\n")}"
71
+ end
72
+ end
73
+
74
+ def hook_up_stdout
75
+ internal_logger.add StdoutOutputter.new('rflow_stdout', :formatter => LOG_PATTERN_FORMATTER)
76
+ end
77
+
78
+ def register_logging_context
79
+ Log4r::NDC.clear
80
+ Log4r::NDC.push(log_name)
81
+ end
66
82
  end
67
83
  end