rflow 0.0.5 → 1.0.0a1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. checksums.yaml +4 -4
  2. data/.ruby-gemset +1 -0
  3. data/.ruby-version +1 -0
  4. data/.travis.yml +21 -0
  5. data/.yardopts +1 -0
  6. data/Gemfile +5 -1
  7. data/Guardfile +8 -0
  8. data/LICENSE +190 -0
  9. data/NOTES +26 -13
  10. data/README.md +448 -0
  11. data/Rakefile +5 -12
  12. data/bin/rflow +23 -20
  13. data/example/basic_config.rb +2 -2
  14. data/example/basic_extensions.rb +8 -8
  15. data/example/http_config.rb +1 -1
  16. data/example/http_extensions.rb +15 -15
  17. data/lib/rflow.rb +15 -387
  18. data/lib/rflow/component.rb +105 -50
  19. data/lib/rflow/component/port.rb +25 -24
  20. data/lib/rflow/components/raw.rb +4 -4
  21. data/lib/rflow/components/raw/extensions.rb +2 -2
  22. data/lib/rflow/configuration.rb +54 -36
  23. data/lib/rflow/configuration/component.rb +2 -3
  24. data/lib/rflow/configuration/connection.rb +9 -10
  25. data/lib/rflow/configuration/migrations/{20010101000001_create_settings.rb → 20010101000000_create_settings.rb} +2 -2
  26. data/lib/rflow/configuration/migrations/20010101000001_create_shards.rb +21 -0
  27. data/lib/rflow/configuration/migrations/20010101000002_create_components.rb +7 -2
  28. data/lib/rflow/configuration/migrations/20010101000003_create_ports.rb +3 -3
  29. data/lib/rflow/configuration/migrations/20010101000004_create_connections.rb +2 -2
  30. data/lib/rflow/configuration/port.rb +3 -4
  31. data/lib/rflow/configuration/ruby_dsl.rb +59 -35
  32. data/lib/rflow/configuration/setting.rb +8 -7
  33. data/lib/rflow/configuration/shard.rb +24 -0
  34. data/lib/rflow/configuration/uuid_keyed.rb +3 -3
  35. data/lib/rflow/connection.rb +21 -10
  36. data/lib/rflow/connections/zmq_connection.rb +45 -44
  37. data/lib/rflow/logger.rb +67 -0
  38. data/lib/rflow/master.rb +127 -0
  39. data/lib/rflow/message.rb +14 -14
  40. data/lib/rflow/pid_file.rb +84 -0
  41. data/lib/rflow/shard.rb +148 -0
  42. data/lib/rflow/version.rb +1 -1
  43. data/rflow.gemspec +22 -28
  44. data/schema/message.avsc +8 -8
  45. data/spec/fixtures/config_ints.rb +4 -4
  46. data/spec/fixtures/config_shards.rb +30 -0
  47. data/spec/fixtures/extensions_ints.rb +8 -8
  48. data/spec/rflow_component_port_spec.rb +58 -0
  49. data/spec/rflow_configuration_ruby_dsl_spec.rb +148 -0
  50. data/spec/rflow_configuration_spec.rb +4 -4
  51. data/spec/rflow_message_data_raw.rb +2 -2
  52. data/spec/rflow_message_data_spec.rb +6 -6
  53. data/spec/rflow_message_spec.rb +13 -13
  54. data/spec/rflow_spec.rb +294 -71
  55. data/spec/schema_spec.rb +2 -2
  56. data/spec/spec_helper.rb +6 -4
  57. data/temp.rb +21 -21
  58. metadata +56 -65
  59. data/.rvmrc +0 -1
  60. data/README +0 -0
@@ -5,12 +5,12 @@ class RFlow
5
5
  module UUIDKeyed
6
6
  def self.included(base)
7
7
  base.class_eval do
8
- set_primary_key 'uuid'
8
+ self.primary_key = 'uuid'
9
9
  before_create :generate_uuid
10
-
10
+
11
11
  def generate_uuid
12
12
  self.uuid = UUIDTools::UUID.random_create.to_s
13
- end
13
+ end
14
14
  end
15
15
  end
16
16
  end
@@ -2,15 +2,26 @@ require 'rflow/message'
2
2
 
3
3
  class RFlow
4
4
  class Connection
5
- attr_accessor :instance_uuid, :name, :configuration, :recv_callback
6
5
 
7
- # Attribute that holds the
6
+ class << self
7
+ def build(config)
8
+ case config.type
9
+ when 'RFlow::Configuration::ZMQConnection'
10
+ RFlow::Connections::ZMQConnection.new(config)
11
+ else
12
+ raise ArgumentError, "Only ZMQConnections currently supported"
13
+ end
14
+ end
15
+ end
16
+
17
+ attr_accessor :config, :uuid, :name, :options
8
18
  attr_accessor :recv_callback
9
-
10
- def initialize(connection_instance_uuid, connection_name=nil, connection_configuration={})
11
- @instance_uuid = connection_instance_uuid
12
- @name = connection_name
13
- @configuration = connection_configuration
19
+
20
+ def initialize(config)
21
+ @config = config
22
+ @uuid = config.uuid
23
+ @name = config.name
24
+ @options = config.options
14
25
  end
15
26
 
16
27
 
@@ -29,7 +40,7 @@ class RFlow
29
40
  raise NotImplementedError, "Raw connections do not support connect_output. Please subclass and define a connect_output method."
30
41
  end
31
42
 
32
-
43
+
33
44
  # Subclass and implement to handle outgoing messages. The message
34
45
  # will be a RFlow::Message object and the subclasses are expected
35
46
  # to marshal it up into something that will be unmarshalled on the
@@ -47,7 +58,7 @@ class RFlow
47
58
  def recv_callback
48
59
  @recv_callback ||= Proc.new {|message|}
49
60
  end
50
-
61
+
51
62
  end # class Connection
52
63
 
53
64
  class Disconnection < Connection
@@ -55,5 +66,5 @@ class RFlow
55
66
  RFlow.logger.debug "Attempting to send without a connection, doing nothing"
56
67
  end
57
68
  end
58
-
69
+
59
70
  end # class RFlow
@@ -1,6 +1,4 @@
1
- #require 'ffi'
2
- #require 'ffi-rzmq'
3
- require 'em-zeromq-mri'
1
+ require 'em-zeromq'
4
2
 
5
3
  require 'rflow/connection'
6
4
  require 'rflow/message'
@@ -14,88 +12,91 @@ class RFlow
14
12
 
15
13
  def create_zmq_context
16
14
  RFlow.logger.debug "Creating a new ZeroMQ context"
17
- unless EM.reactor_running?
18
- raise RuntimeError, "EventMachine reactor is not running when attempting to create a ZeroMQ context"
15
+ if EM.reactor_running?
16
+ raise RuntimeError, "EventMachine reactor is running when attempting to create a ZeroMQ context"
19
17
  end
20
18
  EM::ZeroMQ::Context.new(1)
21
19
  end
22
-
20
+
23
21
  # Returns the current ZeroMQ context object or creates it if
24
- # it does not exist. Assumes that we are within a running
25
- # EventMachine reactor
22
+ # it does not exist.
26
23
  def zmq_context
27
24
  @zmq_context ||= create_zmq_context
28
25
  end
29
26
  end
30
27
 
31
- attr_accessor :socket
28
+ def zmq_context; self.class.zmq_context; end
29
+
30
+ attr_accessor :input_socket, :output_socket
32
31
 
33
- REQUIRED_OPTION_SUFFIXES = ['_socket_type', '_address', '_responsibility']
32
+ def initialize(config)
33
+ super
34
+ validate_options!
35
+ # Cause the ZMQ context to be created before the reactor is running
36
+ zmq_context
37
+ end
34
38
 
35
- def self.configuration_errors(configuration)
39
+
40
+ def validate_options!
36
41
  # TODO: Normalize/validate configuration
37
- missing_config_elements = []
42
+ missing_options = []
38
43
 
39
44
  ['input', 'output'].each do |direction_prefix|
40
- REQUIRED_OPTION_SUFFIXES.each do |option_suffix|
41
- config_element = "#{direction_prefix}#{option_suffix}"
42
- unless configuration.include? config_element
43
- missing_config_elements << config_element
45
+ ['_socket_type', '_address', '_responsibility'].each do |option_suffix|
46
+ option_name = "#{direction_prefix}#{option_suffix}"
47
+ unless options.include? option_name
48
+ missing_options << option_name
44
49
  end
45
50
  end
46
51
  end
47
52
 
48
- missing_config_elements
49
- end
50
-
51
-
52
- def initialize(connection_instance_uuid, connection_name, connection_configuration)
53
- configuration_errors = self.class.configuration_errors(connection_configuration)
54
- unless configuration_errors.empty?
55
- raise ArgumentError, "#{self.class.to_s}: configuration missing elements: #{configuration_errors.join ', '}"
53
+ unless missing_options.empty?
54
+ raise ArgumentError, "#{self.class.to_s}: configuration missing options: #{missing_options.join ', '}"
56
55
  end
57
56
 
58
- super
57
+ true
59
58
  end
60
59
 
61
-
60
+
62
61
  def connect_input!
63
- RFlow.logger.debug "Connecting input #{instance_uuid} with #{configuration.find_all {|k, v| k.to_s =~ /input/}}"
64
- self.socket = self.class.zmq_context.send(configuration['input_responsibility'],
65
- ZMQ.const_get(configuration['input_socket_type'].to_sym),
66
- configuration['input_address'],
67
- self)
62
+ RFlow.logger.debug "Connecting input #{uuid} with #{options.find_all {|k, v| k.to_s =~ /input/}}"
63
+ self.input_socket = zmq_context.socket(ZMQ.const_get(options['input_socket_type'].to_sym))
64
+ input_socket.send(options['input_responsibility'].to_sym,
65
+ options['input_address'])
66
+
67
+ input_socket.on(:message) do |*message_parts|
68
+ message = RFlow::Message.from_avro(message_parts.last.copy_out_string)
69
+ RFlow.logger.debug "#{name}: Received message of type '#{message_parts.first.copy_out_string}'"
70
+ message_parts.each { |part| part.close } # avoid memory leaks
71
+ recv_callback.call(message)
72
+ end
73
+
74
+ input_socket
68
75
  end
69
76
 
70
77
 
71
78
  def connect_output!
72
- RFlow.logger.debug "Connecting output #{instance_uuid} with #{configuration.find_all {|k, v| k.to_s =~ /output/}}"
73
- self.socket = self.class.zmq_context.send(configuration['output_responsibility'].to_s,
74
- ZMQ.const_get(configuration['output_socket_type'].to_sym),
75
- configuration['output_address'].to_s,
76
- self)
79
+ RFlow.logger.debug "Connecting output #{uuid} with #{options.find_all {|k, v| k.to_s =~ /output/}}"
80
+ self.output_socket = zmq_context.socket(ZMQ.const_get(options['output_socket_type'].to_sym))
81
+ output_socket.send(options['output_responsibility'].to_sym,
82
+ options['output_address'].to_s)
83
+ output_socket
77
84
  end
78
85
 
79
86
 
80
- def on_readable(socket, message_parts)
81
- message = RFlow::Message.from_avro(message_parts.last.copy_out_string)
82
- RFlow.logger.debug "#{name}: Received message of type '#{message_parts.first.copy_out_string}'"
83
- recv_callback.call(message)
84
- end
85
-
86
87
  # TODO: fix this tight loop of retries
87
88
  def send_message(message)
88
89
  RFlow.logger.debug "#{name}: Sending message of type '#{message.data_type_name.to_s}'"
89
90
 
90
91
  begin
91
- socket.send_msg(message.data_type_name.to_s, message.to_avro)
92
+ output_socket.send_msg(message.data_type_name.to_s, message.to_avro)
92
93
  RFlow.logger.debug "#{name}: Successfully sent message of type '#{message.data_type_name.to_s}'"
93
94
  rescue Exception => e
94
95
  RFlow.logger.debug "Exception #{e.class}: #{e.message}, retrying send"
95
96
  retry
96
97
  end
97
98
  end
98
-
99
+
99
100
  end
100
101
  end
101
102
  end
@@ -0,0 +1,67 @@
1
+ require 'log4r'
2
+
3
+ class RFlow
4
+ class Logger
5
+ extend Forwardable
6
+ include Log4r
7
+
8
+ LOG_PATTERN_FORMAT = '%l [%d] %x (%p) - %M'
9
+ DATE_METHOD = 'xmlschema(6)'
10
+ LOG_PATTERN_FORMATTER = PatternFormatter.new :pattern => LOG_PATTERN_FORMAT, :date_method => DATE_METHOD
11
+
12
+ attr_accessor :config, :internal_logger
13
+ attr_accessor :log_file_path, :log_level
14
+
15
+ # Delegate log methods to internal logger
16
+ def_delegators :@internal_logger, :fatal, :error, :warn, :info, :debug
17
+
18
+ def initialize(config, include_stdout=nil)
19
+ @log_file_path = config['rflow.log_file_path']
20
+ @log_level = config['rflow.log_level']
21
+
22
+ log_name = (config['rflow.application_name'] rescue File.basename(log_file_path))
23
+ @internal_logger = Log4r::Logger.new(log_name)
24
+
25
+ internal_logger.level = LNAMES.index log_level
26
+ internal_logger.trace = true
27
+
28
+ begin
29
+ internal_logger.add FileOutputter.new('rflow.log_file', :filename => log_file_path, :formatter => LOG_PATTERN_FORMATTER)
30
+ rescue Exception => e
31
+ error_message = "Log file '#{File.expand_path log_file_path}' problem: #{e.message}\b#{e.backtrace.join("\n")}"
32
+ RFlow.logger.error error_message
33
+ raise ArgumentError, error_message
34
+ end
35
+
36
+ if include_stdout
37
+ internal_logger.add StdoutOutputter.new('rflow_stdout', :formatter => LOG_PATTERN_FORMATTER)
38
+ end
39
+
40
+ # RFlow.logger.info "Transitioning to running log file #{log_file_path} at level #{log_level}"
41
+ Log4r::NDC.clear
42
+ Log4r::NDC.push(log_name)
43
+
44
+ internal_logger
45
+ end
46
+
47
+ def reopen
48
+ # TODO: Make this less of a hack, although Log4r doesn't support
49
+ # it, so it might be permanent
50
+ log_file = Outputter['rflow.log_file'].instance_variable_get(:@out)
51
+ File.open(log_file.path, 'a') { |tmp_log_file| log_file.reopen(tmp_log_file) }
52
+ end
53
+
54
+ def close
55
+ Outputter['rflow.log_file'].close
56
+ end
57
+
58
+ def toggle_log_level
59
+ original_log_level = LNAMES[logger.level]
60
+ new_log_level = (original_log_level == 'DEBUG' ? log_level : 'DEBUG')
61
+
62
+ internal_logger.warn "Changing log level from #{original_log_level} to #{new_log_level}"
63
+ internal_logger.level = LNAMES.index new_log_level
64
+ end
65
+
66
+ end
67
+ end
@@ -0,0 +1,127 @@
1
+ require 'rflow/pid_file'
2
+ require 'rflow/shard'
3
+
4
+ class RFlow
5
+ class Master
6
+
7
+ attr_accessor :name, :pid_file, :ready_write
8
+ attr_accessor :shards
9
+
10
+ def initialize(config)
11
+ @name = config['rflow.application_name']
12
+ @pid_file = PIDFile.new(config['rflow.pid_file_path'])
13
+ @shards = config.shards.map do |shard_config|
14
+ Shard.new(shard_config)
15
+ end
16
+ end
17
+
18
+ def handle_signals
19
+ # Gracefully shutdown on termination signals
20
+ ['SIGTERM', 'SIGINT', 'SIGQUIT', 'SIGCHLD'].each do |signal|
21
+ Signal.trap signal do
22
+ # Log4r and traps don't mix, so we need to put it in another thread
23
+ Thread.new { shutdown(signal) }.join
24
+ end
25
+ end
26
+
27
+ # Reopen logs on USR1
28
+ ['SIGUSR1'].each do |signal|
29
+ Signal.trap signal do
30
+ Thread.new do
31
+ RFlow.logger.reopen
32
+ signal_workers(signal)
33
+ end.join
34
+ end
35
+ end
36
+
37
+ # Toggle log level on USR2
38
+ ['SIGUSR2'].each do |signal|
39
+ Signal.trap signal do
40
+ Thread.new do
41
+ RFlow.logger.toggle_log_level
42
+ signal_workers(signal)
43
+ end.join
44
+ end
45
+ end
46
+ end
47
+
48
+ def run
49
+ Log4r::NDC.clear
50
+ Log4r::NDC.push name
51
+ $0 = name
52
+
53
+ shards.each {|s| s.run!}
54
+
55
+ handle_signals
56
+
57
+ # Signal the grandparent that we are running
58
+ if ready_write
59
+ ready_write.syswrite($$.to_s)
60
+ ready_write.close rescue nil
61
+ end
62
+
63
+ pid_file.write
64
+
65
+ RFlow.logger.info "Master started"
66
+
67
+ EM.run do
68
+ # TODO: Monitor the workers
69
+ end
70
+
71
+ @pid_file.safe_unlink
72
+ end
73
+
74
+ def daemonize!
75
+ RFlow.logger.info "#{name} daemonizing"
76
+
77
+ ready_read, @ready_write = IO.pipe
78
+ [ready_read, @ready_write].each { |io| io.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC) }
79
+
80
+ grandparent = $$
81
+
82
+ if fork
83
+ # Grandparent waits for a PID on the pipe indicating that the
84
+ # master successfully started.
85
+ @ready_write.close # grandparent does not write
86
+ master_pid = (ready_read.readpartial(16) rescue nil).to_i
87
+ unless master_pid > 1
88
+ RFlow.logger.error "Master failed to start"
89
+ exit! 1
90
+ end
91
+ RFlow.logger.info "Master indicated successful daemonization"
92
+ exit 0
93
+ end
94
+
95
+ Process.daemon(true, true)
96
+
97
+ ready_read.close # master does not read
98
+
99
+ # Close standard IO
100
+ $stdout.sync = $stderr.sync = true
101
+ $stdin.binmode; $stdout.binmode; $stderr.binmode
102
+ begin; $stdin.reopen "/dev/null"; rescue ::Exception; end
103
+ begin; $stdout.reopen "/dev/null"; rescue ::Exception; end
104
+ begin; $stderr.reopen "/dev/null"; rescue ::Exception; end
105
+
106
+ $$
107
+ end
108
+
109
+ def signal_workers(signal)
110
+ shards.each do |shard|
111
+ shard.workers.each do |worker|
112
+ RFlow.logger.info "Signalling #{worker.name} with #{signal}"
113
+ Process.kill(signal, worker.pid)
114
+ end
115
+ end
116
+ end
117
+
118
+ def shutdown(reason)
119
+ RFlow.logger.info "#{name} shutting down due to #{reason}"
120
+ signal_workers('QUIT')
121
+ pid_file.safe_unlink
122
+ RFlow.logger.info "#{name} exiting"
123
+ exit 0
124
+ end
125
+
126
+ end
127
+ end
@@ -13,7 +13,7 @@ class RFlow
13
13
 
14
14
  class << self
15
15
  def avro_message_schema; @avro_message_schema ||= Avro::Schema.parse(File.read(File.join(File.dirname(__FILE__), '..', '..', 'schema', 'message.avsc'))); end
16
-
16
+
17
17
  def avro_reader; @avro_reader ||= Avro::IO::DatumReader.new(avro_message_schema, avro_message_schema); end
18
18
  def avro_writer; @avro_writer ||= Avro::IO::DatumWriter.new(avro_message_schema); end
19
19
  def avro_decoder(io_object); Avro::IO::BinaryDecoder.new(io_object); end
@@ -29,8 +29,8 @@ class RFlow
29
29
  message_hash['data'])
30
30
  end
31
31
  end
32
-
33
-
32
+
33
+
34
34
  # Serialize the current message object to Avro using the
35
35
  # org.rflow.Message Avro schema. Note that we have to manually
36
36
  # set the encoding for Ruby 1.9, otherwise the stringio would use
@@ -52,14 +52,14 @@ class RFlow
52
52
  self.class.avro_writer.write deserialized_avro_object, self.class.avro_encoder(avro_serialized_message_bytes_stringio)
53
53
  avro_serialized_message_bytes
54
54
  end
55
-
55
+
56
56
 
57
57
  attr_reader :data_type_name
58
58
  attr_accessor :processing_event
59
59
  attr_accessor :provenance
60
60
  attr_reader :data, :data_extensions
61
61
 
62
-
62
+
63
63
  def initialize(data_type_name, provenance=[], data_serialization_type='avro', data_schema_string=nil, serialized_data_object=nil)
64
64
  # Default the values, in case someone puts in a nil instead of
65
65
  # the default
@@ -79,7 +79,7 @@ class RFlow
79
79
  processing_event_hash_or_object['context'])
80
80
  end
81
81
  end
82
-
82
+
83
83
  # TODO: Make this better. This check is technically
84
84
  # unnecessary, as we are able to completely desrialize the
85
85
  # message without needing to resort to the registered schema.
@@ -97,7 +97,7 @@ class RFlow
97
97
  RFlow.logger.error error_message
98
98
  raise ArgumentError, error_message
99
99
  end
100
-
100
+
101
101
  @data = Data.new(registered_data_schema_string, data_serialization_type.to_s, serialized_data_object)
102
102
 
103
103
  # Get the extensions and apply them to the data object to add capability
@@ -115,16 +115,16 @@ class RFlow
115
115
  def initialize(component_instance_uuid, started_at=nil, completed_at=nil, context=nil)
116
116
  @component_instance_uuid = component_instance_uuid
117
117
  @started_at = case started_at
118
- when String then Time.xmlschema(started_at)
118
+ when String then Time.xmlschema(started_at)
119
119
  when Time then started_at
120
120
  else; nil; end
121
121
  @completed_at = case completed_at
122
- when String then Time.xmlschema(completed_at)
122
+ when String then Time.xmlschema(completed_at)
123
123
  when Time then completed_at
124
124
  else; nil; end
125
125
  @context = context
126
126
  end
127
-
127
+
128
128
  def to_hash
129
129
  {
130
130
  'component_instance_uuid' => component_instance_uuid.to_s,
@@ -134,7 +134,7 @@ class RFlow
134
134
  }
135
135
  end
136
136
  end
137
-
137
+
138
138
  # Should proxy most methods to data_object that we can serialize
139
139
  # to avro using the schema. Extensions should use 'extended' hook
140
140
  # to apply immediate changes.
@@ -159,7 +159,7 @@ class RFlow
159
159
  RFlow.logger.error error_message
160
160
  raise ArgumentError, error_message
161
161
  end
162
-
162
+
163
163
  if serialized_data_object
164
164
  serialized_data_object.force_encoding 'BINARY'
165
165
  avro_decoder = Avro::IO::BinaryDecoder.new StringIO.new(serialized_data_object)
@@ -170,7 +170,7 @@ class RFlow
170
170
  def valid?
171
171
  Avro::Schema.validate @schema, @data_object
172
172
  end
173
-
173
+
174
174
  def to_avro
175
175
  serialized_data_object_bytes = ''
176
176
  serialized_data_object_bytes.force_encoding 'BINARY'
@@ -186,6 +186,6 @@ class RFlow
186
186
  @data_object.send(method_sym, *args, &block)
187
187
  end
188
188
  end
189
-
189
+
190
190
  end
191
191
  end