rflow 1.3.0 → 1.3.1

lib/rflow/daemon_process.rb

@@ -1,7 +1,10 @@
 require 'rflow/pid_file'
 
 class RFlow
+  # Encapsulates a master process being managed by RFlow that can run in the foreground
+  # or daemonize.
   class DaemonProcess
+    # Symbolic constant for SIGINFO as this is only defined on BSD and not in Ruby.
     SIGINFO = 29
 
     def initialize(name, role = name, options = {})
@@ -10,6 +13,9 @@ class RFlow
       @pid_file = PIDFile.new(options[:pid_file_path]) if options[:pid_file_path]
     end
 
+    # Daemonize by forking and exiting the parent after handling
+    # IO streams and checking successful start of the new copy.
+    # @return [void]
     def daemonize!
       RFlow.logger.info "#{@name} daemonizing"
       establish_daemon_pipe
@@ -23,6 +29,11 @@ class RFlow
       end
     end
 
+    # Execute the master process. Writes out a pidfile and updates the process
+    # name, installs signal handlers, and spawns all the defined subprocesses.
+    # Finally executes {run_process}; when that returns, it will
+    # exit with the resulting return code.
+    # @return [void]
     def run!
       write_pid_file
       register_logging_context
@@ -38,9 +49,23 @@ class RFlow
       remove_pid_file
     end
 
+    # Default implementation. Subclasses should override to provide logic
+    # for actually spawning subprocesses.
+    # @return [void]
     def spawn_subprocesses; end
+
+    # Default implementation. Subclasses should override to provide logic
+    # for actually doing something useful.
+    # @return [void]
+    def run_process; end
+
+    # A list of {ChildProcess}es to start and signal.
+    # @return [Array<ChildProcess>]
     def subprocesses; []; end
 
+    # Shut down the application. Cleans up the pid file, removes
+    # signal handlers, and signals all child processes with +SIGQUIT+.
+    # @return [void]
    def shutdown!(reason)
      RFlow.logger.info "#{@name} shutting down due to #{reason}"
      remove_pid_file
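
To put the new DaemonProcess documentation in context, here is a rough, hypothetical subclass that exercises only the hooks documented above; MyDaemon is not part of rflow, and the pid file path is just an example.

    # Hypothetical subclass, for illustration only.
    class MyDaemon < RFlow::DaemonProcess
      def initialize
        super('my-daemon', 'Master', { pid_file_path: '/tmp/my-daemon.pid' })
      end

      # Override: start whatever child processes this daemon manages.
      def spawn_subprocesses; end

      # Override: the long-running body of the master process.
      def run_process
        sleep # block forever; a real daemon would do its work here
      end
    end

    daemon = MyDaemon.new
    daemon.daemonize! # fork, handle IO streams, wait for the new copy to start
    daemon.run!       # write the pid file, install signal handlers, spawn children, run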
lib/rflow/logger.rb

@@ -1,12 +1,16 @@
 require 'log4r'
 
 class RFlow
+  # The customized logger for RFlow applications that flows to the configured log file.
   class Logger
     extend Forwardable
     include Log4r
 
+    # @!visibility private
     LOG_PATTERN_FORMAT = '%-5l [%d] %x (%-5p) - %M'
+    # @!visibility private
     DATE_METHOD = 'xmlschema(6)'
+    # @!visibility private
     LOG_PATTERN_FORMATTER = PatternFormatter.new :pattern => LOG_PATTERN_FORMAT, :date_method => DATE_METHOD
 
     private
@@ -14,6 +18,8 @@ class RFlow
     attr_accessor :log_file_path, :log_level, :log_name
 
     public
+    # For the current logging context, how wide the field is where we're going to write the context/process name.
+    # @return [Integer]
     attr_accessor :context_width
 
     # make sure Log4r is initialized; ignored if custom levels are already set
@@ -28,6 +34,8 @@ class RFlow
       reconfigure(config, include_stdout)
     end
 
+    # Reconfigure the log file.
+    # @return [void]
     def reconfigure(config, include_stdout = false)
       @log_file_path = config['rflow.log_file_path']
       @log_level = config['rflow.log_level'] || 'WARN'
@@ -43,6 +51,9 @@ class RFlow
       internal_logger
     end
 
+    # Reopen the logs at their configured filesystem locations. Presumably the previous
+    # log files have been renamed by now.
+    # @return [void]
     def reopen
       # TODO: Make this less of a hack, although Log4r doesn't support
       # it, so it might be permanent
@@ -50,14 +61,21 @@ class RFlow
       File.open(log_file.path, 'a') { |tmp_log_file| log_file.reopen(tmp_log_file) }
     end
 
+    # Close the logger.
+    # @return [void]
     def close
       Outputter['rflow.log_file'].close
     end
 
+    # Update the log level.
+    # @return [void]
     def level=(level)
       internal_logger.level = LNAMES.index(level.to_s) || level
     end
 
+    # Toggle the log level between +DEBUG+ and whatever the default is. The previous
+    # level is saved to be toggled back the next time this method is called.
+    # @return [void]
     def toggle_log_level
       original_log_level = LNAMES[internal_logger.level]
       new_log_level = (original_log_level == 'DEBUG' ? log_level : 'DEBUG')
@@ -66,6 +84,8 @@ class RFlow
       internal_logger.level = LNAMES.index new_log_level
     end
 
+    # Send a complete thread dump of the current process out to the logger.
+    # @return [void]
     def dump_threads
       Thread.list.each do |t|
         info "Thread #{t.inspect}:"
@@ -75,18 +95,27 @@ class RFlow
       info 'Thread dump complete.'
     end
 
+    # Clone the logging context so changes to it will not affect the
+    # existing logging context.
+    # @return [void]
     def clone_logging_context
       Log4r::NDC.clone_stack
     end
 
+    # Replace the current logging context.
+    # @return [void]
     def apply_logging_context(context)
       Log4r::NDC.inherit(context)
     end
 
+    # Clear the current logging context.
+    # @return [void]
     def clear_logging_context
       Log4r::NDC.clear
     end
 
+    # Add more logging context to the stack.
+    # @return [void]
    def add_logging_context(context)
      Log4r::NDC.push context
    end
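
As a hedged sketch of how this logger is driven: the constructor signature below is inferred from the reconfigure(config, include_stdout) call shown above, and the configuration keys come from the same hunk; the file path and context name are placeholders.

    # Assumed usage; only the methods and config keys visible above are relied on.
    logger = RFlow::Logger.new({ 'rflow.log_file_path' => '/var/log/rflow.log',
                                 'rflow.log_level'     => 'INFO' }, true)

    logger.add_logging_context 'my-worker-1' # pushed onto the Log4r NDC stack
    logger.info 'started'                    # the logger responds to level methods
                                             # (dump_threads above calls info on itself)
    logger.toggle_log_level                  # flip to DEBUG; call again to restore
    logger.reopen                            # after external rotation renamed the file
    logger.dump_threads                      # write a full thread dump to the log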
lib/rflow/master.rb

@@ -3,8 +3,15 @@ require 'rflow/shard'
 require 'rflow/broker'
 
 class RFlow
+  # The master/watchdog process for RFlow. Mostly exists to receive +SIGCHLD+ from subprocesses
+  # so it can kill them all with +SIGQUIT+ and get restarted.
   class Master < DaemonProcess
+    # The {Shard}s being managed by the {Master}.
+    # @return [Array<Shard>]
     attr_reader :shards
+
+    # The {Broker}s being managed by the {Master}.
+    # @return [Array<Broker>]
     attr_reader :brokers
 
     def initialize(config)
@@ -14,6 +21,9 @@ class RFlow
       @brokers = config.connections.flat_map(&:brokers).map {|config| Broker.build(config) }
     end
 
+    # Override of {spawn_subprocesses} that actually spawns them,
+    # then calls {Shard#run!} on each.
+    # @return [void]
     def spawn_subprocesses
       RFlow.logger.debug "Running #{brokers.count} brokers" if brokers.count > 0
       brokers.each(&:spawn!)
@@ -22,10 +32,15 @@ class RFlow
       shards.each(&:run!)
     end
 
+    # Override of {subprocesses} that includes the {Broker}s and
+    # every {Shard::Worker} of every {Shard}.
+    # @return [Array<ChildProcess>]
     def subprocesses
       brokers + shards.flat_map(&:workers)
     end
 
+    # Override that starts EventMachine and waits until it gets stopped.
+    # @return [void]
    def run_process
      EM.run do
        # TODO: Monitor the workers
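
Since Master is a DaemonProcess, its outer lifecycle is the inherited one. A hedged sketch of how the pieces fit together at startup (the surrounding CLI code is not part of this diff, and the config object is assumed to come from RFlow::Configuration):

    master = RFlow::Master.new(config) # config supplies the shards and connection brokers
    master.daemonize!                  # optional; inherited from DaemonProcess
    master.run!                        # spawns brokers and shard workers, then
                                       # run_process blocks inside EM.run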
lib/rflow/message.rb

@@ -4,11 +4,20 @@ require 'avro'
 require 'rflow/configuration'
 
 class RFlow
+  # Utility methods for doing Avro encoding/decoding.
   class Avro
+    # Decode serialized Avro data.
+    # @param reader [::Avro::IO::DatumReader] reader preconfigured with schema
+    # @param bytes [String] byte string to decode
+    # @return decoded object
     def self.decode(reader, bytes)
       reader.read ::Avro::IO::BinaryDecoder.new(StringIO.new(bytes.force_encoding('BINARY')))
     end
 
+    # Encode data to serialized Avro.
+    # @param writer [::Avro::IO::DatumWriter] writer preconfigured with schema
+    # @param message [String]
+    # @return [String]
     def self.encode(writer, message)
       String.new.force_encoding('BINARY').tap do |result|
         writer.write message, ::Avro::IO::BinaryEncoder.new(StringIO.new(result, 'w'))
@@ -16,15 +25,21 @@ class RFlow
     end
   end
 
+  # A message to be sent around in the RFlow framework.
   class Message
     class << self
+      # @!visibility private
      def schema; @schema ||= ::Avro::Schema.parse(File.read(File.join(File.dirname(__FILE__), '..', '..', 'schema', 'message.avsc'))); end
+      # @!visibility private
      def message_reader; @message_reader ||= ::Avro::IO::DatumReader.new(schema, schema); end
+      # @!visibility private
      def message_writer; @message_writer ||= ::Avro::IO::DatumWriter.new(schema); end
+      # @!visibility private
      def encode(message); RFlow::Avro.encode(message_writer, message); end
 
      # Take in an Avro serialization of a message and return a new
      # Message object. Assumes the org.rflow.Message Avro schema.
+      # @!visibility private
      def from_avro(bytes)
        message = RFlow::Avro.decode(message_reader, bytes)
        Message.new(message['data_type_name'], message['provenance'], message['properties'],
@@ -33,13 +48,26 @@ class RFlow
      end
    end
 
-    attr_accessor :provenance, :properties
-    attr_reader :data_type_name, :data
+    # The message's provenance information.
+    # @return [Array<ProcessingEvent>]
+    attr_accessor :provenance
+
+    # The message's properties information.
+    # @return [Hash]
+    attr_accessor :properties
+
+    # The data type name of the message.
+    # @return [String]
+    attr_reader :data_type_name
+
+    # The actual data string in the message.
+    # @return [String]
+    attr_reader :data
 
     # When creating a new message as a transformation of an existing
-    # message, its encouraged to copy the provenance and properties of
+    # message, it's encouraged to copy the provenance and properties of
     # the original message into the new message. This allows
-    # downstream components to potentially use these fields
+    # downstream components to potentially use these fields.
     def initialize(data_type_name, provenance = [], properties = {}, serialization_type = 'avro', schema = nil, serialized_data = nil)
       @data_type_name = data_type_name.to_s
 
@@ -86,7 +114,8 @@ class RFlow
     # org.rflow.Message Avro schema. Note that we have to manually
     # set the encoding for Ruby 1.9, otherwise the stringio would use
     # UTF-8 by default, which would not work correctly, as a serialize
-    # avro string is BINARY, not UTF-8
+    # avro string is BINARY, not UTF-8.
+    # @return [String]
     def to_avro
       # stringify all the properties
       string_properties = Hash[properties.map { |k,v| [k.to_s, v.to_s] }]
@@ -99,9 +128,20 @@ class RFlow
                        'data' => data.to_avro)
     end
 
+    # One processing event in the message's provenance.
     class ProcessingEvent
-      attr_reader :component_instance_uuid, :started_at
-      attr_accessor :completed_at, :context
+      # The UUID of the component doing the processing.
+      # @return [String]
+      attr_reader :component_instance_uuid
+      # The time processing started, in XML schema format.
+      # @return [String]
+      attr_reader :started_at
+      # The time processing ended, in XML schema format.
+      # @return [String]
+      attr_accessor :completed_at
+      # Arbitrary context bytes.
+      # @return [String]
+      attr_accessor :context
 
       def initialize(component_instance_uuid, started_at = nil, completed_at = nil, context = nil)
         @component_instance_uuid = component_instance_uuid
@@ -116,6 +156,8 @@ class RFlow
         @context = context
       end
 
+      # Represent the processing event as a hash.
+      # @return [Hash]
       def to_hash
         {
           'component_instance_uuid' => component_instance_uuid.to_s,
@@ -126,11 +168,21 @@ class RFlow
       end
     end
 
-    # Should proxy most methods to data_object that we can serialize
-    # to avro using the schema. Extensions should use 'extended' hook
+    # Should proxy most methods to {data_object} that we can serialize
+    # to Avro using the schema. Extensions should use +extended+ hook
     # to apply immediate changes.
     class Data
-      attr_reader :schema_string, :schema, :serialization_type
+      # The string form of the schema the data follows.
+      # @return [String]
+      attr_reader :schema_string
+      # Avro parsed version of the schema the data follows
+      # @return [::Avro::Schema]
+      attr_reader :schema
+      # Serialization type. Currently, always +avro+.
+      # @return [String]
+      attr_reader :serialization_type
+      # The data object for the message.
+      # @return [Object]
      attr_accessor :data_object
 
      def initialize(schema_string, serialization_type = 'avro', serialized_data = nil)
@@ -152,17 +204,22 @@ class RFlow
        end
      end
 
+      # Is the message valid per the Avro schema?
+      # @return [boolean]
      def valid?
        ::Avro::Schema.validate @schema, @data_object
      end
 
+      # Encode the message out to real Avro.
+      # @return [String]
      def to_avro
        RFlow::Avro.encode @writer, @data_object
      end
 
-      # Proxy methods down to the underlying data_object, probably a
+      # Proxy methods down to the underlying {data_object}, probably a
      # Hash. Hopefully an extension will provide any additional
-      # functionality so this won't be called unless needed
+      # functionality so this won't be called unless needed.
+      # @return [void]
      def method_missing(method_sym, *args, &block)
        @data_object.send(method_sym, *args, &block)
      end
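
A hedged round-trip sketch using only the Message, Data, and ProcessingEvent methods documented above. It assumes a 'HttpRequest' data type and its Avro schema have already been registered through RFlow::Configuration; the UUID value is made up for illustration.

    message = RFlow::Message.new('HttpRequest')
    message.data.data_object = { 'path' => '/index.html' } # Data proxies methods to this Hash
    message.properties['source'] = 'example'
    message.provenance << RFlow::Message::ProcessingEvent.new('0b867416-5abc-4ee4-a3a1-9a1c95bd237b')

    bytes = message.to_avro                 # serialize per the org.rflow.Message schema
    copy  = RFlow::Message.from_avro(bytes) # decode back into a new Message
    copy.data.data_object['path']           # => "/index.html"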
lib/rflow/pid_file.rb

@@ -1,4 +1,5 @@
 class RFlow
+  # Represents a file on disk that contains RFlow's PID, for process management.
   class PIDFile
     private
     attr_reader :path
@@ -8,6 +9,8 @@ class RFlow
       @path = path
     end
 
+    # Read the pid file and get the PID from it.
+    # @return [Integer]
     def read
       return nil unless File.exist? path
       contents = File.read(path)
@@ -19,6 +22,8 @@ class RFlow
       end
     end
 
+    # Write a new PID out to the pid file.
+    # @return [Integer] the pid
     def write(pid = $$)
       return unless validate?
 
@@ -42,6 +47,8 @@ class RFlow
       pid
     end
 
+    # Determine if the application is running by checking the running PID and the pidfile.
+    # @return [boolean]
     def running?
       return false unless exist?
       pid = read
@@ -52,18 +59,22 @@ class RFlow
       nil
     end
 
-    # unlinks a PID file at given if it contains the current PID still
+    # Unlinks a PID file if it contains the current PID. Still
     # potentially racy without locking the directory (which is
     # non-portable and may interact badly with other programs), but the
-    # window for hitting the race condition is small
+    # window for hitting the race condition is small.
+    # @return [void]
     def safe_unlink
       (current_process? and unlink) rescue nil
     end
 
+    # Signal the process with the matching PID with a given signal.
+    # @return [void]
     def signal(sig)
       Process.kill(sig, read)
     end
 
+    # @!visibility private
    def to_s
      File.expand_path(path)
    end
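
A short sketch of the PIDFile life cycle, based only on the methods documented above; the path is an example value.

    pid_file = RFlow::PIDFile.new('/var/run/rflow.pid')

    if pid_file.running?        # a process recorded in this pid file is already alive
      pid_file.signal('QUIT')   # ask it to shut down (Process.kill under the hood)
    else
      pid_file.write            # record the current process's PID ($$)
    end

    # ... later, on shutdown ...
    pid_file.safe_unlink        # remove the file only if it still holds our PID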
lib/rflow/shard.rb

@@ -2,14 +2,23 @@ require 'rflow/child_process'
 
 class RFlow
   # An object implementation shared between two processes. The parent
-  # process will instantiate, configure, and run! a shard, at which
-  # point the parent will have access to the shard object and be able
-  # to monitor the underlying processes. The child implementation,
-  # running in a separate process, will not return from spawn!, but
+  # process will instantiate, configure, and run! a {Shard}, at which
+  # point the parent will have access to the {Shard} object and be able
+  # to monitor the underlying {Shard::Worker} processes. The child implementation,
+  # running in a separate process, will not return from +spawn!+, but
   # start an EventMachine reactor.
   class Shard
+    # An actual child process under the {Shard}, which coordinates a set of
+    # identical {Worker}s.
    class Worker < ChildProcess
-      attr_reader :shard, :index
+      # A reference to the {Shard} governing this {Worker}.
+      # @return [Shard]
+      attr_reader :shard
+
+      # Which worker index this is (for example, in a set of 3 {Worker}s,
+      # one would have index 0, one would have index 1, one would have index 2).
+      # @return [Integer]
+      attr_reader :index
 
      def initialize(shard, index = 1)
        super("#{shard.name}-#{index}", 'Worker')
@@ -20,6 +29,8 @@ class RFlow
        @components = shard.config.components.map {|config| Component.build(self, config) }
      end
 
+      # Configure, connect, and actually start running RFlow components.
+      # @return [void]
      def run_process
        EM.run do
          begin
@@ -38,6 +49,7 @@ class RFlow
        RFlow.logger.info 'Shutting down worker after EM stopped'
      end
 
+      protected
      def configure_components!
        RFlow.logger.debug 'Configuring components'
        @components.zip(shard.config.components.map(&:options)).each do |(component, config)|
@@ -68,6 +80,9 @@ class RFlow
        end
      end
 
+      public
+      # Shut down the {Worker}. Shuts down each component and kills EventMachine.
+      # @return [void]
      def shutdown!(signal)
        RFlow.logger.debug 'Shutting down components'
        @components.each do |component|
@@ -79,7 +94,21 @@ class RFlow
      end
    end
 
-    attr_reader :config, :name, :count, :workers
+    # Reference to the {Shard}'s configuration.
+    # @return [Configuration::Shard]
+    attr_reader :config
+
+    # The {Shard}'s name.
+    # @return [String]
+    attr_reader :name
+
+    # The count of workers that should be started.
+    # @return [Integer]
+    attr_reader :count
+
+    # Reference to the actual {Worker}s.
+    # @return [Array<Worker>]
+    attr_reader :workers
 
     def initialize(config)
       @config = config
@@ -89,6 +118,8 @@ class RFlow
       @workers = count.times.map {|i| Worker.new(self, i+1) }
     end
 
+    # Start the shard by spawning and starting all the workers.
+    # @return [void]
    def run!
      RFlow.logger.debug "Running shard #{name} with #{count} workers"
      workers.each(&:spawn!)
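
Finally, a hedged sketch of how the master drives a shard, based on the attributes and methods documented above; the shard_config object is assumed to come from RFlow::Configuration, and the worker count of 3 is an example.

    shard = RFlow::Shard.new(shard_config)
    shard.count      # => 3, the number of identical workers to start
    shard.workers    # => three Shard::Worker instances (indexed 1, 2, 3 per i+1 above)

    shard.run!       # spawn! each worker; in each child process, Worker#run_process
                     # starts EventMachine and runs the configured components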