rflow 1.3.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,10 @@
1
1
  require 'rflow/pid_file'
2
2
 
3
3
  class RFlow
4
+ # Encapsulates a master process being managed by RFlow that can run in the foreground
5
+ # or daemonize.
4
6
  class DaemonProcess
7
+ # Symbolic constant for SIGINFO as this is only defined on BSD and not in Ruby.
5
8
  SIGINFO = 29
6
9
 
7
10
  def initialize(name, role = name, options = {})
@@ -10,6 +13,9 @@ class RFlow
10
13
  @pid_file = PIDFile.new(options[:pid_file_path]) if options[:pid_file_path]
11
14
  end
12
15
 
16
+ # Daemonize by forking and exiting the parent after handling
17
+ # IO streams and checking successful start of the new copy.
18
+ # @return [void]
13
19
  def daemonize!
14
20
  RFlow.logger.info "#{@name} daemonizing"
15
21
  establish_daemon_pipe
@@ -23,6 +29,11 @@ class RFlow
23
29
  end
24
30
  end
25
31
 
32
+ # Execute the master process. Writes out a pidfile and updates the process
33
+ # name, installs signal handlers, and spawns all the defined subprocesses.
34
+ # Finally executes {run_process}; when that returns, it will
35
+ # exit with the resulting return code.
36
+ # @return [void]
26
37
  def run!
27
38
  write_pid_file
28
39
  register_logging_context
@@ -38,9 +49,23 @@ class RFlow
38
49
  remove_pid_file
39
50
  end
40
51
 
52
+ # Default implementation. Subclasses should override to provide logic
53
+ # for actually spawning subprocesses.
54
+ # @return [void]
41
55
  def spawn_subprocesses; end
56
+
57
+ # Default implementation. Subclasses should override to provide logic
58
+ # for actually doing something useful.
59
+ # @return [void]
60
+ def run_process; end
61
+
62
+ # A list of {ChildProcess}es to start and signal.
63
+ # @return [Array<ChildProcess>]
42
64
  def subprocesses; []; end
43
65
 
66
+ # Shut down the application. Cleans up the pid file, removes
67
+ # signal handlers, and signals all child processes with +SIGQUIT+.
68
+ # @return [void]
44
69
  def shutdown!(reason)
45
70
  RFlow.logger.info "#{@name} shutting down due to #{reason}"
46
71
  remove_pid_file
@@ -1,12 +1,16 @@
1
1
  require 'log4r'
2
2
 
3
3
  class RFlow
4
+ # The customized logger for RFlow applications that flows to the configured log file.
4
5
  class Logger
5
6
  extend Forwardable
6
7
  include Log4r
7
8
 
9
+ # @!visibility private
8
10
  LOG_PATTERN_FORMAT = '%-5l [%d] %x (%-5p) - %M'
11
+ # @!visibility private
9
12
  DATE_METHOD = 'xmlschema(6)'
13
+ # @!visibility private
10
14
  LOG_PATTERN_FORMATTER = PatternFormatter.new :pattern => LOG_PATTERN_FORMAT, :date_method => DATE_METHOD
11
15
 
12
16
  private
@@ -14,6 +18,8 @@ class RFlow
14
18
  attr_accessor :log_file_path, :log_level, :log_name
15
19
 
16
20
  public
21
+ # For the current logging context, how wide the field is where we're going to write the context/process name.
22
+ # @return [Integer]
17
23
  attr_accessor :context_width
18
24
 
19
25
  # make sure Log4r is initialized; ignored if custom levels are already set
@@ -28,6 +34,8 @@ class RFlow
28
34
  reconfigure(config, include_stdout)
29
35
  end
30
36
 
37
+ # Reconfigure the log file.
38
+ # @return [void]
31
39
  def reconfigure(config, include_stdout = false)
32
40
  @log_file_path = config['rflow.log_file_path']
33
41
  @log_level = config['rflow.log_level'] || 'WARN'
@@ -43,6 +51,9 @@ class RFlow
43
51
  internal_logger
44
52
  end
45
53
 
54
+ # Reopen the logs at their configured filesystem locations. Presumably the previous
55
+ # log files have been renamed by now.
56
+ # @return [void]
46
57
  def reopen
47
58
  # TODO: Make this less of a hack, although Log4r doesn't support
48
59
  # it, so it might be permanent
@@ -50,14 +61,21 @@ class RFlow
50
61
  File.open(log_file.path, 'a') { |tmp_log_file| log_file.reopen(tmp_log_file) }
51
62
  end
52
63
 
64
+ # Close the logger.
65
+ # @return [void]
53
66
  def close
54
67
  Outputter['rflow.log_file'].close
55
68
  end
56
69
 
70
+ # Update the log level.
71
+ # @return [void]
57
72
  def level=(level)
58
73
  internal_logger.level = LNAMES.index(level.to_s) || level
59
74
  end
60
75
 
76
+ # Toggle the log level between +DEBUG+ and whatever the default is. The previous
77
+ # level is saved to be toggled back the next time this method is called.
78
+ # @return [void]
61
79
  def toggle_log_level
62
80
  original_log_level = LNAMES[internal_logger.level]
63
81
  new_log_level = (original_log_level == 'DEBUG' ? log_level : 'DEBUG')
@@ -66,6 +84,8 @@ class RFlow
66
84
  internal_logger.level = LNAMES.index new_log_level
67
85
  end
68
86
 
87
+ # Send a complete thread dump of the current process out to the logger.
88
+ # @return [void]
69
89
  def dump_threads
70
90
  Thread.list.each do |t|
71
91
  info "Thread #{t.inspect}:"
@@ -75,18 +95,27 @@ class RFlow
75
95
  info 'Thread dump complete.'
76
96
  end
77
97
 
98
+ # Clone the logging context so changes to it will not affect the
99
+ # existing logging context.
100
+ # @return [void]
78
101
  def clone_logging_context
79
102
  Log4r::NDC.clone_stack
80
103
  end
81
104
 
105
+ # Replace the current logging context.
106
+ # @return [void]
82
107
  def apply_logging_context(context)
83
108
  Log4r::NDC.inherit(context)
84
109
  end
85
110
 
111
+ # Clear the current logging context.
112
+ # @return [void]
86
113
  def clear_logging_context
87
114
  Log4r::NDC.clear
88
115
  end
89
116
 
117
+ # Add more logging context to the stack.
118
+ # @return [void]
90
119
  def add_logging_context(context)
91
120
  Log4r::NDC.push context
92
121
  end
@@ -3,8 +3,15 @@ require 'rflow/shard'
3
3
  require 'rflow/broker'
4
4
 
5
5
  class RFlow
6
+ # The master/watchdog process for RFlow. Mostly exists to receive +SIGCHLD+ from subprocesses
7
+ # so it can kill them all with +SIGQUIT+ and get restarted.
6
8
  class Master < DaemonProcess
9
+ # The {Shard}s being managed by the {Master}.
10
+ # @return [Array<Shard>]
7
11
  attr_reader :shards
12
+
13
+ # The {Broker}s being managed by the {Master}.
14
+ # @return [Array<Broker>]
8
15
  attr_reader :brokers
9
16
 
10
17
  def initialize(config)
@@ -14,6 +21,9 @@ class RFlow
14
21
  @brokers = config.connections.flat_map(&:brokers).map {|config| Broker.build(config) }
15
22
  end
16
23
 
24
+ # Override of {spawn_subprocesses} that actually spawns them,
25
+ # then calls {Shard#run!} on each.
26
+ # @return [void]
17
27
  def spawn_subprocesses
18
28
  RFlow.logger.debug "Running #{brokers.count} brokers" if brokers.count > 0
19
29
  brokers.each(&:spawn!)
@@ -22,10 +32,15 @@ class RFlow
22
32
  shards.each(&:run!)
23
33
  end
24
34
 
35
+ # Override of {subprocesses} that includes the {Broker}s and
36
+ # every {Shard::Worker} of every {Shard}.
37
+ # @return [Array<ChildProcess>]
25
38
  def subprocesses
26
39
  brokers + shards.flat_map(&:workers)
27
40
  end
28
41
 
42
+ # Override that starts EventMachine and waits until it gets stopped.
43
+ # @return [void]
29
44
  def run_process
30
45
  EM.run do
31
46
  # TODO: Monitor the workers
@@ -4,11 +4,20 @@ require 'avro'
4
4
  require 'rflow/configuration'
5
5
 
6
6
  class RFlow
7
+ # Utility methods for doing Avro encoding/decoding.
7
8
  class Avro
9
+ # Decode serialized Avro data.
10
+ # @param reader [::Avro::IO::DatumReader] reader preconfigured with schema
11
+ # @param bytes [String] byte string to decode
12
+ # @return decoded object
8
13
  def self.decode(reader, bytes)
9
14
  reader.read ::Avro::IO::BinaryDecoder.new(StringIO.new(bytes.force_encoding('BINARY')))
10
15
  end
11
16
 
17
+ # Encode data to serialized Avro.
18
+ # @param writer [::Avro::IO::DatumWriter] writer preconfigured with schema
19
+ # @param message [Object] datum to encode with the writer's schema
20
+ # @return [String]
12
21
  def self.encode(writer, message)
13
22
  String.new.force_encoding('BINARY').tap do |result|
14
23
  writer.write message, ::Avro::IO::BinaryEncoder.new(StringIO.new(result, 'w'))
@@ -16,15 +25,21 @@ class RFlow
16
25
  end
17
26
  end
18
27
 
28
+ # A message to be sent around in the RFlow framework.
19
29
  class Message
20
30
  class << self
31
+ # @!visibility private
21
32
  def schema; @schema ||= ::Avro::Schema.parse(File.read(File.join(File.dirname(__FILE__), '..', '..', 'schema', 'message.avsc'))); end
33
+ # @!visibility private
22
34
  def message_reader; @message_reader ||= ::Avro::IO::DatumReader.new(schema, schema); end
35
+ # @!visibility private
23
36
  def message_writer; @message_writer ||= ::Avro::IO::DatumWriter.new(schema); end
37
+ # @!visibility private
24
38
  def encode(message); RFlow::Avro.encode(message_writer, message); end
25
39
 
26
40
  # Take in an Avro serialization of a message and return a new
27
41
  # Message object. Assumes the org.rflow.Message Avro schema.
42
+ # @!visibility private
28
43
  def from_avro(bytes)
29
44
  message = RFlow::Avro.decode(message_reader, bytes)
30
45
  Message.new(message['data_type_name'], message['provenance'], message['properties'],
@@ -33,13 +48,26 @@ class RFlow
33
48
  end
34
49
  end
35
50
 
36
- attr_accessor :provenance, :properties
37
- attr_reader :data_type_name, :data
51
+ # The message's provenance information.
52
+ # @return [Array<ProcessingEvent>]
53
+ attr_accessor :provenance
54
+
55
+ # The message's properties information.
56
+ # @return [Hash]
57
+ attr_accessor :properties
58
+
59
+ # The data type name of the message.
60
+ # @return [String]
61
+ attr_reader :data_type_name
62
+
63
+ # The actual data string in the message.
64
+ # @return [String]
65
+ attr_reader :data
38
66
 
39
67
  # When creating a new message as a transformation of an existing
40
- # message, its encouraged to copy the provenance and properties of
68
+ # message, it's encouraged to copy the provenance and properties of
41
69
  # the original message into the new message. This allows
42
- # downstream components to potentially use these fields
70
+ # downstream components to potentially use these fields.
43
71
  def initialize(data_type_name, provenance = [], properties = {}, serialization_type = 'avro', schema = nil, serialized_data = nil)
44
72
  @data_type_name = data_type_name.to_s
45
73
 
@@ -86,7 +114,8 @@ class RFlow
86
114
  # org.rflow.Message Avro schema. Note that we have to manually
87
115
  # set the encoding for Ruby 1.9, otherwise the stringio would use
88
116
  # UTF-8 by default, which would not work correctly, as a serialized
89
- # avro string is BINARY, not UTF-8
117
+ # avro string is BINARY, not UTF-8.
118
+ # @return [String]
90
119
  def to_avro
91
120
  # stringify all the properties
92
121
  string_properties = Hash[properties.map { |k,v| [k.to_s, v.to_s] }]
@@ -99,9 +128,20 @@ class RFlow
99
128
  'data' => data.to_avro)
100
129
  end
101
130
 
131
+ # One processing event in the message's provenance.
102
132
  class ProcessingEvent
103
- attr_reader :component_instance_uuid, :started_at
104
- attr_accessor :completed_at, :context
133
+ # The UUID of the component doing the processing.
134
+ # @return [String]
135
+ attr_reader :component_instance_uuid
136
+ # The time processing started, in XML schema format.
137
+ # @return [String]
138
+ attr_reader :started_at
139
+ # The time processing ended, in XML schema format.
140
+ # @return [String]
141
+ attr_accessor :completed_at
142
+ # Arbitrary context bytes.
143
+ # @return [String]
144
+ attr_accessor :context
105
145
 
106
146
  def initialize(component_instance_uuid, started_at = nil, completed_at = nil, context = nil)
107
147
  @component_instance_uuid = component_instance_uuid
@@ -116,6 +156,8 @@ class RFlow
116
156
  @context = context
117
157
  end
118
158
 
159
+ # Represent the processing event as a hash.
160
+ # @return [Hash]
119
161
  def to_hash
120
162
  {
121
163
  'component_instance_uuid' => component_instance_uuid.to_s,
@@ -126,11 +168,21 @@ class RFlow
126
168
  end
127
169
  end
128
170
 
129
- # Should proxy most methods to data_object that we can serialize
130
- # to avro using the schema. Extensions should use 'extended' hook
171
+ # Should proxy most methods to {data_object} that we can serialize
172
+ # to Avro using the schema. Extensions should use +extended+ hook
131
173
  # to apply immediate changes.
132
174
  class Data
133
- attr_reader :schema_string, :schema, :serialization_type
175
+ # The string form of the schema the data follows.
176
+ # @return [String]
177
+ attr_reader :schema_string
178
+ # Avro-parsed version of the schema the data follows.
179
+ # @return [::Avro::Schema]
180
+ attr_reader :schema
181
+ # Serialization type. Currently, always +avro+.
182
+ # @return [String]
183
+ attr_reader :serialization_type
184
+ # The data object for the message.
185
+ # @return [Object]
134
186
  attr_accessor :data_object
135
187
 
136
188
  def initialize(schema_string, serialization_type = 'avro', serialized_data = nil)
@@ -152,17 +204,22 @@ class RFlow
152
204
  end
153
205
  end
154
206
 
207
+ # Is the message valid per the Avro schema?
208
+ # @return [Boolean]
155
209
  def valid?
156
210
  ::Avro::Schema.validate @schema, @data_object
157
211
  end
158
212
 
213
+ # Encode the message out to real Avro.
214
+ # @return [String]
159
215
  def to_avro
160
216
  RFlow::Avro.encode @writer, @data_object
161
217
  end
162
218
 
163
- # Proxy methods down to the underlying data_object, probably a
219
+ # Proxy methods down to the underlying {data_object}, probably a
164
220
  # Hash. Hopefully an extension will provide any additional
165
- # functionality so this won't be called unless needed
221
+ # functionality so this won't be called unless needed.
222
+ # @return [void]
166
223
  def method_missing(method_sym, *args, &block)
167
224
  @data_object.send(method_sym, *args, &block)
168
225
  end
@@ -1,4 +1,5 @@
1
1
  class RFlow
2
+ # Represents a file on disk that contains RFlow's PID, for process management.
2
3
  class PIDFile
3
4
  private
4
5
  attr_reader :path
@@ -8,6 +9,8 @@ class RFlow
8
9
  @path = path
9
10
  end
10
11
 
12
+ # Read the pid file and get the PID from it.
13
+ # @return [Integer, nil] the PID, or nil if the pid file does not exist
11
14
  def read
12
15
  return nil unless File.exist? path
13
16
  contents = File.read(path)
@@ -19,6 +22,8 @@ class RFlow
19
22
  end
20
23
  end
21
24
 
25
+ # Write a new PID out to the pid file.
26
+ # @return [Integer] the pid
22
27
  def write(pid = $$)
23
28
  return unless validate?
24
29
 
@@ -42,6 +47,8 @@ class RFlow
42
47
  pid
43
48
  end
44
49
 
50
+ # Determine if the application is running by checking the running PID and the pidfile.
51
+ # @return [Boolean]
45
52
  def running?
46
53
  return false unless exist?
47
54
  pid = read
@@ -52,18 +59,22 @@ class RFlow
52
59
  nil
53
60
  end
54
61
 
55
- # unlinks a PID file at given if it contains the current PID still
62
+ # Unlinks a PID file if it contains the current PID. Still
56
63
  # potentially racy without locking the directory (which is
57
64
  # non-portable and may interact badly with other programs), but the
58
- # window for hitting the race condition is small
65
+ # window for hitting the race condition is small.
66
+ # @return [void]
59
67
  def safe_unlink
60
68
  (current_process? and unlink) rescue nil
61
69
  end
62
70
 
71
+ # Signal the process with the matching PID with a given signal.
72
+ # @return [void]
63
73
  def signal(sig)
64
74
  Process.kill(sig, read)
65
75
  end
66
76
 
77
+ # @!visibility private
67
78
  def to_s
68
79
  File.expand_path(path)
69
80
  end
@@ -2,14 +2,23 @@ require 'rflow/child_process'
2
2
 
3
3
  class RFlow
4
4
  # An object implementation shared between two processes. The parent
5
- # process will instantiate, configure, and run! a shard, at which
6
- # point the parent will have access to the shard object and be able
7
- # to monitor the underlying processes. The child implementation,
8
- # running in a separate process, will not return from spawn!, but
5
+ # process will instantiate, configure, and run! a {Shard}, at which
6
+ # point the parent will have access to the {Shard} object and be able
7
+ # to monitor the underlying {Shard::Worker} processes. The child implementation,
8
+ # running in a separate process, will not return from +spawn!+, but
9
9
  # start an EventMachine reactor.
10
10
  class Shard
11
+ # An actual child process under the {Shard}, which coordinates a set of
12
+ # identical {Worker}s.
11
13
  class Worker < ChildProcess
12
- attr_reader :shard, :index
14
+ # A reference to the {Shard} governing this {Worker}.
15
+ # @return [Shard]
16
+ attr_reader :shard
17
+
18
+ # Which worker index this is (for example, in a set of 3 {Worker}s,
19
+ # one would have index 0, one would have index 1, one would have index 2).
20
+ # @return [Integer]
21
+ attr_reader :index
13
22
 
14
23
  def initialize(shard, index = 1)
15
24
  super("#{shard.name}-#{index}", 'Worker')
@@ -20,6 +29,8 @@ class RFlow
20
29
  @components = shard.config.components.map {|config| Component.build(self, config) }
21
30
  end
22
31
 
32
+ # Configure, connect, and actually start running RFlow components.
33
+ # @return [void]
23
34
  def run_process
24
35
  EM.run do
25
36
  begin
@@ -38,6 +49,7 @@ class RFlow
38
49
  RFlow.logger.info 'Shutting down worker after EM stopped'
39
50
  end
40
51
 
52
+ protected
41
53
  def configure_components!
42
54
  RFlow.logger.debug 'Configuring components'
43
55
  @components.zip(shard.config.components.map(&:options)).each do |(component, config)|
@@ -68,6 +80,9 @@ class RFlow
68
80
  end
69
81
  end
70
82
 
83
+ public
84
+ # Shut down the {Worker}. Shuts down each component and kills EventMachine.
85
+ # @return [void]
71
86
  def shutdown!(signal)
72
87
  RFlow.logger.debug 'Shutting down components'
73
88
  @components.each do |component|
@@ -79,7 +94,21 @@ class RFlow
79
94
  end
80
95
  end
81
96
 
82
- attr_reader :config, :name, :count, :workers
97
+ # Reference to the {Shard}'s configuration.
98
+ # @return [Configuration::Shard]
99
+ attr_reader :config
100
+
101
+ # The {Shard}'s name.
102
+ # @return [String]
103
+ attr_reader :name
104
+
105
+ # The count of workers that should be started.
106
+ # @return [Integer]
107
+ attr_reader :count
108
+
109
+ # Reference to the actual {Worker}s.
110
+ # @return [Array<Worker>]
111
+ attr_reader :workers
83
112
 
84
113
  def initialize(config)
85
114
  @config = config
@@ -89,6 +118,8 @@ class RFlow
89
118
  @workers = count.times.map {|i| Worker.new(self, i+1) }
90
119
  end
91
120
 
121
+ # Start the shard by spawning and starting all the workers.
122
+ # @return [void]
92
123
  def run!
93
124
  RFlow.logger.debug "Running shard #{name} with #{count} workers"
94
125
  workers.each(&:spawn!)