rflow 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -110,13 +110,13 @@ class RFlow
110
110
  def establish_config_database_connection(database_path)
111
111
  RFlow.logger.debug "Establishing connection to config database (#{Dir.getwd}) '#{database_path}'"
112
112
  ActiveRecord::Base.logger = RFlow.logger
113
- ConfigurationItem.establish_connection(:adapter => "sqlite3", :database => database_path)
113
+ ConfigurationItem.establish_connection(:adapter => 'sqlite3', :database => database_path)
114
114
  end
115
115
 
116
116
  # Using default ActiveRecord migrations, attempt to migrate the
117
117
  # database to the latest version.
118
118
  def migrate_database
119
- RFlow.logger.debug "Applying default migrations to config database"
119
+ RFlow.logger.debug 'Applying default migrations to config database'
120
120
  migrations_path = File.join(File.dirname(__FILE__), 'configuration', 'migrations')
121
121
  ActiveRecord::Migration.verbose = false
122
122
  ActiveRecord::Migrator.migrate migrations_path
@@ -135,7 +135,7 @@ class RFlow
135
135
  RFlow.logger.debug "Initializing config database (#{Dir.getwd}) '#{database_path}'"
136
136
 
137
137
  # TODO should not need this line
138
- ActiveRecord::Base.establish_connection(:adapter => "sqlite3", :database => database_path)
138
+ ActiveRecord::Base.establish_connection(:adapter => 'sqlite3', :database => database_path)
139
139
 
140
140
  establish_config_database_connection database_path
141
141
  migrate_database
@@ -147,7 +147,7 @@ class RFlow
147
147
  process_config_file File.expand_path(config_file_path)
148
148
  end
149
149
 
150
- RFlow.logger.debug "Defaulting non-existing config values"
150
+ RFlow.logger.debug 'Defaulting non-existing config values'
151
151
  merge_defaults!
152
152
 
153
153
  Dir.chdir working_dir
@@ -10,7 +10,7 @@ class RFlow
10
10
 
11
11
  public
12
12
  def initialize
13
- @default_shard = {:name => "DEFAULT", :type => :process, :count => 1, :components => []}
13
+ @default_shard = {:name => 'DEFAULT', :type => :process, :count => 1, :components => []}
14
14
  @current_shard = default_shard
15
15
 
16
16
  @setting_specs = []
@@ -27,8 +27,8 @@ class RFlow
27
27
 
28
28
  # DSL method to specify a shard block for either a process or thread
29
29
  def shard(name, options = {})
30
- raise ArgumentError, "Cannot use DEFAULT as a shard name" if name == 'DEFAULT'
31
- raise ArgumentError, "Cannot nest shards" if @current_shard != default_shard
30
+ raise ArgumentError, 'Cannot use DEFAULT as a shard name' if name == 'DEFAULT'
31
+ raise ArgumentError, 'Cannot nest shards' if @current_shard != default_shard
32
32
 
33
33
  type = if options[:thread] || options[:type] == :thread; :thread
34
34
  else :process
@@ -66,7 +66,7 @@ class RFlow
66
66
  # DSL method to specify a connection between a
67
67
  # component/output_port and another component/input_port. The
68
68
  # component/port specification is a string where the names of
69
- # the two elements are separated by '#', and the "connection" is
69
+ # the two elements are separated by '#', and the 'connection' is
70
70
  # specified by a Ruby Hash, i.e.:
71
71
  # connect 'componentA#output' => 'componentB#input'
72
72
  # Array ports are specified with a key suffix in standard
@@ -10,7 +10,7 @@ class RFlow
10
10
  when 'RFlow::Configuration::BrokeredZMQConnection'
11
11
  RFlow::Connections::BrokeredZMQConnection.new(config)
12
12
  else
13
- raise ArgumentError, "Only ZMQConnections currently supported"
13
+ raise ArgumentError, 'Only ZMQConnections currently supported'
14
14
  end
15
15
  end
16
16
  end
@@ -32,14 +32,14 @@ class RFlow
32
32
  # methods. Will only be called in the context of a running
33
33
  # EventMachine reactor
34
34
  def connect_input!
35
- raise NotImplementedError, "Raw connections do not support connect_input. Please subclass and define a connect_input method."
35
+ raise NotImplementedError, 'Raw connections do not support connect_input. Please subclass and define a connect_input method.'
36
36
  end
37
37
 
38
38
  # Subclass and implement to be able to handle future 'send'
39
39
  # methods. Will only be called in the context of a running
40
40
  # EventMachine reactor
41
41
  def connect_output!
42
- raise NotImplementedError, "Raw connections do not support connect_output. Please subclass and define a connect_output method."
42
+ raise NotImplementedError, 'Raw connections do not support connect_output. Please subclass and define a connect_output method.'
43
43
  end
44
44
 
45
45
  # Subclass and implement to handle outgoing messages. The message
@@ -47,7 +47,7 @@ class RFlow
47
47
  # to marshal it up into something that will be unmarshalled on the
48
48
  # other side
49
49
  def send_message(message)
50
- raise NotImplementedError, "Raw connections do not support send_message. Please subclass and define a send_message method."
50
+ raise NotImplementedError, 'Raw connections do not support send_message. Please subclass and define a send_message method.'
51
51
  end
52
52
 
53
53
  # Parent component will set this attribute if it expects to
@@ -18,7 +18,7 @@ class RFlow
18
18
  version = LibZMQ::version
19
19
  RFlow.logger.debug { "Creating a new ZeroMQ context; ZeroMQ version is #{version[:major]}.#{version[:minor]}.#{version[:patch]}" }
20
20
  if EM.reactor_running?
21
- raise RuntimeError, "EventMachine reactor is running when attempting to create a ZeroMQ context"
21
+ raise RuntimeError, 'EventMachine reactor is running when attempting to create a ZeroMQ context'
22
22
  end
23
23
  EM::ZeroMQ::Context.new(1)
24
24
  end
@@ -74,15 +74,13 @@ class RFlow
74
74
  output_socket
75
75
  end
76
76
 
77
- # TODO: fix this tight loop of retries
78
77
  def send_message(message)
79
78
  RFlow.logger.debug "#{name}: Sending message of type '#{message.data_type_name.to_s}'"
80
79
 
81
80
  begin
82
81
  output_socket.send_msg(message.data_type_name.to_s, message.to_avro)
83
82
  rescue Exception => e
84
- RFlow.logger.debug "Exception #{e.class}: #{e.message}, retrying send"
85
- retry
83
+ RFlow.logger.error "Exception #{e.class}: #{e.message}, because: #{e.backtrace}"
86
84
  end
87
85
  end
88
86
 
@@ -159,9 +157,9 @@ class RFlow
159
157
  end
160
158
  front.bind(connection.options['output_address'])
161
159
  back.bind(connection.options['input_address'])
162
- ZMQ::Proxy.new(front, back)
163
- back.close
164
- front.close
160
+ while true
161
+ ZMQ::Proxy.new(front, back)
162
+ end
165
163
  rescue Exception => e
166
164
  RFlow.logger.error "Error running message broker: #{e.class}: #{e.message}, because: #{e.backtrace.inspect}"
167
165
  ensure
@@ -1,8 +1,11 @@
1
+ require 'rflow/pid_file'
2
+
1
3
  class RFlow
2
4
  class DaemonProcess
3
- def initialize(name, role = name)
5
+ def initialize(name, role = name, options = {})
4
6
  @name = name
5
7
  @role = role
8
+ @pid_file = PIDFile.new(options[:pid_file_path]) if options[:pid_file_path]
6
9
  end
7
10
 
8
11
  def daemonize!
@@ -19,6 +22,7 @@ class RFlow
19
22
  end
20
23
 
21
24
  def run!
25
+ write_pid_file
22
26
  register_logging_context
23
27
  update_process_name
24
28
  handle_signals
@@ -29,6 +33,7 @@ class RFlow
29
33
  run_process
30
34
  ensure
31
35
  unhandle_signals
36
+ remove_pid_file
32
37
  end
33
38
 
34
39
  def spawn_subprocesses; end
@@ -36,6 +41,7 @@ class RFlow
36
41
 
37
42
  def shutdown!(reason)
38
43
  RFlow.logger.info "#{@name} shutting down due to #{reason}"
44
+ remove_pid_file
39
45
  unhandle_signals
40
46
  signal_subprocesses('QUIT')
41
47
  RFlow.logger.info "#{@name} exiting"
@@ -65,6 +71,7 @@ class RFlow
65
71
  exit 0
66
72
  else
67
73
  RFlow.logger.error "#{@role} failed to start"
74
+ STDERR.puts "\n\n*** #{@role} failed to start; see log file for details"
68
75
  exit! 1
69
76
  end
70
77
  end
@@ -79,7 +86,7 @@ class RFlow
79
86
  $stdout.sync = $stderr.sync = true
80
87
  [$stdin, $stdout, $stderr].each do |stream|
81
88
  stream.binmode
82
- begin; stream.reopen "/dev/null"; rescue ::Exception; end
89
+ begin; stream.reopen '/dev/null'; rescue ::Exception; end
83
90
  end
84
91
  end
85
92
 
@@ -151,5 +158,8 @@ class RFlow
151
158
  Process.kill(signal, p.pid)
152
159
  end
153
160
  end
161
+
162
+ def write_pid_file; @pid_file.write if @pid_file; end
163
+ def remove_pid_file; @pid_file.safe_unlink if @pid_file; end
154
164
  end
155
165
  end
data/lib/rflow/logger.rb CHANGED
@@ -30,7 +30,7 @@ class RFlow
30
30
 
31
31
  def reconfigure(config, include_stdout = false)
32
32
  @log_file_path = config['rflow.log_file_path']
33
- @log_level = config['rflow.log_level'] || "WARN"
33
+ @log_level = config['rflow.log_level'] || 'WARN'
34
34
  @log_name = if config['rflow.application_name']; config['rflow.application_name']
35
35
  elsif log_file_path; File.basename(log_file_path)
36
36
  else ''; end
data/lib/rflow/master.rb CHANGED
@@ -1,5 +1,4 @@
1
1
  require 'rflow/daemon_process'
2
- require 'rflow/pid_file'
3
2
  require 'rflow/shard'
4
3
  require 'rflow/broker'
5
4
 
@@ -9,24 +8,16 @@ class RFlow
9
8
  attr_reader :brokers
10
9
 
11
10
  def initialize(config)
12
- super(config['rflow.application_name'], 'Master')
13
- @pid_file = PIDFile.new(config['rflow.pid_file_path'])
11
+ super(config['rflow.application_name'], 'Master', pid_file_path: config['rflow.pid_file_path'])
14
12
  @shards = config.shards.map {|config| Shard.new(config) }
15
13
  RFlow.logger.context_width = @shards.flat_map(&:workers).map(&:name).map(&:length).max
16
14
  @brokers = config.connections.flat_map(&:brokers).map {|config| Broker.build(config) }
17
15
  end
18
16
 
19
- def run!
20
- write_pid_file
21
- super
22
- ensure
23
- remove_pid_file
24
- end
25
-
26
17
  def spawn_subprocesses
27
18
  RFlow.logger.debug "Running #{brokers.count} brokers" if brokers.count > 0
28
19
  brokers.each(&:spawn!)
29
- RFlow.logger.debug "#{brokers.count} brokers started: #{brokers.map { |w| "#{w.name} (#{w.pid})" }.join(", ")}" if brokers.count > 0
20
+ RFlow.logger.debug "#{brokers.count} brokers started: #{brokers.map { |w| "#{w.name} (#{w.pid})" }.join(', ')}" if brokers.count > 0
30
21
 
31
22
  shards.each(&:run!)
32
23
  end
@@ -40,14 +31,5 @@ class RFlow
40
31
  # TODO: Monitor the workers
41
32
  end
42
33
  end
43
-
44
- def shutdown!(reason)
45
- remove_pid_file
46
- super
47
- end
48
-
49
- private
50
- def write_pid_file; @pid_file.write; end
51
- def remove_pid_file; @pid_file.safe_unlink; end
52
34
  end
53
35
  end
data/lib/rflow/message.rb CHANGED
@@ -27,32 +27,22 @@ class RFlow
27
27
  # Message object. Assumes the org.rflow.Message Avro schema.
28
28
  def from_avro(bytes)
29
29
  message = RFlow::Avro.decode(message_reader, bytes)
30
- Message.new(message['data_type_name'], message['provenance'],
30
+ Message.new(message['data_type_name'], message['provenance'], message['properties'],
31
31
  message['data_serialization_type'], message['data_schema'],
32
32
  message['data'])
33
33
  end
34
34
  end
35
35
 
36
+ attr_accessor :provenance, :properties
36
37
  attr_reader :data_type_name, :data
37
- attr_accessor :provenance
38
38
 
39
- def initialize(data_type_name, provenance = [], serialization_type = 'avro', schema = nil, serialized_data = nil)
39
+ # When creating a new message as a transformation of an existing
40
+ # message, it's encouraged to copy the provenance and properties of
41
+ # the original message into the new message. This allows
42
+ # downstream components to potentially use these fields
43
+ def initialize(data_type_name, provenance = [], properties = {}, serialization_type = 'avro', schema = nil, serialized_data = nil)
40
44
  @data_type_name = data_type_name.to_s
41
45
 
42
- # TODO: Make this better. This check is technically
43
- # unnecessary, as we are able to completely deserialize the
44
- # message without needing to resort to the registered schema.
45
- registered_schema = RFlow::Configuration.available_data_types[@data_type_name][serialization_type.to_s]
46
- unless registered_schema
47
- raise ArgumentError, "Data type '#{@data_type_name}' with serialization_type '#{serialization_type}' not found"
48
- end
49
-
50
- # TODO: think about registering the schemas automatically if not
51
- # found in Configuration
52
- if schema && (registered_schema != schema)
53
- raise ArgumentError, "Passed schema ('#{schema}') does not equal registered schema ('#{registered_schema}') for data type '#{@data_type_name}' with serialization_type '#{serialization_type}'"
54
- end
55
-
56
46
  # Turn the provenance array of Hashes into an array of
57
47
  # ProcessingEvents for easier access and time validation.
58
48
  # TODO: do this lazily so as not to create/destroy objects that are
@@ -67,6 +57,22 @@ class RFlow
67
57
  end
68
58
  end
69
59
 
60
+ @properties = properties || {}
61
+
62
+ # TODO: Make this better. This check is technically
63
+ # unnecessary, as we are able to completely deserialize the
64
+ # message without needing to resort to the registered schema.
65
+ registered_schema = RFlow::Configuration.available_data_types[@data_type_name][serialization_type.to_s]
66
+ unless registered_schema
67
+ raise ArgumentError, "Data type '#{@data_type_name}' with serialization_type '#{serialization_type}' not found"
68
+ end
69
+
70
+ # TODO: think about registering the schemas automatically if not
71
+ # found in Configuration
72
+ if schema && (registered_schema != schema)
73
+ raise ArgumentError, "Passed schema ('#{schema}') does not equal registered schema ('#{registered_schema}') for data type '#{@data_type_name}' with serialization_type '#{serialization_type}'"
74
+ end
75
+
70
76
  @data = Data.new(registered_schema, serialization_type.to_s, serialized_data)
71
77
 
72
78
  # Get the extensions and apply them to the data object to add capability
@@ -82,8 +88,12 @@ class RFlow
82
88
  # UTF-8 by default, which would not work correctly, as a serialized
83
89
  # avro string is BINARY, not UTF-8
84
90
  def to_avro
91
+ # stringify all the properties
92
+ string_properties = Hash[properties.map { |k,v| [k.to_s, v.to_s] }]
93
+
85
94
  Message.encode('data_type_name' => data_type_name.to_s,
86
95
  'provenance' => provenance.map(&:to_hash),
96
+ 'properties' => string_properties.to_hash,
87
97
  'data_serialization_type' => data.serialization_type.to_s,
88
98
  'data_schema' => data.schema_string,
89
99
  'data' => data.to_avro)
@@ -124,7 +134,7 @@ class RFlow
124
134
  attr_accessor :data_object
125
135
 
126
136
  def initialize(schema_string, serialization_type = 'avro', serialized_data = nil)
127
- raise ArgumentError, "Only Avro serialization_type supported at the moment" unless serialization_type.to_s == 'avro'
137
+ raise ArgumentError, 'Only Avro serialization_type supported at the moment' unless serialization_type.to_s == 'avro'
128
138
 
129
139
  @schema_string = schema_string
130
140
  @serialization_type = serialization_type.to_s
@@ -10,18 +10,30 @@ class RFlow
10
10
 
11
11
  def read
12
12
  return nil unless File.exist? path
13
- File.read(path).to_i
13
+ contents = File.read(path)
14
+ if contents.empty?
15
+ RFlow.logger.warn "Ignoring empty PID file #{path}"
16
+ nil
17
+ else
18
+ contents.to_i
19
+ end
14
20
  end
15
21
 
16
22
  def write(pid = $$)
17
23
  return unless validate?
18
24
 
19
- RFlow.logger.debug "Writing PID #{pid} file '#{to_s}'"
25
+ RFlow.logger.debug "Writing PID #{pid} to file '#{to_s}'"
26
+ tmp_path = File.join(File.dirname(path), ".#{File.basename(path)}")
27
+ if File.exist? tmp_path
28
+ RFlow.logger.warn "Deleting stale temp PID file #{tmp_path}"
29
+ File.delete(tmp_path)
30
+ end
20
31
  pid_fp = begin
21
- tmp_path = File.join(File.dirname(path), ".#{File.basename(path)}")
22
32
  File.open(tmp_path, File::RDWR|File::CREAT|File::EXCL, 0644)
23
- rescue Errno::EEXIST
24
- retry
33
+ rescue Errno::EACCES => e
34
+ RFlow.logger.fatal "Access error while writing temp PID file '#{tmp_path}'"
35
+ RFlow.logger.fatal "Exception #{e.class}: #{e.message}"
36
+ abort
25
37
  end
26
38
  pid_fp.syswrite("#{pid}\n")
27
39
  File.rename(pid_fp.path, path)
data/lib/rflow/shard.rb CHANGED
@@ -35,11 +35,11 @@ class RFlow
35
35
  end
36
36
  end
37
37
 
38
- RFlow.logger.info "Shutting down worker after EM stopped"
38
+ RFlow.logger.info 'Shutting down worker after EM stopped'
39
39
  end
40
40
 
41
41
  def configure_components!
42
- RFlow.logger.debug "Configuring components"
42
+ RFlow.logger.debug 'Configuring components'
43
43
  @components.zip(shard.config.components.map(&:options)).each do |(component, config)|
44
44
  RFlow.logger.debug "Configuring component '#{component.name}' (#{component.uuid})"
45
45
  component.configure! config
@@ -49,7 +49,7 @@ class RFlow
49
49
  # Connect all inputs before all outputs, so connection types that require a 'server'
50
50
  # to be established before a 'client' can connect can get themselves ready.
51
51
  def connect_components!
52
- RFlow.logger.debug "Connecting components"
52
+ RFlow.logger.debug 'Connecting components'
53
53
  @components.each do |component|
54
54
  RFlow.logger.debug "Connecting inputs for component '#{component.name}' (#{component.uuid})"
55
55
  component.connect_inputs!
@@ -61,7 +61,7 @@ class RFlow
61
61
  end
62
62
 
63
63
  def run_components!
64
- RFlow.logger.debug "Running components"
64
+ RFlow.logger.debug 'Running components'
65
65
  @components.each do |component|
66
66
  RFlow.logger.debug "Running component '#{component.name}' (#{component.uuid})"
67
67
  component.run!
@@ -69,7 +69,7 @@ class RFlow
69
69
  end
70
70
 
71
71
  def shutdown!(signal)
72
- RFlow.logger.debug "Shutting down components"
72
+ RFlow.logger.debug 'Shutting down components'
73
73
  @components.each do |component|
74
74
  RFlow.logger.debug "Shutting down component '#{component.name}' (#{component.uuid})"
75
75
  component.shutdown!
@@ -93,7 +93,7 @@ class RFlow
93
93
  RFlow.logger.debug "Running shard #{name} with #{count} workers"
94
94
  workers.each(&:spawn!)
95
95
 
96
- RFlow.logger.debug "#{count} workers started for #{name}: #{workers.map { |w| "#{w.name} (#{w.pid})" }.join(", ")}"
96
+ RFlow.logger.debug "#{count} workers started for #{name}: #{workers.map { |w| "#{w.name} (#{w.pid})" }.join(', ')}"
97
97
  workers
98
98
  end
99
99
  end
data/lib/rflow/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  class RFlow
2
- VERSION = "1.0.1"
2
+ VERSION = '1.1.0'
3
3
  end
data/rflow.gemspec CHANGED
@@ -1,37 +1,37 @@
1
1
  lib = File.expand_path('../lib', __FILE__)
2
2
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
- require "rflow/version"
3
+ require 'rflow/version'
4
4
 
5
5
  Gem::Specification.new do |s|
6
- s.name = "rflow"
6
+ s.name = 'rflow'
7
7
  s.version = RFlow::VERSION
8
8
  s.platform = Gem::Platform::RUBY
9
- s.required_ruby_version = ">= 1.9"
10
- s.authors = ["Michael L. Artz"]
11
- s.email = ["michael.artz@redjack.com"]
12
- s.homepage = "https://github.com/redjack/rflow"
13
- s.license = "Apache-2.0"
9
+ s.required_ruby_version = '>= 1.9'
10
+ s.authors = ['John Stoneham', 'Michael L. Artz']
11
+ s.email = ['john.stoneham@redjack.com', 'mlartz@gmail.com']
12
+ s.homepage = 'https://github.com/redjack/rflow'
13
+ s.license = 'Apache-2.0'
14
14
  s.summary = %q{A Ruby flow-based programming framework}
15
15
  s.description = %q{A Ruby flow-based programming framework that utilizes ZeroMQ for component connections and Avro for serialization}
16
16
 
17
17
  s.files = `git ls-files -z`.split("\x0")
18
18
  s.executables = s.files.grep(%r{^bin/}) { |f| File.basename(f) }
19
19
  s.test_files = s.files.grep(%r{^(test|spec|features)/})
20
- s.require_paths = ["lib"]
20
+ s.require_paths = ['lib']
21
21
 
22
- s.add_dependency "uuidtools", "~> 2.1"
23
- s.add_dependency "log4r", "~> 1.1"
24
- s.add_dependency "sys-filesystem", "~> 1.1.2"
22
+ s.add_dependency 'uuidtools', '~> 2.1'
23
+ s.add_dependency 'log4r', '~> 1.1'
24
+ s.add_dependency 'sys-filesystem', '~> 1.1'
25
25
 
26
- s.add_dependency "sqlite3", "~> 1.3"
27
- s.add_dependency "activerecord", "~> 3.2"
26
+ s.add_dependency 'sqlite3', '~> 1.3'
27
+ s.add_dependency 'activerecord', '~> 3.2'
28
28
 
29
- s.add_dependency "avro", "~> 1.7.5"
30
- s.add_dependency "em-zeromq", "0.5.0"
29
+ s.add_dependency 'avro', '~> 1.7'
30
+ s.add_dependency 'em-zeromq', '~> 0.5.0'
31
31
 
32
- s.add_development_dependency "bundler", "~> 1.6"
33
- s.add_development_dependency "rspec", "~> 3.0"
34
- s.add_development_dependency "rspec-collection_matchers", "~> 1.0"
35
- s.add_development_dependency "rake", ">= 10.3"
36
- s.add_development_dependency "yard", "~> 0.8"
32
+ s.add_development_dependency 'bundler', '~> 1'
33
+ s.add_development_dependency 'rspec', '~> 3.0'
34
+ s.add_development_dependency 'rspec-collection_matchers', '~> 1.0'
35
+ s.add_development_dependency 'rake', '>= 10.3'
36
+ s.add_development_dependency 'yard', '~> 0.8'
37
37
  end