rflow 0.0.5 → 1.0.0a1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. checksums.yaml +4 -4
  2. data/.ruby-gemset +1 -0
  3. data/.ruby-version +1 -0
  4. data/.travis.yml +21 -0
  5. data/.yardopts +1 -0
  6. data/Gemfile +5 -1
  7. data/Guardfile +8 -0
  8. data/LICENSE +190 -0
  9. data/NOTES +26 -13
  10. data/README.md +448 -0
  11. data/Rakefile +5 -12
  12. data/bin/rflow +23 -20
  13. data/example/basic_config.rb +2 -2
  14. data/example/basic_extensions.rb +8 -8
  15. data/example/http_config.rb +1 -1
  16. data/example/http_extensions.rb +15 -15
  17. data/lib/rflow.rb +15 -387
  18. data/lib/rflow/component.rb +105 -50
  19. data/lib/rflow/component/port.rb +25 -24
  20. data/lib/rflow/components/raw.rb +4 -4
  21. data/lib/rflow/components/raw/extensions.rb +2 -2
  22. data/lib/rflow/configuration.rb +54 -36
  23. data/lib/rflow/configuration/component.rb +2 -3
  24. data/lib/rflow/configuration/connection.rb +9 -10
  25. data/lib/rflow/configuration/migrations/{20010101000001_create_settings.rb → 20010101000000_create_settings.rb} +2 -2
  26. data/lib/rflow/configuration/migrations/20010101000001_create_shards.rb +21 -0
  27. data/lib/rflow/configuration/migrations/20010101000002_create_components.rb +7 -2
  28. data/lib/rflow/configuration/migrations/20010101000003_create_ports.rb +3 -3
  29. data/lib/rflow/configuration/migrations/20010101000004_create_connections.rb +2 -2
  30. data/lib/rflow/configuration/port.rb +3 -4
  31. data/lib/rflow/configuration/ruby_dsl.rb +59 -35
  32. data/lib/rflow/configuration/setting.rb +8 -7
  33. data/lib/rflow/configuration/shard.rb +24 -0
  34. data/lib/rflow/configuration/uuid_keyed.rb +3 -3
  35. data/lib/rflow/connection.rb +21 -10
  36. data/lib/rflow/connections/zmq_connection.rb +45 -44
  37. data/lib/rflow/logger.rb +67 -0
  38. data/lib/rflow/master.rb +127 -0
  39. data/lib/rflow/message.rb +14 -14
  40. data/lib/rflow/pid_file.rb +84 -0
  41. data/lib/rflow/shard.rb +148 -0
  42. data/lib/rflow/version.rb +1 -1
  43. data/rflow.gemspec +22 -28
  44. data/schema/message.avsc +8 -8
  45. data/spec/fixtures/config_ints.rb +4 -4
  46. data/spec/fixtures/config_shards.rb +30 -0
  47. data/spec/fixtures/extensions_ints.rb +8 -8
  48. data/spec/rflow_component_port_spec.rb +58 -0
  49. data/spec/rflow_configuration_ruby_dsl_spec.rb +148 -0
  50. data/spec/rflow_configuration_spec.rb +4 -4
  51. data/spec/rflow_message_data_raw.rb +2 -2
  52. data/spec/rflow_message_data_spec.rb +6 -6
  53. data/spec/rflow_message_spec.rb +13 -13
  54. data/spec/rflow_spec.rb +294 -71
  55. data/spec/schema_spec.rb +2 -2
  56. data/spec/spec_helper.rb +6 -4
  57. data/temp.rb +21 -21
  58. metadata +56 -65
  59. data/.rvmrc +0 -1
  60. data/README +0 -0
@@ -0,0 +1,84 @@
1
+ class RFlow
2
+ class PIDFile
3
+ attr_reader :pid_file_path
4
+
5
+ def initialize(pid_file_path)
6
+ @pid_file_path = pid_file_path
7
+ end
8
+
9
+ def validate?
10
+ if current_process?
11
+ RFlow.logger.warn "Already running #{read.to_s}, not writing PID to file '#{to_s}'"
12
+ return nil
13
+ elsif running?
14
+ error_message = "Already running #{read.to_s}, possibly stale PID file '#{to_s}'"
15
+ RFlow.logger.error error_message
16
+ raise ArgumentError, error_message
17
+ elsif exist?
18
+ RFlow.logger.warn "Found stale PID #{read.to_s} in PID file '#{to_s}', removing"
19
+ unlink
20
+ end
21
+ true
22
+ end
23
+
24
+ def read
25
+ return nil unless File.exist? pid_file_path
26
+ File.read(pid_file_path).to_i
27
+ end
28
+
29
+ def write(pid=$$)
30
+ return unless validate?
31
+
32
+ RFlow.logger.debug "Writing PID #{pid} file '#{to_s}'"
33
+ pid_fp = begin
34
+ tmp_pid_file_path = File.join(File.dirname(pid_file_path), ".#{File.basename(pid_file_path)}")
35
+ File.open(tmp_pid_file_path, File::RDWR|File::CREAT|File::EXCL, 0644)
36
+ rescue Errno::EEXIST
37
+ retry
38
+ end
39
+ pid_fp.syswrite("#{pid}\n")
40
+ File.rename(pid_fp.path, pid_file_path)
41
+ pid_fp.close
42
+
43
+ pid
44
+ end
45
+
46
+ def exist?
47
+ File.exist? pid_file_path
48
+ end
49
+
50
+ def running?
51
+ return false unless exist?
52
+ pid = read
53
+ return false unless pid
54
+ Process.kill(0, pid)
55
+ pid
56
+ rescue Errno::ESRCH, Errno::ENOENT
57
+ nil
58
+ end
59
+
60
+ def current_process?
61
+ read == $$
62
+ end
63
+
64
+ def unlink
65
+ File.unlink(pid_file_path)
66
+ end
67
+
68
+ # Unlinks the PID file only if it still contains the current PID.
69
+ # Potentially racy without locking the directory (which is
70
+ # non-portable and may interact badly with other programs), but the
71
+ # window for hitting the race condition is small.
72
+ def safe_unlink
73
+ (current_process? and unlink) rescue nil
74
+ end
75
+
76
+ def signal(sig)
77
+ Process.kill(sig, read)
78
+ end
79
+
80
+ def to_s
81
+ File.expand_path(pid_file_path)
82
+ end
83
+ end
84
+ end
@@ -0,0 +1,148 @@
1
+ class RFlow
2
+
3
+ # An object implementation shared between two processes. The parent
4
+ # process will instantiate, configure, and run! a shard, at which
5
+ # point the parent will have access to the shard object and be able
6
+ # to monitor the underlying processes. The child implementation,
7
+ # running in a separate process, will not return from run!; instead it
8
+ # starts an EventMachine reactor, then connects and runs the
9
+ # components
10
+ class Shard
11
+
12
+ # An internal class that represents an instance of the running
13
+ # shard, i.e. a process
14
+ class Worker
15
+
16
+ attr_accessor :shard, :index, :name, :pid
17
+ attr_accessor :components
18
+ attr_accessor :worker_read, :master_write
19
+
20
+ def initialize(shard, index=1)
21
+ @shard = shard
22
+ @index = index
23
+ @name = "#{shard.name}-#{index}"
24
+
25
+ # Set up the IPC pipes
26
+ @worker_read, @master_write = IO.pipe
27
+ [@worker_read, @master_write].each do |io|
28
+ io.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC)
29
+ end
30
+
31
+ @components = shard.config.components.map do |component_config|
32
+ Component.build(component_config)
33
+ end
34
+ end
35
+
36
+ def handle_signals
37
+ ['SIGTERM', 'SIGINT', 'SIGQUIT'].each do |signal|
38
+ Signal.trap signal do
39
+ Thread.new { shutdown(signal) }.join
40
+ end
41
+ end
42
+
43
+ ['SIGUSR1'].each do |signal|
44
+ Signal.trap signal do
45
+ Thread.new do
46
+ RFlow.logger.reopen
47
+ end.join
48
+ end
49
+ end
50
+
51
+ # Toggle log level on USR2
52
+ ['SIGUSR2'].each do |signal|
53
+ Signal.trap signal do
54
+ Thread.new do
55
+ RFlow.logger.toggle_log_level
56
+ end.join
57
+ end
58
+ end
59
+ end
60
+
61
+ # Launch another process to execute the shard. The parent
62
+ # process retains the original worker object (with pid and IPC
63
+ # pipe) to allow for process management
64
+ def launch
65
+ @pid = Process.fork do
66
+ @master_write.close
67
+
68
+ handle_signals
69
+
70
+ $0 += " #{name}"
71
+ Log4r::NDC.push name
72
+
73
+ RFlow.logger.info "Worker started"
74
+ EM.run do
75
+ # TODO: Monitor the master
76
+
77
+ connect_components!
78
+ # TODO: need to do proper node synchronization for ZMQ to
79
+ # remove sleep
80
+ sleep 1
81
+ run_components!
82
+ end
83
+
84
+ RFlow.logger.info "Shutting down worker after EM stopped"
85
+ end
86
+
87
+ @worker_read.close
88
+ self
89
+ end
90
+
91
+ # Send a command to each component to tell them to connect their
92
+ # ports via their connections
93
+ def connect_components!
94
+ RFlow.logger.debug "Connecting components"
95
+ components.each do |component|
96
+ RFlow.logger.debug "Connecting component '#{component.name}' (#{component.uuid})"
97
+ component.connect!
98
+ end
99
+ end
100
+
101
+ # Start each component running
102
+ def run_components!
103
+ RFlow.logger.debug "Running components"
104
+ components.each do |component|
105
+ RFlow.logger.debug "Running component '#{component.name}' (#{component.uuid})"
106
+ component.run!
107
+ end
108
+ end
109
+ end
110
+
111
+
112
+ attr_reader :config, :uuid, :name, :count
113
+ attr_accessor :workers
114
+
115
+
116
+ def initialize(config)
117
+ @config = config
118
+ @uuid = config.uuid
119
+ @name = config.name
120
+ @count = config.count
121
+
122
+ @workers = count.times.map do |i|
123
+ Worker.new(self, i+1)
124
+ end
125
+ end
126
+
127
+
128
+ def run!
129
+ RFlow.logger.debug "Running shard #{name} with #{count} workers"
130
+ workers.each do |worker|
131
+ worker.launch
132
+ end
133
+
134
+ RFlow.logger.debug "#{count} workers started for #{name}: #{workers.map { |w| "#{w.name} (#{w.pid})" }.join(", ")}"
135
+ workers
136
+ end
137
+
138
+
139
+ # TODO: Implement
140
+ def shutdown!
141
+ end
142
+
143
+
144
+ # TODO: Implement
145
+ def cleanup!
146
+ end
147
+ end
148
+ end
@@ -1,3 +1,3 @@
1
1
  class RFlow
2
- VERSION = "0.0.5"
2
+ VERSION = "1.0.0a1"
3
3
  end # class RFlow
@@ -1,42 +1,36 @@
1
1
  # -*- encoding: utf-8 -*-
2
- $:.push File.expand_path("../lib", __FILE__)
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
4
  require "rflow/version"
4
5
 
5
6
  Gem::Specification.new do |s|
6
7
  s.name = "rflow"
7
8
  s.version = RFlow::VERSION
8
9
  s.platform = Gem::Platform::RUBY
9
- s.required_ruby_version = '~> 1.9'
10
+ s.required_ruby_version = ">= 1.9"
10
11
  s.authors = ["Michael L. Artz"]
11
12
  s.email = ["michael.artz@redjack.com"]
12
- s.homepage = ""
13
- s.summary = %q{A Ruby-based workflow framework}
14
- s.description = %q{A Ruby-based workflow framework that utilizes ZeroMQ for component connections and Avro for serialization}
13
+ s.homepage = "https://github.com/redjack/rflow"
14
+ s.license = "Apache-2.0"
15
+ s.summary = %q{A Ruby flow-based programming framework}
16
+ s.description = %q{A Ruby flow-based programming framework that utilizes ZeroMQ for component connections and Avro for serialization}
15
17
 
16
- s.rubyforge_project = "rflow"
17
-
18
- s.files = `git ls-files`.split("\n")
19
- s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
20
- s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
18
+ s.files = `git ls-files -z`.split("\x0")
19
+ s.executables = s.files.grep(%r{^bin/}) { |f| File.basename(f) }
20
+ s.test_files = s.files.grep(%r{^(test|spec|features)/})
21
21
  s.require_paths = ["lib"]
22
22
 
23
- s.add_dependency 'uuidtools', '~> 2.1'
24
- s.add_dependency 'log4r', '~> 1.1'
25
-
26
- s.add_dependency 'sqlite3', '~> 1.3'
27
- s.add_dependency 'activerecord', '~> 3.0'
28
-
29
- s.add_dependency 'avro', '>= 1.5.1'
30
- s.add_dependency 'ffi', '~> 1.0'
31
- s.add_dependency 'ffi-rzmq' , '~> 0.8'
23
+ s.add_dependency "uuidtools", "~> 2.1"
24
+ s.add_dependency "log4r", "~> 1.1"
25
+
26
+ s.add_dependency "sqlite3", "~> 1.3"
27
+ s.add_dependency "activerecord", "~> 3.2"
28
+
29
+ s.add_dependency "avro", "~> 1.7.5"
30
+ s.add_dependency "em-zeromq", "~> 0.4.2"
32
31
 
33
- s.add_dependency 'eventmachine', '>= 1.0.0.beta3'
34
- # MRI-only because of the FFI memory leak. TODO: remove when ffi fixed
35
- s.add_dependency 'em-zeromq-mri', '~> 0.2'
36
- # Remove this when we break it out into its own gem
37
- s.add_dependency 'eventmachine_httpserver', '~> 0.2'
38
-
39
- s.add_development_dependency 'rspec', '~> 2.6'
40
- s.add_development_dependency 'rake', '>= 0.8.7'
41
- #s.add_development_dependency 'rcov', '= 0.9.9' # Not 1.9.2 compatible
32
+ s.add_development_dependency "bundler", "~> 1.5"
33
+ s.add_development_dependency "rspec", "~> 2.6"
34
+ s.add_development_dependency "rake", ">= 0.8.7"
35
+ s.add_development_dependency "yard", "~> 0.8.7"
42
36
  end
@@ -5,12 +5,12 @@
5
5
  "aliases": [],
6
6
  "fields": [
7
7
  {"name": "data_type_name", "type": "string"},
8
- {"name": "provenance",
8
+ {"name": "provenance",
9
9
  "type": {
10
- "type": "array",
10
+ "type": "array",
11
11
  "items": {
12
- "type": "record",
13
- "name": "ProcessingEvent",
12
+ "type": "record",
13
+ "name": "ProcessingEvent",
14
14
  "namespace": "org.rflow",
15
15
  "aliases": [],
16
16
  "fields": [
@@ -22,10 +22,10 @@
22
22
  }
23
23
  }
24
24
  },
25
- {"name": "data_serialization_type",
25
+ {"name": "data_serialization_type",
26
26
  "type": {
27
- "type": "enum",
28
- "name": "DataSerializationType",
27
+ "type": "enum",
28
+ "name": "DataSerializationType",
29
29
  "symbols": ["avro", "xml"]
30
30
  }
31
31
  },
@@ -33,4 +33,4 @@
33
33
  {"name": "data", "type": "bytes"}
34
34
  ]
35
35
  }
36
-
36
+
@@ -7,7 +7,7 @@ RFlow::Configuration::RubyDSL.configure do |config|
7
7
  config.setting('rflow.application_directory_path', '../tmp')
8
8
 
9
9
  config.setting('rflow.application_name', 'testapp')
10
-
10
+
11
11
  # Add schemas to the list of available schemas. Not convinced this is necessary
12
12
  # config.schema('schemaname', 'schematype', 'schemadata')
13
13
 
@@ -18,7 +18,7 @@ RFlow::Configuration::RubyDSL.configure do |config|
18
18
  # config.component 'replicate', 'RFlow::Components::Replicate'
19
19
  # config.component 'output1', 'RFlow::Components::FileOutput', 'output_file_path' => '/tmp/crap1'
20
20
  # config.component 'output2', 'RFlow::Components::FileOutput', 'output_file_path' => '/tmp/crap2'
21
-
21
+
22
22
  # Hook components together
23
23
  # config.connect 'generate_ints#out' => 'filter#in'
24
24
  # config.connect 'filter#filtered' => 'replicate#in'
@@ -50,10 +50,10 @@ RFlow::Configuration::RubyDSL.configure do |config|
50
50
  config.connect 'generate_ints#even_odd_out' => 'output_even_odd#in'
51
51
 
52
52
 
53
-
53
+
54
54
  config.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
55
55
  config.component 'output_even_odd2', 'RFlow::Components::FileOutput', 'output_file_path' => '../tmp/out_even_odd2'
56
-
56
+
57
57
  config.connect 'generate_ints2#even_odd_out' => 'output_even_odd2#in'
58
58
 
59
59
  end
@@ -0,0 +1,30 @@
1
+ RFlow::Configuration::RubyDSL.configure do |config|
2
+ config.setting('rflow.log_level', 'DEBUG')
3
+ config.setting('rflow.application_directory_path', '.')
4
+ config.setting('rflow.application_name', 'shardapp')
5
+
6
+ # Instantiate components
7
+ config.shard 's1', :process => 1 do |shard|
8
+ shard.component 'generate_ints1', 'RFlow::Components::GenerateIntegerSequence', 'start' => 0, 'finish' => 10, 'step' => 3
9
+ end
10
+
11
+ config.shard 's2', :type => :process, :count => 2 do |shard|
12
+ shard.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
13
+ end
14
+
15
+ config.component 'filter', 'RFlow::Components::RubyProcFilter', 'filter_proc_string' => 'lambda {|message| true}'
16
+ config.component 'replicate', 'RFlow::Components::Replicate'
17
+
18
+ config.shard 's3', :process => 2 do |shard|
19
+ shard.component 'output1', 'RFlow::Components::FileOutput', 'output_file_path' => 'out1'
20
+ shard.component 'output2', 'RFlow::Components::FileOutput', 'output_file_path' => 'out2'
21
+ end
22
+
23
+ # Hook components together
24
+ config.connect 'generate_ints1#out' => 'filter#in'
25
+ config.connect 'generate_ints2#out' => 'filter#in'
26
+ config.connect 'filter#filtered' => 'replicate#in'
27
+ config.connect 'replicate#out' => 'output1#in'
28
+ config.connect 'replicate#out' => 'output2#in'
29
+
30
+ end
@@ -25,7 +25,7 @@ RFlow::Configuration.add_available_data_type('RFlow::Message::Data::Integer', 'a
25
25
  class RFlow::Components::GenerateIntegerSequence < RFlow::Component
26
26
  output_port :out
27
27
  output_port :even_odd_out
28
-
28
+
29
29
  def configure!(config)
30
30
  @start = config['start'].to_i
31
31
  @finish = config['finish'].to_i
@@ -37,7 +37,7 @@ class RFlow::Components::GenerateIntegerSequence < RFlow::Component
37
37
  # Note that this uses the timer (sometimes with 0 interval) so as
38
38
  # not to block the reactor
39
39
  def run!
40
- timer = EM::PeriodicTimer.new(@interval_seconds) do
40
+ timer = EM::PeriodicTimer.new(@interval_seconds) do
41
41
  message = RFlow::Message.new('RFlow::Message::Data::Integer')
42
42
  message.data.data_object = @start
43
43
  out.send_message message
@@ -46,7 +46,7 @@ class RFlow::Components::GenerateIntegerSequence < RFlow::Component
46
46
  else
47
47
  even_odd_out['odd'].send_message message
48
48
  end
49
-
49
+
50
50
  @start += @step
51
51
  timer.cancel if @start > @finish
52
52
  end
@@ -58,7 +58,7 @@ class RFlow::Components::Replicate < RFlow::Component
58
58
  input_port :in
59
59
  output_port :out
60
60
  output_port :errored
61
-
61
+
62
62
  def process_message(input_port, input_port_key, connection, message)
63
63
  puts "Processing message in Replicate"
64
64
  out.each do |connections|
@@ -84,7 +84,7 @@ class RFlow::Components::RubyProcFilter < RFlow::Component
84
84
  def configure!(config)
85
85
  @filter_proc = eval("lambda {|message| #{config['filter_proc_string']} }")
86
86
  end
87
-
87
+
88
88
  def process_message(input_port, input_port_key, connection, message)
89
89
  puts "Processing message in RubyProcFilter"
90
90
  begin
@@ -111,18 +111,18 @@ class RFlow::Components::FileOutput < RFlow::Component
111
111
  end
112
112
 
113
113
  #def run!; end
114
-
114
+
115
115
  def process_message(input_port, input_port_key, connection, message)
116
116
  puts "About to output to a file #{output_file_path}"
117
117
  output_file.puts message.data.data_object.inspect
118
118
  output_file.flush
119
119
  end
120
120
 
121
-
121
+
122
122
  def cleanup
123
123
  output_file.close
124
124
  end
125
-
125
+
126
126
  end
127
127
 
128
128
  # TODO: Ensure that all the following methods work as they are