rflow 1.0.0a1 → 1.0.0a2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rspec +1 -0
  4. data/Gemfile +0 -1
  5. data/NOTES +0 -13
  6. data/README.md +6 -1
  7. data/bin/rflow +2 -9
  8. data/example/basic_config.rb +1 -33
  9. data/example/basic_extensions.rb +0 -98
  10. data/example/http_config.rb +2 -3
  11. data/example/http_extensions.rb +6 -63
  12. data/lib/rflow.rb +31 -39
  13. data/lib/rflow/child_process.rb +112 -0
  14. data/lib/rflow/component.rb +77 -148
  15. data/lib/rflow/component/port.rb +38 -41
  16. data/lib/rflow/components.rb +4 -8
  17. data/lib/rflow/components/clock.rb +49 -0
  18. data/lib/rflow/components/integer.rb +39 -0
  19. data/lib/rflow/components/raw.rb +10 -6
  20. data/lib/rflow/components/replicate.rb +20 -0
  21. data/lib/rflow/components/ruby_proc_filter.rb +27 -0
  22. data/lib/rflow/configuration.rb +105 -184
  23. data/lib/rflow/configuration/component.rb +1 -4
  24. data/lib/rflow/configuration/connection.rb +11 -16
  25. data/lib/rflow/configuration/port.rb +3 -5
  26. data/lib/rflow/configuration/ruby_dsl.rb +105 -119
  27. data/lib/rflow/configuration/setting.rb +19 -25
  28. data/lib/rflow/configuration/shard.rb +1 -3
  29. data/lib/rflow/connection.rb +47 -10
  30. data/lib/rflow/connections.rb +0 -1
  31. data/lib/rflow/connections/zmq_connection.rb +34 -38
  32. data/lib/rflow/daemon_process.rb +155 -0
  33. data/lib/rflow/logger.rb +41 -25
  34. data/lib/rflow/master.rb +23 -105
  35. data/lib/rflow/message.rb +78 -108
  36. data/lib/rflow/pid_file.rb +37 -37
  37. data/lib/rflow/shard.rb +33 -100
  38. data/lib/rflow/version.rb +2 -2
  39. data/rflow.gemspec +2 -2
  40. data/schema/tick.avsc +10 -0
  41. data/spec/fixtures/config_ints.rb +4 -40
  42. data/spec/fixtures/config_shards.rb +1 -2
  43. data/spec/fixtures/extensions_ints.rb +0 -98
  44. data/spec/rflow/component/port_spec.rb +61 -0
  45. data/spec/rflow/components/clock_spec.rb +72 -0
  46. data/spec/rflow/configuration/ruby_dsl_spec.rb +150 -0
  47. data/spec/rflow/configuration_spec.rb +54 -0
  48. data/spec/rflow/forward_to_input_port_spec.rb +48 -0
  49. data/spec/rflow/forward_to_output_port_spec.rb +40 -0
  50. data/spec/rflow/logger_spec.rb +48 -0
  51. data/spec/rflow/message/data/raw_spec.rb +29 -0
  52. data/spec/rflow/message/data_spec.rb +58 -0
  53. data/spec/rflow/message_spec.rb +154 -0
  54. data/spec/rflow_spec.rb +94 -124
  55. data/spec/spec_helper.rb +8 -12
  56. metadata +46 -22
  57. data/lib/rflow/components/raw/extensions.rb +0 -18
  58. data/lib/rflow/port.rb +0 -4
  59. data/lib/rflow/util.rb +0 -19
  60. data/spec/rflow_component_port_spec.rb +0 -58
  61. data/spec/rflow_configuration_ruby_dsl_spec.rb +0 -148
  62. data/spec/rflow_configuration_spec.rb +0 -73
  63. data/spec/rflow_message_data_raw.rb +0 -26
  64. data/spec/rflow_message_data_spec.rb +0 -60
  65. data/spec/rflow_message_spec.rb +0 -182
  66. data/spec/schema_spec.rb +0 -28
  67. data/temp.rb +0 -295
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 57c6f0b7c61b30886bbf0f4b2f65821aa5b1b0f9
4
- data.tar.gz: 62f58d281509732effeca0c1a041df2668497b80
3
+ metadata.gz: 01773e5fd8c83119dd4c8f699099848bf3584738
4
+ data.tar.gz: b57b8e3c110e826c8d5db30e1c43e416c56bc627
5
5
  SHA512:
6
- metadata.gz: 8d74949a15024641aef4123ca703d2f2ccf6fb5f97dca9829a282ca53bd6d36c347c844b189255955c7fa058bf903853d0c3acf13fda4dc2e2b3f40e49129310
7
- data.tar.gz: f6233d9cc128220c886b6ed4970b544040cace77af6701b6f7429da304ad7de00b5536455469e21988f37d2fa1faaedf3f4324f08eee343669b03c6bdaece735
6
+ metadata.gz: 4c51ebe3e627932117535eeb8cfb0cb213e34fccf9938b78a27095aaf9da2792a2f1e16ddc66bb49a4556adbbe7ca37ed0d96a048319c18581546b91fcbf7d29
7
+ data.tar.gz: ad1162c1f0a2902300073cdccc0ccab3a67ace9fd99cfc47c5bd8449c8d551e94e089a4a73e15600910f08a87de7e34d2d8751ba204a891a62f0a35103618fee
data/.gitignore CHANGED
@@ -4,3 +4,5 @@ Gemfile.lock
4
4
  pkg/*
5
5
  \#*
6
6
  .\#*
7
+ spec/tmp/
8
+ *.swp
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color -fd
data/Gemfile CHANGED
@@ -1,6 +1,5 @@
1
1
  source "https://rubygems.org"
2
2
 
3
- # Specify your gem's dependencies in rflow.gemspec
4
3
  gemspec
5
4
 
6
5
  group :development do
data/NOTES CHANGED
@@ -1,16 +1,3 @@
1
- RFlow starts
2
- read in DB
3
- create new shards
4
- - Create a set of workers with the shard configuration
5
- - each worker creates a set of components
6
-
7
- - create components
8
-
9
-
10
-
11
-
12
-
13
-
14
1
  RFlow Manager
15
2
 
16
3
  Components
data/README.md CHANGED
@@ -28,8 +28,13 @@ processes and/or threads.
28
28
 
29
29
  Some of the long-term goals of RFlow are to allow for components and
30
30
  portions of the workflow to be defined in any language that supports
31
- Avro and ZeroMQ, which a numerous.
31
+ Avro and ZeroMQ, which are numerous.
32
32
 
33
+ ## Developer Notes
34
+
35
+ You will need ZeroMQ preinstalled. Currently, EventMachine only supports
36
+ v3.2.4, not v4.x, so install that version. Older versions like 2.2 will not
37
+ work.
33
38
 
34
39
  ## Definitions
35
40
 
data/bin/rflow CHANGED
@@ -97,13 +97,11 @@ if options[:config_file_path] && command != 'load'
97
97
  exit 1
98
98
  end
99
99
 
100
-
101
100
  unless options[:config_database_path]
102
101
  startup_logger.warn "Config database not specified, using default 'config.sqlite'"
103
102
  options[:config_database_path] = File.expand_path(File.join(Dir.getwd, 'config.sqlite'))
104
103
  end
105
104
 
106
-
107
105
  # Set the standard logger to the startup one in the case that we need
108
106
  # to call into RFlow to check on or setup things, like the config
109
107
  # database. We want those log messages to go to the startup log when
@@ -111,7 +109,6 @@ end
111
109
  # the config database
112
110
  RFlow.logger = startup_logger
113
111
 
114
-
115
112
  case command
116
113
  when 'load'
117
114
  # Load the database with the config file, if it exists. Will
@@ -146,7 +143,6 @@ when 'load'
146
143
  exit 0
147
144
  end
148
145
 
149
-
150
146
  # Load the database config and start setting up environment
151
147
  begin
152
148
  config = RFlow::Configuration.new(options[:config_database_path])
@@ -159,7 +155,6 @@ Dir.chdir(File.dirname(options[:config_database_path]))
159
155
  Dir.chdir(config['rflow.application_directory_path'])
160
156
  pid_file = RFlow::PIDFile.new(config['rflow.pid_file_path'])
161
157
 
162
-
163
158
  case command
164
159
  when 'stop'
165
160
  if pid_file.running?
@@ -187,7 +182,6 @@ when 'start'
187
182
  end
188
183
  end
189
184
 
190
-
191
185
  # We should have eliminated all commands but 'start' at this point
192
186
 
193
187
  # require all the gem extensions
@@ -196,7 +190,6 @@ options[:gems].each do |extension_gem|
196
190
  require extension_gem
197
191
  end
198
192
 
199
-
200
193
  # load all the file extensions
201
194
  options[:extensions_file_paths].each do |extensions_file_path|
202
195
  startup_logger.info "Loading #{extensions_file_path}"
@@ -207,12 +200,12 @@ options[:extensions_file_paths].each do |extensions_file_path|
207
200
  load extensions_file_path
208
201
  end
209
202
 
210
-
211
203
  # Start the flow
212
204
  begin
213
- RFlow.run options[:config_database_path], options[:daemonize]
205
+ RFlow.run! options[:config_database_path], options[:daemonize]
214
206
  rescue Exception => e
215
207
  startup_logger.fatal "Error running rflow: #{e.class}: #{e.message}"
208
+ exit(1)
216
209
  end
217
210
 
218
211
  __END__
data/example/basic_config.rb CHANGED
@@ -1,49 +1,17 @@
1
- # Meat of the config file. Stuff above this should probably be in
2
- # separate gems and/or files that are brought in at runtime.
3
1
  RFlow::Configuration::RubyDSL.configure do |config|
4
2
  # Configure the settings, which include paths for various files, log
5
3
  # levels, and component specific stuffs
6
4
  config.setting('rflow.log_level', 'DEBUG')
7
5
  config.setting('rflow.application_directory_path', '.')
8
6
 
9
- # Add schemas to the list of available. Not convinced this is necessary
10
- # config.schema('schemaname', 'schematype', 'schemadata')
11
-
12
7
  # Instantiate components
13
- # config.component 'generate_ints1', 'RFlow::Components::GenerateIntegerSequence', 'start' => 0, 'finish' => 10, 'step' => 3, 'interval_seconds' => 1
14
- # config.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
15
- # config.component 'filter', 'RFlow::Components::RubyProcFilter', 'filter_proc_string' => 'lambda {|message| true}'
16
- # config.component 'replicate', 'RFlow::Components::Replicate'
17
- # config.component 'output1', 'RFlow::Components::FileOutput', 'output_file_path' => '/tmp/crap1'
18
- # config.component 'output2', 'RFlow::Components::FileOutput', 'output_file_path' => '/tmp/crap2'
19
-
20
- # Hook components together
21
- # config.connect 'generate_ints#out' => 'filter#in'
22
- # config.connect 'filter#filtered' => 'replicate#in'
23
- # config.connect 'replicate#out[0]' => 'simple#in'
24
- # config.connect 'replicate#out[one]' => 'complex#in'
25
- # config.connect 'simple#out' => 'output#in'
26
- # config.connect 'complex#out' => 'output#in'
27
-
28
- # config.connect 'generate_ints1#out' => 'filter#in'
29
- # config.connect 'generate_ints2#out' => 'filter#in'
30
- # config.connect 'filter#filtered' => 'replicate#in'
31
- # config.connect 'replicate#out[1]' => 'output1#in'
32
- # config.connect 'replicate#out[2]' => 'output2#in'
33
- # Some tests that should fail
34
- # output should not have an 'out' ports
35
- # config.connect 'output#out' => 'simple#in'
36
-
37
8
  config.component 'generate_ints', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
38
9
  config.component 'output', 'RFlow::Components::FileOutput', 'output_file_path' => '/tmp/crap'
39
10
  config.component 'output_even', 'RFlow::Components::FileOutput', 'output_file_path' => '/tmp/crap_even'
40
11
  config.component 'output_odd', 'RFlow::Components::FileOutput', 'output_file_path' => '/tmp/crap_odd'
41
12
 
13
+ # Hook components together
42
14
  config.connect 'generate_ints#even_odd_out' => 'output#in'
43
15
  config.connect 'generate_ints#even_odd_out[even]' => 'output_even#in'
44
16
  config.connect 'generate_ints#even_odd_out[odd]' => 'output_odd#in'
45
-
46
-
47
17
  end
48
-
49
-
data/example/basic_extensions.rb CHANGED
@@ -2,8 +2,6 @@
2
2
  require 'rflow/components'
3
3
  require 'rflow/message'
4
4
 
5
- #RFlow::Configuration.add_available_data_schema RFlow::Message::Data::AvroSchema.new('Integer', long_integer_schema)
6
-
7
5
  # Example of creating and registering a data extension
8
6
  module SimpleDataExtension
9
7
  # Use this to default/verify the data in data_object
@@ -15,93 +13,6 @@ module SimpleDataExtension
15
13
  end
16
14
  RFlow::Configuration.add_available_data_extension('RFlow::Message::Data::Integer', SimpleDataExtension)
17
15
 
18
-
19
-
20
- # Example of creating and registering a new schema
21
- long_integer_schema = '{"type": "long"}'
22
- RFlow::Configuration.add_available_data_type('RFlow::Message::Data::Integer', 'avro', long_integer_schema)
23
-
24
-
25
- class RFlow::Components::GenerateIntegerSequence < RFlow::Component
26
- output_port :out
27
- output_port :even_odd_out
28
-
29
- def configure!(config)
30
- @start = config['start'].to_i
31
- @finish = config['finish'].to_i
32
- @step = config['step'] ? config['step'].to_i : 1
33
- # If interval seconds is not given, it will default to 0
34
- @interval_seconds = config['interval_seconds'].to_i
35
- end
36
-
37
- # Note that this uses the timer (sometimes with 0 interval) so as
38
- # not to block the reactor
39
- def run!
40
- timer = EM::PeriodicTimer.new(@interval_seconds) do
41
- message = RFlow::Message.new('RFlow::Message::Data::Integer')
42
- message.data.data_object = @start
43
- out.send_message message
44
- if @start % 2 == 0
45
- even_odd_out['even'].send_message message
46
- else
47
- even_odd_out['odd'].send_message message
48
- end
49
- even_odd_out.send_message message
50
-
51
- @start += @step
52
- timer.cancel if @start > @finish
53
- end
54
- end
55
-
56
- end
57
-
58
- class RFlow::Components::Replicate < RFlow::Component
59
- input_port :in
60
- output_port :out
61
- output_port :errored
62
-
63
- def process_message(input_port, input_port_key, connection, message)
64
- puts "Processing message in Replicate"
65
- out.each do |connections|
66
- puts "Replicating"
67
- begin
68
- connections.send_message message
69
- rescue Exception => e
70
- puts "Exception #{e.message}"
71
- errored.send_message message
72
- end
73
- end
74
- end
75
- end
76
-
77
- puts "Before RubyProcFilter"
78
- class RFlow::Components::RubyProcFilter < RFlow::Component
79
- input_port :in
80
- output_port :filtered
81
- output_port :dropped
82
- output_port :errored
83
-
84
-
85
- def configure!(config)
86
- @filter_proc = eval("lambda {|message| #{config['filter_proc_string']} }")
87
- end
88
-
89
- def process_message(input_port, input_port_key, connection, message)
90
- puts "Processing message in RubyProcFilter"
91
- begin
92
- if @filter_proc.call(message)
93
- filtered.send_message message
94
- else
95
- dropped.send_message message
96
- end
97
- rescue Exception => e
98
- puts "Attempting to send message to errored #{e.message}"
99
- errored.send_message message
100
- end
101
- end
102
- end
103
-
104
- puts "Before FileOutput"
105
16
  class RFlow::Components::FileOutput < RFlow::Component
106
17
  attr_accessor :output_file_path, :output_file
107
18
  input_port :in
@@ -111,23 +22,16 @@ class RFlow::Components::FileOutput < RFlow::Component
111
22
  self.output_file = File.new output_file_path, 'w+'
112
23
  end
113
24
 
114
- #def run!; end
115
-
116
25
  def process_message(input_port, input_port_key, connection, message)
117
- puts "About to output to a file #{output_file_path}"
118
26
  output_file.puts message.data.data_object.inspect
119
27
  output_file.flush
120
28
  end
121
29
 
122
-
123
30
  def cleanup
124
31
  output_file.close
125
32
  end
126
-
127
33
  end
128
34
 
129
- # TODO: Ensure that all the following methods work as they are
130
- # supposed to. This is the interface that I'm adhering to
131
35
  class SimpleComponent < RFlow::Component
132
36
  input_port :in
133
37
  output_port :out
@@ -138,5 +42,3 @@ class SimpleComponent < RFlow::Component
138
42
  def shutdown!; end
139
43
  def cleanup!; end
140
44
  end
141
-
142
-
data/example/http_config.rb CHANGED
@@ -11,11 +11,10 @@ RFlow::Configuration::RubyDSL.configure do |config|
11
11
  config.component 'file_output', 'RFlow::Components::FileOutput', 'output_file_path' => '/tmp/http_crap'
12
12
  config.component 'http_responder', 'HTTPResponder', 'response_code' => 200, 'content' => 'Hi, this teh awesome'
13
13
 
14
- # config.connect 'http_server#request_port' => 'filter#in'
15
- # config.connect 'filter#filtered' => 'replicate#in'
14
+ #config.connect 'http_server#request_port' => 'filter#in'
15
+ #config.connect 'filter#filtered' => 'replicate#in'
16
16
  config.connect 'http_server#request_port' => 'replicate#in'
17
17
  config.connect 'replicate#out[1]' => 'file_output#in'
18
18
  config.connect 'replicate#out[2]' => 'http_responder#request'
19
19
  config.connect 'http_responder#response' => 'http_server#response_port'
20
20
  end
21
-
data/example/http_extensions.rb CHANGED
@@ -1,52 +1,8 @@
1
1
  # This will/should bring in available components and their schemas
2
2
  require 'rflow/components'
3
3
  require 'rflow/message'
4
-
5
-
6
- class RFlow::Components::Replicate < RFlow::Component
7
- input_port :in
8
- output_port :out
9
- output_port :errored
10
-
11
- def process_message(input_port, input_port_key, connection, message)
12
- out.each do |connections|
13
- begin
14
- connections.send_message message
15
- rescue Exception => e
16
- RFlow.logger.debug "#{self.class} Message caused exception: #{e.class}: #{e.message}: #{e.backtrace}"
17
- errored.send_message message
18
- end
19
- end
20
- end
21
- end
22
-
23
-
24
- class RFlow::Components::RubyProcFilter < RFlow::Component
25
- input_port :in
26
- output_port :filtered
27
- output_port :dropped
28
- output_port :errored
29
-
30
-
31
- def configure!(config)
32
- @filter_proc = eval("lambda {|message| #{config['filter_proc_string']} }")
33
- end
34
-
35
- def process_message(input_port, input_port_key, connection, message)
36
- RFlow.logger.debug "Filtering message"
37
- begin
38
- if @filter_proc.call(message)
39
- filtered.send_message message
40
- else
41
- dropped.send_message message
42
- end
43
- rescue Exception => e
44
- RFlow.logger.debug "#{self.class} Message caused exception: #{e.class}: #{e.message}: #{e.backtrace}"
45
- errored.send_message message
46
- end
47
- end
48
- end
49
-
4
+ require 'eventmachine'
5
+ require 'evma_httpserver'
50
6
 
51
7
  class RFlow::Components::FileOutput < RFlow::Component
52
8
  attr_accessor :output_file_path, :output_file
@@ -67,8 +23,6 @@ class RFlow::Components::FileOutput < RFlow::Component
67
23
  end
68
24
  end
69
25
 
70
- # TODO: Ensure that all the following methods work as they are
71
- # supposed to. This is the interface that I'm adhering to
72
26
  class SimpleComponent < RFlow::Component
73
27
  input_port :in
74
28
  output_port :out
@@ -80,7 +34,6 @@ class SimpleComponent < RFlow::Component
80
34
  def cleanup!; end
81
35
  end
82
36
 
83
-
84
37
  http_request_schema =<<EOS
85
38
  {
86
39
  "type": "record",
@@ -108,7 +61,6 @@ http_response_schema =<<EOS
108
61
  EOS
109
62
  RFlow::Configuration.add_available_data_type('HTTPResponse', 'avro', http_response_schema)
110
63
 
111
-
112
64
  # Need to be careful when extending to not clobber data already in data_object
113
65
  module HTTPRequestExtension
114
66
  def self.extended(base_data)
@@ -120,7 +72,6 @@ module HTTPRequestExtension
120
72
  end
121
73
  RFlow::Configuration.add_available_data_extension('HTTPRequest', HTTPRequestExtension)
122
74
 
123
-
124
75
  # Need to be careful when extending to not clobber data already in data_object
125
76
  module HTTPResponseExtension
126
77
  def self.extended(base_data)
@@ -135,10 +86,6 @@ module HTTPResponseExtension
135
86
  end
136
87
  RFlow::Configuration.add_available_data_extension('HTTPResponse', HTTPResponseExtension)
137
88
 
138
-
139
- require 'eventmachine'
140
- require 'evma_httpserver'
141
-
142
89
  class HTTPServer < RFlow::Component
143
90
  input_port :response_port
144
91
  output_port :request_port
@@ -148,7 +95,7 @@ class HTTPServer < RFlow::Component
148
95
  def configure!(config)
149
96
  @listen = config['listen'] ? config['listen'] : '127.0.0.1'
150
97
  @port = config['port'] ? config['port'].to_i : 8000
151
- @connections = Hash.new
98
+ @connections = {}
152
99
  end
153
100
 
154
101
  def run!
@@ -169,6 +116,7 @@ class HTTPServer < RFlow::Component
169
116
  RFlow.logger.debug "Received a HTTPResponse message, determining if its mine"
170
117
  my_events = message.provenance.find_all {|processing_event| processing_event.component_instance_uuid == instance_uuid}
171
118
  RFlow.logger.debug "Found #{my_events.size} processing events from me"
119
+
172
120
  # Attempt to send the data to each context match
173
121
  my_events.each do |processing_event|
174
122
  RFlow.logger.debug "Inspecting #{processing_event.context}"
@@ -192,13 +140,11 @@ class HTTPServer < RFlow::Component
192
140
  no_environment_strings
193
141
  end
194
142
 
195
-
196
143
  def receive_data(data)
197
144
  RFlow.logger.debug "Received #{data.bytesize} data from #{client_ip}:#{client_port}"
198
145
  super
199
146
  end
200
147
 
201
-
202
148
  def process_http_request
203
149
  RFlow.logger.debug "Received a full HTTP request from #{client_ip}:#{client_port}"
204
150
 
@@ -214,8 +160,7 @@ class HTTPServer < RFlow::Component
214
160
  server.request_port.send_message request_message
215
161
  end
216
162
 
217
-
218
- def send_http_response(response_message=nil)
163
+ def send_http_response(response_message = nil)
219
164
  RFlow.logger.debug "Sending an HTTP response to #{client_ip}:#{client_port}"
220
165
  resp = EventMachine::DelegatedHttpResponse.new(self)
221
166
 
@@ -234,11 +179,10 @@ class HTTPServer < RFlow::Component
234
179
  close_connection_after_writing
235
180
  end
236
181
 
237
-
238
182
  # Called when a connection is torn down for whatever reason.
239
183
  # Remove this connection from the server's list
240
184
  def unbind
241
- RFlow.logger.debug "Connection to lost"
185
+ RFlow.logger.debug "Connection lost"
242
186
  server.connections.delete(self.signature)
243
187
  end
244
188
  end
@@ -259,4 +203,3 @@ class HTTPResponder < RFlow::Component
259
203
  response.send_message response_message
260
204
  end
261
205
  end
262
-