rflow 1.0.0a1 → 1.0.0a2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rspec +1 -0
  4. data/Gemfile +0 -1
  5. data/NOTES +0 -13
  6. data/README.md +6 -1
  7. data/bin/rflow +2 -9
  8. data/example/basic_config.rb +1 -33
  9. data/example/basic_extensions.rb +0 -98
  10. data/example/http_config.rb +2 -3
  11. data/example/http_extensions.rb +6 -63
  12. data/lib/rflow.rb +31 -39
  13. data/lib/rflow/child_process.rb +112 -0
  14. data/lib/rflow/component.rb +77 -148
  15. data/lib/rflow/component/port.rb +38 -41
  16. data/lib/rflow/components.rb +4 -8
  17. data/lib/rflow/components/clock.rb +49 -0
  18. data/lib/rflow/components/integer.rb +39 -0
  19. data/lib/rflow/components/raw.rb +10 -6
  20. data/lib/rflow/components/replicate.rb +20 -0
  21. data/lib/rflow/components/ruby_proc_filter.rb +27 -0
  22. data/lib/rflow/configuration.rb +105 -184
  23. data/lib/rflow/configuration/component.rb +1 -4
  24. data/lib/rflow/configuration/connection.rb +11 -16
  25. data/lib/rflow/configuration/port.rb +3 -5
  26. data/lib/rflow/configuration/ruby_dsl.rb +105 -119
  27. data/lib/rflow/configuration/setting.rb +19 -25
  28. data/lib/rflow/configuration/shard.rb +1 -3
  29. data/lib/rflow/connection.rb +47 -10
  30. data/lib/rflow/connections.rb +0 -1
  31. data/lib/rflow/connections/zmq_connection.rb +34 -38
  32. data/lib/rflow/daemon_process.rb +155 -0
  33. data/lib/rflow/logger.rb +41 -25
  34. data/lib/rflow/master.rb +23 -105
  35. data/lib/rflow/message.rb +78 -108
  36. data/lib/rflow/pid_file.rb +37 -37
  37. data/lib/rflow/shard.rb +33 -100
  38. data/lib/rflow/version.rb +2 -2
  39. data/rflow.gemspec +2 -2
  40. data/schema/tick.avsc +10 -0
  41. data/spec/fixtures/config_ints.rb +4 -40
  42. data/spec/fixtures/config_shards.rb +1 -2
  43. data/spec/fixtures/extensions_ints.rb +0 -98
  44. data/spec/rflow/component/port_spec.rb +61 -0
  45. data/spec/rflow/components/clock_spec.rb +72 -0
  46. data/spec/rflow/configuration/ruby_dsl_spec.rb +150 -0
  47. data/spec/rflow/configuration_spec.rb +54 -0
  48. data/spec/rflow/forward_to_input_port_spec.rb +48 -0
  49. data/spec/rflow/forward_to_output_port_spec.rb +40 -0
  50. data/spec/rflow/logger_spec.rb +48 -0
  51. data/spec/rflow/message/data/raw_spec.rb +29 -0
  52. data/spec/rflow/message/data_spec.rb +58 -0
  53. data/spec/rflow/message_spec.rb +154 -0
  54. data/spec/rflow_spec.rb +94 -124
  55. data/spec/spec_helper.rb +8 -12
  56. metadata +46 -22
  57. data/lib/rflow/components/raw/extensions.rb +0 -18
  58. data/lib/rflow/port.rb +0 -4
  59. data/lib/rflow/util.rb +0 -19
  60. data/spec/rflow_component_port_spec.rb +0 -58
  61. data/spec/rflow_configuration_ruby_dsl_spec.rb +0 -148
  62. data/spec/rflow_configuration_spec.rb +0 -73
  63. data/spec/rflow_message_data_raw.rb +0 -26
  64. data/spec/rflow_message_data_spec.rb +0 -60
  65. data/spec/rflow_message_spec.rb +0 -182
  66. data/spec/schema_spec.rb +0 -28
  67. data/temp.rb +0 -295
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 57c6f0b7c61b30886bbf0f4b2f65821aa5b1b0f9
4
- data.tar.gz: 62f58d281509732effeca0c1a041df2668497b80
3
+ metadata.gz: 01773e5fd8c83119dd4c8f699099848bf3584738
4
+ data.tar.gz: b57b8e3c110e826c8d5db30e1c43e416c56bc627
5
5
  SHA512:
6
- metadata.gz: 8d74949a15024641aef4123ca703d2f2ccf6fb5f97dca9829a282ca53bd6d36c347c844b189255955c7fa058bf903853d0c3acf13fda4dc2e2b3f40e49129310
7
- data.tar.gz: f6233d9cc128220c886b6ed4970b544040cace77af6701b6f7429da304ad7de00b5536455469e21988f37d2fa1faaedf3f4324f08eee343669b03c6bdaece735
6
+ metadata.gz: 4c51ebe3e627932117535eeb8cfb0cb213e34fccf9938b78a27095aaf9da2792a2f1e16ddc66bb49a4556adbbe7ca37ed0d96a048319c18581546b91fcbf7d29
7
+ data.tar.gz: ad1162c1f0a2902300073cdccc0ccab3a67ace9fd99cfc47c5bd8449c8d551e94e089a4a73e15600910f08a87de7e34d2d8751ba204a891a62f0a35103618fee
data/.gitignore CHANGED
@@ -4,3 +4,5 @@ Gemfile.lock
4
4
  pkg/*
5
5
  \#*
6
6
  .\#*
7
+ spec/tmp/
8
+ *.swp
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color -fd
data/Gemfile CHANGED
@@ -1,6 +1,5 @@
1
1
  source "https://rubygems.org"
2
2
 
3
- # Specify your gem's dependencies in rflow.gemspec
4
3
  gemspec
5
4
 
6
5
  group :development do
data/NOTES CHANGED
@@ -1,16 +1,3 @@
1
- RFlow starts
2
- read in DB
3
- create new shards
4
- - Create a set of workers with the shard configuration
5
- - each worker creates a set of components
6
-
7
- - create components
8
-
9
-
10
-
11
-
12
-
13
-
14
1
  RFlow Manager
15
2
 
16
3
  Components
data/README.md CHANGED
@@ -28,8 +28,13 @@ processes and/or threads.
28
28
 
29
29
  Some of the long-term goals of RFlow are to allow for components and
30
30
  portions of the workflow to be defined in any language that supports
31
- Avro and ZeroMQ, which a numerous.
31
+ Avro and ZeroMQ, which are numerous.
32
32
 
33
+ ## Developer Notes
34
+
35
+ You will need ZeroMQ preinstalled. Currently, EventMachine only supports
36
+ v3.2.4, not v4.x, so install that version. Older versions like 2.2 will not
37
+ work.
33
38
 
34
39
  ## Definitions
35
40
 
data/bin/rflow CHANGED
@@ -97,13 +97,11 @@ if options[:config_file_path] && command != 'load'
97
97
  exit 1
98
98
  end
99
99
 
100
-
101
100
  unless options[:config_database_path]
102
101
  startup_logger.warn "Config database not specified, using default 'config.sqlite'"
103
102
  options[:config_database_path] = File.expand_path(File.join(Dir.getwd, 'config.sqlite'))
104
103
  end
105
104
 
106
-
107
105
  # Set the standard logger to the startup one in the case that we need
108
106
  # to call into RFlow to check on or setup things, like the config
109
107
  # database. We want those log messages to go to the startup log when
@@ -111,7 +109,6 @@ end
111
109
  # the config database
112
110
  RFlow.logger = startup_logger
113
111
 
114
-
115
112
  case command
116
113
  when 'load'
117
114
  # Load the database with the config file, if it exists. Will
@@ -146,7 +143,6 @@ when 'load'
146
143
  exit 0
147
144
  end
148
145
 
149
-
150
146
  # Load the database config and start setting up environment
151
147
  begin
152
148
  config = RFlow::Configuration.new(options[:config_database_path])
@@ -159,7 +155,6 @@ Dir.chdir(File.dirname(options[:config_database_path]))
159
155
  Dir.chdir(config['rflow.application_directory_path'])
160
156
  pid_file = RFlow::PIDFile.new(config['rflow.pid_file_path'])
161
157
 
162
-
163
158
  case command
164
159
  when 'stop'
165
160
  if pid_file.running?
@@ -187,7 +182,6 @@ when 'start'
187
182
  end
188
183
  end
189
184
 
190
-
191
185
  # We should have eliminated all commands but 'start' at this point
192
186
 
193
187
  # require all the gem extensions
@@ -196,7 +190,6 @@ options[:gems].each do |extension_gem|
196
190
  require extension_gem
197
191
  end
198
192
 
199
-
200
193
  # load all the file extensions
201
194
  options[:extensions_file_paths].each do |extensions_file_path|
202
195
  startup_logger.info "Loading #{extensions_file_path}"
@@ -207,12 +200,12 @@ options[:extensions_file_paths].each do |extensions_file_path|
207
200
  load extensions_file_path
208
201
  end
209
202
 
210
-
211
203
  # Start the flow
212
204
  begin
213
- RFlow.run options[:config_database_path], options[:daemonize]
205
+ RFlow.run! options[:config_database_path], options[:daemonize]
214
206
  rescue Exception => e
215
207
  startup_logger.fatal "Error running rflow: #{e.class}: #{e.message}"
208
+ exit(1)
216
209
  end
217
210
 
218
211
  __END__
data/example/basic_config.rb CHANGED
@@ -1,49 +1,17 @@
1
- # Meat of the config file. Stuff above this should probably be in
2
- # separate gems and/or files that are brought in at runtime.
3
1
  RFlow::Configuration::RubyDSL.configure do |config|
4
2
  # Configure the settings, which include paths for various files, log
5
3
  # levels, and component specific stuffs
6
4
  config.setting('rflow.log_level', 'DEBUG')
7
5
  config.setting('rflow.application_directory_path', '.')
8
6
 
9
- # Add schemas to the list of available. Not convinced this is necessary
10
- # config.schema('schemaname', 'schematype', 'schemadata')
11
-
12
7
  # Instantiate components
13
- # config.component 'generate_ints1', 'RFlow::Components::GenerateIntegerSequence', 'start' => 0, 'finish' => 10, 'step' => 3, 'interval_seconds' => 1
14
- # config.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
15
- # config.component 'filter', 'RFlow::Components::RubyProcFilter', 'filter_proc_string' => 'lambda {|message| true}'
16
- # config.component 'replicate', 'RFlow::Components::Replicate'
17
- # config.component 'output1', 'RFlow::Components::FileOutput', 'output_file_path' => '/tmp/crap1'
18
- # config.component 'output2', 'RFlow::Components::FileOutput', 'output_file_path' => '/tmp/crap2'
19
-
20
- # Hook components together
21
- # config.connect 'generate_ints#out' => 'filter#in'
22
- # config.connect 'filter#filtered' => 'replicate#in'
23
- # config.connect 'replicate#out[0]' => 'simple#in'
24
- # config.connect 'replicate#out[one]' => 'complex#in'
25
- # config.connect 'simple#out' => 'output#in'
26
- # config.connect 'complex#out' => 'output#in'
27
-
28
- # config.connect 'generate_ints1#out' => 'filter#in'
29
- # config.connect 'generate_ints2#out' => 'filter#in'
30
- # config.connect 'filter#filtered' => 'replicate#in'
31
- # config.connect 'replicate#out[1]' => 'output1#in'
32
- # config.connect 'replicate#out[2]' => 'output2#in'
33
- # Some tests that should fail
34
- # output should not have an 'out' ports
35
- # config.connect 'output#out' => 'simple#in'
36
-
37
8
  config.component 'generate_ints', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
38
9
  config.component 'output', 'RFlow::Components::FileOutput', 'output_file_path' => '/tmp/crap'
39
10
  config.component 'output_even', 'RFlow::Components::FileOutput', 'output_file_path' => '/tmp/crap_even'
40
11
  config.component 'output_odd', 'RFlow::Components::FileOutput', 'output_file_path' => '/tmp/crap_odd'
41
12
 
13
+ # Hook components together
42
14
  config.connect 'generate_ints#even_odd_out' => 'output#in'
43
15
  config.connect 'generate_ints#even_odd_out[even]' => 'output_even#in'
44
16
  config.connect 'generate_ints#even_odd_out[odd]' => 'output_odd#in'
45
-
46
-
47
17
  end
48
-
49
-
data/example/basic_extensions.rb CHANGED
@@ -2,8 +2,6 @@
2
2
  require 'rflow/components'
3
3
  require 'rflow/message'
4
4
 
5
- #RFlow::Configuration.add_available_data_schema RFlow::Message::Data::AvroSchema.new('Integer', long_integer_schema)
6
-
7
5
  # Example of creating and registering a data extension
8
6
  module SimpleDataExtension
9
7
  # Use this to default/verify the data in data_object
@@ -15,93 +13,6 @@ module SimpleDataExtension
15
13
  end
16
14
  RFlow::Configuration.add_available_data_extension('RFlow::Message::Data::Integer', SimpleDataExtension)
17
15
 
18
-
19
-
20
- # Example of creating and registering a new schema
21
- long_integer_schema = '{"type": "long"}'
22
- RFlow::Configuration.add_available_data_type('RFlow::Message::Data::Integer', 'avro', long_integer_schema)
23
-
24
-
25
- class RFlow::Components::GenerateIntegerSequence < RFlow::Component
26
- output_port :out
27
- output_port :even_odd_out
28
-
29
- def configure!(config)
30
- @start = config['start'].to_i
31
- @finish = config['finish'].to_i
32
- @step = config['step'] ? config['step'].to_i : 1
33
- # If interval seconds is not given, it will default to 0
34
- @interval_seconds = config['interval_seconds'].to_i
35
- end
36
-
37
- # Note that this uses the timer (sometimes with 0 interval) so as
38
- # not to block the reactor
39
- def run!
40
- timer = EM::PeriodicTimer.new(@interval_seconds) do
41
- message = RFlow::Message.new('RFlow::Message::Data::Integer')
42
- message.data.data_object = @start
43
- out.send_message message
44
- if @start % 2 == 0
45
- even_odd_out['even'].send_message message
46
- else
47
- even_odd_out['odd'].send_message message
48
- end
49
- even_odd_out.send_message message
50
-
51
- @start += @step
52
- timer.cancel if @start > @finish
53
- end
54
- end
55
-
56
- end
57
-
58
- class RFlow::Components::Replicate < RFlow::Component
59
- input_port :in
60
- output_port :out
61
- output_port :errored
62
-
63
- def process_message(input_port, input_port_key, connection, message)
64
- puts "Processing message in Replicate"
65
- out.each do |connections|
66
- puts "Replicating"
67
- begin
68
- connections.send_message message
69
- rescue Exception => e
70
- puts "Exception #{e.message}"
71
- errored.send_message message
72
- end
73
- end
74
- end
75
- end
76
-
77
- puts "Before RubyProcFilter"
78
- class RFlow::Components::RubyProcFilter < RFlow::Component
79
- input_port :in
80
- output_port :filtered
81
- output_port :dropped
82
- output_port :errored
83
-
84
-
85
- def configure!(config)
86
- @filter_proc = eval("lambda {|message| #{config['filter_proc_string']} }")
87
- end
88
-
89
- def process_message(input_port, input_port_key, connection, message)
90
- puts "Processing message in RubyProcFilter"
91
- begin
92
- if @filter_proc.call(message)
93
- filtered.send_message message
94
- else
95
- dropped.send_message message
96
- end
97
- rescue Exception => e
98
- puts "Attempting to send message to errored #{e.message}"
99
- errored.send_message message
100
- end
101
- end
102
- end
103
-
104
- puts "Before FileOutput"
105
16
  class RFlow::Components::FileOutput < RFlow::Component
106
17
  attr_accessor :output_file_path, :output_file
107
18
  input_port :in
@@ -111,23 +22,16 @@ class RFlow::Components::FileOutput < RFlow::Component
111
22
  self.output_file = File.new output_file_path, 'w+'
112
23
  end
113
24
 
114
- #def run!; end
115
-
116
25
  def process_message(input_port, input_port_key, connection, message)
117
- puts "About to output to a file #{output_file_path}"
118
26
  output_file.puts message.data.data_object.inspect
119
27
  output_file.flush
120
28
  end
121
29
 
122
-
123
30
  def cleanup
124
31
  output_file.close
125
32
  end
126
-
127
33
  end
128
34
 
129
- # TODO: Ensure that all the following methods work as they are
130
- # supposed to. This is the interface that I'm adhering to
131
35
  class SimpleComponent < RFlow::Component
132
36
  input_port :in
133
37
  output_port :out
@@ -138,5 +42,3 @@ class SimpleComponent < RFlow::Component
138
42
  def shutdown!; end
139
43
  def cleanup!; end
140
44
  end
141
-
142
-
data/example/http_config.rb CHANGED
@@ -11,11 +11,10 @@ RFlow::Configuration::RubyDSL.configure do |config|
11
11
  config.component 'file_output', 'RFlow::Components::FileOutput', 'output_file_path' => '/tmp/http_crap'
12
12
  config.component 'http_responder', 'HTTPResponder', 'response_code' => 200, 'content' => 'Hi, this teh awesome'
13
13
 
14
- # config.connect 'http_server#request_port' => 'filter#in'
15
- # config.connect 'filter#filtered' => 'replicate#in'
14
+ #config.connect 'http_server#request_port' => 'filter#in'
15
+ #config.connect 'filter#filtered' => 'replicate#in'
16
16
  config.connect 'http_server#request_port' => 'replicate#in'
17
17
  config.connect 'replicate#out[1]' => 'file_output#in'
18
18
  config.connect 'replicate#out[2]' => 'http_responder#request'
19
19
  config.connect 'http_responder#response' => 'http_server#response_port'
20
20
  end
21
-
data/example/http_extensions.rb CHANGED
@@ -1,52 +1,8 @@
1
1
  # This will/should bring in available components and their schemas
2
2
  require 'rflow/components'
3
3
  require 'rflow/message'
4
-
5
-
6
- class RFlow::Components::Replicate < RFlow::Component
7
- input_port :in
8
- output_port :out
9
- output_port :errored
10
-
11
- def process_message(input_port, input_port_key, connection, message)
12
- out.each do |connections|
13
- begin
14
- connections.send_message message
15
- rescue Exception => e
16
- RFlow.logger.debug "#{self.class} Message caused exception: #{e.class}: #{e.message}: #{e.backtrace}"
17
- errored.send_message message
18
- end
19
- end
20
- end
21
- end
22
-
23
-
24
- class RFlow::Components::RubyProcFilter < RFlow::Component
25
- input_port :in
26
- output_port :filtered
27
- output_port :dropped
28
- output_port :errored
29
-
30
-
31
- def configure!(config)
32
- @filter_proc = eval("lambda {|message| #{config['filter_proc_string']} }")
33
- end
34
-
35
- def process_message(input_port, input_port_key, connection, message)
36
- RFlow.logger.debug "Filtering message"
37
- begin
38
- if @filter_proc.call(message)
39
- filtered.send_message message
40
- else
41
- dropped.send_message message
42
- end
43
- rescue Exception => e
44
- RFlow.logger.debug "#{self.class} Message caused exception: #{e.class}: #{e.message}: #{e.backtrace}"
45
- errored.send_message message
46
- end
47
- end
48
- end
49
-
4
+ require 'eventmachine'
5
+ require 'evma_httpserver'
50
6
 
51
7
  class RFlow::Components::FileOutput < RFlow::Component
52
8
  attr_accessor :output_file_path, :output_file
@@ -67,8 +23,6 @@ class RFlow::Components::FileOutput < RFlow::Component
67
23
  end
68
24
  end
69
25
 
70
- # TODO: Ensure that all the following methods work as they are
71
- # supposed to. This is the interface that I'm adhering to
72
26
  class SimpleComponent < RFlow::Component
73
27
  input_port :in
74
28
  output_port :out
@@ -80,7 +34,6 @@ class SimpleComponent < RFlow::Component
80
34
  def cleanup!; end
81
35
  end
82
36
 
83
-
84
37
  http_request_schema =<<EOS
85
38
  {
86
39
  "type": "record",
@@ -108,7 +61,6 @@ http_response_schema =<<EOS
108
61
  EOS
109
62
  RFlow::Configuration.add_available_data_type('HTTPResponse', 'avro', http_response_schema)
110
63
 
111
-
112
64
  # Need to be careful when extending to not clobber data already in data_object
113
65
  module HTTPRequestExtension
114
66
  def self.extended(base_data)
@@ -120,7 +72,6 @@ module HTTPRequestExtension
120
72
  end
121
73
  RFlow::Configuration.add_available_data_extension('HTTPRequest', HTTPRequestExtension)
122
74
 
123
-
124
75
  # Need to be careful when extending to not clobber data already in data_object
125
76
  module HTTPResponseExtension
126
77
  def self.extended(base_data)
@@ -135,10 +86,6 @@ module HTTPResponseExtension
135
86
  end
136
87
  RFlow::Configuration.add_available_data_extension('HTTPResponse', HTTPResponseExtension)
137
88
 
138
-
139
- require 'eventmachine'
140
- require 'evma_httpserver'
141
-
142
89
  class HTTPServer < RFlow::Component
143
90
  input_port :response_port
144
91
  output_port :request_port
@@ -148,7 +95,7 @@ class HTTPServer < RFlow::Component
148
95
  def configure!(config)
149
96
  @listen = config['listen'] ? config['listen'] : '127.0.0.1'
150
97
  @port = config['port'] ? config['port'].to_i : 8000
151
- @connections = Hash.new
98
+ @connections = {}
152
99
  end
153
100
 
154
101
  def run!
@@ -169,6 +116,7 @@ class HTTPServer < RFlow::Component
169
116
  RFlow.logger.debug "Received a HTTPResponse message, determining if its mine"
170
117
  my_events = message.provenance.find_all {|processing_event| processing_event.component_instance_uuid == instance_uuid}
171
118
  RFlow.logger.debug "Found #{my_events.size} processing events from me"
119
+
172
120
  # Attempt to send the data to each context match
173
121
  my_events.each do |processing_event|
174
122
  RFlow.logger.debug "Inspecting #{processing_event.context}"
@@ -192,13 +140,11 @@ class HTTPServer < RFlow::Component
192
140
  no_environment_strings
193
141
  end
194
142
 
195
-
196
143
  def receive_data(data)
197
144
  RFlow.logger.debug "Received #{data.bytesize} data from #{client_ip}:#{client_port}"
198
145
  super
199
146
  end
200
147
 
201
-
202
148
  def process_http_request
203
149
  RFlow.logger.debug "Received a full HTTP request from #{client_ip}:#{client_port}"
204
150
 
@@ -214,8 +160,7 @@ class HTTPServer < RFlow::Component
214
160
  server.request_port.send_message request_message
215
161
  end
216
162
 
217
-
218
- def send_http_response(response_message=nil)
163
+ def send_http_response(response_message = nil)
219
164
  RFlow.logger.debug "Sending an HTTP response to #{client_ip}:#{client_port}"
220
165
  resp = EventMachine::DelegatedHttpResponse.new(self)
221
166
 
@@ -234,11 +179,10 @@ class HTTPServer < RFlow::Component
234
179
  close_connection_after_writing
235
180
  end
236
181
 
237
-
238
182
  # Called when a connection is torn down for whatever reason.
239
183
  # Remove this connection from the server's list
240
184
  def unbind
241
- RFlow.logger.debug "Connection to lost"
185
+ RFlow.logger.debug "Connection lost"
242
186
  server.connections.delete(self.signature)
243
187
  end
244
188
  end
@@ -259,4 +203,3 @@ class HTTPResponder < RFlow::Component
259
203
  response.send_message response_message
260
204
  end
261
205
  end
262
-