rflow 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +6 -0
  3. data/.rvmrc +1 -0
  4. data/Gemfile +5 -0
  5. data/NOTES +187 -0
  6. data/README +0 -0
  7. data/Rakefile +16 -0
  8. data/bin/rflow +215 -0
  9. data/example/basic_config.rb +49 -0
  10. data/example/basic_extensions.rb +142 -0
  11. data/example/http_config.rb +21 -0
  12. data/example/http_extensions.rb +262 -0
  13. data/lib/rflow.rb +440 -0
  14. data/lib/rflow/component.rb +192 -0
  15. data/lib/rflow/component/port.rb +150 -0
  16. data/lib/rflow/components.rb +10 -0
  17. data/lib/rflow/components/raw.rb +26 -0
  18. data/lib/rflow/components/raw/extensions.rb +18 -0
  19. data/lib/rflow/configuration.rb +290 -0
  20. data/lib/rflow/configuration/component.rb +27 -0
  21. data/lib/rflow/configuration/connection.rb +98 -0
  22. data/lib/rflow/configuration/migrations/20010101000001_create_settings.rb +14 -0
  23. data/lib/rflow/configuration/migrations/20010101000002_create_components.rb +19 -0
  24. data/lib/rflow/configuration/migrations/20010101000003_create_ports.rb +24 -0
  25. data/lib/rflow/configuration/migrations/20010101000004_create_connections.rb +27 -0
  26. data/lib/rflow/configuration/port.rb +30 -0
  27. data/lib/rflow/configuration/ruby_dsl.rb +183 -0
  28. data/lib/rflow/configuration/setting.rb +67 -0
  29. data/lib/rflow/configuration/uuid_keyed.rb +18 -0
  30. data/lib/rflow/connection.rb +59 -0
  31. data/lib/rflow/connections.rb +2 -0
  32. data/lib/rflow/connections/zmq_connection.rb +101 -0
  33. data/lib/rflow/message.rb +191 -0
  34. data/lib/rflow/port.rb +4 -0
  35. data/lib/rflow/util.rb +19 -0
  36. data/lib/rflow/version.rb +3 -0
  37. data/rflow.gemspec +42 -0
  38. data/schema/message.avsc +36 -0
  39. data/schema/raw.avsc +9 -0
  40. data/spec/fixtures/config_ints.rb +61 -0
  41. data/spec/fixtures/extensions_ints.rb +141 -0
  42. data/spec/rflow_configuration_spec.rb +73 -0
  43. data/spec/rflow_message_data_raw.rb +26 -0
  44. data/spec/rflow_message_data_spec.rb +60 -0
  45. data/spec/rflow_message_spec.rb +182 -0
  46. data/spec/rflow_spec.rb +100 -0
  47. data/spec/schema_spec.rb +28 -0
  48. data/spec/spec_helper.rb +37 -0
  49. data/temp.rb +295 -0
  50. metadata +270 -0
@@ -0,0 +1,49 @@
1
+ # Meat of the config file. Stuff above this should probably be in
2
+ # separate gems and/or files that are brought in at runtime.
3
+ RFlow::Configuration::RubyDSL.configure do |config|
4
+ # Configure the settings, which include paths for various files, log
5
+ # levels, and component-specific settings
6
+ config.setting('rflow.log_level', 'DEBUG')
7
+ config.setting('rflow.application_directory_path', '.')
8
+
9
+ # Add schemas to the list of available schemas. Not convinced this is necessary
10
+ # config.schema('schemaname', 'schematype', 'schemadata')
11
+
12
+ # Instantiate components
13
+ # config.component 'generate_ints1', 'RFlow::Components::GenerateIntegerSequence', 'start' => 0, 'finish' => 10, 'step' => 3, 'interval_seconds' => 1
14
+ # config.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
15
+ # config.component 'filter', 'RFlow::Components::RubyProcFilter', 'filter_proc_string' => 'true'
16
+ # config.component 'replicate', 'RFlow::Components::Replicate'
17
+ # config.component 'output1', 'RFlow::Components::FileOutput', 'output_file_path' => '/tmp/crap1'
18
+ # config.component 'output2', 'RFlow::Components::FileOutput', 'output_file_path' => '/tmp/crap2'
19
+
20
+ # Hook components together
21
+ # config.connect 'generate_ints#out' => 'filter#in'
22
+ # config.connect 'filter#filtered' => 'replicate#in'
23
+ # config.connect 'replicate#out[0]' => 'simple#in'
24
+ # config.connect 'replicate#out[one]' => 'complex#in'
25
+ # config.connect 'simple#out' => 'output#in'
26
+ # config.connect 'complex#out' => 'output#in'
27
+
28
+ # config.connect 'generate_ints1#out' => 'filter#in'
29
+ # config.connect 'generate_ints2#out' => 'filter#in'
30
+ # config.connect 'filter#filtered' => 'replicate#in'
31
+ # config.connect 'replicate#out[1]' => 'output1#in'
32
+ # config.connect 'replicate#out[2]' => 'output2#in'
33
+ # Some tests that should fail
34
+ # output should not have an 'out' port
35
+ # config.connect 'output#out' => 'simple#in'
36
+
37
+ config.component 'generate_ints', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
38
+ config.component 'output', 'RFlow::Components::FileOutput', 'output_file_path' => '/tmp/crap'
39
+ config.component 'output_even', 'RFlow::Components::FileOutput', 'output_file_path' => '/tmp/crap_even'
40
+ config.component 'output_odd', 'RFlow::Components::FileOutput', 'output_file_path' => '/tmp/crap_odd'
41
+
42
+ config.connect 'generate_ints#even_odd_out' => 'output#in'
43
+ config.connect 'generate_ints#even_odd_out[even]' => 'output_even#in'
44
+ config.connect 'generate_ints#even_odd_out[odd]' => 'output_odd#in'
45
+
46
+
47
+ end
48
+
49
+
@@ -0,0 +1,142 @@
1
+ # This will/should bring in available components and their schemas
2
+ require 'rflow/components'
3
+ require 'rflow/message'
4
+
5
+ #RFlow::Configuration.add_available_data_schema RFlow::Message::Data::AvroSchema.new('Integer', long_integer_schema)
6
+
7
+ # Example of creating and registering a data extension
8
+ module SimpleDataExtension
9
+ # Use this to default/verify the data in data_object
10
+ def self.extended(base_data)
11
+ base_data.data_object
12
+ end
13
+
14
+ def my_method; end
15
+ end
16
+ RFlow::Configuration.add_available_data_extension('RFlow::Message::Data::Integer', SimpleDataExtension)
17
+
18
+
19
+
20
+ # Example of creating and registering a new schema
21
+ long_integer_schema = '{"type": "long"}'
22
+ RFlow::Configuration.add_available_data_type('RFlow::Message::Data::Integer', 'avro', long_integer_schema)
23
+
24
+
25
+ class RFlow::Components::GenerateIntegerSequence < RFlow::Component
26
+ output_port :out
27
+ output_port :even_odd_out
28
+
29
+ def configure!(config)
30
+ @start = config['start'].to_i
31
+ @finish = config['finish'].to_i
32
+ @step = config['step'] ? config['step'].to_i : 1
33
+ # If interval seconds is not given, it will default to 0
34
+ @interval_seconds = config['interval_seconds'].to_i
35
+ end
36
+
37
+ # Note that this uses the timer (sometimes with 0 interval) so as
38
+ # not to block the reactor
39
+ def run!
40
+ timer = EM::PeriodicTimer.new(@interval_seconds) do
41
+ message = RFlow::Message.new('RFlow::Message::Data::Integer')
42
+ message.data.data_object = @start
43
+ out.send_message message
44
+ if @start % 2 == 0
45
+ even_odd_out['even'].send_message message
46
+ else
47
+ even_odd_out['odd'].send_message message
48
+ end
49
+ even_odd_out.send_message message
50
+
51
+ @start += @step
52
+ timer.cancel if @start > @finish
53
+ end
54
+ end
55
+
56
+ end
57
+
58
+ class RFlow::Components::Replicate < RFlow::Component
59
+ input_port :in
60
+ output_port :out
61
+ output_port :errored
62
+
63
+ def process_message(input_port, input_port_key, connection, message)
64
+ puts "Processing message in Replicate"
65
+ out.each do |connections|
66
+ puts "Replicating"
67
+ begin
68
+ connections.send_message message
69
+ rescue Exception => e
70
+ puts "Exception #{e.message}"
71
+ errored.send_message message
72
+ end
73
+ end
74
+ end
75
+ end
76
+
77
+ puts "Before RubyProcFilter"
78
+ class RFlow::Components::RubyProcFilter < RFlow::Component
79
+ input_port :in
80
+ output_port :filtered
81
+ output_port :dropped
82
+ output_port :errored
83
+
84
+
85
+ def configure!(config)
86
+ @filter_proc = eval("lambda {|message| #{config['filter_proc_string']} }")
87
+ end
88
+
89
+ def process_message(input_port, input_port_key, connection, message)
90
+ puts "Processing message in RubyProcFilter"
91
+ begin
92
+ if @filter_proc.call(message)
93
+ filtered.send_message message
94
+ else
95
+ dropped.send_message message
96
+ end
97
+ rescue Exception => e
98
+ puts "Attempting to send message to errored #{e.message}"
99
+ errored.send_message message
100
+ end
101
+ end
102
+ end
103
+
104
+ puts "Before FileOutput"
105
+ class RFlow::Components::FileOutput < RFlow::Component
106
+ attr_accessor :output_file_path, :output_file
107
+ input_port :in
108
+
109
+ def configure!(config)
110
+ self.output_file_path = config['output_file_path']
111
+ self.output_file = File.new output_file_path, 'w+'
112
+ end
113
+
114
+ #def run!; end
115
+
116
+ def process_message(input_port, input_port_key, connection, message)
117
+ puts "About to output to a file #{output_file_path}"
118
+ output_file.puts message.data.data_object.inspect
119
+ output_file.flush
120
+ end
121
+
122
+
123
+ def cleanup
124
+ output_file.close
125
+ end
126
+
127
+ end
128
+
129
+ # TODO: Ensure that all the following methods work as they are
130
+ # supposed to. This is the interface that I'm adhering to
131
+ class SimpleComponent < RFlow::Component
132
+ input_port :in
133
+ output_port :out
134
+
135
+ def configure!(config); end
136
+ def run!; end
137
+ def process_message(input_port, input_port_key, connection, message); end
138
+ def shutdown!; end
139
+ def cleanup!; end
140
+ end
141
+
142
+
@@ -0,0 +1,21 @@
1
+ RFlow::Configuration::RubyDSL.configure do |config|
2
+ # Configure the settings, which include paths for various files, log
3
+ # levels, and component-specific settings
4
+ config.setting('rflow.log_level', 'INFO')
5
+ config.setting('rflow.application_directory_path', '.')
6
+
7
+ # Instantiate components
8
+ config.component 'http_server', 'HTTPServer', 'port' => 8080
9
+ config.component 'filter', 'RFlow::Components::RubyProcFilter', 'filter_proc_string' => 'message.data.path == "/awesome"'
10
+ config.component 'replicate', 'RFlow::Components::Replicate'
11
+ config.component 'file_output', 'RFlow::Components::FileOutput', 'output_file_path' => '/tmp/http_crap'
12
+ config.component 'http_responder', 'HTTPResponder', 'response_code' => 200, 'content' => 'Hi, this teh awesome'
13
+
14
+ # config.connect 'http_server#request_port' => 'filter#in'
15
+ # config.connect 'filter#filtered' => 'replicate#in'
16
+ config.connect 'http_server#request_port' => 'replicate#in'
17
+ config.connect 'replicate#out[1]' => 'file_output#in'
18
+ config.connect 'replicate#out[2]' => 'http_responder#request'
19
+ config.connect 'http_responder#response' => 'http_server#response_port'
20
+ end
21
+
@@ -0,0 +1,262 @@
1
+ # This will/should bring in available components and their schemas
2
+ require 'rflow/components'
3
+ require 'rflow/message'
4
+
5
+
6
+ class RFlow::Components::Replicate < RFlow::Component
7
+ input_port :in
8
+ output_port :out
9
+ output_port :errored
10
+
11
+ def process_message(input_port, input_port_key, connection, message)
12
+ out.each do |connections|
13
+ begin
14
+ connections.send_message message
15
+ rescue Exception => e
16
+ RFlow.logger.debug "#{self.class} Message caused exception: #{e.class}: #{e.message}: #{e.backtrace}"
17
+ errored.send_message message
18
+ end
19
+ end
20
+ end
21
+ end
22
+
23
+
24
+ class RFlow::Components::RubyProcFilter < RFlow::Component
25
+ input_port :in
26
+ output_port :filtered
27
+ output_port :dropped
28
+ output_port :errored
29
+
30
+
31
+ def configure!(config)
32
+ @filter_proc = eval("lambda {|message| #{config['filter_proc_string']} }")
33
+ end
34
+
35
+ def process_message(input_port, input_port_key, connection, message)
36
+ RFlow.logger.debug "Filtering message"
37
+ begin
38
+ if @filter_proc.call(message)
39
+ filtered.send_message message
40
+ else
41
+ dropped.send_message message
42
+ end
43
+ rescue Exception => e
44
+ RFlow.logger.debug "#{self.class} Message caused exception: #{e.class}: #{e.message}: #{e.backtrace}"
45
+ errored.send_message message
46
+ end
47
+ end
48
+ end
49
+
50
+
51
+ class RFlow::Components::FileOutput < RFlow::Component
52
+ attr_accessor :output_file_path, :output_file
53
+ input_port :in
54
+ # Opens the file named by config['output_file_path'] with mode 'w+'
55
+ def configure!(config)
56
+ self.output_file_path = config['output_file_path']
57
+ self.output_file = File.new output_file_path, 'w+'
58
+ end
59
+ # Writes the inspect form of the message's data_object as one line, then flushes
60
+ def process_message(input_port, input_port_key, connection, message)
61
+ output_file.puts message.data.data_object.inspect
62
+ output_file.flush
63
+ end
64
+ # Closes the output file
65
+ def cleanup!
66
+ output_file.close
67
+ end
68
+ end
69
+
70
+ # TODO: Ensure that all the following methods work as they are
71
+ # supposed to. This is the interface that I'm adhering to
72
+ class SimpleComponent < RFlow::Component
73
+ input_port :in
74
+ output_port :out
75
+
76
+ def configure!(config); end
77
+ def run!; end
78
+ def process_message(input_port, input_port_key, connection, message); end
79
+ def shutdown!; end
80
+ def cleanup!; end
81
+ end
82
+
83
+
84
+ http_request_schema =<<EOS
85
+ {
86
+ "type": "record",
87
+ "name": "HTTPRequest",
88
+ "namespace": "org.rflow",
89
+ "aliases": [],
90
+ "fields": [
91
+ {"name": "path", "type": "string"}
92
+ ]
93
+ }
94
+ EOS
95
+ RFlow::Configuration.add_available_data_type('HTTPRequest', 'avro', http_request_schema)
96
+
97
+ http_response_schema =<<EOS
98
+ {
99
+ "type": "record",
100
+ "name": "HTTPResponse",
101
+ "namespace": "org.rflow",
102
+ "aliases": [],
103
+ "fields": [
104
+ {"name": "status", "type": "int"},
105
+ {"name": "content", "type": "bytes"}
106
+ ]
107
+ }
108
+ EOS
109
+ RFlow::Configuration.add_available_data_type('HTTPResponse', 'avro', http_response_schema)
110
+
111
+
112
+ # Need to be careful when extending to not clobber data already in data_object
113
+ module HTTPRequestExtension
114
+ def self.extended(base_data)
115
+ base_data.data_object ||= {'path' => ''}
116
+ end
117
+
118
+ def path; data_object['path']; end
119
+ def path=(new_path); data_object['path'] = new_path; end
120
+ end
121
+ RFlow::Configuration.add_available_data_extension('HTTPRequest', HTTPRequestExtension)
122
+
123
+
124
+ # Need to be careful when extending to not clobber data already in data_object
125
+ module HTTPResponseExtension
126
+ def self.extended(base_data)
127
+ base_data.data_object ||= {'status' => 200, 'content' => ''}
128
+ end
129
+
130
+ def status; data_object['status']; end
131
+ def status=(new_status); data_object['status'] = new_status; end
132
+
133
+ def content; data_object['content']; end
134
+ def content=(new_content); data_object['content'] = new_content; end
135
+ end
136
+ RFlow::Configuration.add_available_data_extension('HTTPResponse', HTTPResponseExtension)
137
+
138
+
139
+ require 'eventmachine'
140
+ require 'evma_httpserver'
141
+
142
+ class HTTPServer < RFlow::Component
143
+ input_port :response_port
144
+ output_port :request_port
145
+
146
+ attr_accessor :port, :listen, :server_signature, :connections
147
+
148
+ def configure!(config)
149
+ @listen = config['listen'] ? config['listen'] : '127.0.0.1'
150
+ @port = config['port'] ? config['port'].to_i : 8000
151
+ @connections = Hash.new
152
+ end
153
+
154
+ def run!
155
+ @server_signature = EM.start_server(@listen, @port, Connection) do |conn|
156
+ conn.server = self
157
+ self.connections[conn.signature.to_s] = conn
158
+ end
159
+ end
160
+
161
+ # Getting all messages to response_port, which we need to filter for
162
+ # those that pertain to this component and have active connections.
163
+ # This is done by inspecting the provenance, specifically the
164
+ # context attribute that we stored originally
165
+ def process_message(input_port, input_port_key, connection, message)
166
+ RFlow.logger.debug "Received a message"
167
+ return unless message.data_type_name == 'HTTPResponse'
168
+
169
+ RFlow.logger.debug "Received a HTTPResponse message, determining if its mine"
170
+ my_events = message.provenance.find_all {|processing_event| processing_event.component_instance_uuid == instance_uuid}
171
+ RFlow.logger.debug "Found #{my_events.size} processing events from me"
172
+ # Attempt to send the data to each context match
173
+ my_events.each do |processing_event|
174
+ RFlow.logger.debug "Inspecting #{processing_event.context}"
175
+ ip, port, connection_signature = processing_event.context.split ':'
176
+ if connections[connection_signature]
177
+ RFlow.logger.debug "Found connection for #{processing_event.context}"
178
+ connections[connection_signature].send_http_response message
179
+ end
180
+ end
181
+ end
182
+
183
+ class Connection < EventMachine::Connection
184
+ include EventMachine::HttpServer
185
+
186
+ attr_accessor :server, :client_ip, :client_port
187
+
188
+ def post_init
189
+ @client_port, @client_ip = Socket.unpack_sockaddr_in(get_peername) rescue ["?", "?.?.?.?"]
190
+ RFlow.logger.debug "Connection from #{@client_ip}:#{@client_port}"
191
+ super
192
+ no_environment_strings
193
+ end
194
+
195
+
196
+ def receive_data(data)
197
+ RFlow.logger.debug "Received #{data.bytesize} data from #{client_ip}:#{client_port}"
198
+ super
199
+ end
200
+
201
+
202
+ def process_http_request
203
+ RFlow.logger.debug "Received a full HTTP request from #{client_ip}:#{client_port}"
204
+
205
+ processing_event = RFlow::Message::ProcessingEvent.new(server.instance_uuid, Time.now.utc)
206
+
207
+ request_message = RFlow::Message.new('HTTPRequest')
208
+ request_message.data.path = @http_request_uri
209
+
210
+ processing_event.context = "#{client_ip}:#{client_port}:#{signature}"
211
+ processing_event.completed_at = Time.now.utc
212
+ request_message.provenance << processing_event
213
+
214
+ server.request_port.send_message request_message
215
+ end
216
+
217
+
218
+ def send_http_response(response_message=nil)
219
+ RFlow.logger.debug "Sending an HTTP response to #{client_ip}:#{client_port}"
220
+ resp = EventMachine::DelegatedHttpResponse.new(self)
221
+
222
+ # Default values
223
+ resp.status = 200
224
+ resp.content = ""
225
+ resp.headers["Content-Type"] = "text/html; charset=UTF-8"
226
+ resp.headers["Server"] = "Apache/2.2.3 (CentOS)"
227
+
228
+ if response_message
229
+ resp.status = response_message.data.status
230
+ resp.content = response_message.data.content
231
+ end
232
+
233
+ resp.send_response
234
+ close_connection_after_writing
235
+ end
236
+
237
+
238
+ # Called when a connection is torn down for whatever reason.
239
+ # Remove this connection from the server's list
240
+ def unbind
241
+ RFlow.logger.debug "Connection to lost"
242
+ server.connections.delete(self.signature)
243
+ end
244
+ end
245
+ end
246
+
247
+ # As this component creates a new message type based on another, it
248
+ # copies over the provenance. It does not bother to add its own
249
+ # processing_event to the provenance, but it could/should
250
+ class HTTPResponder < RFlow::Component
251
+ input_port :request
252
+ output_port :response
253
+
254
+ def process_message(input_port, input_port_key, connection, message)
255
+ response_message = RFlow::Message.new('HTTPResponse')
256
+ response_message.data.status = 404
257
+ response_message.data.content = "CONTENT: #{message.data.path} was accessed"
258
+ response_message.provenance = message.provenance
259
+ response.send_message response_message
260
+ end
261
+ end
262
+