rflow 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +6 -0
  3. data/.rvmrc +1 -0
  4. data/Gemfile +5 -0
  5. data/NOTES +187 -0
  6. data/README +0 -0
  7. data/Rakefile +16 -0
  8. data/bin/rflow +215 -0
  9. data/example/basic_config.rb +49 -0
  10. data/example/basic_extensions.rb +142 -0
  11. data/example/http_config.rb +21 -0
  12. data/example/http_extensions.rb +262 -0
  13. data/lib/rflow.rb +440 -0
  14. data/lib/rflow/component.rb +192 -0
  15. data/lib/rflow/component/port.rb +150 -0
  16. data/lib/rflow/components.rb +10 -0
  17. data/lib/rflow/components/raw.rb +26 -0
  18. data/lib/rflow/components/raw/extensions.rb +18 -0
  19. data/lib/rflow/configuration.rb +290 -0
  20. data/lib/rflow/configuration/component.rb +27 -0
  21. data/lib/rflow/configuration/connection.rb +98 -0
  22. data/lib/rflow/configuration/migrations/20010101000001_create_settings.rb +14 -0
  23. data/lib/rflow/configuration/migrations/20010101000002_create_components.rb +19 -0
  24. data/lib/rflow/configuration/migrations/20010101000003_create_ports.rb +24 -0
  25. data/lib/rflow/configuration/migrations/20010101000004_create_connections.rb +27 -0
  26. data/lib/rflow/configuration/port.rb +30 -0
  27. data/lib/rflow/configuration/ruby_dsl.rb +183 -0
  28. data/lib/rflow/configuration/setting.rb +67 -0
  29. data/lib/rflow/configuration/uuid_keyed.rb +18 -0
  30. data/lib/rflow/connection.rb +59 -0
  31. data/lib/rflow/connections.rb +2 -0
  32. data/lib/rflow/connections/zmq_connection.rb +101 -0
  33. data/lib/rflow/message.rb +191 -0
  34. data/lib/rflow/port.rb +4 -0
  35. data/lib/rflow/util.rb +19 -0
  36. data/lib/rflow/version.rb +3 -0
  37. data/rflow.gemspec +42 -0
  38. data/schema/message.avsc +36 -0
  39. data/schema/raw.avsc +9 -0
  40. data/spec/fixtures/config_ints.rb +61 -0
  41. data/spec/fixtures/extensions_ints.rb +141 -0
  42. data/spec/rflow_configuration_spec.rb +73 -0
  43. data/spec/rflow_message_data_raw.rb +26 -0
  44. data/spec/rflow_message_data_spec.rb +60 -0
  45. data/spec/rflow_message_spec.rb +182 -0
  46. data/spec/rflow_spec.rb +100 -0
  47. data/spec/schema_spec.rb +28 -0
  48. data/spec/spec_helper.rb +37 -0
  49. data/temp.rb +295 -0
  50. metadata +270 -0
@@ -0,0 +1,49 @@
1
# Main body of the example config file. Anything that would come before
# this point should probably live in separate gems and/or files that are
# loaded at runtime.
RFlow::Configuration::RubyDSL.configure do |config|
  # Settings: paths for various files, log levels, and
  # component-specific values
  config.setting 'rflow.log_level', 'DEBUG'
  config.setting 'rflow.application_directory_path', '.'

  # Add schemas to the list of those available. Not convinced this is necessary
  # config.schema('schemaname', 'schematype', 'schemadata')

  # Instantiate components
  # config.component 'generate_ints1', 'RFlow::Components::GenerateIntegerSequence', 'start' => 0, 'finish' => 10, 'step' => 3, 'interval_seconds' => 1
  # config.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
  # config.component 'filter', 'RFlow::Components::RubyProcFilter', 'filter_proc_string' => 'lambda {|message| true}'
  # config.component 'replicate', 'RFlow::Components::Replicate'
  # config.component 'output1', 'RFlow::Components::FileOutput', 'output_file_path' => '/tmp/crap1'
  # config.component 'output2', 'RFlow::Components::FileOutput', 'output_file_path' => '/tmp/crap2'

  # Hook components together
  # config.connect 'generate_ints#out' => 'filter#in'
  # config.connect 'filter#filtered' => 'replicate#in'
  # config.connect 'replicate#out[0]' => 'simple#in'
  # config.connect 'replicate#out[one]' => 'complex#in'
  # config.connect 'simple#out' => 'output#in'
  # config.connect 'complex#out' => 'output#in'

  # config.connect 'generate_ints1#out' => 'filter#in'
  # config.connect 'generate_ints2#out' => 'filter#in'
  # config.connect 'filter#filtered' => 'replicate#in'
  # config.connect 'replicate#out[1]' => 'output1#in'
  # config.connect 'replicate#out[2]' => 'output2#in'
  # Some tests that should fail
  # output should not have an 'out' port
  # config.connect 'output#out' => 'simple#in'

  # Active components: one integer generator and three file sinks
  config.component('generate_ints', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30)
  config.component('output', 'RFlow::Components::FileOutput', 'output_file_path' => '/tmp/crap')
  config.component('output_even', 'RFlow::Components::FileOutput', 'output_file_path' => '/tmp/crap_even')
  config.component('output_odd', 'RFlow::Components::FileOutput', 'output_file_path' => '/tmp/crap_odd')

  # Active connections: the un-keyed port feeds 'output', the 'even' and
  # 'odd' keys of the same port feed their own sinks
  config.connect('generate_ints#even_odd_out' => 'output#in')
  config.connect('generate_ints#even_odd_out[even]' => 'output_even#in')
  config.connect('generate_ints#even_odd_out[odd]' => 'output_odd#in')
end
48
+
49
+
@@ -0,0 +1,142 @@
1
+ # This will/should bring in available components and their schemas
2
+ require 'rflow/components'
3
+ require 'rflow/message'
4
+
5
+ #RFlow::Configuration.add_available_data_schema RFlow::Message::Data::AvroSchema.new('Integer', long_integer_schema)
6
+
7
# Example of creating and registering a data extension
module SimpleDataExtension
  class << self
    # Ruby calls this hook when an object extends the module. It only
    # reads data_object here; this is the place to default/verify the
    # data held in data_object.
    def extended(base_data)
      base_data.data_object
    end
  end

  # Placeholder instance method contributed by the extension; does nothing
  # and returns nil.
  def my_method
  end
end
16
+ RFlow::Configuration.add_available_data_extension('RFlow::Message::Data::Integer', SimpleDataExtension)
17
+
18
+
19
+
20
# Example of creating and registering a new schema: an Avro "long" is made
# available under the 'RFlow::Message::Data::Integer' data type name
long_integer_schema = '{"type": "long"}'
RFlow::Configuration.add_available_data_type 'RFlow::Message::Data::Integer', 'avro', long_integer_schema
23
+
24
+
25
# Example source component that emits a sequence of integer messages on
# the 'out' port and on the 'even_odd_out' port (both keyed by parity and
# un-keyed).
class RFlow::Components::GenerateIntegerSequence < RFlow::Component
  output_port :out
  output_port :even_odd_out

  # Reads the sequence parameters from config:
  #   'start', 'finish'   - converted with to_i
  #   'step'              - converted with to_i, 1 when absent
  #   'interval_seconds'  - converted with to_i, so it is 0 when absent
  def configure!(config)
    @start = config['start'].to_i
    @finish = config['finish'].to_i
    @step = (config['step'] || 1).to_i
    @interval_seconds = config['interval_seconds'].to_i
  end

  # Emits one message per timer tick. A periodic timer is used (sometimes
  # with a 0 interval) so as not to block the reactor. @start doubles as
  # the running counter; the timer is cancelled once it passes @finish.
  def run!
    timer = EM::PeriodicTimer.new(@interval_seconds) do
      message = RFlow::Message.new('RFlow::Message::Data::Integer')
      message.data.data_object = @start
      out.send_message message

      # Route to the parity-keyed connection first, then to the whole port
      parity_key = @start.even? ? 'even' : 'odd'
      even_odd_out[parity_key].send_message message
      even_odd_out.send_message message

      @start += @step
      timer.cancel unless @start <= @finish
    end
  end
end
57
+
58
# Example component that copies every incoming message to each entry
# yielded by the 'out' port. A message whose send raises is forwarded to
# the 'errored' port instead.
class RFlow::Components::Replicate < RFlow::Component
  input_port :in
  output_port :out
  output_port :errored

  # Sends message to everything yielded by out.each.
  #
  # Only StandardError is rescued: rescuing Exception would also swallow
  # signals, SystemExit and NoMemoryError and keep a dying process alive.
  def process_message(input_port, input_port_key, connection, message)
    puts "Processing message in Replicate"
    out.each do |connections|
      puts "Replicating"
      begin
        connections.send_message message
      rescue StandardError => e
        puts "Exception #{e.message}"
        errored.send_message message
      end
    end
  end
end
76
+
77
+ puts "Before RubyProcFilter"
78
# Example component that routes each message to 'filtered' or 'dropped'
# depending on the result of a configured Ruby expression, and to
# 'errored' when evaluating that expression raises.
class RFlow::Components::RubyProcFilter < RFlow::Component
  input_port :in
  output_port :filtered
  output_port :dropped
  output_port :errored

  # Builds the filter lambda from config['filter_proc_string'], which is
  # used as the *body* of a one-argument lambda (the argument is named
  # 'message').
  #
  # SECURITY NOTE(review): this evals text taken from the configuration.
  # It must only ever be fed trusted configuration; do not expose it to
  # externally supplied input.
  def configure!(config)
    @filter_proc = eval("lambda {|message| #{config['filter_proc_string']} }")
  end

  # Applies the filter to message and forwards it accordingly.
  #
  # Only StandardError is rescued so that signals, SystemExit and similar
  # process-level exceptions are not swallowed.
  def process_message(input_port, input_port_key, connection, message)
    puts "Processing message in RubyProcFilter"
    begin
      if @filter_proc.call(message)
        filtered.send_message message
      else
        dropped.send_message message
      end
    rescue StandardError => e
      puts "Attempting to send message to errored #{e.message}"
      errored.send_message message
    end
  end
end
103
+
104
+ puts "Before FileOutput"
105
# Example sink component that writes the inspected data object of every
# incoming message to a file, one message per line.
class RFlow::Components::FileOutput < RFlow::Component
  attr_accessor :output_file_path, :output_file
  input_port :in

  # Opens the file named by config['output_file_path'] in 'w+' mode
  # (read-write, truncating any existing content).
  def configure!(config)
    self.output_file_path = config['output_file_path']
    self.output_file = File.new output_file_path, 'w+'
  end

  #def run!; end

  # Appends message.data.data_object.inspect to the file and flushes so
  # the output is visible immediately.
  def process_message(input_port, input_port_key, connection, message)
    puts "About to output to a file #{output_file_path}"
    output_file.puts message.data.data_object.inspect
    output_file.flush
  end

  # Closes the output file. Named cleanup! to match the component
  # lifecycle methods used elsewhere in these examples (configure!, run!,
  # shutdown!, cleanup!); the previous name is kept as an alias below.
  # Safe to call when the file was never opened or is already closed.
  def cleanup!
    output_file.close unless output_file.nil? || output_file.closed?
  end

  # Backward-compatible name for cleanup!
  alias_method :cleanup, :cleanup!
end
128
+
129
# TODO: Ensure that all the following methods work as they are
# supposed to. This is the interface that I'm adhering to
#
# Skeleton component: one input port, one output port, and an empty
# implementation of every lifecycle method.
class SimpleComponent < RFlow::Component
  input_port :in
  output_port :out

  # Receives the component's configuration; intentionally empty.
  def configure!(config)
  end

  # Intentionally empty.
  def run!
  end

  # Handles one incoming message; intentionally empty.
  def process_message(input_port, input_port_key, connection, message)
  end

  # Intentionally empty.
  def shutdown!
  end

  # Intentionally empty.
  def cleanup!
  end
end
141
+
142
+
@@ -0,0 +1,21 @@
1
# Example flow: HTTP requests are replicated to a file sink and to a
# responder whose output is fed back to the HTTP server.
RFlow::Configuration::RubyDSL.configure do |config|
  # Settings: paths for various files, log levels, and
  # component-specific values
  config.setting 'rflow.log_level', 'INFO'
  config.setting 'rflow.application_directory_path', '.'

  # Instantiate components
  config.component('http_server', 'HTTPServer', 'port' => 8080)
  config.component('filter', 'RFlow::Components::RubyProcFilter', 'filter_proc_string' => 'message.data.path == "/awesome"')
  config.component('replicate', 'RFlow::Components::Replicate')
  config.component('file_output', 'RFlow::Components::FileOutput', 'output_file_path' => '/tmp/http_crap')
  config.component('http_responder', 'HTTPResponder', 'response_code' => 200, 'content' => 'Hi, this teh awesome')

  # Wire the components together (the filter is instantiated above but
  # currently bypassed)
  # config.connect 'http_server#request_port' => 'filter#in'
  # config.connect 'filter#filtered' => 'replicate#in'
  config.connect('http_server#request_port' => 'replicate#in')
  config.connect('replicate#out[1]' => 'file_output#in')
  config.connect('replicate#out[2]' => 'http_responder#request')
  config.connect('http_responder#response' => 'http_server#response_port')
end
21
+
@@ -0,0 +1,262 @@
1
+ # This will/should bring in available components and their schemas
2
+ require 'rflow/components'
3
+ require 'rflow/message'
4
+
5
+
6
# Example component that copies every incoming message to each entry
# yielded by the 'out' port. A message whose send raises is forwarded to
# the 'errored' port instead.
class RFlow::Components::Replicate < RFlow::Component
  input_port :in
  output_port :out
  output_port :errored

  # Sends message to everything yielded by out.each.
  #
  # Only StandardError is rescued: rescuing Exception would also swallow
  # signals, SystemExit and NoMemoryError and keep a dying process alive.
  def process_message(input_port, input_port_key, connection, message)
    out.each do |connections|
      begin
        connections.send_message message
      rescue StandardError => e
        RFlow.logger.debug "#{self.class} Message caused exception: #{e.class}: #{e.message}: #{e.backtrace}"
        errored.send_message message
      end
    end
  end
end
22
+
23
+
24
# Example component that routes each message to 'filtered' or 'dropped'
# depending on the result of a configured Ruby expression, and to
# 'errored' when evaluating that expression raises.
class RFlow::Components::RubyProcFilter < RFlow::Component
  input_port :in
  output_port :filtered
  output_port :dropped
  output_port :errored

  # Builds the filter lambda from config['filter_proc_string'], which is
  # used as the *body* of a one-argument lambda (the argument is named
  # 'message').
  #
  # SECURITY NOTE(review): this evals text taken from the configuration.
  # It must only ever be fed trusted configuration; do not expose it to
  # externally supplied input.
  def configure!(config)
    @filter_proc = eval("lambda {|message| #{config['filter_proc_string']} }")
  end

  # Applies the filter to message and forwards it accordingly.
  #
  # Only StandardError is rescued so that signals, SystemExit and similar
  # process-level exceptions are not swallowed.
  def process_message(input_port, input_port_key, connection, message)
    RFlow.logger.debug "Filtering message"
    begin
      if @filter_proc.call(message)
        filtered.send_message message
      else
        dropped.send_message message
      end
    rescue StandardError => e
      RFlow.logger.debug "#{self.class} Message caused exception: #{e.class}: #{e.message}: #{e.backtrace}"
      errored.send_message message
    end
  end
end
49
+
50
+
51
# Example sink component that writes the inspected data object of every
# incoming message to a file, one message per line.
class RFlow::Components::FileOutput < RFlow::Component
  attr_accessor :output_file_path, :output_file
  input_port :in

  # Remembers config['output_file_path'] and opens that file in 'w+' mode.
  def configure!(config)
    @output_file_path = config['output_file_path']
    @output_file = File.new(@output_file_path, 'w+')
  end

  # Writes message.data.data_object.inspect as one line and flushes.
  def process_message(input_port, input_port_key, connection, message)
    line = message.data.data_object.inspect
    output_file.puts(line)
    output_file.flush
  end

  # Closes the output file.
  def cleanup!
    output_file.close
  end
end
69
+
70
# TODO: Ensure that all the following methods work as they are
# supposed to. This is the interface that I'm adhering to
#
# Skeleton component: one input port, one output port, and an empty
# implementation of every lifecycle method.
class SimpleComponent < RFlow::Component
  input_port :in
  output_port :out

  # Receives the component's configuration; intentionally empty.
  def configure!(config)
  end

  # Intentionally empty.
  def run!
  end

  # Handles one incoming message; intentionally empty.
  def process_message(input_port, input_port_key, connection, message)
  end

  # Intentionally empty.
  def shutdown!
  end

  # Intentionally empty.
  def cleanup!
  end
end
82
+
83
+
84
# Avro record schema for an HTTP request (a single string field, 'path'),
# registered under the 'HTTPRequest' data type name
http_request_schema = <<'EOS'
{
"type": "record",
"name": "HTTPRequest",
"namespace": "org.rflow",
"aliases": [],
"fields": [
{"name": "path", "type": "string"}
]
}
EOS
RFlow::Configuration.add_available_data_type 'HTTPRequest', 'avro', http_request_schema
96
+
97
# Avro record schema for an HTTP response (an int 'status' and a bytes
# 'content'), registered under the 'HTTPResponse' data type name
http_response_schema = <<'EOS'
{
"type": "record",
"name": "HTTPResponse",
"namespace": "org.rflow",
"aliases": [],
"fields": [
{"name": "status", "type": "int"},
{"name": "content", "type": "bytes"}
]
}
EOS
RFlow::Configuration.add_available_data_type 'HTTPResponse', 'avro', http_response_schema
110
+
111
+
112
# Accessors for HTTPRequest data objects.
# Need to be careful when extending to not clobber data already in data_object
module HTTPRequestExtension
  # Extension hook: gives data_object a default of {'path' => ''} only when
  # it does not already hold a value.
  def self.extended(base_data)
    return if base_data.data_object

    base_data.data_object = {'path' => ''}
  end

  # Returns the request path stored in data_object.
  def path
    data_object['path']
  end

  # Stores new_path as the request path in data_object.
  def path=(new_path)
    data_object['path'] = new_path
  end
end
121
+ RFlow::Configuration.add_available_data_extension('HTTPRequest', HTTPRequestExtension)
122
+
123
+
124
# Accessors for HTTPResponse data objects.
# Need to be careful when extending to not clobber data already in data_object
module HTTPResponseExtension
  # Extension hook: gives data_object a default of
  # {'status' => 200, 'content' => ''} only when it does not already hold
  # a value.
  def self.extended(base_data)
    return if base_data.data_object

    base_data.data_object = {'status' => 200, 'content' => ''}
  end

  # Returns the response status stored in data_object.
  def status
    data_object['status']
  end

  # Stores new_status as the response status in data_object.
  def status=(new_status)
    data_object['status'] = new_status
  end

  # Returns the response body stored in data_object.
  def content
    data_object['content']
  end

  # Stores new_content as the response body in data_object.
  def content=(new_content)
    data_object['content'] = new_content
  end
end
136
+ RFlow::Configuration.add_available_data_extension('HTTPResponse', HTTPResponseExtension)
137
+
138
+
139
+ require 'eventmachine'
140
+ require 'evma_httpserver'
141
+
142
# Example HTTP front end. Each complete HTTP request is turned into an
# 'HTTPRequest' message sent on request_port; 'HTTPResponse' messages
# arriving on response_port are matched back to the originating client
# connection through the message provenance and written to that client.
class HTTPServer < RFlow::Component
  input_port :response_port
  output_port :request_port

  attr_accessor :port, :listen, :server_signature, :connections

  # Reads 'listen' (default '127.0.0.1') and 'port' (default 8000, converted
  # with to_i) from config and starts with an empty connection table.
  def configure!(config)
    @listen = config['listen'] || '127.0.0.1'
    @port = config['port'] ? config['port'].to_i : 8000
    @connections = {}
  end

  # Starts the EventMachine server. Every accepted connection is given a
  # back-reference to this component and is registered in the connection
  # table under its signature, stringified.
  def run!
    @server_signature = EM.start_server(@listen, @port, Connection) do |conn|
      conn.server = self
      self.connections[conn.signature.to_s] = conn
    end
  end

  # Getting all messages to response_port, which we need to filter for
  # those that pertain to this component and have active connections.
  # This is done by inspecting the provenance, specifically the
  # context attribute that we stored originally
  def process_message(input_port, input_port_key, connection, message)
    RFlow.logger.debug "Received a message"
    return unless message.data_type_name == 'HTTPResponse'

    RFlow.logger.debug "Received a HTTPResponse message, determining if its mine"
    my_events = message.provenance.find_all { |processing_event| processing_event.component_instance_uuid == instance_uuid }
    RFlow.logger.debug "Found #{my_events.size} processing events from me"
    # Attempt to send the data to each context match
    my_events.each do |processing_event|
      RFlow.logger.debug "Inspecting #{processing_event.context}"
      # The context is written as "ip:port:signature". Take the last field
      # rather than splitting positionally, so an ip that itself contains
      # ':' (IPv6) cannot shift the signature out of place.
      connection_signature = processing_event.context.to_s.split(':').last
      client_connection = connections[connection_signature]
      next unless client_connection

      RFlow.logger.debug "Found connection for #{processing_event.context}"
      client_connection.send_http_response message
    end
  end

  # One instance per client connection; parses HTTP via
  # EventMachine::HttpServer and talks to the owning HTTPServer component
  # through the 'server' accessor.
  class Connection < EventMachine::Connection
    include EventMachine::HttpServer

    attr_accessor :server, :client_ip, :client_port

    # Records the peer address for logging. An explicit begin/rescue is
    # used because the rescue modifier combined with multiple assignment
    # only assigns the fallback on Ruby 2.7+; on older Rubies both
    # variables were left nil.
    def post_init
      begin
        @client_port, @client_ip = Socket.unpack_sockaddr_in(get_peername)
      rescue StandardError
        @client_port, @client_ip = "?", "?.?.?.?"
      end
      RFlow.logger.debug "Connection from #{@client_ip}:#{@client_port}"
      super
      no_environment_strings
    end

    # Logs the size of the received chunk and hands it to the HTTP parser.
    def receive_data(data)
      RFlow.logger.debug "Received #{data.bytesize} data from #{client_ip}:#{client_port}"
      super
    end

    # Called once a full HTTP request has been parsed. Builds an
    # HTTPRequest message carrying the request URI, attaches a processing
    # event whose context is "ip:port:signature" (used later to find this
    # connection again), and sends it on the server's request_port.
    def process_http_request
      RFlow.logger.debug "Received a full HTTP request from #{client_ip}:#{client_port}"

      processing_event = RFlow::Message::ProcessingEvent.new(server.instance_uuid, Time.now.utc)

      request_message = RFlow::Message.new('HTTPRequest')
      request_message.data.path = @http_request_uri

      processing_event.context = "#{client_ip}:#{client_port}:#{signature}"
      processing_event.completed_at = Time.now.utc
      request_message.provenance << processing_event

      server.request_port.send_message request_message
    end

    # Writes an HTTP response to the client and closes the connection once
    # it has been written. Status and content come from response_message
    # when one is given; otherwise an empty 200 response is sent.
    def send_http_response(response_message=nil)
      RFlow.logger.debug "Sending an HTTP response to #{client_ip}:#{client_port}"
      resp = EventMachine::DelegatedHttpResponse.new(self)

      # Default values
      resp.status = 200
      resp.content = ""
      resp.headers["Content-Type"] = "text/html; charset=UTF-8"
      resp.headers["Server"] = "Apache/2.2.3 (CentOS)"

      if response_message
        resp.status = response_message.data.status
        resp.content = response_message.data.content
      end

      resp.send_response
      close_connection_after_writing
    end

    # Called when a connection is torn down for whatever reason.
    # Remove this connection from the server's list. The table is keyed by
    # the stringified signature (see HTTPServer#run!), so the same form
    # must be used here or the entry is never removed.
    def unbind
      RFlow.logger.debug "Connection to #{client_ip}:#{client_port} lost"
      server.connections.delete(signature.to_s) if server
    end
  end
end
246
+
247
# As this component creates a new message type based on another, it
# copies over the provenance. It does not bother to add its own
# processing_event to the provenance, but it could/should
#
# Answers every HTTPRequest message with an HTTPResponse message. The
# status and body can be set through the 'response_code' and 'content'
# configuration options; when they are not given, the response is a 404
# whose body names the requested path.
class HTTPResponder < RFlow::Component
  input_port :request
  output_port :response

  # Reads the optional 'response_code' (converted with to_i) and 'content'
  # options. Both stay nil when absent, which selects the defaults in
  # process_message.
  def configure!(config)
    config ||= {}
    @response_code = config['response_code'] ? config['response_code'].to_i : nil
    @content = config['content']
  end

  # Builds the response for message, carries over its provenance so the
  # response can be traced back to the originating request, and sends it
  # on the 'response' port.
  def process_message(input_port, input_port_key, connection, message)
    response_message = RFlow::Message.new('HTTPResponse')
    response_message.data.status = @response_code || 404
    response_message.data.content = @content || "CONTENT: #{message.data.path} was accessed"
    response_message.provenance = message.provenance
    response.send_message response_message
  end
end
262
+