rflow 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50):
  1. checksums.yaml +7 -0
  2. data/.gitignore +6 -0
  3. data/.rvmrc +1 -0
  4. data/Gemfile +5 -0
  5. data/NOTES +187 -0
  6. data/README +0 -0
  7. data/Rakefile +16 -0
  8. data/bin/rflow +215 -0
  9. data/example/basic_config.rb +49 -0
  10. data/example/basic_extensions.rb +142 -0
  11. data/example/http_config.rb +21 -0
  12. data/example/http_extensions.rb +262 -0
  13. data/lib/rflow.rb +440 -0
  14. data/lib/rflow/component.rb +192 -0
  15. data/lib/rflow/component/port.rb +150 -0
  16. data/lib/rflow/components.rb +10 -0
  17. data/lib/rflow/components/raw.rb +26 -0
  18. data/lib/rflow/components/raw/extensions.rb +18 -0
  19. data/lib/rflow/configuration.rb +290 -0
  20. data/lib/rflow/configuration/component.rb +27 -0
  21. data/lib/rflow/configuration/connection.rb +98 -0
  22. data/lib/rflow/configuration/migrations/20010101000001_create_settings.rb +14 -0
  23. data/lib/rflow/configuration/migrations/20010101000002_create_components.rb +19 -0
  24. data/lib/rflow/configuration/migrations/20010101000003_create_ports.rb +24 -0
  25. data/lib/rflow/configuration/migrations/20010101000004_create_connections.rb +27 -0
  26. data/lib/rflow/configuration/port.rb +30 -0
  27. data/lib/rflow/configuration/ruby_dsl.rb +183 -0
  28. data/lib/rflow/configuration/setting.rb +67 -0
  29. data/lib/rflow/configuration/uuid_keyed.rb +18 -0
  30. data/lib/rflow/connection.rb +59 -0
  31. data/lib/rflow/connections.rb +2 -0
  32. data/lib/rflow/connections/zmq_connection.rb +101 -0
  33. data/lib/rflow/message.rb +191 -0
  34. data/lib/rflow/port.rb +4 -0
  35. data/lib/rflow/util.rb +19 -0
  36. data/lib/rflow/version.rb +3 -0
  37. data/rflow.gemspec +42 -0
  38. data/schema/message.avsc +36 -0
  39. data/schema/raw.avsc +9 -0
  40. data/spec/fixtures/config_ints.rb +61 -0
  41. data/spec/fixtures/extensions_ints.rb +141 -0
  42. data/spec/rflow_configuration_spec.rb +73 -0
  43. data/spec/rflow_message_data_raw.rb +26 -0
  44. data/spec/rflow_message_data_spec.rb +60 -0
  45. data/spec/rflow_message_spec.rb +182 -0
  46. data/spec/rflow_spec.rb +100 -0
  47. data/spec/schema_spec.rb +28 -0
  48. data/spec/spec_helper.rb +37 -0
  49. data/temp.rb +295 -0
  50. metadata +270 -0
@@ -0,0 +1,100 @@
1
+ require 'spec_helper.rb'
2
+
3
+ require 'rflow'
4
+
5
# Integration specs for the top-level RFlow module: log file handling and a
# full run of the fixture workflow.
#
# Every example reads @temp_directory_path, which must be set up by the
# suite-wide hooks before the example runs.
describe RFlow do
  before(:each) do
    @fixture_directory_path = File.join(File.dirname(__FILE__), 'fixtures')
  end

  describe 'logger' do
    it "should initialize correctly" do
      log_file_path = File.join(@temp_directory_path, 'logfile')
      RFlow.initialize_logger log_file_path

      # File.exist? only ever returns true or false, so asserting "not nil"
      # could never fail; assert that the file really exists instead.
      File.exist?(log_file_path).should be_true

      RFlow.logger.error "TESTTESTTEST"
      File.read(log_file_path).should match(/TESTTESTTEST/)

      RFlow.close_log_file
    end

    it "should reopen correctly" do
      log_file_path = File.join(@temp_directory_path, 'logfile')
      moved_path = log_file_path + '.old'

      RFlow.initialize_logger log_file_path
      File.exist?(log_file_path).should be_true
      File.exist?(moved_path).should be_false

      # Simulate log rotation: move the live file away, then ask RFlow to
      # reopen so new entries land in a fresh file at the original path.
      File.rename log_file_path, moved_path

      RFlow.reopen_log_file

      RFlow.logger.error "TESTTESTTEST"
      File.read(log_file_path).should match(/TESTTESTTEST/)
      File.read(moved_path).should_not match(/TESTTESTTEST/)

      RFlow.close_log_file
    end

    # Declared without a block so RSpec reports it as pending rather than
    # counting an empty example as a pass.
    it "should toggle log level"
  end

  describe '.run' do
    before(:each) do
      @run_directory_path = File.join(@temp_directory_path, 'run')
      @log_directory_path = File.join(@temp_directory_path, 'log')
      Dir.mkdir @run_directory_path
      Dir.mkdir @log_directory_path
    end

    it "should startup and run correctly with non-trivial workflow" do
      config_file_path = File.join(@fixture_directory_path, 'config_ints.rb')
      extensions_path = File.join(@fixture_directory_path, 'extensions_ints.rb')
      config_database_path = File.join(@temp_directory_path, 'config.sqlite')

      # Load the new database with the fixtured config file
      RFlow::Configuration.initialize_database(config_database_path, config_file_path)
      File.exist?(config_database_path).should be_true

      # Load the fixtured extensions
      load extensions_path

      # Startup RFlow in its own thread
      rflow_thread = Thread.new do
        RFlow.run config_database_path, false
      end

      # TODO: figure out a way to get rid of this sleep, as there
      # should be a better way
      sleep(5)

      all_values = (20..30).to_a
      even_values = all_values.select(&:even?)
      odd_values = all_values.select(&:odd?)

      # Output file name (inside the temp directory) => integers expected,
      # one per line.
      expected_output = {
        'out'           => all_values,
        'out2'          => all_values,
        'out_even'      => even_values,
        'out_odd'       => odd_values,
        'out_even_odd'  => all_values,
        'out_even_odd2' => all_values,
      }

      expected_output.each do |file_name, expected_values|
        output_path = File.join(@temp_directory_path, file_name)
        File.exist?(output_path).should be_true
        File.readlines(output_path).map(&:to_i).should == expected_values
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
+ require 'spec_helper.rb'
2
+
3
# Round-trip spec for the Avro schema registered under the
# 'RFlow::Message::Data::Raw' data type.
describe 'RFlow::Message::Data::Raw Avro Schema' do
  before(:each) do
    @schema_string = RFlow::Configuration.available_data_types['RFlow::Message::Data::Raw']['avro']
  end

  it "should encode and decode an object" do
    raw = {
      # 256 random byte values; 'C*' is the unsigned 8-bit directive, which
      # matches the 0..255 range produced by rand(256).
      'raw' => Array.new(256) { rand(256) }.pack('C*')
    }

    # Capture the result inside the expectation so each step runs only once.
    avro_encoded_raw = nil
    expect { avro_encoded_raw = encode_avro(@schema_string, raw) }.to_not raise_error

    decoded_raw = nil
    expect { decoded_raw = decode_avro(@schema_string, avro_encoded_raw) }.to_not raise_error

    decoded_raw.should == raw
    decoded_raw['raw'].should == raw['raw']
  end
end
@@ -0,0 +1,37 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib', 'rflow'))
2
+
3
+ require 'fileutils'
4
+ require 'log4r'
5
+
6
# Suite-wide hooks: a stdout logger for RFlow and a scratch directory
# (spec/tmp) that is recreated for every example.
RSpec.configure do |config|
  config.before(:all) do
    RFlow.logger = Log4r::Logger.new 'test'
    RFlow.logger.add Log4r::StdoutOutputter.new('test_stdout', :formatter => RFlow::LOG_PATTERN_FORMATTER)
  end

  config.before(:each) do
    @temp_directory_path = File.expand_path(File.join(File.dirname(__FILE__), 'tmp'))
    # A run that was interrupted before its after-hook fired leaves the
    # directory behind, and a plain Dir.mkdir would then raise Errno::EEXIST
    # for every example. Start from a clean directory instead.
    FileUtils.rm_rf @temp_directory_path
    FileUtils.mkdir_p @temp_directory_path
  end

  config.after(:each) do
    FileUtils.rm_rf @temp_directory_path
  end
end
21
+
22
+
23
# Decodes an Avro binary payload using the same schema for writer and reader.
#
# schema_string     - Avro schema definition, handed to Avro::Schema.parse.
# serialized_object - String holding the Avro-encoded bytes.
#
# Returns whatever Avro::IO::DatumReader#read produces for the payload.
def decode_avro(schema_string, serialized_object)
  schema = Avro::Schema.parse(schema_string)
  # Work on a copy: force_encoding mutates its receiver, which would re-tag
  # the caller's string as binary and raise outright on a frozen string.
  binary_payload = serialized_object.dup.force_encoding('BINARY')
  decoder = Avro::IO::BinaryDecoder.new(StringIO.new(binary_payload))
  Avro::IO::DatumReader.new(schema, schema).read(decoder)
end
29
+
30
# Encodes an object to Avro binary using the given schema.
#
# schema_string - Avro schema definition, handed to Avro::Schema.parse.
# object        - The datum to write with Avro::IO::DatumWriter.
#
# Returns a binary-encoded String containing the bytes the writer emitted.
def encode_avro(schema_string, object)
  schema = Avro::Schema.parse(schema_string)
  # Allocate the buffer explicitly rather than mutating a '' literal, so the
  # helper keeps working when string literals are frozen.
  buffer = StringIO.new(String.new.force_encoding('BINARY'))
  Avro::IO::DatumWriter.new(schema).write(object, Avro::IO::BinaryEncoder.new(buffer))
  buffer.string
end
data/temp.rb ADDED
@@ -0,0 +1,295 @@
1
# Design sketch of the top-level RFlow namespace; nothing here is implemented.
module RFlow
  class << self
    # Planned entry point. Currently a stub: ignores +config+ and returns nil.
    #
    # Intended steps:
    #   - Take in the config file
    #   - Set a module-level config
    #   - Set module-level attributes (logger)
    #   - Create manager
    #   - Start manager with parsed config elements
    def run(config)
    end
  end

  # Planned manager. Intended responsibilities:
  #   - Find each component
  #   - Instantiate (process management)
  class Manager
    # Accepts a config and does nothing with it yet.
    def initialize(config)
    end
  end
end
18
+
19
# Design sketch: maps data type names to schemas based on schema type.
class SchemaRegistry
  # Placeholder lookup. The sketch only named this method as a bare
  # identifier in the class body, which raises NameError as soon as the class
  # is loaded; it is declared as a stub here so the file can be loaded.
  #
  # data_type_name - presumably the name a schema was registered under
  #                  (TODO confirm once the registry is implemented).
  #
  # Returns nil until implemented.
  def find_by_data_type_name(data_type_name)
  end
end
23
+
24
# Design sketch of the registry of message data classes.
class MessageDataRegistry
  # Intended contract: return the data type class registered under
  # +data_type_name+, or nil when nothing is registered.
  # Currently a stub that always returns nil.
  def find(data_type_name); end
end
29
+
30
# Design sketch: contains the schema + data information of a message.
# Subclasses can add extra functionality; otherwise this only has access to
# the standard message data facilities (i.e. the standard Avro data types)
# and delegates a lot to them.
class Message::Data
  # how does this get access to the registry at the class level?
  class << self
    # Registry queried by .create to find a specialised data class.
    attr_accessor :class_registry
    # Registry queried by #initialize to find/register schemas.
    attr_accessor :schema_registry
  end

  # Pointer to encapsulating message
  attr_accessor :message

  # data_type_name  - name used to look up the schema in the schema registry.
  # serialized_data - accepted but not used yet.
  # schema_name, schema_type, schema - registered under data_type_name when
  #                   the registry has no schema for it and +schema+ is given.
  # message         - the encapsulating message, stored in #message.
  def initialize(data_type_name, serialized_data=nil, schema_name=nil, schema_type=nil, schema=nil, message=nil)
    # schema_name ||= 'org.rflow.Messages.GenericStringMap'
    # schema_type ||= 'avro'
    # schema ||= 'default avro schema'

    # The sketch accepted the back-pointer but never stored it.
    @message = message

    # TODO: the sketch called a `merge_options` helper here that is not
    # defined anywhere visible; reinstate the call once it exists.

    # TODO: think about schema resolution and conflicts between passed
    # data and schema registry
    # Lookup schema based on data type name
    _registered_name, registered_schema, _registered_type = self.class.schema_registry.find(data_type_name)
    if registered_schema.nil? && schema
      # If you were given a schema and didn't get one from the
      # registry register the schema?
      self.class.schema_registry.register(data_type_name, schema_name, schema_type, schema)
    end
  end

  # Factory: looks for a class in the class registry by data_type_name.
  # If one is found, delegates to that class's .create; otherwise builds an
  # instance of this class.
  def self.create(data_type_name, data=nil, schema_name=nil, schema_type=nil, schema=nil)
    # `self` is already the class here, so `self.class` would be Class, which
    # has no registry accessor. NOTE(review): assumes the accessor meant is
    # class_registry declared above - confirm.
    message_class = class_registry.find(data_type_name)
    if message_class.nil?
      # The sketch referenced an undefined MessageData constant; the default
      # object is an instance of this class.
      new(data_type_name, data, schema_name, schema_type, schema)
    else
      message_class.create(data_type_name, data, schema_name, schema_type, schema)
    end
  end
end
78
+
79
# Placeholder mixin for HTTP-response-specific behaviour; defines nothing yet.
#
# Intended usage, kept as a comment because the Message class body only
# appears later in this file and its constructor requires a data type name,
# so executing the call here raises while the file is loading:
#
#   Message.new(data_type_name).extend(HTTPResponse)
module HTTPResponse
end
83
+
84
# Sketch of a specialised message data class; used to add methods, defaults,
# and more to the data object, if required.
class HTTPRequest < RFlow::Message::Data
  # Put this in the registry
  DATA_TYPE_NAME = "HTTPRequest"
  AVRO_SCHEMA_NAME = 'org.rflow.http_request'

  class << self
    # Stub. Intended to figure out whether it is being called with
    # incompatible arguments, i.e. schema stuff. Returns nil for now.
    def create(data_type_name, data, schema_name, schema_type, schema)
    end
  end

  # All subclasses must have the same initialize signature. They need to
  # figure out what to do when they get extra parameters that might conflict
  # with expectations. Subclasses are usually meant to enable extra
  # functionality on a given data type, so as long as it operates properly,
  # it might not care (duck typing).
  #
  # Only +data+ is forwarded; the data type name and schema name passed to
  # the superclass are this class's constants.
  def initialize(data_type_name, data, schema_name, schema_type, schema)
    super(DATA_TYPE_NAME, data, AVRO_SCHEMA_NAME)
    # do nice stuff with data here
  end
end
107
+
108
# Design sketch: contains all definitions about what to do for a message and
# has a default Avro schema for a data type.
class Message
  class << self
    # Registry whose #find(data_type_name) yields a class, a module, or nil.
    attr_accessor :data_class_registry
  end

  # Should load all the data stuff, perhaps to top level method on object

  attr_accessor :data_type_name, :provenance, :origination_context, :data_type_schema, :data

  # Stores the given fields. When no +data+ is supplied, a data object is
  # built from the data class registry.
  # NOTE(review): the sketch built that object and then discarded it; keeping
  # it in #data is an assumption - confirm.
  def initialize(data_type_name, provenance=nil, origination_context=nil, data_type_schema=nil, data=nil)
    @data_type_name = data_type_name
    @provenance = provenance
    @origination_context = origination_context
    @data_type_schema = data_type_schema
    # Potentially register this data_type_name to the schema when data is given
    @data = data || build_message_data(data_type_name)
  end

  private

  # Looks up the data type in the data class registry:
  #   - a class is instantiated directly;
  #   - a module extends a generic Message::Data instance;
  #   - anything else yields a plain Message::Data instance.
  def build_message_data(data_type_name)
    registered = self.class.data_class_registry.find(data_type_name)

    # The sketch tested `registered.class.is_a? Class`, which is true for
    # every object (and, unparenthesised after `&&`, a syntax error).
    return registered.new(data_type_name) if registered.is_a?(Class)

    # Message::Data requires the data type name as its first argument.
    message_data = Message::Data.new(data_type_name)
    message_data.extend(registered) if registered.is_a?(Module)
    message_data
  end
end
143
+
144
# Design sketch of a port that reads multi-part messages.
class Port
  # Reads one message. Calls read_all_parts and #assemble on its result, then
  # reads five parts in order - data type name, provenance, origination
  # context, data type schema, data - and wraps them in a Message.
  # read_all_parts and read_message_part are not defined in this sketch.
  def read_message
    read_all_parts.assemble

    fields = Array.new(5) { read_message_part }
    Message.new(*fields)
  end
end
159
+
160
# Empty placeholder in this sketch; no behaviour is defined yet.
class PortCollection; end
162
+
163
# Empty placeholder in this sketch; no behaviour is defined yet.
class Logger; end
165
+
166
# Design sketch of the component base class: class-level port declarations
# plus a processing loop over the input ports.
class Component
  STATES = [:initialized, :started, :configured, :running, :stopping, :stopped]

  # Recognised configuration keys. The sketch listed bare keys, which is not a
  # valid Hash literal; they are given nil placeholders here.
  CONFIG_DEFAULTS = {
    :logger => nil,
    :working_directory_path => nil,
  }

  # Declares an input port on the receiving class.
  #
  # port_def - a one-element Array (e.g. [:in]) declares an :array port named
  #            after its first element; anything else declares a :single port
  #            with port_def as its name.
  #
  # Returns the newly created InputPort.
  def self.input_port(port_def)
    # A class-level instance variable keeps each subclass's ports separate;
    # a @@class variable would be shared by every component class.
    @input_ports ||= PortCollection.new
    if port_def.is_a? Array
      port_name = port_def.first.to_sym
      port_incidence = :array
    else
      port_name = port_def
      port_incidence = :single
    end
    @input_ports[port_name] = InputPort.new port_name, port_incidence
  end

  # Stub: same as input port with different stuffs. Accepts the port
  # definition that subclasses pass (e.g. `output_port :out`); the argument
  # is optional so that a bare call keeps working.
  def self.output_port(port_def=nil)
  end

  attr_accessor :state
  attr_accessor :input_ports
  attr_accessor :output_ports

  attr_accessor :uuid
  attr_accessor :name

  # Stub constructor; component configuration is not implemented yet, so
  # both arguments are currently ignored.
  def initialize(config, run_directory)
    # configure component
    # TODO: where is the management bus listener configured/started
  end

  # Processing loop: for every ready input port, read a message and hand it
  # to #process_message.
  def run
    input_ports.ready do |port|
      message = port.read_message
      # The sketch called process_input, which is not defined; the handler
      # defined on this class is process_message.
      process_message(port, message)
      # read from the port and think about things
      # NOTE(review): out / another_out are not defined on this class -
      # presumably output ports supplied by a subclass; confirm.
      out.send('stuff')
      another_out.send('more stuff')
    end
    # listen to
  end

  # Hook for subclasses; does nothing by default.
  def process_message(input_port, message)
  end

  # Delegates to port.receive.
  def receive_message(port)
    port.receive
  end

  # Delegates to port.send(message).
  def send_message(port, message)
    port.send(message)
  end
end
228
+
229
# Sketch of an HTTP server component: declares one input port, one output
# port, and the type names given to input_types / output_types.
class HTTPServer < RFlow::Component
  input_port(:responses)
  output_port(:requests)

  input_types("HTTP::Response")
  output_types("HTTP::Request")
end
238
+
239
# Sketch of a component that forwards everything it receives to both of its
# output ports.
class PassThrough < RFlow::Component
  input_port [:in]
  input_port :another_in
  output_port :out
  output_port :another_out

  output_types

  def initialize(config, run_directory)
    # This will initialize the ports
    super
    # Do stuff to initialize component. Don't assume singleton
  end

  # Forwards the received data to both output ports. The sketch sent an
  # undefined `message` local here; the value actually received is +data+.
  def process_message(input_port, data)
    out.send(data)
    another_out.send(data)
  end

  # Stub. The sketch left this definition unfinished (unclosed parameter
  # list and no matching `end`), which made the rest of the file unparsable.
  def process_data(input_port)
  end
end
264
+
265
+
266
# Empty placeholder component; no behaviour is defined yet.
class Transform < RFlow::Component; end
269
+
270
# Plugins:
#
# The snippets below describe the contents of separate plugin files. They are
# kept as comments because data_name_object, component_class and the
# 'lib/...' paths are placeholders that would raise if executed here.
#
# MessageData subclass: rflow-data-http_request
#   lib/rflow-data-http_request.rb
#     require 'rflow'
#     require 'lib/data_name'
#     RFlow.available_data_types << data_name_object
#
# Component: rflow-component-http_server
#   lib/rflow-component-http_server
#     require 'rflow'
#     require 'lib/component_name'
#     RFlow.available_components << component_class
#
# lib/component_name.rb ->
#   data_type_name => schema + registration: just register in the application
#
# Server -> (HttpRequest -> Translate -> HTTPResponse) -> Server
294
+
295
+