rflow 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +6 -0
  3. data/.rvmrc +1 -0
  4. data/Gemfile +5 -0
  5. data/NOTES +187 -0
  6. data/README +0 -0
  7. data/Rakefile +16 -0
  8. data/bin/rflow +215 -0
  9. data/example/basic_config.rb +49 -0
  10. data/example/basic_extensions.rb +142 -0
  11. data/example/http_config.rb +21 -0
  12. data/example/http_extensions.rb +262 -0
  13. data/lib/rflow.rb +440 -0
  14. data/lib/rflow/component.rb +192 -0
  15. data/lib/rflow/component/port.rb +150 -0
  16. data/lib/rflow/components.rb +10 -0
  17. data/lib/rflow/components/raw.rb +26 -0
  18. data/lib/rflow/components/raw/extensions.rb +18 -0
  19. data/lib/rflow/configuration.rb +290 -0
  20. data/lib/rflow/configuration/component.rb +27 -0
  21. data/lib/rflow/configuration/connection.rb +98 -0
  22. data/lib/rflow/configuration/migrations/20010101000001_create_settings.rb +14 -0
  23. data/lib/rflow/configuration/migrations/20010101000002_create_components.rb +19 -0
  24. data/lib/rflow/configuration/migrations/20010101000003_create_ports.rb +24 -0
  25. data/lib/rflow/configuration/migrations/20010101000004_create_connections.rb +27 -0
  26. data/lib/rflow/configuration/port.rb +30 -0
  27. data/lib/rflow/configuration/ruby_dsl.rb +183 -0
  28. data/lib/rflow/configuration/setting.rb +67 -0
  29. data/lib/rflow/configuration/uuid_keyed.rb +18 -0
  30. data/lib/rflow/connection.rb +59 -0
  31. data/lib/rflow/connections.rb +2 -0
  32. data/lib/rflow/connections/zmq_connection.rb +101 -0
  33. data/lib/rflow/message.rb +191 -0
  34. data/lib/rflow/port.rb +4 -0
  35. data/lib/rflow/util.rb +19 -0
  36. data/lib/rflow/version.rb +3 -0
  37. data/rflow.gemspec +42 -0
  38. data/schema/message.avsc +36 -0
  39. data/schema/raw.avsc +9 -0
  40. data/spec/fixtures/config_ints.rb +61 -0
  41. data/spec/fixtures/extensions_ints.rb +141 -0
  42. data/spec/rflow_configuration_spec.rb +73 -0
  43. data/spec/rflow_message_data_raw.rb +26 -0
  44. data/spec/rflow_message_data_spec.rb +60 -0
  45. data/spec/rflow_message_spec.rb +182 -0
  46. data/spec/rflow_spec.rb +100 -0
  47. data/spec/schema_spec.rb +28 -0
  48. data/spec/spec_helper.rb +37 -0
  49. data/temp.rb +295 -0
  50. metadata +270 -0
@@ -0,0 +1,36 @@
1
+ {
2
+ "type": "record",
3
+ "name": "Message",
4
+ "namespace": "org.rflow",
5
+ "aliases": [],
6
+ "fields": [
7
+ {"name": "data_type_name", "type": "string"},
8
+ {"name": "provenance",
9
+ "type": {
10
+ "type": "array",
11
+ "items": {
12
+ "type": "record",
13
+ "name": "ProcessingEvent",
14
+ "namespace": "org.rflow",
15
+ "aliases": [],
16
+ "fields": [
17
+ {"name": "component_instance_uuid", "type": "string"},
18
+ {"name": "started_at", "type": ["string", "null"]},
19
+ {"name": "completed_at", "type": ["string", "null"]},
20
+ {"name": "context", "type": ["bytes", "null"]}
21
+ ]
22
+ }
23
+ }
24
+ },
25
+ {"name": "data_serialization_type",
26
+ "type": {
27
+ "type": "enum",
28
+ "name": "DataSerializationType",
29
+ "symbols": ["avro", "xml"]
30
+ }
31
+ },
32
+ {"name": "data_schema", "type": "string"},
33
+ {"name": "data", "type": "bytes"}
34
+ ]
35
+ }
36
+
data/schema/raw.avsc ADDED
@@ -0,0 +1,9 @@
1
+ {
2
+ "type": "record",
3
+ "name": "Raw",
4
+ "namespace": "org.rflow.message.data",
5
+ "aliases": [],
6
+ "fields": [
7
+ {"name": "raw", "type": "bytes"}
8
+ ]
9
+ }
@@ -0,0 +1,61 @@
1
+ # Meat of the config file. Stuff above this should probably be in
2
+ # separate gems and/or files that are brought in at runtime.
3
+ RFlow::Configuration::RubyDSL.configure do |config|
4
+ # Configure the settings, which include paths for various files, log
5
+ # levels, and component-specific settings
6
+ config.setting('rflow.log_level', 'DEBUG')
7
+ config.setting('rflow.application_directory_path', '../tmp')
8
+
9
+ config.setting('rflow.application_name', 'testapp')
10
+
11
+ # Add schemas to the list of available schemas. Not convinced this is necessary.
12
+ # config.schema('schemaname', 'schematype', 'schemadata')
13
+
14
+ # Instantiate components
15
+ # config.component 'generate_ints1', 'RFlow::Components::GenerateIntegerSequence', 'start' => 0, 'finish' => 10, 'step' => 3, 'interval_seconds' => 1
16
+ # config.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
17
+ # config.component 'filter', 'RFlow::Components::RubyProcFilter', 'filter_proc_string' => 'lambda {|message| true}'
18
+ # config.component 'replicate', 'RFlow::Components::Replicate'
19
+ # config.component 'output1', 'RFlow::Components::FileOutput', 'output_file_path' => '/tmp/crap1'
20
+ # config.component 'output2', 'RFlow::Components::FileOutput', 'output_file_path' => '/tmp/crap2'
21
+
22
+ # Hook components together
23
+ # config.connect 'generate_ints#out' => 'filter#in'
24
+ # config.connect 'filter#filtered' => 'replicate#in'
25
+ # config.connect 'replicate#out[0]' => 'simple#in'
26
+ # config.connect 'replicate#out[one]' => 'complex#in'
27
+ # config.connect 'simple#out' => 'output#in'
28
+ # config.connect 'complex#out' => 'output#in'
29
+
30
+ # config.connect 'generate_ints1#out' => 'filter#in'
31
+ # config.connect 'generate_ints2#out' => 'filter#in'
32
+ # config.connect 'filter#filtered' => 'replicate#in'
33
+ # config.connect 'replicate#out[1]' => 'output1#in'
34
+ # config.connect 'replicate#out[2]' => 'output2#in'
35
+ # Some tests that should fail
36
+ # output should not have an 'out' port
37
+ # config.connect 'output#out' => 'simple#in'
38
+
39
+ config.component 'generate_ints', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
40
+ config.component 'output', 'RFlow::Components::FileOutput', 'output_file_path' => '../tmp/out'
41
+ config.component 'output2', 'RFlow::Components::FileOutput', 'output_file_path' => '../tmp/out2'
42
+ config.component 'output_even', 'RFlow::Components::FileOutput', 'output_file_path' => '../tmp/out_even'
43
+ config.component 'output_odd', 'RFlow::Components::FileOutput', 'output_file_path' => '../tmp/out_odd'
44
+ config.component 'output_even_odd', 'RFlow::Components::FileOutput', 'output_file_path' => '../tmp/out_even_odd'
45
+
46
+ config.connect 'generate_ints#out' => 'output#in'
47
+ config.connect 'generate_ints#out' => 'output2#in'
48
+ config.connect 'generate_ints#even_odd_out[even]' => 'output_even#in'
49
+ config.connect 'generate_ints#even_odd_out[odd]' => 'output_odd#in'
50
+ config.connect 'generate_ints#even_odd_out' => 'output_even_odd#in'
51
+
52
+
53
+
54
+ config.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
55
+ config.component 'output_even_odd2', 'RFlow::Components::FileOutput', 'output_file_path' => '../tmp/out_even_odd2'
56
+
57
+ config.connect 'generate_ints2#even_odd_out' => 'output_even_odd2#in'
58
+
59
+ end
60
+
61
+
@@ -0,0 +1,141 @@
1
+ # This will/should bring in available components and their schemas
2
+ require 'rflow/components'
3
+ require 'rflow/message'
4
+
5
+ #RFlow::Configuration.add_available_data_schema RFlow::Message::Data::AvroSchema.new('Integer', long_integer_schema)
6
+
7
+ # Example of creating and registering a data extension
8
# Mixin meant to be applied to a data object with Object#extend.
module SimpleDataExtension
  class << self
    # Ruby calls this hook with the object that was just extended. It reads
    # data_object on that object and returns the value; this is the place to
    # default or verify the data held there.
    def extended(base_data)
      base_data.data_object
    end
  end

  # Placeholder extension method: takes no arguments and returns nil.
  def my_method
  end
end
16
+ RFlow::Configuration.add_available_data_extension('RFlow::Message::Data::Integer', SimpleDataExtension)
17
+
18
+
19
+
20
+ # Example of creating and registering a new schema
21
+ long_integer_schema = '{"type": "long"}'
22
+ RFlow::Configuration.add_available_data_type('RFlow::Message::Data::Integer', 'avro', long_integer_schema)
23
+
24
+
25
# Component that emits a sequence of integers as RFlow messages.
#
# Every value is sent on the +out+ port and additionally on the
# +even_odd_out+ port under the key 'even' or 'odd', depending on parity.
class RFlow::Components::GenerateIntegerSequence < RFlow::Component
  output_port :out
  output_port :even_odd_out

  # Reads the sequence parameters from +config+ (string keys):
  # 'start' and 'finish' bound the sequence (a missing key becomes 0 through
  # nil.to_i), 'step' defaults to 1 and 'interval_seconds' defaults to 0.
  #
  # Raises ArgumentError when the step is not positive: run! only stops once
  # the counter exceeds 'finish', so a zero or negative step (including a
  # non-numeric string, which to_i turns into 0) would never terminate.
  def configure!(config)
    @start = config['start'].to_i
    @finish = config['finish'].to_i
    @step = config['step'] ? config['step'].to_i : 1
    unless @step > 0
      raise ArgumentError, "step must be a positive integer, got #{config['step'].inspect}"
    end

    # If interval seconds is not given, it will default to 0
    @interval_seconds = config['interval_seconds'].to_i
  end

  # Note that this uses the timer (sometimes with 0 interval) so as
  # not to block the reactor
  def run!
    timer = EM::PeriodicTimer.new(@interval_seconds) do
      message = RFlow::Message.new('RFlow::Message::Data::Integer')
      message.data.data_object = @start
      out.send_message message
      even_odd_out[@start.even? ? 'even' : 'odd'].send_message message

      # Advance the counter and stop firing once it has passed the upper bound.
      @start += @step
      timer.cancel if @start > @finish
    end
  end
end
56
+
57
# Component that copies each incoming message to everything yielded by
# iterating the +out+ port. A message whose delivery raises is sent to the
# +errored+ port instead.
class RFlow::Components::Replicate < RFlow::Component
  input_port :in
  output_port :out
  output_port :errored

  # Forwards +message+ to every element yielded by out.each. The port and
  # connection arguments are accepted but not used here.
  def process_message(input_port, input_port_key, connection, message)
    puts "Processing message in Replicate"
    out.each do |connections|
      puts "Replicating"
      begin
        connections.send_message message
      rescue StandardError => e
        # Only StandardError is handled so that signals and SystemExit still
        # propagate instead of being rerouted to the errored port.
        puts "Exception #{e.message}"
        errored.send_message message
      end
    end
  end
end
75
+
76
+ puts "Before RubyProcFilter"
77
# Component that routes each incoming message according to a predicate that
# is supplied as Ruby source in the configuration: truthy results go to
# +filtered+, falsy results to +dropped+, and failures to +errored+.
class RFlow::Components::RubyProcFilter < RFlow::Component
  input_port :in
  output_port :filtered
  output_port :dropped
  output_port :errored

  # Builds the predicate by wrapping config['filter_proc_string'] in
  # "lambda {|message| ... }" and evaluating it, so the string must be the
  # lambda *body* (for example 'true'), not a complete lambda expression.
  #
  # NOTE(security): the string is executed as arbitrary Ruby code. Only use
  # this component with configuration from a trusted source.
  def configure!(config)
    @filter_proc = eval("lambda {|message| #{config['filter_proc_string']} }")
  end

  # Calls the predicate with +message+ and sends the message to the matching
  # output port. The port and connection arguments are not used here.
  def process_message(input_port, input_port_key, connection, message)
    puts "Processing message in RubyProcFilter"
    begin
      if @filter_proc.call(message)
        filtered.send_message message
      else
        dropped.send_message message
      end
    rescue StandardError => e
      # Only StandardError is handled so that signals and SystemExit still
      # propagate instead of being rerouted to the errored port.
      puts "Attempting to send message to errored #{e.message}"
      errored.send_message message
    end
  end
end
102
+
103
+ puts "Before FileOutput"
104
# Component that writes the inspected data object of every incoming message
# to a file, one message per line.
class RFlow::Components::FileOutput < RFlow::Component
  attr_accessor :output_file_path, :output_file
  input_port :in

  # Stores config['output_file_path'] and opens that path in 'w+' mode,
  # which truncates an existing file.
  def configure!(config)
    self.output_file_path = config['output_file_path']
    self.output_file = File.new output_file_path, 'w+'
  end

  #def run!; end

  # Appends message.data.data_object.inspect to the output file and flushes
  # right away so the line is on disk after each message.
  def process_message(input_port, input_port_key, connection, message)
    puts "About to output to a file #{output_file_path}"
    output_file.puts message.data.data_object.inspect
    output_file.flush
  end

  # Closes the output file. Safe to call when configure! never ran or when
  # the file has already been closed.
  #
  # The component interface listed by SimpleComponent in this file names this
  # hook cleanup!; presumably that is the name the framework invokes (confirm
  # against RFlow::Component).
  def cleanup!
    output_file.close if output_file && !output_file.closed?
  end

  # Previous, unbanged name of the hook, kept for existing callers.
  alias_method :cleanup, :cleanup!
end
127
+
128
+ # TODO: Ensure that all the following methods work as they are
129
+ # supposed to. This is the interface that I'm adhering to
130
# Skeleton component with one input port and one output port. Every hook is
# a deliberate no-op that returns nil.
class SimpleComponent < RFlow::Component
  input_port :in
  output_port :out

  # Accepts the configuration and ignores it.
  def configure!(config)
  end

  # Does no work when run.
  def run!
  end

  # Accepts a message together with its port/connection details and ignores it.
  def process_message(input_port, input_port_key, connection, message)
  end

  # Nothing to stop.
  def shutdown!
  end

  # Nothing to release.
  def cleanup!
  end
end
140
+
141
+
@@ -0,0 +1,73 @@
1
+ require 'spec_helper.rb'
2
+
3
+ require 'rflow/configuration'
4
+
5
+
6
# Specs for the data type and data extension registries exposed by
# RFlow::Configuration.
describe RFlow::Configuration do
  before(:each) do
    # RFlow::Configuration.available_data_types.clear
    # RFlow::Configuration.available_data_extensions.clear
  end

  describe '.add_available_data_type' do
    context 'if passed a data_serialization that is not avro or xml' do
      it "should throw an exception" do
        expect do
          RFlow::Configuration.add_available_data_type('A', 'boom', 'schema')
        end.to raise_error(ArgumentError)
      end

      it "should not update the available_data_types" do
        num_types = RFlow::Configuration.available_data_types.size
        begin
          RFlow::Configuration.add_available_data_type('A', 'boom', 'schema')
        rescue ArgumentError
          # Expected rejection (asserted by the previous example). Any other
          # error is a real failure and must not be hidden.
        end
        RFlow::Configuration.available_data_types.should have(num_types).items
      end
    end
  end

  describe "Data Extensions" do
    describe ".add_available_data_extension" do
      context 'if passed a non-module data extension' do
        it "should throw an exception" do
          expect do
            RFlow::Configuration.add_available_data_extension('data_type', 'not a Module')
          end.to raise_error(ArgumentError)
        end
      end

      context "if passed a valid Module as a data extension" do
        it "should update the available_data_extensions" do
          num_extensions = RFlow::Configuration.available_data_extensions['data_type'].size
          expect do
            RFlow::Configuration.add_available_data_extension('data_type', Module.new)
          end.to_not raise_error
          RFlow::Configuration.available_data_extensions['data_type'].should have(num_extensions + 1).items
        end
      end
    end

    it "should perform simple 'prefix'-based inheritance for extensions" do
      # Local variables rather than constants: assigning A..D as constants
      # here would define them globally and leak them into other specs.
      ext_a = Module.new
      ext_b = Module.new
      ext_c = Module.new
      ext_d = Module.new

      RFlow::Configuration.add_available_data_extension('A', ext_a)
      RFlow::Configuration.add_available_data_extension('A::B', ext_b)
      RFlow::Configuration.add_available_data_extension('A::B::C', ext_c)
      RFlow::Configuration.add_available_data_extension('A::B::C::D', ext_d)

      RFlow::Configuration.available_data_extensions['A'].should have(1).item
      RFlow::Configuration.available_data_extensions['A'].should == [ext_a]

      RFlow::Configuration.available_data_extensions['A::B'].should have(2).item
      RFlow::Configuration.available_data_extensions['A::B'].should == [ext_a, ext_b]

      RFlow::Configuration.available_data_extensions['A::B::C'].should have(3).item
      RFlow::Configuration.available_data_extensions['A::B::C'].should == [ext_a, ext_b, ext_c]

      RFlow::Configuration.available_data_extensions['A::B::C::D'].should have(4).item
      RFlow::Configuration.available_data_extensions['A::B::C::D'].should == [ext_a, ext_b, ext_c, ext_d]

      # A key with no extension of its own resolves to its registered prefixes.
      RFlow::Configuration.available_data_extensions['A::B::C::D::E'].should have(4).item
      RFlow::Configuration.available_data_extensions['A::B::C::D::E'].should == [ext_a, ext_b, ext_c, ext_d]
    end
  end
end
@@ -0,0 +1,26 @@
1
+ require 'spec_helper.rb'
2
+
3
+ require 'rflow/components/raw'
4
+
5
# Checks that the avro schema registered for RFlow::Message::Data::Raw is
# available and round-trips a simple record.
describe 'RFlow::Message::Data::Raw Avro Schema' do
  before(:each) do
    registered_types = RFlow::Configuration.available_data_types
    @schema_string = registered_types['RFlow::Message::Data::Raw']['avro']
  end

  it "should load the schema" do
    @schema_string.should_not == nil
  end

  it "should encode and decode an object" do
    record = {'raw' => 'rawdata'}

    # Encoding must succeed before the encoded bytes are used for decoding.
    expect { encode_avro(@schema_string, record) }.to_not raise_error
    encoded_record = encode_avro(@schema_string, record)

    expect { decode_avro(@schema_string, encoded_record) }.to_not raise_error
    round_tripped = decode_avro(@schema_string, encoded_record)

    round_tripped.should == record
  end
end
@@ -0,0 +1,60 @@
1
+ require 'spec_helper.rb'
2
+
3
+ require 'rflow/message'
4
+
5
# Specs for constructing RFlow::Message::Data with different schema and
# serialization arguments.
describe RFlow::Message::Data do
  before(:all) do
    @string = 'this is a string to be serialized'
    @invalid_avro_schema_string = 'invalid schema'
    @valid_avro_string_schema_string = '{"type": "string"}'
    @avro_serialized_string = encode_avro(@valid_avro_string_schema_string, @string)
  end

  context "if created without a schema" do
    it "should throw an exception" do
      expect { RFlow::Message::Data.new }.to raise_error(ArgumentError)
    end
  end

  context "if created with an invalid schema for the serialization" do
    it "should throw and exception" do
      expect { RFlow::Message::Data.new(@invalid_avro_schema_string) }.to raise_error(ArgumentError)
      expect { RFlow::Message::Data.new(@invalid_avro_schema_string, 'avro') }.to raise_error(ArgumentError)
    end
  end

  # TODO: no examples yet for this context.
  context "if created with a valid avro schema and serialization" do
  end

  context "if created with a valid avro schema" do
    it "should instantiate correctly" do
      expect { RFlow::Message::Data.new(@valid_avro_string_schema_string, 'avro') }.to_not raise_error
    end

    context "if created with a non-avro data serialization" do
      it "should throw an exception" do
        expect { RFlow::Message::Data.new(@valid_avro_string_schema_string, 'unknown') }.to raise_error(ArgumentError)
        expect { RFlow::Message::Data.new(@valid_avro_string_schema_string, :unknown) }.to raise_error(ArgumentError)
        expect { RFlow::Message::Data.new(@valid_avro_string_schema_string, 'xml') }.to raise_error(ArgumentError)
        expect { RFlow::Message::Data.new(@valid_avro_string_schema_string, :xml) }.to raise_error(ArgumentError)
      end
    end

    context "if created with an avro serialization" do
      it "should instantiate correctly" do
        expect { RFlow::Message::Data.new(@valid_avro_string_schema_string, 'avro') }.to_not raise_error
      end

      context "if created with a serialized data object" do
        it "should instantiate correctly" do
          expect do
            RFlow::Message::Data.new(@valid_avro_string_schema_string, 'avro', @avro_serialized_string)
          end.to_not raise_error
        end
      end
    end
  end
end
@@ -0,0 +1,182 @@
1
+ require 'spec_helper.rb'
2
+
3
+ require 'digest/md5'
4
+
5
+ require 'rflow/message'
6
+
7
# Specs for RFlow::Message: construction, provenance handling, avro
# round-tripping and data extensions.
describe RFlow::Message do
  context "if created with an unknown data type" do
    it "should throw an exception" do
      expect { RFlow::Message.new('non_existant_data_type') }.to raise_error(ArgumentError)
    end
  end

  context "if created with a known data type" do
    before(:all) do
      @avro_string_schema_string = '{"type": "string"}'
      RFlow::Configuration.add_available_data_type(:string_type, 'avro', @avro_string_schema_string)
    end

    it "should instantiate correctly" do
      expect { RFlow::Message.new('string_type') }.to_not raise_error
    end

    context "if created with empty provenance" do
      context "if created with an unknown data serialization" do
        it "should throw an exception" do
          expect { RFlow::Message.new('string_type', [], 'unknown') }.to raise_error(ArgumentError)
          expect { RFlow::Message.new('string_type', [], :unknown) }.to raise_error(ArgumentError)
        end
      end

      context "if created with a known data serialization" do
        it "should instantiate correctly" do
          expect { RFlow::Message.new('string_type', [], 'avro') }.to_not raise_error
        end

        # TODO: no examples yet for this context.
        context "if created with a mismatched schema" do
        end

        # TODO: no examples yet for this context.
        context "if created with a matched schema" do
        end

        context "if created with a nil schema" do
          context "if created with a serialized data object" do
            before(:all) do
              @string = 'this is a string to be serialized'
              @avro_serialized_string = encode_avro(@avro_string_schema_string, @string)
            end

            it "should instantiate correctly" do
              expect do
                RFlow::Message.new('string_type', [], 'avro', nil, @avro_serialized_string)
              end.to_not raise_error
            end
          end
        end
      end
    end

    context "if created with invalid provenance" do
      before(:all) do
        @invalid_processing_event_hash = {'started_at' => 'bad time string'}
        @invalid_provenance = [@invalid_processing_event_hash]
      end

      it "should throw an exception" do
        expect { RFlow::Message.new('string_type', @invalid_provenance) }.to raise_error(ArgumentError)
      end
    end

    context "if created with valid provenance" do
      before(:all) do
        @valid_xmlschema_time = '2001-01-01T01:01:01.000001Z'
        @valid_processing_event_hash = {'component_instance_uuid' => 'uuid', 'started_at' => @valid_xmlschema_time}
        @valid_processing_event = RFlow::Message::ProcessingEvent.new('uuid', @valid_xmlschema_time, @valid_xmlschema_time, 'context')
        # Provenance entries are given both as ProcessingEvent objects and as
        # a plain hash.
        @valid_provenance = [
          RFlow::Message::ProcessingEvent.new('uuid'),
          @valid_processing_event_hash,
          @valid_processing_event,
        ]
        @valid_provenance_hashes = [
          {"component_instance_uuid"=>"uuid", "started_at"=>nil, "completed_at"=>nil, "context"=>nil},
          {"component_instance_uuid"=>"uuid", "started_at"=>@valid_xmlschema_time, "completed_at"=>nil, "context"=>nil},
          {"component_instance_uuid"=>"uuid", "started_at"=>@valid_xmlschema_time, "completed_at"=>@valid_xmlschema_time, "context"=>"context"},
        ]
      end

      it "should instantiate correctly" do
        expect { RFlow::Message.new('string_type', @valid_provenance) }.to_not raise_error
      end

      it "should correctly set the provenance processing events" do
        message = RFlow::Message.new('string_type', @valid_provenance)
        message.provenance[1].component_instance_uuid.should == 'uuid'
        message.provenance[1].started_at.should == Time.xmlschema(@valid_xmlschema_time)
        message.provenance[1].completed_at.should == nil
        message.provenance[1].context.should == nil
      end

      it "should to_hash its provenance correctly" do
        message = RFlow::Message.new('string_type', @valid_provenance)
        message.provenance.map(&:to_hash).should == @valid_provenance_hashes
      end
    end

    context "if correctly created" do
      it "should serialize and deserialized correctly to/from avro" do
        message = RFlow::Message.new('string_type')
        message.provenance << RFlow::Message::ProcessingEvent.new('UUID')
        message.data.data_object = 'teh awesome'

        processed_message = RFlow::Message.from_avro(message.to_avro)
        message.data.to_avro.should == processed_message.data.to_avro
        message.data.data_object.should == processed_message.data.data_object
      end
    end

    context "if data extensions exist" do
      it "should extend the data element with the extension" do
        module ExtensionModule; def ext_method; end; end

        # Before registration the data element must not have the method.
        message = RFlow::Message.new('string_type')
        message.data.methods.should_not include(:ext_method)

        RFlow::Configuration.add_available_data_extension('string_type', ExtensionModule)
        message = RFlow::Message.new('string_type')
        message.data.methods.should include(:ext_method)
      end
    end
  end

  it "should correctly handle large raw types" do
    message = RFlow::Message.new('RFlow::Message::Data::Raw')
    message.data.raw = Array.new(101) { rand(256) }.pack('c*')

    message_avro = message.to_avro.force_encoding('BINARY')

    processed_message = RFlow::Message.from_avro(message_avro)
    processed_message_avro = processed_message.to_avro.force_encoding('BINARY')

    raw_schema = RFlow::Configuration.available_data_types['RFlow::Message::Data::Raw']['avro']

    encode_avro(raw_schema, message.data.data_object).should == message.data.to_avro
    decode_avro(raw_schema, message.data.to_avro).should == message.data.data_object

    message_data_avro = message.data.to_avro.force_encoding('BINARY')
    processed_message_data_avro = processed_message.data.to_avro.force_encoding('BINARY')

    # Compare digests as well as the strings themselves for the round trip.
    Digest::MD5.hexdigest(message_avro).should == Digest::MD5.hexdigest(processed_message_avro)

    message_data_avro.should == processed_message_data_avro
    Digest::MD5.hexdigest(message_data_avro).should == Digest::MD5.hexdigest(processed_message_data_avro)
    Digest::MD5.hexdigest(message.data.raw).should == Digest::MD5.hexdigest(processed_message.data.raw)
  end
end