rflow 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +6 -0
  3. data/.rvmrc +1 -0
  4. data/Gemfile +5 -0
  5. data/NOTES +187 -0
  6. data/README +0 -0
  7. data/Rakefile +16 -0
  8. data/bin/rflow +215 -0
  9. data/example/basic_config.rb +49 -0
  10. data/example/basic_extensions.rb +142 -0
  11. data/example/http_config.rb +21 -0
  12. data/example/http_extensions.rb +262 -0
  13. data/lib/rflow.rb +440 -0
  14. data/lib/rflow/component.rb +192 -0
  15. data/lib/rflow/component/port.rb +150 -0
  16. data/lib/rflow/components.rb +10 -0
  17. data/lib/rflow/components/raw.rb +26 -0
  18. data/lib/rflow/components/raw/extensions.rb +18 -0
  19. data/lib/rflow/configuration.rb +290 -0
  20. data/lib/rflow/configuration/component.rb +27 -0
  21. data/lib/rflow/configuration/connection.rb +98 -0
  22. data/lib/rflow/configuration/migrations/20010101000001_create_settings.rb +14 -0
  23. data/lib/rflow/configuration/migrations/20010101000002_create_components.rb +19 -0
  24. data/lib/rflow/configuration/migrations/20010101000003_create_ports.rb +24 -0
  25. data/lib/rflow/configuration/migrations/20010101000004_create_connections.rb +27 -0
  26. data/lib/rflow/configuration/port.rb +30 -0
  27. data/lib/rflow/configuration/ruby_dsl.rb +183 -0
  28. data/lib/rflow/configuration/setting.rb +67 -0
  29. data/lib/rflow/configuration/uuid_keyed.rb +18 -0
  30. data/lib/rflow/connection.rb +59 -0
  31. data/lib/rflow/connections.rb +2 -0
  32. data/lib/rflow/connections/zmq_connection.rb +101 -0
  33. data/lib/rflow/message.rb +191 -0
  34. data/lib/rflow/port.rb +4 -0
  35. data/lib/rflow/util.rb +19 -0
  36. data/lib/rflow/version.rb +3 -0
  37. data/rflow.gemspec +42 -0
  38. data/schema/message.avsc +36 -0
  39. data/schema/raw.avsc +9 -0
  40. data/spec/fixtures/config_ints.rb +61 -0
  41. data/spec/fixtures/extensions_ints.rb +141 -0
  42. data/spec/rflow_configuration_spec.rb +73 -0
  43. data/spec/rflow_message_data_raw.rb +26 -0
  44. data/spec/rflow_message_data_spec.rb +60 -0
  45. data/spec/rflow_message_spec.rb +182 -0
  46. data/spec/rflow_spec.rb +100 -0
  47. data/spec/schema_spec.rb +28 -0
  48. data/spec/spec_helper.rb +37 -0
  49. data/temp.rb +295 -0
  50. metadata +270 -0
@@ -0,0 +1,36 @@
1
+ {
2
+ "type": "record",
3
+ "name": "Message",
4
+ "namespace": "org.rflow",
5
+ "aliases": [],
6
+ "fields": [
7
+ {"name": "data_type_name", "type": "string"},
8
+ {"name": "provenance",
9
+ "type": {
10
+ "type": "array",
11
+ "items": {
12
+ "type": "record",
13
+ "name": "ProcessingEvent",
14
+ "namespace": "org.rflow",
15
+ "aliases": [],
16
+ "fields": [
17
+ {"name": "component_instance_uuid", "type": "string"},
18
+ {"name": "started_at", "type": ["string", "null"]},
19
+ {"name": "completed_at", "type": ["string", "null"]},
20
+ {"name": "context", "type": ["bytes", "null"]}
21
+ ]
22
+ }
23
+ }
24
+ },
25
+ {"name": "data_serialization_type",
26
+ "type": {
27
+ "type": "enum",
28
+ "name": "DataSerializationType",
29
+ "symbols": ["avro", "xml"]
30
+ }
31
+ },
32
+ {"name": "data_schema", "type": "string"},
33
+ {"name": "data", "type": "bytes"}
34
+ ]
35
+ }
36
+
data/schema/raw.avsc ADDED
@@ -0,0 +1,9 @@
1
+ {
2
+ "type": "record",
3
+ "name": "Raw",
4
+ "namespace": "org.rflow.message.data",
5
+ "aliases": [],
6
+ "fields": [
7
+ {"name": "raw", "type": "bytes"}
8
+ ]
9
+ }
@@ -0,0 +1,61 @@
1
+ # Meat of the config file. Stuff above this should probably be in
2
+ # separate gems and/or files that are brought in at runtime.
3
# Fixture configuration: two integer generators wired to several file
# outputs. Settings, components and connections are listed as tables and
# applied in the order they appear.
RFlow::Configuration::RubyDSL.configure do |config|
  # Settings: log level, application directory and application name.
  [
    ['rflow.log_level', 'DEBUG'],
    ['rflow.application_directory_path', '../tmp'],
    ['rflow.application_name', 'testapp'],
  ].each do |setting_name, setting_value|
    config.setting(setting_name, setting_value)
  end

  # Add schemas to the list of available schemas. Not convinced this is necessary.
  # config.schema('schemaname', 'schematype', 'schemadata')

  # Alternative component set, kept for reference:
  # config.component 'generate_ints1', 'RFlow::Components::GenerateIntegerSequence', 'start' => 0, 'finish' => 10, 'step' => 3, 'interval_seconds' => 1
  # config.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
  # config.component 'filter', 'RFlow::Components::RubyProcFilter', 'filter_proc_string' => 'lambda {|message| true}'
  # config.component 'replicate', 'RFlow::Components::Replicate'
  # config.component 'output1', 'RFlow::Components::FileOutput', 'output_file_path' => '/tmp/crap1'
  # config.component 'output2', 'RFlow::Components::FileOutput', 'output_file_path' => '/tmp/crap2'

  # Alternative wirings, kept for reference:
  # config.connect 'generate_ints#out' => 'filter#in'
  # config.connect 'filter#filtered' => 'replicate#in'
  # config.connect 'replicate#out[0]' => 'simple#in'
  # config.connect 'replicate#out[one]' => 'complex#in'
  # config.connect 'simple#out' => 'output#in'
  # config.connect 'complex#out' => 'output#in'

  # config.connect 'generate_ints1#out' => 'filter#in'
  # config.connect 'generate_ints2#out' => 'filter#in'
  # config.connect 'filter#filtered' => 'replicate#in'
  # config.connect 'replicate#out[1]' => 'output1#in'
  # config.connect 'replicate#out[2]' => 'output2#in'

  # A connection that should fail, because the output component has no 'out' port:
  # config.connect 'output#out' => 'simple#in'

  # First flow: one generator feeding five file outputs.
  config.component 'generate_ints', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
  [
    ['output', '../tmp/out'],
    ['output2', '../tmp/out2'],
    ['output_even', '../tmp/out_even'],
    ['output_odd', '../tmp/out_odd'],
    ['output_even_odd', '../tmp/out_even_odd'],
  ].each do |component_name, file_path|
    config.component component_name, 'RFlow::Components::FileOutput', 'output_file_path' => file_path
  end

  [
    ['generate_ints#out', 'output#in'],
    ['generate_ints#out', 'output2#in'],
    ['generate_ints#even_odd_out[even]', 'output_even#in'],
    ['generate_ints#even_odd_out[odd]', 'output_odd#in'],
    ['generate_ints#even_odd_out', 'output_even_odd#in'],
  ].each do |source_port, sink_port|
    config.connect source_port => sink_port
  end

  # Second flow: a separate generator connected through the un-keyed
  # even_odd_out port only.
  config.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
  config.component 'output_even_odd2', 'RFlow::Components::FileOutput', 'output_file_path' => '../tmp/out_even_odd2'

  config.connect 'generate_ints2#even_odd_out' => 'output_even_odd2#in'
end
60
+
61
+
@@ -0,0 +1,141 @@
1
+ # This will/should bring in available components and their schemas
2
+ require 'rflow/components'
3
+ require 'rflow/message'
4
+
5
+ #RFlow::Configuration.add_available_data_schema RFlow::Message::Data::AvroSchema.new('Integer', long_integer_schema)
6
+
7
# Example data extension: a module that is mixed into a message's data
# object and then registered against a data type name.
module SimpleDataExtension
  class << self
    # Hook invoked by Ruby when the module is extended onto an object.
    # Touches data_object, which is the place to default or verify the data.
    def extended(base_data)
      base_data.data_object
    end
  end

  # Placeholder method contributed to extended data objects.
  def my_method
  end
end
RFlow::Configuration.add_available_data_extension('RFlow::Message::Data::Integer', SimpleDataExtension)
17
+
18
+
19
+
20
# Example of registering a new data type: an Avro "long" schema made
# available under the Integer data type name.
integer_avro_schema = '{"type": "long"}'
RFlow::Configuration.add_available_data_type(
  'RFlow::Message::Data::Integer',
  'avro',
  integer_avro_schema
)
23
+
24
+
25
# Source component that emits an increasing sequence of integers, one per
# timer tick. Every value goes to the `out` port and also to the 'even' or
# 'odd' key of the `even_odd_out` port.
class RFlow::Components::GenerateIntegerSequence < RFlow::Component
  output_port :out
  output_port :even_odd_out

  # Reads the sequence parameters from the component configuration.
  #
  # config - string-keyed options:
  #          'start'            - first value (converted with to_i)
  #          'finish'           - last value, inclusive (converted with to_i)
  #          'step'             - increment, defaults to 1 when absent
  #          'interval_seconds' - delay between values; nil.to_i makes it 0
  #                               when absent
  def configure!(config)
    @start = config['start'].to_i
    @finish = config['finish'].to_i
    @step = config['step'] ? config['step'].to_i : 1
    @interval_seconds = config['interval_seconds'].to_i
  end

  # Starts emitting values. A periodic timer is used (sometimes with a 0
  # interval) so as not to block the reactor.
  #
  # NOTE(review): a 'step' of zero or less never moves past 'finish', so the
  # timer would keep firing; confirm whether that should be rejected.
  def run!
    # An empty range emits nothing. Without this guard the first value was
    # always sent, because the bound is only checked after sending.
    return if @start > @finish

    timer = EM::PeriodicTimer.new(@interval_seconds) do
      message = RFlow::Message.new('RFlow::Message::Data::Integer')
      message.data.data_object = @start
      out.send_message message
      if @start.even?
        even_odd_out['even'].send_message message
      else
        even_odd_out['odd'].send_message message
      end

      @start += @step
      timer.cancel if @start > @finish
    end
  end
end
56
+
57
# Component that copies every incoming message to each entry yielded by the
# `out` port. A message whose delivery raises is forwarded to `errored`.
class RFlow::Components::Replicate < RFlow::Component
  input_port :in
  output_port :out
  output_port :errored

  # Forwards `message` to every entry of the `out` port.
  #
  # input_port, input_port_key, connection - where the message arrived;
  #                                          unused here
  # message                                - the message to replicate
  def process_message(input_port, input_port_key, connection, message)
    puts "Processing message in Replicate"
    out.each do |connections|
      puts "Replicating"
      begin
        connections.send_message message
      rescue StandardError => e
        # Only StandardError is rescued, so signals and SystemExit still
        # propagate instead of being treated as a delivery failure.
        puts "Exception #{e.message}"
        errored.send_message message
      end
    end
  end
end
75
+
76
puts "Before RubyProcFilter"
# Component that routes each incoming message by the result of a
# user-supplied Ruby expression: truthy goes to `filtered`, falsy to
# `dropped`, and a raised error sends the message to `errored`.
class RFlow::Components::RubyProcFilter < RFlow::Component
  input_port :in
  output_port :filtered
  output_port :dropped
  output_port :errored

  # Builds the filter from the 'filter_proc_string' option. The string is
  # interpolated as the BODY of a one-argument lambda whose parameter is
  # named `message`.
  #
  # SECURITY: the string is passed to eval, so it runs as arbitrary Ruby
  # code. Only load configurations from trusted sources.
  def configure!(config)
    @filter_proc = eval("lambda {|message| #{config['filter_proc_string']} }")
  end

  # Applies the filter to `message` and forwards it to the matching port.
  #
  # input_port, input_port_key, connection - where the message arrived;
  #                                          unused here
  # message                                - the message to classify
  def process_message(input_port, input_port_key, connection, message)
    puts "Processing message in RubyProcFilter"
    begin
      if @filter_proc.call(message)
        filtered.send_message message
      else
        dropped.send_message message
      end
    rescue StandardError => e
      # Only StandardError is rescued, so signals and SystemExit still
      # propagate instead of being routed to the errored port.
      puts "Attempting to send message to errored #{e.message}"
      errored.send_message message
    end
  end
end
102
+
103
puts "Before FileOutput"
# Sink component that writes the inspected data object of every incoming
# message to a file, one message per line.
class RFlow::Components::FileOutput < RFlow::Component
  attr_accessor :output_file_path, :output_file
  input_port :in

  # Opens the output file named by the 'output_file_path' option. Mode 'w+'
  # truncates an existing file.
  def configure!(config)
    self.output_file_path = config['output_file_path']
    self.output_file = File.new output_file_path, 'w+'
  end

  #def run!; end

  # Appends the message's data object to the file and flushes right away so
  # the content is visible on disk after each message.
  #
  # input_port, input_port_key, connection - where the message arrived;
  #                                          unused here
  # message                                - the message to record
  def process_message(input_port, input_port_key, connection, message)
    puts "About to output to a file #{output_file_path}"
    output_file.puts message.data.data_object.inspect
    output_file.flush
  end

  # Closes the output file. Named `cleanup!` to match the component
  # interface listed in SimpleComponent in this file; the original `cleanup`
  # name presumably was never called by the framework (TODO confirm against
  # RFlow::Component). Safe to call before configure! or more than once.
  def cleanup!
    output_file.close unless output_file.nil? || output_file.closed?
  end

  # Kept so existing callers of the old name continue to work.
  alias_method :cleanup, :cleanup!
end
127
+
128
# TODO: Ensure that all the following methods work as they are supposed to.
# Minimal component listing the interface this fixture adheres to: one
# input port, one output port, and a no-op for each component method.
class SimpleComponent < RFlow::Component
  input_port :in
  output_port :out

  # No-op; receives the component's configuration.
  def configure!(config)
  end

  # No-op.
  def run!
  end

  # No-op; receives a message together with the port, port key and
  # connection it arrived on.
  def process_message(input_port, input_port_key, connection, message)
  end

  # No-op.
  def shutdown!
  end

  # No-op.
  def cleanup!
  end
end
140
+
141
+
@@ -0,0 +1,73 @@
1
+ require 'spec_helper.rb'
2
+
3
+ require 'rflow/configuration'
4
+
5
+
6
# Specs for the data type and data extension registries on
# RFlow::Configuration. The registries are not cleared between examples, so
# the count-based examples compare against the size observed at their start.
describe RFlow::Configuration do
  describe '.add_available_data_type' do
    context 'if passed a data_serialization that is not avro or xml' do
      it "should throw an exception" do
        expect do
          RFlow::Configuration.add_available_data_type('A', 'boom', 'schema')
        end.to raise_error(ArgumentError)
      end

      it "should not update the available_data_types" do
        num_types = RFlow::Configuration.available_data_types.size
        begin
          RFlow::Configuration.add_available_data_type('A', 'boom', 'schema')
        rescue ArgumentError
          # Expected rejection; any other error should fail the example
          # rather than be silently swallowed.
        end
        RFlow::Configuration.available_data_types.should have(num_types).items
      end
    end
  end

  describe "Data Extensions" do
    describe ".add_available_data_extension" do
      context 'if passed a non-module data extension' do
        it "should throw an exception" do
          expect do
            RFlow::Configuration.add_available_data_extension('data_type', 'not a Module')
          end.to raise_error(ArgumentError)
        end
      end

      context "if passed a valid Module as a data extension" do
        it "should update the available_data_extensions" do
          num_extensions = RFlow::Configuration.available_data_extensions['data_type'].size
          expect do
            RFlow::Configuration.add_available_data_extension('data_type', Module.new)
          end.to_not raise_error
          RFlow::Configuration.available_data_extensions['data_type'].should have(num_extensions + 1).items
        end
      end
    end

    it "should perform simple 'prefix'-based inheritance for extensions" do
      # Local variables are used instead of constants so the example does not
      # define A, B, C and D on Object for the rest of the test run.
      ext_a = Module.new
      ext_b = Module.new
      ext_c = Module.new
      ext_d = Module.new

      RFlow::Configuration.add_available_data_extension('A', ext_a)
      RFlow::Configuration.add_available_data_extension('A::B', ext_b)
      RFlow::Configuration.add_available_data_extension('A::B::C', ext_c)
      RFlow::Configuration.add_available_data_extension('A::B::C::D', ext_d)

      RFlow::Configuration.available_data_extensions['A'].should have(1).item
      RFlow::Configuration.available_data_extensions['A'].should == [ext_a]

      RFlow::Configuration.available_data_extensions['A::B'].should have(2).items
      RFlow::Configuration.available_data_extensions['A::B'].should == [ext_a, ext_b]

      RFlow::Configuration.available_data_extensions['A::B::C'].should have(3).items
      RFlow::Configuration.available_data_extensions['A::B::C'].should == [ext_a, ext_b, ext_c]

      RFlow::Configuration.available_data_extensions['A::B::C::D'].should have(4).items
      RFlow::Configuration.available_data_extensions['A::B::C::D'].should == [ext_a, ext_b, ext_c, ext_d]

      # A name with no extension of its own still inherits from its prefixes.
      RFlow::Configuration.available_data_extensions['A::B::C::D::E'].should have(4).items
      RFlow::Configuration.available_data_extensions['A::B::C::D::E'].should == [ext_a, ext_b, ext_c, ext_d]
    end
  end
end
@@ -0,0 +1,26 @@
1
+ require 'spec_helper.rb'
2
+
3
+ require 'rflow/components/raw'
4
+
5
# Specs for the Avro schema registered under the Raw data type.
describe 'RFlow::Message::Data::Raw Avro Schema' do
  before(:each) do
    registered_types = RFlow::Configuration.available_data_types
    @schema_string = registered_types['RFlow::Message::Data::Raw']['avro']
  end

  it "should load the schema" do
    @schema_string.should_not be_nil
  end

  it "should encode and decode an object" do
    payload = {'raw' => 'rawdata'}

    # Encoding must succeed before the encoded form is used for the round trip.
    expect { encode_avro(@schema_string, payload) }.to_not raise_error
    encoded_payload = encode_avro(@schema_string, payload)

    expect { decode_avro(@schema_string, encoded_payload) }.to_not raise_error
    round_tripped = decode_avro(@schema_string, encoded_payload)

    round_tripped.should == payload
  end
end
@@ -0,0 +1,60 @@
1
+ require 'spec_helper.rb'
2
+
3
+ require 'rflow/message'
4
+
5
# Specs for constructing RFlow::Message::Data with different combinations of
# schema, serialization type and pre-serialized data.
describe RFlow::Message::Data do
  before(:all) do
    @string = 'this is a string to be serialized'
    @invalid_avro_schema_string = 'invalid schema'
    @valid_avro_string_schema_string = '{"type": "string"}'
    @avro_serialized_string = encode_avro(@valid_avro_string_schema_string, @string)
  end

  context "if created without a schema" do
    it "should throw an exception" do
      expect {RFlow::Message::Data.new()}.to raise_error(ArgumentError)
    end
  end

  context "if created with an invalid schema for the serialization" do
    it "should throw an exception" do
      expect {RFlow::Message::Data.new(@invalid_avro_schema_string)}.to raise_error(ArgumentError)
      expect {RFlow::Message::Data.new(@invalid_avro_schema_string, 'avro')}.to raise_error(ArgumentError)
    end
  end

  # Placeholder: no examples written yet.
  context "if created with a valid avro schema and serialization" do
  end

  context "if created with a valid avro schema" do
    it "should instantiate correctly" do
      expect {RFlow::Message::Data.new(@valid_avro_string_schema_string, 'avro')}.to_not raise_error
    end

    context "if created with a non-avro data serialization" do
      it "should throw an exception" do
        expect {RFlow::Message::Data.new(@valid_avro_string_schema_string, 'unknown')}.to raise_error(ArgumentError)
        expect {RFlow::Message::Data.new(@valid_avro_string_schema_string, :unknown)}.to raise_error(ArgumentError)
        expect {RFlow::Message::Data.new(@valid_avro_string_schema_string, 'xml')}.to raise_error(ArgumentError)
        expect {RFlow::Message::Data.new(@valid_avro_string_schema_string, :xml)}.to raise_error(ArgumentError)
      end
    end

    context "if created with an avro serialization" do
      it "should instantiate correctly" do
        expect {RFlow::Message::Data.new(@valid_avro_string_schema_string, 'avro')}.to_not raise_error
      end

      context "if created with a serialized data object" do
        it "should instantiate correctly" do
          expect {RFlow::Message::Data.new(@valid_avro_string_schema_string, 'avro', @avro_serialized_string)}.to_not raise_error
        end
      end
    end
  end
end
@@ -0,0 +1,182 @@
1
+ require 'spec_helper.rb'
2
+
3
+ require 'digest/md5'
4
+
5
+ require 'rflow/message'
6
+
7
# Specs for RFlow::Message: construction with data types, serializations and
# provenance, Avro round-tripping, data extensions, and binary raw payloads.
describe RFlow::Message do

  context "if created with an unknown data type" do
    it "should throw an exception" do
      expect {RFlow::Message.new('non_existant_data_type')}.to raise_error(ArgumentError)
    end
  end

  context "if created with a known data type" do
    before(:all) do
      @avro_string_schema_string = '{"type": "string"}'
      RFlow::Configuration.add_available_data_type(:string_type, 'avro', @avro_string_schema_string)
    end

    it "should instantiate correctly" do
      expect {RFlow::Message.new('string_type')}.to_not raise_error
    end

    context "if created with empty provenance" do
      context "if created with an unknown data serialization" do
        it "should throw an exception" do
          expect {RFlow::Message.new('string_type', [], 'unknown')}.to raise_error(ArgumentError)
          expect {RFlow::Message.new('string_type', [], :unknown)}.to raise_error(ArgumentError)
        end
      end

      context "if created with a known data serialization" do
        it "should instantiate correctly" do
          expect {RFlow::Message.new('string_type', [], 'avro')}.to_not raise_error
        end

        # Placeholder: no examples written yet.
        context "if created with a mismatched schema" do
        end

        # Placeholder: no examples written yet.
        context "if created with a matched schema" do
        end

        context "if created with a nil schema" do
          context "if created with a serialized data object" do
            before(:all) do
              @string = 'this is a string to be serialized'
              @avro_serialized_string = encode_avro(@avro_string_schema_string, @string)
            end

            it "should instantiate correctly" do
              expect {RFlow::Message.new('string_type', [], 'avro', nil, @avro_serialized_string)}.to_not raise_error
            end
          end
        end
      end
    end

    context "if created with invalid provenance" do
      before(:all) do
        @invalid_processing_event_hash = {'started_at' => 'bad time string'}
        @invalid_provenance = [@invalid_processing_event_hash]
      end

      it "should throw an exception" do
        expect {RFlow::Message.new('string_type', @invalid_provenance)}.to raise_error(ArgumentError)
      end
    end

    context "if created with valid provenance" do
      before(:all) do
        @valid_xmlschema_time = '2001-01-01T01:01:01.000001Z'
        @valid_processing_event_hash = {'component_instance_uuid' => 'uuid', 'started_at' => @valid_xmlschema_time}
        @valid_processing_event = RFlow::Message::ProcessingEvent.new('uuid', @valid_xmlschema_time, @valid_xmlschema_time, 'context')
        # Mixes ProcessingEvent objects with a plain hash; both forms are
        # passed to the constructor in the examples below.
        @valid_provenance = [
          RFlow::Message::ProcessingEvent.new('uuid'),
          @valid_processing_event_hash,
          @valid_processing_event,
        ]
        @valid_provenance_hashes = [
          {"component_instance_uuid"=>"uuid", "started_at"=>nil, "completed_at"=>nil, "context"=>nil},
          {"component_instance_uuid"=>"uuid", "started_at"=>@valid_xmlschema_time, "completed_at"=>nil, "context"=>nil},
          {"component_instance_uuid"=>"uuid", "started_at"=>@valid_xmlschema_time, "completed_at"=>@valid_xmlschema_time, "context"=>"context"},
        ]
      end

      it "should instantiate correctly" do
        expect {RFlow::Message.new('string_type', @valid_provenance)}.to_not raise_error
      end

      it "should correctly set the provenance processing events" do
        message = RFlow::Message.new('string_type', @valid_provenance)
        message.provenance[1].component_instance_uuid.should == 'uuid'
        message.provenance[1].started_at.should == Time.xmlschema(@valid_xmlschema_time)
        message.provenance[1].completed_at.should == nil
        message.provenance[1].context.should == nil
      end

      it "should to_hash its provenance correctly" do
        message = RFlow::Message.new('string_type', @valid_provenance)
        message.provenance.map(&:to_hash).should == @valid_provenance_hashes
      end
    end

    context "if correctly created" do
      it "should serialize and deserialize correctly to/from avro" do
        message = RFlow::Message.new('string_type')
        message.provenance << RFlow::Message::ProcessingEvent.new('UUID')
        message.data.data_object = 'teh awesome'

        processed_message = RFlow::Message.from_avro(message.to_avro)
        message.data.to_avro.should == processed_message.data.to_avro
        message.data.data_object.should == processed_message.data.data_object
      end
    end

    context "if data extensions exist" do
      it "should extend the data element with the extension" do
        module ExtensionModule; def ext_method; end; end

        # Before registration the data object must not have the method.
        message = RFlow::Message.new('string_type')
        message.data.methods.should_not include(:ext_method)

        RFlow::Configuration.add_available_data_extension('string_type', ExtensionModule)
        message = RFlow::Message.new('string_type')
        message.data.methods.should include(:ext_method)
      end
    end
  end

  it "should correctly handle large raw types" do
    message = RFlow::Message.new('RFlow::Message::Data::Raw')
    # 101 random bytes, so the payload is arbitrary binary rather than text.
    message.data.raw = Array.new(101) { rand(256) }.pack('c*')

    message_avro = message.to_avro.force_encoding('BINARY')

    processed_message = RFlow::Message.from_avro(message_avro)
    processed_message_avro = processed_message.to_avro.force_encoding('BINARY')

    raw_schema = RFlow::Configuration.available_data_types['RFlow::Message::Data::Raw']['avro']

    encode_avro(raw_schema, message.data.data_object).should == message.data.to_avro
    decode_avro(raw_schema, message.data.to_avro).should == message.data.data_object

    message_data_avro = message.data.to_avro.force_encoding('BINARY')
    processed_message_data_avro = processed_message.data.to_avro.force_encoding('BINARY')

    # The whole message and its data section must survive the round trip
    # byte-for-byte.
    Digest::MD5.hexdigest(message_avro).should == Digest::MD5.hexdigest(processed_message_avro)

    message_data_avro.should == processed_message_data_avro
    Digest::MD5.hexdigest(message_data_avro).should == Digest::MD5.hexdigest(processed_message_data_avro)
    Digest::MD5.hexdigest(message.data.raw).should == Digest::MD5.hexdigest(processed_message.data.raw)
  end

end