rflow 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +6 -0
  3. data/.rvmrc +1 -0
  4. data/Gemfile +5 -0
  5. data/NOTES +187 -0
  6. data/README +0 -0
  7. data/Rakefile +16 -0
  8. data/bin/rflow +215 -0
  9. data/example/basic_config.rb +49 -0
  10. data/example/basic_extensions.rb +142 -0
  11. data/example/http_config.rb +21 -0
  12. data/example/http_extensions.rb +262 -0
  13. data/lib/rflow.rb +440 -0
  14. data/lib/rflow/component.rb +192 -0
  15. data/lib/rflow/component/port.rb +150 -0
  16. data/lib/rflow/components.rb +10 -0
  17. data/lib/rflow/components/raw.rb +26 -0
  18. data/lib/rflow/components/raw/extensions.rb +18 -0
  19. data/lib/rflow/configuration.rb +290 -0
  20. data/lib/rflow/configuration/component.rb +27 -0
  21. data/lib/rflow/configuration/connection.rb +98 -0
  22. data/lib/rflow/configuration/migrations/20010101000001_create_settings.rb +14 -0
  23. data/lib/rflow/configuration/migrations/20010101000002_create_components.rb +19 -0
  24. data/lib/rflow/configuration/migrations/20010101000003_create_ports.rb +24 -0
  25. data/lib/rflow/configuration/migrations/20010101000004_create_connections.rb +27 -0
  26. data/lib/rflow/configuration/port.rb +30 -0
  27. data/lib/rflow/configuration/ruby_dsl.rb +183 -0
  28. data/lib/rflow/configuration/setting.rb +67 -0
  29. data/lib/rflow/configuration/uuid_keyed.rb +18 -0
  30. data/lib/rflow/connection.rb +59 -0
  31. data/lib/rflow/connections.rb +2 -0
  32. data/lib/rflow/connections/zmq_connection.rb +101 -0
  33. data/lib/rflow/message.rb +191 -0
  34. data/lib/rflow/port.rb +4 -0
  35. data/lib/rflow/util.rb +19 -0
  36. data/lib/rflow/version.rb +3 -0
  37. data/rflow.gemspec +42 -0
  38. data/schema/message.avsc +36 -0
  39. data/schema/raw.avsc +9 -0
  40. data/spec/fixtures/config_ints.rb +61 -0
  41. data/spec/fixtures/extensions_ints.rb +141 -0
  42. data/spec/rflow_configuration_spec.rb +73 -0
  43. data/spec/rflow_message_data_raw.rb +26 -0
  44. data/spec/rflow_message_data_spec.rb +60 -0
  45. data/spec/rflow_message_spec.rb +182 -0
  46. data/spec/rflow_spec.rb +100 -0
  47. data/spec/schema_spec.rb +28 -0
  48. data/spec/spec_helper.rb +37 -0
  49. data/temp.rb +295 -0
  50. metadata +270 -0
@@ -0,0 +1,18 @@
1
+ require 'uuidtools'
2
+
3
+ class RFlow
4
+ class Configuration
5
+ module UUIDKeyed
6
+ def self.included(base)
7
+ base.class_eval do
8
+ set_primary_key 'uuid'
9
+ before_create :generate_uuid
10
+
11
+ def generate_uuid
12
+ self.uuid = UUIDTools::UUID.random_create.to_s
13
+ end
14
+ end
15
+ end
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,59 @@
1
+ require 'rflow/message'
2
+
3
+ class RFlow
4
+ class Connection
5
+ attr_accessor :instance_uuid, :name, :configuration, :recv_callback
6
+
7
+ # Attribute that holds the callback invoked when a message is received
8
+ attr_accessor :recv_callback
9
+
10
+ def initialize(connection_instance_uuid, connection_name=nil, connection_configuration={})
11
+ @instance_uuid = connection_instance_uuid
12
+ @name = connection_name
13
+ @configuration = connection_configuration
14
+ end
15
+
16
+
17
+ # Subclass and implement to be able to handle future 'recv'
18
+ # methods. Will only be called in the context of a running
19
+ # EventMachine reactor
20
+ def connect_input!
21
+ raise NotImplementedError, "Raw connections do not support connect_input. Please subclass and define a connect_output method."
22
+ end
23
+
24
+
25
+ # Subclass and implement to be able to handle future 'send'
26
+ # methods. Will only be called in the context of a running
27
+ # EventMachine reactor
28
+ def connect_output!
29
+ raise NotImplementedError, "Raw connections do not support connect_output. Please subclass and define a connect_output method."
30
+ end
31
+
32
+
33
+ # Subclass and implement to handle outgoing messages. The message
34
+ # will be a RFlow::Message object and the subclasses are expected
35
+ # to marshal it up into something that will be unmarshalled on the
36
+ # other side
37
+ def send_message(message)
38
+ raise NotImplementedError, "Raw connections do not support send_message. Please subclass and define a send_message method."
39
+ end
40
+
41
+ # Parent component will set this attribute if it expects to
42
+ # receive messages. Connection subclass should call it
43
+ # (recv_callback.call(message)) when it gets a new message, which
44
+ # will be transmitted back to the parent component's
45
+ # process_message method. Subclass is responsible for
46
+ # deserializing whatever was on the wire into a RFlow::Message object
47
+ def recv_callback
48
+ @recv_callback ||= Proc.new {|message|}
49
+ end
50
+
51
+ end # class Connection
52
+
53
+ class Disconnection < Connection
54
+ def send_message(message)
55
+ RFlow.logger.debug "Attempting to send without a connection, doing nothing"
56
+ end
57
+ end
58
+
59
+ end # class RFlow
@@ -0,0 +1,2 @@
1
+ # Load the core connection types
2
+ require 'rflow/connections/zmq_connection'
@@ -0,0 +1,101 @@
1
+ #require 'ffi'
2
+ #require 'ffi-rzmq'
3
+ require 'em-zeromq-mri'
4
+
5
+ require 'rflow/connection'
6
+ require 'rflow/message'
7
+
8
+ class RFlow
9
+ module Connections
10
+ class ZMQConnection < RFlow::Connection
11
+
12
+ class << self
13
+ attr_accessor :zmq_context
14
+
15
+ def create_zmq_context
16
+ RFlow.logger.debug "Creating a new ZeroMQ context"
17
+ unless EM.reactor_running?
18
+ raise RuntimeError, "EventMachine reactor is not running when attempting to create a ZeroMQ context"
19
+ end
20
+ EM::ZeroMQ::Context.new(1)
21
+ end
22
+
23
+ # Returns the current ZeroMQ context object or creates it if
24
+ # it does not exist. Assumes that we are within a running
25
+ # EventMachine reactor
26
+ def zmq_context
27
+ @zmq_context ||= create_zmq_context
28
+ end
29
+ end
30
+
31
+ attr_accessor :socket
32
+
33
+ REQUIRED_OPTION_SUFFIXES = ['_socket_type', '_address', '_responsibility']
34
+
35
+ def self.configuration_errors(configuration)
36
+ # TODO: Normalize/validate configuration
37
+ missing_config_elements = []
38
+
39
+ ['input', 'output'].each do |direction_prefix|
40
+ REQUIRED_OPTION_SUFFIXES.each do |option_suffix|
41
+ config_element = "#{direction_prefix}#{option_suffix}"
42
+ unless configuration.include? config_element
43
+ missing_config_elements << config_element
44
+ end
45
+ end
46
+ end
47
+
48
+ missing_config_elements
49
+ end
50
+
51
+
52
+ def initialize(connection_instance_uuid, connection_name, connection_configuration)
53
+ configuration_errors = self.class.configuration_errors(connection_configuration)
54
+ unless configuration_errors.empty?
55
+ raise ArgumentError, "#{self.class.to_s}: configuration missing elements: #{configuration_errors.join ', '}"
56
+ end
57
+
58
+ super
59
+ end
60
+
61
+
62
+ def connect_input!
63
+ RFlow.logger.debug "Connecting input #{instance_uuid} with #{configuration.find_all {|k, v| k.to_s =~ /input/}}"
64
+ self.socket = self.class.zmq_context.send(configuration['input_responsibility'],
65
+ ZMQ.const_get(configuration['input_socket_type'].to_sym),
66
+ configuration['input_address'],
67
+ self)
68
+ end
69
+
70
+
71
+ def connect_output!
72
+ RFlow.logger.debug "Connecting output #{instance_uuid} with #{configuration.find_all {|k, v| k.to_s =~ /output/}}"
73
+ self.socket = self.class.zmq_context.send(configuration['output_responsibility'].to_s,
74
+ ZMQ.const_get(configuration['output_socket_type'].to_sym),
75
+ configuration['output_address'].to_s,
76
+ self)
77
+ end
78
+
79
+
80
+ def on_readable(socket, message_parts)
81
+ message = RFlow::Message.from_avro(message_parts.last.copy_out_string)
82
+ RFlow.logger.debug "#{name}: Received message of type '#{message_parts.first.copy_out_string}'"
83
+ recv_callback.call(message)
84
+ end
85
+
86
+ # TODO: fix this tight loop of retries
87
+ def send_message(message)
88
+ RFlow.logger.debug "#{name}: Sending message of type '#{message.data_type_name.to_s}'"
89
+
90
+ begin
91
+ socket.send_msg(message.data_type_name.to_s, message.to_avro)
92
+ RFlow.logger.debug "#{name}: Successfully sent message of type '#{message.data_type_name.to_s}'"
93
+ rescue Exception => e
94
+ RFlow.logger.debug "Exception #{e.class}: #{e.message}, retrying send"
95
+ retry
96
+ end
97
+ end
98
+
99
+ end
100
+ end
101
+ end
@@ -0,0 +1,191 @@
1
+ require 'stringio'
2
+ require 'time'
3
+
4
+ require 'avro'
5
+
6
+ require 'rflow/configuration'
7
+
8
+ class RFlow
9
+
10
+ # TODO: reduce reliance/expectation on avro serialization in method
11
+ # names and such.
12
+ class Message
13
+
14
+ class << self
15
+ def avro_message_schema; @avro_message_schema ||= Avro::Schema.parse(File.read(File.join(File.dirname(__FILE__), '..', '..', 'schema', 'message.avsc'))); end
16
+
17
+ def avro_reader; @avro_reader ||= Avro::IO::DatumReader.new(avro_message_schema, avro_message_schema); end
18
+ def avro_writer; @avro_writer ||= Avro::IO::DatumWriter.new(avro_message_schema); end
19
+ def avro_decoder(io_object); Avro::IO::BinaryDecoder.new(io_object); end
20
+ def avro_encoder(io_object); Avro::IO::BinaryEncoder.new(io_object); end
21
+
22
+ # Take in an Avro serialization of a message and return a new
23
+ # Message object. Assumes the org.rflow.Message Avro schema.
24
+ def from_avro(avro_serialized_message_byte_string)
25
+ avro_serialized_message_byte_stringio = StringIO.new(avro_serialized_message_byte_string)
26
+ message_hash = avro_reader.read avro_decoder(avro_serialized_message_byte_stringio)
27
+ Message.new(message_hash['data_type_name'], message_hash['provenance'],
28
+ message_hash['data_serialization_type'], message_hash['data_schema'],
29
+ message_hash['data'])
30
+ end
31
+ end
32
+
33
+
34
+ # Serialize the current message object to Avro using the
35
+ # org.rflow.Message Avro schema. Note that we have to manually
36
+ # set the encoding for Ruby 1.9, otherwise the stringio would use
37
+ # UTF-8 by default, which would not work correctly, as a serialize
38
+ # avro string is BINARY, not UTF-8
39
+ def to_avro
40
+ avro_serialized_message_bytes = ''
41
+ avro_serialized_message_bytes.force_encoding 'BINARY'
42
+ avro_serialized_message_bytes_stringio = StringIO.new(avro_serialized_message_bytes, 'w')
43
+
44
+ deserialized_avro_object = {
45
+ 'data_type_name' => self.data_type_name.to_s,
46
+ 'provenance' => self.provenance.map(&:to_hash),
47
+ 'data_serialization_type' => self.data.serialization_type.to_s,
48
+ 'data_schema' => self.data.schema_string,
49
+ 'data' => self.data.to_avro
50
+ }
51
+
52
+ self.class.avro_writer.write deserialized_avro_object, self.class.avro_encoder(avro_serialized_message_bytes_stringio)
53
+ avro_serialized_message_bytes
54
+ end
55
+
56
+
57
+ attr_reader :data_type_name
58
+ attr_accessor :processing_event
59
+ attr_accessor :provenance
60
+ attr_reader :data, :data_extensions
61
+
62
+
63
+ def initialize(data_type_name, provenance=[], data_serialization_type='avro', data_schema_string=nil, serialized_data_object=nil)
64
+ # Default the values, in case someone puts in a nil instead of
65
+ # the default
66
+ @data_type_name = data_type_name.to_s
67
+
68
+ # Turn the provenance array of Hashes into an array of
69
+ # ProcessingEvents for easier access and time validation. TODO:
70
+ # do this lazily so as not to create/destroy objects that are
71
+ # never used
72
+ @provenance = (provenance || []).map do |processing_event_hash_or_object|
73
+ if processing_event_hash_or_object.is_a? ProcessingEvent
74
+ processing_event_hash_or_object
75
+ else
76
+ ProcessingEvent.new(processing_event_hash_or_object['component_instance_uuid'],
77
+ processing_event_hash_or_object['started_at'],
78
+ processing_event_hash_or_object['completed_at'],
79
+ processing_event_hash_or_object['context'])
80
+ end
81
+ end
82
+
83
+ # TODO: Make this better. This check is technically
84
+ # unnecessary, as we are able to completely desrialize the
85
+ # message without needing to resort to the registered schema.
86
+ registered_data_schema_string = RFlow::Configuration.available_data_types[@data_type_name][data_serialization_type.to_s]
87
+ unless registered_data_schema_string
88
+ error_message = "Data type '#{@data_type_name}' with serialization_type '#{data_serialization_type}' not found"
89
+ RFlow.logger.error error_message
90
+ raise ArgumentError, error_message
91
+ end
92
+
93
+ # TODO: think about registering the schemas automatically if not
94
+ # found in Configuration
95
+ if data_schema_string && (registered_data_schema_string != data_schema_string)
96
+ error_message = "Passed schema ('#{data_schema_string}') does not equal registered schema ('#{registered_data_schema_string}') for data type '#{@data_type_name}' with serialization_type '#{data_serialization_type}'"
97
+ RFlow.logger.error error_message
98
+ raise ArgumentError, error_message
99
+ end
100
+
101
+ @data = Data.new(registered_data_schema_string, data_serialization_type.to_s, serialized_data_object)
102
+
103
+ # Get the extensions and apply them to the data object to add capability
104
+ @data_extensions = RFlow::Configuration.available_data_extensions[@data_type_name]
105
+ @data_extensions.each do |data_extension|
106
+ RFlow.logger.debug "Extending '#{data_type_name}' with extension '#{data_extension}'"
107
+ @data.extend data_extension
108
+ end
109
+ end
110
+
111
+
112
+ class ProcessingEvent
113
+ attr_accessor :component_instance_uuid, :started_at, :completed_at, :context
114
+
115
+ def initialize(component_instance_uuid, started_at=nil, completed_at=nil, context=nil)
116
+ @component_instance_uuid = component_instance_uuid
117
+ @started_at = case started_at
118
+ when String then Time.xmlschema(started_at)
119
+ when Time then started_at
120
+ else; nil; end
121
+ @completed_at = case completed_at
122
+ when String then Time.xmlschema(completed_at)
123
+ when Time then completed_at
124
+ else; nil; end
125
+ @context = context
126
+ end
127
+
128
+ def to_hash
129
+ {
130
+ 'component_instance_uuid' => component_instance_uuid.to_s,
131
+ 'started_at' => started_at ? started_at.xmlschema(6) : nil,
132
+ 'completed_at' => completed_at ? completed_at.xmlschema(6) : nil,
133
+ 'context' => context ? context.to_s : nil,
134
+ }
135
+ end
136
+ end
137
+
138
+ # Should proxy most methods to data_object that we can serialize
139
+ # to avro using the schema. Extensions should use 'extended' hook
140
+ # to apply immediate changes.
141
+ class Data
142
+ attr_reader :schema_string, :schema, :serialization_type
143
+ attr_accessor :data_object
144
+
145
+ def initialize(schema_string, serialization_type='avro', serialized_data_object=nil)
146
+ unless serialization_type == 'avro'
147
+ error_message = "Only Avro serialization_type supported at the moment"
148
+ RFlow.logger.error error_message
149
+ raise ArgumentError, error_message
150
+ end
151
+
152
+ @schema_string = schema_string
153
+ @serialization_type = serialization_type
154
+
155
+ begin
156
+ @schema = Avro::Schema.parse(schema_string)
157
+ rescue Exception => e
158
+ error_message = "Invalid schema '#{@schema_string}': #{e}: #{e.message}"
159
+ RFlow.logger.error error_message
160
+ raise ArgumentError, error_message
161
+ end
162
+
163
+ if serialized_data_object
164
+ serialized_data_object.force_encoding 'BINARY'
165
+ avro_decoder = Avro::IO::BinaryDecoder.new StringIO.new(serialized_data_object)
166
+ @data_object = Avro::IO::DatumReader.new(schema, schema).read avro_decoder
167
+ end
168
+ end
169
+
170
+ def valid?
171
+ Avro::Schema.validate @schema, @data_object
172
+ end
173
+
174
+ def to_avro
175
+ serialized_data_object_bytes = ''
176
+ serialized_data_object_bytes.force_encoding 'BINARY'
177
+ serialized_data_object_bytes_stringio = StringIO.new(serialized_data_object_bytes)
178
+ Avro::IO::DatumWriter.new(@schema).write @data_object, Avro::IO::BinaryEncoder.new(serialized_data_object_bytes_stringio)
179
+ serialized_data_object_bytes
180
+ end
181
+
182
+ # Proxy methods down to the underlying data_object, probably a
183
+ # Hash. Hopefully an extension will provide any additional
184
+ # functionality so this won't be called unless needed
185
+ def method_missing(method_sym, *args, &block)
186
+ @data_object.send(method_sym, *args, &block)
187
+ end
188
+ end
189
+
190
+ end
191
+ end
data/lib/rflow/port.rb ADDED
@@ -0,0 +1,4 @@
1
+ class RFlow
2
+ class Port
3
+ end # class Port
4
+ end # class RFlow
data/lib/rflow/util.rb ADDED
@@ -0,0 +1,19 @@
1
+ require 'uuidtools'
2
+
3
+ class RFlow
4
+ module Util
5
+ # Generate a UUID based on either the SHA1 of a seed string (v5) with a
6
+ # 'zero' UUID namespace, or using a purely random generation
7
+ # (v4) if no seed string is present
8
+ def generate_uuid_string(seed=nil)
9
+ uuid = if seed
10
+ UUIDTools::UUID.sha1_create(UUIDTools::UUID.parse_int(0), seed)
11
+ else
12
+ UUIDTools::UUID.random_create
13
+ end
14
+ uuid.to_s
15
+ end
16
+
17
+
18
+ end
19
+ end
@@ -0,0 +1,3 @@
1
+ class RFlow
2
+ VERSION = "0.0.5"
3
+ end # class RFlow
data/rflow.gemspec ADDED
@@ -0,0 +1,42 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "rflow/version"
4
+
5
+ Gem::Specification.new do |s|
6
+ s.name = "rflow"
7
+ s.version = RFlow::VERSION
8
+ s.platform = Gem::Platform::RUBY
9
+ s.required_ruby_version = '~> 1.9'
10
+ s.authors = ["Michael L. Artz"]
11
+ s.email = ["michael.artz@redjack.com"]
12
+ s.homepage = ""
13
+ s.summary = %q{A Ruby-based workflow framework}
14
+ s.description = %q{A Ruby-based workflow framework that utilizes ZeroMQ for component connections and Avro for serialization}
15
+
16
+ s.rubyforge_project = "rflow"
17
+
18
+ s.files = `git ls-files`.split("\n")
19
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
20
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
21
+ s.require_paths = ["lib"]
22
+
23
+ s.add_dependency 'uuidtools', '~> 2.1'
24
+ s.add_dependency 'log4r', '~> 1.1'
25
+
26
+ s.add_dependency 'sqlite3', '~> 1.3'
27
+ s.add_dependency 'activerecord', '~> 3.0'
28
+
29
+ s.add_dependency 'avro', '>= 1.5.1'
30
+ s.add_dependency 'ffi', '~> 1.0'
31
+ s.add_dependency 'ffi-rzmq' , '~> 0.8'
32
+
33
+ s.add_dependency 'eventmachine', '>= 1.0.0.beta3'
34
+ # MRI-only because of the FFI memory leak. TODO: remove when ffi fixed
35
+ s.add_dependency 'em-zeromq-mri', '~> 0.2'
36
+ # Remove this when we break it out into its own gem
37
+ s.add_dependency 'eventmachine_httpserver', '~> 0.2'
38
+
39
+ s.add_development_dependency 'rspec', '~> 2.6'
40
+ s.add_development_dependency 'rake', '>= 0.8.7'
41
+ #s.add_development_dependency 'rcov', '= 0.9.9' # Not 1.9.2 compatible
42
+ end