rflow 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +6 -0
  3. data/.rvmrc +1 -0
  4. data/Gemfile +5 -0
  5. data/NOTES +187 -0
  6. data/README +0 -0
  7. data/Rakefile +16 -0
  8. data/bin/rflow +215 -0
  9. data/example/basic_config.rb +49 -0
  10. data/example/basic_extensions.rb +142 -0
  11. data/example/http_config.rb +21 -0
  12. data/example/http_extensions.rb +262 -0
  13. data/lib/rflow.rb +440 -0
  14. data/lib/rflow/component.rb +192 -0
  15. data/lib/rflow/component/port.rb +150 -0
  16. data/lib/rflow/components.rb +10 -0
  17. data/lib/rflow/components/raw.rb +26 -0
  18. data/lib/rflow/components/raw/extensions.rb +18 -0
  19. data/lib/rflow/configuration.rb +290 -0
  20. data/lib/rflow/configuration/component.rb +27 -0
  21. data/lib/rflow/configuration/connection.rb +98 -0
  22. data/lib/rflow/configuration/migrations/20010101000001_create_settings.rb +14 -0
  23. data/lib/rflow/configuration/migrations/20010101000002_create_components.rb +19 -0
  24. data/lib/rflow/configuration/migrations/20010101000003_create_ports.rb +24 -0
  25. data/lib/rflow/configuration/migrations/20010101000004_create_connections.rb +27 -0
  26. data/lib/rflow/configuration/port.rb +30 -0
  27. data/lib/rflow/configuration/ruby_dsl.rb +183 -0
  28. data/lib/rflow/configuration/setting.rb +67 -0
  29. data/lib/rflow/configuration/uuid_keyed.rb +18 -0
  30. data/lib/rflow/connection.rb +59 -0
  31. data/lib/rflow/connections.rb +2 -0
  32. data/lib/rflow/connections/zmq_connection.rb +101 -0
  33. data/lib/rflow/message.rb +191 -0
  34. data/lib/rflow/port.rb +4 -0
  35. data/lib/rflow/util.rb +19 -0
  36. data/lib/rflow/version.rb +3 -0
  37. data/rflow.gemspec +42 -0
  38. data/schema/message.avsc +36 -0
  39. data/schema/raw.avsc +9 -0
  40. data/spec/fixtures/config_ints.rb +61 -0
  41. data/spec/fixtures/extensions_ints.rb +141 -0
  42. data/spec/rflow_configuration_spec.rb +73 -0
  43. data/spec/rflow_message_data_raw.rb +26 -0
  44. data/spec/rflow_message_data_spec.rb +60 -0
  45. data/spec/rflow_message_spec.rb +182 -0
  46. data/spec/rflow_spec.rb +100 -0
  47. data/spec/schema_spec.rb +28 -0
  48. data/spec/spec_helper.rb +37 -0
  49. data/temp.rb +295 -0
  50. metadata +270 -0
@@ -0,0 +1,18 @@
1
+ require 'uuidtools'
2
+
3
class RFlow
  class Configuration
    # Mixin for configuration models whose primary key is a UUID string.
    # Including it configures the including class (see .included).
    module UUIDKeyed
      # Hook run when the module is included. On the including class it
      # sets 'uuid' as the primary key, registers :generate_uuid as a
      # before_create callback and defines the generate_uuid instance method.
      def self.included(base)
        base.send(:set_primary_key, 'uuid')
        base.send(:before_create, :generate_uuid)

        # Assigns a freshly generated random UUID string to the record's
        # uuid attribute and returns that string.
        base.send(:define_method, :generate_uuid) do
          self.uuid = UUIDTools::UUID.random_create.to_s
        end
      end
    end
  end
end
@@ -0,0 +1,59 @@
1
+ require 'rflow/message'
2
+
3
class RFlow
  # Abstract base class for a connection. It stores the connection's
  # identity and configuration; the connect_input!, connect_output! and
  # send_message methods raise NotImplementedError until a subclass
  # overrides them.
  class Connection
    attr_accessor :instance_uuid, :name, :configuration

    # Writer for the callback invoked with each received message. The
    # reader is defined explicitly below so that it can supply a no-op
    # default.
    attr_writer :recv_callback

    # connection_instance_uuid:: identifier stored as instance_uuid
    # connection_name::          optional name (defaults to nil)
    # connection_configuration:: configuration object (defaults to {})
    def initialize(connection_instance_uuid, connection_name=nil, connection_configuration={})
      @instance_uuid = connection_instance_uuid
      @name = connection_name
      @configuration = connection_configuration
    end


    # Subclass and implement to be able to handle future 'recv'
    # methods. Will only be called in the context of a running
    # EventMachine reactor.
    #
    # Raises NotImplementedError in this base class.
    def connect_input!
      raise NotImplementedError, "Raw connections do not support connect_input. Please subclass and define a connect_input method."
    end


    # Subclass and implement to be able to handle future 'send'
    # methods. Will only be called in the context of a running
    # EventMachine reactor.
    #
    # Raises NotImplementedError in this base class.
    def connect_output!
      raise NotImplementedError, "Raw connections do not support connect_output. Please subclass and define a connect_output method."
    end


    # Subclass and implement to handle outgoing messages. The message
    # will be a RFlow::Message object and the subclasses are expected
    # to marshal it up into something that will be unmarshalled on the
    # other side.
    #
    # Raises NotImplementedError in this base class.
    def send_message(message)
      raise NotImplementedError, "Raw connections do not support send_message. Please subclass and define a send_message method."
    end

    # Parent component will set this attribute if it expects to
    # receive messages. Connection subclass should call it
    # (recv_callback.call(message)) when it gets a new message, which
    # will be transmitted back to the parent component's
    # process_message method. Subclass is responsible for
    # deserializing whatever was on the wire into a RFlow::Message object.
    #
    # Returns the assigned callback, or a memoized Proc that ignores its
    # argument when none has been assigned.
    def recv_callback
      @recv_callback ||= Proc.new {|message|}
    end

  end # class Connection

  # Stand-in connection whose send_message only logs a debug line
  # instead of raising.
  class Disconnection < Connection
    def send_message(message)
      RFlow.logger.debug "Attempting to send without a connection, doing nothing"
    end
  end

end # class RFlow
@@ -0,0 +1,2 @@
1
+ # Load the core connection types
2
+ require 'rflow/connections/zmq_connection'
@@ -0,0 +1,101 @@
1
+ #require 'ffi'
2
+ #require 'ffi-rzmq'
3
+ require 'em-zeromq-mri'
4
+
5
+ require 'rflow/connection'
6
+ require 'rflow/message'
7
+
8
+ class RFlow
9
+ module Connections
10
+ class ZMQConnection < RFlow::Connection
11
+
12
class << self
  # Writer for the cached ZeroMQ context; the reader below creates the
  # context on first use.
  attr_writer :zmq_context

  # Builds a new EM::ZeroMQ::Context (constructed with the argument 1).
  # Raises RuntimeError when the EventMachine reactor is not running.
  def create_zmq_context
    RFlow.logger.debug "Creating a new ZeroMQ context"
    raise RuntimeError, "EventMachine reactor is not running when attempting to create a ZeroMQ context" unless EM.reactor_running?

    EM::ZeroMQ::Context.new(1)
  end

  # Returns the current ZeroMQ context object, creating and caching it
  # if it does not exist yet. Assumes that we are within a running
  # EventMachine reactor.
  def zmq_context
    return @zmq_context if @zmq_context

    @zmq_context = create_zmq_context
  end
end
30
+
31
+ attr_accessor :socket
32
+
33
# Suffixes that, prefixed with 'input' or 'output', name the
# configuration keys every ZMQ connection must provide.
REQUIRED_OPTION_SUFFIXES = %w[_socket_type _address _responsibility]

# Returns the required configuration keys that +configuration+ does not
# include, input keys first and then output keys. An empty array means
# nothing is missing.
def self.configuration_errors(configuration)
  # TODO: Normalize/validate configuration
  required_elements = %w[input output].product(REQUIRED_OPTION_SUFFIXES).map do |direction_prefix, option_suffix|
    "#{direction_prefix}#{option_suffix}"
  end

  required_elements.reject { |config_element| configuration.include? config_element }
end
50
+
51
+
52
# Checks the configuration with the class-level configuration_errors
# before handing all three arguments to the superclass initializer
# (bare super forwards them unchanged).
#
# Raises ArgumentError listing the missing configuration elements.
def initialize(connection_instance_uuid, connection_name, connection_configuration)
  missing_elements = self.class.configuration_errors(connection_configuration)
  raise ArgumentError, "#{self.class}: configuration missing elements: #{missing_elements.join ', '}" unless missing_elements.empty?

  super
end
60
+
61
+
62
# Creates the input socket and stores it in #socket.
#
# The 'input_responsibility' entry names the method invoked on the
# shared ZeroMQ context (presumably bind or connect -- verify against
# em-zeromq), 'input_socket_type' names a constant looked up on ZMQ,
# and 'input_address' is the address passed along. This connection is
# passed as the final argument. Responsibility and address are coerced
# with to_s, matching connect_output!, so symbol-valued configuration
# behaves the same in both directions.
def connect_input!
  RFlow.logger.debug "Connecting input #{instance_uuid} with #{configuration.find_all {|k, v| k.to_s =~ /input/}}"
  self.socket = self.class.zmq_context.send(configuration['input_responsibility'].to_s,
                                            ZMQ.const_get(configuration['input_socket_type'].to_sym),
                                            configuration['input_address'].to_s,
                                            self)
end
69
+
70
+
71
# Creates the output socket and stores it in #socket.
#
# Calls the method named by 'output_responsibility' on the shared
# ZeroMQ context, passing the ZMQ constant named by
# 'output_socket_type', the 'output_address' string and this
# connection.
def connect_output!
  output_settings = configuration.find_all {|k, v| k.to_s =~ /output/}
  RFlow.logger.debug "Connecting output #{instance_uuid} with #{output_settings}"

  context = self.class.zmq_context
  responsibility = configuration['output_responsibility'].to_s
  socket_type = ZMQ.const_get(configuration['output_socket_type'].to_sym)
  address = configuration['output_address'].to_s

  self.socket = context.send(responsibility, socket_type, address, self)
end
78
+
79
+
80
# Handles a multipart message. The last part is decoded into an
# RFlow::Message via RFlow::Message.from_avro, the first part is logged
# as the message type, and the decoded message is passed to
# recv_callback. The +socket+ argument is not used.
def on_readable(socket, message_parts)
  serialized_message = message_parts.last.copy_out_string
  message = RFlow::Message.from_avro(serialized_message)

  type_name = message_parts.first.copy_out_string
  RFlow.logger.debug "#{name}: Received message of type '#{type_name}'"

  recv_callback.call(message)
end
85
+
86
# Sends +message+ over the socket as two parts: the data type name
# followed by the message's Avro serialization.
#
# A failed attempt is retried, and the message is re-serialized on each
# attempt. Only StandardError is rescued, so Interrupt and SystemExit
# propagate immediately rather than being swallowed by the retry loop.
# The number of attempts is bounded; when the last attempt fails the
# failure is logged at error level and the exception is re-raised
# instead of retrying forever.
def send_message(message)
  type_name = message.data_type_name.to_s
  RFlow.logger.debug "#{name}: Sending message of type '#{type_name}'"

  max_attempts = 10
  attempts = 0

  begin
    attempts += 1
    socket.send_msg(type_name, message.to_avro)
    RFlow.logger.debug "#{name}: Successfully sent message of type '#{type_name}'"
  rescue StandardError => e
    if attempts < max_attempts
      RFlow.logger.debug "Exception #{e.class}: #{e.message}, retrying send"
      retry
    end

    RFlow.logger.error "#{name}: Giving up sending message of type '#{type_name}' after #{attempts} attempts: #{e.class}: #{e.message}"
    raise
  end
end
98
+
99
+ end
100
+ end
101
+ end
@@ -0,0 +1,191 @@
1
+ require 'stringio'
2
+ require 'time'
3
+
4
+ require 'avro'
5
+
6
+ require 'rflow/configuration'
7
+
8
class RFlow

  # TODO: reduce reliance/expectation on avro serialization in method
  # names and such.
  #
  # A message couples a data type name and a provenance list with a
  # Data payload, and can be serialized to and from Avro using the
  # message schema.
  class Message

    class << self
      # Parsed Avro schema for the message envelope, read from
      # schema/message.avsc two directories above this file. Memoized.
      def avro_message_schema
        @avro_message_schema ||= Avro::Schema.parse(File.read(File.join(File.dirname(__FILE__), '..', '..', 'schema', 'message.avsc')))
      end

      # Memoized datum reader for the message schema.
      def avro_reader
        @avro_reader ||= Avro::IO::DatumReader.new(avro_message_schema, avro_message_schema)
      end

      # Memoized datum writer for the message schema.
      def avro_writer
        @avro_writer ||= Avro::IO::DatumWriter.new(avro_message_schema)
      end

      # New binary decoder reading from +io_object+.
      def avro_decoder(io_object)
        Avro::IO::BinaryDecoder.new(io_object)
      end

      # New binary encoder writing to +io_object+.
      def avro_encoder(io_object)
        Avro::IO::BinaryEncoder.new(io_object)
      end

      # Take in an Avro serialization of a message and return a new
      # Message object. Assumes the org.rflow.Message Avro schema.
      def from_avro(avro_serialized_message_byte_string)
        avro_serialized_message_byte_stringio = StringIO.new(avro_serialized_message_byte_string)
        message_hash = avro_reader.read avro_decoder(avro_serialized_message_byte_stringio)
        Message.new(message_hash['data_type_name'], message_hash['provenance'],
                    message_hash['data_serialization_type'], message_hash['data_schema'],
                    message_hash['data'])
      end
    end


    # Serialize the current message object to Avro using the
    # org.rflow.Message Avro schema. The output buffer is explicitly
    # BINARY because a serialized Avro string is binary data, not UTF-8.
    # The buffer is created with String.new so that no string literal is
    # mutated.
    #
    # Returns the serialized bytes as a String.
    def to_avro
      avro_serialized_message_bytes = String.new.force_encoding('BINARY')
      avro_serialized_message_bytes_stringio = StringIO.new(avro_serialized_message_bytes, 'w')

      deserialized_avro_object = {
        'data_type_name' => self.data_type_name.to_s,
        'provenance' => self.provenance.map(&:to_hash),
        'data_serialization_type' => self.data.serialization_type.to_s,
        'data_schema' => self.data.schema_string,
        'data' => self.data.to_avro
      }

      self.class.avro_writer.write deserialized_avro_object, self.class.avro_encoder(avro_serialized_message_bytes_stringio)
      avro_serialized_message_bytes
    end


    attr_reader :data_type_name
    attr_accessor :processing_event
    attr_accessor :provenance
    attr_reader :data, :data_extensions


    # data_type_name::          name of the registered data type (stored as a String)
    # provenance::              array of ProcessingEvent objects and/or hashes
    #                           describing them; nil is treated as empty
    # data_serialization_type:: serialization type name; nil is treated as 'avro'
    # data_schema_string::      optional schema; when given it must equal
    #                           the registered schema
    # serialized_data_object::  optional serialized payload handed to Data
    #
    # Raises ArgumentError when the data type / serialization type pair
    # is not registered in RFlow::Configuration, or when the passed
    # schema differs from the registered one.
    def initialize(data_type_name, provenance=[], data_serialization_type='avro', data_schema_string=nil, serialized_data_object=nil)
      # Default the values, in case someone puts in a nil instead of
      # the default
      @data_type_name = data_type_name.to_s
      data_serialization_type = (data_serialization_type || 'avro').to_s

      # Turn the provenance array of Hashes into an array of
      # ProcessingEvents for easier access and time validation. TODO:
      # do this lazily so as not to create/destroy objects that are
      # never used
      @provenance = (provenance || []).map do |processing_event_hash_or_object|
        if processing_event_hash_or_object.is_a? ProcessingEvent
          processing_event_hash_or_object
        else
          ProcessingEvent.new(processing_event_hash_or_object['component_instance_uuid'],
                              processing_event_hash_or_object['started_at'],
                              processing_event_hash_or_object['completed_at'],
                              processing_event_hash_or_object['context'])
        end
      end

      # TODO: Make this better. This check is technically
      # unnecessary, as we are able to completely deserialize the
      # message without needing to resort to the registered schema.
      # The `|| {}` keeps an unknown data type on the ArgumentError path
      # below even when the registry returns nil for it.
      schemas_for_type = RFlow::Configuration.available_data_types[@data_type_name] || {}
      registered_data_schema_string = schemas_for_type[data_serialization_type]
      unless registered_data_schema_string
        error_message = "Data type '#{@data_type_name}' with serialization_type '#{data_serialization_type}' not found"
        RFlow.logger.error error_message
        raise ArgumentError, error_message
      end

      # TODO: think about registering the schemas automatically if not
      # found in Configuration
      if data_schema_string && (registered_data_schema_string != data_schema_string)
        error_message = "Passed schema ('#{data_schema_string}') does not equal registered schema ('#{registered_data_schema_string}') for data type '#{@data_type_name}' with serialization_type '#{data_serialization_type}'"
        RFlow.logger.error error_message
        raise ArgumentError, error_message
      end

      @data = Data.new(registered_data_schema_string, data_serialization_type, serialized_data_object)

      # Get the extensions and apply them to the data object to add
      # capability. A type with no registered extensions gets none.
      @data_extensions = RFlow::Configuration.available_data_extensions[@data_type_name] || []
      @data_extensions.each do |data_extension|
        RFlow.logger.debug "Extending '#{data_type_name}' with extension '#{data_extension}'"
        @data.extend data_extension
      end
    end


    # One entry in a message's provenance: which component instance
    # handled the message, when, and an optional context value.
    class ProcessingEvent
      attr_accessor :component_instance_uuid, :started_at, :completed_at, :context

      # started_at and completed_at may each be a Time, an XML Schema
      # timestamp String (parsed with Time.xmlschema), or anything else,
      # which is stored as nil.
      def initialize(component_instance_uuid, started_at=nil, completed_at=nil, context=nil)
        @component_instance_uuid = component_instance_uuid
        @started_at = coerce_time(started_at)
        @completed_at = coerce_time(completed_at)
        @context = context
      end

      # Hash form of the event: the uuid as a String, timestamps as XML
      # Schema strings with six fractional digits (or nil), and the
      # context as a String (or nil).
      def to_hash
        {
          'component_instance_uuid' => component_instance_uuid.to_s,
          'started_at' => started_at ? started_at.xmlschema(6) : nil,
          'completed_at' => completed_at ? completed_at.xmlschema(6) : nil,
          'context' => context ? context.to_s : nil,
        }
      end

      private

      # Returns +value+ as a Time: Strings are parsed with
      # Time.xmlschema, Times pass through, everything else becomes nil.
      def coerce_time(value)
        case value
        when String then Time.xmlschema(value)
        when Time then value
        end
      end
    end

    # Should proxy most methods to data_object that we can serialize
    # to avro using the schema. Extensions should use 'extended' hook
    # to apply immediate changes.
    class Data
      attr_reader :schema_string, :schema, :serialization_type
      attr_accessor :data_object

      # schema_string::          Avro schema text; parsed into #schema
      # serialization_type::     must be 'avro'
      # serialized_data_object:: optional serialized payload, decoded
      #                          into #data_object
      #
      # Raises ArgumentError for an unsupported serialization type or a
      # schema that fails to parse.
      def initialize(schema_string, serialization_type='avro', serialized_data_object=nil)
        unless serialization_type == 'avro'
          error_message = "Only Avro serialization_type supported at the moment"
          RFlow.logger.error error_message
          raise ArgumentError, error_message
        end

        @schema_string = schema_string
        @serialization_type = serialization_type

        begin
          @schema = Avro::Schema.parse(schema_string)
        rescue StandardError => e
          error_message = "Invalid schema '#{@schema_string}': #{e.class}: #{e.message}"
          RFlow.logger.error error_message
          raise ArgumentError, error_message
        end

        if serialized_data_object
          # Work on a copy so the caller's string is neither re-encoded
          # nor required to be unfrozen.
          binary_data = serialized_data_object.dup.force_encoding('BINARY')
          avro_decoder = Avro::IO::BinaryDecoder.new StringIO.new(binary_data)
          @data_object = Avro::IO::DatumReader.new(schema, schema).read avro_decoder
        end
      end

      # Result of validating data_object against the parsed schema.
      def valid?
        Avro::Schema.validate @schema, @data_object
      end

      # Serializes data_object with the parsed schema and returns the
      # bytes as a BINARY-encoded String.
      def to_avro
        serialized_data_object_bytes = String.new.force_encoding('BINARY')
        serialized_data_object_bytes_stringio = StringIO.new(serialized_data_object_bytes)
        Avro::IO::DatumWriter.new(@schema).write @data_object, Avro::IO::BinaryEncoder.new(serialized_data_object_bytes_stringio)
        serialized_data_object_bytes
      end

      # Proxy methods down to the underlying data_object, probably a
      # Hash. Hopefully an extension will provide any additional
      # functionality so this won't be called unless needed
      def method_missing(method_sym, *args, &block)
        @data_object.send(method_sym, *args, &block)
      end

      # Keeps respond_to? consistent with the proxying done by
      # method_missing.
      def respond_to_missing?(method_sym, include_private = false)
        @data_object.respond_to?(method_sym, include_private) || super
      end
    end

  end
end
data/lib/rflow/port.rb ADDED
@@ -0,0 +1,4 @@
1
class RFlow
  # Empty placeholder class; no behavior is defined for it in this file.
  class Port; end
end # class RFlow
data/lib/rflow/util.rb ADDED
@@ -0,0 +1,19 @@
1
+ require 'uuidtools'
2
+
3
class RFlow
  module Util
    # Returns a UUID as a String. With a truthy +seed+ the UUID is
    # derived from the SHA1 of the seed (v5) under a 'zero' UUID
    # namespace; without one it is generated randomly (v4).
    def generate_uuid_string(seed=nil)
      return UUIDTools::UUID.random_create.to_s unless seed

      zero_namespace = UUIDTools::UUID.parse_int(0)
      UUIDTools::UUID.sha1_create(zero_namespace, seed).to_s
    end
  end
end
@@ -0,0 +1,3 @@
1
class RFlow
  # Version string of the rflow gem.
  VERSION = '0.0.5'
end # class RFlow
data/rflow.gemspec ADDED
@@ -0,0 +1,42 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "rflow/version"
4
+
5
Gem::Specification.new do |spec|
  # Identity and descriptive metadata.
  spec.name = 'rflow'
  spec.version = RFlow::VERSION
  spec.platform = Gem::Platform::RUBY
  spec.required_ruby_version = '~> 1.9'
  spec.authors = ['Michael L. Artz']
  spec.email = ['michael.artz@redjack.com']
  spec.homepage = ''
  spec.summary = 'A Ruby-based workflow framework'
  spec.description = 'A Ruby-based workflow framework that utilizes ZeroMQ for component connections and Avro for serialization'

  spec.rubyforge_project = 'rflow'

  # File lists are taken from the output of git ls-files.
  spec.files = `git ls-files`.split("\n")
  spec.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  spec.require_paths = ['lib']

  # Runtime dependencies, in declaration order.
  [
    ['uuidtools', '~> 2.1'],
    ['log4r', '~> 1.1'],

    ['sqlite3', '~> 1.3'],
    ['activerecord', '~> 3.0'],

    ['avro', '>= 1.5.1'],
    ['ffi', '~> 1.0'],
    ['ffi-rzmq', '~> 0.8'],

    ['eventmachine', '>= 1.0.0.beta3'],
    # MRI-only because of the FFI memory leak. TODO: remove when ffi fixed
    ['em-zeromq-mri', '~> 0.2'],
    # Remove this when we break it out into its own gem
    ['eventmachine_httpserver', '~> 0.2']
  ].each do |gem_name, requirement|
    spec.add_dependency gem_name, requirement
  end

  # Development-only dependencies.
  spec.add_development_dependency 'rspec', '~> 2.6'
  spec.add_development_dependency 'rake', '>= 0.8.7'
  #spec.add_development_dependency 'rcov', '= 0.9.9' # Not 1.9.2 compatible
end