rflow 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +6 -0
- data/.rvmrc +1 -0
- data/Gemfile +5 -0
- data/NOTES +187 -0
- data/README +0 -0
- data/Rakefile +16 -0
- data/bin/rflow +215 -0
- data/example/basic_config.rb +49 -0
- data/example/basic_extensions.rb +142 -0
- data/example/http_config.rb +21 -0
- data/example/http_extensions.rb +262 -0
- data/lib/rflow.rb +440 -0
- data/lib/rflow/component.rb +192 -0
- data/lib/rflow/component/port.rb +150 -0
- data/lib/rflow/components.rb +10 -0
- data/lib/rflow/components/raw.rb +26 -0
- data/lib/rflow/components/raw/extensions.rb +18 -0
- data/lib/rflow/configuration.rb +290 -0
- data/lib/rflow/configuration/component.rb +27 -0
- data/lib/rflow/configuration/connection.rb +98 -0
- data/lib/rflow/configuration/migrations/20010101000001_create_settings.rb +14 -0
- data/lib/rflow/configuration/migrations/20010101000002_create_components.rb +19 -0
- data/lib/rflow/configuration/migrations/20010101000003_create_ports.rb +24 -0
- data/lib/rflow/configuration/migrations/20010101000004_create_connections.rb +27 -0
- data/lib/rflow/configuration/port.rb +30 -0
- data/lib/rflow/configuration/ruby_dsl.rb +183 -0
- data/lib/rflow/configuration/setting.rb +67 -0
- data/lib/rflow/configuration/uuid_keyed.rb +18 -0
- data/lib/rflow/connection.rb +59 -0
- data/lib/rflow/connections.rb +2 -0
- data/lib/rflow/connections/zmq_connection.rb +101 -0
- data/lib/rflow/message.rb +191 -0
- data/lib/rflow/port.rb +4 -0
- data/lib/rflow/util.rb +19 -0
- data/lib/rflow/version.rb +3 -0
- data/rflow.gemspec +42 -0
- data/schema/message.avsc +36 -0
- data/schema/raw.avsc +9 -0
- data/spec/fixtures/config_ints.rb +61 -0
- data/spec/fixtures/extensions_ints.rb +141 -0
- data/spec/rflow_configuration_spec.rb +73 -0
- data/spec/rflow_message_data_raw.rb +26 -0
- data/spec/rflow_message_data_spec.rb +60 -0
- data/spec/rflow_message_spec.rb +182 -0
- data/spec/rflow_spec.rb +100 -0
- data/spec/schema_spec.rb +28 -0
- data/spec/spec_helper.rb +37 -0
- data/temp.rb +295 -0
- metadata +270 -0
@@ -0,0 +1,150 @@
|
|
1
|
+
class RFlow
  class Component

    # Mixin for Arrays of connections: lets a message be sent to every
    # connection in the array with a single call.
    #
    # TODO: make this into a class to limit the amount of extensions
    # that we have to do when operating on these "Arrays", i.e. when
    # adding two together
    module ConnectionCollection
      # Calls send_message(message) on each connection in the collection.
      def send_message(message)
        each do |connection|
          connection.send_message(message)
        end
      end
    end

    # Collection class to make it easier to index ports by name,
    # UUID, and type (class name).  All index keys are Strings.
    class PortCollection
      attr_reader :ports, :by_uuid, :by_name, :by_type

      def initialize
        @ports = Array.new
        @by_uuid = Hash.new
        @by_name = Hash.new
        @by_type = Hash.new {|hash, key| hash[key.to_s] = []}
      end

      # Adds a port to the collection and to every index.  Returns
      # self so that calls can be chained.
      def <<(port)
        by_uuid[port.instance_uuid.to_s] = port
        by_name[port.name.to_s] = port
        by_type[port.class.to_s] << port
        ports << port
        self
      end

      # Enumerate through each connected (or disconnected but
      # referenced) port.  Without a block an Enumerator is returned
      # instead of raising LocalJumpError.
      # TODO: simplify with enumerators and procs
      def each
        return enum_for(:each) unless block_given?

        ports.each do |port|
          yield port
        end
      end
    end


    # Bare superclass for (potential) later methods.  Currently empty
    class Port; end


    # Allows for a list of connections to be assigned to each port/key
    # combination.  Note that binding an input port to an un-indexed
    # output port will result in messages from all indexed connections
    # being received.  Similarly, sending to an unindexed port will
    # result in the same message being sent to all indexed
    # connections.
    class HashPort < Port
      attr_reader :name, :instance_uuid, :options, :connections_for

      # name, instance_uuid and options are stored as given; options
      # is not interpreted by this class.
      def initialize(name, instance_uuid, options={})
        @name = name
        @instance_uuid = instance_uuid
        # Previously never assigned, which left the options reader nil.
        @options = options
        @connections_for = Hash.new {|hash, key| hash[key] = Array.new.extend(ConnectionCollection)}
      end

      # Returns an extended Array of all the connections that should
      # be sent/received on this port.  Merges the nil-keyed port
      # (i.e. any connections for a port without a key) to those
      # specific for the key, so should only be used to read a list of
      # connections, not to add new ones.  Use add_connection to add a
      # new connection for a given key.
      #
      # The lookup uses fetch so that reading an unknown key does not
      # create an empty entry (which would then show up in #keys), and
      # a nil key returns the unkeyed connections once rather than
      # twice.
      def [](key)
        unkeyed = connections_for.fetch(nil) { [] }
        keyed = key.nil? ? [] : connections_for.fetch(key) { [] }
        (keyed + unkeyed).extend(ConnectionCollection)
      end


      # Adds a connection for a given key.  Returns the list of
      # connections registered for that key.
      def add_connection(key, connection)
        # Drop the memoized list used by send_message so that it is
        # rebuilt with the new connection included.
        @all_connections = nil
        connections_for[key] << connection
      end


      # Return a list of connected keys
      def keys
        connections_for.keys
      end


      # Enumerate through all the ConnectionCollections.  Without a
      # block an Enumerator is returned.
      # TODO: simplify with enumerators and procs
      def each
        return enum_for(:each) unless block_given?

        connections_for.values.each do |connections|
          yield connections
        end
      end


      # Send a message to all connections on all keys for this port,
      # but only once per connection.
      def send_message(message)
        all_connections.send_message(message)
      end


      # Should be overridden.  Called when it is time to actually
      # establish the connection
      def connect!
        raise NotImplementedError, "Raw ports do not know which direction to connect"
      end

      private

      # De-duplicated list of every connection on every key, memoized
      # until add_connection is next called.
      def all_connections
        @all_connections ||= connections_for.values.flatten.uniq.extend(ConnectionCollection)
      end

    end


    # Port whose connections are established with connect_input!
    class InputPort < HashPort
      def connect!
        connections_for.each do |port_key, connections|
          connections.each do |connection|
            connection.connect_input!
          end
        end
      end
    end


    # Port whose connections are established with connect_output!
    class OutputPort < HashPort
      def connect!
        connections_for.each do |port_key, keyed_connections|
          keyed_connections.each do |connection|
            connection.connect_output!
          end
        end
      end
    end

    # Inherits HashPort unchanged, so connect! raises NotImplementedError.
    class DisconnectedPort < HashPort; end

  end
end
|
145
|
+
|
146
|
+
__END__
|
147
|
+
|
148
|
+
out[even] -> a
|
149
|
+
out[odd] -> b
|
150
|
+
out[nil] -> c
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'rflow/components/raw/extensions'
|
2
|
+
|
3
|
+
class RFlow
  module Components
    module Raw

      # Directory holding the schema files: four path components up
      # from this file, then 'schema'.
      SCHEMA_DIRECTORY = ::File.expand_path('../../../../schema', __FILE__)

      # Schema file name => name of the data type it defines.
      SCHEMA_FILES = {
        'raw.avsc' => 'RFlow::Message::Data::Raw',
      }

      # Read each schema file and register it as an 'avro' data type.
      SCHEMA_FILES.each_pair do |file_name, data_type_name|
        schema_path = ::File.join(SCHEMA_DIRECTORY, file_name)
        RFlow::Configuration.add_available_data_type(data_type_name, 'avro', ::File.read(schema_path))
      end

      # Register the data extension for the raw data type.
      RFlow::Configuration.add_available_data_extension(
        'RFlow::Message::Data::Raw',
        RFlow::Components::Raw::Extensions::RawExtension
      )

    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
class RFlow
  module Components
    module Raw
      module Extensions

        # Accessors for the 'raw' entry of the extended object's
        # data_object hash.
        module RawExtension
          # Hook run when an object is extended with this module:
          # gives it a data_object with an empty 'raw' entry unless it
          # already has a data_object.
          def self.extended(base_data)
            base_data.data_object ||= {'raw' => ''}
          end

          # Current value stored under 'raw'.
          def raw
            data_object['raw']
          end

          # Replace the value stored under 'raw'.
          def raw=(new_raw)
            data_object['raw'] = new_raw
          end
        end

      end
    end
  end
end
|
@@ -0,0 +1,290 @@
|
|
1
|
+
require 'rflow/util'
|
2
|
+
|
3
|
+
class RFlow
|
4
|
+
|
5
|
+
# Contains all the configuration data and methods for RFlow.
|
6
|
+
# Interacts directly with underlying sqlite database, and keeps a
|
7
|
+
# registry of available data types, extensions, and components.
|
8
|
+
# Also includes an external DSL, RubyDSL, that can be used in
|
9
|
+
# crafting config-like files that load the database.
|
10
|
+
#
|
11
|
+
# Configuration provides a MVC-like framework for config files,
|
12
|
+
# where the models are the Setting, Component, Port, and Connection
|
13
|
+
# subclasses, the controllers are things like RubyDSL, and the views
|
14
|
+
# are defined relative to the controllers
|
15
|
+
class Configuration
|
16
|
+
|
17
|
+
# Error type for an invalid configuration.  Carries no behaviour
# beyond StandardError.
class ConfigurationInvalid < StandardError
end
|
19
|
+
|
20
|
+
|
21
|
+
# Abstract ActiveRecord base class that holds the DB config and
# connection information for the configuration database.
class ConfigDB < ActiveRecord::Base
  # Abstract: this class itself is not backed by a table.
  self.abstract_class = true
end
|
25
|
+
|
26
|
+
|
27
|
+
# A collection of data extensions with a naive, prefix-based
# 'inheritance' on lookup: looking up a key with [] returns the
# extensions of every registered data type whose name is a string
# prefix of the lookup key, consolidated into one flat array.
class DataExtensionCollection

  def initialize
    # TODO: choose a different data structure ...
    @hash = Hash.new { |registry, data_type| registry[data_type] = [] }
  end

  # Return an array of all of the values that have keys that are
  # prefixes of the lookup key (compared as a String).
  def [](key)
    lookup = key.to_s
    matches = []
    @hash.each do |data_type, extensions|
      matches << extensions if lookup.start_with?(data_type)
    end
    matches.flatten.compact
  end

  # Adds a data extension for a given data type (stored under its
  # String form) to the collection.
  def add(data_type, extension)
    registered = @hash[data_type.to_s]
    registered << extension
  end

  # Remove all elements from the collection.  Useful for testing,
  # not much else
  def clear
    @hash.clear
  end

end
|
60
|
+
|
61
|
+
|
62
|
+
# A collection of data types (schemas) indexed by their name and
# their schema type ('avro').  Created on first use; an unknown
# name yields (and stores) an empty Hash.
def self.available_data_types
  @available_data_types ||= Hash.new { |types, name| types[name] = Hash.new }
end

# A DataExtensionCollection to hold available extensions that
# will be applied to the de-serialized data types.  Created on
# first use.
def self.available_data_extensions
  @available_data_extensions ||= DataExtensionCollection.new
end

# A Hash of defined components, usually automatically populated
# when a component subclasses RFlow::Component.  Created on first
# use.
def self.available_components
  @available_components ||= Hash.new
end
|
82
|
+
|
83
|
+
# TODO: refactor each of these add_available_* into collections to
|
84
|
+
# make DRYer. Also figure out what to do with all the to_syms
|
85
|
+
|
86
|
+
# Add a schema to the available_data_types class attribute.
# Schema is indexed by data_type_name and schema/serialization
# type, both stored as Strings.  'avro' is currently the only
# supported data_serialization_type; it may be given as a String
# or a Symbol.
#
# Raises ArgumentError (after logging the message) when the
# serialization type is not 'avro' or when the data type is
# already defined for that serialization type.  Returns the
# stored schema.
def self.add_available_data_type(data_type_name, data_serialization_type, data_schema)
  # Normalise once so the validation and the storage keys agree.
  type_name = data_type_name.to_s
  serialization_type = data_serialization_type.to_s

  unless serialization_type == 'avro'
    error_message = "Data serialization_type must be 'avro' for '#{data_type_name}'"
    RFlow.logger.error error_message
    raise ArgumentError, error_message
  end

  if available_data_types[type_name].include? serialization_type
    error_message = "Data type '#{data_type_name}' already defined for serialization_type '#{data_serialization_type}'"
    RFlow.logger.error error_message
    raise ArgumentError, error_message
  end

  available_data_types[type_name][serialization_type] = data_schema
end
|
105
|
+
|
106
|
+
# Register a data extension for a data type in the
# available_data_extensions class attribute.  data_extension must
# be a Ruby Module, which is meant to extend
# RFlow::Message::Data objects with additional
# methods/capability; anything else is logged and rejected with
# an ArgumentError.  Naive, prefix-based inheritance is possible,
# see available_data_extensions or the DataExtensionCollection
# class
def self.add_available_data_extension(data_type_name, data_extension)
  if data_extension.is_a? Module
    return available_data_extensions.add(data_type_name, data_extension)
  end

  error_message = "Invalid data extension #{data_extension} for #{data_type_name}. Only Ruby Modules allowed"
  RFlow.logger.error error_message
  raise ArgumentError, error_message
end
|
121
|
+
|
122
|
+
|
123
|
+
# Used when RFlow::Component is subclassed to add another
# available component to the list.  Components are indexed by
# component.name; registering a second component under the same
# name is logged and raises ArgumentError.  Returns the
# registered component.
def self.add_available_component(component)
  if available_components.include?(component.name)
    # The message previously read "Component already '...' already defined".
    error_message = "Component '#{component.name}' already defined"
    RFlow.logger.error error_message
    raise ArgumentError, error_message
  end
  available_components[component.name] = component
end
|
133
|
+
|
134
|
+
|
135
|
+
# Connect to the configuration sqlite database, but use the
# ConfigDB subclass to protect the connection information from
# other ActiveRecord apps (i.e. Rails).  Also points the
# ActiveRecord::Base logger at RFlow.logger.
def self.establish_config_database_connection(config_database_path)
  RFlow.logger.debug "Establishing connection to config database (#{Dir.getwd}) '#{config_database_path}'"
  ActiveRecord::Base.logger = RFlow.logger

  connection_options = {
    :adapter => "sqlite3",
    :database => config_database_path
  }
  ConfigDB.establish_connection(connection_options)
end
|
144
|
+
|
145
|
+
|
146
|
+
# Using default ActiveRecord migrations, attempt to migrate the
# database to the latest version.  Migrations are read from the
# 'configuration/migrations' directory next to this file.
def self.migrate_database
  RFlow.logger.debug "Applying default migrations to config database"
  source_directory = File.dirname(__FILE__)
  migrations_directory_path = File.join(source_directory, 'configuration', 'migrations')
  # ActiveRecord::Migration.verbose = RFlow.logger
  ActiveRecord::Migrator.migrate(migrations_directory_path)
end
|
154
|
+
|
155
|
+
|
156
|
+
# Load the config file, which should load/process/store all the
# elements.  Only run this after the database has been setup.
# The file is executed with Kernel#load after an info-level log
# line naming it and the current working directory.
def self.process_config_file(config_file_path)
  working_directory = Dir.getwd
  RFlow.logger.info "Processing config file (#{working_directory}) '#{config_file_path}'"
  load(config_file_path)
end
|
162
|
+
|
163
|
+
|
164
|
+
# Connect to the configuration database, migrate it to the latest
# version, and process a config file if provided.  Afterwards any
# settings missing from the database are filled in via
# merge_defaults!.
#
# The config file (when given) and merge_defaults! run with the
# working directory set to the directory containing the database.
# The previous working directory is restored afterwards, including
# when one of those steps raises.
def self.initialize_database(config_database_path, config_file_path=nil)
  RFlow.logger.debug "Initializing config database (#{Dir.getwd}) '#{config_database_path}'"

  # Note that this connects ActiveRecord::Base itself rather than
  # ConfigDB.
  RFlow.logger.debug "Establishing connection to config database (#{Dir.getwd}) '#{config_database_path}'"
  ActiveRecord::Base.logger = RFlow.logger
  ActiveRecord::Base.establish_connection(:adapter => "sqlite3",
                                          :database => config_database_path)

  migrate_database

  # Expand before changing directory so that a relative path is
  # resolved against the caller's working directory.
  expanded_config_file_path = File.expand_path config_file_path if config_file_path

  working_dir = Dir.getwd
  Dir.chdir File.dirname(config_database_path)

  begin
    if config_file_path
      process_config_file(expanded_config_file_path)
    end

    RFlow.logger.debug "Defaulting non-existing config values"
    merge_defaults!
  ensure
    # Always return to where we started; without this an error
    # above left the process in the database directory.
    Dir.chdir working_dir
  end
end
|
190
|
+
|
191
|
+
|
192
|
+
# Make sure that the configuration has all the necessary values set.
# For every entry in Setting::DEFAULTS a Setting is found or created
# by name; a Proc default is called to obtain the value.  Raises
# RuntimeError with the joined validation messages if a resulting
# setting is not valid.
def self.merge_defaults!
  Setting::DEFAULTS.each do |name, default_value_or_proc|
    default_value = if default_value_or_proc.is_a?(Proc)
                      default_value_or_proc.call()
                    else
                      default_value_or_proc
                    end

    setting = Setting.find_or_create_by_name(:name => name, :value => default_value)
    next if setting.valid?

    messages = setting.errors.map { |attribute, error_string| error_string }
    raise RuntimeError, messages.join(', ')
  end
end
|
205
|
+
|
206
|
+
|
207
|
+
# Readers and writers for the database path and the cache containers
# set up in initialize.
attr_accessor :config_database_path,
              :cached_settings,
              :cached_components,
              :cached_ports,
              :cached_connections
|
212
|
+
|
213
|
+
|
214
|
+
# Sets up empty caches, connects to the configuration database at
# config_database_path and checks that each model can be queried.
# Raises ArgumentError (after logging) when a query fails with
# ActiveRecord::StatementInvalid.
def initialize(config_database_path)
  @cached_settings, @cached_components = Hash.new, Hash.new
  @cached_ports, @cached_connections = [], []

  @config_database_path = config_database_path
  self.class.establish_config_database_connection(config_database_path)

  # Validate the connected database.  TODO: make this more
  # complete, i.e. validate the various columns
  begin
    [Setting, Component, Port, Connection].each do |model|
      model.first
    end
  rescue ActiveRecord::StatementInvalid => e
    error_message = "Invalid schema in configuration database: #{e.message}"
    RFlow.logger.error error_message
    raise ArgumentError, error_message
  end
end
|
236
|
+
|
237
|
+
|
238
|
+
# Multi-line, human-readable dump of the configuration: every
# setting, then every component followed by each of its output
# connections and the input port/component on the other end.
def to_s
  lines = ["Configuration:\n"]

  settings.each do |setting|
    lines << "Setting: '#{setting.name}' = '#{setting.value}'\n"
  end

  components.each do |component|
    lines << "Component '#{component.name}' as #{component.specification} (#{component.uuid})\n"
    component.output_ports.each do |output_port|
      output_port.output_connections.each do |output_connection|
        input_port = output_connection.input_port
        lines << "\tOutputPort '#{output_port.name}' key '#{output_connection.output_port_key}' (#{output_port.uuid}) =>\n"
        lines << "\t\tConnection '#{output_connection.name}' as #{output_connection.type} (#{output_connection.uuid}) =>\n"
        lines << "\t\tInputPort '#{input_port.name}' key '#{output_connection.input_port_key}' (#{input_port.uuid}) Component '#{input_port.component.name}' (#{input_port.component.uuid})\n"
      end
    end
  end

  lines.join
end
|
256
|
+
|
257
|
+
# Helper method to access settings with minimal syntax.  Returns
# the value of the setting with the given name, or nil when no such
# setting exists.  Errors raised by the lookup itself are no longer
# swallowed by a blanket `rescue nil`; they propagate to the caller.
def [](setting_name)
  setting = Setting.find_by_name(setting_name)
  setting && setting.value
end
|
261
|
+
|
262
|
+
|
263
|
+
# Every Component record, as returned by Component.all.
def components; Component.all; end
|
266
|
+
|
267
|
+
|
268
|
+
# The Component record whose uuid is component_instance_uuid, as
# returned by Component.find_by_uuid.
def component(component_instance_uuid)
  Component.find_by_uuid(component_instance_uuid)
end
|
271
|
+
|
272
|
+
|
273
|
+
# Every Setting record, as returned by Setting.all.
def settings; Setting.all; end
|
276
|
+
|
277
|
+
# Instance-level shortcut to the class-level available_components
# registry.
def available_components
  registry_owner = self.class
  registry_owner.available_components
end
|
280
|
+
end
|
281
|
+
end
|
282
|
+
|
283
|
+
# Load the models
|
284
|
+
require 'rflow/configuration/setting'
|
285
|
+
require 'rflow/configuration/component'
|
286
|
+
require 'rflow/configuration/port'
|
287
|
+
require 'rflow/configuration/connection'
|
288
|
+
|
289
|
+
# Incorporate various config file processors
|
290
|
+
require 'rflow/configuration/ruby_dsl'
|