rflow 1.3.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,43 +1,94 @@
1
1
  class RFlow
2
+ # @!parse
3
+ # class Message
4
+ # # Clock messages.
5
+ # module Clock
6
+ # # Message emitted by {RFlow::Components::Clock}. Of course the real class is {RFlow::Message}
7
+ # # with type +RFlow::Message::Clock::Tick+.
8
+ # class Tick
9
+ # # @!attribute name
10
+ # # The name of the clock.
11
+ # # @return [String]
12
+ #
13
+ # # @!attribute timestamp
14
+ # # The timestamp of the tick, in milliseconds from epoch.
15
+ # # @return [Integer]
16
+ #
17
+ # # Just here to force Yard to create documentation.
18
+ # # @!visibility private
19
+ # def initialize; end
20
+ # end
21
+ # end
22
+ # end
23
+
24
+ # Components.
2
25
  module Components
26
+ # A clock. It ticks every _n_ seconds. Get it?
27
+ #
28
+ # Accepts config parameters:
29
+ # - +name+ - name of the clock, to disambiguate more than one
30
+ # - +tick_interval+ - how long to wait between ticks
31
+ #
32
+ # Emits {RFlow::Message}s whose internal type is {RFlow::Message::Clock::Tick}.
3
33
  class Clock < Component
34
+ # @!visibility private
4
35
  module Tick
36
+ # @!visibility private
5
37
  SCHEMA_DIRECTORY = ::File.expand_path(::File.join(::File.dirname(__FILE__), '..', '..', '..', 'schema'))
38
+ # @!visibility private
6
39
  SCHEMA_FILES = {'tick.avsc' => 'RFlow::Message::Clock::Tick'}
7
40
  SCHEMA_FILES.each do |file_name, data_type_name|
8
41
  schema_string = ::File.read(::File.join(SCHEMA_DIRECTORY, file_name))
9
42
  RFlow::Configuration.add_available_data_type data_type_name, 'avro', schema_string
10
43
  end
44
+ # @!visibility private
11
45
  module Extension
46
+ # @!visibility private
12
47
  def self.extended(base_data); base_data.data_object ||= {}; end
48
+ # @!visibility private
13
49
  def name; data_object['name']; end
50
+ # @!visibility private
14
51
  def name=(new_name); data_object['name'] = new_name; end
52
+ # @!visibility private
15
53
  def timestamp; data_object['timestamp']; end
54
+ # @!visibility private
16
55
  def timestamp=(new_ts); data_object['timestamp'] = new_ts; end
17
56
  end
18
57
  RFlow::Configuration.add_available_data_extension('RFlow::Message::Clock::Tick', Extension)
19
58
  end
20
59
 
60
+ # @!attribute [r] tick_port
61
+ # Outputs {RFlow::Message::Clock::Tick} messages.
62
+ # @return [Component::OutputPort]
21
63
  output_port :tick_port
22
64
 
65
+ # Default configuration.
23
66
  DEFAULT_CONFIG = {
24
67
  'name' => 'Clock',
25
68
  'tick_interval' => 1
26
69
  }
27
70
 
71
+ # @!visibility private
28
72
  attr_reader :config, :tick_interval
29
73
 
74
+ # RFlow-called method at startup.
75
+ # @param config [Hash] configuration from the RFlow config file
76
+ # @return [void]
30
77
  def configure!(config)
31
78
  @config = DEFAULT_CONFIG.merge config
32
79
  @tick_interval = Float(@config['tick_interval'])
33
80
  end
34
81
 
82
+ # @!visibility private
35
83
  def clock_name; config['name']; end
36
84
 
85
+ # RFlow-called method at startup.
86
+ # @return [void]
37
87
  def run!
38
88
  @timer = EventMachine::PeriodicTimer.new(tick_interval) { tick }
39
89
  end
40
90
 
91
+ # @!visibility private
41
92
  def tick
42
93
  tick_port.send_message(RFlow::Message.new('RFlow::Message::Clock::Tick').tap do |m|
43
94
  m.data.name = clock_name
@@ -1,11 +1,46 @@
1
1
  class RFlow
2
+ # @!parse
3
+ # class Message
4
+ # class Data
5
+ # # Message emitted by {RFlow::Components::GenerateIntegerSequence}.
6
+ # # Of course the real class is {RFlow::Message} with type +RFlow::Message::Data::Integer+.
7
+ # #
8
+ # # {RFlow::Message::Data#data_object} will return the integer.
9
+ # class Integer
10
+ # # Just here to force Yard to create documentation.
11
+ # # @!visibility private
12
+ # def initialize; end
13
+ # end
14
+ # end
15
+ # end
16
+
17
+ # Components.
2
18
  module Components
3
19
  Configuration.add_available_data_type('RFlow::Message::Data::Integer', 'avro', '{"type": "long"}')
4
20
 
21
+ # An integer sequence generator that ticks every _n_ seconds.
22
+ #
23
+ # Accepts config parameters:
24
+ # - +start+ - the number to start at (defaults to +0+)
25
+ # - +finish+ - the number to finish at (defaults to +0+; no numbers greater than this will be emitted)
26
+ # - +step+ - the number to step (defaults to +1+)
27
+ # - +interval_seconds+ - how long to wait, in seconds, between ticks (defaults to +0+)
28
+ #
29
+ # Emits {RFlow::Message}s whose internal type is {RFlow::Message::Data::Integer}.
5
30
  class GenerateIntegerSequence < Component
31
+ # @!attribute [r] out
32
+ # Outputs {RFlow::Message::Data::Integer} messages.
33
+ # @return [Component::OutputPort]
6
34
  output_port :out
35
+ # @!attribute [r] even_odd_out
36
+ # Outputs the same messages as {out}. Also addressable with subports +even+ and +odd+
37
+ # to select those subsequences.
38
+ # @return [Component::OutputPort]
7
39
  output_port :even_odd_out
8
40
 
41
+ # RFlow-called method at startup.
42
+ # @param config [Hash] configuration from the RFlow config file
43
+ # @return [void]
9
44
  def configure!(config)
10
45
  @start = config['start'].to_i
11
46
  @finish = config['finish'].to_i
@@ -14,12 +49,15 @@ class RFlow
14
49
  @interval_seconds = config['interval_seconds'].to_i
15
50
  end
16
51
 
17
- # Note that this uses the timer (sometimes with 0 interval) so as
18
- # not to block the reactor
52
+ # RFlow-called method at startup.
53
+ # @return [void]
19
54
  def run!
55
+ # Note that this uses the timer (sometimes with 0 interval) so as
56
+ # not to block the reactor.
20
57
  @timer = EM::PeriodicTimer.new(@interval_seconds) { generate }
21
58
  end
22
59
 
60
+ # @!visibility private
23
61
  def generate
24
62
  Message.new('RFlow::Message::Data::Integer').tap do |m|
25
63
  m.data.data_object = @start
@@ -1,23 +1,62 @@
1
1
  class RFlow
2
+ # @!parse
3
+ # class Message
4
+ # class Data
5
+ # # RFlow format defined for log messages which can be emitted by components.
6
+ # # Of course the real class is {RFlow::Message}
7
+ # # with type +RFlow::Message::Data::Log+.
8
+ # class Log
9
+ # # @!attribute timestamp
10
+ # # The timestamp of the log, in ms since epoch.
11
+ # # @return [Integer]
12
+ #
13
+ # # @!attribute level
14
+ # # The log level (INFO, WARN, ERROR, etc.).
15
+ # # @return [String]
16
+ #
17
+ # # @!attribute text
18
+ # # The text of the log message.
19
+ # # @return [String]
20
+ #
21
+ # # Just here to force Yard to create documentation.
22
+ # # @!visibility private
23
+ # def initialize; end
24
+ # end
25
+ # end
26
+ # end
27
+
28
+ # Components.
2
29
  module Components
30
+ # @!visibility private
3
31
  module Log
32
+ # @!visibility private
4
33
  module Extensions
34
+ # @!visibility private
5
35
  module LogExtension
36
+ # @!visibility private
6
37
  def self.extended(base_data)
7
38
  base_data.data_object ||= {'timestamp' => 0, 'level' => 'INFO', 'text' => ''}
8
39
  end
9
40
 
41
+ # @!visibility private
10
42
  def timestamp; data_object['timestamp']; end
43
+ # @!visibility private
11
44
  def timestamp=(new_timestamp); data_object['timestamp'] = new_timestamp; end
45
+ # @!visibility private
12
46
  def level; data_object['level']; end
47
+ # @!visibility private
13
48
  def level=(new_level); data_object['level'] = new_level; end
49
+ # @!visibility private
14
50
  def text; data_object['text']; end
51
+ # @!visibility private
15
52
  def text=(new_text); data_object['text'] = new_text; end
16
53
  end
17
54
  end
18
55
 
56
+ # @!visibility private
19
57
  SCHEMA_DIRECTORY = ::File.expand_path(::File.join(::File.dirname(__FILE__), '..', '..', '..', 'schema'))
20
58
 
59
+ # @!visibility private
21
60
  SCHEMA_FILES = {
22
61
  'log.avsc' => 'RFlow::Message::Data::Log',
23
62
  }
@@ -1,19 +1,46 @@
1
1
  class RFlow
2
+ # @!parse
3
+ # class Message
4
+ # class Data
5
+ # # RFlow format defined for raw-data messages which can be emitted by components.
6
+ # # Of course the real class is {RFlow::Message}
7
+ # # with type +RFlow::Message::Data::Raw+.
8
+ # class Raw
9
+ # # @!attribute raw
10
+ # # The raw data.
11
+ # # @return [String]
12
+ #
13
+ # # Just here to force Yard to create documentation.
14
+ # # @!visibility private
15
+ # def initialize; end
16
+ # end
17
+ # end
18
+ # end
19
+
20
+ # Components.
2
21
  module Components
22
+ # @!visibility private
3
23
  module Raw
24
+ # @!visibility private
4
25
  module Extensions
26
+ # @!visibility private
5
27
  module RawExtension
28
+ # @!visibility private
6
29
  def self.extended(base_data)
7
30
  base_data.data_object ||= {'raw' => ''}
8
31
  end
9
32
 
33
+ # @!visibility private
10
34
  def raw; data_object['raw']; end
35
+ # @!visibility private
11
36
  def raw=(new_raw); data_object['raw'] = new_raw; end
12
37
  end
13
38
  end
14
39
 
40
+ # @!visibility private
15
41
  SCHEMA_DIRECTORY = ::File.expand_path(::File.join(::File.dirname(__FILE__), '..', '..', '..', 'schema'))
16
42
 
43
+ # @!visibility private
17
44
  SCHEMA_FILES = {
18
45
  'raw.avsc' => 'RFlow::Message::Data::Raw',
19
46
  }
@@ -1,10 +1,29 @@
1
1
  class RFlow
2
+ # Components.
2
3
  module Components
4
+ # A component that replicates all inbound messages onto a single out port in
5
+ # order to easily support a many-to-many connection pattern (connect all the
6
+ # ins to this component and all the outs to this component instead of
7
+ # all of the ins to all of the outs).
8
+ #
9
+ # Emits {RFlow::Message}s of whatever type was sent in. Any messages with
10
+ # problems being sent to {out} will be sent to {errored} instead.
3
11
  class Replicate < Component
12
+ # @!attribute [r] in
13
+ # Receives {RFlow::Message}s.
14
+ # @return [Component::InputPort]
4
15
  input_port :in
16
+ # @!attribute [r] out
17
+ # Outputs {RFlow::Message}s.
18
+ # @return [Component::OutputPort]
5
19
  output_port :out
20
+ # @!attribute [r] errored
21
+ # Outputs {RFlow::Message}s that could not be sent to {out}.
22
+ # @return [Component::OutputPort]
6
23
  output_port :errored
7
24
 
25
+ # RFlow-called method on message arrival.
26
+ # @return [void]
8
27
  def process_message(input_port, input_port_key, connection, message)
9
28
  out.each do |connections|
10
29
  begin
@@ -1,15 +1,39 @@
1
1
  class RFlow
2
+ # Components.
2
3
  module Components
4
+ # Component that filters messages based on Ruby defined in the RFlow config file.
5
+ # Inbound messages will be sent out {filtered} if the predicate returns truthy,
6
+ # {dropped} if it returns falsey, or {errored} if it raises an exception.
7
+ #
8
+ # Accepts config parameter +filter_proc_string+ which is the body of a +lambda+
9
+ # receiving a message +message+. For example, +message.data.data_object['foo'] > 2+.
3
10
  class RubyProcFilter < Component
11
+ # @!attribute [r] in
12
+ # Receives {RFlow::Message}s.
13
+ # @return [Component::InputPort]
4
14
  input_port :in
15
+ # @!attribute [r] filtered
16
+ # Outputs {RFlow::Message}s that pass the filter predicate.
17
+ # @return [Component::OutputPort]
5
18
  output_port :filtered
19
+ # @!attribute [r] dropped
20
+ # Outputs {RFlow::Message}s that do not pass the filter predicate.
21
+ # @return [Component::OutputPort]
6
22
  output_port :dropped
23
+ # @!attribute [r] errored
24
+ # Outputs {RFlow::Message}s that raise from the filter predicate.
25
+ # @return [Component::OutputPort]
7
26
  output_port :errored
8
27
 
28
+ # RFlow-called method at startup.
29
+ # @param config [Hash] configuration from the RFlow config file
30
+ # @return [void]
9
31
  def configure!(config)
10
32
  @filter_proc = eval("lambda {|message| #{config['filter_proc_string']} }")
11
33
  end
12
34
 
35
+ # RFlow-called method on message arrival.
36
+ # @return [void]
13
37
  def process_message(input_port, input_port_key, connection, message)
14
38
  begin
15
39
  if @filter_proc.call(message)
@@ -1,18 +1,18 @@
1
1
  class RFlow
2
2
  # Contains all the configuration data and methods for RFlow.
3
- # Interacts directly with underlying sqlite database, and keeps a
3
+ # Interacts directly with underlying SQLite database, and keeps a
4
4
  # registry of available data types, extensions, and components.
5
5
  # Also includes an external DSL, RubyDSL, that can be used in
6
6
  # crafting config-like files that load the database.
7
7
  #
8
- # Configuration provides a MVC-like framework for config files,
9
- # where the models are the Setting, Component, Port, and Connection
8
+ # {Configuration} provides an MVC-like framework for config files,
9
+ # where the models are the {Setting}, {Component}, {Port}, and {Connection}
10
10
  # subclasses, the controllers are things like RubyDSL, and the views
11
- # are defined relative to the controllers
11
+ # are defined relative to the controllers.
12
12
  class Configuration
13
13
  # A collection class for data extensions that supports a naive
14
14
  # prefix-based 'inheritance' on lookup. When looking up a key
15
- # with [] all existing keys will be examined to determine if the
15
+ # with {[]} all existing keys will be examined to determine if the
16
16
  # existing key is a string prefix of the lookup key. All the
17
17
  # results are consolidated into a single, flattened array.
18
18
  class DataExtensionCollection
@@ -23,6 +23,7 @@ class RFlow
23
23
 
24
24
  # Return an array of all of the values that have keys that are
25
25
  # prefixes of the lookup key.
26
+ # @return [Array]
26
27
  def [](key)
27
28
  @extensions_for.
28
29
  find_all {|data_type, _| key.to_s.start_with?(data_type) }.
@@ -30,12 +31,14 @@ class RFlow
30
31
  end
31
32
 
32
33
  # Adds a data extension for a given data type to the collection
34
+ # @return [void]
33
35
  def add(data_type, extension)
34
36
  @extensions_for[data_type.to_s] << extension
35
37
  end
36
38
 
37
39
  # Remove all elements from the collection. Useful for testing,
38
40
  # not much else
41
+ # @return [void]
39
42
  def clear
40
43
  @extensions_for.clear
41
44
  end
@@ -49,29 +52,32 @@ class RFlow
49
52
  class << self
50
53
  # A collection of data types (schemas) indexed by their name and
51
54
  # their schema type ('avro').
55
+ # @return [Hash]
52
56
  def available_data_types
53
57
  @available_data_types ||= Hash.new {|hash, key| hash[key] = {}}
54
58
  end
55
59
 
56
- # A DataExtensionCollection to hold available extensions that
57
- # will be applied to the de-serialized data types
60
+ # A {DataExtensionCollection} to hold available extensions that
61
+ # will be applied to the de-serialized data types.
62
+ # @return [DataExtensionCollection]
58
63
  def available_data_extensions
59
64
  @available_data_extensions ||= DataExtensionCollection.new
60
65
  end
61
66
 
62
67
  # A Hash of defined components, usually automatically populated
63
- # when a component subclasses RFlow::Component
68
+ # when a component subclasses {RFlow::Component}.
69
+ # @return [Hash]
64
70
  def available_components
65
71
  @available_components ||= {}
66
72
  end
67
73
 
68
- # TODO: refactor each of these add_available_* into collections to
69
- # make DRYer. Also figure out what to do with all to to_syms
70
-
71
- # Add a schema to the available_data_types class attribute.
72
- # Schema is indexed by data_type_name and schema/serialization
73
- # type. 'avro' is currently the only supported serialization_type.
74
+ # Add a schema to the {available_data_types} class attribute.
75
+ # Schema is indexed by +name+ and +serialization_type+.
76
+ # +avro+ is currently the only supported +serialization_type+.
77
+ # @return [void]
74
78
  def add_available_data_type(name, serialization_type, schema)
79
+ # TODO: refactor each of these add_available_* into collections to
80
+ # make DRYer. Also figure out what to do with all the to_syms
75
81
  raise ArgumentError, "Data serialization_type must be 'avro' for '#{name}'" unless serialization_type == 'avro'
76
82
 
77
83
  if available_data_types[name.to_s].include? serialization_type.to_s
@@ -81,12 +87,13 @@ class RFlow
81
87
  available_data_types[name.to_s][serialization_type.to_s] = schema
82
88
  end
83
89
 
84
- # Add a data extension to the available_data_extensions class
85
- # attributes. The data_extension parameter should be the name of
86
- # a ruby module that will extend RFlow::Message::Data object to
87
- # provide additional methods/capability. Naive, prefix-based
88
- # inheritance is possible, see available_data_extensions or the
89
- # DataExtensionCollection class
90
+ # Add a data extension to the {available_data_extensions} class
91
+ # attribute. The +extension+ parameter should be the name of
92
+ # a ruby module that will extend {RFlow::Message::Data} to
93
+ # provide additional methods/capability. Naive, prefix-based
94
+ # inheritance is possible, see {available_data_extensions} or
95
+ # {DataExtensionCollection}.
96
+ # @return [void]
90
97
  def add_available_data_extension(data_type_name, extension)
91
98
  unless extension.is_a? Module
92
99
  raise ArgumentError, "Invalid data extension #{extension} for #{data_type_name}. Only Ruby Modules allowed"
@@ -95,8 +102,9 @@ class RFlow
95
102
  available_data_extensions.add data_type_name, extension
96
103
  end
97
104
 
98
- # Used when RFlow::Component is subclassed to add another
105
+ # Used when {RFlow::Component} is subclassed to add another
99
106
  # available component to the list.
107
+ # @return [void]
100
108
  def add_available_component(component)
101
109
  if available_components.include?(component.name)
102
110
  raise ArgumentError, "Component already '#{component.name}' already defined"
@@ -104,9 +112,10 @@ class RFlow
104
112
  available_components[component.name] = component
105
113
  end
106
114
 
107
- # Connect to the configuration sqlite database, but use the
108
- # ConfigurationItem class to protect the connection information from
109
- # other ActiveRecord apps (i.e. Rails)
115
+ # Connect to the configuration SQLite database, but use
116
+ # {ConfigurationItem} to protect the connection information from
117
+ # other ActiveRecord apps (i.e. Rails).
118
+ # @return [void]
110
119
  def establish_config_database_connection(database_path)
111
120
  RFlow.logger.debug "Establishing connection to config database (#{Dir.getwd}) '#{database_path}'"
112
121
  ActiveRecord::Base.logger = RFlow.logger
@@ -115,6 +124,7 @@ class RFlow
115
124
 
116
125
  # Using default ActiveRecord migrations, attempt to migrate the
117
126
  # database to the latest version.
127
+ # @return [void]
118
128
  def migrate_database
119
129
  RFlow.logger.debug 'Applying default migrations to config database'
120
130
  migrations_path = File.join(File.dirname(__FILE__), 'configuration', 'migrations')
@@ -123,7 +133,8 @@ class RFlow
123
133
  end
124
134
 
125
135
  # Load the config file, which should load/process/store all the
126
- # elements. Only run this after the database has been setup
136
+ # elements. Only run this after the database has been set up.
137
+ # @return [void]
127
138
  def process_config_file(path)
128
139
  RFlow.logger.info "Processing config file (#{Dir.getwd}) '#{path}'"
129
140
  load path
@@ -131,6 +142,7 @@ class RFlow
131
142
 
132
143
  # Connect to the configuration database, migrate it to the latest
133
144
  # version, and process a config file if provided.
145
+ # @return [void]
134
146
  def initialize_database(database_path, config_file_path = nil)
135
147
  RFlow.logger.debug "Initializing config database (#{Dir.getwd}) '#{database_path}'"
136
148
 
@@ -156,6 +168,7 @@ class RFlow
156
168
  end
157
169
 
158
170
  # Make sure that the configuration has all the necessary values set.
171
+ # @return [void]
159
172
  def merge_defaults!
160
173
  Setting::DEFAULTS.each do |name, default_value_or_proc|
161
174
  value = default_value_or_proc.is_a?(Proc) ? default_value_or_proc.call() : default_value_or_proc
@@ -184,6 +197,8 @@ class RFlow
184
197
  end
185
198
  end
186
199
 
200
+ # Output the RFlow configuration to a pretty-printed String.
201
+ # @return [String]
187
202
  def to_s
188
203
  string = "Configuration:\n"
189
204
 
@@ -208,13 +223,36 @@ class RFlow
208
223
  string
209
224
  end
210
225
 
226
+ # Retrieve a setting value by name from the SQLite database.
227
+ # @return [Object]
211
228
  def [](name); Setting.find_by_name(name).value rescue nil; end
229
+
230
+ # Retrieve all the {Setting}s from the SQLite database.
231
+ # @return [Array<Setting>]
212
232
  def settings; Setting.all; end
233
+
234
+ # Retrieve all the {Shard}s from the SQLite database.
235
+ # @return [Array<Shard>]
213
236
  def shards; Shard.all; end
237
+
238
+ # Retrieve all the {Connection}s from the SQLite database.
239
+ # @return [Array<Connection>]
214
240
  def connections; Connection.all; end
241
+
242
+ # Retrieve a single {Shard} by UUID from the SQLite database.
243
+ # @return [Shard]
215
244
  def shard(uuid); Shard.find_by_uuid uuid; end
245
+
246
+ # Retrieve all the {Component}s from the SQLite database.
247
+ # @return [Array<Component>]
216
248
  def components; Component.all; end
249
+
250
+ # Retrieve a single {Component} by UUID from the SQLite database.
251
+ # @return [Component]
217
252
  def component(uuid); Component.find_by_uuid uuid; end
253
+
254
+ # Retrieve the mapping from component name to {Component}.
255
+ # @return [Hash]
218
256
  def available_components; Configuration.available_components; end
219
257
  end
220
258
  end