rflow 1.0.0a2 → 1.0.0a3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.ruby-version +1 -1
- data/README.VAGRANT +63 -0
- data/README.md +118 -33
- data/Vagrantfile +53 -0
- data/bin/rflow +6 -1
- data/example/basic_extensions.rb +7 -8
- data/example/http_extensions.rb +7 -8
- data/lib/rflow/broker.rb +18 -0
- data/lib/rflow/child_process.rb +3 -1
- data/lib/rflow/component.rb +51 -61
- data/lib/rflow/component/port.rb +24 -15
- data/lib/rflow/configuration.rb +1 -0
- data/lib/rflow/configuration/connection.rb +35 -17
- data/lib/rflow/configuration/ruby_dsl.rb +47 -9
- data/lib/rflow/connection.rb +13 -9
- data/lib/rflow/connections/zmq_connection.rb +46 -3
- data/lib/rflow/daemon_process.rb +1 -1
- data/lib/rflow/master.rb +8 -1
- data/lib/rflow/shard.rb +8 -2
- data/lib/rflow/version.rb +1 -1
- data/rflow.gemspec +6 -6
- data/spec/fixtures/extensions_ints.rb +7 -8
- data/spec/rflow/component/port_spec.rb +16 -22
- data/spec/rflow/components/clock_spec.rb +12 -17
- data/spec/rflow/configuration/ruby_dsl_spec.rb +234 -46
- data/spec/rflow/configuration_spec.rb +5 -5
- data/spec/rflow/forward_to_input_port_spec.rb +10 -18
- data/spec/rflow/forward_to_output_port_spec.rb +6 -13
- data/spec/rflow/logger_spec.rb +6 -6
- data/spec/rflow/message/data/raw_spec.rb +3 -3
- data/spec/rflow/message_spec.rb +16 -16
- data/spec/rflow_spec.rb +37 -37
- data/spec/spec_helper.rb +3 -5
- metadata +20 -17
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 82d81c66d6a26aa814d88893e294951247434585
         | 
| 4 | 
            +
              data.tar.gz: 03edc9684d65ffb4e0729000f8948f8dbe69c16c
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 7ca15c17aa77a9e15d5309c5e9dd22b533b0108ca6f8c29031899f8c824f3c4939c5c37e57bbe51db5834eefc40f0f52a840874af9660fbbe07729b8a66ea3d0
         | 
| 7 | 
            +
              data.tar.gz: a8f7a76ca8eba6aec45cdf56433792938610e11332dd0523917795dd4a59edc84660d80b08a48a67c222f3a312ba6837b238a36e9db9e982b62fac2742f93417
         | 
    
        data/.gitignore
    CHANGED
    
    
    
        data/.ruby-version
    CHANGED
    
    | @@ -1 +1 @@ | |
| 1 | 
            -
            ruby-2.1. | 
| 1 | 
            +
            ruby-2.1.2
         | 
    
        data/README.VAGRANT
    ADDED
    
    | @@ -0,0 +1,63 @@ | |
| 1 | 
            +
            How to build the vagrant boxes (VMWare Fusion 6):
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            First:
         | 
| 4 | 
            +
            Add /Applications/VMware Fusion.app//Contents/Library/ to your PATH.
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            CentOS 6.2 (http://cbednarski.com/articles/creating-vagrant-base-box-for-centos-62/)
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            curl -O http://mirrors.usc.edu/pub/linux/distributions/centos/6.2/isos/x86_64/CentOS-6.2-x86_64-minimal.iso
         | 
| 9 | 
            +
            Create a new VMWare box with the iso. Start the image. Don't use Easy Install.
         | 
| 10 | 
            +
              Hostname: centos62
         | 
| 11 | 
            +
              Root password: vagrant
         | 
| 12 | 
            +
              Disk partition: Replace Existing Linux Partition.
         | 
| 13 | 
            +
              Reboot.
         | 
| 14 | 
            +
            Log in as root/vagrant.
         | 
| 15 | 
            +
            vi /etc/sysconfig/network-scripts/ifcfg-eth0
         | 
| 16 | 
            +
              Change ONBOOT="no" to yes.
         | 
| 17 | 
            +
              Remove HWADDR line.
         | 
| 18 | 
            +
              Add BOOTPROTO="dhcp"
         | 
| 19 | 
            +
            vi /etc/udev/rules.d/70-persistent-net.rules
         | 
| 20 | 
            +
              Replace the eth0 line with: SUBSYSTEM=="net", ACTION=="add", DRIVERS="?*", ATTR{type}=="1", KERNEL=="eth*", NAME="eth0"
         | 
| 21 | 
            +
            shutdown -r now
         | 
| 22 | 
            +
             | 
| 23 | 
            +
            Log back in as root.
         | 
| 24 | 
            +
            VMWare Fusion > Virtual Machine > Install VMWare Tools
         | 
| 25 | 
            +
            mkdir /media/cdrom
         | 
| 26 | 
            +
            mount /dev/cdrom /media/cdrom
         | 
| 27 | 
            +
            cd /tmp
         | 
| 28 | 
            +
            tar -xzf /media/cdrom/VM[tab].tar.gz
         | 
| 29 | 
            +
            yum install -y perl eject
         | 
| 30 | 
            +
            /tmp/vmware-tools-distrib/vmware-install.pl --default
         | 
| 31 | 
            +
             | 
| 32 | 
            +
            yum install -y sudo
         | 
| 33 | 
            +
            useradd -m vagrant
         | 
| 34 | 
            +
            usermod -aG wheel vagrant
         | 
| 35 | 
            +
            echo vagrant | passwd vagrant --stdin
         | 
| 36 | 
            +
            echo "vagrant ALL=(ALL) ALL" >> /etc/sudoers
         | 
| 37 | 
            +
            echo "%wheel ALL=NOPASSWD: ALL" >> /etc/sudoers
         | 
| 38 | 
            +
            echo 'Defaults env_keep="SSH_AUTH_SOCK"' >> /etc/sudoers
         | 
| 39 | 
            +
             | 
| 40 | 
            +
            vi /etc/sudoers
         | 
| 41 | 
            +
              Change requiretty to !requiretty
         | 
| 42 | 
            +
             | 
| 43 | 
            +
            yum install -y openssh-server
         | 
| 44 | 
            +
            echo "UseDNS no" >> /etc/ssh/sshd_config
         | 
| 45 | 
            +
             | 
| 46 | 
            +
            mkdir -m 0700 /home/vagrant/.ssh
         | 
| 47 | 
            +
            curl -s https://raw.githubusercontent.com/mitchellh/vagrant/master/keys/vagrant.pub > /home/vagrant/.ssh/authorized_keys
         | 
| 48 | 
            +
            chown -R vagrant:vagrant /home/vagrant/.ssh
         | 
| 49 | 
            +
            chmod -R 0600 /home/vagrant/.ssh/*
         | 
| 50 | 
            +
            shutdown -r now
         | 
| 51 | 
            +
             | 
| 52 | 
            +
            Verify logging in as vagrant/vagrant and `sudo ls /root` with no password.
         | 
| 53 | 
            +
             | 
| 54 | 
            +
            sudo shutdown -h now
         | 
| 55 | 
            +
             | 
| 56 | 
            +
            cd to wherever you have stored the VMWare box (~/Documents/Virtual Machines.localized/ by default).
         | 
| 57 | 
            +
            vmware-vdiskmanager -d Virtual\ Disk.vmdk   (ignore the warning message)
         | 
| 58 | 
            +
            vmware-vdiskmanager -k Virtual\ Disk.vmdk   (ignore the warning message)
         | 
| 59 | 
            +
            cat > metadata.json
         | 
| 60 | 
            +
            {
         | 
| 61 | 
            +
              "provider":"vmware_fusion"
         | 
| 62 | 
            +
            }
         | 
| 63 | 
            +
             | 
    
        data/README.md
    CHANGED
    
    | @@ -20,21 +20,34 @@ however only two are in current use, namely ZeroMQ connections and | |
| 20 20 | 
             
            Avro serialization.
         | 
| 21 21 |  | 
| 22 22 | 
             
            RFlow currently runs as a single-threaded, evented system on top of
         | 
| 23 | 
            -
            [ | 
| 23 | 
            +
            [EventMachine](http://rubyeventmachine.com/), meaning that any code
         | 
| 24 24 | 
             
            should be coded in an asynchronous style so as to not block the
         | 
| 25 | 
            -
             | 
| 26 | 
            -
             | 
| 27 | 
            -
             | 
| 25 | 
            +
            EventMachine reactor (and thus block all the other components). Use
         | 
| 26 | 
            +
            `EM.defer` and other such patterns, along with EventMachine plugins
         | 
| 27 | 
            +
            for various servers and clients, to work in this style and defer
         | 
| 28 | 
            +
            computation to background threads.
         | 
| 29 | 
            +
             | 
| 30 | 
            +
            RFlow component workflows may be split into `shards` to improve
         | 
| 31 | 
            +
            parallelism. Each shard is currently represented by a separate process,
         | 
| 32 | 
            +
            though threads may be supported in the future. Multiple copies of a
         | 
| 33 | 
            +
            shard may be instantiated, which will cooperate to round-robin
         | 
| 34 | 
            +
            incoming messages.
         | 
| 28 35 |  | 
| 29 36 | 
             
            Some of the long-term goals of RFlow are to allow for components and
         | 
| 30 37 | 
             
            portions of the workflow to be defined in any language that supports
         | 
| 31 | 
            -
            Avro and ZeroMQ, which are numerous.
         | 
| 38 | 
            +
            Avro and ZeroMQ, which are numerous. For this reason, the official
         | 
| 39 | 
            +
            specification of an RFlow workflow is a SQLite database containing
         | 
| 40 | 
            +
            information on its components, connections, ports, settings, etc.
         | 
| 41 | 
            +
            There is a Ruby DSL that aids in populating the database but the intent
         | 
| 42 | 
            +
            is that multiple processes and languages could access and manipulate
         | 
| 43 | 
            +
            the database form.
         | 
| 32 44 |  | 
| 33 45 | 
             
            ## Developer Notes
         | 
| 34 46 |  | 
| 35 47 | 
             
            You will need ZeroMQ preinstalled. Currently, EventMachine only supports
         | 
| 36 48 | 
             
            v3.2.4, not v4.x, so install that version. Older versions like 2.2 will not
         | 
| 37 | 
            -
            work.
         | 
| 49 | 
            +
            work. (You will probably get errors saying arcane things like
         | 
| 50 | 
            +
            `assertion failed, mailbox.cpp(84)`).
         | 
| 38 51 |  | 
| 39 52 | 
             
            ## Definitions
         | 
| 40 53 |  | 
| @@ -50,8 +63,8 @@ work. | |
| 50 63 | 
             
              accessed by an array.
         | 
| 51 64 |  | 
| 52 65 | 
             
            * __Connection__ - a directed link between an output port and an input
         | 
| 53 | 
            -
              port.  RFlow supports generalized connection types | 
| 54 | 
            -
              ZeroMQ  | 
| 66 | 
            +
              port.  RFlow supports generalized connection types; however, only
         | 
| 67 | 
            +
              ZeroMQ links are currently used.
         | 
| 55 68 |  | 
| 56 69 | 
             
            * __Message__ - a bit of serialized data that is sent out an output
         | 
| 57 70 | 
             
              port and received on an input port. Due to the serialization,
         | 
| @@ -63,7 +76,6 @@ work. | |
| 63 76 | 
             
              components (nodes) are wired together via connections to their
         | 
| 64 77 | 
             
              respective output/input ports.
         | 
| 65 78 |  | 
| 66 | 
            -
             | 
| 67 79 | 
             
            ## Component Examples
         | 
| 68 80 |  | 
| 69 81 | 
             
            The following describes the API of an RFlow component:
         | 
| @@ -90,7 +102,7 @@ end | |
| 90 102 | 
             
            * `configure!` (called with a hash configuration) is called after the
         | 
| 91 103 | 
             
              component is instantiated but before the workflow has been wired or
         | 
| 92 104 | 
             
              any messages have been sent. Note that this is called outside the
         | 
| 93 | 
            -
               | 
| 105 | 
            +
              EventMachine reactor.
         | 
| 94 106 |  | 
| 95 107 | 
             
            * `run!` is called after all the components have been wired together
         | 
| 96 108 | 
             
              with connections and the entire workflow has been created. For a
         | 
| @@ -98,11 +110,11 @@ end | |
| 98 110 | 
             
              be sent. For example, if the component is reading from a file, this
         | 
| 99 111 | 
             
              is where the file will be opened, the contents read into a message,
         | 
| 100 112 | 
             
              and the message sent out the output port. `run!` is called within
         | 
| 101 | 
            -
              the  | 
| 113 | 
            +
              the EventMachine reactor.
         | 
| 102 114 |  | 
| 103 115 | 
             
            * `process_message` is an evented callback that is called whenever the
         | 
| 104 116 | 
             
              component receives a message on one of its input ports.
         | 
| 105 | 
            -
              `process_message` is called  | 
| 117 | 
            +
              `process_message` is called within the EventMachine reactor.
         | 
| 106 118 |  | 
| 107 119 | 
             
            * `shutdown!` is called when the flow is being terminated, and is
         | 
| 108 120 | 
             
              meant to allow the components to do penultimate processing and send
         | 
| @@ -186,20 +198,19 @@ configuration): | |
| 186 198 | 
             
            class RFlow::Components::FileOutput < RFlow::Component
         | 
| 187 199 | 
             
              input_port :in
         | 
| 188 200 |  | 
| 189 | 
            -
              attr_accessor :output_file_path | 
| 201 | 
            +
              attr_accessor :output_file_path
         | 
| 190 202 |  | 
| 191 203 | 
             
              def configure!(config)
         | 
| 192 204 | 
             
                self.output_file_path = config['output_file_path']
         | 
| 193 | 
            -
                self.output_file = File.new output_file_path, 'w+'
         | 
| 194 205 | 
             
              end
         | 
| 195 206 |  | 
| 196 207 | 
             
              def process_message(input_port, input_port_key, connection, message)
         | 
| 197 | 
            -
                 | 
| 198 | 
            -
             | 
| 199 | 
            -
             | 
| 200 | 
            -
             | 
| 201 | 
            -
             | 
| 202 | 
            -
                 | 
| 208 | 
            +
                File.open(output_file_path, 'a') do |f|
         | 
| 209 | 
            +
                  f.flock(File::LOCK_EX)
         | 
| 210 | 
            +
                  f.puts message.data.data_object.inspect
         | 
| 211 | 
            +
                  f.flush
         | 
| 212 | 
            +
                  f.flock(File::LOCK_UN)
         | 
| 213 | 
            +
                end
         | 
| 203 214 | 
             
              end
         | 
| 204 215 | 
             
            end
         | 
| 205 216 | 
             
            ```
         | 
| @@ -314,7 +325,7 @@ messaga.data.default?   # => false | |
| 314 325 | 
             
            RFlow currently stores its configuration in a SQLite database which
         | 
| 315 326 | 
             
            is internally accessed via ActiveRecord.  Given that SQLite is a
         | 
| 316 327 | 
             
            rather simple and standard interface, non-RFlow components could
         | 
| 317 | 
            -
            access it and determine their  | 
| 328 | 
            +
            access it and determine their respective ZMQ connections.
         | 
| 318 329 |  | 
| 319 330 | 
             
            DB schemas for the configuration database are in
         | 
| 320 331 | 
             
            [lib/rflow/configuration/migrations](lib/rflow/configuration/migrations)
         | 
| @@ -323,26 +334,29 @@ tables uses a UUID primary key, and UUIDs are used within RFlow to | |
| 323 334 | 
             
            identify specific components.
         | 
| 324 335 |  | 
| 325 336 | 
             
            * settings - general application settings, such as log levels, app
         | 
| 326 | 
            -
              names, directories, etc
         | 
| 337 | 
            +
              names, directories, etc.
         | 
| 338 | 
            +
             | 
| 339 | 
            +
            * shards - a list of the shards defined for the workflow, including
         | 
| 340 | 
            +
              UUID, type, and number of workers for the shard
         | 
| 327 341 |  | 
| 328 342 | 
             
            * components - a list of the components including its name,
         | 
| 329 | 
            -
              specification (Ruby class), and options. Note that the options are
         | 
| 343 | 
            +
              specification (Ruby class), shard, and options. Note that the options are
         | 
| 330 344 | 
             
              serialized to the database as YAML, and components should understand
         | 
| 331 345 | 
             
              that the round-trip through the database might not be perfect (e.g.
         | 
| 332 346 | 
             
              Ruby symbols might become strings). A component also has a number of
         | 
| 333 347 | 
             
              input ports and output ports.
         | 
| 334 348 |  | 
| 335 349 | 
             
            * ports - belonging to a component (via `component_uuid` foreign key),
         | 
| 336 | 
            -
              also has a `type`  | 
| 350 | 
            +
              also has a `type` column for ActiveRecord STI, which gets set to
         | 
| 337 351 | 
             
              either a `RFlow::Configuration::InputPort` or
         | 
| 338 352 | 
             
              `RFlow::Configuration::OutputPort`.
         | 
| 339 353 |  | 
| 340 354 | 
             
            * connections - a connection between two ports via foriegn keys
         | 
| 341 355 | 
             
              `input_port_uuid` and `output_port_uuid`. Like ports, connections
         | 
| 342 | 
            -
              are typed via AR STI (`RFlow::Configuration::ZMQConnection`  | 
| 343 | 
            -
               | 
| 344 | 
            -
               | 
| 345 | 
            -
              keys.
         | 
| 356 | 
            +
              are typed via AR STI (`RFlow::Configuration::ZMQConnection` and
         | 
| 357 | 
            +
              `RFlow::Configuration::BrokeredZMQConnection` are the only
         | 
| 358 | 
            +
              supported values for now) and have a YAML serialized `options`
         | 
| 359 | 
            +
              hash. A connection also (potentially) defines the port keys.
         | 
| 346 360 |  | 
| 347 361 | 
             
            RFlow also provides a RubyDSL for a configuration-like file to be used
         | 
| 348 362 | 
             
            to load the database:
         | 
| @@ -351,10 +365,9 @@ to load the database: | |
| 351 365 | 
             
            RFlow::Configuration::RubyDSL.configure do |config|
         | 
| 352 366 | 
             
              # Configure the settings, which include paths for various files, log
         | 
| 353 367 | 
             
              # levels, and component specific stuffs
         | 
| 354 | 
            -
              config.setting | 
| 355 | 
            -
              config.setting | 
| 356 | 
            -
             | 
| 357 | 
            -
              config.setting('rflow.application_name', 'testapp')
         | 
| 368 | 
            +
              config.setting 'rflow.log_level', 'DEBUG'
         | 
| 369 | 
            +
              config.setting 'rflow.application_directory_path', '../tmp'
         | 
| 370 | 
            +
              config.setting 'rflow.application_name', 'testapp'
         | 
| 358 371 |  | 
| 359 372 | 
             
              # Instantiate components
         | 
| 360 373 | 
             
              config.component 'generate_ints1', 'RFlow::Components::GenerateIntegerSequence', {
         | 
| @@ -386,10 +399,82 @@ RFlow::Configuration::RubyDSL.configure do |config| | |
| 386 399 | 
             
            end
         | 
| 387 400 | 
             
            ```
         | 
| 388 401 |  | 
| 402 | 
            +
            ## Parallelism
         | 
| 403 | 
            +
             | 
| 404 | 
            +
            RFlow supports parallelizing workflows and splitting them into multiple
         | 
| 405 | 
            +
            `shard`s. By default, components declared in the Ruby DSL exist in the
         | 
| 406 | 
            +
            default shard, named `DEFAULT`. There is only one worker for the default
         | 
| 407 | 
            +
            shard.
         | 
| 408 | 
            +
             | 
| 409 | 
            +
            ZeroMQ communication between components in the same shard uses ZeroMQ's
         | 
| 410 | 
            +
            `inproc` socket type for maximum performance. ZeroMQ communication between
         | 
| 411 | 
            +
            components in different shards is accomplished with a ZeroMQ `ipc` socket.
         | 
| 412 | 
            +
            In the case of a many-to-many connection (many workers in a producing
         | 
| 413 | 
            +
            shard and many workers in a consuming shard), a ZeroMQ message broker
         | 
| 414 | 
            +
            process is created to route the messages appropriately. Senders round-robin
         | 
| 415 | 
            +
            to receivers and receivers fair-queue the messages from the senders.
         | 
| 416 | 
            +
            Load balancing based on receiver responsiveness is not currently implemented.
         | 
| 417 | 
            +
             | 
| 418 | 
            +
            To define a custom shard in the Ruby DSL, use the `shard` method. For
         | 
| 419 | 
            +
            example:
         | 
| 420 | 
            +
             | 
| 421 | 
            +
            ```ruby
         | 
| 422 | 
            +
            RFlow::Configuration::RubyDSL.configure do |config|
         | 
| 423 | 
            +
              # Configure the settings, which include paths for various files, log
         | 
| 424 | 
            +
              # levels, and component specific stuffs
         | 
| 425 | 
            +
              config.setting 'rflow.log_level', 'DEBUG'
         | 
| 426 | 
            +
              config.setting 'rflow.application_directory_path', '../tmp'
         | 
| 427 | 
            +
              config.setting 'rflow.application_name', 'testapp'
         | 
| 428 | 
            +
             | 
| 429 | 
            +
              config.shard 'integers', :process => 2 do |shard|
         | 
| 430 | 
            +
                shard.component 'generate_ints1', 'RFlow::Components::GenerateIntegerSequence', {
         | 
| 431 | 
            +
                  'start' => 0,
         | 
| 432 | 
            +
                  'finish' => 10,
         | 
| 433 | 
            +
                  'step' => 3,
         | 
| 434 | 
            +
                  'interval_seconds' => 1
         | 
| 435 | 
            +
                }
         | 
| 436 | 
            +
                shard.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', {
         | 
| 437 | 
            +
                  'start' => 20,
         | 
| 438 | 
            +
                  'finish' => 30
         | 
| 439 | 
            +
                }
         | 
| 440 | 
            +
              end
         | 
| 441 | 
            +
             | 
| 442 | 
            +
              # another style of specifying type and count; count defaults to 1
         | 
| 443 | 
            +
              config.shard 'filters', :type => :process, :count => 1 do |shard|
         | 
| 444 | 
            +
                shard.component 'filter', 'RFlow::Components::RubyProcFilter', {
         | 
| 445 | 
            +
                  'filter_proc_string' => 'lambda {|message| true}'
         | 
| 446 | 
            +
                }
         | 
| 447 | 
            +
              end
         | 
| 448 | 
            +
             | 
| 449 | 
            +
              # another way of specifying type
         | 
| 450 | 
            +
              config.process 'filters', :count => 2 do |shard|
         | 
| 451 | 
            +
                shard.component 'output1', 'RFlow::Components::FileOutput', {
         | 
| 452 | 
            +
                  'output_file_path' => '/tmp/out1'
         | 
| 453 | 
            +
                }
         | 
| 454 | 
            +
              end
         | 
| 455 | 
            +
             | 
| 456 | 
            +
              # this component will be created in the DEFAULT shard
         | 
| 457 | 
            +
              config.component 'output2', 'RFlow::Components::FileOutput', {
         | 
| 458 | 
            +
                'output_file_path' => '/tmp/out2'
         | 
| 459 | 
            +
              }
         | 
| 460 | 
            +
             | 
| 461 | 
            +
              # Wire components together
         | 
| 462 | 
            +
              config.connect 'generate_ints1#out' => 'filter#in'
         | 
| 463 | 
            +
              config.connect 'generate_ints2#out' => 'filter#in'
         | 
| 464 | 
            +
              config.connect 'filter#filtered' => 'replicate#in'
         | 
| 465 | 
            +
              config.connect 'filter#out' => 'output1#in'
         | 
| 466 | 
            +
              config.connect 'filter#filtered' => 'output2#in'
         | 
| 467 | 
            +
            end
         | 
| 468 | 
            +
            ```
         | 
| 469 | 
            +
             | 
| 470 | 
            +
            At runtime, shards with no components defined will have no workers and
         | 
| 471 | 
            +
            will not be started. (So, if you put all components in a custom shard,
         | 
| 472 | 
            +
            no `DEFAULT` workers will be seen.)
         | 
| 473 | 
            +
             | 
| 389 474 | 
             
            ## Command-Line Operation
         | 
| 390 475 |  | 
| 391 476 | 
             
            RFlow includes the `rflow` binary that can load a database from a Ruby
         | 
| 392 | 
            -
            DSL, as well as start/stop the  | 
| 477 | 
            +
            DSL, as well as start/stop the workflow application as a daemon.
         | 
| 393 478 | 
             
            Invoking the `rflow` binary without any options will give a brief help:
         | 
| 394 479 |  | 
| 395 480 | 
             
            ```
         | 
    
        data/Vagrantfile
    ADDED
    
    | @@ -0,0 +1,53 @@ | |
| 1 | 
            +
            # -*- mode: ruby -*-
         | 
| 2 | 
            +
            # vi: set ft=ruby :
         | 
| 3 | 
            +
            VAGRANTFILE_API_VERSION = "2"
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
         | 
| 6 | 
            +
              config.vm.define 'centos62' do |c|
         | 
| 7 | 
            +
                c.vm.box = "jstoneham/rflow-centos62"
         | 
| 8 | 
            +
              end
         | 
| 9 | 
            +
              config.vm.define 'centos64' do |c|
         | 
| 10 | 
            +
                c.vm.box = "box-cutter/centos64"
         | 
| 11 | 
            +
              end
         | 
| 12 | 
            +
              config.vm.define 'centos65' do |c|
         | 
| 13 | 
            +
                c.vm.box = "chef/centos-6.5"
         | 
| 14 | 
            +
              end
         | 
| 15 | 
            +
             | 
| 16 | 
            +
              config.vm.synced_folder '.', '/rflow'
         | 
| 17 | 
            +
              # bring over rflow examples; use rsync so it's safe to create IPCs in the rflow-examples directory
         | 
| 18 | 
            +
              # (that is, avoid NFS)
         | 
| 19 | 
            +
              # run 'vagrant rsync-auto' to get syncing to happen automatically
         | 
| 20 | 
            +
              config.vm.synced_folder '../rflow_examples', '/rflow_examples', type: 'rsync', rsync__exclude: '.git/'
         | 
| 21 | 
            +
              config.vm.synced_folder '../rflow-components-http', '/rflow-components-http'
         | 
| 22 | 
            +
             | 
| 23 | 
            +
              # forward http for rflow testing
         | 
| 24 | 
            +
              config.vm.network "forwarded_port", guest: 8000, host: 8000
         | 
| 25 | 
            +
             | 
| 26 | 
            +
              # install RPM dependencies for rflow and zeromq
         | 
| 27 | 
            +
              config.vm.provision "shell", privileged: true, inline: <<-EOS
         | 
| 28 | 
            +
                curl -O https://dl.fedoraproject.org/pub/epel/6/i386/epel-release-6-8.noarch.rpm
         | 
| 29 | 
            +
                rpm -ivh epel-release-6-8.noarch.rpm
         | 
| 30 | 
            +
                yum -y install libyaml-devel patch libffi-devel glibc-headers autoconf gcc-c++ glibc-devel readline-devel zlib-devel openssl-devel automake libtool bison git sqlite-devel rpm-build libuuid-devel vim
         | 
| 31 | 
            +
              EOS
         | 
| 32 | 
            +
             | 
| 33 | 
            +
              # build zeromq as vagrant user
         | 
| 34 | 
            +
              config.vm.provision "shell", privileged: false, inline: <<-EOS
         | 
| 35 | 
            +
                curl -O http://download.zeromq.org/zeromq-3.2.4.tar.gz
         | 
| 36 | 
            +
                rpmbuild -tb zeromq-3.2.4.tar.gz
         | 
| 37 | 
            +
              EOS
         | 
| 38 | 
            +
             | 
| 39 | 
            +
              # install zeromq
         | 
| 40 | 
            +
              config.vm.provision "shell", privileged: true, inline: <<-EOS
         | 
| 41 | 
            +
                rpm -ivh ~vagrant/rpmbuild/RPMS/x86_64/zeromq-*
         | 
| 42 | 
            +
              EOS
         | 
| 43 | 
            +
             | 
| 44 | 
            +
              # set up RVM and bundler
         | 
| 45 | 
            +
              config.vm.provision "shell", privileged: false, inline: <<-EOS
         | 
| 46 | 
            +
                rm -f .profile
         | 
| 47 | 
            +
                curl -sSL https://get.rvm.io | bash -s stable
         | 
| 48 | 
            +
                source .rvm/scripts/rvm
         | 
| 49 | 
            +
                rvm install `cat /rflow/.ruby-version`
         | 
| 50 | 
            +
                cd /rflow
         | 
| 51 | 
            +
                bundle update
         | 
| 52 | 
            +
              EOS
         | 
| 53 | 
            +
            end
         | 
    
        data/bin/rflow
    CHANGED
    
    | @@ -66,7 +66,12 @@ end | |
| 66 66 |  | 
| 67 67 | 
             
            # Now require rflow because the following parts of the startup require
         | 
| 68 68 | 
             
            # pieces (usually RFlow::Configuration or RFlow.logger)
         | 
| 69 | 
            -
             | 
| 69 | 
            +
            begin
         | 
| 70 | 
            +
              require 'rflow'
         | 
| 71 | 
            +
            rescue Exception => e
         | 
| 72 | 
            +
              STDERR.puts "Error loading RFlow: #{e.class} - #{e.message}"
         | 
| 73 | 
            +
              exit 1
         | 
| 74 | 
            +
            end
         | 
| 70 75 |  | 
| 71 76 | 
             
            # Set up the startup logging, which is distinct from the runtime
         | 
| 72 77 | 
             
            # logging that is defined in the config database.  The startup logging
         | 
    
        data/example/basic_extensions.rb
    CHANGED
    
    | @@ -14,21 +14,20 @@ end | |
| 14 14 | 
             
            RFlow::Configuration.add_available_data_extension('RFlow::Message::Data::Integer', SimpleDataExtension)
         | 
| 15 15 |  | 
| 16 16 | 
             
            class RFlow::Components::FileOutput < RFlow::Component
         | 
| 17 | 
            -
              attr_accessor :output_file_path | 
| 17 | 
            +
              attr_accessor :output_file_path
         | 
| 18 18 | 
             
              input_port :in
         | 
| 19 19 |  | 
| 20 20 | 
             
              def configure!(config)
         | 
| 21 21 | 
             
                self.output_file_path = config['output_file_path']
         | 
| 22 | 
            -
                self.output_file = File.new output_file_path, 'w+'
         | 
| 23 22 | 
             
              end
         | 
| 24 23 |  | 
| 25 24 | 
             
              def process_message(input_port, input_port_key, connection, message)
         | 
| 26 | 
            -
                 | 
| 27 | 
            -
             | 
| 28 | 
            -
             | 
| 29 | 
            -
             | 
| 30 | 
            -
             | 
| 31 | 
            -
                 | 
| 25 | 
            +
                File.open(output_file_path, 'a') do |f|
         | 
| 26 | 
            +
                  f.flock(File::LOCK_EX)
         | 
| 27 | 
            +
                  f.puts message.data.data_object.inspect
         | 
| 28 | 
            +
                  f.flush
         | 
| 29 | 
            +
                  f.flock(File::LOCK_UN)
         | 
| 30 | 
            +
                end
         | 
| 32 31 | 
             
              end
         | 
| 33 32 | 
             
            end
         | 
| 34 33 |  | 
    
        data/example/http_extensions.rb
    CHANGED
    
    | @@ -5,21 +5,20 @@ require 'eventmachine' | |
| 5 5 | 
             
            require 'evma_httpserver'
         | 
| 6 6 |  | 
| 7 7 | 
             
            class RFlow::Components::FileOutput < RFlow::Component
         | 
| 8 | 
            -
              attr_accessor :output_file_path | 
| 8 | 
            +
              attr_accessor :output_file_path
         | 
| 9 9 | 
             
              input_port :in
         | 
| 10 10 |  | 
| 11 11 | 
             
              def configure!(config)
         | 
| 12 12 | 
             
                self.output_file_path = config['output_file_path']
         | 
| 13 | 
            -
                self.output_file = File.new output_file_path, 'w+'
         | 
| 14 13 | 
             
              end
         | 
| 15 14 |  | 
| 16 15 | 
             
              def process_message(input_port, input_port_key, connection, message)
         | 
| 17 | 
            -
                 | 
| 18 | 
            -
             | 
| 19 | 
            -
             | 
| 20 | 
            -
             | 
| 21 | 
            -
             | 
| 22 | 
            -
                 | 
| 16 | 
            +
                File.open(output_file_path, 'a') do |f|
         | 
| 17 | 
            +
                  f.flock(File::LOCK_EX)
         | 
| 18 | 
            +
                  f.puts message.data.data_object.inspect
         | 
| 19 | 
            +
                  f.flush
         | 
| 20 | 
            +
                  f.flock(File::LOCK_UN)
         | 
| 21 | 
            +
                end
         | 
| 23 22 | 
             
              end
         | 
| 24 23 | 
             
            end
         | 
| 25 24 |  |