RubyGems - andromeda - Versions diffs - 0.1 → 0.1.2 - Mend

andromeda 0.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

data/.gitignore +1 -0
data/.rvmrc +1 -0
data/CHANGELOG.md +49 -0
data/Gemfile +17 -9
data/Gemfile.lock +5 -0
data/LICENSE.txt +1 -2
data/README.md +150 -15
data/ROADMAP.md +73 -0
data/Rakefile +4 -6
data/andromeda.gemspec +2 -2
data/lib/andromeda.rb +52 -9
data/lib/andromeda/atom.rb +105 -0
data/lib/andromeda/cmd.rb +242 -0
data/lib/andromeda/common.rb +0 -0
data/lib/andromeda/copy_clone.rb +44 -0
data/lib/andromeda/error.rb +42 -0
data/lib/andromeda/guide.rb +50 -0
data/lib/andromeda/guide_track.rb +98 -0
data/lib/andromeda/id.rb +10 -83
data/lib/andromeda/impl/atom.rb +47 -0
data/lib/andromeda/impl/class_attr.rb +31 -0
data/lib/andromeda/impl/proto_plan.rb +219 -0
data/lib/andromeda/impl/to_s.rb +48 -0
data/lib/andromeda/impl/xor_id.rb +89 -0
data/lib/andromeda/kit.rb +172 -0
data/lib/andromeda/map_reduce.rb +3 -0
data/lib/andromeda/plan.rb +130 -0
data/lib/andromeda/pool_guide.rb +70 -0
data/lib/andromeda/spot.rb +132 -0
data/lib/andromeda/sugar.rb +41 -0
data/lib/andromeda/sync.rb +68 -0
data/lib/andromeda/version.rb +1 -1
data/yard_extensions/andromeda.rb +28 -0
metadata +30 -13
data/lib/andromeda/andromeda.rb +0 -225
data/lib/andromeda/commando.rb +0 -106
data/lib/andromeda/helpers.rb +0 -134
data/lib/andromeda/join.rb +0 -48
data/lib/andromeda/pools.rb +0 -69
data/lib/andromeda/scope.rb +0 -38

data/.gitignore CHANGED

@@ -1,6 +1,7 @@
 .idea
 .bundle
 .yardoc
+.rbx
 doc
 db
 pkg

data/.rvmrc CHANGED

@@ -1,2 +1,3 @@
 rvm --create gemset use andromeda
+export RBXOPT="$RBXOPT -X19 -v"
 export JRUBY_OPTS="$JRUBY_OPTS -Xcompat.version=1.9"

data/CHANGELOG.md ADDED

@@ -0,0 +1,49 @@
+# CHANGELOG for andromeda
+*Note* Not all versions are released gems, many version numbers just exist in the github repository.
+## 0.1.2 Architecture Refactoring
+* via(:emit), Spot::>>, entry/dest, enter/exit separation
+* ConnectorBase, post_data clean up
+* (meth_|attr_)spot queries
+* Tested with rbx, mri, and jruby
+* Renaming and reorganization of architecture:
+Stages are now called plans, chunks data, opts tags and dests spots. Construction of Pools and state management (i.e. copying) of Plans and Tags has been factored into two new abstractions: Guards (state management, track selection), and Tracks (Executors/Thread pools).
+* Reorganization of modules (Plan is toplevel + Kit, Sync, Cmd, Atom)
+* New code: error.rb, copy_clone.rb, sugar.rb
+* Beginning docs: CHANGELOG, ROADMAP
+* Cleaned up output in irb considerably
+* Wrote helper support for testing: Atom::(Var, Region, FillOnce, Combiner)
+* Renamed Command to Cmd and moved into Cmd:: module
+* Added guide nick names to Guides.self
+## 0.1.1 Architecture Refactoring
+* Commands have support for comments
+* some work left to do for chunking
+* emit is protected now
+* exit as default "emitter" for on_enter (allow overloading in subclasses)
+* Tested with rubinius
+* Set pool from other stage
+* Added globally shared single pool
+* Andromeda.reload! + maruku installed as fallback for yard by default
+* Added FileChunker and FileReader to helpers
+* Added signals (keeping track of dests not intended for map/filter by Transf etc)
+* Renamed Bases to Stages (was talking about stages all the time anyhow)
+* Polished logging/error catching and helpers
+* PoolSupport.num_processors caches num_cpus value (don't trust Facter that much to be quick)
+* Made >> chaining and added chunk_val for easier mapping
+* Added chunk keys for map reduce like handling
+* Added GathererBase and a plain Reducer to helpers.rb
+* Overhauled Join for concurrent synchronization
+* Avoids cloning in single-threaded scenarios for shared state in gatherers
+* Modified thread pool creation to happen on init if possible
+* Added trace_pool for debugging which pools get used by whom
+## 0.1 (Release)
+Initial Version

data/Gemfile CHANGED

@@ -1,22 +1,30 @@
 source "http://rubygems.org"
 gem 'json', '>=1.6.5'
-gem 'threadpool'
-gem 'facter'
 gem 'atomic'
+gem 'facter'
+gem 'statval'
+gem 'threadpool'
 group :development do
   gem 'rake'
-  gem 'redcarpet', :require => false
-  gem 'yard', :require => false
-  gem 'irbtools', :require => false
-end
-group :jruby do
-  gem 'maruku'
+  gem 'yard'
+  gem 'irbtools'
 end
 group :test do
   gem 'rspec', '2.6.0'
   gem 'simplecov'
 end
+platforms :ruby do
+  gem 'redcarpet'
+end
+platforms :rbx do
+  gem 'redcarpet'
+end
+platforms :jruby do
+  gem 'maruku'
+end

data/Gemfile.lock CHANGED

@@ -2,6 +2,7 @@ GEM
   remote: http://rubygems.org/
   specs:
     atomic (1.0.0)
+    atomic (1.0.0-java)
     awesome_print (1.0.2)
     boson (1.1.1)
     clipboard (1.0.1)
@@ -36,6 +37,7 @@ GEM
       wirb (>= 0.4.2)
       zucker (>= 12.1)
     json (1.6.6)
+    json (1.6.6-java)
     maruku (0.6.0)
       syntax (>= 1.0.0)
     method_locator (0.0.4)
@@ -61,6 +63,7 @@ GEM
     simplecov-html (0.5.3)
     sketches (0.1.1)
     spoon (0.0.1)
+    statval (0.1.2)
     syntax (1.0.0)
     threadpool (0.1.0.1)
     unicode-display_width (0.1.1)
@@ -69,6 +72,7 @@ GEM
     zucker (12.1)
 PLATFORMS
+  java
   ruby
 DEPENDENCIES
@@ -81,5 +85,6 @@ DEPENDENCIES
   redcarpet
   rspec (= 2.6.0)
   simplecov
+  statval
   threadpool
   yard

data/LICENSE.txt CHANGED

@@ -17,5 +17,4 @@ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/README.md CHANGED

@@ -1,28 +1,163 @@
 # andromeda
-Andromeda is a ultra light weight multicore stream processing framework based on a small dataflow DSL
+Andromeda is a light weight framework for complex event processing on multicore architectures. Andromeda users construct networks of plans that are interconnected via endpoint spots, describe how plans are scheduled onto threads, and process data by feeding data events to the resulting structure.
-It is currently untested and undocumented.
+It currently comes without tests but the core architecture is stable (i.e. the concepts have been fleshed out).
-Below is an example that writes events to a file and reads them back in, to give an idea of what it does:
+## Example
+Below is an example that writes events to a file and reads them back such that the JSON gets parsed in parallel, to give an idea of what it does:
     require 'andromeda'
-    w = Andromeda::CommandoWriter.new path: '/tmp/some_file'
-    w << (Commando.new :test)
-    w << (Commando.new :test, weight: 40)
-    w << (Commando.new :test, height: 20)
+    # Enter scope 'Andromeda' in irb
+    cb Andromeda
+    # Write Cmd instances to a log file, nothing fancy here
+    w = Cmd::Writer.new path: '/tmp/some_file'
+    w << :open
+    w << (Cmd::Cmd.new_input :test)
+    w << (Cmd::Cmd.new_input :test, weight: 40)
+    w << (Cmd::Cmd.new_input :test, height: 20)
     w << :close
-    r = Andromeda::CommandParser.new path: '/tmp/some_file'
-    # make r process events using a global thread pool of num_cpus threads
-    r.pool = :global
-    t = Andromeda::Tee.new
-    # make r output to t
-    r >> t
-    # start reading
-    r << :start
+    r = Cmd::Reader.new path: '/tmp/some_file'
+    p = Cmd::Parser.new
+    t = Kit::Tee.new
+    s = Sync::ScopeWaiter.new
+    # Connect the processing steps (Plans)
+    s >> r >> p >> t
+    # Force the reader to run on a separate single thread
+    r.guide = Guides.single
+    # Set multicore processing behaviour to parse Cmd's in parallel
+    p.guide = Guides.shared_pool
+    # Set logger to execute in sending thread (i.e. Parser)
+    t.guide = Guides.local
+    # Start reading and wait till processing finishes
+    s << :start
     # t will log to a Logger.new(STDERR) by default
 There is much more, dig the source, luke!
+*Note* All active development happens on the devel branch, cf. boggle/devel, too.
+## Installation
+    gem install andromeda
+## Requirements
+Any ruby that has working atomic and threadpool gems should do.
+Effectively, that is rubinius, jruby and mri ruby (if the provided threading of mri ruby is enough for your purpose).
+## Online Docs
+Docs for the latest released gems are to be found in:
+http://rubydoc.info/gems/andromeda
+## Overview
+### Key Concepts: Spots, Plans, Guides, and Tracks
+Andromeda works by sending data as events over a network of interconnected event handler endpoints (called spots).  Each spot is implemented in a container object that is called its plan.  A plan can contain multiple spots, either in the form of event handling method spots (on_name methods of the plan) or as attribute spots that point to spots in other plans. Each plan has a default entry spot, a default exit spot, and an optional spot attribute called errors for signaling exceptions. Plans are connected with each other by assigning spot references to a plan's spot attributes.
+Event handling is initiated by sending data to a plan's start spot (a special spot that encapsulates the plan's entry spot). Sending data to a spot is called method spot activation.
+During processing, andromeda distinguishes between two kinds of state: plan state and tag state. Plan state is the state of the concrete plan instance that contains an event handling method spot prior to its activation. Tag state is state that gets passed along between spots as a side-effect of event handling.
+Each plan is associated to a guide. First, guides control if and how plan instances are *copied* (or locked) prior to method spot activation to ensure isolated state access. Secondly, guides assign each method spot activation to a track that describes how and where (on which thread) it actually gets executed.
+Out of the box andromeda supports various guides: single thread (per plan or globally shared), thread pool (per plan or globally shared), execution in current thread, and spawning of a new thread per data event.
+To sum up, plans are factory objects that describe the instantiation of concrete data processing networks as guided by their associated guide objects and according to the rules of the underlying, executing tracks.
+### Quick Usage Example
+    class MyPlan < Andromeda::Plan
+      attr_spot :a, :b
+      meth_spot :alternative
+      def data_key(name, data) ; data end
+      def on_enter(key, val)
+        exit << val
+      end
+      def on_alternative(key, val)
+        return (a << val) if key == :a
+        return (b << val) if key == :b
+        signal_error ArgumentError.new("Unknown key: #{data}")
+      end
+    end
+    p = MyPlan.new
+    p.guide = Andromeda::Guides.shared_pool
+    p >> Andromeda::Kit::Tee.new(nick: :red)
+    p.a = Andromeda::Kit::Tee.new(nick: :green)
+    p.b = Andromeda::Kit::Tee.new(nick: :blue)
+    p << :a # logs to :red
+    p << :b # logs to :red
+    p.alternative << :a # logs to :green
+    p.alternative << :b # logs to :blue
+    p << :c # logs error
+### Event handling details
+Data processing starts when a data object is submitted to a spot. Processing
+happens mainly in two steps, preprocessing in the sending thread, and actual
+execution (processing) on the target track.
+#### Preprocessing
+During preprocessing, the data object may be mapped, split into a key for routing and an actual data value, it may be used to modify the set of associated tags, and finally get filtered out before sending.  Furthermore, the key may be used to switch the target spot name and track label.  All of these steps are optional and aim to push preprocessing functionality to the sending thread to avoid unnecessary thread context switches.
+Please consult the documentation of class Plan to discover the preprocessing methods that are available for overloading in subclasses.
+#### Execution/Processing
+Prior to execution, the plan's guide selects a track for spot activation, packs the plan (i.e. copies/freezes/locks its plan state as necessary), and optionally modifies the associated incoming tags.  Finally, the method spot gets activated by calling the spot's method on the packed plan inside the chosen track with the accumulated tags (plan tags and incoming tags).
+#### Tags
+Each method activation is associated with a set of tags (a hash) that contains optional parameters.  The tags may be modified by the spot method
+and are passed on whenever a spot method activates another.
+Andromeda provides a small set of reserved default tags that should not be overwritten:
+* `tags[:name]` final target spot name
+* `tags[:scope]` an Atom::Region instance that is used to wait for completion of processing (cf. below)
+* `tags[:label]` the label passed on to the guide to select the track for execution (usually identical to name)
+* `tags[:mark]` used for xor-mark based tracking of event flow
+#### Waiting for event handling completion
+Waiting for event handling completion may be achieved by utilizing a special wrapper plan (cf. Sync::ScopeWaiter). This is implemented using an atomic counter (cf. Atom::Region).
+#### Performance
+Andromeda's event handling mechanism is powerful but associated with some performance overhead due to the associated state management. It was written for using it with larger events (i.e. array slices) that user plans iterate over and is not intended for the processing of massively many small events. YMMV.
+#### Correctness
+Andromeda provides guides to ensure that state is only accessed by a single thread or that its state is locked appropriately otherwise.  However, this only works if you assign correct guides to your plans. Please read and understand the documentation of the various available guides to make sure that no unintended concurrent access of plans takes place.
+Alternatively, look at the provided plan implementations for example code.
+## Remarks
+### Inspiration
+Andromeda takes inspiration from several existing approaches / techniques in the area of concurrent programming.
+* actor model: state encapsulation
+* event processing: preprocessing in sender thread, large events
+* libdispatch: abstracting over used queues / thread pool
+* join calculus: Sync::Sync
+### Status
+Alpha at best.

data/ROADMAP.md ADDED

@@ -0,0 +1,73 @@
+# Roadmap
+This document contains planning steps and ideas for the future of andromeda.
+## Short-Term Todo's
+* Test with macruby, figure out if rubinius pre-compilation should be added
+* Convert old Pool code into Guards
+* Convert Kit into Plans
+* Convert Command into Plans, moving it into a submodule
+* Test scope, tags, threading in IRB
+* Subscopes
+### Write Test-Cases
+This needs to be done as soon as the general API has matured enough, i.e.
+around when andromeda is re-used by neoscout.
+### Write Docs
+* Get started on stable calls in the API first
+* Complete as time goes by
+* Add high-level description to README.md
+* Add examples to README.md
+* Add link to yardocs to README.md as soon as that makes sense
+* Figure out yardoc methods for documenting meth_spot and attr_spot
+### Write a better DSL for connecting plans
+* connect
+* Arrow Calculus via Kit comes to mind
+* More operators like '>>': Add multiple via splitter, join results etc.
+* This needs more practical experience with the framework first.
+### Implement map_reduce.rb
+### Implement ActorGuide
+### Implement csv.rb
+* map statval over everything that looks like a number
+## Long-Term Ideas
+### Implement more synchronization primitives
+* Buffered join
+### Implement zmq.rb
+### Implement network visualization using GraphViz
+* Really should use an abstract graph builder interface
+### Implement additional connectors
+* TCP
+* Syslog
+* EventMachine
+## Open Issues
+### Avoid memory leaks
+I'm undecided on this, but spots could be cached instead of being recreated
+on intern using the yet to be written Atom::* vars.
+## Far, far in the future
+Add automatic distribution support.
+It should not be that hard.  In the end this is just a mildly interesting graph transformation on the topology, a bit of rewiring, and some support code to run stuff on remote machines.  Ah maybe we just use capistrano for that. Of course, that would be static only. Dynamic job submission is a different story, as is at-most-once messaging (i.e. transactionality).

data/Rakefile CHANGED

@@ -5,6 +5,7 @@ require 'rspec/core/rake_task'
 require 'yard'
 require 'yard/rake/yardoc_task'
+require File.dirname(__FILE__) + '/yard_extensions/andromeda'
 desc 'Run all rspecs'
 RSpec::Core::RakeTask.new(:spec) do |spec|
@@ -15,15 +16,12 @@ end
 desc 'Run yardoc over project sources'
 YARD::Rake::YardocTask.new(:ydoc) do |t|
-  t.options = ['--verbose']
+  t.options = ['--verbose']
   t.files   = ['lib/**/*.rb', '-', 'README.md', 'AUTHORS', 'LICENSE.txt']
+  t.files  << 'CHANGELOG.md'
+  t.files  << 'ROADMAP.md'
 end
-#RDoc::Task.new(:rdoc) do |rdoc|
-#  # rdoc.main = "README.rdoc"
-#  rdoc.rdoc_files.include("lib/**/*.rb")
-#end
 desc 'Run irb in project environment'
 task :console do
   require 'irb'

data/andromeda.gemspec CHANGED

@@ -5,8 +5,8 @@ require 'andromeda/version'
 Gem::Specification.new do |s|
   s.name        = 'andromeda'
   s.version     = Andromeda::VERSION
-  s.summary     = 'Ultra light weight multicore stream processing framework based on a dataflow DSL'
-  s.description = s.summary
+  s.summary     = 'light weght framework for complex event processing based on a dataflow DSL'
+  s.description = 'Andromeda is a light weight framework for complex event processing on multicore architectures. Andromeda users construct networks of plans that are interconnected via endpoint spots, describe how plans are scheduled onto threads, and process data by feeding data events to the resulting structure.'
   s.author      = 'Stefan Plantikow'
   s.email       = 'stefanp@moviepilot.com'
   s.homepage    = 'https://github.com/moviepilot/andromeda'

data/lib/andromeda.rb CHANGED

@@ -1,14 +1,57 @@
+require 'rubygems'
+require 'set'
 require 'json'
 require 'logger'
-require 'threadpool'
-require 'facter'
+require 'delegate'
+require 'singleton'
+require 'atomic'
 require 'thread'
+require 'facter'
+require 'threadpool'
 Facter.loadfacts
-require 'andromeda/id'
-require 'andromeda/pools'
-require 'andromeda/scope'
-require 'andromeda/andromeda'
-require 'andromeda/helpers'
-require 'andromeda/join'
-require 'andromeda/commando'
+require 'andromeda/version'
+module Andromeda
+	def self.files
+		f = []
+		f << 'andromeda/impl/to_s'
+		f << 'andromeda/impl/atom'
+		f << 'andromeda/impl/xor_id'
+		f << 'andromeda/impl/class_attr'
+		f << 'andromeda/impl/proto_plan'
+		f << 'andromeda/id'
+		f << 'andromeda/atom'
+		f << 'andromeda/error'
+		f << 'andromeda/copy_clone'
+		f << 'andromeda/guide_track'
+		f << 'andromeda/pool_guide'
+		f << 'andromeda/spot'
+		f << 'andromeda/plan'
+		f << 'andromeda/sync'
+		f << 'andromeda/sugar'
+		f << 'andromeda/kit'
+		f << 'andromeda/cmd'
+		f << 'andromeda/map_reduce'
+		f
+	end
+	def self.load_relative(f)
+		path = "#{File.join(File.dirname(caller[0]), f)}.rb"
+ 	  load path
+	end
+	def self.reload!
+		files.each { |f| load_relative f }
+	end
+end
+Andromeda.files.each { |f| require f }