crunchpipe 0.0.1beta1

data/.gitignore ADDED
@@ -0,0 +1,19 @@
+ ## MAC OS
+ .DS_Store
+
+ ## TEXTMATE
+ *.tmproj
+ tmtags
+
+ ## EMACS
+ *~
+ \#*
+ .\#*
+
+ ## VIM
+ *.swp
+
+ ## PROJECT
+ .bundle
+ coverage*
+ pkg*
data/.rvmrc ADDED
@@ -0,0 +1 @@
+ rvm use 1.9.2@CrunchPipe --create
data/Gemfile ADDED
@@ -0,0 +1,16 @@
+ # A sample Gemfile
+ source "http://rubygems.org"
+
+ gem 'rake'
+ gem 'parallel'
+
+ group :development do
+   gem 'ruby-debug19'
+ end
+
+ group :test do
+   gem 'rspec'
+   gem 'simplecov'
+ end
+
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,49 @@
+ PATH
+   remote: .
+   specs:
+     crunchpipe (0.0.1beta1)
+
+ GEM
+   remote: http://rubygems.org/
+   specs:
+     archive-tar-minitar (0.5.2)
+     columnize (0.3.4)
+     diff-lcs (1.1.3)
+     linecache19 (0.5.12)
+       ruby_core_source (>= 0.1.4)
+     multi_json (1.0.3)
+     parallel (0.5.9)
+     rake (0.9.2)
+     rspec (2.6.0)
+       rspec-core (~> 2.6.0)
+       rspec-expectations (~> 2.6.0)
+       rspec-mocks (~> 2.6.0)
+     rspec-core (2.6.4)
+     rspec-expectations (2.6.0)
+       diff-lcs (~> 1.1.2)
+     rspec-mocks (2.6.0)
+     ruby-debug-base19 (0.11.25)
+       columnize (>= 0.3.1)
+       linecache19 (>= 0.5.11)
+       ruby_core_source (>= 0.1.4)
+     ruby-debug19 (0.11.6)
+       columnize (>= 0.3.1)
+       linecache19 (>= 0.5.11)
+       ruby-debug-base19 (>= 0.11.19)
+     ruby_core_source (0.1.5)
+       archive-tar-minitar (>= 0.5.2)
+     simplecov (0.5.3)
+       multi_json (~> 1.0.3)
+       simplecov-html (~> 0.5.3)
+     simplecov-html (0.5.3)
+
+ PLATFORMS
+   ruby
+
+ DEPENDENCIES
+   crunchpipe!
+   parallel
+   rake
+   rspec
+   ruby-debug19
+   simplecov
data/README.md ADDED
@@ -0,0 +1,74 @@
+ CrunchPipe
+ ==========
+
+ CrunchPipe is a library for creating and coordinating modular
+ computation pipelines. Computation can take place in parallel, and data
+ sources are kept separate from the computation itself, leading to
+ modular and maintainable programs.
+
+ The Basics
+ ----------
+
+ CrunchPipe uses computation pipelines connected to streams to
+ model the processing of data.
+
+
+
+     /--------------\
+     | Input Stream |
+     \--------------/
+
+           ||
+           \/
+
+       /----------\
+       | Pipeline |
+       |----------|
+       |   Op 1   |
+       |----------|
+       |   Op 2   |
+       |----------|
+       |   Op 3   |
+       \----------/
+
+           ||
+           \/
+
+     /---------------\
+     | Output Stream |
+     \---------------/
+
+ Streams
+ ----------
+
+ Streams are the sources and sinks of data. You create a stream and add
+ elements to it. All pipelines connected to a stream are alerted
+ when data is added to it. Pipelines also write their finished
+ results to a stream, which can, optionally, have other pipelines
+ connected to it. Since streams are also data sinks, streams can be
+ provided with the means to save the results of computation in an
+ abstract and general way.
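The notification mechanism is Ruby's standard `Observable` module, which `CrunchPipe::Stream` mixes in. A minimal self-contained sketch of the idea — `MiniStream` and `Collector` are illustrative names, not CrunchPipe's API:

```ruby
require 'observer'

# A stream is just an Observable: adding elements marks it changed
# and pushes the new elements to every registered observer.
class MiniStream
  include Observable

  def add(elements)
    changed
    notify_observers(self, elements) # every connected pipeline is alerted
  end
end

# A stand-in "pipeline" that simply records what it is told about.
class Collector
  attr_reader :seen

  def initialize
    @seen = []
  end

  # Observable calls #update on each observer.
  def update(_stream, elements)
    @seen.concat(elements)
  end
end

stream = MiniStream.new
collector = Collector.new
stream.add_observer(collector)

stream.add([1, 2, 3])
collector.seen # => [1, 2, 3]
```

Because both sides only speak through `add_observer`/`update`, the data source never needs to know what computation is attached downstream.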
+
+ Pipelines
+ ----------
+
+ Pipelines represent computational processes. When a pipeline is
+ created, you can bind an arbitrary number of transformations to it in
+ the form of blocks, creating an "assembly line" of operations to be
+ performed on data. Pipelines are connected to streams and are
+ notified when new data is available. Each new element from the stream
+ is run through the bound operations in the order in which they
+ were bound to the pipeline. The elements obtained from
+ streams can, however, be processed in parallel (with threads or
+ processes), leading to performance improvements. Since the order of
+ operation application is preserved, it is the elements from the stream
+ that are processed in parallel. The parallelism is encapsulated within
+ the pipeline, freeing the developer from the concerns traditionally
+ associated with writing parallel code.
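The "assembly line" can be sketched with plain procs. This is a simplified stand-in for what `Pipeline#bind` and `Pipeline#process` do, with illustrative names, not CrunchPipe's actual classes:

```ruby
# Operations run in bound order on each element; elements are independent
# of one another, which is what makes per-element parallelism safe.
operations = []
operations << ->(x) { x + 1 }  # Op 1
operations << ->(x) { x * 10 } # Op 2

# Run one element through every bound operation, in order.
process = ->(element) { operations.reduce(element) { |acc, op| op.call(acc) } }

# Sequential processing...
[1, 2, 3].map { |e| process.call(e) } # => [20, 30, 40]

# ...or per-element parallelism with threads; result order is preserved
# because each thread handles exactly one element.
threads = [1, 2, 3].map { |e| Thread.new { process.call(e) } }
threads.map(&:value) # => [20, 30, 40]
```

Because each element runs through the whole chain independently, swapping the sequential `map` for per-element threads (or, as this gem does, the `parallel` gem) does not change the results.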
+
+
+ ToDo
+ ----------
+
+ * Get specs passing, dammit
+ * Improved DSL
data/Rakefile ADDED
@@ -0,0 +1,14 @@
+
+ require "rubygems"
+ require "bundler/setup"
+
+ require 'rake'
+ require 'rspec/core/rake_task'
+ require 'bundler/gem_tasks'
+
+ task :default => :spec
+
+ desc "Run all examples"
+ RSpec::Core::RakeTask.new(:spec) do |t|
+   t.rspec_opts = '--format documentation --color'
+ end
data/configure ADDED
@@ -0,0 +1,9 @@
+ #!/usr/bin/env bash
+ # Configure build environment.
+ #
+ # @author Benjamin Oakes <hello@benjaminoakes.com>
+
+ echo "[$0] starting"
+ gem install bundler --version '~> 1.0.21' --no-rdoc --no-ri
+ bundle install
+ echo "[$0] finished"
data/crunchpipe.gemspec ADDED
@@ -0,0 +1,21 @@
+ # -*- encoding: utf-8 -*-
+ $:.push File.expand_path("../lib", __FILE__)
+ require "crunchpipe/version"
+
+ Gem::Specification.new do |s|
+   s.name        = "crunchpipe"
+   s.version     = Crunchpipe::VERSION
+   s.authors     = ["yonkeltron"]
+   s.email       = ["yonkeltron@gmail.com"]
+   s.homepage    = "https://github.com/yonkeltron/CrunchPipe"
+   s.summary     = %q{A library for modular, pipeline-based computation}
+   s.description = %q{Using the data-pipeline pattern loosely-based on dataflow programming, CrunchPipe helps you to write modular, cohesive, loosely-coupled programs for computation with optional features for parallelization.}
+   s.has_rdoc    = false
+
+   s.rubyforge_project = "crunchpipe"
+
+   s.files         = `git ls-files`.split("\n")
+   s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
+   s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
+   s.require_paths = ["lib"]
+ end
data/examples/complete_hello_world.rb ADDED
@@ -0,0 +1,25 @@
+ require_relative '../lib/crunchpipe'
+
+ provider = CrunchPipe::DataProvider.new
+
+ input_stream = CrunchPipe::Stream.new
+
+ pipeline = CrunchPipe::Pipeline.new(:parallel => true)
+
+ pipeline.bind do |element|
+   puts "--- Processing #{element}..."
+   element + 1
+ end
+
+ output_stream = CrunchPipe::Stream.new
+
+ end_point = CrunchPipe::DataEndPoint.new do |data|
+   puts "+++ End point got #{data}"
+ end
+
+ provider | input_stream
+ pipeline < input_stream # Pipeline defines #< (not #<<) for subscribing to a stream
+ pipeline | output_stream
+ output_stream > end_point
+
+ provider.provide([1,2,3,4,5])
data/lib/crunchpipe.rb ADDED
@@ -0,0 +1,14 @@
+ require_relative './crunchpipe/pipeline'
+ require_relative './crunchpipe/stream'
+ require_relative './crunchpipe/data_end_point'
+ require_relative './crunchpipe/data_provider'
+
+ module CrunchPipe
+   # Inherit from StandardError (not Exception) so these errors are
+   # caught by a bare `rescue`.
+   class InvalidProcessorError < StandardError
+   end
+
+   class MissingActionError < StandardError
+   end
+ end
data/lib/crunchpipe/data_end_point.rb ADDED
@@ -0,0 +1,17 @@
+ module CrunchPipe
+   class DataEndPoint
+
+     def initialize(&block)
+       if block
+         @default_action_block = block
+       else
+         fail MissingActionError
+       end
+     end
+
+     def receive(data)
+       @default_action_block.call data
+     end
+
+   end
+ end
data/lib/crunchpipe/data_provider.rb ADDED
@@ -0,0 +1,22 @@
+ module CrunchPipe
+   class DataProvider
+
+     attr_reader :output_streams
+
+     def initialize(stream = nil)
+       if stream
+         @output_streams = [stream]
+       else
+         @output_streams = []
+       end
+     end
+
+     def |(stream)
+       @output_streams.push stream
+     end
+
+     def provide(data)
+       @output_streams.each {|stream| stream.add data }
+     end
+   end
+ end
data/lib/crunchpipe/pipeline.rb ADDED
@@ -0,0 +1,63 @@
+ require 'parallel'
+
+ module CrunchPipe
+   class Pipeline
+     attr_accessor :parallel
+     attr_reader :processors, :default_sink
+
+     def initialize(args)
+       @parallel = args[:parallel]
+       @processors = []
+       @default_sink = nil
+     end
+
+     def bind(processor = nil, &block)
+       if block_given?
+         Pipeline.check_arity(block)
+         @processors.push block
+       elsif processor
+         Pipeline.check_arity(processor)
+         @processors.push processor
+       end
+     end
+
+     def update(stream, elements)
+       if @parallel
+         results = Parallel.map(elements) {|element| process element }
+       else
+         results = elements.map {|element| process element }
+       end
+
+       @default_sink.add results if @default_sink
+
+       results
+     end
+
+     def process(element)
+       result = element
+       @processors.each do |processor|
+         result = processor.call(result)
+       end
+
+       result
+     end
+
+     def <(stream)
+       stream.add_observer(self)
+     end
+
+     def |(stream)
+       @default_sink = stream
+     end
+
+     def self.check_arity(processor)
+       unless processor.is_a?(Proc)
+         fail CrunchPipe::InvalidProcessorError, "Processor must be a Proc but was a #{processor.class}"
+       end
+
+       unless processor.arity == 1
+         fail CrunchPipe::InvalidProcessorError, "Processor must take 1 argument but instead takes #{processor.arity}"
+       end
+     end
+   end
+ end
data/lib/crunchpipe/stream.rb ADDED
@@ -0,0 +1,21 @@
+ require 'thread'
+ require 'observer'
+
+ module CrunchPipe
+   class Stream
+     include Observable
+
+     attr_reader :default_end_point
+
+     def add(elements = [])
+       changed
+       notify_observers(self, elements)
+       @default_end_point.receive elements if @default_end_point
+     end
+
+     def >(end_point)
+       @default_end_point = end_point
+     end
+
+   end
+ end
data/lib/crunchpipe/version.rb ADDED
@@ -0,0 +1,3 @@
+ module Crunchpipe
+   VERSION = "0.0.1beta1"
+ end
data/spec/data_end_point_spec.rb ADDED
@@ -0,0 +1,39 @@
+ require 'spec_helper'
+
+ describe CrunchPipe::DataEndPoint do
+
+   describe '.new' do
+     context 'given a block' do
+       it 'does not throw' do
+         lambda {
+           CrunchPipe::DataEndPoint.new {}
+         }.should_not raise_error
+       end
+     end
+
+     context 'given no block' do
+       it 'throws' do
+         lambda {
+           CrunchPipe::DataEndPoint.new
+         }.should raise_error(CrunchPipe::MissingActionError)
+       end
+     end
+   end
+
+   describe '#receive' do
+     it 'yields data to the block' do
+       results = []
+
+       data_end_point = CrunchPipe::DataEndPoint.new do |data|
+         results.push data
+       end
+
+       fake_data = [1,2,3]
+
+       data_end_point.receive(fake_data)
+
+       results.should include(fake_data)
+     end
+   end
+
+ end
data/spec/data_provider_spec.rb ADDED
@@ -0,0 +1,44 @@
+ require 'spec_helper'
+
+ describe CrunchPipe::DataProvider do
+   let(:stream) { stub(CrunchPipe::Stream) }
+   describe '.new' do
+     context 'given no output stream' do
+       it 'sets an empty array for output streams' do
+         provider = CrunchPipe::DataProvider.new
+         provider.output_streams.should == []
+       end
+     end
+
+     context 'given an output stream' do
+       it 'includes the stream in the output stream list' do
+         provider = CrunchPipe::DataProvider.new(stream)
+         provider.output_streams.should include(stream)
+       end
+     end
+   end
+
+   describe '#|' do
+     it 'adds stream to output stream list' do
+       provider = CrunchPipe::DataProvider.new
+       provider | stream
+       provider.output_streams.should include(stream)
+     end
+   end
+
+   describe '#provide' do
+     it 'sends data to registered streams' do
+       streams = (0..5).map { stub(CrunchPipe::Stream, :add => true) }
+       provider = CrunchPipe::DataProvider.new
+
+       fake_data = [1,2,3,4,5]
+
+       streams.each do |stream|
+         provider | stream
+         stream.should_receive(:add).with(fake_data)
+       end
+
+       provider.provide fake_data
+     end
+   end
+ end
data/spec/pipeline_spec.rb ADDED
@@ -0,0 +1,163 @@
+ require 'spec_helper'
+
+ describe CrunchPipe::Pipeline do
+   before(:each) do
+     @pipeline_name = 'panda'
+     @pipeline_parallel = false
+     @pipeline = CrunchPipe::Pipeline.new(:parallel => @pipeline_parallel)
+   end
+
+   context 'initialization' do
+     it "sets parallel flag" do
+       @pipeline.parallel.should == @pipeline_parallel
+     end
+
+     it 'sets an empty processor array' do
+       @pipeline.processors.should == []
+     end
+   end
+
+   describe "#bind" do
+     context 'given a proc' do
+       it 'adds proc to pipeline' do
+         processor = Proc.new {|a|}
+         expect {
+           @pipeline.bind processor
+         }.to change(@pipeline.processors, :count).by(1)
+
+         @pipeline.processors.should include(processor)
+       end
+     end
+
+     context 'given a block' do
+       it 'adds block to pipeline' do
+         expect {
+           @pipeline.bind do |i|
+           end
+         }.to change(@pipeline.processors, :count).by(1)
+       end
+     end
+   end
+
+   describe '#<' do
+     let(:fake_source) do
+       stub(:add_observer => nil,
+            :delete_observer => nil)
+     end
+
+     it 'subscribes to source' do
+       fake_source.should_receive(:add_observer).with(@pipeline)
+       @pipeline < fake_source
+     end
+   end
+
+   describe '#|' do
+     it 'sets stream as default sink' do
+       fake_sink = 'Fake Stream'
+       @pipeline | fake_sink
+       @pipeline.default_sink.should == fake_sink
+     end
+   end
+
+   describe '#process' do
+     context 'given a single processor' do
+       it 'runs element through the processors' do
+         @pipeline.bind do |elem|
+           elem + 1
+         end
+
+         @pipeline.process(1).should == 2
+       end
+     end
+
+     context 'given multiple processors' do
+       it 'runs element through all processors' do
+         n = 5
+
+         n.times do
+           @pipeline.bind lambda {|elem|
+             elem + 1
+           }
+         end
+
+         @pipeline.processors.count.should == n
+
+         @pipeline.process(1).should == n+1
+       end
+     end
+   end
+
+   describe '#update' do
+     let(:data) { [1,1,1,1] }
+     let(:output) { stub(CrunchPipe::Stream, :add_observer => true, :add => true) }
+
+     before(:each) do
+       @pipeline.bind lambda {|elem|
+         elem + 1
+       }
+
+       @pipeline | output
+
+       @pipeline.parallel = false
+     end
+
+     context 'given a non-parallel pipeline' do
+       it 'processes all elements' do
+         @pipeline.should_receive(:process).with(1).exactly(data.length).times.and_return(2)
+         @pipeline.update(output, data)
+       end
+
+       it 'adds results to output stream' do
+         output.should_receive(:add).with(data.map {|i| i + 1 })
+         @pipeline.update(output, data)
+       end
+     end
+
+     context 'given a parallel pipeline' do
+       before(:each) do
+         @pipeline.parallel = true
+       end
+
+       it 'processes all elements in parallel' do
+         Parallel.should_receive(:map)
+         @pipeline.update(output, data)
+       end
+     end
+   end
+
+   describe '.check_arity' do
+     context 'given a non-proc' do
+       it 'throws' do
+         lambda {
+           CrunchPipe::Pipeline.check_arity('Panda')
+         }.should raise_error
+       end
+     end
+
+     context 'given a Proc' do
+       context 'with an arity of 0' do
+         it 'throws' do
+           lambda {
+             CrunchPipe::Pipeline.check_arity(Proc.new {})
+           }.should raise_error
+         end
+       end
+
+       context 'with an arity of 1' do
+         it 'does not throw' do
+           lambda {
+             CrunchPipe::Pipeline.check_arity(Proc.new {|a|})
+           }.should_not raise_error
+         end
+       end
+
+       context 'with an arity greater than 1' do
+         it 'throws' do
+           lambda {
+             CrunchPipe::Pipeline.check_arity(Proc.new {|a,b|})
+           }.should raise_error
+         end
+       end
+     end
+   end
+ end
data/spec/spec_helper.rb ADDED
@@ -0,0 +1,5 @@
+ require 'simplecov'
+ SimpleCov.start
+
+ require_relative '../lib/crunchpipe'
+ require 'parallel'
data/spec/stream_spec.rb ADDED
@@ -0,0 +1,49 @@
+ require 'spec_helper'
+
+ describe CrunchPipe::Stream do
+   before(:each) do
+     @stream = CrunchPipe::Stream.new
+   end
+
+   it 'is observable' do
+     @stream.class.should include(Observable)
+   end
+
+   describe '#add' do
+     let(:pipeline) { stub(CrunchPipe::Pipeline, :update => true) }
+     let(:data) { [1,1,1,1] }
+     let(:fake_end_point) { stub(CrunchPipe::DataEndPoint, :receive => true) }
+
+     it 'notifies observers' do
+       @stream.add_observer(pipeline)
+
+       pipeline.should_receive(:update).exactly(1).times
+
+       @stream.add data
+     end
+
+     context 'given a default_end_point' do
+       it 'calls receive on the end point with the elements' do
+         @stream > fake_end_point
+         fake_end_point.should_receive(:receive).with(data)
+         @stream.add data
+       end
+     end
+
+     context 'given no default_end_point' do
+       it 'does not pass elements to end point' do
+         fake_end_point.should_not_receive(:receive)
+         @stream.add data
+       end
+     end
+   end
+
+   describe '#>' do
+     it 'sets the default end point' do
+       fake_end_point = "Fake End Point"
+       @stream > fake_end_point
+       @stream.default_end_point.should == fake_end_point
+     end
+   end
+ end
+
metadata ADDED
@@ -0,0 +1,70 @@
+ --- !ruby/object:Gem::Specification
+ name: crunchpipe
+ version: !ruby/object:Gem::Version
+   version: 0.0.1beta1
+   prerelease: 5
+ platform: ruby
+ authors:
+ - yonkeltron
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2011-10-16 00:00:00.000000000Z
+ dependencies: []
+ description: Using the data-pipeline pattern loosely-based on dataflow programming,
+   CrunchPipe helps you to write modular, cohesive, loosely-coupled programs for computation
+   with optional features for parallelization.
+ email:
+ - yonkeltron@gmail.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - .gitignore
+ - .rvmrc
+ - Gemfile
+ - Gemfile.lock
+ - README.md
+ - Rakefile
+ - configure
+ - crunchpipe.gemspec
+ - examples/complete_hello_world.rb
+ - lib/crunchpipe.rb
+ - lib/crunchpipe/data_end_point.rb
+ - lib/crunchpipe/data_provider.rb
+ - lib/crunchpipe/pipeline.rb
+ - lib/crunchpipe/stream.rb
+ - lib/crunchpipe/version.rb
+ - spec/data_end_point_spec.rb
+ - spec/data_provider_spec.rb
+ - spec/pipeline_spec.rb
+ - spec/spec_helper.rb
+ - spec/stream_spec.rb
+ homepage: https://github.com/yonkeltron/CrunchPipe
+ licenses: []
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   none: false
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+       segments:
+       - 0
+       hash: -269882111
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   none: false
+   requirements:
+   - - ! '>'
+     - !ruby/object:Gem::Version
+       version: 1.3.1
+ requirements: []
+ rubyforge_project: crunchpipe
+ rubygems_version: 1.8.11
+ signing_key:
+ specification_version: 3
+ summary: A library for modular, pipeline-based computation
+ test_files: []