RubyGems - pyper_rb - Versions diffs - 1.2.0 - Mend

pyper_rb 1.2.0

Files changed (57) hide show

checksums.yaml +7 -0
data/.gitignore +16 -0
data/Gemfile +24 -0
data/LICENSE.txt +22 -0
data/README.md +178 -0
data/Rakefile +10 -0
data/lib/pyper/all.rb +4 -0
data/lib/pyper/pipeline.rb +63 -0
data/lib/pyper/pipes/cassandra/all_items_reader.rb +40 -0
data/lib/pyper/pipes/cassandra/deleter.rb +19 -0
data/lib/pyper/pipes/cassandra/mod_key.rb +32 -0
data/lib/pyper/pipes/cassandra/mod_key_reader.rb +41 -0
data/lib/pyper/pipes/cassandra/pagination_decoding.rb +22 -0
data/lib/pyper/pipes/cassandra/pagination_encoding.rb +17 -0
data/lib/pyper/pipes/cassandra/reader.rb +35 -0
data/lib/pyper/pipes/cassandra/writer.rb +24 -0
data/lib/pyper/pipes/cassandra.rb +8 -0
data/lib/pyper/pipes/content/fetch.rb +30 -0
data/lib/pyper/pipes/content/store.rb +36 -0
data/lib/pyper/pipes/content.rb +2 -0
data/lib/pyper/pipes/default_values.rb +15 -0
data/lib/pyper/pipes/field_rename.rb +23 -0
data/lib/pyper/pipes/force_enumerator.rb +13 -0
data/lib/pyper/pipes/model/attribute_deserializer.rb +27 -0
data/lib/pyper/pipes/model/attribute_serializer.rb +34 -0
data/lib/pyper/pipes/model/attribute_validation.rb +57 -0
data/lib/pyper/pipes/model/virtus_deserializer.rb +39 -0
data/lib/pyper/pipes/model/virtus_parser.rb +13 -0
data/lib/pyper/pipes/model.rb +5 -0
data/lib/pyper/pipes/no_op.rb +15 -0
data/lib/pyper/pipes/pry.rb +9 -0
data/lib/pyper/pipes/remove_fields.rb +22 -0
data/lib/pyper/pipes.rb +8 -0
data/lib/pyper/version.rb +3 -0
data/lib/pyper.rb +4 -0
data/pyper_rb.gemspec +22 -0
data/test/fixtures/cass_schema_config.yml +6 -0
data/test/fixtures/test_datastore/schema.cql +23 -0
data/test/test_helper.rb +34 -0
data/test/unit/pyper/pipeline_test.rb +81 -0
data/test/unit/pyper/pipes/cassandra/all_items_reader_test.rb +47 -0
data/test/unit/pyper/pipes/cassandra/deleter_test.rb +37 -0
data/test/unit/pyper/pipes/cassandra/mod_key_reader_test.rb +47 -0
data/test/unit/pyper/pipes/cassandra/pagination_decoding_test.rb +29 -0
data/test/unit/pyper/pipes/cassandra/pagination_encoding_test.rb +29 -0
data/test/unit/pyper/pipes/cassandra/reader_test.rb +79 -0
data/test/unit/pyper/pipes/cassandra/writer_test.rb +51 -0
data/test/unit/pyper/pipes/content/fetch_test.rb +38 -0
data/test/unit/pyper/pipes/content/store_test.rb +49 -0
data/test/unit/pyper/pipes/field_rename_test.rb +24 -0
data/test/unit/pyper/pipes/model/attribute_deserializer_test.rb +69 -0
data/test/unit/pyper/pipes/model/attribute_serializer_test.rb +60 -0
data/test/unit/pyper/pipes/model/attribute_validation_test.rb +96 -0
data/test/unit/pyper/pipes/model/virtus_deserializer_test.rb +75 -0
data/test/unit/pyper/pipes/no_op_test.rb +12 -0
data/test/unit/pyper/pipes/remove_fields_test.rb +24 -0
metadata +147 -0

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 646bae638d6adae8b2456c3b6cfc07a7d0f0968f
+  data.tar.gz: 44c1fc2cc772dd12205a052f3e974ecec835d553
+SHA512:
+  metadata.gz: 29e850650ad4223b4e34920b7fb2d0d87ebc7434e09867bf7d6232f19f1dd43fae76c9fcf7e44c4d83508a838b78854575b3b497790f4b2bc933e128eaddda90
+  data.tar.gz: 7f741d99c2d945128b6b8c454cfd84d6709d18955b5f0cc29c35dda57b4ca78a448797d68705f5cb352017491f02806387d1e3acbc334b17bdc5f3cc50e81531

data/.gitignore ADDED Viewed

@@ -0,0 +1,16 @@
+/.bundle/
+/.yardoc
+/Gemfile.lock
+/_yardoc/
+/coverage/
+/doc/
+/pkg/
+/spec/reports/
+/tmp/
+*.bundle
+*.so
+*.o
+*.a
+mkmf.log
+.ruby-version
+.idea

data/Gemfile ADDED Viewed

@@ -0,0 +1,24 @@
+source 'https://rubygems.org'
+# Specify your gem's dependencies in storage_pipeline.gemspec
+gemspec
+group :development, :test do
+  gem "pry"
+  gem "awesome_print"
+  gem 'm', :git => 'git@github.com:ANorwell/m.git', :branch => 'minitest_5'
+  gem 'cass_schema', :git => 'git@github.com:backupify/cass_schema.git', :tag => "0.0.4"
+  # make sure to use v2.0.1 to avoid issues with super column families
+  gem 'cassandra-driver', :git => 'git@github.com:datastax/ruby-driver.git', :tag => 'v2.0.1'
+end
+group :test do
+  gem 'minitest_should', :git => 'git@github.com:citrus/minitest_should.git'
+  gem "google-api-client", "0.7.1"
+  gem "mocha"
+  gem 'virtus'
+end
+gem 'storage_strategy', :git => 'git@github.com:backupify/storage_strategy.git'
+gem 'cassava', :git => 'git@github.com:backupify/cassava.git'

data/LICENSE.txt ADDED Viewed

@@ -0,0 +1,22 @@
+Copyright (c) 2015 Datto
+MIT License
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/README.md ADDED Viewed

@@ -0,0 +1,178 @@
+# Pyper
+Flexible pipelines for content storage and retrieval.
+Pyper allows the construction of pipelines to store and retrieve data. Each pipe in the pipeline modifies the
+information in the pipeline before passing it to the next step. By composing pipes in different ways, different
+data access patterns can be created.
+## Usage
+Require the pyper library and the pipes that you need:
+```ruby
+require 'pyper'
+require 'pyper/pipes/model'      # Import model-related pipes
+require 'pyper/pipes/cassandra'  # Import Cassandra-related pipes
+require 'pyper/pipes/content'    # Import content storage-related pipes
+```
+Or, import the entire library using `require 'pyper/all'`
+Create a pipeline composed of a set of pipes:
+```ruby
+write_pipeline = Pyper::Pipeline.create do
+  add Pyper::Pipes::Model::AttributeSerializer.new
+  add Pyper::Pipes::FieldRename.new(:to => :to_emails, :from => :from_email)
+  add Pyper::Pipes::Cassandra::ModKey.new
+  add Pyper::Pipes::Cassandra::Writer.new(:table_1, metadata_client)
+  add Pyper::Pipes::Cassandra::Writer.new(:table_2, indexes_client)
+  add Pyper::Pipes::Cassandra::Writer.new(:table_3, indexes_client)
+end
+```
+Then, push data down the pipe:
+```ruby
+result = write_pipeline.push(attributes)
+```
+View the value of the set of successive transformations performed by the pipe:
+```ruby
+result.value
+```
+A pipeline performs a series of sequential transformations on the data being passed down the pipe. It may also have side
+effects, such as storing data. The specific pipes provided in this library are aimed at two uses: writing and
+reading data.
+A write pipeline takes an initial set of attributes, performing a set of transformations such as serialization and so on,
+before storing the data in one or more storage outputs. For example, this gem provides storage pipes for Cassandra and
+Amazon S3, but it is easy to write a pipe for other storage backends.
+Conversely, a read pipeline initially takes a set of options. These options may be transformed by the pipeline, and then used
+to read data from an external source. This data may then be transformed by the pipeline - for example, performing
+deserialization or data mapping operations.
+```ruby
+read_pipeline = Pyper::Pipeline.create do
+  add Pyper::Pipes::Cassandra::PaginationDecoding.new
+  add Pyper::Pipes::Cassandra::Reader.new(:table, indexes_client)
+  add Pyper::Pipes::FieldRename.new(:to_emails => :to, :from_email => :from)
+  add Pyper::Pipes::Cassandra::PaginationEncoding.new
+  add Pyper::Pipes::Model::VirtusDeserializer.new(message_attributes)
+  add Pyper::Pipes::Model::VirtusParser.new(MyModelClass)
+end
+result = read_pipeline.push(:row => '1', :id => 'i', :paging_state => 'sdf')
+result.value # Enumerator with matching instances of MyModelClass
+```
+Note that pipe order matters. In the example read pipe above, `Cassandra::PaginationDecoding` decodes pagination options, thus
+performing an operation on the initial options provided. The `Cassandra::Reader` pipe uses the options to retrieve items from
+Cassandra, and subsequent elements of the pipeline are designed to transform this retrieved data. Thus, it would not be
+sensible for the `Cassandra::PaginationDecoding` pipe to come after the `Cassandra::Reader` pipe.
+### Creating and using pipelines
+A pipeline is an instance of `Pyper::Pipeline`, to which pipes are appended using the `<<` or `add` operators.
+```ruby
+my_pipeline = Pyper::Pipeline.new <<
+  Pyper::Pipes::Cassandra::PaginationDecoding.new <<
+  Pyper::Pipes::Cassandra::Reader.new(:table, indexes_client) <<
+  Pyper::Pipes::Cassandra::PaginationEncoding.new
+```
+However, the `create` method makes pipeline construction easier. The above example becomes the following:
+```ruby
+my_pipeline = Pyper::Pipeline.create do
+  add Pyper::Pipes::Cassandra::PaginationDecoding.new
+  add Pyper::Pipes::Cassandra::Reader.new(:table, indexes_client)
+  add Pyper::Pipes::Cassandra::PaginationEncoding.new
+end
+```
+To invoke the pipeline, use the `push` method and provide the data to enter the pipeline:
+```ruby
+pipe_status = my_pipeline.push(:row => '1', :id => 'i')
+```
+Here, `pipe_status` is a `Pyper::PipeStatus` object, which contains two attributes, `pipe_status.value` and
+`pipe_status.status`. The value is the returned result of the series of transformations applied by the pipeline. The status
+contains metadata about the push operation that might be created by each pipe in the pipeline.
+### Creating new pipes
+A pipe must implement either the `call` or the `pipe` method, which takes two arguments: the object entering the pipe, as well as the status. It
+should return the object leaving the pipe:
+```ruby
+class MyPipe
+  def call(attributes, status = {})
+    attributes[:c] = attributes[:a] + attributes[:b]
+    status[:processed_by_my_pipe] = true
+    attributes
+  end
+end
+```
+This example pipe above modifies `attributes` before returning it. It also sets a flag on the status object.
+Note that because the pipe need only respond to `call`, lambdas and procs are valid pipes.
+Generally, pipes in a write pipeline operate on an attributes hash (containing the attributes meant to be written to a data
+store). Pipes in a read pipeline initially might modify arguments. A data retrieval pipe would then use the arguments to
+fetch data, and subsequent pipes would perform operations on the enumeration of data items. Thus, a read pipe might look
+something like:
+```ruby
+class Deserialize
+  def call(items, status = {})
+    items.map { |item| deserialize(item) }
+  end
+  def deserialize(item)
+    # ...
+  end
+end
+```
+### Debugging Pipelines
+Because pipes are expected to respond to `#call` you can simply add Procs and Lambdas in your code to debug pipelines:
+```rb
+pl = Pyper::Pipeline.create do
+  add Pyper::Pipes::Model::AttributeSerializer.new
+  add -> (*args) { binding.pry }
+  add Pyper::Pipes::Cassandra::Writer.new(:my_table, client, fields)
+end
+```
+## Installation
+Add this line to your application's Gemfile:
+```ruby
+gem 'pyper_rb', :git => 'git@github.com:backupify/pyper.git'
+```
+And then execute:
+    $ bundle
+Or install it yourself as:
+    $ gem install pyper_rb
+## Contributing
+1. Fork it ( https://github.com/backupify/pyper/fork )
+2. Create your feature branch (`git checkout -b my-new-feature`)
+3. Commit your changes (`git commit -am 'Add some feature'`)
+4. Push to the branch (`git push origin my-new-feature`)
+5. Create a new Pull Request

data/Rakefile ADDED Viewed

@@ -0,0 +1,10 @@
+require "bundler/gem_tasks"
+require "rake"
+require "rake/testtask"
+Rake::TestTask.new(:test) do |test|
+  test.libs.concat(%w[test])
+  test.pattern = "test/**/*_test.rb"
+end
+task :default => :test

data/lib/pyper/all.rb ADDED Viewed

@@ -0,0 +1,4 @@
+require_relative '../pyper'
+require_relative 'pipes/cassandra'
+require_relative 'pipes/content'
+require_relative 'pipes/model'

data/lib/pyper/pipeline.rb ADDED Viewed

@@ -0,0 +1,63 @@
+require_relative 'pipes'
+module Pyper
+  class PipeStatus < Struct.new(:value, :status); end
+  class Pipeline
+    class << self
+      # Provides an interface for creating a pipeline. The provided block will be called
+      # in the context of a newly-created pipeline, to which pipes can be added using #add.
+      # @return [Pyper::Pipeline] The created pipeline.
+      def create(&block)
+        new.tap do |pipeline|
+          if block_given?
+            original_self = eval('self', block.binding)
+            pipeline.instance_variable_set(:@original_self, original_self)
+            pipeline.instance_eval(&block)
+            pipeline.remove_instance_variable(:@original_self)
+          end
+        end
+      end
+    end
+    attr_reader :pipes
+    def initialize(pipes = [])
+      @pipes = pipes
+    end
+    # @param pipe [#pipe|#call] A pipe to append to the pipeline
+    def <<(pipe)
+      pipes << pipe
+      self
+    end
+    alias_method :add, :<<
+    # Insert something into the pipeline to be processed
+    # @param input [Object] The original input data to enter the pipeline. This may be mutated by each pipe in the pipeline.
+    # @return [PipeStatus] the pipe status, containing both the value and a status hash.
+    def push(input)
+      status = {}
+      value = pipes.inject(input) do |attributes, p|
+        if p.respond_to?(:call)
+          p.call(attributes, status)
+        else
+          p.pipe(attributes, status)
+        end
+      end
+      PipeStatus.new(value, status)
+    end
+    def method_missing(sym, *args, &block)
+      @original_self ? @original_self.send(sym, *args, &block) : super
+    end
+    def respond_to_missing?(sym, include_all = false)
+      @original_self ? @original_self.respond_to?(sym, include_all) : super
+    end
+  end
+end

data/lib/pyper/pipes/cassandra/all_items_reader.rb ADDED Viewed

@@ -0,0 +1,40 @@
+module Pyper::Pipes::Cassandra
+  # A pipe for reading all items from a single row in cassandra
+  # @param [Symbol] table name
+  # @param [Cassava::Client] client to query cassandra with
+  class AllItemsReader < Struct.new(:table, :client)
+    attr_reader :page_size
+    # @param table [Symbol] the name of the cassandra table to fetch data from
+    # @param client [Cassava::Client]
+    # @param mod_size [Integer] the mod size
+    # @param page_size [Integer] the page size
+    def initialize(table, client, page_size = 1000)
+      @table = table
+      @client = client
+      @page_size = page_size
+    end
+    # @param arguments [Hash] Arguments passed to the cassandra client where statement
+    # @option arguments [Array] :order A pair [clustering_column, :desc|:asc] determining how to order the results.
+    # @option arguments [Integer] :page_size
+    # @param status [Hash] The mutable status field
+    # @return [Enumerator::Lazy<Hash>] enumerator of items
+    def pipe(arguments, status = {})
+      columns = arguments.delete(:columns)
+      enum = Enumerator.new do |yielder|
+        options = { :page_size => page_size }
+        paging_state = nil
+        loop do
+          options[:paging_state] = paging_state if paging_state.present?
+          result = @client.select(@table, columns).where(arguments).execute(options)
+          result.each { |item| yielder << item }
+          break if result.last_page?
+          paging_state = result.paging_state
+        end
+      end
+      enum.lazy
+    end
+  end
+end

data/lib/pyper/pipes/cassandra/deleter.rb ADDED Viewed

@@ -0,0 +1,19 @@
+module Pyper::Pipes::Cassandra
+  # Deletes from a specified cassandra table.
+  # @param table_name [Symbol] The table from which to delete
+  # @param client [Cassava::Client] client to query cassandra with
+  class Deleter < Struct.new(:table_name, :client)
+    # @param args [Hash] Should contain the primary keys to delete. Can contain a :columns key to remove specific values.
+    # @param status [Hash] The mutable status field
+    # @return [Hash] The original attributes
+    def pipe(arguments, status = {})
+      local_args = arguments.dup
+      columns = local_args.delete(:columns)
+      statement = columns.present? ? client.delete(table_name, columns) : client.delete(table_name)
+      statement.where(local_args).execute
+      arguments
+    end
+  end
+end

data/lib/pyper/pipes/cassandra/mod_key.rb ADDED Viewed

@@ -0,0 +1,32 @@
+module Pyper::Pipes::Cassandra
+  # Adds the :mod_key field to the output attributes, which is based on the hash of
+  # a particular field in the input attributes.
+  # @example
+  # If the pipe is configured with an id field of :id, then the input
+  #   { id: 'abc' }
+  # would result in an output of
+  #   { id: 'abc', mod_key: 22 }
+  # Here the value 22 is within the range [0,mod_size - 1] and is uniquely
+  # determined by id.
+  class ModKey
+    attr_reader :mod_size, :id_field
+    # @param mod_size [Integer] mod keys will fall within the range [0,mod_key - 1]
+    # @param id_field [Symbol] the attribute to use when generating the mod key.
+    def initialize(mod_size = 100, id_field = :id)
+      @mod_size = mod_size
+      @id_field = id_field
+    end
+    # @param attributes [Hash] An attribute hash
+    # @param status [Hash] The mutable status field
+    # @return [Hash] The attribute hash with the mod_key field added
+    def pipe(attributes, status)
+      attributes.merge!(:mod_key => mod(attributes[id_field]))
+    end
+    def mod(value)
+      Zlib::crc32(value) % mod_size
+    end
+  end
+end

data/lib/pyper/pipes/cassandra/mod_key_reader.rb ADDED Viewed

@@ -0,0 +1,41 @@
+module Pyper::Pipes::Cassandra
+  # This pipe is for reading data from sharded rows in Cassandra. The table must have rows sharded by the 'mod_key' field.
+  # For a fixed number of such shards, this pipe reads all data from all of those shards, returning a lazy enumerator
+  # over all of those rows.
+  # For example, if mod_size is 100, it will read the 100 rows with mod_key between 0 and 99.
+  class ModKeyReader
+    # @param table [Symbol] the name of the cassandra table to fetch data from
+    # @param client [Cassava::Client]
+    # @param mod_size [Integer] the mod size
+    # @param page_size [Integer] the page size
+    attr_reader :table, :client, :mod_size, :page_size
+    def initialize(table, client, mod_size = 100, page_size = 1000)
+      @table = table
+      @client = client
+      @mod_size = mod_size
+      @page_size = page_size
+    end
+    # @param arguments [Hash] Arguments passed to the cassandra client where statement
+    # @param status [Hash] The mutable status field
+    # @return [Enumerator::Lazy<Hash>] enumerator of items from all rows
+    def pipe(arguments, status = {})
+      (Enumerator.new do |yielder|
+         (0...mod_size).each do |mod_id|
+           options = { :page_size => page_size }
+           paging_state = nil
+           loop do
+             options[:paging_state] = paging_state if paging_state.present?
+             result = client.select(table).where(arguments.merge(:mod_key => mod_id)).execute(options)
+             result.each { |item| yielder << item }
+             break if result.last_page?
+             paging_state = result.paging_state
+           end
+         end
+       end).lazy
+    end
+  end
+end

data/lib/pyper/pipes/cassandra/pagination_decoding.rb ADDED Viewed

@@ -0,0 +1,22 @@
+require 'base64'
+module Pyper::Pipes::Cassandra
+  # This pipe extracts an encoded paging_state, decodes it, and passes on a decoded
+  # paging state.
+  # This pipe is intended to be used before the Cassandra::Reader pipe, as that pipe
+  # can interpret the :paging_state argument.
+  #
+  # This pipe pairs with the PaginationEncoding pipe, which performs the reverse
+  # transformation
+  class PaginationDecoding
+    # @param args [Hash] Arguments that include an encoded :paging_state
+    # @param status [Hash] The mutable status field
+    # @return [Hash] The list of arguments with :paging_state decoded, if present
+    def pipe(args, status = {})
+      page_state = args[:paging_state]
+      args[:paging_state] = Base64.urlsafe_decode64(page_state) if page_state
+      args
+    end
+  end
+end

data/lib/pyper/pipes/cassandra/pagination_encoding.rb ADDED Viewed

@@ -0,0 +1,17 @@
+require 'base64'
+module Pyper::Pipes::Cassandra
+  # Given a :paging_state in the status field, encodes it. This is the reverse transformation of
+  # the PaginationDecoding pipe.
+  class PaginationEncoding
+    # @param items [Enumerable<Hash>]
+    # @param status [Hash] The mutable status field
+    # @return [Enumerable<Hash>] The unchanged list of items
+    def pipe(items, status)
+      page_state = status[:paging_state]
+      status[:paging_state] = Base64.urlsafe_encode64(page_state) if page_state
+      items
+    end
+  end
+end

data/lib/pyper/pipes/cassandra/reader.rb ADDED Viewed

@@ -0,0 +1,35 @@
+module Pyper::Pipes::Cassandra
+  # A pipe for reading items from a single row in cassandra
+  # @param [Symbol] table name
+  # @param [Cassava::Client] client to query cassandra with
+  # @param [Hash] Additional/default options to pass to the Cassava execute statement.
+  class Reader < Struct.new(:table, :client, :options)
+    # @param arguments [Hash] Arguments passed to the cassandra client where statement
+    # @option arguments [Integer] :limit
+    # @option arguments [Array] :order A pair [clustering_column, :desc|:asc] determining how to order the results.
+    # @option arguments [Object] :paging_state
+    # @option arguments [Integer] :page_size
+    # @param status [Hash] The mutable status field
+    # @return [Enumerator::Lazy<Hash>] enumerator of items
+    def pipe(arguments, status = {})
+      limit = arguments.delete(:limit)
+      page_size = arguments.delete(:page_size)
+      paging_state = arguments.delete(:paging_state)
+      order = arguments.delete(:order)
+      columns = arguments.delete(:columns)
+      opts = (options || {}).merge({ page_size: page_size, paging_state: paging_state})
+      query = client.select(table, columns).where(arguments)
+      query = query.limit(limit) if limit
+      query = query.order(order.first, order.last) if order
+      result = query.execute(opts)
+      status[:paging_state] = result.paging_state
+      status[:last_page] = result.last_page?
+      result.rows.lazy
+    end
+  end
+end

data/lib/pyper/pipes/cassandra/writer.rb ADDED Viewed

@@ -0,0 +1,24 @@
+module Pyper::Pipes::Cassandra
+  # Writes a set of attributes to a specified cassandra table.
+  # @param table_name [Symbol] The table in which to store the attributes
+  # @param client [Cassava::Client] client to query cassandra with
+  # @param attribute_filter_set [Set] Optionally, a set of attributes which should be written. If none is provided,
+  #   all attributes will be written.
+  class Writer < Struct.new(:table_name, :client, :attribute_filter_set)
+    # @param args [Hash] Arguments to store in cassandra
+    # @param status [Hash] The mutable status field
+    # @return [Hash] The original attributes
+    def pipe(attributes, status = {})
+      attributes_to_write = if attribute_filter_set
+                              attributes.select { |k,v| attribute_filter_set.member?(k) }
+                            else
+                              attributes
+                            end
+      client.insert(table_name, attributes_to_write)
+      attributes
+    end
+  end
+end

data/lib/pyper/pipes/cassandra.rb ADDED Viewed

@@ -0,0 +1,8 @@
+require_relative 'cassandra/reader'
+require_relative 'cassandra/writer'
+require_relative 'cassandra/mod_key'
+require_relative 'cassandra/mod_key_reader'
+require_relative 'cassandra/all_items_reader'
+require_relative 'cassandra/deleter'
+require_relative 'cassandra/pagination_decoding'
+require_relative 'cassandra/pagination_encoding'

data/lib/pyper/pipes/content/fetch.rb ADDED Viewed

@@ -0,0 +1,30 @@
+module Pyper::Pipes::Content
+  class Fetch
+    attr_reader :storage_field, :storage_strategy_builder
+    # @param storage_field [Symbol] For each item hash, the field in which to insert the content
+    # @param storage_strategy_builder [Block] A block that takes an item and returns a StorageStrategy.
+    def initialize(storage_field, &storage_strategy_builder)
+      @storage_field = storage_field
+      @storage_strategy_builder = storage_strategy_builder
+    end
+    # @param items [Enumerable<Hash>] A list of items
+    # @param status [Hash] The mutable status field
+    # @return [Enumerable<Hash>] The items, with the retrieved content inserted in the storage field
+    def pipe(items, status = {})
+      items.map do |item|
+        strategy = storage_strategy_builder.call(item)
+        content =
+          begin
+            strategy.read
+          rescue Errno::ENOENT, StorageStrategy::NotFound
+            nil
+          end
+        item.merge(storage_field => content)
+      end
+    end
+  end
+end

data/lib/pyper/pipes/content/store.rb ADDED Viewed

@@ -0,0 +1,36 @@
+module Pyper::Pipes::Content
+  # A pipe for storing content to an object store. Uses the StorageStrategy gem.
+  class Store
+    attr_reader :storage_field, :storage_strategy_builder
+    # @param storage_field [Symbol] The attributes field in which the content is located.
+    # @param storage_strategy_builder [Block] A block that takes an item and returns a StorageStrategy.
+    def initialize(storage_field, &storage_strategy_builder)
+      @storage_field = storage_field
+      @storage_strategy_builder = storage_strategy_builder
+    end
+    # Stores content using the specified storage strategy
+    # @param attributes [Hash] The attributes of the item for which content is to be stored
+    # @param status [Hash] The mutable status field
+    # @return [Hash] The item attributes, with the storage_field deleted.
+    def pipe(attributes, status = {})
+      strategy = storage_strategy_builder.call(attributes)
+      content = attributes.delete(storage_field)
+      raise ArgumentError.new("#{storage_field} must be present in ContentStorage") unless content
+      case content
+      when NilClass then # do nothing -- there's no content to write
+      when String then strategy.write(content)
+      else strategy.write_from(content)
+      end
+      attributes.merge!(strategy.metadata)
+      attributes
+    end
+  end
+end

data/lib/pyper/pipes/content.rb ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ require_relative 'content/fetch'
2	+ require_relative 'content/store'

data/lib/pyper/pipes/default_values.rb ADDED Viewed

@@ -0,0 +1,15 @@
+module Pyper::Pipes
+  # @param default_values [Hash] A hash of default values to set within the provided attrs if they are not already present.
+  class DefaultValues < Struct.new(:default_values)
+    # @param attrs [Hash] The attributes of the item
+    # @param status [Hash] The mutable status field
+    # @return [Hash] The item attributes with default values inserted
+    def pipe(attrs, status = {})
+      default_values.each do |field, value|
+        attrs[field] = value unless attrs[field]
+      end
+      attrs
+    end
+  end
+end