pyper_rb 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +16 -0
  3. data/Gemfile +24 -0
  4. data/LICENSE.txt +22 -0
  5. data/README.md +178 -0
  6. data/Rakefile +10 -0
  7. data/lib/pyper/all.rb +4 -0
  8. data/lib/pyper/pipeline.rb +63 -0
  9. data/lib/pyper/pipes/cassandra/all_items_reader.rb +40 -0
  10. data/lib/pyper/pipes/cassandra/deleter.rb +19 -0
  11. data/lib/pyper/pipes/cassandra/mod_key.rb +32 -0
  12. data/lib/pyper/pipes/cassandra/mod_key_reader.rb +41 -0
  13. data/lib/pyper/pipes/cassandra/pagination_decoding.rb +22 -0
  14. data/lib/pyper/pipes/cassandra/pagination_encoding.rb +17 -0
  15. data/lib/pyper/pipes/cassandra/reader.rb +35 -0
  16. data/lib/pyper/pipes/cassandra/writer.rb +24 -0
  17. data/lib/pyper/pipes/cassandra.rb +8 -0
  18. data/lib/pyper/pipes/content/fetch.rb +30 -0
  19. data/lib/pyper/pipes/content/store.rb +36 -0
  20. data/lib/pyper/pipes/content.rb +2 -0
  21. data/lib/pyper/pipes/default_values.rb +15 -0
  22. data/lib/pyper/pipes/field_rename.rb +23 -0
  23. data/lib/pyper/pipes/force_enumerator.rb +13 -0
  24. data/lib/pyper/pipes/model/attribute_deserializer.rb +27 -0
  25. data/lib/pyper/pipes/model/attribute_serializer.rb +34 -0
  26. data/lib/pyper/pipes/model/attribute_validation.rb +57 -0
  27. data/lib/pyper/pipes/model/virtus_deserializer.rb +39 -0
  28. data/lib/pyper/pipes/model/virtus_parser.rb +13 -0
  29. data/lib/pyper/pipes/model.rb +5 -0
  30. data/lib/pyper/pipes/no_op.rb +15 -0
  31. data/lib/pyper/pipes/pry.rb +9 -0
  32. data/lib/pyper/pipes/remove_fields.rb +22 -0
  33. data/lib/pyper/pipes.rb +8 -0
  34. data/lib/pyper/version.rb +3 -0
  35. data/lib/pyper.rb +4 -0
  36. data/pyper_rb.gemspec +22 -0
  37. data/test/fixtures/cass_schema_config.yml +6 -0
  38. data/test/fixtures/test_datastore/schema.cql +23 -0
  39. data/test/test_helper.rb +34 -0
  40. data/test/unit/pyper/pipeline_test.rb +81 -0
  41. data/test/unit/pyper/pipes/cassandra/all_items_reader_test.rb +47 -0
  42. data/test/unit/pyper/pipes/cassandra/deleter_test.rb +37 -0
  43. data/test/unit/pyper/pipes/cassandra/mod_key_reader_test.rb +47 -0
  44. data/test/unit/pyper/pipes/cassandra/pagination_decoding_test.rb +29 -0
  45. data/test/unit/pyper/pipes/cassandra/pagination_encoding_test.rb +29 -0
  46. data/test/unit/pyper/pipes/cassandra/reader_test.rb +79 -0
  47. data/test/unit/pyper/pipes/cassandra/writer_test.rb +51 -0
  48. data/test/unit/pyper/pipes/content/fetch_test.rb +38 -0
  49. data/test/unit/pyper/pipes/content/store_test.rb +49 -0
  50. data/test/unit/pyper/pipes/field_rename_test.rb +24 -0
  51. data/test/unit/pyper/pipes/model/attribute_deserializer_test.rb +69 -0
  52. data/test/unit/pyper/pipes/model/attribute_serializer_test.rb +60 -0
  53. data/test/unit/pyper/pipes/model/attribute_validation_test.rb +96 -0
  54. data/test/unit/pyper/pipes/model/virtus_deserializer_test.rb +75 -0
  55. data/test/unit/pyper/pipes/no_op_test.rb +12 -0
  56. data/test/unit/pyper/pipes/remove_fields_test.rb +24 -0
  57. metadata +147 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 646bae638d6adae8b2456c3b6cfc07a7d0f0968f
4
+ data.tar.gz: 44c1fc2cc772dd12205a052f3e974ecec835d553
5
+ SHA512:
6
+ metadata.gz: 29e850650ad4223b4e34920b7fb2d0d87ebc7434e09867bf7d6232f19f1dd43fae76c9fcf7e44c4d83508a838b78854575b3b497790f4b2bc933e128eaddda90
7
+ data.tar.gz: 7f741d99c2d945128b6b8c454cfd84d6709d18955b5f0cc29c35dda57b4ca78a448797d68705f5cb352017491f02806387d1e3acbc334b17bdc5f3cc50e81531
data/.gitignore ADDED
@@ -0,0 +1,16 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
15
+ .ruby-version
16
+ .idea
data/Gemfile ADDED
@@ -0,0 +1,24 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in pyper_rb.gemspec
4
+ gemspec
5
+
6
+ group :development, :test do
7
+ gem "pry"
8
+ gem "awesome_print"
9
+ gem 'm', :git => 'git@github.com:ANorwell/m.git', :branch => 'minitest_5'
10
+ gem 'cass_schema', :git => 'git@github.com:backupify/cass_schema.git', :tag => "0.0.4"
11
+
12
+ # make sure to use v2.0.1 to avoid issues with super column families
13
+ gem 'cassandra-driver', :git => 'git@github.com:datastax/ruby-driver.git', :tag => 'v2.0.1'
14
+ end
15
+
16
+ group :test do
17
+ gem 'minitest_should', :git => 'git@github.com:citrus/minitest_should.git'
18
+ gem "google-api-client", "0.7.1"
19
+ gem "mocha"
20
+ gem 'virtus'
21
+ end
22
+
23
+ gem 'storage_strategy', :git => 'git@github.com:backupify/storage_strategy.git'
24
+ gem 'cassava', :git => 'git@github.com:backupify/cassava.git'
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Datto
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,178 @@
1
+ # Pyper
2
+
3
+ Flexible pipelines for content storage and retrieval.
4
+
5
+ Pyper allows the construction of pipelines to store and retrieve data. Each pipe in the pipeline modifies the
6
+ information in the pipeline before passing it to the next step. By composing pipes in different ways, different
7
+ data access patterns can be created.
8
+
9
+ ## Usage
10
+
11
+ Require the pyper library and the pipes that you need:
12
+
13
+ ```ruby
14
+ require 'pyper'
15
+ require 'pyper/pipes/model' # Import model-related pipes
16
+ require 'pyper/pipes/cassandra' # Import Cassandra-related pipes
17
+ require 'pyper/pipes/content' # Import content storage-related pipes
18
+ ```
19
+
20
+ Or, import the entire library using `require 'pyper/all'`
21
+
22
+ Create a pipeline composed of a set of pipes:
23
+
24
+ ```ruby
25
+ write_pipeline = Pyper::Pipeline.create do
26
+ add Pyper::Pipes::Model::AttributeSerializer.new
27
+ add Pyper::Pipes::FieldRename.new(:to => :to_emails, :from => :from_email)
28
+ add Pyper::Pipes::Cassandra::ModKey.new
29
+ add Pyper::Pipes::Cassandra::Writer.new(:table_1, metadata_client)
30
+ add Pyper::Pipes::Cassandra::Writer.new(:table_2, indexes_client)
31
+ add Pyper::Pipes::Cassandra::Writer.new(:table_3, indexes_client)
32
+ end
33
+ ```
34
+
35
+ Then, push data down the pipe:
36
+
37
+ ```ruby
38
+ result = write_pipeline.push(attributes)
39
+ ```
40
+
41
+ View the value of the set of successive transformations performed by the pipe:
42
+ ```ruby
43
+ result.value
44
+ ```
45
+
46
+ A pipeline performs a bunch of sequential transformations to the data being passed down the pipe. It may also have side
47
+ effects, such as storing data. The specific pipes provided in this library are aimed at two uses: writing and
48
+ reading data.
49
+
50
+ A write pipeline takes an initial set of attributes, performing a set of transformations such as serialization and so on,
51
+ before storing the data in one or more storage outputs. For example, this gem provides storage pipes for Cassandra and
52
+ Amazon S3, but it is easy to write a pipe for other storage backends.
53
+
54
+ Conversely, a read pipeline initially takes a set of options. These options may be transformed by the pipeline, and then used
55
+ to read data from an external source. This data may then be transformed by the pipeline - for example, performing
56
+ deserialization or data mapping operations.
57
+
58
+ ```ruby
59
+ read_pipeline = Pyper::Pipeline.create do
60
+ add Pyper::Pipes::Cassandra::PaginationDecoding.new
61
+ add Pyper::Pipes::Cassandra::Reader.new(:table, indexes_client)
62
+ add Pyper::Pipes::FieldRename.new(:to_emails => :to, :from_email => :from)
63
+ add Pyper::Pipes::Cassandra::PaginationEncoding.new
64
+ add Pyper::Pipes::Model::VirtusDeserializer.new(message_attributes)
65
+ add Pyper::Pipes::Model::VirtusParser.new(MyModelClass)
66
+ end
67
+
68
+ result = read_pipeline.push(:row => '1', :id => 'i', :paging_state => 'sdf')
69
+ result.value # Enumerator with matching instances of MyModelClass
70
+ ```
71
+
72
+ Note that pipe order matters. In the example read pipe above, `Cassandra::PaginationDecoding` decodes pagination options, thus
73
+ performing an operation on the initial options provided. The `Cassandra::Reader` pipe uses the options to retrieve items from
74
+ Cassandra, and subsequent elements of the pipeline are designed to transform this retrieved data. Thus, it would not be
75
+ sensible for the `Cassandra::PaginationDecoding` pipe to come after the `Cassandra::Reader` pipe.
76
+
77
+ ### Creating and using pipelines
78
+
79
+ A pipeline is an instance of `Pyper::Pipeline`, to which pipes are appended using the `<<` or `add` operators.
80
+
81
+ ```ruby
82
+ my_pipeline = Pyper::Pipeline.new <<
83
+ Pyper::Pipes::Cassandra::PaginationDecoding.new <<
84
+ Pyper::Pipes::Cassandra::Reader.new(:table, indexes_client) <<
85
+ Pyper::Pipes::Cassandra::PaginationEncoding.new
86
+ ```
87
+
88
+ However, the `create` method makes pipeline construction easier. The above example becomes the following:
89
+
90
+ ```ruby
91
+ my_pipeline = Pyper::Pipeline.create do
92
+ add Pyper::Pipes::Cassandra::PaginationDecoding.new
93
+ add Pyper::Pipes::Cassandra::Reader.new(:table, indexes_client)
94
+ add Pyper::Pipes::Cassandra::PaginationEncoding.new
95
+ end
96
+ ```
97
+
98
+ To invoke the pipeline, use the `push` method and provide the data to enter the pipeline:
99
+
100
+ ```ruby
101
+ pipe_status = my_pipeline.push(:row => '1', :id => 'i')
102
+ ```
103
+
104
+ Here, `pipe_status` is a `Pyper::PipeStatus` object, which contains two attributes, `pipe_status.value` and
105
+ `pipe_status.status`. The value is the returned result of the series of transformations applied by the pipeline. The status
106
+ contains metadata about the push operation that might be created by each pipe in the pipeline.
107
+
108
+ ### Creating new pipes
109
+
110
+ A pipe must implement the `call` method (or a `pipe` method, as the built-in pipes do), which takes two arguments: the object entering the pipe, as well as the status. It
111
+ should return the object leaving the pipe:
112
+
113
+ ```ruby
114
+ class MyPipe
115
+ def call(attributes, status = {})
116
+ attributes[:c] = attributes[:a] + attributes[:b]
117
+ status[:processed_by_my_pipe] = true
118
+ attributes
119
+ end
120
+ end
121
+ ```
122
+
123
+ The example pipe above modifies `attributes` before returning it. It also sets a flag on the status object.
124
+
125
+ Note that because the pipe need only respond to `call`, lambdas and procs are valid pipes.
126
+
127
+ Generally, pipes in a write pipeline operate on an attributes hash (containing the attributes meant to be written to a data
128
+ store). Pipes in a read pipeline initially might modify arguments. A data retrieval pipe would then use the arguments to
129
+ fetch data, and subsequent pipes would perform operations on the enumeration of data items. Thus, a read pipe might look
130
+ something like:
131
+
132
+ ```ruby
133
+ class Deserialize
134
+ def call(items, status = {})
135
+ items.map { |item| deserialize(item) }
136
+ end
137
+
138
+ def deserialize(item)
139
+ # ...
140
+ end
141
+ end
142
+ ```
143
+
144
+ ### Debugging Pipelines
145
+
146
+ Because pipes are expected to respond to `#call` you can simply add Procs and Lambdas in your code to debug pipelines:
147
+
148
+ ```rb
149
+ pl = Pyper::Pipeline.create do
150
+ add Pyper::Pipes::Model::AttributeSerializer.new
151
+ add -> (*args) { binding.pry }
152
+ add Pyper::Pipes::Cassandra::Writer.new(:my_table, client, fields)
153
+ end
154
+ ```
155
+
156
+ ## Installation
157
+
158
+ Add this line to your application's Gemfile:
159
+
160
+ ```ruby
161
+ gem 'pyper_rb', :git => 'git@github.com:backupify/pyper.git'
162
+ ```
163
+
164
+ And then execute:
165
+
166
+ $ bundle
167
+
168
+ Or install it yourself as:
169
+
170
+ $ gem install pyper_rb
171
+
172
+ ## Contributing
173
+
174
+ 1. Fork it ( https://github.com/backupify/pyper/fork )
175
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
176
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
177
+ 4. Push to the branch (`git push origin my-new-feature`)
178
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
+ require "bundler/gem_tasks"
2
+ require "rake"
3
+ require "rake/testtask"
4
+
5
+ Rake::TestTask.new(:test) do |test|
6
+ test.libs.concat(%w[test])
7
+ test.pattern = "test/**/*_test.rb"
8
+ end
9
+
10
+ task :default => :test
data/lib/pyper/all.rb ADDED
@@ -0,0 +1,4 @@
1
+ require_relative '../pyper'
2
+ require_relative 'pipes/cassandra'
3
+ require_relative 'pipes/content'
4
+ require_relative 'pipes/model'
@@ -0,0 +1,63 @@
1
+ require_relative 'pipes'
2
+
3
+ module Pyper
4
+
5
+ class PipeStatus < Struct.new(:value, :status); end
6
+
7
+ class Pipeline
8
+ class << self
9
+
10
+ # Provides an interface for creating a pipeline. The provided block will be called
11
+ # in the context of a newly-created pipeline, to which pipes can be added using #add.
12
+ # @return [Pyper::Pipeline] The created pipeline.
13
+ def create(&block)
14
+ new.tap do |pipeline|
15
+ if block_given?
16
+ original_self = eval('self', block.binding)
17
+ pipeline.instance_variable_set(:@original_self, original_self)
18
+ pipeline.instance_eval(&block)
19
+ pipeline.remove_instance_variable(:@original_self)
20
+ end
21
+ end
22
+ end
23
+ end
24
+
25
+ attr_reader :pipes
26
+
27
+ def initialize(pipes = [])
28
+ @pipes = pipes
29
+ end
30
+
31
+ # @param pipe [#pipe|#call] A pipe to append to the pipeline
32
+ def <<(pipe)
33
+ pipes << pipe
34
+ self
35
+ end
36
+
37
+ alias_method :add, :<<
38
+
39
+ # Insert something into the pipeline to be processed
40
+ # @param input [Object] The original input data to enter the pipeline. This may be mutated by each pipe in the pipeline.
41
+ # @return [PipeStatus] the pipe status, containing both the value and a status hash.
42
+ def push(input)
43
+ status = {}
44
+ value = pipes.inject(input) do |attributes, p|
45
+ if p.respond_to?(:call)
46
+ p.call(attributes, status)
47
+ else
48
+ p.pipe(attributes, status)
49
+ end
50
+ end
51
+
52
+ PipeStatus.new(value, status)
53
+ end
54
+
55
+ def method_missing(sym, *args, &block)
56
+ @original_self ? @original_self.send(sym, *args, &block) : super
57
+ end
58
+
59
+ def respond_to_missing?(sym, include_all = false)
60
+ @original_self ? @original_self.respond_to?(sym, include_all) : super
61
+ end
62
+ end
63
+ end
@@ -0,0 +1,40 @@
1
+ module Pyper::Pipes::Cassandra
2
+ # A pipe for reading all items from a single row in cassandra
3
+ # @param [Symbol] table name
4
+ # @param [Cassava::Client] client to query cassandra with
5
+ class AllItemsReader < Struct.new(:table, :client)
6
+ attr_reader :page_size
7
+
8
+ # @param table [Symbol] the name of the cassandra table to fetch data from
9
+ # @param client [Cassava::Client]
10
+ # @param mod_size [Integer] the mod size
11
+ # @param page_size [Integer] the page size
12
+ def initialize(table, client, page_size = 1000)
13
+ @table = table
14
+ @client = client
15
+ @page_size = page_size
16
+ end
17
+
18
+ # @param arguments [Hash] Arguments passed to the cassandra client where statement
19
+ # @option arguments [Array] :order A pair [clustering_column, :desc|:asc] determining how to order the results.
20
+ # @option arguments [Integer] :page_size
21
+ # @param status [Hash] The mutable status field
22
+ # @return [Enumerator::Lazy<Hash>] enumerator of items
23
+ def pipe(arguments, status = {})
24
+ columns = arguments.delete(:columns)
25
+ enum = Enumerator.new do |yielder|
26
+ options = { :page_size => page_size }
27
+ paging_state = nil
28
+ loop do
29
+ options[:paging_state] = paging_state if paging_state.present?
30
+ result = @client.select(@table, columns).where(arguments).execute(options)
31
+ result.each { |item| yielder << item }
32
+
33
+ break if result.last_page?
34
+ paging_state = result.paging_state
35
+ end
36
+ end
37
+ enum.lazy
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,19 @@
1
+ module Pyper::Pipes::Cassandra
2
+ # Deletes from a specified cassandra table.
3
+
4
+ # @param table_name [Symbol] The table from which to delete
5
+ # @param client [Cassava::Client] client to query cassandra with
6
+ class Deleter < Struct.new(:table_name, :client)
7
+ # @param arguments [Hash] Should contain the primary keys to delete. Can contain a :columns key to remove specific values.
8
+ # @param status [Hash] The mutable status field
9
+ # @return [Hash] The original attributes
10
+ def pipe(arguments, status = {})
11
+ local_args = arguments.dup
12
+ columns = local_args.delete(:columns)
13
+
14
+ statement = columns.present? ? client.delete(table_name, columns) : client.delete(table_name)
15
+ statement.where(local_args).execute
16
+ arguments
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,32 @@
1
+ module Pyper::Pipes::Cassandra
2
+ # Adds the :mod_key field to the output attributes, which is based on the hash of
3
+ # a particular field in the input attributes.
4
+ # @example
5
+ # If the pipe is configured with an id field of :id, then the input
6
+ # { id: 'abc' }
7
+ # would result in an output of
8
+ # { id: 'abc', mod_key: 22 }
9
+ # Here the value 22 is within the range [0,mod_size - 1] and is uniquely
10
+ # determined by id.
11
+ class ModKey
12
+ attr_reader :mod_size, :id_field
13
+
14
+ # @param mod_size [Integer] mod keys will fall within the range [0,mod_size - 1]
15
+ # @param id_field [Symbol] the attribute to use when generating the mod key.
16
+ def initialize(mod_size = 100, id_field = :id)
17
+ @mod_size = mod_size
18
+ @id_field = id_field
19
+ end
20
+
21
+ # @param attributes [Hash] An attribute hash
22
+ # @param status [Hash] The mutable status field
23
+ # @return [Hash] The attribute hash with the mod_key field added
24
+ def pipe(attributes, status)
25
+ attributes.merge!(:mod_key => mod(attributes[id_field]))
26
+ end
27
+
28
+ def mod(value)
29
+ Zlib::crc32(value) % mod_size
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,41 @@
1
+ module Pyper::Pipes::Cassandra
2
+
3
+ # This pipe is for reading data from sharded rows in Cassandra. The table must have rows sharded by the 'mod_key' field.
4
+ # For a fixed number of such shards, this pipe reads all data from all of those shards, returning a lazy enumerator
5
+ # over all of those rows.
6
+ # For example, if mod_size is 100, it will read the 100 rows with mod_key between 0 and 99.
7
+ class ModKeyReader
8
+
9
+ # @param table [Symbol] the name of the cassandra table to fetch data from
10
+ # @param client [Cassava::Client]
11
+ # @param mod_size [Integer] the mod size
12
+ # @param page_size [Integer] the page size
13
+ attr_reader :table, :client, :mod_size, :page_size
14
+ def initialize(table, client, mod_size = 100, page_size = 1000)
15
+ @table = table
16
+ @client = client
17
+ @mod_size = mod_size
18
+ @page_size = page_size
19
+ end
20
+
21
+ # @param arguments [Hash] Arguments passed to the cassandra client where statement
22
+ # @param status [Hash] The mutable status field
23
+ # @return [Enumerator::Lazy<Hash>] enumerator of items from all rows
24
+ def pipe(arguments, status = {})
25
+ (Enumerator.new do |yielder|
26
+ (0...mod_size).each do |mod_id|
27
+ options = { :page_size => page_size }
28
+ paging_state = nil
29
+ loop do
30
+ options[:paging_state] = paging_state if paging_state.present?
31
+ result = client.select(table).where(arguments.merge(:mod_key => mod_id)).execute(options)
32
+ result.each { |item| yielder << item }
33
+
34
+ break if result.last_page?
35
+ paging_state = result.paging_state
36
+ end
37
+ end
38
+ end).lazy
39
+ end
40
+ end
41
+ end
@@ -0,0 +1,22 @@
1
+ require 'base64'
2
+
3
+ module Pyper::Pipes::Cassandra
4
+ # This pipe extracts an encoded paging_state, decodes it, and passes on a decoded
5
+ # paging state.
6
+ # This pipe is intended to be used before the Cassandra::Reader pipe, as that pipe
7
+ # can interpret the :paging_state argument.
8
+ #
9
+ # This pipe pairs with the PaginationEncoding pipe, which performs the reverse
10
+ # transformation
11
+ class PaginationDecoding
12
+
13
+ # @param args [Hash] Arguments that include an encoded :paging_state
14
+ # @param status [Hash] The mutable status field
15
+ # @return [Hash] The list of arguments with :paging_state decoded, if present
16
+ def pipe(args, status = {})
17
+ page_state = args[:paging_state]
18
+ args[:paging_state] = Base64.urlsafe_decode64(page_state) if page_state
19
+ args
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,17 @@
1
+ require 'base64'
2
+
3
+ module Pyper::Pipes::Cassandra
4
+ # Given a :paging_state in the status field, encodes it. This is the reverse transformation of
5
+ # the PaginationDecoding pipe.
6
+ class PaginationEncoding
7
+
8
+ # @param items [Enumerable<Hash>]
9
+ # @param status [Hash] The mutable status field
10
+ # @return [Enumerable<Hash>] The unchanged list of items
11
+ def pipe(items, status)
12
+ page_state = status[:paging_state]
13
+ status[:paging_state] = Base64.urlsafe_encode64(page_state) if page_state
14
+ items
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,35 @@
1
+ module Pyper::Pipes::Cassandra
2
+ # A pipe for reading items from a single row in cassandra
3
+ # @param [Symbol] table name
4
+ # @param [Cassava::Client] client to query cassandra with
5
+ # @param [Hash] Additional/default options to pass to the Cassava execute statement.
6
+ class Reader < Struct.new(:table, :client, :options)
7
+ # @param arguments [Hash] Arguments passed to the cassandra client where statement
8
+ # @option arguments [Integer] :limit
9
+ # @option arguments [Array] :order A pair [clustering_column, :desc|:asc] determining how to order the results.
10
+ # @option arguments [Object] :paging_state
11
+ # @option arguments [Integer] :page_size
12
+ # @param status [Hash] The mutable status field
13
+ # @return [Enumerator::Lazy<Hash>] enumerator of items
14
+ def pipe(arguments, status = {})
15
+ limit = arguments.delete(:limit)
16
+ page_size = arguments.delete(:page_size)
17
+ paging_state = arguments.delete(:paging_state)
18
+ order = arguments.delete(:order)
19
+ columns = arguments.delete(:columns)
20
+
21
+ opts = (options || {}).merge({ page_size: page_size, paging_state: paging_state})
22
+
23
+ query = client.select(table, columns).where(arguments)
24
+ query = query.limit(limit) if limit
25
+ query = query.order(order.first, order.last) if order
26
+
27
+ result = query.execute(opts)
28
+
29
+ status[:paging_state] = result.paging_state
30
+ status[:last_page] = result.last_page?
31
+
32
+ result.rows.lazy
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,24 @@
1
+ module Pyper::Pipes::Cassandra
2
+ # Writes a set of attributes to a specified cassandra table.
3
+
4
+ # @param table_name [Symbol] The table in which to store the attributes
5
+ # @param client [Cassava::Client] client to query cassandra with
6
+ # @param attribute_filter_set [Set] Optionally, a set of attributes which should be written. If none is provided,
7
+ # all attributes will be written.
8
+ class Writer < Struct.new(:table_name, :client, :attribute_filter_set)
9
+
10
+ # @param attributes [Hash] Attributes to store in cassandra
11
+ # @param status [Hash] The mutable status field
12
+ # @return [Hash] The original attributes
13
+ def pipe(attributes, status = {})
14
+ attributes_to_write = if attribute_filter_set
15
+ attributes.select { |k,v| attribute_filter_set.member?(k) }
16
+ else
17
+ attributes
18
+ end
19
+
20
+ client.insert(table_name, attributes_to_write)
21
+ attributes
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,8 @@
1
+ require_relative 'cassandra/reader'
2
+ require_relative 'cassandra/writer'
3
+ require_relative 'cassandra/mod_key'
4
+ require_relative 'cassandra/mod_key_reader'
5
+ require_relative 'cassandra/all_items_reader'
6
+ require_relative 'cassandra/deleter'
7
+ require_relative 'cassandra/pagination_decoding'
8
+ require_relative 'cassandra/pagination_encoding'
@@ -0,0 +1,30 @@
1
+ module Pyper::Pipes::Content
2
+ class Fetch
3
+
4
+ attr_reader :storage_field, :storage_strategy_builder
5
+
6
+ # @param storage_field [Symbol] For each item hash, the field in which to insert the content
7
+ # @param storage_strategy_builder [Block] A block that takes an item and returns a StorageStrategy.
8
+ def initialize(storage_field, &storage_strategy_builder)
9
+ @storage_field = storage_field
10
+ @storage_strategy_builder = storage_strategy_builder
11
+ end
12
+
13
+ # @param items [Enumerable<Hash>] A list of items
14
+ # @param status [Hash] The mutable status field
15
+ # @return [Enumerable<Hash>] The items, with the retrieved content inserted in the storage field
16
+ def pipe(items, status = {})
17
+ items.map do |item|
18
+ strategy = storage_strategy_builder.call(item)
19
+
20
+ content =
21
+ begin
22
+ strategy.read
23
+ rescue Errno::ENOENT, StorageStrategy::NotFound
24
+ nil
25
+ end
26
+ item.merge(storage_field => content)
27
+ end
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,36 @@
1
+ module Pyper::Pipes::Content
2
+ # A pipe for storing content to an object store. Uses the StorageStrategy gem.
3
+ class Store
4
+
5
+ attr_reader :storage_field, :storage_strategy_builder
6
+
7
+ # @param storage_field [Symbol] The attributes field in which the content is located.
8
+ # @param storage_strategy_builder [Block] A block that takes an item and returns a StorageStrategy.
9
+ def initialize(storage_field, &storage_strategy_builder)
10
+ @storage_field = storage_field
11
+ @storage_strategy_builder = storage_strategy_builder
12
+ end
13
+
14
+ # Stores content using the specified storage strategy
15
+ # @param attributes [Hash] The attributes of the item for which content is to be stored
16
+ # @param status [Hash] The mutable status field
17
+ # @return [Hash] The item attributes, with the storage_field deleted and the storage strategy's metadata merged in.
18
+ def pipe(attributes, status = {})
19
+ strategy = storage_strategy_builder.call(attributes)
20
+
21
+ content = attributes.delete(storage_field)
22
+
23
+ raise ArgumentError.new("#{storage_field} must be present in ContentStorage") unless content
24
+
25
+ case content
26
+ when NilClass then # do nothing -- there's no content to write
27
+ when String then strategy.write(content)
28
+ else strategy.write_from(content)
29
+ end
30
+
31
+ attributes.merge!(strategy.metadata)
32
+
33
+ attributes
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,2 @@
1
+ require_relative 'content/fetch'
2
+ require_relative 'content/store'
@@ -0,0 +1,15 @@
1
+ module Pyper::Pipes
2
+ # @param default_values [Hash] A hash of default values to set within the provided attrs if they are not already present.
3
+ class DefaultValues < Struct.new(:default_values)
4
+
5
+ # @param attrs [Hash] The attributes of the item
6
+ # @param status [Hash] The mutable status field
7
+ # @return [Hash] The item attributes with default values inserted
8
+ def pipe(attrs, status = {})
9
+ default_values.each do |field, value|
10
+ attrs[field] = value unless attrs[field]
11
+ end
12
+ attrs
13
+ end
14
+ end
15
+ end