pyper_rb 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +16 -0
  3. data/Gemfile +24 -0
  4. data/LICENSE.txt +22 -0
  5. data/README.md +178 -0
  6. data/Rakefile +10 -0
  7. data/lib/pyper/all.rb +4 -0
  8. data/lib/pyper/pipeline.rb +63 -0
  9. data/lib/pyper/pipes/cassandra/all_items_reader.rb +40 -0
  10. data/lib/pyper/pipes/cassandra/deleter.rb +19 -0
  11. data/lib/pyper/pipes/cassandra/mod_key.rb +32 -0
  12. data/lib/pyper/pipes/cassandra/mod_key_reader.rb +41 -0
  13. data/lib/pyper/pipes/cassandra/pagination_decoding.rb +22 -0
  14. data/lib/pyper/pipes/cassandra/pagination_encoding.rb +17 -0
  15. data/lib/pyper/pipes/cassandra/reader.rb +35 -0
  16. data/lib/pyper/pipes/cassandra/writer.rb +24 -0
  17. data/lib/pyper/pipes/cassandra.rb +8 -0
  18. data/lib/pyper/pipes/content/fetch.rb +30 -0
  19. data/lib/pyper/pipes/content/store.rb +36 -0
  20. data/lib/pyper/pipes/content.rb +2 -0
  21. data/lib/pyper/pipes/default_values.rb +15 -0
  22. data/lib/pyper/pipes/field_rename.rb +23 -0
  23. data/lib/pyper/pipes/force_enumerator.rb +13 -0
  24. data/lib/pyper/pipes/model/attribute_deserializer.rb +27 -0
  25. data/lib/pyper/pipes/model/attribute_serializer.rb +34 -0
  26. data/lib/pyper/pipes/model/attribute_validation.rb +57 -0
  27. data/lib/pyper/pipes/model/virtus_deserializer.rb +39 -0
  28. data/lib/pyper/pipes/model/virtus_parser.rb +13 -0
  29. data/lib/pyper/pipes/model.rb +5 -0
  30. data/lib/pyper/pipes/no_op.rb +15 -0
  31. data/lib/pyper/pipes/pry.rb +9 -0
  32. data/lib/pyper/pipes/remove_fields.rb +22 -0
  33. data/lib/pyper/pipes.rb +8 -0
  34. data/lib/pyper/version.rb +3 -0
  35. data/lib/pyper.rb +4 -0
  36. data/pyper_rb.gemspec +22 -0
  37. data/test/fixtures/cass_schema_config.yml +6 -0
  38. data/test/fixtures/test_datastore/schema.cql +23 -0
  39. data/test/test_helper.rb +34 -0
  40. data/test/unit/pyper/pipeline_test.rb +81 -0
  41. data/test/unit/pyper/pipes/cassandra/all_items_reader_test.rb +47 -0
  42. data/test/unit/pyper/pipes/cassandra/deleter_test.rb +37 -0
  43. data/test/unit/pyper/pipes/cassandra/mod_key_reader_test.rb +47 -0
  44. data/test/unit/pyper/pipes/cassandra/pagination_decoding_test.rb +29 -0
  45. data/test/unit/pyper/pipes/cassandra/pagination_encoding_test.rb +29 -0
  46. data/test/unit/pyper/pipes/cassandra/reader_test.rb +79 -0
  47. data/test/unit/pyper/pipes/cassandra/writer_test.rb +51 -0
  48. data/test/unit/pyper/pipes/content/fetch_test.rb +38 -0
  49. data/test/unit/pyper/pipes/content/store_test.rb +49 -0
  50. data/test/unit/pyper/pipes/field_rename_test.rb +24 -0
  51. data/test/unit/pyper/pipes/model/attribute_deserializer_test.rb +69 -0
  52. data/test/unit/pyper/pipes/model/attribute_serializer_test.rb +60 -0
  53. data/test/unit/pyper/pipes/model/attribute_validation_test.rb +96 -0
  54. data/test/unit/pyper/pipes/model/virtus_deserializer_test.rb +75 -0
  55. data/test/unit/pyper/pipes/no_op_test.rb +12 -0
  56. data/test/unit/pyper/pipes/remove_fields_test.rb +24 -0
  57. metadata +147 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 646bae638d6adae8b2456c3b6cfc07a7d0f0968f
4
+ data.tar.gz: 44c1fc2cc772dd12205a052f3e974ecec835d553
5
+ SHA512:
6
+ metadata.gz: 29e850650ad4223b4e34920b7fb2d0d87ebc7434e09867bf7d6232f19f1dd43fae76c9fcf7e44c4d83508a838b78854575b3b497790f4b2bc933e128eaddda90
7
+ data.tar.gz: 7f741d99c2d945128b6b8c454cfd84d6709d18955b5f0cc29c35dda57b4ca78a448797d68705f5cb352017491f02806387d1e3acbc334b17bdc5f3cc50e81531
data/.gitignore ADDED
@@ -0,0 +1,16 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
15
+ .ruby-version
16
+ .idea
data/Gemfile ADDED
@@ -0,0 +1,24 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in pyper_rb.gemspec
4
+ gemspec
5
+
6
+ group :development, :test do
7
+ gem "pry"
8
+ gem "awesome_print"
9
+ gem 'm', :git => 'git@github.com:ANorwell/m.git', :branch => 'minitest_5'
10
+ gem 'cass_schema', :git => 'git@github.com:backupify/cass_schema.git', :tag => "0.0.4"
11
+
12
+ # make sure to use v2.0.1 to avoid issues with super column families
13
+ gem 'cassandra-driver', :git => 'git@github.com:datastax/ruby-driver.git', :tag => 'v2.0.1'
14
+ end
15
+
16
+ group :test do
17
+ gem 'minitest_should', :git => 'git@github.com:citrus/minitest_should.git'
18
+ gem "google-api-client", "0.7.1"
19
+ gem "mocha"
20
+ gem 'virtus'
21
+ end
22
+
23
+ gem 'storage_strategy', :git => 'git@github.com:backupify/storage_strategy.git'
24
+ gem 'cassava', :git => 'git@github.com:backupify/cassava.git'
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Datto
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,178 @@
1
+ # Pyper
2
+
3
+ Flexible pipelines for content storage and retrieval.
4
+
5
+ Pyper allows the construction of pipelines to store and retrieve data. Each pipe in the pipeline modifies the
6
+ information in the pipeline before passing it to the next step. By composing pipes in different ways, different
7
+ data access patterns can be created.
8
+
9
+ ## Usage
10
+
11
+ Require the pyper library and the pipes that you need:
12
+
13
+ ```ruby
14
+ require 'pyper'
15
+ require 'pyper/pipes/model' # Import model-related pipes
16
+ require 'pyper/pipes/cassandra' # Import Cassandra-related pipes
17
+ require 'pyper/pipes/content' # Import content storage-related pipes
18
+ ```
19
+
20
+ Or, import the entire library using `require 'pyper/all'`
21
+
22
+ Create a pipeline composed of a set of pipes:
23
+
24
+ ```ruby
25
+ write_pipeline = Pyper::Pipeline.create do
26
+ add Pyper::Pipes::Model::AttributeSerializer.new
27
+ add Pyper::Pipes::FieldRename.new(:to => :to_emails, :from => :from_email)
28
+ add Pyper::Pipes::Cassandra::ModKey.new
29
+ add Pyper::Pipes::Cassandra::Writer.new(:table_1, metadata_client)
30
+ add Pyper::Pipes::Cassandra::Writer.new(:table_2, indexes_client)
31
+ add Pyper::Pipes::Cassandra::Writer.new(:table_3, indexes_client)
32
+ end
33
+ ```
34
+
35
+ Then, push data down the pipe:
36
+
37
+ ```ruby
38
+ result = write_pipeline.push(attributes)
39
+ ```
40
+
41
+ View the value of the set of successive transformations performed by the pipe:
42
+ ```ruby
43
+ result.value
44
+ ```
45
+
46
+ A pipeline performs a bunch of sequential transformations to the data being passed down the pipe. It may also have side
47
+ effects, such as storing data. The specific pipes provided in this library are aimed at two uses: writing and
48
+ reading data.
49
+
50
+ A write pipeline takes an initial set of attributes, performing a set of transformations such as serialization and so on,
51
+ before storing the data in one or more storage outputs. For example, this gem provides storage pipes for Cassandra and
52
+ Amazon S3, but it is easy to write a pipe for other storage backends.
53
+
54
+ Conversely, a read pipeline initially takes a set of options. These options may be transformed by the pipeline, and then used
55
+ to read data from an external source. This data may then be transformed by the pipeline - for example, performing
56
+ deserialization or data mapping operations.
57
+
58
+ ```ruby
59
+ read_pipeline = Pyper::Pipeline.create do
60
+ add Pyper::Pipes::Cassandra::PaginationDecoding.new
61
+ add Pyper::Pipes::Cassandra::Reader.new(:table, indexes_client)
62
+ add Pyper::Pipes::FieldRename.new(:to_emails => :to, :from_email => :from)
63
+ add Pyper::Pipes::Cassandra::PaginationEncoding.new
64
+ add Pyper::Pipes::Model::VirtusDeserializer.new(message_attributes)
65
+ add Pyper::Pipes::Model::VirtusParser.new(MyModelClass)
66
+ end
67
+
68
+ result = read_pipeline.push(:row => '1', :id => 'i', :paging_state => 'sdf')
69
+ result.value # Enumerator with matching instances of MyModelClass
70
+ ```
71
+
72
+ Note that pipe order matters. In the example read pipe above, `Cassandra::PaginationDecoding` decodes pagination options, thus
73
+ performing an operation on the initial options provided. The `Cassandra::Reader` pipe uses the options to retrieve items from
74
+ Cassandra, and subsequent elements of the pipeline are designed to transform this retrieved data. Thus, it would not be
75
+ sensible for the `Cassandra::PaginationDecoding` pipe to come after the `Cassandra::Reader` pipe.
76
+
77
+ ### Creating and using pipelines
78
+
79
+ A pipeline is an instance of `Pyper::Pipeline`, to which pipes are appended using the `<<` or `add` operators.
80
+
81
+ ```ruby
82
+ my_pipeline = Pyper::Pipeline.new <<
83
+ Pyper::Pipes::Cassandra::PaginationDecoding.new <<
84
+ Pyper::Pipes::Cassandra::Reader.new(:table, indexes_client) <<
85
+ Pyper::Pipes::Cassandra::PaginationEncoding.new
86
+ ```
87
+
88
+ However, the `create` method makes pipeline construction easier. The above example becomes the following:
89
+
90
+ ```ruby
91
+ my_pipeline = Pyper::Pipeline.create do
92
+ add Pyper::Pipes::Cassandra::PaginationDecoding.new
93
+ add Pyper::Pipes::Cassandra::Reader.new(:table, indexes_client)
94
+ add Pyper::Pipes::Cassandra::PaginationEncoding.new
95
+ end
96
+ ```
97
+
98
+ To invoke the pipeline, use the `push` method and provide the data to enter the pipeline:
99
+
100
+ ```ruby
101
+ pipe_status = my_pipeline.push(:row => '1', :id => 'i')
102
+ ```
103
+
104
+ Here, `pipe_status` is a `Pyper::PipeStatus` object, which contains two attributes, `pipe_status.value` and
105
+ `pipe_status.status`. The value is the returned result of the series of transformations applied by the pipeline. The status
106
+ contains metadata about the push operation that might be created by each pipe in the pipeline.
107
+
108
+ ### Creating new pipes
109
+
110
+ A pipe must implement the `call` method, which takes two arguments: the object entering the pipe, as well as the status. It
111
+ should return the object leaving the pipe:
112
+
113
+ ```ruby
114
+ class MyPipe
115
+ def call(attributes, status = {})
116
+ attributes[:c] = attributes[:a] + attributes[:b]
117
+ status[:processed_by_my_pipe] = true
118
+ attributes
119
+ end
120
+ end
121
+ ```
122
+
123
+ This example pipe above modifies `attributes` before returning it. It also sets a flag on the status object.
124
+
125
+ Note that because the pipe need only respond to `call`, lambdas and procs are valid pipes.
126
+
127
+ Generally, pipes in a write pipeline operate on an attributes hash (containing the attributes meant to be written to a data
128
+ store). Pipes in a read pipeline initially might modify arguments. A data retrieval pipe would then use the arguments to
129
+ fetch data, and subsequent pipes would perform operations on the enumeration of data items. Thus, a read pipe might look
130
+ something like:
131
+
132
+ ```ruby
133
+ class Deserialize
134
+ def call(items, status = {})
135
+ items.map { |item| deserialize(item) }
136
+ end
137
+
138
+ def deserialize(item)
139
+ # ...
140
+ end
141
+ end
142
+ ```
143
+
144
+ ### Debugging Pipelines
145
+
146
+ Because pipes are expected to respond to `#call` you can simply add Procs and Lambdas in your code to debug pipelines:
147
+
148
+ ```rb
149
+ pl = Pyper::Pipeline.create do
150
+ add Pyper::Pipes::Model::AttributeSerializer.new
151
+ add -> (*args) { binding.pry }
152
+ add Pyper::Pipes::Cassandra::Writer.new(:my_table, client, fields)
153
+ end
154
+ ```
155
+
156
+ ## Installation
157
+
158
+ Add this line to your application's Gemfile:
159
+
160
+ ```ruby
161
+ gem 'pyper_rb', :git => 'git@github.com:backupify/pyper.git'
162
+ ```
163
+
164
+ And then execute:
165
+
166
+ $ bundle
167
+
168
+ Or install it yourself as:
169
+
170
+ $ gem install pyper_rb
171
+
172
+ ## Contributing
173
+
174
+ 1. Fork it ( https://github.com/backupify/pyper/fork )
175
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
176
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
177
+ 4. Push to the branch (`git push origin my-new-feature`)
178
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
+ require "bundler/gem_tasks"
2
+ require "rake"
3
+ require "rake/testtask"
4
+
5
+ Rake::TestTask.new(:test) do |test|
6
+ test.libs.concat(%w[test])
7
+ test.pattern = "test/**/*_test.rb"
8
+ end
9
+
10
+ task :default => :test
data/lib/pyper/all.rb ADDED
@@ -0,0 +1,4 @@
1
+ require_relative '../pyper'
2
+ require_relative 'pipes/cassandra'
3
+ require_relative 'pipes/content'
4
+ require_relative 'pipes/model'
@@ -0,0 +1,63 @@
1
+ require_relative 'pipes'
2
+
3
module Pyper

  # The result of pushing data through a pipeline: `value` is what the last pipe returned and
  # `status` is the hash that was handed to every pipe during the push.
  class PipeStatus < Struct.new(:value, :status); end

  # An ordered collection of pipes. Pushing an input feeds it to the first pipe, feeds that
  # pipe's return value to the next one, and so on.
  class Pipeline
    class << self

      # Provides an interface for creating a pipeline. The provided block will be called
      # in the context of a newly-created pipeline, to which pipes can be added using #add.
      # While the block runs, methods the pipeline does not define itself are forwarded to the
      # block's original `self` (see #method_missing).
      # @return [Pyper::Pipeline] The created pipeline.
      def create(&block)
        new.tap do |pipeline|
          if block_given?
            original_self = eval('self', block.binding)
            pipeline.instance_variable_set(:@original_self, original_self)
            begin
              pipeline.instance_eval(&block)
            ensure
              # Always drop the delegation target, even when the block raises, so a pipeline
              # captured inside the block does not keep forwarding unknown methods.
              pipeline.remove_instance_variable(:@original_self)
            end
          end
        end
      end
    end

    attr_reader :pipes

    # @param pipes [Array<#pipe|#call>] The initial list of pipes
    def initialize(pipes = [])
      @pipes = pipes
    end

    # @param pipe [#pipe|#call] A pipe to append to the pipeline
    # @return [Pyper::Pipeline] self, so that calls can be chained
    def <<(pipe)
      pipes << pipe
      self
    end

    alias_method :add, :<<

    # Insert something into the pipeline to be processed
    # @param input [Object] The original input data to enter the pipeline. This may be mutated by each pipe in the pipeline.
    # @return [PipeStatus] the pipe status, containing both the value and a status hash.
    def push(input)
      status = {}
      value = pipes.inject(input) do |attributes, p|
        # Objects responding to #call (lambdas, procs, ...) take precedence over #pipe.
        if p.respond_to?(:call)
          p.call(attributes, status)
        else
          p.pipe(attributes, status)
        end
      end

      PipeStatus.new(value, status)
    end

    # Forwards unknown methods to the creating block's original self while .create is running.
    def method_missing(sym, *args, &block)
      @original_self ? @original_self.send(sym, *args, &block) : super
    end

    def respond_to_missing?(sym, include_all = false)
      @original_self ? @original_self.respond_to?(sym, include_all) : super
    end
  end
end
@@ -0,0 +1,40 @@
1
# Namespaces are opened one level at a time so this file also loads when required on its own.
module Pyper
  module Pipes
    module Cassandra
      # A pipe for reading all items from a single row in cassandra, fetching one page at a time
      # until the client reports the last page.
      class AllItemsReader < Struct.new(:table, :client)
        attr_reader :page_size

        # @param table [Symbol] the name of the cassandra table to fetch data from
        # @param client [Cassava::Client] client to query cassandra with
        # @param page_size [Integer] the page size
        def initialize(table, client, page_size = 1000)
          # Populate the Struct members so that #table and #client return the given values.
          super(table, client)
          @page_size = page_size
        end

        # @param arguments [Hash] Arguments passed to the cassandra client where statement. Not modified.
        # @option arguments [Object] :columns Passed to the client's select call instead of the where statement
        # @param status [Hash] The mutable status field
        # @return [Enumerator::Lazy] enumerator of items
        def pipe(arguments, status = {})
          # Work on a copy so the caller's hash keeps its :columns entry.
          conditions = arguments.dup
          columns = conditions.delete(:columns)
          enum = Enumerator.new do |yielder|
            options = { :page_size => page_size }
            paging_state = nil
            loop do
              # Only pass a paging state once the previous page has supplied a non-empty one.
              options[:paging_state] = paging_state unless paging_state.nil? || paging_state.empty?
              result = client.select(table, columns).where(conditions).execute(options)
              result.each { |item| yielder << item }

              break if result.last_page?
              paging_state = result.paging_state
            end
          end
          enum.lazy
        end
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
# Namespaces are opened one level at a time so this file also loads when required on its own.
module Pyper
  module Pipes
    module Cassandra
      # Deletes from a specified cassandra table.
      #
      # Struct members:
      #   table_name [Symbol] The table from which to delete
      #   client [Cassava::Client] client to query cassandra with
      class Deleter < Struct.new(:table_name, :client)
        # @param arguments [Hash] Should contain the primary keys to delete. Can contain a :columns key to remove
        #   specific values. The hash itself is not modified.
        # @param status [Hash] The mutable status field
        # @return [Hash] The original arguments
        def pipe(arguments, status = {})
          conditions = arguments.dup
          columns = conditions.delete(:columns)

          # A missing or empty :columns means the delete is issued without a column list.
          statement =
            if columns.nil? || columns.empty?
              client.delete(table_name)
            else
              client.delete(table_name, columns)
            end
          statement.where(conditions).execute
          arguments
        end
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
require 'zlib'

# Namespaces are opened one level at a time so this file also loads when required on its own.
module Pyper
  module Pipes
    module Cassandra
      # Adds the :mod_key field to the output attributes, which is based on the hash of
      # a particular field in the input attributes.
      # @example
      #   If the pipe is configured with an id field of :id, then the input
      #     { id: 'abc' }
      #   would result in an output of
      #     { id: 'abc', mod_key: 22 }
      #   Here the value 22 is within the range [0,mod_size - 1] and is uniquely
      #   determined by id.
      class ModKey
        attr_reader :mod_size, :id_field

        # @param mod_size [Integer] mod keys will fall within the range [0,mod_size - 1]
        # @param id_field [Symbol] the attribute to use when generating the mod key.
        def initialize(mod_size = 100, id_field = :id)
          @mod_size = mod_size
          @id_field = id_field
        end

        # @param attributes [Hash] An attribute hash; it is updated in place
        # @param status [Hash] The mutable status field
        # @return [Hash] The attribute hash with the mod_key field added
        def pipe(attributes, status = {})
          attributes.merge!(:mod_key => mod(attributes[id_field]))
        end

        # @param value [#to_s] The value to derive the mod key from
        # @return [Integer] The CRC32 checksum of the value's string form, modulo mod_size
        def mod(value)
          # to_s lets non-String ids (Symbols, Integers) be hashed; Strings are unaffected.
          Zlib.crc32(value.to_s) % mod_size
        end
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
# Namespaces are opened one level at a time so this file also loads when required on its own.
module Pyper
  module Pipes
    module Cassandra

      # This pipe is for reading data from sharded rows in Cassandra. The table must have rows sharded by the 'mod_key' field.
      # For a fixed number of such shards, this pipe reads all data from all of those shards, returning a lazy enumerator
      # over all of those rows.
      # For example, if mod_size is 100, it will read the 100 rows with mod_key between 0 and 99.
      class ModKeyReader
        attr_reader :table, :client, :mod_size, :page_size

        # @param table [Symbol] the name of the cassandra table to fetch data from
        # @param client [Cassava::Client]
        # @param mod_size [Integer] the mod size
        # @param page_size [Integer] the page size
        def initialize(table, client, mod_size = 100, page_size = 1000)
          @table = table
          @client = client
          @mod_size = mod_size
          @page_size = page_size
        end

        # @param arguments [Hash] Arguments passed to the cassandra client where statement. A :mod_key entry is
        #   merged into a copy of this hash for each shard.
        # @param status [Hash] The mutable status field
        # @return [Enumerator::Lazy] enumerator of items from all rows
        def pipe(arguments, status = {})
          Enumerator.new do |yielder|
            (0...mod_size).each do |mod_id|
              # Paging restarts from scratch for every shard.
              options = { :page_size => page_size }
              paging_state = nil
              loop do
                # Only pass a paging state once the previous page has supplied a non-empty one.
                options[:paging_state] = paging_state unless paging_state.nil? || paging_state.empty?
                result = client.select(table).where(arguments.merge(:mod_key => mod_id)).execute(options)
                result.each { |item| yielder << item }

                break if result.last_page?
                paging_state = result.paging_state
              end
            end
          end.lazy
        end
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
+ require 'base64'
2
+
3
+ module Pyper::Pipes::Cassandra
4
+ # This pipe extracts a URL-safe Base64 encoded :paging_state from the arguments, decodes it, and passes on the
5
+ # decoded paging state.
6
+ # This pipe is intended to be used before the Cassandra::Reader pipe, as that pipe
7
+ # can interpret the :paging_state argument.
8
+ #
9
+ # This pipe pairs with the PaginationEncoding pipe, which performs the reverse
10
+ # transformation.
11
+ class PaginationDecoding
12
+
13
+ # @param args [Hash] Arguments that may include a URL-safe Base64 encoded :paging_state
14
+ # @param status [Hash] The mutable status field (not used by this pipe)
15
+ # @return [Hash] The same args hash, with :paging_state decoded in place if it was present
16
+ def pipe(args, status = {})
17
+ page_state = args[:paging_state]
18
+ args[:paging_state] = Base64.urlsafe_decode64(page_state) if page_state
19
+ args
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,17 @@
1
+ require 'base64'
2
+
3
+ module Pyper::Pipes::Cassandra
4
+ # Given a :paging_state in the status field, encodes it as URL-safe Base64. This is the reverse transformation of
5
+ # the PaginationDecoding pipe.
6
+ class PaginationEncoding
7
+
8
+ # @param items [Enumerable<Hash>] The items passing through the pipe; they are not inspected
9
+ # @param status [Hash] The mutable status field; a truthy :paging_state entry is replaced by its encoded form
10
+ # @return [Enumerable<Hash>] The unchanged list of items
11
+ def pipe(items, status)
12
+ page_state = status[:paging_state]
13
+ status[:paging_state] = Base64.urlsafe_encode64(page_state) if page_state
14
+ items
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,35 @@
1
# Namespaces are opened one level at a time so this file also loads when required on its own.
module Pyper
  module Pipes
    module Cassandra
      # A pipe for reading items from a single row in cassandra
      #
      # Struct members:
      #   table [Symbol] table name
      #   client [Cassava::Client] client to query cassandra with
      #   options [Hash] Additional/default options to pass to the Cassava execute statement.
      class Reader < Struct.new(:table, :client, :options)
        # @param arguments [Hash] Arguments passed to the cassandra client where statement. The keys listed below
        #   are removed from a copy before it reaches the where statement; the given hash is not modified.
        # @option arguments [Integer] :limit
        # @option arguments [Array] :order A pair [clustering_column, :desc|:asc] determining how to order the results.
        # @option arguments [Object] :paging_state
        # @option arguments [Integer] :page_size
        # @option arguments [Object] :columns Passed to the client's select call
        # @param status [Hash] The mutable status field. :paging_state and :last_page are set from the result.
        # @return [Enumerator::Lazy] enumerator of items
        def pipe(arguments, status = {})
          # Work on a copy so the same arguments hash can be reused for another push.
          conditions = arguments.dup
          limit = conditions.delete(:limit)
          page_size = conditions.delete(:page_size)
          paging_state = conditions.delete(:paging_state)
          order = conditions.delete(:order)
          columns = conditions.delete(:columns)

          # Only values that were actually supplied may override the configured default options;
          # merging nils would silently discard e.g. a default :page_size.
          overrides = { page_size: page_size, paging_state: paging_state }.reject { |_, value| value.nil? }
          opts = (options || {}).merge(overrides)

          query = client.select(table, columns).where(conditions)
          query = query.limit(limit) if limit
          query = query.order(order.first, order.last) if order

          result = query.execute(opts)

          status[:paging_state] = result.paging_state
          status[:last_page] = result.last_page?

          result.rows.lazy
        end
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
+ module Pyper::Pipes::Cassandra
2
+ # Writes a set of attributes to a specified cassandra table.
3
+
4
+ # @param table_name [Symbol] The table in which to store the attributes
5
+ # @param client [Cassava::Client] client to query cassandra with
6
+ # @param attribute_filter_set [Set] Optionally, a set of attributes which should be written. If none is provided,
7
+ # all attributes will be written.
8
+ class Writer < Struct.new(:table_name, :client, :attribute_filter_set)
9
+
10
+ # @param attributes [Hash] Attributes to store in cassandra (only those in attribute_filter_set, when one is given)
11
+ # @param status [Hash] The mutable status field
12
+ # @return [Hash] The original, unfiltered attributes
13
+ def pipe(attributes, status = {})
14
+ attributes_to_write = if attribute_filter_set
15
+ attributes.select { |k,v| attribute_filter_set.member?(k) }
16
+ else
17
+ attributes
18
+ end
19
+
20
+ client.insert(table_name, attributes_to_write)
21
+ attributes
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,8 @@
1
+ require_relative 'cassandra/reader'
2
+ require_relative 'cassandra/writer'
3
+ require_relative 'cassandra/mod_key'
4
+ require_relative 'cassandra/mod_key_reader'
5
+ require_relative 'cassandra/all_items_reader'
6
+ require_relative 'cassandra/deleter'
7
+ require_relative 'cassandra/pagination_decoding'
8
+ require_relative 'cassandra/pagination_encoding'
@@ -0,0 +1,30 @@
1
+ module Pyper::Pipes::Content
2
+ class Fetch
3
+
4
+ attr_reader :storage_field, :storage_strategy_builder
5
+
6
+ # @param storage_field [Symbol] For each item hash, the field in which to insert the content
7
+ # @param storage_strategy_builder [Block] A block that is called with each item and returns a StorageStrategy to read from.
8
+ def initialize(storage_field, &storage_strategy_builder)
9
+ @storage_field = storage_field
10
+ @storage_strategy_builder = storage_strategy_builder
11
+ end
12
+
13
+ # @param items [Enumerable<Hash>] A list of items
14
+ # @param status [Hash] The mutable status field (not used by this pipe)
15
+ # @return [Enumerable<Hash>] Each item merged with the retrieved content under the storage field; the content is nil when the read raises Errno::ENOENT or StorageStrategy::NotFound
16
+ def pipe(items, status = {})
17
+ items.map do |item|
18
+ strategy = storage_strategy_builder.call(item)
19
+
20
+ content =
21
+ begin
22
+ strategy.read
23
+ rescue Errno::ENOENT, StorageStrategy::NotFound
24
+ nil
25
+ end
26
+ item.merge(storage_field => content)
27
+ end
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,36 @@
1
# Namespaces are opened one level at a time so this file also loads when required on its own.
module Pyper
  module Pipes
    module Content
      # A pipe for storing content to an object store. Uses the StorageStrategy gem.
      class Store

        attr_reader :storage_field, :storage_strategy_builder

        # @param storage_field [Symbol] The attributes field in which the content is located.
        # @param storage_strategy_builder [Block] A block that takes an item and returns a StorageStrategy.
        def initialize(storage_field, &storage_strategy_builder)
          @storage_field = storage_field
          @storage_strategy_builder = storage_strategy_builder
        end

        # Stores content using the specified storage strategy
        # @param attributes [Hash] The attributes of the item for which content is to be stored. Updated in place.
        # @param status [Hash] The mutable status field
        # @return [Hash] The item attributes, with the storage_field deleted and the strategy's metadata merged in.
        # @raise [ArgumentError] if the storage field is missing, nil or false
        def pipe(attributes, status = {})
          # The strategy is built first so the builder still sees the content field.
          strategy = storage_strategy_builder.call(attributes)

          content = attributes.delete(storage_field)

          raise ArgumentError, "#{storage_field} must be present in ContentStorage" unless content

          # Strings are written directly; anything else is handed to the strategy's write_from.
          if content.is_a?(String)
            strategy.write(content)
          else
            strategy.write_from(content)
          end

          attributes.merge!(strategy.metadata)

          attributes
        end
      end
    end
  end
end
@@ -0,0 +1,2 @@
1
+ require_relative 'content/fetch'
2
+ require_relative 'content/store'
@@ -0,0 +1,15 @@
1
# Namespaces are opened one level at a time so this file also loads when required on its own.
module Pyper
  module Pipes
    # Fills in default values for attributes that are missing or nil.
    #
    # Struct members:
    #   default_values [Hash] A hash of default values to set within the provided attrs if they are not already present.
    class DefaultValues < Struct.new(:default_values)

      # @param attrs [Hash] The attributes of the item. Updated in place.
      # @param status [Hash] The mutable status field
      # @return [Hash] The item attributes with default values inserted
      def pipe(attrs, status = {})
        default_values.each do |field, value|
          # Only nil counts as "not present": an explicit false must not be replaced by the default.
          attrs[field] = value if attrs[field].nil?
        end
        attrs
      end
    end
  end
end