setl 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 5a69608ef7938b0e490055db9bc3cf5d1924f9e0
4
+ data.tar.gz: 66b1a67765d78496ab1791d65a4c5f699d4b178f
5
+ SHA512:
6
+ metadata.gz: 5ff6e5d5cce17d475f08d5ad2965dc1c22d75343b4d4f04ceee9be83a15e0957a5b75a3de58f6c9c9ed374dfcb3e3a703eacd4be872549bb20a5b5393fd286f7
7
+ data.tar.gz: d3123a6e871b3b6ff720ff0fbd271cdbde7b4b14032317c58240a39d4bad09f6cb710a8b0942c800095852a94dd03817627eca39c9991de446386b6ea8231410
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ ruby-2.2.2
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in setl.gemspec
4
+ gemspec
5
+
6
+ gem 'byebug', require: false
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Leonard Garvey
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,52 @@
1
+ # Setl
2
+
3
+ A simple framework for writing testable ETL systems. Takes concepts from Kiba but reorganises them to use simple objects, composition and dependency injection. This means your ETL stays simple, reliable and testable.
4
+
5
+ DSLs are cool, but composable, testable objects are cooler.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'setl'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install setl
22
+
23
+ ## Usage
24
+
25
+ 1. Define an object that is responsible for sourcing the data. It must respond to `#each` and yield each row to the provided block.
26
+ 2. Define your transformations. These are simple objects that will receive one of the things provided by the source object.
27
+ 3. Provide your transforms to a controller object. Setl provides `Setl::Controller` and you can simply add your transforms into this object. The controller can actually be any object that responds to `#call`.
28
+ 4. Define a destination object. It must respond to `#call`; it receives each transformed row of data and sends it to its destination.
29
+
30
+ ```ruby
31
+ source = [{id: 1, name: 'foo'},{id: 2, name: 'bar'}]
32
+ destination = lambda {|d| puts d.inspect }
33
+
34
+ transform = lambda do |d|
35
+ d[:name].upcase!
36
+ d
37
+ end
38
+
39
+ Setl::ETL.new(source, destination).process(transform)
40
+ #=> {id: 1, name: 'FOO'}
41
+ #=> {id: 2, name: 'BAR'}
42
+ ```
43
+
44
+ See the examples folder for some more extensive, and realistic, implementations.
45
+
46
+ ## Contributing
47
+
48
+ 1. Fork it ( https://github.com/lengarvey/setl/fork )
49
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
50
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
51
+ 4. Push to the branch (`git push origin my-new-feature`)
52
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
data/bin/rake ADDED
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file was generated by Bundler.
4
+ #
5
+ # The application 'rake' is installed as part of a gem, and
6
+ # this file is here to facilitate running it.
7
+ #
8
+
9
+ require 'pathname'
10
+ ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
11
+ Pathname.new(__FILE__).realpath)
12
+
13
+ require 'rubygems'
14
+ require 'bundler/setup'
15
+
16
+ load Gem.bin_path('rake', 'rake')
data/bin/rspec ADDED
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file was generated by Bundler.
4
+ #
5
+ # The application 'rspec' is installed as part of a gem, and
6
+ # this file is here to facilitate running it.
7
+ #
8
+
9
+ require 'pathname'
10
+ ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
11
+ Pathname.new(__FILE__).realpath)
12
+
13
+ require 'rubygems'
14
+ require 'bundler/setup'
15
+
16
+ load Gem.bin_path('rspec-core', 'rspec')
@@ -0,0 +1,16 @@
module Setl
  # Chains a fixed list of transforms into a single callable.
  #
  # Every transform must respond to `#call`. Each one receives the value
  # returned by the transform before it; the first receives the row itself.
  class Controller
    # @param pipeline [Array<#call>] transforms, in the order they are applied
    def initialize(*pipeline)
      @pipeline = pipeline
    end

    # Runs +row+ through every transform in order.
    #
    # @param row [Object] the value handed to the first transform
    # @return [Object] the value returned by the last transform, or +row+
    #   itself when the pipeline is empty
    def call(row)
      pipeline.reduce(row) { |current, transform| transform.call(current) }
    end

    private

    attr_reader :pipeline
  end
end
data/lib/setl/etl.rb ADDED
@@ -0,0 +1,45 @@
module Setl
  # Drives rows from a source, through a transform, into a destination.
  class ETL
    # @param source [#each] yields every row to be processed
    # @param destination [#call] receives the result of each transformed row
    # @param stop_on_errors [Boolean] only configures the default handler
    #   (raise on the first failing row when true); it has no effect when
    #   +error_handler+ is supplied
    # @param error_handler [#call, nil] called with +(row, exception)+ for
    #   every row that fails; defaults to a DefaultHandler
    def initialize(source, destination, stop_on_errors: false, error_handler: nil)
      @source = source
      @destination = destination
      @error_handler = error_handler || DefaultHandler.new(stop_on_errors)
    end

    # Sends a copy of each source row through +transform+ and passes the
    # result to the destination. Any StandardError raised while copying,
    # transforming or delivering a row is handed to the error handler
    # together with the original (uncopied) row.
    #
    # @param transform [#call] receives the row copy, returns the value to deliver
    # @return [Object] whatever +source.each+ returns
    def process(transform)
      source.each do |row|
        # Explicit begin/end: a bare `rescue` inside do...end needs Ruby 2.5+.
        begin
          destination.call(transform.call(duplicate(row)))
        rescue StandardError => e
          error_handler.call(row, e)
        end
      end
    end

    private

    attr_reader :source, :destination, :error_handler

    # Shallow copy of +row+ so the transform does not work on the source's
    # own object. Values that refuse to be duplicated with a TypeError
    # (e.g. Integers on Ruby < 2.4, Singleton instances) are passed through
    # unchanged instead of aborting the run.
    def duplicate(row)
      row.dup
    rescue TypeError
      row
    end
  end

  # Raised by DefaultHandler when a row fails and processing must stop.
  class ProcessingError < StandardError
  end

  # Error handler used when ETL is not given one: either ignores the failure
  # or raises a ProcessingError wrapping it.
  class DefaultHandler
    # @param reraise [Boolean] whether a failing row should raise
    def initialize(reraise)
      @reraise = reraise
    end

    # @param row [Object] the row that failed; interpolated into the message
    # @param exception [Exception, nil] the original failure, attached as +cause+
    # @return [nil] when configured not to raise
    # @raise [ProcessingError] when configured to raise
    def call(row, exception)
      return unless reraise?

      # Pass the cause explicitly so it is preserved even when the handler is
      # invoked outside of a rescue clause.
      raise ProcessingError, "Failed to process #{row}", cause: exception
    end

    private

    def reraise?
      @reraise
    end
  end
end
@@ -0,0 +1,3 @@
1
+ module Setl
2
+ VERSION = "0.0.2"
3
+ end
data/lib/setl.rb ADDED
@@ -0,0 +1,3 @@
1
+ require "setl/version"
2
+ require "setl/etl"
3
+ require "setl/controller"
data/setl.gemspec ADDED
@@ -0,0 +1,23 @@
# coding: utf-8
# Gem specification for setl. The version is read from lib/setl/version.rb,
# so lib/ is put on the load path before requiring it.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'setl/version'

Gem::Specification.new do |spec|
  spec.name          = "setl"
  spec.version       = Setl::VERSION
  spec.authors       = ["Leonard Garvey"]
  spec.email         = ["lengarvey@gmail.com"]
  spec.summary       = %q{Simple Extract Transform & Load - setl}
  spec.description   = %q{Can you setl for a tool that barely provides anything?}
  spec.homepage      = "https://github.com/lengarvey/setl"
  spec.license       = "MIT"

  # Package every git-tracked file except those whose path contains "examples".
  # The regexp argument is parenthesised to avoid Ruby's ambiguous-argument warning.
  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(/examples/) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.7"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
end
@@ -0,0 +1,24 @@
1
+ require 'spec_helper'
2
+ require 'setl/controller'
3
+
4
+ module Setl
5
+ RSpec.describe 'controller' do
6
+ let(:controller) { Controller.new(transform1, transform2) }
7
+ let(:transform1) { double('Transform1', call: 'output') }
8
+ let(:transform2) { double('Transform2', call: 'final result') }
9
+ let(:row) { double('Row') }
10
+
11
+ describe 'processing a row of data' do
12
+ it 'creates a pipeline of transforms' do
13
+ expect(transform1).to receive(:call).with(row)
14
+ expect(transform2).to receive(:call).with('output')
15
+
16
+ controller.call(row)
17
+ end
18
+
19
+ it 'returns the result of all the transformations' do
20
+ expect(controller.call(row)).to eq 'final result'
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,58 @@
require 'spec_helper'
require 'setl/etl'

module Setl
  # Specs for how ETL#process reacts when the transform raises.
  RSpec.describe 'error handling' do
    let(:etl) { ETL.new(source, destination) }
    let(:source) { [1, 2] }
    let(:destination) { double('Destination', call: true) }

    # Transform that raises for row 1 and returns nil for any other row.
    let(:transform) do
      Class.new do
        def self.call(row)
          raise 'nope' if row == 1
        end
      end
    end

    let(:process) { etl.process(transform) }

    context 'by default' do
      it 'rescues the error' do
        expect { process }.to_not raise_error
      end

      it 'processes the next row' do
        # Call through to the real transform so row 1 genuinely raises;
        # stubbing it out would hide the failure this example is about.
        allow(transform).to receive(:call).and_call_original
        expect(transform).to receive(:call).with(2)
        process
      end
    end

    context 'when configured to stop on errors' do
      let(:etl) { ETL.new(source, destination, stop_on_errors: true) }

      it 'stops processing when a processing error occurs' do
        expect { process }.to raise_error(ProcessingError, 'Failed to process 1')
      end

      it 'wraps the original error' do
        # raise_error fails the example when nothing is raised, unlike a bare
        # begin/rescue, which would pass without checking anything.
        expect { process }.to raise_error(ProcessingError) { |error|
          expect(error.cause).to be_a RuntimeError
        }
      end
    end

    context 'when provided an error handler' do
      let(:handler) { double('Error Handler') }
      let(:etl) { ETL.new(source, destination, error_handler: handler) }

      it 'sends the row and exception to the handler' do
        expect(handler).to receive(:call).with(1, an_instance_of(RuntimeError))

        process
      end
    end
  end
end
data/spec/etl_spec.rb ADDED
@@ -0,0 +1,28 @@
1
+ require 'spec_helper'
2
+ require 'setl/etl'
3
+
4
+ module Setl
5
+ RSpec.describe 'etl' do
6
+ let(:etl) { ETL.new(source, destination) }
7
+
8
+ describe 'processing a row' do
9
+ let(:row) { 'hello' }
10
+ let(:source) { [row] }
11
+ let(:destination) { double('Destination', call: true) }
12
+ let(:processed_data) { double('Processed row') }
13
+ let(:transform) { double('Transform', call: processed_data) }
14
+
15
+ before do
16
+ etl.process(transform)
17
+ end
18
+
19
+ it 'delegates the processing to the transform' do
20
+ expect(transform).to have_received(:call).with(row)
21
+ end
22
+
23
+ it 'sends the processed row to the destination' do
24
+ expect(destination).to have_received(:call).with(processed_data)
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,96 @@
1
+ # This file was generated by the `rspec --init` command. Conventionally, all
2
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
3
+ # The generated `.rspec` file contains `--require spec_helper` which will cause
4
+ # this file to always be loaded, without a need to explicitly require it in any
5
+ # files.
6
+ #
7
+ # Given that it is always loaded, you are encouraged to keep this file as
8
+ # light-weight as possible. Requiring heavyweight dependencies from this file
9
+ # will add to the boot time of your test suite on EVERY test run, even for an
10
+ # individual file that may not need all of that loaded. Instead, consider making
11
+ # a separate helper file that requires the additional dependencies and performs
12
+ # the additional setup, and require it from the spec files that actually need
13
+ # it.
14
+ #
15
+ # The `.rspec` file also contains a few flags that are not defaults but that
16
+ # users commonly want.
17
+ #
18
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
19
+ RSpec.configure do |config|
20
+ # rspec-expectations config goes here. You can use an alternate
21
+ # assertion/expectation library such as wrong or the stdlib/minitest
22
+ # assertions if you prefer.
23
+ config.expect_with :rspec do |expectations|
24
+ # This option will default to `true` in RSpec 4. It makes the `description`
25
+ # and `failure_message` of custom matchers include text for helper methods
26
+ # defined using `chain`, e.g.:
27
+ # be_bigger_than(2).and_smaller_than(4).description
28
+ # # => "be bigger than 2 and smaller than 4"
29
+ # ...rather than:
30
+ # # => "be bigger than 2"
31
+ expectations.include_chain_clauses_in_custom_matcher_descriptions = true
32
+ end
33
+
34
+ # rspec-mocks config goes here. You can use an alternate test double
35
+ # library (such as bogus or mocha) by changing the `mock_with` option here.
36
+ config.mock_with :rspec do |mocks|
37
+ # Prevents you from mocking or stubbing a method that does not exist on
38
+ # a real object. This is generally recommended, and will default to
39
+ # `true` in RSpec 4.
40
+ mocks.verify_partial_doubles = true
41
+ end
42
+
43
+ # The settings below are suggested to provide a good initial experience
44
+ # with RSpec, but feel free to customize to your heart's content.
45
+ =begin
46
+ # These two settings work together to allow you to limit a spec run
47
+ # to individual examples or groups you care about by tagging them with
48
+ # `:focus` metadata. When nothing is tagged with `:focus`, all examples
49
+ # get run.
50
+ config.filter_run :focus
51
+ config.run_all_when_everything_filtered = true
52
+
53
+ # Allows RSpec to persist some state between runs in order to support
54
+ # the `--only-failures` and `--next-failure` CLI options. We recommend
55
+ # you configure your source control system to ignore this file.
56
+ config.example_status_persistence_file_path = "spec/examples.txt"
57
+
58
+ # Limits the available syntax to the non-monkey patched syntax that is
59
+ # recommended. For more details, see:
60
+ # - http://myronmars.to/n/dev-blog/2012/06/rspecs-new-expectation-syntax
61
+ # - http://www.teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
62
+ # - http://myronmars.to/n/dev-blog/2014/05/notable-changes-in-rspec-3#new__config_option_to_disable_rspeccore_monkey_patching
63
+ config.disable_monkey_patching!
64
+
65
+ # This setting enables warnings. It's recommended, but in some cases may
66
+ # be too noisy due to issues in dependencies.
67
+ config.warnings = true
68
+
69
+ # Many RSpec users commonly either run the entire suite or an individual
70
+ # file, and it's useful to allow more verbose output when running an
71
+ # individual spec file.
72
+ if config.files_to_run.one?
73
+ # Use the documentation formatter for detailed output,
74
+ # unless a formatter has already been configured
75
+ # (e.g. via a command-line flag).
76
+ config.default_formatter = 'doc'
77
+ end
78
+
79
+ # Print the 10 slowest examples and example groups at the
80
+ # end of the spec run, to help surface which specs are running
81
+ # particularly slow.
82
+ config.profile_examples = 10
83
+
84
+ # Run specs in random order to surface order dependencies. If you find an
85
+ # order dependency and want to debug it, you can fix the order by providing
86
+ # the seed, which is printed after each run.
87
+ # --seed 1234
88
+ config.order = :random
89
+
90
+ # Seed global randomization in this process using the `--seed` CLI option.
91
+ # Setting this allows you to use `--seed` to deterministically reproduce
92
+ # test failures related to randomization by passing the same `--seed` value
93
+ # as the one that triggered the failure.
94
+ Kernel.srand config.seed
95
+ =end
96
+ end
metadata ADDED
@@ -0,0 +1,108 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: setl
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Leonard Garvey
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-09-19 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ description: Can you setl for a tool that barely provides anything?
56
+ email:
57
+ - lengarvey@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - ".gitignore"
63
+ - ".rspec"
64
+ - ".ruby-version"
65
+ - Gemfile
66
+ - LICENSE.txt
67
+ - README.md
68
+ - Rakefile
69
+ - bin/rake
70
+ - bin/rspec
71
+ - lib/setl.rb
72
+ - lib/setl/controller.rb
73
+ - lib/setl/etl.rb
74
+ - lib/setl/version.rb
75
+ - setl.gemspec
76
+ - spec/controller_spec.rb
77
+ - spec/error_handling_spec.rb
78
+ - spec/etl_spec.rb
79
+ - spec/spec_helper.rb
80
+ homepage: https://github.com/lengarvey/setl
81
+ licenses:
82
+ - MIT
83
+ metadata: {}
84
+ post_install_message:
85
+ rdoc_options: []
86
+ require_paths:
87
+ - lib
88
+ required_ruby_version: !ruby/object:Gem::Requirement
89
+ requirements:
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ version: '0'
93
+ required_rubygems_version: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - ">="
96
+ - !ruby/object:Gem::Version
97
+ version: '0'
98
+ requirements: []
99
+ rubyforge_project:
100
+ rubygems_version: 2.4.5
101
+ signing_key:
102
+ specification_version: 4
103
+ summary: Simple Extract Transform & Load - setl
104
+ test_files:
105
+ - spec/controller_spec.rb
106
+ - spec/error_handling_spec.rb
107
+ - spec/etl_spec.rb
108
+ - spec/spec_helper.rb