petl 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: c4aef860e78445984a2fa4c4c32216a699abd257
4
+ data.tar.gz: 8fb4ea2f027fa88819d368c1cfe5e4c9d3a63458
5
+ SHA512:
6
+ metadata.gz: 19567edbfd6e82b3cd1bb7626317820fdc91611b23021837f28d2b5a4bcd64486287cca1c225a98768126d6b2ca500d5064660258e163c0fc5b7f17b1e720c0b
7
+ data.tar.gz: 70019a5f685728ce2651df6929ecf01eb3058096fb40d20102a772e6e959e0f6c9627b2de126b9ced1c00303a9b84f958f4f1385de43259901d99706317faf8b
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format progress
@@ -0,0 +1,3 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.3
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Resolve gems from the public RubyGems index.
source "https://rubygems.org"

# The gem's dependencies are declared in petl.gemspec, not listed here.
gemspec
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2013 ZestFinance
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,65 @@
1
+ # petl [![Build Status](https://travis-ci.org/ZestFinance/petl.png?branch=master)](https://travis-ci.org/ZestFinance/petl) [![Code Climate](https://codeclimate.com/github/ZestFinance/petl.png)](https://codeclimate.com/github/ZestFinance/petl)
2
+
3
+ Pretty good ETL framework
4
+
5
+ ## Features
6
+ 1. Batching support
7
+ 2. Automatic validity check
8
+ 3. Logging of running times
9
+
10
+ ## Installation
11
+
12
+ Add this line to your application's Gemfile:
13
+
14
+ gem 'petl'
15
+
16
+ And then execute:
17
+
18
+ $ bundle
19
+
20
+ Or install it yourself as:
21
+
22
+ $ gem install petl
23
+
24
+ ## Usage
25
+
26
+ ```ruby
27
+ require 'petl'
28
+
29
+ module ETL::Example
30
+ extend Petl::ETL
31
+ extend self
32
+
33
+ def extract
34
+ # Grab all data from source(s) here.
35
+ # Preferably return an array of hashes.
36
+ end
37
+
38
+ def transform rows
39
+ # Manipulate the data extracted by the previous extract method.
40
+ end
41
+
42
+ def load rows
43
+ # Load the transformed data here into the destination(s).
44
+ end
45
+
46
+ def source_count
47
+ # Count the number of records from your source(s).
48
+ end
49
+
50
+ def destination_count
51
+ # Same as #source_count but with your destination(s).
52
+ end
53
+ end
54
+
55
+ # Run it!
56
+ ETL::Example.perform
57
+ ```
58
+
59
+ ## Contributing
60
+
61
+ 1. Fork it
62
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
63
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
64
+ 4. Push to the branch (`git push origin my-new-feature`)
65
+ 5. Create new Pull Request
@@ -0,0 +1,8 @@
1
+ require "bundler/gem_tasks"
2
+ require 'rspec/core/rake_task'
3
# `rake spec` runs every *_spec.rb file under spec/ with colored output.
RSpec::Core::RakeTask.new(:spec) do |spec_task|
  spec_task.pattern    = 'spec/**/*_spec.rb'
  spec_task.rspec_opts = '--color'
end

# A bare `rake` runs the spec task.
task :default => :spec
@@ -0,0 +1,2 @@
1
+ require "petl/version"
2
+ require "petl/etl"
@@ -0,0 +1,71 @@
1
module Petl
  # Mixin that drives an extract -> transform -> load pipeline.
  #
  # Extend a module or object with it and override #extract, #transform,
  # #load, #source_count and #destination_count; the versions defined here
  # only raise NotImplementedError. Override #batch to return true when
  # #extract yields the data in batches instead of returning it all at once.
  module ETL
    # Runs the pipeline, batched or all at once depending on #batch, then
    # verifies the counts. Start and finish times are logged around the run.
    #
    # @param logger [#info, #error] receives progress and count messages;
    #   defaults to Rails.logger when available, otherwise a Logger on $stdout
    def perform(logger = default_logger)
      if batch
        batch_perform logger
      else
        not_batch_perform logger
      end
    end

    # Returns the source data, or yields it batch by batch when #batch is true.
    #
    # @raise [NotImplementedError] unless overridden
    def extract
      raise NotImplementedError, "#{self}#extract not implemented."
    end

    # Converts extracted rows into the form #load expects.
    #
    # The parameter exists so that an un-overridden step fails with the
    # intended NotImplementedError rather than an ArgumentError when the
    # pipeline passes it the extracted rows.
    #
    # @raise [NotImplementedError] unless overridden
    def transform(_rows = nil)
      raise NotImplementedError, "#{self}#transform not implemented."
    end

    # Writes transformed rows to the destination.
    #
    # @raise [NotImplementedError] unless overridden
    def load(_rows = nil)
      raise NotImplementedError, "#{self}#load not implemented."
    end

    # Number of records in the source.
    #
    # @raise [NotImplementedError] unless overridden
    def source_count
      raise NotImplementedError, "#{self}#source_count not implemented."
    end

    # Number of records in the destination.
    #
    # @raise [NotImplementedError] unless overridden
    def destination_count
      raise NotImplementedError, "#{self}#destination_count not implemented."
    end

    # Compares source and destination counts, logging an error when they
    # differ, and always logs both counts.
    #
    # @param logger [#info, #error] see #perform
    def verify(logger = default_logger)
      # Read each count exactly once so the values that are compared are the
      # same values that get logged, and the counting work is not repeated.
      source = source_count
      destination = destination_count

      logger.error "#{self}: counts don't match" if source != destination

      logger.info "#{self}: source count #{source}"
      logger.info "#{self}: destination count #{destination}"
    end

    # Whether #extract yields batches. Defaults to false.
    def batch
      false
    end

    private

    # Single pass: extract everything, transform it, load it, then verify.
    def not_batch_perform(logger = default_logger)
      with_profiling logger do
        load(transform(extract))
        verify logger
      end
    end

    # Batched pass: transform and load every batch yielded by #extract, then
    # verify once after the last batch.
    def batch_perform(logger = default_logger)
      with_profiling logger do
        extract do |rows|
          load(transform(rows))
        end
        verify logger
      end
    end

    # Logs the start time, runs the given block, then logs the finish time
    # and the elapsed time rounded to whole seconds.
    def with_profiling(logger)
      start_time = Time.now
      logger.info "#{self} starting at #{start_time}"

      yield

      # Capture the finish time once so the reported time and the elapsed
      # duration are derived from the same instant.
      finish_time = Time.now
      elapsed = (finish_time - start_time).round
      logger.info "#{self} finished at #{finish_time}. Took #{elapsed} seconds"
    end

    # Logger used when the caller passes none: Rails.logger when Rails is
    # loaded and has one, otherwise a standard-library Logger on $stdout.
    def default_logger
      rails_logger = Rails.logger if defined?(Rails) && Rails.respond_to?(:logger)
      return rails_logger if rails_logger

      # Loaded lazily so the dependency is only pulled in when it is needed.
      require 'logger'
      Logger.new($stdout)
    end
  end
end
@@ -0,0 +1,3 @@
1
module Petl
  # Gem version string; petl.gemspec reads it as the specification version.
  # Frozen so the constant cannot be mutated in place at runtime.
  VERSION = '0.0.1'.freeze
end
@@ -0,0 +1,24 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'petl/version'
5
+
6
# Gem specification for petl: identity, packaged files and development
# dependencies.
Gem::Specification.new do |spec|
  spec.name          = "petl"
  spec.version       = Petl::VERSION
  spec.authors       = ["Alexander Tamoykin", "Chris Rosario"]
  spec.email         = ["at@zestfinance.com", "car@zestfinance.com"]
  spec.description   = %q{Pretty good ETL framework}
  spec.summary       = %q{Pretty good ETL framework}
  # NOTE(review): the homepage was previously empty; this URL is inferred
  # from the ZestFinance/petl badge links in the README -- confirm.
  spec.homepage      = "https://github.com/ZestFinance/petl"
  spec.license       = "MIT"

  # Package every file tracked by git. The NUL-separated listing (-z) keeps
  # file names containing spaces, newlines or non-ASCII characters intact.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "rspec"
end
@@ -0,0 +1,116 @@
1
+ require 'spec_helper'
2
+
3
describe "Petl::ETL" do

  # Smallest possible non-batch pipeline: nothing to extract, transform is
  # the identity, load does nothing, and both counts are 1.
  module TestETL
    extend Petl::ETL
    extend self

    def extract
      []
    end

    def transform(rows)
      rows
    end

    def load(_rows)
    end

    def source_count
      1
    end

    def destination_count
      1
    end
  end

  describe TestETL do
    # A bare double: any message without an explicit expectation or stub
    # fails the example.
    let(:logger) { double }

    describe '#verify' do
      context "counts don't match" do
        before do
          TestETL.stub(:source_count => 1, :destination_count => 2)
        end

        it "logs the error" do
          logger.should_receive(:error).with(/counts don't match/i).once
          logger.should_receive(:info).twice
          described_class.verify logger
        end
      end

      it "logs the source count" do
        logger.should_receive(:info).with(/source count\s*1/).once
        logger.should_receive(:info).once
        described_class.verify logger
      end

      it "logs the destination count" do
        logger.should_receive(:info).once
        logger.should_receive(:info).with(/destination count\s*1/).once
        described_class.verify logger
      end
    end

    context '#perform' do
      context "with batching" do
        # Same pipeline as TestETL, except that #batch is true and #extract
        # yields a single empty batch.
        module BatchETL
          extend Petl::ETL
          extend self

          def batch
            true
          end

          def extract
            yield []
          end

          def transform(rows)
            rows
          end

          def load(_rows)
          end

          def source_count
            1
          end

          def destination_count
            1
          end
        end

        before { logger.stub(:info) }

        it 'yields to extract' do
          BatchETL.should_receive(:extract).and_yield []
          BatchETL.perform logger
        end
      end

      it 'logs the start time, end time and time elapsed' do
        logger.should_receive(:info).with(/starting/i).once
        logger.should_receive(:info).once
        logger.should_receive(:info).once
        logger.should_receive(:info).with(/finished.*took/i).once

        described_class.perform logger
      end
    end
  end
end
@@ -0,0 +1,10 @@
1
# Put the gem's lib/ directory on the load path so `require 'petl'` resolves
# to the working copy.
$:.unshift File.expand_path('../../lib', __FILE__)

# Load Bundler explicitly: without this, `Bundler.require` raises NameError
# when the suite is started with a plain `rspec` instead of `bundle exec`.
require 'bundler'
Bundler.require
require 'petl'

RSpec.configure do |config|
  # Allow bare symbols such as :focus to be used as example metadata.
  config.treat_symbols_as_metadata_keys_with_true_values = true
  # Run only examples tagged :focus, or the whole suite when none are tagged.
  config.run_all_when_everything_filtered = true
  config.filter_run :focus
  # Randomize example order to expose order dependencies between examples.
  config.order = 'random'
end
metadata ADDED
@@ -0,0 +1,103 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: petl
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Alexander Tamoykin
8
+ - Chris Rosario
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-10-10 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bundler
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - ~>
19
+ - !ruby/object:Gem::Version
20
+ version: '1.3'
21
+ type: :development
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ~>
26
+ - !ruby/object:Gem::Version
27
+ version: '1.3'
28
+ - !ruby/object:Gem::Dependency
29
+ name: rake
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - '>='
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ type: :development
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - '>='
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ - !ruby/object:Gem::Dependency
43
+ name: rspec
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - '>='
47
+ - !ruby/object:Gem::Version
48
+ version: '0'
49
+ type: :development
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - '>='
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ description: Pretty good ETL framework
57
+ email:
58
+ - at@zestfinance.com
59
+ - car@zestfinance.com
60
+ executables: []
61
+ extensions: []
62
+ extra_rdoc_files: []
63
+ files:
64
+ - .gitignore
65
+ - .rspec
66
+ - .travis.yml
67
+ - Gemfile
68
+ - LICENSE
69
+ - README.md
70
+ - Rakefile
71
+ - lib/petl.rb
72
+ - lib/petl/etl.rb
73
+ - lib/petl/version.rb
74
+ - petl.gemspec
75
+ - spec/lib/petl/etl_spec.rb
76
+ - spec/spec_helper.rb
77
+ homepage: ''
78
+ licenses:
79
+ - MIT
80
+ metadata: {}
81
+ post_install_message:
82
+ rdoc_options: []
83
+ require_paths:
84
+ - lib
85
+ required_ruby_version: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ required_rubygems_version: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - '>='
93
+ - !ruby/object:Gem::Version
94
+ version: '0'
95
+ requirements: []
96
+ rubyforge_project:
97
+ rubygems_version: 2.0.2
98
+ signing_key:
99
+ specification_version: 4
100
+ summary: Pretty good ETL framework
101
+ test_files:
102
+ - spec/lib/petl/etl_spec.rb
103
+ - spec/spec_helper.rb