petl 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: c4aef860e78445984a2fa4c4c32216a699abd257
4
+ data.tar.gz: 8fb4ea2f027fa88819d368c1cfe5e4c9d3a63458
5
+ SHA512:
6
+ metadata.gz: 19567edbfd6e82b3cd1bb7626317820fdc91611b23021837f28d2b5a4bcd64486287cca1c225a98768126d6b2ca500d5064660258e163c0fc5b7f17b1e720c0b
7
+ data.tar.gz: 70019a5f685728ce2651df6929ecf01eb3058096fb40d20102a772e6e959e0f6c9627b2de126b9ced1c00303a9b84f958f4f1385de43259901d99706317faf8b
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format progress
@@ -0,0 +1,3 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.3
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in petl.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2013 ZestFinance
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,65 @@
1
+ # petl [![Build Status](https://travis-ci.org/ZestFinance/petl.png?branch=master)](https://travis-ci.org/ZestFinance/petl) [![Code Climate](https://codeclimate.com/github/ZestFinance/petl.png)](https://codeclimate.com/github/ZestFinance/petl)
2
+
3
+ Pretty good ETL framework
4
+
5
+ ## Features
6
+ 1. Batching support
7
+ 2. Automatic validity check
8
+ 3. Logging of running times
9
+
10
+ ## Installation
11
+
12
+ Add this line to your application's Gemfile:
13
+
14
+ gem 'petl'
15
+
16
+ And then execute:
17
+
18
+ $ bundle
19
+
20
+ Or install it yourself as:
21
+
22
+ $ gem install petl
23
+
24
+ ## Usage
25
+
26
+ ```ruby
27
+ require 'petl'
28
+
29
+ module ETL::Example
30
+ extend Petl::ETL
31
+ extend self
32
+
33
+ def extract
34
+ # Grab all data from source(s) here.
35
+ # Preferably return an array of hashes.
36
+ end
37
+
38
+ def transform rows
39
+ # Manipulate the data extracted by the previous extract method.
40
+ end
41
+
42
+ def load rows
43
+ # Load the transformed data here into the destination(s).
44
+ end
45
+
46
+ def source_count
47
+ # Count the number of records from your source(s).
48
+ end
49
+
50
+ def destination_count
51
+ # Same as #source_count but with your destination(s).
52
+ end
53
+ end
54
+
55
+ # Run it!
56
+ ETL::Example.perform
57
+ ```
58
+
59
+ ## Contributing
60
+
61
+ 1. Fork it
62
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
63
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
64
+ 4. Push to the branch (`git push origin my-new-feature`)
65
+ 5. Create new Pull Request
@@ -0,0 +1,8 @@
1
+ require "bundler/gem_tasks"
2
+ require 'rspec/core/rake_task'
3
# `rake spec` runs every *_spec.rb file under spec/ with coloured output.
RSpec::Core::RakeTask.new(:spec) do |spec_task|
  spec_task.rspec_opts = '--color'
  spec_task.pattern = 'spec/**/*_spec.rb'
end

# A bare `rake` runs the spec suite.
task :default => :spec
@@ -0,0 +1,2 @@
1
+ require "petl/version"
2
+ require "petl/etl"
@@ -0,0 +1,71 @@
1
module Petl
  # Mixin that drives an extract -> transform -> load run, logs how long the
  # run took and compares source and destination record counts afterwards.
  #
  # A job extends this module and overrides #extract, #transform, #load,
  # #source_count and #destination_count. Overriding #batch to return true
  # switches #perform to batch mode, in which #extract must yield each batch.
  module ETL
    # Runs the whole job, batched or not depending on #batch.
    #
    # @param logger [#info, #error] receives progress and verification lines;
    #   defaults to Rails.logger when Rails is loaded, otherwise to a stdlib
    #   Logger writing to $stdout
    def perform(logger = default_logger)
      if batch
        batch_perform logger
      else
        not_batch_perform logger
      end
    end

    # Placeholder: must be overridden. In batch mode it is called with a block
    # and is expected to yield each batch.
    #
    # @raise [NotImplementedError] always
    def extract
      raise NotImplementedError, "#{self}#extract not implemented."
    end

    # Placeholder: must be overridden. Accepts (and ignores) the rows that
    # #perform passes in, so an unimplemented job fails with
    # NotImplementedError rather than an arity ArgumentError.
    #
    # @raise [NotImplementedError] always
    def transform(_rows = nil)
      raise NotImplementedError, "#{self}#transform not implemented."
    end

    # Placeholder: must be overridden. Accepts (and ignores) the rows that
    # #perform passes in, for the same reason as #transform.
    #
    # @raise [NotImplementedError] always
    def load(_rows = nil)
      raise NotImplementedError, "#{self}#load not implemented."
    end

    # Placeholder: must be overridden to return the number of source records.
    #
    # @raise [NotImplementedError] always
    def source_count
      raise NotImplementedError, "#{self}#source_count not implemented."
    end

    # Placeholder: must be overridden to return the number of destination
    # records.
    #
    # @raise [NotImplementedError] always
    def destination_count
      raise NotImplementedError, "#{self}#destination_count not implemented."
    end

    # Logs both record counts, preceded by an error line when they differ.
    #
    # @param logger [#info, #error] see #perform
    def verify(logger = default_logger)
      # Each count is taken exactly once so that the values logged are the
      # same values that were compared.
      source = source_count
      destination = destination_count

      logger.error "#{self}: counts don't match" if source != destination

      logger.info "#{self}: source count #{source}"
      logger.info "#{self}: destination count #{destination}"
    end

    # Whether #perform runs in batch mode. Override to return true.
    def batch
      false
    end

    private

    # Single pass: everything #extract returns is transformed and loaded at once.
    def not_batch_perform(logger = default_logger)
      with_profiling logger do
        load(transform(extract))
        verify logger
      end
    end

    # Batch pass: every batch yielded by #extract is transformed and loaded in turn.
    def batch_perform(logger = default_logger)
      with_profiling logger do
        extract do |rows|
          load(transform(rows))
        end
        verify logger
      end
    end

    # Logs a start line, runs the block, then logs a finish line carrying the
    # elapsed time rounded to whole seconds.
    def with_profiling(logger)
      started_at = Time.now
      logger.info "#{self} starting at #{started_at}"

      yield

      finished_at = Time.now
      logger.info "#{self} finished at #{finished_at}. Took #{(finished_at - started_at).round} seconds"
    end

    # Logger used when the caller does not supply one: Rails.logger when it is
    # available and set, otherwise a stdlib Logger on $stdout.
    def default_logger
      rails_logger = Rails.logger if defined?(Rails) && Rails.respond_to?(:logger)
      return rails_logger if rails_logger

      require 'logger' # lazy: only needed when no logger was supplied
      Logger.new($stdout)
    end
  end
end
@@ -0,0 +1,3 @@
1
# Top-level namespace of the petl gem.
module Petl
  # Release number of the gem; petl.gemspec reads it as the gem version.
  VERSION = '0.0.1'
end
@@ -0,0 +1,24 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'petl/version'
5
+
6
# Gem specification for petl. The version comes from lib/petl/version.rb and
# the file list from whatever git currently tracks.
Gem::Specification.new do |spec|
  spec.name          = "petl"
  spec.version       = Petl::VERSION
  spec.authors       = ["Alexander Tamoykin", "Chris Rosario"]
  spec.email         = ["at@zestfinance.com", "car@zestfinance.com"]
  spec.description   = %q{Pretty good ETL framework}
  spec.summary       = %q{Pretty good ETL framework}
  # Previously an empty string; this is the repository the README badges link to.
  spec.homepage      = "https://github.com/ZestFinance/petl"
  spec.license       = "MIT"

  # Package every git-tracked file; executables and test files are derived
  # from that list by path prefix.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "rspec"
end
@@ -0,0 +1,116 @@
1
+ require 'spec_helper'
2
+
3
describe "Petl::ETL" do

  # Smallest possible non-batch job: nothing to extract, matching counts.
  module TestETL
    extend Petl::ETL
    extend self

    def source_count
      1
    end

    def destination_count
      1
    end

    def extract
      []
    end

    def transform data
      data
    end

    def load data
    end
  end

  describe TestETL do

    describe '#verify' do
      let(:logger) { double }

      context "counts don't match" do
        before do
          TestETL.stub(:source_count).and_return(1)
          TestETL.stub(:destination_count).and_return(2)
        end

        it "logs the error" do
          logger.should_receive(:error).with(/counts don't match/i).once
          logger.should_receive(:info).twice
          described_class.verify logger
        end
      end

      it "logs the source count" do
        logger.should_receive(:info).with(/source count\s*1/).once
        logger.should_receive(:info).once
        described_class.verify logger
      end

      it "logs the destination count" do
        logger.should_receive(:info).once
        logger.should_receive(:info).with(/destination count\s*1/).once
        described_class.verify logger
      end
    end

    context '#perform' do
      let(:logger) { double }

      context "with batching" do
        # Same as TestETL, but opts into batch mode and yields a single
        # empty batch from #extract.
        module BatchETL
          extend Petl::ETL
          extend self

          def batch
            true
          end

          def source_count
            1
          end

          def destination_count
            1
          end

          def extract &block
            yield []
          end

          def transform data
            data
          end

          def load data
          end
        end

        before do
          logger.stub(:info)
        end

        it 'yields to extract' do
          BatchETL.should_receive(:extract).and_yield []
          BatchETL.perform logger
        end
      end

      it 'logs the start time, end time and time elapsed' do
        # Four info lines in total: start, the two counts from #verify, finish.
        logger.should_receive(:info).with(/starting/i).once
        logger.should_receive(:info).once
        logger.should_receive(:info).once
        logger.should_receive(:info).with(/finished.*took/i).once

        described_class.perform logger
      end
    end
  end
end
@@ -0,0 +1,10 @@
1
# Put the checkout's lib/ directory first on the load path, then load the
# bundled gems and the library under test.
$LOAD_PATH.unshift(File.expand_path('../../lib', __FILE__))
Bundler.require
require 'petl'

# Suite-wide RSpec settings. The assignments keep their original order.
RSpec.configure do |rspec|
  rspec.treat_symbols_as_metadata_keys_with_true_values = true
  rspec.run_all_when_everything_filtered = true
  rspec.filter_run :focus
  rspec.order = 'random'
end
metadata ADDED
@@ -0,0 +1,103 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: petl
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Alexander Tamoykin
8
+ - Chris Rosario
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-10-10 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bundler
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - ~>
19
+ - !ruby/object:Gem::Version
20
+ version: '1.3'
21
+ type: :development
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ~>
26
+ - !ruby/object:Gem::Version
27
+ version: '1.3'
28
+ - !ruby/object:Gem::Dependency
29
+ name: rake
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - '>='
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ type: :development
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - '>='
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ - !ruby/object:Gem::Dependency
43
+ name: rspec
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - '>='
47
+ - !ruby/object:Gem::Version
48
+ version: '0'
49
+ type: :development
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - '>='
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ description: Pretty good ETL framework
57
+ email:
58
+ - at@zestfinance.com
59
+ - car@zestfinance.com
60
+ executables: []
61
+ extensions: []
62
+ extra_rdoc_files: []
63
+ files:
64
+ - .gitignore
65
+ - .rspec
66
+ - .travis.yml
67
+ - Gemfile
68
+ - LICENSE
69
+ - README.md
70
+ - Rakefile
71
+ - lib/petl.rb
72
+ - lib/petl/etl.rb
73
+ - lib/petl/version.rb
74
+ - petl.gemspec
75
+ - spec/lib/petl/etl_spec.rb
76
+ - spec/spec_helper.rb
77
+ homepage: ''
78
+ licenses:
79
+ - MIT
80
+ metadata: {}
81
+ post_install_message:
82
+ rdoc_options: []
83
+ require_paths:
84
+ - lib
85
+ required_ruby_version: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ required_rubygems_version: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - '>='
93
+ - !ruby/object:Gem::Version
94
+ version: '0'
95
+ requirements: []
96
+ rubyforge_project:
97
+ rubygems_version: 2.0.2
98
+ signing_key:
99
+ specification_version: 4
100
+ summary: Pretty good ETL framework
101
+ test_files:
102
+ - spec/lib/petl/etl_spec.rb
103
+ - spec/spec_helper.rb