reindeer-etl 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +19 -0
- data/CODE_OF_CONDUCT.md +13 -0
- data/Gemfile +4 -0
- data/License.md +596 -0
- data/README.md +112 -0
- data/Rakefile +13 -0
- data/bin/console +15 -0
- data/bin/setup +7 -0
- data/etl/.gitignore +2 -0
- data/lib/reindeer-etl.rb +31 -0
- data/lib/reindeer-etl/destinations/csv_dest.rb +21 -0
- data/lib/reindeer-etl/errors.rb +3 -0
- data/lib/reindeer-etl/sources/base_source.rb +13 -0
- data/lib/reindeer-etl/sources/csv_source.rb +25 -0
- data/lib/reindeer-etl/sources/multi_source.rb +51 -0
- data/lib/reindeer-etl/transforms/recode.rb +61 -0
- data/lib/reindeer-etl/transforms/rename_fields.rb +26 -0
- data/lib/reindeer-etl/transforms/response_status.rb +46 -0
- data/lib/reindeer-etl/transforms/simple_transforms.rb +33 -0
- data/lib/reindeer-etl/version.rb +3 -0
- data/reindeer-etl.gemspec +37 -0
- data/test/lib/destinations_csv_dest_test.rb +0 -0
- data/test/lib/sources_base_source_test.rb +17 -0
- data/test/lib/sources_csv_source_test.rb +24 -0
- data/test/lib/sources_multi_source_test.rb +20 -0
- data/test/lib/transforms_recode_test.rb +34 -0
- data/test/lib/transforms_rename_fields_test.rb +0 -0
- data/test/lib/transforms_response_status_test.rb +22 -0
- data/test/minitest_helper.rb +7 -0
- data/test/reindeer_waterworks_test.rb +8 -0
- metadata +156 -0
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'set'

module ReindeerETL
  module Transforms
    ##
    # Mixin offering two optional per-row checks: keeping only a whitelist
    # of columns (:only) and insisting that certain columns are present
    # (:require). The including object must call st_initialize before
    # simple_transforms, because the latter reads the instance variables
    # that st_initialize sets.
    module SimpleTransforms
      ##
      # Capture the :only and :require options as sets.
      #
      # Both keys are removed from +opts+, so the caller's hash is mutated.
      # NOTE(review): presumably this lets the remaining options be passed
      # on to something else -- confirm against the including classes.
      #
      # opts:: Hash that may contain :only and/or :require, each a list of
      #        column names. A missing key means "no restriction".
      def st_initialize opts={}
        @only_cols = (opts.delete(:only) || []).to_set
        @require_cols = (opts.delete(:require) || []).to_set
      end

      ##
      # Apply the configured transforms to +row+ in place.
      #
      # Column filtering runs first, so a required column that is not also
      # listed in :only is reported as missing.
      #
      # Returns the same +row+ object.
      # Raises ReindeerETL::Errors::RecordInvalid when a required column is
      # absent.
      def simple_transforms row
        st_only_cols(row) unless @only_cols.empty?
        st_require_cols(row) unless @require_cols.empty?
        row
      end

      ##
      # Delete every key of +dict+ that is not in the :only set.
      #
      # Mutates and returns +dict+. Iterates over a snapshot of the keys so
      # that entries can be deleted safely while looping.
      def st_only_cols dict
        dict.keys.each { |col| dict.delete(col) unless @only_cols.include?(col) }
        dict
      end

      ##
      # Ensure every column of the :require set is a key of +dict+.
      #
      # Returns nil when nothing is missing.
      # Raises ReindeerETL::Errors::RecordInvalid listing the missing
      # columns otherwise.
      def st_require_cols dict
        missing_cols = (@require_cols - dict.keys.to_set).to_a
        return if missing_cols.empty?

        raise ReindeerETL::Errors::RecordInvalid, "Missing required columns: #{missing_cols}"
      end
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for reindeer-etl.

# Make the gem's own lib/ directory loadable so the version constant can be
# required before the specification is built.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.include?(lib_dir) || $LOAD_PATH.unshift(lib_dir)
require 'reindeer-etl/version'

Gem::Specification.new do |spec|
  spec.name    = "reindeer-etl"
  spec.version = ReindeerETL::VERSION
  spec.authors = ["William Hatt", "Patrick Chung"]
  spec.email   = ["hattb@ohsu.edu", "chungp@ohsu.edu"]

  spec.summary     = "A simple ETL pipeline for use with project reindeer and LimeSurvey"
  spec.description = "An ETL pipeline tool for automatic data modifications to LimeSurvey"
  spec.homepage    = "https://github.com/OHSU-FM/reindeer-etl"
  # NOTE(review): "GPL" is not an SPDX identifier; confirm the exact version
  # against License.md before changing it.
  spec.license     = "GPL"

  # Push-host restriction, intentionally left disabled so the gem can be
  # pushed to any host. Re-enable to prevent pushing to RubyGems.org.
  #if spec.respond_to?(:metadata)
  #  spec.metadata['allowed_push_host'] = "TODO: Set to 'http://mygemserver.com'"
  #else
  #  raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
  #end

  # Package every git-tracked file except those under test/, spec/ or
  # features/; test files are listed separately through test_files.
  tracked_files = `git ls-files -z`.split("\0")
  spec.files         = tracked_files.reject { |path| path =~ %r{^(test|spec|features)/} }
  spec.test_files    = Dir.glob("test/**/*.rb")
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}).map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  spec.add_dependency "rest-client", "~> 1.8"

  # Development-only dependencies, declared in the original order.
  [
    ["minitest", "~> 5.7"],
    ["bundler",  "~> 1.9"],
    ["rake",     "~> 10.0"],
    ["pry",      "~> 0.10.1"]
  ].each do |gem_name, requirement|
    spec.add_development_dependency gem_name, requirement
  end
end
|
File without changes
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'minitest_helper'
|
2
|
+
|
3
|
+
# Exercises the :only and :require options of BaseSource through
# simple_transforms. Uses assert_* throughout (instead of mixing in bare
# must_* expectations) so the file has one assertion style and stays
# warning-free on newer Minitest 5.x releases.
describe ReindeerETL::Sources::BaseSource do
  it 'must remove columns when asked' do
    source = ReindeerETL::Sources::BaseSource.new '', only: [:aa, :bb]
    row = { aa: 1, bb: 2, cc: 3 }

    # simple_transforms is expected to mutate the row in place.
    source.simple_transforms row

    assert_equal [:aa, :bb], row.keys
  end

  it 'must raise error if required fields missing' do
    source = ReindeerETL::Sources::BaseSource.new '', require: [:aa]
    row = { cc: 3 }

    assert_raises(ReindeerETL::Errors::RecordInvalid) { source.simple_transforms row }
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'minitest_helper'
|
2
|
+
|
3
|
+
##
# Shared check for the CSV source specs: read +path+ through CSVSource and
# verify every yielded row.
#
# Must be called from inside a spec example, because it relies on the
# example's assert_equal.
#
# path:: fixture file to read
# opts:: options handed to CSVSource.new (the specs pass :col_sep)
#
# Asserts that each row has exactly the keys "a".."i" in that order, that
# column "a" holds the 1-based row number, and that 13 rows are yielded.
def loads_as_hash path, opts={}
  source = ReindeerETL::Sources::CSVSource.new(path, opts)
  expected_keys = %w[a b c d e f g h i]
  seen = 0

  source.each do |row|
    assert_equal expected_keys, row.keys
    seen += 1
    assert_equal row['a'].to_i, seen
  end

  assert_equal 13, seen
end
|
13
|
+
|
14
|
+
# Runs the shared loads_as_hash check against one fixture per delimiter.
describe ReindeerETL::Sources::CSVSource do
  it 'must yield each line of a csv file as a hash' do
    loads_as_hash("#{$dir}/fixtures/comma_delimited.csv", col_sep: ',')
  end

  it 'must yield each line of a tabbed csv file as a hash' do
    loads_as_hash("#{$dir}/fixtures/tab_delimited.csv", col_sep: "\t")
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'minitest_helper'
|
2
|
+
|
3
|
+
# Checks that MultiSource combines two CSV fixtures into rows carrying the
# columns "a".."l". Uses assert_equal rather than bare must_equal
# expectations, which newer Minitest 5.x releases deprecate.
describe ReindeerETL::Sources::MultiSource do
  it 'must merge data from multiple sources' do
    paths = [
      "#{$dir}/fixtures/comma_delimited.csv",
      "#{$dir}/fixtures/comma_delimited_join_on_a.csv"
    ]
    # NOTE(review): 'a' looks like the join column (see the second fixture's
    # name) -- confirm against MultiSource.
    source = ReindeerETL::Sources::MultiSource.new('a', paths, klass: ReindeerETL::Sources::CSVSource)
    expected_keys = %w[a b c d e f g h i j k l]
    rows = []

    source.each do |row|
      rows << row
      assert_equal expected_keys, row.keys
    end

    assert_equal 13, rows.count
  end
end
|
19
|
+
|
20
|
+
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'minitest_helper'
|
2
|
+
|
3
|
+
# Specs for the Recode transform: value replacement, missing-column
# handling and constructor argument validation. Written with assert_*
# only, matching the assert_raises calls already used here and avoiding
# bare must_* expectations that newer Minitest 5.x releases deprecate.
describe ReindeerETL::Transforms::Recode do
  it 'must recode values' do
    row = { g: :a }
    transform = ReindeerETL::Transforms::Recode.new codes: { a: :b }, cols: [:g]

    # process is expected to rewrite the row in place.
    transform.process row

    assert_equal :b, row[:g]
  end

  it 'must raise an error if the column does not exist' do
    row = { hhh: :a }
    transform = ReindeerETL::Transforms::Recode.new codes: { a: :b }, cols: [:g]

    assert_raises(ReindeerETL::Errors::RecordInvalid) { transform.process row }
  end

  it 'must raise an error if parameters are missing' do
    assert_raises(ArgumentError) { ReindeerETL::Transforms::Recode.new codes: {} }
    assert_raises(ArgumentError) { ReindeerETL::Transforms::Recode.new cols: {} }

    # With both parameters supplied, construction must not raise; an
    # exception here fails the example.
    ReindeerETL::Transforms::Recode.new codes: { a: :b }, cols: [:hhh]
  end
end
|
33
|
+
|
34
|
+
|
File without changes
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'minitest_helper'
|
2
|
+
|
3
|
+
# Specs for the RenameFields transform, built with the mapping
# :aaa => :bbb and :ccc => :ddd. Uses assert_includes / refute_includes
# instead of comparing include? against true/false through bare must_equal,
# which gives clearer failure messages and avoids expectations that newer
# Minitest 5.x releases deprecate.
describe ReindeerETL::Transforms::RenameFields do
  before do
    @transform = ReindeerETL::Transforms::RenameFields.new(aaa: :bbb, ccc: :ddd)
  end

  it 'must raise an error when the column is not in the row' do
    row = {}

    assert_raises(ReindeerETL::Errors::RecordInvalid) { @transform.process row }
  end

  it 'must rename fields' do
    # The returned value is checked, not the input row.
    renamed = @transform.process(aaa: 555, ccc: 888)

    refute_includes renamed.keys, :aaa
    assert_includes renamed.keys, :bbb
  end
end
|
22
|
+
|
metadata
ADDED
@@ -0,0 +1,156 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: reindeer-etl
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- William Hatt
|
8
|
+
- Patrick Chung
|
9
|
+
autorequire:
|
10
|
+
bindir: exe
|
11
|
+
cert_chain: []
|
12
|
+
date: 2015-07-07 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rest-client
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - "~>"
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '1.8'
|
21
|
+
type: :runtime
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - "~>"
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '1.8'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: minitest
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - "~>"
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '5.7'
|
35
|
+
type: :development
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - "~>"
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '5.7'
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: bundler
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - "~>"
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: '1.9'
|
49
|
+
type: :development
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - "~>"
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '1.9'
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: rake
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - "~>"
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '10.0'
|
63
|
+
type: :development
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - "~>"
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '10.0'
|
70
|
+
- !ruby/object:Gem::Dependency
|
71
|
+
name: pry
|
72
|
+
requirement: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - "~>"
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: 0.10.1
|
77
|
+
type: :development
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - "~>"
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: 0.10.1
|
84
|
+
description: An ETL pipeline tool for automatic data modifications to LimeSurvey
|
85
|
+
email:
|
86
|
+
- hattb@ohsu.edu
|
87
|
+
- chungp@ohsu.edu
|
88
|
+
executables: []
|
89
|
+
extensions: []
|
90
|
+
extra_rdoc_files: []
|
91
|
+
files:
|
92
|
+
- ".gitignore"
|
93
|
+
- ".travis.yml"
|
94
|
+
- CODE_OF_CONDUCT.md
|
95
|
+
- Gemfile
|
96
|
+
- License.md
|
97
|
+
- README.md
|
98
|
+
- Rakefile
|
99
|
+
- bin/console
|
100
|
+
- bin/setup
|
101
|
+
- etl/.gitignore
|
102
|
+
- lib/reindeer-etl.rb
|
103
|
+
- lib/reindeer-etl/destinations/csv_dest.rb
|
104
|
+
- lib/reindeer-etl/errors.rb
|
105
|
+
- lib/reindeer-etl/sources/base_source.rb
|
106
|
+
- lib/reindeer-etl/sources/csv_source.rb
|
107
|
+
- lib/reindeer-etl/sources/multi_source.rb
|
108
|
+
- lib/reindeer-etl/transforms/recode.rb
|
109
|
+
- lib/reindeer-etl/transforms/rename_fields.rb
|
110
|
+
- lib/reindeer-etl/transforms/response_status.rb
|
111
|
+
- lib/reindeer-etl/transforms/simple_transforms.rb
|
112
|
+
- lib/reindeer-etl/version.rb
|
113
|
+
- reindeer-etl.gemspec
|
114
|
+
- test/lib/destinations_csv_dest_test.rb
|
115
|
+
- test/lib/sources_base_source_test.rb
|
116
|
+
- test/lib/sources_csv_source_test.rb
|
117
|
+
- test/lib/sources_multi_source_test.rb
|
118
|
+
- test/lib/transforms_recode_test.rb
|
119
|
+
- test/lib/transforms_rename_fields_test.rb
|
120
|
+
- test/lib/transforms_response_status_test.rb
|
121
|
+
- test/minitest_helper.rb
|
122
|
+
- test/reindeer_waterworks_test.rb
|
123
|
+
homepage: https://github.com/OHSU-FM/reindeer-etl
|
124
|
+
licenses:
|
125
|
+
- GPL
|
126
|
+
metadata: {}
|
127
|
+
post_install_message:
|
128
|
+
rdoc_options: []
|
129
|
+
require_paths:
|
130
|
+
- lib
|
131
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
132
|
+
requirements:
|
133
|
+
- - ">="
|
134
|
+
- !ruby/object:Gem::Version
|
135
|
+
version: '0'
|
136
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
137
|
+
requirements:
|
138
|
+
- - ">="
|
139
|
+
- !ruby/object:Gem::Version
|
140
|
+
version: '0'
|
141
|
+
requirements: []
|
142
|
+
rubyforge_project:
|
143
|
+
rubygems_version: 2.4.6
|
144
|
+
signing_key:
|
145
|
+
specification_version: 4
|
146
|
+
summary: A simple ETL pipeline for use with project reindeer and LimeSurvey
|
147
|
+
test_files:
|
148
|
+
- test/lib/transforms_recode_test.rb
|
149
|
+
- test/lib/transforms_response_status_test.rb
|
150
|
+
- test/lib/sources_csv_source_test.rb
|
151
|
+
- test/lib/transforms_rename_fields_test.rb
|
152
|
+
- test/lib/sources_multi_source_test.rb
|
153
|
+
- test/lib/sources_base_source_test.rb
|
154
|
+
- test/lib/destinations_csv_dest_test.rb
|
155
|
+
- test/reindeer_waterworks_test.rb
|
156
|
+
- test/minitest_helper.rb
|