reindeer-etl 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,33 @@
1
require 'set'

module ReindeerETL
  module Transforms
    ##
    # Mixin with simple, option-driven transforms applied to a single row
    # (a Hash of column => value).
    #
    # st_initialize must run before simple_transforms, because it sets the
    # instance variables that simple_transforms reads.
    module SimpleTransforms
      ##
      # Read the +:only+ and +:require+ options and store them as sets.
      #
      # Both keys are deleted from +opts+, so the caller's hash is mutated
      # and only the remaining options are left in it.
      # Each option may be a list of column names or a single column name.
      def st_initialize(opts = {})
        @only_cols = Array(opts.delete(:only) || []).to_set
        @require_cols = Array(opts.delete(:require) || []).to_set
      end

      ##
      # Configurable transforms in source
      #
      # Applies the +:only+ filter and then the +:require+ check, each only
      # when it was configured. The row is modified in place and returned.
      #
      # Raises ReindeerETL::Errors::RecordInvalid when a required column is
      # missing.
      def simple_transforms(row)
        st_only_cols(row) unless @only_cols.empty?
        st_require_cols(row) unless @require_cols.empty?
        row
      end

      ##
      # Filter out everything except these columns
      #
      # Deletes, in place, every key of +dict+ that is not in the +:only+
      # set, then returns +dict+.
      def st_only_cols(dict)
        dict.keep_if { |col, _value| @only_cols.include?(col) }
        dict
      end

      ##
      # require these columns
      #
      # Returns +dict+ when every required column is present as a key.
      # Raises ReindeerETL::Errors::RecordInvalid listing the missing
      # columns otherwise.
      def st_require_cols(dict)
        missing_cols = (@require_cols - dict.keys).to_a
        return dict if missing_cols.empty?

        raise ReindeerETL::Errors::RecordInvalid, "Missing required columns: #{missing_cols}"
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module ReindeerETL
  # Version string of the gem. Frozen so it cannot be mutated at runtime.
  VERSION = "0.1.0".freeze
end
@@ -0,0 +1,37 @@
1
# coding: utf-8
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'reindeer-etl/version'

# Gem specification: identity, packaged files and dependencies.
Gem::Specification.new do |gem|
  gem.name          = "reindeer-etl"
  gem.version       = ReindeerETL::VERSION
  gem.authors       = ["William Hatt", "Patrick Chung"]
  gem.email         = ["hattb@ohsu.edu", "chungp@ohsu.edu"]

  gem.summary       = %q{A simple ETL pipeline for use with project reindeer and LimeSurvey}
  gem.description   = %q{An ETL pipeline tool for automatic data modifications to LimeSurvey}
  gem.homepage      = "https://github.com/OHSU-FM/reindeer-etl"
  gem.license       = "GPL"

  # Prevent pushing this gem to RubyGems.org by setting 'allowed_push_host', or
  # delete this section to allow pushing this gem to any host.
  #if gem.respond_to?(:metadata)
  #  gem.metadata['allowed_push_host'] = "TODO: Set to 'http://mygemserver.com'"
  #else
  #  raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
  #end

  # Package every git-tracked file except those under test/, spec/ or features/.
  tracked = `git ls-files -z`.split("\x0")
  gem.files         = tracked.reject { |path| path.match(%r{^(test|spec|features)/}) }
  gem.test_files    = Dir.glob("test/**/*.rb")
  gem.bindir        = "exe"
  # Executables are the base names of the packaged files under exe/.
  gem.executables   = gem.files.grep(%r{^exe/}).map { |path| File.basename(path) }
  gem.require_paths = ["lib"]

  gem.add_dependency "rest-client", "~> 1.8"
  gem.add_development_dependency "minitest", "~> 5.7"
  gem.add_development_dependency "bundler", "~> 1.9"
  gem.add_development_dependency "rake", "~> 10.0"
  gem.add_development_dependency "pry", "~> 0.10.1"
end
File without changes
@@ -0,0 +1,17 @@
1
+ require 'minitest_helper'
2
+
3
# Exercises the :only and :require options through BaseSource#simple_transforms.
describe ReindeerETL::Sources::BaseSource do
  it 'must remove columns when asked' do
    source = ReindeerETL::Sources::BaseSource.new('', only: [:aa, :bb])
    record = { aa: 1, bb: 2, cc: 3 }
    # The record is filtered in place, so the return value is not needed.
    source.simple_transforms(record)
    record.keys.must_equal [:aa, :bb]
  end

  it 'must raise error if required fields missing' do
    source = ReindeerETL::Sources::BaseSource.new('', require: [:aa])
    record = { cc: 3 }
    assert_raises(ReindeerETL::Errors::RecordInvalid) do
      source.simple_transforms(record)
    end
  end
end
@@ -0,0 +1,24 @@
1
+ require 'minitest_helper'
2
+
3
# Shared assertion helper: reads +path+ through CSVSource with +opts+ and
# checks that every row is keyed a..i, that column 'a' equals the 1-based
# position of the row, and that exactly 13 rows are yielded.
def loads_as_hash(path, opts = {})
  expected_header = %w{a b c d e f g h i}
  seen = 0
  ReindeerETL::Sources::CSVSource.new(path, opts).each do |row|
    row.keys.must_equal expected_header
    seen += 1
    seen.must_equal row['a'].to_i
  end
  seen.must_equal 13
end
13
+
14
# Runs the shared loads_as_hash check against a comma- and a tab-delimited fixture.
describe ReindeerETL::Sources::CSVSource do
  it 'must yield each line of a csv file as a hash' do
    loads_as_hash("#{$dir}/fixtures/comma_delimited.csv", col_sep: ',')
  end

  it 'must yield each line of a tabbed csv file as a hash' do
    loads_as_hash("#{$dir}/fixtures/tab_delimited.csv", col_sep: "\t")
  end
end
@@ -0,0 +1,20 @@
1
+ require 'minitest_helper'
2
+
3
# Checks that two CSV fixtures joined on column 'a' produce 13 merged rows
# carrying the columns a..l.
describe ReindeerETL::Sources::MultiSource do
  it 'must merge data from multiple sources' do
    paths = [
      "#{$dir}/fixtures/comma_delimited.csv",
      "#{$dir}/fixtures/comma_delimited_join_on_a.csv"
    ]
    source = ReindeerETL::Sources::MultiSource.new(
      'a', paths, klass: ReindeerETL::Sources::CSVSource
    )
    expected_keys = %w[a b c d e f g h i j k l]
    merged = []
    source.each do |row|
      merged << row
      row.keys.must_equal expected_keys
    end
    merged.count.must_equal 13
  end
end
19
+
20
+
@@ -0,0 +1,34 @@
1
+ require 'minitest_helper'
2
+
3
# Covers value recoding, the missing-column error and constructor validation.
describe ReindeerETL::Transforms::Recode do
  it 'must recode values' do
    record = { g: :a }
    recoder = ReindeerETL::Transforms::Recode.new codes: { a: :b }, cols: [:g]
    # The record is checked after the call, so process must update it in place.
    recoder.process(record)
    record[:g].must_equal :b
  end

  it 'must raise an error if the column does not exist' do
    record = { hhh: :a }
    recoder = ReindeerETL::Transforms::Recode.new codes: { a: :b }, cols: [:g]

    assert_raises(ReindeerETL::Errors::RecordInvalid) { recoder.process(record) }
  end

  it 'must raise an error if parameters are missing' do
    assert_raises(ArgumentError) do
      ReindeerETL::Transforms::Recode.new codes: {}
    end
    assert_raises(ArgumentError) do
      ReindeerETL::Transforms::Recode.new cols: {}
    end
    # With both parameters supplied, construction must not raise.
    ReindeerETL::Transforms::Recode.new codes: { a: :b }, cols: [:hhh]
  end
end
33
+
34
+
File without changes
@@ -0,0 +1,22 @@
1
+ require 'minitest_helper'
2
+
3
# Covers renaming of mapped columns and the error raised when a mapped
# column is absent from the row.
describe ReindeerETL::Transforms::RenameFields do
  before do
    mapping = { aaa: :bbb, ccc: :ddd }
    @transform = ReindeerETL::Transforms::RenameFields.new(mapping)
  end

  it 'must raise an error when the column is not in the row' do
    empty_row = {}
    assert_raises(ReindeerETL::Errors::RecordInvalid) do
      @transform.process(empty_row)
    end
  end

  it 'must rename fields' do
    renamed = @transform.process({ aaa: 555, ccc: 888 })
    renamed.keys.wont_include :aaa
    renamed.keys.must_include :bbb
  end
end
22
+
@@ -0,0 +1,7 @@
1
+ require 'pry'
2
+ require 'minitest/autorun'
3
+ require 'minitest/unit'
4
+ require 'minitest/pride'
5
+
6
# Load the library under test from the source tree, relative to this file.
require File.expand_path('../../lib/reindeer-etl.rb', __FILE__)
# Absolute path of the directory holding this helper; specs build fixture
# paths from it.
$dir = File.expand_path('..', __FILE__)
@@ -0,0 +1,8 @@
1
+ require 'minitest_helper'
2
+
3
# Sanity check that the gem exposes a version constant.
class TestReindeerETL < Minitest::Test
  def test_that_it_has_a_version_number
    version = ::ReindeerETL::VERSION
    refute_nil(version)
  end
end
metadata ADDED
@@ -0,0 +1,156 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: reindeer-etl
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - William Hatt
8
+ - Patrick Chung
9
+ autorequire:
10
+ bindir: exe
11
+ cert_chain: []
12
+ date: 2015-07-07 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rest-client
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - "~>"
19
+ - !ruby/object:Gem::Version
20
+ version: '1.8'
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - "~>"
26
+ - !ruby/object:Gem::Version
27
+ version: '1.8'
28
+ - !ruby/object:Gem::Dependency
29
+ name: minitest
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - "~>"
33
+ - !ruby/object:Gem::Version
34
+ version: '5.7'
35
+ type: :development
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - "~>"
40
+ - !ruby/object:Gem::Version
41
+ version: '5.7'
42
+ - !ruby/object:Gem::Dependency
43
+ name: bundler
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - "~>"
47
+ - !ruby/object:Gem::Version
48
+ version: '1.9'
49
+ type: :development
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - "~>"
54
+ - !ruby/object:Gem::Version
55
+ version: '1.9'
56
+ - !ruby/object:Gem::Dependency
57
+ name: rake
58
+ requirement: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - "~>"
61
+ - !ruby/object:Gem::Version
62
+ version: '10.0'
63
+ type: :development
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - "~>"
68
+ - !ruby/object:Gem::Version
69
+ version: '10.0'
70
+ - !ruby/object:Gem::Dependency
71
+ name: pry
72
+ requirement: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - "~>"
75
+ - !ruby/object:Gem::Version
76
+ version: 0.10.1
77
+ type: :development
78
+ prerelease: false
79
+ version_requirements: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - "~>"
82
+ - !ruby/object:Gem::Version
83
+ version: 0.10.1
84
+ description: An ETL pipeline tool for automatic data modifications to LimeSurvey
85
+ email:
86
+ - hattb@ohsu.edu
87
+ - chungp@ohsu.edu
88
+ executables: []
89
+ extensions: []
90
+ extra_rdoc_files: []
91
+ files:
92
+ - ".gitignore"
93
+ - ".travis.yml"
94
+ - CODE_OF_CONDUCT.md
95
+ - Gemfile
96
+ - License.md
97
+ - README.md
98
+ - Rakefile
99
+ - bin/console
100
+ - bin/setup
101
+ - etl/.gitignore
102
+ - lib/reindeer-etl.rb
103
+ - lib/reindeer-etl/destinations/csv_dest.rb
104
+ - lib/reindeer-etl/errors.rb
105
+ - lib/reindeer-etl/sources/base_source.rb
106
+ - lib/reindeer-etl/sources/csv_source.rb
107
+ - lib/reindeer-etl/sources/multi_source.rb
108
+ - lib/reindeer-etl/transforms/recode.rb
109
+ - lib/reindeer-etl/transforms/rename_fields.rb
110
+ - lib/reindeer-etl/transforms/response_status.rb
111
+ - lib/reindeer-etl/transforms/simple_transforms.rb
112
+ - lib/reindeer-etl/version.rb
113
+ - reindeer-etl.gemspec
114
+ - test/lib/destinations_csv_dest_test.rb
115
+ - test/lib/sources_base_source_test.rb
116
+ - test/lib/sources_csv_source_test.rb
117
+ - test/lib/sources_multi_source_test.rb
118
+ - test/lib/transforms_recode_test.rb
119
+ - test/lib/transforms_rename_fields_test.rb
120
+ - test/lib/transforms_response_status_test.rb
121
+ - test/minitest_helper.rb
122
+ - test/reindeer_waterworks_test.rb
123
+ homepage: https://github.com/OHSU-FM/reindeer-etl
124
+ licenses:
125
+ - GPL
126
+ metadata: {}
127
+ post_install_message:
128
+ rdoc_options: []
129
+ require_paths:
130
+ - lib
131
+ required_ruby_version: !ruby/object:Gem::Requirement
132
+ requirements:
133
+ - - ">="
134
+ - !ruby/object:Gem::Version
135
+ version: '0'
136
+ required_rubygems_version: !ruby/object:Gem::Requirement
137
+ requirements:
138
+ - - ">="
139
+ - !ruby/object:Gem::Version
140
+ version: '0'
141
+ requirements: []
142
+ rubyforge_project:
143
+ rubygems_version: 2.4.6
144
+ signing_key:
145
+ specification_version: 4
146
+ summary: A simple ETL pipeline for use with project reindeer and LimeSurvey
147
+ test_files:
148
+ - test/lib/transforms_recode_test.rb
149
+ - test/lib/transforms_response_status_test.rb
150
+ - test/lib/sources_csv_source_test.rb
151
+ - test/lib/transforms_rename_fields_test.rb
152
+ - test/lib/sources_multi_source_test.rb
153
+ - test/lib/sources_base_source_test.rb
154
+ - test/lib/destinations_csv_dest_test.rb
155
+ - test/reindeer_waterworks_test.rb
156
+ - test/minitest_helper.rb