cranium 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +21 -0
- data/.ruby-version +1 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +29 -0
- data/Rakefile +3 -0
- data/Vagrantfile +24 -0
- data/bin/cranium +9 -0
- data/config/cucumber.yml +9 -0
- data/cranium.gemspec +26 -0
- data/db/setup.sql +8 -0
- data/docker-compose.yml +8 -0
- data/examples/config.rb +14 -0
- data/examples/deduplication.rb +27 -0
- data/examples/import_csv_with_field_lookup_inserting_new_dimension_keys.rb +26 -0
- data/examples/incremental_extract.rb +17 -0
- data/examples/lookup_with_multiple_fields.rb +25 -0
- data/features/archive.feature +49 -0
- data/features/extract/incremental_extract.feature +56 -0
- data/features/extract/simple_extract.feature +85 -0
- data/features/import/import_csv_to_database_as_delta.feature +38 -0
- data/features/import/import_csv_to_database_with_delete_insert_merging.feature +51 -0
- data/features/import/import_csv_to_database_with_truncate_insert.feature +49 -0
- data/features/import/import_csv_to_database_with_update_merging.feature +46 -0
- data/features/import/import_csv_with_always_inserting_new_dimension_keys.feature +137 -0
- data/features/import/import_csv_with_field_lookup_inserting_new_dimension_keys.feature +62 -0
- data/features/import/import_csv_with_field_lookup_transformation.feature +125 -0
- data/features/import/import_csv_with_transformation.feature +55 -0
- data/features/import/import_multiple_csv_files_without_transformations.feature +44 -0
- data/features/import/import_with_load_id_from_sequence.feature +53 -0
- data/features/import/import_with_lookup_from_multiple_fields.feature +64 -0
- data/features/read.feature +56 -0
- data/features/remove.feature +44 -0
- data/features/restore_database_connection.feature +55 -0
- data/features/step_definitions/database_table_steps.rb +40 -0
- data/features/step_definitions/definition_steps.rb +3 -0
- data/features/step_definitions/execution_steps.rb +23 -0
- data/features/step_definitions/file_steps.rb +39 -0
- data/features/support/class_extensions.rb +24 -0
- data/features/support/env.rb +27 -0
- data/features/support/randomize.rb +22 -0
- data/features/support/stop_on_first_error.rb +5 -0
- data/features/transform/deduplication.feature +37 -0
- data/features/transform/empty_transformation.feature +72 -0
- data/features/transform/join.feature +180 -0
- data/features/transform/join_multiple_files_into_one_output_file.feature +46 -0
- data/features/transform/output_rows.feature +70 -0
- data/features/transform/projection.feature +34 -0
- data/features/transform/raw_ruby_transformation.feature +69 -0
- data/features/transform/split_field.feature +39 -0
- data/lib/cranium/application.rb +104 -0
- data/lib/cranium/archiver.rb +36 -0
- data/lib/cranium/attribute_dsl.rb +43 -0
- data/lib/cranium/command_line_options.rb +27 -0
- data/lib/cranium/configuration.rb +33 -0
- data/lib/cranium/data_importer.rb +35 -0
- data/lib/cranium/data_reader.rb +48 -0
- data/lib/cranium/data_transformer.rb +126 -0
- data/lib/cranium/database.rb +36 -0
- data/lib/cranium/definition_registry.rb +21 -0
- data/lib/cranium/dimension_manager.rb +65 -0
- data/lib/cranium/dsl/database_definition.rb +23 -0
- data/lib/cranium/dsl/extract_definition.rb +28 -0
- data/lib/cranium/dsl/import_definition.rb +50 -0
- data/lib/cranium/dsl/source_definition.rb +67 -0
- data/lib/cranium/dsl.rb +100 -0
- data/lib/cranium/extensions/file.rb +7 -0
- data/lib/cranium/extensions/sequel_greenplum.rb +30 -0
- data/lib/cranium/external_table.rb +75 -0
- data/lib/cranium/extract/data_extractor.rb +11 -0
- data/lib/cranium/extract/storage.rb +57 -0
- data/lib/cranium/extract/strategy/base.rb +27 -0
- data/lib/cranium/extract/strategy/incremental.rb +16 -0
- data/lib/cranium/extract/strategy/simple.rb +9 -0
- data/lib/cranium/extract/strategy.rb +7 -0
- data/lib/cranium/extract.rb +7 -0
- data/lib/cranium/import_strategy/base.rb +55 -0
- data/lib/cranium/import_strategy/delete_insert.rb +40 -0
- data/lib/cranium/import_strategy/delta.rb +8 -0
- data/lib/cranium/import_strategy/merge.rb +50 -0
- data/lib/cranium/import_strategy/truncate_insert.rb +19 -0
- data/lib/cranium/import_strategy.rb +9 -0
- data/lib/cranium/logging.rb +15 -0
- data/lib/cranium/profiling.rb +13 -0
- data/lib/cranium/progress_output.rb +37 -0
- data/lib/cranium/sequel/hash.rb +32 -0
- data/lib/cranium/sequel.rb +5 -0
- data/lib/cranium/source_registry.rb +21 -0
- data/lib/cranium/test_framework/cucumber_table.rb +140 -0
- data/lib/cranium/test_framework/database_entity.rb +29 -0
- data/lib/cranium/test_framework/database_sequence.rb +16 -0
- data/lib/cranium/test_framework/database_table.rb +33 -0
- data/lib/cranium/test_framework/upload_directory.rb +39 -0
- data/lib/cranium/test_framework/world.rb +66 -0
- data/lib/cranium/test_framework.rb +10 -0
- data/lib/cranium/transformation/duplication_index.rb +42 -0
- data/lib/cranium/transformation/index.rb +83 -0
- data/lib/cranium/transformation/join.rb +141 -0
- data/lib/cranium/transformation/sequence.rb +42 -0
- data/lib/cranium/transformation.rb +8 -0
- data/lib/cranium/transformation_record.rb +45 -0
- data/lib/cranium.rb +57 -0
- data/rake/test.rake +31 -0
- data/spec/cranium/application_spec.rb +166 -0
- data/spec/cranium/archiver_spec.rb +44 -0
- data/spec/cranium/command_line_options_spec.rb +32 -0
- data/spec/cranium/configuration_spec.rb +31 -0
- data/spec/cranium/data_importer_spec.rb +55 -0
- data/spec/cranium/data_transformer_spec.rb +16 -0
- data/spec/cranium/database_spec.rb +69 -0
- data/spec/cranium/definition_registry_spec.rb +45 -0
- data/spec/cranium/dimension_manager_spec.rb +63 -0
- data/spec/cranium/dsl/database_definition_spec.rb +23 -0
- data/spec/cranium/dsl/extract_definition_spec.rb +76 -0
- data/spec/cranium/dsl/import_definition_spec.rb +153 -0
- data/spec/cranium/dsl/source_definition_spec.rb +84 -0
- data/spec/cranium/dsl_spec.rb +119 -0
- data/spec/cranium/external_table_spec.rb +71 -0
- data/spec/cranium/extract/storage_spec.rb +125 -0
- data/spec/cranium/logging_spec.rb +37 -0
- data/spec/cranium/sequel/hash_spec.rb +56 -0
- data/spec/cranium/source_registry_spec.rb +31 -0
- data/spec/cranium/test_framework/cucumber_table_spec.rb +144 -0
- data/spec/cranium/transformation/duplication_index_spec.rb +75 -0
- data/spec/cranium/transformation/index_spec.rb +178 -0
- data/spec/cranium/transformation/join_spec.rb +43 -0
- data/spec/cranium/transformation/sequence_spec.rb +83 -0
- data/spec/cranium/transformation_record_spec.rb +78 -0
- data/spec/cranium_spec.rb +53 -0
- data/spec/spec_helper.rb +1 -0
- metadata +362 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: aa44efc27b0fc354b994ea3c8738d5e6aeab2415
|
4
|
+
data.tar.gz: 036dfbc242e5858bad5fd374cc0b1c27bda4b6da
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a90bc3b7ee0cd635b13e9deb325e4bc58fe83889816ac9b7bc02637318f096b953caa01ce44a6314ac67a74d0374beba991548ba46349250f01914e591cf1554
|
7
|
+
data.tar.gz: d95a477491949134ef37a86c165d4fdc19615fa12e53fed042af1bd838788a290c64b9ffa9a59db420b446e46544103315f6a18f337c1d09d2713665a9a2fc7d
|
data/.gitignore
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
.bundle
|
4
|
+
.config
|
5
|
+
.yardoc
|
6
|
+
.vagrant
|
7
|
+
.idea
|
8
|
+
Gemfile.lock
|
9
|
+
InstalledFiles
|
10
|
+
_yardoc
|
11
|
+
coverage
|
12
|
+
doc/
|
13
|
+
lib/bundler/man
|
14
|
+
pkg
|
15
|
+
rdoc
|
16
|
+
spec/reports
|
17
|
+
test/tmp
|
18
|
+
test/version_tmp
|
19
|
+
tmp
|
20
|
+
atlassian-ide-plugin.xml
|
21
|
+
log/
|
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.3.0
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Zoltan Ormandi
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# Cranium
|
2
|
+
|
3
|
+
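Cranium is a pure Ruby ETL framework that provides Extract, Transform and Load functionality for loading data from CSV files to a Greenplum database.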
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'cranium'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install cranium
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
See the definition scripts in the `examples` directory and the scenarios in the `features` directory for usage examples.
|
22
|
+
|
23
|
+
## Contributing
|
24
|
+
|
25
|
+
1. Fork it
|
26
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
27
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
28
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
29
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
data/Vagrantfile
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# -*- mode: ruby -*-
|
2
|
+
# vi: set ft=ruby :
|
3
|
+
|
4
|
+
# Ensure the host-side directory used by the synced folder below exists.
# mkdir_p does nothing when the directory is already present, so no existence
# guard is needed (the former Dir.exists? guard fails on Ruby 3.2+, where that
# deprecated alias was removed).
FileUtils.mkdir_p("tmp/custdata")
|
5
|
+
|
6
|
+
# Vagrantfile API/syntax version. Don't touch unless you know what you're doing!
|
7
|
+
VAGRANTFILE_API_VERSION = "2"
|
8
|
+
|
9
|
+
Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
|
10
|
+
config.vm.box = "si-build.v2"
|
11
|
+
config.vm.box_url = "http://vboxes.ett.local/si-build.v2.box"
|
12
|
+
config.vbguest.auto_update = false
|
13
|
+
|
14
|
+
config.vm.hostname = 'cranium-build'
|
15
|
+
config.vm.network :private_network, ip: "192.168.56.43"
|
16
|
+
|
17
|
+
config.vm.provider :virtualbox do |virtual_machine|
|
18
|
+
virtual_machine.name = "cranium"
|
19
|
+
end
|
20
|
+
|
21
|
+
config.vm.synced_folder "tmp/custdata", "/home/gpadmin/gpfdist-data", owner: "gpadmin", group: "gpadmin"
|
22
|
+
|
23
|
+
config.vm.provision :shell, inline: "su - gpadmin -c 'cat /vagrant/db/setup.sql | psql'"
|
24
|
+
end
|
data/bin/cranium
ADDED
data/config/cucumber.yml
ADDED
data/cranium.gemspec
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
Gem::Specification.new do |spec|
|
2
|
+
spec.name = 'cranium'
|
3
|
+
spec.version = '0.2.0'
|
4
|
+
spec.authors = ['Emarsys Technologies']
|
5
|
+
spec.email = ['smart-insight-dev@emarsys.com']
|
6
|
+
spec.description = %q{Provides Extract, Transform and Load functionality for loading data from CSV files to a Greenplum database.}
|
7
|
+
spec.summary = %q{Pure Ruby ETL framework}
|
8
|
+
spec.homepage = 'https://github.com/emartech/cranium'
|
9
|
+
spec.license = 'MIT'
|
10
|
+
|
11
|
+
spec.files = `git ls-files`.split($/)
|
12
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
13
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
14
|
+
spec.require_paths = ['lib']
|
15
|
+
|
16
|
+
spec.add_runtime_dependency 'pg', '~> 0'
|
17
|
+
spec.add_runtime_dependency 'progressbar', '~> 0'
|
18
|
+
spec.add_runtime_dependency 'sequel', '~> 4'
|
19
|
+
spec.add_runtime_dependency 'slop', '~> 3'
|
20
|
+
|
21
|
+
spec.add_development_dependency 'bundler', '~> 1'
|
22
|
+
spec.add_development_dependency 'rake', '~> 10'
|
23
|
+
spec.add_development_dependency 'rspec', '~> 3'
|
24
|
+
spec.add_development_dependency 'ruby-prof', '~> 0'
|
25
|
+
spec.add_development_dependency 'cucumber', '~> 1'
|
26
|
+
end
|
data/db/setup.sql
ADDED
@@ -0,0 +1,8 @@
|
|
1
|
+
CREATE RESOURCE QUEUE smart_insight WITH (ACTIVE_STATEMENTS=10, PRIORITY=MEDIUM);
|
2
|
+
|
3
|
+
CREATE ROLE cranium WITH RESOURCE QUEUE smart_insight CREATEEXTTABLE LOGIN PASSWORD 'cranium';
|
4
|
+
COMMENT ON ROLE cranium IS 'Cranium test user';
|
5
|
+
|
6
|
+
CREATE DATABASE cranium WITH OWNER=cranium;
|
7
|
+
|
8
|
+
CREATE ROLE database_administrator WITH SUPERUSER LOGIN PASSWORD 'emarsys';
|
data/docker-compose.yml
ADDED
data/examples/config.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'logger'
|
2
|
+
|
3
|
+
Cranium.configure do |config|
|
4
|
+
config.greenplum_connection_string = "postgres://cranium:cranium@192.168.56.43:5432/cranium"
|
5
|
+
config.gpfdist_url = "192.168.56.43:8123"
|
6
|
+
config.gpfdist_home_directory = "tmp/custdata"
|
7
|
+
config.upload_directory = "cranium_build"
|
8
|
+
config.archive_directory = "cranium_archive"
|
9
|
+
config.loggers << Logger.new("log/application.log")
|
10
|
+
end
|
11
|
+
|
12
|
+
database :suite do
|
13
|
+
connect_to "postgres://cranium:cranium@192.168.56.43:5432/cranium"
|
14
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require_relative 'config'
|
2
|
+
|
3
|
+
source :sales_items do
|
4
|
+
file "sales_items*.csv"
|
5
|
+
field :order_id, String
|
6
|
+
field :date, Date
|
7
|
+
field :customer, Integer
|
8
|
+
field :item, String
|
9
|
+
field :item_name, String
|
10
|
+
field :quantity, Float
|
11
|
+
field :c_sales_amount, Float
|
12
|
+
end
|
13
|
+
|
14
|
+
source :products do
|
15
|
+
field :item_id
|
16
|
+
field :item_name
|
17
|
+
end
|
18
|
+
|
19
|
+
|
20
|
+
deduplicate :sales_items, into: :products, by: [:item]
|
21
|
+
|
22
|
+
# Equivalent to
|
23
|
+
|
24
|
+
transform :sales_items => :products do |record|
|
25
|
+
deduplicate_by :item
|
26
|
+
end
|
27
|
+
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require_relative 'config'
|
2
|
+
|
3
|
+
source :purchases do
|
4
|
+
field :user_id, String
|
5
|
+
field :amount, String
|
6
|
+
end
|
7
|
+
|
8
|
+
source :transformed_purchases do
|
9
|
+
field :contact_key, Integer
|
10
|
+
field :amount, String
|
11
|
+
end
|
12
|
+
|
13
|
+
transform :purchases => :transformed_purchases do |record|
|
14
|
+
record[:contact_key] = lookup :contact_key,
|
15
|
+
from_table: :dim_contact,
|
16
|
+
match_column: :user_id,
|
17
|
+
to_value: record[:user_id],
|
18
|
+
if_not_found_then_insert: { contact_key: next_value_in_sequence("dim_contact_contact_key_seq"),
|
19
|
+
name: "Unknown contact #{record[:user_id]}" }
|
20
|
+
end
|
21
|
+
|
22
|
+
import :transformed_purchases do
|
23
|
+
into :fct_purchases
|
24
|
+
put :contact_key
|
25
|
+
put :amount
|
26
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require_relative 'config'
|
2
|
+
|
3
|
+
extract :contacts do
|
4
|
+
from :suite
|
5
|
+
incrementally_by :created
|
6
|
+
query <<-sql
|
7
|
+
SELECT *
|
8
|
+
FROM contacts
|
9
|
+
WHERE created BETWEEN '#{last_extracted_value_of :created, "1970-01-01 00:00:00"}' AND '#{Time.now - 60*10}'
|
10
|
+
sql
|
11
|
+
end
|
12
|
+
|
13
|
+
extract :contacts do
|
14
|
+
from :suite
|
15
|
+
incrementally_by :id
|
16
|
+
query "SELECT * FROM akarmi WHERE id > #{last_extracted_value_of :id, 0}"
|
17
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require_relative 'config'
|
2
|
+
|
3
|
+
source :purchases do
|
4
|
+
field :user_id, String
|
5
|
+
field :amount, String
|
6
|
+
end
|
7
|
+
|
8
|
+
source :transformed_purchases do
|
9
|
+
field :contact_key, Integer
|
10
|
+
field :amount, String
|
11
|
+
end
|
12
|
+
|
13
|
+
transform :purchases => :transformed_purchases do |record|
|
14
|
+
record[:contact_key] = lookup :contact_key,
|
15
|
+
from_table: :dim_contact,
|
16
|
+
match: {:user_id => record[:user_id], :another_field => record[:another_value]},
|
17
|
+
if_not_found_then_insert: { contact_key: next_value_in_sequence("dim_contact_contact_key_seq"),
|
18
|
+
name: "Unknown contact #{record[:user_id]}" }
|
19
|
+
end
|
20
|
+
|
21
|
+
import :transformed_purchases do
|
22
|
+
into :fct_purchases
|
23
|
+
put :contact_key
|
24
|
+
put :amount
|
25
|
+
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
Feature: Archive source files
|
2
|
+
|
3
|
+
Scenario:
|
4
|
+
Given no "/tmp/cranium_archive" directory
|
5
|
+
And a "products_1.csv" data file containing:
|
6
|
+
"""
|
7
|
+
"""
|
8
|
+
And a "products_2.csv" data file containing:
|
9
|
+
"""
|
10
|
+
"""
|
11
|
+
And a "contacts.csv" data file containing:
|
12
|
+
"""
|
13
|
+
"""
|
14
|
+
And a "purchases.csv" data file containing:
|
15
|
+
"""
|
16
|
+
"""
|
17
|
+
And the following definition:
|
18
|
+
"""
|
19
|
+
Cranium.configure do |config|
|
20
|
+
config.archive_directory = "/tmp/cranium_archive"
|
21
|
+
end
|
22
|
+
|
23
|
+
source :products do
|
24
|
+
file "products_*.csv"
|
25
|
+
end
|
26
|
+
|
27
|
+
source :products_transformed do end
|
28
|
+
|
29
|
+
source :contacts do
|
30
|
+
file "contacts.csv"
|
31
|
+
end
|
32
|
+
|
33
|
+
source :purchases do
|
34
|
+
file "purchases.csv"
|
35
|
+
end
|
36
|
+
|
37
|
+
transform :products => :products_transformed do |record|
|
38
|
+
output record
|
39
|
+
end
|
40
|
+
|
41
|
+
archive :products, :contacts
|
42
|
+
"""
|
43
|
+
When I execute the definition
|
44
|
+
Then the process should exit successfully
|
45
|
+
And the "/tmp/cranium_archive/" directory should contain the following files:
|
46
|
+
| filename |
|
47
|
+
| .*contacts.csv |
|
48
|
+
| .*products_1.csv |
|
49
|
+
| .*products_2.csv |
|
@@ -0,0 +1,56 @@
|
|
1
|
+
Feature: Extracting data incrementally from a database table to CSV
|
2
|
+
|
3
|
+
Incremental extracts work by indicating that a field (or fields) should be used to detect new data rows
|
4
|
+
in the table. The highest extracted values are saved from one process and passed on to the next when the
|
5
|
+
process is run again. This approach typically works best with id or timestamp fields.
|
6
|
+
|
7
|
+
|
8
|
+
Background:
|
9
|
+
Given the following definition:
|
10
|
+
"""
|
11
|
+
database :suite do
|
12
|
+
connect_to Cranium.configuration.greenplum_connection_string
|
13
|
+
end
|
14
|
+
|
15
|
+
extract :contacts do
|
16
|
+
from :suite
|
17
|
+
incrementally_by :id
|
18
|
+
query "SELECT id, name FROM contacts WHERE id > #{last_extracted_value_of :id, 0} ORDER BY id DESC"
|
19
|
+
end
|
20
|
+
"""
|
21
|
+
And a database table called "contacts" with the following fields:
|
22
|
+
| field_name | field_type |
|
23
|
+
| id | INTEGER |
|
24
|
+
| name | TEXT |
|
25
|
+
And only the following rows in the "contacts" database table:
|
26
|
+
| id | name |
|
27
|
+
| 1 | John Doe |
|
28
|
+
| 2 | Jane Doe |
|
29
|
+
And the definition is executed
|
30
|
+
And the "contacts.csv" file is deleted
|
31
|
+
|
32
|
+
|
33
|
+
Scenario: Successful extract
|
34
|
+
Given the following new rows in the "contacts" database table:
|
35
|
+
| id | name |
|
36
|
+
| 3 | John Smith |
|
37
|
+
| 4 | Jane Smith |
|
38
|
+
When I execute the definition again
|
39
|
+
Then the process should exit successfully
|
40
|
+
And there should be a "contacts.csv" data file in the upload directory containing:
|
41
|
+
"""
|
42
|
+
id,name
|
43
|
+
4,Jane Smith
|
44
|
+
3,John Smith
|
45
|
+
"""
|
46
|
+
|
47
|
+
|
48
|
+
Scenario: Incremental extract doesn't remember empty 'last extracted value' - bugfix
|
49
|
+
Given the definition is executed again
|
50
|
+
And the "contacts.csv" file is deleted
|
51
|
+
When I execute the definition again
|
52
|
+
Then the process should exit successfully
|
53
|
+
And there should be a "contacts.csv" data file in the upload directory containing:
|
54
|
+
"""
|
55
|
+
id,name
|
56
|
+
"""
|
@@ -0,0 +1,85 @@
|
|
1
|
+
Feature: Extracting data from a database table to CSV
|
2
|
+
|
3
|
+
Data can be extracted from a database table into a CSV file. The CSV file is named after the extract process
|
4
|
+
and is placed in the upload directory specified in the configuration.
|
5
|
+
|
6
|
+
|
7
|
+
Background:
|
8
|
+
Given a database table called "contacts" with the following fields:
|
9
|
+
| field_name | field_type |
|
10
|
+
| id | INTEGER |
|
11
|
+
| name | TEXT |
|
12
|
+
|
13
|
+
|
14
|
+
Scenario: Successful extract using raw SQL
|
15
|
+
Given only the following rows in the "contacts" database table:
|
16
|
+
| id | name |
|
17
|
+
| 1 | John Doe |
|
18
|
+
| 2 | Jane Doe |
|
19
|
+
| 3 | John Smith |
|
20
|
+
And the following definition:
|
21
|
+
"""
|
22
|
+
database :suite do
|
23
|
+
connect_to Cranium.configuration.greenplum_connection_string
|
24
|
+
end
|
25
|
+
|
26
|
+
extract :contacts do
|
27
|
+
from :suite
|
28
|
+
query "SELECT id, name FROM contacts WHERE name LIKE '%Doe%' ORDER BY id"
|
29
|
+
end
|
30
|
+
"""
|
31
|
+
When I execute the definition
|
32
|
+
Then the process should exit successfully
|
33
|
+
And there should be a "contacts.csv" data file in the upload directory containing:
|
34
|
+
"""
|
35
|
+
id,name
|
36
|
+
1,John Doe
|
37
|
+
2,Jane Doe
|
38
|
+
"""
|
39
|
+
|
40
|
+
Scenario: Successful extract with overridden columns
|
41
|
+
Given only the following rows in the "contacts" database table:
|
42
|
+
| id | name |
|
43
|
+
| 1 | John Doe |
|
44
|
+
| 2 | Jane Doe |
|
45
|
+
| 3 | John Smith |
|
46
|
+
And the following definition:
|
47
|
+
"""
|
48
|
+
database :suite do
|
49
|
+
connect_to Cranium.configuration.greenplum_connection_string
|
50
|
+
end
|
51
|
+
|
52
|
+
extract :contacts do
|
53
|
+
from :suite
|
54
|
+
columns %w(uid full_name)
|
55
|
+
query "SELECT id, name FROM contacts WHERE name LIKE '%Doe%' ORDER BY id"
|
56
|
+
end
|
57
|
+
"""
|
58
|
+
When I execute the definition
|
59
|
+
Then the process should exit successfully
|
60
|
+
And there should be a "contacts.csv" data file in the upload directory containing:
|
61
|
+
"""
|
62
|
+
uid,full_name
|
63
|
+
1,John Doe
|
64
|
+
2,Jane Doe
|
65
|
+
"""
|
66
|
+
|
67
|
+
Scenario: Extract should fail if file already exists
|
68
|
+
Given an empty "contacts.csv" data file
|
69
|
+
And the following definition:
|
70
|
+
"""
|
71
|
+
database :suite do
|
72
|
+
connect_to Cranium.configuration.greenplum_connection_string
|
73
|
+
end
|
74
|
+
|
75
|
+
extract :contacts do
|
76
|
+
from :suite
|
77
|
+
query "SELECT id, name FROM contacts WHERE name LIKE '%Doe%' ORDER BY id"
|
78
|
+
end
|
79
|
+
"""
|
80
|
+
When I execute the definition
|
81
|
+
Then the process should exit with an error
|
82
|
+
And the error message should contain:
|
83
|
+
"""
|
84
|
+
Extract halted: a file named "contacts.csv" already exists
|
85
|
+
"""
|
@@ -0,0 +1,38 @@
|
|
1
|
+
Feature: Import a CSV file into the database as a delta
|
2
|
+
|
3
|
+
Scenario: Successful import
|
4
|
+
Given a database table called "dim_product" with the following fields:
|
5
|
+
| field_name | field_type |
|
6
|
+
| item | TEXT |
|
7
|
+
| title | TEXT |
|
8
|
+
| category | TEXT |
|
9
|
+
| description | TEXT |
|
10
|
+
And a "products.csv" data file containing:
|
11
|
+
"""
|
12
|
+
id,name,category,description
|
13
|
+
JNI-123,Just a product name,Main category > Subcategory > Sub-subcategory,Some description
|
14
|
+
CDI-234,Another product name,Smart Insight > Cool stuff > Scripts,Another description
|
15
|
+
"""
|
16
|
+
And the following definition:
|
17
|
+
"""
|
18
|
+
source :products do
|
19
|
+
field :id, String
|
20
|
+
field :name, String
|
21
|
+
field :category, String
|
22
|
+
field :description, String
|
23
|
+
end
|
24
|
+
|
25
|
+
import :products do
|
26
|
+
into :dim_product
|
27
|
+
put :id => :item
|
28
|
+
put :name => :title
|
29
|
+
put :category => :category
|
30
|
+
put :description => :description
|
31
|
+
end
|
32
|
+
"""
|
33
|
+
When I execute the definition
|
34
|
+
Then the process should exit successfully
|
35
|
+
And the "dim_product" table should contain:
|
36
|
+
| item | title | category | description |
|
37
|
+
| JNI-123 | Just a product name | Main category > Subcategory > Sub-subcategory | Some description |
|
38
|
+
| CDI-234 | Another product name | Smart Insight > Cool stuff > Scripts | Another description |
|
@@ -0,0 +1,51 @@
|
|
1
|
+
Feature: Import a CSV file into the database with merging
|
2
|
+
|
3
|
+
The delete_insert_on property can be used to specify an id field that identifies the rows to replace while importing.
|
4
|
+
Existing rows whose id appears in the imported file are deleted, then all rows from the file are inserted.
|
5
|
+
|
6
|
+
Scenario: Successful import with merged items
|
7
|
+
Given a database table called "lkp_categories" with the following fields:
|
8
|
+
| field_name | field_type |
|
9
|
+
| contact_id | INTEGER |
|
10
|
+
| category_id | TEXT |
|
11
|
+
And only the following rows in the "lkp_categories" database table:
|
12
|
+
| contact_id (i) | category_id (s) |
|
13
|
+
| 1 | A |
|
14
|
+
| 1 | B |
|
15
|
+
| 1 | C |
|
16
|
+
| 2 | A |
|
17
|
+
| 2 | D |
|
18
|
+
And a "category_lookup.csv" data file containing:
|
19
|
+
"""
|
20
|
+
user_id,category_id
|
21
|
+
1,A
|
22
|
+
1,E
|
23
|
+
3,E
|
24
|
+
3,F
|
25
|
+
"""
|
26
|
+
And the following definition:
|
27
|
+
"""
|
28
|
+
source :category_lookup do
|
29
|
+
field :user_id, Integer
|
30
|
+
field :category_id, String
|
31
|
+
end
|
32
|
+
|
33
|
+
import :category_lookup do
|
34
|
+
into :lkp_categories
|
35
|
+
|
36
|
+
put :user_id => :contact_id
|
37
|
+
put :category_id => :category_id
|
38
|
+
|
39
|
+
delete_insert_on :contact_id
|
40
|
+
end
|
41
|
+
"""
|
42
|
+
When I execute the definition
|
43
|
+
Then the process should exit successfully
|
44
|
+
And the "lkp_categories" table should contain:
|
45
|
+
| contact_id (i) | category_id (s) |
|
46
|
+
| 1 | A |
|
47
|
+
| 1 | E |
|
48
|
+
| 2 | A |
|
49
|
+
| 2 | D |
|
50
|
+
| 3 | E |
|
51
|
+
| 3 | F |
|
@@ -0,0 +1,49 @@
|
|
1
|
+
Feature: Import a CSV file into the database with truncation
|
2
|
+
|
3
|
+
The truncate_insert property can be used to empty the target table before importing.
|
4
|
+
All existing rows are removed, so only the rows from the imported file remain.
|
5
|
+
|
6
|
+
Scenario: Successful import with truncation
|
7
|
+
Given a database table called "lkp_categories" with the following fields:
|
8
|
+
| field_name | field_type |
|
9
|
+
| contact_id | INTEGER |
|
10
|
+
| category_id | TEXT |
|
11
|
+
And only the following rows in the "lkp_categories" database table:
|
12
|
+
| contact_id (i) | category_id (s) |
|
13
|
+
| 1 | A |
|
14
|
+
| 1 | B |
|
15
|
+
| 1 | C |
|
16
|
+
| 2 | A |
|
17
|
+
| 2 | D |
|
18
|
+
And a "category_lookup.csv" data file containing:
|
19
|
+
"""
|
20
|
+
user_id,category_id
|
21
|
+
1,A
|
22
|
+
1,E
|
23
|
+
3,E
|
24
|
+
3,F
|
25
|
+
"""
|
26
|
+
And the following definition:
|
27
|
+
"""
|
28
|
+
source :category_lookup do
|
29
|
+
field :user_id, Integer
|
30
|
+
field :category_id, String
|
31
|
+
end
|
32
|
+
|
33
|
+
import :category_lookup do
|
34
|
+
into :lkp_categories
|
35
|
+
|
36
|
+
put :user_id => :contact_id
|
37
|
+
put :category_id => :category_id
|
38
|
+
|
39
|
+
truncate_insert true
|
40
|
+
end
|
41
|
+
"""
|
42
|
+
When I execute the definition
|
43
|
+
Then the process should exit successfully
|
44
|
+
And the "lkp_categories" table should contain:
|
45
|
+
| contact_id (i) | category_id (s) |
|
46
|
+
| 1 | A |
|
47
|
+
| 1 | E |
|
48
|
+
| 3 | E |
|
49
|
+
| 3 | F |
|