cranium 0.5 → 0.8.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.env +3 -1
- data/.gitignore +2 -0
- data/Dockerfile +22 -0
- data/LICENSE.txt +1 -1
- data/bin/run_tests.sh +4 -0
- data/codeship-services.yml +24 -0
- data/codeship-steps.yml +3 -0
- data/cranium.gemspec +4 -4
- data/docker/create_databases.sql +8 -0
- data/docker/initialize_greenplum.sh +9 -0
- data/docker-compose.yml +10 -9
- data/features/import/delete_from_table_based_on_csv.feature +39 -0
- data/features/restore_database_connection.feature +1 -1
- data/features/step_definitions/database_table_steps.rb +2 -2
- data/features/support/env.rb +2 -2
- data/lib/cranium/data_importer.rb +4 -2
- data/lib/cranium/database.rb +16 -10
- data/lib/cranium/dsl/database_definition.rb +4 -0
- data/lib/cranium/dsl/import_definition.rb +2 -0
- data/lib/cranium/external_table.rb +19 -24
- data/lib/cranium/import_strategy/base.rb +1 -1
- data/lib/cranium/import_strategy/delete.rb +34 -0
- data/lib/cranium/import_strategy.rb +1 -0
- data/repo-info.json +7 -0
- data/spec/cranium/data_importer_spec.rb +26 -5
- data/spec/cranium/database_spec.rb +34 -0
- data/spec/cranium/dsl/database_definition_spec.rb +34 -0
- data/spec/cranium/dsl/import_definition_spec.rb +8 -0
- data/spec/cranium/external_table_spec.rb +46 -19
- metadata +22 -13
- data/.ruby-version +0 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4715c818c4f8f98f7d1a489979c3cf5ac1a2b50114052c6d619526fc8d0a4ca0
|
4
|
+
data.tar.gz: 6c619a8f8570cae9ef6c51002db8cf2ea6785983b70e0ace6e28b14f37e72330
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e12f463f677b70d7765120658c0efcb00da40c92ef9dd321eb4e6804106cf22f1da85505dfcfd1c5baf8fb46f67f5aa3784cccd4bc7214c6ab767f4d46cd5fa9
|
7
|
+
data.tar.gz: '080472dd1a1c70907496fdf0978231b82f6b6c8da0baf6c6b0e8488398387ef75a2b2ce95731818dd2c712eff316823ac168fc4063202ad887d662304f4a30a2'
|
data/.env
CHANGED
data/.gitignore
CHANGED
data/Dockerfile
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
FROM ruby:2.7.3-slim
|
2
|
+
|
3
|
+
RUN apt-get update \
|
4
|
+
&& apt-get install -y --no-install-recommends \
|
5
|
+
build-essential \
|
6
|
+
libpq-dev \
|
7
|
+
postgresql-client \
|
8
|
+
git-core
|
9
|
+
|
10
|
+
RUN mkdir /app
|
11
|
+
WORKDIR /app
|
12
|
+
|
13
|
+
RUN gem install bundler
|
14
|
+
|
15
|
+
COPY Gemfile .
|
16
|
+
COPY cranium.gemspec .
|
17
|
+
|
18
|
+
ARG https_proxy
|
19
|
+
ARG http_proxy
|
20
|
+
RUN bundle install -j 5
|
21
|
+
COPY . .
|
22
|
+
RUN mkdir /tmp/custdata/
|
data/LICENSE.txt
CHANGED
data/bin/run_tests.sh
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
cranium:
|
2
|
+
build:
|
3
|
+
image: cranium
|
4
|
+
dockerfile: Dockerfile
|
5
|
+
cached: true
|
6
|
+
|
7
|
+
greenplum:
|
8
|
+
image: kevinmtrowbridge/gpdb-docker
|
9
|
+
cached: true
|
10
|
+
volumes:
|
11
|
+
- gpfdist-init:/home/gpadmin/gpfdist-data
|
12
|
+
- ./docker/:/docker-entrypoint-initdb.d
|
13
|
+
- ./db:/db
|
14
|
+
command: sh /docker-entrypoint-initdb.d/initialize_greenplum.sh
|
15
|
+
|
16
|
+
build-app:
|
17
|
+
image: cranium
|
18
|
+
depends_on:
|
19
|
+
- greenplum
|
20
|
+
environment:
|
21
|
+
GREENPLUM_HOST: "greenplum"
|
22
|
+
GPFDIST_HOST: "127.0.0.1"
|
23
|
+
volumes:
|
24
|
+
- gpfdist-init:/app/tmp/custdata
|
data/codeship-steps.yml
ADDED
data/cranium.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = 'cranium'
|
3
|
-
spec.version = '0.
|
3
|
+
spec.version = '0.8.1'
|
4
4
|
spec.authors = ['Emarsys Technologies']
|
5
5
|
spec.email = ['smart-insight-dev@emarsys.com']
|
6
6
|
spec.description = %q{Provides Extract, Transform and Load functionality for loading data from CSV files to a Greenplum database.}
|
@@ -13,13 +13,13 @@ Gem::Specification.new do |spec|
|
|
13
13
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
14
14
|
spec.require_paths = ['lib']
|
15
15
|
|
16
|
-
spec.add_runtime_dependency 'pg', '
|
16
|
+
spec.add_runtime_dependency 'pg', '>= 0'
|
17
17
|
spec.add_runtime_dependency 'progressbar', '~> 0'
|
18
18
|
spec.add_runtime_dependency 'sequel', '>= 4', '< 6'
|
19
19
|
spec.add_runtime_dependency 'slop', '~> 3'
|
20
20
|
|
21
|
-
spec.add_development_dependency 'bundler'
|
22
|
-
spec.add_development_dependency 'rake', '~>
|
21
|
+
spec.add_development_dependency 'bundler'
|
22
|
+
spec.add_development_dependency 'rake', '~> 13'
|
23
23
|
spec.add_development_dependency 'rspec', '~> 3'
|
24
24
|
spec.add_development_dependency 'ruby-prof', '~> 0'
|
25
25
|
spec.add_development_dependency 'cucumber', '~> 1'
|
@@ -0,0 +1,8 @@
|
|
1
|
+
CREATE RESOURCE QUEUE smart_insight WITH (ACTIVE_STATEMENTS=10, PRIORITY=MEDIUM);
|
2
|
+
|
3
|
+
CREATE ROLE cranium WITH RESOURCE QUEUE smart_insight CREATEEXTTABLE LOGIN PASSWORD 'cranium';
|
4
|
+
COMMENT ON ROLE cranium IS 'Cranium test user';
|
5
|
+
|
6
|
+
CREATE DATABASE cranium WITH OWNER=cranium;
|
7
|
+
|
8
|
+
CREATE ROLE database_administrator WITH SUPERUSER LOGIN PASSWORD 'emarsys';
|
@@ -0,0 +1,9 @@
|
|
1
|
+
#!/usr/bin/env bash
|
2
|
+
|
3
|
+
echo "127.0.0.1 $(cat ~/orig_hostname)" >> /etc/hosts
|
4
|
+
|
5
|
+
service sshd start
|
6
|
+
|
7
|
+
su - gpadmin -c /usr/local/bin/run.sh
|
8
|
+
su - gpadmin -c 'cat /docker-entrypoint-initdb.d/create_databases.sql | psql -e'
|
9
|
+
su - gpadmin -c 'gpfdist -d /home/gpadmin/gpfdist-data -p 8123'
|
data/docker-compose.yml
CHANGED
@@ -1,9 +1,10 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
1
|
+
services:
|
2
|
+
greenplum:
|
3
|
+
image: kevinmtrowbridge/gpdb-docker
|
4
|
+
ports:
|
5
|
+
- 5432:5432
|
6
|
+
- 8123:8123
|
7
|
+
volumes:
|
8
|
+
- ./tmp/custdata:/home/gpadmin/gpfdist-data
|
9
|
+
- ./db:/db
|
10
|
+
command: sh /db/start.sh
|
@@ -0,0 +1,39 @@
|
|
1
|
+
Feature: Delete rows from table provided by CSV file
|
2
|
+
|
3
|
+
Scenario: Successful delete
|
4
|
+
Given a database table called "dim_contact" with the following fields:
|
5
|
+
| field_name | field_type |
|
6
|
+
| source_id | TEXT |
|
7
|
+
And only the following rows in the "dim_contact" database table:
|
8
|
+
| source_id (i) |
|
9
|
+
| 1 |
|
10
|
+
| 2 |
|
11
|
+
| 3 |
|
12
|
+
| 4 |
|
13
|
+
| 5 |
|
14
|
+
And a "deleted_contacts_extract.csv" data file containing:
|
15
|
+
"""
|
16
|
+
source_id
|
17
|
+
3
|
18
|
+
4
|
19
|
+
"""
|
20
|
+
And the following definition:
|
21
|
+
"""
|
22
|
+
source :deleted_contacts_extract do
|
23
|
+
field :source_id, String
|
24
|
+
end
|
25
|
+
|
26
|
+
import :deleted_contacts_extract do
|
27
|
+
into :dim_contact
|
28
|
+
put :source_id
|
29
|
+
|
30
|
+
delete_on :source_id
|
31
|
+
end
|
32
|
+
"""
|
33
|
+
When I execute the definition
|
34
|
+
Then the process should exit successfully
|
35
|
+
And the "dim_contact" table should contain:
|
36
|
+
| source_id |
|
37
|
+
| 1 |
|
38
|
+
| 2 |
|
39
|
+
| 5 |
|
@@ -16,7 +16,7 @@ Feature: Sequel database connections are fault tolerant
|
|
16
16
|
require 'sequel'
|
17
17
|
|
18
18
|
def terminate_connections
|
19
|
-
connection = Sequel.connect "postgres://database_administrator:emarsys@#{ENV['
|
19
|
+
connection = Sequel.connect "postgres://database_administrator:emarsys@#{ENV['GREENPLUM_HOST'] || '192.168.56.43'}:5432/cranium", loggers: Cranium.configuration.loggers
|
20
20
|
connection.run("SELECT pg_terminate_backend(procpid) FROM pg_stat_activity WHERE procpid <> pg_backend_pid() AND datname = 'cranium'")
|
21
21
|
end
|
22
22
|
|
@@ -3,13 +3,13 @@ Given(/^a database table called "([^"]*)" with the following fields:$/) do |tabl
|
|
3
3
|
end
|
4
4
|
|
5
5
|
|
6
|
-
Given
|
6
|
+
Given(/^only the following rows in the "([^"]*)" database table:$/) do |table_name, rows|
|
7
7
|
database_table(table_name).clear
|
8
8
|
step %Q(the following new rows in the "#{table_name}" database table:), rows
|
9
9
|
end
|
10
10
|
|
11
11
|
|
12
|
-
Given
|
12
|
+
Given(/^the following new rows in the "([^"]*)" database table:$/) do |table_name, rows|
|
13
13
|
database_table(table_name).insert rows.data
|
14
14
|
end
|
15
15
|
|
data/features/support/env.rb
CHANGED
@@ -5,8 +5,8 @@ require_relative "../../lib/cranium"
|
|
5
5
|
FileUtils.mkdir_p("log") unless Dir.exists?("log")
|
6
6
|
|
7
7
|
Cranium.configure do |config|
|
8
|
-
config.greenplum_connection_string = "postgres://cranium:cranium@#{ENV['
|
9
|
-
config.gpfdist_url = "#{ENV['
|
8
|
+
config.greenplum_connection_string = "postgres://cranium:cranium@#{ENV['GREENPLUM_HOST'] || '192.168.56.43'}:5432/cranium"
|
9
|
+
config.gpfdist_url = "#{ ENV['GPFDIST_HOST'] || '192.168.56.43'}:8123"
|
10
10
|
config.gpfdist_home_directory = "tmp/custdata"
|
11
11
|
config.upload_directory = "cranium_build"
|
12
12
|
config.loggers << Logger.new("log/cucumber.log")
|
@@ -17,14 +17,16 @@ class Cranium::DataImporter
|
|
17
17
|
private
|
18
18
|
|
19
19
|
def importer_for_definition(import_definition)
|
20
|
-
if [!import_definition.merge_fields.empty?, !import_definition.delete_insert_on.empty?, import_definition.truncate_insert].count(true) > 1
|
21
|
-
raise StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
|
20
|
+
if [!import_definition.merge_fields.empty?, !import_definition.delete_insert_on.empty?, !import_definition.delete_on.empty?, import_definition.truncate_insert].count(true) > 1
|
21
|
+
raise StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
|
22
22
|
end
|
23
23
|
|
24
24
|
if !import_definition.merge_fields.empty?
|
25
25
|
Cranium::ImportStrategy::Merge.new(import_definition)
|
26
26
|
elsif !import_definition.delete_insert_on.empty?
|
27
27
|
Cranium::ImportStrategy::DeleteInsert.new(import_definition)
|
28
|
+
elsif !import_definition.delete_on.empty?
|
29
|
+
Cranium::ImportStrategy::Delete.new(import_definition)
|
28
30
|
elsif import_definition.truncate_insert
|
29
31
|
Cranium::ImportStrategy::TruncateInsert.new(import_definition)
|
30
32
|
else
|
data/lib/cranium/database.rb
CHANGED
@@ -13,7 +13,9 @@ module Cranium::Database
|
|
13
13
|
|
14
14
|
def self.[](name)
|
15
15
|
@connections ||= {}
|
16
|
-
@connections[name] ||= setup_connection(@definitions[name].connect_to
|
16
|
+
@connections[name] ||= setup_connection(@definitions[name].connect_to,
|
17
|
+
@definitions[name].retry_count,
|
18
|
+
@definitions[name].retry_delay)
|
17
19
|
end
|
18
20
|
|
19
21
|
|
@@ -28,15 +30,19 @@ module Cranium::Database
|
|
28
30
|
private
|
29
31
|
|
30
32
|
|
31
|
-
def self.setup_connection(
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
33
|
+
def self.setup_connection(connection_details, retry_count = 0, retry_delay = 0)
|
34
|
+
(retry_count + 1).times do |try_count|
|
35
|
+
connection = if Cranium.configuration.log_queries
|
36
|
+
Sequel.connect(connection_details, loggers: Cranium.configuration.loggers)
|
37
|
+
else
|
38
|
+
Sequel.connect(connection_details)
|
39
|
+
end
|
40
|
+
connection.extension :connection_validator
|
41
|
+
connection.pool.connection_validation_timeout = -1
|
42
|
+
break connection
|
43
|
+
rescue Sequel::DatabaseConnectionError
|
44
|
+
(try_count == retry_count) ? raise : sleep(retry_delay)
|
45
|
+
end
|
40
46
|
end
|
41
47
|
|
42
48
|
end
|
@@ -7,11 +7,15 @@ class Cranium::DSL::DatabaseDefinition
|
|
7
7
|
attr_reader :name
|
8
8
|
|
9
9
|
define_attribute :connect_to
|
10
|
+
define_attribute :retry_count
|
11
|
+
define_attribute :retry_delay
|
10
12
|
|
11
13
|
|
12
14
|
|
13
15
|
def initialize(name)
|
14
16
|
@name = name
|
17
|
+
@retry_count = 0
|
18
|
+
@retry_delay = 0
|
15
19
|
end
|
16
20
|
|
17
21
|
|
@@ -10,8 +10,10 @@ class Cranium::DSL::ImportDefinition
|
|
10
10
|
attr_reader :merge_fields
|
11
11
|
|
12
12
|
define_attribute :into
|
13
|
+
define_attribute :error_threshold
|
13
14
|
define_boolean_attribute :truncate_insert
|
14
15
|
define_array_attribute :delete_insert_on
|
16
|
+
define_array_attribute :delete_on
|
15
17
|
|
16
18
|
|
17
19
|
def initialize(name)
|
@@ -1,46 +1,45 @@
|
|
1
1
|
class Cranium::ExternalTable
|
2
2
|
|
3
|
-
def initialize(source, db_connection)
|
4
|
-
@source
|
3
|
+
def initialize(source, db_connection, error_threshold: nil)
|
4
|
+
@source = source
|
5
|
+
@connection = db_connection
|
6
|
+
@error_threshold = error_threshold
|
5
7
|
end
|
6
8
|
|
7
|
-
|
8
|
-
|
9
9
|
def create
|
10
|
-
@connection.run
|
11
|
-
CREATE EXTERNAL TABLE "#{name}" (
|
12
|
-
#{field_definitions}
|
13
|
-
)
|
14
|
-
LOCATION (#{external_location})
|
15
|
-
FORMAT 'CSV' (DELIMITER '#{quote @source.delimiter}' ESCAPE '#{quote @source.escape}' QUOTE '#{quote @source.quote}' HEADER)
|
16
|
-
ENCODING 'UTF8'
|
17
|
-
sql
|
10
|
+
@connection.run external_table_sql
|
18
11
|
end
|
19
12
|
|
20
|
-
|
21
|
-
|
22
13
|
def destroy
|
23
14
|
@connection.run %Q[DROP EXTERNAL TABLE "#{name}"]
|
24
15
|
end
|
25
16
|
|
26
|
-
|
27
|
-
|
28
17
|
def name
|
29
18
|
:"external_#{@source.name}"
|
30
19
|
end
|
31
20
|
|
21
|
+
private
|
32
22
|
|
23
|
+
def external_table_sql
|
24
|
+
external_table_sql = <<~sql
|
25
|
+
CREATE EXTERNAL TABLE "#{name}" (
|
26
|
+
#{field_definitions}
|
27
|
+
)
|
28
|
+
LOCATION (#{external_location})
|
29
|
+
FORMAT 'CSV' (DELIMITER '#{quote @source.delimiter}' ESCAPE '#{quote @source.escape}' QUOTE '#{quote @source.quote}' HEADER)
|
30
|
+
ENCODING 'UTF8'
|
31
|
+
sql
|
33
32
|
|
34
|
-
|
33
|
+
external_table_sql << "SEGMENT REJECT LIMIT #{@error_threshold} PERCENT\n" unless @error_threshold.nil?
|
34
|
+
external_table_sql
|
35
|
+
end
|
35
36
|
|
36
37
|
def field_definitions
|
37
38
|
@source.fields.map do |name, type|
|
38
39
|
%Q("#{name}" #{sql_type_for_ruby_type(type)})
|
39
|
-
end.join ",\n
|
40
|
+
end.join ",\n "
|
40
41
|
end
|
41
42
|
|
42
|
-
|
43
|
-
|
44
43
|
def sql_type_for_ruby_type(type)
|
45
44
|
case type.to_s
|
46
45
|
when "Integer" then
|
@@ -58,14 +57,10 @@ class Cranium::ExternalTable
|
|
58
57
|
end
|
59
58
|
end
|
60
59
|
|
61
|
-
|
62
|
-
|
63
60
|
def quote(text)
|
64
61
|
text.gsub "'", "''"
|
65
62
|
end
|
66
63
|
|
67
|
-
|
68
|
-
|
69
64
|
def external_location
|
70
65
|
@source.files.map do |file_name|
|
71
66
|
"'gpfdist://#{Cranium.configuration.gpfdist_url}/#{Cranium.configuration.upload_directory}/#{file_name}'"
|
@@ -11,7 +11,7 @@ class Cranium::ImportStrategy::Base
|
|
11
11
|
|
12
12
|
|
13
13
|
def import
|
14
|
-
external_table = Cranium::ExternalTable.new Cranium.application.sources[import_definition.name], Cranium::Database.connection
|
14
|
+
external_table = Cranium::ExternalTable.new Cranium.application.sources[import_definition.name], Cranium::Database.connection, error_threshold: @import_definition.error_threshold
|
15
15
|
|
16
16
|
external_table.create
|
17
17
|
number_of_items_imported = import_from external_table.name
|
@@ -0,0 +1,34 @@
|
|
1
|
+
class Cranium::ImportStrategy::Delete < Cranium::ImportStrategy::Base
|
2
|
+
|
3
|
+
def import_from(source_table)
|
4
|
+
@source_table = source_table
|
5
|
+
|
6
|
+
delete_existing_records
|
7
|
+
puts @source_table
|
8
|
+
database[@source_table].count
|
9
|
+
end
|
10
|
+
|
11
|
+
|
12
|
+
|
13
|
+
private
|
14
|
+
|
15
|
+
def delete_existing_records
|
16
|
+
database.
|
17
|
+
from(Sequel.as(target_table, "target"), Sequel.as(@source_table, "source")).
|
18
|
+
where(delete_by_fields.qualify keys_with: :source, values_with: :target).
|
19
|
+
delete
|
20
|
+
end
|
21
|
+
|
22
|
+
|
23
|
+
|
24
|
+
def delete_by_fields
|
25
|
+
Cranium::Sequel::Hash[delete_field_mapping]
|
26
|
+
end
|
27
|
+
|
28
|
+
|
29
|
+
|
30
|
+
def delete_field_mapping
|
31
|
+
import_definition.field_associations.select { |_, target_field| import_definition.delete_on.include? target_field }
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
@@ -2,6 +2,7 @@ module Cranium::ImportStrategy
|
|
2
2
|
|
3
3
|
autoload :Base, 'cranium/import_strategy/base'
|
4
4
|
autoload :DeleteInsert, 'cranium/import_strategy/delete_insert'
|
5
|
+
autoload :Delete, 'cranium/import_strategy/delete'
|
5
6
|
autoload :TruncateInsert, 'cranium/import_strategy/truncate_insert'
|
6
7
|
autoload :Delta, 'cranium/import_strategy/delta'
|
7
8
|
autoload :Merge, 'cranium/import_strategy/merge'
|
data/repo-info.json
ADDED
@@ -1,11 +1,12 @@
|
|
1
1
|
require_relative '../spec_helper'
|
2
2
|
|
3
3
|
describe Cranium::DataImporter do
|
4
|
+
let(:connection) { double 'a_connection' }
|
4
5
|
|
5
6
|
before do
|
6
|
-
connection = double
|
7
7
|
allow(Cranium::Database).to receive(:connection).and_return connection
|
8
8
|
allow(connection).to receive(:transaction).and_yield
|
9
|
+
allow(Cranium.application).to receive(:apply_hook).with :after_import
|
9
10
|
end
|
10
11
|
|
11
12
|
let(:importer) { Cranium::DataImporter.new }
|
@@ -13,12 +14,32 @@ describe Cranium::DataImporter do
|
|
13
14
|
|
14
15
|
describe "#import" do
|
15
16
|
|
17
|
+
context "when called with delete_on strategy" do
|
18
|
+
it "calls Delete strategy" do
|
19
|
+
import_strategy = instance_double Cranium::ImportStrategy::Delete
|
20
|
+
allow(Cranium::ImportStrategy::Delete).to receive(:new).with(definition).and_return import_strategy
|
21
|
+
expect(import_strategy).to receive(:import).and_return 0
|
22
|
+
definition.delete_on :source_id
|
23
|
+
|
24
|
+
importer.import definition
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
context "when called with both merge and delete_on fields set" do
|
29
|
+
it "should raise an exception" do
|
30
|
+
definition.delete_on :source_id
|
31
|
+
definition.merge_on :another_field
|
32
|
+
|
33
|
+
expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
16
37
|
context "when called with both merge and delete_insert fields set" do
|
17
38
|
it "should raise an exception" do
|
18
39
|
definition.delete_insert_on :some_field
|
19
40
|
definition.merge_on :another_field
|
20
41
|
|
21
|
-
expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
|
42
|
+
expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
|
22
43
|
end
|
23
44
|
end
|
24
45
|
|
@@ -27,7 +48,7 @@ describe Cranium::DataImporter do
|
|
27
48
|
definition.truncate_insert true
|
28
49
|
definition.merge_on :another_field
|
29
50
|
|
30
|
-
expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
|
51
|
+
expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
|
31
52
|
end
|
32
53
|
end
|
33
54
|
|
@@ -36,7 +57,7 @@ describe Cranium::DataImporter do
|
|
36
57
|
definition.delete_insert_on :some_field
|
37
58
|
definition.truncate_insert true
|
38
59
|
|
39
|
-
expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
|
60
|
+
expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
|
40
61
|
end
|
41
62
|
end
|
42
63
|
|
@@ -46,7 +67,7 @@ describe Cranium::DataImporter do
|
|
46
67
|
definition.merge_on :another_field
|
47
68
|
definition.truncate_insert true
|
48
69
|
|
49
|
-
expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
|
70
|
+
expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
|
50
71
|
end
|
51
72
|
end
|
52
73
|
|
@@ -81,6 +81,40 @@ describe Cranium::Database do
|
|
81
81
|
|
82
82
|
expect(database[:dwh]).not_to eq database[:dwh2]
|
83
83
|
end
|
84
|
+
|
85
|
+
context 'when retry_count is specified' do
|
86
|
+
before do
|
87
|
+
database.register_database :dwh do
|
88
|
+
connect_to "other connection string"
|
89
|
+
retry_count 3
|
90
|
+
retry_delay 15
|
91
|
+
end
|
92
|
+
allow(database).to receive(:sleep)
|
93
|
+
end
|
94
|
+
|
95
|
+
it "should retry connecting to the DB the specified number of times" do
|
96
|
+
call_count = 0
|
97
|
+
allow(Sequel).to receive(:connect) do
|
98
|
+
call_count += 1
|
99
|
+
call_count < 3 ? raise(Sequel::DatabaseConnectionError) : connection
|
100
|
+
end
|
101
|
+
|
102
|
+
expect(database[:dwh]).to eq connection
|
103
|
+
end
|
104
|
+
|
105
|
+
it "should not retry connecting to the DB more than the specified number of times" do
|
106
|
+
allow(Sequel).to receive(:connect).exactly(4).times.and_raise(Sequel::DatabaseConnectionError)
|
107
|
+
|
108
|
+
expect { database[:dwh] }.to raise_error(Sequel::DatabaseConnectionError)
|
109
|
+
end
|
110
|
+
|
111
|
+
it "should wait retry_delay seconds between connection attempts" do
|
112
|
+
allow(Sequel).to receive(:connect).and_raise(Sequel::DatabaseConnectionError)
|
113
|
+
expect(database).to receive(:sleep).with(15).exactly(3).times
|
114
|
+
|
115
|
+
expect { database[:dwh] }.to raise_error(Sequel::DatabaseConnectionError)
|
116
|
+
end
|
117
|
+
end
|
84
118
|
end
|
85
119
|
|
86
120
|
end
|
@@ -20,4 +20,38 @@ describe Cranium::DSL::DatabaseDefinition do
|
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
23
|
+
|
24
|
+
describe "#retry_count" do
|
25
|
+
context 'when not set' do
|
26
|
+
it "should return 0 by default" do
|
27
|
+
expect(database.retry_count).to eq(0)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
context 'when set' do
|
32
|
+
it "should return the number of retries specified for the database" do
|
33
|
+
database.retry_count 3
|
34
|
+
|
35
|
+
expect(database.retry_count).to eq(3)
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
|
41
|
+
describe "#retry_delay" do
|
42
|
+
context 'when not set' do
|
43
|
+
it "should return 0 by default" do
|
44
|
+
expect(database.retry_delay).to eq(0)
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
context 'when set' do
|
49
|
+
it "should return the number of retries specified for the database" do
|
50
|
+
database.retry_delay 15
|
51
|
+
|
52
|
+
expect(database.retry_delay).to eq(15)
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
23
57
|
end
|
@@ -12,6 +12,14 @@ describe Cranium::DSL::ImportDefinition do
|
|
12
12
|
end
|
13
13
|
end
|
14
14
|
|
15
|
+
describe "#error_threshold" do
|
16
|
+
it "should set the error threshold to the given percentage" do
|
17
|
+
import.error_threshold 10
|
18
|
+
|
19
|
+
expect(import.error_threshold).to eq 10
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
15
23
|
|
16
24
|
describe "#name" do
|
17
25
|
it "should return the name of the import definition" do
|
@@ -1,5 +1,6 @@
|
|
1
1
|
require_relative '../spec_helper'
|
2
2
|
require 'ostruct'
|
3
|
+
require 'date'
|
3
4
|
|
4
5
|
describe Cranium::ExternalTable do
|
5
6
|
|
@@ -19,37 +20,64 @@ describe Cranium::ExternalTable do
|
|
19
20
|
source.escape "'"
|
20
21
|
end
|
21
22
|
end
|
22
|
-
let(:external_table) { Cranium::ExternalTable.new source, connection }
|
23
23
|
|
24
|
+
subject(:external_table) { Cranium::ExternalTable.new source, connection }
|
24
25
|
|
25
26
|
describe "#create" do
|
26
|
-
|
27
|
+
before do
|
27
28
|
allow(Cranium).to receive_messages configuration: OpenStruct.new(
|
28
|
-
|
29
|
-
|
30
|
-
|
29
|
+
gpfdist_url: "gpfdist-url",
|
30
|
+
gpfdist_home_directory: "/gpfdist-home",
|
31
|
+
upload_directory: "upload-dir"
|
31
32
|
)
|
32
33
|
|
33
34
|
allow(source).to receive_messages files: %w(test_products_a.csv test_products_b.csv)
|
35
|
+
end
|
34
36
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
37
|
+
it "should create an external table from the specified source" do
|
38
|
+
expect(connection).to receive(:run).with(<<~sql
|
39
|
+
CREATE EXTERNAL TABLE "external_products" (
|
40
|
+
"text_field" TEXT,
|
41
|
+
"integer_field" INTEGER,
|
42
|
+
"numeric_field" NUMERIC,
|
43
|
+
"date_field" DATE,
|
44
|
+
"timestamp_field" TIMESTAMP WITHOUT TIME ZONE,
|
45
|
+
"boolean_field1" BOOLEAN,
|
46
|
+
"boolean_field2" BOOLEAN
|
47
|
+
)
|
48
|
+
LOCATION ('gpfdist://gpfdist-url/upload-dir/test_products_a.csv', 'gpfdist://gpfdist-url/upload-dir/test_products_b.csv')
|
49
|
+
FORMAT 'CSV' (DELIMITER ';' ESCAPE '''' QUOTE '"' HEADER)
|
50
|
+
ENCODING 'UTF8'
|
48
51
|
sql
|
49
52
|
)
|
50
53
|
|
51
54
|
external_table.create
|
52
55
|
end
|
56
|
+
|
57
|
+
context "with error_threshold argument" do
|
58
|
+
subject(:external_table) { Cranium::ExternalTable.new source, connection, error_threshold: 10 }
|
59
|
+
|
60
|
+
it "should create an external table from the specified source" do
|
61
|
+
expect(connection).to receive(:run).with(<<~sql
|
62
|
+
CREATE EXTERNAL TABLE "external_products" (
|
63
|
+
"text_field" TEXT,
|
64
|
+
"integer_field" INTEGER,
|
65
|
+
"numeric_field" NUMERIC,
|
66
|
+
"date_field" DATE,
|
67
|
+
"timestamp_field" TIMESTAMP WITHOUT TIME ZONE,
|
68
|
+
"boolean_field1" BOOLEAN,
|
69
|
+
"boolean_field2" BOOLEAN
|
70
|
+
)
|
71
|
+
LOCATION ('gpfdist://gpfdist-url/upload-dir/test_products_a.csv', 'gpfdist://gpfdist-url/upload-dir/test_products_b.csv')
|
72
|
+
FORMAT 'CSV' (DELIMITER ';' ESCAPE '''' QUOTE '"' HEADER)
|
73
|
+
ENCODING 'UTF8'
|
74
|
+
SEGMENT REJECT LIMIT 10 PERCENT
|
75
|
+
sql
|
76
|
+
)
|
77
|
+
|
78
|
+
external_table.create
|
79
|
+
end
|
80
|
+
end
|
53
81
|
end
|
54
82
|
|
55
83
|
|
@@ -67,5 +95,4 @@ describe Cranium::ExternalTable do
|
|
67
95
|
expect(external_table.name).to eq(:external_products)
|
68
96
|
end
|
69
97
|
end
|
70
|
-
|
71
98
|
end
|
metadata
CHANGED
@@ -1,27 +1,27 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cranium
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 0.8.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Emarsys Technologies
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-09-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: pg
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
@@ -76,30 +76,30 @@ dependencies:
|
|
76
76
|
name: bundler
|
77
77
|
requirement: !ruby/object:Gem::Requirement
|
78
78
|
requirements:
|
79
|
-
- - "
|
79
|
+
- - ">="
|
80
80
|
- !ruby/object:Gem::Version
|
81
|
-
version: '
|
81
|
+
version: '0'
|
82
82
|
type: :development
|
83
83
|
prerelease: false
|
84
84
|
version_requirements: !ruby/object:Gem::Requirement
|
85
85
|
requirements:
|
86
|
-
- - "
|
86
|
+
- - ">="
|
87
87
|
- !ruby/object:Gem::Version
|
88
|
-
version: '
|
88
|
+
version: '0'
|
89
89
|
- !ruby/object:Gem::Dependency
|
90
90
|
name: rake
|
91
91
|
requirement: !ruby/object:Gem::Requirement
|
92
92
|
requirements:
|
93
93
|
- - "~>"
|
94
94
|
- !ruby/object:Gem::Version
|
95
|
-
version: '
|
95
|
+
version: '13'
|
96
96
|
type: :development
|
97
97
|
prerelease: false
|
98
98
|
version_requirements: !ruby/object:Gem::Requirement
|
99
99
|
requirements:
|
100
100
|
- - "~>"
|
101
101
|
- !ruby/object:Gem::Version
|
102
|
-
version: '
|
102
|
+
version: '13'
|
103
103
|
- !ruby/object:Gem::Dependency
|
104
104
|
name: rspec
|
105
105
|
requirement: !ruby/object:Gem::Requirement
|
@@ -162,24 +162,30 @@ email:
|
|
162
162
|
- smart-insight-dev@emarsys.com
|
163
163
|
executables:
|
164
164
|
- cranium
|
165
|
+
- run_tests.sh
|
165
166
|
extensions: []
|
166
167
|
extra_rdoc_files: []
|
167
168
|
files:
|
168
169
|
- ".env"
|
169
170
|
- ".gitignore"
|
170
171
|
- ".rspec"
|
171
|
-
-
|
172
|
+
- Dockerfile
|
172
173
|
- Gemfile
|
173
174
|
- LICENSE.txt
|
174
175
|
- README.md
|
175
176
|
- Rakefile
|
176
177
|
- Vagrantfile
|
177
178
|
- bin/cranium
|
179
|
+
- bin/run_tests.sh
|
180
|
+
- codeship-services.yml
|
181
|
+
- codeship-steps.yml
|
178
182
|
- config/cucumber.yml
|
179
183
|
- cranium.gemspec
|
180
184
|
- db/setup.sql
|
181
185
|
- db/start.sh
|
182
186
|
- docker-compose.yml
|
187
|
+
- docker/create_databases.sql
|
188
|
+
- docker/initialize_greenplum.sh
|
183
189
|
- examples/config.rb
|
184
190
|
- examples/deduplication.rb
|
185
191
|
- examples/import_csv_with_field_lookup_inserting_new_dimension_keys.rb
|
@@ -188,6 +194,7 @@ files:
|
|
188
194
|
- features/archive.feature
|
189
195
|
- features/extract/incremental_extract.feature
|
190
196
|
- features/extract/simple_extract.feature
|
197
|
+
- features/import/delete_from_table_based_on_csv.feature
|
191
198
|
- features/import/import_csv_to_database_as_delta.feature
|
192
199
|
- features/import/import_csv_to_database_with_delete_insert_merging.feature
|
193
200
|
- features/import/import_csv_to_database_with_truncate_insert.feature
|
@@ -247,6 +254,7 @@ files:
|
|
247
254
|
- lib/cranium/file_utils.rb
|
248
255
|
- lib/cranium/import_strategy.rb
|
249
256
|
- lib/cranium/import_strategy/base.rb
|
257
|
+
- lib/cranium/import_strategy/delete.rb
|
250
258
|
- lib/cranium/import_strategy/delete_insert.rb
|
251
259
|
- lib/cranium/import_strategy/delta.rb
|
252
260
|
- lib/cranium/import_strategy/merge.rb
|
@@ -271,6 +279,7 @@ files:
|
|
271
279
|
- lib/cranium/transformation/sequence.rb
|
272
280
|
- lib/cranium/transformation_record.rb
|
273
281
|
- rake/test.rake
|
282
|
+
- repo-info.json
|
274
283
|
- spec/cranium/application_spec.rb
|
275
284
|
- spec/cranium/archiver_spec.rb
|
276
285
|
- spec/cranium/command_line_options_spec.rb
|
@@ -318,8 +327,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
318
327
|
- !ruby/object:Gem::Version
|
319
328
|
version: '0'
|
320
329
|
requirements: []
|
321
|
-
|
322
|
-
rubygems_version: 2.7.6
|
330
|
+
rubygems_version: 3.2.27
|
323
331
|
signing_key:
|
324
332
|
specification_version: 4
|
325
333
|
summary: Pure Ruby ETL framework
|
@@ -327,6 +335,7 @@ test_files:
|
|
327
335
|
- features/archive.feature
|
328
336
|
- features/extract/incremental_extract.feature
|
329
337
|
- features/extract/simple_extract.feature
|
338
|
+
- features/import/delete_from_table_based_on_csv.feature
|
330
339
|
- features/import/import_csv_to_database_as_delta.feature
|
331
340
|
- features/import/import_csv_to_database_with_delete_insert_merging.feature
|
332
341
|
- features/import/import_csv_to_database_with_truncate_insert.feature
|
data/.ruby-version
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
2.5.1
|