cranium 0.5 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.env +3 -1
- data/.gitignore +2 -0
- data/Dockerfile +22 -0
- data/LICENSE.txt +1 -1
- data/bin/run_tests.sh +4 -0
- data/codeship-services.yml +24 -0
- data/codeship-steps.yml +3 -0
- data/cranium.gemspec +4 -4
- data/docker/create_databases.sql +8 -0
- data/docker/initialize_greenplum.sh +9 -0
- data/docker-compose.yml +10 -9
- data/features/import/delete_from_table_based_on_csv.feature +39 -0
- data/features/restore_database_connection.feature +1 -1
- data/features/step_definitions/database_table_steps.rb +2 -2
- data/features/support/env.rb +2 -2
- data/lib/cranium/data_importer.rb +4 -2
- data/lib/cranium/database.rb +16 -10
- data/lib/cranium/dsl/database_definition.rb +4 -0
- data/lib/cranium/dsl/import_definition.rb +2 -0
- data/lib/cranium/external_table.rb +19 -24
- data/lib/cranium/import_strategy/base.rb +1 -1
- data/lib/cranium/import_strategy/delete.rb +34 -0
- data/lib/cranium/import_strategy.rb +1 -0
- data/repo-info.json +7 -0
- data/spec/cranium/data_importer_spec.rb +26 -5
- data/spec/cranium/database_spec.rb +34 -0
- data/spec/cranium/dsl/database_definition_spec.rb +34 -0
- data/spec/cranium/dsl/import_definition_spec.rb +8 -0
- data/spec/cranium/external_table_spec.rb +46 -19
- metadata +22 -13
- data/.ruby-version +0 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4715c818c4f8f98f7d1a489979c3cf5ac1a2b50114052c6d619526fc8d0a4ca0
+  data.tar.gz: 6c619a8f8570cae9ef6c51002db8cf2ea6785983b70e0ace6e28b14f37e72330
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: e12f463f677b70d7765120658c0efcb00da40c92ef9dd321eb4e6804106cf22f1da85505dfcfd1c5baf8fb46f67f5aa3784cccd4bc7214c6ab767f4d46cd5fa9
+  data.tar.gz: '080472dd1a1c70907496fdf0978231b82f6b6c8da0baf6c6b0e8488398387ef75a2b2ce95731818dd2c712eff316823ac168fc4063202ad887d662304f4a30a2'
data/.env
CHANGED
data/.gitignore
CHANGED
data/Dockerfile
ADDED
@@ -0,0 +1,22 @@
+FROM ruby:2.7.3-slim
+
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+    build-essential \
+    libpq-dev \
+    postgresql-client \
+    git-core
+
+RUN mkdir /app
+WORKDIR /app
+
+RUN gem install bundler
+
+COPY Gemfile .
+COPY cranium.gemspec .
+
+ARG https_proxy
+ARG http_proxy
+RUN bundle install -j 5
+COPY . .
+RUN mkdir /tmp/custdata/
data/LICENSE.txt
CHANGED
data/bin/run_tests.sh
ADDED
data/codeship-services.yml
ADDED
@@ -0,0 +1,24 @@
+cranium:
+  build:
+    image: cranium
+    dockerfile: Dockerfile
+  cached: true
+
+greenplum:
+  image: kevinmtrowbridge/gpdb-docker
+  cached: true
+  volumes:
+    - gpfdist-init:/home/gpadmin/gpfdist-data
+    - ./docker/:/docker-entrypoint-initdb.d
+    - ./db:/db
+  command: sh /docker-entrypoint-initdb.d/initialize_greenplum.sh
+
+build-app:
+  image: cranium
+  depends_on:
+    - greenplum
+  environment:
+    GREENPLUM_HOST: "greenplum"
+    GPFDIST_HOST: "127.0.0.1"
+  volumes:
+    - gpfdist-init:/app/tmp/custdata
data/codeship-steps.yml
ADDED
data/cranium.gemspec
CHANGED
@@ -1,6 +1,6 @@
 Gem::Specification.new do |spec|
   spec.name = 'cranium'
-  spec.version = '0.
+  spec.version = '0.8.1'
   spec.authors = ['Emarsys Technologies']
   spec.email = ['smart-insight-dev@emarsys.com']
   spec.description = %q{Provides Extract, Transform and Load functionality for loading data from CSV files to a Greenplum database.}

@@ -13,13 +13,13 @@ Gem::Specification.new do |spec|
   spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
   spec.require_paths = ['lib']

-  spec.add_runtime_dependency 'pg', '
+  spec.add_runtime_dependency 'pg', '>= 0'
   spec.add_runtime_dependency 'progressbar', '~> 0'
   spec.add_runtime_dependency 'sequel', '>= 4', '< 6'
   spec.add_runtime_dependency 'slop', '~> 3'

-  spec.add_development_dependency 'bundler'
-  spec.add_development_dependency 'rake', '~>
+  spec.add_development_dependency 'bundler'
+  spec.add_development_dependency 'rake', '~> 13'
   spec.add_development_dependency 'rspec', '~> 3'
   spec.add_development_dependency 'ruby-prof', '~> 0'
   spec.add_development_dependency 'cucumber', '~> 1'
data/docker/create_databases.sql
ADDED
@@ -0,0 +1,8 @@
+CREATE RESOURCE QUEUE smart_insight WITH (ACTIVE_STATEMENTS=10, PRIORITY=MEDIUM);
+
+CREATE ROLE cranium WITH RESOURCE QUEUE smart_insight CREATEEXTTABLE LOGIN PASSWORD 'cranium';
+COMMENT ON ROLE cranium IS 'Cranium test user';
+
+CREATE DATABASE cranium WITH OWNER=cranium;
+
+CREATE ROLE database_administrator WITH SUPERUSER LOGIN PASSWORD 'emarsys';
data/docker/initialize_greenplum.sh
ADDED
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+
+echo "127.0.0.1 $(cat ~/orig_hostname)" >> /etc/hosts
+
+service sshd start
+
+su - gpadmin -c /usr/local/bin/run.sh
+su - gpadmin -c 'cat /docker-entrypoint-initdb.d/create_databases.sql | psql -e'
+su - gpadmin -c 'gpfdist -d /home/gpadmin/gpfdist-data -p 8123'
data/docker-compose.yml
CHANGED
@@ -1,9 +1,10 @@
-
-
-
-
-
-
-
-
-
+services:
+  greenplum:
+    image: kevinmtrowbridge/gpdb-docker
+    ports:
+      - 5432:5432
+      - 8123:8123
+    volumes:
+      - ./tmp/custdata:/home/gpadmin/gpfdist-data
+      - ./db:/db
+    command: sh /db/start.sh
data/features/import/delete_from_table_based_on_csv.feature
ADDED
@@ -0,0 +1,39 @@
+Feature: Delete rows from table provided by CSV file
+
+  Scenario: Successful delete
+    Given a database table called "dim_contact" with the following fields:
+      | field_name | field_type |
+      | source_id  | TEXT       |
+    And only the following rows in the "dim_contact" database table:
+      | source_id (i) |
+      | 1             |
+      | 2             |
+      | 3             |
+      | 4             |
+      | 5             |
+    And a "deleted_contacts_extract.csv" data file containing:
+      """
+      source_id
+      3
+      4
+      """
+    And the following definition:
+      """
+      source :deleted_contacts_extract do
+        field :source_id, String
+      end
+
+      import :deleted_contacts_extract do
+        into :dim_contact
+        put :source_id
+
+        delete_on :source_id
+      end
+      """
+    When I execute the definition
+    Then the process should exit successfully
+    And the "dim_contact" table should contain:
+      | source_id |
+      | 1         |
+      | 2         |
+      | 5         |
data/features/restore_database_connection.feature
CHANGED
@@ -16,7 +16,7 @@ Feature: Sequel database connections are fault tolerant
      require 'sequel'

      def terminate_connections
-       connection = Sequel.connect "postgres://database_administrator:emarsys@#{ENV['
+       connection = Sequel.connect "postgres://database_administrator:emarsys@#{ENV['GREENPLUM_HOST'] || '192.168.56.43'}:5432/cranium", loggers: Cranium.configuration.loggers
        connection.run("SELECT pg_terminate_backend(procpid) FROM pg_stat_activity WHERE procpid <> pg_backend_pid() AND datname = 'cranium'")
      end

data/features/step_definitions/database_table_steps.rb
CHANGED
@@ -3,13 +3,13 @@ Given(/^a database table called "([^"]*)" with the following fields:$/) do |tabl
 end


-Given
+Given(/^only the following rows in the "([^"]*)" database table:$/) do |table_name, rows|
   database_table(table_name).clear
   step %Q(the following new rows in the "#{table_name}" database table:), rows
 end


-Given
+Given(/^the following new rows in the "([^"]*)" database table:$/) do |table_name, rows|
   database_table(table_name).insert rows.data
 end

data/features/support/env.rb
CHANGED
@@ -5,8 +5,8 @@ require_relative "../../lib/cranium"
 FileUtils.mkdir_p("log") unless Dir.exists?("log")

 Cranium.configure do |config|
-  config.greenplum_connection_string = "postgres://cranium:cranium@#{ENV['
-  config.gpfdist_url = "#{ENV['
+  config.greenplum_connection_string = "postgres://cranium:cranium@#{ENV['GREENPLUM_HOST'] || '192.168.56.43'}:5432/cranium"
+  config.gpfdist_url = "#{ ENV['GPFDIST_HOST'] || '192.168.56.43'}:8123"
   config.gpfdist_home_directory = "tmp/custdata"
   config.upload_directory = "cranium_build"
   config.loggers << Logger.new("log/cucumber.log")
data/lib/cranium/data_importer.rb
CHANGED
@@ -17,14 +17,16 @@ class Cranium::DataImporter
   private

   def importer_for_definition(import_definition)
-    if [!import_definition.merge_fields.empty?, !import_definition.delete_insert_on.empty?, import_definition.truncate_insert].count(true) > 1
-      raise StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
+    if [!import_definition.merge_fields.empty?, !import_definition.delete_insert_on.empty?, !import_definition.delete_on.empty?, import_definition.truncate_insert].count(true) > 1
+      raise StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
     end

     if !import_definition.merge_fields.empty?
       Cranium::ImportStrategy::Merge.new(import_definition)
     elsif !import_definition.delete_insert_on.empty?
       Cranium::ImportStrategy::DeleteInsert.new(import_definition)
+    elsif !import_definition.delete_on.empty?
+      Cranium::ImportStrategy::Delete.new(import_definition)
     elsif import_definition.truncate_insert
       Cranium::ImportStrategy::TruncateInsert.new(import_definition)
     else
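The guard above now treats delete_on as a fourth mutually exclusive import mode. A minimal standalone sketch of that selection rule, in plain Ruby; symbols stand in for the strategy classes, and :default stands for whatever the final else branch (cut off in the hunk) builds:

    # Sketch only - not the gem's code; symbols replace Cranium::ImportStrategy classes.
    def strategy_for(merge_fields: [], delete_insert_on: [], delete_on: [], truncate_insert: false)
      exclusive = [!merge_fields.empty?, !delete_insert_on.empty?, !delete_on.empty?, truncate_insert]
      raise StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings" if exclusive.count(true) > 1

      if    !merge_fields.empty?     then :merge
      elsif !delete_insert_on.empty? then :delete_insert
      elsif !delete_on.empty?        then :delete           # the strategy added in this diff
      elsif truncate_insert          then :truncate_insert
      else                                :default
      end
    end

    strategy_for(delete_on: [:source_id])  # => :delete
    # strategy_for(delete_on: [:source_id], merge_fields: [:id]) would raise StandardError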
data/lib/cranium/database.rb
CHANGED
@@ -13,7 +13,9 @@ module Cranium::Database

   def self.[](name)
     @connections ||= {}
-    @connections[name] ||= setup_connection(@definitions[name].connect_to
+    @connections[name] ||= setup_connection(@definitions[name].connect_to,
+                                            @definitions[name].retry_count,
+                                            @definitions[name].retry_delay)
   end


@@ -28,15 +30,19 @@ module Cranium::Database
   private


-  def self.setup_connection(
-
-
-
-
-
-
-
+  def self.setup_connection(connection_details, retry_count = 0, retry_delay = 0)
+    (retry_count + 1).times do |try_count|
+      connection = if Cranium.configuration.log_queries
+                     Sequel.connect(connection_details, loggers: Cranium.configuration.loggers)
+                   else
+                     Sequel.connect(connection_details)
+                   end
+      connection.extension :connection_validator
+      connection.pool.connection_validation_timeout = -1
+      break connection
+    rescue Sequel::DatabaseConnectionError
+      (try_count == retry_count) ? raise : sleep(retry_delay)
+    end
   end

 end
data/lib/cranium/dsl/database_definition.rb
CHANGED
@@ -7,11 +7,15 @@ class Cranium::DSL::DatabaseDefinition
   attr_reader :name

   define_attribute :connect_to
+  define_attribute :retry_count
+  define_attribute :retry_delay



   def initialize(name)
     @name = name
+    @retry_count = 0
+    @retry_delay = 0
   end


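With these attributes a database definition can opt into connection retries: retry_count 3 with retry_delay 15 means up to four connection attempts with 15-second pauses, which is what the specs further down exercise. A hedged sketch of the definition object in isolation (the connection string is a placeholder):

    require 'cranium'

    definition = Cranium::DSL::DatabaseDefinition.new :dwh
    definition.connect_to  "postgres://user:secret@dwh-host:5432/dwh"  # placeholder URL
    definition.retry_count 3   # extra connection attempts after the first failure
    definition.retry_delay 15  # seconds to sleep between failed attempts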
data/lib/cranium/dsl/import_definition.rb
CHANGED
@@ -10,8 +10,10 @@ class Cranium::DSL::ImportDefinition
   attr_reader :merge_fields

   define_attribute :into
+  define_attribute :error_threshold
   define_boolean_attribute :truncate_insert
   define_array_attribute :delete_insert_on
+  define_array_attribute :delete_on


   def initialize(name)
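Combined with the feature file above, an import definition can now look roughly like this; the names are illustrative, and error_threshold is assumed to be the percentage handed to the external table as its SEGMENT REJECT LIMIT:

    import :deleted_contacts_extract do
      into :dim_contact
      put :source_id

      delete_on :source_id    # delete target rows whose source_id appears in the CSV
      error_threshold 10      # assumption: tolerate up to 10% rejected rows while reading the CSV
    end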
data/lib/cranium/external_table.rb
CHANGED
@@ -1,46 +1,45 @@
 class Cranium::ExternalTable

-  def initialize(source, db_connection)
-    @source
+  def initialize(source, db_connection, error_threshold: nil)
+    @source = source
+    @connection = db_connection
+    @error_threshold = error_threshold
   end

-
-
   def create
-    @connection.run
-      CREATE EXTERNAL TABLE "#{name}" (
-        #{field_definitions}
-      )
-      LOCATION (#{external_location})
-      FORMAT 'CSV' (DELIMITER '#{quote @source.delimiter}' ESCAPE '#{quote @source.escape}' QUOTE '#{quote @source.quote}' HEADER)
-      ENCODING 'UTF8'
-    sql
+    @connection.run external_table_sql
   end

-
-
   def destroy
     @connection.run %Q[DROP EXTERNAL TABLE "#{name}"]
   end

-
-
   def name
     :"external_#{@source.name}"
   end

+  private

+  def external_table_sql
+    external_table_sql = <<~sql
+      CREATE EXTERNAL TABLE "#{name}" (
+        #{field_definitions}
+      )
+      LOCATION (#{external_location})
+      FORMAT 'CSV' (DELIMITER '#{quote @source.delimiter}' ESCAPE '#{quote @source.escape}' QUOTE '#{quote @source.quote}' HEADER)
+      ENCODING 'UTF8'
+    sql

-
+    external_table_sql << "SEGMENT REJECT LIMIT #{@error_threshold} PERCENT\n" unless @error_threshold.nil?
+    external_table_sql
+  end

   def field_definitions
     @source.fields.map do |name, type|
       %Q("#{name}" #{sql_type_for_ruby_type(type)})
-    end.join ",\n
+    end.join ",\n      "
   end

-
-
   def sql_type_for_ruby_type(type)
     case type.to_s
       when "Integer" then

@@ -58,14 +57,10 @@ class Cranium::ExternalTable
     end
   end

-
-
   def quote(text)
     text.gsub "'", "''"
   end

-
-
   def external_location
     @source.files.map do |file_name|
       "'gpfdist://#{Cranium.configuration.gpfdist_url}/#{Cranium.configuration.upload_directory}/#{file_name}'"
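A hedged usage sketch of the reworked class, assuming source is a source definition and connection a Sequel connection as in the specs further down:

    external_table = Cranium::ExternalTable.new source, connection, error_threshold: 10
    external_table.name     # => :"external_#{source.name}"
    external_table.create   # runs CREATE EXTERNAL TABLE ... SEGMENT REJECT LIMIT 10 PERCENT
    external_table.destroy  # drops the external table again once the import is done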
data/lib/cranium/import_strategy/base.rb
CHANGED
@@ -11,7 +11,7 @@ class Cranium::ImportStrategy::Base


   def import
-    external_table = Cranium::ExternalTable.new Cranium.application.sources[import_definition.name], Cranium::Database.connection
+    external_table = Cranium::ExternalTable.new Cranium.application.sources[import_definition.name], Cranium::Database.connection, error_threshold: @import_definition.error_threshold

     external_table.create
     number_of_items_imported = import_from external_table.name
data/lib/cranium/import_strategy/delete.rb
ADDED
@@ -0,0 +1,34 @@
+class Cranium::ImportStrategy::Delete < Cranium::ImportStrategy::Base
+
+  def import_from(source_table)
+    @source_table = source_table
+
+    delete_existing_records
+    puts @source_table
+    database[@source_table].count
+  end
+
+
+
+  private
+
+  def delete_existing_records
+    database.
+      from(Sequel.as(target_table, "target"), Sequel.as(@source_table, "source")).
+      where(delete_by_fields.qualify keys_with: :source, values_with: :target).
+      delete
+  end
+
+
+
+  def delete_by_fields
+    Cranium::Sequel::Hash[delete_field_mapping]
+  end
+
+
+
+  def delete_field_mapping
+    import_definition.field_associations.select { |_, target_field| import_definition.delete_on.include? target_field }
+  end
+
+end
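The private helpers reduce the import's field associations to just the delete_on keys before building the join condition. In isolation the select behaves like this; plain hashes stand in for the definition objects:

    field_associations = { source_id: :source_id, customer_name: :name }
    delete_on          = [:source_id]

    field_associations.select { |_, target_field| delete_on.include?(target_field) }
    # => { source_id: :source_id }  -- the keys used to match "source" rows against "target"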
data/lib/cranium/import_strategy.rb
CHANGED
@@ -2,6 +2,7 @@ module Cranium::ImportStrategy

   autoload :Base, 'cranium/import_strategy/base'
   autoload :DeleteInsert, 'cranium/import_strategy/delete_insert'
+  autoload :Delete, 'cranium/import_strategy/delete'
   autoload :TruncateInsert, 'cranium/import_strategy/truncate_insert'
   autoload :Delta, 'cranium/import_strategy/delta'
   autoload :Merge, 'cranium/import_strategy/merge'
data/repo-info.json
ADDED
data/spec/cranium/data_importer_spec.rb
CHANGED
@@ -1,11 +1,12 @@
 require_relative '../spec_helper'

 describe Cranium::DataImporter do
+  let(:connection) { double 'a_connection' }

   before do
-    connection = double
     allow(Cranium::Database).to receive(:connection).and_return connection
     allow(connection).to receive(:transaction).and_yield
+    allow(Cranium.application).to receive(:apply_hook).with :after_import
   end

   let(:importer) { Cranium::DataImporter.new }

@@ -13,12 +14,32 @@ describe Cranium::DataImporter do

   describe "#import" do

+    context "when called with delete_on strategy" do
+      it "calls Delete strategy" do
+        import_strategy = instance_double Cranium::ImportStrategy::Delete
+        allow(Cranium::ImportStrategy::Delete).to receive(:new).with(definition).and_return import_strategy
+        expect(import_strategy).to receive(:import).and_return 0
+        definition.delete_on :source_id
+
+        importer.import definition
+      end
+    end
+
+    context "when called with both merge and delete_on fields set" do
+      it "should raise an exception" do
+        definition.delete_on :source_id
+        definition.merge_on :another_field
+
+        expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
+      end
+    end
+
     context "when called with both merge and delete_insert fields set" do
       it "should raise an exception" do
         definition.delete_insert_on :some_field
         definition.merge_on :another_field

-        expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
+        expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
       end
     end


@@ -27,7 +48,7 @@ describe Cranium::DataImporter do
         definition.truncate_insert true
         definition.merge_on :another_field

-        expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
+        expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
       end
     end


@@ -36,7 +57,7 @@ describe Cranium::DataImporter do
         definition.delete_insert_on :some_field
         definition.truncate_insert true

-        expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
+        expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
       end
     end


@@ -46,7 +67,7 @@ describe Cranium::DataImporter do
         definition.merge_on :another_field
         definition.truncate_insert true

-        expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
+        expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
       end
     end

data/spec/cranium/database_spec.rb
CHANGED
@@ -81,6 +81,40 @@ describe Cranium::Database do

       expect(database[:dwh]).not_to eq database[:dwh2]
     end
+
+    context 'when retry_count is specified' do
+      before do
+        database.register_database :dwh do
+          connect_to "other connection string"
+          retry_count 3
+          retry_delay 15
+        end
+        allow(database).to receive(:sleep)
+      end
+
+      it "should retry connecting to the DB the specified number of times" do
+        call_count = 0
+        allow(Sequel).to receive(:connect) do
+          call_count += 1
+          call_count < 3 ? raise(Sequel::DatabaseConnectionError) : connection
+        end
+
+        expect(database[:dwh]).to eq connection
+      end
+
+      it "should not retry connecting to the DB more than the specified number of times" do
+        allow(Sequel).to receive(:connect).exactly(4).times.and_raise(Sequel::DatabaseConnectionError)
+
+        expect { database[:dwh] }.to raise_error(Sequel::DatabaseConnectionError)
+      end
+
+      it "should wait retry_delay seconds between connection attempts" do
+        allow(Sequel).to receive(:connect).and_raise(Sequel::DatabaseConnectionError)
+        expect(database).to receive(:sleep).with(15).exactly(3).times
+
+        expect { database[:dwh] }.to raise_error(Sequel::DatabaseConnectionError)
+      end
+    end
   end

 end
data/spec/cranium/dsl/database_definition_spec.rb
CHANGED
@@ -20,4 +20,38 @@ describe Cranium::DSL::DatabaseDefinition do
     end
   end

+
+  describe "#retry_count" do
+    context 'when not set' do
+      it "should return 0 by default" do
+        expect(database.retry_count).to eq(0)
+      end
+    end
+
+    context 'when set' do
+      it "should return the number of retries specified for the database" do
+        database.retry_count 3
+
+        expect(database.retry_count).to eq(3)
+      end
+    end
+  end
+
+
+  describe "#retry_delay" do
+    context 'when not set' do
+      it "should return 0 by default" do
+        expect(database.retry_delay).to eq(0)
+      end
+    end
+
+    context 'when set' do
+      it "should return the number of retries specified for the database" do
+        database.retry_delay 15
+
+        expect(database.retry_delay).to eq(15)
+      end
+    end
+  end
+
 end
data/spec/cranium/dsl/import_definition_spec.rb
CHANGED
@@ -12,6 +12,14 @@ describe Cranium::DSL::ImportDefinition do
     end
   end

+  describe "#error_threshold" do
+    it "should set the error threshold to the given percentage" do
+      import.error_threshold 10
+
+      expect(import.error_threshold).to eq 10
+    end
+  end
+

   describe "#name" do
     it "should return the name of the import definition" do
data/spec/cranium/external_table_spec.rb
CHANGED
@@ -1,5 +1,6 @@
 require_relative '../spec_helper'
 require 'ostruct'
+require 'date'

 describe Cranium::ExternalTable do


@@ -19,37 +20,64 @@ describe Cranium::ExternalTable do
       source.escape "'"
     end
   end
-  let(:external_table) { Cranium::ExternalTable.new source, connection }

+  subject(:external_table) { Cranium::ExternalTable.new source, connection }

   describe "#create" do
-
+    before do
       allow(Cranium).to receive_messages configuration: OpenStruct.new(
-
-
-
+        gpfdist_url: "gpfdist-url",
+        gpfdist_home_directory: "/gpfdist-home",
+        upload_directory: "upload-dir"
       )

       allow(source).to receive_messages files: %w(test_products_a.csv test_products_b.csv)
+    end

-
-
-
-
-
-
-
-
-
-
-
-
-
+    it "should create an external table from the specified source" do
+      expect(connection).to receive(:run).with(<<~sql
+        CREATE EXTERNAL TABLE "external_products" (
+          "text_field" TEXT,
+          "integer_field" INTEGER,
+          "numeric_field" NUMERIC,
+          "date_field" DATE,
+          "timestamp_field" TIMESTAMP WITHOUT TIME ZONE,
+          "boolean_field1" BOOLEAN,
+          "boolean_field2" BOOLEAN
+        )
+        LOCATION ('gpfdist://gpfdist-url/upload-dir/test_products_a.csv', 'gpfdist://gpfdist-url/upload-dir/test_products_b.csv')
+        FORMAT 'CSV' (DELIMITER ';' ESCAPE '''' QUOTE '"' HEADER)
+        ENCODING 'UTF8'
       sql
       )

       external_table.create
     end
+
+    context "with error_threshold argument" do
+      subject(:external_table) { Cranium::ExternalTable.new source, connection, error_threshold: 10 }
+
+      it "should create an external table from the specified source" do
+        expect(connection).to receive(:run).with(<<~sql
+          CREATE EXTERNAL TABLE "external_products" (
+            "text_field" TEXT,
+            "integer_field" INTEGER,
+            "numeric_field" NUMERIC,
+            "date_field" DATE,
+            "timestamp_field" TIMESTAMP WITHOUT TIME ZONE,
+            "boolean_field1" BOOLEAN,
+            "boolean_field2" BOOLEAN
+          )
+          LOCATION ('gpfdist://gpfdist-url/upload-dir/test_products_a.csv', 'gpfdist://gpfdist-url/upload-dir/test_products_b.csv')
+          FORMAT 'CSV' (DELIMITER ';' ESCAPE '''' QUOTE '"' HEADER)
+          ENCODING 'UTF8'
+          SEGMENT REJECT LIMIT 10 PERCENT
+        sql
+        )
+
+        external_table.create
+      end
+    end
   end



@@ -67,5 +95,4 @@ describe Cranium::ExternalTable do
       expect(external_table.name).to eq(:external_products)
     end
   end
-
 end
metadata
CHANGED
@@ -1,27 +1,27 @@
 --- !ruby/object:Gem::Specification
 name: cranium
 version: !ruby/object:Gem::Version
-  version:
+  version: 0.8.1
 platform: ruby
 authors:
 - Emarsys Technologies
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2021-09-16 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: pg
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - ">="
       - !ruby/object:Gem::Version
        version: '0'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - ">="
       - !ruby/object:Gem::Version
        version: '0'
 - !ruby/object:Gem::Dependency

@@ -76,30 +76,30 @@ dependencies:
   name: bundler
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - ">="
       - !ruby/object:Gem::Version
-        version: '
+        version: '0'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - ">="
       - !ruby/object:Gem::Version
-        version: '
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: rake
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '
+        version: '13'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '
+        version: '13'
 - !ruby/object:Gem::Dependency
   name: rspec
   requirement: !ruby/object:Gem::Requirement

@@ -162,24 +162,30 @@ email:
 - smart-insight-dev@emarsys.com
 executables:
 - cranium
+- run_tests.sh
 extensions: []
 extra_rdoc_files: []
 files:
 - ".env"
 - ".gitignore"
 - ".rspec"
--
+- Dockerfile
 - Gemfile
 - LICENSE.txt
 - README.md
 - Rakefile
 - Vagrantfile
 - bin/cranium
+- bin/run_tests.sh
+- codeship-services.yml
+- codeship-steps.yml
 - config/cucumber.yml
 - cranium.gemspec
 - db/setup.sql
 - db/start.sh
 - docker-compose.yml
+- docker/create_databases.sql
+- docker/initialize_greenplum.sh
 - examples/config.rb
 - examples/deduplication.rb
 - examples/import_csv_with_field_lookup_inserting_new_dimension_keys.rb

@@ -188,6 +194,7 @@ files:
 - features/archive.feature
 - features/extract/incremental_extract.feature
 - features/extract/simple_extract.feature
+- features/import/delete_from_table_based_on_csv.feature
 - features/import/import_csv_to_database_as_delta.feature
 - features/import/import_csv_to_database_with_delete_insert_merging.feature
 - features/import/import_csv_to_database_with_truncate_insert.feature

@@ -247,6 +254,7 @@ files:
 - lib/cranium/file_utils.rb
 - lib/cranium/import_strategy.rb
 - lib/cranium/import_strategy/base.rb
+- lib/cranium/import_strategy/delete.rb
 - lib/cranium/import_strategy/delete_insert.rb
 - lib/cranium/import_strategy/delta.rb
 - lib/cranium/import_strategy/merge.rb

@@ -271,6 +279,7 @@ files:
 - lib/cranium/transformation/sequence.rb
 - lib/cranium/transformation_record.rb
 - rake/test.rake
+- repo-info.json
 - spec/cranium/application_spec.rb
 - spec/cranium/archiver_spec.rb
 - spec/cranium/command_line_options_spec.rb

@@ -318,8 +327,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-
-rubygems_version: 2.7.6
+rubygems_version: 3.2.27
 signing_key:
 specification_version: 4
 summary: Pure Ruby ETL framework

@@ -327,6 +335,7 @@ test_files:
 - features/archive.feature
 - features/extract/incremental_extract.feature
 - features/extract/simple_extract.feature
+- features/import/delete_from_table_based_on_csv.feature
 - features/import/import_csv_to_database_as_delta.feature
 - features/import/import_csv_to_database_with_delete_insert_merging.feature
 - features/import/import_csv_to_database_with_truncate_insert.feature
data/.ruby-version
DELETED
@@ -1 +0,0 @@
-2.5.1