cranium 0.5 → 0.8.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 63bb4574d9b494202c0aff700e4263c1b14753b0b973b9dcb52148ed24a7a861
4
- data.tar.gz: ee3238eee62a1c6ee3cd9d3c69ef7ab19a42133996b5fa6334d1a56a9f2f5ff0
3
+ metadata.gz: 4715c818c4f8f98f7d1a489979c3cf5ac1a2b50114052c6d619526fc8d0a4ca0
4
+ data.tar.gz: 6c619a8f8570cae9ef6c51002db8cf2ea6785983b70e0ace6e28b14f37e72330
5
5
  SHA512:
6
- metadata.gz: 2351552d7be2849c5698df99b977c3c56d54ef376d351125bdb51526b6b826d2f4afecc79292479d0d8308af7edd086fd47213479e693c18ecfe9bc31d40549a
7
- data.tar.gz: a091064e7f4fa3a56a741b4704012ab1e8a5ced7b6dcad0b6826d226dfd06c5520dddfc1a8e8eabbf04dcd7585d7f1bb97801b539f67a3fdb7fbdd063713be4d
6
+ metadata.gz: e12f463f677b70d7765120658c0efcb00da40c92ef9dd321eb4e6804106cf22f1da85505dfcfd1c5baf8fb46f67f5aa3784cccd4bc7214c6ab767f4d46cd5fa9
7
+ data.tar.gz: '080472dd1a1c70907496fdf0978231b82f6b6c8da0baf6c6b0e8488398387ef75a2b2ce95731818dd2c712eff316823ac168fc4063202ad887d662304f4a30a2'
data/.env CHANGED
@@ -1 +1,3 @@
1
- DATABASE_HOST=127.0.0.1
1
+ GREENPLUM_HOST=127.0.0.1
2
+ GPFDIST_HOST=127.0.0.1
3
+
data/.gitignore CHANGED
@@ -1,3 +1,4 @@
1
+ *.aes
1
2
  *.gem
2
3
  *.rbc
3
4
  .bundle
@@ -5,6 +6,7 @@
5
6
  .yardoc
6
7
  .vagrant
7
8
  .idea
9
+ .ruby-version
8
10
  Gemfile.lock
9
11
  InstalledFiles
10
12
  _yardoc
data/Dockerfile ADDED
@@ -0,0 +1,22 @@
1
+ FROM ruby:2.7.3-slim
2
+
3
+ RUN apt-get update \
4
+ && apt-get install -y --no-install-recommends \
5
+ build-essential \
6
+ libpq-dev \
7
+ postgresql-client \
8
+ git-core
9
+
10
+ RUN mkdir /app
11
+ WORKDIR /app
12
+
13
+ RUN gem install bundler
14
+
15
+ COPY Gemfile .
16
+ COPY cranium.gemspec .
17
+
18
+ ARG https_proxy
19
+ ARG http_proxy
20
+ RUN bundle install -j 5
21
+ COPY . .
22
+ RUN mkdir /tmp/custdata/
data/LICENSE.txt CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2013 Zoltan Ormandi
1
+ Copyright (c) 2013-2020 Emarsys
2
2
 
3
3
  MIT License
4
4
 
data/bin/run_tests.sh ADDED
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env bash
2
+
3
+ sleep 30
4
+ bundle exec rake
@@ -0,0 +1,24 @@
1
+ cranium:
2
+ build:
3
+ image: cranium
4
+ dockerfile: Dockerfile
5
+ cached: true
6
+
7
+ greenplum:
8
+ image: kevinmtrowbridge/gpdb-docker
9
+ cached: true
10
+ volumes:
11
+ - gpfdist-init:/home/gpadmin/gpfdist-data
12
+ - ./docker/:/docker-entrypoint-initdb.d
13
+ - ./db:/db
14
+ command: sh /docker-entrypoint-initdb.d/initialize_greenplum.sh
15
+
16
+ build-app:
17
+ image: cranium
18
+ depends_on:
19
+ - greenplum
20
+ environment:
21
+ GREENPLUM_HOST: "greenplum"
22
+ GPFDIST_HOST: "127.0.0.1"
23
+ volumes:
24
+ - gpfdist-init:/app/tmp/custdata
@@ -0,0 +1,3 @@
1
+ - name: Run tests
2
+ service: build-app
3
+ command: sh bin/run_tests.sh
data/cranium.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = 'cranium'
3
- spec.version = '0.5'
3
+ spec.version = '0.8.1'
4
4
  spec.authors = ['Emarsys Technologies']
5
5
  spec.email = ['smart-insight-dev@emarsys.com']
6
6
  spec.description = %q{Provides Extract, Transform and Load functionality for loading data from CSV files to a Greenplum database.}
@@ -13,13 +13,13 @@ Gem::Specification.new do |spec|
13
13
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
14
14
  spec.require_paths = ['lib']
15
15
 
16
- spec.add_runtime_dependency 'pg', '~> 0'
16
+ spec.add_runtime_dependency 'pg', '>= 0'
17
17
  spec.add_runtime_dependency 'progressbar', '~> 0'
18
18
  spec.add_runtime_dependency 'sequel', '>= 4', '< 6'
19
19
  spec.add_runtime_dependency 'slop', '~> 3'
20
20
 
21
- spec.add_development_dependency 'bundler', '~> 1'
22
- spec.add_development_dependency 'rake', '~> 10'
21
+ spec.add_development_dependency 'bundler'
22
+ spec.add_development_dependency 'rake', '~> 13'
23
23
  spec.add_development_dependency 'rspec', '~> 3'
24
24
  spec.add_development_dependency 'ruby-prof', '~> 0'
25
25
  spec.add_development_dependency 'cucumber', '~> 1'
@@ -0,0 +1,8 @@
1
+ CREATE RESOURCE QUEUE smart_insight WITH (ACTIVE_STATEMENTS=10, PRIORITY=MEDIUM);
2
+
3
+ CREATE ROLE cranium WITH RESOURCE QUEUE smart_insight CREATEEXTTABLE LOGIN PASSWORD 'cranium';
4
+ COMMENT ON ROLE cranium IS 'Cranium test user';
5
+
6
+ CREATE DATABASE cranium WITH OWNER=cranium;
7
+
8
+ CREATE ROLE database_administrator WITH SUPERUSER LOGIN PASSWORD 'emarsys';
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env bash
2
+
3
+ echo "127.0.0.1 $(cat ~/orig_hostname)" >> /etc/hosts
4
+
5
+ service sshd start
6
+
7
+ su - gpadmin -c /usr/local/bin/run.sh
8
+ su - gpadmin -c 'cat /docker-entrypoint-initdb.d/create_databases.sql | psql -e'
9
+ su - gpadmin -c 'gpfdist -d /home/gpadmin/gpfdist-data -p 8123'
data/docker-compose.yml CHANGED
@@ -1,9 +1,10 @@
1
- greenplum:
2
- image: kevinmtrowbridge/gpdb-docker
3
- ports:
4
- - 5432:5432
5
- - 8123:8123
6
- volumes:
7
- - ./tmp/custdata:/home/gpadmin/gpfdist-data
8
- - ./db:/db
9
- command: sh /db/start.sh
1
+ services:
2
+ greenplum:
3
+ image: kevinmtrowbridge/gpdb-docker
4
+ ports:
5
+ - 5432:5432
6
+ - 8123:8123
7
+ volumes:
8
+ - ./tmp/custdata:/home/gpadmin/gpfdist-data
9
+ - ./db:/db
10
+ command: sh /db/start.sh
@@ -0,0 +1,39 @@
1
+ Feature: Delete rows from table provided by CSV file
2
+
3
+ Scenario: Successful delete
4
+ Given a database table called "dim_contact" with the following fields:
5
+ | field_name | field_type |
6
+ | source_id | TEXT |
7
+ And only the following rows in the "dim_contact" database table:
8
+ | source_id (i) |
9
+ | 1 |
10
+ | 2 |
11
+ | 3 |
12
+ | 4 |
13
+ | 5 |
14
+ And a "deleted_contacts_extract.csv" data file containing:
15
+ """
16
+ source_id
17
+ 3
18
+ 4
19
+ """
20
+ And the following definition:
21
+ """
22
+ source :deleted_contacts_extract do
23
+ field :source_id, String
24
+ end
25
+
26
+ import :deleted_contacts_extract do
27
+ into :dim_contact
28
+ put :source_id
29
+
30
+ delete_on :source_id
31
+ end
32
+ """
33
+ When I execute the definition
34
+ Then the process should exit successfully
35
+ And the "dim_contact" table should contain:
36
+ | source_id |
37
+ | 1 |
38
+ | 2 |
39
+ | 5 |
@@ -16,7 +16,7 @@ Feature: Sequel database connections are fault tolerant
16
16
  require 'sequel'
17
17
 
18
18
  def terminate_connections
19
- connection = Sequel.connect "postgres://database_administrator:emarsys@#{ENV['DATABASE_HOST'] || '192.168.56.43'}:5432/cranium", loggers: Cranium.configuration.loggers
19
+ connection = Sequel.connect "postgres://database_administrator:emarsys@#{ENV['GREENPLUM_HOST'] || '192.168.56.43'}:5432/cranium", loggers: Cranium.configuration.loggers
20
20
  connection.run("SELECT pg_terminate_backend(procpid) FROM pg_stat_activity WHERE procpid <> pg_backend_pid() AND datname = 'cranium'")
21
21
  end
22
22
 
@@ -3,13 +3,13 @@ Given(/^a database table called "([^"]*)" with the following fields:$/) do |tabl
3
3
  end
4
4
 
5
5
 
6
- Given (/^only the following rows in the "([^"]*)" database table:$/) do |table_name, rows|
6
+ Given(/^only the following rows in the "([^"]*)" database table:$/) do |table_name, rows|
7
7
  database_table(table_name).clear
8
8
  step %Q(the following new rows in the "#{table_name}" database table:), rows
9
9
  end
10
10
 
11
11
 
12
- Given (/^the following new rows in the "([^"]*)" database table:$/) do |table_name, rows|
12
+ Given(/^the following new rows in the "([^"]*)" database table:$/) do |table_name, rows|
13
13
  database_table(table_name).insert rows.data
14
14
  end
15
15
 
@@ -5,8 +5,8 @@ require_relative "../../lib/cranium"
5
5
  FileUtils.mkdir_p("log") unless Dir.exists?("log")
6
6
 
7
7
  Cranium.configure do |config|
8
- config.greenplum_connection_string = "postgres://cranium:cranium@#{ENV['DATABASE_HOST'] || '192.168.56.43'}:5432/cranium"
9
- config.gpfdist_url = "#{ENV['DATABASE_HOST'] || '192.168.56.43'}:8123"
8
+ config.greenplum_connection_string = "postgres://cranium:cranium@#{ENV['GREENPLUM_HOST'] || '192.168.56.43'}:5432/cranium"
9
+ config.gpfdist_url = "#{ ENV['GPFDIST_HOST'] || '192.168.56.43'}:8123"
10
10
  config.gpfdist_home_directory = "tmp/custdata"
11
11
  config.upload_directory = "cranium_build"
12
12
  config.loggers << Logger.new("log/cucumber.log")
@@ -17,14 +17,16 @@ class Cranium::DataImporter
17
17
  private
18
18
 
19
19
  def importer_for_definition(import_definition)
20
- if [!import_definition.merge_fields.empty?, !import_definition.delete_insert_on.empty?, import_definition.truncate_insert].count(true) > 1
21
- raise StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
20
+ if [!import_definition.merge_fields.empty?, !import_definition.delete_insert_on.empty?, !import_definition.delete_on.empty?, import_definition.truncate_insert].count(true) > 1
21
+ raise StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
22
22
  end
23
23
 
24
24
  if !import_definition.merge_fields.empty?
25
25
  Cranium::ImportStrategy::Merge.new(import_definition)
26
26
  elsif !import_definition.delete_insert_on.empty?
27
27
  Cranium::ImportStrategy::DeleteInsert.new(import_definition)
28
+ elsif !import_definition.delete_on.empty?
29
+ Cranium::ImportStrategy::Delete.new(import_definition)
28
30
  elsif import_definition.truncate_insert
29
31
  Cranium::ImportStrategy::TruncateInsert.new(import_definition)
30
32
  else
@@ -13,7 +13,9 @@ module Cranium::Database
13
13
 
14
14
  def self.[](name)
15
15
  @connections ||= {}
16
- @connections[name] ||= setup_connection(@definitions[name].connect_to)
16
+ @connections[name] ||= setup_connection(@definitions[name].connect_to,
17
+ @definitions[name].retry_count,
18
+ @definitions[name].retry_delay)
17
19
  end
18
20
 
19
21
 
@@ -28,15 +30,19 @@ module Cranium::Database
28
30
  private
29
31
 
30
32
 
31
- def self.setup_connection(connection_string)
32
- connection = if Cranium.configuration.log_queries
33
- Sequel.connect(connection_string, loggers: Cranium.configuration.loggers)
34
- else
35
- Sequel.connect(connection_string)
36
- end
37
- connection.extension :connection_validator
38
- connection.pool.connection_validation_timeout = -1
39
- return connection
33
+ def self.setup_connection(connection_details, retry_count = 0, retry_delay = 0)
34
+ (retry_count + 1).times do |try_count|
35
+ connection = if Cranium.configuration.log_queries
36
+ Sequel.connect(connection_details, loggers: Cranium.configuration.loggers)
37
+ else
38
+ Sequel.connect(connection_details)
39
+ end
40
+ connection.extension :connection_validator
41
+ connection.pool.connection_validation_timeout = -1
42
+ break connection
43
+ rescue Sequel::DatabaseConnectionError
44
+ (try_count == retry_count) ? raise : sleep(retry_delay)
45
+ end
40
46
  end
41
47
 
42
48
  end
@@ -7,11 +7,15 @@ class Cranium::DSL::DatabaseDefinition
7
7
  attr_reader :name
8
8
 
9
9
  define_attribute :connect_to
10
+ define_attribute :retry_count
11
+ define_attribute :retry_delay
10
12
 
11
13
 
12
14
 
13
15
  def initialize(name)
14
16
  @name = name
17
+ @retry_count = 0
18
+ @retry_delay = 0
15
19
  end
16
20
 
17
21
 
@@ -10,8 +10,10 @@ class Cranium::DSL::ImportDefinition
10
10
  attr_reader :merge_fields
11
11
 
12
12
  define_attribute :into
13
+ define_attribute :error_threshold
13
14
  define_boolean_attribute :truncate_insert
14
15
  define_array_attribute :delete_insert_on
16
+ define_array_attribute :delete_on
15
17
 
16
18
 
17
19
  def initialize(name)
@@ -1,46 +1,45 @@
1
1
  class Cranium::ExternalTable
2
2
 
3
- def initialize(source, db_connection)
4
- @source, @connection = source, db_connection
3
+ def initialize(source, db_connection, error_threshold: nil)
4
+ @source = source
5
+ @connection = db_connection
6
+ @error_threshold = error_threshold
5
7
  end
6
8
 
7
-
8
-
9
9
  def create
10
- @connection.run <<-sql
11
- CREATE EXTERNAL TABLE "#{name}" (
12
- #{field_definitions}
13
- )
14
- LOCATION (#{external_location})
15
- FORMAT 'CSV' (DELIMITER '#{quote @source.delimiter}' ESCAPE '#{quote @source.escape}' QUOTE '#{quote @source.quote}' HEADER)
16
- ENCODING 'UTF8'
17
- sql
10
+ @connection.run external_table_sql
18
11
  end
19
12
 
20
-
21
-
22
13
  def destroy
23
14
  @connection.run %Q[DROP EXTERNAL TABLE "#{name}"]
24
15
  end
25
16
 
26
-
27
-
28
17
  def name
29
18
  :"external_#{@source.name}"
30
19
  end
31
20
 
21
+ private
32
22
 
23
+ def external_table_sql
24
+ external_table_sql = <<~sql
25
+ CREATE EXTERNAL TABLE "#{name}" (
26
+ #{field_definitions}
27
+ )
28
+ LOCATION (#{external_location})
29
+ FORMAT 'CSV' (DELIMITER '#{quote @source.delimiter}' ESCAPE '#{quote @source.escape}' QUOTE '#{quote @source.quote}' HEADER)
30
+ ENCODING 'UTF8'
31
+ sql
33
32
 
34
- private
33
+ external_table_sql << "SEGMENT REJECT LIMIT #{@error_threshold} PERCENT\n" unless @error_threshold.nil?
34
+ external_table_sql
35
+ end
35
36
 
36
37
  def field_definitions
37
38
  @source.fields.map do |name, type|
38
39
  %Q("#{name}" #{sql_type_for_ruby_type(type)})
39
- end.join ",\n "
40
+ end.join ",\n "
40
41
  end
41
42
 
42
-
43
-
44
43
  def sql_type_for_ruby_type(type)
45
44
  case type.to_s
46
45
  when "Integer" then
@@ -58,14 +57,10 @@ class Cranium::ExternalTable
58
57
  end
59
58
  end
60
59
 
61
-
62
-
63
60
  def quote(text)
64
61
  text.gsub "'", "''"
65
62
  end
66
63
 
67
-
68
-
69
64
  def external_location
70
65
  @source.files.map do |file_name|
71
66
  "'gpfdist://#{Cranium.configuration.gpfdist_url}/#{Cranium.configuration.upload_directory}/#{file_name}'"
@@ -11,7 +11,7 @@ class Cranium::ImportStrategy::Base
11
11
 
12
12
 
13
13
  def import
14
- external_table = Cranium::ExternalTable.new Cranium.application.sources[import_definition.name], Cranium::Database.connection
14
+ external_table = Cranium::ExternalTable.new Cranium.application.sources[import_definition.name], Cranium::Database.connection, error_threshold: @import_definition.error_threshold
15
15
 
16
16
  external_table.create
17
17
  number_of_items_imported = import_from external_table.name
@@ -0,0 +1,34 @@
1
+ class Cranium::ImportStrategy::Delete < Cranium::ImportStrategy::Base
2
+
3
+ def import_from(source_table)
4
+ @source_table = source_table
5
+
6
+ delete_existing_records
7
+ puts @source_table
8
+ database[@source_table].count
9
+ end
10
+
11
+
12
+
13
+ private
14
+
15
+ def delete_existing_records
16
+ database.
17
+ from(Sequel.as(target_table, "target"), Sequel.as(@source_table, "source")).
18
+ where(delete_by_fields.qualify keys_with: :source, values_with: :target).
19
+ delete
20
+ end
21
+
22
+
23
+
24
+ def delete_by_fields
25
+ Cranium::Sequel::Hash[delete_field_mapping]
26
+ end
27
+
28
+
29
+
30
+ def delete_field_mapping
31
+ import_definition.field_associations.select { |_, target_field| import_definition.delete_on.include? target_field }
32
+ end
33
+
34
+ end
@@ -2,6 +2,7 @@ module Cranium::ImportStrategy
2
2
 
3
3
  autoload :Base, 'cranium/import_strategy/base'
4
4
  autoload :DeleteInsert, 'cranium/import_strategy/delete_insert'
5
+ autoload :Delete, 'cranium/import_strategy/delete'
5
6
  autoload :TruncateInsert, 'cranium/import_strategy/truncate_insert'
6
7
  autoload :Delta, 'cranium/import_strategy/delta'
7
8
  autoload :Merge, 'cranium/import_strategy/merge'
data/repo-info.json ADDED
@@ -0,0 +1,7 @@
1
+ {
2
+ "is_in_production": true,
3
+ "is_scannable": true,
4
+ "is_critical": false,
5
+ "contact": "g-gsuite-smartinsight@emarsys.com",
6
+ "hosted": null
7
+ }
@@ -1,11 +1,12 @@
1
1
  require_relative '../spec_helper'
2
2
 
3
3
  describe Cranium::DataImporter do
4
+ let(:connection) { double 'a_connection' }
4
5
 
5
6
  before do
6
- connection = double
7
7
  allow(Cranium::Database).to receive(:connection).and_return connection
8
8
  allow(connection).to receive(:transaction).and_yield
9
+ allow(Cranium.application).to receive(:apply_hook).with :after_import
9
10
  end
10
11
 
11
12
  let(:importer) { Cranium::DataImporter.new }
@@ -13,12 +14,32 @@ describe Cranium::DataImporter do
13
14
 
14
15
  describe "#import" do
15
16
 
17
+ context "when called with delete_on strategy" do
18
+ it "calls Delete strategy" do
19
+ import_strategy = instance_double Cranium::ImportStrategy::Delete
20
+ allow(Cranium::ImportStrategy::Delete).to receive(:new).with(definition).and_return import_strategy
21
+ expect(import_strategy).to receive(:import).and_return 0
22
+ definition.delete_on :source_id
23
+
24
+ importer.import definition
25
+ end
26
+ end
27
+
28
+ context "when called with both merge and delete_on fields set" do
29
+ it "should raise an exception" do
30
+ definition.delete_on :source_id
31
+ definition.merge_on :another_field
32
+
33
+ expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
34
+ end
35
+ end
36
+
16
37
  context "when called with both merge and delete_insert fields set" do
17
38
  it "should raise an exception" do
18
39
  definition.delete_insert_on :some_field
19
40
  definition.merge_on :another_field
20
41
 
21
- expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
42
+ expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
22
43
  end
23
44
  end
24
45
 
@@ -27,7 +48,7 @@ describe Cranium::DataImporter do
27
48
  definition.truncate_insert true
28
49
  definition.merge_on :another_field
29
50
 
30
- expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
51
+ expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
31
52
  end
32
53
  end
33
54
 
@@ -36,7 +57,7 @@ describe Cranium::DataImporter do
36
57
  definition.delete_insert_on :some_field
37
58
  definition.truncate_insert true
38
59
 
39
- expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
60
+ expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
40
61
  end
41
62
  end
42
63
 
@@ -46,7 +67,7 @@ describe Cranium::DataImporter do
46
67
  definition.merge_on :another_field
47
68
  definition.truncate_insert true
48
69
 
49
- expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
70
+ expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
50
71
  end
51
72
  end
52
73
 
@@ -81,6 +81,40 @@ describe Cranium::Database do
81
81
 
82
82
  expect(database[:dwh]).not_to eq database[:dwh2]
83
83
  end
84
+
85
+ context 'when retry_count is specified' do
86
+ before do
87
+ database.register_database :dwh do
88
+ connect_to "other connection string"
89
+ retry_count 3
90
+ retry_delay 15
91
+ end
92
+ allow(database).to receive(:sleep)
93
+ end
94
+
95
+ it "should retry connecting to the DB the specified number of times" do
96
+ call_count = 0
97
+ allow(Sequel).to receive(:connect) do
98
+ call_count += 1
99
+ call_count < 3 ? raise(Sequel::DatabaseConnectionError) : connection
100
+ end
101
+
102
+ expect(database[:dwh]).to eq connection
103
+ end
104
+
105
+ it "should not retry connecting to the DB more than the specified number of times" do
106
+ allow(Sequel).to receive(:connect).exactly(4).times.and_raise(Sequel::DatabaseConnectionError)
107
+
108
+ expect { database[:dwh] }.to raise_error(Sequel::DatabaseConnectionError)
109
+ end
110
+
111
+ it "should wait retry_delay seconds between connection attempts" do
112
+ allow(Sequel).to receive(:connect).and_raise(Sequel::DatabaseConnectionError)
113
+ expect(database).to receive(:sleep).with(15).exactly(3).times
114
+
115
+ expect { database[:dwh] }.to raise_error(Sequel::DatabaseConnectionError)
116
+ end
117
+ end
84
118
  end
85
119
 
86
120
  end
@@ -20,4 +20,38 @@ describe Cranium::DSL::DatabaseDefinition do
20
20
  end
21
21
  end
22
22
 
23
+
24
+ describe "#retry_count" do
25
+ context 'when not set' do
26
+ it "should return 0 by default" do
27
+ expect(database.retry_count).to eq(0)
28
+ end
29
+ end
30
+
31
+ context 'when set' do
32
+ it "should return the number of retries specified for the database" do
33
+ database.retry_count 3
34
+
35
+ expect(database.retry_count).to eq(3)
36
+ end
37
+ end
38
+ end
39
+
40
+
41
+ describe "#retry_delay" do
42
+ context 'when not set' do
43
+ it "should return 0 by default" do
44
+ expect(database.retry_delay).to eq(0)
45
+ end
46
+ end
47
+
48
+ context 'when set' do
49
+ it "should return the number of retries specified for the database" do
50
+ database.retry_delay 15
51
+
52
+ expect(database.retry_delay).to eq(15)
53
+ end
54
+ end
55
+ end
56
+
23
57
  end
@@ -12,6 +12,14 @@ describe Cranium::DSL::ImportDefinition do
12
12
  end
13
13
  end
14
14
 
15
+ describe "#error_threshold" do
16
+ it "should set the error threshold to the given percentage" do
17
+ import.error_threshold 10
18
+
19
+ expect(import.error_threshold).to eq 10
20
+ end
21
+ end
22
+
15
23
 
16
24
  describe "#name" do
17
25
  it "should return the name of the import definition" do
@@ -1,5 +1,6 @@
1
1
  require_relative '../spec_helper'
2
2
  require 'ostruct'
3
+ require 'date'
3
4
 
4
5
  describe Cranium::ExternalTable do
5
6
 
@@ -19,37 +20,64 @@ describe Cranium::ExternalTable do
19
20
  source.escape "'"
20
21
  end
21
22
  end
22
- let(:external_table) { Cranium::ExternalTable.new source, connection }
23
23
 
24
+ subject(:external_table) { Cranium::ExternalTable.new source, connection }
24
25
 
25
26
  describe "#create" do
26
- it "should create an external table from the specified source" do
27
+ before do
27
28
  allow(Cranium).to receive_messages configuration: OpenStruct.new(
28
- gpfdist_url: "gpfdist-url",
29
- gpfdist_home_directory: "/gpfdist-home",
30
- upload_directory: "upload-dir"
29
+ gpfdist_url: "gpfdist-url",
30
+ gpfdist_home_directory: "/gpfdist-home",
31
+ upload_directory: "upload-dir"
31
32
  )
32
33
 
33
34
  allow(source).to receive_messages files: %w(test_products_a.csv test_products_b.csv)
35
+ end
34
36
 
35
- expect(connection).to receive(:run).with(<<-sql
36
- CREATE EXTERNAL TABLE "external_products" (
37
- "text_field" TEXT,
38
- "integer_field" INTEGER,
39
- "numeric_field" NUMERIC,
40
- "date_field" DATE,
41
- "timestamp_field" TIMESTAMP WITHOUT TIME ZONE,
42
- "boolean_field1" BOOLEAN,
43
- "boolean_field2" BOOLEAN
44
- )
45
- LOCATION ('gpfdist://gpfdist-url/upload-dir/test_products_a.csv', 'gpfdist://gpfdist-url/upload-dir/test_products_b.csv')
46
- FORMAT 'CSV' (DELIMITER ';' ESCAPE '''' QUOTE '"' HEADER)
47
- ENCODING 'UTF8'
37
+ it "should create an external table from the specified source" do
38
+ expect(connection).to receive(:run).with(<<~sql
39
+ CREATE EXTERNAL TABLE "external_products" (
40
+ "text_field" TEXT,
41
+ "integer_field" INTEGER,
42
+ "numeric_field" NUMERIC,
43
+ "date_field" DATE,
44
+ "timestamp_field" TIMESTAMP WITHOUT TIME ZONE,
45
+ "boolean_field1" BOOLEAN,
46
+ "boolean_field2" BOOLEAN
47
+ )
48
+ LOCATION ('gpfdist://gpfdist-url/upload-dir/test_products_a.csv', 'gpfdist://gpfdist-url/upload-dir/test_products_b.csv')
49
+ FORMAT 'CSV' (DELIMITER ';' ESCAPE '''' QUOTE '"' HEADER)
50
+ ENCODING 'UTF8'
48
51
  sql
49
52
  )
50
53
 
51
54
  external_table.create
52
55
  end
56
+
57
+ context "with error_threshold argument" do
58
+ subject(:external_table) { Cranium::ExternalTable.new source, connection, error_threshold: 10 }
59
+
60
+ it "should create an external table from the specified source" do
61
+ expect(connection).to receive(:run).with(<<~sql
62
+ CREATE EXTERNAL TABLE "external_products" (
63
+ "text_field" TEXT,
64
+ "integer_field" INTEGER,
65
+ "numeric_field" NUMERIC,
66
+ "date_field" DATE,
67
+ "timestamp_field" TIMESTAMP WITHOUT TIME ZONE,
68
+ "boolean_field1" BOOLEAN,
69
+ "boolean_field2" BOOLEAN
70
+ )
71
+ LOCATION ('gpfdist://gpfdist-url/upload-dir/test_products_a.csv', 'gpfdist://gpfdist-url/upload-dir/test_products_b.csv')
72
+ FORMAT 'CSV' (DELIMITER ';' ESCAPE '''' QUOTE '"' HEADER)
73
+ ENCODING 'UTF8'
74
+ SEGMENT REJECT LIMIT 10 PERCENT
75
+ sql
76
+ )
77
+
78
+ external_table.create
79
+ end
80
+ end
53
81
  end
54
82
 
55
83
 
@@ -67,5 +95,4 @@ describe Cranium::ExternalTable do
67
95
  expect(external_table.name).to eq(:external_products)
68
96
  end
69
97
  end
70
-
71
98
  end
metadata CHANGED
@@ -1,27 +1,27 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cranium
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.5'
4
+ version: 0.8.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Emarsys Technologies
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-03-03 00:00:00.000000000 Z
11
+ date: 2021-09-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: pg
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
19
  version: '0'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - "~>"
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  - !ruby/object:Gem::Dependency
@@ -76,30 +76,30 @@ dependencies:
76
76
  name: bundler
77
77
  requirement: !ruby/object:Gem::Requirement
78
78
  requirements:
79
- - - "~>"
79
+ - - ">="
80
80
  - !ruby/object:Gem::Version
81
- version: '1'
81
+ version: '0'
82
82
  type: :development
83
83
  prerelease: false
84
84
  version_requirements: !ruby/object:Gem::Requirement
85
85
  requirements:
86
- - - "~>"
86
+ - - ">="
87
87
  - !ruby/object:Gem::Version
88
- version: '1'
88
+ version: '0'
89
89
  - !ruby/object:Gem::Dependency
90
90
  name: rake
91
91
  requirement: !ruby/object:Gem::Requirement
92
92
  requirements:
93
93
  - - "~>"
94
94
  - !ruby/object:Gem::Version
95
- version: '10'
95
+ version: '13'
96
96
  type: :development
97
97
  prerelease: false
98
98
  version_requirements: !ruby/object:Gem::Requirement
99
99
  requirements:
100
100
  - - "~>"
101
101
  - !ruby/object:Gem::Version
102
- version: '10'
102
+ version: '13'
103
103
  - !ruby/object:Gem::Dependency
104
104
  name: rspec
105
105
  requirement: !ruby/object:Gem::Requirement
@@ -162,24 +162,30 @@ email:
162
162
  - smart-insight-dev@emarsys.com
163
163
  executables:
164
164
  - cranium
165
+ - run_tests.sh
165
166
  extensions: []
166
167
  extra_rdoc_files: []
167
168
  files:
168
169
  - ".env"
169
170
  - ".gitignore"
170
171
  - ".rspec"
171
- - ".ruby-version"
172
+ - Dockerfile
172
173
  - Gemfile
173
174
  - LICENSE.txt
174
175
  - README.md
175
176
  - Rakefile
176
177
  - Vagrantfile
177
178
  - bin/cranium
179
+ - bin/run_tests.sh
180
+ - codeship-services.yml
181
+ - codeship-steps.yml
178
182
  - config/cucumber.yml
179
183
  - cranium.gemspec
180
184
  - db/setup.sql
181
185
  - db/start.sh
182
186
  - docker-compose.yml
187
+ - docker/create_databases.sql
188
+ - docker/initialize_greenplum.sh
183
189
  - examples/config.rb
184
190
  - examples/deduplication.rb
185
191
  - examples/import_csv_with_field_lookup_inserting_new_dimension_keys.rb
@@ -188,6 +194,7 @@ files:
188
194
  - features/archive.feature
189
195
  - features/extract/incremental_extract.feature
190
196
  - features/extract/simple_extract.feature
197
+ - features/import/delete_from_table_based_on_csv.feature
191
198
  - features/import/import_csv_to_database_as_delta.feature
192
199
  - features/import/import_csv_to_database_with_delete_insert_merging.feature
193
200
  - features/import/import_csv_to_database_with_truncate_insert.feature
@@ -247,6 +254,7 @@ files:
247
254
  - lib/cranium/file_utils.rb
248
255
  - lib/cranium/import_strategy.rb
249
256
  - lib/cranium/import_strategy/base.rb
257
+ - lib/cranium/import_strategy/delete.rb
250
258
  - lib/cranium/import_strategy/delete_insert.rb
251
259
  - lib/cranium/import_strategy/delta.rb
252
260
  - lib/cranium/import_strategy/merge.rb
@@ -271,6 +279,7 @@ files:
271
279
  - lib/cranium/transformation/sequence.rb
272
280
  - lib/cranium/transformation_record.rb
273
281
  - rake/test.rake
282
+ - repo-info.json
274
283
  - spec/cranium/application_spec.rb
275
284
  - spec/cranium/archiver_spec.rb
276
285
  - spec/cranium/command_line_options_spec.rb
@@ -318,8 +327,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
318
327
  - !ruby/object:Gem::Version
319
328
  version: '0'
320
329
  requirements: []
321
- rubyforge_project:
322
- rubygems_version: 2.7.6
330
+ rubygems_version: 3.2.27
323
331
  signing_key:
324
332
  specification_version: 4
325
333
  summary: Pure Ruby ETL framework
@@ -327,6 +335,7 @@ test_files:
327
335
  - features/archive.feature
328
336
  - features/extract/incremental_extract.feature
329
337
  - features/extract/simple_extract.feature
338
+ - features/import/delete_from_table_based_on_csv.feature
330
339
  - features/import/import_csv_to_database_as_delta.feature
331
340
  - features/import/import_csv_to_database_with_delete_insert_merging.feature
332
341
  - features/import/import_csv_to_database_with_truncate_insert.feature
data/.ruby-version DELETED
@@ -1 +0,0 @@
1
- 2.5.1