cranium 0.5 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 63bb4574d9b494202c0aff700e4263c1b14753b0b973b9dcb52148ed24a7a861
4
- data.tar.gz: ee3238eee62a1c6ee3cd9d3c69ef7ab19a42133996b5fa6334d1a56a9f2f5ff0
3
+ metadata.gz: 4715c818c4f8f98f7d1a489979c3cf5ac1a2b50114052c6d619526fc8d0a4ca0
4
+ data.tar.gz: 6c619a8f8570cae9ef6c51002db8cf2ea6785983b70e0ace6e28b14f37e72330
5
5
  SHA512:
6
- metadata.gz: 2351552d7be2849c5698df99b977c3c56d54ef376d351125bdb51526b6b826d2f4afecc79292479d0d8308af7edd086fd47213479e693c18ecfe9bc31d40549a
7
- data.tar.gz: a091064e7f4fa3a56a741b4704012ab1e8a5ced7b6dcad0b6826d226dfd06c5520dddfc1a8e8eabbf04dcd7585d7f1bb97801b539f67a3fdb7fbdd063713be4d
6
+ metadata.gz: e12f463f677b70d7765120658c0efcb00da40c92ef9dd321eb4e6804106cf22f1da85505dfcfd1c5baf8fb46f67f5aa3784cccd4bc7214c6ab767f4d46cd5fa9
7
+ data.tar.gz: '080472dd1a1c70907496fdf0978231b82f6b6c8da0baf6c6b0e8488398387ef75a2b2ce95731818dd2c712eff316823ac168fc4063202ad887d662304f4a30a2'
data/.env CHANGED
@@ -1 +1,3 @@
1
- DATABASE_HOST=127.0.0.1
1
+ GREENPLUM_HOST=127.0.0.1
2
+ GPFDIST_HOST=127.0.0.1
3
+
data/.gitignore CHANGED
@@ -1,3 +1,4 @@
1
+ *.aes
1
2
  *.gem
2
3
  *.rbc
3
4
  .bundle
@@ -5,6 +6,7 @@
5
6
  .yardoc
6
7
  .vagrant
7
8
  .idea
9
+ .ruby-version
8
10
  Gemfile.lock
9
11
  InstalledFiles
10
12
  _yardoc
data/Dockerfile ADDED
@@ -0,0 +1,22 @@
1
+ FROM ruby:2.7.3-slim
2
+
3
+ RUN apt-get update \
4
+ && apt-get install -y --no-install-recommends \
5
+ build-essential \
6
+ libpq-dev \
7
+ postgresql-client \
8
+ git-core
9
+
10
+ RUN mkdir /app
11
+ WORKDIR /app
12
+
13
+ RUN gem install bundler
14
+
15
+ COPY Gemfile .
16
+ COPY cranium.gemspec .
17
+
18
+ ARG https_proxy
19
+ ARG http_proxy
20
+ RUN bundle install -j 5
21
+ COPY . .
22
+ RUN mkdir /tmp/custdata/
data/LICENSE.txt CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2013 Zoltan Ormandi
1
+ Copyright (c) 2013-2020 Emarsys
2
2
 
3
3
  MIT License
4
4
 
data/bin/run_tests.sh ADDED
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env bash
2
+
3
+ sleep 30
4
+ bundle exec rake
@@ -0,0 +1,24 @@
1
+ cranium:
2
+ build:
3
+ image: cranium
4
+ dockerfile: Dockerfile
5
+ cached: true
6
+
7
+ greenplum:
8
+ image: kevinmtrowbridge/gpdb-docker
9
+ cached: true
10
+ volumes:
11
+ - gpfdist-init:/home/gpadmin/gpfdist-data
12
+ - ./docker/:/docker-entrypoint-initdb.d
13
+ - ./db:/db
14
+ command: sh /docker-entrypoint-initdb.d/initialize_greenplum.sh
15
+
16
+ build-app:
17
+ image: cranium
18
+ depends_on:
19
+ - greenplum
20
+ environment:
21
+ GREENPLUM_HOST: "greenplum"
22
+ GPFDIST_HOST: "127.0.0.1"
23
+ volumes:
24
+ - gpfdist-init:/app/tmp/custdata
@@ -0,0 +1,3 @@
1
+ - name: Run tests
2
+ service: build-app
3
+ command: sh bin/run_tests.sh
data/cranium.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = 'cranium'
3
- spec.version = '0.5'
3
+ spec.version = '0.8.1'
4
4
  spec.authors = ['Emarsys Technologies']
5
5
  spec.email = ['smart-insight-dev@emarsys.com']
6
6
  spec.description = %q{Provides Extract, Transform and Load functionality for loading data from CSV files to a Greenplum database.}
@@ -13,13 +13,13 @@ Gem::Specification.new do |spec|
13
13
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
14
14
  spec.require_paths = ['lib']
15
15
 
16
- spec.add_runtime_dependency 'pg', '~> 0'
16
+ spec.add_runtime_dependency 'pg', '>= 0'
17
17
  spec.add_runtime_dependency 'progressbar', '~> 0'
18
18
  spec.add_runtime_dependency 'sequel', '>= 4', '< 6'
19
19
  spec.add_runtime_dependency 'slop', '~> 3'
20
20
 
21
- spec.add_development_dependency 'bundler', '~> 1'
22
- spec.add_development_dependency 'rake', '~> 10'
21
+ spec.add_development_dependency 'bundler'
22
+ spec.add_development_dependency 'rake', '~> 13'
23
23
  spec.add_development_dependency 'rspec', '~> 3'
24
24
  spec.add_development_dependency 'ruby-prof', '~> 0'
25
25
  spec.add_development_dependency 'cucumber', '~> 1'
@@ -0,0 +1,8 @@
1
+ CREATE RESOURCE QUEUE smart_insight WITH (ACTIVE_STATEMENTS=10, PRIORITY=MEDIUM);
2
+
3
+ CREATE ROLE cranium WITH RESOURCE QUEUE smart_insight CREATEEXTTABLE LOGIN PASSWORD 'cranium';
4
+ COMMENT ON ROLE cranium IS 'Cranium test user';
5
+
6
+ CREATE DATABASE cranium WITH OWNER=cranium;
7
+
8
+ CREATE ROLE database_administrator WITH SUPERUSER LOGIN PASSWORD 'emarsys';
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env bash
2
+
3
+ echo "127.0.0.1 $(cat ~/orig_hostname)" >> /etc/hosts
4
+
5
+ service sshd start
6
+
7
+ su - gpadmin -c /usr/local/bin/run.sh
8
+ su - gpadmin -c 'cat /docker-entrypoint-initdb.d/create_databases.sql | psql -e'
9
+ su - gpadmin -c 'gpfdist -d /home/gpadmin/gpfdist-data -p 8123'
data/docker-compose.yml CHANGED
@@ -1,9 +1,10 @@
1
- greenplum:
2
- image: kevinmtrowbridge/gpdb-docker
3
- ports:
4
- - 5432:5432
5
- - 8123:8123
6
- volumes:
7
- - ./tmp/custdata:/home/gpadmin/gpfdist-data
8
- - ./db:/db
9
- command: sh /db/start.sh
1
+ services:
2
+ greenplum:
3
+ image: kevinmtrowbridge/gpdb-docker
4
+ ports:
5
+ - 5432:5432
6
+ - 8123:8123
7
+ volumes:
8
+ - ./tmp/custdata:/home/gpadmin/gpfdist-data
9
+ - ./db:/db
10
+ command: sh /db/start.sh
@@ -0,0 +1,39 @@
1
+ Feature: Delete rows from table provided by CSV file
2
+
3
+ Scenario: Successful delete
4
+ Given a database table called "dim_contact" with the following fields:
5
+ | field_name | field_type |
6
+ | source_id | TEXT |
7
+ And only the following rows in the "dim_contact" database table:
8
+ | source_id (i) |
9
+ | 1 |
10
+ | 2 |
11
+ | 3 |
12
+ | 4 |
13
+ | 5 |
14
+ And a "deleted_contacts_extract.csv" data file containing:
15
+ """
16
+ source_id
17
+ 3
18
+ 4
19
+ """
20
+ And the following definition:
21
+ """
22
+ source :deleted_contacts_extract do
23
+ field :source_id, String
24
+ end
25
+
26
+ import :deleted_contacts_extract do
27
+ into :dim_contact
28
+ put :source_id
29
+
30
+ delete_on :source_id
31
+ end
32
+ """
33
+ When I execute the definition
34
+ Then the process should exit successfully
35
+ And the "dim_contact" table should contain:
36
+ | source_id |
37
+ | 1 |
38
+ | 2 |
39
+ | 5 |
@@ -16,7 +16,7 @@ Feature: Sequel database connections are fault tolerant
16
16
  require 'sequel'
17
17
 
18
18
  def terminate_connections
19
- connection = Sequel.connect "postgres://database_administrator:emarsys@#{ENV['DATABASE_HOST'] || '192.168.56.43'}:5432/cranium", loggers: Cranium.configuration.loggers
19
+ connection = Sequel.connect "postgres://database_administrator:emarsys@#{ENV['GREENPLUM_HOST'] || '192.168.56.43'}:5432/cranium", loggers: Cranium.configuration.loggers
20
20
  connection.run("SELECT pg_terminate_backend(procpid) FROM pg_stat_activity WHERE procpid <> pg_backend_pid() AND datname = 'cranium'")
21
21
  end
22
22
 
@@ -3,13 +3,13 @@ Given(/^a database table called "([^"]*)" with the following fields:$/) do |tabl
3
3
  end
4
4
 
5
5
 
6
- Given (/^only the following rows in the "([^"]*)" database table:$/) do |table_name, rows|
6
+ Given(/^only the following rows in the "([^"]*)" database table:$/) do |table_name, rows|
7
7
  database_table(table_name).clear
8
8
  step %Q(the following new rows in the "#{table_name}" database table:), rows
9
9
  end
10
10
 
11
11
 
12
- Given (/^the following new rows in the "([^"]*)" database table:$/) do |table_name, rows|
12
+ Given(/^the following new rows in the "([^"]*)" database table:$/) do |table_name, rows|
13
13
  database_table(table_name).insert rows.data
14
14
  end
15
15
 
@@ -5,8 +5,8 @@ require_relative "../../lib/cranium"
5
5
  FileUtils.mkdir_p("log") unless Dir.exists?("log")
6
6
 
7
7
  Cranium.configure do |config|
8
- config.greenplum_connection_string = "postgres://cranium:cranium@#{ENV['DATABASE_HOST'] || '192.168.56.43'}:5432/cranium"
9
- config.gpfdist_url = "#{ENV['DATABASE_HOST'] || '192.168.56.43'}:8123"
8
+ config.greenplum_connection_string = "postgres://cranium:cranium@#{ENV['GREENPLUM_HOST'] || '192.168.56.43'}:5432/cranium"
9
+ config.gpfdist_url = "#{ ENV['GPFDIST_HOST'] || '192.168.56.43'}:8123"
10
10
  config.gpfdist_home_directory = "tmp/custdata"
11
11
  config.upload_directory = "cranium_build"
12
12
  config.loggers << Logger.new("log/cucumber.log")
@@ -17,14 +17,16 @@ class Cranium::DataImporter
17
17
  private
18
18
 
19
19
  def importer_for_definition(import_definition)
20
- if [!import_definition.merge_fields.empty?, !import_definition.delete_insert_on.empty?, import_definition.truncate_insert].count(true) > 1
21
- raise StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
20
+ if [!import_definition.merge_fields.empty?, !import_definition.delete_insert_on.empty?, !import_definition.delete_on.empty?, import_definition.truncate_insert].count(true) > 1
21
+ raise StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
22
22
  end
23
23
 
24
24
  if !import_definition.merge_fields.empty?
25
25
  Cranium::ImportStrategy::Merge.new(import_definition)
26
26
  elsif !import_definition.delete_insert_on.empty?
27
27
  Cranium::ImportStrategy::DeleteInsert.new(import_definition)
28
+ elsif !import_definition.delete_on.empty?
29
+ Cranium::ImportStrategy::Delete.new(import_definition)
28
30
  elsif import_definition.truncate_insert
29
31
  Cranium::ImportStrategy::TruncateInsert.new(import_definition)
30
32
  else
@@ -13,7 +13,9 @@ module Cranium::Database
13
13
 
14
14
  def self.[](name)
15
15
  @connections ||= {}
16
- @connections[name] ||= setup_connection(@definitions[name].connect_to)
16
+ @connections[name] ||= setup_connection(@definitions[name].connect_to,
17
+ @definitions[name].retry_count,
18
+ @definitions[name].retry_delay)
17
19
  end
18
20
 
19
21
 
@@ -28,15 +30,19 @@ module Cranium::Database
28
30
  private
29
31
 
30
32
 
31
- def self.setup_connection(connection_string)
32
- connection = if Cranium.configuration.log_queries
33
- Sequel.connect(connection_string, loggers: Cranium.configuration.loggers)
34
- else
35
- Sequel.connect(connection_string)
36
- end
37
- connection.extension :connection_validator
38
- connection.pool.connection_validation_timeout = -1
39
- return connection
33
+ def self.setup_connection(connection_details, retry_count = 0, retry_delay = 0)
34
+ (retry_count + 1).times do |try_count|
35
+ connection = if Cranium.configuration.log_queries
36
+ Sequel.connect(connection_details, loggers: Cranium.configuration.loggers)
37
+ else
38
+ Sequel.connect(connection_details)
39
+ end
40
+ connection.extension :connection_validator
41
+ connection.pool.connection_validation_timeout = -1
42
+ break connection
43
+ rescue Sequel::DatabaseConnectionError
44
+ (try_count == retry_count) ? raise : sleep(retry_delay)
45
+ end
40
46
  end
41
47
 
42
48
  end
@@ -7,11 +7,15 @@ class Cranium::DSL::DatabaseDefinition
7
7
  attr_reader :name
8
8
 
9
9
  define_attribute :connect_to
10
+ define_attribute :retry_count
11
+ define_attribute :retry_delay
10
12
 
11
13
 
12
14
 
13
15
  def initialize(name)
14
16
  @name = name
17
+ @retry_count = 0
18
+ @retry_delay = 0
15
19
  end
16
20
 
17
21
 
@@ -10,8 +10,10 @@ class Cranium::DSL::ImportDefinition
10
10
  attr_reader :merge_fields
11
11
 
12
12
  define_attribute :into
13
+ define_attribute :error_threshold
13
14
  define_boolean_attribute :truncate_insert
14
15
  define_array_attribute :delete_insert_on
16
+ define_array_attribute :delete_on
15
17
 
16
18
 
17
19
  def initialize(name)
@@ -1,46 +1,45 @@
1
1
  class Cranium::ExternalTable
2
2
 
3
- def initialize(source, db_connection)
4
- @source, @connection = source, db_connection
3
+ def initialize(source, db_connection, error_threshold: nil)
4
+ @source = source
5
+ @connection = db_connection
6
+ @error_threshold = error_threshold
5
7
  end
6
8
 
7
-
8
-
9
9
  def create
10
- @connection.run <<-sql
11
- CREATE EXTERNAL TABLE "#{name}" (
12
- #{field_definitions}
13
- )
14
- LOCATION (#{external_location})
15
- FORMAT 'CSV' (DELIMITER '#{quote @source.delimiter}' ESCAPE '#{quote @source.escape}' QUOTE '#{quote @source.quote}' HEADER)
16
- ENCODING 'UTF8'
17
- sql
10
+ @connection.run external_table_sql
18
11
  end
19
12
 
20
-
21
-
22
13
  def destroy
23
14
  @connection.run %Q[DROP EXTERNAL TABLE "#{name}"]
24
15
  end
25
16
 
26
-
27
-
28
17
  def name
29
18
  :"external_#{@source.name}"
30
19
  end
31
20
 
21
+ private
32
22
 
23
+ def external_table_sql
24
+ external_table_sql = <<~sql
25
+ CREATE EXTERNAL TABLE "#{name}" (
26
+ #{field_definitions}
27
+ )
28
+ LOCATION (#{external_location})
29
+ FORMAT 'CSV' (DELIMITER '#{quote @source.delimiter}' ESCAPE '#{quote @source.escape}' QUOTE '#{quote @source.quote}' HEADER)
30
+ ENCODING 'UTF8'
31
+ sql
33
32
 
34
- private
33
+ external_table_sql << "SEGMENT REJECT LIMIT #{@error_threshold} PERCENT\n" unless @error_threshold.nil?
34
+ external_table_sql
35
+ end
35
36
 
36
37
  def field_definitions
37
38
  @source.fields.map do |name, type|
38
39
  %Q("#{name}" #{sql_type_for_ruby_type(type)})
39
- end.join ",\n "
40
+ end.join ",\n "
40
41
  end
41
42
 
42
-
43
-
44
43
  def sql_type_for_ruby_type(type)
45
44
  case type.to_s
46
45
  when "Integer" then
@@ -58,14 +57,10 @@ class Cranium::ExternalTable
58
57
  end
59
58
  end
60
59
 
61
-
62
-
63
60
  def quote(text)
64
61
  text.gsub "'", "''"
65
62
  end
66
63
 
67
-
68
-
69
64
  def external_location
70
65
  @source.files.map do |file_name|
71
66
  "'gpfdist://#{Cranium.configuration.gpfdist_url}/#{Cranium.configuration.upload_directory}/#{file_name}'"
@@ -11,7 +11,7 @@ class Cranium::ImportStrategy::Base
11
11
 
12
12
 
13
13
  def import
14
- external_table = Cranium::ExternalTable.new Cranium.application.sources[import_definition.name], Cranium::Database.connection
14
+ external_table = Cranium::ExternalTable.new Cranium.application.sources[import_definition.name], Cranium::Database.connection, error_threshold: @import_definition.error_threshold
15
15
 
16
16
  external_table.create
17
17
  number_of_items_imported = import_from external_table.name
@@ -0,0 +1,34 @@
1
+ class Cranium::ImportStrategy::Delete < Cranium::ImportStrategy::Base
2
+
3
+ def import_from(source_table)
4
+ @source_table = source_table
5
+
6
+ delete_existing_records
7
+ puts @source_table
8
+ database[@source_table].count
9
+ end
10
+
11
+
12
+
13
+ private
14
+
15
+ def delete_existing_records
16
+ database.
17
+ from(Sequel.as(target_table, "target"), Sequel.as(@source_table, "source")).
18
+ where(delete_by_fields.qualify keys_with: :source, values_with: :target).
19
+ delete
20
+ end
21
+
22
+
23
+
24
+ def delete_by_fields
25
+ Cranium::Sequel::Hash[delete_field_mapping]
26
+ end
27
+
28
+
29
+
30
+ def delete_field_mapping
31
+ import_definition.field_associations.select { |_, target_field| import_definition.delete_on.include? target_field }
32
+ end
33
+
34
+ end
@@ -2,6 +2,7 @@ module Cranium::ImportStrategy
2
2
 
3
3
  autoload :Base, 'cranium/import_strategy/base'
4
4
  autoload :DeleteInsert, 'cranium/import_strategy/delete_insert'
5
+ autoload :Delete, 'cranium/import_strategy/delete'
5
6
  autoload :TruncateInsert, 'cranium/import_strategy/truncate_insert'
6
7
  autoload :Delta, 'cranium/import_strategy/delta'
7
8
  autoload :Merge, 'cranium/import_strategy/merge'
data/repo-info.json ADDED
@@ -0,0 +1,7 @@
1
+ {
2
+ "is_in_production": true,
3
+ "is_scannable": true,
4
+ "is_critical": false,
5
+ "contact": "g-gsuite-smartinsight@emarsys.com",
6
+ "hosted": null
7
+ }
@@ -1,11 +1,12 @@
1
1
  require_relative '../spec_helper'
2
2
 
3
3
  describe Cranium::DataImporter do
4
+ let(:connection) { double 'a_connection' }
4
5
 
5
6
  before do
6
- connection = double
7
7
  allow(Cranium::Database).to receive(:connection).and_return connection
8
8
  allow(connection).to receive(:transaction).and_yield
9
+ allow(Cranium.application).to receive(:apply_hook).with :after_import
9
10
  end
10
11
 
11
12
  let(:importer) { Cranium::DataImporter.new }
@@ -13,12 +14,32 @@ describe Cranium::DataImporter do
13
14
 
14
15
  describe "#import" do
15
16
 
17
+ context "when called with delete_on strategy" do
18
+ it "calls Delete strategy" do
19
+ import_strategy = instance_double Cranium::ImportStrategy::Delete
20
+ allow(Cranium::ImportStrategy::Delete).to receive(:new).with(definition).and_return import_strategy
21
+ expect(import_strategy).to receive(:import).and_return 0
22
+ definition.delete_on :source_id
23
+
24
+ importer.import definition
25
+ end
26
+ end
27
+
28
+ context "when called with both merge and delete_on fields set" do
29
+ it "should raise an exception" do
30
+ definition.delete_on :source_id
31
+ definition.merge_on :another_field
32
+
33
+ expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
34
+ end
35
+ end
36
+
16
37
  context "when called with both merge and delete_insert fields set" do
17
38
  it "should raise an exception" do
18
39
  definition.delete_insert_on :some_field
19
40
  definition.merge_on :another_field
20
41
 
21
- expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
42
+ expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
22
43
  end
23
44
  end
24
45
 
@@ -27,7 +48,7 @@ describe Cranium::DataImporter do
27
48
  definition.truncate_insert true
28
49
  definition.merge_on :another_field
29
50
 
30
- expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
51
+ expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
31
52
  end
32
53
  end
33
54
 
@@ -36,7 +57,7 @@ describe Cranium::DataImporter do
36
57
  definition.delete_insert_on :some_field
37
58
  definition.truncate_insert true
38
59
 
39
- expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
60
+ expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
40
61
  end
41
62
  end
42
63
 
@@ -46,7 +67,7 @@ describe Cranium::DataImporter do
46
67
  definition.merge_on :another_field
47
68
  definition.truncate_insert true
48
69
 
49
- expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
70
+ expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
50
71
  end
51
72
  end
52
73
 
@@ -81,6 +81,40 @@ describe Cranium::Database do
81
81
 
82
82
  expect(database[:dwh]).not_to eq database[:dwh2]
83
83
  end
84
+
85
+ context 'when retry_count is specified' do
86
+ before do
87
+ database.register_database :dwh do
88
+ connect_to "other connection string"
89
+ retry_count 3
90
+ retry_delay 15
91
+ end
92
+ allow(database).to receive(:sleep)
93
+ end
94
+
95
+ it "should retry connecting to the DB the specified number of times" do
96
+ call_count = 0
97
+ allow(Sequel).to receive(:connect) do
98
+ call_count += 1
99
+ call_count < 3 ? raise(Sequel::DatabaseConnectionError) : connection
100
+ end
101
+
102
+ expect(database[:dwh]).to eq connection
103
+ end
104
+
105
+ it "should not retry connecting to the DB more than the specified number of times" do
106
+ allow(Sequel).to receive(:connect).exactly(4).times.and_raise(Sequel::DatabaseConnectionError)
107
+
108
+ expect { database[:dwh] }.to raise_error(Sequel::DatabaseConnectionError)
109
+ end
110
+
111
+ it "should wait retry_delay seconds between connection attempts" do
112
+ allow(Sequel).to receive(:connect).and_raise(Sequel::DatabaseConnectionError)
113
+ expect(database).to receive(:sleep).with(15).exactly(3).times
114
+
115
+ expect { database[:dwh] }.to raise_error(Sequel::DatabaseConnectionError)
116
+ end
117
+ end
84
118
  end
85
119
 
86
120
  end
@@ -20,4 +20,38 @@ describe Cranium::DSL::DatabaseDefinition do
20
20
  end
21
21
  end
22
22
 
23
+
24
+ describe "#retry_count" do
25
+ context 'when not set' do
26
+ it "should return 0 by default" do
27
+ expect(database.retry_count).to eq(0)
28
+ end
29
+ end
30
+
31
+ context 'when set' do
32
+ it "should return the number of retries specified for the database" do
33
+ database.retry_count 3
34
+
35
+ expect(database.retry_count).to eq(3)
36
+ end
37
+ end
38
+ end
39
+
40
+
41
+ describe "#retry_delay" do
42
+ context 'when not set' do
43
+ it "should return 0 by default" do
44
+ expect(database.retry_delay).to eq(0)
45
+ end
46
+ end
47
+
48
+ context 'when set' do
49
+ it "should return the number of retries specified for the database" do
50
+ database.retry_delay 15
51
+
52
+ expect(database.retry_delay).to eq(15)
53
+ end
54
+ end
55
+ end
56
+
23
57
  end
@@ -12,6 +12,14 @@ describe Cranium::DSL::ImportDefinition do
12
12
  end
13
13
  end
14
14
 
15
+ describe "#error_threshold" do
16
+ it "should set the error threshold to the given percentage" do
17
+ import.error_threshold 10
18
+
19
+ expect(import.error_threshold).to eq 10
20
+ end
21
+ end
22
+
15
23
 
16
24
  describe "#name" do
17
25
  it "should return the name of the import definition" do
@@ -1,5 +1,6 @@
1
1
  require_relative '../spec_helper'
2
2
  require 'ostruct'
3
+ require 'date'
3
4
 
4
5
  describe Cranium::ExternalTable do
5
6
 
@@ -19,37 +20,64 @@ describe Cranium::ExternalTable do
19
20
  source.escape "'"
20
21
  end
21
22
  end
22
- let(:external_table) { Cranium::ExternalTable.new source, connection }
23
23
 
24
+ subject(:external_table) { Cranium::ExternalTable.new source, connection }
24
25
 
25
26
  describe "#create" do
26
- it "should create an external table from the specified source" do
27
+ before do
27
28
  allow(Cranium).to receive_messages configuration: OpenStruct.new(
28
- gpfdist_url: "gpfdist-url",
29
- gpfdist_home_directory: "/gpfdist-home",
30
- upload_directory: "upload-dir"
29
+ gpfdist_url: "gpfdist-url",
30
+ gpfdist_home_directory: "/gpfdist-home",
31
+ upload_directory: "upload-dir"
31
32
  )
32
33
 
33
34
  allow(source).to receive_messages files: %w(test_products_a.csv test_products_b.csv)
35
+ end
34
36
 
35
- expect(connection).to receive(:run).with(<<-sql
36
- CREATE EXTERNAL TABLE "external_products" (
37
- "text_field" TEXT,
38
- "integer_field" INTEGER,
39
- "numeric_field" NUMERIC,
40
- "date_field" DATE,
41
- "timestamp_field" TIMESTAMP WITHOUT TIME ZONE,
42
- "boolean_field1" BOOLEAN,
43
- "boolean_field2" BOOLEAN
44
- )
45
- LOCATION ('gpfdist://gpfdist-url/upload-dir/test_products_a.csv', 'gpfdist://gpfdist-url/upload-dir/test_products_b.csv')
46
- FORMAT 'CSV' (DELIMITER ';' ESCAPE '''' QUOTE '"' HEADER)
47
- ENCODING 'UTF8'
37
+ it "should create an external table from the specified source" do
38
+ expect(connection).to receive(:run).with(<<~sql
39
+ CREATE EXTERNAL TABLE "external_products" (
40
+ "text_field" TEXT,
41
+ "integer_field" INTEGER,
42
+ "numeric_field" NUMERIC,
43
+ "date_field" DATE,
44
+ "timestamp_field" TIMESTAMP WITHOUT TIME ZONE,
45
+ "boolean_field1" BOOLEAN,
46
+ "boolean_field2" BOOLEAN
47
+ )
48
+ LOCATION ('gpfdist://gpfdist-url/upload-dir/test_products_a.csv', 'gpfdist://gpfdist-url/upload-dir/test_products_b.csv')
49
+ FORMAT 'CSV' (DELIMITER ';' ESCAPE '''' QUOTE '"' HEADER)
50
+ ENCODING 'UTF8'
48
51
  sql
49
52
  )
50
53
 
51
54
  external_table.create
52
55
  end
56
+
57
+ context "with error_threshold argument" do
58
+ subject(:external_table) { Cranium::ExternalTable.new source, connection, error_threshold: 10 }
59
+
60
+ it "should create an external table from the specified source" do
61
+ expect(connection).to receive(:run).with(<<~sql
62
+ CREATE EXTERNAL TABLE "external_products" (
63
+ "text_field" TEXT,
64
+ "integer_field" INTEGER,
65
+ "numeric_field" NUMERIC,
66
+ "date_field" DATE,
67
+ "timestamp_field" TIMESTAMP WITHOUT TIME ZONE,
68
+ "boolean_field1" BOOLEAN,
69
+ "boolean_field2" BOOLEAN
70
+ )
71
+ LOCATION ('gpfdist://gpfdist-url/upload-dir/test_products_a.csv', 'gpfdist://gpfdist-url/upload-dir/test_products_b.csv')
72
+ FORMAT 'CSV' (DELIMITER ';' ESCAPE '''' QUOTE '"' HEADER)
73
+ ENCODING 'UTF8'
74
+ SEGMENT REJECT LIMIT 10 PERCENT
75
+ sql
76
+ )
77
+
78
+ external_table.create
79
+ end
80
+ end
53
81
  end
54
82
 
55
83
 
@@ -67,5 +95,4 @@ describe Cranium::ExternalTable do
67
95
  expect(external_table.name).to eq(:external_products)
68
96
  end
69
97
  end
70
-
71
98
  end
metadata CHANGED
@@ -1,27 +1,27 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cranium
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.5'
4
+ version: 0.8.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Emarsys Technologies
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-03-03 00:00:00.000000000 Z
11
+ date: 2021-09-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: pg
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
19
  version: '0'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - "~>"
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  - !ruby/object:Gem::Dependency
@@ -76,30 +76,30 @@ dependencies:
76
76
  name: bundler
77
77
  requirement: !ruby/object:Gem::Requirement
78
78
  requirements:
79
- - - "~>"
79
+ - - ">="
80
80
  - !ruby/object:Gem::Version
81
- version: '1'
81
+ version: '0'
82
82
  type: :development
83
83
  prerelease: false
84
84
  version_requirements: !ruby/object:Gem::Requirement
85
85
  requirements:
86
- - - "~>"
86
+ - - ">="
87
87
  - !ruby/object:Gem::Version
88
- version: '1'
88
+ version: '0'
89
89
  - !ruby/object:Gem::Dependency
90
90
  name: rake
91
91
  requirement: !ruby/object:Gem::Requirement
92
92
  requirements:
93
93
  - - "~>"
94
94
  - !ruby/object:Gem::Version
95
- version: '10'
95
+ version: '13'
96
96
  type: :development
97
97
  prerelease: false
98
98
  version_requirements: !ruby/object:Gem::Requirement
99
99
  requirements:
100
100
  - - "~>"
101
101
  - !ruby/object:Gem::Version
102
- version: '10'
102
+ version: '13'
103
103
  - !ruby/object:Gem::Dependency
104
104
  name: rspec
105
105
  requirement: !ruby/object:Gem::Requirement
@@ -162,24 +162,30 @@ email:
162
162
  - smart-insight-dev@emarsys.com
163
163
  executables:
164
164
  - cranium
165
+ - run_tests.sh
165
166
  extensions: []
166
167
  extra_rdoc_files: []
167
168
  files:
168
169
  - ".env"
169
170
  - ".gitignore"
170
171
  - ".rspec"
171
- - ".ruby-version"
172
+ - Dockerfile
172
173
  - Gemfile
173
174
  - LICENSE.txt
174
175
  - README.md
175
176
  - Rakefile
176
177
  - Vagrantfile
177
178
  - bin/cranium
179
+ - bin/run_tests.sh
180
+ - codeship-services.yml
181
+ - codeship-steps.yml
178
182
  - config/cucumber.yml
179
183
  - cranium.gemspec
180
184
  - db/setup.sql
181
185
  - db/start.sh
182
186
  - docker-compose.yml
187
+ - docker/create_databases.sql
188
+ - docker/initialize_greenplum.sh
183
189
  - examples/config.rb
184
190
  - examples/deduplication.rb
185
191
  - examples/import_csv_with_field_lookup_inserting_new_dimension_keys.rb
@@ -188,6 +194,7 @@ files:
188
194
  - features/archive.feature
189
195
  - features/extract/incremental_extract.feature
190
196
  - features/extract/simple_extract.feature
197
+ - features/import/delete_from_table_based_on_csv.feature
191
198
  - features/import/import_csv_to_database_as_delta.feature
192
199
  - features/import/import_csv_to_database_with_delete_insert_merging.feature
193
200
  - features/import/import_csv_to_database_with_truncate_insert.feature
@@ -247,6 +254,7 @@ files:
247
254
  - lib/cranium/file_utils.rb
248
255
  - lib/cranium/import_strategy.rb
249
256
  - lib/cranium/import_strategy/base.rb
257
+ - lib/cranium/import_strategy/delete.rb
250
258
  - lib/cranium/import_strategy/delete_insert.rb
251
259
  - lib/cranium/import_strategy/delta.rb
252
260
  - lib/cranium/import_strategy/merge.rb
@@ -271,6 +279,7 @@ files:
271
279
  - lib/cranium/transformation/sequence.rb
272
280
  - lib/cranium/transformation_record.rb
273
281
  - rake/test.rake
282
+ - repo-info.json
274
283
  - spec/cranium/application_spec.rb
275
284
  - spec/cranium/archiver_spec.rb
276
285
  - spec/cranium/command_line_options_spec.rb
@@ -318,8 +327,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
318
327
  - !ruby/object:Gem::Version
319
328
  version: '0'
320
329
  requirements: []
321
- rubyforge_project:
322
- rubygems_version: 2.7.6
330
+ rubygems_version: 3.2.27
323
331
  signing_key:
324
332
  specification_version: 4
325
333
  summary: Pure Ruby ETL framework
@@ -327,6 +335,7 @@ test_files:
327
335
  - features/archive.feature
328
336
  - features/extract/incremental_extract.feature
329
337
  - features/extract/simple_extract.feature
338
+ - features/import/delete_from_table_based_on_csv.feature
330
339
  - features/import/import_csv_to_database_as_delta.feature
331
340
  - features/import/import_csv_to_database_with_delete_insert_merging.feature
332
341
  - features/import/import_csv_to_database_with_truncate_insert.feature
data/.ruby-version DELETED
@@ -1 +0,0 @@
1
- 2.5.1