cranium 0.4.2 → 0.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 5b878ec72b1afbd62b6d0c576653c4d5e5b1a0ee
4
- data.tar.gz: 41b765f1e0cf25648cf30cee548555b7f0ffaaaf
2
+ SHA256:
3
+ metadata.gz: 3d6f7888fe46d32c156a48c081e57ddf8942d483b6a42c1b69faa4e0f276a128
4
+ data.tar.gz: 9f49d912ca7cf12a8d3e90e1e6d00565099f4bf0fe4cee90e29f439c345a86df
5
5
  SHA512:
6
- metadata.gz: b52aa636e49d51d48fc86397330527fe31c40214a055ef7c405befe1e5b53fe255064259de4857a57e0b8374ae111ce5e66a3d677878b010502f5d8faadc9f3d
7
- data.tar.gz: 98d0e5a309f127c50e7910c5510908dfb5e71a6f4f347f804c0118bb21a680bf45fb64b059b30866febb81e827dac8f65ea95a2830ea3826927d47e54f773c68
6
+ metadata.gz: fb4ed093460aaf4eddde16e0ed1236e34d232ef128047683111459a836c7c544a4c5fe3ad5d05abb6609927affaf286c88f8269695153f21c1513b4c802445ea
7
+ data.tar.gz: 65730273e55b87a3ba2b35f303f21613af3e7cf9328a8f7612cee60c37c54e606f2d50a3891597275ea084ce4405bc445c3d7277d2f5cb3d31b7be1ae805a740
data/.env ADDED
@@ -0,0 +1 @@
1
+ DATABASE_HOST=127.0.0.1
data/.gitignore CHANGED
@@ -5,6 +5,7 @@
5
5
  .yardoc
6
6
  .vagrant
7
7
  .idea
8
+ .ruby-version
8
9
  Gemfile.lock
9
10
  InstalledFiles
10
11
  _yardoc
data/.rspec ADDED
@@ -0,0 +1,4 @@
1
+ --color
2
+ --require spec_helper
3
+ --order random
4
+ --fail-fast
@@ -1,4 +1,4 @@
1
- Copyright (c) 2013 Zoltan Ormandi
1
+ Copyright (c) 2013-2020 Emarsys
2
2
 
3
3
  MIT License
4
4
 
data/README.md CHANGED
@@ -18,21 +18,9 @@ Or install it yourself as:
18
18
 
19
19
  ## Development
20
20
 
21
- start up the db
22
-
23
- docker-compose create && docker-compose start
24
-
25
- find out what's the ip is (in case you're using native docker)
26
-
27
- docker-compose ps
28
-
29
- (if using docker-machine use the machine's ip)
30
- setup the DATABASE_HOST enviroment variable to this IP (192.168.64.4 in my case)
31
-
32
- export DATABASE_HOST=192.168.64.4
33
-
34
- Now, your ready to run the integration tests :)
21
+ start up the db:
35
22
 
23
+ $ docker-compose up -d
36
24
 
37
25
  ## Contributing
38
26
 
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = 'cranium'
3
- spec.version = '0.4.2'
3
+ spec.version = '0.8'
4
4
  spec.authors = ['Emarsys Technologies']
5
5
  spec.email = ['smart-insight-dev@emarsys.com']
6
6
  spec.description = %q{Provides Extract, Transform and Load functionality for loading data from CSV files to a Greenplum database.}
@@ -15,7 +15,7 @@ Gem::Specification.new do |spec|
15
15
 
16
16
  spec.add_runtime_dependency 'pg', '~> 0'
17
17
  spec.add_runtime_dependency 'progressbar', '~> 0'
18
- spec.add_runtime_dependency 'sequel', '~> 4'
18
+ spec.add_runtime_dependency 'sequel', '>= 4', '< 6'
19
19
  spec.add_runtime_dependency 'slop', '~> 3'
20
20
 
21
21
  spec.add_development_dependency 'bundler', '~> 1'
@@ -23,4 +23,5 @@ Gem::Specification.new do |spec|
23
23
  spec.add_development_dependency 'rspec', '~> 3'
24
24
  spec.add_development_dependency 'ruby-prof', '~> 0'
25
25
  spec.add_development_dependency 'cucumber', '~> 1'
26
+ spec.add_development_dependency 'dotenv', '~> 2.5'
26
27
  end
@@ -2,6 +2,7 @@ Feature: Archive source files
2
2
 
3
3
  Scenario:
4
4
  Given no "/tmp/cranium_archive" directory
5
+ And no "/tmp/cranium_storage" directory
5
6
  And a "products_1.csv" data file containing:
6
7
  """
7
8
  """
@@ -39,6 +40,8 @@ Feature: Archive source files
39
40
  end
40
41
 
41
42
  archive :products, :contacts
43
+
44
+ move :purchases, to: "/tmp/cranium_storage"
42
45
  """
43
46
  When I execute the definition
44
47
  Then the process should exit successfully
@@ -47,3 +50,6 @@ Feature: Archive source files
47
50
  | .*contacts.csv |
48
51
  | .*products_1.csv |
49
52
  | .*products_2.csv |
53
+ And the "/tmp/cranium_storage" directory should contain the following files:
54
+ | filename |
55
+ | purchases.csv |
@@ -0,0 +1,39 @@
1
+ Feature: Delete rows from table provided by CSV file
2
+
3
+ Scenario: Successful delete
4
+ Given a database table called "dim_contact" with the following fields:
5
+ | field_name | field_type |
6
+ | source_id | TEXT |
7
+ And only the following rows in the "dim_contact" database table:
8
+ | source_id (i) |
9
+ | 1 |
10
+ | 2 |
11
+ | 3 |
12
+ | 4 |
13
+ | 5 |
14
+ And a "deleted_contacts_extract.csv" data file containing:
15
+ """
16
+ source_id
17
+ 3
18
+ 4
19
+ """
20
+ And the following definition:
21
+ """
22
+ source :deleted_contacts_extract do
23
+ field :source_id, String
24
+ end
25
+
26
+ import :deleted_contacts_extract do
27
+ into :dim_contact
28
+ put :source_id
29
+
30
+ delete_on :source_id
31
+ end
32
+ """
33
+ When I execute the definition
34
+ Then the process should exit successfully
35
+ And the "dim_contact" table should contain:
36
+ | source_id |
37
+ | 1 |
38
+ | 2 |
39
+ | 5 |
@@ -3,13 +3,13 @@ Given(/^a database table called "([^"]*)" with the following fields:$/) do |tabl
3
3
  end
4
4
 
5
5
 
6
- Given (/^only the following rows in the "([^"]*)" database table:$/) do |table_name, rows|
6
+ Given(/^only the following rows in the "([^"]*)" database table:$/) do |table_name, rows|
7
7
  database_table(table_name).clear
8
8
  step %Q(the following new rows in the "#{table_name}" database table:), rows
9
9
  end
10
10
 
11
11
 
12
- Given (/^the following new rows in the "([^"]*)" database table:$/) do |table_name, rows|
12
+ Given(/^the following new rows in the "([^"]*)" database table:$/) do |table_name, rows|
13
13
  database_table(table_name).insert rows.data
14
14
  end
15
15
 
@@ -1,4 +1,5 @@
1
1
  require 'fileutils'
2
+ require 'dotenv/load'
2
3
  require_relative "../../lib/cranium"
3
4
 
4
5
  FileUtils.mkdir_p("log") unless Dir.exists?("log")
@@ -1,36 +1,37 @@
1
1
  require 'fileutils'
2
2
 
3
3
  module Cranium::Archiver
4
-
5
- def self.archive(*files)
6
- create_archive_directory
7
- archive_files files
8
- end
9
-
10
-
11
-
12
- def self.remove(*files)
13
- files.each do |file_name|
14
- FileUtils.rm File.join(Cranium.configuration.upload_path, file_name)
4
+ class << self
5
+ def archive(*files)
6
+ create_directory(Cranium.configuration.archive_directory)
7
+ archive_datetime = Time.now.strftime("%Y-%m-%d_%Hh%Mm%Ss")
8
+ move_files_from_upload_directory(files, Cranium.configuration.archive_directory, prefix: "#{archive_datetime}_")
15
9
  end
16
- end
17
-
18
10
 
11
+ def remove(*files)
12
+ files.each do |file_name|
13
+ FileUtils.rm File.join(Cranium.configuration.upload_path, file_name)
14
+ end
15
+ end
19
16
 
20
- private
21
-
22
- def self.create_archive_directory
23
- FileUtils.mkpath Cranium.configuration.archive_directory unless Dir.exists? Cranium.configuration.archive_directory
24
- end
17
+ def move(*files, target_directory:)
18
+ create_directory(target_directory)
19
+ move_files_from_upload_directory(files, target_directory)
20
+ end
25
21
 
22
+ private
26
23
 
24
+ def create_directory(path)
25
+ FileUtils.mkdir_p(path)
26
+ end
27
27
 
28
- def self.archive_files(files)
29
- archive_datetime = Time.now.strftime("%Y-%m-%d_%Hh%Mm%Ss")
30
- files.each do |file_name|
31
- FileUtils.mv File.join(Cranium.configuration.upload_path, file_name),
32
- File.join(Cranium.configuration.archive_directory, "#{archive_datetime}_#{file_name}")
28
+ def move_files_from_upload_directory(files, target_directory, prefix: "")
29
+ files.each do |file_name|
30
+ FileUtils.mv(
31
+ File.join(Cranium.configuration.upload_path, file_name),
32
+ File.join(target_directory, "#{prefix}#{file_name}")
33
+ )
34
+ end
33
35
  end
34
36
  end
35
-
36
37
  end
@@ -17,14 +17,16 @@ class Cranium::DataImporter
17
17
  private
18
18
 
19
19
  def importer_for_definition(import_definition)
20
- if [!import_definition.merge_fields.empty?, !import_definition.delete_insert_on.empty?, import_definition.truncate_insert].count(true) > 1
21
- raise StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
20
+ if [!import_definition.merge_fields.empty?, !import_definition.delete_insert_on.empty?, !import_definition.delete_on.empty?, import_definition.truncate_insert].count(true) > 1
21
+ raise StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
22
22
  end
23
23
 
24
24
  if !import_definition.merge_fields.empty?
25
25
  Cranium::ImportStrategy::Merge.new(import_definition)
26
26
  elsif !import_definition.delete_insert_on.empty?
27
27
  Cranium::ImportStrategy::DeleteInsert.new(import_definition)
28
+ elsif !import_definition.delete_on.empty?
29
+ Cranium::ImportStrategy::Delete.new(import_definition)
28
30
  elsif import_definition.truncate_insert
29
31
  Cranium::ImportStrategy::TruncateInsert.new(import_definition)
30
32
  else
@@ -13,7 +13,9 @@ module Cranium::Database
13
13
 
14
14
  def self.[](name)
15
15
  @connections ||= {}
16
- @connections[name] ||= setup_connection(@definitions[name].connect_to)
16
+ @connections[name] ||= setup_connection(@definitions[name].connect_to,
17
+ @definitions[name].retry_count,
18
+ @definitions[name].retry_delay)
17
19
  end
18
20
 
19
21
 
@@ -28,15 +30,19 @@ module Cranium::Database
28
30
  private
29
31
 
30
32
 
31
- def self.setup_connection(connection_string)
32
- connection = if Cranium.configuration.log_queries
33
- Sequel.connect(connection_string, loggers: Cranium.configuration.loggers)
34
- else
35
- Sequel.connect(connection_string)
36
- end
37
- connection.extension :connection_validator
38
- connection.pool.connection_validation_timeout = -1
39
- return connection
33
+ def self.setup_connection(connection_details, retry_count = 0, retry_delay = 0)
34
+ (retry_count + 1).times do |try_count|
35
+ connection = if Cranium.configuration.log_queries
36
+ Sequel.connect(connection_details, loggers: Cranium.configuration.loggers)
37
+ else
38
+ Sequel.connect(connection_details)
39
+ end
40
+ connection.extension :connection_validator
41
+ connection.pool.connection_validation_timeout = -1
42
+ break connection
43
+ rescue Sequel::DatabaseConnectionError
44
+ (try_count == retry_count) ? raise : sleep(retry_delay)
45
+ end
40
46
  end
41
47
 
42
48
  end
@@ -87,6 +87,14 @@ module Cranium::DSL
87
87
 
88
88
 
89
89
 
90
+ def move(*sources, to: "")
91
+ sources.each do |source_name|
92
+ Cranium::Archiver.move *Cranium.application.sources[source_name].files, target_directory: to
93
+ end
94
+ end
95
+
96
+
97
+
90
98
  def sequence(name)
91
99
  Cranium::Transformation::Sequence.new name
92
100
  end
@@ -7,11 +7,15 @@ class Cranium::DSL::DatabaseDefinition
7
7
  attr_reader :name
8
8
 
9
9
  define_attribute :connect_to
10
+ define_attribute :retry_count
11
+ define_attribute :retry_delay
10
12
 
11
13
 
12
14
 
13
15
  def initialize(name)
14
16
  @name = name
17
+ @retry_count = 0
18
+ @retry_delay = 0
15
19
  end
16
20
 
17
21
 
@@ -10,8 +10,10 @@ class Cranium::DSL::ImportDefinition
10
10
  attr_reader :merge_fields
11
11
 
12
12
  define_attribute :into
13
+ define_attribute :error_threshold
13
14
  define_boolean_attribute :truncate_insert
14
15
  define_array_attribute :delete_insert_on
16
+ define_array_attribute :delete_on
15
17
 
16
18
 
17
19
  def initialize(name)
@@ -1,46 +1,45 @@
1
1
  class Cranium::ExternalTable
2
2
 
3
- def initialize(source, db_connection)
4
- @source, @connection = source, db_connection
3
+ def initialize(source, db_connection, error_threshold: nil)
4
+ @source = source
5
+ @connection = db_connection
6
+ @error_threshold = error_threshold
5
7
  end
6
8
 
7
-
8
-
9
9
  def create
10
- @connection.run <<-sql
11
- CREATE EXTERNAL TABLE "#{name}" (
12
- #{field_definitions}
13
- )
14
- LOCATION (#{external_location})
15
- FORMAT 'CSV' (DELIMITER '#{quote @source.delimiter}' ESCAPE '#{quote @source.escape}' QUOTE '#{quote @source.quote}' HEADER)
16
- ENCODING 'UTF8'
17
- sql
10
+ @connection.run external_table_sql
18
11
  end
19
12
 
20
-
21
-
22
13
  def destroy
23
14
  @connection.run %Q[DROP EXTERNAL TABLE "#{name}"]
24
15
  end
25
16
 
26
-
27
-
28
17
  def name
29
18
  :"external_#{@source.name}"
30
19
  end
31
20
 
21
+ private
32
22
 
23
+ def external_table_sql
24
+ external_table_sql = <<~sql
25
+ CREATE EXTERNAL TABLE "#{name}" (
26
+ #{field_definitions}
27
+ )
28
+ LOCATION (#{external_location})
29
+ FORMAT 'CSV' (DELIMITER '#{quote @source.delimiter}' ESCAPE '#{quote @source.escape}' QUOTE '#{quote @source.quote}' HEADER)
30
+ ENCODING 'UTF8'
31
+ sql
33
32
 
34
- private
33
+ external_table_sql << "SEGMENT REJECT LIMIT #{@error_threshold} PERCENT\n" unless @error_threshold.nil?
34
+ external_table_sql
35
+ end
35
36
 
36
37
  def field_definitions
37
38
  @source.fields.map do |name, type|
38
39
  %Q("#{name}" #{sql_type_for_ruby_type(type)})
39
- end.join ",\n "
40
+ end.join ",\n "
40
41
  end
41
42
 
42
-
43
-
44
43
  def sql_type_for_ruby_type(type)
45
44
  case type.to_s
46
45
  when "Integer" then
@@ -58,14 +57,10 @@ class Cranium::ExternalTable
58
57
  end
59
58
  end
60
59
 
61
-
62
-
63
60
  def quote(text)
64
61
  text.gsub "'", "''"
65
62
  end
66
63
 
67
-
68
-
69
64
  def external_location
70
65
  @source.files.map do |file_name|
71
66
  "'gpfdist://#{Cranium.configuration.gpfdist_url}/#{Cranium.configuration.upload_directory}/#{file_name}'"
@@ -2,6 +2,7 @@ module Cranium::ImportStrategy
2
2
 
3
3
  autoload :Base, 'cranium/import_strategy/base'
4
4
  autoload :DeleteInsert, 'cranium/import_strategy/delete_insert'
5
+ autoload :Delete, 'cranium/import_strategy/delete'
5
6
  autoload :TruncateInsert, 'cranium/import_strategy/truncate_insert'
6
7
  autoload :Delta, 'cranium/import_strategy/delta'
7
8
  autoload :Merge, 'cranium/import_strategy/merge'
@@ -11,7 +11,7 @@ class Cranium::ImportStrategy::Base
11
11
 
12
12
 
13
13
  def import
14
- external_table = Cranium::ExternalTable.new Cranium.application.sources[import_definition.name], Cranium::Database.connection
14
+ external_table = Cranium::ExternalTable.new Cranium.application.sources[import_definition.name], Cranium::Database.connection, error_threshold: @import_definition.error_threshold
15
15
 
16
16
  external_table.create
17
17
  number_of_items_imported = import_from external_table.name
@@ -0,0 +1,34 @@
1
+ class Cranium::ImportStrategy::Delete < Cranium::ImportStrategy::Base
2
+
3
+ def import_from(source_table)
4
+ @source_table = source_table
5
+
6
+ delete_existing_records
7
+ puts @source_table
8
+ database[@source_table].count
9
+ end
10
+
11
+
12
+
13
+ private
14
+
15
+ def delete_existing_records
16
+ database.
17
+ from(Sequel.as(target_table, "target"), Sequel.as(@source_table, "source")).
18
+ where(delete_by_fields.qualify keys_with: :source, values_with: :target).
19
+ delete
20
+ end
21
+
22
+
23
+
24
+ def delete_by_fields
25
+ Cranium::Sequel::Hash[delete_field_mapping]
26
+ end
27
+
28
+
29
+
30
+ def delete_field_mapping
31
+ import_definition.field_associations.select { |_, target_field| import_definition.delete_on.include? target_field }
32
+ end
33
+
34
+ end
@@ -22,8 +22,9 @@ class Cranium::TestFramework::World
22
22
 
23
23
 
24
24
  def save_definition(definition)
25
- config = <<-config_string
25
+ config = <<~config_string
26
26
  require 'logger'
27
+ require 'date'
27
28
 
28
29
  Cranium.configure do |config|
29
30
  config.greenplum_connection_string = "#{Cranium.configuration.greenplum_connection_string}"
@@ -32,6 +33,7 @@ class Cranium::TestFramework::World
32
33
  config.upload_directory = "#{Cranium.configuration.upload_directory}"
33
34
  config.loggers << Logger.new("log/application.log")
34
35
  end
36
+
35
37
  config_string
36
38
 
37
39
  upload_directory.save_file DEFINITION_FILE, config + definition
@@ -1,44 +1,74 @@
1
- require_relative '../spec_helper'
1
+ RSpec.describe Cranium::Archiver do
2
+ subject(:archiver) { described_class }
2
3
 
3
- describe Cranium::Archiver do
4
-
5
- before(:each) do
6
- allow(Cranium).to receive_messages(configuration: Cranium::Configuration.new.tap do |config|
7
- config.gpfdist_home_directory = "gpfdist_home"
8
- config.upload_directory = "upload_dir"
9
- config.archive_directory = "path/to/archive"
10
- end)
4
+ let(:configuration) do
5
+ Cranium::Configuration.new.tap do |config|
6
+ config.gpfdist_home_directory = "tmp"
7
+ config.upload_directory = "upload_directory"
8
+ config.archive_directory = "tmp/archive_directory"
9
+ end
11
10
  end
11
+ let(:file1) { "file.txt" }
12
+ let(:file2) { "another_file.txt" }
12
13
 
14
+ before do
15
+ allow(Cranium).to receive_messages(configuration: configuration)
16
+
17
+ FileUtils.mkdir_p(configuration.upload_path)
18
+ FileUtils.touch(File.join(configuration.upload_path, file1))
19
+ FileUtils.touch(File.join(configuration.upload_path, file2))
20
+ end
13
21
 
14
22
  describe ".archive" do
15
- it "should create the archive directory if it doesn't exist" do
16
- allow(Dir).to receive(:exists?).with("path/to/archive").and_return(false)
23
+ context "when archive directory does not exist" do
24
+ before { FileUtils.rm_rf configuration.archive_directory }
17
25
 
18
- expect(FileUtils).to receive(:mkpath).with "path/to/archive"
26
+ it "creates the archive directory" do
27
+ archiver.archive file1, file2
19
28
 
20
- Cranium::Archiver.archive
29
+ expect(File.exists?(configuration.archive_directory)).to eq true
30
+ end
21
31
  end
22
32
 
23
- it "should move files to the archive directory" do
24
- allow(Dir).to receive(:exists?).with("path/to/archive").and_return(true)
25
- allow(Time).to receive(:now).and_return Time.new(2000, 1, 1, 1, 2, 3)
33
+ context "when there are some file in the upload directory" do
34
+ it "moves files to the archive directory" do
35
+ archiver.archive file1, file2
26
36
 
27
- expect(FileUtils).to receive(:mv).with "gpfdist_home/upload_dir/file.txt", "path/to/archive/2000-01-01_01h02m03s_file.txt"
28
- expect(FileUtils).to receive(:mv).with "gpfdist_home/upload_dir/another_file.txt", "path/to/archive/2000-01-01_01h02m03s_another_file.txt"
29
-
30
- Cranium::Archiver.archive "file.txt", "another_file.txt"
37
+ expect(File.exist?(File.join(configuration.upload_path, file1))).to eq false
38
+ expect(File.exist?(File.join(configuration.upload_path, file2))).to eq false
39
+ expect(File.exist?(File.join(configuration.archive_directory, Dir.glob("*#{file1}")))).to eq true
40
+ expect(File.exist?(File.join(configuration.archive_directory, Dir.glob("*#{file2}")))).to eq true
41
+ end
31
42
  end
32
43
  end
33
44
 
34
-
35
45
  describe ".remove" do
36
- it "should remove files from the upload directory" do
37
- expect(FileUtils).to receive(:rm).with "gpfdist_home/upload_dir/file.txt"
38
- expect(FileUtils).to receive(:rm).with "gpfdist_home/upload_dir/another_file.txt"
46
+ before { FileUtils.mkdir_p configuration.archive_directory }
47
+
48
+ it "removes files from the upload directory" do
49
+ archiver.remove file1, file2
39
50
 
40
- Cranium::Archiver.remove "file.txt", "another_file.txt"
51
+ expect(File.exist?(File.join(configuration.archive_directory, Dir.glob("*#{file1}")))).to eq true
52
+ expect(File.exist?(File.join(configuration.archive_directory, Dir.glob("*#{file2}")))).to eq true
41
53
  end
42
54
  end
43
55
 
56
+ describe ".move" do
57
+ let(:target_directory) { "tmp/target_directory" }
58
+
59
+ it "creates given directory if it does not exist" do
60
+ archiver.move(file1, file2, target_directory: target_directory)
61
+
62
+ expect(File.exists?(target_directory)).to eq true
63
+ end
64
+
65
+ it "moves files from upload directory into a given directory" do
66
+ archiver.move(file1, file2, target_directory: target_directory)
67
+
68
+ expect(File.exist?(File.join(configuration.upload_path, file1))).to eq false
69
+ expect(File.exist?(File.join(configuration.upload_path, file2))).to eq false
70
+ expect(File.exist?(File.join(target_directory, file1))).to eq true
71
+ expect(File.exist?(File.join(target_directory, file2))).to eq true
72
+ end
73
+ end
44
74
  end
@@ -1,11 +1,12 @@
1
1
  require_relative '../spec_helper'
2
2
 
3
3
  describe Cranium::DataImporter do
4
+ let(:connection) { double 'a_connection' }
4
5
 
5
6
  before do
6
- connection = double
7
7
  allow(Cranium::Database).to receive(:connection).and_return connection
8
8
  allow(connection).to receive(:transaction).and_yield
9
+ allow(Cranium.application).to receive(:apply_hook).with :after_import
9
10
  end
10
11
 
11
12
  let(:importer) { Cranium::DataImporter.new }
@@ -13,12 +14,32 @@ describe Cranium::DataImporter do
13
14
 
14
15
  describe "#import" do
15
16
 
17
+ context "when called with delete_on strategy" do
18
+ it "calls Delete strategy" do
19
+ import_strategy = instance_double Cranium::ImportStrategy::Delete
20
+ allow(Cranium::ImportStrategy::Delete).to receive(:new).with(definition).and_return import_strategy
21
+ expect(import_strategy).to receive(:import).and_return 0
22
+ definition.delete_on :source_id
23
+
24
+ importer.import definition
25
+ end
26
+ end
27
+
28
+ context "when called with both merge and delete_on fields set" do
29
+ it "should raise an exception" do
30
+ definition.delete_on :source_id
31
+ definition.merge_on :another_field
32
+
33
+ expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
34
+ end
35
+ end
36
+
16
37
  context "when called with both merge and delete_insert fields set" do
17
38
  it "should raise an exception" do
18
39
  definition.delete_insert_on :some_field
19
40
  definition.merge_on :another_field
20
41
 
21
- expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
42
+ expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
22
43
  end
23
44
  end
24
45
 
@@ -27,7 +48,7 @@ describe Cranium::DataImporter do
27
48
  definition.truncate_insert true
28
49
  definition.merge_on :another_field
29
50
 
30
- expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
51
+ expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
31
52
  end
32
53
  end
33
54
 
@@ -36,7 +57,7 @@ describe Cranium::DataImporter do
36
57
  definition.delete_insert_on :some_field
37
58
  definition.truncate_insert true
38
59
 
39
- expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
60
+ expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
40
61
  end
41
62
  end
42
63
 
@@ -46,7 +67,7 @@ describe Cranium::DataImporter do
46
67
  definition.merge_on :another_field
47
68
  definition.truncate_insert true
48
69
 
49
- expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
70
+ expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
50
71
  end
51
72
  end
52
73
 
@@ -81,6 +81,40 @@ describe Cranium::Database do
81
81
 
82
82
  expect(database[:dwh]).not_to eq database[:dwh2]
83
83
  end
84
+
85
+ context 'when retry_count is specified' do
86
+ before do
87
+ database.register_database :dwh do
88
+ connect_to "other connection string"
89
+ retry_count 3
90
+ retry_delay 15
91
+ end
92
+ allow(database).to receive(:sleep)
93
+ end
94
+
95
+ it "should retry connecting to the DB the specified number of times" do
96
+ call_count = 0
97
+ allow(Sequel).to receive(:connect) do
98
+ call_count += 1
99
+ call_count < 3 ? raise(Sequel::DatabaseConnectionError) : connection
100
+ end
101
+
102
+ expect(database[:dwh]).to eq connection
103
+ end
104
+
105
+ it "should not retry connecting to the DB more than the specified number of times" do
106
+ allow(Sequel).to receive(:connect).exactly(4).times.and_raise(Sequel::DatabaseConnectionError)
107
+
108
+ expect { database[:dwh] }.to raise_error(Sequel::DatabaseConnectionError)
109
+ end
110
+
111
+ it "should wait retry_delay seconds between connection attempts" do
112
+ allow(Sequel).to receive(:connect).and_raise(Sequel::DatabaseConnectionError)
113
+ expect(database).to receive(:sleep).with(15).exactly(3).times
114
+
115
+ expect { database[:dwh] }.to raise_error(Sequel::DatabaseConnectionError)
116
+ end
117
+ end
84
118
  end
85
119
 
86
120
  end
@@ -20,4 +20,38 @@ describe Cranium::DSL::DatabaseDefinition do
20
20
  end
21
21
  end
22
22
 
23
+
24
+ describe "#retry_count" do
25
+ context 'when not set' do
26
+ it "should return 0 by default" do
27
+ expect(database.retry_count).to eq(0)
28
+ end
29
+ end
30
+
31
+ context 'when set' do
32
+ it "should return the number of retries specified for the database" do
33
+ database.retry_count 3
34
+
35
+ expect(database.retry_count).to eq(3)
36
+ end
37
+ end
38
+ end
39
+
40
+
41
+ describe "#retry_delay" do
42
+ context 'when not set' do
43
+ it "should return 0 by default" do
44
+ expect(database.retry_delay).to eq(0)
45
+ end
46
+ end
47
+
48
+ context 'when set' do
49
+ it "should return the number of retries specified for the database" do
50
+ database.retry_delay 15
51
+
52
+ expect(database.retry_delay).to eq(15)
53
+ end
54
+ end
55
+ end
56
+
23
57
  end
@@ -12,6 +12,14 @@ describe Cranium::DSL::ImportDefinition do
12
12
  end
13
13
  end
14
14
 
15
+ describe "#error_threshold" do
16
+ it "should set the error threshold to the given percentage" do
17
+ import.error_threshold 10
18
+
19
+ expect(import.error_threshold).to eq 10
20
+ end
21
+ end
22
+
15
23
 
16
24
  describe "#name" do
17
25
  it "should return the name of the import definition" do
@@ -38,11 +38,11 @@ describe Cranium::DSL::SourceDefinition do
38
38
 
39
39
  it "should return the fields and types that were set" do
40
40
  source.field :field1, String
41
- source.field :field2, Fixnum
41
+ source.field :field2, Integer
42
42
 
43
43
  expect(source.fields).to eq({
44
44
  field1: String,
45
- field2: Fixnum
45
+ field2: Integer
46
46
  })
47
47
  end
48
48
  end
@@ -97,6 +97,22 @@ describe Cranium::DSL do
97
97
  end
98
98
 
99
99
 
100
+ describe "#move" do
101
+ let(:target_directory) { "/tmp/target" }
102
+
103
+ it "should move files for the specified sources" do
104
+ allow(Cranium.application).to receive_messages sources: {first_source: double(files: ["file1", "file2"]),
105
+ second_source: double(files: ["file3"]),
106
+ third_source: double(files: ["file4"])}
107
+
108
+ expect(Cranium::Archiver).to receive(:move).with "file1", "file2", target_directory: target_directory
109
+ expect(Cranium::Archiver).to receive(:move).with "file3", target_directory: target_directory
110
+
111
+ dsl_object.move :first_source, :second_source, to: target_directory
112
+ end
113
+ end
114
+
115
+
100
116
  describe "#sequence" do
101
117
  it "should return a sequence with the specified name" do
102
118
  result = dsl_object.sequence "test_sequence"
@@ -1,5 +1,6 @@
1
1
  require_relative '../spec_helper'
2
2
  require 'ostruct'
3
+ require 'date'
3
4
 
4
5
  describe Cranium::ExternalTable do
5
6
 
@@ -19,37 +20,64 @@ describe Cranium::ExternalTable do
19
20
  source.escape "'"
20
21
  end
21
22
  end
22
- let(:external_table) { Cranium::ExternalTable.new source, connection }
23
23
 
24
+ subject(:external_table) { Cranium::ExternalTable.new source, connection }
24
25
 
25
26
  describe "#create" do
26
- it "should create an external table from the specified source" do
27
+ before do
27
28
  allow(Cranium).to receive_messages configuration: OpenStruct.new(
28
- gpfdist_url: "gpfdist-url",
29
- gpfdist_home_directory: "/gpfdist-home",
30
- upload_directory: "upload-dir"
29
+ gpfdist_url: "gpfdist-url",
30
+ gpfdist_home_directory: "/gpfdist-home",
31
+ upload_directory: "upload-dir"
31
32
  )
32
33
 
33
34
  allow(source).to receive_messages files: %w(test_products_a.csv test_products_b.csv)
35
+ end
34
36
 
35
- expect(connection).to receive(:run).with(<<-sql
36
- CREATE EXTERNAL TABLE "external_products" (
37
- "text_field" TEXT,
38
- "integer_field" INTEGER,
39
- "numeric_field" NUMERIC,
40
- "date_field" DATE,
41
- "timestamp_field" TIMESTAMP WITHOUT TIME ZONE,
42
- "boolean_field1" BOOLEAN,
43
- "boolean_field2" BOOLEAN
44
- )
45
- LOCATION ('gpfdist://gpfdist-url/upload-dir/test_products_a.csv', 'gpfdist://gpfdist-url/upload-dir/test_products_b.csv')
46
- FORMAT 'CSV' (DELIMITER ';' ESCAPE '''' QUOTE '"' HEADER)
47
- ENCODING 'UTF8'
37
+ it "should create an external table from the specified source" do
38
+ expect(connection).to receive(:run).with(<<~sql
39
+ CREATE EXTERNAL TABLE "external_products" (
40
+ "text_field" TEXT,
41
+ "integer_field" INTEGER,
42
+ "numeric_field" NUMERIC,
43
+ "date_field" DATE,
44
+ "timestamp_field" TIMESTAMP WITHOUT TIME ZONE,
45
+ "boolean_field1" BOOLEAN,
46
+ "boolean_field2" BOOLEAN
47
+ )
48
+ LOCATION ('gpfdist://gpfdist-url/upload-dir/test_products_a.csv', 'gpfdist://gpfdist-url/upload-dir/test_products_b.csv')
49
+ FORMAT 'CSV' (DELIMITER ';' ESCAPE '''' QUOTE '"' HEADER)
50
+ ENCODING 'UTF8'
48
51
  sql
49
52
  )
50
53
 
51
54
  external_table.create
52
55
  end
56
+
57
+ context "with error_threshold argument" do
58
+ subject(:external_table) { Cranium::ExternalTable.new source, connection, error_threshold: 10 }
59
+
60
+ it "should create an external table from the specified source" do
61
+ expect(connection).to receive(:run).with(<<~sql
62
+ CREATE EXTERNAL TABLE "external_products" (
63
+ "text_field" TEXT,
64
+ "integer_field" INTEGER,
65
+ "numeric_field" NUMERIC,
66
+ "date_field" DATE,
67
+ "timestamp_field" TIMESTAMP WITHOUT TIME ZONE,
68
+ "boolean_field1" BOOLEAN,
69
+ "boolean_field2" BOOLEAN
70
+ )
71
+ LOCATION ('gpfdist://gpfdist-url/upload-dir/test_products_a.csv', 'gpfdist://gpfdist-url/upload-dir/test_products_b.csv')
72
+ FORMAT 'CSV' (DELIMITER ';' ESCAPE '''' QUOTE '"' HEADER)
73
+ ENCODING 'UTF8'
74
+ SEGMENT REJECT LIMIT 10 PERCENT
75
+ sql
76
+ )
77
+
78
+ external_table.create
79
+ end
80
+ end
53
81
  end
54
82
 
55
83
 
@@ -67,5 +95,4 @@ describe Cranium::ExternalTable do
67
95
  expect(external_table.name).to eq(:external_products)
68
96
  end
69
97
  end
70
-
71
98
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cranium
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.2
4
+ version: '0.8'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Emarsys Technologies
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-02-01 00:00:00.000000000 Z
11
+ date: 2020-10-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: pg
@@ -42,16 +42,22 @@ dependencies:
42
42
  name: sequel
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - "~>"
45
+ - - ">="
46
46
  - !ruby/object:Gem::Version
47
47
  version: '4'
48
+ - - "<"
49
+ - !ruby/object:Gem::Version
50
+ version: '6'
48
51
  type: :runtime
49
52
  prerelease: false
50
53
  version_requirements: !ruby/object:Gem::Requirement
51
54
  requirements:
52
- - - "~>"
55
+ - - ">="
53
56
  - !ruby/object:Gem::Version
54
57
  version: '4'
58
+ - - "<"
59
+ - !ruby/object:Gem::Version
60
+ version: '6'
55
61
  - !ruby/object:Gem::Dependency
56
62
  name: slop
57
63
  requirement: !ruby/object:Gem::Requirement
@@ -136,6 +142,20 @@ dependencies:
136
142
  - - "~>"
137
143
  - !ruby/object:Gem::Version
138
144
  version: '1'
145
+ - !ruby/object:Gem::Dependency
146
+ name: dotenv
147
+ requirement: !ruby/object:Gem::Requirement
148
+ requirements:
149
+ - - "~>"
150
+ - !ruby/object:Gem::Version
151
+ version: '2.5'
152
+ type: :development
153
+ prerelease: false
154
+ version_requirements: !ruby/object:Gem::Requirement
155
+ requirements:
156
+ - - "~>"
157
+ - !ruby/object:Gem::Version
158
+ version: '2.5'
139
159
  description: Provides Extract, Transform and Load functionality for loading data from
140
160
  CSV files to a Greenplum database.
141
161
  email:
@@ -145,8 +165,9 @@ executables:
145
165
  extensions: []
146
166
  extra_rdoc_files: []
147
167
  files:
168
+ - ".env"
148
169
  - ".gitignore"
149
- - ".ruby-version"
170
+ - ".rspec"
150
171
  - Gemfile
151
172
  - LICENSE.txt
152
173
  - README.md
@@ -166,6 +187,7 @@ files:
166
187
  - features/archive.feature
167
188
  - features/extract/incremental_extract.feature
168
189
  - features/extract/simple_extract.feature
190
+ - features/import/delete_from_table_based_on_csv.feature
169
191
  - features/import/import_csv_to_database_as_delta.feature
170
192
  - features/import/import_csv_to_database_with_delete_insert_merging.feature
171
193
  - features/import/import_csv_to_database_with_truncate_insert.feature
@@ -225,6 +247,7 @@ files:
225
247
  - lib/cranium/file_utils.rb
226
248
  - lib/cranium/import_strategy.rb
227
249
  - lib/cranium/import_strategy/base.rb
250
+ - lib/cranium/import_strategy/delete.rb
228
251
  - lib/cranium/import_strategy/delete_insert.rb
229
252
  - lib/cranium/import_strategy/delta.rb
230
253
  - lib/cranium/import_strategy/merge.rb
@@ -296,8 +319,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
296
319
  - !ruby/object:Gem::Version
297
320
  version: '0'
298
321
  requirements: []
299
- rubyforge_project:
300
- rubygems_version: 2.6.4
322
+ rubygems_version: 3.0.3
301
323
  signing_key:
302
324
  specification_version: 4
303
325
  summary: Pure Ruby ETL framework
@@ -305,6 +327,7 @@ test_files:
305
327
  - features/archive.feature
306
328
  - features/extract/incremental_extract.feature
307
329
  - features/extract/simple_extract.feature
330
+ - features/import/delete_from_table_based_on_csv.feature
308
331
  - features/import/import_csv_to_database_as_delta.feature
309
332
  - features/import/import_csv_to_database_with_delete_insert_merging.feature
310
333
  - features/import/import_csv_to_database_with_truncate_insert.feature
@@ -1 +0,0 @@
1
- 2.3.0