cranium 0.4.2 → 0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 5b878ec72b1afbd62b6d0c576653c4d5e5b1a0ee
4
- data.tar.gz: 41b765f1e0cf25648cf30cee548555b7f0ffaaaf
2
+ SHA256:
3
+ metadata.gz: 3d6f7888fe46d32c156a48c081e57ddf8942d483b6a42c1b69faa4e0f276a128
4
+ data.tar.gz: 9f49d912ca7cf12a8d3e90e1e6d00565099f4bf0fe4cee90e29f439c345a86df
5
5
  SHA512:
6
- metadata.gz: b52aa636e49d51d48fc86397330527fe31c40214a055ef7c405befe1e5b53fe255064259de4857a57e0b8374ae111ce5e66a3d677878b010502f5d8faadc9f3d
7
- data.tar.gz: 98d0e5a309f127c50e7910c5510908dfb5e71a6f4f347f804c0118bb21a680bf45fb64b059b30866febb81e827dac8f65ea95a2830ea3826927d47e54f773c68
6
+ metadata.gz: fb4ed093460aaf4eddde16e0ed1236e34d232ef128047683111459a836c7c544a4c5fe3ad5d05abb6609927affaf286c88f8269695153f21c1513b4c802445ea
7
+ data.tar.gz: 65730273e55b87a3ba2b35f303f21613af3e7cf9328a8f7612cee60c37c54e606f2d50a3891597275ea084ce4405bc445c3d7277d2f5cb3d31b7be1ae805a740
data/.env ADDED
@@ -0,0 +1 @@
1
+ DATABASE_HOST=127.0.0.1
data/.gitignore CHANGED
@@ -5,6 +5,7 @@
5
5
  .yardoc
6
6
  .vagrant
7
7
  .idea
8
+ .ruby-version
8
9
  Gemfile.lock
9
10
  InstalledFiles
10
11
  _yardoc
data/.rspec ADDED
@@ -0,0 +1,4 @@
1
+ --color
2
+ --require spec_helper
3
+ --order random
4
+ --fail-fast
@@ -1,4 +1,4 @@
1
- Copyright (c) 2013 Zoltan Ormandi
1
+ Copyright (c) 2013-2020 Emarsys
2
2
 
3
3
  MIT License
4
4
 
data/README.md CHANGED
@@ -18,21 +18,9 @@ Or install it yourself as:
18
18
 
19
19
  ## Development
20
20
 
21
- start up the db
22
-
23
- docker-compose create && docker-compose start
24
-
25
- find out what's the ip is (in case you're using native docker)
26
-
27
- docker-compose ps
28
-
29
- (if using docker-machine use the machine's ip)
30
- setup the DATABASE_HOST enviroment variable to this IP (192.168.64.4 in my case)
31
-
32
- export DATABASE_HOST=192.168.64.4
33
-
34
- Now, your ready to run the integration tests :)
21
+ start up the db:
35
22
 
23
+ $ docker-compose up -d
36
24
 
37
25
  ## Contributing
38
26
 
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = 'cranium'
3
- spec.version = '0.4.2'
3
+ spec.version = '0.8'
4
4
  spec.authors = ['Emarsys Technologies']
5
5
  spec.email = ['smart-insight-dev@emarsys.com']
6
6
  spec.description = %q{Provides Extract, Transform and Load functionality for loading data from CSV files to a Greenplum database.}
@@ -15,7 +15,7 @@ Gem::Specification.new do |spec|
15
15
 
16
16
  spec.add_runtime_dependency 'pg', '~> 0'
17
17
  spec.add_runtime_dependency 'progressbar', '~> 0'
18
- spec.add_runtime_dependency 'sequel', '~> 4'
18
+ spec.add_runtime_dependency 'sequel', '>= 4', '< 6'
19
19
  spec.add_runtime_dependency 'slop', '~> 3'
20
20
 
21
21
  spec.add_development_dependency 'bundler', '~> 1'
@@ -23,4 +23,5 @@ Gem::Specification.new do |spec|
23
23
  spec.add_development_dependency 'rspec', '~> 3'
24
24
  spec.add_development_dependency 'ruby-prof', '~> 0'
25
25
  spec.add_development_dependency 'cucumber', '~> 1'
26
+ spec.add_development_dependency 'dotenv', '~> 2.5'
26
27
  end
@@ -2,6 +2,7 @@ Feature: Archive source files
2
2
 
3
3
  Scenario:
4
4
  Given no "/tmp/cranium_archive" directory
5
+ And no "/tmp/cranium_storage" directory
5
6
  And a "products_1.csv" data file containing:
6
7
  """
7
8
  """
@@ -39,6 +40,8 @@ Feature: Archive source files
39
40
  end
40
41
 
41
42
  archive :products, :contacts
43
+
44
+ move :purchases, to: "/tmp/cranium_storage"
42
45
  """
43
46
  When I execute the definition
44
47
  Then the process should exit successfully
@@ -47,3 +50,6 @@ Feature: Archive source files
47
50
  | .*contacts.csv |
48
51
  | .*products_1.csv |
49
52
  | .*products_2.csv |
53
+ And the "/tmp/cranium_storage" directory should contain the following files:
54
+ | filename |
55
+ | purchases.csv |
@@ -0,0 +1,39 @@
1
+ Feature: Delete rows from table provided by CSV file
2
+
3
+ Scenario: Successful delete
4
+ Given a database table called "dim_contact" with the following fields:
5
+ | field_name | field_type |
6
+ | source_id | TEXT |
7
+ And only the following rows in the "dim_contact" database table:
8
+ | source_id (i) |
9
+ | 1 |
10
+ | 2 |
11
+ | 3 |
12
+ | 4 |
13
+ | 5 |
14
+ And a "deleted_contacts_extract.csv" data file containing:
15
+ """
16
+ source_id
17
+ 3
18
+ 4
19
+ """
20
+ And the following definition:
21
+ """
22
+ source :deleted_contacts_extract do
23
+ field :source_id, String
24
+ end
25
+
26
+ import :deleted_contacts_extract do
27
+ into :dim_contact
28
+ put :source_id
29
+
30
+ delete_on :source_id
31
+ end
32
+ """
33
+ When I execute the definition
34
+ Then the process should exit successfully
35
+ And the "dim_contact" table should contain:
36
+ | source_id |
37
+ | 1 |
38
+ | 2 |
39
+ | 5 |
@@ -3,13 +3,13 @@ Given(/^a database table called "([^"]*)" with the following fields:$/) do |tabl
3
3
  end
4
4
 
5
5
 
6
- Given (/^only the following rows in the "([^"]*)" database table:$/) do |table_name, rows|
6
+ Given(/^only the following rows in the "([^"]*)" database table:$/) do |table_name, rows|
7
7
  database_table(table_name).clear
8
8
  step %Q(the following new rows in the "#{table_name}" database table:), rows
9
9
  end
10
10
 
11
11
 
12
- Given (/^the following new rows in the "([^"]*)" database table:$/) do |table_name, rows|
12
+ Given(/^the following new rows in the "([^"]*)" database table:$/) do |table_name, rows|
13
13
  database_table(table_name).insert rows.data
14
14
  end
15
15
 
@@ -1,4 +1,5 @@
1
1
  require 'fileutils'
2
+ require 'dotenv/load'
2
3
  require_relative "../../lib/cranium"
3
4
 
4
5
  FileUtils.mkdir_p("log") unless Dir.exists?("log")
@@ -1,36 +1,37 @@
1
1
  require 'fileutils'
2
2
 
3
3
  module Cranium::Archiver
4
-
5
- def self.archive(*files)
6
- create_archive_directory
7
- archive_files files
8
- end
9
-
10
-
11
-
12
- def self.remove(*files)
13
- files.each do |file_name|
14
- FileUtils.rm File.join(Cranium.configuration.upload_path, file_name)
4
+ class << self
5
+ def archive(*files)
6
+ create_directory(Cranium.configuration.archive_directory)
7
+ archive_datetime = Time.now.strftime("%Y-%m-%d_%Hh%Mm%Ss")
8
+ move_files_from_upload_directory(files, Cranium.configuration.archive_directory, prefix: "#{archive_datetime}_")
15
9
  end
16
- end
17
-
18
10
 
11
+ def remove(*files)
12
+ files.each do |file_name|
13
+ FileUtils.rm File.join(Cranium.configuration.upload_path, file_name)
14
+ end
15
+ end
19
16
 
20
- private
21
-
22
- def self.create_archive_directory
23
- FileUtils.mkpath Cranium.configuration.archive_directory unless Dir.exists? Cranium.configuration.archive_directory
24
- end
17
+ def move(*files, target_directory:)
18
+ create_directory(target_directory)
19
+ move_files_from_upload_directory(files, target_directory)
20
+ end
25
21
 
22
+ private
26
23
 
24
+ def create_directory(path)
25
+ FileUtils.mkdir_p(path)
26
+ end
27
27
 
28
- def self.archive_files(files)
29
- archive_datetime = Time.now.strftime("%Y-%m-%d_%Hh%Mm%Ss")
30
- files.each do |file_name|
31
- FileUtils.mv File.join(Cranium.configuration.upload_path, file_name),
32
- File.join(Cranium.configuration.archive_directory, "#{archive_datetime}_#{file_name}")
28
+ def move_files_from_upload_directory(files, target_directory, prefix: "")
29
+ files.each do |file_name|
30
+ FileUtils.mv(
31
+ File.join(Cranium.configuration.upload_path, file_name),
32
+ File.join(target_directory, "#{prefix}#{file_name}")
33
+ )
34
+ end
33
35
  end
34
36
  end
35
-
36
37
  end
@@ -17,14 +17,16 @@ class Cranium::DataImporter
17
17
  private
18
18
 
19
19
  def importer_for_definition(import_definition)
20
- if [!import_definition.merge_fields.empty?, !import_definition.delete_insert_on.empty?, import_definition.truncate_insert].count(true) > 1
21
- raise StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
20
+ if [!import_definition.merge_fields.empty?, !import_definition.delete_insert_on.empty?, !import_definition.delete_on.empty?, import_definition.truncate_insert].count(true) > 1
21
+ raise StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
22
22
  end
23
23
 
24
24
  if !import_definition.merge_fields.empty?
25
25
  Cranium::ImportStrategy::Merge.new(import_definition)
26
26
  elsif !import_definition.delete_insert_on.empty?
27
27
  Cranium::ImportStrategy::DeleteInsert.new(import_definition)
28
+ elsif !import_definition.delete_on.empty?
29
+ Cranium::ImportStrategy::Delete.new(import_definition)
28
30
  elsif import_definition.truncate_insert
29
31
  Cranium::ImportStrategy::TruncateInsert.new(import_definition)
30
32
  else
@@ -13,7 +13,9 @@ module Cranium::Database
13
13
 
14
14
  def self.[](name)
15
15
  @connections ||= {}
16
- @connections[name] ||= setup_connection(@definitions[name].connect_to)
16
+ @connections[name] ||= setup_connection(@definitions[name].connect_to,
17
+ @definitions[name].retry_count,
18
+ @definitions[name].retry_delay)
17
19
  end
18
20
 
19
21
 
@@ -28,15 +30,19 @@ module Cranium::Database
28
30
  private
29
31
 
30
32
 
31
- def self.setup_connection(connection_string)
32
- connection = if Cranium.configuration.log_queries
33
- Sequel.connect(connection_string, loggers: Cranium.configuration.loggers)
34
- else
35
- Sequel.connect(connection_string)
36
- end
37
- connection.extension :connection_validator
38
- connection.pool.connection_validation_timeout = -1
39
- return connection
33
+ def self.setup_connection(connection_details, retry_count = 0, retry_delay = 0)
34
+ (retry_count + 1).times do |try_count|
35
+ connection = if Cranium.configuration.log_queries
36
+ Sequel.connect(connection_details, loggers: Cranium.configuration.loggers)
37
+ else
38
+ Sequel.connect(connection_details)
39
+ end
40
+ connection.extension :connection_validator
41
+ connection.pool.connection_validation_timeout = -1
42
+ break connection
43
+ rescue Sequel::DatabaseConnectionError
44
+ (try_count == retry_count) ? raise : sleep(retry_delay)
45
+ end
40
46
  end
41
47
 
42
48
  end
@@ -87,6 +87,14 @@ module Cranium::DSL
87
87
 
88
88
 
89
89
 
90
+ def move(*sources, to: "")
91
+ sources.each do |source_name|
92
+ Cranium::Archiver.move *Cranium.application.sources[source_name].files, target_directory: to
93
+ end
94
+ end
95
+
96
+
97
+
90
98
  def sequence(name)
91
99
  Cranium::Transformation::Sequence.new name
92
100
  end
@@ -7,11 +7,15 @@ class Cranium::DSL::DatabaseDefinition
7
7
  attr_reader :name
8
8
 
9
9
  define_attribute :connect_to
10
+ define_attribute :retry_count
11
+ define_attribute :retry_delay
10
12
 
11
13
 
12
14
 
13
15
  def initialize(name)
14
16
  @name = name
17
+ @retry_count = 0
18
+ @retry_delay = 0
15
19
  end
16
20
 
17
21
 
@@ -10,8 +10,10 @@ class Cranium::DSL::ImportDefinition
10
10
  attr_reader :merge_fields
11
11
 
12
12
  define_attribute :into
13
+ define_attribute :error_threshold
13
14
  define_boolean_attribute :truncate_insert
14
15
  define_array_attribute :delete_insert_on
16
+ define_array_attribute :delete_on
15
17
 
16
18
 
17
19
  def initialize(name)
@@ -1,46 +1,45 @@
1
1
  class Cranium::ExternalTable
2
2
 
3
- def initialize(source, db_connection)
4
- @source, @connection = source, db_connection
3
+ def initialize(source, db_connection, error_threshold: nil)
4
+ @source = source
5
+ @connection = db_connection
6
+ @error_threshold = error_threshold
5
7
  end
6
8
 
7
-
8
-
9
9
  def create
10
- @connection.run <<-sql
11
- CREATE EXTERNAL TABLE "#{name}" (
12
- #{field_definitions}
13
- )
14
- LOCATION (#{external_location})
15
- FORMAT 'CSV' (DELIMITER '#{quote @source.delimiter}' ESCAPE '#{quote @source.escape}' QUOTE '#{quote @source.quote}' HEADER)
16
- ENCODING 'UTF8'
17
- sql
10
+ @connection.run external_table_sql
18
11
  end
19
12
 
20
-
21
-
22
13
  def destroy
23
14
  @connection.run %Q[DROP EXTERNAL TABLE "#{name}"]
24
15
  end
25
16
 
26
-
27
-
28
17
  def name
29
18
  :"external_#{@source.name}"
30
19
  end
31
20
 
21
+ private
32
22
 
23
+ def external_table_sql
24
+ external_table_sql = <<~sql
25
+ CREATE EXTERNAL TABLE "#{name}" (
26
+ #{field_definitions}
27
+ )
28
+ LOCATION (#{external_location})
29
+ FORMAT 'CSV' (DELIMITER '#{quote @source.delimiter}' ESCAPE '#{quote @source.escape}' QUOTE '#{quote @source.quote}' HEADER)
30
+ ENCODING 'UTF8'
31
+ sql
33
32
 
34
- private
33
+ external_table_sql << "SEGMENT REJECT LIMIT #{@error_threshold} PERCENT\n" unless @error_threshold.nil?
34
+ external_table_sql
35
+ end
35
36
 
36
37
  def field_definitions
37
38
  @source.fields.map do |name, type|
38
39
  %Q("#{name}" #{sql_type_for_ruby_type(type)})
39
- end.join ",\n "
40
+ end.join ",\n "
40
41
  end
41
42
 
42
-
43
-
44
43
  def sql_type_for_ruby_type(type)
45
44
  case type.to_s
46
45
  when "Integer" then
@@ -58,14 +57,10 @@ class Cranium::ExternalTable
58
57
  end
59
58
  end
60
59
 
61
-
62
-
63
60
  def quote(text)
64
61
  text.gsub "'", "''"
65
62
  end
66
63
 
67
-
68
-
69
64
  def external_location
70
65
  @source.files.map do |file_name|
71
66
  "'gpfdist://#{Cranium.configuration.gpfdist_url}/#{Cranium.configuration.upload_directory}/#{file_name}'"
@@ -2,6 +2,7 @@ module Cranium::ImportStrategy
2
2
 
3
3
  autoload :Base, 'cranium/import_strategy/base'
4
4
  autoload :DeleteInsert, 'cranium/import_strategy/delete_insert'
5
+ autoload :Delete, 'cranium/import_strategy/delete'
5
6
  autoload :TruncateInsert, 'cranium/import_strategy/truncate_insert'
6
7
  autoload :Delta, 'cranium/import_strategy/delta'
7
8
  autoload :Merge, 'cranium/import_strategy/merge'
@@ -11,7 +11,7 @@ class Cranium::ImportStrategy::Base
11
11
 
12
12
 
13
13
  def import
14
- external_table = Cranium::ExternalTable.new Cranium.application.sources[import_definition.name], Cranium::Database.connection
14
+ external_table = Cranium::ExternalTable.new Cranium.application.sources[import_definition.name], Cranium::Database.connection, error_threshold: @import_definition.error_threshold
15
15
 
16
16
  external_table.create
17
17
  number_of_items_imported = import_from external_table.name
@@ -0,0 +1,34 @@
1
+ class Cranium::ImportStrategy::Delete < Cranium::ImportStrategy::Base
2
+
3
+ def import_from(source_table)
4
+ @source_table = source_table
5
+
6
+ delete_existing_records
7
+ puts @source_table
8
+ database[@source_table].count
9
+ end
10
+
11
+
12
+
13
+ private
14
+
15
+ def delete_existing_records
16
+ database.
17
+ from(Sequel.as(target_table, "target"), Sequel.as(@source_table, "source")).
18
+ where(delete_by_fields.qualify keys_with: :source, values_with: :target).
19
+ delete
20
+ end
21
+
22
+
23
+
24
+ def delete_by_fields
25
+ Cranium::Sequel::Hash[delete_field_mapping]
26
+ end
27
+
28
+
29
+
30
+ def delete_field_mapping
31
+ import_definition.field_associations.select { |_, target_field| import_definition.delete_on.include? target_field }
32
+ end
33
+
34
+ end
@@ -22,8 +22,9 @@ class Cranium::TestFramework::World
22
22
 
23
23
 
24
24
  def save_definition(definition)
25
- config = <<-config_string
25
+ config = <<~config_string
26
26
  require 'logger'
27
+ require 'date'
27
28
 
28
29
  Cranium.configure do |config|
29
30
  config.greenplum_connection_string = "#{Cranium.configuration.greenplum_connection_string}"
@@ -32,6 +33,7 @@ class Cranium::TestFramework::World
32
33
  config.upload_directory = "#{Cranium.configuration.upload_directory}"
33
34
  config.loggers << Logger.new("log/application.log")
34
35
  end
36
+
35
37
  config_string
36
38
 
37
39
  upload_directory.save_file DEFINITION_FILE, config + definition
@@ -1,44 +1,74 @@
1
- require_relative '../spec_helper'
1
+ RSpec.describe Cranium::Archiver do
2
+ subject(:archiver) { described_class }
2
3
 
3
- describe Cranium::Archiver do
4
-
5
- before(:each) do
6
- allow(Cranium).to receive_messages(configuration: Cranium::Configuration.new.tap do |config|
7
- config.gpfdist_home_directory = "gpfdist_home"
8
- config.upload_directory = "upload_dir"
9
- config.archive_directory = "path/to/archive"
10
- end)
4
+ let(:configuration) do
5
+ Cranium::Configuration.new.tap do |config|
6
+ config.gpfdist_home_directory = "tmp"
7
+ config.upload_directory = "upload_directory"
8
+ config.archive_directory = "tmp/archive_directory"
9
+ end
11
10
  end
11
+ let(:file1) { "file.txt" }
12
+ let(:file2) { "another_file.txt" }
12
13
 
14
+ before do
15
+ allow(Cranium).to receive_messages(configuration: configuration)
16
+
17
+ FileUtils.mkdir_p(configuration.upload_path)
18
+ FileUtils.touch(File.join(configuration.upload_path, file1))
19
+ FileUtils.touch(File.join(configuration.upload_path, file2))
20
+ end
13
21
 
14
22
  describe ".archive" do
15
- it "should create the archive directory if it doesn't exist" do
16
- allow(Dir).to receive(:exists?).with("path/to/archive").and_return(false)
23
+ context "when archive directory does not exist" do
24
+ before { FileUtils.rm_rf configuration.archive_directory }
17
25
 
18
- expect(FileUtils).to receive(:mkpath).with "path/to/archive"
26
+ it "creates the archive directory" do
27
+ archiver.archive file1, file2
19
28
 
20
- Cranium::Archiver.archive
29
+ expect(File.exists?(configuration.archive_directory)).to eq true
30
+ end
21
31
  end
22
32
 
23
- it "should move files to the archive directory" do
24
- allow(Dir).to receive(:exists?).with("path/to/archive").and_return(true)
25
- allow(Time).to receive(:now).and_return Time.new(2000, 1, 1, 1, 2, 3)
33
+ context "when there are some file in the upload directory" do
34
+ it "moves files to the archive directory" do
35
+ archiver.archive file1, file2
26
36
 
27
- expect(FileUtils).to receive(:mv).with "gpfdist_home/upload_dir/file.txt", "path/to/archive/2000-01-01_01h02m03s_file.txt"
28
- expect(FileUtils).to receive(:mv).with "gpfdist_home/upload_dir/another_file.txt", "path/to/archive/2000-01-01_01h02m03s_another_file.txt"
29
-
30
- Cranium::Archiver.archive "file.txt", "another_file.txt"
37
+ expect(File.exist?(File.join(configuration.upload_path, file1))).to eq false
38
+ expect(File.exist?(File.join(configuration.upload_path, file2))).to eq false
39
+ expect(File.exist?(File.join(configuration.archive_directory, Dir.glob("*#{file1}")))).to eq true
40
+ expect(File.exist?(File.join(configuration.archive_directory, Dir.glob("*#{file2}")))).to eq true
41
+ end
31
42
  end
32
43
  end
33
44
 
34
-
35
45
  describe ".remove" do
36
- it "should remove files from the upload directory" do
37
- expect(FileUtils).to receive(:rm).with "gpfdist_home/upload_dir/file.txt"
38
- expect(FileUtils).to receive(:rm).with "gpfdist_home/upload_dir/another_file.txt"
46
+ before { FileUtils.mkdir_p configuration.archive_directory }
47
+
48
+ it "removes files from the upload directory" do
49
+ archiver.remove file1, file2
39
50
 
40
- Cranium::Archiver.remove "file.txt", "another_file.txt"
51
+ expect(File.exist?(File.join(configuration.archive_directory, Dir.glob("*#{file1}")))).to eq true
52
+ expect(File.exist?(File.join(configuration.archive_directory, Dir.glob("*#{file2}")))).to eq true
41
53
  end
42
54
  end
43
55
 
56
+ describe ".move" do
57
+ let(:target_directory) { "tmp/target_directory" }
58
+
59
+ it "creates given directory if it does not exist" do
60
+ archiver.move(file1, file2, target_directory: target_directory)
61
+
62
+ expect(File.exists?(target_directory)).to eq true
63
+ end
64
+
65
+ it "moves files from upload directory into a given directory" do
66
+ archiver.move(file1, file2, target_directory: target_directory)
67
+
68
+ expect(File.exist?(File.join(configuration.upload_path, file1))).to eq false
69
+ expect(File.exist?(File.join(configuration.upload_path, file2))).to eq false
70
+ expect(File.exist?(File.join(target_directory, file1))).to eq true
71
+ expect(File.exist?(File.join(target_directory, file2))).to eq true
72
+ end
73
+ end
44
74
  end
@@ -1,11 +1,12 @@
1
1
  require_relative '../spec_helper'
2
2
 
3
3
  describe Cranium::DataImporter do
4
+ let(:connection) { double 'a_connection' }
4
5
 
5
6
  before do
6
- connection = double
7
7
  allow(Cranium::Database).to receive(:connection).and_return connection
8
8
  allow(connection).to receive(:transaction).and_yield
9
+ allow(Cranium.application).to receive(:apply_hook).with :after_import
9
10
  end
10
11
 
11
12
  let(:importer) { Cranium::DataImporter.new }
@@ -13,12 +14,32 @@ describe Cranium::DataImporter do
13
14
 
14
15
  describe "#import" do
15
16
 
17
+ context "when called with delete_on strategy" do
18
+ it "calls Delete strategy" do
19
+ import_strategy = instance_double Cranium::ImportStrategy::Delete
20
+ allow(Cranium::ImportStrategy::Delete).to receive(:new).with(definition).and_return import_strategy
21
+ expect(import_strategy).to receive(:import).and_return 0
22
+ definition.delete_on :source_id
23
+
24
+ importer.import definition
25
+ end
26
+ end
27
+
28
+ context "when called with both merge and delete_on fields set" do
29
+ it "should raise an exception" do
30
+ definition.delete_on :source_id
31
+ definition.merge_on :another_field
32
+
33
+ expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
34
+ end
35
+ end
36
+
16
37
  context "when called with both merge and delete_insert fields set" do
17
38
  it "should raise an exception" do
18
39
  definition.delete_insert_on :some_field
19
40
  definition.merge_on :another_field
20
41
 
21
- expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
42
+ expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
22
43
  end
23
44
  end
24
45
 
@@ -27,7 +48,7 @@ describe Cranium::DataImporter do
27
48
  definition.truncate_insert true
28
49
  definition.merge_on :another_field
29
50
 
30
- expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
51
+ expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
31
52
  end
32
53
  end
33
54
 
@@ -36,7 +57,7 @@ describe Cranium::DataImporter do
36
57
  definition.delete_insert_on :some_field
37
58
  definition.truncate_insert true
38
59
 
39
- expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
60
+ expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
40
61
  end
41
62
  end
42
63
 
@@ -46,7 +67,7 @@ describe Cranium::DataImporter do
46
67
  definition.merge_on :another_field
47
68
  definition.truncate_insert true
48
69
 
49
- expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
70
+ expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
50
71
  end
51
72
  end
52
73
 
@@ -81,6 +81,40 @@ describe Cranium::Database do
81
81
 
82
82
  expect(database[:dwh]).not_to eq database[:dwh2]
83
83
  end
84
+
85
+ context 'when retry_count is specified' do
86
+ before do
87
+ database.register_database :dwh do
88
+ connect_to "other connection string"
89
+ retry_count 3
90
+ retry_delay 15
91
+ end
92
+ allow(database).to receive(:sleep)
93
+ end
94
+
95
+ it "should retry connecting to the DB the specified number of times" do
96
+ call_count = 0
97
+ allow(Sequel).to receive(:connect) do
98
+ call_count += 1
99
+ call_count < 3 ? raise(Sequel::DatabaseConnectionError) : connection
100
+ end
101
+
102
+ expect(database[:dwh]).to eq connection
103
+ end
104
+
105
+ it "should not retry connecting to the DB more than the specified number of times" do
106
+ allow(Sequel).to receive(:connect).exactly(4).times.and_raise(Sequel::DatabaseConnectionError)
107
+
108
+ expect { database[:dwh] }.to raise_error(Sequel::DatabaseConnectionError)
109
+ end
110
+
111
+ it "should wait retry_delay seconds between connection attempts" do
112
+ allow(Sequel).to receive(:connect).and_raise(Sequel::DatabaseConnectionError)
113
+ expect(database).to receive(:sleep).with(15).exactly(3).times
114
+
115
+ expect { database[:dwh] }.to raise_error(Sequel::DatabaseConnectionError)
116
+ end
117
+ end
84
118
  end
85
119
 
86
120
  end
@@ -20,4 +20,38 @@ describe Cranium::DSL::DatabaseDefinition do
20
20
  end
21
21
  end
22
22
 
23
+
24
+ describe "#retry_count" do
25
+ context 'when not set' do
26
+ it "should return 0 by default" do
27
+ expect(database.retry_count).to eq(0)
28
+ end
29
+ end
30
+
31
+ context 'when set' do
32
+ it "should return the number of retries specified for the database" do
33
+ database.retry_count 3
34
+
35
+ expect(database.retry_count).to eq(3)
36
+ end
37
+ end
38
+ end
39
+
40
+
41
+ describe "#retry_delay" do
42
+ context 'when not set' do
43
+ it "should return 0 by default" do
44
+ expect(database.retry_delay).to eq(0)
45
+ end
46
+ end
47
+
48
+ context 'when set' do
49
+ it "should return the number of retries specified for the database" do
50
+ database.retry_delay 15
51
+
52
+ expect(database.retry_delay).to eq(15)
53
+ end
54
+ end
55
+ end
56
+
23
57
  end
@@ -12,6 +12,14 @@ describe Cranium::DSL::ImportDefinition do
12
12
  end
13
13
  end
14
14
 
15
+ describe "#error_threshold" do
16
+ it "should set the error threshold to the given percentage" do
17
+ import.error_threshold 10
18
+
19
+ expect(import.error_threshold).to eq 10
20
+ end
21
+ end
22
+
15
23
 
16
24
  describe "#name" do
17
25
  it "should return the name of the import definition" do
@@ -38,11 +38,11 @@ describe Cranium::DSL::SourceDefinition do
38
38
 
39
39
  it "should return the fields and types that were set" do
40
40
  source.field :field1, String
41
- source.field :field2, Fixnum
41
+ source.field :field2, Integer
42
42
 
43
43
  expect(source.fields).to eq({
44
44
  field1: String,
45
- field2: Fixnum
45
+ field2: Integer
46
46
  })
47
47
  end
48
48
  end
@@ -97,6 +97,22 @@ describe Cranium::DSL do
97
97
  end
98
98
 
99
99
 
100
+ describe "#move" do
101
+ let(:target_directory) { "/tmp/target" }
102
+
103
+ it "should move files for the specified sources" do
104
+ allow(Cranium.application).to receive_messages sources: {first_source: double(files: ["file1", "file2"]),
105
+ second_source: double(files: ["file3"]),
106
+ third_source: double(files: ["file4"])}
107
+
108
+ expect(Cranium::Archiver).to receive(:move).with "file1", "file2", target_directory: target_directory
109
+ expect(Cranium::Archiver).to receive(:move).with "file3", target_directory: target_directory
110
+
111
+ dsl_object.move :first_source, :second_source, to: target_directory
112
+ end
113
+ end
114
+
115
+
100
116
  describe "#sequence" do
101
117
  it "should return a sequence with the specified name" do
102
118
  result = dsl_object.sequence "test_sequence"
@@ -1,5 +1,6 @@
1
1
  require_relative '../spec_helper'
2
2
  require 'ostruct'
3
+ require 'date'
3
4
 
4
5
  describe Cranium::ExternalTable do
5
6
 
@@ -19,37 +20,64 @@ describe Cranium::ExternalTable do
19
20
  source.escape "'"
20
21
  end
21
22
  end
22
- let(:external_table) { Cranium::ExternalTable.new source, connection }
23
23
 
24
+ subject(:external_table) { Cranium::ExternalTable.new source, connection }
24
25
 
25
26
  describe "#create" do
26
- it "should create an external table from the specified source" do
27
+ before do
27
28
  allow(Cranium).to receive_messages configuration: OpenStruct.new(
28
- gpfdist_url: "gpfdist-url",
29
- gpfdist_home_directory: "/gpfdist-home",
30
- upload_directory: "upload-dir"
29
+ gpfdist_url: "gpfdist-url",
30
+ gpfdist_home_directory: "/gpfdist-home",
31
+ upload_directory: "upload-dir"
31
32
  )
32
33
 
33
34
  allow(source).to receive_messages files: %w(test_products_a.csv test_products_b.csv)
35
+ end
34
36
 
35
- expect(connection).to receive(:run).with(<<-sql
36
- CREATE EXTERNAL TABLE "external_products" (
37
- "text_field" TEXT,
38
- "integer_field" INTEGER,
39
- "numeric_field" NUMERIC,
40
- "date_field" DATE,
41
- "timestamp_field" TIMESTAMP WITHOUT TIME ZONE,
42
- "boolean_field1" BOOLEAN,
43
- "boolean_field2" BOOLEAN
44
- )
45
- LOCATION ('gpfdist://gpfdist-url/upload-dir/test_products_a.csv', 'gpfdist://gpfdist-url/upload-dir/test_products_b.csv')
46
- FORMAT 'CSV' (DELIMITER ';' ESCAPE '''' QUOTE '"' HEADER)
47
- ENCODING 'UTF8'
37
+ it "should create an external table from the specified source" do
38
+ expect(connection).to receive(:run).with(<<~sql
39
+ CREATE EXTERNAL TABLE "external_products" (
40
+ "text_field" TEXT,
41
+ "integer_field" INTEGER,
42
+ "numeric_field" NUMERIC,
43
+ "date_field" DATE,
44
+ "timestamp_field" TIMESTAMP WITHOUT TIME ZONE,
45
+ "boolean_field1" BOOLEAN,
46
+ "boolean_field2" BOOLEAN
47
+ )
48
+ LOCATION ('gpfdist://gpfdist-url/upload-dir/test_products_a.csv', 'gpfdist://gpfdist-url/upload-dir/test_products_b.csv')
49
+ FORMAT 'CSV' (DELIMITER ';' ESCAPE '''' QUOTE '"' HEADER)
50
+ ENCODING 'UTF8'
48
51
  sql
49
52
  )
50
53
 
51
54
  external_table.create
52
55
  end
56
+
57
+ context "with error_threshold argument" do
58
+ subject(:external_table) { Cranium::ExternalTable.new source, connection, error_threshold: 10 }
59
+
60
+ it "should create an external table from the specified source" do
61
+ expect(connection).to receive(:run).with(<<~sql
62
+ CREATE EXTERNAL TABLE "external_products" (
63
+ "text_field" TEXT,
64
+ "integer_field" INTEGER,
65
+ "numeric_field" NUMERIC,
66
+ "date_field" DATE,
67
+ "timestamp_field" TIMESTAMP WITHOUT TIME ZONE,
68
+ "boolean_field1" BOOLEAN,
69
+ "boolean_field2" BOOLEAN
70
+ )
71
+ LOCATION ('gpfdist://gpfdist-url/upload-dir/test_products_a.csv', 'gpfdist://gpfdist-url/upload-dir/test_products_b.csv')
72
+ FORMAT 'CSV' (DELIMITER ';' ESCAPE '''' QUOTE '"' HEADER)
73
+ ENCODING 'UTF8'
74
+ SEGMENT REJECT LIMIT 10 PERCENT
75
+ sql
76
+ )
77
+
78
+ external_table.create
79
+ end
80
+ end
53
81
  end
54
82
 
55
83
 
@@ -67,5 +95,4 @@ describe Cranium::ExternalTable do
67
95
  expect(external_table.name).to eq(:external_products)
68
96
  end
69
97
  end
70
-
71
98
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cranium
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.2
4
+ version: '0.8'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Emarsys Technologies
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-02-01 00:00:00.000000000 Z
11
+ date: 2020-10-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: pg
@@ -42,16 +42,22 @@ dependencies:
42
42
  name: sequel
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - "~>"
45
+ - - ">="
46
46
  - !ruby/object:Gem::Version
47
47
  version: '4'
48
+ - - "<"
49
+ - !ruby/object:Gem::Version
50
+ version: '6'
48
51
  type: :runtime
49
52
  prerelease: false
50
53
  version_requirements: !ruby/object:Gem::Requirement
51
54
  requirements:
52
- - - "~>"
55
+ - - ">="
53
56
  - !ruby/object:Gem::Version
54
57
  version: '4'
58
+ - - "<"
59
+ - !ruby/object:Gem::Version
60
+ version: '6'
55
61
  - !ruby/object:Gem::Dependency
56
62
  name: slop
57
63
  requirement: !ruby/object:Gem::Requirement
@@ -136,6 +142,20 @@ dependencies:
136
142
  - - "~>"
137
143
  - !ruby/object:Gem::Version
138
144
  version: '1'
145
+ - !ruby/object:Gem::Dependency
146
+ name: dotenv
147
+ requirement: !ruby/object:Gem::Requirement
148
+ requirements:
149
+ - - "~>"
150
+ - !ruby/object:Gem::Version
151
+ version: '2.5'
152
+ type: :development
153
+ prerelease: false
154
+ version_requirements: !ruby/object:Gem::Requirement
155
+ requirements:
156
+ - - "~>"
157
+ - !ruby/object:Gem::Version
158
+ version: '2.5'
139
159
  description: Provides Extract, Transform and Load functionality for loading data from
140
160
  CSV files to a Greenplum database.
141
161
  email:
@@ -145,8 +165,9 @@ executables:
145
165
  extensions: []
146
166
  extra_rdoc_files: []
147
167
  files:
168
+ - ".env"
148
169
  - ".gitignore"
149
- - ".ruby-version"
170
+ - ".rspec"
150
171
  - Gemfile
151
172
  - LICENSE.txt
152
173
  - README.md
@@ -166,6 +187,7 @@ files:
166
187
  - features/archive.feature
167
188
  - features/extract/incremental_extract.feature
168
189
  - features/extract/simple_extract.feature
190
+ - features/import/delete_from_table_based_on_csv.feature
169
191
  - features/import/import_csv_to_database_as_delta.feature
170
192
  - features/import/import_csv_to_database_with_delete_insert_merging.feature
171
193
  - features/import/import_csv_to_database_with_truncate_insert.feature
@@ -225,6 +247,7 @@ files:
225
247
  - lib/cranium/file_utils.rb
226
248
  - lib/cranium/import_strategy.rb
227
249
  - lib/cranium/import_strategy/base.rb
250
+ - lib/cranium/import_strategy/delete.rb
228
251
  - lib/cranium/import_strategy/delete_insert.rb
229
252
  - lib/cranium/import_strategy/delta.rb
230
253
  - lib/cranium/import_strategy/merge.rb
@@ -296,8 +319,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
296
319
  - !ruby/object:Gem::Version
297
320
  version: '0'
298
321
  requirements: []
299
- rubyforge_project:
300
- rubygems_version: 2.6.4
322
+ rubygems_version: 3.0.3
301
323
  signing_key:
302
324
  specification_version: 4
303
325
  summary: Pure Ruby ETL framework
@@ -305,6 +327,7 @@ test_files:
305
327
  - features/archive.feature
306
328
  - features/extract/incremental_extract.feature
307
329
  - features/extract/simple_extract.feature
330
+ - features/import/delete_from_table_based_on_csv.feature
308
331
  - features/import/import_csv_to_database_as_delta.feature
309
332
  - features/import/import_csv_to_database_with_delete_insert_merging.feature
310
333
  - features/import/import_csv_to_database_with_truncate_insert.feature
@@ -1 +0,0 @@
1
- 2.3.0