cranium 0.4.2 → 0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.env +1 -0
- data/.gitignore +1 -0
- data/.rspec +4 -0
- data/LICENSE.txt +1 -1
- data/README.md +2 -14
- data/cranium.gemspec +3 -2
- data/features/archive.feature +6 -0
- data/features/import/delete_from_table_based_on_csv.feature +39 -0
- data/features/step_definitions/database_table_steps.rb +2 -2
- data/features/support/env.rb +1 -0
- data/lib/cranium/archiver.rb +25 -24
- data/lib/cranium/data_importer.rb +4 -2
- data/lib/cranium/database.rb +16 -10
- data/lib/cranium/dsl.rb +8 -0
- data/lib/cranium/dsl/database_definition.rb +4 -0
- data/lib/cranium/dsl/import_definition.rb +2 -0
- data/lib/cranium/external_table.rb +19 -24
- data/lib/cranium/import_strategy.rb +1 -0
- data/lib/cranium/import_strategy/base.rb +1 -1
- data/lib/cranium/import_strategy/delete.rb +34 -0
- data/lib/cranium/test_framework/world.rb +3 -1
- data/spec/cranium/archiver_spec.rb +55 -25
- data/spec/cranium/data_importer_spec.rb +26 -5
- data/spec/cranium/database_spec.rb +34 -0
- data/spec/cranium/dsl/database_definition_spec.rb +34 -0
- data/spec/cranium/dsl/import_definition_spec.rb +8 -0
- data/spec/cranium/dsl/source_definition_spec.rb +2 -2
- data/spec/cranium/dsl_spec.rb +16 -0
- data/spec/cranium/external_table_spec.rb +46 -19
- metadata +30 -7
- data/.ruby-version +0 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
-
-  metadata.gz:
-  data.tar.gz:
+SHA256:
+  metadata.gz: 3d6f7888fe46d32c156a48c081e57ddf8942d483b6a42c1b69faa4e0f276a128
+  data.tar.gz: 9f49d912ca7cf12a8d3e90e1e6d00565099f4bf0fe4cee90e29f439c345a86df
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: fb4ed093460aaf4eddde16e0ed1236e34d232ef128047683111459a836c7c544a4c5fe3ad5d05abb6609927affaf286c88f8269695153f21c1513b4c802445ea
+  data.tar.gz: 65730273e55b87a3ba2b35f303f21613af3e7cf9328a8f7612cee60c37c54e606f2d50a3891597275ea084ce4405bc445c3d7277d2f5cb3d31b7be1ae805a740
data/.gitignore
CHANGED
data/.rspec
ADDED
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -18,21 +18,9 @@ Or install it yourself as:
 
 ## Development
 
-start up the db
-
-docker-compose create && docker-compose start
-
-find out what's the ip is (in case you're using native docker)
-
-docker-compose ps
-
-(if using docker-machine use the machine's ip)
-setup the DATABASE_HOST enviroment variable to this IP (192.168.64.4 in my case)
-
-export DATABASE_HOST=192.168.64.4
-
-Now, your ready to run the integration tests :)
+start up the db:
 
+$ docker-compose up -d
 
 ## Contributing
 
data/cranium.gemspec
CHANGED
@@ -1,6 +1,6 @@
 Gem::Specification.new do |spec|
   spec.name = 'cranium'
-  spec.version = '0.4.2'
+  spec.version = '0.8'
   spec.authors = ['Emarsys Technologies']
   spec.email = ['smart-insight-dev@emarsys.com']
   spec.description = %q{Provides Extract, Transform and Load functionality for loading data from CSV files to a Greenplum database.}
@@ -15,7 +15,7 @@ Gem::Specification.new do |spec|
 
   spec.add_runtime_dependency 'pg', '~> 0'
   spec.add_runtime_dependency 'progressbar', '~> 0'
-  spec.add_runtime_dependency 'sequel', '
+  spec.add_runtime_dependency 'sequel', '>= 4', '< 6'
   spec.add_runtime_dependency 'slop', '~> 3'
 
   spec.add_development_dependency 'bundler', '~> 1'
@@ -23,4 +23,5 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency 'rspec', '~> 3'
   spec.add_development_dependency 'ruby-prof', '~> 0'
   spec.add_development_dependency 'cucumber', '~> 1'
+  spec.add_development_dependency 'dotenv', '~> 2.5'
 end
data/features/archive.feature
CHANGED
@@ -2,6 +2,7 @@ Feature: Archive source files
 
   Scenario:
     Given no "/tmp/cranium_archive" directory
+    And no "/tmp/cranium_storage" directory
     And a "products_1.csv" data file containing:
       """
       """
@@ -39,6 +40,8 @@ Feature: Archive source files
       end
 
       archive :products, :contacts
+
+      move :purchases, to: "/tmp/cranium_storage"
       """
     When I execute the definition
     Then the process should exit successfully
@@ -47,3 +50,6 @@ Feature: Archive source files
       | .*contacts.csv |
      | .*products_1.csv |
      | .*products_2.csv |
+    And the "/tmp/cranium_storage" directory should contain the following files:
+      | filename |
+      | purchases.csv |
data/features/import/delete_from_table_based_on_csv.feature
ADDED
@@ -0,0 +1,39 @@
+Feature: Delete rows from table provided by CSV file
+
+  Scenario: Successful delete
+    Given a database table called "dim_contact" with the following fields:
+      | field_name | field_type |
+      | source_id | TEXT |
+    And only the following rows in the "dim_contact" database table:
+      | source_id (i) |
+      | 1 |
+      | 2 |
+      | 3 |
+      | 4 |
+      | 5 |
+    And a "deleted_contacts_extract.csv" data file containing:
+      """
+      source_id
+      3
+      4
+      """
+    And the following definition:
+      """
+      source :deleted_contacts_extract do
+        field :source_id, String
+      end
+
+      import :deleted_contacts_extract do
+        into :dim_contact
+        put :source_id
+
+        delete_on :source_id
+      end
+      """
+    When I execute the definition
+    Then the process should exit successfully
+    And the "dim_contact" table should contain:
+      | source_id |
+      | 1 |
+      | 2 |
+      | 5 |
data/features/step_definitions/database_table_steps.rb
CHANGED
@@ -3,13 +3,13 @@ Given(/^a database table called "([^"]*)" with the following fields:$/) do |tabl
 end
 
 
-Given
+Given(/^only the following rows in the "([^"]*)" database table:$/) do |table_name, rows|
   database_table(table_name).clear
   step %Q(the following new rows in the "#{table_name}" database table:), rows
 end
 
 
-Given
+Given(/^the following new rows in the "([^"]*)" database table:$/) do |table_name, rows|
   database_table(table_name).insert rows.data
 end
 
data/features/support/env.rb
CHANGED
data/lib/cranium/archiver.rb
CHANGED
@@ -1,36 +1,37 @@
 require 'fileutils'
 
 module Cranium::Archiver
-
-
-
-
-
-
-
-
-  def self.remove(*files)
-    files.each do |file_name|
-      FileUtils.rm File.join(Cranium.configuration.upload_path, file_name)
+  class << self
+    def archive(*files)
+      create_directory(Cranium.configuration.archive_directory)
+      archive_datetime = Time.now.strftime("%Y-%m-%d_%Hh%Mm%Ss")
+      move_files_from_upload_directory(files, Cranium.configuration.archive_directory, prefix: "#{archive_datetime}_")
     end
-  end
-
 
+    def remove(*files)
+      files.each do |file_name|
+        FileUtils.rm File.join(Cranium.configuration.upload_path, file_name)
+      end
+    end
 
-
-
-
-
-  end
+    def move(*files, target_directory:)
+      create_directory(target_directory)
+      move_files_from_upload_directory(files, target_directory)
+    end
 
+    private
 
+    def create_directory(path)
+      FileUtils.mkdir_p(path)
+    end
 
-
-
-
-
-
+    def move_files_from_upload_directory(files, target_directory, prefix: "")
+      files.each do |file_name|
+        FileUtils.mv(
+          File.join(Cranium.configuration.upload_path, file_name),
+          File.join(target_directory, "#{prefix}#{file_name}")
+        )
+      end
     end
   end
-
 end
data/lib/cranium/data_importer.rb
CHANGED
@@ -17,14 +17,16 @@ class Cranium::DataImporter
   private
 
   def importer_for_definition(import_definition)
-    if [!import_definition.merge_fields.empty?, !import_definition.delete_insert_on.empty?, import_definition.truncate_insert].count(true) > 1
-      raise StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
+    if [!import_definition.merge_fields.empty?, !import_definition.delete_insert_on.empty?, !import_definition.delete_on.empty?, import_definition.truncate_insert].count(true) > 1
+      raise StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
     end
 
     if !import_definition.merge_fields.empty?
       Cranium::ImportStrategy::Merge.new(import_definition)
     elsif !import_definition.delete_insert_on.empty?
       Cranium::ImportStrategy::DeleteInsert.new(import_definition)
+    elsif !import_definition.delete_on.empty?
+      Cranium::ImportStrategy::Delete.new(import_definition)
     elsif import_definition.truncate_insert
       Cranium::ImportStrategy::TruncateInsert.new(import_definition)
     else
data/lib/cranium/database.rb
CHANGED
@@ -13,7 +13,9 @@ module Cranium::Database
 
   def self.[](name)
     @connections ||= {}
-    @connections[name] ||= setup_connection(@definitions[name].connect_to)
+    @connections[name] ||= setup_connection(@definitions[name].connect_to,
+                                            @definitions[name].retry_count,
+                                            @definitions[name].retry_delay)
   end
 
 
@@ -28,15 +30,19 @@ module Cranium::Database
   private
 
 
-  def self.setup_connection(
-
-
-
-
-
-
-
-
+  def self.setup_connection(connection_details, retry_count = 0, retry_delay = 0)
+    (retry_count + 1).times do |try_count|
+      connection = if Cranium.configuration.log_queries
+                     Sequel.connect(connection_details, loggers: Cranium.configuration.loggers)
+                   else
+                     Sequel.connect(connection_details)
+                   end
+      connection.extension :connection_validator
+      connection.pool.connection_validation_timeout = -1
+      break connection
+    rescue Sequel::DatabaseConnectionError
+      (try_count == retry_count) ? raise : sleep(retry_delay)
+    end
  end
 
 end
data/lib/cranium/dsl.rb
CHANGED
@@ -87,6 +87,14 @@ module Cranium::DSL
 
 
 
+  def move(*sources, to: "")
+    sources.each do |source_name|
+      Cranium::Archiver.move *Cranium.application.sources[source_name].files, target_directory: to
+    end
+  end
+
+
+
   def sequence(name)
     Cranium::Transformation::Sequence.new name
   end
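The new `move` DSL step above simply forwards every file of the named sources to Cranium::Archiver.move. A minimal sketch of how it reads next to the existing `archive` step, restating the usage from features/archive.feature earlier in this diff (source names and the target path are illustrative):

    # Sketch only -- source names and the target path are illustrative.
    archive :products, :contacts                 # timestamped copy into the configured archive directory
    move :purchases, to: "/tmp/cranium_storage"  # plain move into the given directory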
data/lib/cranium/dsl/database_definition.rb
CHANGED
@@ -7,11 +7,15 @@ class Cranium::DSL::DatabaseDefinition
   attr_reader :name
 
   define_attribute :connect_to
+  define_attribute :retry_count
+  define_attribute :retry_delay
 
 
 
   def initialize(name)
     @name = name
+    @retry_count = 0
+    @retry_delay = 0
   end
 
 
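Both new attributes default to 0, so existing definitions keep connecting exactly once. A minimal sketch of a definition that opts into retries, modelled on the register_database example in database_spec.rb further below (the connection string is illustrative):

    # Sketch only -- the connection string is illustrative.
    Cranium::Database.register_database :dwh do
      connect_to "postgres://user:secret@dwh-host/dwh"
      retry_count 3    # up to 3 extra Sequel.connect attempts on Sequel::DatabaseConnectionError
      retry_delay 15   # sleep 15 seconds between attempts
    end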
data/lib/cranium/dsl/import_definition.rb
CHANGED
@@ -10,8 +10,10 @@ class Cranium::DSL::ImportDefinition
   attr_reader :merge_fields
 
   define_attribute :into
+  define_attribute :error_threshold
   define_boolean_attribute :truncate_insert
   define_array_attribute :delete_insert_on
+  define_array_attribute :delete_on
 
 
   def initialize(name)
data/lib/cranium/external_table.rb
CHANGED
@@ -1,46 +1,45 @@
 class Cranium::ExternalTable
 
-  def initialize(source, db_connection)
-    @source
+  def initialize(source, db_connection, error_threshold: nil)
+    @source = source
+    @connection = db_connection
+    @error_threshold = error_threshold
   end
 
-
-
   def create
-    @connection.run
-      CREATE EXTERNAL TABLE "#{name}" (
-        #{field_definitions}
-      )
-      LOCATION (#{external_location})
-      FORMAT 'CSV' (DELIMITER '#{quote @source.delimiter}' ESCAPE '#{quote @source.escape}' QUOTE '#{quote @source.quote}' HEADER)
-      ENCODING 'UTF8'
-    sql
+    @connection.run external_table_sql
   end
 
-
-
   def destroy
     @connection.run %Q[DROP EXTERNAL TABLE "#{name}"]
   end
 
-
-
   def name
     :"external_#{@source.name}"
   end
 
+  private
 
+  def external_table_sql
+    external_table_sql = <<~sql
+      CREATE EXTERNAL TABLE "#{name}" (
+        #{field_definitions}
+      )
+      LOCATION (#{external_location})
+      FORMAT 'CSV' (DELIMITER '#{quote @source.delimiter}' ESCAPE '#{quote @source.escape}' QUOTE '#{quote @source.quote}' HEADER)
+      ENCODING 'UTF8'
+    sql
 
-
+    external_table_sql << "SEGMENT REJECT LIMIT #{@error_threshold} PERCENT\n" unless @error_threshold.nil?
+    external_table_sql
+  end
 
   def field_definitions
     @source.fields.map do |name, type|
       %Q("#{name}" #{sql_type_for_ruby_type(type)})
-    end.join ",\n
+    end.join ",\n "
   end
 
-
-
   def sql_type_for_ruby_type(type)
     case type.to_s
       when "Integer" then
@@ -58,14 +57,10 @@ class Cranium::ExternalTable
     end
   end
 
-
-
   def quote(text)
     text.gsub "'", "''"
   end
 
-
-
   def external_location
     @source.files.map do |file_name|
       "'gpfdist://#{Cranium.configuration.gpfdist_url}/#{Cranium.configuration.upload_directory}/#{file_name}'"
data/lib/cranium/import_strategy.rb
CHANGED
@@ -2,6 +2,7 @@ module Cranium::ImportStrategy
 
   autoload :Base, 'cranium/import_strategy/base'
   autoload :DeleteInsert, 'cranium/import_strategy/delete_insert'
+  autoload :Delete, 'cranium/import_strategy/delete'
   autoload :TruncateInsert, 'cranium/import_strategy/truncate_insert'
   autoload :Delta, 'cranium/import_strategy/delta'
   autoload :Merge, 'cranium/import_strategy/merge'
data/lib/cranium/import_strategy/base.rb
CHANGED
@@ -11,7 +11,7 @@ class Cranium::ImportStrategy::Base
 
 
   def import
-    external_table = Cranium::ExternalTable.new Cranium.application.sources[import_definition.name], Cranium::Database.connection
+    external_table = Cranium::ExternalTable.new Cranium.application.sources[import_definition.name], Cranium::Database.connection, error_threshold: @import_definition.error_threshold
 
     external_table.create
     number_of_items_imported = import_from external_table.name
data/lib/cranium/import_strategy/delete.rb
ADDED
@@ -0,0 +1,34 @@
+class Cranium::ImportStrategy::Delete < Cranium::ImportStrategy::Base
+
+  def import_from(source_table)
+    @source_table = source_table
+
+    delete_existing_records
+    puts @source_table
+    database[@source_table].count
+  end
+
+
+
+  private
+
+  def delete_existing_records
+    database.
+      from(Sequel.as(target_table, "target"), Sequel.as(@source_table, "source")).
+      where(delete_by_fields.qualify keys_with: :source, values_with: :target).
+      delete
+  end
+
+
+
+  def delete_by_fields
+    Cranium::Sequel::Hash[delete_field_mapping]
+  end
+
+
+
+  def delete_field_mapping
+    import_definition.field_associations.select { |_, target_field| import_definition.delete_on.include? target_field }
+  end
+
+end
data/lib/cranium/test_framework/world.rb
CHANGED
@@ -22,8 +22,9 @@ class Cranium::TestFramework::World
 
 
   def save_definition(definition)
-    config =
+    config = <<~config_string
       require 'logger'
+      require 'date'
 
       Cranium.configure do |config|
         config.greenplum_connection_string = "#{Cranium.configuration.greenplum_connection_string}"
@@ -32,6 +33,7 @@ class Cranium::TestFramework::World
         config.upload_directory = "#{Cranium.configuration.upload_directory}"
         config.loggers << Logger.new("log/application.log")
       end
+
     config_string
 
     upload_directory.save_file DEFINITION_FILE, config + definition
data/spec/cranium/archiver_spec.rb
CHANGED
@@ -1,44 +1,74 @@
-
+RSpec.describe Cranium::Archiver do
+  subject(:archiver) { described_class }
 
-
-
-
-
-    config.
-
-    config.archive_directory = "path/to/archive"
-  end)
+  let(:configuration) do
+    Cranium::Configuration.new.tap do |config|
+      config.gpfdist_home_directory = "tmp"
+      config.upload_directory = "upload_directory"
+      config.archive_directory = "tmp/archive_directory"
+    end
   end
+  let(:file1) { "file.txt" }
+  let(:file2) { "another_file.txt" }
 
+  before do
+    allow(Cranium).to receive_messages(configuration: configuration)
+
+    FileUtils.mkdir_p(configuration.upload_path)
+    FileUtils.touch(File.join(configuration.upload_path, file1))
+    FileUtils.touch(File.join(configuration.upload_path, file2))
+  end
 
   describe ".archive" do
-
-
+    context "when archive directory does not exist" do
+      before { FileUtils.rm_rf configuration.archive_directory }
 
-
+      it "creates the archive directory" do
+        archiver.archive file1, file2
 
-
+        expect(File.exists?(configuration.archive_directory)).to eq true
+      end
     end
 
-
-
-
+    context "when there are some file in the upload directory" do
+      it "moves files to the archive directory" do
+        archiver.archive file1, file2
 
-
-
-
-
+        expect(File.exist?(File.join(configuration.upload_path, file1))).to eq false
+        expect(File.exist?(File.join(configuration.upload_path, file2))).to eq false
+        expect(File.exist?(File.join(configuration.archive_directory, Dir.glob("*#{file1}")))).to eq true
+        expect(File.exist?(File.join(configuration.archive_directory, Dir.glob("*#{file2}")))).to eq true
+      end
     end
   end
 
-
   describe ".remove" do
-
-
-
+    before { FileUtils.mkdir_p configuration.archive_directory }
+
+    it "removes files from the upload directory" do
+      archiver.remove file1, file2
 
-
+      expect(File.exist?(File.join(configuration.archive_directory, Dir.glob("*#{file1}")))).to eq true
+      expect(File.exist?(File.join(configuration.archive_directory, Dir.glob("*#{file2}")))).to eq true
    end
  end
 
+  describe ".move" do
+    let(:target_directory) { "tmp/target_directory" }
+
+    it "creates given directory if it does not exist" do
+      archiver.move(file1, file2, target_directory: target_directory)
+
+      expect(File.exists?(target_directory)).to eq true
+    end
+
+    it "moves files from upload directory into a given directory" do
+      archiver.move(file1, file2, target_directory: target_directory)
+
+      expect(File.exist?(File.join(configuration.upload_path, file1))).to eq false
+      expect(File.exist?(File.join(configuration.upload_path, file2))).to eq false
+      expect(File.exist?(File.join(target_directory, file1))).to eq true
+      expect(File.exist?(File.join(target_directory, file2))).to eq true
+    end
+  end
 end
data/spec/cranium/data_importer_spec.rb
CHANGED
@@ -1,11 +1,12 @@
 require_relative '../spec_helper'
 
 describe Cranium::DataImporter do
+  let(:connection) { double 'a_connection' }
 
   before do
-    connection = double
     allow(Cranium::Database).to receive(:connection).and_return connection
     allow(connection).to receive(:transaction).and_yield
+    allow(Cranium.application).to receive(:apply_hook).with :after_import
   end
 
   let(:importer) { Cranium::DataImporter.new }
@@ -13,12 +14,32 @@ describe Cranium::DataImporter do
 
   describe "#import" do
 
+    context "when called with delete_on strategy" do
+      it "calls Delete strategy" do
+        import_strategy = instance_double Cranium::ImportStrategy::Delete
+        allow(Cranium::ImportStrategy::Delete).to receive(:new).with(definition).and_return import_strategy
+        expect(import_strategy).to receive(:import).and_return 0
+        definition.delete_on :source_id
+
+        importer.import definition
+      end
+    end
+
+    context "when called with both merge and delete_on fields set" do
+      it "should raise an exception" do
+        definition.delete_on :source_id
+        definition.merge_on :another_field
+
+        expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
+      end
+    end
+
     context "when called with both merge and delete_insert fields set" do
       it "should raise an exception" do
         definition.delete_insert_on :some_field
         definition.merge_on :another_field
 
-        expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
+        expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
       end
     end
 
@@ -27,7 +48,7 @@ describe Cranium::DataImporter do
         definition.truncate_insert true
         definition.merge_on :another_field
 
-        expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
+        expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
       end
     end
 
@@ -36,7 +57,7 @@ describe Cranium::DataImporter do
         definition.delete_insert_on :some_field
         definition.truncate_insert true
 
-        expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
+        expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
      end
    end
 
@@ -46,7 +67,7 @@ describe Cranium::DataImporter do
         definition.merge_on :another_field
         definition.truncate_insert true
 
-        expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
+        expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
      end
    end
 
data/spec/cranium/database_spec.rb
CHANGED
@@ -81,6 +81,40 @@ describe Cranium::Database do
 
       expect(database[:dwh]).not_to eq database[:dwh2]
     end
+
+    context 'when retry_count is specified' do
+      before do
+        database.register_database :dwh do
+          connect_to "other connection string"
+          retry_count 3
+          retry_delay 15
+        end
+        allow(database).to receive(:sleep)
+      end
+
+      it "should retry connecting to the DB the specified number of times" do
+        call_count = 0
+        allow(Sequel).to receive(:connect) do
+          call_count += 1
+          call_count < 3 ? raise(Sequel::DatabaseConnectionError) : connection
+        end
+
+        expect(database[:dwh]).to eq connection
+      end
+
+      it "should not retry connecting to the DB more than the specified number of times" do
+        allow(Sequel).to receive(:connect).exactly(4).times.and_raise(Sequel::DatabaseConnectionError)
+
+        expect { database[:dwh] }.to raise_error(Sequel::DatabaseConnectionError)
+      end
+
+      it "should wait retry_delay seconds between connection attempts" do
+        allow(Sequel).to receive(:connect).and_raise(Sequel::DatabaseConnectionError)
+        expect(database).to receive(:sleep).with(15).exactly(3).times
+
+        expect { database[:dwh] }.to raise_error(Sequel::DatabaseConnectionError)
+      end
+    end
   end
 
 end
data/spec/cranium/dsl/database_definition_spec.rb
CHANGED
@@ -20,4 +20,38 @@ describe Cranium::DSL::DatabaseDefinition do
     end
   end
 
+
+  describe "#retry_count" do
+    context 'when not set' do
+      it "should return 0 by default" do
+        expect(database.retry_count).to eq(0)
+      end
+    end
+
+    context 'when set' do
+      it "should return the number of retries specified for the database" do
+        database.retry_count 3
+
+        expect(database.retry_count).to eq(3)
+      end
+    end
+  end
+
+
+  describe "#retry_delay" do
+    context 'when not set' do
+      it "should return 0 by default" do
+        expect(database.retry_delay).to eq(0)
+      end
+    end
+
+    context 'when set' do
+      it "should return the number of retries specified for the database" do
+        database.retry_delay 15
+
+        expect(database.retry_delay).to eq(15)
+      end
+    end
+  end
+
 end
data/spec/cranium/dsl/import_definition_spec.rb
CHANGED
@@ -12,6 +12,14 @@ describe Cranium::DSL::ImportDefinition do
     end
   end
 
+  describe "#error_threshold" do
+    it "should set the error threshold to the given percentage" do
+      import.error_threshold 10
+
+      expect(import.error_threshold).to eq 10
+    end
+  end
+
 
   describe "#name" do
     it "should return the name of the import definition" do
data/spec/cranium/dsl/source_definition_spec.rb
CHANGED
@@ -38,11 +38,11 @@ describe Cranium::DSL::SourceDefinition do
 
     it "should return the fields and types that were set" do
       source.field :field1, String
-      source.field :field2,
+      source.field :field2, Integer
 
       expect(source.fields).to eq({
        field1: String,
-        field2:
+        field2: Integer
      })
    end
  end
data/spec/cranium/dsl_spec.rb
CHANGED
@@ -97,6 +97,22 @@ describe Cranium::DSL do
   end
 
 
+  describe "#move" do
+    let(:target_directory) { "/tmp/target" }
+
+    it "should move files for the specified sources" do
+      allow(Cranium.application).to receive_messages sources: {first_source: double(files: ["file1", "file2"]),
+                                                               second_source: double(files: ["file3"]),
+                                                               third_source: double(files: ["file4"])}
+
+      expect(Cranium::Archiver).to receive(:move).with "file1", "file2", target_directory: target_directory
+      expect(Cranium::Archiver).to receive(:move).with "file3", target_directory: target_directory
+
+      dsl_object.move :first_source, :second_source, to: target_directory
+    end
+  end
+
+
   describe "#sequence" do
     it "should return a sequence with the specified name" do
       result = dsl_object.sequence "test_sequence"
data/spec/cranium/external_table_spec.rb
CHANGED
@@ -1,5 +1,6 @@
 require_relative '../spec_helper'
 require 'ostruct'
+require 'date'
 
 describe Cranium::ExternalTable do
 
@@ -19,37 +20,64 @@ describe Cranium::ExternalTable do
       source.escape "'"
     end
   end
-  let(:external_table) { Cranium::ExternalTable.new source, connection }
 
+  subject(:external_table) { Cranium::ExternalTable.new source, connection }
 
   describe "#create" do
-
+    before do
       allow(Cranium).to receive_messages configuration: OpenStruct.new(
-
-
-
+        gpfdist_url: "gpfdist-url",
+        gpfdist_home_directory: "/gpfdist-home",
+        upload_directory: "upload-dir"
      )
 
       allow(source).to receive_messages files: %w(test_products_a.csv test_products_b.csv)
+    end
 
-
-
-
-
-
-
-
-
-
-
-
-
-
+    it "should create an external table from the specified source" do
+      expect(connection).to receive(:run).with(<<~sql
+        CREATE EXTERNAL TABLE "external_products" (
+          "text_field" TEXT,
+          "integer_field" INTEGER,
+          "numeric_field" NUMERIC,
+          "date_field" DATE,
+          "timestamp_field" TIMESTAMP WITHOUT TIME ZONE,
+          "boolean_field1" BOOLEAN,
+          "boolean_field2" BOOLEAN
+        )
+        LOCATION ('gpfdist://gpfdist-url/upload-dir/test_products_a.csv', 'gpfdist://gpfdist-url/upload-dir/test_products_b.csv')
+        FORMAT 'CSV' (DELIMITER ';' ESCAPE '''' QUOTE '"' HEADER)
+        ENCODING 'UTF8'
      sql
      )
 
       external_table.create
    end
+
+    context "with error_threshold argument" do
+      subject(:external_table) { Cranium::ExternalTable.new source, connection, error_threshold: 10 }
+
+      it "should create an external table from the specified source" do
+        expect(connection).to receive(:run).with(<<~sql
+          CREATE EXTERNAL TABLE "external_products" (
+            "text_field" TEXT,
+            "integer_field" INTEGER,
+            "numeric_field" NUMERIC,
+            "date_field" DATE,
+            "timestamp_field" TIMESTAMP WITHOUT TIME ZONE,
+            "boolean_field1" BOOLEAN,
+            "boolean_field2" BOOLEAN
+          )
+          LOCATION ('gpfdist://gpfdist-url/upload-dir/test_products_a.csv', 'gpfdist://gpfdist-url/upload-dir/test_products_b.csv')
+          FORMAT 'CSV' (DELIMITER ';' ESCAPE '''' QUOTE '"' HEADER)
+          ENCODING 'UTF8'
+          SEGMENT REJECT LIMIT 10 PERCENT
+        sql
+        )
+
+        external_table.create
+      end
+    end
  end
 
 
@@ -67,5 +95,4 @@ describe Cranium::ExternalTable do
       expect(external_table.name).to eq(:external_products)
    end
  end
-
 end
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: cranium
 version: !ruby/object:Gem::Version
-  version: 0.4.2
+  version: '0.8'
 platform: ruby
 authors:
 - Emarsys Technologies
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2020-10-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: pg
@@ -42,16 +42,22 @@ dependencies:
   name: sequel
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - ">="
       - !ruby/object:Gem::Version
         version: '4'
+    - - "<"
+      - !ruby/object:Gem::Version
+        version: '6'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - ">="
      - !ruby/object:Gem::Version
        version: '4'
+    - - "<"
+      - !ruby/object:Gem::Version
+        version: '6'
 - !ruby/object:Gem::Dependency
   name: slop
   requirement: !ruby/object:Gem::Requirement
@@ -136,6 +142,20 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: '1'
+- !ruby/object:Gem::Dependency
+  name: dotenv
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '2.5'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '2.5'
 description: Provides Extract, Transform and Load functionality for loading data from
   CSV files to a Greenplum database.
 email:
@@ -145,8 +165,9 @@ executables:
 extensions: []
 extra_rdoc_files: []
 files:
+- ".env"
 - ".gitignore"
-- ".ruby-version"
+- ".rspec"
 - Gemfile
 - LICENSE.txt
 - README.md
@@ -166,6 +187,7 @@ files:
 - features/archive.feature
 - features/extract/incremental_extract.feature
 - features/extract/simple_extract.feature
+- features/import/delete_from_table_based_on_csv.feature
 - features/import/import_csv_to_database_as_delta.feature
 - features/import/import_csv_to_database_with_delete_insert_merging.feature
 - features/import/import_csv_to_database_with_truncate_insert.feature
@@ -225,6 +247,7 @@ files:
 - lib/cranium/file_utils.rb
 - lib/cranium/import_strategy.rb
 - lib/cranium/import_strategy/base.rb
+- lib/cranium/import_strategy/delete.rb
 - lib/cranium/import_strategy/delete_insert.rb
 - lib/cranium/import_strategy/delta.rb
 - lib/cranium/import_strategy/merge.rb
@@ -296,8 +319,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-
-rubygems_version: 2.6.4
+rubygems_version: 3.0.3
 signing_key:
 specification_version: 4
 summary: Pure Ruby ETL framework
@@ -305,6 +327,7 @@ test_files:
 - features/archive.feature
 - features/extract/incremental_extract.feature
 - features/extract/simple_extract.feature
+- features/import/delete_from_table_based_on_csv.feature
 - features/import/import_csv_to_database_as_delta.feature
 - features/import/import_csv_to_database_with_delete_insert_merging.feature
 - features/import/import_csv_to_database_with_truncate_insert.feature
data/.ruby-version
DELETED
@@ -1 +0,0 @@
-2.3.0