cranium 0.4.2 → 0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.env +1 -0
- data/.gitignore +1 -0
- data/.rspec +4 -0
- data/LICENSE.txt +1 -1
- data/README.md +2 -14
- data/cranium.gemspec +3 -2
- data/features/archive.feature +6 -0
- data/features/import/delete_from_table_based_on_csv.feature +39 -0
- data/features/step_definitions/database_table_steps.rb +2 -2
- data/features/support/env.rb +1 -0
- data/lib/cranium/archiver.rb +25 -24
- data/lib/cranium/data_importer.rb +4 -2
- data/lib/cranium/database.rb +16 -10
- data/lib/cranium/dsl.rb +8 -0
- data/lib/cranium/dsl/database_definition.rb +4 -0
- data/lib/cranium/dsl/import_definition.rb +2 -0
- data/lib/cranium/external_table.rb +19 -24
- data/lib/cranium/import_strategy.rb +1 -0
- data/lib/cranium/import_strategy/base.rb +1 -1
- data/lib/cranium/import_strategy/delete.rb +34 -0
- data/lib/cranium/test_framework/world.rb +3 -1
- data/spec/cranium/archiver_spec.rb +55 -25
- data/spec/cranium/data_importer_spec.rb +26 -5
- data/spec/cranium/database_spec.rb +34 -0
- data/spec/cranium/dsl/database_definition_spec.rb +34 -0
- data/spec/cranium/dsl/import_definition_spec.rb +8 -0
- data/spec/cranium/dsl/source_definition_spec.rb +2 -2
- data/spec/cranium/dsl_spec.rb +16 -0
- data/spec/cranium/external_table_spec.rb +46 -19
- metadata +30 -7
- data/.ruby-version +0 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 3d6f7888fe46d32c156a48c081e57ddf8942d483b6a42c1b69faa4e0f276a128
|
4
|
+
data.tar.gz: 9f49d912ca7cf12a8d3e90e1e6d00565099f4bf0fe4cee90e29f439c345a86df
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fb4ed093460aaf4eddde16e0ed1236e34d232ef128047683111459a836c7c544a4c5fe3ad5d05abb6609927affaf286c88f8269695153f21c1513b4c802445ea
|
7
|
+
data.tar.gz: 65730273e55b87a3ba2b35f303f21613af3e7cf9328a8f7612cee60c37c54e606f2d50a3891597275ea084ce4405bc445c3d7277d2f5cb3d31b7be1ae805a740
|
data/.gitignore
CHANGED
data/.rspec
ADDED
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -18,21 +18,9 @@ Or install it yourself as:
|
|
18
18
|
|
19
19
|
## Development
|
20
20
|
|
21
|
-
start up the db
|
22
|
-
|
23
|
-
docker-compose create && docker-compose start
|
24
|
-
|
25
|
-
find out what's the ip is (in case you're using native docker)
|
26
|
-
|
27
|
-
docker-compose ps
|
28
|
-
|
29
|
-
(if using docker-machine use the machine's ip)
|
30
|
-
setup the DATABASE_HOST enviroment variable to this IP (192.168.64.4 in my case)
|
31
|
-
|
32
|
-
export DATABASE_HOST=192.168.64.4
|
33
|
-
|
34
|
-
Now, your ready to run the integration tests :)
|
21
|
+
start up the db:
|
35
22
|
|
23
|
+
$ docker-compose up -d
|
36
24
|
|
37
25
|
## Contributing
|
38
26
|
|
data/cranium.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = 'cranium'
|
3
|
-
spec.version = '0.4.2'
|
3
|
+
spec.version = '0.8'
|
4
4
|
spec.authors = ['Emarsys Technologies']
|
5
5
|
spec.email = ['smart-insight-dev@emarsys.com']
|
6
6
|
spec.description = %q{Provides Extract, Transform and Load functionality for loading data from CSV files to a Greenplum database.}
|
@@ -15,7 +15,7 @@ Gem::Specification.new do |spec|
|
|
15
15
|
|
16
16
|
spec.add_runtime_dependency 'pg', '~> 0'
|
17
17
|
spec.add_runtime_dependency 'progressbar', '~> 0'
|
18
|
-
spec.add_runtime_dependency 'sequel', '
|
18
|
+
spec.add_runtime_dependency 'sequel', '>= 4', '< 6'
|
19
19
|
spec.add_runtime_dependency 'slop', '~> 3'
|
20
20
|
|
21
21
|
spec.add_development_dependency 'bundler', '~> 1'
|
@@ -23,4 +23,5 @@ Gem::Specification.new do |spec|
|
|
23
23
|
spec.add_development_dependency 'rspec', '~> 3'
|
24
24
|
spec.add_development_dependency 'ruby-prof', '~> 0'
|
25
25
|
spec.add_development_dependency 'cucumber', '~> 1'
|
26
|
+
spec.add_development_dependency 'dotenv', '~> 2.5'
|
26
27
|
end
|
data/features/archive.feature
CHANGED
@@ -2,6 +2,7 @@ Feature: Archive source files
|
|
2
2
|
|
3
3
|
Scenario:
|
4
4
|
Given no "/tmp/cranium_archive" directory
|
5
|
+
And no "/tmp/cranium_storage" directory
|
5
6
|
And a "products_1.csv" data file containing:
|
6
7
|
"""
|
7
8
|
"""
|
@@ -39,6 +40,8 @@ Feature: Archive source files
|
|
39
40
|
end
|
40
41
|
|
41
42
|
archive :products, :contacts
|
43
|
+
|
44
|
+
move :purchases, to: "/tmp/cranium_storage"
|
42
45
|
"""
|
43
46
|
When I execute the definition
|
44
47
|
Then the process should exit successfully
|
@@ -47,3 +50,6 @@ Feature: Archive source files
|
|
47
50
|
| .*contacts.csv |
|
48
51
|
| .*products_1.csv |
|
49
52
|
| .*products_2.csv |
|
53
|
+
And the "/tmp/cranium_storage" directory should contain the following files:
|
54
|
+
| filename |
|
55
|
+
| purchases.csv |
|
@@ -0,0 +1,39 @@
|
|
1
|
+
Feature: Delete rows from table provided by CSV file
|
2
|
+
|
3
|
+
Scenario: Successful delete
|
4
|
+
Given a database table called "dim_contact" with the following fields:
|
5
|
+
| field_name | field_type |
|
6
|
+
| source_id | TEXT |
|
7
|
+
And only the following rows in the "dim_contact" database table:
|
8
|
+
| source_id (i) |
|
9
|
+
| 1 |
|
10
|
+
| 2 |
|
11
|
+
| 3 |
|
12
|
+
| 4 |
|
13
|
+
| 5 |
|
14
|
+
And a "deleted_contacts_extract.csv" data file containing:
|
15
|
+
"""
|
16
|
+
source_id
|
17
|
+
3
|
18
|
+
4
|
19
|
+
"""
|
20
|
+
And the following definition:
|
21
|
+
"""
|
22
|
+
source :deleted_contacts_extract do
|
23
|
+
field :source_id, String
|
24
|
+
end
|
25
|
+
|
26
|
+
import :deleted_contacts_extract do
|
27
|
+
into :dim_contact
|
28
|
+
put :source_id
|
29
|
+
|
30
|
+
delete_on :source_id
|
31
|
+
end
|
32
|
+
"""
|
33
|
+
When I execute the definition
|
34
|
+
Then the process should exit successfully
|
35
|
+
And the "dim_contact" table should contain:
|
36
|
+
| source_id |
|
37
|
+
| 1 |
|
38
|
+
| 2 |
|
39
|
+
| 5 |
|
@@ -3,13 +3,13 @@ Given(/^a database table called "([^"]*)" with the following fields:$/) do |tabl
|
|
3
3
|
end
|
4
4
|
|
5
5
|
|
6
|
-
Given
|
6
|
+
Given(/^only the following rows in the "([^"]*)" database table:$/) do |table_name, rows|
|
7
7
|
database_table(table_name).clear
|
8
8
|
step %Q(the following new rows in the "#{table_name}" database table:), rows
|
9
9
|
end
|
10
10
|
|
11
11
|
|
12
|
-
Given
|
12
|
+
Given(/^the following new rows in the "([^"]*)" database table:$/) do |table_name, rows|
|
13
13
|
database_table(table_name).insert rows.data
|
14
14
|
end
|
15
15
|
|
data/features/support/env.rb
CHANGED
data/lib/cranium/archiver.rb
CHANGED
@@ -1,36 +1,37 @@
|
|
1
1
|
require 'fileutils'
|
2
2
|
|
3
3
|
module Cranium::Archiver
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
def self.remove(*files)
|
13
|
-
files.each do |file_name|
|
14
|
-
FileUtils.rm File.join(Cranium.configuration.upload_path, file_name)
|
4
|
+
class << self
|
5
|
+
def archive(*files)
|
6
|
+
create_directory(Cranium.configuration.archive_directory)
|
7
|
+
archive_datetime = Time.now.strftime("%Y-%m-%d_%Hh%Mm%Ss")
|
8
|
+
move_files_from_upload_directory(files, Cranium.configuration.archive_directory, prefix: "#{archive_datetime}_")
|
15
9
|
end
|
16
|
-
end
|
17
|
-
|
18
10
|
|
11
|
+
def remove(*files)
|
12
|
+
files.each do |file_name|
|
13
|
+
FileUtils.rm File.join(Cranium.configuration.upload_path, file_name)
|
14
|
+
end
|
15
|
+
end
|
19
16
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
end
|
17
|
+
def move(*files, target_directory:)
|
18
|
+
create_directory(target_directory)
|
19
|
+
move_files_from_upload_directory(files, target_directory)
|
20
|
+
end
|
25
21
|
|
22
|
+
private
|
26
23
|
|
24
|
+
def create_directory(path)
|
25
|
+
FileUtils.mkdir_p(path)
|
26
|
+
end
|
27
27
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
28
|
+
def move_files_from_upload_directory(files, target_directory, prefix: "")
|
29
|
+
files.each do |file_name|
|
30
|
+
FileUtils.mv(
|
31
|
+
File.join(Cranium.configuration.upload_path, file_name),
|
32
|
+
File.join(target_directory, "#{prefix}#{file_name}")
|
33
|
+
)
|
34
|
+
end
|
33
35
|
end
|
34
36
|
end
|
35
|
-
|
36
37
|
end
|
@@ -17,14 +17,16 @@ class Cranium::DataImporter
|
|
17
17
|
private
|
18
18
|
|
19
19
|
def importer_for_definition(import_definition)
|
20
|
-
if [!import_definition.merge_fields.empty?, !import_definition.delete_insert_on.empty?, import_definition.truncate_insert].count(true) > 1
|
21
|
-
raise StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
|
20
|
+
if [!import_definition.merge_fields.empty?, !import_definition.delete_insert_on.empty?, !import_definition.delete_on.empty?, import_definition.truncate_insert].count(true) > 1
|
21
|
+
raise StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
|
22
22
|
end
|
23
23
|
|
24
24
|
if !import_definition.merge_fields.empty?
|
25
25
|
Cranium::ImportStrategy::Merge.new(import_definition)
|
26
26
|
elsif !import_definition.delete_insert_on.empty?
|
27
27
|
Cranium::ImportStrategy::DeleteInsert.new(import_definition)
|
28
|
+
elsif !import_definition.delete_on.empty?
|
29
|
+
Cranium::ImportStrategy::Delete.new(import_definition)
|
28
30
|
elsif import_definition.truncate_insert
|
29
31
|
Cranium::ImportStrategy::TruncateInsert.new(import_definition)
|
30
32
|
else
|
data/lib/cranium/database.rb
CHANGED
@@ -13,7 +13,9 @@ module Cranium::Database
|
|
13
13
|
|
14
14
|
def self.[](name)
|
15
15
|
@connections ||= {}
|
16
|
-
@connections[name] ||= setup_connection(@definitions[name].connect_to
|
16
|
+
@connections[name] ||= setup_connection(@definitions[name].connect_to,
|
17
|
+
@definitions[name].retry_count,
|
18
|
+
@definitions[name].retry_delay)
|
17
19
|
end
|
18
20
|
|
19
21
|
|
@@ -28,15 +30,19 @@ module Cranium::Database
|
|
28
30
|
private
|
29
31
|
|
30
32
|
|
31
|
-
def self.setup_connection(
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
33
|
+
def self.setup_connection(connection_details, retry_count = 0, retry_delay = 0)
|
34
|
+
(retry_count + 1).times do |try_count|
|
35
|
+
connection = if Cranium.configuration.log_queries
|
36
|
+
Sequel.connect(connection_details, loggers: Cranium.configuration.loggers)
|
37
|
+
else
|
38
|
+
Sequel.connect(connection_details)
|
39
|
+
end
|
40
|
+
connection.extension :connection_validator
|
41
|
+
connection.pool.connection_validation_timeout = -1
|
42
|
+
break connection
|
43
|
+
rescue Sequel::DatabaseConnectionError
|
44
|
+
(try_count == retry_count) ? raise : sleep(retry_delay)
|
45
|
+
end
|
40
46
|
end
|
41
47
|
|
42
48
|
end
|
data/lib/cranium/dsl.rb
CHANGED
@@ -87,6 +87,14 @@ module Cranium::DSL
|
|
87
87
|
|
88
88
|
|
89
89
|
|
90
|
+
def move(*sources, to: "")
|
91
|
+
sources.each do |source_name|
|
92
|
+
Cranium::Archiver.move *Cranium.application.sources[source_name].files, target_directory: to
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
|
97
|
+
|
90
98
|
def sequence(name)
|
91
99
|
Cranium::Transformation::Sequence.new name
|
92
100
|
end
|
@@ -7,11 +7,15 @@ class Cranium::DSL::DatabaseDefinition
|
|
7
7
|
attr_reader :name
|
8
8
|
|
9
9
|
define_attribute :connect_to
|
10
|
+
define_attribute :retry_count
|
11
|
+
define_attribute :retry_delay
|
10
12
|
|
11
13
|
|
12
14
|
|
13
15
|
def initialize(name)
|
14
16
|
@name = name
|
17
|
+
@retry_count = 0
|
18
|
+
@retry_delay = 0
|
15
19
|
end
|
16
20
|
|
17
21
|
|
@@ -10,8 +10,10 @@ class Cranium::DSL::ImportDefinition
|
|
10
10
|
attr_reader :merge_fields
|
11
11
|
|
12
12
|
define_attribute :into
|
13
|
+
define_attribute :error_threshold
|
13
14
|
define_boolean_attribute :truncate_insert
|
14
15
|
define_array_attribute :delete_insert_on
|
16
|
+
define_array_attribute :delete_on
|
15
17
|
|
16
18
|
|
17
19
|
def initialize(name)
|
@@ -1,46 +1,45 @@
|
|
1
1
|
class Cranium::ExternalTable
|
2
2
|
|
3
|
-
def initialize(source, db_connection)
|
4
|
-
@source
|
3
|
+
def initialize(source, db_connection, error_threshold: nil)
|
4
|
+
@source = source
|
5
|
+
@connection = db_connection
|
6
|
+
@error_threshold = error_threshold
|
5
7
|
end
|
6
8
|
|
7
|
-
|
8
|
-
|
9
9
|
def create
|
10
|
-
@connection.run
|
11
|
-
CREATE EXTERNAL TABLE "#{name}" (
|
12
|
-
#{field_definitions}
|
13
|
-
)
|
14
|
-
LOCATION (#{external_location})
|
15
|
-
FORMAT 'CSV' (DELIMITER '#{quote @source.delimiter}' ESCAPE '#{quote @source.escape}' QUOTE '#{quote @source.quote}' HEADER)
|
16
|
-
ENCODING 'UTF8'
|
17
|
-
sql
|
10
|
+
@connection.run external_table_sql
|
18
11
|
end
|
19
12
|
|
20
|
-
|
21
|
-
|
22
13
|
def destroy
|
23
14
|
@connection.run %Q[DROP EXTERNAL TABLE "#{name}"]
|
24
15
|
end
|
25
16
|
|
26
|
-
|
27
|
-
|
28
17
|
def name
|
29
18
|
:"external_#{@source.name}"
|
30
19
|
end
|
31
20
|
|
21
|
+
private
|
32
22
|
|
23
|
+
def external_table_sql
|
24
|
+
external_table_sql = <<~sql
|
25
|
+
CREATE EXTERNAL TABLE "#{name}" (
|
26
|
+
#{field_definitions}
|
27
|
+
)
|
28
|
+
LOCATION (#{external_location})
|
29
|
+
FORMAT 'CSV' (DELIMITER '#{quote @source.delimiter}' ESCAPE '#{quote @source.escape}' QUOTE '#{quote @source.quote}' HEADER)
|
30
|
+
ENCODING 'UTF8'
|
31
|
+
sql
|
33
32
|
|
34
|
-
|
33
|
+
external_table_sql << "SEGMENT REJECT LIMIT #{@error_threshold} PERCENT\n" unless @error_threshold.nil?
|
34
|
+
external_table_sql
|
35
|
+
end
|
35
36
|
|
36
37
|
def field_definitions
|
37
38
|
@source.fields.map do |name, type|
|
38
39
|
%Q("#{name}" #{sql_type_for_ruby_type(type)})
|
39
|
-
end.join ",\n
|
40
|
+
end.join ",\n "
|
40
41
|
end
|
41
42
|
|
42
|
-
|
43
|
-
|
44
43
|
def sql_type_for_ruby_type(type)
|
45
44
|
case type.to_s
|
46
45
|
when "Integer" then
|
@@ -58,14 +57,10 @@ class Cranium::ExternalTable
|
|
58
57
|
end
|
59
58
|
end
|
60
59
|
|
61
|
-
|
62
|
-
|
63
60
|
def quote(text)
|
64
61
|
text.gsub "'", "''"
|
65
62
|
end
|
66
63
|
|
67
|
-
|
68
|
-
|
69
64
|
def external_location
|
70
65
|
@source.files.map do |file_name|
|
71
66
|
"'gpfdist://#{Cranium.configuration.gpfdist_url}/#{Cranium.configuration.upload_directory}/#{file_name}'"
|
@@ -2,6 +2,7 @@ module Cranium::ImportStrategy
|
|
2
2
|
|
3
3
|
autoload :Base, 'cranium/import_strategy/base'
|
4
4
|
autoload :DeleteInsert, 'cranium/import_strategy/delete_insert'
|
5
|
+
autoload :Delete, 'cranium/import_strategy/delete'
|
5
6
|
autoload :TruncateInsert, 'cranium/import_strategy/truncate_insert'
|
6
7
|
autoload :Delta, 'cranium/import_strategy/delta'
|
7
8
|
autoload :Merge, 'cranium/import_strategy/merge'
|
@@ -11,7 +11,7 @@ class Cranium::ImportStrategy::Base
|
|
11
11
|
|
12
12
|
|
13
13
|
def import
|
14
|
-
external_table = Cranium::ExternalTable.new Cranium.application.sources[import_definition.name], Cranium::Database.connection
|
14
|
+
external_table = Cranium::ExternalTable.new Cranium.application.sources[import_definition.name], Cranium::Database.connection, error_threshold: @import_definition.error_threshold
|
15
15
|
|
16
16
|
external_table.create
|
17
17
|
number_of_items_imported = import_from external_table.name
|
@@ -0,0 +1,34 @@
|
|
1
|
+
class Cranium::ImportStrategy::Delete < Cranium::ImportStrategy::Base
|
2
|
+
|
3
|
+
def import_from(source_table)
|
4
|
+
@source_table = source_table
|
5
|
+
|
6
|
+
delete_existing_records
|
7
|
+
puts @source_table
|
8
|
+
database[@source_table].count
|
9
|
+
end
|
10
|
+
|
11
|
+
|
12
|
+
|
13
|
+
private
|
14
|
+
|
15
|
+
def delete_existing_records
|
16
|
+
database.
|
17
|
+
from(Sequel.as(target_table, "target"), Sequel.as(@source_table, "source")).
|
18
|
+
where(delete_by_fields.qualify keys_with: :source, values_with: :target).
|
19
|
+
delete
|
20
|
+
end
|
21
|
+
|
22
|
+
|
23
|
+
|
24
|
+
def delete_by_fields
|
25
|
+
Cranium::Sequel::Hash[delete_field_mapping]
|
26
|
+
end
|
27
|
+
|
28
|
+
|
29
|
+
|
30
|
+
def delete_field_mapping
|
31
|
+
import_definition.field_associations.select { |_, target_field| import_definition.delete_on.include? target_field }
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
@@ -22,8 +22,9 @@ class Cranium::TestFramework::World
|
|
22
22
|
|
23
23
|
|
24
24
|
def save_definition(definition)
|
25
|
-
config =
|
25
|
+
config = <<~config_string
|
26
26
|
require 'logger'
|
27
|
+
require 'date'
|
27
28
|
|
28
29
|
Cranium.configure do |config|
|
29
30
|
config.greenplum_connection_string = "#{Cranium.configuration.greenplum_connection_string}"
|
@@ -32,6 +33,7 @@ class Cranium::TestFramework::World
|
|
32
33
|
config.upload_directory = "#{Cranium.configuration.upload_directory}"
|
33
34
|
config.loggers << Logger.new("log/application.log")
|
34
35
|
end
|
36
|
+
|
35
37
|
config_string
|
36
38
|
|
37
39
|
upload_directory.save_file DEFINITION_FILE, config + definition
|
@@ -1,44 +1,74 @@
|
|
1
|
-
|
1
|
+
RSpec.describe Cranium::Archiver do
|
2
|
+
subject(:archiver) { described_class }
|
2
3
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
config.
|
8
|
-
|
9
|
-
config.archive_directory = "path/to/archive"
|
10
|
-
end)
|
4
|
+
let(:configuration) do
|
5
|
+
Cranium::Configuration.new.tap do |config|
|
6
|
+
config.gpfdist_home_directory = "tmp"
|
7
|
+
config.upload_directory = "upload_directory"
|
8
|
+
config.archive_directory = "tmp/archive_directory"
|
9
|
+
end
|
11
10
|
end
|
11
|
+
let(:file1) { "file.txt" }
|
12
|
+
let(:file2) { "another_file.txt" }
|
12
13
|
|
14
|
+
before do
|
15
|
+
allow(Cranium).to receive_messages(configuration: configuration)
|
16
|
+
|
17
|
+
FileUtils.mkdir_p(configuration.upload_path)
|
18
|
+
FileUtils.touch(File.join(configuration.upload_path, file1))
|
19
|
+
FileUtils.touch(File.join(configuration.upload_path, file2))
|
20
|
+
end
|
13
21
|
|
14
22
|
describe ".archive" do
|
15
|
-
|
16
|
-
|
23
|
+
context "when archive directory does not exist" do
|
24
|
+
before { FileUtils.rm_rf configuration.archive_directory }
|
17
25
|
|
18
|
-
|
26
|
+
it "creates the archive directory" do
|
27
|
+
archiver.archive file1, file2
|
19
28
|
|
20
|
-
|
29
|
+
expect(File.exists?(configuration.archive_directory)).to eq true
|
30
|
+
end
|
21
31
|
end
|
22
32
|
|
23
|
-
|
24
|
-
|
25
|
-
|
33
|
+
context "when there are some file in the upload directory" do
|
34
|
+
it "moves files to the archive directory" do
|
35
|
+
archiver.archive file1, file2
|
26
36
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
37
|
+
expect(File.exist?(File.join(configuration.upload_path, file1))).to eq false
|
38
|
+
expect(File.exist?(File.join(configuration.upload_path, file2))).to eq false
|
39
|
+
expect(File.exist?(File.join(configuration.archive_directory, Dir.glob("*#{file1}")))).to eq true
|
40
|
+
expect(File.exist?(File.join(configuration.archive_directory, Dir.glob("*#{file2}")))).to eq true
|
41
|
+
end
|
31
42
|
end
|
32
43
|
end
|
33
44
|
|
34
|
-
|
35
45
|
describe ".remove" do
|
36
|
-
|
37
|
-
|
38
|
-
|
46
|
+
before { FileUtils.mkdir_p configuration.archive_directory }
|
47
|
+
|
48
|
+
it "removes files from the upload directory" do
|
49
|
+
archiver.remove file1, file2
|
39
50
|
|
40
|
-
|
51
|
+
expect(File.exist?(File.join(configuration.archive_directory, Dir.glob("*#{file1}")))).to eq true
|
52
|
+
expect(File.exist?(File.join(configuration.archive_directory, Dir.glob("*#{file2}")))).to eq true
|
41
53
|
end
|
42
54
|
end
|
43
55
|
|
56
|
+
describe ".move" do
|
57
|
+
let(:target_directory) { "tmp/target_directory" }
|
58
|
+
|
59
|
+
it "creates given directory if it does not exist" do
|
60
|
+
archiver.move(file1, file2, target_directory: target_directory)
|
61
|
+
|
62
|
+
expect(File.exists?(target_directory)).to eq true
|
63
|
+
end
|
64
|
+
|
65
|
+
it "moves files from upload directory into a given directory" do
|
66
|
+
archiver.move(file1, file2, target_directory: target_directory)
|
67
|
+
|
68
|
+
expect(File.exist?(File.join(configuration.upload_path, file1))).to eq false
|
69
|
+
expect(File.exist?(File.join(configuration.upload_path, file2))).to eq false
|
70
|
+
expect(File.exist?(File.join(target_directory, file1))).to eq true
|
71
|
+
expect(File.exist?(File.join(target_directory, file2))).to eq true
|
72
|
+
end
|
73
|
+
end
|
44
74
|
end
|
@@ -1,11 +1,12 @@
|
|
1
1
|
require_relative '../spec_helper'
|
2
2
|
|
3
3
|
describe Cranium::DataImporter do
|
4
|
+
let(:connection) { double 'a_connection' }
|
4
5
|
|
5
6
|
before do
|
6
|
-
connection = double
|
7
7
|
allow(Cranium::Database).to receive(:connection).and_return connection
|
8
8
|
allow(connection).to receive(:transaction).and_yield
|
9
|
+
allow(Cranium.application).to receive(:apply_hook).with :after_import
|
9
10
|
end
|
10
11
|
|
11
12
|
let(:importer) { Cranium::DataImporter.new }
|
@@ -13,12 +14,32 @@ describe Cranium::DataImporter do
|
|
13
14
|
|
14
15
|
describe "#import" do
|
15
16
|
|
17
|
+
context "when called with delete_on strategy" do
|
18
|
+
it "calls Delete strategy" do
|
19
|
+
import_strategy = instance_double Cranium::ImportStrategy::Delete
|
20
|
+
allow(Cranium::ImportStrategy::Delete).to receive(:new).with(definition).and_return import_strategy
|
21
|
+
expect(import_strategy).to receive(:import).and_return 0
|
22
|
+
definition.delete_on :source_id
|
23
|
+
|
24
|
+
importer.import definition
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
context "when called with both merge and delete_on fields set" do
|
29
|
+
it "should raise an exception" do
|
30
|
+
definition.delete_on :source_id
|
31
|
+
definition.merge_on :another_field
|
32
|
+
|
33
|
+
expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
16
37
|
context "when called with both merge and delete_insert fields set" do
|
17
38
|
it "should raise an exception" do
|
18
39
|
definition.delete_insert_on :some_field
|
19
40
|
definition.merge_on :another_field
|
20
41
|
|
21
|
-
expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
|
42
|
+
expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
|
22
43
|
end
|
23
44
|
end
|
24
45
|
|
@@ -27,7 +48,7 @@ describe Cranium::DataImporter do
|
|
27
48
|
definition.truncate_insert true
|
28
49
|
definition.merge_on :another_field
|
29
50
|
|
30
|
-
expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
|
51
|
+
expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
|
31
52
|
end
|
32
53
|
end
|
33
54
|
|
@@ -36,7 +57,7 @@ describe Cranium::DataImporter do
|
|
36
57
|
definition.delete_insert_on :some_field
|
37
58
|
definition.truncate_insert true
|
38
59
|
|
39
|
-
expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
|
60
|
+
expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
|
40
61
|
end
|
41
62
|
end
|
42
63
|
|
@@ -46,7 +67,7 @@ describe Cranium::DataImporter do
|
|
46
67
|
definition.merge_on :another_field
|
47
68
|
definition.truncate_insert true
|
48
69
|
|
49
|
-
expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
|
70
|
+
expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
|
50
71
|
end
|
51
72
|
end
|
52
73
|
|
@@ -81,6 +81,40 @@ describe Cranium::Database do
|
|
81
81
|
|
82
82
|
expect(database[:dwh]).not_to eq database[:dwh2]
|
83
83
|
end
|
84
|
+
|
85
|
+
context 'when retry_count is specified' do
|
86
|
+
before do
|
87
|
+
database.register_database :dwh do
|
88
|
+
connect_to "other connection string"
|
89
|
+
retry_count 3
|
90
|
+
retry_delay 15
|
91
|
+
end
|
92
|
+
allow(database).to receive(:sleep)
|
93
|
+
end
|
94
|
+
|
95
|
+
it "should retry connecting to the DB the specified number of times" do
|
96
|
+
call_count = 0
|
97
|
+
allow(Sequel).to receive(:connect) do
|
98
|
+
call_count += 1
|
99
|
+
call_count < 3 ? raise(Sequel::DatabaseConnectionError) : connection
|
100
|
+
end
|
101
|
+
|
102
|
+
expect(database[:dwh]).to eq connection
|
103
|
+
end
|
104
|
+
|
105
|
+
it "should not retry connecting to the DB more than the specified number of times" do
|
106
|
+
allow(Sequel).to receive(:connect).exactly(4).times.and_raise(Sequel::DatabaseConnectionError)
|
107
|
+
|
108
|
+
expect { database[:dwh] }.to raise_error(Sequel::DatabaseConnectionError)
|
109
|
+
end
|
110
|
+
|
111
|
+
it "should wait retry_delay seconds between connection attempts" do
|
112
|
+
allow(Sequel).to receive(:connect).and_raise(Sequel::DatabaseConnectionError)
|
113
|
+
expect(database).to receive(:sleep).with(15).exactly(3).times
|
114
|
+
|
115
|
+
expect { database[:dwh] }.to raise_error(Sequel::DatabaseConnectionError)
|
116
|
+
end
|
117
|
+
end
|
84
118
|
end
|
85
119
|
|
86
120
|
end
|
@@ -20,4 +20,38 @@ describe Cranium::DSL::DatabaseDefinition do
|
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
23
|
+
|
24
|
+
describe "#retry_count" do
|
25
|
+
context 'when not set' do
|
26
|
+
it "should return 0 by default" do
|
27
|
+
expect(database.retry_count).to eq(0)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
context 'when set' do
|
32
|
+
it "should return the number of retries specified for the database" do
|
33
|
+
database.retry_count 3
|
34
|
+
|
35
|
+
expect(database.retry_count).to eq(3)
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
|
41
|
+
describe "#retry_delay" do
|
42
|
+
context 'when not set' do
|
43
|
+
it "should return 0 by default" do
|
44
|
+
expect(database.retry_delay).to eq(0)
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
context 'when set' do
|
49
|
+
it "should return the number of retries specified for the database" do
|
50
|
+
database.retry_delay 15
|
51
|
+
|
52
|
+
expect(database.retry_delay).to eq(15)
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
23
57
|
end
|
@@ -12,6 +12,14 @@ describe Cranium::DSL::ImportDefinition do
|
|
12
12
|
end
|
13
13
|
end
|
14
14
|
|
15
|
+
describe "#error_threshold" do
|
16
|
+
it "should set the error threshold to the given percentage" do
|
17
|
+
import.error_threshold 10
|
18
|
+
|
19
|
+
expect(import.error_threshold).to eq 10
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
15
23
|
|
16
24
|
describe "#name" do
|
17
25
|
it "should return the name of the import definition" do
|
@@ -38,11 +38,11 @@ describe Cranium::DSL::SourceDefinition do
|
|
38
38
|
|
39
39
|
it "should return the fields and types that were set" do
|
40
40
|
source.field :field1, String
|
41
|
-
source.field :field2,
|
41
|
+
source.field :field2, Integer
|
42
42
|
|
43
43
|
expect(source.fields).to eq({
|
44
44
|
field1: String,
|
45
|
-
field2:
|
45
|
+
field2: Integer
|
46
46
|
})
|
47
47
|
end
|
48
48
|
end
|
data/spec/cranium/dsl_spec.rb
CHANGED
@@ -97,6 +97,22 @@ describe Cranium::DSL do
|
|
97
97
|
end
|
98
98
|
|
99
99
|
|
100
|
+
describe "#move" do
|
101
|
+
let(:target_directory) { "/tmp/target" }
|
102
|
+
|
103
|
+
it "should move files for the specified sources" do
|
104
|
+
allow(Cranium.application).to receive_messages sources: {first_source: double(files: ["file1", "file2"]),
|
105
|
+
second_source: double(files: ["file3"]),
|
106
|
+
third_source: double(files: ["file4"])}
|
107
|
+
|
108
|
+
expect(Cranium::Archiver).to receive(:move).with "file1", "file2", target_directory: target_directory
|
109
|
+
expect(Cranium::Archiver).to receive(:move).with "file3", target_directory: target_directory
|
110
|
+
|
111
|
+
dsl_object.move :first_source, :second_source, to: target_directory
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
|
100
116
|
describe "#sequence" do
|
101
117
|
it "should return a sequence with the specified name" do
|
102
118
|
result = dsl_object.sequence "test_sequence"
|
@@ -1,5 +1,6 @@
|
|
1
1
|
require_relative '../spec_helper'
|
2
2
|
require 'ostruct'
|
3
|
+
require 'date'
|
3
4
|
|
4
5
|
describe Cranium::ExternalTable do
|
5
6
|
|
@@ -19,37 +20,64 @@ describe Cranium::ExternalTable do
|
|
19
20
|
source.escape "'"
|
20
21
|
end
|
21
22
|
end
|
22
|
-
let(:external_table) { Cranium::ExternalTable.new source, connection }
|
23
23
|
|
24
|
+
subject(:external_table) { Cranium::ExternalTable.new source, connection }
|
24
25
|
|
25
26
|
describe "#create" do
|
26
|
-
|
27
|
+
before do
|
27
28
|
allow(Cranium).to receive_messages configuration: OpenStruct.new(
|
28
|
-
|
29
|
-
|
30
|
-
|
29
|
+
gpfdist_url: "gpfdist-url",
|
30
|
+
gpfdist_home_directory: "/gpfdist-home",
|
31
|
+
upload_directory: "upload-dir"
|
31
32
|
)
|
32
33
|
|
33
34
|
allow(source).to receive_messages files: %w(test_products_a.csv test_products_b.csv)
|
35
|
+
end
|
34
36
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
37
|
+
it "should create an external table from the specified source" do
|
38
|
+
expect(connection).to receive(:run).with(<<~sql
|
39
|
+
CREATE EXTERNAL TABLE "external_products" (
|
40
|
+
"text_field" TEXT,
|
41
|
+
"integer_field" INTEGER,
|
42
|
+
"numeric_field" NUMERIC,
|
43
|
+
"date_field" DATE,
|
44
|
+
"timestamp_field" TIMESTAMP WITHOUT TIME ZONE,
|
45
|
+
"boolean_field1" BOOLEAN,
|
46
|
+
"boolean_field2" BOOLEAN
|
47
|
+
)
|
48
|
+
LOCATION ('gpfdist://gpfdist-url/upload-dir/test_products_a.csv', 'gpfdist://gpfdist-url/upload-dir/test_products_b.csv')
|
49
|
+
FORMAT 'CSV' (DELIMITER ';' ESCAPE '''' QUOTE '"' HEADER)
|
50
|
+
ENCODING 'UTF8'
|
48
51
|
sql
|
49
52
|
)
|
50
53
|
|
51
54
|
external_table.create
|
52
55
|
end
|
56
|
+
|
57
|
+
context "with error_threshold argument" do
|
58
|
+
subject(:external_table) { Cranium::ExternalTable.new source, connection, error_threshold: 10 }
|
59
|
+
|
60
|
+
it "should create an external table from the specified source" do
|
61
|
+
expect(connection).to receive(:run).with(<<~sql
|
62
|
+
CREATE EXTERNAL TABLE "external_products" (
|
63
|
+
"text_field" TEXT,
|
64
|
+
"integer_field" INTEGER,
|
65
|
+
"numeric_field" NUMERIC,
|
66
|
+
"date_field" DATE,
|
67
|
+
"timestamp_field" TIMESTAMP WITHOUT TIME ZONE,
|
68
|
+
"boolean_field1" BOOLEAN,
|
69
|
+
"boolean_field2" BOOLEAN
|
70
|
+
)
|
71
|
+
LOCATION ('gpfdist://gpfdist-url/upload-dir/test_products_a.csv', 'gpfdist://gpfdist-url/upload-dir/test_products_b.csv')
|
72
|
+
FORMAT 'CSV' (DELIMITER ';' ESCAPE '''' QUOTE '"' HEADER)
|
73
|
+
ENCODING 'UTF8'
|
74
|
+
SEGMENT REJECT LIMIT 10 PERCENT
|
75
|
+
sql
|
76
|
+
)
|
77
|
+
|
78
|
+
external_table.create
|
79
|
+
end
|
80
|
+
end
|
53
81
|
end
|
54
82
|
|
55
83
|
|
@@ -67,5 +95,4 @@ describe Cranium::ExternalTable do
|
|
67
95
|
expect(external_table.name).to eq(:external_products)
|
68
96
|
end
|
69
97
|
end
|
70
|
-
|
71
98
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cranium
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.2
|
4
|
+
version: '0.8'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Emarsys Technologies
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-10-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: pg
|
@@ -42,16 +42,22 @@ dependencies:
|
|
42
42
|
name: sequel
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- - "
|
45
|
+
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: '4'
|
48
|
+
- - "<"
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: '6'
|
48
51
|
type: :runtime
|
49
52
|
prerelease: false
|
50
53
|
version_requirements: !ruby/object:Gem::Requirement
|
51
54
|
requirements:
|
52
|
-
- - "
|
55
|
+
- - ">="
|
53
56
|
- !ruby/object:Gem::Version
|
54
57
|
version: '4'
|
58
|
+
- - "<"
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: '6'
|
55
61
|
- !ruby/object:Gem::Dependency
|
56
62
|
name: slop
|
57
63
|
requirement: !ruby/object:Gem::Requirement
|
@@ -136,6 +142,20 @@ dependencies:
|
|
136
142
|
- - "~>"
|
137
143
|
- !ruby/object:Gem::Version
|
138
144
|
version: '1'
|
145
|
+
- !ruby/object:Gem::Dependency
|
146
|
+
name: dotenv
|
147
|
+
requirement: !ruby/object:Gem::Requirement
|
148
|
+
requirements:
|
149
|
+
- - "~>"
|
150
|
+
- !ruby/object:Gem::Version
|
151
|
+
version: '2.5'
|
152
|
+
type: :development
|
153
|
+
prerelease: false
|
154
|
+
version_requirements: !ruby/object:Gem::Requirement
|
155
|
+
requirements:
|
156
|
+
- - "~>"
|
157
|
+
- !ruby/object:Gem::Version
|
158
|
+
version: '2.5'
|
139
159
|
description: Provides Extract, Transform and Load functionality for loading data from
|
140
160
|
CSV files to a Greenplum database.
|
141
161
|
email:
|
@@ -145,8 +165,9 @@ executables:
|
|
145
165
|
extensions: []
|
146
166
|
extra_rdoc_files: []
|
147
167
|
files:
|
168
|
+
- ".env"
|
148
169
|
- ".gitignore"
|
149
|
-
- ".ruby-version"
|
170
|
+
- ".rspec"
|
150
171
|
- Gemfile
|
151
172
|
- LICENSE.txt
|
152
173
|
- README.md
|
@@ -166,6 +187,7 @@ files:
|
|
166
187
|
- features/archive.feature
|
167
188
|
- features/extract/incremental_extract.feature
|
168
189
|
- features/extract/simple_extract.feature
|
190
|
+
- features/import/delete_from_table_based_on_csv.feature
|
169
191
|
- features/import/import_csv_to_database_as_delta.feature
|
170
192
|
- features/import/import_csv_to_database_with_delete_insert_merging.feature
|
171
193
|
- features/import/import_csv_to_database_with_truncate_insert.feature
|
@@ -225,6 +247,7 @@ files:
|
|
225
247
|
- lib/cranium/file_utils.rb
|
226
248
|
- lib/cranium/import_strategy.rb
|
227
249
|
- lib/cranium/import_strategy/base.rb
|
250
|
+
- lib/cranium/import_strategy/delete.rb
|
228
251
|
- lib/cranium/import_strategy/delete_insert.rb
|
229
252
|
- lib/cranium/import_strategy/delta.rb
|
230
253
|
- lib/cranium/import_strategy/merge.rb
|
@@ -296,8 +319,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
296
319
|
- !ruby/object:Gem::Version
|
297
320
|
version: '0'
|
298
321
|
requirements: []
|
299
|
-
|
300
|
-
rubygems_version: 2.6.4
|
322
|
+
rubygems_version: 3.0.3
|
301
323
|
signing_key:
|
302
324
|
specification_version: 4
|
303
325
|
summary: Pure Ruby ETL framework
|
@@ -305,6 +327,7 @@ test_files:
|
|
305
327
|
- features/archive.feature
|
306
328
|
- features/extract/incremental_extract.feature
|
307
329
|
- features/extract/simple_extract.feature
|
330
|
+
- features/import/delete_from_table_based_on_csv.feature
|
308
331
|
- features/import/import_csv_to_database_as_delta.feature
|
309
332
|
- features/import/import_csv_to_database_with_delete_insert_merging.feature
|
310
333
|
- features/import/import_csv_to_database_with_truncate_insert.feature
|
data/.ruby-version
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
2.3.0
|