cranium 0.5 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/cranium.gemspec +1 -1
- data/features/step_definitions/database_table_steps.rb +2 -2
- data/lib/cranium/dsl/import_definition.rb +1 -0
- data/lib/cranium/external_table.rb +19 -24
- data/lib/cranium/import_strategy/base.rb +1 -1
- data/spec/cranium/dsl/import_definition_spec.rb +8 -0
- data/spec/cranium/external_table_spec.rb +46 -19
- metadata +3 -5
- data/.ruby-version +0 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cf60640829ad3e3977a453a18b4a91c01538ecfe79fb8759b3d978348e10cb8c
|
4
|
+
data.tar.gz: e297e5c85dd7f0c6296aa1e9ce01dc112d43b08e7087e34f266ff856a0e41ac9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c47d42ecea886eb75e21b471c9e7b5cdc810d020174e43a76b5554cd3f4f6a71d2517bde5e041a544167ec461d74e2c0945459ac7b3223bf6793ef9fe4d0192d
|
7
|
+
data.tar.gz: 9dbc134d24bd752b0adfe664d2a0187fb07a8c3e8d0a3d956b5949a7135dbe1c147e158bd6c5f54eca9580f0f424fd56d68b88a032e8352c7709c6ade6c673da
|
data/.gitignore
CHANGED
data/cranium.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = 'cranium'
|
3
|
-
spec.version = '0.5'
|
3
|
+
spec.version = '0.6.1'
|
4
4
|
spec.authors = ['Emarsys Technologies']
|
5
5
|
spec.email = ['smart-insight-dev@emarsys.com']
|
6
6
|
spec.description = %q{Provides Extract, Transform and Load functionality for loading data from CSV files to a Greenplum database.}
|
@@ -3,13 +3,13 @@ Given(/^a database table called "([^"]*)" with the following fields:$/) do |tabl
|
|
3
3
|
end
|
4
4
|
|
5
5
|
|
6
|
-
Given
|
6
|
+
Given(/^only the following rows in the "([^"]*)" database table:$/) do |table_name, rows|
|
7
7
|
database_table(table_name).clear
|
8
8
|
step %Q(the following new rows in the "#{table_name}" database table:), rows
|
9
9
|
end
|
10
10
|
|
11
11
|
|
12
|
-
Given
|
12
|
+
Given(/^the following new rows in the "([^"]*)" database table:$/) do |table_name, rows|
|
13
13
|
database_table(table_name).insert rows.data
|
14
14
|
end
|
15
15
|
|
@@ -1,46 +1,45 @@
|
|
1
1
|
class Cranium::ExternalTable
|
2
2
|
|
3
|
-
def initialize(source, db_connection)
|
4
|
-
@source
|
3
|
+
def initialize(source, db_connection, error_threshold: nil)
|
4
|
+
@source = source
|
5
|
+
@connection = db_connection
|
6
|
+
@error_threshold = error_threshold
|
5
7
|
end
|
6
8
|
|
7
|
-
|
8
|
-
|
9
9
|
def create
|
10
|
-
@connection.run
|
11
|
-
CREATE EXTERNAL TABLE "#{name}" (
|
12
|
-
#{field_definitions}
|
13
|
-
)
|
14
|
-
LOCATION (#{external_location})
|
15
|
-
FORMAT 'CSV' (DELIMITER '#{quote @source.delimiter}' ESCAPE '#{quote @source.escape}' QUOTE '#{quote @source.quote}' HEADER)
|
16
|
-
ENCODING 'UTF8'
|
17
|
-
sql
|
10
|
+
@connection.run external_table_sql
|
18
11
|
end
|
19
12
|
|
20
|
-
|
21
|
-
|
22
13
|
def destroy
|
23
14
|
@connection.run %Q[DROP EXTERNAL TABLE "#{name}"]
|
24
15
|
end
|
25
16
|
|
26
|
-
|
27
|
-
|
28
17
|
def name
|
29
18
|
:"external_#{@source.name}"
|
30
19
|
end
|
31
20
|
|
21
|
+
private
|
32
22
|
|
23
|
+
def external_table_sql
|
24
|
+
external_table_sql = <<~sql
|
25
|
+
CREATE EXTERNAL TABLE "#{name}" (
|
26
|
+
#{field_definitions}
|
27
|
+
)
|
28
|
+
LOCATION (#{external_location})
|
29
|
+
FORMAT 'CSV' (DELIMITER '#{quote @source.delimiter}' ESCAPE '#{quote @source.escape}' QUOTE '#{quote @source.quote}' HEADER)
|
30
|
+
ENCODING 'UTF8'
|
31
|
+
sql
|
33
32
|
|
34
|
-
|
33
|
+
external_table_sql << "SEGMENT REJECT LIMIT #{@error_threshold} PERCENT\n" unless @error_threshold.nil?
|
34
|
+
external_table_sql
|
35
|
+
end
|
35
36
|
|
36
37
|
def field_definitions
|
37
38
|
@source.fields.map do |name, type|
|
38
39
|
%Q("#{name}" #{sql_type_for_ruby_type(type)})
|
39
|
-
end.join ",\n
|
40
|
+
end.join ",\n "
|
40
41
|
end
|
41
42
|
|
42
|
-
|
43
|
-
|
44
43
|
def sql_type_for_ruby_type(type)
|
45
44
|
case type.to_s
|
46
45
|
when "Integer" then
|
@@ -58,14 +57,10 @@ class Cranium::ExternalTable
|
|
58
57
|
end
|
59
58
|
end
|
60
59
|
|
61
|
-
|
62
|
-
|
63
60
|
def quote(text)
|
64
61
|
text.gsub "'", "''"
|
65
62
|
end
|
66
63
|
|
67
|
-
|
68
|
-
|
69
64
|
def external_location
|
70
65
|
@source.files.map do |file_name|
|
71
66
|
"'gpfdist://#{Cranium.configuration.gpfdist_url}/#{Cranium.configuration.upload_directory}/#{file_name}'"
|
@@ -11,7 +11,7 @@ class Cranium::ImportStrategy::Base
|
|
11
11
|
|
12
12
|
|
13
13
|
def import
|
14
|
-
external_table = Cranium::ExternalTable.new Cranium.application.sources[import_definition.name], Cranium::Database.connection
|
14
|
+
external_table = Cranium::ExternalTable.new Cranium.application.sources[import_definition.name], Cranium::Database.connection, error_threshold: @import_definition.error_threshold
|
15
15
|
|
16
16
|
external_table.create
|
17
17
|
number_of_items_imported = import_from external_table.name
|
@@ -12,6 +12,14 @@ describe Cranium::DSL::ImportDefinition do
|
|
12
12
|
end
|
13
13
|
end
|
14
14
|
|
15
|
+
describe "#error_threshold" do
|
16
|
+
it "should set the error threshold to the given percentage" do
|
17
|
+
import.error_threshold 10
|
18
|
+
|
19
|
+
expect(import.error_threshold).to eq 10
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
15
23
|
|
16
24
|
describe "#name" do
|
17
25
|
it "should return the name of the import definition" do
|
@@ -1,5 +1,6 @@
|
|
1
1
|
require_relative '../spec_helper'
|
2
2
|
require 'ostruct'
|
3
|
+
require 'date'
|
3
4
|
|
4
5
|
describe Cranium::ExternalTable do
|
5
6
|
|
@@ -19,37 +20,64 @@ describe Cranium::ExternalTable do
|
|
19
20
|
source.escape "'"
|
20
21
|
end
|
21
22
|
end
|
22
|
-
let(:external_table) { Cranium::ExternalTable.new source, connection }
|
23
23
|
|
24
|
+
subject(:external_table) { Cranium::ExternalTable.new source, connection }
|
24
25
|
|
25
26
|
describe "#create" do
|
26
|
-
|
27
|
+
before do
|
27
28
|
allow(Cranium).to receive_messages configuration: OpenStruct.new(
|
28
|
-
|
29
|
-
|
30
|
-
|
29
|
+
gpfdist_url: "gpfdist-url",
|
30
|
+
gpfdist_home_directory: "/gpfdist-home",
|
31
|
+
upload_directory: "upload-dir"
|
31
32
|
)
|
32
33
|
|
33
34
|
allow(source).to receive_messages files: %w(test_products_a.csv test_products_b.csv)
|
35
|
+
end
|
34
36
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
37
|
+
it "should create an external table from the specified source" do
|
38
|
+
expect(connection).to receive(:run).with(<<~sql
|
39
|
+
CREATE EXTERNAL TABLE "external_products" (
|
40
|
+
"text_field" TEXT,
|
41
|
+
"integer_field" INTEGER,
|
42
|
+
"numeric_field" NUMERIC,
|
43
|
+
"date_field" DATE,
|
44
|
+
"timestamp_field" TIMESTAMP WITHOUT TIME ZONE,
|
45
|
+
"boolean_field1" BOOLEAN,
|
46
|
+
"boolean_field2" BOOLEAN
|
47
|
+
)
|
48
|
+
LOCATION ('gpfdist://gpfdist-url/upload-dir/test_products_a.csv', 'gpfdist://gpfdist-url/upload-dir/test_products_b.csv')
|
49
|
+
FORMAT 'CSV' (DELIMITER ';' ESCAPE '''' QUOTE '"' HEADER)
|
50
|
+
ENCODING 'UTF8'
|
48
51
|
sql
|
49
52
|
)
|
50
53
|
|
51
54
|
external_table.create
|
52
55
|
end
|
56
|
+
|
57
|
+
context "with error_threshold argument" do
|
58
|
+
subject(:external_table) { Cranium::ExternalTable.new source, connection, error_threshold: 10 }
|
59
|
+
|
60
|
+
it "should create an external table from the specified source" do
|
61
|
+
expect(connection).to receive(:run).with(<<~sql
|
62
|
+
CREATE EXTERNAL TABLE "external_products" (
|
63
|
+
"text_field" TEXT,
|
64
|
+
"integer_field" INTEGER,
|
65
|
+
"numeric_field" NUMERIC,
|
66
|
+
"date_field" DATE,
|
67
|
+
"timestamp_field" TIMESTAMP WITHOUT TIME ZONE,
|
68
|
+
"boolean_field1" BOOLEAN,
|
69
|
+
"boolean_field2" BOOLEAN
|
70
|
+
)
|
71
|
+
LOCATION ('gpfdist://gpfdist-url/upload-dir/test_products_a.csv', 'gpfdist://gpfdist-url/upload-dir/test_products_b.csv')
|
72
|
+
FORMAT 'CSV' (DELIMITER ';' ESCAPE '''' QUOTE '"' HEADER)
|
73
|
+
ENCODING 'UTF8'
|
74
|
+
SEGMENT REJECT LIMIT 10 PERCENT
|
75
|
+
sql
|
76
|
+
)
|
77
|
+
|
78
|
+
external_table.create
|
79
|
+
end
|
80
|
+
end
|
53
81
|
end
|
54
82
|
|
55
83
|
|
@@ -67,5 +95,4 @@ describe Cranium::ExternalTable do
|
|
67
95
|
expect(external_table.name).to eq(:external_products)
|
68
96
|
end
|
69
97
|
end
|
70
|
-
|
71
98
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cranium
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.5'
|
4
|
+
version: 0.6.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Emarsys Technologies
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-12-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: pg
|
@@ -168,7 +168,6 @@ files:
|
|
168
168
|
- ".env"
|
169
169
|
- ".gitignore"
|
170
170
|
- ".rspec"
|
171
|
-
- ".ruby-version"
|
172
171
|
- Gemfile
|
173
172
|
- LICENSE.txt
|
174
173
|
- README.md
|
@@ -318,8 +317,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
318
317
|
- !ruby/object:Gem::Version
|
319
318
|
version: '0'
|
320
319
|
requirements: []
|
321
|
-
|
322
|
-
rubygems_version: 2.7.6
|
320
|
+
rubygems_version: 3.0.3
|
323
321
|
signing_key:
|
324
322
|
specification_version: 4
|
325
323
|
summary: Pure Ruby ETL framework
|
data/.ruby-version
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
2.5.1
|