cranium 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +21 -0
- data/.ruby-version +1 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +29 -0
- data/Rakefile +3 -0
- data/Vagrantfile +24 -0
- data/bin/cranium +9 -0
- data/config/cucumber.yml +9 -0
- data/cranium.gemspec +26 -0
- data/db/setup.sql +8 -0
- data/docker-compose.yml +8 -0
- data/examples/config.rb +14 -0
- data/examples/deduplication.rb +27 -0
- data/examples/import_csv_with_field_lookup_inserting_new_dimension_keys.rb +26 -0
- data/examples/incremental_extract.rb +17 -0
- data/examples/lookup_with_multiple_fields.rb +25 -0
- data/features/archive.feature +49 -0
- data/features/extract/incremental_extract.feature +56 -0
- data/features/extract/simple_extract.feature +85 -0
- data/features/import/import_csv_to_database_as_delta.feature +38 -0
- data/features/import/import_csv_to_database_with_delete_insert_merging.feature +51 -0
- data/features/import/import_csv_to_database_with_truncate_insert.feature +49 -0
- data/features/import/import_csv_to_database_with_update_merging.feature +46 -0
- data/features/import/import_csv_with_always_inserting_new_dimension_keys.feature +137 -0
- data/features/import/import_csv_with_field_lookup_inserting_new_dimension_keys.feature +62 -0
- data/features/import/import_csv_with_field_lookup_transformation.feature +125 -0
- data/features/import/import_csv_with_transformation.feature +55 -0
- data/features/import/import_multiple_csv_files_without_transformations.feature +44 -0
- data/features/import/import_with_load_id_from_sequence.feature +53 -0
- data/features/import/import_with_lookup_from_multiple_fields.feature +64 -0
- data/features/read.feature +56 -0
- data/features/remove.feature +44 -0
- data/features/restore_database_connection.feature +55 -0
- data/features/step_definitions/database_table_steps.rb +40 -0
- data/features/step_definitions/definition_steps.rb +3 -0
- data/features/step_definitions/execution_steps.rb +23 -0
- data/features/step_definitions/file_steps.rb +39 -0
- data/features/support/class_extensions.rb +24 -0
- data/features/support/env.rb +27 -0
- data/features/support/randomize.rb +22 -0
- data/features/support/stop_on_first_error.rb +5 -0
- data/features/transform/deduplication.feature +37 -0
- data/features/transform/empty_transformation.feature +72 -0
- data/features/transform/join.feature +180 -0
- data/features/transform/join_multiple_files_into_one_output_file.feature +46 -0
- data/features/transform/output_rows.feature +70 -0
- data/features/transform/projection.feature +34 -0
- data/features/transform/raw_ruby_transformation.feature +69 -0
- data/features/transform/split_field.feature +39 -0
- data/lib/cranium/application.rb +104 -0
- data/lib/cranium/archiver.rb +36 -0
- data/lib/cranium/attribute_dsl.rb +43 -0
- data/lib/cranium/command_line_options.rb +27 -0
- data/lib/cranium/configuration.rb +33 -0
- data/lib/cranium/data_importer.rb +35 -0
- data/lib/cranium/data_reader.rb +48 -0
- data/lib/cranium/data_transformer.rb +126 -0
- data/lib/cranium/database.rb +36 -0
- data/lib/cranium/definition_registry.rb +21 -0
- data/lib/cranium/dimension_manager.rb +65 -0
- data/lib/cranium/dsl/database_definition.rb +23 -0
- data/lib/cranium/dsl/extract_definition.rb +28 -0
- data/lib/cranium/dsl/import_definition.rb +50 -0
- data/lib/cranium/dsl/source_definition.rb +67 -0
- data/lib/cranium/dsl.rb +100 -0
- data/lib/cranium/extensions/file.rb +7 -0
- data/lib/cranium/extensions/sequel_greenplum.rb +30 -0
- data/lib/cranium/external_table.rb +75 -0
- data/lib/cranium/extract/data_extractor.rb +11 -0
- data/lib/cranium/extract/storage.rb +57 -0
- data/lib/cranium/extract/strategy/base.rb +27 -0
- data/lib/cranium/extract/strategy/incremental.rb +16 -0
- data/lib/cranium/extract/strategy/simple.rb +9 -0
- data/lib/cranium/extract/strategy.rb +7 -0
- data/lib/cranium/extract.rb +7 -0
- data/lib/cranium/import_strategy/base.rb +55 -0
- data/lib/cranium/import_strategy/delete_insert.rb +40 -0
- data/lib/cranium/import_strategy/delta.rb +8 -0
- data/lib/cranium/import_strategy/merge.rb +50 -0
- data/lib/cranium/import_strategy/truncate_insert.rb +19 -0
- data/lib/cranium/import_strategy.rb +9 -0
- data/lib/cranium/logging.rb +15 -0
- data/lib/cranium/profiling.rb +13 -0
- data/lib/cranium/progress_output.rb +37 -0
- data/lib/cranium/sequel/hash.rb +32 -0
- data/lib/cranium/sequel.rb +5 -0
- data/lib/cranium/source_registry.rb +21 -0
- data/lib/cranium/test_framework/cucumber_table.rb +140 -0
- data/lib/cranium/test_framework/database_entity.rb +29 -0
- data/lib/cranium/test_framework/database_sequence.rb +16 -0
- data/lib/cranium/test_framework/database_table.rb +33 -0
- data/lib/cranium/test_framework/upload_directory.rb +39 -0
- data/lib/cranium/test_framework/world.rb +66 -0
- data/lib/cranium/test_framework.rb +10 -0
- data/lib/cranium/transformation/duplication_index.rb +42 -0
- data/lib/cranium/transformation/index.rb +83 -0
- data/lib/cranium/transformation/join.rb +141 -0
- data/lib/cranium/transformation/sequence.rb +42 -0
- data/lib/cranium/transformation.rb +8 -0
- data/lib/cranium/transformation_record.rb +45 -0
- data/lib/cranium.rb +57 -0
- data/rake/test.rake +31 -0
- data/spec/cranium/application_spec.rb +166 -0
- data/spec/cranium/archiver_spec.rb +44 -0
- data/spec/cranium/command_line_options_spec.rb +32 -0
- data/spec/cranium/configuration_spec.rb +31 -0
- data/spec/cranium/data_importer_spec.rb +55 -0
- data/spec/cranium/data_transformer_spec.rb +16 -0
- data/spec/cranium/database_spec.rb +69 -0
- data/spec/cranium/definition_registry_spec.rb +45 -0
- data/spec/cranium/dimension_manager_spec.rb +63 -0
- data/spec/cranium/dsl/database_definition_spec.rb +23 -0
- data/spec/cranium/dsl/extract_definition_spec.rb +76 -0
- data/spec/cranium/dsl/import_definition_spec.rb +153 -0
- data/spec/cranium/dsl/source_definition_spec.rb +84 -0
- data/spec/cranium/dsl_spec.rb +119 -0
- data/spec/cranium/external_table_spec.rb +71 -0
- data/spec/cranium/extract/storage_spec.rb +125 -0
- data/spec/cranium/logging_spec.rb +37 -0
- data/spec/cranium/sequel/hash_spec.rb +56 -0
- data/spec/cranium/source_registry_spec.rb +31 -0
- data/spec/cranium/test_framework/cucumber_table_spec.rb +144 -0
- data/spec/cranium/transformation/duplication_index_spec.rb +75 -0
- data/spec/cranium/transformation/index_spec.rb +178 -0
- data/spec/cranium/transformation/join_spec.rb +43 -0
- data/spec/cranium/transformation/sequence_spec.rb +83 -0
- data/spec/cranium/transformation_record_spec.rb +78 -0
- data/spec/cranium_spec.rb +53 -0
- data/spec/spec_helper.rb +1 -0
- metadata +362 -0
@@ -0,0 +1,23 @@
|
|
1
|
+
# Describes one named database declared through the DSL.
class Cranium::DSL::DatabaseDefinition

  # Make the AttributeDSL macros (e.g. define_attribute) callable in the class body.
  class << self
    include Cranium::AttributeDSL
  end

  # Name this definition was created with.
  attr_reader :name

  # Accessor generated by AttributeDSL (implementation not visible here).
  define_attribute :connect_to



  # @param name identifier of the database definition
  def initialize(name)
    @name = name
  end



  # Two definitions are equal when both their name and connect_to values match.
  #
  # @param other object responding to #name and #connect_to
  def ==(other)
    name == other.name && connect_to == other.connect_to
  end

end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# Describes a single extract declared through the DSL, together with the
# storage object that remembers previously extracted values.
class Cranium::DSL::ExtractDefinition

  # Make the AttributeDSL macros callable in the class body.
  class << self
    include Cranium::AttributeDSL
  end

  attr_reader :name
  attr_reader :storage

  # Accessors generated by AttributeDSL (implementation not visible here).
  define_attribute :from
  define_attribute :query
  define_attribute :columns
  define_attribute :incrementally_by



  # @param name identifier of the extract; also handed to the storage object
  def initialize(name)
    @name = name
    @storage = Cranium::Extract::Storage.new(name)
  end



  # Looks up the value stored for +field+ and falls back to +default+ only
  # when nothing (nil) is stored; a stored +false+ is returned as is.
  #
  # @param field key passed to the storage lookup
  # @param default value returned when the storage has no value for +field+
  def last_extracted_value_of(field, default = nil)
    previous_value = @storage.last_value_of(field)
    return default if previous_value.nil?

    previous_value
  end

end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# Describes a single import declared through the DSL: the target table, the
# source-to-target field associations and the fields used for merging.
class Cranium::DSL::ImportDefinition

  # Make the AttributeDSL macros callable in the class body.
  class << self
    include Cranium::AttributeDSL
  end


  attr_reader :name
  attr_reader :field_associations
  attr_reader :merge_fields

  # Accessors generated by AttributeDSL (implementation not visible here).
  define_attribute :into
  define_boolean_attribute :truncate_insert
  define_array_attribute :delete_insert_on


  # @param name identifier of the import
  def initialize(name)
    @name = name
    @field_associations = {}
    @merge_fields = {}
  end



  # Adds field associations to the ones collected so far.
  #
  # @param fields [Hash, Symbol] a mapping, or a single symbol that is mapped to itself
  # @raise [ArgumentError] when +fields+ is neither a Hash nor a Symbol
  def put(fields)
    @field_associations.merge! fields_hash(fields)
  end



  # Replaces the merge fields.
  #
  # @param merge_fields [Hash, Symbol] a mapping, or a single symbol that is mapped to itself
  # @raise [ArgumentError] when +merge_fields+ is neither a Hash nor a Symbol
  def merge_on(merge_fields)
    @merge_fields = fields_hash(merge_fields)
  end



  private

  # Normalizes a DSL argument into a Hash.
  #
  # @param fields [Hash, Symbol]
  # @return [Hash] the hash itself, or { symbol => symbol }
  # @raise [ArgumentError] naming the calling DSL method for any other argument type
  def fields_hash(fields)
    case fields
    when Hash
      fields
    when Symbol
      { fields => fields }
    else
      # base_label yields the bare name of the calling method. It does not rely on
      # the textual backtrace format, whose quoting changed in Ruby 3.4.
      calling_method = caller_locations(1, 1).first.base_label
      raise ArgumentError, "Unsupported argument for Import::#{calling_method}"
    end
  end

end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# Describes a CSV source declared through the DSL: file name pattern, CSV
# dialect settings and the typed fields it contains.
class Cranium::DSL::SourceDefinition

  # Make the AttributeDSL macros callable in the class body.
  class << self
    include Cranium::AttributeDSL
  end

  attr_reader :name
  attr_reader :files
  attr_reader :fields

  # Accessors generated by AttributeDSL (implementation not visible here).
  define_attribute :file
  define_attribute :delimiter
  define_attribute :escape
  define_attribute :quote
  define_attribute :encoding



  # Sets up the defaults: "<name>.csv", comma delimiter, double-quote for
  # both escape and quote, UTF-8 encoding and no fields.
  #
  # @param name identifier of the source
  def initialize(name)
    @name = name
    @file = default_file_name
    @fields = {}
    @delimiter = ","
    @escape = '"'
    @quote = '"'
    @encoding = "UTF-8"
  end



  # Expands the file pattern inside the configured upload path and stores
  # the sorted base names of the matches in @files.
  #
  # @return [Array<String>] the sorted base names
  def resolve_files
    pattern = File.join(Cranium.configuration.upload_path, @file)
    base_names = Dir[pattern].map { |path| File.basename(path) }
    @files = base_names.sort
  end



  # Registers a field of the source.
  #
  # @param name field name
  # @param type type associated with the field
  def field(name, type)
    @fields[name] = type
  end



  # @return [Boolean] true when the file setting differs from the default "<name>.csv"
  def file_name_overriden?
    @file != default_file_name
  end



  # Two sources are equal when name, file, CSV dialect settings, encoding
  # and fields all match.
  def ==(other)
    name == other.name &&
      file == other.file &&
      delimiter == other.delimiter &&
      escape == other.escape &&
      quote == other.quote &&
      encoding == other.encoding &&
      fields == other.fields
  end



  private

  # File name used when the definition does not set one explicitly.
  def default_file_name
    "#{@name}.csv"
  end

end
|
data/lib/cranium/dsl.rb
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
# Top-level vocabulary available to Cranium definition scripts. Each method
# delegates to the application, a registry or a worker object.
module Cranium::DSL

  autoload :DatabaseDefinition, 'cranium/dsl/database_definition'
  autoload :ExtractDefinition, 'cranium/dsl/extract_definition'
  autoload :ImportDefinition, 'cranium/dsl/import_definition'
  autoload :SourceDefinition, 'cranium/dsl/source_definition'



  # Registers a database under +name+; the block is forwarded to the registry.
  def database(name, &block)
    Cranium::Database.register_database(name, &block)
  end



  # Registers a source under +name+ with the application.
  def source(name, &block)
    Cranium.application.register_source(name, &block)
  end



  # Builds an extract definition by evaluating the block against it, then
  # runs the extractor on the result.
  def extract(name, &block)
    definition = ExtractDefinition.new(name)
    definition.instance_eval(&block)
    Cranium::Extract::DataExtractor.new.execute(definition)
  end



  # Reads the registered source +name+, forwarding the block to the reader.
  def read(name, &block)
    reader = Cranium::DataReader.new(Cranium.application.sources[name])
    reader.read(&block)
  end



  # Transforms one source into another.
  #
  # @param names [Hash] only the first key (source name) and the first value
  #   (target name) are used
  def transform(names, &block)
    input = Cranium.application.sources[names.keys.first]
    output = Cranium.application.sources[names.values.first]

    Cranium::DataTransformer.new(input, output).transform(&block)
  end



  # Transforms +source+ into options[:into], outputting only the records for
  # which unique_on_fields? holds on the fields listed in options[:by].
  def deduplicate(source, options)
    transform(source => options[:into]) do |record|
      output(record) if unique_on_fields?(*options[:by])
    end
  end



  # Configures and executes a join of two registered sources.
  #
  # @param source_name name of the left source
  # @param options [Hash] :with (right source), :into (target), :match_on,
  #   and optionally :type (defaults to :inner)
  def join(source_name, options)
    joiner = Cranium::Transformation::Join.new
    joiner.source_left = Cranium.application.sources[source_name]
    joiner.source_right = Cranium.application.sources[options[:with]]
    joiner.target = Cranium.application.sources[options[:into]]
    joiner.match_fields = options[:match_on]
    joiner.type = options[:type] || :inner
    joiner.execute
  end



  # Builds an import definition by evaluating the block against it, then
  # hands it to the importer.
  def import(name, &block)
    definition = ImportDefinition.new(name)
    definition.instance_eval(&block)
    Cranium::DataImporter.new.import(definition)
  end



  # Archives the files of every named source.
  def archive(*sources)
    sources.each do |source_name|
      files = Cranium.application.sources[source_name].files
      Cranium::Archiver.archive(*files)
    end
  end



  # Removes the files of every named source.
  def remove(*sources)
    sources.each do |source_name|
      files = Cranium.application.sources[source_name].files
      Cranium::Archiver.remove(*files)
    end
  end



  # @return [Cranium::Transformation::Sequence] a sequence object for +name+
  def sequence(name)
    Cranium::Transformation::Sequence.new(name)
  end



  # Registers the block as an :after hook with the application.
  def after(&block)
    Cranium.application.register_hook(:after, &block)
  end

end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'sequel'
|
2
|
+
Sequel.require 'adapters/shared/postgres'
|
3
|
+
|
4
|
+
# Reopens Sequel's Postgres dataset methods so that the generated INSERT,
# UPDATE and DELETE statements skip the "*_returning_sql" clause.
# NOTE(review): presumably needed because Greenplum rejects RETURNING - confirm.
module Sequel::Postgres::DatasetMethods

  # Clause methods for INSERT without :insert_returning_sql.
  def insert_clause_methods
    remove_returning_from(INSERT_CLAUSE_METHODS, :insert)
  end



  # Clause methods for UPDATE without :update_returning_sql.
  def update_clause_methods
    remove_returning_from(UPDATE_CLAUSE_METHODS, :update)
  end



  # Clause methods for DELETE without :delete_returning_sql.
  def delete_clause_methods
    remove_returning_from(DELETE_CLAUSE_METHODS, :delete)
  end



  private

  # Returns a copy of +methods+ without the :"<type>_returning_sql" entry.
  def remove_returning_from(methods, type)
    returning_clause = :"#{type}_returning_sql"
    methods.reject { |clause_method| clause_method == returning_clause }
  end

end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
# Creates and drops the external table through which the CSV files of a
# source are exposed to the database via gpfdist.
class Cranium::ExternalTable

  # @param source source definition providing name, fields, files and CSV settings
  # @param db_connection connection object responding to #run
  def initialize(source, db_connection)
    @source = source
    @connection = db_connection
  end



  # Issues the CREATE EXTERNAL TABLE statement for the source.
  def create
    statement = <<-sql
      CREATE EXTERNAL TABLE "#{name}" (
          #{field_definitions}
      )
      LOCATION (#{external_location})
      FORMAT 'CSV' (DELIMITER '#{quote @source.delimiter}' ESCAPE '#{quote @source.escape}' QUOTE '#{quote @source.quote}' HEADER)
      ENCODING 'UTF8'
    sql
    @connection.run statement
  end



  # Issues the DROP EXTERNAL TABLE statement for the source.
  def destroy
    @connection.run %Q[DROP EXTERNAL TABLE "#{name}"]
  end



  # @return [Symbol] the source name prefixed with "external_"
  def name
    :"external_#{@source.name}"
  end



  private

  # Column list of the table: one quoted name plus SQL type per source field.
  def field_definitions
    columns = @source.fields.map do |field_name, ruby_type|
      %Q("#{field_name}" #{sql_type_for_ruby_type(ruby_type)})
    end
    columns.join ",\n "
  end



  # Maps a Ruby type (compared by its string form) to an SQL column type;
  # anything unrecognized becomes TEXT.
  def sql_type_for_ruby_type(type)
    case type.to_s
    when "Integer" then "INTEGER"
    when "Float" then "NUMERIC"
    when "Date" then "DATE"
    when "Time" then "TIMESTAMP WITHOUT TIME ZONE"
    when "TrueClass", "FalseClass" then "BOOLEAN"
    else "TEXT"
    end
  end



  # Doubles single quotes so the text can sit inside an SQL string literal.
  def quote(text)
    text.gsub("'", "''")
  end



  # Comma separated list of quoted gpfdist URLs, one per source file.
  def external_location
    urls = @source.files.map do |file_name|
      "'gpfdist://#{Cranium.configuration.gpfdist_url}/#{Cranium.configuration.upload_directory}/#{file_name}'"
    end
    urls.join(', ')
  end

end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
# Picks the extraction strategy for an extract definition and runs it.
class Cranium::Extract::DataExtractor

  # Uses the Simple strategy when the definition has no incrementally_by
  # value and the Incremental strategy otherwise.
  #
  # @param extract_definition definition responding to #incrementally_by
  def execute(extract_definition)
    strategy_class =
      if extract_definition.incrementally_by.nil?
        Cranium::Extract::Strategy::Simple
      else
        Cranium::Extract::Strategy::Incremental
      end

    strategy_class.new.execute(extract_definition)
  end

end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require 'fileutils'
|
3
|
+
|
4
|
+
# Keeps the last extracted values of an extract in a YAML file located in
# the configured storage directory. The file holds the values of all
# extracts, keyed by extract name.
class Cranium::Extract::Storage

  STORAGE_FILE_NAME = "extracts"



  # @param extract_name key under which this extract's values are stored
  def initialize(extract_name)
    @extract_name = extract_name
  end



  # @param field field whose stored value is wanted
  # @return the stored value, or nil when nothing was stored for +field+
  def last_value_of(field)
    stored_values[:last_values][field]
  end



  # Records +value+ for +field+ and writes the whole storage file.
  def save_last_value_of(field, value)
    stored_values[:last_values][field] = value
    save_stored_values
  end



  private

  # Values of this extract. The file is read once, on first access, and an
  # empty entry is created when the extract has none yet.
  def stored_values
    @stored_values = load_stored_values if @stored_values.nil?
    @stored_values[@extract_name] = { last_values: {} } if @stored_values[@extract_name].nil?
    @stored_values[@extract_name]
  end



  # Reads the storage file. A missing or empty file yields an empty Hash
  # (YAML.load returns false for an empty document).
  # NOTE(review): on Psych 4+ YAML.load is the safe loader and may reject
  # stored Date/Time values - confirm against the supported Ruby versions.
  def load_stored_values
    return {} unless File.exist?(storage_file)

    YAML.load(File.read(storage_file)) || {}
  end



  def storage_file
    File.join storage_dir, STORAGE_FILE_NAME
  end



  def storage_dir
    Cranium.configuration.storage_directory
  end



  # Writes the values of all extracts, creating the directory when needed.
  def save_stored_values
    FileUtils.mkdir_p storage_dir unless Dir.exist?(storage_dir)
    File.write storage_file, YAML.dump(@stored_values)
  end

end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'csv'
|
2
|
+
|
3
|
+
# Template for extraction strategies: runs the extract query and writes the
# result to "<extract name>.csv" in the upload path. Subclasses decide how
# the rows are written by overriding #write_dataset_into_file.
class Cranium::Extract::Strategy::Base

  # @param extract_definition definition providing name, from, query and columns
  # @raise [StandardError] when the target file already exists
  def execute(extract_definition)
    target_file_name = "#{extract_definition.name}.csv"
    target_file_path = File.join Cranium.configuration.upload_path, target_file_name

    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    if File.exist?(target_file_path)
      raise StandardError, %Q(Extract halted: a file named "#{target_file_name}" already exists)
    end

    CSV.open target_file_path, "w:UTF-8" do |target_file|
      dataset = Cranium::Database[extract_definition.from].fetch extract_definition.query

      # Header row: the explicitly configured columns, or those of the dataset.
      target_file << (extract_definition.columns || dataset.columns)
      write_dataset_into_file target_file, dataset, extract_definition
    end
  end



  protected

  # Writes the rows of +dataset+ into +target_file+; must be overridden.
  def write_dataset_into_file(target_file, dataset, extract_definition)
    raise "This template method must be overriden in descendants"
  end

end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# Extraction strategy that writes every row and remembers the largest value
# seen in the incremental field.
class Cranium::Extract::Strategy::Incremental < Cranium::Extract::Strategy::Base

  protected

  # Writes each row's values to +target_file+ and, when at least one non-nil
  # incremental value was seen, saves the maximum through the definition's storage.
  #
  # @param target_file object receiving row arrays via <<
  # @param dataset enumerable of rows indexable by the incremental field
  # @param extract_definition definition providing incrementally_by and storage
  def write_dataset_into_file(target_file, dataset, extract_definition)
    incremental_field = extract_definition.incrementally_by
    max_value = nil

    dataset.each do |row|
      value = row[incremental_field]
      # nil values cannot be compared; they are still written but never
      # become (or get compared against) the maximum.
      max_value = value if !value.nil? && (max_value.nil? || value > max_value)
      target_file << row.values
    end

    extract_definition.storage.save_last_value_of(incremental_field, max_value) unless max_value.nil?
  end

end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
# Template for import strategies: exposes the source files through an
# external table, lets the subclass move the data, then drops the table.
class Cranium::ImportStrategy::Base

  attr_reader :import_definition



  # @param import_definition definition providing name, into and field_associations
  def initialize(import_definition)
    @import_definition = import_definition
  end



  # Runs the import through a temporary external table.
  #
  # @return whatever the subclass's #import_from returns (the strategies
  #   visible alongside this class return a row count)
  def import
    source = Cranium.application.sources[import_definition.name]
    external_table = Cranium::ExternalTable.new source, Cranium::Database.connection

    external_table.create
    number_of_items_imported = import_from external_table.name
    external_table.destroy

    number_of_items_imported
  end



  protected

  # Moves the data from the external table to the target; must be overridden.
  #
  # @param external_table name of the external table
  # @raise [StandardError] always, in this base class
  def import_from(external_table)
    # The comma matters: without it Ruby calls an undefined method
    # StandardError(...) and raises NoMethodError instead.
    raise StandardError, "Not implemented"
  end



  def database
    Cranium::Database.connection
  end



  def target_table
    import_definition.into
  end



  # Keys of the field associations, as symbols.
  def source_fields
    import_definition.field_associations.keys.map(&:to_sym)
  end



  # Values of the field associations, as symbols.
  def target_fields
    import_definition.field_associations.values.map(&:to_sym)
  end

end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# Import strategy that deletes the target rows matching the source on the
# delete_insert_on fields, then inserts every source row.
class Cranium::ImportStrategy::DeleteInsert < Cranium::ImportStrategy::Base

  # @param source_table name of the table to import from
  # @return the row count of the source table
  def import_from(source_table)
    @source_table = source_table

    delete_existing_records
    import_new_records
    database[@source_table].count
  end



  private

  # Deletes from the target, aliased "target", joined with the source,
  # aliased "source", on the qualified delete fields.
  def delete_existing_records
    tables = [Sequel.as(target_table, "target"), Sequel.as(@source_table, "source")]
    matching_rows = delete_by_fields.qualify(keys_with: :source, values_with: :target)

    database.from(*tables).where(matching_rows).delete
  end



  # Inserts the selected source fields into the target fields.
  def import_new_records
    rows_to_insert = database[@source_table].select(*source_fields)
    database.run(database[target_table].insert_sql(target_fields, rows_to_insert))
  end



  def delete_by_fields
    Cranium::Sequel::Hash[delete_field_mapping]
  end



  # Field associations whose target field is listed in delete_insert_on.
  def delete_field_mapping
    import_definition.field_associations.select do |_, target_field|
      import_definition.delete_insert_on.include?(target_field)
    end
  end

end
|
@@ -0,0 +1,8 @@
|
|
1
|
+
# Import strategy that simply inserts every source row into the target.
class Cranium::ImportStrategy::Delta < Cranium::ImportStrategy::Base

  # @param source_table name of the table to import from
  # @return the row count of the source table
  def import_from(source_table)
    rows_to_insert = database[source_table].select(*source_fields)
    insert_statement = database[import_definition.into].insert_sql(target_fields, rows_to_insert)

    database.run(insert_statement)
    database[source_table].count
  end

end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# Import strategy that updates the target rows matching the source on the
# merge fields and inserts the source rows that have no match.
class Cranium::ImportStrategy::Merge < Cranium::ImportStrategy::Base

  # @param source_table name of the table to import from
  # @return the row count of the source table
  def import_from(source_table)
    @source_table = source_table

    update_existing_records
    import_new_records
    database[@source_table].count
  end



  private

  # Updates the non-merge fields of the target, aliased "target", from the
  # source, aliased "source", for rows matching on the merge fields.
  def update_existing_records
    tables = [Sequel.as(target_table, "target"), Sequel.as(@source_table, "source")]
    matching_rows = merge_fields.qualify(keys_with: :source, values_with: :target)
    assignments = not_merge_fields.qualify(keys_with: :source).invert

    database.from(*tables).where(matching_rows).update(assignments)
  end



  # Inserts the source rows for which the left outer join on the merge
  # fields finds no target row (the target-side merge columns are NULL).
  def import_new_records
    unmatched_rows = database[@source_table]
      .left_outer_join(target_table, merge_fields.invert)
      .where(merge_fields_are_empty)
      .select(*source_fields)
      .qualify

    database.run(database[target_table].insert_sql(target_fields, unmatched_rows))
  end



  def merge_fields
    Cranium::Sequel::Hash[import_definition.merge_fields]
  end



  # Field associations whose source field is not one of the merge keys.
  def not_merge_fields
    merge_keys = merge_fields.keys
    remaining = import_definition.field_associations.reject { |source_field, _| merge_keys.include?(source_field) }
    Cranium::Sequel::Hash[remaining]
  end



  # Condition hash mapping every qualified target merge column to nil.
  def merge_fields_are_empty
    merge_fields.qualified_values(target_table).each_with_object({}) do |column, conditions|
      conditions[column] = nil
    end
  end

end
|