cranium 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +21 -0
- data/.ruby-version +1 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +29 -0
- data/Rakefile +3 -0
- data/Vagrantfile +24 -0
- data/bin/cranium +9 -0
- data/config/cucumber.yml +9 -0
- data/cranium.gemspec +26 -0
- data/db/setup.sql +8 -0
- data/docker-compose.yml +8 -0
- data/examples/config.rb +14 -0
- data/examples/deduplication.rb +27 -0
- data/examples/import_csv_with_field_lookup_inserting_new_dimension_keys.rb +26 -0
- data/examples/incremental_extract.rb +17 -0
- data/examples/lookup_with_multiple_fields.rb +25 -0
- data/features/archive.feature +49 -0
- data/features/extract/incremental_extract.feature +56 -0
- data/features/extract/simple_extract.feature +85 -0
- data/features/import/import_csv_to_database_as_delta.feature +38 -0
- data/features/import/import_csv_to_database_with_delete_insert_merging.feature +51 -0
- data/features/import/import_csv_to_database_with_truncate_insert.feature +49 -0
- data/features/import/import_csv_to_database_with_update_merging.feature +46 -0
- data/features/import/import_csv_with_always_inserting_new_dimension_keys.feature +137 -0
- data/features/import/import_csv_with_field_lookup_inserting_new_dimension_keys.feature +62 -0
- data/features/import/import_csv_with_field_lookup_transformation.feature +125 -0
- data/features/import/import_csv_with_transformation.feature +55 -0
- data/features/import/import_multiple_csv_files_without_transformations.feature +44 -0
- data/features/import/import_with_load_id_from_sequence.feature +53 -0
- data/features/import/import_with_lookup_from_multiple_fields.feature +64 -0
- data/features/read.feature +56 -0
- data/features/remove.feature +44 -0
- data/features/restore_database_connection.feature +55 -0
- data/features/step_definitions/database_table_steps.rb +40 -0
- data/features/step_definitions/definition_steps.rb +3 -0
- data/features/step_definitions/execution_steps.rb +23 -0
- data/features/step_definitions/file_steps.rb +39 -0
- data/features/support/class_extensions.rb +24 -0
- data/features/support/env.rb +27 -0
- data/features/support/randomize.rb +22 -0
- data/features/support/stop_on_first_error.rb +5 -0
- data/features/transform/deduplication.feature +37 -0
- data/features/transform/empty_transformation.feature +72 -0
- data/features/transform/join.feature +180 -0
- data/features/transform/join_multiple_files_into_one_output_file.feature +46 -0
- data/features/transform/output_rows.feature +70 -0
- data/features/transform/projection.feature +34 -0
- data/features/transform/raw_ruby_transformation.feature +69 -0
- data/features/transform/split_field.feature +39 -0
- data/lib/cranium/application.rb +104 -0
- data/lib/cranium/archiver.rb +36 -0
- data/lib/cranium/attribute_dsl.rb +43 -0
- data/lib/cranium/command_line_options.rb +27 -0
- data/lib/cranium/configuration.rb +33 -0
- data/lib/cranium/data_importer.rb +35 -0
- data/lib/cranium/data_reader.rb +48 -0
- data/lib/cranium/data_transformer.rb +126 -0
- data/lib/cranium/database.rb +36 -0
- data/lib/cranium/definition_registry.rb +21 -0
- data/lib/cranium/dimension_manager.rb +65 -0
- data/lib/cranium/dsl/database_definition.rb +23 -0
- data/lib/cranium/dsl/extract_definition.rb +28 -0
- data/lib/cranium/dsl/import_definition.rb +50 -0
- data/lib/cranium/dsl/source_definition.rb +67 -0
- data/lib/cranium/dsl.rb +100 -0
- data/lib/cranium/extensions/file.rb +7 -0
- data/lib/cranium/extensions/sequel_greenplum.rb +30 -0
- data/lib/cranium/external_table.rb +75 -0
- data/lib/cranium/extract/data_extractor.rb +11 -0
- data/lib/cranium/extract/storage.rb +57 -0
- data/lib/cranium/extract/strategy/base.rb +27 -0
- data/lib/cranium/extract/strategy/incremental.rb +16 -0
- data/lib/cranium/extract/strategy/simple.rb +9 -0
- data/lib/cranium/extract/strategy.rb +7 -0
- data/lib/cranium/extract.rb +7 -0
- data/lib/cranium/import_strategy/base.rb +55 -0
- data/lib/cranium/import_strategy/delete_insert.rb +40 -0
- data/lib/cranium/import_strategy/delta.rb +8 -0
- data/lib/cranium/import_strategy/merge.rb +50 -0
- data/lib/cranium/import_strategy/truncate_insert.rb +19 -0
- data/lib/cranium/import_strategy.rb +9 -0
- data/lib/cranium/logging.rb +15 -0
- data/lib/cranium/profiling.rb +13 -0
- data/lib/cranium/progress_output.rb +37 -0
- data/lib/cranium/sequel/hash.rb +32 -0
- data/lib/cranium/sequel.rb +5 -0
- data/lib/cranium/source_registry.rb +21 -0
- data/lib/cranium/test_framework/cucumber_table.rb +140 -0
- data/lib/cranium/test_framework/database_entity.rb +29 -0
- data/lib/cranium/test_framework/database_sequence.rb +16 -0
- data/lib/cranium/test_framework/database_table.rb +33 -0
- data/lib/cranium/test_framework/upload_directory.rb +39 -0
- data/lib/cranium/test_framework/world.rb +66 -0
- data/lib/cranium/test_framework.rb +10 -0
- data/lib/cranium/transformation/duplication_index.rb +42 -0
- data/lib/cranium/transformation/index.rb +83 -0
- data/lib/cranium/transformation/join.rb +141 -0
- data/lib/cranium/transformation/sequence.rb +42 -0
- data/lib/cranium/transformation.rb +8 -0
- data/lib/cranium/transformation_record.rb +45 -0
- data/lib/cranium.rb +57 -0
- data/rake/test.rake +31 -0
- data/spec/cranium/application_spec.rb +166 -0
- data/spec/cranium/archiver_spec.rb +44 -0
- data/spec/cranium/command_line_options_spec.rb +32 -0
- data/spec/cranium/configuration_spec.rb +31 -0
- data/spec/cranium/data_importer_spec.rb +55 -0
- data/spec/cranium/data_transformer_spec.rb +16 -0
- data/spec/cranium/database_spec.rb +69 -0
- data/spec/cranium/definition_registry_spec.rb +45 -0
- data/spec/cranium/dimension_manager_spec.rb +63 -0
- data/spec/cranium/dsl/database_definition_spec.rb +23 -0
- data/spec/cranium/dsl/extract_definition_spec.rb +76 -0
- data/spec/cranium/dsl/import_definition_spec.rb +153 -0
- data/spec/cranium/dsl/source_definition_spec.rb +84 -0
- data/spec/cranium/dsl_spec.rb +119 -0
- data/spec/cranium/external_table_spec.rb +71 -0
- data/spec/cranium/extract/storage_spec.rb +125 -0
- data/spec/cranium/logging_spec.rb +37 -0
- data/spec/cranium/sequel/hash_spec.rb +56 -0
- data/spec/cranium/source_registry_spec.rb +31 -0
- data/spec/cranium/test_framework/cucumber_table_spec.rb +144 -0
- data/spec/cranium/transformation/duplication_index_spec.rb +75 -0
- data/spec/cranium/transformation/index_spec.rb +178 -0
- data/spec/cranium/transformation/join_spec.rb +43 -0
- data/spec/cranium/transformation/sequence_spec.rb +83 -0
- data/spec/cranium/transformation_record_spec.rb +78 -0
- data/spec/cranium_spec.rb +53 -0
- data/spec/spec_helper.rb +1 -0
- metadata +362 -0
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Describes one named database: its name plus the `connect_to` attribute
# declared through the Cranium::AttributeDSL macros.
class Cranium::DSL::DatabaseDefinition

  # The attribute macros are mixed into the singleton class so that
  # `define_attribute` can be called in the class body below.
  class << self
    include Cranium::AttributeDSL
  end

  attr_reader :name

  define_attribute :connect_to

  # Stores the given name; it is exposed through #name.
  def initialize(name)
    @name = name
  end

  # Two definitions are equal when both `name` and `connect_to` match.
  # `other` has to respond to `name`; `connect_to` is only consulted when
  # the names are equal.
  def ==(other)
    name == other.name && connect_to == other.connect_to
  end

end
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# Describes one extract: where to read from, which query to run, which
# columns to emit and (optionally) the field used for incremental extracts.
# Every definition owns a Cranium::Extract::Storage keyed by its name.
class Cranium::DSL::ExtractDefinition

  # Make the attribute macros available in the class body.
  class << self
    include Cranium::AttributeDSL
  end

  attr_reader :name, :storage

  define_attribute :from
  define_attribute :query
  define_attribute :columns
  define_attribute :incrementally_by

  # Stores the name and creates the storage object for this extract.
  def initialize(name)
    @name = name
    @storage = Cranium::Extract::Storage.new(name)
  end

  # Returns the value the storage reports for `field`, or `default` when
  # the storage returns nil. A stored `false` is returned as-is, which is
  # why the check is on nil rather than on truthiness.
  def last_extracted_value_of(field, default = nil)
    previous = @storage.last_value_of(field)
    return default if previous.nil?

    previous
  end

end
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# Describes one import: the target (`into`), the import mode switches
# (`truncate_insert`, `delete_insert_on`), the source-to-target field
# associations and the fields used for merging.
class Cranium::DSL::ImportDefinition

  # Make the attribute macros available in the class body.
  class << self
    include Cranium::AttributeDSL
  end

  attr_reader :name
  attr_reader :field_associations
  attr_reader :merge_fields

  define_attribute :into
  define_boolean_attribute :truncate_insert
  define_array_attribute :delete_insert_on

  # Starts with no field associations and no merge fields.
  def initialize(name)
    @name = name
    @field_associations = {}
    @merge_fields = {}
  end

  # Adds field associations. Accepts a Hash, or a single Symbol which is
  # associated with itself. Repeated calls accumulate.
  #
  # Raises ArgumentError for any other argument type.
  def put(fields)
    @field_associations.merge! fields_hash(fields)
  end

  # Sets the merge fields, replacing any previous value. Accepts the same
  # argument shapes as #put.
  #
  # Raises ArgumentError for any other argument type.
  def merge_on(merge_fields)
    @merge_fields = fields_hash(merge_fields)
  end

  private

  # Normalizes a Hash or Symbol argument into a Hash.
  #
  # The error message names the calling DSL method. The name is taken from
  # `caller_locations` instead of pattern-matching the textual backtrace,
  # because the backtrace format is not stable across Ruby versions
  # (Ruby 3.4 replaced the opening backtick with a single quote, which made
  # the old pattern return nil and raise NoMethodError here).
  def fields_hash(fields)
    case fields
    when Hash
      fields
    when Symbol
      { fields => fields }
    else
      calling_method = caller_locations(1, 1).first.base_label
      raise ArgumentError, "Unsupported argument for Import::#{calling_method}"
    end
  end

end
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# Describes one CSV source: the file name (or glob) to look for, the CSV
# dialect settings and the declared fields with their types.
class Cranium::DSL::SourceDefinition

  # Make the attribute macros available in the class body.
  class << self
    include Cranium::AttributeDSL
  end

  attr_reader :name
  attr_reader :files
  attr_reader :fields

  define_attribute :file
  define_attribute :delimiter
  define_attribute :escape
  define_attribute :quote
  define_attribute :encoding

  # Sets the defaults: "<name>.csv" as the file, comma delimiter, double
  # quote for both escape and quote, UTF-8 encoding and no fields.
  def initialize(name)
    @name = name
    @file = default_file_name
    @fields = {}
    @delimiter = ","
    @escape = '"'
    @quote = '"'
    @encoding = "UTF-8"
  end

  # Globs the configured upload path for `@file` and remembers the sorted
  # base names of the matches in `@files`. Returns that list.
  def resolve_files
    pattern = File.join(Cranium.configuration.upload_path, @file)
    @files = Dir[pattern].map { |path| File.basename(path) }.sort
  end

  # Declares a field of the source together with its type.
  def field(name, type)
    @fields[name] = type
  end

  # True when the file name differs from the default "<name>.csv".
  def file_name_overriden?
    @file != default_file_name
  end

  # Two definitions are equal when all of the attributes listed below are
  # equal. Comparison stops at the first attribute that differs.
  def ==(other)
    [:name, :file, :delimiter, :escape, :quote, :encoding, :fields].all? do |attribute|
      public_send(attribute) == other.public_send(attribute)
    end
  end

  private

  def default_file_name
    "#{@name}.csv"
  end

end
|
data/lib/cranium/dsl.rb
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# The verbs available in Cranium definition files. Each method is a thin
# entry point that delegates to the object doing the actual work.
module Cranium::DSL

  autoload :DatabaseDefinition, 'cranium/dsl/database_definition'
  autoload :ExtractDefinition, 'cranium/dsl/extract_definition'
  autoload :ImportDefinition, 'cranium/dsl/import_definition'
  autoload :SourceDefinition, 'cranium/dsl/source_definition'

  # Registers a database under `name`; the block is forwarded untouched.
  def database(name, &block)
    Cranium::Database.register_database(name, &block)
  end

  # Registers a source under `name` with the application.
  def source(name, &block)
    Cranium.application.register_source(name, &block)
  end

  # Builds an ExtractDefinition, evaluates the block in its context and
  # runs the extract.
  def extract(name, &block)
    definition = ExtractDefinition.new(name)
    definition.instance_eval(&block)
    Cranium::Extract::DataExtractor.new.execute(definition)
  end

  # Reads the registered source `name`, handing the block to the reader.
  def read(name, &block)
    reader = Cranium::DataReader.new(Cranium.application.sources[name])
    reader.read(&block)
  end

  # Transforms one source into another. `names` is a Hash whose first
  # key/value pair names the source and the target respectively.
  def transform(names, &block)
    source = Cranium.application.sources[names.keys.first]
    target = Cranium.application.sources[names.values.first]

    transformer = Cranium::DataTransformer.new(source, target)
    transformer.transform(&block)
  end

  # Copies `source` into `options[:into]`, outputting only the records for
  # which `unique_on_fields?` holds on the fields given in `options[:by]`.
  def deduplicate(source, options)
    transform(source => options[:into]) do |record|
      output(record) if unique_on_fields?(*options[:by])
    end
  end

  # Joins `source_name` with `options[:with]` into `options[:into]`,
  # matching on `options[:match_on]`. The join type defaults to :inner.
  def join(source_name, options)
    transformation = Cranium::Transformation::Join.new
    transformation.source_left = Cranium.application.sources[source_name]
    transformation.source_right = Cranium.application.sources[options[:with]]
    transformation.target = Cranium.application.sources[options[:into]]
    transformation.match_fields = options[:match_on]
    transformation.type = options[:type] || :inner
    transformation.execute
  end

  # Builds an ImportDefinition, evaluates the block in its context and
  # runs the import.
  def import(name, &block)
    definition = ImportDefinition.new(name)
    definition.instance_eval(&block)
    Cranium::DataImporter.new.import(definition)
  end

  # Archives the files of every named source.
  def archive(*sources)
    sources.each do |source_name|
      files = Cranium.application.sources[source_name].files
      Cranium::Archiver.archive(*files)
    end
  end

  # Removes the files of every named source.
  def remove(*sources)
    sources.each do |source_name|
      files = Cranium.application.sources[source_name].files
      Cranium::Archiver.remove(*files)
    end
  end

  # Returns a new sequence object for `name`.
  def sequence(name)
    Cranium::Transformation::Sequence.new(name)
  end

  # Registers the block as an :after hook on the application.
  def after(&block)
    Cranium.application.register_hook(:after, &block)
  end

end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
require 'sequel'
|
|
2
|
+
Sequel.require 'adapters/shared/postgres'
|
|
3
|
+
|
|
4
|
+
# Reopens Sequel's PostgreSQL dataset methods so that the `*_returning_sql`
# clause is dropped from the INSERT, UPDATE and DELETE clause lists.
# NOTE(review): presumably needed because the target database does not
# support RETURNING - confirm against the adapter in use.
module Sequel::Postgres::DatasetMethods

  def insert_clause_methods
    remove_returning_from(INSERT_CLAUSE_METHODS, :insert)
  end

  def update_clause_methods
    remove_returning_from(UPDATE_CLAUSE_METHODS, :update)
  end

  def delete_clause_methods
    remove_returning_from(DELETE_CLAUSE_METHODS, :delete)
  end

  private

  # Returns a copy of `methods` without the `<type>_returning_sql` entry.
  def remove_returning_from(methods, type)
    methods.reject do |clause_method|
      clause_method == :"#{type}_returning_sql"
    end
  end

end
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# Creates and drops the external table through which the files of a source
# are exposed to the database.
class Cranium::ExternalTable

  # source        - the source definition (name, fields, files, CSV settings)
  # db_connection - the object the SQL statements are run on
  def initialize(source, db_connection)
    @source = source
    @connection = db_connection
  end

  # Runs the CREATE EXTERNAL TABLE statement built from the source.
  def create
    statement = <<-sql
CREATE EXTERNAL TABLE "#{name}" (
#{field_definitions}
)
LOCATION (#{external_location})
FORMAT 'CSV' (DELIMITER '#{quote @source.delimiter}' ESCAPE '#{quote @source.escape}' QUOTE '#{quote @source.quote}' HEADER)
ENCODING 'UTF8'
    sql
    @connection.run statement
  end

  # Drops the external table.
  def destroy
    @connection.run %Q[DROP EXTERNAL TABLE "#{name}"]
  end

  # The table name as a Symbol: the source name prefixed with "external_".
  def name
    :"external_#{@source.name}"
  end

  private

  # One `"column" TYPE` entry per declared source field, comma separated.
  def field_definitions
    columns = @source.fields.map do |field_name, type|
      %Q("#{field_name}" #{sql_type_for_ruby_type(type)})
    end
    columns.join(",\n ")
  end

  # Maps a Ruby type (compared by its string form) to a SQL column type.
  # Anything not listed is treated as TEXT.
  def sql_type_for_ruby_type(type)
    case type.to_s
    when "Integer" then "INTEGER"
    when "Float" then "NUMERIC"
    when "Date" then "DATE"
    when "Time" then "TIMESTAMP WITHOUT TIME ZONE"
    when "TrueClass", "FalseClass" then "BOOLEAN"
    else "TEXT"
    end
  end

  # Doubles single quotes so the text can sit inside a SQL string literal.
  def quote(text)
    text.gsub("'", "''")
  end

  # A comma separated list of quoted gpfdist URLs, one per source file.
  def external_location
    urls = @source.files.map do |file_name|
      "'gpfdist://#{Cranium.configuration.gpfdist_url}/#{Cranium.configuration.upload_directory}/#{file_name}'"
    end
    urls.join(', ')
  end

end
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# Picks the extract strategy for a definition and runs it.
class Cranium::Extract::DataExtractor

  # Uses the Simple strategy when the definition has no `incrementally_by`
  # field and the Incremental strategy otherwise. Returns whatever the
  # strategy's `execute` returns.
  def execute(extract_definition)
    strategy =
      if extract_definition.incrementally_by.nil?
        Cranium::Extract::Strategy::Simple.new
      else
        Cranium::Extract::Strategy::Incremental.new
      end

    strategy.execute(extract_definition)
  end

end
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
require 'yaml'
|
|
2
|
+
require 'fileutils'
|
|
3
|
+
|
|
4
|
+
# Persists, per extract name, the last extracted value of each field in a
# YAML file inside the configured storage directory.
class Cranium::Extract::Storage

  STORAGE_FILE_NAME = "extracts"

  # extract_name - the key under which this extract's values are stored
  def initialize(extract_name)
    @extract_name = extract_name
  end

  # Returns the stored last value of `field`, or nil when none is stored.
  def last_value_of(field)
    stored_values[:last_values][field]
  end

  # Records `value` as the last value of `field` and writes the storage
  # file immediately.
  def save_last_value_of(field, value)
    stored_values[:last_values][field] = value
    save_stored_values
  end

  private

  # The section of the storage that belongs to this extract. The whole
  # file is loaded on first use and kept in memory afterwards; a missing
  # section is created empty.
  def stored_values
    @stored_values = load_stored_values if @stored_values.nil?
    @stored_values[@extract_name] = { last_values: {} } if @stored_values[@extract_name].nil?
    @stored_values[@extract_name]
  end

  # Reads the storage file, returning an empty Hash when the file is
  # missing or empty.
  #
  # The file is written by this class and contains Symbol keys (and
  # whatever value types the extracted fields have), so it needs a full
  # YAML load. Where `YAML.load` is safe-by-default (Psych 4) that means
  # `unsafe_load`; older versions only have `load`.
  # NOTE(review): a full load must never be pointed at untrusted input.
  def load_stored_values
    return {} unless File.exist?(storage_file)

    content = File.read(storage_file)
    loaded = YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(content) : YAML.load(content)
    loaded || {}
  end

  def storage_file
    File.join storage_dir, STORAGE_FILE_NAME
  end

  def storage_dir
    Cranium.configuration.storage_directory
  end

  # Writes the complete in-memory storage (all extracts) back to disk,
  # creating the storage directory first if necessary.
  def save_stored_values
    FileUtils.mkdir_p storage_dir unless Dir.exist?(storage_dir)
    File.write storage_file, YAML.dump(@stored_values)
  end

end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
require 'csv'
|
|
2
|
+
|
|
3
|
+
# Template for extract strategies: opens the target CSV, runs the query,
# writes the header row and leaves writing the data rows to subclasses.
class Cranium::Extract::Strategy::Base

  # Extracts into "<definition name>.csv" under the configured upload path.
  #
  # Raises StandardError when a file with that name already exists, so an
  # earlier extract is never overwritten.
  def execute(extract_definition)
    target_file_name = "#{extract_definition.name}.csv"
    target_file_path = File.join Cranium.configuration.upload_path, target_file_name

    # File.exist? is used because File.exists? no longer exists in current Rubies.
    if File.exist?(target_file_path)
      raise StandardError, %Q(Extract halted: a file named "#{target_file_name}" already exists)
    end

    CSV.open target_file_path, "w:UTF-8" do |target_file|
      dataset = Cranium::Database[extract_definition.from].fetch extract_definition.query

      # Header row: the explicitly configured columns win over the dataset's own.
      target_file << (extract_definition.columns || dataset.columns)
      write_dataset_into_file target_file, dataset, extract_definition
    end
  end

  protected

  # Writes the rows of `dataset` into `target_file`. Subclasses must
  # override this; the base implementation always raises.
  def write_dataset_into_file(target_file, dataset, extract_definition)
    raise "This template method must be overriden in descendants"
  end

end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Extract strategy that, besides writing every row, remembers the largest
# value seen in the definition's `incrementally_by` field.
class Cranium::Extract::Strategy::Incremental < Cranium::Extract::Strategy::Base

  protected

  # Writes every row's values to `target_file` and afterwards saves the
  # maximum of the incremental field in the definition's storage. Nothing
  # is saved when no row carried a non-nil value.
  #
  # Rows whose incremental field is nil are still written but are ignored
  # when computing the maximum. Comparing nil would otherwise raise
  # part-way through the extract, and only when the nil row came after a
  # non-nil one.
  def write_dataset_into_file(target_file, dataset, extract_definition)
    incremental_field = extract_definition.incrementally_by
    max_value = nil

    dataset.each do |row|
      value = row[incremental_field]
      max_value = value if !value.nil? && (max_value.nil? || value > max_value)
      target_file << row.values
    end

    extract_definition.storage.save_last_value_of incremental_field, max_value unless max_value.nil?
  end

end
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# Template for import strategies: exposes the source files through an
# external table, lets the subclass move the data, then drops the table.
class Cranium::ImportStrategy::Base

  attr_reader :import_definition

  def initialize(import_definition)
    @import_definition = import_definition
  end

  # Runs the import and returns what the subclass' #import_from returned
  # (the strategies in this package return a row count).
  #
  # The external table is dropped only after a successful import; cleanup
  # on failure is left to the caller.
  # NOTE(review): confirm that the caller wraps this in a transaction.
  def import
    source = Cranium.application.sources[import_definition.name]
    external_table = Cranium::ExternalTable.new source, Cranium::Database.connection

    external_table.create
    number_of_items_imported = import_from external_table.name
    external_table.destroy

    number_of_items_imported
  end

  protected

  # Moves the data from the named external table into the target table.
  # Subclasses must override this.
  #
  # Raises StandardError ("Not implemented") in the base class.
  def import_from(external_table)
    # The comma matters: without it Ruby parses `StandardError "..."` as a
    # call to a method named StandardError and raises NoMethodError instead.
    raise StandardError, "Not implemented"
  end

  def database
    Cranium::Database.connection
  end

  def target_table
    import_definition.into
  end

  # Keys of the field associations (the source side), as Symbols.
  def source_fields
    import_definition.field_associations.keys.map(&:to_sym)
  end

  # Values of the field associations (the target side), as Symbols.
  def target_fields
    import_definition.field_associations.values.map(&:to_sym)
  end

end
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# Import strategy that first deletes the target rows matching the source
# on the `delete_insert_on` fields and then inserts every source row.
class Cranium::ImportStrategy::DeleteInsert < Cranium::ImportStrategy::Base

  # Returns the number of rows in the source table.
  def import_from(source_table)
    @source_table = source_table

    delete_existing_records
    import_new_records
    database[@source_table].count
  end

  private

  # Deletes from the target (aliased "target") every row that matches a
  # row of the source (aliased "source") on the delete fields.
  def delete_existing_records
    target = Sequel.as(target_table, "target")
    source = Sequel.as(@source_table, "source")
    match_condition = delete_by_fields.qualify(keys_with: :source, values_with: :target)

    database.from(target, source).where(match_condition).delete
  end

  # Inserts the associated fields of all source rows into the target.
  def import_new_records
    rows_to_insert = database[@source_table].select(*source_fields)
    insert_statement = database[target_table].insert_sql(target_fields, rows_to_insert)

    database.run insert_statement
  end

  # The delete field mapping wrapped in a Cranium::Sequel::Hash.
  def delete_by_fields
    Cranium::Sequel::Hash[delete_field_mapping]
  end

  # The field associations whose target field is listed in
  # `delete_insert_on`.
  def delete_field_mapping
    import_definition.field_associations.select do |_, target_field|
      import_definition.delete_insert_on.include?(target_field)
    end
  end

end
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
# Import strategy that simply appends every source row to the target.
class Cranium::ImportStrategy::Delta < Cranium::ImportStrategy::Base

  # Inserts the associated fields of all source rows into the target
  # table and returns the number of rows in the source table.
  def import_from(source_table)
    rows_to_insert = database[source_table].select(*source_fields)
    insert_statement = database[target_table].insert_sql(target_fields, rows_to_insert)

    database.run insert_statement
    database[source_table].count
  end

end
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# Import strategy that updates the target rows matching the source on the
# merge fields and inserts the source rows that have no match.
class Cranium::ImportStrategy::Merge < Cranium::ImportStrategy::Base

  # Returns the number of rows in the source table.
  def import_from(source_table)
    @source_table = source_table

    update_existing_records
    import_new_records
    database[@source_table].count
  end

  private

  # Updates the non-merge fields of every target row (aliased "target")
  # that matches a source row (aliased "source") on the merge fields.
  def update_existing_records
    target = Sequel.as(target_table, "target")
    source = Sequel.as(@source_table, "source")
    match_condition = merge_fields.qualify(keys_with: :source, values_with: :target)
    assignments = not_merge_fields.qualify(keys_with: :source).invert

    database.from(target, source).where(match_condition).update(assignments)
  end

  # Inserts the source rows for which the left outer join against the
  # target finds no partner, i.e. the target's merge fields are all NULL.
  def import_new_records
    unmatched_rows = database[@source_table].
      left_outer_join(target_table, merge_fields.invert).
      where(merge_fields_are_empty).
      select(*source_fields).
      qualify

    database.run database[target_table].insert_sql(target_fields, unmatched_rows)
  end

  # The definition's merge fields wrapped in a Cranium::Sequel::Hash.
  def merge_fields
    Cranium::Sequel::Hash[import_definition.merge_fields]
  end

  # The field associations whose key is not one of the merge field keys.
  def not_merge_fields
    merge_keys = merge_fields.keys
    remaining = import_definition.field_associations.reject { |key, _| merge_keys.include?(key) }

    Cranium::Sequel::Hash[remaining]
  end

  # A condition Hash mapping each merge field value, qualified with the
  # target table, to nil.
  def merge_fields_are_empty
    qualified = merge_fields.qualified_values(target_table)
    Hash[qualified.map { |field| [field, nil] }]
  end

end
|