cranium 0.4.2 → 0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.env +1 -0
- data/.gitignore +1 -0
- data/.rspec +4 -0
- data/LICENSE.txt +1 -1
- data/README.md +2 -14
- data/cranium.gemspec +3 -2
- data/features/archive.feature +6 -0
- data/features/import/delete_from_table_based_on_csv.feature +39 -0
- data/features/step_definitions/database_table_steps.rb +2 -2
- data/features/support/env.rb +1 -0
- data/lib/cranium/archiver.rb +25 -24
- data/lib/cranium/data_importer.rb +4 -2
- data/lib/cranium/database.rb +16 -10
- data/lib/cranium/dsl.rb +8 -0
- data/lib/cranium/dsl/database_definition.rb +4 -0
- data/lib/cranium/dsl/import_definition.rb +2 -0
- data/lib/cranium/external_table.rb +19 -24
- data/lib/cranium/import_strategy.rb +1 -0
- data/lib/cranium/import_strategy/base.rb +1 -1
- data/lib/cranium/import_strategy/delete.rb +34 -0
- data/lib/cranium/test_framework/world.rb +3 -1
- data/spec/cranium/archiver_spec.rb +55 -25
- data/spec/cranium/data_importer_spec.rb +26 -5
- data/spec/cranium/database_spec.rb +34 -0
- data/spec/cranium/dsl/database_definition_spec.rb +34 -0
- data/spec/cranium/dsl/import_definition_spec.rb +8 -0
- data/spec/cranium/dsl/source_definition_spec.rb +2 -2
- data/spec/cranium/dsl_spec.rb +16 -0
- data/spec/cranium/external_table_spec.rb +46 -19
- metadata +30 -7
- data/.ruby-version +0 -1
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 | 
            -
             | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 2 | 
            +
            SHA256:
         | 
| 3 | 
            +
              metadata.gz: 3d6f7888fe46d32c156a48c081e57ddf8942d483b6a42c1b69faa4e0f276a128
         | 
| 4 | 
            +
              data.tar.gz: 9f49d912ca7cf12a8d3e90e1e6d00565099f4bf0fe4cee90e29f439c345a86df
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: fb4ed093460aaf4eddde16e0ed1236e34d232ef128047683111459a836c7c544a4c5fe3ad5d05abb6609927affaf286c88f8269695153f21c1513b4c802445ea
         | 
| 7 | 
            +
              data.tar.gz: 65730273e55b87a3ba2b35f303f21613af3e7cf9328a8f7612cee60c37c54e606f2d50a3891597275ea084ce4405bc445c3d7277d2f5cb3d31b7be1ae805a740
         | 
    
        data/.gitignore
    CHANGED
    
    
    
        data/.rspec
    ADDED
    
    
    
        data/LICENSE.txt
    CHANGED
    
    
    
        data/README.md
    CHANGED
    
    | @@ -18,21 +18,9 @@ Or install it yourself as: | |
| 18 18 |  | 
| 19 19 | 
             
            ## Development
         | 
| 20 20 |  | 
| 21 | 
            -
            start up the db
         | 
| 22 | 
            -
             | 
| 23 | 
            -
                docker-compose create && docker-compose start
         | 
| 24 | 
            -
                
         | 
| 25 | 
            -
            find out what's the ip is (in case you're using native docker)
         | 
| 26 | 
            -
             | 
| 27 | 
            -
                docker-compose ps 
         | 
| 28 | 
            -
                
         | 
| 29 | 
            -
            (if using docker-machine use the machine's ip)
         | 
| 30 | 
            -
            setup the DATABASE_HOST enviroment variable to this IP (192.168.64.4 in my case)
         | 
| 31 | 
            -
                
         | 
| 32 | 
            -
                export DATABASE_HOST=192.168.64.4
         | 
| 33 | 
            -
                
         | 
| 34 | 
            -
            Now, your ready to run the integration tests :)
         | 
| 21 | 
            +
            start up the db:
         | 
| 35 22 |  | 
| 23 | 
            +
                $ docker-compose up -d
         | 
| 36 24 |  | 
| 37 25 | 
             
            ## Contributing
         | 
| 38 26 |  | 
    
        data/cranium.gemspec
    CHANGED
    
    | @@ -1,6 +1,6 @@ | |
| 1 1 | 
             
            Gem::Specification.new do |spec|
         | 
| 2 2 | 
             
              spec.name = 'cranium'
         | 
| 3 | 
            -
              spec.version = '0. | 
| 3 | 
            +
              spec.version = '0.8'
         | 
| 4 4 | 
             
              spec.authors = ['Emarsys Technologies']
         | 
| 5 5 | 
             
              spec.email = ['smart-insight-dev@emarsys.com']
         | 
| 6 6 | 
             
              spec.description = %q{Provides Extract, Transform and Load functionality for loading data from CSV files to a Greenplum database.}
         | 
| @@ -15,7 +15,7 @@ Gem::Specification.new do |spec| | |
| 15 15 |  | 
| 16 16 | 
             
              spec.add_runtime_dependency 'pg', '~> 0'
         | 
| 17 17 | 
             
              spec.add_runtime_dependency 'progressbar', '~> 0'
         | 
| 18 | 
            -
              spec.add_runtime_dependency 'sequel', ' | 
| 18 | 
            +
              spec.add_runtime_dependency 'sequel', '>= 4', '< 6'
         | 
| 19 19 | 
             
              spec.add_runtime_dependency 'slop', '~> 3'
         | 
| 20 20 |  | 
| 21 21 | 
             
              spec.add_development_dependency 'bundler', '~> 1'
         | 
| @@ -23,4 +23,5 @@ Gem::Specification.new do |spec| | |
| 23 23 | 
             
              spec.add_development_dependency 'rspec', '~> 3'
         | 
| 24 24 | 
             
              spec.add_development_dependency 'ruby-prof', '~> 0'
         | 
| 25 25 | 
             
              spec.add_development_dependency 'cucumber', '~> 1'
         | 
| 26 | 
            +
              spec.add_development_dependency 'dotenv', '~> 2.5'
         | 
| 26 27 | 
             
            end
         | 
    
        data/features/archive.feature
    CHANGED
    
    | @@ -2,6 +2,7 @@ Feature: Archive source files | |
| 2 2 |  | 
| 3 3 | 
             
              Scenario:
         | 
| 4 4 | 
             
                Given no "/tmp/cranium_archive" directory
         | 
| 5 | 
            +
                And no "/tmp/cranium_storage" directory
         | 
| 5 6 | 
             
                And a "products_1.csv" data file containing:
         | 
| 6 7 | 
             
                """
         | 
| 7 8 | 
             
                """
         | 
| @@ -39,6 +40,8 @@ Feature: Archive source files | |
| 39 40 | 
             
                end
         | 
| 40 41 |  | 
| 41 42 | 
             
                archive :products, :contacts
         | 
| 43 | 
            +
             | 
| 44 | 
            +
                move :purchases, to: "/tmp/cranium_storage"
         | 
| 42 45 | 
             
                """
         | 
| 43 46 | 
             
                When I execute the definition
         | 
| 44 47 | 
             
                Then the process should exit successfully
         | 
| @@ -47,3 +50,6 @@ Feature: Archive source files | |
| 47 50 | 
             
                  | .*contacts.csv   |
         | 
| 48 51 | 
             
                  | .*products_1.csv |
         | 
| 49 52 | 
             
                  | .*products_2.csv |
         | 
| 53 | 
            +
                And the "/tmp/cranium_storage" directory should contain the following files:
         | 
| 54 | 
            +
                  | filename      |
         | 
| 55 | 
            +
                  | purchases.csv |
         | 
| @@ -0,0 +1,39 @@ | |
| 1 | 
            +
            Feature: Delete rows from table provided by CSV file
         | 
| 2 | 
            +
             | 
| 3 | 
            +
              Scenario: Successful delete
         | 
| 4 | 
            +
                Given a database table called "dim_contact" with the following fields:
         | 
| 5 | 
            +
                  | field_name  | field_type |
         | 
| 6 | 
            +
                  | source_id   | TEXT       |
         | 
| 7 | 
            +
                And only the following rows in the "dim_contact" database table:
         | 
| 8 | 
            +
                  | source_id (i) |
         | 
| 9 | 
            +
                  | 1             |
         | 
| 10 | 
            +
                  | 2             |
         | 
| 11 | 
            +
                  | 3             |
         | 
| 12 | 
            +
                  | 4             |
         | 
| 13 | 
            +
                  | 5             |
         | 
| 14 | 
            +
                And a "deleted_contacts_extract.csv" data file containing:
         | 
| 15 | 
            +
                """
         | 
| 16 | 
            +
                source_id
         | 
| 17 | 
            +
                3
         | 
| 18 | 
            +
                4
         | 
| 19 | 
            +
                """
         | 
| 20 | 
            +
                And the following definition:
         | 
| 21 | 
            +
                """
         | 
| 22 | 
            +
                source :deleted_contacts_extract do
         | 
| 23 | 
            +
                  field :source_id, String
         | 
| 24 | 
            +
                end
         | 
| 25 | 
            +
             | 
| 26 | 
            +
                import :deleted_contacts_extract do
         | 
| 27 | 
            +
                  into :dim_contact
         | 
| 28 | 
            +
                  put :source_id
         | 
| 29 | 
            +
             | 
| 30 | 
            +
                  delete_on :source_id
         | 
| 31 | 
            +
                end
         | 
| 32 | 
            +
                """
         | 
| 33 | 
            +
                When I execute the definition
         | 
| 34 | 
            +
                Then the process should exit successfully
         | 
| 35 | 
            +
                And the "dim_contact" table should contain:
         | 
| 36 | 
            +
                  | source_id   |
         | 
| 37 | 
            +
                  | 1           |
         | 
| 38 | 
            +
                  | 2           |
         | 
| 39 | 
            +
                  | 5           |
         | 
| @@ -3,13 +3,13 @@ Given(/^a database table called "([^"]*)" with the following fields:$/) do |tabl | |
| 3 3 | 
             
            end
         | 
| 4 4 |  | 
| 5 5 |  | 
| 6 | 
            -
            Given | 
| 6 | 
            +
            Given(/^only the following rows in the "([^"]*)" database table:$/) do |table_name, rows|
         | 
| 7 7 | 
             
              database_table(table_name).clear
         | 
| 8 8 | 
             
              step %Q(the following new rows in the "#{table_name}" database table:), rows
         | 
| 9 9 | 
             
            end
         | 
| 10 10 |  | 
| 11 11 |  | 
| 12 | 
            -
            Given | 
| 12 | 
            +
            Given(/^the following new rows in the "([^"]*)" database table:$/) do |table_name, rows|
         | 
| 13 13 | 
             
              database_table(table_name).insert rows.data
         | 
| 14 14 | 
             
            end
         | 
| 15 15 |  | 
    
        data/features/support/env.rb
    CHANGED
    
    
    
        data/lib/cranium/archiver.rb
    CHANGED
    
    | @@ -1,36 +1,37 @@ | |
| 1 1 | 
             
            require 'fileutils'
         | 
| 2 2 |  | 
| 3 3 | 
             
            module Cranium::Archiver
         | 
| 4 | 
            -
             | 
| 5 | 
            -
             | 
| 6 | 
            -
             | 
| 7 | 
            -
             | 
| 8 | 
            -
             | 
| 9 | 
            -
             | 
| 10 | 
            -
             | 
| 11 | 
            -
             | 
| 12 | 
            -
              def self.remove(*files)
         | 
| 13 | 
            -
                files.each do |file_name|
         | 
| 14 | 
            -
                  FileUtils.rm File.join(Cranium.configuration.upload_path, file_name)
         | 
| 4 | 
            +
              class << self
         | 
| 5 | 
            +
                def archive(*files)
         | 
| 6 | 
            +
                  create_directory(Cranium.configuration.archive_directory)
         | 
| 7 | 
            +
                  archive_datetime = Time.now.strftime("%Y-%m-%d_%Hh%Mm%Ss")
         | 
| 8 | 
            +
                  move_files_from_upload_directory(files, Cranium.configuration.archive_directory, prefix: "#{archive_datetime}_")
         | 
| 15 9 | 
             
                end
         | 
| 16 | 
            -
              end
         | 
| 17 | 
            -
             | 
| 18 10 |  | 
| 11 | 
            +
                def remove(*files)
         | 
| 12 | 
            +
                  files.each do |file_name|
         | 
| 13 | 
            +
                    FileUtils.rm File.join(Cranium.configuration.upload_path, file_name)
         | 
| 14 | 
            +
                  end
         | 
| 15 | 
            +
                end
         | 
| 19 16 |  | 
| 20 | 
            -
             | 
| 21 | 
            -
             | 
| 22 | 
            -
             | 
| 23 | 
            -
                 | 
| 24 | 
            -
              end
         | 
| 17 | 
            +
                def move(*files, target_directory:)
         | 
| 18 | 
            +
                  create_directory(target_directory)
         | 
| 19 | 
            +
                  move_files_from_upload_directory(files, target_directory)
         | 
| 20 | 
            +
                end
         | 
| 25 21 |  | 
| 22 | 
            +
                private
         | 
| 26 23 |  | 
| 24 | 
            +
                def create_directory(path)
         | 
| 25 | 
            +
                  FileUtils.mkdir_p(path)
         | 
| 26 | 
            +
                end
         | 
| 27 27 |  | 
| 28 | 
            -
             | 
| 29 | 
            -
             | 
| 30 | 
            -
             | 
| 31 | 
            -
             | 
| 32 | 
            -
             | 
| 28 | 
            +
                def move_files_from_upload_directory(files, target_directory, prefix: "")
         | 
| 29 | 
            +
                  files.each do |file_name|
         | 
| 30 | 
            +
                    FileUtils.mv(
         | 
| 31 | 
            +
                      File.join(Cranium.configuration.upload_path, file_name),
         | 
| 32 | 
            +
                      File.join(target_directory, "#{prefix}#{file_name}")
         | 
| 33 | 
            +
                    )
         | 
| 34 | 
            +
                  end
         | 
| 33 35 | 
             
                end
         | 
| 34 36 | 
             
              end
         | 
| 35 | 
            -
             | 
| 36 37 | 
             
            end
         | 
| @@ -17,14 +17,16 @@ class Cranium::DataImporter | |
| 17 17 | 
             
              private
         | 
| 18 18 |  | 
| 19 19 | 
             
              def importer_for_definition(import_definition)
         | 
| 20 | 
            -
                if [!import_definition.merge_fields.empty?, !import_definition.delete_insert_on.empty?, import_definition.truncate_insert].count(true) > 1
         | 
| 21 | 
            -
                  raise StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
         | 
| 20 | 
            +
                if [!import_definition.merge_fields.empty?, !import_definition.delete_insert_on.empty?, !import_definition.delete_on.empty?, import_definition.truncate_insert].count(true) > 1
         | 
| 21 | 
            +
                  raise StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
         | 
| 22 22 | 
             
                end
         | 
| 23 23 |  | 
| 24 24 | 
             
                if !import_definition.merge_fields.empty?
         | 
| 25 25 | 
             
                  Cranium::ImportStrategy::Merge.new(import_definition)
         | 
| 26 26 | 
             
                elsif !import_definition.delete_insert_on.empty?
         | 
| 27 27 | 
             
                  Cranium::ImportStrategy::DeleteInsert.new(import_definition)
         | 
| 28 | 
            +
                elsif !import_definition.delete_on.empty?
         | 
| 29 | 
            +
                  Cranium::ImportStrategy::Delete.new(import_definition)
         | 
| 28 30 | 
             
                elsif import_definition.truncate_insert
         | 
| 29 31 | 
             
                  Cranium::ImportStrategy::TruncateInsert.new(import_definition)
         | 
| 30 32 | 
             
                else
         | 
    
        data/lib/cranium/database.rb
    CHANGED
    
    | @@ -13,7 +13,9 @@ module Cranium::Database | |
| 13 13 |  | 
| 14 14 | 
             
              def self.[](name)
         | 
| 15 15 | 
             
                @connections ||= {}
         | 
| 16 | 
            -
                @connections[name] ||= setup_connection(@definitions[name].connect_to | 
| 16 | 
            +
                @connections[name] ||= setup_connection(@definitions[name].connect_to,
         | 
| 17 | 
            +
                                                        @definitions[name].retry_count,
         | 
| 18 | 
            +
                                                        @definitions[name].retry_delay)
         | 
| 17 19 | 
             
              end
         | 
| 18 20 |  | 
| 19 21 |  | 
| @@ -28,15 +30,19 @@ module Cranium::Database | |
| 28 30 | 
             
              private
         | 
| 29 31 |  | 
| 30 32 |  | 
| 31 | 
            -
              def self.setup_connection( | 
| 32 | 
            -
                 | 
| 33 | 
            -
             | 
| 34 | 
            -
             | 
| 35 | 
            -
                                | 
| 36 | 
            -
             | 
| 37 | 
            -
             | 
| 38 | 
            -
             | 
| 39 | 
            -
             | 
| 33 | 
            +
              def self.setup_connection(connection_details, retry_count = 0, retry_delay = 0)
         | 
| 34 | 
            +
                (retry_count + 1).times do |try_count|
         | 
| 35 | 
            +
                  connection = if Cranium.configuration.log_queries
         | 
| 36 | 
            +
                                 Sequel.connect(connection_details, loggers: Cranium.configuration.loggers)
         | 
| 37 | 
            +
                               else
         | 
| 38 | 
            +
                                 Sequel.connect(connection_details)
         | 
| 39 | 
            +
                               end
         | 
| 40 | 
            +
                  connection.extension :connection_validator
         | 
| 41 | 
            +
                  connection.pool.connection_validation_timeout = -1
         | 
| 42 | 
            +
                  break connection
         | 
| 43 | 
            +
                rescue Sequel::DatabaseConnectionError
         | 
| 44 | 
            +
                  (try_count == retry_count) ? raise : sleep(retry_delay)
         | 
| 45 | 
            +
                end
         | 
| 40 46 | 
             
              end
         | 
| 41 47 |  | 
| 42 48 | 
             
            end
         | 
    
        data/lib/cranium/dsl.rb
    CHANGED
    
    | @@ -87,6 +87,14 @@ module Cranium::DSL | |
| 87 87 |  | 
| 88 88 |  | 
| 89 89 |  | 
| 90 | 
            +
              def move(*sources, to: "")
         | 
| 91 | 
            +
                sources.each do |source_name|
         | 
| 92 | 
            +
                  Cranium::Archiver.move *Cranium.application.sources[source_name].files, target_directory: to
         | 
| 93 | 
            +
                end
         | 
| 94 | 
            +
              end
         | 
| 95 | 
            +
             | 
| 96 | 
            +
             | 
| 97 | 
            +
             | 
| 90 98 | 
             
              def sequence(name)
         | 
| 91 99 | 
             
                Cranium::Transformation::Sequence.new name
         | 
| 92 100 | 
             
              end
         | 
| @@ -7,11 +7,15 @@ class Cranium::DSL::DatabaseDefinition | |
| 7 7 | 
             
              attr_reader :name
         | 
| 8 8 |  | 
| 9 9 | 
             
              define_attribute :connect_to
         | 
| 10 | 
            +
              define_attribute :retry_count
         | 
| 11 | 
            +
              define_attribute :retry_delay
         | 
| 10 12 |  | 
| 11 13 |  | 
| 12 14 |  | 
| 13 15 | 
             
              def initialize(name)
         | 
| 14 16 | 
             
                @name = name
         | 
| 17 | 
            +
                @retry_count = 0
         | 
| 18 | 
            +
                @retry_delay = 0
         | 
| 15 19 | 
             
              end
         | 
| 16 20 |  | 
| 17 21 |  | 
| @@ -10,8 +10,10 @@ class Cranium::DSL::ImportDefinition | |
| 10 10 | 
             
              attr_reader :merge_fields
         | 
| 11 11 |  | 
| 12 12 | 
             
              define_attribute :into
         | 
| 13 | 
            +
              define_attribute :error_threshold
         | 
| 13 14 | 
             
              define_boolean_attribute :truncate_insert
         | 
| 14 15 | 
             
              define_array_attribute :delete_insert_on
         | 
| 16 | 
            +
              define_array_attribute :delete_on
         | 
| 15 17 |  | 
| 16 18 |  | 
| 17 19 | 
             
              def initialize(name)
         | 
| @@ -1,46 +1,45 @@ | |
| 1 1 | 
             
            class Cranium::ExternalTable
         | 
| 2 2 |  | 
| 3 | 
            -
              def initialize(source, db_connection)
         | 
| 4 | 
            -
                @source | 
| 3 | 
            +
              def initialize(source, db_connection, error_threshold: nil)
         | 
| 4 | 
            +
                @source = source
         | 
| 5 | 
            +
                @connection = db_connection
         | 
| 6 | 
            +
                @error_threshold = error_threshold
         | 
| 5 7 | 
             
              end
         | 
| 6 8 |  | 
| 7 | 
            -
             | 
| 8 | 
            -
             | 
| 9 9 | 
             
              def create
         | 
| 10 | 
            -
                @connection.run  | 
| 11 | 
            -
                  CREATE EXTERNAL TABLE "#{name}" (
         | 
| 12 | 
            -
                      #{field_definitions}
         | 
| 13 | 
            -
                  )
         | 
| 14 | 
            -
                  LOCATION (#{external_location})
         | 
| 15 | 
            -
                  FORMAT 'CSV' (DELIMITER '#{quote @source.delimiter}' ESCAPE '#{quote @source.escape}' QUOTE '#{quote @source.quote}' HEADER)
         | 
| 16 | 
            -
                  ENCODING 'UTF8'
         | 
| 17 | 
            -
                sql
         | 
| 10 | 
            +
                @connection.run external_table_sql
         | 
| 18 11 | 
             
              end
         | 
| 19 12 |  | 
| 20 | 
            -
             | 
| 21 | 
            -
             | 
| 22 13 | 
             
              def destroy
         | 
| 23 14 | 
             
                @connection.run %Q[DROP EXTERNAL TABLE "#{name}"]
         | 
| 24 15 | 
             
              end
         | 
| 25 16 |  | 
| 26 | 
            -
             | 
| 27 | 
            -
             | 
| 28 17 | 
             
              def name
         | 
| 29 18 | 
             
                :"external_#{@source.name}"
         | 
| 30 19 | 
             
              end
         | 
| 31 20 |  | 
| 21 | 
            +
              private
         | 
| 32 22 |  | 
| 23 | 
            +
              def external_table_sql
         | 
| 24 | 
            +
                external_table_sql = <<~sql
         | 
| 25 | 
            +
                  CREATE EXTERNAL TABLE "#{name}" (
         | 
| 26 | 
            +
                      #{field_definitions}
         | 
| 27 | 
            +
                  )
         | 
| 28 | 
            +
                  LOCATION (#{external_location})
         | 
| 29 | 
            +
                  FORMAT 'CSV' (DELIMITER '#{quote @source.delimiter}' ESCAPE '#{quote @source.escape}' QUOTE '#{quote @source.quote}' HEADER)
         | 
| 30 | 
            +
                  ENCODING 'UTF8'
         | 
| 31 | 
            +
                sql
         | 
| 33 32 |  | 
| 34 | 
            -
             | 
| 33 | 
            +
                external_table_sql << "SEGMENT REJECT LIMIT #{@error_threshold} PERCENT\n" unless @error_threshold.nil?
         | 
| 34 | 
            +
                external_table_sql
         | 
| 35 | 
            +
              end
         | 
| 35 36 |  | 
| 36 37 | 
             
              def field_definitions
         | 
| 37 38 | 
             
                @source.fields.map do |name, type|
         | 
| 38 39 | 
             
                  %Q("#{name}" #{sql_type_for_ruby_type(type)})
         | 
| 39 | 
            -
                end.join ",\n | 
| 40 | 
            +
                end.join ",\n    "
         | 
| 40 41 | 
             
              end
         | 
| 41 42 |  | 
| 42 | 
            -
             | 
| 43 | 
            -
             | 
| 44 43 | 
             
              def sql_type_for_ruby_type(type)
         | 
| 45 44 | 
             
                case type.to_s
         | 
| 46 45 | 
             
                  when "Integer" then
         | 
| @@ -58,14 +57,10 @@ class Cranium::ExternalTable | |
| 58 57 | 
             
                end
         | 
| 59 58 | 
             
              end
         | 
| 60 59 |  | 
| 61 | 
            -
             | 
| 62 | 
            -
             | 
| 63 60 | 
             
              def quote(text)
         | 
| 64 61 | 
             
                text.gsub "'", "''"
         | 
| 65 62 | 
             
              end
         | 
| 66 63 |  | 
| 67 | 
            -
             | 
| 68 | 
            -
             | 
| 69 64 | 
             
              def external_location
         | 
| 70 65 | 
             
                @source.files.map do |file_name|
         | 
| 71 66 | 
             
                  "'gpfdist://#{Cranium.configuration.gpfdist_url}/#{Cranium.configuration.upload_directory}/#{file_name}'"
         | 
| @@ -2,6 +2,7 @@ module Cranium::ImportStrategy | |
| 2 2 |  | 
| 3 3 | 
             
              autoload :Base, 'cranium/import_strategy/base'
         | 
| 4 4 | 
             
              autoload :DeleteInsert, 'cranium/import_strategy/delete_insert'
         | 
| 5 | 
            +
              autoload :Delete, 'cranium/import_strategy/delete'
         | 
| 5 6 | 
             
              autoload :TruncateInsert, 'cranium/import_strategy/truncate_insert'
         | 
| 6 7 | 
             
              autoload :Delta, 'cranium/import_strategy/delta'
         | 
| 7 8 | 
             
              autoload :Merge, 'cranium/import_strategy/merge'
         | 
| @@ -11,7 +11,7 @@ class Cranium::ImportStrategy::Base | |
| 11 11 |  | 
| 12 12 |  | 
| 13 13 | 
             
              def import
         | 
| 14 | 
            -
                external_table = Cranium::ExternalTable.new Cranium.application.sources[import_definition.name], Cranium::Database.connection
         | 
| 14 | 
            +
                external_table = Cranium::ExternalTable.new Cranium.application.sources[import_definition.name], Cranium::Database.connection, error_threshold: @import_definition.error_threshold
         | 
| 15 15 |  | 
| 16 16 | 
             
                external_table.create
         | 
| 17 17 | 
             
                number_of_items_imported = import_from external_table.name
         | 
| @@ -0,0 +1,34 @@ | |
| 1 | 
            +
            class Cranium::ImportStrategy::Delete < Cranium::ImportStrategy::Base
         | 
| 2 | 
            +
             | 
| 3 | 
            +
              def import_from(source_table)
         | 
| 4 | 
            +
                @source_table = source_table
         | 
| 5 | 
            +
             | 
| 6 | 
            +
                delete_existing_records
         | 
| 7 | 
            +
                puts @source_table
         | 
| 8 | 
            +
                database[@source_table].count
         | 
| 9 | 
            +
              end
         | 
| 10 | 
            +
             | 
| 11 | 
            +
             | 
| 12 | 
            +
             | 
| 13 | 
            +
              private
         | 
| 14 | 
            +
             | 
| 15 | 
            +
              def delete_existing_records
         | 
| 16 | 
            +
                database.
         | 
| 17 | 
            +
                    from(Sequel.as(target_table, "target"), Sequel.as(@source_table, "source")).
         | 
| 18 | 
            +
                    where(delete_by_fields.qualify keys_with: :source, values_with: :target).
         | 
| 19 | 
            +
                    delete
         | 
| 20 | 
            +
              end
         | 
| 21 | 
            +
             | 
| 22 | 
            +
             | 
| 23 | 
            +
             | 
| 24 | 
            +
              def delete_by_fields
         | 
| 25 | 
            +
                Cranium::Sequel::Hash[delete_field_mapping]
         | 
| 26 | 
            +
              end
         | 
| 27 | 
            +
             | 
| 28 | 
            +
             | 
| 29 | 
            +
             | 
| 30 | 
            +
              def delete_field_mapping
         | 
| 31 | 
            +
                import_definition.field_associations.select { |_, target_field| import_definition.delete_on.include? target_field }
         | 
| 32 | 
            +
              end
         | 
| 33 | 
            +
             | 
| 34 | 
            +
            end
         | 
| @@ -22,8 +22,9 @@ class Cranium::TestFramework::World | |
| 22 22 |  | 
| 23 23 |  | 
| 24 24 | 
             
              def save_definition(definition)
         | 
| 25 | 
            -
                config =  | 
| 25 | 
            +
                config = <<~config_string
         | 
| 26 26 | 
             
                  require 'logger'
         | 
| 27 | 
            +
                  require 'date'
         | 
| 27 28 |  | 
| 28 29 | 
             
                  Cranium.configure do |config|
         | 
| 29 30 | 
             
                    config.greenplum_connection_string = "#{Cranium.configuration.greenplum_connection_string}"
         | 
| @@ -32,6 +33,7 @@ class Cranium::TestFramework::World | |
| 32 33 | 
             
                    config.upload_directory = "#{Cranium.configuration.upload_directory}"
         | 
| 33 34 | 
             
                    config.loggers << Logger.new("log/application.log")
         | 
| 34 35 | 
             
                  end
         | 
| 36 | 
            +
             | 
| 35 37 | 
             
                config_string
         | 
| 36 38 |  | 
| 37 39 | 
             
                upload_directory.save_file DEFINITION_FILE, config + definition
         | 
| @@ -1,44 +1,74 @@ | |
| 1 | 
            -
             | 
| 1 | 
            +
            RSpec.describe Cranium::Archiver do
         | 
| 2 | 
            +
              subject(:archiver) { described_class }
         | 
| 2 3 |  | 
| 3 | 
            -
             | 
| 4 | 
            -
             | 
| 5 | 
            -
             | 
| 6 | 
            -
             | 
| 7 | 
            -
                  config. | 
| 8 | 
            -
             | 
| 9 | 
            -
                  config.archive_directory = "path/to/archive"
         | 
| 10 | 
            -
                end)
         | 
| 4 | 
            +
              let(:configuration) do
         | 
| 5 | 
            +
                Cranium::Configuration.new.tap do |config|
         | 
| 6 | 
            +
                  config.gpfdist_home_directory = "tmp"
         | 
| 7 | 
            +
                  config.upload_directory = "upload_directory"
         | 
| 8 | 
            +
                  config.archive_directory = "tmp/archive_directory"
         | 
| 9 | 
            +
                end
         | 
| 11 10 | 
             
              end
         | 
| 11 | 
            +
              let(:file1) { "file.txt" }
         | 
| 12 | 
            +
              let(:file2) { "another_file.txt" }
         | 
| 12 13 |  | 
| 14 | 
            +
              before do
         | 
| 15 | 
            +
                allow(Cranium).to receive_messages(configuration: configuration)
         | 
| 16 | 
            +
             | 
| 17 | 
            +
                FileUtils.mkdir_p(configuration.upload_path)
         | 
| 18 | 
            +
                FileUtils.touch(File.join(configuration.upload_path, file1))
         | 
| 19 | 
            +
                FileUtils.touch(File.join(configuration.upload_path, file2))
         | 
| 20 | 
            +
              end
         | 
| 13 21 |  | 
| 14 22 | 
             
              describe ".archive" do
         | 
| 15 | 
            -
                 | 
| 16 | 
            -
                   | 
| 23 | 
            +
                context "when archive directory does not exist" do
         | 
| 24 | 
            +
                  before { FileUtils.rm_rf configuration.archive_directory }
         | 
| 17 25 |  | 
| 18 | 
            -
                   | 
| 26 | 
            +
                  it "creates the archive directory" do
         | 
| 27 | 
            +
                    archiver.archive file1, file2
         | 
| 19 28 |  | 
| 20 | 
            -
             | 
| 29 | 
            +
                    expect(File.exists?(configuration.archive_directory)).to eq true
         | 
| 30 | 
            +
                  end
         | 
| 21 31 | 
             
                end
         | 
| 22 32 |  | 
| 23 | 
            -
                 | 
| 24 | 
            -
                   | 
| 25 | 
            -
             | 
| 33 | 
            +
                context "when there are some file in the upload directory" do
         | 
| 34 | 
            +
                  it "moves files to the archive directory" do
         | 
| 35 | 
            +
                    archiver.archive file1, file2
         | 
| 26 36 |  | 
| 27 | 
            -
             | 
| 28 | 
            -
             | 
| 29 | 
            -
             | 
| 30 | 
            -
             | 
| 37 | 
            +
                    expect(File.exist?(File.join(configuration.upload_path, file1))).to eq false
         | 
| 38 | 
            +
                    expect(File.exist?(File.join(configuration.upload_path, file2))).to eq false
         | 
| 39 | 
            +
                    expect(File.exist?(File.join(configuration.archive_directory, Dir.glob("*#{file1}")))).to eq true
         | 
| 40 | 
            +
                    expect(File.exist?(File.join(configuration.archive_directory, Dir.glob("*#{file2}")))).to eq true
         | 
| 41 | 
            +
                  end
         | 
| 31 42 | 
             
                end
         | 
| 32 43 | 
             
              end
         | 
| 33 44 |  | 
| 34 | 
            -
             | 
| 35 45 | 
             
              describe ".remove" do
         | 
| 36 | 
            -
                 | 
| 37 | 
            -
             | 
| 38 | 
            -
             | 
| 46 | 
            +
                before { FileUtils.mkdir_p configuration.archive_directory }
         | 
| 47 | 
            +
             | 
| 48 | 
            +
                it "removes files from the upload directory" do
         | 
| 49 | 
            +
                  archiver.remove file1, file2
         | 
| 39 50 |  | 
| 40 | 
            -
                   | 
| 51 | 
            +
                  expect(File.exist?(File.join(configuration.archive_directory, Dir.glob("*#{file1}")))).to eq true
         | 
| 52 | 
            +
                  expect(File.exist?(File.join(configuration.archive_directory, Dir.glob("*#{file2}")))).to eq true
         | 
| 41 53 | 
             
                end
         | 
| 42 54 | 
             
              end
         | 
| 43 55 |  | 
| 56 | 
            +
              describe ".move" do
         | 
| 57 | 
            +
                let(:target_directory) { "tmp/target_directory" }
         | 
| 58 | 
            +
             | 
| 59 | 
            +
                it "creates given directory if it does not exist" do
         | 
| 60 | 
            +
                  archiver.move(file1, file2, target_directory: target_directory)
         | 
| 61 | 
            +
             | 
| 62 | 
            +
                  expect(File.exists?(target_directory)).to eq true
         | 
| 63 | 
            +
                end
         | 
| 64 | 
            +
             | 
| 65 | 
            +
                it "moves files from upload directory into a given directory" do
         | 
| 66 | 
            +
                  archiver.move(file1, file2, target_directory: target_directory)
         | 
| 67 | 
            +
             | 
| 68 | 
            +
                  expect(File.exist?(File.join(configuration.upload_path, file1))).to eq false
         | 
| 69 | 
            +
                  expect(File.exist?(File.join(configuration.upload_path, file2))).to eq false
         | 
| 70 | 
            +
                  expect(File.exist?(File.join(target_directory, file1))).to eq true
         | 
| 71 | 
            +
                  expect(File.exist?(File.join(target_directory, file2))).to eq true
         | 
| 72 | 
            +
                end
         | 
| 73 | 
            +
              end
         | 
| 44 74 | 
             
            end
         | 
| @@ -1,11 +1,12 @@ | |
| 1 1 | 
             
            require_relative '../spec_helper'
         | 
| 2 2 |  | 
| 3 3 | 
             
            describe Cranium::DataImporter do
         | 
| 4 | 
            +
              let(:connection) { double 'a_connection' }
         | 
| 4 5 |  | 
| 5 6 | 
             
              before do
         | 
| 6 | 
            -
                connection = double
         | 
| 7 7 | 
             
                allow(Cranium::Database).to receive(:connection).and_return connection
         | 
| 8 8 | 
             
                allow(connection).to receive(:transaction).and_yield
         | 
| 9 | 
            +
                allow(Cranium.application).to receive(:apply_hook).with :after_import
         | 
| 9 10 | 
             
              end
         | 
| 10 11 |  | 
| 11 12 | 
             
              let(:importer) { Cranium::DataImporter.new }
         | 
| @@ -13,12 +14,32 @@ describe Cranium::DataImporter do | |
| 13 14 |  | 
| 14 15 | 
             
              describe "#import" do
         | 
| 15 16 |  | 
| 17 | 
            +
                context "when called with delete_on strategy" do
         | 
| 18 | 
            +
                  it "calls Delete strategy" do
         | 
| 19 | 
            +
                    import_strategy = instance_double Cranium::ImportStrategy::Delete
         | 
| 20 | 
            +
                    allow(Cranium::ImportStrategy::Delete).to receive(:new).with(definition).and_return import_strategy
         | 
| 21 | 
            +
                    expect(import_strategy).to receive(:import).and_return 0
         | 
| 22 | 
            +
                    definition.delete_on :source_id
         | 
| 23 | 
            +
             | 
| 24 | 
            +
                    importer.import definition
         | 
| 25 | 
            +
                  end
         | 
| 26 | 
            +
                end
         | 
| 27 | 
            +
             | 
| 28 | 
            +
                context "when called with both merge and delete_on fields set" do
         | 
| 29 | 
            +
                  it "should raise an exception" do
         | 
| 30 | 
            +
                    definition.delete_on :source_id
         | 
| 31 | 
            +
                    definition.merge_on :another_field
         | 
| 32 | 
            +
             | 
| 33 | 
            +
                    expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
         | 
| 34 | 
            +
                  end
         | 
| 35 | 
            +
                end
         | 
| 36 | 
            +
             | 
| 16 37 | 
             
                context "when called with both merge and delete_insert fields set" do
         | 
| 17 38 | 
             
                  it "should raise an exception" do
         | 
| 18 39 | 
             
                    definition.delete_insert_on :some_field
         | 
| 19 40 | 
             
                    definition.merge_on :another_field
         | 
| 20 41 |  | 
| 21 | 
            -
                    expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
         | 
| 42 | 
            +
                    expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
         | 
| 22 43 | 
             
                  end
         | 
| 23 44 | 
             
                end
         | 
| 24 45 |  | 
| @@ -27,7 +48,7 @@ describe Cranium::DataImporter do | |
| 27 48 | 
             
                    definition.truncate_insert true
         | 
| 28 49 | 
             
                    definition.merge_on :another_field
         | 
| 29 50 |  | 
| 30 | 
            -
                    expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
         | 
| 51 | 
            +
                    expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
         | 
| 31 52 | 
             
                  end
         | 
| 32 53 | 
             
                end
         | 
| 33 54 |  | 
| @@ -36,7 +57,7 @@ describe Cranium::DataImporter do | |
| 36 57 | 
             
                    definition.delete_insert_on :some_field
         | 
| 37 58 | 
             
                    definition.truncate_insert true
         | 
| 38 59 |  | 
| 39 | 
            -
                    expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
         | 
| 60 | 
            +
                    expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
         | 
| 40 61 | 
             
                  end
         | 
| 41 62 | 
             
                end
         | 
| 42 63 |  | 
| @@ -46,7 +67,7 @@ describe Cranium::DataImporter do | |
| 46 67 | 
             
                    definition.merge_on :another_field
         | 
| 47 68 | 
             
                    definition.truncate_insert true
         | 
| 48 69 |  | 
| 49 | 
            -
                    expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
         | 
| 70 | 
            +
                    expect { importer.import(definition) }.to raise_error StandardError, "Import should not combine merge_on, delete_insert_on, delete_on and truncate_insert settings"
         | 
| 50 71 | 
             
                  end
         | 
| 51 72 | 
             
                end
         | 
| 52 73 |  | 
| @@ -81,6 +81,40 @@ describe Cranium::Database do | |
| 81 81 |  | 
| 82 82 | 
             
                  expect(database[:dwh]).not_to eq database[:dwh2]
         | 
| 83 83 | 
             
                end
         | 
| 84 | 
            +
             | 
| 85 | 
            +
                context 'when retry_count is specified' do
         | 
| 86 | 
            +
                  before do
         | 
| 87 | 
            +
                    database.register_database :dwh do
         | 
| 88 | 
            +
                      connect_to "other connection string"
         | 
| 89 | 
            +
                      retry_count 3
         | 
| 90 | 
            +
                      retry_delay 15
         | 
| 91 | 
            +
                    end
         | 
| 92 | 
            +
                    allow(database).to receive(:sleep)
         | 
| 93 | 
            +
                  end
         | 
| 94 | 
            +
             | 
| 95 | 
            +
                  it "should retry connecting to the DB the specified number of times" do
         | 
| 96 | 
            +
                    call_count = 0
         | 
| 97 | 
            +
                    allow(Sequel).to receive(:connect) do
         | 
| 98 | 
            +
                      call_count += 1
         | 
| 99 | 
            +
                      call_count < 3 ? raise(Sequel::DatabaseConnectionError) : connection
         | 
| 100 | 
            +
                    end
         | 
| 101 | 
            +
             | 
| 102 | 
            +
                    expect(database[:dwh]).to eq connection
         | 
| 103 | 
            +
                  end
         | 
| 104 | 
            +
             | 
| 105 | 
            +
                  it "should not retry connecting to the DB more than the specified number of times" do
         | 
| 106 | 
            +
                    allow(Sequel).to receive(:connect).exactly(4).times.and_raise(Sequel::DatabaseConnectionError)
         | 
| 107 | 
            +
             | 
| 108 | 
            +
                    expect { database[:dwh] }.to raise_error(Sequel::DatabaseConnectionError)
         | 
| 109 | 
            +
                  end
         | 
| 110 | 
            +
             | 
| 111 | 
            +
                  it "should wait retry_delay seconds between connection attempts" do
         | 
| 112 | 
            +
                    allow(Sequel).to receive(:connect).and_raise(Sequel::DatabaseConnectionError)
         | 
| 113 | 
            +
                    expect(database).to receive(:sleep).with(15).exactly(3).times
         | 
| 114 | 
            +
             | 
| 115 | 
            +
                    expect { database[:dwh] }.to raise_error(Sequel::DatabaseConnectionError)
         | 
| 116 | 
            +
                  end
         | 
| 117 | 
            +
                end
         | 
| 84 118 | 
             
              end
         | 
| 85 119 |  | 
| 86 120 | 
             
            end
         | 
| @@ -20,4 +20,38 @@ describe Cranium::DSL::DatabaseDefinition do | |
| 20 20 | 
             
                end
         | 
| 21 21 | 
             
              end
         | 
| 22 22 |  | 
| 23 | 
            +
             | 
| 24 | 
            +
              describe "#retry_count" do
         | 
| 25 | 
            +
                context 'when not set' do
         | 
| 26 | 
            +
                  it "should return 0 by default" do
         | 
| 27 | 
            +
                    expect(database.retry_count).to eq(0)
         | 
| 28 | 
            +
                  end
         | 
| 29 | 
            +
                end
         | 
| 30 | 
            +
             | 
| 31 | 
            +
                context 'when set' do
         | 
| 32 | 
            +
                  it "should return the number of retries specified for the database" do
         | 
| 33 | 
            +
                    database.retry_count 3
         | 
| 34 | 
            +
             | 
| 35 | 
            +
                    expect(database.retry_count).to eq(3)
         | 
| 36 | 
            +
                  end
         | 
| 37 | 
            +
                end
         | 
| 38 | 
            +
              end
         | 
| 39 | 
            +
             | 
| 40 | 
            +
             | 
| 41 | 
            +
              describe "#retry_delay" do
         | 
| 42 | 
            +
                context 'when not set' do
         | 
| 43 | 
            +
                  it "should return 0 by default" do
         | 
| 44 | 
            +
                    expect(database.retry_delay).to eq(0)
         | 
| 45 | 
            +
                  end
         | 
| 46 | 
            +
                end
         | 
| 47 | 
            +
             | 
| 48 | 
            +
                context 'when set' do
         | 
| 49 | 
            +
                  it "should return the number of retries specified for the database" do
         | 
| 50 | 
            +
                    database.retry_delay 15
         | 
| 51 | 
            +
             | 
| 52 | 
            +
                    expect(database.retry_delay).to eq(15)
         | 
| 53 | 
            +
                  end
         | 
| 54 | 
            +
                end
         | 
| 55 | 
            +
              end
         | 
| 56 | 
            +
             | 
| 23 57 | 
             
            end
         | 
| @@ -12,6 +12,14 @@ describe Cranium::DSL::ImportDefinition do | |
| 12 12 | 
             
                end
         | 
| 13 13 | 
             
              end
         | 
| 14 14 |  | 
| 15 | 
            +
              describe "#error_threshold" do
         | 
| 16 | 
            +
                it "should set the error threshold to the given percentage" do
         | 
| 17 | 
            +
                  import.error_threshold 10
         | 
| 18 | 
            +
             | 
| 19 | 
            +
                  expect(import.error_threshold).to eq 10
         | 
| 20 | 
            +
                end
         | 
| 21 | 
            +
              end
         | 
| 22 | 
            +
             | 
| 15 23 |  | 
| 16 24 | 
             
              describe "#name" do
         | 
| 17 25 | 
             
                it "should return the name of the import definition" do
         | 
| @@ -38,11 +38,11 @@ describe Cranium::DSL::SourceDefinition do | |
| 38 38 |  | 
| 39 39 | 
             
                it "should return the fields and types that were set" do
         | 
| 40 40 | 
             
                  source.field :field1, String
         | 
| 41 | 
            -
                  source.field :field2,  | 
| 41 | 
            +
                  source.field :field2, Integer
         | 
| 42 42 |  | 
| 43 43 | 
             
                  expect(source.fields).to eq({
         | 
| 44 44 | 
             
                    field1: String,
         | 
| 45 | 
            -
                    field2:  | 
| 45 | 
            +
                    field2: Integer
         | 
| 46 46 | 
             
                  })
         | 
| 47 47 | 
             
                end
         | 
| 48 48 | 
             
              end
         | 
    
        data/spec/cranium/dsl_spec.rb
    CHANGED
    
    | @@ -97,6 +97,22 @@ describe Cranium::DSL do | |
| 97 97 | 
             
              end
         | 
| 98 98 |  | 
| 99 99 |  | 
| 100 | 
            +
              describe "#move" do
         | 
| 101 | 
            +
                let(:target_directory) { "/tmp/target" }
         | 
| 102 | 
            +
             | 
| 103 | 
            +
                it "should move files for the specified sources" do
         | 
| 104 | 
            +
                  allow(Cranium.application).to receive_messages sources: {first_source: double(files: ["file1", "file2"]),
         | 
| 105 | 
            +
                                                                           second_source: double(files: ["file3"]),
         | 
| 106 | 
            +
                                                                           third_source: double(files: ["file4"])}
         | 
| 107 | 
            +
             | 
| 108 | 
            +
                  expect(Cranium::Archiver).to receive(:move).with "file1", "file2", target_directory: target_directory
         | 
| 109 | 
            +
                  expect(Cranium::Archiver).to receive(:move).with "file3", target_directory: target_directory
         | 
| 110 | 
            +
             | 
| 111 | 
            +
                  dsl_object.move :first_source, :second_source, to: target_directory
         | 
| 112 | 
            +
                end
         | 
| 113 | 
            +
              end
         | 
| 114 | 
            +
             | 
| 115 | 
            +
             | 
| 100 116 | 
             
              describe "#sequence" do
         | 
| 101 117 | 
             
                it "should return a sequence with the specified name" do
         | 
| 102 118 | 
             
                  result = dsl_object.sequence "test_sequence"
         | 
| @@ -1,5 +1,6 @@ | |
| 1 1 | 
             
            require_relative '../spec_helper'
         | 
| 2 2 | 
             
            require 'ostruct'
         | 
| 3 | 
            +
            require 'date'
         | 
| 3 4 |  | 
| 4 5 | 
             
            describe Cranium::ExternalTable do
         | 
| 5 6 |  | 
| @@ -19,37 +20,64 @@ describe Cranium::ExternalTable do | |
| 19 20 | 
             
                  source.escape "'"
         | 
| 20 21 | 
             
                end
         | 
| 21 22 | 
             
              end
         | 
| 22 | 
            -
              let(:external_table) { Cranium::ExternalTable.new source, connection }
         | 
| 23 23 |  | 
| 24 | 
            +
              subject(:external_table) { Cranium::ExternalTable.new source, connection }
         | 
| 24 25 |  | 
| 25 26 | 
             
              describe "#create" do
         | 
| 26 | 
            -
                 | 
| 27 | 
            +
                before do
         | 
| 27 28 | 
             
                  allow(Cranium).to receive_messages configuration: OpenStruct.new(
         | 
| 28 | 
            -
             | 
| 29 | 
            -
             | 
| 30 | 
            -
             | 
| 29 | 
            +
                      gpfdist_url: "gpfdist-url",
         | 
| 30 | 
            +
                      gpfdist_home_directory: "/gpfdist-home",
         | 
| 31 | 
            +
                      upload_directory: "upload-dir"
         | 
| 31 32 | 
             
                  )
         | 
| 32 33 |  | 
| 33 34 | 
             
                  allow(source).to receive_messages files: %w(test_products_a.csv test_products_b.csv)
         | 
| 35 | 
            +
                end
         | 
| 34 36 |  | 
| 35 | 
            -
             | 
| 36 | 
            -
                   | 
| 37 | 
            -
             | 
| 38 | 
            -
             | 
| 39 | 
            -
             | 
| 40 | 
            -
             | 
| 41 | 
            -
             | 
| 42 | 
            -
             | 
| 43 | 
            -
             | 
| 44 | 
            -
             | 
| 45 | 
            -
             | 
| 46 | 
            -
             | 
| 47 | 
            -
             | 
| 37 | 
            +
                it "should create an external table from the specified source" do
         | 
| 38 | 
            +
                  expect(connection).to receive(:run).with(<<~sql
         | 
| 39 | 
            +
                    CREATE EXTERNAL TABLE "external_products" (
         | 
| 40 | 
            +
                        "text_field" TEXT,
         | 
| 41 | 
            +
                        "integer_field" INTEGER,
         | 
| 42 | 
            +
                        "numeric_field" NUMERIC,
         | 
| 43 | 
            +
                        "date_field" DATE,
         | 
| 44 | 
            +
                        "timestamp_field" TIMESTAMP WITHOUT TIME ZONE,
         | 
| 45 | 
            +
                        "boolean_field1" BOOLEAN,
         | 
| 46 | 
            +
                        "boolean_field2" BOOLEAN
         | 
| 47 | 
            +
                    )
         | 
| 48 | 
            +
                    LOCATION ('gpfdist://gpfdist-url/upload-dir/test_products_a.csv', 'gpfdist://gpfdist-url/upload-dir/test_products_b.csv')
         | 
| 49 | 
            +
                    FORMAT 'CSV' (DELIMITER ';' ESCAPE '''' QUOTE '"' HEADER)
         | 
| 50 | 
            +
                    ENCODING 'UTF8'
         | 
| 48 51 | 
             
                  sql
         | 
| 49 52 | 
             
                  )
         | 
| 50 53 |  | 
| 51 54 | 
             
                  external_table.create
         | 
| 52 55 | 
             
                end
         | 
| 56 | 
            +
             | 
| 57 | 
            +
                context "with error_threshold argument" do
         | 
| 58 | 
            +
                  subject(:external_table) { Cranium::ExternalTable.new source, connection, error_threshold: 10 }
         | 
| 59 | 
            +
             | 
| 60 | 
            +
                  it "should create an external table from the specified source" do
         | 
| 61 | 
            +
                    expect(connection).to receive(:run).with(<<~sql
         | 
| 62 | 
            +
                      CREATE EXTERNAL TABLE "external_products" (
         | 
| 63 | 
            +
                          "text_field" TEXT,
         | 
| 64 | 
            +
                          "integer_field" INTEGER,
         | 
| 65 | 
            +
                          "numeric_field" NUMERIC,
         | 
| 66 | 
            +
                          "date_field" DATE,
         | 
| 67 | 
            +
                          "timestamp_field" TIMESTAMP WITHOUT TIME ZONE,
         | 
| 68 | 
            +
                          "boolean_field1" BOOLEAN,
         | 
| 69 | 
            +
                          "boolean_field2" BOOLEAN
         | 
| 70 | 
            +
                      )
         | 
| 71 | 
            +
                      LOCATION ('gpfdist://gpfdist-url/upload-dir/test_products_a.csv', 'gpfdist://gpfdist-url/upload-dir/test_products_b.csv')
         | 
| 72 | 
            +
                      FORMAT 'CSV' (DELIMITER ';' ESCAPE '''' QUOTE '"' HEADER)
         | 
| 73 | 
            +
                      ENCODING 'UTF8'
         | 
| 74 | 
            +
                      SEGMENT REJECT LIMIT 10 PERCENT
         | 
| 75 | 
            +
                    sql
         | 
| 76 | 
            +
                    )
         | 
| 77 | 
            +
             | 
| 78 | 
            +
                    external_table.create
         | 
| 79 | 
            +
                  end
         | 
| 80 | 
            +
                end
         | 
| 53 81 | 
             
              end
         | 
| 54 82 |  | 
| 55 83 |  | 
| @@ -67,5 +95,4 @@ describe Cranium::ExternalTable do | |
| 67 95 | 
             
                  expect(external_table.name).to eq(:external_products)
         | 
| 68 96 | 
             
                end
         | 
| 69 97 | 
             
              end
         | 
| 70 | 
            -
             | 
| 71 98 | 
             
            end
         | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: cranium
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0. | 
| 4 | 
            +
              version: '0.8'
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Emarsys Technologies
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date:  | 
| 11 | 
            +
            date: 2020-10-19 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: pg
         | 
| @@ -42,16 +42,22 @@ dependencies: | |
| 42 42 | 
             
              name: sequel
         | 
| 43 43 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| 44 44 | 
             
                requirements:
         | 
| 45 | 
            -
                - - " | 
| 45 | 
            +
                - - ">="
         | 
| 46 46 | 
             
                  - !ruby/object:Gem::Version
         | 
| 47 47 | 
             
                    version: '4'
         | 
| 48 | 
            +
                - - "<"
         | 
| 49 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 50 | 
            +
                    version: '6'
         | 
| 48 51 | 
             
              type: :runtime
         | 
| 49 52 | 
             
              prerelease: false
         | 
| 50 53 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 51 54 | 
             
                requirements:
         | 
| 52 | 
            -
                - - " | 
| 55 | 
            +
                - - ">="
         | 
| 53 56 | 
             
                  - !ruby/object:Gem::Version
         | 
| 54 57 | 
             
                    version: '4'
         | 
| 58 | 
            +
                - - "<"
         | 
| 59 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 60 | 
            +
                    version: '6'
         | 
| 55 61 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 56 62 | 
             
              name: slop
         | 
| 57 63 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| @@ -136,6 +142,20 @@ dependencies: | |
| 136 142 | 
             
                - - "~>"
         | 
| 137 143 | 
             
                  - !ruby/object:Gem::Version
         | 
| 138 144 | 
             
                    version: '1'
         | 
| 145 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 146 | 
            +
              name: dotenv
         | 
| 147 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 148 | 
            +
                requirements:
         | 
| 149 | 
            +
                - - "~>"
         | 
| 150 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 151 | 
            +
                    version: '2.5'
         | 
| 152 | 
            +
              type: :development
         | 
| 153 | 
            +
              prerelease: false
         | 
| 154 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 155 | 
            +
                requirements:
         | 
| 156 | 
            +
                - - "~>"
         | 
| 157 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 158 | 
            +
                    version: '2.5'
         | 
| 139 159 | 
             
            description: Provides Extract, Transform and Load functionality for loading data from
         | 
| 140 160 | 
             
              CSV files to a Greenplum database.
         | 
| 141 161 | 
             
            email:
         | 
| @@ -145,8 +165,9 @@ executables: | |
| 145 165 | 
             
            extensions: []
         | 
| 146 166 | 
             
            extra_rdoc_files: []
         | 
| 147 167 | 
             
            files:
         | 
| 168 | 
            +
            - ".env"
         | 
| 148 169 | 
             
            - ".gitignore"
         | 
| 149 | 
            -
            - ". | 
| 170 | 
            +
            - ".rspec"
         | 
| 150 171 | 
             
            - Gemfile
         | 
| 151 172 | 
             
            - LICENSE.txt
         | 
| 152 173 | 
             
            - README.md
         | 
| @@ -166,6 +187,7 @@ files: | |
| 166 187 | 
             
            - features/archive.feature
         | 
| 167 188 | 
             
            - features/extract/incremental_extract.feature
         | 
| 168 189 | 
             
            - features/extract/simple_extract.feature
         | 
| 190 | 
            +
            - features/import/delete_from_table_based_on_csv.feature
         | 
| 169 191 | 
             
            - features/import/import_csv_to_database_as_delta.feature
         | 
| 170 192 | 
             
            - features/import/import_csv_to_database_with_delete_insert_merging.feature
         | 
| 171 193 | 
             
            - features/import/import_csv_to_database_with_truncate_insert.feature
         | 
| @@ -225,6 +247,7 @@ files: | |
| 225 247 | 
             
            - lib/cranium/file_utils.rb
         | 
| 226 248 | 
             
            - lib/cranium/import_strategy.rb
         | 
| 227 249 | 
             
            - lib/cranium/import_strategy/base.rb
         | 
| 250 | 
            +
            - lib/cranium/import_strategy/delete.rb
         | 
| 228 251 | 
             
            - lib/cranium/import_strategy/delete_insert.rb
         | 
| 229 252 | 
             
            - lib/cranium/import_strategy/delta.rb
         | 
| 230 253 | 
             
            - lib/cranium/import_strategy/merge.rb
         | 
| @@ -296,8 +319,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement | |
| 296 319 | 
             
                - !ruby/object:Gem::Version
         | 
| 297 320 | 
             
                  version: '0'
         | 
| 298 321 | 
             
            requirements: []
         | 
| 299 | 
            -
             | 
| 300 | 
            -
            rubygems_version: 2.6.4
         | 
| 322 | 
            +
            rubygems_version: 3.0.3
         | 
| 301 323 | 
             
            signing_key: 
         | 
| 302 324 | 
             
            specification_version: 4
         | 
| 303 325 | 
             
            summary: Pure Ruby ETL framework
         | 
| @@ -305,6 +327,7 @@ test_files: | |
| 305 327 | 
             
            - features/archive.feature
         | 
| 306 328 | 
             
            - features/extract/incremental_extract.feature
         | 
| 307 329 | 
             
            - features/extract/simple_extract.feature
         | 
| 330 | 
            +
            - features/import/delete_from_table_based_on_csv.feature
         | 
| 308 331 | 
             
            - features/import/import_csv_to_database_as_delta.feature
         | 
| 309 332 | 
             
            - features/import/import_csv_to_database_with_delete_insert_merging.feature
         | 
| 310 333 | 
             
            - features/import/import_csv_to_database_with_truncate_insert.feature
         | 
    
        data/.ruby-version
    DELETED
    
    | @@ -1 +0,0 @@ | |
| 1 | 
            -
            2.3.0
         |