RubyGems - cranium - Versions diffs - 0.2.0 - Mend

cranium 0.2.0

Files changed (132) hide show

checksums.yaml +7 -0
data/.gitignore +21 -0
data/.ruby-version +1 -0
data/Gemfile +4 -0
data/LICENSE.txt +22 -0
data/README.md +29 -0
data/Rakefile +3 -0
data/Vagrantfile +24 -0
data/bin/cranium +9 -0
data/config/cucumber.yml +9 -0
data/cranium.gemspec +26 -0
data/db/setup.sql +8 -0
data/docker-compose.yml +8 -0
data/examples/config.rb +14 -0
data/examples/deduplication.rb +27 -0
data/examples/import_csv_with_field_lookup_inserting_new_dimension_keys.rb +26 -0
data/examples/incremental_extract.rb +17 -0
data/examples/lookup_with_multiple_fields.rb +25 -0
data/features/archive.feature +49 -0
data/features/extract/incremental_extract.feature +56 -0
data/features/extract/simple_extract.feature +85 -0
data/features/import/import_csv_to_database_as_delta.feature +38 -0
data/features/import/import_csv_to_database_with_delete_insert_merging.feature +51 -0
data/features/import/import_csv_to_database_with_truncate_insert.feature +49 -0
data/features/import/import_csv_to_database_with_update_merging.feature +46 -0
data/features/import/import_csv_with_always_inserting_new_dimension_keys.feature +137 -0
data/features/import/import_csv_with_field_lookup_inserting_new_dimension_keys.feature +62 -0
data/features/import/import_csv_with_field_lookup_transformation.feature +125 -0
data/features/import/import_csv_with_transformation.feature +55 -0
data/features/import/import_multiple_csv_files_without_transformations.feature +44 -0
data/features/import/import_with_load_id_from_sequence.feature +53 -0
data/features/import/import_with_lookup_from_multiple_fields.feature +64 -0
data/features/read.feature +56 -0
data/features/remove.feature +44 -0
data/features/restore_database_connection.feature +55 -0
data/features/step_definitions/database_table_steps.rb +40 -0
data/features/step_definitions/definition_steps.rb +3 -0
data/features/step_definitions/execution_steps.rb +23 -0
data/features/step_definitions/file_steps.rb +39 -0
data/features/support/class_extensions.rb +24 -0
data/features/support/env.rb +27 -0
data/features/support/randomize.rb +22 -0
data/features/support/stop_on_first_error.rb +5 -0
data/features/transform/deduplication.feature +37 -0
data/features/transform/empty_transformation.feature +72 -0
data/features/transform/join.feature +180 -0
data/features/transform/join_multiple_files_into_one_output_file.feature +46 -0
data/features/transform/output_rows.feature +70 -0
data/features/transform/projection.feature +34 -0
data/features/transform/raw_ruby_transformation.feature +69 -0
data/features/transform/split_field.feature +39 -0
data/lib/cranium/application.rb +104 -0
data/lib/cranium/archiver.rb +36 -0
data/lib/cranium/attribute_dsl.rb +43 -0
data/lib/cranium/command_line_options.rb +27 -0
data/lib/cranium/configuration.rb +33 -0
data/lib/cranium/data_importer.rb +35 -0
data/lib/cranium/data_reader.rb +48 -0
data/lib/cranium/data_transformer.rb +126 -0
data/lib/cranium/database.rb +36 -0
data/lib/cranium/definition_registry.rb +21 -0
data/lib/cranium/dimension_manager.rb +65 -0
data/lib/cranium/dsl/database_definition.rb +23 -0
data/lib/cranium/dsl/extract_definition.rb +28 -0
data/lib/cranium/dsl/import_definition.rb +50 -0
data/lib/cranium/dsl/source_definition.rb +67 -0
data/lib/cranium/dsl.rb +100 -0
data/lib/cranium/extensions/file.rb +7 -0
data/lib/cranium/extensions/sequel_greenplum.rb +30 -0
data/lib/cranium/external_table.rb +75 -0
data/lib/cranium/extract/data_extractor.rb +11 -0
data/lib/cranium/extract/storage.rb +57 -0
data/lib/cranium/extract/strategy/base.rb +27 -0
data/lib/cranium/extract/strategy/incremental.rb +16 -0
data/lib/cranium/extract/strategy/simple.rb +9 -0
data/lib/cranium/extract/strategy.rb +7 -0
data/lib/cranium/extract.rb +7 -0
data/lib/cranium/import_strategy/base.rb +55 -0
data/lib/cranium/import_strategy/delete_insert.rb +40 -0
data/lib/cranium/import_strategy/delta.rb +8 -0
data/lib/cranium/import_strategy/merge.rb +50 -0
data/lib/cranium/import_strategy/truncate_insert.rb +19 -0
data/lib/cranium/import_strategy.rb +9 -0
data/lib/cranium/logging.rb +15 -0
data/lib/cranium/profiling.rb +13 -0
data/lib/cranium/progress_output.rb +37 -0
data/lib/cranium/sequel/hash.rb +32 -0
data/lib/cranium/sequel.rb +5 -0
data/lib/cranium/source_registry.rb +21 -0
data/lib/cranium/test_framework/cucumber_table.rb +140 -0
data/lib/cranium/test_framework/database_entity.rb +29 -0
data/lib/cranium/test_framework/database_sequence.rb +16 -0
data/lib/cranium/test_framework/database_table.rb +33 -0
data/lib/cranium/test_framework/upload_directory.rb +39 -0
data/lib/cranium/test_framework/world.rb +66 -0
data/lib/cranium/test_framework.rb +10 -0
data/lib/cranium/transformation/duplication_index.rb +42 -0
data/lib/cranium/transformation/index.rb +83 -0
data/lib/cranium/transformation/join.rb +141 -0
data/lib/cranium/transformation/sequence.rb +42 -0
data/lib/cranium/transformation.rb +8 -0
data/lib/cranium/transformation_record.rb +45 -0
data/lib/cranium.rb +57 -0
data/rake/test.rake +31 -0
data/spec/cranium/application_spec.rb +166 -0
data/spec/cranium/archiver_spec.rb +44 -0
data/spec/cranium/command_line_options_spec.rb +32 -0
data/spec/cranium/configuration_spec.rb +31 -0
data/spec/cranium/data_importer_spec.rb +55 -0
data/spec/cranium/data_transformer_spec.rb +16 -0
data/spec/cranium/database_spec.rb +69 -0
data/spec/cranium/definition_registry_spec.rb +45 -0
data/spec/cranium/dimension_manager_spec.rb +63 -0
data/spec/cranium/dsl/database_definition_spec.rb +23 -0
data/spec/cranium/dsl/extract_definition_spec.rb +76 -0
data/spec/cranium/dsl/import_definition_spec.rb +153 -0
data/spec/cranium/dsl/source_definition_spec.rb +84 -0
data/spec/cranium/dsl_spec.rb +119 -0
data/spec/cranium/external_table_spec.rb +71 -0
data/spec/cranium/extract/storage_spec.rb +125 -0
data/spec/cranium/logging_spec.rb +37 -0
data/spec/cranium/sequel/hash_spec.rb +56 -0
data/spec/cranium/source_registry_spec.rb +31 -0
data/spec/cranium/test_framework/cucumber_table_spec.rb +144 -0
data/spec/cranium/transformation/duplication_index_spec.rb +75 -0
data/spec/cranium/transformation/index_spec.rb +178 -0
data/spec/cranium/transformation/join_spec.rb +43 -0
data/spec/cranium/transformation/sequence_spec.rb +83 -0
data/spec/cranium/transformation_record_spec.rb +78 -0
data/spec/cranium_spec.rb +53 -0
data/spec/spec_helper.rb +1 -0
metadata +362 -0

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: aa44efc27b0fc354b994ea3c8738d5e6aeab2415
+  data.tar.gz: 036dfbc242e5858bad5fd374cc0b1c27bda4b6da
+SHA512:
+  metadata.gz: a90bc3b7ee0cd635b13e9deb325e4bc58fe83889816ac9b7bc02637318f096b953caa01ce44a6314ac67a74d0374beba991548ba46349250f01914e591cf1554
+  data.tar.gz: d95a477491949134ef37a86c165d4fdc19615fa12e53fed042af1bd838788a290c64b9ffa9a59db420b446e46544103315f6a18f337c1d09d2713665a9a2fc7d

data/.gitignore ADDED Viewed

@@ -0,0 +1,21 @@
+*.gem
+*.rbc
+.bundle
+.config
+.yardoc
+.vagrant
+.idea
+Gemfile.lock
+InstalledFiles
+_yardoc
+coverage
+doc/
+lib/bundler/man
+pkg
+rdoc
+spec/reports
+test/tmp
+test/version_tmp
+tmp
+atlassian-ide-plugin.xml
+log/

data/.ruby-version ADDED Viewed

@@ -0,0 +1 @@
+2.3.0

data/Gemfile ADDED Viewed

@@ -0,0 +1,4 @@
+source 'https://rubygems.org'
+# Specify your gem's dependencies in cranium.gemspec
+gemspec

data/LICENSE.txt ADDED Viewed

@@ -0,0 +1,22 @@
+Copyright (c) 2013 Zoltan Ormandi
+MIT License
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/README.md ADDED Viewed

@@ -0,0 +1,29 @@
+# Cranium
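+Cranium is a pure Ruby ETL framework. It provides Extract, Transform and Load functionality for loading data from CSV files to a Greenplum database.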
+## Installation
+Add this line to your application's Gemfile:
+    gem 'cranium'
+And then execute:
+    $ bundle
+Or install it yourself as:
+    $ gem install cranium
+## Usage
+TODO: Write usage instructions here
+## Contributing
+1. Fork it
+2. Create your feature branch (`git checkout -b my-new-feature`)
+3. Commit your changes (`git commit -am 'Add some feature'`)
+4. Push to the branch (`git push origin my-new-feature`)
+5. Create new Pull Request

data/Rakefile ADDED Viewed

@@ -0,0 +1,3 @@
+require "bundler/gem_tasks"
+import *Dir['rake/**/*.rake']

data/Vagrantfile ADDED Viewed

@@ -0,0 +1,24 @@
+# -*- mode: ruby -*-
+# vi: set ft=ruby :
+FileUtils.mkdir_p("tmp/custdata") unless Dir.exists?("tmp/custdata")
+# Vagrantfile API/syntax version. Don't touch unless you know what you're doing!
+VAGRANTFILE_API_VERSION = "2"
+Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
+  config.vm.box = "si-build.v2"
+  config.vm.box_url = "http://vboxes.ett.local/si-build.v2.box"
+  config.vbguest.auto_update = false
+  config.vm.hostname = 'cranium-build'
+  config.vm.network :private_network, ip: "192.168.56.43"
+  config.vm.provider :virtualbox do |virtual_machine|
+    virtual_machine.name = "cranium"
+  end
+  config.vm.synced_folder "tmp/custdata", "/home/gpadmin/gpfdist-data", owner: "gpadmin", group: "gpadmin"
+  config.vm.provision :shell, inline: "su - gpadmin -c 'cat /vagrant/db/setup.sql | psql'"
+end

data/bin/cranium ADDED Viewed

@@ -0,0 +1,9 @@
+#!/usr/bin/env ruby
+require 'cranium'
+if ENV["PROFILING"] == "yes"
+  require 'cranium/profiling'
+end
+Cranium.application(ARGV).run

data/config/cucumber.yml ADDED Viewed

@@ -0,0 +1,9 @@
+default:
+  --backtrace
+  --tags ~@wip
+  --require features
+build:
+  --backtrace
+  --format progress
+  --tags ~@wip

data/cranium.gemspec ADDED Viewed

@@ -0,0 +1,26 @@
+Gem::Specification.new do |spec|
+  spec.name = 'cranium'
+  spec.version = '0.2.0'
+  spec.authors = ['Emarsys Technologies']
+  spec.email = ['smart-insight-dev@emarsys.com']
+  spec.description = %q{Provides Extract, Transform and Load functionality for loading data from CSV files to a Greenplum database.}
+  spec.summary = %q{Pure Ruby ETL framework}
+  spec.homepage = 'https://github.com/emartech/cranium'
+  spec.license = 'MIT'
+  spec.files = `git ls-files`.split($/)
+  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
+  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
+  spec.require_paths = ['lib']
+  spec.add_runtime_dependency 'pg', '~> 0'
+  spec.add_runtime_dependency 'progressbar', '~> 0'
+  spec.add_runtime_dependency 'sequel', '~> 4'
+  spec.add_runtime_dependency 'slop', '~> 3'
+  spec.add_development_dependency 'bundler', '~> 1'
+  spec.add_development_dependency 'rake', '~> 10'
+  spec.add_development_dependency 'rspec', '~> 3'
+  spec.add_development_dependency 'ruby-prof', '~> 0'
+  spec.add_development_dependency 'cucumber', '~> 1'
+end

data/db/setup.sql ADDED Viewed

@@ -0,0 +1,8 @@
+CREATE RESOURCE QUEUE smart_insight WITH (ACTIVE_STATEMENTS=10, PRIORITY=MEDIUM);
+CREATE ROLE cranium WITH RESOURCE QUEUE smart_insight CREATEEXTTABLE LOGIN PASSWORD 'cranium';
+COMMENT ON ROLE cranium IS 'Cranium test user';
+CREATE DATABASE cranium WITH OWNER=cranium;
+CREATE ROLE database_administrator WITH SUPERUSER LOGIN PASSWORD 'emarsys';

data/docker-compose.yml ADDED Viewed

@@ -0,0 +1,8 @@
+greenplum:
+  image: kevinmtrowbridge/gpdb-docker
+  ports:
+    - 22:22
+    - 5432:5432
+    - 8123:8123
+  volumes:
+    - ./tmp/custdata:/home/gpadmin/gpfdist-data

data/examples/config.rb ADDED Viewed

@@ -0,0 +1,14 @@
+require 'logger'
+Cranium.configure do |config|
+  config.greenplum_connection_string = "postgres://cranium:cranium@192.168.56.43:5432/cranium"
+  config.gpfdist_url = "192.168.56.43:8123"
+  config.gpfdist_home_directory = "tmp/custdata"
+  config.upload_directory = "cranium_build"
+  config.archive_directory = "cranium_archive"
+  config.loggers << Logger.new("log/application.log")
+end
+database :suite do
+  connect_to "postgres://cranium:cranium@192.168.56.43:5432/cranium"
+end

data/examples/deduplication.rb ADDED Viewed

@@ -0,0 +1,27 @@
+require_relative 'config'
+source :sales_items do
+  file "sales_items*.csv"
+  field :order_id, String
+  field :date, Date
+  field :customer, Integer
+  field :item, String
+  field :item_name, String
+  field :quantity, Float
+  field :c_sales_amount, Float
+end
+source :products do
+  field :item_id
+  field :item_name
+end
+deduplicate :sales_items, into: :products, by: [:item]
+# Equivalent to
+transform :sales_items => :products do |record|
+  deduplicate_by :item
+end

data/examples/import_csv_with_field_lookup_inserting_new_dimension_keys.rb ADDED Viewed

@@ -0,0 +1,26 @@
+require_relative 'config'
+source :purchases do
+  field :user_id, String
+  field :amount, String
+end
+source :transformed_purchases do
+  field :contact_key, Integer
+  field :amount, String
+end
+transform :purchases => :transformed_purchases do |record|
+  record[:contact_key] = lookup :contact_key,
+                                from_table: :dim_contact,
+                                match_column: :user_id,
+                                to_value: record[:user_id],
+                                if_not_found_then_insert: { contact_key: next_value_in_sequence("dim_contact_contact_key_seq"),
+                                                            name: "Unknown contact #{record[:user_id]}" }
+end
+import :transformed_purchases do
+  into :fct_purchases
+  put :contact_key
+  put :amount
+end

data/examples/incremental_extract.rb ADDED Viewed

@@ -0,0 +1,17 @@
+require_relative 'config'
+extract :contacts do
+  from :suite
+  incrementally_by :created
+  query <<-sql
+    SELECT *
+    FROM contacts
+    WHERE created BETWEEN '#{last_extracted_value_of :created, "1970-01-01 00:00:00"}' AND '#{Time.now - 60*10}'
+  sql
+end
+extract :contacts do
+  from :suite
+  incrementally_by :id
+  query "SELECT * FROM akarmi WHERE id > #{last_extracted_value_of :id, 0}"
+end

data/examples/lookup_with_multiple_fields.rb ADDED Viewed

@@ -0,0 +1,25 @@
+require_relative 'config'
+source :purchases do
+  field :user_id, String
+  field :amount, String
+end
+source :transformed_purchases do
+  field :contact_key, Integer
+  field :amount, String
+end
+transform :purchases => :transformed_purchases do |record|
+  record[:contact_key] = lookup :contact_key,
+                                from_table: :dim_contact,
+                                match: {:user_id => record[:user_id], :another_field => record[:another_value]},
+                                if_not_found_then_insert: { contact_key: next_value_in_sequence("dim_contact_contact_key_seq"),
+                                                            name: "Unknown contact #{record[:user_id]}" }
+end
+import :transformed_purchases do
+  into :fct_purchases
+  put :contact_key
+  put :amount
+end

data/features/archive.feature ADDED Viewed

@@ -0,0 +1,49 @@
+Feature: Archive source files
+  Scenario:
+    Given no "/tmp/cranium_archive" directory
+    And a "products_1.csv" data file containing:
+    """
+    """
+    And a "products_2.csv" data file containing:
+    """
+    """
+    And a "contacts.csv" data file containing:
+    """
+    """
+    And a "purchases.csv" data file containing:
+    """
+    """
+    And the following definition:
+    """
+    Cranium.configure do |config|
+      config.archive_directory = "/tmp/cranium_archive"
+    end
+    source :products do
+      file "products_*.csv"
+    end
+    source :products_transformed do end
+    source :contacts do
+      file "contacts.csv"
+    end
+    source :purchases do
+      file "purchases.csv"
+    end
+    transform :products => :products_transformed do |record|
+      output record
+    end
+    archive :products, :contacts
+    """
+    When I execute the definition
+    Then the process should exit successfully
+    And the "/tmp/cranium_archive/" directory should contain the following files:
+      | filename         |
+      | .*contacts.csv   |
+      | .*products_1.csv |
+      | .*products_2.csv |

data/features/extract/incremental_extract.feature ADDED Viewed

@@ -0,0 +1,56 @@
+Feature: Extracting data incrementally from a database table to CSV
+  Incremental extracts work by indicating that a field (or fields) should be used to detect new data rows
+  in the table. The highest extracted values are saved from one process and passed on to the next when the
+  process is run again. This approach typically works best with id or timestamp fields.
+  Background:
+    Given the following definition:
+    """
+    database :suite do
+      connect_to Cranium.configuration.greenplum_connection_string
+    end
+    extract :contacts do
+      from :suite
+      incrementally_by :id
+      query "SELECT id, name FROM contacts WHERE id > #{last_extracted_value_of :id, 0} ORDER BY id DESC"
+    end
+    """
+    And a database table called "contacts" with the following fields:
+      | field_name | field_type |
+      | id         | INTEGER    |
+      | name       | TEXT       |
+    And only the following rows in the "contacts" database table:
+      | id | name     |
+      | 1  | John Doe |
+      | 2  | Jane Doe |
+    And the definition is executed
+    And the "contacts.csv" file is deleted
+  Scenario: Successful extract
+    Given the following new rows in the "contacts" database table:
+      | id | name       |
+      | 3  | John Smith |
+      | 4  | Jane Smith |
+    When I execute the definition again
+    Then the process should exit successfully
+    And there should be a "contacts.csv" data file in the upload directory containing:
+    """
+    id,name
+    4,Jane Smith
+    3,John Smith
+    """
+  Scenario: Incremental extract doesn't remember empty 'last extracted value' - bugfix
+    Given the definition is executed again
+    And the "contacts.csv" file is deleted
+    When I execute the definition again
+    Then the process should exit successfully
+    And there should be a "contacts.csv" data file in the upload directory containing:
+    """
+    id,name
+    """

data/features/extract/simple_extract.feature ADDED Viewed

@@ -0,0 +1,85 @@
+Feature: Extracting data from a database table to CSV
+  Data can be extracted from a database table into a CSV file. The CSV file is named after the extract process
+  and is placed in the upload directory specified in the configuration.
+  Background:
+    Given a database table called "contacts" with the following fields:
+      | field_name | field_type |
+      | id         | INTEGER    |
+      | name       | TEXT       |
+  Scenario: Successful extract using raw SQL
+    Given only the following rows in the "contacts" database table:
+      | id | name       |
+      | 1  | John Doe   |
+      | 2  | Jane Doe   |
+      | 3  | John Smith |
+    And the following definition:
+    """
+    database :suite do
+      connect_to Cranium.configuration.greenplum_connection_string
+    end
+    extract :contacts do
+      from :suite
+      query "SELECT id, name FROM contacts WHERE name LIKE '%Doe%' ORDER BY id"
+    end
+    """
+    When I execute the definition
+    Then the process should exit successfully
+    And there should be a "contacts.csv" data file in the upload directory containing:
+    """
+    id,name
+    1,John Doe
+    2,Jane Doe
+    """
+  Scenario: Successful extract with overridden columns
+    Given only the following rows in the "contacts" database table:
+      | id | name       |
+      | 1  | John Doe   |
+      | 2  | Jane Doe   |
+      | 3  | John Smith |
+    And the following definition:
+    """
+    database :suite do
+      connect_to Cranium.configuration.greenplum_connection_string
+    end
+    extract :contacts do
+      from :suite
+      columns %w(uid full_name)
+      query "SELECT id, name FROM contacts WHERE name LIKE '%Doe%' ORDER BY id"
+    end
+    """
+    When I execute the definition
+    Then the process should exit successfully
+    And there should be a "contacts.csv" data file in the upload directory containing:
+    """
+    uid,full_name
+    1,John Doe
+    2,Jane Doe
+    """
+  Scenario: Extract should fail if file already exists
+    Given an empty "contacts.csv" data file
+    And the following definition:
+    """
+    database :suite do
+      connect_to Cranium.configuration.greenplum_connection_string
+    end
+    extract :contacts do
+      from :suite
+      query "SELECT id, name FROM contacts WHERE name LIKE '%Doe%' ORDER BY id"
+    end
+    """
+    When I execute the definition
+    Then the process should exit with an error
+    And the error message should contain:
+    """
+    Extract halted: a file named "contacts.csv" already exists
+    """

data/features/import/import_csv_to_database_as_delta.feature ADDED Viewed

@@ -0,0 +1,38 @@
+Feature: Import a CSV file into the database as a delta
+  Scenario: Successful import
+    Given a database table called "dim_product" with the following fields:
+      | field_name  | field_type |
+      | item        | TEXT       |
+      | title       | TEXT       |
+      | category    | TEXT       |
+      | description | TEXT       |
+    And a "products.csv" data file containing:
+    """
+    id,name,category,description
+    JNI-123,Just a product name,Main category > Subcategory > Sub-subcategory,Some description
+    CDI-234,Another product name,Smart Insight > Cool stuff > Scripts,Another description
+    """
+    And the following definition:
+    """
+    source :products do
+      field :id, String
+      field :name, String
+      field :category, String
+      field :description, String
+    end
+    import :products do
+      into :dim_product
+      put :id => :item
+      put :name => :title
+      put :category => :category
+      put :description => :description
+    end
+    """
+    When I execute the definition
+    Then the process should exit successfully
+    And the "dim_product" table should contain:
+      | item    | title                | category                                      | description         |
+      | JNI-123 | Just a product name  | Main category > Subcategory > Sub-subcategory | Some description    |
+      | CDI-234 | Another product name | Smart Insight > Cool stuff > Scripts          | Another description |

data/features/import/import_csv_to_database_with_delete_insert_merging.feature ADDED Viewed

@@ -0,0 +1,51 @@
+Feature: Import a CSV file into the database with merging
+  The delete_insert_on property can be used to specify a field that identifies the rows to replace while importing.
+  Existing rows whose value in that field appears in the imported data are deleted, then all imported rows are inserted.
+  Scenario: Successful import with merged items
+    Given a database table called "lkp_categories" with the following fields:
+      | field_name  | field_type |
+      | contact_id  | INTEGER    |
+      | category_id | TEXT       |
+    And only the following rows in the "lkp_categories" database table:
+      | contact_id (i) | category_id (s) |
+      | 1              | A               |
+      | 1              | B               |
+      | 1              | C               |
+      | 2              | A               |
+      | 2              | D               |
+    And a "category_lookup.csv" data file containing:
+    """
+    user_id,category_id
+    1,A
+    1,E
+    3,E
+    3,F
+    """
+    And the following definition:
+    """
+    source :category_lookup do
+      field :user_id, Integer
+      field :category_id, String
+    end
+    import :category_lookup do
+      into :lkp_categories
+      put :user_id => :contact_id
+      put :category_id => :category_id
+      delete_insert_on :contact_id
+    end
+    """
+    When I execute the definition
+    Then the process should exit successfully
+    And the "lkp_categories" table should contain:
+      | contact_id (i) | category_id (s) |
+      | 1              | A               |
+      | 1              | E               |
+      | 2              | A               |
+      | 2              | D               |
+      | 3              | E               |
+      | 3              | F               |

data/features/import/import_csv_to_database_with_truncate_insert.feature ADDED Viewed

@@ -0,0 +1,49 @@
+Feature: Import a CSV file into the database with truncation
+  The truncate_insert property can be used to empty the target table before importing.
+  All existing rows are removed, so only the imported rows remain afterwards.
+  Scenario: Successful import into a truncated table
+    Given a database table called "lkp_categories" with the following fields:
+      | field_name  | field_type |
+      | contact_id  | INTEGER    |
+      | category_id | TEXT       |
+    And only the following rows in the "lkp_categories" database table:
+      | contact_id (i) | category_id (s) |
+      | 1              | A               |
+      | 1              | B               |
+      | 1              | C               |
+      | 2              | A               |
+      | 2              | D               |
+    And a "category_lookup.csv" data file containing:
+    """
+    user_id,category_id
+    1,A
+    1,E
+    3,E
+    3,F
+    """
+    And the following definition:
+    """
+    source :category_lookup do
+      field :user_id, Integer
+      field :category_id, String
+    end
+    import :category_lookup do
+      into :lkp_categories
+      put :user_id => :contact_id
+      put :category_id => :category_id
+      truncate_insert true
+    end
+    """
+    When I execute the definition
+    Then the process should exit successfully
+    And the "lkp_categories" table should contain:
+      | contact_id (i) | category_id (s) |
+      | 1              | A               |
+      | 1              | E               |
+      | 3              | E               |
+      | 3              | F               |