cranium 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ded6526a7ab3b7875f8561df207318f256eda9ee
4
- data.tar.gz: 13d3b0f04ed8d2a69f2ff21250ab48abf226368d
3
+ metadata.gz: 95d417150867a455805bdc4b21372050ee81f8cc
4
+ data.tar.gz: 6a1ee615b84a1a6eefd83e4590d947e408fb1244
5
5
  SHA512:
6
- metadata.gz: 4e7e5574b8a0f51ebcd8bd2b3fcfdf6c9d2b8ece9fbc361022cc90bf7183fb00f9bc4c2a5eca3ca6c390763d429c0bf5b358a8fc58368efb49ce3f4965c0d795
7
- data.tar.gz: 4f7a839e74dc68656d2b1754ca20ffa936241b1954f38fc841928fec471ea065ca83e00d2c5069bb313d00eaed4080a8c78e4cf350f410bc9bafa964fc2c6778
6
+ metadata.gz: 6ee2e4a162601873d76cc96b1d88b02b4e6348019f04a899d68ba6c1723a9d1f678f51314bdc748e30357f7d04d8026330c1d428761c7c65506d46483bc45e3e
7
+ data.tar.gz: 114072465d737d6d8fd18ea884d7ed3c1f5b26bfc4dbdf3c2da2e6c6fb0994d5922b75502de2c0cbe75bb60042d3a1c90f788258403a580098175e9e7f108c23
data/cranium.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = 'cranium'
3
- spec.version = '0.2.1'
3
+ spec.version = '0.3.0'
4
4
  spec.authors = ['Emarsys Technologies']
5
5
  spec.email = ['smart-insight-dev@emarsys.com']
6
6
  spec.description = %q{Provides Extract, Transform and Load functionality for loading data from CSV files to a Greenplum database.}
@@ -62,7 +62,7 @@ Feature: Import a CSV file into the database with new dimension values always in
62
62
 
63
63
 
64
64
  Scenario: Example use case for the insert
65
- If purchases made by a predefined contact identifier (NA in this case) do not look for it insert .
65
+ If purchases are made by a predefined contact identifier (NA in this case), do not look it up, just insert it.
66
66
  Otherwise use lookup to find or create that contact
67
67
 
68
68
  Given a database table called "dim_contact" with the following fields:
@@ -135,3 +135,64 @@ Feature: Import a CSV file into the database with new dimension values always in
135
135
  | 11 | NA | Unknown contact NA |
136
136
  | 12 | NA | Unknown contact NA |
137
137
  | 13 | 2 | Unknown contact 2 |
138
+
139
+
140
+ Scenario: Successful import with a large number of users (contacts) to insert
141
+ Given a database table called "dim_contact" with the following fields:
142
+ | field_name | field_type |
143
+ | contact_key | SERIAL |
144
+ | user_id | TEXT |
145
+ | name | TEXT |
146
+ | load_id | NUMERIC |
147
+ | load_date | DATE |
148
+ | is_generated | BOOLEAN |
149
+ And only the following rows in the "dim_contact" database table:
150
+ | contact_key (i) | user_id | name |
151
+ | 10 | 1 | Alma |
152
+ And the current value in sequence "dim_contact_contact_key_seq" is 10
153
+ And a database table called "fct_purchases" with the following fields:
154
+ | field_name | field_type |
155
+ | contact_key | INTEGER |
156
+ | amount | TEXT |
157
+ And a 500_000 lines long "purchases.csv" data file containing rows like:
158
+ """
159
+ user_id,amount
160
+ NA,100
161
+ """
162
+ And the following definition:
163
+ """
164
+ source :purchases do
165
+ field :user_id, String
166
+ field :amount, String
167
+ end
168
+
169
+ source :transformed_purchases do
170
+ field :contact_key, Integer
171
+ field :amount, String
172
+ end
173
+
174
+ now = Time.now
175
+ transform :purchases => :transformed_purchases do |record|
176
+ record[:contact_key] = insert :contact_key,
177
+ table: :dim_contact,
178
+ record: {
179
+ load_id: 1,
180
+ load_date: now,
181
+ contact_key: next_value_in_sequence("dim_contact_contact_key_seq"),
182
+ user_id: record[:user_id],
183
+ name: "Unknown contact #{record[:user_id]}",
184
+ is_generated: true
185
+ }
186
+ output record
187
+ end
188
+
189
+ import :transformed_purchases do
190
+ into :fct_purchases
191
+ put :contact_key
192
+ put :amount
193
+ end
194
+ """
195
+ When I execute the definition
196
+ Then the process should exit successfully
197
+ And the "fct_purchases" table should contain 500_000 purchases
198
+ And the "dim_contact" table should contain 500_001 contacts
@@ -38,3 +38,8 @@ Then(/^the "([^"]*)" table should contain:$/) do |table_name, data|
38
38
 
39
39
  expect(database_table(table_name).content(data.fields)).to match_array expected_data
40
40
  end
41
+
42
+
43
+ Then(/^the "([^"]*)" table should contain ([\d_]+) .+$/) do |table_name, count|
44
+ expect(database_table(table_name).count).to eq count.to_i
45
+ end
@@ -13,6 +13,13 @@ Given /^an? "([^"]*)" data file containing:$/ do |file_name, content|
13
13
  end
14
14
 
15
15
 
16
+ Given /^an? ([\d_]+) lines long "([^"]*)" data file containing rows like:$/ do |lines_count, file_name, content|
17
+ lines = content.split("\n")
18
+
19
+ upload_directory.save_file file_name, "#{lines.first}\n" + "#{lines.last}\n" * lines_count.to_i
20
+ end
21
+
22
+
16
23
  Given /^the "([^"]*)" file is deleted$/ do |file_name|
17
24
  upload_directory.delete_file file_name
18
25
  end
@@ -36,7 +36,7 @@ class Cranium::DimensionManager
36
36
 
37
37
 
38
38
  def flush
39
- db.multi_insert(@rows) unless @rows.empty?
39
+ db.multi_insert(@rows, slice: INSERT_BATCH_SIZE) unless @rows.empty?
40
40
  @rows = []
41
41
  end
42
42
 
@@ -44,6 +44,10 @@ class Cranium::DimensionManager
44
44
 
45
45
  private
46
46
 
47
+ INSERT_BATCH_SIZE = 100_000.freeze
48
+
49
+
50
+
47
51
  def to_multi_key_cache(table_data)
48
52
  Hash[table_data.map { |row| [row[0..-2], row.last] }]
49
53
  end
@@ -14,6 +14,12 @@ class Cranium::TestFramework::DatabaseTable < Cranium::TestFramework::DatabaseEn
14
14
 
15
15
 
16
16
 
17
+ def count
18
+ connection[entity_name].count
19
+ end
20
+
21
+
22
+
17
23
  def content(fields = ["*".to_sym])
18
24
  connection[entity_name].select(*fields).all
19
25
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cranium
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Emarsys Technologies
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-04-24 00:00:00.000000000 Z
11
+ date: 2016-04-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: pg