cranium 0.2.1 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/cranium.gemspec +1 -1
- data/features/import/import_csv_with_always_inserting_new_dimension_keys.feature +62 -1
- data/features/step_definitions/database_table_steps.rb +5 -0
- data/features/step_definitions/file_steps.rb +7 -0
- data/lib/cranium/dimension_manager.rb +5 -1
- data/lib/cranium/test_framework/database_table.rb +6 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 95d417150867a455805bdc4b21372050ee81f8cc
|
4
|
+
data.tar.gz: 6a1ee615b84a1a6eefd83e4590d947e408fb1244
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6ee2e4a162601873d76cc96b1d88b02b4e6348019f04a899d68ba6c1723a9d1f678f51314bdc748e30357f7d04d8026330c1d428761c7c65506d46483bc45e3e
|
7
|
+
data.tar.gz: 114072465d737d6d8fd18ea884d7ed3c1f5b26bfc4dbdf3c2da2e6c6fb0994d5922b75502de2c0cbe75bb60042d3a1c90f788258403a580098175e9e7f108c23
|
data/cranium.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = 'cranium'
|
3
|
-
spec.version = '0.2.1'
|
3
|
+
spec.version = '0.3.0'
|
4
4
|
spec.authors = ['Emarsys Technologies']
|
5
5
|
spec.email = ['smart-insight-dev@emarsys.com']
|
6
6
|
spec.description = %q{Provides Extract, Transform and Load functionality for loading data from CSV files to a Greenplum database.}
|
@@ -62,7 +62,7 @@ Feature: Import a CSV file into the database with new dimension values always in
|
|
62
62
|
|
63
63
|
|
64
64
|
Scenario: Example use case for the insert
|
65
|
-
If purchases made by a predefined contact identifier (NA in this case) do not look for it insert
|
65
|
+
If purchases made by a predefined contact identifier (NA in this case) do not look for it insert.
|
66
66
|
Otherwise use lookup to find or create that contact
|
67
67
|
|
68
68
|
Given a database table called "dim_contact" with the following fields:
|
@@ -135,3 +135,64 @@ Feature: Import a CSV file into the database with new dimension values always in
|
|
135
135
|
| 11 | NA | Unknown contact NA |
|
136
136
|
| 12 | NA | Unknown contact NA |
|
137
137
|
| 13 | 2 | Unknown contact 2 |
|
138
|
+
|
139
|
+
|
140
|
+
Scenario: Successful import with a large number of users (contacts) to insert
|
141
|
+
Given a database table called "dim_contact" with the following fields:
|
142
|
+
| field_name | field_type |
|
143
|
+
| contact_key | SERIAL |
|
144
|
+
| user_id | TEXT |
|
145
|
+
| name | TEXT |
|
146
|
+
| load_id | NUMERIC |
|
147
|
+
| load_date | DATE |
|
148
|
+
| is_generated | BOOLEAN |
|
149
|
+
And only the following rows in the "dim_contact" database table:
|
150
|
+
| contact_key (i) | user_id | name |
|
151
|
+
| 10 | 1 | Alma |
|
152
|
+
And the current value in sequence "dim_contact_contact_key_seq" is 10
|
153
|
+
And a database table called "fct_purchases" with the following fields:
|
154
|
+
| field_name | field_type |
|
155
|
+
| contact_key | INTEGER |
|
156
|
+
| amount | TEXT |
|
157
|
+
And a 500_000 lines long "purchases.csv" data file containing rows like:
|
158
|
+
"""
|
159
|
+
user_id,amount
|
160
|
+
NA,100
|
161
|
+
"""
|
162
|
+
And the following definition:
|
163
|
+
"""
|
164
|
+
source :purchases do
|
165
|
+
field :user_id, String
|
166
|
+
field :amount, String
|
167
|
+
end
|
168
|
+
|
169
|
+
source :transformed_purchases do
|
170
|
+
field :contact_key, Integer
|
171
|
+
field :amount, String
|
172
|
+
end
|
173
|
+
|
174
|
+
now = Time.now
|
175
|
+
transform :purchases => :transformed_purchases do |record|
|
176
|
+
record[:contact_key] = insert :contact_key,
|
177
|
+
table: :dim_contact,
|
178
|
+
record: {
|
179
|
+
load_id: 1,
|
180
|
+
load_date: now,
|
181
|
+
contact_key: next_value_in_sequence("dim_contact_contact_key_seq"),
|
182
|
+
user_id: record[:user_id],
|
183
|
+
name: "Unknown contact #{record[:user_id]}",
|
184
|
+
is_generated: true
|
185
|
+
}
|
186
|
+
output record
|
187
|
+
end
|
188
|
+
|
189
|
+
import :transformed_purchases do
|
190
|
+
into :fct_purchases
|
191
|
+
put :contact_key
|
192
|
+
put :amount
|
193
|
+
end
|
194
|
+
"""
|
195
|
+
When I execute the definition
|
196
|
+
Then the process should exit successfully
|
197
|
+
And the "fct_purchases" table should contain 500_000 purchases
|
198
|
+
And the "dim_contact" table should contain 500_001 contacts
|
@@ -38,3 +38,8 @@ Then(/^the "([^"]*)" table should contain:$/) do |table_name, data|
|
|
38
38
|
|
39
39
|
expect(database_table(table_name).content(data.fields)).to match_array expected_data
|
40
40
|
end
|
41
|
+
|
42
|
+
|
43
|
+
Then(/^the "([^"]*)" table should contain ([\d_]+) .+$/) do |table_name, count|
|
44
|
+
expect(database_table(table_name).count).to eq count.to_i
|
45
|
+
end
|
@@ -13,6 +13,13 @@ Given /^an? "([^"]*)" data file containing:$/ do |file_name, content|
|
|
13
13
|
end
|
14
14
|
|
15
15
|
|
16
|
+
Given /^an? ([\d_]+) lines long "([^"]*)" data file containing rows like:$/ do |lines_count, file_name, content|
|
17
|
+
lines = content.split("\n")
|
18
|
+
|
19
|
+
upload_directory.save_file file_name, "#{lines.first}\n" + "#{lines.last}\n" * lines_count.to_i
|
20
|
+
end
|
21
|
+
|
22
|
+
|
16
23
|
Given /^the "([^"]*)" file is deleted$/ do |file_name|
|
17
24
|
upload_directory.delete_file file_name
|
18
25
|
end
|
@@ -36,7 +36,7 @@ class Cranium::DimensionManager
|
|
36
36
|
|
37
37
|
|
38
38
|
def flush
|
39
|
-
db.multi_insert(@rows) unless @rows.empty?
|
39
|
+
db.multi_insert(@rows, slice: INSERT_BATCH_SIZE) unless @rows.empty?
|
40
40
|
@rows = []
|
41
41
|
end
|
42
42
|
|
@@ -44,6 +44,10 @@ class Cranium::DimensionManager
|
|
44
44
|
|
45
45
|
private
|
46
46
|
|
47
|
+
INSERT_BATCH_SIZE = 100_000.freeze
|
48
|
+
|
49
|
+
|
50
|
+
|
47
51
|
def to_multi_key_cache(table_data)
|
48
52
|
Hash[table_data.map { |row| [row[0..-2], row.last] }]
|
49
53
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cranium
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Emarsys Technologies
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-04-
|
11
|
+
date: 2016-04-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: pg
|