cranium 0.2.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ded6526a7ab3b7875f8561df207318f256eda9ee
4
- data.tar.gz: 13d3b0f04ed8d2a69f2ff21250ab48abf226368d
3
+ metadata.gz: 95d417150867a455805bdc4b21372050ee81f8cc
4
+ data.tar.gz: 6a1ee615b84a1a6eefd83e4590d947e408fb1244
5
5
  SHA512:
6
- metadata.gz: 4e7e5574b8a0f51ebcd8bd2b3fcfdf6c9d2b8ece9fbc361022cc90bf7183fb00f9bc4c2a5eca3ca6c390763d429c0bf5b358a8fc58368efb49ce3f4965c0d795
7
- data.tar.gz: 4f7a839e74dc68656d2b1754ca20ffa936241b1954f38fc841928fec471ea065ca83e00d2c5069bb313d00eaed4080a8c78e4cf350f410bc9bafa964fc2c6778
6
+ metadata.gz: 6ee2e4a162601873d76cc96b1d88b02b4e6348019f04a899d68ba6c1723a9d1f678f51314bdc748e30357f7d04d8026330c1d428761c7c65506d46483bc45e3e
7
+ data.tar.gz: 114072465d737d6d8fd18ea884d7ed3c1f5b26bfc4dbdf3c2da2e6c6fb0994d5922b75502de2c0cbe75bb60042d3a1c90f788258403a580098175e9e7f108c23
data/cranium.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = 'cranium'
3
- spec.version = '0.2.1'
3
+ spec.version = '0.3.0'
4
4
  spec.authors = ['Emarsys Technologies']
5
5
  spec.email = ['smart-insight-dev@emarsys.com']
6
6
  spec.description = %q{Provides Extract, Transform and Load functionality for loading data from CSV files to a Greenplum database.}
@@ -62,7 +62,7 @@ Feature: Import a CSV file into the database with new dimension values always in
62
62
 
63
63
 
64
64
  Scenario: Example use case for the insert
65
- If purchases made by a predefined contact identifier (NA in this case) do not look for it insert .
65
+ If purchases made by a predefined contact identifier (NA in this case) do not look for it insert.
66
66
  Otherwise use lookup to find or create that contact
67
67
 
68
68
  Given a database table called "dim_contact" with the following fields:
@@ -135,3 +135,64 @@ Feature: Import a CSV file into the database with new dimension values always in
135
135
  | 11 | NA | Unknown contact NA |
136
136
  | 12 | NA | Unknown contact NA |
137
137
  | 13 | 2 | Unknown contact 2 |
138
+
139
+
140
+ Scenario: Successful import with a large number of users (contacts) to insert
141
+ Given a database table called "dim_contact" with the following fields:
142
+ | field_name | field_type |
143
+ | contact_key | SERIAL |
144
+ | user_id | TEXT |
145
+ | name | TEXT |
146
+ | load_id | NUMERIC |
147
+ | load_date | DATE |
148
+ | is_generated | BOOLEAN |
149
+ And only the following rows in the "dim_contact" database table:
150
+ | contact_key (i) | user_id | name |
151
+ | 10 | 1 | Alma |
152
+ And the current value in sequence "dim_contact_contact_key_seq" is 10
153
+ And a database table called "fct_purchases" with the following fields:
154
+ | field_name | field_type |
155
+ | contact_key | INTEGER |
156
+ | amount | TEXT |
157
+ And a 500_000 lines long "purchases.csv" data file containing rows like:
158
+ """
159
+ user_id,amount
160
+ NA,100
161
+ """
162
+ And the following definition:
163
+ """
164
+ source :purchases do
165
+ field :user_id, String
166
+ field :amount, String
167
+ end
168
+
169
+ source :transformed_purchases do
170
+ field :contact_key, Integer
171
+ field :amount, String
172
+ end
173
+
174
+ now = Time.now
175
+ transform :purchases => :transformed_purchases do |record|
176
+ record[:contact_key] = insert :contact_key,
177
+ table: :dim_contact,
178
+ record: {
179
+ load_id: 1,
180
+ load_date: now,
181
+ contact_key: next_value_in_sequence("dim_contact_contact_key_seq"),
182
+ user_id: record[:user_id],
183
+ name: "Unknown contact #{record[:user_id]}",
184
+ is_generated: true
185
+ }
186
+ output record
187
+ end
188
+
189
+ import :transformed_purchases do
190
+ into :fct_purchases
191
+ put :contact_key
192
+ put :amount
193
+ end
194
+ """
195
+ When I execute the definition
196
+ Then the process should exit successfully
197
+ And the "fct_purchases" table should contain 500_000 purchases
198
+ And the "dim_contact" table should contain 500_001 contacts
@@ -38,3 +38,8 @@ Then(/^the "([^"]*)" table should contain:$/) do |table_name, data|
38
38
 
39
39
  expect(database_table(table_name).content(data.fields)).to match_array expected_data
40
40
  end
41
+
42
+
43
+ Then(/^the "([^"]*)" table should contain ([\d_]+) .+$/) do |table_name, count|
44
+ expect(database_table(table_name).count).to eq count.to_i
45
+ end
@@ -13,6 +13,13 @@ Given /^an? "([^"]*)" data file containing:$/ do |file_name, content|
13
13
  end
14
14
 
15
15
 
16
+ Given /^an? ([\d_]+) lines long "([^"]*)" data file containing rows like:$/ do |lines_count, file_name, content|
17
+ lines = content.split("\n")
18
+
19
+ upload_directory.save_file file_name, "#{lines.first}\n" + "#{lines.last}\n" * lines_count.to_i
20
+ end
21
+
22
+
16
23
  Given /^the "([^"]*)" file is deleted$/ do |file_name|
17
24
  upload_directory.delete_file file_name
18
25
  end
@@ -36,7 +36,7 @@ class Cranium::DimensionManager
36
36
 
37
37
 
38
38
  def flush
39
- db.multi_insert(@rows) unless @rows.empty?
39
+ db.multi_insert(@rows, slice: INSERT_BATCH_SIZE) unless @rows.empty?
40
40
  @rows = []
41
41
  end
42
42
 
@@ -44,6 +44,10 @@ class Cranium::DimensionManager
44
44
 
45
45
  private
46
46
 
47
+ INSERT_BATCH_SIZE = 100_000.freeze
48
+
49
+
50
+
47
51
  def to_multi_key_cache(table_data)
48
52
  Hash[table_data.map { |row| [row[0..-2], row.last] }]
49
53
  end
@@ -14,6 +14,12 @@ class Cranium::TestFramework::DatabaseTable < Cranium::TestFramework::DatabaseEn
14
14
 
15
15
 
16
16
 
17
+ def count
18
+ connection[entity_name].count
19
+ end
20
+
21
+
22
+
17
23
  def content(fields = ["*".to_sym])
18
24
  connection[entity_name].select(*fields).all
19
25
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cranium
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Emarsys Technologies
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-04-24 00:00:00.000000000 Z
11
+ date: 2016-04-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: pg