masamune 0.11.9 → 0.12.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/masamune/actions/transform.rb +31 -16
- data/lib/masamune/schema.rb +0 -1
- data/lib/masamune/schema/catalog.rb +2 -10
- data/lib/masamune/schema/column.rb +16 -30
- data/lib/masamune/schema/dimension.rb +2 -9
- data/lib/masamune/schema/fact.rb +0 -4
- data/lib/masamune/schema/map.rb +1 -1
- data/lib/masamune/schema/row.rb +3 -3
- data/lib/masamune/schema/store.rb +1 -3
- data/lib/masamune/schema/table.rb +28 -2
- data/lib/masamune/transform.rb +0 -1
- data/lib/masamune/transform/define_schema.rb +0 -6
- data/lib/masamune/transform/define_table.hql.erb +7 -6
- data/lib/masamune/transform/define_table.rb +1 -0
- data/lib/masamune/transform/{define_event_view.hql.erb → denormalize_table.hql.erb} +8 -26
- data/lib/masamune/transform/denormalize_table.psql.erb +1 -1
- data/lib/masamune/transform/denormalize_table.rb +13 -4
- data/lib/masamune/transform/snapshot_dimension.rb +1 -1
- data/lib/masamune/transform/stage_fact.rb +1 -1
- data/lib/masamune/version.rb +1 -1
- data/spec/masamune/actions/transform_spec.rb +50 -18
- data/spec/masamune/schema/catalog_spec.rb +0 -53
- data/spec/masamune/schema/column_spec.rb +9 -41
- data/spec/masamune/schema/fact_spec.rb +3 -1
- data/spec/masamune/schema/map_spec.rb +187 -189
- data/spec/masamune/schema/table_spec.rb +8 -0
- data/spec/masamune/transform/bulk_upsert.dimension_spec.rb +4 -6
- data/spec/masamune/transform/define_schema_spec.rb +5 -6
- data/spec/masamune/transform/define_table.dimension_spec.rb +101 -2
- data/spec/masamune/transform/define_table.fact_spec.rb +2 -0
- data/spec/masamune/transform/denormalize_table_spec.rb +170 -14
- data/spec/masamune/transform/snapshot_dimension_spec.rb +1 -1
- data/spec/masamune/transform/stage_dimension_spec.rb +3 -3
- metadata +3 -9
- data/lib/masamune/schema/event.rb +0 -121
- data/lib/masamune/transform/define_event_view.rb +0 -60
- data/spec/masamune/schema/event_spec.rb +0 -75
- data/spec/masamune/transform/define_event_view_spec.rb +0 -84
@@ -36,6 +36,14 @@ describe Masamune::Schema::Table do
|
|
36
36
|
it { expect(table.name).to eq('account_table') }
|
37
37
|
end
|
38
38
|
|
39
|
+
context 'with format' do
|
40
|
+
let(:table) do
|
41
|
+
described_class.new id: 'user', properties: { format: :tsv }
|
42
|
+
end
|
43
|
+
|
44
|
+
it { expect(table.properties[:format]).to eq(:tsv) }
|
45
|
+
end
|
46
|
+
|
39
47
|
context 'with columns' do
|
40
48
|
let(:table) do
|
41
49
|
described_class.new id: 'user',
|
@@ -63,7 +63,7 @@ describe Masamune::Transform::BulkUpsert do
|
|
63
63
|
column 'department.department_id', type: :integer
|
64
64
|
column 'user_account_state.name', type: :string
|
65
65
|
column 'hr_user_account_state.name', type: :string
|
66
|
-
column '
|
66
|
+
column 'preferences', type: :json
|
67
67
|
column 'start_at', type: :timestamp
|
68
68
|
column 'source_kind', type: :string
|
69
69
|
column 'delta', type: :integer
|
@@ -147,8 +147,7 @@ describe Masamune::Transform::BulkUpsert do
|
|
147
147
|
user_account_state_type_id = COALESCE(user_dimension_ledger_stage.user_account_state_type_id, user_dimension_ledger.user_account_state_type_id),
|
148
148
|
hr_user_account_state_type_id = COALESCE(user_dimension_ledger_stage.hr_user_account_state_type_id, user_dimension_ledger.hr_user_account_state_type_id),
|
149
149
|
name = COALESCE(user_dimension_ledger_stage.name, user_dimension_ledger.name),
|
150
|
-
|
151
|
-
preferences_was = COALESCE(user_dimension_ledger_stage.preferences_was, user_dimension_ledger.preferences_was)
|
150
|
+
preferences = COALESCE(user_dimension_ledger_stage.preferences, user_dimension_ledger.preferences)
|
152
151
|
FROM
|
153
152
|
user_dimension_ledger_stage
|
154
153
|
WHERE
|
@@ -160,7 +159,7 @@ describe Masamune::Transform::BulkUpsert do
|
|
160
159
|
;
|
161
160
|
|
162
161
|
INSERT INTO
|
163
|
-
user_dimension_ledger (department_type_id, user_account_state_type_id, hr_user_account_state_type_id, tenant_id, user_id, name,
|
162
|
+
user_dimension_ledger (department_type_id, user_account_state_type_id, hr_user_account_state_type_id, tenant_id, user_id, name, preferences, source_kind, source_uuid, start_at, last_modified_at, delta)
|
164
163
|
SELECT
|
165
164
|
user_dimension_ledger_stage.department_type_id,
|
166
165
|
user_dimension_ledger_stage.user_account_state_type_id,
|
@@ -168,8 +167,7 @@ describe Masamune::Transform::BulkUpsert do
|
|
168
167
|
user_dimension_ledger_stage.tenant_id,
|
169
168
|
user_dimension_ledger_stage.user_id,
|
170
169
|
user_dimension_ledger_stage.name,
|
171
|
-
user_dimension_ledger_stage.
|
172
|
-
user_dimension_ledger_stage.preferences_was,
|
170
|
+
user_dimension_ledger_stage.preferences,
|
173
171
|
user_dimension_ledger_stage.source_kind,
|
174
172
|
user_dimension_ledger_stage.source_uuid,
|
175
173
|
user_dimension_ledger_stage.start_at,
|
@@ -63,11 +63,10 @@ describe Masamune::Transform::DefineSchema do
|
|
63
63
|
context 'for hive schema' do
|
64
64
|
before do
|
65
65
|
catalog.schema :hive do
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
attribute 'preferences', type: :json
|
66
|
+
dimension 'user', type: :ledger do
|
67
|
+
column 'tenant_id', index: true, natural_key: true
|
68
|
+
column 'user_id', index: true, natural_key: true
|
69
|
+
column 'preferences', type: :key_value, null: true
|
71
70
|
end
|
72
71
|
end
|
73
72
|
end
|
@@ -77,7 +76,7 @@ describe Masamune::Transform::DefineSchema do
|
|
77
76
|
it 'should render combined template' do
|
78
77
|
is_expected.to eq Masamune::Template.combine \
|
79
78
|
Masamune::Transform::Operator.new('define_schema', source: catalog.hive),
|
80
|
-
transform.
|
79
|
+
transform.define_table(catalog.hive.dimensions['user'])
|
81
80
|
end
|
82
81
|
end
|
83
82
|
end
|
@@ -41,6 +41,106 @@ describe Masamune::Transform::DefineTable do
|
|
41
41
|
end
|
42
42
|
end
|
43
43
|
|
44
|
+
context 'for hive ledger dimension' do
|
45
|
+
before do
|
46
|
+
catalog.schema :hive do
|
47
|
+
dimension 'tenant', type: :ledger do
|
48
|
+
column 'tenant_id', type: :integer, natural_key: true
|
49
|
+
column 'tenant_account_state', type: :enum, values: %w(missing unknown active inactive)
|
50
|
+
column 'tenant_premium_state', type: :enum, values: %w(missing unkown goodwill pilot sandbox premium internal free vmware)
|
51
|
+
column 'preferences', type: :key_value, null: true
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
let(:table) { catalog.hive.tenant_dimension }
|
57
|
+
|
58
|
+
it 'should render table template' do
|
59
|
+
is_expected.to eq <<-EOS.strip_heredoc
|
60
|
+
CREATE TABLE IF NOT EXISTS tenant_ledger
|
61
|
+
(
|
62
|
+
id STRING,
|
63
|
+
tenant_id INT,
|
64
|
+
tenant_account_state STRING,
|
65
|
+
tenant_premium_state STRING,
|
66
|
+
preferences STRING,
|
67
|
+
source_kind STRING,
|
68
|
+
source_uuid STRING,
|
69
|
+
start_at STRING,
|
70
|
+
last_modified_at STRING,
|
71
|
+
delta INT
|
72
|
+
)
|
73
|
+
TBLPROPERTIES ('serialization.null.format' = '');
|
74
|
+
EOS
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
context 'for hive ledger dimension with partitions' do
|
79
|
+
before do
|
80
|
+
catalog.schema :hive do
|
81
|
+
dimension 'tenant', type: :ledger do
|
82
|
+
partition :y
|
83
|
+
partition :m
|
84
|
+
column 'tenant_id', type: :integer, natural_key: true
|
85
|
+
column 'tenant_account_state', type: :enum, values: %w(missing unknown active inactive)
|
86
|
+
column 'tenant_premium_state', type: :enum, values: %w(missing unkown goodwill pilot sandbox premium internal free vmware)
|
87
|
+
column 'preferences', type: :key_value, null: true
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
let(:table) { catalog.hive.tenant_dimension }
|
93
|
+
|
94
|
+
it 'should render table template' do
|
95
|
+
is_expected.to eq <<-EOS.strip_heredoc
|
96
|
+
CREATE TABLE IF NOT EXISTS tenant_ledger
|
97
|
+
(
|
98
|
+
id STRING,
|
99
|
+
tenant_id INT,
|
100
|
+
tenant_account_state STRING,
|
101
|
+
tenant_premium_state STRING,
|
102
|
+
preferences STRING,
|
103
|
+
source_kind STRING,
|
104
|
+
source_uuid STRING,
|
105
|
+
start_at STRING,
|
106
|
+
last_modified_at STRING,
|
107
|
+
delta INT
|
108
|
+
)
|
109
|
+
PARTITIONED BY (y INT, m INT)
|
110
|
+
TBLPROPERTIES ('serialization.null.format' = '');
|
111
|
+
EOS
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
context 'for hive ledger dimension with :tsv format' do
|
116
|
+
before do
|
117
|
+
catalog.schema :hive do
|
118
|
+
dimension 'tenant', type: :ledger, properties: { format: :tsv } do
|
119
|
+
column 'tenant_id', type: :integer, natural_key: true
|
120
|
+
end
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
let(:table) { catalog.hive.tenant_dimension }
|
125
|
+
|
126
|
+
it 'should render table template' do
|
127
|
+
is_expected.to eq <<-EOS.strip_heredoc
|
128
|
+
CREATE TABLE IF NOT EXISTS tenant_ledger
|
129
|
+
(
|
130
|
+
id STRING,
|
131
|
+
tenant_id INT,
|
132
|
+
source_kind STRING,
|
133
|
+
source_uuid STRING,
|
134
|
+
start_at STRING,
|
135
|
+
last_modified_at STRING,
|
136
|
+
delta INT
|
137
|
+
)
|
138
|
+
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\t'
|
139
|
+
TBLPROPERTIES ('serialization.null.format' = '');
|
140
|
+
EOS
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
44
144
|
context 'for postgres dimension type: one' do
|
45
145
|
before do
|
46
146
|
catalog.schema :postgres do
|
@@ -160,8 +260,7 @@ describe Masamune::Transform::DefineTable do
|
|
160
260
|
user_account_state_type_id INTEGER REFERENCES user_account_state_type(id),
|
161
261
|
tenant_id INTEGER NOT NULL,
|
162
262
|
user_id INTEGER NOT NULL,
|
163
|
-
|
164
|
-
preferences_was HSTORE,
|
263
|
+
preferences HSTORE,
|
165
264
|
source_kind VARCHAR NOT NULL,
|
166
265
|
source_uuid VARCHAR NOT NULL,
|
167
266
|
start_at TIMESTAMP NOT NULL,
|
@@ -63,22 +63,80 @@ describe Masamune::Transform::DenormalizeTable do
|
|
63
63
|
end
|
64
64
|
end
|
65
65
|
|
66
|
-
let(:
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
66
|
+
let(:options) { {} }
|
67
|
+
|
68
|
+
subject(:result) { transform.denormalize_table(target, options).to_s }
|
69
|
+
|
70
|
+
context 'with postgres fact with :columns' do
|
71
|
+
let(:target) { catalog.postgres.visits_fact }
|
72
|
+
let(:options) do
|
73
|
+
{
|
74
|
+
columns: [
|
75
|
+
'date.date_id',
|
76
|
+
'tenant.tenant_id',
|
77
|
+
'user.tenant_id',
|
78
|
+
'user.user_id',
|
79
|
+
'user_agent.name',
|
80
|
+
'user_agent.version',
|
81
|
+
'total',
|
82
|
+
'time_key'
|
83
|
+
]
|
84
|
+
}
|
85
|
+
end
|
86
|
+
|
87
|
+
it 'should eq render denormalize_table template' do
|
88
|
+
is_expected.to eq <<-EOS.strip_heredoc
|
89
|
+
SELECT
|
90
|
+
date_dimension.date_id AS date_dimension_date_id,
|
91
|
+
tenant_dimension.tenant_id AS tenant_dimension_tenant_id,
|
92
|
+
user_dimension.tenant_id AS user_dimension_tenant_id,
|
93
|
+
user_dimension.user_id AS user_dimension_user_id,
|
94
|
+
user_agent_type.name AS user_agent_type_name,
|
95
|
+
user_agent_type.version AS user_agent_type_version,
|
96
|
+
visits_fact.total,
|
97
|
+
visits_fact.time_key
|
98
|
+
FROM
|
99
|
+
visits_fact
|
100
|
+
JOIN
|
101
|
+
date_dimension
|
102
|
+
ON
|
103
|
+
date_dimension.id = visits_fact.date_dimension_id
|
104
|
+
JOIN
|
105
|
+
tenant_dimension
|
106
|
+
ON
|
107
|
+
tenant_dimension.id = visits_fact.tenant_dimension_id
|
108
|
+
JOIN
|
109
|
+
user_dimension
|
110
|
+
ON
|
111
|
+
user_dimension.id = visits_fact.user_dimension_id
|
112
|
+
JOIN
|
113
|
+
user_agent_type
|
114
|
+
ON
|
115
|
+
user_agent_type.id = visits_fact.user_agent_type_id
|
116
|
+
ORDER BY
|
117
|
+
date_dimension_date_id,
|
118
|
+
tenant_dimension_tenant_id,
|
119
|
+
user_dimension_tenant_id,
|
120
|
+
user_dimension_user_id,
|
121
|
+
user_agent_type_name,
|
122
|
+
user_agent_type_version,
|
123
|
+
total,
|
124
|
+
time_key
|
125
|
+
;
|
126
|
+
EOS
|
127
|
+
end
|
78
128
|
end
|
79
129
|
|
80
|
-
context 'with postgres fact' do
|
81
|
-
|
130
|
+
context 'with postgres fact with :except' do
|
131
|
+
let(:target) { catalog.postgres.visits_fact }
|
132
|
+
let(:options) do
|
133
|
+
{
|
134
|
+
except: [
|
135
|
+
'cluster.name',
|
136
|
+
'last_modified_at'
|
137
|
+
]
|
138
|
+
}
|
139
|
+
end
|
82
140
|
|
83
141
|
it 'should eq render denormalize_table template' do
|
84
142
|
is_expected.to eq <<-EOS.strip_heredoc
|
@@ -89,6 +147,7 @@ describe Masamune::Transform::DenormalizeTable do
|
|
89
147
|
user_dimension.user_id AS user_dimension_user_id,
|
90
148
|
user_agent_type.name AS user_agent_type_name,
|
91
149
|
user_agent_type.version AS user_agent_type_version,
|
150
|
+
user_agent_type.mobile AS user_agent_type_mobile,
|
92
151
|
visits_fact.total,
|
93
152
|
visits_fact.time_key
|
94
153
|
FROM
|
@@ -116,10 +175,107 @@ describe Masamune::Transform::DenormalizeTable do
|
|
116
175
|
user_dimension_user_id,
|
117
176
|
user_agent_type_name,
|
118
177
|
user_agent_type_version,
|
178
|
+
user_agent_type_mobile,
|
119
179
|
total,
|
120
180
|
time_key
|
121
181
|
;
|
122
182
|
EOS
|
123
183
|
end
|
124
184
|
end
|
185
|
+
|
186
|
+
context 'with hive table' do
|
187
|
+
before do
|
188
|
+
catalog.schema :hive do
|
189
|
+
dimension 'tenant', type: :ledger do
|
190
|
+
partition :y
|
191
|
+
partition :m
|
192
|
+
column 'tenant_id', type: :integer, natural_key: true
|
193
|
+
column 'tenant_account_state', type: :enum, values: %w(missing unknown active inactive)
|
194
|
+
column 'tenant_premium_state', type: :enum, values: %w(missing unkown goodwill pilot sandbox premium internal free vmware)
|
195
|
+
column 'preferences', type: :key_value, null: true
|
196
|
+
end
|
197
|
+
end
|
198
|
+
end
|
199
|
+
|
200
|
+
let(:target) { catalog.hive.tenant_dimension }
|
201
|
+
|
202
|
+
let(:options) do
|
203
|
+
{
|
204
|
+
columns: [
|
205
|
+
'tenant_id',
|
206
|
+
'tenant_account_state',
|
207
|
+
'tenant_premium_state',
|
208
|
+
'preferences',
|
209
|
+
'y',
|
210
|
+
'm'
|
211
|
+
],
|
212
|
+
order: [
|
213
|
+
'tenant_id',
|
214
|
+
'start_at'
|
215
|
+
]
|
216
|
+
}
|
217
|
+
end
|
218
|
+
|
219
|
+
it 'should eq render denormalize_table template' do
|
220
|
+
is_expected.to eq <<-EOS.strip_heredoc
|
221
|
+
SELECT
|
222
|
+
tenant_ledger.tenant_id,
|
223
|
+
tenant_ledger.tenant_account_state,
|
224
|
+
tenant_ledger.tenant_premium_state,
|
225
|
+
tenant_ledger.preferences,
|
226
|
+
tenant_ledger.y,
|
227
|
+
tenant_ledger.m
|
228
|
+
FROM
|
229
|
+
tenant_ledger
|
230
|
+
ORDER BY
|
231
|
+
tenant_id,
|
232
|
+
start_at
|
233
|
+
;
|
234
|
+
EOS
|
235
|
+
end
|
236
|
+
end
|
237
|
+
|
238
|
+
context 'with hive table with implicit references' do
|
239
|
+
before do
|
240
|
+
catalog.schema :hive do
|
241
|
+
dimension 'date', type: :date, implicit: true do
|
242
|
+
column 'date_id', type: :integer, natural_key: true
|
243
|
+
end
|
244
|
+
|
245
|
+
fact 'visits' do
|
246
|
+
references :date
|
247
|
+
references :user, degenerate: true
|
248
|
+
measure 'total'
|
249
|
+
end
|
250
|
+
end
|
251
|
+
end
|
252
|
+
|
253
|
+
let(:target) { catalog.hive.visits_fact }
|
254
|
+
|
255
|
+
let(:options) do
|
256
|
+
{
|
257
|
+
columns: [
|
258
|
+
'date.date_id',
|
259
|
+
'user.id',
|
260
|
+
'total'
|
261
|
+
]
|
262
|
+
}
|
263
|
+
end
|
264
|
+
|
265
|
+
it 'should eq render denormalize_table template' do
|
266
|
+
is_expected.to eq <<-EOS.strip_heredoc
|
267
|
+
SELECT
|
268
|
+
date_dimension_date_id AS date_dimension_date_id,
|
269
|
+
user_type_id AS user_type_id,
|
270
|
+
visits_fact.total
|
271
|
+
FROM
|
272
|
+
visits_fact
|
273
|
+
ORDER BY
|
274
|
+
date_dimension_date_id,
|
275
|
+
user_type_id,
|
276
|
+
total
|
277
|
+
;
|
278
|
+
EOS
|
279
|
+
end
|
280
|
+
end
|
125
281
|
end
|
@@ -72,7 +72,7 @@ describe Masamune::Transform::SnapshotDimension do
|
|
72
72
|
coalesce_merge(user_account_state_type_id) OVER w AS user_account_state_type_id,
|
73
73
|
tenant_id AS tenant_id,
|
74
74
|
user_id AS user_id,
|
75
|
-
hstore_merge(
|
75
|
+
hstore_merge(preferences) OVER w AS preferences,
|
76
76
|
start_at AS start_at
|
77
77
|
FROM
|
78
78
|
windows
|
@@ -63,7 +63,7 @@ describe Masamune::Transform::StageDimension do
|
|
63
63
|
column 'department.department_id', type: :integer
|
64
64
|
column 'user_account_state.name', type: :string
|
65
65
|
column 'hr_user_account_state.name', type: :string
|
66
|
-
column '
|
66
|
+
column 'preferences', type: :json
|
67
67
|
column 'start_at', type: :timestamp
|
68
68
|
column 'source_kind', type: :string
|
69
69
|
column 'delta', type: :integer
|
@@ -82,14 +82,14 @@ describe Masamune::Transform::StageDimension do
|
|
82
82
|
CREATE TEMPORARY TABLE IF NOT EXISTS user_dimension_ledger_stage (LIKE user_dimension_ledger INCLUDING ALL);
|
83
83
|
|
84
84
|
INSERT INTO
|
85
|
-
user_dimension_ledger_stage (department_type_id, user_account_state_type_id, hr_user_account_state_type_id, tenant_id, user_id,
|
85
|
+
user_dimension_ledger_stage (department_type_id, user_account_state_type_id, hr_user_account_state_type_id, tenant_id, user_id, preferences, source_kind, start_at, delta)
|
86
86
|
SELECT
|
87
87
|
department_type.id,
|
88
88
|
user_account_state_type.id,
|
89
89
|
hr_user_account_state_type.id,
|
90
90
|
user_file_dimension_ledger_stage.tenant_id,
|
91
91
|
user_file_dimension_ledger_stage.user_id,
|
92
|
-
json_to_hstore(user_file_dimension_ledger_stage.
|
92
|
+
json_to_hstore(user_file_dimension_ledger_stage.preferences),
|
93
93
|
user_file_dimension_ledger_stage.source_kind,
|
94
94
|
user_file_dimension_ledger_stage.start_at,
|
95
95
|
user_file_dimension_ledger_stage.delta
|