masamune 0.11.9 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/masamune/actions/transform.rb +31 -16
- data/lib/masamune/schema.rb +0 -1
- data/lib/masamune/schema/catalog.rb +2 -10
- data/lib/masamune/schema/column.rb +16 -30
- data/lib/masamune/schema/dimension.rb +2 -9
- data/lib/masamune/schema/fact.rb +0 -4
- data/lib/masamune/schema/map.rb +1 -1
- data/lib/masamune/schema/row.rb +3 -3
- data/lib/masamune/schema/store.rb +1 -3
- data/lib/masamune/schema/table.rb +28 -2
- data/lib/masamune/transform.rb +0 -1
- data/lib/masamune/transform/define_schema.rb +0 -6
- data/lib/masamune/transform/define_table.hql.erb +7 -6
- data/lib/masamune/transform/define_table.rb +1 -0
- data/lib/masamune/transform/{define_event_view.hql.erb → denormalize_table.hql.erb} +8 -26
- data/lib/masamune/transform/denormalize_table.psql.erb +1 -1
- data/lib/masamune/transform/denormalize_table.rb +13 -4
- data/lib/masamune/transform/snapshot_dimension.rb +1 -1
- data/lib/masamune/transform/stage_fact.rb +1 -1
- data/lib/masamune/version.rb +1 -1
- data/spec/masamune/actions/transform_spec.rb +50 -18
- data/spec/masamune/schema/catalog_spec.rb +0 -53
- data/spec/masamune/schema/column_spec.rb +9 -41
- data/spec/masamune/schema/fact_spec.rb +3 -1
- data/spec/masamune/schema/map_spec.rb +187 -189
- data/spec/masamune/schema/table_spec.rb +8 -0
- data/spec/masamune/transform/bulk_upsert.dimension_spec.rb +4 -6
- data/spec/masamune/transform/define_schema_spec.rb +5 -6
- data/spec/masamune/transform/define_table.dimension_spec.rb +101 -2
- data/spec/masamune/transform/define_table.fact_spec.rb +2 -0
- data/spec/masamune/transform/denormalize_table_spec.rb +170 -14
- data/spec/masamune/transform/snapshot_dimension_spec.rb +1 -1
- data/spec/masamune/transform/stage_dimension_spec.rb +3 -3
- metadata +3 -9
- data/lib/masamune/schema/event.rb +0 -121
- data/lib/masamune/transform/define_event_view.rb +0 -60
- data/spec/masamune/schema/event_spec.rb +0 -75
- data/spec/masamune/transform/define_event_view_spec.rb +0 -84
@@ -36,6 +36,14 @@ describe Masamune::Schema::Table do
|
|
36
36
|
it { expect(table.name).to eq('account_table') }
|
37
37
|
end
|
38
38
|
|
39
|
+
context 'with format' do
|
40
|
+
let(:table) do
|
41
|
+
described_class.new id: 'user', properties: { format: :tsv }
|
42
|
+
end
|
43
|
+
|
44
|
+
it { expect(table.properties[:format]).to eq(:tsv) }
|
45
|
+
end
|
46
|
+
|
39
47
|
context 'with columns' do
|
40
48
|
let(:table) do
|
41
49
|
described_class.new id: 'user',
|
@@ -63,7 +63,7 @@ describe Masamune::Transform::BulkUpsert do
|
|
63
63
|
column 'department.department_id', type: :integer
|
64
64
|
column 'user_account_state.name', type: :string
|
65
65
|
column 'hr_user_account_state.name', type: :string
|
66
|
-
column '
|
66
|
+
column 'preferences', type: :json
|
67
67
|
column 'start_at', type: :timestamp
|
68
68
|
column 'source_kind', type: :string
|
69
69
|
column 'delta', type: :integer
|
@@ -147,8 +147,7 @@ describe Masamune::Transform::BulkUpsert do
|
|
147
147
|
user_account_state_type_id = COALESCE(user_dimension_ledger_stage.user_account_state_type_id, user_dimension_ledger.user_account_state_type_id),
|
148
148
|
hr_user_account_state_type_id = COALESCE(user_dimension_ledger_stage.hr_user_account_state_type_id, user_dimension_ledger.hr_user_account_state_type_id),
|
149
149
|
name = COALESCE(user_dimension_ledger_stage.name, user_dimension_ledger.name),
|
150
|
-
|
151
|
-
preferences_was = COALESCE(user_dimension_ledger_stage.preferences_was, user_dimension_ledger.preferences_was)
|
150
|
+
preferences = COALESCE(user_dimension_ledger_stage.preferences, user_dimension_ledger.preferences)
|
152
151
|
FROM
|
153
152
|
user_dimension_ledger_stage
|
154
153
|
WHERE
|
@@ -160,7 +159,7 @@ describe Masamune::Transform::BulkUpsert do
|
|
160
159
|
;
|
161
160
|
|
162
161
|
INSERT INTO
|
163
|
-
user_dimension_ledger (department_type_id, user_account_state_type_id, hr_user_account_state_type_id, tenant_id, user_id, name,
|
162
|
+
user_dimension_ledger (department_type_id, user_account_state_type_id, hr_user_account_state_type_id, tenant_id, user_id, name, preferences, source_kind, source_uuid, start_at, last_modified_at, delta)
|
164
163
|
SELECT
|
165
164
|
user_dimension_ledger_stage.department_type_id,
|
166
165
|
user_dimension_ledger_stage.user_account_state_type_id,
|
@@ -168,8 +167,7 @@ describe Masamune::Transform::BulkUpsert do
|
|
168
167
|
user_dimension_ledger_stage.tenant_id,
|
169
168
|
user_dimension_ledger_stage.user_id,
|
170
169
|
user_dimension_ledger_stage.name,
|
171
|
-
user_dimension_ledger_stage.
|
172
|
-
user_dimension_ledger_stage.preferences_was,
|
170
|
+
user_dimension_ledger_stage.preferences,
|
173
171
|
user_dimension_ledger_stage.source_kind,
|
174
172
|
user_dimension_ledger_stage.source_uuid,
|
175
173
|
user_dimension_ledger_stage.start_at,
|
@@ -63,11 +63,10 @@ describe Masamune::Transform::DefineSchema do
|
|
63
63
|
context 'for hive schema' do
|
64
64
|
before do
|
65
65
|
catalog.schema :hive do
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
attribute 'preferences', type: :json
|
66
|
+
dimension 'user', type: :ledger do
|
67
|
+
column 'tenant_id', index: true, natural_key: true
|
68
|
+
column 'user_id', index: true, natural_key: true
|
69
|
+
column 'preferences', type: :key_value, null: true
|
71
70
|
end
|
72
71
|
end
|
73
72
|
end
|
@@ -77,7 +76,7 @@ describe Masamune::Transform::DefineSchema do
|
|
77
76
|
it 'should render combined template' do
|
78
77
|
is_expected.to eq Masamune::Template.combine \
|
79
78
|
Masamune::Transform::Operator.new('define_schema', source: catalog.hive),
|
80
|
-
transform.
|
79
|
+
transform.define_table(catalog.hive.dimensions['user'])
|
81
80
|
end
|
82
81
|
end
|
83
82
|
end
|
@@ -41,6 +41,106 @@ describe Masamune::Transform::DefineTable do
|
|
41
41
|
end
|
42
42
|
end
|
43
43
|
|
44
|
+
context 'for hive ledger dimension' do
|
45
|
+
before do
|
46
|
+
catalog.schema :hive do
|
47
|
+
dimension 'tenant', type: :ledger do
|
48
|
+
column 'tenant_id', type: :integer, natural_key: true
|
49
|
+
column 'tenant_account_state', type: :enum, values: %w(missing unknown active inactive)
|
50
|
+
column 'tenant_premium_state', type: :enum, values: %w(missing unkown goodwill pilot sandbox premium internal free vmware)
|
51
|
+
column 'preferences', type: :key_value, null: true
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
let(:table) { catalog.hive.tenant_dimension }
|
57
|
+
|
58
|
+
it 'should render table template' do
|
59
|
+
is_expected.to eq <<-EOS.strip_heredoc
|
60
|
+
CREATE TABLE IF NOT EXISTS tenant_ledger
|
61
|
+
(
|
62
|
+
id STRING,
|
63
|
+
tenant_id INT,
|
64
|
+
tenant_account_state STRING,
|
65
|
+
tenant_premium_state STRING,
|
66
|
+
preferences STRING,
|
67
|
+
source_kind STRING,
|
68
|
+
source_uuid STRING,
|
69
|
+
start_at STRING,
|
70
|
+
last_modified_at STRING,
|
71
|
+
delta INT
|
72
|
+
)
|
73
|
+
TBLPROPERTIES ('serialization.null.format' = '');
|
74
|
+
EOS
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
context 'for hive ledger dimension with partitions' do
|
79
|
+
before do
|
80
|
+
catalog.schema :hive do
|
81
|
+
dimension 'tenant', type: :ledger do
|
82
|
+
partition :y
|
83
|
+
partition :m
|
84
|
+
column 'tenant_id', type: :integer, natural_key: true
|
85
|
+
column 'tenant_account_state', type: :enum, values: %w(missing unknown active inactive)
|
86
|
+
column 'tenant_premium_state', type: :enum, values: %w(missing unkown goodwill pilot sandbox premium internal free vmware)
|
87
|
+
column 'preferences', type: :key_value, null: true
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
let(:table) { catalog.hive.tenant_dimension }
|
93
|
+
|
94
|
+
it 'should render table template' do
|
95
|
+
is_expected.to eq <<-EOS.strip_heredoc
|
96
|
+
CREATE TABLE IF NOT EXISTS tenant_ledger
|
97
|
+
(
|
98
|
+
id STRING,
|
99
|
+
tenant_id INT,
|
100
|
+
tenant_account_state STRING,
|
101
|
+
tenant_premium_state STRING,
|
102
|
+
preferences STRING,
|
103
|
+
source_kind STRING,
|
104
|
+
source_uuid STRING,
|
105
|
+
start_at STRING,
|
106
|
+
last_modified_at STRING,
|
107
|
+
delta INT
|
108
|
+
)
|
109
|
+
PARTITIONED BY (y INT, m INT)
|
110
|
+
TBLPROPERTIES ('serialization.null.format' = '');
|
111
|
+
EOS
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
context 'for hive ledger dimension with :tsv format' do
|
116
|
+
before do
|
117
|
+
catalog.schema :hive do
|
118
|
+
dimension 'tenant', type: :ledger, properties: { format: :tsv } do
|
119
|
+
column 'tenant_id', type: :integer, natural_key: true
|
120
|
+
end
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
let(:table) { catalog.hive.tenant_dimension }
|
125
|
+
|
126
|
+
it 'should render table template' do
|
127
|
+
is_expected.to eq <<-EOS.strip_heredoc
|
128
|
+
CREATE TABLE IF NOT EXISTS tenant_ledger
|
129
|
+
(
|
130
|
+
id STRING,
|
131
|
+
tenant_id INT,
|
132
|
+
source_kind STRING,
|
133
|
+
source_uuid STRING,
|
134
|
+
start_at STRING,
|
135
|
+
last_modified_at STRING,
|
136
|
+
delta INT
|
137
|
+
)
|
138
|
+
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\t'
|
139
|
+
TBLPROPERTIES ('serialization.null.format' = '');
|
140
|
+
EOS
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
44
144
|
context 'for postgres dimension type: one' do
|
45
145
|
before do
|
46
146
|
catalog.schema :postgres do
|
@@ -160,8 +260,7 @@ describe Masamune::Transform::DefineTable do
|
|
160
260
|
user_account_state_type_id INTEGER REFERENCES user_account_state_type(id),
|
161
261
|
tenant_id INTEGER NOT NULL,
|
162
262
|
user_id INTEGER NOT NULL,
|
163
|
-
|
164
|
-
preferences_was HSTORE,
|
263
|
+
preferences HSTORE,
|
165
264
|
source_kind VARCHAR NOT NULL,
|
166
265
|
source_uuid VARCHAR NOT NULL,
|
167
266
|
start_at TIMESTAMP NOT NULL,
|
@@ -63,22 +63,80 @@ describe Masamune::Transform::DenormalizeTable do
|
|
63
63
|
end
|
64
64
|
end
|
65
65
|
|
66
|
-
let(:
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
66
|
+
let(:options) { {} }
|
67
|
+
|
68
|
+
subject(:result) { transform.denormalize_table(target, options).to_s }
|
69
|
+
|
70
|
+
context 'with postgres fact with :columns' do
|
71
|
+
let(:target) { catalog.postgres.visits_fact }
|
72
|
+
let(:options) do
|
73
|
+
{
|
74
|
+
columns: [
|
75
|
+
'date.date_id',
|
76
|
+
'tenant.tenant_id',
|
77
|
+
'user.tenant_id',
|
78
|
+
'user.user_id',
|
79
|
+
'user_agent.name',
|
80
|
+
'user_agent.version',
|
81
|
+
'total',
|
82
|
+
'time_key'
|
83
|
+
]
|
84
|
+
}
|
85
|
+
end
|
86
|
+
|
87
|
+
it 'should eq render denormalize_table template' do
|
88
|
+
is_expected.to eq <<-EOS.strip_heredoc
|
89
|
+
SELECT
|
90
|
+
date_dimension.date_id AS date_dimension_date_id,
|
91
|
+
tenant_dimension.tenant_id AS tenant_dimension_tenant_id,
|
92
|
+
user_dimension.tenant_id AS user_dimension_tenant_id,
|
93
|
+
user_dimension.user_id AS user_dimension_user_id,
|
94
|
+
user_agent_type.name AS user_agent_type_name,
|
95
|
+
user_agent_type.version AS user_agent_type_version,
|
96
|
+
visits_fact.total,
|
97
|
+
visits_fact.time_key
|
98
|
+
FROM
|
99
|
+
visits_fact
|
100
|
+
JOIN
|
101
|
+
date_dimension
|
102
|
+
ON
|
103
|
+
date_dimension.id = visits_fact.date_dimension_id
|
104
|
+
JOIN
|
105
|
+
tenant_dimension
|
106
|
+
ON
|
107
|
+
tenant_dimension.id = visits_fact.tenant_dimension_id
|
108
|
+
JOIN
|
109
|
+
user_dimension
|
110
|
+
ON
|
111
|
+
user_dimension.id = visits_fact.user_dimension_id
|
112
|
+
JOIN
|
113
|
+
user_agent_type
|
114
|
+
ON
|
115
|
+
user_agent_type.id = visits_fact.user_agent_type_id
|
116
|
+
ORDER BY
|
117
|
+
date_dimension_date_id,
|
118
|
+
tenant_dimension_tenant_id,
|
119
|
+
user_dimension_tenant_id,
|
120
|
+
user_dimension_user_id,
|
121
|
+
user_agent_type_name,
|
122
|
+
user_agent_type_version,
|
123
|
+
total,
|
124
|
+
time_key
|
125
|
+
;
|
126
|
+
EOS
|
127
|
+
end
|
78
128
|
end
|
79
129
|
|
80
|
-
context 'with postgres fact' do
|
81
|
-
|
130
|
+
context 'with postgres fact with :except' do
|
131
|
+
let(:target) { catalog.postgres.visits_fact }
|
132
|
+
let(:options) do
|
133
|
+
{
|
134
|
+
except: [
|
135
|
+
'cluster.name',
|
136
|
+
'last_modified_at'
|
137
|
+
]
|
138
|
+
}
|
139
|
+
end
|
82
140
|
|
83
141
|
it 'should eq render denormalize_table template' do
|
84
142
|
is_expected.to eq <<-EOS.strip_heredoc
|
@@ -89,6 +147,7 @@ describe Masamune::Transform::DenormalizeTable do
|
|
89
147
|
user_dimension.user_id AS user_dimension_user_id,
|
90
148
|
user_agent_type.name AS user_agent_type_name,
|
91
149
|
user_agent_type.version AS user_agent_type_version,
|
150
|
+
user_agent_type.mobile AS user_agent_type_mobile,
|
92
151
|
visits_fact.total,
|
93
152
|
visits_fact.time_key
|
94
153
|
FROM
|
@@ -116,10 +175,107 @@ describe Masamune::Transform::DenormalizeTable do
|
|
116
175
|
user_dimension_user_id,
|
117
176
|
user_agent_type_name,
|
118
177
|
user_agent_type_version,
|
178
|
+
user_agent_type_mobile,
|
119
179
|
total,
|
120
180
|
time_key
|
121
181
|
;
|
122
182
|
EOS
|
123
183
|
end
|
124
184
|
end
|
185
|
+
|
186
|
+
context 'with hive table' do
|
187
|
+
before do
|
188
|
+
catalog.schema :hive do
|
189
|
+
dimension 'tenant', type: :ledger do
|
190
|
+
partition :y
|
191
|
+
partition :m
|
192
|
+
column 'tenant_id', type: :integer, natural_key: true
|
193
|
+
column 'tenant_account_state', type: :enum, values: %w(missing unknown active inactive)
|
194
|
+
column 'tenant_premium_state', type: :enum, values: %w(missing unkown goodwill pilot sandbox premium internal free vmware)
|
195
|
+
column 'preferences', type: :key_value, null: true
|
196
|
+
end
|
197
|
+
end
|
198
|
+
end
|
199
|
+
|
200
|
+
let(:target) { catalog.hive.tenant_dimension }
|
201
|
+
|
202
|
+
let(:options) do
|
203
|
+
{
|
204
|
+
columns: [
|
205
|
+
'tenant_id',
|
206
|
+
'tenant_account_state',
|
207
|
+
'tenant_premium_state',
|
208
|
+
'preferences',
|
209
|
+
'y',
|
210
|
+
'm'
|
211
|
+
],
|
212
|
+
order: [
|
213
|
+
'tenant_id',
|
214
|
+
'start_at'
|
215
|
+
]
|
216
|
+
}
|
217
|
+
end
|
218
|
+
|
219
|
+
it 'should eq render denormalize_table template' do
|
220
|
+
is_expected.to eq <<-EOS.strip_heredoc
|
221
|
+
SELECT
|
222
|
+
tenant_ledger.tenant_id,
|
223
|
+
tenant_ledger.tenant_account_state,
|
224
|
+
tenant_ledger.tenant_premium_state,
|
225
|
+
tenant_ledger.preferences,
|
226
|
+
tenant_ledger.y,
|
227
|
+
tenant_ledger.m
|
228
|
+
FROM
|
229
|
+
tenant_ledger
|
230
|
+
ORDER BY
|
231
|
+
tenant_id,
|
232
|
+
start_at
|
233
|
+
;
|
234
|
+
EOS
|
235
|
+
end
|
236
|
+
end
|
237
|
+
|
238
|
+
context 'with hive table with implicit references' do
|
239
|
+
before do
|
240
|
+
catalog.schema :hive do
|
241
|
+
dimension 'date', type: :date, implicit: true do
|
242
|
+
column 'date_id', type: :integer, natural_key: true
|
243
|
+
end
|
244
|
+
|
245
|
+
fact 'visits' do
|
246
|
+
references :date
|
247
|
+
references :user, degenerate: true
|
248
|
+
measure 'total'
|
249
|
+
end
|
250
|
+
end
|
251
|
+
end
|
252
|
+
|
253
|
+
let(:target) { catalog.hive.visits_fact }
|
254
|
+
|
255
|
+
let(:options) do
|
256
|
+
{
|
257
|
+
columns: [
|
258
|
+
'date.date_id',
|
259
|
+
'user.id',
|
260
|
+
'total'
|
261
|
+
]
|
262
|
+
}
|
263
|
+
end
|
264
|
+
|
265
|
+
it 'should eq render denormalize_table template' do
|
266
|
+
is_expected.to eq <<-EOS.strip_heredoc
|
267
|
+
SELECT
|
268
|
+
date_dimension_date_id AS date_dimension_date_id,
|
269
|
+
user_type_id AS user_type_id,
|
270
|
+
visits_fact.total
|
271
|
+
FROM
|
272
|
+
visits_fact
|
273
|
+
ORDER BY
|
274
|
+
date_dimension_date_id,
|
275
|
+
user_type_id,
|
276
|
+
total
|
277
|
+
;
|
278
|
+
EOS
|
279
|
+
end
|
280
|
+
end
|
125
281
|
end
|
@@ -72,7 +72,7 @@ describe Masamune::Transform::SnapshotDimension do
|
|
72
72
|
coalesce_merge(user_account_state_type_id) OVER w AS user_account_state_type_id,
|
73
73
|
tenant_id AS tenant_id,
|
74
74
|
user_id AS user_id,
|
75
|
-
hstore_merge(
|
75
|
+
hstore_merge(preferences) OVER w AS preferences,
|
76
76
|
start_at AS start_at
|
77
77
|
FROM
|
78
78
|
windows
|
@@ -63,7 +63,7 @@ describe Masamune::Transform::StageDimension do
|
|
63
63
|
column 'department.department_id', type: :integer
|
64
64
|
column 'user_account_state.name', type: :string
|
65
65
|
column 'hr_user_account_state.name', type: :string
|
66
|
-
column '
|
66
|
+
column 'preferences', type: :json
|
67
67
|
column 'start_at', type: :timestamp
|
68
68
|
column 'source_kind', type: :string
|
69
69
|
column 'delta', type: :integer
|
@@ -82,14 +82,14 @@ describe Masamune::Transform::StageDimension do
|
|
82
82
|
CREATE TEMPORARY TABLE IF NOT EXISTS user_dimension_ledger_stage (LIKE user_dimension_ledger INCLUDING ALL);
|
83
83
|
|
84
84
|
INSERT INTO
|
85
|
-
user_dimension_ledger_stage (department_type_id, user_account_state_type_id, hr_user_account_state_type_id, tenant_id, user_id,
|
85
|
+
user_dimension_ledger_stage (department_type_id, user_account_state_type_id, hr_user_account_state_type_id, tenant_id, user_id, preferences, source_kind, start_at, delta)
|
86
86
|
SELECT
|
87
87
|
department_type.id,
|
88
88
|
user_account_state_type.id,
|
89
89
|
hr_user_account_state_type.id,
|
90
90
|
user_file_dimension_ledger_stage.tenant_id,
|
91
91
|
user_file_dimension_ledger_stage.user_id,
|
92
|
-
json_to_hstore(user_file_dimension_ledger_stage.
|
92
|
+
json_to_hstore(user_file_dimension_ledger_stage.preferences),
|
93
93
|
user_file_dimension_ledger_stage.source_kind,
|
94
94
|
user_file_dimension_ledger_stage.start_at,
|
95
95
|
user_file_dimension_ledger_stage.delta
|