masamune 0.11.9 → 0.12.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39)
  1. checksums.yaml +4 -4
  2. data/lib/masamune/actions/transform.rb +31 -16
  3. data/lib/masamune/schema.rb +0 -1
  4. data/lib/masamune/schema/catalog.rb +2 -10
  5. data/lib/masamune/schema/column.rb +16 -30
  6. data/lib/masamune/schema/dimension.rb +2 -9
  7. data/lib/masamune/schema/fact.rb +0 -4
  8. data/lib/masamune/schema/map.rb +1 -1
  9. data/lib/masamune/schema/row.rb +3 -3
  10. data/lib/masamune/schema/store.rb +1 -3
  11. data/lib/masamune/schema/table.rb +28 -2
  12. data/lib/masamune/transform.rb +0 -1
  13. data/lib/masamune/transform/define_schema.rb +0 -6
  14. data/lib/masamune/transform/define_table.hql.erb +7 -6
  15. data/lib/masamune/transform/define_table.rb +1 -0
  16. data/lib/masamune/transform/{define_event_view.hql.erb → denormalize_table.hql.erb} +8 -26
  17. data/lib/masamune/transform/denormalize_table.psql.erb +1 -1
  18. data/lib/masamune/transform/denormalize_table.rb +13 -4
  19. data/lib/masamune/transform/snapshot_dimension.rb +1 -1
  20. data/lib/masamune/transform/stage_fact.rb +1 -1
  21. data/lib/masamune/version.rb +1 -1
  22. data/spec/masamune/actions/transform_spec.rb +50 -18
  23. data/spec/masamune/schema/catalog_spec.rb +0 -53
  24. data/spec/masamune/schema/column_spec.rb +9 -41
  25. data/spec/masamune/schema/fact_spec.rb +3 -1
  26. data/spec/masamune/schema/map_spec.rb +187 -189
  27. data/spec/masamune/schema/table_spec.rb +8 -0
  28. data/spec/masamune/transform/bulk_upsert.dimension_spec.rb +4 -6
  29. data/spec/masamune/transform/define_schema_spec.rb +5 -6
  30. data/spec/masamune/transform/define_table.dimension_spec.rb +101 -2
  31. data/spec/masamune/transform/define_table.fact_spec.rb +2 -0
  32. data/spec/masamune/transform/denormalize_table_spec.rb +170 -14
  33. data/spec/masamune/transform/snapshot_dimension_spec.rb +1 -1
  34. data/spec/masamune/transform/stage_dimension_spec.rb +3 -3
  35. metadata +3 -9
  36. data/lib/masamune/schema/event.rb +0 -121
  37. data/lib/masamune/transform/define_event_view.rb +0 -60
  38. data/spec/masamune/schema/event_spec.rb +0 -75
  39. data/spec/masamune/transform/define_event_view_spec.rb +0 -84
@@ -36,6 +36,14 @@ describe Masamune::Schema::Table do
36
36
  it { expect(table.name).to eq('account_table') }
37
37
  end
38
38
 
39
+ context 'with format' do
40
+ let(:table) do
41
+ described_class.new id: 'user', properties: { format: :tsv }
42
+ end
43
+
44
+ it { expect(table.properties[:format]).to eq(:tsv) }
45
+ end
46
+
39
47
  context 'with columns' do
40
48
  let(:table) do
41
49
  described_class.new id: 'user',
@@ -63,7 +63,7 @@ describe Masamune::Transform::BulkUpsert do
63
63
  column 'department.department_id', type: :integer
64
64
  column 'user_account_state.name', type: :string
65
65
  column 'hr_user_account_state.name', type: :string
66
- column 'preferences_now', type: :json
66
+ column 'preferences', type: :json
67
67
  column 'start_at', type: :timestamp
68
68
  column 'source_kind', type: :string
69
69
  column 'delta', type: :integer
@@ -147,8 +147,7 @@ describe Masamune::Transform::BulkUpsert do
147
147
  user_account_state_type_id = COALESCE(user_dimension_ledger_stage.user_account_state_type_id, user_dimension_ledger.user_account_state_type_id),
148
148
  hr_user_account_state_type_id = COALESCE(user_dimension_ledger_stage.hr_user_account_state_type_id, user_dimension_ledger.hr_user_account_state_type_id),
149
149
  name = COALESCE(user_dimension_ledger_stage.name, user_dimension_ledger.name),
150
- preferences_now = COALESCE(user_dimension_ledger_stage.preferences_now, user_dimension_ledger.preferences_now),
151
- preferences_was = COALESCE(user_dimension_ledger_stage.preferences_was, user_dimension_ledger.preferences_was)
150
+ preferences = COALESCE(user_dimension_ledger_stage.preferences, user_dimension_ledger.preferences)
152
151
  FROM
153
152
  user_dimension_ledger_stage
154
153
  WHERE
@@ -160,7 +159,7 @@ describe Masamune::Transform::BulkUpsert do
160
159
  ;
161
160
 
162
161
  INSERT INTO
163
- user_dimension_ledger (department_type_id, user_account_state_type_id, hr_user_account_state_type_id, tenant_id, user_id, name, preferences_now, preferences_was, source_kind, source_uuid, start_at, last_modified_at, delta)
162
+ user_dimension_ledger (department_type_id, user_account_state_type_id, hr_user_account_state_type_id, tenant_id, user_id, name, preferences, source_kind, source_uuid, start_at, last_modified_at, delta)
164
163
  SELECT
165
164
  user_dimension_ledger_stage.department_type_id,
166
165
  user_dimension_ledger_stage.user_account_state_type_id,
@@ -168,8 +167,7 @@ describe Masamune::Transform::BulkUpsert do
168
167
  user_dimension_ledger_stage.tenant_id,
169
168
  user_dimension_ledger_stage.user_id,
170
169
  user_dimension_ledger_stage.name,
171
- user_dimension_ledger_stage.preferences_now,
172
- user_dimension_ledger_stage.preferences_was,
170
+ user_dimension_ledger_stage.preferences,
173
171
  user_dimension_ledger_stage.source_kind,
174
172
  user_dimension_ledger_stage.source_uuid,
175
173
  user_dimension_ledger_stage.start_at,
@@ -63,11 +63,10 @@ describe Masamune::Transform::DefineSchema do
63
63
  context 'for hive schema' do
64
64
  before do
65
65
  catalog.schema :hive do
66
- event 'tenant' do
67
- attribute 'tenant_id', type: :integer, immutable: true
68
- attribute 'account_state', type: :string
69
- attribute 'premium_type', type: :string
70
- attribute 'preferences', type: :json
66
+ dimension 'user', type: :ledger do
67
+ column 'tenant_id', index: true, natural_key: true
68
+ column 'user_id', index: true, natural_key: true
69
+ column 'preferences', type: :key_value, null: true
71
70
  end
72
71
  end
73
72
  end
@@ -77,7 +76,7 @@ describe Masamune::Transform::DefineSchema do
77
76
  it 'should render combined template' do
78
77
  is_expected.to eq Masamune::Template.combine \
79
78
  Masamune::Transform::Operator.new('define_schema', source: catalog.hive),
80
- transform.define_event_view(catalog.hive.events['tenant'])
79
+ transform.define_table(catalog.hive.dimensions['user'])
81
80
  end
82
81
  end
83
82
  end
@@ -41,6 +41,106 @@ describe Masamune::Transform::DefineTable do
41
41
  end
42
42
  end
43
43
 
44
+ context 'for hive ledger dimension' do
45
+ before do
46
+ catalog.schema :hive do
47
+ dimension 'tenant', type: :ledger do
48
+ column 'tenant_id', type: :integer, natural_key: true
49
+ column 'tenant_account_state', type: :enum, values: %w(missing unknown active inactive)
50
+ column 'tenant_premium_state', type: :enum, values: %w(missing unkown goodwill pilot sandbox premium internal free vmware)
51
+ column 'preferences', type: :key_value, null: true
52
+ end
53
+ end
54
+ end
55
+
56
+ let(:table) { catalog.hive.tenant_dimension }
57
+
58
+ it 'should render table template' do
59
+ is_expected.to eq <<-EOS.strip_heredoc
60
+ CREATE TABLE IF NOT EXISTS tenant_ledger
61
+ (
62
+ id STRING,
63
+ tenant_id INT,
64
+ tenant_account_state STRING,
65
+ tenant_premium_state STRING,
66
+ preferences STRING,
67
+ source_kind STRING,
68
+ source_uuid STRING,
69
+ start_at STRING,
70
+ last_modified_at STRING,
71
+ delta INT
72
+ )
73
+ TBLPROPERTIES ('serialization.null.format' = '');
74
+ EOS
75
+ end
76
+ end
77
+
78
+ context 'for hive ledger dimension with partitions' do
79
+ before do
80
+ catalog.schema :hive do
81
+ dimension 'tenant', type: :ledger do
82
+ partition :y
83
+ partition :m
84
+ column 'tenant_id', type: :integer, natural_key: true
85
+ column 'tenant_account_state', type: :enum, values: %w(missing unknown active inactive)
86
+ column 'tenant_premium_state', type: :enum, values: %w(missing unkown goodwill pilot sandbox premium internal free vmware)
87
+ column 'preferences', type: :key_value, null: true
88
+ end
89
+ end
90
+ end
91
+
92
+ let(:table) { catalog.hive.tenant_dimension }
93
+
94
+ it 'should render table template' do
95
+ is_expected.to eq <<-EOS.strip_heredoc
96
+ CREATE TABLE IF NOT EXISTS tenant_ledger
97
+ (
98
+ id STRING,
99
+ tenant_id INT,
100
+ tenant_account_state STRING,
101
+ tenant_premium_state STRING,
102
+ preferences STRING,
103
+ source_kind STRING,
104
+ source_uuid STRING,
105
+ start_at STRING,
106
+ last_modified_at STRING,
107
+ delta INT
108
+ )
109
+ PARTITIONED BY (y INT, m INT)
110
+ TBLPROPERTIES ('serialization.null.format' = '');
111
+ EOS
112
+ end
113
+ end
114
+
115
+ context 'for hive ledger dimension with :tsv format' do
116
+ before do
117
+ catalog.schema :hive do
118
+ dimension 'tenant', type: :ledger, properties: { format: :tsv } do
119
+ column 'tenant_id', type: :integer, natural_key: true
120
+ end
121
+ end
122
+ end
123
+
124
+ let(:table) { catalog.hive.tenant_dimension }
125
+
126
+ it 'should render table template' do
127
+ is_expected.to eq <<-EOS.strip_heredoc
128
+ CREATE TABLE IF NOT EXISTS tenant_ledger
129
+ (
130
+ id STRING,
131
+ tenant_id INT,
132
+ source_kind STRING,
133
+ source_uuid STRING,
134
+ start_at STRING,
135
+ last_modified_at STRING,
136
+ delta INT
137
+ )
138
+ ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\t'
139
+ TBLPROPERTIES ('serialization.null.format' = '');
140
+ EOS
141
+ end
142
+ end
143
+
44
144
  context 'for postgres dimension type: one' do
45
145
  before do
46
146
  catalog.schema :postgres do
@@ -160,8 +260,7 @@ describe Masamune::Transform::DefineTable do
160
260
  user_account_state_type_id INTEGER REFERENCES user_account_state_type(id),
161
261
  tenant_id INTEGER NOT NULL,
162
262
  user_id INTEGER NOT NULL,
163
- preferences_now HSTORE,
164
- preferences_was HSTORE,
263
+ preferences HSTORE,
165
264
  source_kind VARCHAR NOT NULL,
166
265
  source_uuid VARCHAR NOT NULL,
167
266
  start_at TIMESTAMP NOT NULL,
@@ -60,6 +60,8 @@ describe Masamune::Transform::DefineTable do
60
60
  end
61
61
 
62
62
  fact 'visits', partition: 'y%Ym%m' do
63
+ partition :y
64
+ partition :m
63
65
  references :cluster
64
66
  references :date
65
67
  references :tenant
@@ -63,22 +63,80 @@ describe Masamune::Transform::DenormalizeTable do
63
63
  end
64
64
  end
65
65
 
66
- let(:target) { catalog.postgres.visits_fact }
67
- let(:columns) do
68
- [
69
- 'date.date_id',
70
- 'tenant.tenant_id',
71
- 'user.tenant_id',
72
- 'user.user_id',
73
- 'user_agent.name',
74
- 'user_agent.version',
75
- 'total',
76
- 'time_key'
77
- ]
66
+ let(:options) { {} }
67
+
68
+ subject(:result) { transform.denormalize_table(target, options).to_s }
69
+
70
+ context 'with postgres fact with :columns' do
71
+ let(:target) { catalog.postgres.visits_fact }
72
+ let(:options) do
73
+ {
74
+ columns: [
75
+ 'date.date_id',
76
+ 'tenant.tenant_id',
77
+ 'user.tenant_id',
78
+ 'user.user_id',
79
+ 'user_agent.name',
80
+ 'user_agent.version',
81
+ 'total',
82
+ 'time_key'
83
+ ]
84
+ }
85
+ end
86
+
87
+ it 'should eq render denormalize_table template' do
88
+ is_expected.to eq <<-EOS.strip_heredoc
89
+ SELECT
90
+ date_dimension.date_id AS date_dimension_date_id,
91
+ tenant_dimension.tenant_id AS tenant_dimension_tenant_id,
92
+ user_dimension.tenant_id AS user_dimension_tenant_id,
93
+ user_dimension.user_id AS user_dimension_user_id,
94
+ user_agent_type.name AS user_agent_type_name,
95
+ user_agent_type.version AS user_agent_type_version,
96
+ visits_fact.total,
97
+ visits_fact.time_key
98
+ FROM
99
+ visits_fact
100
+ JOIN
101
+ date_dimension
102
+ ON
103
+ date_dimension.id = visits_fact.date_dimension_id
104
+ JOIN
105
+ tenant_dimension
106
+ ON
107
+ tenant_dimension.id = visits_fact.tenant_dimension_id
108
+ JOIN
109
+ user_dimension
110
+ ON
111
+ user_dimension.id = visits_fact.user_dimension_id
112
+ JOIN
113
+ user_agent_type
114
+ ON
115
+ user_agent_type.id = visits_fact.user_agent_type_id
116
+ ORDER BY
117
+ date_dimension_date_id,
118
+ tenant_dimension_tenant_id,
119
+ user_dimension_tenant_id,
120
+ user_dimension_user_id,
121
+ user_agent_type_name,
122
+ user_agent_type_version,
123
+ total,
124
+ time_key
125
+ ;
126
+ EOS
127
+ end
78
128
  end
79
129
 
80
- context 'with postgres fact' do
81
- subject(:result) { transform.denormalize_table(target, columns).to_s }
130
+ context 'with postgres fact with :except' do
131
+ let(:target) { catalog.postgres.visits_fact }
132
+ let(:options) do
133
+ {
134
+ except: [
135
+ 'cluster.name',
136
+ 'last_modified_at'
137
+ ]
138
+ }
139
+ end
82
140
 
83
141
  it 'should eq render denormalize_table template' do
84
142
  is_expected.to eq <<-EOS.strip_heredoc
@@ -89,6 +147,7 @@ describe Masamune::Transform::DenormalizeTable do
89
147
  user_dimension.user_id AS user_dimension_user_id,
90
148
  user_agent_type.name AS user_agent_type_name,
91
149
  user_agent_type.version AS user_agent_type_version,
150
+ user_agent_type.mobile AS user_agent_type_mobile,
92
151
  visits_fact.total,
93
152
  visits_fact.time_key
94
153
  FROM
@@ -116,10 +175,107 @@ describe Masamune::Transform::DenormalizeTable do
116
175
  user_dimension_user_id,
117
176
  user_agent_type_name,
118
177
  user_agent_type_version,
178
+ user_agent_type_mobile,
119
179
  total,
120
180
  time_key
121
181
  ;
122
182
  EOS
123
183
  end
124
184
  end
185
+
186
+ context 'with hive table' do
187
+ before do
188
+ catalog.schema :hive do
189
+ dimension 'tenant', type: :ledger do
190
+ partition :y
191
+ partition :m
192
+ column 'tenant_id', type: :integer, natural_key: true
193
+ column 'tenant_account_state', type: :enum, values: %w(missing unknown active inactive)
194
+ column 'tenant_premium_state', type: :enum, values: %w(missing unkown goodwill pilot sandbox premium internal free vmware)
195
+ column 'preferences', type: :key_value, null: true
196
+ end
197
+ end
198
+ end
199
+
200
+ let(:target) { catalog.hive.tenant_dimension }
201
+
202
+ let(:options) do
203
+ {
204
+ columns: [
205
+ 'tenant_id',
206
+ 'tenant_account_state',
207
+ 'tenant_premium_state',
208
+ 'preferences',
209
+ 'y',
210
+ 'm'
211
+ ],
212
+ order: [
213
+ 'tenant_id',
214
+ 'start_at'
215
+ ]
216
+ }
217
+ end
218
+
219
+ it 'should eq render denormalize_table template' do
220
+ is_expected.to eq <<-EOS.strip_heredoc
221
+ SELECT
222
+ tenant_ledger.tenant_id,
223
+ tenant_ledger.tenant_account_state,
224
+ tenant_ledger.tenant_premium_state,
225
+ tenant_ledger.preferences,
226
+ tenant_ledger.y,
227
+ tenant_ledger.m
228
+ FROM
229
+ tenant_ledger
230
+ ORDER BY
231
+ tenant_id,
232
+ start_at
233
+ ;
234
+ EOS
235
+ end
236
+ end
237
+
238
+ context 'with hive table with implicit references' do
239
+ before do
240
+ catalog.schema :hive do
241
+ dimension 'date', type: :date, implicit: true do
242
+ column 'date_id', type: :integer, natural_key: true
243
+ end
244
+
245
+ fact 'visits' do
246
+ references :date
247
+ references :user, degenerate: true
248
+ measure 'total'
249
+ end
250
+ end
251
+ end
252
+
253
+ let(:target) { catalog.hive.visits_fact }
254
+
255
+ let(:options) do
256
+ {
257
+ columns: [
258
+ 'date.date_id',
259
+ 'user.id',
260
+ 'total'
261
+ ]
262
+ }
263
+ end
264
+
265
+ it 'should eq render denormalize_table template' do
266
+ is_expected.to eq <<-EOS.strip_heredoc
267
+ SELECT
268
+ date_dimension_date_id AS date_dimension_date_id,
269
+ user_type_id AS user_type_id,
270
+ visits_fact.total
271
+ FROM
272
+ visits_fact
273
+ ORDER BY
274
+ date_dimension_date_id,
275
+ user_type_id,
276
+ total
277
+ ;
278
+ EOS
279
+ end
280
+ end
125
281
  end
@@ -72,7 +72,7 @@ describe Masamune::Transform::SnapshotDimension do
72
72
  coalesce_merge(user_account_state_type_id) OVER w AS user_account_state_type_id,
73
73
  tenant_id AS tenant_id,
74
74
  user_id AS user_id,
75
- hstore_merge(preferences_now) OVER w - hstore_merge(preferences_was) OVER w AS preferences,
75
+ hstore_merge(preferences) OVER w AS preferences,
76
76
  start_at AS start_at
77
77
  FROM
78
78
  windows
@@ -63,7 +63,7 @@ describe Masamune::Transform::StageDimension do
63
63
  column 'department.department_id', type: :integer
64
64
  column 'user_account_state.name', type: :string
65
65
  column 'hr_user_account_state.name', type: :string
66
- column 'preferences_now', type: :json
66
+ column 'preferences', type: :json
67
67
  column 'start_at', type: :timestamp
68
68
  column 'source_kind', type: :string
69
69
  column 'delta', type: :integer
@@ -82,14 +82,14 @@ describe Masamune::Transform::StageDimension do
82
82
  CREATE TEMPORARY TABLE IF NOT EXISTS user_dimension_ledger_stage (LIKE user_dimension_ledger INCLUDING ALL);
83
83
 
84
84
  INSERT INTO
85
- user_dimension_ledger_stage (department_type_id, user_account_state_type_id, hr_user_account_state_type_id, tenant_id, user_id, preferences_now, source_kind, start_at, delta)
85
+ user_dimension_ledger_stage (department_type_id, user_account_state_type_id, hr_user_account_state_type_id, tenant_id, user_id, preferences, source_kind, start_at, delta)
86
86
  SELECT
87
87
  department_type.id,
88
88
  user_account_state_type.id,
89
89
  hr_user_account_state_type.id,
90
90
  user_file_dimension_ledger_stage.tenant_id,
91
91
  user_file_dimension_ledger_stage.user_id,
92
- json_to_hstore(user_file_dimension_ledger_stage.preferences_now),
92
+ json_to_hstore(user_file_dimension_ledger_stage.preferences),
93
93
  user_file_dimension_ledger_stage.source_kind,
94
94
  user_file_dimension_ledger_stage.start_at,
95
95
  user_file_dimension_ledger_stage.delta