masamune 0.11.9 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. checksums.yaml +4 -4
  2. data/lib/masamune/actions/transform.rb +31 -16
  3. data/lib/masamune/schema.rb +0 -1
  4. data/lib/masamune/schema/catalog.rb +2 -10
  5. data/lib/masamune/schema/column.rb +16 -30
  6. data/lib/masamune/schema/dimension.rb +2 -9
  7. data/lib/masamune/schema/fact.rb +0 -4
  8. data/lib/masamune/schema/map.rb +1 -1
  9. data/lib/masamune/schema/row.rb +3 -3
  10. data/lib/masamune/schema/store.rb +1 -3
  11. data/lib/masamune/schema/table.rb +28 -2
  12. data/lib/masamune/transform.rb +0 -1
  13. data/lib/masamune/transform/define_schema.rb +0 -6
  14. data/lib/masamune/transform/define_table.hql.erb +7 -6
  15. data/lib/masamune/transform/define_table.rb +1 -0
  16. data/lib/masamune/transform/{define_event_view.hql.erb → denormalize_table.hql.erb} +8 -26
  17. data/lib/masamune/transform/denormalize_table.psql.erb +1 -1
  18. data/lib/masamune/transform/denormalize_table.rb +13 -4
  19. data/lib/masamune/transform/snapshot_dimension.rb +1 -1
  20. data/lib/masamune/transform/stage_fact.rb +1 -1
  21. data/lib/masamune/version.rb +1 -1
  22. data/spec/masamune/actions/transform_spec.rb +50 -18
  23. data/spec/masamune/schema/catalog_spec.rb +0 -53
  24. data/spec/masamune/schema/column_spec.rb +9 -41
  25. data/spec/masamune/schema/fact_spec.rb +3 -1
  26. data/spec/masamune/schema/map_spec.rb +187 -189
  27. data/spec/masamune/schema/table_spec.rb +8 -0
  28. data/spec/masamune/transform/bulk_upsert.dimension_spec.rb +4 -6
  29. data/spec/masamune/transform/define_schema_spec.rb +5 -6
  30. data/spec/masamune/transform/define_table.dimension_spec.rb +101 -2
  31. data/spec/masamune/transform/define_table.fact_spec.rb +2 -0
  32. data/spec/masamune/transform/denormalize_table_spec.rb +170 -14
  33. data/spec/masamune/transform/snapshot_dimension_spec.rb +1 -1
  34. data/spec/masamune/transform/stage_dimension_spec.rb +3 -3
  35. metadata +3 -9
  36. data/lib/masamune/schema/event.rb +0 -121
  37. data/lib/masamune/transform/define_event_view.rb +0 -60
  38. data/spec/masamune/schema/event_spec.rb +0 -75
  39. data/spec/masamune/transform/define_event_view_spec.rb +0 -84
@@ -36,6 +36,14 @@ describe Masamune::Schema::Table do
36
36
  it { expect(table.name).to eq('account_table') }
37
37
  end
38
38
 
39
+ context 'with format' do
40
+ let(:table) do
41
+ described_class.new id: 'user', properties: { format: :tsv }
42
+ end
43
+
44
+ it { expect(table.properties[:format]).to eq(:tsv) }
45
+ end
46
+
39
47
  context 'with columns' do
40
48
  let(:table) do
41
49
  described_class.new id: 'user',
@@ -63,7 +63,7 @@ describe Masamune::Transform::BulkUpsert do
63
63
  column 'department.department_id', type: :integer
64
64
  column 'user_account_state.name', type: :string
65
65
  column 'hr_user_account_state.name', type: :string
66
- column 'preferences_now', type: :json
66
+ column 'preferences', type: :json
67
67
  column 'start_at', type: :timestamp
68
68
  column 'source_kind', type: :string
69
69
  column 'delta', type: :integer
@@ -147,8 +147,7 @@ describe Masamune::Transform::BulkUpsert do
147
147
  user_account_state_type_id = COALESCE(user_dimension_ledger_stage.user_account_state_type_id, user_dimension_ledger.user_account_state_type_id),
148
148
  hr_user_account_state_type_id = COALESCE(user_dimension_ledger_stage.hr_user_account_state_type_id, user_dimension_ledger.hr_user_account_state_type_id),
149
149
  name = COALESCE(user_dimension_ledger_stage.name, user_dimension_ledger.name),
150
- preferences_now = COALESCE(user_dimension_ledger_stage.preferences_now, user_dimension_ledger.preferences_now),
151
- preferences_was = COALESCE(user_dimension_ledger_stage.preferences_was, user_dimension_ledger.preferences_was)
150
+ preferences = COALESCE(user_dimension_ledger_stage.preferences, user_dimension_ledger.preferences)
152
151
  FROM
153
152
  user_dimension_ledger_stage
154
153
  WHERE
@@ -160,7 +159,7 @@ describe Masamune::Transform::BulkUpsert do
160
159
  ;
161
160
 
162
161
  INSERT INTO
163
- user_dimension_ledger (department_type_id, user_account_state_type_id, hr_user_account_state_type_id, tenant_id, user_id, name, preferences_now, preferences_was, source_kind, source_uuid, start_at, last_modified_at, delta)
162
+ user_dimension_ledger (department_type_id, user_account_state_type_id, hr_user_account_state_type_id, tenant_id, user_id, name, preferences, source_kind, source_uuid, start_at, last_modified_at, delta)
164
163
  SELECT
165
164
  user_dimension_ledger_stage.department_type_id,
166
165
  user_dimension_ledger_stage.user_account_state_type_id,
@@ -168,8 +167,7 @@ describe Masamune::Transform::BulkUpsert do
168
167
  user_dimension_ledger_stage.tenant_id,
169
168
  user_dimension_ledger_stage.user_id,
170
169
  user_dimension_ledger_stage.name,
171
- user_dimension_ledger_stage.preferences_now,
172
- user_dimension_ledger_stage.preferences_was,
170
+ user_dimension_ledger_stage.preferences,
173
171
  user_dimension_ledger_stage.source_kind,
174
172
  user_dimension_ledger_stage.source_uuid,
175
173
  user_dimension_ledger_stage.start_at,
@@ -63,11 +63,10 @@ describe Masamune::Transform::DefineSchema do
63
63
  context 'for hive schema' do
64
64
  before do
65
65
  catalog.schema :hive do
66
- event 'tenant' do
67
- attribute 'tenant_id', type: :integer, immutable: true
68
- attribute 'account_state', type: :string
69
- attribute 'premium_type', type: :string
70
- attribute 'preferences', type: :json
66
+ dimension 'user', type: :ledger do
67
+ column 'tenant_id', index: true, natural_key: true
68
+ column 'user_id', index: true, natural_key: true
69
+ column 'preferences', type: :key_value, null: true
71
70
  end
72
71
  end
73
72
  end
@@ -77,7 +76,7 @@ describe Masamune::Transform::DefineSchema do
77
76
  it 'should render combined template' do
78
77
  is_expected.to eq Masamune::Template.combine \
79
78
  Masamune::Transform::Operator.new('define_schema', source: catalog.hive),
80
- transform.define_event_view(catalog.hive.events['tenant'])
79
+ transform.define_table(catalog.hive.dimensions['user'])
81
80
  end
82
81
  end
83
82
  end
@@ -41,6 +41,106 @@ describe Masamune::Transform::DefineTable do
41
41
  end
42
42
  end
43
43
 
44
+ context 'for hive ledger dimension' do
45
+ before do
46
+ catalog.schema :hive do
47
+ dimension 'tenant', type: :ledger do
48
+ column 'tenant_id', type: :integer, natural_key: true
49
+ column 'tenant_account_state', type: :enum, values: %w(missing unknown active inactive)
50
+ column 'tenant_premium_state', type: :enum, values: %w(missing unkown goodwill pilot sandbox premium internal free vmware)
51
+ column 'preferences', type: :key_value, null: true
52
+ end
53
+ end
54
+ end
55
+
56
+ let(:table) { catalog.hive.tenant_dimension }
57
+
58
+ it 'should render table template' do
59
+ is_expected.to eq <<-EOS.strip_heredoc
60
+ CREATE TABLE IF NOT EXISTS tenant_ledger
61
+ (
62
+ id STRING,
63
+ tenant_id INT,
64
+ tenant_account_state STRING,
65
+ tenant_premium_state STRING,
66
+ preferences STRING,
67
+ source_kind STRING,
68
+ source_uuid STRING,
69
+ start_at STRING,
70
+ last_modified_at STRING,
71
+ delta INT
72
+ )
73
+ TBLPROPERTIES ('serialization.null.format' = '');
74
+ EOS
75
+ end
76
+ end
77
+
78
+ context 'for hive ledger dimension with partitions' do
79
+ before do
80
+ catalog.schema :hive do
81
+ dimension 'tenant', type: :ledger do
82
+ partition :y
83
+ partition :m
84
+ column 'tenant_id', type: :integer, natural_key: true
85
+ column 'tenant_account_state', type: :enum, values: %w(missing unknown active inactive)
86
+ column 'tenant_premium_state', type: :enum, values: %w(missing unkown goodwill pilot sandbox premium internal free vmware)
87
+ column 'preferences', type: :key_value, null: true
88
+ end
89
+ end
90
+ end
91
+
92
+ let(:table) { catalog.hive.tenant_dimension }
93
+
94
+ it 'should render table template' do
95
+ is_expected.to eq <<-EOS.strip_heredoc
96
+ CREATE TABLE IF NOT EXISTS tenant_ledger
97
+ (
98
+ id STRING,
99
+ tenant_id INT,
100
+ tenant_account_state STRING,
101
+ tenant_premium_state STRING,
102
+ preferences STRING,
103
+ source_kind STRING,
104
+ source_uuid STRING,
105
+ start_at STRING,
106
+ last_modified_at STRING,
107
+ delta INT
108
+ )
109
+ PARTITIONED BY (y INT, m INT)
110
+ TBLPROPERTIES ('serialization.null.format' = '');
111
+ EOS
112
+ end
113
+ end
114
+
115
+ context 'for hive ledger dimension with :tsv format' do
116
+ before do
117
+ catalog.schema :hive do
118
+ dimension 'tenant', type: :ledger, properties: { format: :tsv } do
119
+ column 'tenant_id', type: :integer, natural_key: true
120
+ end
121
+ end
122
+ end
123
+
124
+ let(:table) { catalog.hive.tenant_dimension }
125
+
126
+ it 'should render table template' do
127
+ is_expected.to eq <<-EOS.strip_heredoc
128
+ CREATE TABLE IF NOT EXISTS tenant_ledger
129
+ (
130
+ id STRING,
131
+ tenant_id INT,
132
+ source_kind STRING,
133
+ source_uuid STRING,
134
+ start_at STRING,
135
+ last_modified_at STRING,
136
+ delta INT
137
+ )
138
+ ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\t'
139
+ TBLPROPERTIES ('serialization.null.format' = '');
140
+ EOS
141
+ end
142
+ end
143
+
44
144
  context 'for postgres dimension type: one' do
45
145
  before do
46
146
  catalog.schema :postgres do
@@ -160,8 +260,7 @@ describe Masamune::Transform::DefineTable do
160
260
  user_account_state_type_id INTEGER REFERENCES user_account_state_type(id),
161
261
  tenant_id INTEGER NOT NULL,
162
262
  user_id INTEGER NOT NULL,
163
- preferences_now HSTORE,
164
- preferences_was HSTORE,
263
+ preferences HSTORE,
165
264
  source_kind VARCHAR NOT NULL,
166
265
  source_uuid VARCHAR NOT NULL,
167
266
  start_at TIMESTAMP NOT NULL,
@@ -60,6 +60,8 @@ describe Masamune::Transform::DefineTable do
60
60
  end
61
61
 
62
62
  fact 'visits', partition: 'y%Ym%m' do
63
+ partition :y
64
+ partition :m
63
65
  references :cluster
64
66
  references :date
65
67
  references :tenant
@@ -63,22 +63,80 @@ describe Masamune::Transform::DenormalizeTable do
63
63
  end
64
64
  end
65
65
 
66
- let(:target) { catalog.postgres.visits_fact }
67
- let(:columns) do
68
- [
69
- 'date.date_id',
70
- 'tenant.tenant_id',
71
- 'user.tenant_id',
72
- 'user.user_id',
73
- 'user_agent.name',
74
- 'user_agent.version',
75
- 'total',
76
- 'time_key'
77
- ]
66
+ let(:options) { {} }
67
+
68
+ subject(:result) { transform.denormalize_table(target, options).to_s }
69
+
70
+ context 'with postgres fact with :columns' do
71
+ let(:target) { catalog.postgres.visits_fact }
72
+ let(:options) do
73
+ {
74
+ columns: [
75
+ 'date.date_id',
76
+ 'tenant.tenant_id',
77
+ 'user.tenant_id',
78
+ 'user.user_id',
79
+ 'user_agent.name',
80
+ 'user_agent.version',
81
+ 'total',
82
+ 'time_key'
83
+ ]
84
+ }
85
+ end
86
+
87
+ it 'should eq render denormalize_table template' do
88
+ is_expected.to eq <<-EOS.strip_heredoc
89
+ SELECT
90
+ date_dimension.date_id AS date_dimension_date_id,
91
+ tenant_dimension.tenant_id AS tenant_dimension_tenant_id,
92
+ user_dimension.tenant_id AS user_dimension_tenant_id,
93
+ user_dimension.user_id AS user_dimension_user_id,
94
+ user_agent_type.name AS user_agent_type_name,
95
+ user_agent_type.version AS user_agent_type_version,
96
+ visits_fact.total,
97
+ visits_fact.time_key
98
+ FROM
99
+ visits_fact
100
+ JOIN
101
+ date_dimension
102
+ ON
103
+ date_dimension.id = visits_fact.date_dimension_id
104
+ JOIN
105
+ tenant_dimension
106
+ ON
107
+ tenant_dimension.id = visits_fact.tenant_dimension_id
108
+ JOIN
109
+ user_dimension
110
+ ON
111
+ user_dimension.id = visits_fact.user_dimension_id
112
+ JOIN
113
+ user_agent_type
114
+ ON
115
+ user_agent_type.id = visits_fact.user_agent_type_id
116
+ ORDER BY
117
+ date_dimension_date_id,
118
+ tenant_dimension_tenant_id,
119
+ user_dimension_tenant_id,
120
+ user_dimension_user_id,
121
+ user_agent_type_name,
122
+ user_agent_type_version,
123
+ total,
124
+ time_key
125
+ ;
126
+ EOS
127
+ end
78
128
  end
79
129
 
80
- context 'with postgres fact' do
81
- subject(:result) { transform.denormalize_table(target, columns).to_s }
130
+ context 'with postgres fact with :except' do
131
+ let(:target) { catalog.postgres.visits_fact }
132
+ let(:options) do
133
+ {
134
+ except: [
135
+ 'cluster.name',
136
+ 'last_modified_at'
137
+ ]
138
+ }
139
+ end
82
140
 
83
141
  it 'should eq render denormalize_table template' do
84
142
  is_expected.to eq <<-EOS.strip_heredoc
@@ -89,6 +147,7 @@ describe Masamune::Transform::DenormalizeTable do
89
147
  user_dimension.user_id AS user_dimension_user_id,
90
148
  user_agent_type.name AS user_agent_type_name,
91
149
  user_agent_type.version AS user_agent_type_version,
150
+ user_agent_type.mobile AS user_agent_type_mobile,
92
151
  visits_fact.total,
93
152
  visits_fact.time_key
94
153
  FROM
@@ -116,10 +175,107 @@ describe Masamune::Transform::DenormalizeTable do
116
175
  user_dimension_user_id,
117
176
  user_agent_type_name,
118
177
  user_agent_type_version,
178
+ user_agent_type_mobile,
119
179
  total,
120
180
  time_key
121
181
  ;
122
182
  EOS
123
183
  end
124
184
  end
185
+
186
+ context 'with hive table' do
187
+ before do
188
+ catalog.schema :hive do
189
+ dimension 'tenant', type: :ledger do
190
+ partition :y
191
+ partition :m
192
+ column 'tenant_id', type: :integer, natural_key: true
193
+ column 'tenant_account_state', type: :enum, values: %w(missing unknown active inactive)
194
+ column 'tenant_premium_state', type: :enum, values: %w(missing unkown goodwill pilot sandbox premium internal free vmware)
195
+ column 'preferences', type: :key_value, null: true
196
+ end
197
+ end
198
+ end
199
+
200
+ let(:target) { catalog.hive.tenant_dimension }
201
+
202
+ let(:options) do
203
+ {
204
+ columns: [
205
+ 'tenant_id',
206
+ 'tenant_account_state',
207
+ 'tenant_premium_state',
208
+ 'preferences',
209
+ 'y',
210
+ 'm'
211
+ ],
212
+ order: [
213
+ 'tenant_id',
214
+ 'start_at'
215
+ ]
216
+ }
217
+ end
218
+
219
+ it 'should eq render denormalize_table template' do
220
+ is_expected.to eq <<-EOS.strip_heredoc
221
+ SELECT
222
+ tenant_ledger.tenant_id,
223
+ tenant_ledger.tenant_account_state,
224
+ tenant_ledger.tenant_premium_state,
225
+ tenant_ledger.preferences,
226
+ tenant_ledger.y,
227
+ tenant_ledger.m
228
+ FROM
229
+ tenant_ledger
230
+ ORDER BY
231
+ tenant_id,
232
+ start_at
233
+ ;
234
+ EOS
235
+ end
236
+ end
237
+
238
+ context 'with hive table with implicit references' do
239
+ before do
240
+ catalog.schema :hive do
241
+ dimension 'date', type: :date, implicit: true do
242
+ column 'date_id', type: :integer, natural_key: true
243
+ end
244
+
245
+ fact 'visits' do
246
+ references :date
247
+ references :user, degenerate: true
248
+ measure 'total'
249
+ end
250
+ end
251
+ end
252
+
253
+ let(:target) { catalog.hive.visits_fact }
254
+
255
+ let(:options) do
256
+ {
257
+ columns: [
258
+ 'date.date_id',
259
+ 'user.id',
260
+ 'total'
261
+ ]
262
+ }
263
+ end
264
+
265
+ it 'should eq render denormalize_table template' do
266
+ is_expected.to eq <<-EOS.strip_heredoc
267
+ SELECT
268
+ date_dimension_date_id AS date_dimension_date_id,
269
+ user_type_id AS user_type_id,
270
+ visits_fact.total
271
+ FROM
272
+ visits_fact
273
+ ORDER BY
274
+ date_dimension_date_id,
275
+ user_type_id,
276
+ total
277
+ ;
278
+ EOS
279
+ end
280
+ end
125
281
  end
@@ -72,7 +72,7 @@ describe Masamune::Transform::SnapshotDimension do
72
72
  coalesce_merge(user_account_state_type_id) OVER w AS user_account_state_type_id,
73
73
  tenant_id AS tenant_id,
74
74
  user_id AS user_id,
75
- hstore_merge(preferences_now) OVER w - hstore_merge(preferences_was) OVER w AS preferences,
75
+ hstore_merge(preferences) OVER w AS preferences,
76
76
  start_at AS start_at
77
77
  FROM
78
78
  windows
@@ -63,7 +63,7 @@ describe Masamune::Transform::StageDimension do
63
63
  column 'department.department_id', type: :integer
64
64
  column 'user_account_state.name', type: :string
65
65
  column 'hr_user_account_state.name', type: :string
66
- column 'preferences_now', type: :json
66
+ column 'preferences', type: :json
67
67
  column 'start_at', type: :timestamp
68
68
  column 'source_kind', type: :string
69
69
  column 'delta', type: :integer
@@ -82,14 +82,14 @@ describe Masamune::Transform::StageDimension do
82
82
  CREATE TEMPORARY TABLE IF NOT EXISTS user_dimension_ledger_stage (LIKE user_dimension_ledger INCLUDING ALL);
83
83
 
84
84
  INSERT INTO
85
- user_dimension_ledger_stage (department_type_id, user_account_state_type_id, hr_user_account_state_type_id, tenant_id, user_id, preferences_now, source_kind, start_at, delta)
85
+ user_dimension_ledger_stage (department_type_id, user_account_state_type_id, hr_user_account_state_type_id, tenant_id, user_id, preferences, source_kind, start_at, delta)
86
86
  SELECT
87
87
  department_type.id,
88
88
  user_account_state_type.id,
89
89
  hr_user_account_state_type.id,
90
90
  user_file_dimension_ledger_stage.tenant_id,
91
91
  user_file_dimension_ledger_stage.user_id,
92
- json_to_hstore(user_file_dimension_ledger_stage.preferences_now),
92
+ json_to_hstore(user_file_dimension_ledger_stage.preferences),
93
93
  user_file_dimension_ledger_stage.source_kind,
94
94
  user_file_dimension_ledger_stage.start_at,
95
95
  user_file_dimension_ledger_stage.delta