masamune 0.11.9 → 0.12.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39) hide show
  1. checksums.yaml +4 -4
  2. data/lib/masamune/actions/transform.rb +31 -16
  3. data/lib/masamune/schema.rb +0 -1
  4. data/lib/masamune/schema/catalog.rb +2 -10
  5. data/lib/masamune/schema/column.rb +16 -30
  6. data/lib/masamune/schema/dimension.rb +2 -9
  7. data/lib/masamune/schema/fact.rb +0 -4
  8. data/lib/masamune/schema/map.rb +1 -1
  9. data/lib/masamune/schema/row.rb +3 -3
  10. data/lib/masamune/schema/store.rb +1 -3
  11. data/lib/masamune/schema/table.rb +28 -2
  12. data/lib/masamune/transform.rb +0 -1
  13. data/lib/masamune/transform/define_schema.rb +0 -6
  14. data/lib/masamune/transform/define_table.hql.erb +7 -6
  15. data/lib/masamune/transform/define_table.rb +1 -0
  16. data/lib/masamune/transform/{define_event_view.hql.erb → denormalize_table.hql.erb} +8 -26
  17. data/lib/masamune/transform/denormalize_table.psql.erb +1 -1
  18. data/lib/masamune/transform/denormalize_table.rb +13 -4
  19. data/lib/masamune/transform/snapshot_dimension.rb +1 -1
  20. data/lib/masamune/transform/stage_fact.rb +1 -1
  21. data/lib/masamune/version.rb +1 -1
  22. data/spec/masamune/actions/transform_spec.rb +50 -18
  23. data/spec/masamune/schema/catalog_spec.rb +0 -53
  24. data/spec/masamune/schema/column_spec.rb +9 -41
  25. data/spec/masamune/schema/fact_spec.rb +3 -1
  26. data/spec/masamune/schema/map_spec.rb +187 -189
  27. data/spec/masamune/schema/table_spec.rb +8 -0
  28. data/spec/masamune/transform/bulk_upsert.dimension_spec.rb +4 -6
  29. data/spec/masamune/transform/define_schema_spec.rb +5 -6
  30. data/spec/masamune/transform/define_table.dimension_spec.rb +101 -2
  31. data/spec/masamune/transform/define_table.fact_spec.rb +2 -0
  32. data/spec/masamune/transform/denormalize_table_spec.rb +170 -14
  33. data/spec/masamune/transform/snapshot_dimension_spec.rb +1 -1
  34. data/spec/masamune/transform/stage_dimension_spec.rb +3 -3
  35. metadata +3 -9
  36. data/lib/masamune/schema/event.rb +0 -121
  37. data/lib/masamune/transform/define_event_view.rb +0 -60
  38. data/spec/masamune/schema/event_spec.rb +0 -75
  39. data/spec/masamune/transform/define_event_view_spec.rb +0 -84
@@ -33,7 +33,7 @@ ON
33
33
  <%= condition %>
34
34
  <%- end -%>
35
35
  ORDER BY
36
- <%- target.order_by_columns(columns).each do |column, last| -%>
36
+ <%- target.order_by_columns(order_by).each do |column, last| -%>
37
37
  <%= column %><%= ',' unless last %>
38
38
  <%- end -%>
39
39
  ;
@@ -24,20 +24,29 @@ module Masamune::Transform
24
24
  module DenormalizeTable
25
25
  extend ActiveSupport::Concern
26
26
 
27
- def denormalize_table(target, columns = [])
28
- Operator.new(__method__, target: target, columns: columns, presenters: { postgres: Postgres })
27
+ def denormalize_table(target, options = {})
28
+ options.symbolize_keys!
29
+ columns = options[:include] || []
30
+ columns += options[:columns] || target.denormalized_column_names
31
+ columns -= options[:except] || []
32
+ order_by = options[:order] || columns
33
+ Operator.new(__method__, target: target, columns: columns, order_by: order_by, presenters: { postgres: Common, hive: Common })
29
34
  end
30
35
 
31
36
  private
32
37
 
33
- class Postgres < SimpleDelegator
38
+ class Common < SimpleDelegator
34
39
  include Masamune::LastElement
35
40
 
36
41
  def select_columns(column_names)
37
42
  column_names.map do |column_name|
38
43
  next unless column = dereference_column_name(column_name)
39
44
  if column.reference
40
- "#{column.foreign_key_name} AS #{column.name}"
45
+ if column.reference.implicit || column.reference.degenerate
46
+ "#{column.name} AS #{column.name}"
47
+ else
48
+ "#{column.foreign_key_name} AS #{column.name}"
49
+ end
41
50
  else
42
51
  column.qualified_name
43
52
  end
@@ -56,7 +56,7 @@ module Masamune::Transform
56
56
  if column.natural_key
57
57
  "#{column.name} AS #{column.name}"
58
58
  elsif column.type == :key_value
59
- "hstore_merge(#{column.name}_now) OVER #{window} - hstore_merge(#{column.name}_was) OVER #{window} AS #{column.name}"
59
+ "hstore_merge(#{column.name}) OVER #{window} AS #{column.name}"
60
60
  else
61
61
  "coalesce_merge(#{column.name}) OVER #{window} AS #{column.name}"
62
62
  end
@@ -77,7 +77,7 @@ module Masamune::Transform
77
77
 
78
78
  if column.reference && !column.reference.default.nil?
79
79
  coalesce_values << column.reference.default(column.adjacent) if column.adjacent.natural_key
80
- elsif !column.adjacent.default.nil?
80
+ elsif column.adjacent && !column.adjacent.default.nil?
81
81
  coalesce_values << column.adjacent.sql_value(column.adjacent.default)
82
82
  end
83
83
 
@@ -21,5 +21,5 @@
21
21
  # THE SOFTWARE.
22
22
 
23
23
  module Masamune
24
- VERSION = '0.11.9'
24
+ VERSION = '0.12.0'
25
25
  end
@@ -46,16 +46,6 @@ describe Masamune::Actions::Transform do
46
46
  column 'updated_at', type: :timestamp
47
47
  end
48
48
 
49
- map from: postgres.user_file, to: postgres.user_dimension do |row|
50
- {
51
- user_id: row[:id],
52
- tenant_id: row[:tenant_id],
53
- source_kind: 'users',
54
- start_at: row[:updated_at],
55
- delta: 1
56
- }
57
- end
58
-
59
49
  fact 'visits', partition: 'y%Ym%m', grain: %w(hourly daily monthly) do
60
50
  references :date
61
51
  references :user
@@ -89,13 +79,37 @@ describe Masamune::Actions::Transform do
89
79
  end
90
80
 
91
81
  describe '.load_dimension' do
92
- before do
93
- mock_command(/\Apsql/, mock_success)
82
+ subject { instance.load_dimension(source_file, postgres.user_file, postgres.user_dimension) }
83
+
84
+ context 'without :map' do
85
+ before do
86
+ expect_any_instance_of(Masamune::Schema::Map).to_not receive(:apply)
87
+ mock_command(/\Apsql/, mock_success)
88
+ end
89
+
90
+ it { is_expected.to be_success }
94
91
  end
95
92
 
96
- subject { instance.load_dimension(source_file, postgres.user_file, postgres.user_dimension) }
93
+ context 'with :map' do
94
+ before do
95
+ catalog.schema :postgres do
96
+ map from: postgres.user_file, to: postgres.user_dimension do |row|
97
+ {
98
+ user_id: row[:id],
99
+ tenant_id: row[:tenant_id],
100
+ source_kind: 'users',
101
+ start_at: row[:updated_at],
102
+ delta: 1
103
+ }
104
+ end
105
+ end
106
+
107
+ expect_any_instance_of(Masamune::Schema::Map).to receive(:apply).and_call_original
108
+ mock_command(/\Apsql/, mock_success)
109
+ end
97
110
 
98
- it { is_expected.to be_success }
111
+ it { is_expected.to be_success }
112
+ end
99
113
  end
100
114
 
101
115
  describe '.relabel_dimension' do
@@ -121,13 +135,31 @@ describe Masamune::Actions::Transform do
121
135
  describe '.load_fact' do
122
136
  let(:date) { DateTime.civil(2014, 8) }
123
137
 
124
- before do
125
- mock_command(/\Apsql/, mock_success)
138
+ context 'without :map' do
139
+ before do
140
+ expect_any_instance_of(Masamune::Schema::Map).to_not receive(:apply)
141
+ mock_command(/\Apsql/, mock_success)
142
+ end
143
+
144
+ subject { instance.load_fact(source_file, postgres.visits_hourly_file, postgres.visits_hourly_fact, date) }
145
+
146
+ it { is_expected.to be_success }
126
147
  end
127
148
 
128
- subject { instance.load_fact(source_file, postgres.visits_hourly_file, postgres.visits_hourly_fact, date) }
149
+ context 'with :map' do
150
+ before do
151
+ catalog.schema :postgres do
152
+ map from: postgres.visits_hourly_file, to: postgres.visits_hourly_fact, distinct: true
153
+ end
129
154
 
130
- it { is_expected.to be_success }
155
+ expect_any_instance_of(Masamune::Schema::Map).to receive(:apply).and_call_original
156
+ mock_command(/\Apsql/, mock_success)
157
+ end
158
+
159
+ subject { instance.load_fact(source_file, postgres.visits_hourly_file, postgres.visits_hourly_fact, date) }
160
+
161
+ it { is_expected.to be_success }
162
+ end
131
163
  end
132
164
 
133
165
  describe '.rollup_fact' do
@@ -332,30 +332,6 @@ describe Masamune::Schema::Catalog do
332
332
  it { expect(visits_monthly.measures).to include :count }
333
333
  end
334
334
 
335
- context 'when schema contains events' do
336
- before do
337
- instance.schema :hive do
338
- event 'event_one' do
339
- attribute 'attribute_one'
340
- attribute 'attribute_two'
341
- end
342
-
343
- event 'event_two' do
344
- attribute 'attribute_three'
345
- attribute 'attribute_four'
346
- end
347
- end
348
- end
349
-
350
- let(:event_one) { hive.event_one_event }
351
- let(:event_two) { hive.event_two_event }
352
-
353
- it { expect(event_one.attributes).to include :attribute_one }
354
- it { expect(event_one.attributes).to include :attribute_two }
355
- it { expect(event_two.attributes).to include :attribute_three }
356
- it { expect(event_two.attributes).to include :attribute_four }
357
- end
358
-
359
335
  context 'when schema contains file' do
360
336
  before do
361
337
  instance.schema :postgres do
@@ -456,35 +432,6 @@ describe Masamune::Schema::Catalog do
456
432
  end
457
433
  end
458
434
 
459
- context 'when schema contains map from: event' do
460
- before do
461
- instance.schema :postgres do
462
- dimension 'user', type: :mini do
463
- column 'user_id', type: :integer, natural_key: true
464
- column 'name', type: :string
465
- end
466
-
467
- event 'users' do
468
- attribute 'id', type: :integer, immutable: true
469
- attribute 'name', type: :string
470
- end
471
-
472
- map from: postgres.users_event, to: postgres.user_dimension do |row|
473
- {
474
- 'user_id' => row[:id],
475
- 'name' => row[:name_now]
476
- }
477
- end
478
- end
479
- end
480
-
481
- subject(:map) { postgres.users_event.map(to: postgres.user_dimension) }
482
-
483
- it 'constructs map' do
484
- expect(map.function).to_not be_nil
485
- end
486
- end
487
-
488
435
  context 'when schema contains map missing the from: field' do
489
436
  subject(:schema) do
490
437
  instance.schema :postgres do
@@ -433,31 +433,6 @@ describe Masamune::Schema::Column do
433
433
  it { is_expected.to eq({"k" => "v"}) }
434
434
  end
435
435
  end
436
-
437
- context 'with type :yaml and sub_type :boolean' do
438
- let(:column) { described_class.new(id: 'yaml', type: :yaml, sub_type: :boolean) }
439
- let(:value) do
440
- {
441
- 'true' => true,
442
- 'one' => '1',
443
- 'zero' => '0',
444
- 'false' => false,
445
- 'string' => 'string',
446
- 'one_integer' => 1,
447
- 'zero_integer' => 0
448
- }.to_yaml
449
- end
450
-
451
- it 'should cast yaml to ruby' do
452
- expect(result['true']).to eq(true)
453
- expect(result['false']).to eq(false)
454
- expect(result['one']).to eq(true)
455
- expect(result['zero']).to eq(false)
456
- expect(result['one_integer']).to eq(true)
457
- expect(result['zero_integer']).to eq(false)
458
- expect(result.key?('string')).to eq(false)
459
- end
460
- end
461
436
  end
462
437
 
463
438
  describe '#default_ruby_value' do
@@ -665,8 +640,13 @@ describe Masamune::Schema::Column do
665
640
  it { is_expected.to eq(false) }
666
641
  end
667
642
 
668
- context 'when surrogate_key' do
669
- let(:column) { described_class.new id: 'name', type: :string, surrogate_key: true }
643
+ context 'when column has default of false' do
644
+ let(:column) { described_class.new id: 'flag', type: :boolean, default: false }
645
+ it { is_expected.to eq(false) }
646
+ end
647
+
648
+ context 'when column has reference' do
649
+ let(:column) { described_class.new id: 'name', type: :string }
670
650
  it { is_expected.to eq(true) }
671
651
 
672
652
  context 'when reference allow null' do
@@ -682,22 +662,10 @@ describe Masamune::Schema::Column do
682
662
  end
683
663
  it { is_expected.to eq(false) }
684
664
  end
685
- end
686
-
687
- context 'when natural_key' do
688
- let(:column) { described_class.new id: 'name', type: :string, natural_key: true }
689
- it { is_expected.to eq(true) }
690
665
 
691
- context 'when reference allow null' do
666
+ context 'when reference has default of false' do
692
667
  before do
693
- allow(column).to receive(:reference).and_return(double(null: true, default: nil))
694
- end
695
- it { is_expected.to eq(false) }
696
- end
697
-
698
- context 'when reference has default' do
699
- before do
700
- allow(column).to receive(:reference).and_return(double(null: false, default: 'Unknown'))
668
+ allow(column).to receive(:reference).and_return(double(null: false, default: false))
701
669
  end
702
670
  it { is_expected.to eq(false) }
703
671
  end
@@ -47,7 +47,9 @@ describe Masamune::Schema::Fact do
47
47
  Masamune::Schema::TableReference.new(user_dimension)
48
48
  ],
49
49
  columns: [
50
- Masamune::Schema::Column.new(id: 'total', type: :integer)
50
+ Masamune::Schema::Column.new(id: 'total', type: :integer),
51
+ Masamune::Schema::Column.new(id: 'y', type: :integer, partition: true),
52
+ Masamune::Schema::Column.new(id: 'm', type: :integer, partition: true)
51
53
  ]
52
54
  end
53
55
 
@@ -52,32 +52,6 @@ describe Masamune::Schema::Map do
52
52
  column 'deleted_at', type: :timestamp, null: true
53
53
  end
54
54
  end
55
-
56
- catalog.schema :hive do
57
- event 'user' do
58
- attribute 'id', type: :integer, immutable: true
59
- attribute 'tenant_id', type: :integer, immutable: true
60
- attribute 'admin', type: :boolean
61
- attribute 'preferences', type: :json
62
- end
63
-
64
- dimension 'tenant', type: :two, implicit: true do
65
- column 'tenant_id'
66
- end
67
-
68
- fact 'user' do
69
- references :tenant
70
- measure 'delta'
71
- end
72
-
73
- file 'user' do
74
- column 'id', type: :integer
75
- column 'tenant_id', type: :integer
76
- column 'admin', type: :boolean
77
- column 'preferences', type: :json
78
- column 'deleted_at', type: :timestamp, null: true
79
- end
80
- end
81
55
  end
82
56
 
83
57
  context 'without source' do
@@ -136,22 +110,26 @@ describe Masamune::Schema::Map do
136
110
  end
137
111
 
138
112
  context 'with undefined function' do
139
- let(:source) { catalog.hive.user_event }
140
- let(:target) { catalog.hive.user_fact }
141
- let(:source_data) { '' }
142
- let(:target_data) { '' }
143
-
144
113
  before do
145
- catalog.schema :hive do
146
- map from: hive.user_event, to: hive.user_fact do |row|
114
+ catalog.schema :files do
115
+ file 'input'
116
+ file 'output'
117
+
118
+ map from: files.input , to: files.output do |row|
119
+ # Empty
147
120
  end
148
121
  end
149
122
  end
150
123
 
124
+ let(:source) { catalog.files.input }
125
+ let(:target) { catalog.files.output }
126
+ let(:source_data) { '' }
127
+ let(:target_data) { '' }
128
+
151
129
  it { expect { subject }.to raise_error ArgumentError, /function for map between .* does not return output for default input/ }
152
130
  end
153
131
 
154
- context 'from csv file to dimension' do
132
+ context 'from csv file to postgres dimension' do
155
133
  before do
156
134
  catalog.schema :files do
157
135
  map from: postgres.user_file, to: postgres.user_dimension, distinct: true do |row|
@@ -161,7 +139,7 @@ describe Masamune::Schema::Map do
161
139
  'user_account_state.name' => row[:deleted_at] ? 'deleted' : 'active',
162
140
  'hr_user_account_state.name' => row[:deleted_at] ? 'deleted' : 'active',
163
141
  'admin' => row[:admin],
164
- 'preferences_now' => row[:preferences],
142
+ 'preferences' => row[:preferences],
165
143
  'source' => 'users_file',
166
144
  'cluster_id' => 100
167
145
  }
@@ -193,7 +171,7 @@ describe Masamune::Schema::Map do
193
171
 
194
172
  let(:target_data) do
195
173
  <<-EOS.strip_heredoc
196
- tenant_id,user_id,user_account_state_type_name,hr_user_account_state_type_name,admin,preferences_now,source,cluster_id
174
+ tenant_id,user_id,user_account_state_type_name,hr_user_account_state_type_name,admin,preferences,source,cluster_id
197
175
  30,1,active,active,FALSE,{},users_file,100
198
176
  40,2,deleted,deleted,TRUE,"{""enabled"":true}",users_file,100
199
177
  EOS
@@ -210,19 +188,26 @@ describe Masamune::Schema::Map do
210
188
  it_behaves_like 'apply input/output'
211
189
  end
212
190
 
213
- context 'from event to postgres dimension with quoted json' do
191
+ context 'from tsv file to postgres dimension' do
214
192
  before do
215
193
  catalog.schema :files do
216
- map from: hive.user_event, to: postgres.user_dimension do |row|
194
+ file 'input', format: :tsv, headers: false do
195
+ column 'id', type: :integer
196
+ column 'tenant_id', type: :integer
197
+ column 'admin', type: :boolean
198
+ column 'preferences', type: :json
199
+ column 'deleted_at', type: :timestamp, null: true
200
+ end
201
+
202
+ map from: files.input, to: postgres.user_dimension do |row|
217
203
  raise if row[:tenant_id] == 42
218
204
  {
219
205
  'tenant_id' => row[:tenant_id],
220
206
  'user_id' => row[:id],
221
- 'user_account_state.name' => row[:type] =~ /delete/ ? 'deleted' : 'active',
222
- 'admin' => row[:type] =~ /delete/ ? row[:admin_was] : row[:admin_now],
223
- 'preferences_now' => row[:preferences_now],
224
- 'preferences_was' => row[:preferences_was],
225
- 'source' => 'user_event',
207
+ 'user_account_state.name' => row[:deleted_at] ? 'deleted' : 'active',
208
+ 'admin' => row[:admin],
209
+ 'preferences' => row[:preferences],
210
+ 'source' => 'user_file',
226
211
  'cluster_id' => 100
227
212
  }
228
213
  end
@@ -230,7 +215,7 @@ describe Masamune::Schema::Map do
230
215
  end
231
216
 
232
217
  let(:source) do
233
- catalog.hive.user_event
218
+ catalog.files.input
234
219
  end
235
220
 
236
221
  let(:target) do
@@ -242,72 +227,113 @@ describe Masamune::Schema::Map do
242
227
  expect(environment.logger).to receive(:warn).with(/failed to parse '{.*}' for #{source.name}/).ordered
243
228
  end
244
229
 
245
- let(:source_data) do
246
- <<-EOS.strip_heredoc
247
- X user_create 1 30 0 \\N \\N \\N
248
- # NOTE intentional duplicate record
249
- X user_create 1 30 0 \\N \\N \\N
250
- A user_create 1 42 0 \\N \\N \\N
251
- Y user_delete 2 40 0 1 "{""enabled"":true}" \\N
252
- # NOTE record is intentionally invalid
253
- Z user_create 3 50 0 1 INVALID_JSON \\N
254
- EOS
255
- end
256
-
257
230
  let(:target_data) do
258
231
  <<-EOS.strip_heredoc
259
- tenant_id,user_id,user_account_state_type_name,admin,preferences_now,preferences_was,source,cluster_id
260
- 30,1,active,FALSE,{},{},user_event,100
261
- 30,1,active,FALSE,{},{},user_event,100
262
- 40,2,deleted,TRUE,"{""enabled"":true}",{},user_event,100
232
+ tenant_id,user_id,user_account_state_type_name,admin,preferences,source,cluster_id
233
+ 30,1,active,FALSE,{},user_file,100
234
+ 30,1,active,FALSE,{},user_file,100
235
+ 40,2,deleted,TRUE,"{""enabled"":true}",user_file,100
263
236
  EOS
264
237
  end
265
238
 
266
- it 'should match target data' do
267
- is_expected.to eq(target_data)
239
+ context 'with quoted json' do
240
+ let(:source_data) do
241
+ <<-EOS.strip_heredoc
242
+ 1 30 0
243
+ # NOTE intentional duplicate record
244
+ 1 30 0
245
+ 1 42 0
246
+ 2 40 1 "{""enabled"":true}" 2015-07-19 00:00:00
247
+ # NOTE record is intentionally invalid
248
+ 3 50 0 INVALID_JSON
249
+ EOS
250
+ end
251
+
252
+ it 'should match target data' do
253
+ is_expected.to eq(target_data)
254
+ end
255
+
256
+ it_behaves_like 'apply input/output'
268
257
  end
269
258
 
270
- it_behaves_like 'apply input/output'
259
+ context 'with raw json' do
260
+ let(:source_data) do
261
+ <<-EOS.strip_heredoc
262
+ 1 30 0
263
+ # NOTE intentional duplicate record
264
+ 1 30 0
265
+ 1 42 0
266
+ 2 40 1 {"enabled":true} 2015-07-19 00:00:00
267
+ # NOTE record is intentionally invalid
268
+ 3 50 0 INVALID_JSON
269
+ EOS
270
+ end
271
+
272
+ it 'should match target data' do
273
+ is_expected.to eq(target_data)
274
+ end
275
+
276
+ it_behaves_like 'apply input/output'
277
+ end
271
278
  end
272
279
 
273
- context 'from event to tsv file' do
280
+ context 'from tsv file to csv file' do
274
281
  before do
275
282
  catalog.schema :files do
276
- map from: hive.user_event, to: hive.user_file do |row|
283
+ file 'input', format: :tsv, headers: false do
284
+ column 'id', type: :integer
285
+ column 'tenant_id', type: :integer
286
+ column 'admin', type: :boolean
287
+ column 'preferences', type: :json
288
+ column 'deleted_at', type: :timestamp, null: true
289
+ end
290
+
291
+ file 'output', format: :csv, headers: true do
292
+ column 'id', type: :integer
293
+ column 'tenant_id', type: :integer
294
+ column 'admin', type: :boolean
295
+ column 'preferences', type: :yaml
296
+ column 'deleted_at', type: :timestamp, null: true
297
+ end
298
+
299
+ map from: files.input, to: files.output do |row|
277
300
  {
278
301
  'id' => row[:id],
279
302
  'tenant_id' => row[:tenant_id],
280
- 'deleted_at' => row[:type] =~ /delete/ ? row[:created_at] : nil,
281
- 'admin' => row[:admin_now],
282
- 'preferences' => row[:preferences_now]
303
+ 'deleted_at' => row[:deleted_at],
304
+ 'admin' => row[:admin],
305
+ 'preferences' => row[:preferences]
283
306
  }
284
307
  end
285
308
  end
286
309
  end
287
310
 
288
311
  let(:source) do
289
- catalog.hive.user_event
312
+ catalog.files.input
290
313
  end
291
314
 
292
315
  let(:target) do
293
- catalog.hive.user_file
316
+ catalog.files.output
294
317
  end
295
318
 
296
319
  let(:source_data) do
297
320
  <<-EOS.strip_heredoc
298
- X user_create 1 30 0 \\N \\N \\N 0 \\N
299
- Y user_delete 2 40 0 1 "{""enabled"":true}" \\N 0 2014-02-26T18:15:51Z
321
+ 1 30 0
322
+ 2 40 0 "{""enabled"":true}" 2014-02-26T18:15:51.000Z
300
323
  EOS
301
324
  end
302
325
 
303
326
  let(:target_data) do
304
327
  <<-EOS.strip_heredoc
305
- 1 30 {}
306
- 2 40 2014-02-26T18:15:51.000Z "{""enabled"":true}"
328
+ id,tenant_id,deleted_at,admin,preferences
329
+ 1,30,,FALSE,"--- {}
330
+ "
331
+ 2,40,2014-02-26T18:15:51.000Z,FALSE,"---
332
+ enabled: true
333
+ "
307
334
  EOS
308
335
  end
309
336
 
310
-
311
337
  it 'should match target data' do
312
338
  is_expected.to eq(target_data)
313
339
  end
@@ -315,37 +341,46 @@ describe Masamune::Schema::Map do
315
341
  it_behaves_like 'apply input/output'
316
342
  end
317
343
 
318
- context 'from event to csv file' do
344
+ context 'from csv file to tsv file' do
319
345
  before do
320
346
  catalog.schema :files do
321
- map from: hive.user_event, to: postgres.user_file do |row|
347
+ file 'input', format: :csv, headers: true, json_encoding: :quoted do
348
+ column 'id', type: :integer
349
+ column 'tenant_id', type: :integer
350
+ column 'admin', type: :boolean
351
+ column 'preferences', type: :yaml
352
+ column 'deleted_at', type: :timestamp, null: true
353
+ end
354
+
355
+ file 'output', format: :tsv, headers: false do
356
+ column 'id', type: :integer
357
+ column 'tenant_id', type: :integer
358
+ column 'admin', type: :boolean
359
+ column 'preferences', type: :json
360
+ column 'deleted_at', type: :timestamp, null: true
361
+ end
362
+
363
+ map from: files.input, to: files.output do |row|
322
364
  {
323
365
  'id' => row[:id],
324
366
  'tenant_id' => row[:tenant_id],
325
- 'deleted_at' => row[:type] =~ /delete/ ? row[:created_at] : nil,
326
- 'admin' => row[:admin_now],
327
- 'preferences' => row[:preferences_now]
367
+ 'deleted_at' => row[:deleted_at],
368
+ 'admin' => row[:admin],
369
+ 'preferences' => row[:preferences]
328
370
  }
329
371
  end
330
372
  end
331
373
  end
332
374
 
333
375
  let(:source) do
334
- catalog.hive.user_event
376
+ catalog.files.input
335
377
  end
336
378
 
337
379
  let(:target) do
338
- catalog.postgres.user_file
380
+ catalog.files.output
339
381
  end
340
382
 
341
383
  let(:source_data) do
342
- <<-EOS.strip_heredoc
343
- X user_create 1 30 0 \\N \\N \\N 0 \\N
344
- Y user_delete 2 40 0 1 "{""enabled"":true}" \\N 0 2014-02-26T18:15:51Z
345
- EOS
346
- end
347
-
348
- let(:target_data) do
349
384
  <<-EOS.strip_heredoc
350
385
  id,tenant_id,deleted_at,admin,preferences
351
386
  1,30,,FALSE,"--- {}
@@ -356,6 +391,13 @@ describe Masamune::Schema::Map do
356
391
  EOS
357
392
  end
358
393
 
394
+ let(:target_data) do
395
+ <<-EOS.strip_heredoc
396
+ 1 30 FALSE {}
397
+ 2 40 2014-02-26T18:15:51.000Z FALSE "{""enabled"":true}"
398
+ EOS
399
+ end
400
+
359
401
  it 'should match target data' do
360
402
  is_expected.to eq(target_data)
361
403
  end
@@ -363,56 +405,44 @@ describe Masamune::Schema::Map do
363
405
  it_behaves_like 'apply input/output'
364
406
  end
365
407
 
366
- context 'from event to fact' do
408
+ context 'with multiple outputs' do
367
409
  before do
368
410
  catalog.schema :files do
369
- map from: hive.user_event, to: hive.user_fact do |row|
370
- if row[:type] =~ /update/
371
- [
372
- {
373
- 'tenant.tenant_id' => row[:tenant_id],
374
- 'delta' => 0,
375
- 'time_key' => row[:created_at]
376
- },
377
- {
378
- 'tenant.tenant_id' => row[:tenant_id],
379
- 'delta' => 0,
380
- 'time_key' => row[:created_at]
381
- }
382
- ]
383
- else
384
- {
385
- 'tenant.tenant_id' => row[:tenant_id],
386
- 'delta' => row[:type] =~ /create/ ? 1 : -1,
387
- 'time_key' => row[:created_at]
388
- }
389
- end
411
+ file 'input' do
412
+ column 'id', type: :integer
413
+ end
414
+
415
+ file 'output' do
416
+ column 'id', type: :integer
417
+ end
418
+
419
+ map from: files.input, to: files.output do |row|
420
+ [row, row]
390
421
  end
391
422
  end
392
423
  end
393
424
 
394
425
  let(:source) do
395
- catalog.hive.user_event
426
+ catalog.files.input
396
427
  end
397
428
 
398
429
  let(:target) do
399
- catalog.hive.user_fact
430
+ catalog.files.output
400
431
  end
401
432
 
402
433
  let(:source_data) do
403
434
  <<-EOS.strip_heredoc
404
- X user_create 3 10 0 1 "{""enabled"":true}" \\N \\N 2015-01-01T00:10:00Z
405
- Y user_update 3 10 0 1 "{""enabled"":true}" \\N \\N 2015-01-01T00:20:00Z
406
- Z user_delete 3 10 0 1 "{""enabled"":true}" \\N \\N 2015-01-01T00:30:00Z
435
+ 1
436
+ 2
407
437
  EOS
408
438
  end
409
439
 
410
440
  let(:target_data) do
411
441
  <<-EOS.strip_heredoc
412
- 10 1 1420071000
413
- 10 0 1420071600
414
- 10 0 1420071600
415
- 10 -1 1420072200
442
+ 1
443
+ 1
444
+ 2
445
+ 2
416
446
  EOS
417
447
  end
418
448
 
@@ -423,75 +453,40 @@ describe Masamune::Schema::Map do
423
453
  it_behaves_like 'apply input/output'
424
454
  end
425
455
 
426
- context 'from event with array attribute to fact' do
456
+ context 'without block' do
427
457
  before do
428
- catalog.clear!
429
- catalog.schema :hive do
430
- event 'user' do
431
- attribute 'id', type: :integer, immutable: true
432
- attribute 'group_id', type: :integer, array: true
433
- end
434
-
435
- dimension 'group', type: :two, implicit: true do
436
- column 'group_id'
458
+ catalog.schema :files do
459
+ file 'input' do
460
+ column 'id', type: :integer
437
461
  end
438
462
 
439
- fact 'user' do
440
- references :group
441
- column 'junk'
442
- measure 'total'
463
+ file 'output' do
464
+ column 'id', type: :integer
443
465
  end
444
466
 
445
- map from: hive.user_event, to: hive.user_fact, columns: %w(group.group_id total time_key) do |row|
446
- result = []
447
- (row[:group_id_now] - row[:group_id_was]).each do |group_id|
448
- result <<
449
- {
450
- 'group.group_id' => group_id,
451
- 'total' => 1,
452
- 'time_key' => row[:created_at]
453
- }
454
- end
455
- (row[:group_id_was] - row[:group_id_now]).each do |group_id|
456
- result <<
457
- {
458
- 'group.group_id' => group_id,
459
- 'total' => -1,
460
- 'time_key' => row[:created_at]
461
- }
462
- end
463
- result
464
- end
467
+ map from: files.input, to: files.output
465
468
  end
466
469
  end
467
470
 
468
471
  let(:source) do
469
- catalog.hive.user_event
472
+ catalog.files.input
470
473
  end
471
474
 
472
475
  let(:target) do
473
- catalog.hive.user_fact
476
+ catalog.files.output
474
477
  end
475
478
 
476
479
  let(:source_data) do
477
480
  <<-EOS.strip_heredoc
478
- # new lines and comments should be skipped
479
-
480
- X user_create 3 [1,2] [] 0 2015-01-01T00:10:00Z
481
- Y user_update 3 [1,2,3] [1,2] 1 2015-01-01T00:20:00Z
482
- Y user_update 3 [1,2] [1,2,3] 1 2015-01-01T00:30:00Z
483
- Z user_delete 3 [] [1,2] 0 2015-01-01T00:40:00Z
481
+ 1
482
+ 2
484
483
  EOS
485
484
  end
486
485
 
487
486
  let(:target_data) do
488
487
  <<-EOS.strip_heredoc
489
- 1 1 1420071000
490
- 2 1 1420071000
491
- 3 1 1420071600
492
- 3 -1 1420072200
493
- 1 -1 1420072800
494
- 2 -1 1420072800
488
+ 1
489
+ 2
495
490
  EOS
496
491
  end
497
492
 
@@ -502,44 +497,47 @@ describe Masamune::Schema::Map do
502
497
  it_behaves_like 'apply input/output'
503
498
  end
504
499
 
505
- context 'from event to postgres dimension with raw json' do
500
+ context 'from file to table' do
506
501
  before do
507
- catalog.schema :files do
508
- map from: hive.user_event, to: postgres.user_dimension do |row|
509
- {
510
- 'tenant_id' => row[:tenant_id],
511
- 'user_id' => row[:id],
512
- 'user_account_state.name' => row[:type] =~ /delete/ ? 'deleted' : 'active',
513
- 'admin' => row[:type] =~ /delete/ ? row[:admin_was] : row[:admin_now],
514
- 'preferences_now' => row[:preferences_now],
515
- 'preferences_was' => row[:preferences_was],
516
- 'source' => 'user_event',
517
- 'cluster_id' => 100
518
- }
502
+ catalog.schema :postgres do
503
+ table 'parent' do
504
+ column 'id', type: :integer
519
505
  end
506
+
507
+ file 'input', format: :csv, headers: false do
508
+ column 'parent.id', type: :integer
509
+ column 'id', type: :integer
510
+ end
511
+
512
+ table 'output' do
513
+ references :parent
514
+ column 'id', type: :integer
515
+ end
516
+
517
+ map from: postgres.input_file, to: postgres.output_table
520
518
  end
521
519
  end
522
520
 
523
521
  let(:source) do
524
- catalog.hive.user_event
522
+ catalog.postgres.input_file
525
523
  end
526
524
 
527
525
  let(:target) do
528
- catalog.postgres.user_dimension
526
+ catalog.postgres.output_table
529
527
  end
530
528
 
531
529
  let(:source_data) do
532
530
  <<-EOS.strip_heredoc
533
- X user_create 1 30 0 \\N \\N \\N
534
- Y user_delete 2 40 0 1 {"enabled":true} \\N
531
+ 10,1
532
+ 10,2
535
533
  EOS
536
534
  end
537
535
 
538
536
  let(:target_data) do
539
537
  <<-EOS.strip_heredoc
540
- tenant_id,user_id,user_account_state_type_name,admin,preferences_now,preferences_was,source,cluster_id
541
- 30,1,active,FALSE,{},{},user_event,100
542
- 40,2,deleted,TRUE,"{""enabled"":true}",{},user_event,100
538
+ parent_table_id,id
539
+ 10,1
540
+ 10,2
543
541
  EOS
544
542
  end
545
543
 
@@ -566,7 +564,7 @@ describe Masamune::Schema::Map do
566
564
  it { is_expected.to eq(%Q{"{}","{}"}) }
567
565
  end
568
566
 
569
- context 'with raw quoted json' do
567
+ context 'with quoted empty json' do
570
568
  before do
571
569
  io.write '"{}","{}"'
572
570
  io.rewind