masamune 0.11.9 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. checksums.yaml +4 -4
  2. data/lib/masamune/actions/transform.rb +31 -16
  3. data/lib/masamune/schema.rb +0 -1
  4. data/lib/masamune/schema/catalog.rb +2 -10
  5. data/lib/masamune/schema/column.rb +16 -30
  6. data/lib/masamune/schema/dimension.rb +2 -9
  7. data/lib/masamune/schema/fact.rb +0 -4
  8. data/lib/masamune/schema/map.rb +1 -1
  9. data/lib/masamune/schema/row.rb +3 -3
  10. data/lib/masamune/schema/store.rb +1 -3
  11. data/lib/masamune/schema/table.rb +28 -2
  12. data/lib/masamune/transform.rb +0 -1
  13. data/lib/masamune/transform/define_schema.rb +0 -6
  14. data/lib/masamune/transform/define_table.hql.erb +7 -6
  15. data/lib/masamune/transform/define_table.rb +1 -0
  16. data/lib/masamune/transform/{define_event_view.hql.erb → denormalize_table.hql.erb} +8 -26
  17. data/lib/masamune/transform/denormalize_table.psql.erb +1 -1
  18. data/lib/masamune/transform/denormalize_table.rb +13 -4
  19. data/lib/masamune/transform/snapshot_dimension.rb +1 -1
  20. data/lib/masamune/transform/stage_fact.rb +1 -1
  21. data/lib/masamune/version.rb +1 -1
  22. data/spec/masamune/actions/transform_spec.rb +50 -18
  23. data/spec/masamune/schema/catalog_spec.rb +0 -53
  24. data/spec/masamune/schema/column_spec.rb +9 -41
  25. data/spec/masamune/schema/fact_spec.rb +3 -1
  26. data/spec/masamune/schema/map_spec.rb +187 -189
  27. data/spec/masamune/schema/table_spec.rb +8 -0
  28. data/spec/masamune/transform/bulk_upsert.dimension_spec.rb +4 -6
  29. data/spec/masamune/transform/define_schema_spec.rb +5 -6
  30. data/spec/masamune/transform/define_table.dimension_spec.rb +101 -2
  31. data/spec/masamune/transform/define_table.fact_spec.rb +2 -0
  32. data/spec/masamune/transform/denormalize_table_spec.rb +170 -14
  33. data/spec/masamune/transform/snapshot_dimension_spec.rb +1 -1
  34. data/spec/masamune/transform/stage_dimension_spec.rb +3 -3
  35. metadata +3 -9
  36. data/lib/masamune/schema/event.rb +0 -121
  37. data/lib/masamune/transform/define_event_view.rb +0 -60
  38. data/spec/masamune/schema/event_spec.rb +0 -75
  39. data/spec/masamune/transform/define_event_view_spec.rb +0 -84
@@ -33,7 +33,7 @@ ON
33
33
  <%= condition %>
34
34
  <%- end -%>
35
35
  ORDER BY
36
- <%- target.order_by_columns(columns).each do |column, last| -%>
36
+ <%- target.order_by_columns(order_by).each do |column, last| -%>
37
37
  <%= column %><%= ',' unless last %>
38
38
  <%- end -%>
39
39
  ;
@@ -24,20 +24,29 @@ module Masamune::Transform
24
24
  module DenormalizeTable
25
25
  extend ActiveSupport::Concern
26
26
 
27
- def denormalize_table(target, columns = [])
28
- Operator.new(__method__, target: target, columns: columns, presenters: { postgres: Postgres })
27
+ def denormalize_table(target, options = {})
28
+ options.symbolize_keys!
29
+ columns = options[:include] || []
30
+ columns += options[:columns] || target.denormalized_column_names
31
+ columns -= options[:except] || []
32
+ order_by = options[:order] || columns
33
+ Operator.new(__method__, target: target, columns: columns, order_by: order_by, presenters: { postgres: Common, hive: Common })
29
34
  end
30
35
 
31
36
  private
32
37
 
33
- class Postgres < SimpleDelegator
38
+ class Common < SimpleDelegator
34
39
  include Masamune::LastElement
35
40
 
36
41
  def select_columns(column_names)
37
42
  column_names.map do |column_name|
38
43
  next unless column = dereference_column_name(column_name)
39
44
  if column.reference
40
- "#{column.foreign_key_name} AS #{column.name}"
45
+ if column.reference.implicit || column.reference.degenerate
46
+ "#{column.name} AS #{column.name}"
47
+ else
48
+ "#{column.foreign_key_name} AS #{column.name}"
49
+ end
41
50
  else
42
51
  column.qualified_name
43
52
  end
@@ -56,7 +56,7 @@ module Masamune::Transform
56
56
  if column.natural_key
57
57
  "#{column.name} AS #{column.name}"
58
58
  elsif column.type == :key_value
59
- "hstore_merge(#{column.name}_now) OVER #{window} - hstore_merge(#{column.name}_was) OVER #{window} AS #{column.name}"
59
+ "hstore_merge(#{column.name}) OVER #{window} AS #{column.name}"
60
60
  else
61
61
  "coalesce_merge(#{column.name}) OVER #{window} AS #{column.name}"
62
62
  end
@@ -77,7 +77,7 @@ module Masamune::Transform
77
77
 
78
78
  if column.reference && !column.reference.default.nil?
79
79
  coalesce_values << column.reference.default(column.adjacent) if column.adjacent.natural_key
80
- elsif !column.adjacent.default.nil?
80
+ elsif column.adjacent && !column.adjacent.default.nil?
81
81
  coalesce_values << column.adjacent.sql_value(column.adjacent.default)
82
82
  end
83
83
 
@@ -21,5 +21,5 @@
21
21
  # THE SOFTWARE.
22
22
 
23
23
  module Masamune
24
- VERSION = '0.11.9'
24
+ VERSION = '0.12.0'
25
25
  end
@@ -46,16 +46,6 @@ describe Masamune::Actions::Transform do
46
46
  column 'updated_at', type: :timestamp
47
47
  end
48
48
 
49
- map from: postgres.user_file, to: postgres.user_dimension do |row|
50
- {
51
- user_id: row[:id],
52
- tenant_id: row[:tenant_id],
53
- source_kind: 'users',
54
- start_at: row[:updated_at],
55
- delta: 1
56
- }
57
- end
58
-
59
49
  fact 'visits', partition: 'y%Ym%m', grain: %w(hourly daily monthly) do
60
50
  references :date
61
51
  references :user
@@ -89,13 +79,37 @@ describe Masamune::Actions::Transform do
89
79
  end
90
80
 
91
81
  describe '.load_dimension' do
92
- before do
93
- mock_command(/\Apsql/, mock_success)
82
+ subject { instance.load_dimension(source_file, postgres.user_file, postgres.user_dimension) }
83
+
84
+ context 'without :map' do
85
+ before do
86
+ expect_any_instance_of(Masamune::Schema::Map).to_not receive(:apply)
87
+ mock_command(/\Apsql/, mock_success)
88
+ end
89
+
90
+ it { is_expected.to be_success }
94
91
  end
95
92
 
96
- subject { instance.load_dimension(source_file, postgres.user_file, postgres.user_dimension) }
93
+ context 'with :map' do
94
+ before do
95
+ catalog.schema :postgres do
96
+ map from: postgres.user_file, to: postgres.user_dimension do |row|
97
+ {
98
+ user_id: row[:id],
99
+ tenant_id: row[:tenant_id],
100
+ source_kind: 'users',
101
+ start_at: row[:updated_at],
102
+ delta: 1
103
+ }
104
+ end
105
+ end
106
+
107
+ expect_any_instance_of(Masamune::Schema::Map).to receive(:apply).and_call_original
108
+ mock_command(/\Apsql/, mock_success)
109
+ end
97
110
 
98
- it { is_expected.to be_success }
111
+ it { is_expected.to be_success }
112
+ end
99
113
  end
100
114
 
101
115
  describe '.relabel_dimension' do
@@ -121,13 +135,31 @@ describe Masamune::Actions::Transform do
121
135
  describe '.load_fact' do
122
136
  let(:date) { DateTime.civil(2014, 8) }
123
137
 
124
- before do
125
- mock_command(/\Apsql/, mock_success)
138
+ context 'without :map' do
139
+ before do
140
+ expect_any_instance_of(Masamune::Schema::Map).to_not receive(:apply)
141
+ mock_command(/\Apsql/, mock_success)
142
+ end
143
+
144
+ subject { instance.load_fact(source_file, postgres.visits_hourly_file, postgres.visits_hourly_fact, date) }
145
+
146
+ it { is_expected.to be_success }
126
147
  end
127
148
 
128
- subject { instance.load_fact(source_file, postgres.visits_hourly_file, postgres.visits_hourly_fact, date) }
149
+ context 'with :map' do
150
+ before do
151
+ catalog.schema :postgres do
152
+ map from: postgres.visits_hourly_file, to: postgres.visits_hourly_fact, distinct: true
153
+ end
129
154
 
130
- it { is_expected.to be_success }
155
+ expect_any_instance_of(Masamune::Schema::Map).to receive(:apply).and_call_original
156
+ mock_command(/\Apsql/, mock_success)
157
+ end
158
+
159
+ subject { instance.load_fact(source_file, postgres.visits_hourly_file, postgres.visits_hourly_fact, date) }
160
+
161
+ it { is_expected.to be_success }
162
+ end
131
163
  end
132
164
 
133
165
  describe '.rollup_fact' do
@@ -332,30 +332,6 @@ describe Masamune::Schema::Catalog do
332
332
  it { expect(visits_monthly.measures).to include :count }
333
333
  end
334
334
 
335
- context 'when schema contains events' do
336
- before do
337
- instance.schema :hive do
338
- event 'event_one' do
339
- attribute 'attribute_one'
340
- attribute 'attribute_two'
341
- end
342
-
343
- event 'event_two' do
344
- attribute 'attribute_three'
345
- attribute 'attribute_four'
346
- end
347
- end
348
- end
349
-
350
- let(:event_one) { hive.event_one_event }
351
- let(:event_two) { hive.event_two_event }
352
-
353
- it { expect(event_one.attributes).to include :attribute_one }
354
- it { expect(event_one.attributes).to include :attribute_two }
355
- it { expect(event_two.attributes).to include :attribute_three }
356
- it { expect(event_two.attributes).to include :attribute_four }
357
- end
358
-
359
335
  context 'when schema contains file' do
360
336
  before do
361
337
  instance.schema :postgres do
@@ -456,35 +432,6 @@ describe Masamune::Schema::Catalog do
456
432
  end
457
433
  end
458
434
 
459
- context 'when schema contains map from: event' do
460
- before do
461
- instance.schema :postgres do
462
- dimension 'user', type: :mini do
463
- column 'user_id', type: :integer, natural_key: true
464
- column 'name', type: :string
465
- end
466
-
467
- event 'users' do
468
- attribute 'id', type: :integer, immutable: true
469
- attribute 'name', type: :string
470
- end
471
-
472
- map from: postgres.users_event, to: postgres.user_dimension do |row|
473
- {
474
- 'user_id' => row[:id],
475
- 'name' => row[:name_now]
476
- }
477
- end
478
- end
479
- end
480
-
481
- subject(:map) { postgres.users_event.map(to: postgres.user_dimension) }
482
-
483
- it 'constructs map' do
484
- expect(map.function).to_not be_nil
485
- end
486
- end
487
-
488
435
  context 'when schema contains map missing the from: field' do
489
436
  subject(:schema) do
490
437
  instance.schema :postgres do
@@ -433,31 +433,6 @@ describe Masamune::Schema::Column do
433
433
  it { is_expected.to eq({"k" => "v"}) }
434
434
  end
435
435
  end
436
-
437
- context 'with type :yaml and sub_type :boolean' do
438
- let(:column) { described_class.new(id: 'yaml', type: :yaml, sub_type: :boolean) }
439
- let(:value) do
440
- {
441
- 'true' => true,
442
- 'one' => '1',
443
- 'zero' => '0',
444
- 'false' => false,
445
- 'string' => 'string',
446
- 'one_integer' => 1,
447
- 'zero_integer' => 0
448
- }.to_yaml
449
- end
450
-
451
- it 'should cast yaml to ruby' do
452
- expect(result['true']).to eq(true)
453
- expect(result['false']).to eq(false)
454
- expect(result['one']).to eq(true)
455
- expect(result['zero']).to eq(false)
456
- expect(result['one_integer']).to eq(true)
457
- expect(result['zero_integer']).to eq(false)
458
- expect(result.key?('string')).to eq(false)
459
- end
460
- end
461
436
  end
462
437
 
463
438
  describe '#default_ruby_value' do
@@ -665,8 +640,13 @@ describe Masamune::Schema::Column do
665
640
  it { is_expected.to eq(false) }
666
641
  end
667
642
 
668
- context 'when surrogate_key' do
669
- let(:column) { described_class.new id: 'name', type: :string, surrogate_key: true }
643
+ context 'when column has default of false' do
644
+ let(:column) { described_class.new id: 'flag', type: :boolean, default: false }
645
+ it { is_expected.to eq(false) }
646
+ end
647
+
648
+ context 'when column has reference' do
649
+ let(:column) { described_class.new id: 'name', type: :string }
670
650
  it { is_expected.to eq(true) }
671
651
 
672
652
  context 'when reference allow null' do
@@ -682,22 +662,10 @@ describe Masamune::Schema::Column do
682
662
  end
683
663
  it { is_expected.to eq(false) }
684
664
  end
685
- end
686
-
687
- context 'when natural_key' do
688
- let(:column) { described_class.new id: 'name', type: :string, natural_key: true }
689
- it { is_expected.to eq(true) }
690
665
 
691
- context 'when reference allow null' do
666
+ context 'when reference has default of false' do
692
667
  before do
693
- allow(column).to receive(:reference).and_return(double(null: true, default: nil))
694
- end
695
- it { is_expected.to eq(false) }
696
- end
697
-
698
- context 'when reference has default' do
699
- before do
700
- allow(column).to receive(:reference).and_return(double(null: false, default: 'Unknown'))
668
+ allow(column).to receive(:reference).and_return(double(null: false, default: false))
701
669
  end
702
670
  it { is_expected.to eq(false) }
703
671
  end
@@ -47,7 +47,9 @@ describe Masamune::Schema::Fact do
47
47
  Masamune::Schema::TableReference.new(user_dimension)
48
48
  ],
49
49
  columns: [
50
- Masamune::Schema::Column.new(id: 'total', type: :integer)
50
+ Masamune::Schema::Column.new(id: 'total', type: :integer),
51
+ Masamune::Schema::Column.new(id: 'y', type: :integer, partition: true),
52
+ Masamune::Schema::Column.new(id: 'm', type: :integer, partition: true)
51
53
  ]
52
54
  end
53
55
 
@@ -52,32 +52,6 @@ describe Masamune::Schema::Map do
52
52
  column 'deleted_at', type: :timestamp, null: true
53
53
  end
54
54
  end
55
-
56
- catalog.schema :hive do
57
- event 'user' do
58
- attribute 'id', type: :integer, immutable: true
59
- attribute 'tenant_id', type: :integer, immutable: true
60
- attribute 'admin', type: :boolean
61
- attribute 'preferences', type: :json
62
- end
63
-
64
- dimension 'tenant', type: :two, implicit: true do
65
- column 'tenant_id'
66
- end
67
-
68
- fact 'user' do
69
- references :tenant
70
- measure 'delta'
71
- end
72
-
73
- file 'user' do
74
- column 'id', type: :integer
75
- column 'tenant_id', type: :integer
76
- column 'admin', type: :boolean
77
- column 'preferences', type: :json
78
- column 'deleted_at', type: :timestamp, null: true
79
- end
80
- end
81
55
  end
82
56
 
83
57
  context 'without source' do
@@ -136,22 +110,26 @@ describe Masamune::Schema::Map do
136
110
  end
137
111
 
138
112
  context 'with undefined function' do
139
- let(:source) { catalog.hive.user_event }
140
- let(:target) { catalog.hive.user_fact }
141
- let(:source_data) { '' }
142
- let(:target_data) { '' }
143
-
144
113
  before do
145
- catalog.schema :hive do
146
- map from: hive.user_event, to: hive.user_fact do |row|
114
+ catalog.schema :files do
115
+ file 'input'
116
+ file 'output'
117
+
118
+ map from: files.input , to: files.output do |row|
119
+ # Empty
147
120
  end
148
121
  end
149
122
  end
150
123
 
124
+ let(:source) { catalog.files.input }
125
+ let(:target) { catalog.files.output }
126
+ let(:source_data) { '' }
127
+ let(:target_data) { '' }
128
+
151
129
  it { expect { subject }.to raise_error ArgumentError, /function for map between .* does not return output for default input/ }
152
130
  end
153
131
 
154
- context 'from csv file to dimension' do
132
+ context 'from csv file to postgres dimension' do
155
133
  before do
156
134
  catalog.schema :files do
157
135
  map from: postgres.user_file, to: postgres.user_dimension, distinct: true do |row|
@@ -161,7 +139,7 @@ describe Masamune::Schema::Map do
161
139
  'user_account_state.name' => row[:deleted_at] ? 'deleted' : 'active',
162
140
  'hr_user_account_state.name' => row[:deleted_at] ? 'deleted' : 'active',
163
141
  'admin' => row[:admin],
164
- 'preferences_now' => row[:preferences],
142
+ 'preferences' => row[:preferences],
165
143
  'source' => 'users_file',
166
144
  'cluster_id' => 100
167
145
  }
@@ -193,7 +171,7 @@ describe Masamune::Schema::Map do
193
171
 
194
172
  let(:target_data) do
195
173
  <<-EOS.strip_heredoc
196
- tenant_id,user_id,user_account_state_type_name,hr_user_account_state_type_name,admin,preferences_now,source,cluster_id
174
+ tenant_id,user_id,user_account_state_type_name,hr_user_account_state_type_name,admin,preferences,source,cluster_id
197
175
  30,1,active,active,FALSE,{},users_file,100
198
176
  40,2,deleted,deleted,TRUE,"{""enabled"":true}",users_file,100
199
177
  EOS
@@ -210,19 +188,26 @@ describe Masamune::Schema::Map do
210
188
  it_behaves_like 'apply input/output'
211
189
  end
212
190
 
213
- context 'from event to postgres dimension with quoted json' do
191
+ context 'from tsv file to postgres dimension' do
214
192
  before do
215
193
  catalog.schema :files do
216
- map from: hive.user_event, to: postgres.user_dimension do |row|
194
+ file 'input', format: :tsv, headers: false do
195
+ column 'id', type: :integer
196
+ column 'tenant_id', type: :integer
197
+ column 'admin', type: :boolean
198
+ column 'preferences', type: :json
199
+ column 'deleted_at', type: :timestamp, null: true
200
+ end
201
+
202
+ map from: files.input, to: postgres.user_dimension do |row|
217
203
  raise if row[:tenant_id] == 42
218
204
  {
219
205
  'tenant_id' => row[:tenant_id],
220
206
  'user_id' => row[:id],
221
- 'user_account_state.name' => row[:type] =~ /delete/ ? 'deleted' : 'active',
222
- 'admin' => row[:type] =~ /delete/ ? row[:admin_was] : row[:admin_now],
223
- 'preferences_now' => row[:preferences_now],
224
- 'preferences_was' => row[:preferences_was],
225
- 'source' => 'user_event',
207
+ 'user_account_state.name' => row[:deleted_at] ? 'deleted' : 'active',
208
+ 'admin' => row[:admin],
209
+ 'preferences' => row[:preferences],
210
+ 'source' => 'user_file',
226
211
  'cluster_id' => 100
227
212
  }
228
213
  end
@@ -230,7 +215,7 @@ describe Masamune::Schema::Map do
230
215
  end
231
216
 
232
217
  let(:source) do
233
- catalog.hive.user_event
218
+ catalog.files.input
234
219
  end
235
220
 
236
221
  let(:target) do
@@ -242,72 +227,113 @@ describe Masamune::Schema::Map do
242
227
  expect(environment.logger).to receive(:warn).with(/failed to parse '{.*}' for #{source.name}/).ordered
243
228
  end
244
229
 
245
- let(:source_data) do
246
- <<-EOS.strip_heredoc
247
- X user_create 1 30 0 \\N \\N \\N
248
- # NOTE intentional duplicate record
249
- X user_create 1 30 0 \\N \\N \\N
250
- A user_create 1 42 0 \\N \\N \\N
251
- Y user_delete 2 40 0 1 "{""enabled"":true}" \\N
252
- # NOTE record is intentionally invalid
253
- Z user_create 3 50 0 1 INVALID_JSON \\N
254
- EOS
255
- end
256
-
257
230
  let(:target_data) do
258
231
  <<-EOS.strip_heredoc
259
- tenant_id,user_id,user_account_state_type_name,admin,preferences_now,preferences_was,source,cluster_id
260
- 30,1,active,FALSE,{},{},user_event,100
261
- 30,1,active,FALSE,{},{},user_event,100
262
- 40,2,deleted,TRUE,"{""enabled"":true}",{},user_event,100
232
+ tenant_id,user_id,user_account_state_type_name,admin,preferences,source,cluster_id
233
+ 30,1,active,FALSE,{},user_file,100
234
+ 30,1,active,FALSE,{},user_file,100
235
+ 40,2,deleted,TRUE,"{""enabled"":true}",user_file,100
263
236
  EOS
264
237
  end
265
238
 
266
- it 'should match target data' do
267
- is_expected.to eq(target_data)
239
+ context 'with quoted json' do
240
+ let(:source_data) do
241
+ <<-EOS.strip_heredoc
242
+ 1 30 0
243
+ # NOTE intentional duplicate record
244
+ 1 30 0
245
+ 1 42 0
246
+ 2 40 1 "{""enabled"":true}" 2015-07-19 00:00:00
247
+ # NOTE record is intentionally invalid
248
+ 3 50 0 INVALID_JSON
249
+ EOS
250
+ end
251
+
252
+ it 'should match target data' do
253
+ is_expected.to eq(target_data)
254
+ end
255
+
256
+ it_behaves_like 'apply input/output'
268
257
  end
269
258
 
270
- it_behaves_like 'apply input/output'
259
+ context 'with raw json' do
260
+ let(:source_data) do
261
+ <<-EOS.strip_heredoc
262
+ 1 30 0
263
+ # NOTE intentional duplicate record
264
+ 1 30 0
265
+ 1 42 0
266
+ 2 40 1 {"enabled":true} 2015-07-19 00:00:00
267
+ # NOTE record is intentionally invalid
268
+ 3 50 0 INVALID_JSON
269
+ EOS
270
+ end
271
+
272
+ it 'should match target data' do
273
+ is_expected.to eq(target_data)
274
+ end
275
+
276
+ it_behaves_like 'apply input/output'
277
+ end
271
278
  end
272
279
 
273
- context 'from event to tsv file' do
280
+ context 'from tsv file to csv file' do
274
281
  before do
275
282
  catalog.schema :files do
276
- map from: hive.user_event, to: hive.user_file do |row|
283
+ file 'input', format: :tsv, headers: false do
284
+ column 'id', type: :integer
285
+ column 'tenant_id', type: :integer
286
+ column 'admin', type: :boolean
287
+ column 'preferences', type: :json
288
+ column 'deleted_at', type: :timestamp, null: true
289
+ end
290
+
291
+ file 'output', format: :csv, headers: true do
292
+ column 'id', type: :integer
293
+ column 'tenant_id', type: :integer
294
+ column 'admin', type: :boolean
295
+ column 'preferences', type: :yaml
296
+ column 'deleted_at', type: :timestamp, null: true
297
+ end
298
+
299
+ map from: files.input, to: files.output do |row|
277
300
  {
278
301
  'id' => row[:id],
279
302
  'tenant_id' => row[:tenant_id],
280
- 'deleted_at' => row[:type] =~ /delete/ ? row[:created_at] : nil,
281
- 'admin' => row[:admin_now],
282
- 'preferences' => row[:preferences_now]
303
+ 'deleted_at' => row[:deleted_at],
304
+ 'admin' => row[:admin],
305
+ 'preferences' => row[:preferences]
283
306
  }
284
307
  end
285
308
  end
286
309
  end
287
310
 
288
311
  let(:source) do
289
- catalog.hive.user_event
312
+ catalog.files.input
290
313
  end
291
314
 
292
315
  let(:target) do
293
- catalog.hive.user_file
316
+ catalog.files.output
294
317
  end
295
318
 
296
319
  let(:source_data) do
297
320
  <<-EOS.strip_heredoc
298
- X user_create 1 30 0 \\N \\N \\N 0 \\N
299
- Y user_delete 2 40 0 1 "{""enabled"":true}" \\N 0 2014-02-26T18:15:51Z
321
+ 1 30 0
322
+ 2 40 0 "{""enabled"":true}" 2014-02-26T18:15:51.000Z
300
323
  EOS
301
324
  end
302
325
 
303
326
  let(:target_data) do
304
327
  <<-EOS.strip_heredoc
305
- 1 30 {}
306
- 2 40 2014-02-26T18:15:51.000Z "{""enabled"":true}"
328
+ id,tenant_id,deleted_at,admin,preferences
329
+ 1,30,,FALSE,"--- {}
330
+ "
331
+ 2,40,2014-02-26T18:15:51.000Z,FALSE,"---
332
+ enabled: true
333
+ "
307
334
  EOS
308
335
  end
309
336
 
310
-
311
337
  it 'should match target data' do
312
338
  is_expected.to eq(target_data)
313
339
  end
@@ -315,37 +341,46 @@ describe Masamune::Schema::Map do
315
341
  it_behaves_like 'apply input/output'
316
342
  end
317
343
 
318
- context 'from event to csv file' do
344
+ context 'from csv file to tsv file' do
319
345
  before do
320
346
  catalog.schema :files do
321
- map from: hive.user_event, to: postgres.user_file do |row|
347
+ file 'input', format: :csv, headers: true, json_encoding: :quoted do
348
+ column 'id', type: :integer
349
+ column 'tenant_id', type: :integer
350
+ column 'admin', type: :boolean
351
+ column 'preferences', type: :yaml
352
+ column 'deleted_at', type: :timestamp, null: true
353
+ end
354
+
355
+ file 'output', format: :tsv, headers: false do
356
+ column 'id', type: :integer
357
+ column 'tenant_id', type: :integer
358
+ column 'admin', type: :boolean
359
+ column 'preferences', type: :json
360
+ column 'deleted_at', type: :timestamp, null: true
361
+ end
362
+
363
+ map from: files.input, to: files.output do |row|
322
364
  {
323
365
  'id' => row[:id],
324
366
  'tenant_id' => row[:tenant_id],
325
- 'deleted_at' => row[:type] =~ /delete/ ? row[:created_at] : nil,
326
- 'admin' => row[:admin_now],
327
- 'preferences' => row[:preferences_now]
367
+ 'deleted_at' => row[:deleted_at],
368
+ 'admin' => row[:admin],
369
+ 'preferences' => row[:preferences]
328
370
  }
329
371
  end
330
372
  end
331
373
  end
332
374
 
333
375
  let(:source) do
334
- catalog.hive.user_event
376
+ catalog.files.input
335
377
  end
336
378
 
337
379
  let(:target) do
338
- catalog.postgres.user_file
380
+ catalog.files.output
339
381
  end
340
382
 
341
383
  let(:source_data) do
342
- <<-EOS.strip_heredoc
343
- X user_create 1 30 0 \\N \\N \\N 0 \\N
344
- Y user_delete 2 40 0 1 "{""enabled"":true}" \\N 0 2014-02-26T18:15:51Z
345
- EOS
346
- end
347
-
348
- let(:target_data) do
349
384
  <<-EOS.strip_heredoc
350
385
  id,tenant_id,deleted_at,admin,preferences
351
386
  1,30,,FALSE,"--- {}
@@ -356,6 +391,13 @@ describe Masamune::Schema::Map do
356
391
  EOS
357
392
  end
358
393
 
394
+ let(:target_data) do
395
+ <<-EOS.strip_heredoc
396
+ 1 30 FALSE {}
397
+ 2 40 2014-02-26T18:15:51.000Z FALSE "{""enabled"":true}"
398
+ EOS
399
+ end
400
+
359
401
  it 'should match target data' do
360
402
  is_expected.to eq(target_data)
361
403
  end
@@ -363,56 +405,44 @@ describe Masamune::Schema::Map do
363
405
  it_behaves_like 'apply input/output'
364
406
  end
365
407
 
366
- context 'from event to fact' do
408
+ context 'with multiple outputs' do
367
409
  before do
368
410
  catalog.schema :files do
369
- map from: hive.user_event, to: hive.user_fact do |row|
370
- if row[:type] =~ /update/
371
- [
372
- {
373
- 'tenant.tenant_id' => row[:tenant_id],
374
- 'delta' => 0,
375
- 'time_key' => row[:created_at]
376
- },
377
- {
378
- 'tenant.tenant_id' => row[:tenant_id],
379
- 'delta' => 0,
380
- 'time_key' => row[:created_at]
381
- }
382
- ]
383
- else
384
- {
385
- 'tenant.tenant_id' => row[:tenant_id],
386
- 'delta' => row[:type] =~ /create/ ? 1 : -1,
387
- 'time_key' => row[:created_at]
388
- }
389
- end
411
+ file 'input' do
412
+ column 'id', type: :integer
413
+ end
414
+
415
+ file 'output' do
416
+ column 'id', type: :integer
417
+ end
418
+
419
+ map from: files.input, to: files.output do |row|
420
+ [row, row]
390
421
  end
391
422
  end
392
423
  end
393
424
 
394
425
  let(:source) do
395
- catalog.hive.user_event
426
+ catalog.files.input
396
427
  end
397
428
 
398
429
  let(:target) do
399
- catalog.hive.user_fact
430
+ catalog.files.output
400
431
  end
401
432
 
402
433
  let(:source_data) do
403
434
  <<-EOS.strip_heredoc
404
- X user_create 3 10 0 1 "{""enabled"":true}" \\N \\N 2015-01-01T00:10:00Z
405
- Y user_update 3 10 0 1 "{""enabled"":true}" \\N \\N 2015-01-01T00:20:00Z
406
- Z user_delete 3 10 0 1 "{""enabled"":true}" \\N \\N 2015-01-01T00:30:00Z
435
+ 1
436
+ 2
407
437
  EOS
408
438
  end
409
439
 
410
440
  let(:target_data) do
411
441
  <<-EOS.strip_heredoc
412
- 10 1 1420071000
413
- 10 0 1420071600
414
- 10 0 1420071600
415
- 10 -1 1420072200
442
+ 1
443
+ 1
444
+ 2
445
+ 2
416
446
  EOS
417
447
  end
418
448
 
@@ -423,75 +453,40 @@ describe Masamune::Schema::Map do
423
453
  it_behaves_like 'apply input/output'
424
454
  end
425
455
 
426
- context 'from event with array attribute to fact' do
456
+ context 'without block' do
427
457
  before do
428
- catalog.clear!
429
- catalog.schema :hive do
430
- event 'user' do
431
- attribute 'id', type: :integer, immutable: true
432
- attribute 'group_id', type: :integer, array: true
433
- end
434
-
435
- dimension 'group', type: :two, implicit: true do
436
- column 'group_id'
458
+ catalog.schema :files do
459
+ file 'input' do
460
+ column 'id', type: :integer
437
461
  end
438
462
 
439
- fact 'user' do
440
- references :group
441
- column 'junk'
442
- measure 'total'
463
+ file 'output' do
464
+ column 'id', type: :integer
443
465
  end
444
466
 
445
- map from: hive.user_event, to: hive.user_fact, columns: %w(group.group_id total time_key) do |row|
446
- result = []
447
- (row[:group_id_now] - row[:group_id_was]).each do |group_id|
448
- result <<
449
- {
450
- 'group.group_id' => group_id,
451
- 'total' => 1,
452
- 'time_key' => row[:created_at]
453
- }
454
- end
455
- (row[:group_id_was] - row[:group_id_now]).each do |group_id|
456
- result <<
457
- {
458
- 'group.group_id' => group_id,
459
- 'total' => -1,
460
- 'time_key' => row[:created_at]
461
- }
462
- end
463
- result
464
- end
467
+ map from: files.input, to: files.output
465
468
  end
466
469
  end
467
470
 
468
471
  let(:source) do
469
- catalog.hive.user_event
472
+ catalog.files.input
470
473
  end
471
474
 
472
475
  let(:target) do
473
- catalog.hive.user_fact
476
+ catalog.files.output
474
477
  end
475
478
 
476
479
  let(:source_data) do
477
480
  <<-EOS.strip_heredoc
478
- # new lines and comments should be skipped
479
-
480
- X user_create 3 [1,2] [] 0 2015-01-01T00:10:00Z
481
- Y user_update 3 [1,2,3] [1,2] 1 2015-01-01T00:20:00Z
482
- Y user_update 3 [1,2] [1,2,3] 1 2015-01-01T00:30:00Z
483
- Z user_delete 3 [] [1,2] 0 2015-01-01T00:40:00Z
481
+ 1
482
+ 2
484
483
  EOS
485
484
  end
486
485
 
487
486
  let(:target_data) do
488
487
  <<-EOS.strip_heredoc
489
- 1 1 1420071000
490
- 2 1 1420071000
491
- 3 1 1420071600
492
- 3 -1 1420072200
493
- 1 -1 1420072800
494
- 2 -1 1420072800
488
+ 1
489
+ 2
495
490
  EOS
496
491
  end
497
492
 
@@ -502,44 +497,47 @@ describe Masamune::Schema::Map do
502
497
  it_behaves_like 'apply input/output'
503
498
  end
504
499
 
505
- context 'from event to postgres dimension with raw json' do
500
+ context 'from file to table' do
506
501
  before do
507
- catalog.schema :files do
508
- map from: hive.user_event, to: postgres.user_dimension do |row|
509
- {
510
- 'tenant_id' => row[:tenant_id],
511
- 'user_id' => row[:id],
512
- 'user_account_state.name' => row[:type] =~ /delete/ ? 'deleted' : 'active',
513
- 'admin' => row[:type] =~ /delete/ ? row[:admin_was] : row[:admin_now],
514
- 'preferences_now' => row[:preferences_now],
515
- 'preferences_was' => row[:preferences_was],
516
- 'source' => 'user_event',
517
- 'cluster_id' => 100
518
- }
502
+ catalog.schema :postgres do
503
+ table 'parent' do
504
+ column 'id', type: :integer
519
505
  end
506
+
507
+ file 'input', format: :csv, headers: false do
508
+ column 'parent.id', type: :integer
509
+ column 'id', type: :integer
510
+ end
511
+
512
+ table 'output' do
513
+ references :parent
514
+ column 'id', type: :integer
515
+ end
516
+
517
+ map from: postgres.input_file, to: postgres.output_table
520
518
  end
521
519
  end
522
520
 
523
521
  let(:source) do
524
- catalog.hive.user_event
522
+ catalog.postgres.input_file
525
523
  end
526
524
 
527
525
  let(:target) do
528
- catalog.postgres.user_dimension
526
+ catalog.postgres.output_table
529
527
  end
530
528
 
531
529
  let(:source_data) do
532
530
  <<-EOS.strip_heredoc
533
- X user_create 1 30 0 \\N \\N \\N
534
- Y user_delete 2 40 0 1 {"enabled":true} \\N
531
+ 10,1
532
+ 10,2
535
533
  EOS
536
534
  end
537
535
 
538
536
  let(:target_data) do
539
537
  <<-EOS.strip_heredoc
540
- tenant_id,user_id,user_account_state_type_name,admin,preferences_now,preferences_was,source,cluster_id
541
- 30,1,active,FALSE,{},{},user_event,100
542
- 40,2,deleted,TRUE,"{""enabled"":true}",{},user_event,100
538
+ parent_table_id,id
539
+ 10,1
540
+ 10,2
543
541
  EOS
544
542
  end
545
543
 
@@ -566,7 +564,7 @@ describe Masamune::Schema::Map do
566
564
  it { is_expected.to eq(%Q{"{}","{}"}) }
567
565
  end
568
566
 
569
- context 'with raw quoted json' do
567
+ context 'with quoted empty json' do
570
568
  before do
571
569
  io.write '"{}","{}"'
572
570
  io.rewind