masamune 0.11.9 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/masamune/actions/transform.rb +31 -16
- data/lib/masamune/schema.rb +0 -1
- data/lib/masamune/schema/catalog.rb +2 -10
- data/lib/masamune/schema/column.rb +16 -30
- data/lib/masamune/schema/dimension.rb +2 -9
- data/lib/masamune/schema/fact.rb +0 -4
- data/lib/masamune/schema/map.rb +1 -1
- data/lib/masamune/schema/row.rb +3 -3
- data/lib/masamune/schema/store.rb +1 -3
- data/lib/masamune/schema/table.rb +28 -2
- data/lib/masamune/transform.rb +0 -1
- data/lib/masamune/transform/define_schema.rb +0 -6
- data/lib/masamune/transform/define_table.hql.erb +7 -6
- data/lib/masamune/transform/define_table.rb +1 -0
- data/lib/masamune/transform/{define_event_view.hql.erb → denormalize_table.hql.erb} +8 -26
- data/lib/masamune/transform/denormalize_table.psql.erb +1 -1
- data/lib/masamune/transform/denormalize_table.rb +13 -4
- data/lib/masamune/transform/snapshot_dimension.rb +1 -1
- data/lib/masamune/transform/stage_fact.rb +1 -1
- data/lib/masamune/version.rb +1 -1
- data/spec/masamune/actions/transform_spec.rb +50 -18
- data/spec/masamune/schema/catalog_spec.rb +0 -53
- data/spec/masamune/schema/column_spec.rb +9 -41
- data/spec/masamune/schema/fact_spec.rb +3 -1
- data/spec/masamune/schema/map_spec.rb +187 -189
- data/spec/masamune/schema/table_spec.rb +8 -0
- data/spec/masamune/transform/bulk_upsert.dimension_spec.rb +4 -6
- data/spec/masamune/transform/define_schema_spec.rb +5 -6
- data/spec/masamune/transform/define_table.dimension_spec.rb +101 -2
- data/spec/masamune/transform/define_table.fact_spec.rb +2 -0
- data/spec/masamune/transform/denormalize_table_spec.rb +170 -14
- data/spec/masamune/transform/snapshot_dimension_spec.rb +1 -1
- data/spec/masamune/transform/stage_dimension_spec.rb +3 -3
- metadata +3 -9
- data/lib/masamune/schema/event.rb +0 -121
- data/lib/masamune/transform/define_event_view.rb +0 -60
- data/spec/masamune/schema/event_spec.rb +0 -75
- data/spec/masamune/transform/define_event_view_spec.rb +0 -84
@@ -24,20 +24,29 @@ module Masamune::Transform
|
|
24
24
|
module DenormalizeTable
|
25
25
|
extend ActiveSupport::Concern
|
26
26
|
|
27
|
-
def denormalize_table(target,
|
28
|
-
|
27
|
+
def denormalize_table(target, options = {})
|
28
|
+
options.symbolize_keys!
|
29
|
+
columns = options[:include] || []
|
30
|
+
columns += options[:columns] || target.denormalized_column_names
|
31
|
+
columns -= options[:except] || []
|
32
|
+
order_by = options[:order] || columns
|
33
|
+
Operator.new(__method__, target: target, columns: columns, order_by: order_by, presenters: { postgres: Common, hive: Common })
|
29
34
|
end
|
30
35
|
|
31
36
|
private
|
32
37
|
|
33
|
-
class
|
38
|
+
class Common < SimpleDelegator
|
34
39
|
include Masamune::LastElement
|
35
40
|
|
36
41
|
def select_columns(column_names)
|
37
42
|
column_names.map do |column_name|
|
38
43
|
next unless column = dereference_column_name(column_name)
|
39
44
|
if column.reference
|
40
|
-
|
45
|
+
if column.reference.implicit || column.reference.degenerate
|
46
|
+
"#{column.name} AS #{column.name}"
|
47
|
+
else
|
48
|
+
"#{column.foreign_key_name} AS #{column.name}"
|
49
|
+
end
|
41
50
|
else
|
42
51
|
column.qualified_name
|
43
52
|
end
|
@@ -56,7 +56,7 @@ module Masamune::Transform
|
|
56
56
|
if column.natural_key
|
57
57
|
"#{column.name} AS #{column.name}"
|
58
58
|
elsif column.type == :key_value
|
59
|
-
"hstore_merge(#{column.name}
|
59
|
+
"hstore_merge(#{column.name}) OVER #{window} AS #{column.name}"
|
60
60
|
else
|
61
61
|
"coalesce_merge(#{column.name}) OVER #{window} AS #{column.name}"
|
62
62
|
end
|
@@ -77,7 +77,7 @@ module Masamune::Transform
|
|
77
77
|
|
78
78
|
if column.reference && !column.reference.default.nil?
|
79
79
|
coalesce_values << column.reference.default(column.adjacent) if column.adjacent.natural_key
|
80
|
-
elsif !column.adjacent.default.nil?
|
80
|
+
elsif column.adjacent && !column.adjacent.default.nil?
|
81
81
|
coalesce_values << column.adjacent.sql_value(column.adjacent.default)
|
82
82
|
end
|
83
83
|
|
data/lib/masamune/version.rb
CHANGED
@@ -46,16 +46,6 @@ describe Masamune::Actions::Transform do
|
|
46
46
|
column 'updated_at', type: :timestamp
|
47
47
|
end
|
48
48
|
|
49
|
-
map from: postgres.user_file, to: postgres.user_dimension do |row|
|
50
|
-
{
|
51
|
-
user_id: row[:id],
|
52
|
-
tenant_id: row[:tenant_id],
|
53
|
-
source_kind: 'users',
|
54
|
-
start_at: row[:updated_at],
|
55
|
-
delta: 1
|
56
|
-
}
|
57
|
-
end
|
58
|
-
|
59
49
|
fact 'visits', partition: 'y%Ym%m', grain: %w(hourly daily monthly) do
|
60
50
|
references :date
|
61
51
|
references :user
|
@@ -89,13 +79,37 @@ describe Masamune::Actions::Transform do
|
|
89
79
|
end
|
90
80
|
|
91
81
|
describe '.load_dimension' do
|
92
|
-
|
93
|
-
|
82
|
+
subject { instance.load_dimension(source_file, postgres.user_file, postgres.user_dimension) }
|
83
|
+
|
84
|
+
context 'without :map' do
|
85
|
+
before do
|
86
|
+
expect_any_instance_of(Masamune::Schema::Map).to_not receive(:apply)
|
87
|
+
mock_command(/\Apsql/, mock_success)
|
88
|
+
end
|
89
|
+
|
90
|
+
it { is_expected.to be_success }
|
94
91
|
end
|
95
92
|
|
96
|
-
|
93
|
+
context 'with :map' do
|
94
|
+
before do
|
95
|
+
catalog.schema :postgres do
|
96
|
+
map from: postgres.user_file, to: postgres.user_dimension do |row|
|
97
|
+
{
|
98
|
+
user_id: row[:id],
|
99
|
+
tenant_id: row[:tenant_id],
|
100
|
+
source_kind: 'users',
|
101
|
+
start_at: row[:updated_at],
|
102
|
+
delta: 1
|
103
|
+
}
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
expect_any_instance_of(Masamune::Schema::Map).to receive(:apply).and_call_original
|
108
|
+
mock_command(/\Apsql/, mock_success)
|
109
|
+
end
|
97
110
|
|
98
|
-
|
111
|
+
it { is_expected.to be_success }
|
112
|
+
end
|
99
113
|
end
|
100
114
|
|
101
115
|
describe '.relabel_dimension' do
|
@@ -121,13 +135,31 @@ describe Masamune::Actions::Transform do
|
|
121
135
|
describe '.load_fact' do
|
122
136
|
let(:date) { DateTime.civil(2014, 8) }
|
123
137
|
|
124
|
-
|
125
|
-
|
138
|
+
context 'without :map' do
|
139
|
+
before do
|
140
|
+
expect_any_instance_of(Masamune::Schema::Map).to_not receive(:apply)
|
141
|
+
mock_command(/\Apsql/, mock_success)
|
142
|
+
end
|
143
|
+
|
144
|
+
subject { instance.load_fact(source_file, postgres.visits_hourly_file, postgres.visits_hourly_fact, date) }
|
145
|
+
|
146
|
+
it { is_expected.to be_success }
|
126
147
|
end
|
127
148
|
|
128
|
-
|
149
|
+
context 'with :map' do
|
150
|
+
before do
|
151
|
+
catalog.schema :postgres do
|
152
|
+
map from: postgres.visits_hourly_file, to: postgres.visits_hourly_fact, distinct: true
|
153
|
+
end
|
129
154
|
|
130
|
-
|
155
|
+
expect_any_instance_of(Masamune::Schema::Map).to receive(:apply).and_call_original
|
156
|
+
mock_command(/\Apsql/, mock_success)
|
157
|
+
end
|
158
|
+
|
159
|
+
subject { instance.load_fact(source_file, postgres.visits_hourly_file, postgres.visits_hourly_fact, date) }
|
160
|
+
|
161
|
+
it { is_expected.to be_success }
|
162
|
+
end
|
131
163
|
end
|
132
164
|
|
133
165
|
describe '.rollup_fact' do
|
@@ -332,30 +332,6 @@ describe Masamune::Schema::Catalog do
|
|
332
332
|
it { expect(visits_monthly.measures).to include :count }
|
333
333
|
end
|
334
334
|
|
335
|
-
context 'when schema contains events' do
|
336
|
-
before do
|
337
|
-
instance.schema :hive do
|
338
|
-
event 'event_one' do
|
339
|
-
attribute 'attribute_one'
|
340
|
-
attribute 'attribute_two'
|
341
|
-
end
|
342
|
-
|
343
|
-
event 'event_two' do
|
344
|
-
attribute 'attribute_three'
|
345
|
-
attribute 'attribute_four'
|
346
|
-
end
|
347
|
-
end
|
348
|
-
end
|
349
|
-
|
350
|
-
let(:event_one) { hive.event_one_event }
|
351
|
-
let(:event_two) { hive.event_two_event }
|
352
|
-
|
353
|
-
it { expect(event_one.attributes).to include :attribute_one }
|
354
|
-
it { expect(event_one.attributes).to include :attribute_two }
|
355
|
-
it { expect(event_two.attributes).to include :attribute_three }
|
356
|
-
it { expect(event_two.attributes).to include :attribute_four }
|
357
|
-
end
|
358
|
-
|
359
335
|
context 'when schema contains file' do
|
360
336
|
before do
|
361
337
|
instance.schema :postgres do
|
@@ -456,35 +432,6 @@ describe Masamune::Schema::Catalog do
|
|
456
432
|
end
|
457
433
|
end
|
458
434
|
|
459
|
-
context 'when schema contains map from: event' do
|
460
|
-
before do
|
461
|
-
instance.schema :postgres do
|
462
|
-
dimension 'user', type: :mini do
|
463
|
-
column 'user_id', type: :integer, natural_key: true
|
464
|
-
column 'name', type: :string
|
465
|
-
end
|
466
|
-
|
467
|
-
event 'users' do
|
468
|
-
attribute 'id', type: :integer, immutable: true
|
469
|
-
attribute 'name', type: :string
|
470
|
-
end
|
471
|
-
|
472
|
-
map from: postgres.users_event, to: postgres.user_dimension do |row|
|
473
|
-
{
|
474
|
-
'user_id' => row[:id],
|
475
|
-
'name' => row[:name_now]
|
476
|
-
}
|
477
|
-
end
|
478
|
-
end
|
479
|
-
end
|
480
|
-
|
481
|
-
subject(:map) { postgres.users_event.map(to: postgres.user_dimension) }
|
482
|
-
|
483
|
-
it 'constructs map' do
|
484
|
-
expect(map.function).to_not be_nil
|
485
|
-
end
|
486
|
-
end
|
487
|
-
|
488
435
|
context 'when schema contains map missing the from: field' do
|
489
436
|
subject(:schema) do
|
490
437
|
instance.schema :postgres do
|
@@ -433,31 +433,6 @@ describe Masamune::Schema::Column do
|
|
433
433
|
it { is_expected.to eq({"k" => "v"}) }
|
434
434
|
end
|
435
435
|
end
|
436
|
-
|
437
|
-
context 'with type :yaml and sub_type :boolean' do
|
438
|
-
let(:column) { described_class.new(id: 'yaml', type: :yaml, sub_type: :boolean) }
|
439
|
-
let(:value) do
|
440
|
-
{
|
441
|
-
'true' => true,
|
442
|
-
'one' => '1',
|
443
|
-
'zero' => '0',
|
444
|
-
'false' => false,
|
445
|
-
'string' => 'string',
|
446
|
-
'one_integer' => 1,
|
447
|
-
'zero_integer' => 0
|
448
|
-
}.to_yaml
|
449
|
-
end
|
450
|
-
|
451
|
-
it 'should cast yaml to ruby' do
|
452
|
-
expect(result['true']).to eq(true)
|
453
|
-
expect(result['false']).to eq(false)
|
454
|
-
expect(result['one']).to eq(true)
|
455
|
-
expect(result['zero']).to eq(false)
|
456
|
-
expect(result['one_integer']).to eq(true)
|
457
|
-
expect(result['zero_integer']).to eq(false)
|
458
|
-
expect(result.key?('string')).to eq(false)
|
459
|
-
end
|
460
|
-
end
|
461
436
|
end
|
462
437
|
|
463
438
|
describe '#default_ruby_value' do
|
@@ -665,8 +640,13 @@ describe Masamune::Schema::Column do
|
|
665
640
|
it { is_expected.to eq(false) }
|
666
641
|
end
|
667
642
|
|
668
|
-
context 'when
|
669
|
-
let(:column) { described_class.new id: '
|
643
|
+
context 'when column has default of false' do
|
644
|
+
let(:column) { described_class.new id: 'flag', type: :boolean, default: false }
|
645
|
+
it { is_expected.to eq(false) }
|
646
|
+
end
|
647
|
+
|
648
|
+
context 'when column has reference' do
|
649
|
+
let(:column) { described_class.new id: 'name', type: :string }
|
670
650
|
it { is_expected.to eq(true) }
|
671
651
|
|
672
652
|
context 'when reference allow null' do
|
@@ -682,22 +662,10 @@ describe Masamune::Schema::Column do
|
|
682
662
|
end
|
683
663
|
it { is_expected.to eq(false) }
|
684
664
|
end
|
685
|
-
end
|
686
|
-
|
687
|
-
context 'when natural_key' do
|
688
|
-
let(:column) { described_class.new id: 'name', type: :string, natural_key: true }
|
689
|
-
it { is_expected.to eq(true) }
|
690
665
|
|
691
|
-
context 'when reference
|
666
|
+
context 'when reference has default of false' do
|
692
667
|
before do
|
693
|
-
allow(column).to receive(:reference).and_return(double(null:
|
694
|
-
end
|
695
|
-
it { is_expected.to eq(false) }
|
696
|
-
end
|
697
|
-
|
698
|
-
context 'when reference has default' do
|
699
|
-
before do
|
700
|
-
allow(column).to receive(:reference).and_return(double(null: false, default: 'Unknown'))
|
668
|
+
allow(column).to receive(:reference).and_return(double(null: false, default: false))
|
701
669
|
end
|
702
670
|
it { is_expected.to eq(false) }
|
703
671
|
end
|
@@ -47,7 +47,9 @@ describe Masamune::Schema::Fact do
|
|
47
47
|
Masamune::Schema::TableReference.new(user_dimension)
|
48
48
|
],
|
49
49
|
columns: [
|
50
|
-
Masamune::Schema::Column.new(id: 'total', type: :integer)
|
50
|
+
Masamune::Schema::Column.new(id: 'total', type: :integer),
|
51
|
+
Masamune::Schema::Column.new(id: 'y', type: :integer, partition: true),
|
52
|
+
Masamune::Schema::Column.new(id: 'm', type: :integer, partition: true)
|
51
53
|
]
|
52
54
|
end
|
53
55
|
|
@@ -52,32 +52,6 @@ describe Masamune::Schema::Map do
|
|
52
52
|
column 'deleted_at', type: :timestamp, null: true
|
53
53
|
end
|
54
54
|
end
|
55
|
-
|
56
|
-
catalog.schema :hive do
|
57
|
-
event 'user' do
|
58
|
-
attribute 'id', type: :integer, immutable: true
|
59
|
-
attribute 'tenant_id', type: :integer, immutable: true
|
60
|
-
attribute 'admin', type: :boolean
|
61
|
-
attribute 'preferences', type: :json
|
62
|
-
end
|
63
|
-
|
64
|
-
dimension 'tenant', type: :two, implicit: true do
|
65
|
-
column 'tenant_id'
|
66
|
-
end
|
67
|
-
|
68
|
-
fact 'user' do
|
69
|
-
references :tenant
|
70
|
-
measure 'delta'
|
71
|
-
end
|
72
|
-
|
73
|
-
file 'user' do
|
74
|
-
column 'id', type: :integer
|
75
|
-
column 'tenant_id', type: :integer
|
76
|
-
column 'admin', type: :boolean
|
77
|
-
column 'preferences', type: :json
|
78
|
-
column 'deleted_at', type: :timestamp, null: true
|
79
|
-
end
|
80
|
-
end
|
81
55
|
end
|
82
56
|
|
83
57
|
context 'without source' do
|
@@ -136,22 +110,26 @@ describe Masamune::Schema::Map do
|
|
136
110
|
end
|
137
111
|
|
138
112
|
context 'with undefined function' do
|
139
|
-
let(:source) { catalog.hive.user_event }
|
140
|
-
let(:target) { catalog.hive.user_fact }
|
141
|
-
let(:source_data) { '' }
|
142
|
-
let(:target_data) { '' }
|
143
|
-
|
144
113
|
before do
|
145
|
-
catalog.schema :
|
146
|
-
|
114
|
+
catalog.schema :files do
|
115
|
+
file 'input'
|
116
|
+
file 'output'
|
117
|
+
|
118
|
+
map from: files.input , to: files.output do |row|
|
119
|
+
# Empty
|
147
120
|
end
|
148
121
|
end
|
149
122
|
end
|
150
123
|
|
124
|
+
let(:source) { catalog.files.input }
|
125
|
+
let(:target) { catalog.files.output }
|
126
|
+
let(:source_data) { '' }
|
127
|
+
let(:target_data) { '' }
|
128
|
+
|
151
129
|
it { expect { subject }.to raise_error ArgumentError, /function for map between .* does not return output for default input/ }
|
152
130
|
end
|
153
131
|
|
154
|
-
context 'from csv file to dimension' do
|
132
|
+
context 'from csv file to postgres dimension' do
|
155
133
|
before do
|
156
134
|
catalog.schema :files do
|
157
135
|
map from: postgres.user_file, to: postgres.user_dimension, distinct: true do |row|
|
@@ -161,7 +139,7 @@ describe Masamune::Schema::Map do
|
|
161
139
|
'user_account_state.name' => row[:deleted_at] ? 'deleted' : 'active',
|
162
140
|
'hr_user_account_state.name' => row[:deleted_at] ? 'deleted' : 'active',
|
163
141
|
'admin' => row[:admin],
|
164
|
-
'
|
142
|
+
'preferences' => row[:preferences],
|
165
143
|
'source' => 'users_file',
|
166
144
|
'cluster_id' => 100
|
167
145
|
}
|
@@ -193,7 +171,7 @@ describe Masamune::Schema::Map do
|
|
193
171
|
|
194
172
|
let(:target_data) do
|
195
173
|
<<-EOS.strip_heredoc
|
196
|
-
tenant_id,user_id,user_account_state_type_name,hr_user_account_state_type_name,admin,
|
174
|
+
tenant_id,user_id,user_account_state_type_name,hr_user_account_state_type_name,admin,preferences,source,cluster_id
|
197
175
|
30,1,active,active,FALSE,{},users_file,100
|
198
176
|
40,2,deleted,deleted,TRUE,"{""enabled"":true}",users_file,100
|
199
177
|
EOS
|
@@ -210,19 +188,26 @@ describe Masamune::Schema::Map do
|
|
210
188
|
it_behaves_like 'apply input/output'
|
211
189
|
end
|
212
190
|
|
213
|
-
context 'from
|
191
|
+
context 'from tsv file to postgres dimension' do
|
214
192
|
before do
|
215
193
|
catalog.schema :files do
|
216
|
-
|
194
|
+
file 'input', format: :tsv, headers: false do
|
195
|
+
column 'id', type: :integer
|
196
|
+
column 'tenant_id', type: :integer
|
197
|
+
column 'admin', type: :boolean
|
198
|
+
column 'preferences', type: :json
|
199
|
+
column 'deleted_at', type: :timestamp, null: true
|
200
|
+
end
|
201
|
+
|
202
|
+
map from: files.input, to: postgres.user_dimension do |row|
|
217
203
|
raise if row[:tenant_id] == 42
|
218
204
|
{
|
219
205
|
'tenant_id' => row[:tenant_id],
|
220
206
|
'user_id' => row[:id],
|
221
|
-
'user_account_state.name' => row[:
|
222
|
-
'admin' => row[:
|
223
|
-
'
|
224
|
-
'
|
225
|
-
'source' => 'user_event',
|
207
|
+
'user_account_state.name' => row[:deleted_at] ? 'deleted' : 'active',
|
208
|
+
'admin' => row[:admin],
|
209
|
+
'preferences' => row[:preferences],
|
210
|
+
'source' => 'user_file',
|
226
211
|
'cluster_id' => 100
|
227
212
|
}
|
228
213
|
end
|
@@ -230,7 +215,7 @@ describe Masamune::Schema::Map do
|
|
230
215
|
end
|
231
216
|
|
232
217
|
let(:source) do
|
233
|
-
catalog.
|
218
|
+
catalog.files.input
|
234
219
|
end
|
235
220
|
|
236
221
|
let(:target) do
|
@@ -242,72 +227,113 @@ describe Masamune::Schema::Map do
|
|
242
227
|
expect(environment.logger).to receive(:warn).with(/failed to parse '{.*}' for #{source.name}/).ordered
|
243
228
|
end
|
244
229
|
|
245
|
-
let(:source_data) do
|
246
|
-
<<-EOS.strip_heredoc
|
247
|
-
X user_create 1 30 0 \\N \\N \\N
|
248
|
-
# NOTE intentional duplicate record
|
249
|
-
X user_create 1 30 0 \\N \\N \\N
|
250
|
-
A user_create 1 42 0 \\N \\N \\N
|
251
|
-
Y user_delete 2 40 0 1 "{""enabled"":true}" \\N
|
252
|
-
# NOTE record is intentionally invalid
|
253
|
-
Z user_create 3 50 0 1 INVALID_JSON \\N
|
254
|
-
EOS
|
255
|
-
end
|
256
|
-
|
257
230
|
let(:target_data) do
|
258
231
|
<<-EOS.strip_heredoc
|
259
|
-
tenant_id,user_id,user_account_state_type_name,admin,
|
260
|
-
30,1,active,FALSE,{},
|
261
|
-
30,1,active,FALSE,{},
|
262
|
-
40,2,deleted,TRUE,"{""enabled"":true}",
|
232
|
+
tenant_id,user_id,user_account_state_type_name,admin,preferences,source,cluster_id
|
233
|
+
30,1,active,FALSE,{},user_file,100
|
234
|
+
30,1,active,FALSE,{},user_file,100
|
235
|
+
40,2,deleted,TRUE,"{""enabled"":true}",user_file,100
|
263
236
|
EOS
|
264
237
|
end
|
265
238
|
|
266
|
-
|
267
|
-
|
239
|
+
context 'with quoted json' do
|
240
|
+
let(:source_data) do
|
241
|
+
<<-EOS.strip_heredoc
|
242
|
+
1 30 0
|
243
|
+
# NOTE intentional duplicate record
|
244
|
+
1 30 0
|
245
|
+
1 42 0
|
246
|
+
2 40 1 "{""enabled"":true}" 2015-07-19 00:00:00
|
247
|
+
# NOTE record is intentionally invalid
|
248
|
+
3 50 0 INVALID_JSON
|
249
|
+
EOS
|
250
|
+
end
|
251
|
+
|
252
|
+
it 'should match target data' do
|
253
|
+
is_expected.to eq(target_data)
|
254
|
+
end
|
255
|
+
|
256
|
+
it_behaves_like 'apply input/output'
|
268
257
|
end
|
269
258
|
|
270
|
-
|
259
|
+
context 'with raw json' do
|
260
|
+
let(:source_data) do
|
261
|
+
<<-EOS.strip_heredoc
|
262
|
+
1 30 0
|
263
|
+
# NOTE intentional duplicate record
|
264
|
+
1 30 0
|
265
|
+
1 42 0
|
266
|
+
2 40 1 {"enabled":true} 2015-07-19 00:00:00
|
267
|
+
# NOTE record is intentionally invalid
|
268
|
+
3 50 0 INVALID_JSON
|
269
|
+
EOS
|
270
|
+
end
|
271
|
+
|
272
|
+
it 'should match target data' do
|
273
|
+
is_expected.to eq(target_data)
|
274
|
+
end
|
275
|
+
|
276
|
+
it_behaves_like 'apply input/output'
|
277
|
+
end
|
271
278
|
end
|
272
279
|
|
273
|
-
context 'from
|
280
|
+
context 'from tsv file to csv file' do
|
274
281
|
before do
|
275
282
|
catalog.schema :files do
|
276
|
-
|
283
|
+
file 'input', format: :tsv, headers: false do
|
284
|
+
column 'id', type: :integer
|
285
|
+
column 'tenant_id', type: :integer
|
286
|
+
column 'admin', type: :boolean
|
287
|
+
column 'preferences', type: :json
|
288
|
+
column 'deleted_at', type: :timestamp, null: true
|
289
|
+
end
|
290
|
+
|
291
|
+
file 'output', format: :csv, headers: true do
|
292
|
+
column 'id', type: :integer
|
293
|
+
column 'tenant_id', type: :integer
|
294
|
+
column 'admin', type: :boolean
|
295
|
+
column 'preferences', type: :yaml
|
296
|
+
column 'deleted_at', type: :timestamp, null: true
|
297
|
+
end
|
298
|
+
|
299
|
+
map from: files.input, to: files.output do |row|
|
277
300
|
{
|
278
301
|
'id' => row[:id],
|
279
302
|
'tenant_id' => row[:tenant_id],
|
280
|
-
'deleted_at' => row[:
|
281
|
-
'admin' => row[:
|
282
|
-
'preferences' => row[:
|
303
|
+
'deleted_at' => row[:deleted_at],
|
304
|
+
'admin' => row[:admin],
|
305
|
+
'preferences' => row[:preferences]
|
283
306
|
}
|
284
307
|
end
|
285
308
|
end
|
286
309
|
end
|
287
310
|
|
288
311
|
let(:source) do
|
289
|
-
catalog.
|
312
|
+
catalog.files.input
|
290
313
|
end
|
291
314
|
|
292
315
|
let(:target) do
|
293
|
-
catalog.
|
316
|
+
catalog.files.output
|
294
317
|
end
|
295
318
|
|
296
319
|
let(:source_data) do
|
297
320
|
<<-EOS.strip_heredoc
|
298
|
-
|
299
|
-
|
321
|
+
1 30 0
|
322
|
+
2 40 0 "{""enabled"":true}" 2014-02-26T18:15:51.000Z
|
300
323
|
EOS
|
301
324
|
end
|
302
325
|
|
303
326
|
let(:target_data) do
|
304
327
|
<<-EOS.strip_heredoc
|
305
|
-
|
306
|
-
|
328
|
+
id,tenant_id,deleted_at,admin,preferences
|
329
|
+
1,30,,FALSE,"--- {}
|
330
|
+
"
|
331
|
+
2,40,2014-02-26T18:15:51.000Z,FALSE,"---
|
332
|
+
enabled: true
|
333
|
+
"
|
307
334
|
EOS
|
308
335
|
end
|
309
336
|
|
310
|
-
|
311
337
|
it 'should match target data' do
|
312
338
|
is_expected.to eq(target_data)
|
313
339
|
end
|
@@ -315,37 +341,46 @@ describe Masamune::Schema::Map do
|
|
315
341
|
it_behaves_like 'apply input/output'
|
316
342
|
end
|
317
343
|
|
318
|
-
context 'from
|
344
|
+
context 'from csv file to tsv file' do
|
319
345
|
before do
|
320
346
|
catalog.schema :files do
|
321
|
-
|
347
|
+
file 'input', format: :csv, headers: true, json_encoding: :quoted do
|
348
|
+
column 'id', type: :integer
|
349
|
+
column 'tenant_id', type: :integer
|
350
|
+
column 'admin', type: :boolean
|
351
|
+
column 'preferences', type: :yaml
|
352
|
+
column 'deleted_at', type: :timestamp, null: true
|
353
|
+
end
|
354
|
+
|
355
|
+
file 'output', format: :tsv, headers: false do
|
356
|
+
column 'id', type: :integer
|
357
|
+
column 'tenant_id', type: :integer
|
358
|
+
column 'admin', type: :boolean
|
359
|
+
column 'preferences', type: :json
|
360
|
+
column 'deleted_at', type: :timestamp, null: true
|
361
|
+
end
|
362
|
+
|
363
|
+
map from: files.input, to: files.output do |row|
|
322
364
|
{
|
323
365
|
'id' => row[:id],
|
324
366
|
'tenant_id' => row[:tenant_id],
|
325
|
-
'deleted_at' => row[:
|
326
|
-
'admin' => row[:
|
327
|
-
'preferences' => row[:
|
367
|
+
'deleted_at' => row[:deleted_at],
|
368
|
+
'admin' => row[:admin],
|
369
|
+
'preferences' => row[:preferences]
|
328
370
|
}
|
329
371
|
end
|
330
372
|
end
|
331
373
|
end
|
332
374
|
|
333
375
|
let(:source) do
|
334
|
-
catalog.
|
376
|
+
catalog.files.input
|
335
377
|
end
|
336
378
|
|
337
379
|
let(:target) do
|
338
|
-
catalog.
|
380
|
+
catalog.files.output
|
339
381
|
end
|
340
382
|
|
341
383
|
let(:source_data) do
|
342
|
-
<<-EOS.strip_heredoc
|
343
|
-
X user_create 1 30 0 \\N \\N \\N 0 \\N
|
344
|
-
Y user_delete 2 40 0 1 "{""enabled"":true}" \\N 0 2014-02-26T18:15:51Z
|
345
|
-
EOS
|
346
|
-
end
|
347
|
-
|
348
|
-
let(:target_data) do
|
349
384
|
<<-EOS.strip_heredoc
|
350
385
|
id,tenant_id,deleted_at,admin,preferences
|
351
386
|
1,30,,FALSE,"--- {}
|
@@ -356,6 +391,13 @@ describe Masamune::Schema::Map do
|
|
356
391
|
EOS
|
357
392
|
end
|
358
393
|
|
394
|
+
let(:target_data) do
|
395
|
+
<<-EOS.strip_heredoc
|
396
|
+
1 30 FALSE {}
|
397
|
+
2 40 2014-02-26T18:15:51.000Z FALSE "{""enabled"":true}"
|
398
|
+
EOS
|
399
|
+
end
|
400
|
+
|
359
401
|
it 'should match target data' do
|
360
402
|
is_expected.to eq(target_data)
|
361
403
|
end
|
@@ -363,56 +405,44 @@ describe Masamune::Schema::Map do
|
|
363
405
|
it_behaves_like 'apply input/output'
|
364
406
|
end
|
365
407
|
|
366
|
-
context '
|
408
|
+
context 'with multiple outputs' do
|
367
409
|
before do
|
368
410
|
catalog.schema :files do
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
'delta' => 0,
|
380
|
-
'time_key' => row[:created_at]
|
381
|
-
}
|
382
|
-
]
|
383
|
-
else
|
384
|
-
{
|
385
|
-
'tenant.tenant_id' => row[:tenant_id],
|
386
|
-
'delta' => row[:type] =~ /create/ ? 1 : -1,
|
387
|
-
'time_key' => row[:created_at]
|
388
|
-
}
|
389
|
-
end
|
411
|
+
file 'input' do
|
412
|
+
column 'id', type: :integer
|
413
|
+
end
|
414
|
+
|
415
|
+
file 'output' do
|
416
|
+
column 'id', type: :integer
|
417
|
+
end
|
418
|
+
|
419
|
+
map from: files.input, to: files.output do |row|
|
420
|
+
[row, row]
|
390
421
|
end
|
391
422
|
end
|
392
423
|
end
|
393
424
|
|
394
425
|
let(:source) do
|
395
|
-
catalog.
|
426
|
+
catalog.files.input
|
396
427
|
end
|
397
428
|
|
398
429
|
let(:target) do
|
399
|
-
catalog.
|
430
|
+
catalog.files.output
|
400
431
|
end
|
401
432
|
|
402
433
|
let(:source_data) do
|
403
434
|
<<-EOS.strip_heredoc
|
404
|
-
|
405
|
-
|
406
|
-
Z user_delete 3 10 0 1 "{""enabled"":true}" \\N \\N 2015-01-01T00:30:00Z
|
435
|
+
1
|
436
|
+
2
|
407
437
|
EOS
|
408
438
|
end
|
409
439
|
|
410
440
|
let(:target_data) do
|
411
441
|
<<-EOS.strip_heredoc
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
442
|
+
1
|
443
|
+
1
|
444
|
+
2
|
445
|
+
2
|
416
446
|
EOS
|
417
447
|
end
|
418
448
|
|
@@ -423,75 +453,40 @@ describe Masamune::Schema::Map do
|
|
423
453
|
it_behaves_like 'apply input/output'
|
424
454
|
end
|
425
455
|
|
426
|
-
context '
|
456
|
+
context 'without block' do
|
427
457
|
before do
|
428
|
-
catalog.
|
429
|
-
|
430
|
-
|
431
|
-
attribute 'id', type: :integer, immutable: true
|
432
|
-
attribute 'group_id', type: :integer, array: true
|
433
|
-
end
|
434
|
-
|
435
|
-
dimension 'group', type: :two, implicit: true do
|
436
|
-
column 'group_id'
|
458
|
+
catalog.schema :files do
|
459
|
+
file 'input' do
|
460
|
+
column 'id', type: :integer
|
437
461
|
end
|
438
462
|
|
439
|
-
|
440
|
-
|
441
|
-
column 'junk'
|
442
|
-
measure 'total'
|
463
|
+
file 'output' do
|
464
|
+
column 'id', type: :integer
|
443
465
|
end
|
444
466
|
|
445
|
-
map from:
|
446
|
-
result = []
|
447
|
-
(row[:group_id_now] - row[:group_id_was]).each do |group_id|
|
448
|
-
result <<
|
449
|
-
{
|
450
|
-
'group.group_id' => group_id,
|
451
|
-
'total' => 1,
|
452
|
-
'time_key' => row[:created_at]
|
453
|
-
}
|
454
|
-
end
|
455
|
-
(row[:group_id_was] - row[:group_id_now]).each do |group_id|
|
456
|
-
result <<
|
457
|
-
{
|
458
|
-
'group.group_id' => group_id,
|
459
|
-
'total' => -1,
|
460
|
-
'time_key' => row[:created_at]
|
461
|
-
}
|
462
|
-
end
|
463
|
-
result
|
464
|
-
end
|
467
|
+
map from: files.input, to: files.output
|
465
468
|
end
|
466
469
|
end
|
467
470
|
|
468
471
|
let(:source) do
|
469
|
-
catalog.
|
472
|
+
catalog.files.input
|
470
473
|
end
|
471
474
|
|
472
475
|
let(:target) do
|
473
|
-
catalog.
|
476
|
+
catalog.files.output
|
474
477
|
end
|
475
478
|
|
476
479
|
let(:source_data) do
|
477
480
|
<<-EOS.strip_heredoc
|
478
|
-
|
479
|
-
|
480
|
-
X user_create 3 [1,2] [] 0 2015-01-01T00:10:00Z
|
481
|
-
Y user_update 3 [1,2,3] [1,2] 1 2015-01-01T00:20:00Z
|
482
|
-
Y user_update 3 [1,2] [1,2,3] 1 2015-01-01T00:30:00Z
|
483
|
-
Z user_delete 3 [] [1,2] 0 2015-01-01T00:40:00Z
|
481
|
+
1
|
482
|
+
2
|
484
483
|
EOS
|
485
484
|
end
|
486
485
|
|
487
486
|
let(:target_data) do
|
488
487
|
<<-EOS.strip_heredoc
|
489
|
-
1
|
490
|
-
2
|
491
|
-
3 1 1420071600
|
492
|
-
3 -1 1420072200
|
493
|
-
1 -1 1420072800
|
494
|
-
2 -1 1420072800
|
488
|
+
1
|
489
|
+
2
|
495
490
|
EOS
|
496
491
|
end
|
497
492
|
|
@@ -502,44 +497,47 @@ describe Masamune::Schema::Map do
|
|
502
497
|
it_behaves_like 'apply input/output'
|
503
498
|
end
|
504
499
|
|
505
|
-
context 'from
|
500
|
+
context 'from file to table' do
|
506
501
|
before do
|
507
|
-
catalog.schema :
|
508
|
-
|
509
|
-
|
510
|
-
'tenant_id' => row[:tenant_id],
|
511
|
-
'user_id' => row[:id],
|
512
|
-
'user_account_state.name' => row[:type] =~ /delete/ ? 'deleted' : 'active',
|
513
|
-
'admin' => row[:type] =~ /delete/ ? row[:admin_was] : row[:admin_now],
|
514
|
-
'preferences_now' => row[:preferences_now],
|
515
|
-
'preferences_was' => row[:preferences_was],
|
516
|
-
'source' => 'user_event',
|
517
|
-
'cluster_id' => 100
|
518
|
-
}
|
502
|
+
catalog.schema :postgres do
|
503
|
+
table 'parent' do
|
504
|
+
column 'id', type: :integer
|
519
505
|
end
|
506
|
+
|
507
|
+
file 'input', format: :csv, headers: false do
|
508
|
+
column 'parent.id', type: :integer
|
509
|
+
column 'id', type: :integer
|
510
|
+
end
|
511
|
+
|
512
|
+
table 'output' do
|
513
|
+
references :parent
|
514
|
+
column 'id', type: :integer
|
515
|
+
end
|
516
|
+
|
517
|
+
map from: postgres.input_file, to: postgres.output_table
|
520
518
|
end
|
521
519
|
end
|
522
520
|
|
523
521
|
let(:source) do
|
524
|
-
catalog.
|
522
|
+
catalog.postgres.input_file
|
525
523
|
end
|
526
524
|
|
527
525
|
let(:target) do
|
528
|
-
catalog.postgres.
|
526
|
+
catalog.postgres.output_table
|
529
527
|
end
|
530
528
|
|
531
529
|
let(:source_data) do
|
532
530
|
<<-EOS.strip_heredoc
|
533
|
-
|
534
|
-
|
531
|
+
10,1
|
532
|
+
10,2
|
535
533
|
EOS
|
536
534
|
end
|
537
535
|
|
538
536
|
let(:target_data) do
|
539
537
|
<<-EOS.strip_heredoc
|
540
|
-
|
541
|
-
|
542
|
-
|
538
|
+
parent_table_id,id
|
539
|
+
10,1
|
540
|
+
10,2
|
543
541
|
EOS
|
544
542
|
end
|
545
543
|
|
@@ -566,7 +564,7 @@ describe Masamune::Schema::Map do
|
|
566
564
|
it { is_expected.to eq(%Q{"{}","{}"}) }
|
567
565
|
end
|
568
566
|
|
569
|
-
context 'with
|
567
|
+
context 'with quoted empty json' do
|
570
568
|
before do
|
571
569
|
io.write '"{}","{}"'
|
572
570
|
io.rewind
|