masamune 0.11.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +54 -0
- data/Rakefile +15 -0
- data/bin/masamune-elastic-mapreduce +4 -0
- data/bin/masamune-hive +4 -0
- data/bin/masamune-psql +4 -0
- data/bin/masamune-shell +4 -0
- data/lib/masamune.rb +56 -0
- data/lib/masamune/accumulate.rb +60 -0
- data/lib/masamune/actions.rb +38 -0
- data/lib/masamune/actions/data_flow.rb +131 -0
- data/lib/masamune/actions/date_parse.rb +75 -0
- data/lib/masamune/actions/elastic_mapreduce.rb +68 -0
- data/lib/masamune/actions/execute.rb +52 -0
- data/lib/masamune/actions/filesystem.rb +37 -0
- data/lib/masamune/actions/hadoop_filesystem.rb +40 -0
- data/lib/masamune/actions/hadoop_streaming.rb +41 -0
- data/lib/masamune/actions/hive.rb +74 -0
- data/lib/masamune/actions/postgres.rb +76 -0
- data/lib/masamune/actions/postgres_admin.rb +34 -0
- data/lib/masamune/actions/s3cmd.rb +44 -0
- data/lib/masamune/actions/transform.rb +89 -0
- data/lib/masamune/after_initialize_callbacks.rb +55 -0
- data/lib/masamune/cached_filesystem.rb +110 -0
- data/lib/masamune/commands.rb +37 -0
- data/lib/masamune/commands/elastic_mapreduce.rb +119 -0
- data/lib/masamune/commands/hadoop_filesystem.rb +57 -0
- data/lib/masamune/commands/hadoop_streaming.rb +116 -0
- data/lib/masamune/commands/hive.rb +178 -0
- data/lib/masamune/commands/interactive.rb +37 -0
- data/lib/masamune/commands/postgres.rb +128 -0
- data/lib/masamune/commands/postgres_admin.rb +72 -0
- data/lib/masamune/commands/postgres_common.rb +33 -0
- data/lib/masamune/commands/retry_with_backoff.rb +60 -0
- data/lib/masamune/commands/s3cmd.rb +70 -0
- data/lib/masamune/commands/shell.rb +202 -0
- data/lib/masamune/configuration.rb +195 -0
- data/lib/masamune/data_plan.rb +31 -0
- data/lib/masamune/data_plan/builder.rb +66 -0
- data/lib/masamune/data_plan/elem.rb +190 -0
- data/lib/masamune/data_plan/engine.rb +162 -0
- data/lib/masamune/data_plan/rule.rb +292 -0
- data/lib/masamune/data_plan/set.rb +176 -0
- data/lib/masamune/environment.rb +164 -0
- data/lib/masamune/filesystem.rb +567 -0
- data/lib/masamune/has_environment.rb +40 -0
- data/lib/masamune/helpers.rb +27 -0
- data/lib/masamune/helpers/postgres.rb +84 -0
- data/lib/masamune/io.rb +33 -0
- data/lib/masamune/last_element.rb +53 -0
- data/lib/masamune/method_logger.rb +41 -0
- data/lib/masamune/multi_io.rb +39 -0
- data/lib/masamune/schema.rb +36 -0
- data/lib/masamune/schema/catalog.rb +233 -0
- data/lib/masamune/schema/column.rb +527 -0
- data/lib/masamune/schema/dimension.rb +133 -0
- data/lib/masamune/schema/event.rb +121 -0
- data/lib/masamune/schema/fact.rb +133 -0
- data/lib/masamune/schema/map.rb +265 -0
- data/lib/masamune/schema/row.rb +133 -0
- data/lib/masamune/schema/store.rb +115 -0
- data/lib/masamune/schema/table.rb +308 -0
- data/lib/masamune/schema/table_reference.rb +76 -0
- data/lib/masamune/spec_helper.rb +23 -0
- data/lib/masamune/string_format.rb +34 -0
- data/lib/masamune/tasks/elastic_mapreduce_thor.rb +60 -0
- data/lib/masamune/tasks/hive_thor.rb +55 -0
- data/lib/masamune/tasks/postgres_thor.rb +47 -0
- data/lib/masamune/tasks/shell_thor.rb +63 -0
- data/lib/masamune/template.rb +77 -0
- data/lib/masamune/thor.rb +186 -0
- data/lib/masamune/thor_loader.rb +38 -0
- data/lib/masamune/topological_hash.rb +34 -0
- data/lib/masamune/transform.rb +47 -0
- data/lib/masamune/transform/bulk_upsert.psql.erb +64 -0
- data/lib/masamune/transform/bulk_upsert.rb +52 -0
- data/lib/masamune/transform/consolidate_dimension.rb +54 -0
- data/lib/masamune/transform/deduplicate_dimension.psql.erb +52 -0
- data/lib/masamune/transform/deduplicate_dimension.rb +53 -0
- data/lib/masamune/transform/define_event_view.hql.erb +51 -0
- data/lib/masamune/transform/define_event_view.rb +60 -0
- data/lib/masamune/transform/define_index.psql.erb +34 -0
- data/lib/masamune/transform/define_schema.hql.erb +23 -0
- data/lib/masamune/transform/define_schema.psql.erb +79 -0
- data/lib/masamune/transform/define_schema.rb +56 -0
- data/lib/masamune/transform/define_table.hql.erb +34 -0
- data/lib/masamune/transform/define_table.psql.erb +95 -0
- data/lib/masamune/transform/define_table.rb +40 -0
- data/lib/masamune/transform/define_unique.psql.erb +30 -0
- data/lib/masamune/transform/insert_reference_values.psql.erb +43 -0
- data/lib/masamune/transform/insert_reference_values.rb +64 -0
- data/lib/masamune/transform/load_dimension.rb +47 -0
- data/lib/masamune/transform/load_fact.rb +45 -0
- data/lib/masamune/transform/operator.rb +96 -0
- data/lib/masamune/transform/relabel_dimension.psql.erb +76 -0
- data/lib/masamune/transform/relabel_dimension.rb +39 -0
- data/lib/masamune/transform/rollup_fact.psql.erb +79 -0
- data/lib/masamune/transform/rollup_fact.rb +149 -0
- data/lib/masamune/transform/snapshot_dimension.psql.erb +75 -0
- data/lib/masamune/transform/snapshot_dimension.rb +74 -0
- data/lib/masamune/transform/stage_dimension.psql.erb +39 -0
- data/lib/masamune/transform/stage_dimension.rb +83 -0
- data/lib/masamune/transform/stage_fact.psql.erb +80 -0
- data/lib/masamune/transform/stage_fact.rb +111 -0
- data/lib/masamune/version.rb +25 -0
- data/spec/fixtures/aggregate.sql.erb +25 -0
- data/spec/fixtures/comment.sql.erb +27 -0
- data/spec/fixtures/invalid.sql.erb +23 -0
- data/spec/fixtures/relative.sql.erb +23 -0
- data/spec/fixtures/simple.sql.erb +28 -0
- data/spec/fixtures/whitespace.sql.erb +30 -0
- data/spec/masamune/actions/elastic_mapreduce_spec.rb +108 -0
- data/spec/masamune/actions/execute_spec.rb +50 -0
- data/spec/masamune/actions/hadoop_filesystem_spec.rb +44 -0
- data/spec/masamune/actions/hadoop_streaming_spec.rb +74 -0
- data/spec/masamune/actions/hive_spec.rb +117 -0
- data/spec/masamune/actions/postgres_admin_spec.rb +58 -0
- data/spec/masamune/actions/postgres_spec.rb +134 -0
- data/spec/masamune/actions/s3cmd_spec.rb +44 -0
- data/spec/masamune/actions/transform_spec.rb +144 -0
- data/spec/masamune/after_initialization_callbacks_spec.rb +61 -0
- data/spec/masamune/cached_filesystem_spec.rb +167 -0
- data/spec/masamune/commands/hadoop_filesystem_spec.rb +50 -0
- data/spec/masamune/commands/hadoop_streaming_spec.rb +106 -0
- data/spec/masamune/commands/hive_spec.rb +117 -0
- data/spec/masamune/commands/postgres_admin_spec.rb +69 -0
- data/spec/masamune/commands/postgres_spec.rb +100 -0
- data/spec/masamune/commands/retry_with_backoff_spec.rb +116 -0
- data/spec/masamune/commands/s3cmd_spec.rb +50 -0
- data/spec/masamune/commands/shell_spec.rb +101 -0
- data/spec/masamune/configuration_spec.rb +102 -0
- data/spec/masamune/data_plan/builder_spec.rb +91 -0
- data/spec/masamune/data_plan/elem_spec.rb +102 -0
- data/spec/masamune/data_plan/engine_spec.rb +356 -0
- data/spec/masamune/data_plan/rule_spec.rb +407 -0
- data/spec/masamune/data_plan/set_spec.rb +517 -0
- data/spec/masamune/environment_spec.rb +65 -0
- data/spec/masamune/filesystem_spec.rb +1421 -0
- data/spec/masamune/helpers/postgres_spec.rb +95 -0
- data/spec/masamune/schema/catalog_spec.rb +613 -0
- data/spec/masamune/schema/column_spec.rb +696 -0
- data/spec/masamune/schema/dimension_spec.rb +137 -0
- data/spec/masamune/schema/event_spec.rb +75 -0
- data/spec/masamune/schema/fact_spec.rb +117 -0
- data/spec/masamune/schema/map_spec.rb +593 -0
- data/spec/masamune/schema/row_spec.rb +28 -0
- data/spec/masamune/schema/store_spec.rb +49 -0
- data/spec/masamune/schema/table_spec.rb +395 -0
- data/spec/masamune/string_format_spec.rb +60 -0
- data/spec/masamune/tasks/elastic_mapreduce_thor_spec.rb +57 -0
- data/spec/masamune/tasks/hive_thor_spec.rb +75 -0
- data/spec/masamune/tasks/postgres_thor_spec.rb +42 -0
- data/spec/masamune/tasks/shell_thor_spec.rb +51 -0
- data/spec/masamune/template_spec.rb +77 -0
- data/spec/masamune/thor_spec.rb +238 -0
- data/spec/masamune/transform/bulk_upsert.dimension_spec.rb +200 -0
- data/spec/masamune/transform/consolidate_dimension_spec.rb +62 -0
- data/spec/masamune/transform/deduplicate_dimension_spec.rb +84 -0
- data/spec/masamune/transform/define_event_view_spec.rb +84 -0
- data/spec/masamune/transform/define_schema_spec.rb +83 -0
- data/spec/masamune/transform/define_table.dimension_spec.rb +306 -0
- data/spec/masamune/transform/define_table.fact_spec.rb +291 -0
- data/spec/masamune/transform/define_table.table_spec.rb +525 -0
- data/spec/masamune/transform/insert_reference_values.dimension_spec.rb +111 -0
- data/spec/masamune/transform/insert_reference_values.fact_spec.rb +149 -0
- data/spec/masamune/transform/load_dimension_spec.rb +76 -0
- data/spec/masamune/transform/load_fact_spec.rb +89 -0
- data/spec/masamune/transform/relabel_dimension_spec.rb +102 -0
- data/spec/masamune/transform/rollup_fact_spec.rb +333 -0
- data/spec/masamune/transform/snapshot_dimension_spec.rb +103 -0
- data/spec/masamune/transform/stage_dimension_spec.rb +115 -0
- data/spec/masamune/transform/stage_fact_spec.rb +204 -0
- data/spec/masamune_spec.rb +32 -0
- data/spec/spec_helper.rb +41 -0
- data/spec/support/masamune/example_group.rb +36 -0
- data/spec/support/masamune/mock_command.rb +99 -0
- data/spec/support/masamune/mock_delegate.rb +51 -0
- data/spec/support/masamune/mock_filesystem.rb +96 -0
- data/spec/support/masamune/thor_mute.rb +35 -0
- data/spec/support/rspec/example/action_example_group.rb +34 -0
- data/spec/support/rspec/example/task_example_group.rb +80 -0
- data/spec/support/rspec/example/transform_example_group.rb +36 -0
- data/spec/support/shared_examples/postgres_common_examples.rb +53 -0
- metadata +462 -0
@@ -0,0 +1,306 @@
|
|
1
|
+
# The MIT License (MIT)
|
2
|
+
#
|
3
|
+
# Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
# of this software and associated documentation files (the "Software"), to deal
|
7
|
+
# in the Software without restriction, including without limitation the rights
|
8
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
# copies of the Software, and to permit persons to whom the Software is
|
10
|
+
# furnished to do so, subject to the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
13
|
+
# all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
# THE SOFTWARE.
|
22
|
+
|
23
|
+
require 'spec_helper'
|
24
|
+
|
25
|
+
describe Masamune::Transform::DefineTable do
|
26
|
+
subject { transform.define_table(table).to_s }
|
27
|
+
|
28
|
+
context 'for hive implicit dimension' do
|
29
|
+
before do
|
30
|
+
catalog.schema :hive do
|
31
|
+
dimension 'user', implicit: true do
|
32
|
+
column 'user_id', natural_key: true
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
let(:table) { catalog.hive.user_dimension }
|
38
|
+
|
39
|
+
it 'should not render table template' do
|
40
|
+
is_expected.to eq ''
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
context 'for postgres dimension type: one' do
|
45
|
+
before do
|
46
|
+
catalog.schema :postgres do
|
47
|
+
dimension 'user', type: :one do
|
48
|
+
column 'tenant_id'
|
49
|
+
column 'user_id'
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
let(:table) { catalog.postgres.user_dimension }
|
55
|
+
|
56
|
+
it 'should render table template' do
|
57
|
+
is_expected.to eq <<-EOS.strip_heredoc
|
58
|
+
CREATE TABLE IF NOT EXISTS user_dimension
|
59
|
+
(
|
60
|
+
id SERIAL PRIMARY KEY,
|
61
|
+
tenant_id INTEGER NOT NULL,
|
62
|
+
user_id INTEGER NOT NULL,
|
63
|
+
last_modified_at TIMESTAMP NOT NULL DEFAULT NOW()
|
64
|
+
);
|
65
|
+
EOS
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
context 'for postgres dimension type: two' do
|
70
|
+
before do
|
71
|
+
catalog.schema :postgres do
|
72
|
+
dimension 'user', type: :two do
|
73
|
+
column 'tenant_id', index: true, natural_key: true
|
74
|
+
column 'user_id', index: true, natural_key: true
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
let(:table) { catalog.postgres.user_dimension }
|
80
|
+
|
81
|
+
it 'should render table template' do
|
82
|
+
is_expected.to eq <<-EOS.strip_heredoc
|
83
|
+
CREATE TABLE IF NOT EXISTS user_dimension
|
84
|
+
(
|
85
|
+
id SERIAL PRIMARY KEY,
|
86
|
+
tenant_id INTEGER NOT NULL,
|
87
|
+
user_id INTEGER NOT NULL,
|
88
|
+
start_at TIMESTAMP NOT NULL DEFAULT TO_TIMESTAMP(0),
|
89
|
+
end_at TIMESTAMP,
|
90
|
+
version INTEGER DEFAULT 1,
|
91
|
+
last_modified_at TIMESTAMP NOT NULL DEFAULT NOW()
|
92
|
+
);
|
93
|
+
|
94
|
+
DO $$ BEGIN
|
95
|
+
IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_e6c3d91_key') THEN
|
96
|
+
ALTER TABLE user_dimension ADD CONSTRAINT user_dimension_e6c3d91_key UNIQUE(tenant_id, user_id, start_at);
|
97
|
+
END IF; END $$;
|
98
|
+
|
99
|
+
DO $$ BEGIN
|
100
|
+
IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_3854361_index') THEN
|
101
|
+
CREATE INDEX user_dimension_3854361_index ON user_dimension (tenant_id);
|
102
|
+
END IF; END $$;
|
103
|
+
|
104
|
+
DO $$ BEGIN
|
105
|
+
IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_e8701ad_index') THEN
|
106
|
+
CREATE INDEX user_dimension_e8701ad_index ON user_dimension (user_id);
|
107
|
+
END IF; END $$;
|
108
|
+
|
109
|
+
DO $$ BEGIN
|
110
|
+
IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_23563d3_index') THEN
|
111
|
+
CREATE INDEX user_dimension_23563d3_index ON user_dimension (start_at);
|
112
|
+
END IF; END $$;
|
113
|
+
|
114
|
+
DO $$ BEGIN
|
115
|
+
IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_2c8e908_index') THEN
|
116
|
+
CREATE INDEX user_dimension_2c8e908_index ON user_dimension (end_at);
|
117
|
+
END IF; END $$;
|
118
|
+
|
119
|
+
DO $$ BEGIN
|
120
|
+
IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_2af72f1_index') THEN
|
121
|
+
CREATE INDEX user_dimension_2af72f1_index ON user_dimension (version);
|
122
|
+
END IF; END $$;
|
123
|
+
EOS
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
context 'for postgres dimension type: four' do
|
128
|
+
before do
|
129
|
+
catalog.schema :postgres do
|
130
|
+
dimension 'cluster', type: :mini do
|
131
|
+
column 'id', type: :integer, surrogate_key: true, auto: true
|
132
|
+
column 'name', type: :string, unique: true
|
133
|
+
row name: 'default', attributes: {default: true}
|
134
|
+
end
|
135
|
+
|
136
|
+
dimension 'user_account_state', type: :mini do
|
137
|
+
column 'name', type: :string, unique: true
|
138
|
+
column 'description', type: :string
|
139
|
+
row name: 'active', description: 'Active', attributes: {default: true}
|
140
|
+
end
|
141
|
+
|
142
|
+
dimension 'user', type: :four do
|
143
|
+
references :cluster
|
144
|
+
references :user_account_state
|
145
|
+
column 'tenant_id', index: true, natural_key: true
|
146
|
+
column 'user_id', index: true, natural_key: true
|
147
|
+
column 'preferences', type: :key_value, null: true
|
148
|
+
end
|
149
|
+
end
|
150
|
+
end
|
151
|
+
|
152
|
+
let(:table) { catalog.postgres.user_dimension }
|
153
|
+
|
154
|
+
it 'should render table template' do
|
155
|
+
is_expected.to eq <<-EOS.strip_heredoc
|
156
|
+
CREATE TABLE IF NOT EXISTS user_dimension_ledger
|
157
|
+
(
|
158
|
+
id SERIAL PRIMARY KEY,
|
159
|
+
cluster_type_id INTEGER NOT NULL REFERENCES cluster_type(id) DEFAULT default_cluster_type_id(),
|
160
|
+
user_account_state_type_id INTEGER REFERENCES user_account_state_type(id),
|
161
|
+
tenant_id INTEGER NOT NULL,
|
162
|
+
user_id INTEGER NOT NULL,
|
163
|
+
preferences_now HSTORE,
|
164
|
+
preferences_was HSTORE,
|
165
|
+
source_kind VARCHAR NOT NULL,
|
166
|
+
source_uuid VARCHAR NOT NULL,
|
167
|
+
start_at TIMESTAMP NOT NULL,
|
168
|
+
last_modified_at TIMESTAMP NOT NULL DEFAULT NOW(),
|
169
|
+
delta INTEGER NOT NULL
|
170
|
+
);
|
171
|
+
|
172
|
+
DO $$ BEGIN
|
173
|
+
IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_ledger_370d6dd_key') THEN
|
174
|
+
ALTER TABLE user_dimension_ledger ADD CONSTRAINT user_dimension_ledger_370d6dd_key UNIQUE(tenant_id, user_id, source_kind, source_uuid, start_at);
|
175
|
+
END IF; END $$;
|
176
|
+
|
177
|
+
DO $$ BEGIN
|
178
|
+
IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_ledger_d6b9b38_index') THEN
|
179
|
+
CREATE INDEX user_dimension_ledger_d6b9b38_index ON user_dimension_ledger (cluster_type_id);
|
180
|
+
END IF; END $$;
|
181
|
+
|
182
|
+
DO $$ BEGIN
|
183
|
+
IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_ledger_7988187_index') THEN
|
184
|
+
CREATE INDEX user_dimension_ledger_7988187_index ON user_dimension_ledger (user_account_state_type_id);
|
185
|
+
END IF; END $$;
|
186
|
+
|
187
|
+
DO $$ BEGIN
|
188
|
+
IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_ledger_3854361_index') THEN
|
189
|
+
CREATE INDEX user_dimension_ledger_3854361_index ON user_dimension_ledger (tenant_id);
|
190
|
+
END IF; END $$;
|
191
|
+
|
192
|
+
DO $$ BEGIN
|
193
|
+
IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_ledger_e8701ad_index') THEN
|
194
|
+
CREATE INDEX user_dimension_ledger_e8701ad_index ON user_dimension_ledger (user_id);
|
195
|
+
END IF; END $$;
|
196
|
+
|
197
|
+
DO $$ BEGIN
|
198
|
+
IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_ledger_23563d3_index') THEN
|
199
|
+
CREATE INDEX user_dimension_ledger_23563d3_index ON user_dimension_ledger (start_at);
|
200
|
+
END IF; END $$;
|
201
|
+
|
202
|
+
CREATE TABLE IF NOT EXISTS user_dimension
|
203
|
+
(
|
204
|
+
id SERIAL PRIMARY KEY,
|
205
|
+
cluster_type_id INTEGER NOT NULL REFERENCES cluster_type(id) DEFAULT default_cluster_type_id(),
|
206
|
+
user_account_state_type_id INTEGER NOT NULL REFERENCES user_account_state_type(id) DEFAULT default_user_account_state_type_id(),
|
207
|
+
tenant_id INTEGER NOT NULL,
|
208
|
+
user_id INTEGER NOT NULL,
|
209
|
+
preferences HSTORE,
|
210
|
+
parent_id INTEGER REFERENCES user_dimension_ledger(id),
|
211
|
+
record_id INTEGER REFERENCES user_dimension_ledger(id),
|
212
|
+
start_at TIMESTAMP NOT NULL DEFAULT TO_TIMESTAMP(0),
|
213
|
+
end_at TIMESTAMP,
|
214
|
+
version INTEGER DEFAULT 1,
|
215
|
+
last_modified_at TIMESTAMP NOT NULL DEFAULT NOW()
|
216
|
+
);
|
217
|
+
|
218
|
+
DO $$ BEGIN
|
219
|
+
IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_e6c3d91_key') THEN
|
220
|
+
ALTER TABLE user_dimension ADD CONSTRAINT user_dimension_e6c3d91_key UNIQUE(tenant_id, user_id, start_at);
|
221
|
+
END IF; END $$;
|
222
|
+
|
223
|
+
DO $$ BEGIN
|
224
|
+
IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_d6b9b38_index') THEN
|
225
|
+
CREATE INDEX user_dimension_d6b9b38_index ON user_dimension (cluster_type_id);
|
226
|
+
END IF; END $$;
|
227
|
+
|
228
|
+
DO $$ BEGIN
|
229
|
+
IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_7988187_index') THEN
|
230
|
+
CREATE INDEX user_dimension_7988187_index ON user_dimension (user_account_state_type_id);
|
231
|
+
END IF; END $$;
|
232
|
+
|
233
|
+
DO $$ BEGIN
|
234
|
+
IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_3854361_index') THEN
|
235
|
+
CREATE INDEX user_dimension_3854361_index ON user_dimension (tenant_id);
|
236
|
+
END IF; END $$;
|
237
|
+
|
238
|
+
DO $$ BEGIN
|
239
|
+
IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_e8701ad_index') THEN
|
240
|
+
CREATE INDEX user_dimension_e8701ad_index ON user_dimension (user_id);
|
241
|
+
END IF; END $$;
|
242
|
+
|
243
|
+
DO $$ BEGIN
|
244
|
+
IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_23563d3_index') THEN
|
245
|
+
CREATE INDEX user_dimension_23563d3_index ON user_dimension (start_at);
|
246
|
+
END IF; END $$;
|
247
|
+
|
248
|
+
DO $$ BEGIN
|
249
|
+
IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_2c8e908_index') THEN
|
250
|
+
CREATE INDEX user_dimension_2c8e908_index ON user_dimension (end_at);
|
251
|
+
END IF; END $$;
|
252
|
+
|
253
|
+
DO $$ BEGIN
|
254
|
+
IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_2af72f1_index') THEN
|
255
|
+
CREATE INDEX user_dimension_2af72f1_index ON user_dimension (version);
|
256
|
+
END IF; END $$;
|
257
|
+
EOS
|
258
|
+
end
|
259
|
+
end
|
260
|
+
|
261
|
+
context 'for postgres dimension type: four stage table' do
|
262
|
+
before do
|
263
|
+
catalog.schema :postgres do
|
264
|
+
dimension 'user_account_state', type: :mini do
|
265
|
+
column 'name', type: :string, unique: true
|
266
|
+
column 'description', type: :string
|
267
|
+
row name: 'active', description: 'Active', attributes: {default: true}
|
268
|
+
end
|
269
|
+
|
270
|
+
dimension 'user', type: :four do
|
271
|
+
references :user_account_state
|
272
|
+
column 'tenant_id', index: true, natural_key: true
|
273
|
+
column 'user_id', index: true, natural_key: true
|
274
|
+
column 'preferences', type: :key_value, null: true
|
275
|
+
end
|
276
|
+
end
|
277
|
+
end
|
278
|
+
|
279
|
+
let(:table) { catalog.postgres.user_dimension.stage_table(suffix: 'consolidated_forward') }
|
280
|
+
|
281
|
+
it 'should render table template' do
|
282
|
+
is_expected.to eq <<-EOS.strip_heredoc
|
283
|
+
CREATE TEMPORARY TABLE IF NOT EXISTS user_consolidated_forward_dimension_stage
|
284
|
+
(
|
285
|
+
user_account_state_type_id INTEGER DEFAULT default_user_account_state_type_id(),
|
286
|
+
tenant_id INTEGER,
|
287
|
+
user_id INTEGER,
|
288
|
+
preferences HSTORE,
|
289
|
+
parent_id INTEGER,
|
290
|
+
record_id INTEGER,
|
291
|
+
start_at TIMESTAMP DEFAULT TO_TIMESTAMP(0),
|
292
|
+
end_at TIMESTAMP,
|
293
|
+
version INTEGER DEFAULT 1,
|
294
|
+
last_modified_at TIMESTAMP DEFAULT NOW()
|
295
|
+
);
|
296
|
+
|
297
|
+
CREATE INDEX user_consolidated_forward_dimension_stage_7988187_index ON user_consolidated_forward_dimension_stage (user_account_state_type_id);
|
298
|
+
CREATE INDEX user_consolidated_forward_dimension_stage_3854361_index ON user_consolidated_forward_dimension_stage (tenant_id);
|
299
|
+
CREATE INDEX user_consolidated_forward_dimension_stage_e8701ad_index ON user_consolidated_forward_dimension_stage (user_id);
|
300
|
+
CREATE INDEX user_consolidated_forward_dimension_stage_23563d3_index ON user_consolidated_forward_dimension_stage (start_at);
|
301
|
+
CREATE INDEX user_consolidated_forward_dimension_stage_2c8e908_index ON user_consolidated_forward_dimension_stage (end_at);
|
302
|
+
CREATE INDEX user_consolidated_forward_dimension_stage_2af72f1_index ON user_consolidated_forward_dimension_stage (version);
|
303
|
+
EOS
|
304
|
+
end
|
305
|
+
end
|
306
|
+
end
|
@@ -0,0 +1,291 @@
|
|
1
|
+
# The MIT License (MIT)
|
2
|
+
#
|
3
|
+
# Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
# of this software and associated documentation files (the "Software"), to deal
|
7
|
+
# in the Software without restriction, including without limitation the rights
|
8
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
# copies of the Software, and to permit persons to whom the Software is
|
10
|
+
# furnished to do so, subject to the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
13
|
+
# all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
# THE SOFTWARE.
|
22
|
+
|
23
|
+
require 'spec_helper'
|
24
|
+
|
25
|
+
describe Masamune::Transform::DefineTable do
|
26
|
+
before do
|
27
|
+
catalog.schema :postgres do
|
28
|
+
dimension 'cluster', type: :mini do
|
29
|
+
column 'id', type: :sequence, surrogate_key: true, auto: true
|
30
|
+
column 'name', type: :string
|
31
|
+
|
32
|
+
row name: 'current_database()', attributes: {default: true}
|
33
|
+
end
|
34
|
+
|
35
|
+
dimension 'date', type: :date do
|
36
|
+
column 'date_id', type: :integer, unique: true, index: true, natural_key: true
|
37
|
+
end
|
38
|
+
|
39
|
+
dimension 'user_agent', type: :mini do
|
40
|
+
column 'name', type: :string, unique: true, index: 'shared'
|
41
|
+
column 'version', type: :string, unique: true, index: 'shared', default: 'Unknown'
|
42
|
+
column 'description', type: :string, null: true, ignore: true
|
43
|
+
end
|
44
|
+
|
45
|
+
dimension 'feature', type: :mini do
|
46
|
+
column 'name', type: :string, unique: true, index: true
|
47
|
+
end
|
48
|
+
|
49
|
+
dimension 'tenant', type: :two do
|
50
|
+
column 'tenant_id', type: :integer, index: true, natural_key: true
|
51
|
+
end
|
52
|
+
|
53
|
+
dimension 'user', type: :two do
|
54
|
+
column 'tenant_id', type: :integer, index: true, natural_key: true
|
55
|
+
column 'user_id', type: :integer, index: true, natural_key: true
|
56
|
+
end
|
57
|
+
|
58
|
+
dimension 'group', type: :two do
|
59
|
+
column 'group_id', type: :integer, natural_key: true
|
60
|
+
end
|
61
|
+
|
62
|
+
fact 'visits', partition: 'y%Ym%m' do
|
63
|
+
references :cluster
|
64
|
+
references :date
|
65
|
+
references :tenant
|
66
|
+
references :user
|
67
|
+
references :group, multiple: true
|
68
|
+
references :user_agent, insert: true
|
69
|
+
references :feature, insert: true
|
70
|
+
measure 'total', type: :integer
|
71
|
+
end
|
72
|
+
|
73
|
+
file 'visits' do
|
74
|
+
column 'date.date_id', type: :integer
|
75
|
+
column 'tenant.tenant_id', type: :integer
|
76
|
+
column 'user.user_id', type: :integer
|
77
|
+
column 'user_agent.name', type: :string
|
78
|
+
column 'user_agent.version', type: :string
|
79
|
+
column 'feature.name', type: :string
|
80
|
+
column 'time_key', type: :integer
|
81
|
+
column 'total', type: :integer
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
catalog.schema :hive do
|
86
|
+
dimension 'date', type: :date, implicit: true do
|
87
|
+
column 'date_id', type: :integer, natural_key: true
|
88
|
+
end
|
89
|
+
|
90
|
+
dimension 'user', type: :two, implicit: true do
|
91
|
+
column 'user_id', type: :integer, natural_key: true
|
92
|
+
end
|
93
|
+
|
94
|
+
dimension 'group', type: :two, implicit: true do
|
95
|
+
column 'group_id', type: :integer, natural_key: true
|
96
|
+
end
|
97
|
+
|
98
|
+
dimension 'user_agent', type: :mini do
|
99
|
+
column 'name', type: :string
|
100
|
+
column 'version', type: :string
|
101
|
+
column 'description', type: :string, ignore: true
|
102
|
+
end
|
103
|
+
|
104
|
+
fact 'visits', grain: :hourly do
|
105
|
+
partition :y
|
106
|
+
partition :m
|
107
|
+
partition :d
|
108
|
+
references :date
|
109
|
+
references :user
|
110
|
+
references :group, multiple: true
|
111
|
+
references :user_agent, denormalize: true
|
112
|
+
measure 'total'
|
113
|
+
end
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
context 'for postgres fact' do
|
118
|
+
let(:target) { catalog.postgres.visits_fact }
|
119
|
+
|
120
|
+
subject(:result) { transform.define_table(target).to_s }
|
121
|
+
|
122
|
+
it 'should eq render table template' do
|
123
|
+
is_expected.to eq <<-EOS.strip_heredoc
|
124
|
+
CREATE TABLE IF NOT EXISTS visits_fact
|
125
|
+
(
|
126
|
+
cluster_type_id INTEGER NOT NULL REFERENCES cluster_type(id) DEFAULT default_cluster_type_id(),
|
127
|
+
date_dimension_id INTEGER NOT NULL REFERENCES date_dimension(id),
|
128
|
+
tenant_dimension_id INTEGER NOT NULL REFERENCES tenant_dimension(id),
|
129
|
+
user_dimension_id INTEGER NOT NULL REFERENCES user_dimension(id),
|
130
|
+
group_dimension_id INTEGER[] NOT NULL,
|
131
|
+
user_agent_type_id INTEGER NOT NULL REFERENCES user_agent_type(id),
|
132
|
+
feature_type_id INTEGER NOT NULL REFERENCES feature_type(id),
|
133
|
+
total INTEGER NOT NULL,
|
134
|
+
time_key INTEGER NOT NULL,
|
135
|
+
last_modified_at TIMESTAMP NOT NULL DEFAULT NOW()
|
136
|
+
);
|
137
|
+
|
138
|
+
DO $$ BEGIN
|
139
|
+
IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_d6b9b38_index') THEN
|
140
|
+
CREATE INDEX visits_fact_d6b9b38_index ON visits_fact (cluster_type_id);
|
141
|
+
END IF; END $$;
|
142
|
+
|
143
|
+
DO $$ BEGIN
|
144
|
+
IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_0a531a8_index') THEN
|
145
|
+
CREATE INDEX visits_fact_0a531a8_index ON visits_fact (date_dimension_id);
|
146
|
+
END IF; END $$;
|
147
|
+
|
148
|
+
DO $$ BEGIN
|
149
|
+
IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_d3950d9_index') THEN
|
150
|
+
CREATE INDEX visits_fact_d3950d9_index ON visits_fact (tenant_dimension_id);
|
151
|
+
END IF; END $$;
|
152
|
+
|
153
|
+
DO $$ BEGIN
|
154
|
+
IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_39f0fdd_index') THEN
|
155
|
+
CREATE INDEX visits_fact_39f0fdd_index ON visits_fact (user_dimension_id);
|
156
|
+
END IF; END $$;
|
157
|
+
|
158
|
+
DO $$ BEGIN
|
159
|
+
IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_e0d2a9e_index') THEN
|
160
|
+
CREATE INDEX visits_fact_e0d2a9e_index ON visits_fact (group_dimension_id);
|
161
|
+
END IF; END $$;
|
162
|
+
|
163
|
+
DO $$ BEGIN
|
164
|
+
IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_d8b1c3e_index') THEN
|
165
|
+
CREATE INDEX visits_fact_d8b1c3e_index ON visits_fact (user_agent_type_id);
|
166
|
+
END IF; END $$;
|
167
|
+
|
168
|
+
DO $$ BEGIN
|
169
|
+
IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_33b68fd_index') THEN
|
170
|
+
CREATE INDEX visits_fact_33b68fd_index ON visits_fact (feature_type_id);
|
171
|
+
END IF; END $$;
|
172
|
+
|
173
|
+
DO $$ BEGIN
|
174
|
+
IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_6444ed3_index') THEN
|
175
|
+
CREATE INDEX visits_fact_6444ed3_index ON visits_fact (time_key);
|
176
|
+
END IF; END $$;
|
177
|
+
EOS
|
178
|
+
end
|
179
|
+
end
|
180
|
+
|
181
|
+
describe 'for fact table from file with sources files' do
|
182
|
+
let(:files) { (1..3).map { |i| double(path: "output_#{i}.csv") } }
|
183
|
+
let(:target) { catalog.postgres.visits_fact }
|
184
|
+
let(:source) { catalog.postgres.visits_file }
|
185
|
+
|
186
|
+
subject(:result) { transform.define_table(source.stage_table(suffix: 'file', table: target, inherit: false), files).to_s }
|
187
|
+
|
188
|
+
it 'should eq render table template' do
|
189
|
+
is_expected.to eq <<-EOS.strip_heredoc
|
190
|
+
CREATE TEMPORARY TABLE IF NOT EXISTS visits_file_fact_stage
|
191
|
+
(
|
192
|
+
date_dimension_date_id INTEGER,
|
193
|
+
tenant_dimension_tenant_id INTEGER,
|
194
|
+
user_dimension_user_id INTEGER,
|
195
|
+
user_agent_type_name VARCHAR,
|
196
|
+
user_agent_type_version VARCHAR,
|
197
|
+
feature_type_name VARCHAR,
|
198
|
+
time_key INTEGER,
|
199
|
+
total INTEGER
|
200
|
+
);
|
201
|
+
|
202
|
+
COPY visits_file_fact_stage FROM 'output_1.csv' WITH (FORMAT 'csv', HEADER true);
|
203
|
+
COPY visits_file_fact_stage FROM 'output_2.csv' WITH (FORMAT 'csv', HEADER true);
|
204
|
+
COPY visits_file_fact_stage FROM 'output_3.csv' WITH (FORMAT 'csv', HEADER true);
|
205
|
+
|
206
|
+
CREATE INDEX visits_file_fact_stage_964dac1_index ON visits_file_fact_stage (date_dimension_date_id);
|
207
|
+
CREATE INDEX visits_file_fact_stage_90fc13c_index ON visits_file_fact_stage (tenant_dimension_tenant_id);
|
208
|
+
CREATE INDEX visits_file_fact_stage_30f3cca_index ON visits_file_fact_stage (user_dimension_user_id);
|
209
|
+
CREATE INDEX visits_file_fact_stage_99c433b_index ON visits_file_fact_stage (user_agent_type_name);
|
210
|
+
CREATE INDEX visits_file_fact_stage_d5d236f_index ON visits_file_fact_stage (user_agent_type_version);
|
211
|
+
CREATE INDEX visits_file_fact_stage_5a187ed_index ON visits_file_fact_stage (feature_type_name);
|
212
|
+
CREATE INDEX visits_file_fact_stage_6444ed3_index ON visits_file_fact_stage (time_key);
|
213
|
+
EOS
|
214
|
+
end
|
215
|
+
|
216
|
+
context 'with file' do
|
217
|
+
subject(:result) { transform.define_table(source.stage_table(table: target), files.first).to_s }
|
218
|
+
it 'should eq render table template' do
|
219
|
+
is_expected.to_not be_nil
|
220
|
+
end
|
221
|
+
end
|
222
|
+
|
223
|
+
context 'with Set' do
|
224
|
+
subject(:result) { transform.define_table(source.stage_table(table: target), Set.new(files)).to_s }
|
225
|
+
it 'should eq render table template' do
|
226
|
+
is_expected.to_not be_nil
|
227
|
+
end
|
228
|
+
end
|
229
|
+
end
|
230
|
+
|
231
|
+
context 'for postgres fact with degenerate reference' do
|
232
|
+
before do
|
233
|
+
catalog.clear!
|
234
|
+
catalog.schema :postgres do
|
235
|
+
fact 'visits' do
|
236
|
+
references :message_kind, degenerate: true
|
237
|
+
measure 'count', aggregate: :sum
|
238
|
+
end
|
239
|
+
end
|
240
|
+
end
|
241
|
+
|
242
|
+
let(:target) { catalog.postgres.visits_fact }
|
243
|
+
|
244
|
+
subject(:result) { transform.define_table(target).to_s }
|
245
|
+
|
246
|
+
it 'should eq render table template' do
|
247
|
+
is_expected.to eq <<-EOS.strip_heredoc
|
248
|
+
CREATE TABLE IF NOT EXISTS visits_fact
|
249
|
+
(
|
250
|
+
message_kind_type_id INTEGER,
|
251
|
+
count INTEGER NOT NULL,
|
252
|
+
time_key INTEGER NOT NULL,
|
253
|
+
last_modified_at TIMESTAMP NOT NULL DEFAULT NOW()
|
254
|
+
);
|
255
|
+
|
256
|
+
DO $$ BEGIN
|
257
|
+
IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_2a6313d_index') THEN
|
258
|
+
CREATE INDEX visits_fact_2a6313d_index ON visits_fact (message_kind_type_id);
|
259
|
+
END IF; END $$;
|
260
|
+
|
261
|
+
DO $$ BEGIN
|
262
|
+
IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_6444ed3_index') THEN
|
263
|
+
CREATE INDEX visits_fact_6444ed3_index ON visits_fact (time_key);
|
264
|
+
END IF; END $$;
|
265
|
+
EOS
|
266
|
+
end
|
267
|
+
end
|
268
|
+
|
269
|
+
context 'for hive fact' do
|
270
|
+
let(:target) { catalog.hive.visits_hourly_fact }
|
271
|
+
|
272
|
+
subject(:result) { transform.define_table(target).to_s }
|
273
|
+
|
274
|
+
it 'should eq render table template' do
|
275
|
+
is_expected.to eq <<-EOS.strip_heredoc
|
276
|
+
CREATE TABLE IF NOT EXISTS visits_hourly_fact
|
277
|
+
(
|
278
|
+
date_dimension_date_id INT,
|
279
|
+
user_dimension_user_id INT,
|
280
|
+
group_dimension_group_id ARRAY<INT>,
|
281
|
+
user_agent_type_name STRING,
|
282
|
+
user_agent_type_version STRING,
|
283
|
+
total INT,
|
284
|
+
time_key INT
|
285
|
+
)
|
286
|
+
PARTITIONED BY (y INT, m INT, d INT)
|
287
|
+
TBLPROPERTIES ('serialization.null.format' = '');
|
288
|
+
EOS
|
289
|
+
end
|
290
|
+
end
|
291
|
+
end
|