masamune 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +54 -0
- data/Rakefile +15 -0
- data/bin/masamune-elastic-mapreduce +4 -0
- data/bin/masamune-hive +4 -0
- data/bin/masamune-psql +4 -0
- data/bin/masamune-shell +4 -0
- data/lib/masamune.rb +56 -0
- data/lib/masamune/accumulate.rb +60 -0
- data/lib/masamune/actions.rb +38 -0
- data/lib/masamune/actions/data_flow.rb +131 -0
- data/lib/masamune/actions/date_parse.rb +75 -0
- data/lib/masamune/actions/elastic_mapreduce.rb +68 -0
- data/lib/masamune/actions/execute.rb +52 -0
- data/lib/masamune/actions/filesystem.rb +37 -0
- data/lib/masamune/actions/hadoop_filesystem.rb +40 -0
- data/lib/masamune/actions/hadoop_streaming.rb +41 -0
- data/lib/masamune/actions/hive.rb +74 -0
- data/lib/masamune/actions/postgres.rb +76 -0
- data/lib/masamune/actions/postgres_admin.rb +34 -0
- data/lib/masamune/actions/s3cmd.rb +44 -0
- data/lib/masamune/actions/transform.rb +89 -0
- data/lib/masamune/after_initialize_callbacks.rb +55 -0
- data/lib/masamune/cached_filesystem.rb +110 -0
- data/lib/masamune/commands.rb +37 -0
- data/lib/masamune/commands/elastic_mapreduce.rb +119 -0
- data/lib/masamune/commands/hadoop_filesystem.rb +57 -0
- data/lib/masamune/commands/hadoop_streaming.rb +116 -0
- data/lib/masamune/commands/hive.rb +178 -0
- data/lib/masamune/commands/interactive.rb +37 -0
- data/lib/masamune/commands/postgres.rb +128 -0
- data/lib/masamune/commands/postgres_admin.rb +72 -0
- data/lib/masamune/commands/postgres_common.rb +33 -0
- data/lib/masamune/commands/retry_with_backoff.rb +60 -0
- data/lib/masamune/commands/s3cmd.rb +70 -0
- data/lib/masamune/commands/shell.rb +202 -0
- data/lib/masamune/configuration.rb +195 -0
- data/lib/masamune/data_plan.rb +31 -0
- data/lib/masamune/data_plan/builder.rb +66 -0
- data/lib/masamune/data_plan/elem.rb +190 -0
- data/lib/masamune/data_plan/engine.rb +162 -0
- data/lib/masamune/data_plan/rule.rb +292 -0
- data/lib/masamune/data_plan/set.rb +176 -0
- data/lib/masamune/environment.rb +164 -0
- data/lib/masamune/filesystem.rb +567 -0
- data/lib/masamune/has_environment.rb +40 -0
- data/lib/masamune/helpers.rb +27 -0
- data/lib/masamune/helpers/postgres.rb +84 -0
- data/lib/masamune/io.rb +33 -0
- data/lib/masamune/last_element.rb +53 -0
- data/lib/masamune/method_logger.rb +41 -0
- data/lib/masamune/multi_io.rb +39 -0
- data/lib/masamune/schema.rb +36 -0
- data/lib/masamune/schema/catalog.rb +233 -0
- data/lib/masamune/schema/column.rb +527 -0
- data/lib/masamune/schema/dimension.rb +133 -0
- data/lib/masamune/schema/event.rb +121 -0
- data/lib/masamune/schema/fact.rb +133 -0
- data/lib/masamune/schema/map.rb +265 -0
- data/lib/masamune/schema/row.rb +133 -0
- data/lib/masamune/schema/store.rb +115 -0
- data/lib/masamune/schema/table.rb +308 -0
- data/lib/masamune/schema/table_reference.rb +76 -0
- data/lib/masamune/spec_helper.rb +23 -0
- data/lib/masamune/string_format.rb +34 -0
- data/lib/masamune/tasks/elastic_mapreduce_thor.rb +60 -0
- data/lib/masamune/tasks/hive_thor.rb +55 -0
- data/lib/masamune/tasks/postgres_thor.rb +47 -0
- data/lib/masamune/tasks/shell_thor.rb +63 -0
- data/lib/masamune/template.rb +77 -0
- data/lib/masamune/thor.rb +186 -0
- data/lib/masamune/thor_loader.rb +38 -0
- data/lib/masamune/topological_hash.rb +34 -0
- data/lib/masamune/transform.rb +47 -0
- data/lib/masamune/transform/bulk_upsert.psql.erb +64 -0
- data/lib/masamune/transform/bulk_upsert.rb +52 -0
- data/lib/masamune/transform/consolidate_dimension.rb +54 -0
- data/lib/masamune/transform/deduplicate_dimension.psql.erb +52 -0
- data/lib/masamune/transform/deduplicate_dimension.rb +53 -0
- data/lib/masamune/transform/define_event_view.hql.erb +51 -0
- data/lib/masamune/transform/define_event_view.rb +60 -0
- data/lib/masamune/transform/define_index.psql.erb +34 -0
- data/lib/masamune/transform/define_schema.hql.erb +23 -0
- data/lib/masamune/transform/define_schema.psql.erb +79 -0
- data/lib/masamune/transform/define_schema.rb +56 -0
- data/lib/masamune/transform/define_table.hql.erb +34 -0
- data/lib/masamune/transform/define_table.psql.erb +95 -0
- data/lib/masamune/transform/define_table.rb +40 -0
- data/lib/masamune/transform/define_unique.psql.erb +30 -0
- data/lib/masamune/transform/insert_reference_values.psql.erb +43 -0
- data/lib/masamune/transform/insert_reference_values.rb +64 -0
- data/lib/masamune/transform/load_dimension.rb +47 -0
- data/lib/masamune/transform/load_fact.rb +45 -0
- data/lib/masamune/transform/operator.rb +96 -0
- data/lib/masamune/transform/relabel_dimension.psql.erb +76 -0
- data/lib/masamune/transform/relabel_dimension.rb +39 -0
- data/lib/masamune/transform/rollup_fact.psql.erb +79 -0
- data/lib/masamune/transform/rollup_fact.rb +149 -0
- data/lib/masamune/transform/snapshot_dimension.psql.erb +75 -0
- data/lib/masamune/transform/snapshot_dimension.rb +74 -0
- data/lib/masamune/transform/stage_dimension.psql.erb +39 -0
- data/lib/masamune/transform/stage_dimension.rb +83 -0
- data/lib/masamune/transform/stage_fact.psql.erb +80 -0
- data/lib/masamune/transform/stage_fact.rb +111 -0
- data/lib/masamune/version.rb +25 -0
- data/spec/fixtures/aggregate.sql.erb +25 -0
- data/spec/fixtures/comment.sql.erb +27 -0
- data/spec/fixtures/invalid.sql.erb +23 -0
- data/spec/fixtures/relative.sql.erb +23 -0
- data/spec/fixtures/simple.sql.erb +28 -0
- data/spec/fixtures/whitespace.sql.erb +30 -0
- data/spec/masamune/actions/elastic_mapreduce_spec.rb +108 -0
- data/spec/masamune/actions/execute_spec.rb +50 -0
- data/spec/masamune/actions/hadoop_filesystem_spec.rb +44 -0
- data/spec/masamune/actions/hadoop_streaming_spec.rb +74 -0
- data/spec/masamune/actions/hive_spec.rb +117 -0
- data/spec/masamune/actions/postgres_admin_spec.rb +58 -0
- data/spec/masamune/actions/postgres_spec.rb +134 -0
- data/spec/masamune/actions/s3cmd_spec.rb +44 -0
- data/spec/masamune/actions/transform_spec.rb +144 -0
- data/spec/masamune/after_initialization_callbacks_spec.rb +61 -0
- data/spec/masamune/cached_filesystem_spec.rb +167 -0
- data/spec/masamune/commands/hadoop_filesystem_spec.rb +50 -0
- data/spec/masamune/commands/hadoop_streaming_spec.rb +106 -0
- data/spec/masamune/commands/hive_spec.rb +117 -0
- data/spec/masamune/commands/postgres_admin_spec.rb +69 -0
- data/spec/masamune/commands/postgres_spec.rb +100 -0
- data/spec/masamune/commands/retry_with_backoff_spec.rb +116 -0
- data/spec/masamune/commands/s3cmd_spec.rb +50 -0
- data/spec/masamune/commands/shell_spec.rb +101 -0
- data/spec/masamune/configuration_spec.rb +102 -0
- data/spec/masamune/data_plan/builder_spec.rb +91 -0
- data/spec/masamune/data_plan/elem_spec.rb +102 -0
- data/spec/masamune/data_plan/engine_spec.rb +356 -0
- data/spec/masamune/data_plan/rule_spec.rb +407 -0
- data/spec/masamune/data_plan/set_spec.rb +517 -0
- data/spec/masamune/environment_spec.rb +65 -0
- data/spec/masamune/filesystem_spec.rb +1421 -0
- data/spec/masamune/helpers/postgres_spec.rb +95 -0
- data/spec/masamune/schema/catalog_spec.rb +613 -0
- data/spec/masamune/schema/column_spec.rb +696 -0
- data/spec/masamune/schema/dimension_spec.rb +137 -0
- data/spec/masamune/schema/event_spec.rb +75 -0
- data/spec/masamune/schema/fact_spec.rb +117 -0
- data/spec/masamune/schema/map_spec.rb +593 -0
- data/spec/masamune/schema/row_spec.rb +28 -0
- data/spec/masamune/schema/store_spec.rb +49 -0
- data/spec/masamune/schema/table_spec.rb +395 -0
- data/spec/masamune/string_format_spec.rb +60 -0
- data/spec/masamune/tasks/elastic_mapreduce_thor_spec.rb +57 -0
- data/spec/masamune/tasks/hive_thor_spec.rb +75 -0
- data/spec/masamune/tasks/postgres_thor_spec.rb +42 -0
- data/spec/masamune/tasks/shell_thor_spec.rb +51 -0
- data/spec/masamune/template_spec.rb +77 -0
- data/spec/masamune/thor_spec.rb +238 -0
- data/spec/masamune/transform/bulk_upsert.dimension_spec.rb +200 -0
- data/spec/masamune/transform/consolidate_dimension_spec.rb +62 -0
- data/spec/masamune/transform/deduplicate_dimension_spec.rb +84 -0
- data/spec/masamune/transform/define_event_view_spec.rb +84 -0
- data/spec/masamune/transform/define_schema_spec.rb +83 -0
- data/spec/masamune/transform/define_table.dimension_spec.rb +306 -0
- data/spec/masamune/transform/define_table.fact_spec.rb +291 -0
- data/spec/masamune/transform/define_table.table_spec.rb +525 -0
- data/spec/masamune/transform/insert_reference_values.dimension_spec.rb +111 -0
- data/spec/masamune/transform/insert_reference_values.fact_spec.rb +149 -0
- data/spec/masamune/transform/load_dimension_spec.rb +76 -0
- data/spec/masamune/transform/load_fact_spec.rb +89 -0
- data/spec/masamune/transform/relabel_dimension_spec.rb +102 -0
- data/spec/masamune/transform/rollup_fact_spec.rb +333 -0
- data/spec/masamune/transform/snapshot_dimension_spec.rb +103 -0
- data/spec/masamune/transform/stage_dimension_spec.rb +115 -0
- data/spec/masamune/transform/stage_fact_spec.rb +204 -0
- data/spec/masamune_spec.rb +32 -0
- data/spec/spec_helper.rb +41 -0
- data/spec/support/masamune/example_group.rb +36 -0
- data/spec/support/masamune/mock_command.rb +99 -0
- data/spec/support/masamune/mock_delegate.rb +51 -0
- data/spec/support/masamune/mock_filesystem.rb +96 -0
- data/spec/support/masamune/thor_mute.rb +35 -0
- data/spec/support/rspec/example/action_example_group.rb +34 -0
- data/spec/support/rspec/example/task_example_group.rb +80 -0
- data/spec/support/rspec/example/transform_example_group.rb +36 -0
- data/spec/support/shared_examples/postgres_common_examples.rb +53 -0
- metadata +462 -0
@@ -0,0 +1,306 @@
# The MIT License (MIT)
#
# Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
|
22
|
+
|
23
|
+
require 'spec_helper'
|
24
|
+
|
25
|
+
# Specs for the DDL that Masamune::Transform::DefineTable renders for dimension
# tables declared through the catalog schema DSL.
#
# NOTE(review): the relative indentation inside the expected-SQL heredocs could
# not be recovered from the source this spec was restored from, so every heredoc
# line sits at the same depth. Confirm against the rendered templates.
describe Masamune::Transform::DefineTable do
  # Every example compares against the rendered template as a plain String.
  subject { transform.define_table(table).to_s }

  context 'for hive implicit dimension' do
    let(:table) { catalog.hive.user_dimension }

    before do
      catalog.schema :hive do
        dimension 'user', implicit: true do
          column 'user_id', natural_key: true
        end
      end
    end

    # The implicit dimension is expected to render as an empty string.
    it 'should not render table template' do
      expect(subject).to eq('')
    end
  end

  context 'for postgres dimension type: one' do
    let(:table) { catalog.postgres.user_dimension }

    let(:expected_ddl) do
      <<-EOS.strip_heredoc
        CREATE TABLE IF NOT EXISTS user_dimension
        (
        id SERIAL PRIMARY KEY,
        tenant_id INTEGER NOT NULL,
        user_id INTEGER NOT NULL,
        last_modified_at TIMESTAMP NOT NULL DEFAULT NOW()
        );
      EOS
    end

    before do
      catalog.schema :postgres do
        dimension 'user', type: :one do
          column 'tenant_id'
          column 'user_id'
        end
      end
    end

    it 'should render table template' do
      expect(subject).to eq(expected_ddl)
    end
  end

  context 'for postgres dimension type: two' do
    let(:table) { catalog.postgres.user_dimension }

    # Table definition followed by one guarded unique constraint and one
    # guarded index per indexed column.
    let(:expected_ddl) do
      <<-EOS.strip_heredoc
        CREATE TABLE IF NOT EXISTS user_dimension
        (
        id SERIAL PRIMARY KEY,
        tenant_id INTEGER NOT NULL,
        user_id INTEGER NOT NULL,
        start_at TIMESTAMP NOT NULL DEFAULT TO_TIMESTAMP(0),
        end_at TIMESTAMP,
        version INTEGER DEFAULT 1,
        last_modified_at TIMESTAMP NOT NULL DEFAULT NOW()
        );

        DO $$ BEGIN
        IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_e6c3d91_key') THEN
        ALTER TABLE user_dimension ADD CONSTRAINT user_dimension_e6c3d91_key UNIQUE(tenant_id, user_id, start_at);
        END IF; END $$;

        DO $$ BEGIN
        IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_3854361_index') THEN
        CREATE INDEX user_dimension_3854361_index ON user_dimension (tenant_id);
        END IF; END $$;

        DO $$ BEGIN
        IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_e8701ad_index') THEN
        CREATE INDEX user_dimension_e8701ad_index ON user_dimension (user_id);
        END IF; END $$;

        DO $$ BEGIN
        IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_23563d3_index') THEN
        CREATE INDEX user_dimension_23563d3_index ON user_dimension (start_at);
        END IF; END $$;

        DO $$ BEGIN
        IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_2c8e908_index') THEN
        CREATE INDEX user_dimension_2c8e908_index ON user_dimension (end_at);
        END IF; END $$;

        DO $$ BEGIN
        IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_2af72f1_index') THEN
        CREATE INDEX user_dimension_2af72f1_index ON user_dimension (version);
        END IF; END $$;
      EOS
    end

    before do
      catalog.schema :postgres do
        dimension 'user', type: :two do
          column 'tenant_id', index: true, natural_key: true
          column 'user_id', index: true, natural_key: true
        end
      end
    end

    it 'should render table template' do
      expect(subject).to eq(expected_ddl)
    end
  end

  context 'for postgres dimension type: four' do
    let(:table) { catalog.postgres.user_dimension }

    # The expected output defines the ledger table first and then the
    # dimension table whose parent_id/record_id columns reference it.
    let(:expected_ddl) do
      <<-EOS.strip_heredoc
        CREATE TABLE IF NOT EXISTS user_dimension_ledger
        (
        id SERIAL PRIMARY KEY,
        cluster_type_id INTEGER NOT NULL REFERENCES cluster_type(id) DEFAULT default_cluster_type_id(),
        user_account_state_type_id INTEGER REFERENCES user_account_state_type(id),
        tenant_id INTEGER NOT NULL,
        user_id INTEGER NOT NULL,
        preferences_now HSTORE,
        preferences_was HSTORE,
        source_kind VARCHAR NOT NULL,
        source_uuid VARCHAR NOT NULL,
        start_at TIMESTAMP NOT NULL,
        last_modified_at TIMESTAMP NOT NULL DEFAULT NOW(),
        delta INTEGER NOT NULL
        );

        DO $$ BEGIN
        IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_ledger_370d6dd_key') THEN
        ALTER TABLE user_dimension_ledger ADD CONSTRAINT user_dimension_ledger_370d6dd_key UNIQUE(tenant_id, user_id, source_kind, source_uuid, start_at);
        END IF; END $$;

        DO $$ BEGIN
        IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_ledger_d6b9b38_index') THEN
        CREATE INDEX user_dimension_ledger_d6b9b38_index ON user_dimension_ledger (cluster_type_id);
        END IF; END $$;

        DO $$ BEGIN
        IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_ledger_7988187_index') THEN
        CREATE INDEX user_dimension_ledger_7988187_index ON user_dimension_ledger (user_account_state_type_id);
        END IF; END $$;

        DO $$ BEGIN
        IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_ledger_3854361_index') THEN
        CREATE INDEX user_dimension_ledger_3854361_index ON user_dimension_ledger (tenant_id);
        END IF; END $$;

        DO $$ BEGIN
        IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_ledger_e8701ad_index') THEN
        CREATE INDEX user_dimension_ledger_e8701ad_index ON user_dimension_ledger (user_id);
        END IF; END $$;

        DO $$ BEGIN
        IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_ledger_23563d3_index') THEN
        CREATE INDEX user_dimension_ledger_23563d3_index ON user_dimension_ledger (start_at);
        END IF; END $$;

        CREATE TABLE IF NOT EXISTS user_dimension
        (
        id SERIAL PRIMARY KEY,
        cluster_type_id INTEGER NOT NULL REFERENCES cluster_type(id) DEFAULT default_cluster_type_id(),
        user_account_state_type_id INTEGER NOT NULL REFERENCES user_account_state_type(id) DEFAULT default_user_account_state_type_id(),
        tenant_id INTEGER NOT NULL,
        user_id INTEGER NOT NULL,
        preferences HSTORE,
        parent_id INTEGER REFERENCES user_dimension_ledger(id),
        record_id INTEGER REFERENCES user_dimension_ledger(id),
        start_at TIMESTAMP NOT NULL DEFAULT TO_TIMESTAMP(0),
        end_at TIMESTAMP,
        version INTEGER DEFAULT 1,
        last_modified_at TIMESTAMP NOT NULL DEFAULT NOW()
        );

        DO $$ BEGIN
        IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_e6c3d91_key') THEN
        ALTER TABLE user_dimension ADD CONSTRAINT user_dimension_e6c3d91_key UNIQUE(tenant_id, user_id, start_at);
        END IF; END $$;

        DO $$ BEGIN
        IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_d6b9b38_index') THEN
        CREATE INDEX user_dimension_d6b9b38_index ON user_dimension (cluster_type_id);
        END IF; END $$;

        DO $$ BEGIN
        IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_7988187_index') THEN
        CREATE INDEX user_dimension_7988187_index ON user_dimension (user_account_state_type_id);
        END IF; END $$;

        DO $$ BEGIN
        IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_3854361_index') THEN
        CREATE INDEX user_dimension_3854361_index ON user_dimension (tenant_id);
        END IF; END $$;

        DO $$ BEGIN
        IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_e8701ad_index') THEN
        CREATE INDEX user_dimension_e8701ad_index ON user_dimension (user_id);
        END IF; END $$;

        DO $$ BEGIN
        IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_23563d3_index') THEN
        CREATE INDEX user_dimension_23563d3_index ON user_dimension (start_at);
        END IF; END $$;

        DO $$ BEGIN
        IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_2c8e908_index') THEN
        CREATE INDEX user_dimension_2c8e908_index ON user_dimension (end_at);
        END IF; END $$;

        DO $$ BEGIN
        IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_2af72f1_index') THEN
        CREATE INDEX user_dimension_2af72f1_index ON user_dimension (version);
        END IF; END $$;
      EOS
    end

    before do
      catalog.schema :postgres do
        dimension 'cluster', type: :mini do
          column 'id', type: :integer, surrogate_key: true, auto: true
          column 'name', type: :string, unique: true
          row name: 'default', attributes: { default: true }
        end

        dimension 'user_account_state', type: :mini do
          column 'name', type: :string, unique: true
          column 'description', type: :string
          row name: 'active', description: 'Active', attributes: { default: true }
        end

        dimension 'user', type: :four do
          references :cluster
          references :user_account_state
          column 'tenant_id', index: true, natural_key: true
          column 'user_id', index: true, natural_key: true
          column 'preferences', type: :key_value, null: true
        end
      end
    end

    it 'should render table template' do
      expect(subject).to eq(expected_ddl)
    end
  end

  context 'for postgres dimension type: four stage table' do
    let(:table) { catalog.postgres.user_dimension.stage_table(suffix: 'consolidated_forward') }

    # The stage table is expected as a TEMPORARY table with plain
    # (unguarded) CREATE INDEX statements.
    let(:expected_ddl) do
      <<-EOS.strip_heredoc
        CREATE TEMPORARY TABLE IF NOT EXISTS user_consolidated_forward_dimension_stage
        (
        user_account_state_type_id INTEGER DEFAULT default_user_account_state_type_id(),
        tenant_id INTEGER,
        user_id INTEGER,
        preferences HSTORE,
        parent_id INTEGER,
        record_id INTEGER,
        start_at TIMESTAMP DEFAULT TO_TIMESTAMP(0),
        end_at TIMESTAMP,
        version INTEGER DEFAULT 1,
        last_modified_at TIMESTAMP DEFAULT NOW()
        );

        CREATE INDEX user_consolidated_forward_dimension_stage_7988187_index ON user_consolidated_forward_dimension_stage (user_account_state_type_id);
        CREATE INDEX user_consolidated_forward_dimension_stage_3854361_index ON user_consolidated_forward_dimension_stage (tenant_id);
        CREATE INDEX user_consolidated_forward_dimension_stage_e8701ad_index ON user_consolidated_forward_dimension_stage (user_id);
        CREATE INDEX user_consolidated_forward_dimension_stage_23563d3_index ON user_consolidated_forward_dimension_stage (start_at);
        CREATE INDEX user_consolidated_forward_dimension_stage_2c8e908_index ON user_consolidated_forward_dimension_stage (end_at);
        CREATE INDEX user_consolidated_forward_dimension_stage_2af72f1_index ON user_consolidated_forward_dimension_stage (version);
      EOS
    end

    before do
      catalog.schema :postgres do
        dimension 'user_account_state', type: :mini do
          column 'name', type: :string, unique: true
          column 'description', type: :string
          row name: 'active', description: 'Active', attributes: { default: true }
        end

        dimension 'user', type: :four do
          references :user_account_state
          column 'tenant_id', index: true, natural_key: true
          column 'user_id', index: true, natural_key: true
          column 'preferences', type: :key_value, null: true
        end
      end
    end

    it 'should render table template' do
      expect(subject).to eq(expected_ddl)
    end
  end
end
|
@@ -0,0 +1,291 @@
# The MIT License (MIT)
#
# Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
|
22
|
+
|
23
|
+
require 'spec_helper'
|
24
|
+
|
25
|
+
# Specs for the DDL that Masamune::Transform::DefineTable renders for fact
# tables (postgres and hive) and for file-backed stage tables.
#
# NOTE(review): the relative indentation inside the expected-SQL heredocs could
# not be recovered from the source this spec was restored from, so every heredoc
# line sits at the same depth. Confirm against the rendered templates.
describe Masamune::Transform::DefineTable do
  # Shared catalog: a postgres schema (dimensions, the 'visits' fact and a
  # 'visits' file) plus a hive schema with an hourly 'visits' fact.
  before do
    catalog.schema :postgres do
      dimension 'cluster', type: :mini do
        column 'id', type: :sequence, surrogate_key: true, auto: true
        column 'name', type: :string

        row name: 'current_database()', attributes: {default: true}
      end

      dimension 'date', type: :date do
        column 'date_id', type: :integer, unique: true, index: true, natural_key: true
      end

      dimension 'user_agent', type: :mini do
        column 'name', type: :string, unique: true, index: 'shared'
        column 'version', type: :string, unique: true, index: 'shared', default: 'Unknown'
        column 'description', type: :string, null: true, ignore: true
      end

      dimension 'feature', type: :mini do
        column 'name', type: :string, unique: true, index: true
      end

      dimension 'tenant', type: :two do
        column 'tenant_id', type: :integer, index: true, natural_key: true
      end

      dimension 'user', type: :two do
        column 'tenant_id', type: :integer, index: true, natural_key: true
        column 'user_id', type: :integer, index: true, natural_key: true
      end

      dimension 'group', type: :two do
        column 'group_id', type: :integer, natural_key: true
      end

      fact 'visits', partition: 'y%Ym%m' do
        references :cluster
        references :date
        references :tenant
        references :user
        references :group, multiple: true
        references :user_agent, insert: true
        references :feature, insert: true
        measure 'total', type: :integer
      end

      file 'visits' do
        column 'date.date_id', type: :integer
        column 'tenant.tenant_id', type: :integer
        column 'user.user_id', type: :integer
        column 'user_agent.name', type: :string
        column 'user_agent.version', type: :string
        column 'feature.name', type: :string
        column 'time_key', type: :integer
        column 'total', type: :integer
      end
    end

    catalog.schema :hive do
      dimension 'date', type: :date, implicit: true do
        column 'date_id', type: :integer, natural_key: true
      end

      dimension 'user', type: :two, implicit: true do
        column 'user_id', type: :integer, natural_key: true
      end

      dimension 'group', type: :two, implicit: true do
        column 'group_id', type: :integer, natural_key: true
      end

      dimension 'user_agent', type: :mini do
        column 'name', type: :string
        column 'version', type: :string
        column 'description', type: :string, ignore: true
      end

      fact 'visits', grain: :hourly do
        partition :y
        partition :m
        partition :d
        references :date
        references :user
        references :group, multiple: true
        references :user_agent, denormalize: true
        measure 'total'
      end
    end
  end

  # Default subject: render the table named by each context's `target`.
  # `target` is resolved lazily, so every context only has to supply its own
  # `let(:target)`; the file-based group below overrides the subject.
  subject(:result) { transform.define_table(target).to_s }

  context 'for postgres fact' do
    let(:target) { catalog.postgres.visits_fact }

    it 'should eq render table template' do
      is_expected.to eq <<-EOS.strip_heredoc
        CREATE TABLE IF NOT EXISTS visits_fact
        (
        cluster_type_id INTEGER NOT NULL REFERENCES cluster_type(id) DEFAULT default_cluster_type_id(),
        date_dimension_id INTEGER NOT NULL REFERENCES date_dimension(id),
        tenant_dimension_id INTEGER NOT NULL REFERENCES tenant_dimension(id),
        user_dimension_id INTEGER NOT NULL REFERENCES user_dimension(id),
        group_dimension_id INTEGER[] NOT NULL,
        user_agent_type_id INTEGER NOT NULL REFERENCES user_agent_type(id),
        feature_type_id INTEGER NOT NULL REFERENCES feature_type(id),
        total INTEGER NOT NULL,
        time_key INTEGER NOT NULL,
        last_modified_at TIMESTAMP NOT NULL DEFAULT NOW()
        );

        DO $$ BEGIN
        IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_d6b9b38_index') THEN
        CREATE INDEX visits_fact_d6b9b38_index ON visits_fact (cluster_type_id);
        END IF; END $$;

        DO $$ BEGIN
        IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_0a531a8_index') THEN
        CREATE INDEX visits_fact_0a531a8_index ON visits_fact (date_dimension_id);
        END IF; END $$;

        DO $$ BEGIN
        IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_d3950d9_index') THEN
        CREATE INDEX visits_fact_d3950d9_index ON visits_fact (tenant_dimension_id);
        END IF; END $$;

        DO $$ BEGIN
        IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_39f0fdd_index') THEN
        CREATE INDEX visits_fact_39f0fdd_index ON visits_fact (user_dimension_id);
        END IF; END $$;

        DO $$ BEGIN
        IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_e0d2a9e_index') THEN
        CREATE INDEX visits_fact_e0d2a9e_index ON visits_fact (group_dimension_id);
        END IF; END $$;

        DO $$ BEGIN
        IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_d8b1c3e_index') THEN
        CREATE INDEX visits_fact_d8b1c3e_index ON visits_fact (user_agent_type_id);
        END IF; END $$;

        DO $$ BEGIN
        IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_33b68fd_index') THEN
        CREATE INDEX visits_fact_33b68fd_index ON visits_fact (feature_type_id);
        END IF; END $$;

        DO $$ BEGIN
        IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_6444ed3_index') THEN
        CREATE INDEX visits_fact_6444ed3_index ON visits_fact (time_key);
        END IF; END $$;
      EOS
    end
  end

  describe 'for fact table from file with sources files' do
    # Three stand-in file objects that only answer #path.
    let(:files) { (1..3).map { |i| double(path: "output_#{i}.csv") } }
    let(:target) { catalog.postgres.visits_fact }
    let(:source) { catalog.postgres.visits_file }

    subject(:result) { transform.define_table(source.stage_table(suffix: 'file', table: target, inherit: false), files).to_s }

    # The expected output has one COPY statement per file, placed between the
    # table definition and the index definitions.
    it 'should eq render table template' do
      is_expected.to eq <<-EOS.strip_heredoc
        CREATE TEMPORARY TABLE IF NOT EXISTS visits_file_fact_stage
        (
        date_dimension_date_id INTEGER,
        tenant_dimension_tenant_id INTEGER,
        user_dimension_user_id INTEGER,
        user_agent_type_name VARCHAR,
        user_agent_type_version VARCHAR,
        feature_type_name VARCHAR,
        time_key INTEGER,
        total INTEGER
        );

        COPY visits_file_fact_stage FROM 'output_1.csv' WITH (FORMAT 'csv', HEADER true);
        COPY visits_file_fact_stage FROM 'output_2.csv' WITH (FORMAT 'csv', HEADER true);
        COPY visits_file_fact_stage FROM 'output_3.csv' WITH (FORMAT 'csv', HEADER true);

        CREATE INDEX visits_file_fact_stage_964dac1_index ON visits_file_fact_stage (date_dimension_date_id);
        CREATE INDEX visits_file_fact_stage_90fc13c_index ON visits_file_fact_stage (tenant_dimension_tenant_id);
        CREATE INDEX visits_file_fact_stage_30f3cca_index ON visits_file_fact_stage (user_dimension_user_id);
        CREATE INDEX visits_file_fact_stage_99c433b_index ON visits_file_fact_stage (user_agent_type_name);
        CREATE INDEX visits_file_fact_stage_d5d236f_index ON visits_file_fact_stage (user_agent_type_version);
        CREATE INDEX visits_file_fact_stage_5a187ed_index ON visits_file_fact_stage (feature_type_name);
        CREATE INDEX visits_file_fact_stage_6444ed3_index ON visits_file_fact_stage (time_key);
      EOS
    end

    # The two contexts below pass something other than an Array as the files
    # argument. The subject is the result of #to_s and therefore never nil, so
    # a `be_nil` check could not fail; assert that something was rendered.
    # NOTE(review): assumes the stage table renders non-empty DDL - confirm.
    context 'with file' do
      subject(:result) { transform.define_table(source.stage_table(table: target), files.first).to_s }

      it 'should eq render table template' do
        is_expected.to_not be_empty
      end
    end

    context 'with Set' do
      subject(:result) { transform.define_table(source.stage_table(table: target), Set.new(files)).to_s }

      it 'should eq render table template' do
        is_expected.to_not be_empty
      end
    end
  end

  context 'for postgres fact with degenerate reference' do
    # Runs after the shared setup: clear the catalog, then declare only the
    # fact under test.
    before do
      catalog.clear!
      catalog.schema :postgres do
        fact 'visits' do
          references :message_kind, degenerate: true
          measure 'count', aggregate: :sum
        end
      end
    end

    let(:target) { catalog.postgres.visits_fact }

    # The degenerate reference is expected as a plain INTEGER column with no
    # NOT NULL or REFERENCES clause.
    it 'should eq render table template' do
      is_expected.to eq <<-EOS.strip_heredoc
        CREATE TABLE IF NOT EXISTS visits_fact
        (
        message_kind_type_id INTEGER,
        count INTEGER NOT NULL,
        time_key INTEGER NOT NULL,
        last_modified_at TIMESTAMP NOT NULL DEFAULT NOW()
        );

        DO $$ BEGIN
        IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_2a6313d_index') THEN
        CREATE INDEX visits_fact_2a6313d_index ON visits_fact (message_kind_type_id);
        END IF; END $$;

        DO $$ BEGIN
        IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_6444ed3_index') THEN
        CREATE INDEX visits_fact_6444ed3_index ON visits_fact (time_key);
        END IF; END $$;
      EOS
    end
  end

  context 'for hive fact' do
    let(:target) { catalog.hive.visits_hourly_fact }

    it 'should eq render table template' do
      is_expected.to eq <<-EOS.strip_heredoc
        CREATE TABLE IF NOT EXISTS visits_hourly_fact
        (
        date_dimension_date_id INT,
        user_dimension_user_id INT,
        group_dimension_group_id ARRAY<INT>,
        user_agent_type_name STRING,
        user_agent_type_version STRING,
        total INT,
        time_key INT
        )
        PARTITIONED BY (y INT, m INT, d INT)
        TBLPROPERTIES ('serialization.null.format' = '');
      EOS
    end
  end
end
|