masamune 0.11.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (185)
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +54 -0
  4. data/Rakefile +15 -0
  5. data/bin/masamune-elastic-mapreduce +4 -0
  6. data/bin/masamune-hive +4 -0
  7. data/bin/masamune-psql +4 -0
  8. data/bin/masamune-shell +4 -0
  9. data/lib/masamune.rb +56 -0
  10. data/lib/masamune/accumulate.rb +60 -0
  11. data/lib/masamune/actions.rb +38 -0
  12. data/lib/masamune/actions/data_flow.rb +131 -0
  13. data/lib/masamune/actions/date_parse.rb +75 -0
  14. data/lib/masamune/actions/elastic_mapreduce.rb +68 -0
  15. data/lib/masamune/actions/execute.rb +52 -0
  16. data/lib/masamune/actions/filesystem.rb +37 -0
  17. data/lib/masamune/actions/hadoop_filesystem.rb +40 -0
  18. data/lib/masamune/actions/hadoop_streaming.rb +41 -0
  19. data/lib/masamune/actions/hive.rb +74 -0
  20. data/lib/masamune/actions/postgres.rb +76 -0
  21. data/lib/masamune/actions/postgres_admin.rb +34 -0
  22. data/lib/masamune/actions/s3cmd.rb +44 -0
  23. data/lib/masamune/actions/transform.rb +89 -0
  24. data/lib/masamune/after_initialize_callbacks.rb +55 -0
  25. data/lib/masamune/cached_filesystem.rb +110 -0
  26. data/lib/masamune/commands.rb +37 -0
  27. data/lib/masamune/commands/elastic_mapreduce.rb +119 -0
  28. data/lib/masamune/commands/hadoop_filesystem.rb +57 -0
  29. data/lib/masamune/commands/hadoop_streaming.rb +116 -0
  30. data/lib/masamune/commands/hive.rb +178 -0
  31. data/lib/masamune/commands/interactive.rb +37 -0
  32. data/lib/masamune/commands/postgres.rb +128 -0
  33. data/lib/masamune/commands/postgres_admin.rb +72 -0
  34. data/lib/masamune/commands/postgres_common.rb +33 -0
  35. data/lib/masamune/commands/retry_with_backoff.rb +60 -0
  36. data/lib/masamune/commands/s3cmd.rb +70 -0
  37. data/lib/masamune/commands/shell.rb +202 -0
  38. data/lib/masamune/configuration.rb +195 -0
  39. data/lib/masamune/data_plan.rb +31 -0
  40. data/lib/masamune/data_plan/builder.rb +66 -0
  41. data/lib/masamune/data_plan/elem.rb +190 -0
  42. data/lib/masamune/data_plan/engine.rb +162 -0
  43. data/lib/masamune/data_plan/rule.rb +292 -0
  44. data/lib/masamune/data_plan/set.rb +176 -0
  45. data/lib/masamune/environment.rb +164 -0
  46. data/lib/masamune/filesystem.rb +567 -0
  47. data/lib/masamune/has_environment.rb +40 -0
  48. data/lib/masamune/helpers.rb +27 -0
  49. data/lib/masamune/helpers/postgres.rb +84 -0
  50. data/lib/masamune/io.rb +33 -0
  51. data/lib/masamune/last_element.rb +53 -0
  52. data/lib/masamune/method_logger.rb +41 -0
  53. data/lib/masamune/multi_io.rb +39 -0
  54. data/lib/masamune/schema.rb +36 -0
  55. data/lib/masamune/schema/catalog.rb +233 -0
  56. data/lib/masamune/schema/column.rb +527 -0
  57. data/lib/masamune/schema/dimension.rb +133 -0
  58. data/lib/masamune/schema/event.rb +121 -0
  59. data/lib/masamune/schema/fact.rb +133 -0
  60. data/lib/masamune/schema/map.rb +265 -0
  61. data/lib/masamune/schema/row.rb +133 -0
  62. data/lib/masamune/schema/store.rb +115 -0
  63. data/lib/masamune/schema/table.rb +308 -0
  64. data/lib/masamune/schema/table_reference.rb +76 -0
  65. data/lib/masamune/spec_helper.rb +23 -0
  66. data/lib/masamune/string_format.rb +34 -0
  67. data/lib/masamune/tasks/elastic_mapreduce_thor.rb +60 -0
  68. data/lib/masamune/tasks/hive_thor.rb +55 -0
  69. data/lib/masamune/tasks/postgres_thor.rb +47 -0
  70. data/lib/masamune/tasks/shell_thor.rb +63 -0
  71. data/lib/masamune/template.rb +77 -0
  72. data/lib/masamune/thor.rb +186 -0
  73. data/lib/masamune/thor_loader.rb +38 -0
  74. data/lib/masamune/topological_hash.rb +34 -0
  75. data/lib/masamune/transform.rb +47 -0
  76. data/lib/masamune/transform/bulk_upsert.psql.erb +64 -0
  77. data/lib/masamune/transform/bulk_upsert.rb +52 -0
  78. data/lib/masamune/transform/consolidate_dimension.rb +54 -0
  79. data/lib/masamune/transform/deduplicate_dimension.psql.erb +52 -0
  80. data/lib/masamune/transform/deduplicate_dimension.rb +53 -0
  81. data/lib/masamune/transform/define_event_view.hql.erb +51 -0
  82. data/lib/masamune/transform/define_event_view.rb +60 -0
  83. data/lib/masamune/transform/define_index.psql.erb +34 -0
  84. data/lib/masamune/transform/define_schema.hql.erb +23 -0
  85. data/lib/masamune/transform/define_schema.psql.erb +79 -0
  86. data/lib/masamune/transform/define_schema.rb +56 -0
  87. data/lib/masamune/transform/define_table.hql.erb +34 -0
  88. data/lib/masamune/transform/define_table.psql.erb +95 -0
  89. data/lib/masamune/transform/define_table.rb +40 -0
  90. data/lib/masamune/transform/define_unique.psql.erb +30 -0
  91. data/lib/masamune/transform/insert_reference_values.psql.erb +43 -0
  92. data/lib/masamune/transform/insert_reference_values.rb +64 -0
  93. data/lib/masamune/transform/load_dimension.rb +47 -0
  94. data/lib/masamune/transform/load_fact.rb +45 -0
  95. data/lib/masamune/transform/operator.rb +96 -0
  96. data/lib/masamune/transform/relabel_dimension.psql.erb +76 -0
  97. data/lib/masamune/transform/relabel_dimension.rb +39 -0
  98. data/lib/masamune/transform/rollup_fact.psql.erb +79 -0
  99. data/lib/masamune/transform/rollup_fact.rb +149 -0
  100. data/lib/masamune/transform/snapshot_dimension.psql.erb +75 -0
  101. data/lib/masamune/transform/snapshot_dimension.rb +74 -0
  102. data/lib/masamune/transform/stage_dimension.psql.erb +39 -0
  103. data/lib/masamune/transform/stage_dimension.rb +83 -0
  104. data/lib/masamune/transform/stage_fact.psql.erb +80 -0
  105. data/lib/masamune/transform/stage_fact.rb +111 -0
  106. data/lib/masamune/version.rb +25 -0
  107. data/spec/fixtures/aggregate.sql.erb +25 -0
  108. data/spec/fixtures/comment.sql.erb +27 -0
  109. data/spec/fixtures/invalid.sql.erb +23 -0
  110. data/spec/fixtures/relative.sql.erb +23 -0
  111. data/spec/fixtures/simple.sql.erb +28 -0
  112. data/spec/fixtures/whitespace.sql.erb +30 -0
  113. data/spec/masamune/actions/elastic_mapreduce_spec.rb +108 -0
  114. data/spec/masamune/actions/execute_spec.rb +50 -0
  115. data/spec/masamune/actions/hadoop_filesystem_spec.rb +44 -0
  116. data/spec/masamune/actions/hadoop_streaming_spec.rb +74 -0
  117. data/spec/masamune/actions/hive_spec.rb +117 -0
  118. data/spec/masamune/actions/postgres_admin_spec.rb +58 -0
  119. data/spec/masamune/actions/postgres_spec.rb +134 -0
  120. data/spec/masamune/actions/s3cmd_spec.rb +44 -0
  121. data/spec/masamune/actions/transform_spec.rb +144 -0
  122. data/spec/masamune/after_initialization_callbacks_spec.rb +61 -0
  123. data/spec/masamune/cached_filesystem_spec.rb +167 -0
  124. data/spec/masamune/commands/hadoop_filesystem_spec.rb +50 -0
  125. data/spec/masamune/commands/hadoop_streaming_spec.rb +106 -0
  126. data/spec/masamune/commands/hive_spec.rb +117 -0
  127. data/spec/masamune/commands/postgres_admin_spec.rb +69 -0
  128. data/spec/masamune/commands/postgres_spec.rb +100 -0
  129. data/spec/masamune/commands/retry_with_backoff_spec.rb +116 -0
  130. data/spec/masamune/commands/s3cmd_spec.rb +50 -0
  131. data/spec/masamune/commands/shell_spec.rb +101 -0
  132. data/spec/masamune/configuration_spec.rb +102 -0
  133. data/spec/masamune/data_plan/builder_spec.rb +91 -0
  134. data/spec/masamune/data_plan/elem_spec.rb +102 -0
  135. data/spec/masamune/data_plan/engine_spec.rb +356 -0
  136. data/spec/masamune/data_plan/rule_spec.rb +407 -0
  137. data/spec/masamune/data_plan/set_spec.rb +517 -0
  138. data/spec/masamune/environment_spec.rb +65 -0
  139. data/spec/masamune/filesystem_spec.rb +1421 -0
  140. data/spec/masamune/helpers/postgres_spec.rb +95 -0
  141. data/spec/masamune/schema/catalog_spec.rb +613 -0
  142. data/spec/masamune/schema/column_spec.rb +696 -0
  143. data/spec/masamune/schema/dimension_spec.rb +137 -0
  144. data/spec/masamune/schema/event_spec.rb +75 -0
  145. data/spec/masamune/schema/fact_spec.rb +117 -0
  146. data/spec/masamune/schema/map_spec.rb +593 -0
  147. data/spec/masamune/schema/row_spec.rb +28 -0
  148. data/spec/masamune/schema/store_spec.rb +49 -0
  149. data/spec/masamune/schema/table_spec.rb +395 -0
  150. data/spec/masamune/string_format_spec.rb +60 -0
  151. data/spec/masamune/tasks/elastic_mapreduce_thor_spec.rb +57 -0
  152. data/spec/masamune/tasks/hive_thor_spec.rb +75 -0
  153. data/spec/masamune/tasks/postgres_thor_spec.rb +42 -0
  154. data/spec/masamune/tasks/shell_thor_spec.rb +51 -0
  155. data/spec/masamune/template_spec.rb +77 -0
  156. data/spec/masamune/thor_spec.rb +238 -0
  157. data/spec/masamune/transform/bulk_upsert.dimension_spec.rb +200 -0
  158. data/spec/masamune/transform/consolidate_dimension_spec.rb +62 -0
  159. data/spec/masamune/transform/deduplicate_dimension_spec.rb +84 -0
  160. data/spec/masamune/transform/define_event_view_spec.rb +84 -0
  161. data/spec/masamune/transform/define_schema_spec.rb +83 -0
  162. data/spec/masamune/transform/define_table.dimension_spec.rb +306 -0
  163. data/spec/masamune/transform/define_table.fact_spec.rb +291 -0
  164. data/spec/masamune/transform/define_table.table_spec.rb +525 -0
  165. data/spec/masamune/transform/insert_reference_values.dimension_spec.rb +111 -0
  166. data/spec/masamune/transform/insert_reference_values.fact_spec.rb +149 -0
  167. data/spec/masamune/transform/load_dimension_spec.rb +76 -0
  168. data/spec/masamune/transform/load_fact_spec.rb +89 -0
  169. data/spec/masamune/transform/relabel_dimension_spec.rb +102 -0
  170. data/spec/masamune/transform/rollup_fact_spec.rb +333 -0
  171. data/spec/masamune/transform/snapshot_dimension_spec.rb +103 -0
  172. data/spec/masamune/transform/stage_dimension_spec.rb +115 -0
  173. data/spec/masamune/transform/stage_fact_spec.rb +204 -0
  174. data/spec/masamune_spec.rb +32 -0
  175. data/spec/spec_helper.rb +41 -0
  176. data/spec/support/masamune/example_group.rb +36 -0
  177. data/spec/support/masamune/mock_command.rb +99 -0
  178. data/spec/support/masamune/mock_delegate.rb +51 -0
  179. data/spec/support/masamune/mock_filesystem.rb +96 -0
  180. data/spec/support/masamune/thor_mute.rb +35 -0
  181. data/spec/support/rspec/example/action_example_group.rb +34 -0
  182. data/spec/support/rspec/example/task_example_group.rb +80 -0
  183. data/spec/support/rspec/example/transform_example_group.rb +36 -0
  184. data/spec/support/shared_examples/postgres_common_examples.rb +53 -0
  185. metadata +462 -0
@@ -0,0 +1,306 @@
1
+ # The MIT License (MIT)
2
+ #
3
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ require 'spec_helper'
24
+
25
+ describe Masamune::Transform::DefineTable do
26
+ subject { transform.define_table(table).to_s }
27
+
28
+ context 'for hive implicit dimension' do
29
+ before do
30
+ catalog.schema :hive do
31
+ dimension 'user', implicit: true do
32
+ column 'user_id', natural_key: true
33
+ end
34
+ end
35
+ end
36
+
37
+ let(:table) { catalog.hive.user_dimension }
38
+
39
+ it 'should not render table template' do
40
+ is_expected.to eq ''
41
+ end
42
+ end
43
+
44
+ context 'for postgres dimension type: one' do
45
+ before do
46
+ catalog.schema :postgres do
47
+ dimension 'user', type: :one do
48
+ column 'tenant_id'
49
+ column 'user_id'
50
+ end
51
+ end
52
+ end
53
+
54
+ let(:table) { catalog.postgres.user_dimension }
55
+
56
+ it 'should render table template' do
57
+ is_expected.to eq <<-EOS.strip_heredoc
58
+ CREATE TABLE IF NOT EXISTS user_dimension
59
+ (
60
+ id SERIAL PRIMARY KEY,
61
+ tenant_id INTEGER NOT NULL,
62
+ user_id INTEGER NOT NULL,
63
+ last_modified_at TIMESTAMP NOT NULL DEFAULT NOW()
64
+ );
65
+ EOS
66
+ end
67
+ end
68
+
69
+ context 'for postgres dimension type: two' do
70
+ before do
71
+ catalog.schema :postgres do
72
+ dimension 'user', type: :two do
73
+ column 'tenant_id', index: true, natural_key: true
74
+ column 'user_id', index: true, natural_key: true
75
+ end
76
+ end
77
+ end
78
+
79
+ let(:table) { catalog.postgres.user_dimension }
80
+
81
+ it 'should render table template' do
82
+ is_expected.to eq <<-EOS.strip_heredoc
83
+ CREATE TABLE IF NOT EXISTS user_dimension
84
+ (
85
+ id SERIAL PRIMARY KEY,
86
+ tenant_id INTEGER NOT NULL,
87
+ user_id INTEGER NOT NULL,
88
+ start_at TIMESTAMP NOT NULL DEFAULT TO_TIMESTAMP(0),
89
+ end_at TIMESTAMP,
90
+ version INTEGER DEFAULT 1,
91
+ last_modified_at TIMESTAMP NOT NULL DEFAULT NOW()
92
+ );
93
+
94
+ DO $$ BEGIN
95
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_e6c3d91_key') THEN
96
+ ALTER TABLE user_dimension ADD CONSTRAINT user_dimension_e6c3d91_key UNIQUE(tenant_id, user_id, start_at);
97
+ END IF; END $$;
98
+
99
+ DO $$ BEGIN
100
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_3854361_index') THEN
101
+ CREATE INDEX user_dimension_3854361_index ON user_dimension (tenant_id);
102
+ END IF; END $$;
103
+
104
+ DO $$ BEGIN
105
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_e8701ad_index') THEN
106
+ CREATE INDEX user_dimension_e8701ad_index ON user_dimension (user_id);
107
+ END IF; END $$;
108
+
109
+ DO $$ BEGIN
110
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_23563d3_index') THEN
111
+ CREATE INDEX user_dimension_23563d3_index ON user_dimension (start_at);
112
+ END IF; END $$;
113
+
114
+ DO $$ BEGIN
115
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_2c8e908_index') THEN
116
+ CREATE INDEX user_dimension_2c8e908_index ON user_dimension (end_at);
117
+ END IF; END $$;
118
+
119
+ DO $$ BEGIN
120
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_2af72f1_index') THEN
121
+ CREATE INDEX user_dimension_2af72f1_index ON user_dimension (version);
122
+ END IF; END $$;
123
+ EOS
124
+ end
125
+ end
126
+
127
+ context 'for postgres dimension type: four' do
128
+ before do
129
+ catalog.schema :postgres do
130
+ dimension 'cluster', type: :mini do
131
+ column 'id', type: :integer, surrogate_key: true, auto: true
132
+ column 'name', type: :string, unique: true
133
+ row name: 'default', attributes: {default: true}
134
+ end
135
+
136
+ dimension 'user_account_state', type: :mini do
137
+ column 'name', type: :string, unique: true
138
+ column 'description', type: :string
139
+ row name: 'active', description: 'Active', attributes: {default: true}
140
+ end
141
+
142
+ dimension 'user', type: :four do
143
+ references :cluster
144
+ references :user_account_state
145
+ column 'tenant_id', index: true, natural_key: true
146
+ column 'user_id', index: true, natural_key: true
147
+ column 'preferences', type: :key_value, null: true
148
+ end
149
+ end
150
+ end
151
+
152
+ let(:table) { catalog.postgres.user_dimension }
153
+
154
+ it 'should render table template' do
155
+ is_expected.to eq <<-EOS.strip_heredoc
156
+ CREATE TABLE IF NOT EXISTS user_dimension_ledger
157
+ (
158
+ id SERIAL PRIMARY KEY,
159
+ cluster_type_id INTEGER NOT NULL REFERENCES cluster_type(id) DEFAULT default_cluster_type_id(),
160
+ user_account_state_type_id INTEGER REFERENCES user_account_state_type(id),
161
+ tenant_id INTEGER NOT NULL,
162
+ user_id INTEGER NOT NULL,
163
+ preferences_now HSTORE,
164
+ preferences_was HSTORE,
165
+ source_kind VARCHAR NOT NULL,
166
+ source_uuid VARCHAR NOT NULL,
167
+ start_at TIMESTAMP NOT NULL,
168
+ last_modified_at TIMESTAMP NOT NULL DEFAULT NOW(),
169
+ delta INTEGER NOT NULL
170
+ );
171
+
172
+ DO $$ BEGIN
173
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_ledger_370d6dd_key') THEN
174
+ ALTER TABLE user_dimension_ledger ADD CONSTRAINT user_dimension_ledger_370d6dd_key UNIQUE(tenant_id, user_id, source_kind, source_uuid, start_at);
175
+ END IF; END $$;
176
+
177
+ DO $$ BEGIN
178
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_ledger_d6b9b38_index') THEN
179
+ CREATE INDEX user_dimension_ledger_d6b9b38_index ON user_dimension_ledger (cluster_type_id);
180
+ END IF; END $$;
181
+
182
+ DO $$ BEGIN
183
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_ledger_7988187_index') THEN
184
+ CREATE INDEX user_dimension_ledger_7988187_index ON user_dimension_ledger (user_account_state_type_id);
185
+ END IF; END $$;
186
+
187
+ DO $$ BEGIN
188
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_ledger_3854361_index') THEN
189
+ CREATE INDEX user_dimension_ledger_3854361_index ON user_dimension_ledger (tenant_id);
190
+ END IF; END $$;
191
+
192
+ DO $$ BEGIN
193
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_ledger_e8701ad_index') THEN
194
+ CREATE INDEX user_dimension_ledger_e8701ad_index ON user_dimension_ledger (user_id);
195
+ END IF; END $$;
196
+
197
+ DO $$ BEGIN
198
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_ledger_23563d3_index') THEN
199
+ CREATE INDEX user_dimension_ledger_23563d3_index ON user_dimension_ledger (start_at);
200
+ END IF; END $$;
201
+
202
+ CREATE TABLE IF NOT EXISTS user_dimension
203
+ (
204
+ id SERIAL PRIMARY KEY,
205
+ cluster_type_id INTEGER NOT NULL REFERENCES cluster_type(id) DEFAULT default_cluster_type_id(),
206
+ user_account_state_type_id INTEGER NOT NULL REFERENCES user_account_state_type(id) DEFAULT default_user_account_state_type_id(),
207
+ tenant_id INTEGER NOT NULL,
208
+ user_id INTEGER NOT NULL,
209
+ preferences HSTORE,
210
+ parent_id INTEGER REFERENCES user_dimension_ledger(id),
211
+ record_id INTEGER REFERENCES user_dimension_ledger(id),
212
+ start_at TIMESTAMP NOT NULL DEFAULT TO_TIMESTAMP(0),
213
+ end_at TIMESTAMP,
214
+ version INTEGER DEFAULT 1,
215
+ last_modified_at TIMESTAMP NOT NULL DEFAULT NOW()
216
+ );
217
+
218
+ DO $$ BEGIN
219
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_e6c3d91_key') THEN
220
+ ALTER TABLE user_dimension ADD CONSTRAINT user_dimension_e6c3d91_key UNIQUE(tenant_id, user_id, start_at);
221
+ END IF; END $$;
222
+
223
+ DO $$ BEGIN
224
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_d6b9b38_index') THEN
225
+ CREATE INDEX user_dimension_d6b9b38_index ON user_dimension (cluster_type_id);
226
+ END IF; END $$;
227
+
228
+ DO $$ BEGIN
229
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_7988187_index') THEN
230
+ CREATE INDEX user_dimension_7988187_index ON user_dimension (user_account_state_type_id);
231
+ END IF; END $$;
232
+
233
+ DO $$ BEGIN
234
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_3854361_index') THEN
235
+ CREATE INDEX user_dimension_3854361_index ON user_dimension (tenant_id);
236
+ END IF; END $$;
237
+
238
+ DO $$ BEGIN
239
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_e8701ad_index') THEN
240
+ CREATE INDEX user_dimension_e8701ad_index ON user_dimension (user_id);
241
+ END IF; END $$;
242
+
243
+ DO $$ BEGIN
244
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_23563d3_index') THEN
245
+ CREATE INDEX user_dimension_23563d3_index ON user_dimension (start_at);
246
+ END IF; END $$;
247
+
248
+ DO $$ BEGIN
249
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_2c8e908_index') THEN
250
+ CREATE INDEX user_dimension_2c8e908_index ON user_dimension (end_at);
251
+ END IF; END $$;
252
+
253
+ DO $$ BEGIN
254
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_2af72f1_index') THEN
255
+ CREATE INDEX user_dimension_2af72f1_index ON user_dimension (version);
256
+ END IF; END $$;
257
+ EOS
258
+ end
259
+ end
260
+
261
+ context 'for postgres dimension type: four stage table' do
262
+ before do
263
+ catalog.schema :postgres do
264
+ dimension 'user_account_state', type: :mini do
265
+ column 'name', type: :string, unique: true
266
+ column 'description', type: :string
267
+ row name: 'active', description: 'Active', attributes: {default: true}
268
+ end
269
+
270
+ dimension 'user', type: :four do
271
+ references :user_account_state
272
+ column 'tenant_id', index: true, natural_key: true
273
+ column 'user_id', index: true, natural_key: true
274
+ column 'preferences', type: :key_value, null: true
275
+ end
276
+ end
277
+ end
278
+
279
+ let(:table) { catalog.postgres.user_dimension.stage_table(suffix: 'consolidated_forward') }
280
+
281
+ it 'should render table template' do
282
+ is_expected.to eq <<-EOS.strip_heredoc
283
+ CREATE TEMPORARY TABLE IF NOT EXISTS user_consolidated_forward_dimension_stage
284
+ (
285
+ user_account_state_type_id INTEGER DEFAULT default_user_account_state_type_id(),
286
+ tenant_id INTEGER,
287
+ user_id INTEGER,
288
+ preferences HSTORE,
289
+ parent_id INTEGER,
290
+ record_id INTEGER,
291
+ start_at TIMESTAMP DEFAULT TO_TIMESTAMP(0),
292
+ end_at TIMESTAMP,
293
+ version INTEGER DEFAULT 1,
294
+ last_modified_at TIMESTAMP DEFAULT NOW()
295
+ );
296
+
297
+ CREATE INDEX user_consolidated_forward_dimension_stage_7988187_index ON user_consolidated_forward_dimension_stage (user_account_state_type_id);
298
+ CREATE INDEX user_consolidated_forward_dimension_stage_3854361_index ON user_consolidated_forward_dimension_stage (tenant_id);
299
+ CREATE INDEX user_consolidated_forward_dimension_stage_e8701ad_index ON user_consolidated_forward_dimension_stage (user_id);
300
+ CREATE INDEX user_consolidated_forward_dimension_stage_23563d3_index ON user_consolidated_forward_dimension_stage (start_at);
301
+ CREATE INDEX user_consolidated_forward_dimension_stage_2c8e908_index ON user_consolidated_forward_dimension_stage (end_at);
302
+ CREATE INDEX user_consolidated_forward_dimension_stage_2af72f1_index ON user_consolidated_forward_dimension_stage (version);
303
+ EOS
304
+ end
305
+ end
306
+ end
@@ -0,0 +1,291 @@
1
+ # The MIT License (MIT)
2
+ #
3
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ require 'spec_helper'
24
+
25
+ describe Masamune::Transform::DefineTable do
26
+ before do
27
+ catalog.schema :postgres do
28
+ dimension 'cluster', type: :mini do
29
+ column 'id', type: :sequence, surrogate_key: true, auto: true
30
+ column 'name', type: :string
31
+
32
+ row name: 'current_database()', attributes: {default: true}
33
+ end
34
+
35
+ dimension 'date', type: :date do
36
+ column 'date_id', type: :integer, unique: true, index: true, natural_key: true
37
+ end
38
+
39
+ dimension 'user_agent', type: :mini do
40
+ column 'name', type: :string, unique: true, index: 'shared'
41
+ column 'version', type: :string, unique: true, index: 'shared', default: 'Unknown'
42
+ column 'description', type: :string, null: true, ignore: true
43
+ end
44
+
45
+ dimension 'feature', type: :mini do
46
+ column 'name', type: :string, unique: true, index: true
47
+ end
48
+
49
+ dimension 'tenant', type: :two do
50
+ column 'tenant_id', type: :integer, index: true, natural_key: true
51
+ end
52
+
53
+ dimension 'user', type: :two do
54
+ column 'tenant_id', type: :integer, index: true, natural_key: true
55
+ column 'user_id', type: :integer, index: true, natural_key: true
56
+ end
57
+
58
+ dimension 'group', type: :two do
59
+ column 'group_id', type: :integer, natural_key: true
60
+ end
61
+
62
+ fact 'visits', partition: 'y%Ym%m' do
63
+ references :cluster
64
+ references :date
65
+ references :tenant
66
+ references :user
67
+ references :group, multiple: true
68
+ references :user_agent, insert: true
69
+ references :feature, insert: true
70
+ measure 'total', type: :integer
71
+ end
72
+
73
+ file 'visits' do
74
+ column 'date.date_id', type: :integer
75
+ column 'tenant.tenant_id', type: :integer
76
+ column 'user.user_id', type: :integer
77
+ column 'user_agent.name', type: :string
78
+ column 'user_agent.version', type: :string
79
+ column 'feature.name', type: :string
80
+ column 'time_key', type: :integer
81
+ column 'total', type: :integer
82
+ end
83
+ end
84
+
85
+ catalog.schema :hive do
86
+ dimension 'date', type: :date, implicit: true do
87
+ column 'date_id', type: :integer, natural_key: true
88
+ end
89
+
90
+ dimension 'user', type: :two, implicit: true do
91
+ column 'user_id', type: :integer, natural_key: true
92
+ end
93
+
94
+ dimension 'group', type: :two, implicit: true do
95
+ column 'group_id', type: :integer, natural_key: true
96
+ end
97
+
98
+ dimension 'user_agent', type: :mini do
99
+ column 'name', type: :string
100
+ column 'version', type: :string
101
+ column 'description', type: :string, ignore: true
102
+ end
103
+
104
+ fact 'visits', grain: :hourly do
105
+ partition :y
106
+ partition :m
107
+ partition :d
108
+ references :date
109
+ references :user
110
+ references :group, multiple: true
111
+ references :user_agent, denormalize: true
112
+ measure 'total'
113
+ end
114
+ end
115
+ end
116
+
117
+ context 'for postgres fact' do
118
+ let(:target) { catalog.postgres.visits_fact }
119
+
120
+ subject(:result) { transform.define_table(target).to_s }
121
+
122
+ it 'should eq render table template' do
123
+ is_expected.to eq <<-EOS.strip_heredoc
124
+ CREATE TABLE IF NOT EXISTS visits_fact
125
+ (
126
+ cluster_type_id INTEGER NOT NULL REFERENCES cluster_type(id) DEFAULT default_cluster_type_id(),
127
+ date_dimension_id INTEGER NOT NULL REFERENCES date_dimension(id),
128
+ tenant_dimension_id INTEGER NOT NULL REFERENCES tenant_dimension(id),
129
+ user_dimension_id INTEGER NOT NULL REFERENCES user_dimension(id),
130
+ group_dimension_id INTEGER[] NOT NULL,
131
+ user_agent_type_id INTEGER NOT NULL REFERENCES user_agent_type(id),
132
+ feature_type_id INTEGER NOT NULL REFERENCES feature_type(id),
133
+ total INTEGER NOT NULL,
134
+ time_key INTEGER NOT NULL,
135
+ last_modified_at TIMESTAMP NOT NULL DEFAULT NOW()
136
+ );
137
+
138
+ DO $$ BEGIN
139
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_d6b9b38_index') THEN
140
+ CREATE INDEX visits_fact_d6b9b38_index ON visits_fact (cluster_type_id);
141
+ END IF; END $$;
142
+
143
+ DO $$ BEGIN
144
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_0a531a8_index') THEN
145
+ CREATE INDEX visits_fact_0a531a8_index ON visits_fact (date_dimension_id);
146
+ END IF; END $$;
147
+
148
+ DO $$ BEGIN
149
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_d3950d9_index') THEN
150
+ CREATE INDEX visits_fact_d3950d9_index ON visits_fact (tenant_dimension_id);
151
+ END IF; END $$;
152
+
153
+ DO $$ BEGIN
154
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_39f0fdd_index') THEN
155
+ CREATE INDEX visits_fact_39f0fdd_index ON visits_fact (user_dimension_id);
156
+ END IF; END $$;
157
+
158
+ DO $$ BEGIN
159
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_e0d2a9e_index') THEN
160
+ CREATE INDEX visits_fact_e0d2a9e_index ON visits_fact (group_dimension_id);
161
+ END IF; END $$;
162
+
163
+ DO $$ BEGIN
164
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_d8b1c3e_index') THEN
165
+ CREATE INDEX visits_fact_d8b1c3e_index ON visits_fact (user_agent_type_id);
166
+ END IF; END $$;
167
+
168
+ DO $$ BEGIN
169
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_33b68fd_index') THEN
170
+ CREATE INDEX visits_fact_33b68fd_index ON visits_fact (feature_type_id);
171
+ END IF; END $$;
172
+
173
+ DO $$ BEGIN
174
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_6444ed3_index') THEN
175
+ CREATE INDEX visits_fact_6444ed3_index ON visits_fact (time_key);
176
+ END IF; END $$;
177
+ EOS
178
+ end
179
+ end
180
+
181
+ describe 'for fact table from file with sources files' do
182
+ let(:files) { (1..3).map { |i| double(path: "output_#{i}.csv") } }
183
+ let(:target) { catalog.postgres.visits_fact }
184
+ let(:source) { catalog.postgres.visits_file }
185
+
186
+ subject(:result) { transform.define_table(source.stage_table(suffix: 'file', table: target, inherit: false), files).to_s }
187
+
188
+ it 'should eq render table template' do
189
+ is_expected.to eq <<-EOS.strip_heredoc
190
+ CREATE TEMPORARY TABLE IF NOT EXISTS visits_file_fact_stage
191
+ (
192
+ date_dimension_date_id INTEGER,
193
+ tenant_dimension_tenant_id INTEGER,
194
+ user_dimension_user_id INTEGER,
195
+ user_agent_type_name VARCHAR,
196
+ user_agent_type_version VARCHAR,
197
+ feature_type_name VARCHAR,
198
+ time_key INTEGER,
199
+ total INTEGER
200
+ );
201
+
202
+ COPY visits_file_fact_stage FROM 'output_1.csv' WITH (FORMAT 'csv', HEADER true);
203
+ COPY visits_file_fact_stage FROM 'output_2.csv' WITH (FORMAT 'csv', HEADER true);
204
+ COPY visits_file_fact_stage FROM 'output_3.csv' WITH (FORMAT 'csv', HEADER true);
205
+
206
+ CREATE INDEX visits_file_fact_stage_964dac1_index ON visits_file_fact_stage (date_dimension_date_id);
207
+ CREATE INDEX visits_file_fact_stage_90fc13c_index ON visits_file_fact_stage (tenant_dimension_tenant_id);
208
+ CREATE INDEX visits_file_fact_stage_30f3cca_index ON visits_file_fact_stage (user_dimension_user_id);
209
+ CREATE INDEX visits_file_fact_stage_99c433b_index ON visits_file_fact_stage (user_agent_type_name);
210
+ CREATE INDEX visits_file_fact_stage_d5d236f_index ON visits_file_fact_stage (user_agent_type_version);
211
+ CREATE INDEX visits_file_fact_stage_5a187ed_index ON visits_file_fact_stage (feature_type_name);
212
+ CREATE INDEX visits_file_fact_stage_6444ed3_index ON visits_file_fact_stage (time_key);
213
+ EOS
214
+ end
215
+
216
+ context 'with file' do
217
+ subject(:result) { transform.define_table(source.stage_table(table: target), files.first).to_s }
218
+ it 'should eq render table template' do
219
+ is_expected.to_not be_nil
220
+ end
221
+ end
222
+
223
+ context 'with Set' do
224
+ subject(:result) { transform.define_table(source.stage_table(table: target), Set.new(files)).to_s }
225
+ it 'should eq render table template' do
226
+ is_expected.to_not be_nil
227
+ end
228
+ end
229
+ end
230
+
231
+ context 'for postgres fact with degenerate reference' do
232
+ before do
233
+ catalog.clear!
234
+ catalog.schema :postgres do
235
+ fact 'visits' do
236
+ references :message_kind, degenerate: true
237
+ measure 'count', aggregate: :sum
238
+ end
239
+ end
240
+ end
241
+
242
+ let(:target) { catalog.postgres.visits_fact }
243
+
244
+ subject(:result) { transform.define_table(target).to_s }
245
+
246
+ it 'should eq render table template' do
247
+ is_expected.to eq <<-EOS.strip_heredoc
248
+ CREATE TABLE IF NOT EXISTS visits_fact
249
+ (
250
+ message_kind_type_id INTEGER,
251
+ count INTEGER NOT NULL,
252
+ time_key INTEGER NOT NULL,
253
+ last_modified_at TIMESTAMP NOT NULL DEFAULT NOW()
254
+ );
255
+
256
+ DO $$ BEGIN
257
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_2a6313d_index') THEN
258
+ CREATE INDEX visits_fact_2a6313d_index ON visits_fact (message_kind_type_id);
259
+ END IF; END $$;
260
+
261
+ DO $$ BEGIN
262
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_6444ed3_index') THEN
263
+ CREATE INDEX visits_fact_6444ed3_index ON visits_fact (time_key);
264
+ END IF; END $$;
265
+ EOS
266
+ end
267
+ end
268
+
269
+ context 'for hive fact' do
270
+ let(:target) { catalog.hive.visits_hourly_fact }
271
+
272
+ subject(:result) { transform.define_table(target).to_s }
273
+
274
+ it 'should eq render table template' do
275
+ is_expected.to eq <<-EOS.strip_heredoc
276
+ CREATE TABLE IF NOT EXISTS visits_hourly_fact
277
+ (
278
+ date_dimension_date_id INT,
279
+ user_dimension_user_id INT,
280
+ group_dimension_group_id ARRAY<INT>,
281
+ user_agent_type_name STRING,
282
+ user_agent_type_version STRING,
283
+ total INT,
284
+ time_key INT
285
+ )
286
+ PARTITIONED BY (y INT, m INT, d INT)
287
+ TBLPROPERTIES ('serialization.null.format' = '');
288
+ EOS
289
+ end
290
+ end
291
+ end