masamune 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185)
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +54 -0
  4. data/Rakefile +15 -0
  5. data/bin/masamune-elastic-mapreduce +4 -0
  6. data/bin/masamune-hive +4 -0
  7. data/bin/masamune-psql +4 -0
  8. data/bin/masamune-shell +4 -0
  9. data/lib/masamune.rb +56 -0
  10. data/lib/masamune/accumulate.rb +60 -0
  11. data/lib/masamune/actions.rb +38 -0
  12. data/lib/masamune/actions/data_flow.rb +131 -0
  13. data/lib/masamune/actions/date_parse.rb +75 -0
  14. data/lib/masamune/actions/elastic_mapreduce.rb +68 -0
  15. data/lib/masamune/actions/execute.rb +52 -0
  16. data/lib/masamune/actions/filesystem.rb +37 -0
  17. data/lib/masamune/actions/hadoop_filesystem.rb +40 -0
  18. data/lib/masamune/actions/hadoop_streaming.rb +41 -0
  19. data/lib/masamune/actions/hive.rb +74 -0
  20. data/lib/masamune/actions/postgres.rb +76 -0
  21. data/lib/masamune/actions/postgres_admin.rb +34 -0
  22. data/lib/masamune/actions/s3cmd.rb +44 -0
  23. data/lib/masamune/actions/transform.rb +89 -0
  24. data/lib/masamune/after_initialize_callbacks.rb +55 -0
  25. data/lib/masamune/cached_filesystem.rb +110 -0
  26. data/lib/masamune/commands.rb +37 -0
  27. data/lib/masamune/commands/elastic_mapreduce.rb +119 -0
  28. data/lib/masamune/commands/hadoop_filesystem.rb +57 -0
  29. data/lib/masamune/commands/hadoop_streaming.rb +116 -0
  30. data/lib/masamune/commands/hive.rb +178 -0
  31. data/lib/masamune/commands/interactive.rb +37 -0
  32. data/lib/masamune/commands/postgres.rb +128 -0
  33. data/lib/masamune/commands/postgres_admin.rb +72 -0
  34. data/lib/masamune/commands/postgres_common.rb +33 -0
  35. data/lib/masamune/commands/retry_with_backoff.rb +60 -0
  36. data/lib/masamune/commands/s3cmd.rb +70 -0
  37. data/lib/masamune/commands/shell.rb +202 -0
  38. data/lib/masamune/configuration.rb +195 -0
  39. data/lib/masamune/data_plan.rb +31 -0
  40. data/lib/masamune/data_plan/builder.rb +66 -0
  41. data/lib/masamune/data_plan/elem.rb +190 -0
  42. data/lib/masamune/data_plan/engine.rb +162 -0
  43. data/lib/masamune/data_plan/rule.rb +292 -0
  44. data/lib/masamune/data_plan/set.rb +176 -0
  45. data/lib/masamune/environment.rb +164 -0
  46. data/lib/masamune/filesystem.rb +567 -0
  47. data/lib/masamune/has_environment.rb +40 -0
  48. data/lib/masamune/helpers.rb +27 -0
  49. data/lib/masamune/helpers/postgres.rb +84 -0
  50. data/lib/masamune/io.rb +33 -0
  51. data/lib/masamune/last_element.rb +53 -0
  52. data/lib/masamune/method_logger.rb +41 -0
  53. data/lib/masamune/multi_io.rb +39 -0
  54. data/lib/masamune/schema.rb +36 -0
  55. data/lib/masamune/schema/catalog.rb +233 -0
  56. data/lib/masamune/schema/column.rb +527 -0
  57. data/lib/masamune/schema/dimension.rb +133 -0
  58. data/lib/masamune/schema/event.rb +121 -0
  59. data/lib/masamune/schema/fact.rb +133 -0
  60. data/lib/masamune/schema/map.rb +265 -0
  61. data/lib/masamune/schema/row.rb +133 -0
  62. data/lib/masamune/schema/store.rb +115 -0
  63. data/lib/masamune/schema/table.rb +308 -0
  64. data/lib/masamune/schema/table_reference.rb +76 -0
  65. data/lib/masamune/spec_helper.rb +23 -0
  66. data/lib/masamune/string_format.rb +34 -0
  67. data/lib/masamune/tasks/elastic_mapreduce_thor.rb +60 -0
  68. data/lib/masamune/tasks/hive_thor.rb +55 -0
  69. data/lib/masamune/tasks/postgres_thor.rb +47 -0
  70. data/lib/masamune/tasks/shell_thor.rb +63 -0
  71. data/lib/masamune/template.rb +77 -0
  72. data/lib/masamune/thor.rb +186 -0
  73. data/lib/masamune/thor_loader.rb +38 -0
  74. data/lib/masamune/topological_hash.rb +34 -0
  75. data/lib/masamune/transform.rb +47 -0
  76. data/lib/masamune/transform/bulk_upsert.psql.erb +64 -0
  77. data/lib/masamune/transform/bulk_upsert.rb +52 -0
  78. data/lib/masamune/transform/consolidate_dimension.rb +54 -0
  79. data/lib/masamune/transform/deduplicate_dimension.psql.erb +52 -0
  80. data/lib/masamune/transform/deduplicate_dimension.rb +53 -0
  81. data/lib/masamune/transform/define_event_view.hql.erb +51 -0
  82. data/lib/masamune/transform/define_event_view.rb +60 -0
  83. data/lib/masamune/transform/define_index.psql.erb +34 -0
  84. data/lib/masamune/transform/define_schema.hql.erb +23 -0
  85. data/lib/masamune/transform/define_schema.psql.erb +79 -0
  86. data/lib/masamune/transform/define_schema.rb +56 -0
  87. data/lib/masamune/transform/define_table.hql.erb +34 -0
  88. data/lib/masamune/transform/define_table.psql.erb +95 -0
  89. data/lib/masamune/transform/define_table.rb +40 -0
  90. data/lib/masamune/transform/define_unique.psql.erb +30 -0
  91. data/lib/masamune/transform/insert_reference_values.psql.erb +43 -0
  92. data/lib/masamune/transform/insert_reference_values.rb +64 -0
  93. data/lib/masamune/transform/load_dimension.rb +47 -0
  94. data/lib/masamune/transform/load_fact.rb +45 -0
  95. data/lib/masamune/transform/operator.rb +96 -0
  96. data/lib/masamune/transform/relabel_dimension.psql.erb +76 -0
  97. data/lib/masamune/transform/relabel_dimension.rb +39 -0
  98. data/lib/masamune/transform/rollup_fact.psql.erb +79 -0
  99. data/lib/masamune/transform/rollup_fact.rb +149 -0
  100. data/lib/masamune/transform/snapshot_dimension.psql.erb +75 -0
  101. data/lib/masamune/transform/snapshot_dimension.rb +74 -0
  102. data/lib/masamune/transform/stage_dimension.psql.erb +39 -0
  103. data/lib/masamune/transform/stage_dimension.rb +83 -0
  104. data/lib/masamune/transform/stage_fact.psql.erb +80 -0
  105. data/lib/masamune/transform/stage_fact.rb +111 -0
  106. data/lib/masamune/version.rb +25 -0
  107. data/spec/fixtures/aggregate.sql.erb +25 -0
  108. data/spec/fixtures/comment.sql.erb +27 -0
  109. data/spec/fixtures/invalid.sql.erb +23 -0
  110. data/spec/fixtures/relative.sql.erb +23 -0
  111. data/spec/fixtures/simple.sql.erb +28 -0
  112. data/spec/fixtures/whitespace.sql.erb +30 -0
  113. data/spec/masamune/actions/elastic_mapreduce_spec.rb +108 -0
  114. data/spec/masamune/actions/execute_spec.rb +50 -0
  115. data/spec/masamune/actions/hadoop_filesystem_spec.rb +44 -0
  116. data/spec/masamune/actions/hadoop_streaming_spec.rb +74 -0
  117. data/spec/masamune/actions/hive_spec.rb +117 -0
  118. data/spec/masamune/actions/postgres_admin_spec.rb +58 -0
  119. data/spec/masamune/actions/postgres_spec.rb +134 -0
  120. data/spec/masamune/actions/s3cmd_spec.rb +44 -0
  121. data/spec/masamune/actions/transform_spec.rb +144 -0
  122. data/spec/masamune/after_initialization_callbacks_spec.rb +61 -0
  123. data/spec/masamune/cached_filesystem_spec.rb +167 -0
  124. data/spec/masamune/commands/hadoop_filesystem_spec.rb +50 -0
  125. data/spec/masamune/commands/hadoop_streaming_spec.rb +106 -0
  126. data/spec/masamune/commands/hive_spec.rb +117 -0
  127. data/spec/masamune/commands/postgres_admin_spec.rb +69 -0
  128. data/spec/masamune/commands/postgres_spec.rb +100 -0
  129. data/spec/masamune/commands/retry_with_backoff_spec.rb +116 -0
  130. data/spec/masamune/commands/s3cmd_spec.rb +50 -0
  131. data/spec/masamune/commands/shell_spec.rb +101 -0
  132. data/spec/masamune/configuration_spec.rb +102 -0
  133. data/spec/masamune/data_plan/builder_spec.rb +91 -0
  134. data/spec/masamune/data_plan/elem_spec.rb +102 -0
  135. data/spec/masamune/data_plan/engine_spec.rb +356 -0
  136. data/spec/masamune/data_plan/rule_spec.rb +407 -0
  137. data/spec/masamune/data_plan/set_spec.rb +517 -0
  138. data/spec/masamune/environment_spec.rb +65 -0
  139. data/spec/masamune/filesystem_spec.rb +1421 -0
  140. data/spec/masamune/helpers/postgres_spec.rb +95 -0
  141. data/spec/masamune/schema/catalog_spec.rb +613 -0
  142. data/spec/masamune/schema/column_spec.rb +696 -0
  143. data/spec/masamune/schema/dimension_spec.rb +137 -0
  144. data/spec/masamune/schema/event_spec.rb +75 -0
  145. data/spec/masamune/schema/fact_spec.rb +117 -0
  146. data/spec/masamune/schema/map_spec.rb +593 -0
  147. data/spec/masamune/schema/row_spec.rb +28 -0
  148. data/spec/masamune/schema/store_spec.rb +49 -0
  149. data/spec/masamune/schema/table_spec.rb +395 -0
  150. data/spec/masamune/string_format_spec.rb +60 -0
  151. data/spec/masamune/tasks/elastic_mapreduce_thor_spec.rb +57 -0
  152. data/spec/masamune/tasks/hive_thor_spec.rb +75 -0
  153. data/spec/masamune/tasks/postgres_thor_spec.rb +42 -0
  154. data/spec/masamune/tasks/shell_thor_spec.rb +51 -0
  155. data/spec/masamune/template_spec.rb +77 -0
  156. data/spec/masamune/thor_spec.rb +238 -0
  157. data/spec/masamune/transform/bulk_upsert.dimension_spec.rb +200 -0
  158. data/spec/masamune/transform/consolidate_dimension_spec.rb +62 -0
  159. data/spec/masamune/transform/deduplicate_dimension_spec.rb +84 -0
  160. data/spec/masamune/transform/define_event_view_spec.rb +84 -0
  161. data/spec/masamune/transform/define_schema_spec.rb +83 -0
  162. data/spec/masamune/transform/define_table.dimension_spec.rb +306 -0
  163. data/spec/masamune/transform/define_table.fact_spec.rb +291 -0
  164. data/spec/masamune/transform/define_table.table_spec.rb +525 -0
  165. data/spec/masamune/transform/insert_reference_values.dimension_spec.rb +111 -0
  166. data/spec/masamune/transform/insert_reference_values.fact_spec.rb +149 -0
  167. data/spec/masamune/transform/load_dimension_spec.rb +76 -0
  168. data/spec/masamune/transform/load_fact_spec.rb +89 -0
  169. data/spec/masamune/transform/relabel_dimension_spec.rb +102 -0
  170. data/spec/masamune/transform/rollup_fact_spec.rb +333 -0
  171. data/spec/masamune/transform/snapshot_dimension_spec.rb +103 -0
  172. data/spec/masamune/transform/stage_dimension_spec.rb +115 -0
  173. data/spec/masamune/transform/stage_fact_spec.rb +204 -0
  174. data/spec/masamune_spec.rb +32 -0
  175. data/spec/spec_helper.rb +41 -0
  176. data/spec/support/masamune/example_group.rb +36 -0
  177. data/spec/support/masamune/mock_command.rb +99 -0
  178. data/spec/support/masamune/mock_delegate.rb +51 -0
  179. data/spec/support/masamune/mock_filesystem.rb +96 -0
  180. data/spec/support/masamune/thor_mute.rb +35 -0
  181. data/spec/support/rspec/example/action_example_group.rb +34 -0
  182. data/spec/support/rspec/example/task_example_group.rb +80 -0
  183. data/spec/support/rspec/example/transform_example_group.rb +36 -0
  184. data/spec/support/shared_examples/postgres_common_examples.rb +53 -0
  185. metadata +462 -0
@@ -0,0 +1,306 @@
1
+ # The MIT License (MIT)
2
+ #
3
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ require 'spec_helper'
24
+
25
+ describe Masamune::Transform::DefineTable do
26
+ subject { transform.define_table(table).to_s }
27
+
28
# Case: a Hive dimension declared with `implicit: true`.
# The example below expects define_table to render an empty string for such a table.
+ context 'for hive implicit dimension' do
29
+ before do
30
+ catalog.schema :hive do
31
+ dimension 'user', implicit: true do
32
+ column 'user_id', natural_key: true
33
+ end
34
+ end
35
+ end
36
+
37
+ let(:table) { catalog.hive.user_dimension }
38
+
39
# `subject` is `transform.define_table(table).to_s`, so "nothing rendered" shows up as ''.
+ it 'should not render table template' do
40
+ is_expected.to eq ''
41
+ end
42
+ end
43
+
44
# Case: a Postgres dimension declared with `type: :one` and two columns that give no explicit type.
+ context 'for postgres dimension type: one' do
45
+ before do
46
+ catalog.schema :postgres do
47
+ dimension 'user', type: :one do
48
+ column 'tenant_id'
49
+ column 'user_id'
50
+ end
51
+ end
52
+ end
53
+
54
+ let(:table) { catalog.postgres.user_dimension }
55
+
56
# The expected DDL renders both untyped columns as INTEGER NOT NULL and also contains
# `id` and `last_modified_at` columns that are not declared in the schema block above.
+ it 'should render table template' do
57
+ is_expected.to eq <<-EOS.strip_heredoc
58
+ CREATE TABLE IF NOT EXISTS user_dimension
59
+ (
60
+ id SERIAL PRIMARY KEY,
61
+ tenant_id INTEGER NOT NULL,
62
+ user_id INTEGER NOT NULL,
63
+ last_modified_at TIMESTAMP NOT NULL DEFAULT NOW()
64
+ );
65
+ EOS
66
+ end
67
+ end
68
+
69
+ context 'for postgres dimension type: two' do
70
# Fixture: a Postgres `type: :two` dimension whose two columns are both indexed natural keys.
# The example in this context expects a UNIQUE constraint over (tenant_id, user_id, start_at)
# plus one index per column, including start_at, end_at and version, which are not declared here.
+ before do
71
+ catalog.schema :postgres do
72
+ dimension 'user', type: :two do
73
+ column 'tenant_id', index: true, natural_key: true
74
+ column 'user_id', index: true, natural_key: true
75
+ end
76
+ end
77
+ end
78
+
79
+ let(:table) { catalog.postgres.user_dimension }
80
+
81
+ it 'should render table template' do
82
+ is_expected.to eq <<-EOS.strip_heredoc
83
+ CREATE TABLE IF NOT EXISTS user_dimension
84
+ (
85
+ id SERIAL PRIMARY KEY,
86
+ tenant_id INTEGER NOT NULL,
87
+ user_id INTEGER NOT NULL,
88
+ start_at TIMESTAMP NOT NULL DEFAULT TO_TIMESTAMP(0),
89
+ end_at TIMESTAMP,
90
+ version INTEGER DEFAULT 1,
91
+ last_modified_at TIMESTAMP NOT NULL DEFAULT NOW()
92
+ );
93
+
94
+ DO $$ BEGIN
95
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_e6c3d91_key') THEN
96
+ ALTER TABLE user_dimension ADD CONSTRAINT user_dimension_e6c3d91_key UNIQUE(tenant_id, user_id, start_at);
97
+ END IF; END $$;
98
+
99
+ DO $$ BEGIN
100
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_3854361_index') THEN
101
+ CREATE INDEX user_dimension_3854361_index ON user_dimension (tenant_id);
102
+ END IF; END $$;
103
+
104
+ DO $$ BEGIN
105
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_e8701ad_index') THEN
106
+ CREATE INDEX user_dimension_e8701ad_index ON user_dimension (user_id);
107
+ END IF; END $$;
108
+
109
+ DO $$ BEGIN
110
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_23563d3_index') THEN
111
+ CREATE INDEX user_dimension_23563d3_index ON user_dimension (start_at);
112
+ END IF; END $$;
113
+
114
+ DO $$ BEGIN
115
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_2c8e908_index') THEN
116
+ CREATE INDEX user_dimension_2c8e908_index ON user_dimension (end_at);
117
+ END IF; END $$;
118
+
119
+ DO $$ BEGIN
120
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_2af72f1_index') THEN
121
+ CREATE INDEX user_dimension_2af72f1_index ON user_dimension (version);
122
+ END IF; END $$;
123
+ EOS
124
+ end
125
+ end
126
+
127
+ context 'for postgres dimension type: four' do
128
# Fixture: two `:mini` dimensions, each with a row marked `default: true`, and a `type: :four`
# `user` dimension that references both of them.
# The example in this context expects two tables to be rendered for `user`:
# `user_dimension_ledger` followed by `user_dimension`.
+ before do
129
+ catalog.schema :postgres do
130
+ dimension 'cluster', type: :mini do
131
+ column 'id', type: :integer, surrogate_key: true, auto: true
132
+ column 'name', type: :string, unique: true
133
+ row name: 'default', attributes: {default: true}
134
+ end
135
+
136
+ dimension 'user_account_state', type: :mini do
137
+ column 'name', type: :string, unique: true
138
+ column 'description', type: :string
139
+ row name: 'active', description: 'Active', attributes: {default: true}
140
+ end
141
+
142
+ dimension 'user', type: :four do
143
+ references :cluster
144
+ references :user_account_state
145
+ column 'tenant_id', index: true, natural_key: true
146
+ column 'user_id', index: true, natural_key: true
147
# The `:key_value` column is expected as HSTORE: `preferences` in user_dimension and
# `preferences_now` / `preferences_was` in user_dimension_ledger.
+ column 'preferences', type: :key_value, null: true
148
+ end
149
+ end
150
+ end
151
+
152
+ let(:table) { catalog.postgres.user_dimension }
153
+
154
+ it 'should render table template' do
155
+ is_expected.to eq <<-EOS.strip_heredoc
156
+ CREATE TABLE IF NOT EXISTS user_dimension_ledger
157
+ (
158
+ id SERIAL PRIMARY KEY,
159
+ cluster_type_id INTEGER NOT NULL REFERENCES cluster_type(id) DEFAULT default_cluster_type_id(),
160
+ user_account_state_type_id INTEGER REFERENCES user_account_state_type(id),
161
+ tenant_id INTEGER NOT NULL,
162
+ user_id INTEGER NOT NULL,
163
+ preferences_now HSTORE,
164
+ preferences_was HSTORE,
165
+ source_kind VARCHAR NOT NULL,
166
+ source_uuid VARCHAR NOT NULL,
167
+ start_at TIMESTAMP NOT NULL,
168
+ last_modified_at TIMESTAMP NOT NULL DEFAULT NOW(),
169
+ delta INTEGER NOT NULL
170
+ );
171
+
172
+ DO $$ BEGIN
173
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_ledger_370d6dd_key') THEN
174
+ ALTER TABLE user_dimension_ledger ADD CONSTRAINT user_dimension_ledger_370d6dd_key UNIQUE(tenant_id, user_id, source_kind, source_uuid, start_at);
175
+ END IF; END $$;
176
+
177
+ DO $$ BEGIN
178
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_ledger_d6b9b38_index') THEN
179
+ CREATE INDEX user_dimension_ledger_d6b9b38_index ON user_dimension_ledger (cluster_type_id);
180
+ END IF; END $$;
181
+
182
+ DO $$ BEGIN
183
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_ledger_7988187_index') THEN
184
+ CREATE INDEX user_dimension_ledger_7988187_index ON user_dimension_ledger (user_account_state_type_id);
185
+ END IF; END $$;
186
+
187
+ DO $$ BEGIN
188
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_ledger_3854361_index') THEN
189
+ CREATE INDEX user_dimension_ledger_3854361_index ON user_dimension_ledger (tenant_id);
190
+ END IF; END $$;
191
+
192
+ DO $$ BEGIN
193
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_ledger_e8701ad_index') THEN
194
+ CREATE INDEX user_dimension_ledger_e8701ad_index ON user_dimension_ledger (user_id);
195
+ END IF; END $$;
196
+
197
+ DO $$ BEGIN
198
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_ledger_23563d3_index') THEN
199
+ CREATE INDEX user_dimension_ledger_23563d3_index ON user_dimension_ledger (start_at);
200
+ END IF; END $$;
201
+
202
+ CREATE TABLE IF NOT EXISTS user_dimension
203
+ (
204
+ id SERIAL PRIMARY KEY,
205
+ cluster_type_id INTEGER NOT NULL REFERENCES cluster_type(id) DEFAULT default_cluster_type_id(),
206
+ user_account_state_type_id INTEGER NOT NULL REFERENCES user_account_state_type(id) DEFAULT default_user_account_state_type_id(),
207
+ tenant_id INTEGER NOT NULL,
208
+ user_id INTEGER NOT NULL,
209
+ preferences HSTORE,
210
+ parent_id INTEGER REFERENCES user_dimension_ledger(id),
211
+ record_id INTEGER REFERENCES user_dimension_ledger(id),
212
+ start_at TIMESTAMP NOT NULL DEFAULT TO_TIMESTAMP(0),
213
+ end_at TIMESTAMP,
214
+ version INTEGER DEFAULT 1,
215
+ last_modified_at TIMESTAMP NOT NULL DEFAULT NOW()
216
+ );
217
+
218
+ DO $$ BEGIN
219
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_e6c3d91_key') THEN
220
+ ALTER TABLE user_dimension ADD CONSTRAINT user_dimension_e6c3d91_key UNIQUE(tenant_id, user_id, start_at);
221
+ END IF; END $$;
222
+
223
+ DO $$ BEGIN
224
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_d6b9b38_index') THEN
225
+ CREATE INDEX user_dimension_d6b9b38_index ON user_dimension (cluster_type_id);
226
+ END IF; END $$;
227
+
228
+ DO $$ BEGIN
229
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_7988187_index') THEN
230
+ CREATE INDEX user_dimension_7988187_index ON user_dimension (user_account_state_type_id);
231
+ END IF; END $$;
232
+
233
+ DO $$ BEGIN
234
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_3854361_index') THEN
235
+ CREATE INDEX user_dimension_3854361_index ON user_dimension (tenant_id);
236
+ END IF; END $$;
237
+
238
+ DO $$ BEGIN
239
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_e8701ad_index') THEN
240
+ CREATE INDEX user_dimension_e8701ad_index ON user_dimension (user_id);
241
+ END IF; END $$;
242
+
243
+ DO $$ BEGIN
244
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_23563d3_index') THEN
245
+ CREATE INDEX user_dimension_23563d3_index ON user_dimension (start_at);
246
+ END IF; END $$;
247
+
248
+ DO $$ BEGIN
249
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_2c8e908_index') THEN
250
+ CREATE INDEX user_dimension_2c8e908_index ON user_dimension (end_at);
251
+ END IF; END $$;
252
+
253
+ DO $$ BEGIN
254
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'user_dimension_2af72f1_index') THEN
255
+ CREATE INDEX user_dimension_2af72f1_index ON user_dimension (version);
256
+ END IF; END $$;
257
+ EOS
258
+ end
259
+ end
260
+
261
+ context 'for postgres dimension type: four stage table' do
262
# Fixture: a `type: :four` `user` dimension referencing one `:mini` dimension.
# The `table` under test in this context is
# `user_dimension.stage_table(suffix: 'consolidated_forward')`, not the dimension itself.
+ before do
263
+ catalog.schema :postgres do
264
+ dimension 'user_account_state', type: :mini do
265
+ column 'name', type: :string, unique: true
266
+ column 'description', type: :string
267
+ row name: 'active', description: 'Active', attributes: {default: true}
268
+ end
269
+
270
+ dimension 'user', type: :four do
271
+ references :user_account_state
272
+ column 'tenant_id', index: true, natural_key: true
273
+ column 'user_id', index: true, natural_key: true
274
+ column 'preferences', type: :key_value, null: true
275
+ end
276
+ end
277
+ end
278
+
279
+ let(:table) { catalog.postgres.user_dimension.stage_table(suffix: 'consolidated_forward') }
280
+
281
+ it 'should render table template' do
282
+ is_expected.to eq <<-EOS.strip_heredoc
283
+ CREATE TEMPORARY TABLE IF NOT EXISTS user_consolidated_forward_dimension_stage
284
+ (
285
+ user_account_state_type_id INTEGER DEFAULT default_user_account_state_type_id(),
286
+ tenant_id INTEGER,
287
+ user_id INTEGER,
288
+ preferences HSTORE,
289
+ parent_id INTEGER,
290
+ record_id INTEGER,
291
+ start_at TIMESTAMP DEFAULT TO_TIMESTAMP(0),
292
+ end_at TIMESTAMP,
293
+ version INTEGER DEFAULT 1,
294
+ last_modified_at TIMESTAMP DEFAULT NOW()
295
+ );
296
+
297
+ CREATE INDEX user_consolidated_forward_dimension_stage_7988187_index ON user_consolidated_forward_dimension_stage (user_account_state_type_id);
298
+ CREATE INDEX user_consolidated_forward_dimension_stage_3854361_index ON user_consolidated_forward_dimension_stage (tenant_id);
299
+ CREATE INDEX user_consolidated_forward_dimension_stage_e8701ad_index ON user_consolidated_forward_dimension_stage (user_id);
300
+ CREATE INDEX user_consolidated_forward_dimension_stage_23563d3_index ON user_consolidated_forward_dimension_stage (start_at);
301
+ CREATE INDEX user_consolidated_forward_dimension_stage_2c8e908_index ON user_consolidated_forward_dimension_stage (end_at);
302
+ CREATE INDEX user_consolidated_forward_dimension_stage_2af72f1_index ON user_consolidated_forward_dimension_stage (version);
303
+ EOS
304
+ end
305
+ end
306
+ end
@@ -0,0 +1,291 @@
1
+ # The MIT License (MIT)
2
+ #
3
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ require 'spec_helper'
24
+
25
+ describe Masamune::Transform::DefineTable do
26
# File-level fixture: registers a :postgres schema and a :hive schema on `catalog`.
# Both declare a `visits` fact plus the dimensions it references.
+ before do
27
+ catalog.schema :postgres do
28
+ dimension 'cluster', type: :mini do
29
+ column 'id', type: :sequence, surrogate_key: true, auto: true
30
+ column 'name', type: :string
31
+
32
+ row name: 'current_database()', attributes: {default: true}
33
+ end
34
+
35
+ dimension 'date', type: :date do
36
+ column 'date_id', type: :integer, unique: true, index: true, natural_key: true
37
+ end
38
+
39
+ dimension 'user_agent', type: :mini do
40
+ column 'name', type: :string, unique: true, index: 'shared'
41
+ column 'version', type: :string, unique: true, index: 'shared', default: 'Unknown'
42
+ column 'description', type: :string, null: true, ignore: true
43
+ end
44
+
45
+ dimension 'feature', type: :mini do
46
+ column 'name', type: :string, unique: true, index: true
47
+ end
48
+
49
+ dimension 'tenant', type: :two do
50
+ column 'tenant_id', type: :integer, index: true, natural_key: true
51
+ end
52
+
53
+ dimension 'user', type: :two do
54
+ column 'tenant_id', type: :integer, index: true, natural_key: true
55
+ column 'user_id', type: :integer, index: true, natural_key: true
56
+ end
57
+
58
+ dimension 'group', type: :two do
59
+ column 'group_id', type: :integer, natural_key: true
60
+ end
61
+
62
+ fact 'visits', partition: 'y%Ym%m' do
63
+ references :cluster
64
+ references :date
65
+ references :tenant
66
+ references :user
67
# The 'for postgres fact' example expects this reference as `group_dimension_id INTEGER[] NOT NULL`,
# with no REFERENCES clause, unlike the single-valued references around it.
+ references :group, multiple: true
68
+ references :user_agent, insert: true
69
+ references :feature, insert: true
70
+ measure 'total', type: :integer
71
+ end
72
+
73
# Dotted column names point at columns of the referenced tables. The staged-file example
# expects them flattened, e.g. `date.date_id` -> `date_dimension_date_id` and
# `user_agent.name` -> `user_agent_type_name`.
+ file 'visits' do
74
+ column 'date.date_id', type: :integer
75
+ column 'tenant.tenant_id', type: :integer
76
+ column 'user.user_id', type: :integer
77
+ column 'user_agent.name', type: :string
78
+ column 'user_agent.version', type: :string
79
+ column 'feature.name', type: :string
80
+ column 'time_key', type: :integer
81
+ column 'total', type: :integer
82
+ end
83
+ end
84
+
85
+ catalog.schema :hive do
86
+ dimension 'date', type: :date, implicit: true do
87
+ column 'date_id', type: :integer, natural_key: true
88
+ end
89
+
90
+ dimension 'user', type: :two, implicit: true do
91
+ column 'user_id', type: :integer, natural_key: true
92
+ end
93
+
94
+ dimension 'group', type: :two, implicit: true do
95
+ column 'group_id', type: :integer, natural_key: true
96
+ end
97
+
98
+ dimension 'user_agent', type: :mini do
99
+ column 'name', type: :string
100
+ column 'version', type: :string
101
+ column 'description', type: :string, ignore: true
102
+ end
103
+
104
# With `grain: :hourly` the examples look this fact up as `catalog.hive.visits_hourly_fact`.
# The three `partition` calls correspond to the expected `PARTITIONED BY (y INT, m INT, d INT)`.
+ fact 'visits', grain: :hourly do
105
+ partition :y
106
+ partition :m
107
+ partition :d
108
+ references :date
109
+ references :user
110
+ references :group, multiple: true
111
# The expected Hive DDL carries `user_agent_type_name` and `user_agent_type_version` columns
# for this reference; the `ignore: true` description column does not appear.
+ references :user_agent, denormalize: true
112
+ measure 'total'
113
+ end
114
+ end
115
+ end
116
+
117
+ context 'for postgres fact' do
118
+ let(:target) { catalog.postgres.visits_fact }
119
+
120
+ subject(:result) { transform.define_table(target).to_s }
121
+
122
+ it 'should eq render table template' do
123
+ is_expected.to eq <<-EOS.strip_heredoc
124
+ CREATE TABLE IF NOT EXISTS visits_fact
125
+ (
126
+ cluster_type_id INTEGER NOT NULL REFERENCES cluster_type(id) DEFAULT default_cluster_type_id(),
127
+ date_dimension_id INTEGER NOT NULL REFERENCES date_dimension(id),
128
+ tenant_dimension_id INTEGER NOT NULL REFERENCES tenant_dimension(id),
129
+ user_dimension_id INTEGER NOT NULL REFERENCES user_dimension(id),
130
+ group_dimension_id INTEGER[] NOT NULL,
131
+ user_agent_type_id INTEGER NOT NULL REFERENCES user_agent_type(id),
132
+ feature_type_id INTEGER NOT NULL REFERENCES feature_type(id),
133
+ total INTEGER NOT NULL,
134
+ time_key INTEGER NOT NULL,
135
+ last_modified_at TIMESTAMP NOT NULL DEFAULT NOW()
136
+ );
137
+
138
+ DO $$ BEGIN
139
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_d6b9b38_index') THEN
140
+ CREATE INDEX visits_fact_d6b9b38_index ON visits_fact (cluster_type_id);
141
+ END IF; END $$;
142
+
143
+ DO $$ BEGIN
144
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_0a531a8_index') THEN
145
+ CREATE INDEX visits_fact_0a531a8_index ON visits_fact (date_dimension_id);
146
+ END IF; END $$;
147
+
148
+ DO $$ BEGIN
149
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_d3950d9_index') THEN
150
+ CREATE INDEX visits_fact_d3950d9_index ON visits_fact (tenant_dimension_id);
151
+ END IF; END $$;
152
+
153
+ DO $$ BEGIN
154
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_39f0fdd_index') THEN
155
+ CREATE INDEX visits_fact_39f0fdd_index ON visits_fact (user_dimension_id);
156
+ END IF; END $$;
157
+
158
+ DO $$ BEGIN
159
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_e0d2a9e_index') THEN
160
+ CREATE INDEX visits_fact_e0d2a9e_index ON visits_fact (group_dimension_id);
161
+ END IF; END $$;
162
+
163
+ DO $$ BEGIN
164
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_d8b1c3e_index') THEN
165
+ CREATE INDEX visits_fact_d8b1c3e_index ON visits_fact (user_agent_type_id);
166
+ END IF; END $$;
167
+
168
+ DO $$ BEGIN
169
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_33b68fd_index') THEN
170
+ CREATE INDEX visits_fact_33b68fd_index ON visits_fact (feature_type_id);
171
+ END IF; END $$;
172
+
173
+ DO $$ BEGIN
174
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_6444ed3_index') THEN
175
+ CREATE INDEX visits_fact_6444ed3_index ON visits_fact (time_key);
176
+ END IF; END $$;
177
+ EOS
178
+ end
179
+ end
180
+
181
+ describe 'for fact table from file with sources files' do
182
+ let(:files) { (1..3).map { |i| double(path: "output_#{i}.csv") } }
183
+ let(:target) { catalog.postgres.visits_fact }
184
+ let(:source) { catalog.postgres.visits_file }
185
+
186
+ subject(:result) { transform.define_table(source.stage_table(suffix: 'file', table: target, inherit: false), files).to_s }
187
+
188
+ it 'should eq render table template' do
189
+ is_expected.to eq <<-EOS.strip_heredoc
190
+ CREATE TEMPORARY TABLE IF NOT EXISTS visits_file_fact_stage
191
+ (
192
+ date_dimension_date_id INTEGER,
193
+ tenant_dimension_tenant_id INTEGER,
194
+ user_dimension_user_id INTEGER,
195
+ user_agent_type_name VARCHAR,
196
+ user_agent_type_version VARCHAR,
197
+ feature_type_name VARCHAR,
198
+ time_key INTEGER,
199
+ total INTEGER
200
+ );
201
+
202
+ COPY visits_file_fact_stage FROM 'output_1.csv' WITH (FORMAT 'csv', HEADER true);
203
+ COPY visits_file_fact_stage FROM 'output_2.csv' WITH (FORMAT 'csv', HEADER true);
204
+ COPY visits_file_fact_stage FROM 'output_3.csv' WITH (FORMAT 'csv', HEADER true);
205
+
206
+ CREATE INDEX visits_file_fact_stage_964dac1_index ON visits_file_fact_stage (date_dimension_date_id);
207
+ CREATE INDEX visits_file_fact_stage_90fc13c_index ON visits_file_fact_stage (tenant_dimension_tenant_id);
208
+ CREATE INDEX visits_file_fact_stage_30f3cca_index ON visits_file_fact_stage (user_dimension_user_id);
209
+ CREATE INDEX visits_file_fact_stage_99c433b_index ON visits_file_fact_stage (user_agent_type_name);
210
+ CREATE INDEX visits_file_fact_stage_d5d236f_index ON visits_file_fact_stage (user_agent_type_version);
211
+ CREATE INDEX visits_file_fact_stage_5a187ed_index ON visits_file_fact_stage (feature_type_name);
212
+ CREATE INDEX visits_file_fact_stage_6444ed3_index ON visits_file_fact_stage (time_key);
213
+ EOS
214
+ end
215
+
216
+ context 'with file' do
217
+ subject(:result) { transform.define_table(source.stage_table(table: target), files.first).to_s }
218
+ it 'should eq render table template' do
219
+ is_expected.to_not be_nil
220
+ end
221
+ end
222
+
223
+ context 'with Set' do
224
+ subject(:result) { transform.define_table(source.stage_table(table: target), Set.new(files)).to_s }
225
+ it 'should eq render table template' do
226
+ is_expected.to_not be_nil
227
+ end
228
+ end
229
+ end
230
+
231
+ context 'for postgres fact with degenerate reference' do
232
# Fixture override for this context: redeclares the Postgres `visits` fact with a single
# degenerate reference. No `message_kind` dimension is declared anywhere in this block.
+ before do
233
# NOTE(review): presumably discards the schemas registered by the file-level `before` hook
# so that `visits` can be declared again — confirm against Catalog#clear!.
+ catalog.clear!
234
+ catalog.schema :postgres do
235
+ fact 'visits' do
236
# The example expects this as plain `message_kind_type_id INTEGER`, with no NOT NULL
# and no REFERENCES clause.
+ references :message_kind, degenerate: true
237
+ measure 'count', aggregate: :sum
238
+ end
239
+ end
240
+ end
241
+
242
+ let(:target) { catalog.postgres.visits_fact }
243
+
244
+ subject(:result) { transform.define_table(target).to_s }
245
+
246
+ it 'should eq render table template' do
247
+ is_expected.to eq <<-EOS.strip_heredoc
248
+ CREATE TABLE IF NOT EXISTS visits_fact
249
+ (
250
+ message_kind_type_id INTEGER,
251
+ count INTEGER NOT NULL,
252
+ time_key INTEGER NOT NULL,
253
+ last_modified_at TIMESTAMP NOT NULL DEFAULT NOW()
254
+ );
255
+
256
+ DO $$ BEGIN
257
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_2a6313d_index') THEN
258
+ CREATE INDEX visits_fact_2a6313d_index ON visits_fact (message_kind_type_id);
259
+ END IF; END $$;
260
+
261
+ DO $$ BEGIN
262
+ IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = 'visits_fact_6444ed3_index') THEN
263
+ CREATE INDEX visits_fact_6444ed3_index ON visits_fact (time_key);
264
+ END IF; END $$;
265
+ EOS
266
+ end
267
+ end
268
+
269
# Case: rendering the Hive `visits` fact declared with `grain: :hourly` in the file-level fixture.
+ context 'for hive fact' do
270
+ let(:target) { catalog.hive.visits_hourly_fact }
271
+
272
+ subject(:result) { transform.define_table(target).to_s }
273
+
274
# The expected DDL uses Hive types (INT, STRING, ARRAY<INT>) and has no constraint or index
# statements, unlike the Postgres examples in this file.
+ it 'should eq render table template' do
275
+ is_expected.to eq <<-EOS.strip_heredoc
276
+ CREATE TABLE IF NOT EXISTS visits_hourly_fact
277
+ (
278
+ date_dimension_date_id INT,
279
+ user_dimension_user_id INT,
280
+ group_dimension_group_id ARRAY<INT>,
281
+ user_agent_type_name STRING,
282
+ user_agent_type_version STRING,
283
+ total INT,
284
+ time_key INT
285
+ )
286
+ PARTITIONED BY (y INT, m INT, d INT)
287
+ TBLPROPERTIES ('serialization.null.format' = '');
288
+ EOS
289
+ end
290
+ end
291
+ end