masamune 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +54 -0
  4. data/Rakefile +15 -0
  5. data/bin/masamune-elastic-mapreduce +4 -0
  6. data/bin/masamune-hive +4 -0
  7. data/bin/masamune-psql +4 -0
  8. data/bin/masamune-shell +4 -0
  9. data/lib/masamune.rb +56 -0
  10. data/lib/masamune/accumulate.rb +60 -0
  11. data/lib/masamune/actions.rb +38 -0
  12. data/lib/masamune/actions/data_flow.rb +131 -0
  13. data/lib/masamune/actions/date_parse.rb +75 -0
  14. data/lib/masamune/actions/elastic_mapreduce.rb +68 -0
  15. data/lib/masamune/actions/execute.rb +52 -0
  16. data/lib/masamune/actions/filesystem.rb +37 -0
  17. data/lib/masamune/actions/hadoop_filesystem.rb +40 -0
  18. data/lib/masamune/actions/hadoop_streaming.rb +41 -0
  19. data/lib/masamune/actions/hive.rb +74 -0
  20. data/lib/masamune/actions/postgres.rb +76 -0
  21. data/lib/masamune/actions/postgres_admin.rb +34 -0
  22. data/lib/masamune/actions/s3cmd.rb +44 -0
  23. data/lib/masamune/actions/transform.rb +89 -0
  24. data/lib/masamune/after_initialize_callbacks.rb +55 -0
  25. data/lib/masamune/cached_filesystem.rb +110 -0
  26. data/lib/masamune/commands.rb +37 -0
  27. data/lib/masamune/commands/elastic_mapreduce.rb +119 -0
  28. data/lib/masamune/commands/hadoop_filesystem.rb +57 -0
  29. data/lib/masamune/commands/hadoop_streaming.rb +116 -0
  30. data/lib/masamune/commands/hive.rb +178 -0
  31. data/lib/masamune/commands/interactive.rb +37 -0
  32. data/lib/masamune/commands/postgres.rb +128 -0
  33. data/lib/masamune/commands/postgres_admin.rb +72 -0
  34. data/lib/masamune/commands/postgres_common.rb +33 -0
  35. data/lib/masamune/commands/retry_with_backoff.rb +60 -0
  36. data/lib/masamune/commands/s3cmd.rb +70 -0
  37. data/lib/masamune/commands/shell.rb +202 -0
  38. data/lib/masamune/configuration.rb +195 -0
  39. data/lib/masamune/data_plan.rb +31 -0
  40. data/lib/masamune/data_plan/builder.rb +66 -0
  41. data/lib/masamune/data_plan/elem.rb +190 -0
  42. data/lib/masamune/data_plan/engine.rb +162 -0
  43. data/lib/masamune/data_plan/rule.rb +292 -0
  44. data/lib/masamune/data_plan/set.rb +176 -0
  45. data/lib/masamune/environment.rb +164 -0
  46. data/lib/masamune/filesystem.rb +567 -0
  47. data/lib/masamune/has_environment.rb +40 -0
  48. data/lib/masamune/helpers.rb +27 -0
  49. data/lib/masamune/helpers/postgres.rb +84 -0
  50. data/lib/masamune/io.rb +33 -0
  51. data/lib/masamune/last_element.rb +53 -0
  52. data/lib/masamune/method_logger.rb +41 -0
  53. data/lib/masamune/multi_io.rb +39 -0
  54. data/lib/masamune/schema.rb +36 -0
  55. data/lib/masamune/schema/catalog.rb +233 -0
  56. data/lib/masamune/schema/column.rb +527 -0
  57. data/lib/masamune/schema/dimension.rb +133 -0
  58. data/lib/masamune/schema/event.rb +121 -0
  59. data/lib/masamune/schema/fact.rb +133 -0
  60. data/lib/masamune/schema/map.rb +265 -0
  61. data/lib/masamune/schema/row.rb +133 -0
  62. data/lib/masamune/schema/store.rb +115 -0
  63. data/lib/masamune/schema/table.rb +308 -0
  64. data/lib/masamune/schema/table_reference.rb +76 -0
  65. data/lib/masamune/spec_helper.rb +23 -0
  66. data/lib/masamune/string_format.rb +34 -0
  67. data/lib/masamune/tasks/elastic_mapreduce_thor.rb +60 -0
  68. data/lib/masamune/tasks/hive_thor.rb +55 -0
  69. data/lib/masamune/tasks/postgres_thor.rb +47 -0
  70. data/lib/masamune/tasks/shell_thor.rb +63 -0
  71. data/lib/masamune/template.rb +77 -0
  72. data/lib/masamune/thor.rb +186 -0
  73. data/lib/masamune/thor_loader.rb +38 -0
  74. data/lib/masamune/topological_hash.rb +34 -0
  75. data/lib/masamune/transform.rb +47 -0
  76. data/lib/masamune/transform/bulk_upsert.psql.erb +64 -0
  77. data/lib/masamune/transform/bulk_upsert.rb +52 -0
  78. data/lib/masamune/transform/consolidate_dimension.rb +54 -0
  79. data/lib/masamune/transform/deduplicate_dimension.psql.erb +52 -0
  80. data/lib/masamune/transform/deduplicate_dimension.rb +53 -0
  81. data/lib/masamune/transform/define_event_view.hql.erb +51 -0
  82. data/lib/masamune/transform/define_event_view.rb +60 -0
  83. data/lib/masamune/transform/define_index.psql.erb +34 -0
  84. data/lib/masamune/transform/define_schema.hql.erb +23 -0
  85. data/lib/masamune/transform/define_schema.psql.erb +79 -0
  86. data/lib/masamune/transform/define_schema.rb +56 -0
  87. data/lib/masamune/transform/define_table.hql.erb +34 -0
  88. data/lib/masamune/transform/define_table.psql.erb +95 -0
  89. data/lib/masamune/transform/define_table.rb +40 -0
  90. data/lib/masamune/transform/define_unique.psql.erb +30 -0
  91. data/lib/masamune/transform/insert_reference_values.psql.erb +43 -0
  92. data/lib/masamune/transform/insert_reference_values.rb +64 -0
  93. data/lib/masamune/transform/load_dimension.rb +47 -0
  94. data/lib/masamune/transform/load_fact.rb +45 -0
  95. data/lib/masamune/transform/operator.rb +96 -0
  96. data/lib/masamune/transform/relabel_dimension.psql.erb +76 -0
  97. data/lib/masamune/transform/relabel_dimension.rb +39 -0
  98. data/lib/masamune/transform/rollup_fact.psql.erb +79 -0
  99. data/lib/masamune/transform/rollup_fact.rb +149 -0
  100. data/lib/masamune/transform/snapshot_dimension.psql.erb +75 -0
  101. data/lib/masamune/transform/snapshot_dimension.rb +74 -0
  102. data/lib/masamune/transform/stage_dimension.psql.erb +39 -0
  103. data/lib/masamune/transform/stage_dimension.rb +83 -0
  104. data/lib/masamune/transform/stage_fact.psql.erb +80 -0
  105. data/lib/masamune/transform/stage_fact.rb +111 -0
  106. data/lib/masamune/version.rb +25 -0
  107. data/spec/fixtures/aggregate.sql.erb +25 -0
  108. data/spec/fixtures/comment.sql.erb +27 -0
  109. data/spec/fixtures/invalid.sql.erb +23 -0
  110. data/spec/fixtures/relative.sql.erb +23 -0
  111. data/spec/fixtures/simple.sql.erb +28 -0
  112. data/spec/fixtures/whitespace.sql.erb +30 -0
  113. data/spec/masamune/actions/elastic_mapreduce_spec.rb +108 -0
  114. data/spec/masamune/actions/execute_spec.rb +50 -0
  115. data/spec/masamune/actions/hadoop_filesystem_spec.rb +44 -0
  116. data/spec/masamune/actions/hadoop_streaming_spec.rb +74 -0
  117. data/spec/masamune/actions/hive_spec.rb +117 -0
  118. data/spec/masamune/actions/postgres_admin_spec.rb +58 -0
  119. data/spec/masamune/actions/postgres_spec.rb +134 -0
  120. data/spec/masamune/actions/s3cmd_spec.rb +44 -0
  121. data/spec/masamune/actions/transform_spec.rb +144 -0
  122. data/spec/masamune/after_initialization_callbacks_spec.rb +61 -0
  123. data/spec/masamune/cached_filesystem_spec.rb +167 -0
  124. data/spec/masamune/commands/hadoop_filesystem_spec.rb +50 -0
  125. data/spec/masamune/commands/hadoop_streaming_spec.rb +106 -0
  126. data/spec/masamune/commands/hive_spec.rb +117 -0
  127. data/spec/masamune/commands/postgres_admin_spec.rb +69 -0
  128. data/spec/masamune/commands/postgres_spec.rb +100 -0
  129. data/spec/masamune/commands/retry_with_backoff_spec.rb +116 -0
  130. data/spec/masamune/commands/s3cmd_spec.rb +50 -0
  131. data/spec/masamune/commands/shell_spec.rb +101 -0
  132. data/spec/masamune/configuration_spec.rb +102 -0
  133. data/spec/masamune/data_plan/builder_spec.rb +91 -0
  134. data/spec/masamune/data_plan/elem_spec.rb +102 -0
  135. data/spec/masamune/data_plan/engine_spec.rb +356 -0
  136. data/spec/masamune/data_plan/rule_spec.rb +407 -0
  137. data/spec/masamune/data_plan/set_spec.rb +517 -0
  138. data/spec/masamune/environment_spec.rb +65 -0
  139. data/spec/masamune/filesystem_spec.rb +1421 -0
  140. data/spec/masamune/helpers/postgres_spec.rb +95 -0
  141. data/spec/masamune/schema/catalog_spec.rb +613 -0
  142. data/spec/masamune/schema/column_spec.rb +696 -0
  143. data/spec/masamune/schema/dimension_spec.rb +137 -0
  144. data/spec/masamune/schema/event_spec.rb +75 -0
  145. data/spec/masamune/schema/fact_spec.rb +117 -0
  146. data/spec/masamune/schema/map_spec.rb +593 -0
  147. data/spec/masamune/schema/row_spec.rb +28 -0
  148. data/spec/masamune/schema/store_spec.rb +49 -0
  149. data/spec/masamune/schema/table_spec.rb +395 -0
  150. data/spec/masamune/string_format_spec.rb +60 -0
  151. data/spec/masamune/tasks/elastic_mapreduce_thor_spec.rb +57 -0
  152. data/spec/masamune/tasks/hive_thor_spec.rb +75 -0
  153. data/spec/masamune/tasks/postgres_thor_spec.rb +42 -0
  154. data/spec/masamune/tasks/shell_thor_spec.rb +51 -0
  155. data/spec/masamune/template_spec.rb +77 -0
  156. data/spec/masamune/thor_spec.rb +238 -0
  157. data/spec/masamune/transform/bulk_upsert.dimension_spec.rb +200 -0
  158. data/spec/masamune/transform/consolidate_dimension_spec.rb +62 -0
  159. data/spec/masamune/transform/deduplicate_dimension_spec.rb +84 -0
  160. data/spec/masamune/transform/define_event_view_spec.rb +84 -0
  161. data/spec/masamune/transform/define_schema_spec.rb +83 -0
  162. data/spec/masamune/transform/define_table.dimension_spec.rb +306 -0
  163. data/spec/masamune/transform/define_table.fact_spec.rb +291 -0
  164. data/spec/masamune/transform/define_table.table_spec.rb +525 -0
  165. data/spec/masamune/transform/insert_reference_values.dimension_spec.rb +111 -0
  166. data/spec/masamune/transform/insert_reference_values.fact_spec.rb +149 -0
  167. data/spec/masamune/transform/load_dimension_spec.rb +76 -0
  168. data/spec/masamune/transform/load_fact_spec.rb +89 -0
  169. data/spec/masamune/transform/relabel_dimension_spec.rb +102 -0
  170. data/spec/masamune/transform/rollup_fact_spec.rb +333 -0
  171. data/spec/masamune/transform/snapshot_dimension_spec.rb +103 -0
  172. data/spec/masamune/transform/stage_dimension_spec.rb +115 -0
  173. data/spec/masamune/transform/stage_fact_spec.rb +204 -0
  174. data/spec/masamune_spec.rb +32 -0
  175. data/spec/spec_helper.rb +41 -0
  176. data/spec/support/masamune/example_group.rb +36 -0
  177. data/spec/support/masamune/mock_command.rb +99 -0
  178. data/spec/support/masamune/mock_delegate.rb +51 -0
  179. data/spec/support/masamune/mock_filesystem.rb +96 -0
  180. data/spec/support/masamune/thor_mute.rb +35 -0
  181. data/spec/support/rspec/example/action_example_group.rb +34 -0
  182. data/spec/support/rspec/example/task_example_group.rb +80 -0
  183. data/spec/support/rspec/example/transform_example_group.rb +36 -0
  184. data/spec/support/shared_examples/postgres_common_examples.rb +53 -0
  185. metadata +462 -0
@@ -0,0 +1,95 @@
1
+ -- The MIT License (MIT)
2
+ --
3
+ -- Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ --
5
+ -- Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ -- of this software and associated documentation files (the "Software"), to deal
7
+ -- in the Software without restriction, including without limitation the rights
8
+ -- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ -- copies of the Software, and to permit persons to whom the Software is
10
+ -- furnished to do so, subject to the following conditions:
11
+ --
12
+ -- The above copyright notice and this permission notice shall be included in
13
+ -- all copies or substantial portions of the Software.
14
+ --
15
+ -- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ -- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ -- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ -- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ -- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ -- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ -- THE SOFTWARE.
22
+
23
<%
  # Renders the PostgreSQL statements that define `target`, in this order:
  #   1. the same template for every entry of target.children
  #   2. guarded CREATE TYPE for each enum column
  #   3. guarded CREATE SEQUENCE / RESTART for each sequence column
  #   4. CREATE [TEMPORARY] TABLE IF NOT EXISTS with target.defined_columns
  #   5. guarded ALTER SEQUENCE ... OWNED BY for each sequence column
  #   6. one COPY statement per entry of `files`
  #   7. the define_unique and define_index templates
  #   8. guarded INSERTs for target.insert_rows
  #   9. CREATE OR REPLACE FUNCTION helpers for target.aliased_rows
  #
  # Locals: `target` (required) and `files` (optional, defaults to an empty list).
  files ||= []
%>

<%- target.children.each do |child| -%>
<%= render 'define_table.psql.erb', target: child %>
<%- end -%>

<%- target.enum_columns.each do |_, column| -%>
DO $$ BEGIN
IF NOT EXISTS (SELECT 1 FROM pg_type t WHERE LOWER(t.typname) = LOWER('<%= column.sql_type %>')) THEN
CREATE TYPE <%= column.sql_type %> AS ENUM (<%= column.values.map { |value| "'#{value}'" }.join(', ') %>);
END IF; END $$;
<%- end -%>

<%- target.sequence_columns.each do |_, column| -%>
DO $$ BEGIN
IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = '<%= column.sequence_id %>') THEN
CREATE SEQUENCE <%= column.sequence_id %>;
ALTER SEQUENCE <%= column.sequence_id %> RESTART <%= column.sequence_offset %>;
END IF; END $$;
<%- end -%>

<%- if target.temporary? -%>
CREATE TEMPORARY TABLE IF NOT EXISTS <%= target.name %>
<%- else -%>
CREATE TABLE IF NOT EXISTS <%= target.name %>
<%- end -%>
(
<%- target.defined_columns.each do |column, last| -%>
<%= column.as_psql %><%= ',' unless last %>
<%- end -%>
);

<%- target.sequence_columns.each do |_, column| -%>
DO $$ BEGIN
IF NOT EXISTS (SELECT 1 WHERE sequence_owner('<%= column.sequence_id %>') = '<%= column.qualified_name %>') THEN
ALTER SEQUENCE <%= column.sequence_id %> OWNED BY <%= column.qualified_name %>;
END IF; END $$;
<%- end -%>

<%- files.each do |file| -%>
<%-
  # COPY options are derived from the target's store: csv selects the csv
  # format, tsv sets a tab delimiter, and `headers` adds HEADER.
  copy_options = []
  copy_options << "FORMAT 'csv'" if target.store.format == :csv
  copy_options << "DELIMITER '\t'" if target.store.format == :tsv
  copy_options << "HEADER true" if target.store.headers
  # PostgreSQL rejects an empty option list ("WITH ()"), so the clause is
  # emitted only when at least one option applies.
-%>
COPY <%= target.name %> FROM '<%= file %>'<%= " WITH (#{copy_options.join(', ')})" if copy_options.any? %>;
<%- end -%>

<%= render 'define_unique.psql.erb', target: target %>
<%= render 'define_index.psql.erb', target: target %>

<% target.insert_rows.each do |row| %>
INSERT INTO <%= target.name %> (<%= row.insert_columns.join(', ') %>)
SELECT <%= row.insert_values.join(', ') %>
WHERE NOT EXISTS (SELECT 1 FROM <%= target.name %> WHERE <%= row.insert_constraints.join(' AND ') %>);
<%- end -%>

<% target.aliased_rows.each do |row| %>
<%- row.natural_keys.each do |column| -%>
CREATE OR REPLACE FUNCTION <%= row.name(column) %>
RETURNS <%= column.sql_type %> IMMUTABLE AS $$
SELECT <%= row.sql_value(column) %>;
$$ LANGUAGE SQL;

<%- end -%>

CREATE OR REPLACE FUNCTION <%= row.name %>
RETURNS <%= target.surrogate_key.sql_type %> IMMUTABLE AS $$
SELECT <%= target.surrogate_key.name %> FROM <%= target.name %> WHERE <%= row.insert_constraints.join(' AND ') %>;
$$ LANGUAGE SQL;

<%- end -%>
@@ -0,0 +1,40 @@
1
+ # The MIT License (MIT)
2
+ #
3
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
module Masamune::Transform
  # Transform mixin that builds the operator rendering the `define_table`
  # template for a schema target.
  module DefineTable
    extend ActiveSupport::Concern

    # Builds the `define_table` operator for +target+.
    #
    # @param target schema object to define; nothing is built when it
    #   reports `implicit`
    # @param files optional files; they are passed through
    #   Masamune::Schema::Map.convert_files and given to the operator under
    #   the :files key
    # @return [Operator, nil] the operator, or nil for implicit targets
    def define_table(target, files = [])
      return if target.implicit

      converted_files = Masamune::Schema::Map.convert_files(files)
      operator = Operator.new(__method__, target: target, files: converted_files, presenters: { hive: Hive })
      # Targets flagged with `debug` get their rendered definition logged.
      logger.debug("#{target.id}\n" + operator.to_s) if target.debug
      operator
    end

    # Presenter registered under the :hive key of the operator's presenters.
    class Hive < SimpleDelegator
      # @return [String] comma-separated "name hql_type" declarations, one
      #   per partition column of the wrapped object
      def partition_by
        declarations = partitions.map do |_, column|
          "#{column.name} #{column.hql_type}"
        end
        declarations.join(', ')
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
+ -- The MIT License (MIT)
2
+ --
3
+ -- Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ --
5
+ -- Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ -- of this software and associated documentation files (the "Software"), to deal
7
+ -- in the Software without restriction, including without limitation the rights
8
+ -- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ -- copies of the Software, and to permit persons to whom the Software is
10
+ -- furnished to do so, subject to the following conditions:
11
+ --
12
+ -- The above copyright notice and this permission notice shall be included in
13
+ -- all copies or substantial portions of the Software.
14
+ --
15
+ -- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ -- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ -- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ -- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ -- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ -- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ -- THE SOFTWARE.
22
+
23
<%-
  # Adds one UNIQUE constraint per entry of target.unique_constraints. Each
  # ALTER TABLE is wrapped in a DO block that first checks pg_class for a
  # relation carrying the constraint's name, so re-running the script does
  # not attempt to add the constraint twice.
-%>
<%- target.unique_constraints.each do |column_names, id| -%>
<%-
  # The constraint name combines the table name and the constraint id.
  constraint_name = "#{target.name}_#{id}_key"
  unique_columns = column_names.join(', ')
-%>
DO $$ BEGIN
IF NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.relname = '<%= constraint_name %>') THEN
ALTER TABLE <%= target.name %> ADD CONSTRAINT <%= constraint_name %> UNIQUE(<%= unique_columns %>);
END IF; END $$;

<%- end -%>
@@ -0,0 +1,43 @@
1
+ -- The MIT License (MIT)
2
+ --
3
+ -- Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ --
5
+ -- Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ -- of this software and associated documentation files (the "Software"), to deal
7
+ -- in the Software without restriction, including without limitation the rights
8
+ -- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ -- copies of the Software, and to permit persons to whom the Software is
10
+ -- furnished to do so, subject to the following conditions:
11
+ --
12
+ -- The above copyright notice and this permission notice shall be included in
13
+ -- all copies or substantial portions of the Software.
14
+ --
15
+ -- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ -- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ -- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ -- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ -- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ -- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ -- THE SOFTWARE.
22
+
23
<%-
  # Stages the distinct reference values found in `source` into the target's
  # stage table and then renders the bulk upsert from that stage table into
  # `target`. Nothing is emitted when target.insert_columns(source) is empty.
  reference_columns = target.insert_columns(source)
  staging = target.stage_table
-%>
<%- if reference_columns.any? -%>
CREATE TEMPORARY TABLE IF NOT EXISTS <%= staging.name %> (LIKE <%= target.name %> INCLUDING ALL);

INSERT INTO
<%= staging.name %> (<%= reference_columns.join(', ') %>)
SELECT DISTINCT
<%- target.insert_values(source).each do |value, last| -%>
<%= value %><%= last ? '' : ',' %>
<%- end -%>
FROM
<%= source.name %>
<%- if target.insert_constraints(source).any? -%>
WHERE
<%- target.insert_constraints(source).each do |constraint, last| -%>
<%= constraint %><%= last ? '' : ' AND' %>
<%- end -%>
<%- end -%>
;

<%= target.bulk_upsert(staging, target) %>
<%- end -%>
@@ -0,0 +1,64 @@
1
+ # The MIT License (MIT)
2
+ #
3
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ require 'masamune/transform/bulk_upsert'
24
+
25
module Masamune::Transform
  # Transform mixin that builds one `insert_reference_values` operator per
  # reference reported by the target and combines them into a single operator.
  module InsertReferenceValues
    extend ActiveSupport::Concern

    # @param source object the reference values are read from
    # @param target object whose `insert_references` are iterated
    # @return [Operator] operator wrapping one nested operator per reference
    def insert_reference_values(source, target)
      reference_operators = target.insert_references.map do |_, reference|
        Operator.new(__method__, source: source, target: reference, presenters: { postgres: Postgres })
      end
      Operator.new(*reference_operators)
    end

    private

    # Presenter registered under the :postgres key; supplies the column,
    # value and constraint lists used by the insert_reference_values template.
    class Postgres < SimpleDelegator
      include BulkUpsert
      include Masamune::LastElement

      # @return [Array] name of the first matching column for every column
      #   that +source+ shares with the stage table
      def insert_columns(source)
        source.shared_columns(stage_table).map do |_, matching_columns|
          matching_columns.first.name
        end
      end

      # @return [Array<String>] one SELECT expression per shared column; a
      #   column whose adjacent column has a default is wrapped in COALESCE
      #   so NULLs fall back to that default
      def insert_values(source)
        source.shared_columns(stage_table).map do |column, _|
          adjacent = column.adjacent
          next column.name unless adjacent.try(:default)

          "COALESCE(#{column.name}, #{adjacent.sql_value(adjacent.default)})"
        end
      end
      # NOTE(review): method_with_last_element comes from Masamune::LastElement
      # (not shown here); presumably it is what lets the template iterate
      # these results as |value, last| pairs - confirm there.
      method_with_last_element :insert_values

      # @return [Array<String>] "IS NOT NULL" predicates for every shared
      #   column that is not nullable and has no default of its own or on its
      #   adjacent column
      def insert_constraints(source)
        required_columns = source.shared_columns(stage_table).reject do |column, _|
          column.null || column.default || column.adjacent.try(:default)
        end
        required_columns.map { |column, _| "#{column.name} IS NOT NULL" }
      end
      method_with_last_element :insert_constraints
    end
  end
end
@@ -0,0 +1,47 @@
1
+ # The MIT License (MIT)
2
+ #
3
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ require 'masamune/transform/define_table'
24
+ require 'masamune/transform/stage_dimension'
25
+ require 'masamune/transform/insert_reference_values'
26
+ require 'masamune/transform/bulk_upsert'
27
+
28
module Masamune::Transform
  # Transform mixin that assembles the operator for loading files into a
  # dimension table.
  module LoadDimension
    include DefineTable
    include StageDimension
    include InsertReferenceValues
    include BulkUpsert

    extend ActiveSupport::Concern

    # Builds the operator that loads +files+ into the dimension +target+.
    #
    # @param files files handed to define_table for the file stage table
    # @param source object providing the file stage table
    # @param target dimension to load; a target of type :four is replaced by
    #   its ledger table
    # @return [Operator] operator combining the four steps listed below
    def load_dimension(files, source, target)
      target = target.ledger_table if target.type == :four
      file_stage = source.stage_table(suffix: 'file', table: target, inherit: false)
      steps = [
        define_table(file_stage, files),
        insert_reference_values(file_stage, target),
        stage_dimension(file_stage, target),
        bulk_upsert(target.stage_table, target)
      ]
      Operator.new(*steps)
    end
  end
end
@@ -0,0 +1,45 @@
1
+ # The MIT License (MIT)
2
+ #
3
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ require 'masamune/transform/define_table'
24
+ require 'masamune/transform/stage_fact'
25
+ require 'masamune/transform/insert_reference_values'
26
+ require 'masamune/transform/bulk_upsert'
27
+
28
module Masamune::Transform
  # Transform mixin that assembles the operator for loading files into a
  # fact table.
  module LoadFact
    include DefineTable
    include StageFact
    include InsertReferenceValues
    include BulkUpsert

    extend ActiveSupport::Concern

    # Builds the operator that loads +files+ into the fact +target+.
    #
    # @param files files handed to define_table for the file stage table
    # @param source object providing the file stage table
    # @param target fact to load
    # @param date passed through to stage_fact
    # @return [Operator] operator combining the three steps listed below
    def load_fact(files, source, target, date)
      file_stage = source.stage_table(suffix: 'file', table: target, inherit: false)
      steps = [
        define_table(file_stage, files),
        insert_reference_values(file_stage, target),
        stage_fact(file_stage, target, date)
      ]
      Operator.new(*steps)
    end
  end
end
@@ -0,0 +1,96 @@
1
+ # The MIT License (MIT)
2
+ #
3
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
require 'tempfile'

module Masamune
  module Transform
    # Combines templates and nested operators into one renderable script.
    #
    # Each positional argument is either another Operator, or a Symbol/String
    # naming a template. A trailing Hash supplies :source, :target and
    # :presenters; every remaining key is passed to the template as a local.
    class Operator
      # @param args [Array] templates and/or operators, optionally followed by
      #   an options Hash (:source, :target, :presenters, plus template locals)
      def initialize(*args)
        # Work on a copy so the deletes below never strip keys from a hash
        # that is still owned by the caller.
        options = args.last.is_a?(Hash) ? args.pop.dup : {}
        @templates = args
        @source = options.delete(:source)
        @target = options.delete(:target)
        @presenters = options.delete(:presenters) || {}
        @locals = options
      end

      # @return the source, wrapped in the presenter registered for its store
      #   type when there is one; nil when no source was given
      def source
        return unless @source

        present(@source, source_store)
      end

      # @return the target, wrapped in the presenter registered for its store
      #   type when there is one; nil when no target was given
      def target
        return unless @target

        present(@target, target_store)
      end

      # Renders every template and combines the results with the nested
      # operators via Masamune::Template.combine. Entries that are neither an
      # Operator nor a Symbol/String are skipped.
      #
      # @return the value returned by Masamune::Template.combine
      def to_s
        parts = @templates.each_with_object([]) do |template, rendered|
          case template
          when Operator
            rendered << template
          when Symbol, String
            rendered << template_eval(template)
          end
        end
        Masamune::Template.combine(*parts)
      end

      # Writes the rendered script to a new temporary file.
      #
      # The file is created with Tempfile.create, which returns a plain File
      # without a finalizer. A Tempfile instance would be unreferenced as soon
      # as this method returns and could be unlinked by the garbage collector
      # while the returned path is still in use. Removing the file is
      # therefore left to the caller.
      #
      # @return [String] path of the written file
      def to_file
        contents = to_s
        file = Tempfile.create('masamune')
        begin
          file.write(contents)
        ensure
          file.close
        end
        file.path
      end

      private

      # Wraps +object+ in the presenter registered for the store's type, or
      # returns it unchanged when no presenter is registered for that type.
      def present(object, store)
        type = store.try(:type)
        @presenters.key?(type) ? @presenters[type].new(object) : object
      end

      def source_store
        store_of(@source)
      end

      def target_store
        store_of(@target)
      end

      # A Store is its own store; anything else is asked for its `store`.
      def store_of(object)
        return object if object.is_a?(Masamune::Schema::Store)

        object.try(:store)
      end

      # A template naming an existing file is read verbatim; any other name is
      # resolved to a bundled ERB template and rendered with the locals plus
      # the presented source and target.
      def template_eval(template)
        path = template.to_s
        # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
        return File.read(path) if File.exist?(path)

        Masamune::Template.render_to_string(template_file(template), @locals.merge(source: source, target: target))
      end

      # Bundled templates live next to this file and are named
      # "<prefix>.<suffix>.erb".
      def template_file(template_prefix)
        File.expand_path("#{template_prefix}.#{template_suffix}.erb", File.dirname(__FILE__))
      end

      # The template dialect follows the target's store type, falling back to
      # the source's store when there is no target store.
      def template_suffix
        case (target_store || source_store).try(:type)
        when :postgres
          'psql'
        when :hive
          'hql'
        else
          'txt'
        end
      end
    end
  end
end