masamune 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +54 -0
  4. data/Rakefile +15 -0
  5. data/bin/masamune-elastic-mapreduce +4 -0
  6. data/bin/masamune-hive +4 -0
  7. data/bin/masamune-psql +4 -0
  8. data/bin/masamune-shell +4 -0
  9. data/lib/masamune.rb +56 -0
  10. data/lib/masamune/accumulate.rb +60 -0
  11. data/lib/masamune/actions.rb +38 -0
  12. data/lib/masamune/actions/data_flow.rb +131 -0
  13. data/lib/masamune/actions/date_parse.rb +75 -0
  14. data/lib/masamune/actions/elastic_mapreduce.rb +68 -0
  15. data/lib/masamune/actions/execute.rb +52 -0
  16. data/lib/masamune/actions/filesystem.rb +37 -0
  17. data/lib/masamune/actions/hadoop_filesystem.rb +40 -0
  18. data/lib/masamune/actions/hadoop_streaming.rb +41 -0
  19. data/lib/masamune/actions/hive.rb +74 -0
  20. data/lib/masamune/actions/postgres.rb +76 -0
  21. data/lib/masamune/actions/postgres_admin.rb +34 -0
  22. data/lib/masamune/actions/s3cmd.rb +44 -0
  23. data/lib/masamune/actions/transform.rb +89 -0
  24. data/lib/masamune/after_initialize_callbacks.rb +55 -0
  25. data/lib/masamune/cached_filesystem.rb +110 -0
  26. data/lib/masamune/commands.rb +37 -0
  27. data/lib/masamune/commands/elastic_mapreduce.rb +119 -0
  28. data/lib/masamune/commands/hadoop_filesystem.rb +57 -0
  29. data/lib/masamune/commands/hadoop_streaming.rb +116 -0
  30. data/lib/masamune/commands/hive.rb +178 -0
  31. data/lib/masamune/commands/interactive.rb +37 -0
  32. data/lib/masamune/commands/postgres.rb +128 -0
  33. data/lib/masamune/commands/postgres_admin.rb +72 -0
  34. data/lib/masamune/commands/postgres_common.rb +33 -0
  35. data/lib/masamune/commands/retry_with_backoff.rb +60 -0
  36. data/lib/masamune/commands/s3cmd.rb +70 -0
  37. data/lib/masamune/commands/shell.rb +202 -0
  38. data/lib/masamune/configuration.rb +195 -0
  39. data/lib/masamune/data_plan.rb +31 -0
  40. data/lib/masamune/data_plan/builder.rb +66 -0
  41. data/lib/masamune/data_plan/elem.rb +190 -0
  42. data/lib/masamune/data_plan/engine.rb +162 -0
  43. data/lib/masamune/data_plan/rule.rb +292 -0
  44. data/lib/masamune/data_plan/set.rb +176 -0
  45. data/lib/masamune/environment.rb +164 -0
  46. data/lib/masamune/filesystem.rb +567 -0
  47. data/lib/masamune/has_environment.rb +40 -0
  48. data/lib/masamune/helpers.rb +27 -0
  49. data/lib/masamune/helpers/postgres.rb +84 -0
  50. data/lib/masamune/io.rb +33 -0
  51. data/lib/masamune/last_element.rb +53 -0
  52. data/lib/masamune/method_logger.rb +41 -0
  53. data/lib/masamune/multi_io.rb +39 -0
  54. data/lib/masamune/schema.rb +36 -0
  55. data/lib/masamune/schema/catalog.rb +233 -0
  56. data/lib/masamune/schema/column.rb +527 -0
  57. data/lib/masamune/schema/dimension.rb +133 -0
  58. data/lib/masamune/schema/event.rb +121 -0
  59. data/lib/masamune/schema/fact.rb +133 -0
  60. data/lib/masamune/schema/map.rb +265 -0
  61. data/lib/masamune/schema/row.rb +133 -0
  62. data/lib/masamune/schema/store.rb +115 -0
  63. data/lib/masamune/schema/table.rb +308 -0
  64. data/lib/masamune/schema/table_reference.rb +76 -0
  65. data/lib/masamune/spec_helper.rb +23 -0
  66. data/lib/masamune/string_format.rb +34 -0
  67. data/lib/masamune/tasks/elastic_mapreduce_thor.rb +60 -0
  68. data/lib/masamune/tasks/hive_thor.rb +55 -0
  69. data/lib/masamune/tasks/postgres_thor.rb +47 -0
  70. data/lib/masamune/tasks/shell_thor.rb +63 -0
  71. data/lib/masamune/template.rb +77 -0
  72. data/lib/masamune/thor.rb +186 -0
  73. data/lib/masamune/thor_loader.rb +38 -0
  74. data/lib/masamune/topological_hash.rb +34 -0
  75. data/lib/masamune/transform.rb +47 -0
  76. data/lib/masamune/transform/bulk_upsert.psql.erb +64 -0
  77. data/lib/masamune/transform/bulk_upsert.rb +52 -0
  78. data/lib/masamune/transform/consolidate_dimension.rb +54 -0
  79. data/lib/masamune/transform/deduplicate_dimension.psql.erb +52 -0
  80. data/lib/masamune/transform/deduplicate_dimension.rb +53 -0
  81. data/lib/masamune/transform/define_event_view.hql.erb +51 -0
  82. data/lib/masamune/transform/define_event_view.rb +60 -0
  83. data/lib/masamune/transform/define_index.psql.erb +34 -0
  84. data/lib/masamune/transform/define_schema.hql.erb +23 -0
  85. data/lib/masamune/transform/define_schema.psql.erb +79 -0
  86. data/lib/masamune/transform/define_schema.rb +56 -0
  87. data/lib/masamune/transform/define_table.hql.erb +34 -0
  88. data/lib/masamune/transform/define_table.psql.erb +95 -0
  89. data/lib/masamune/transform/define_table.rb +40 -0
  90. data/lib/masamune/transform/define_unique.psql.erb +30 -0
  91. data/lib/masamune/transform/insert_reference_values.psql.erb +43 -0
  92. data/lib/masamune/transform/insert_reference_values.rb +64 -0
  93. data/lib/masamune/transform/load_dimension.rb +47 -0
  94. data/lib/masamune/transform/load_fact.rb +45 -0
  95. data/lib/masamune/transform/operator.rb +96 -0
  96. data/lib/masamune/transform/relabel_dimension.psql.erb +76 -0
  97. data/lib/masamune/transform/relabel_dimension.rb +39 -0
  98. data/lib/masamune/transform/rollup_fact.psql.erb +79 -0
  99. data/lib/masamune/transform/rollup_fact.rb +149 -0
  100. data/lib/masamune/transform/snapshot_dimension.psql.erb +75 -0
  101. data/lib/masamune/transform/snapshot_dimension.rb +74 -0
  102. data/lib/masamune/transform/stage_dimension.psql.erb +39 -0
  103. data/lib/masamune/transform/stage_dimension.rb +83 -0
  104. data/lib/masamune/transform/stage_fact.psql.erb +80 -0
  105. data/lib/masamune/transform/stage_fact.rb +111 -0
  106. data/lib/masamune/version.rb +25 -0
  107. data/spec/fixtures/aggregate.sql.erb +25 -0
  108. data/spec/fixtures/comment.sql.erb +27 -0
  109. data/spec/fixtures/invalid.sql.erb +23 -0
  110. data/spec/fixtures/relative.sql.erb +23 -0
  111. data/spec/fixtures/simple.sql.erb +28 -0
  112. data/spec/fixtures/whitespace.sql.erb +30 -0
  113. data/spec/masamune/actions/elastic_mapreduce_spec.rb +108 -0
  114. data/spec/masamune/actions/execute_spec.rb +50 -0
  115. data/spec/masamune/actions/hadoop_filesystem_spec.rb +44 -0
  116. data/spec/masamune/actions/hadoop_streaming_spec.rb +74 -0
  117. data/spec/masamune/actions/hive_spec.rb +117 -0
  118. data/spec/masamune/actions/postgres_admin_spec.rb +58 -0
  119. data/spec/masamune/actions/postgres_spec.rb +134 -0
  120. data/spec/masamune/actions/s3cmd_spec.rb +44 -0
  121. data/spec/masamune/actions/transform_spec.rb +144 -0
  122. data/spec/masamune/after_initialization_callbacks_spec.rb +61 -0
  123. data/spec/masamune/cached_filesystem_spec.rb +167 -0
  124. data/spec/masamune/commands/hadoop_filesystem_spec.rb +50 -0
  125. data/spec/masamune/commands/hadoop_streaming_spec.rb +106 -0
  126. data/spec/masamune/commands/hive_spec.rb +117 -0
  127. data/spec/masamune/commands/postgres_admin_spec.rb +69 -0
  128. data/spec/masamune/commands/postgres_spec.rb +100 -0
  129. data/spec/masamune/commands/retry_with_backoff_spec.rb +116 -0
  130. data/spec/masamune/commands/s3cmd_spec.rb +50 -0
  131. data/spec/masamune/commands/shell_spec.rb +101 -0
  132. data/spec/masamune/configuration_spec.rb +102 -0
  133. data/spec/masamune/data_plan/builder_spec.rb +91 -0
  134. data/spec/masamune/data_plan/elem_spec.rb +102 -0
  135. data/spec/masamune/data_plan/engine_spec.rb +356 -0
  136. data/spec/masamune/data_plan/rule_spec.rb +407 -0
  137. data/spec/masamune/data_plan/set_spec.rb +517 -0
  138. data/spec/masamune/environment_spec.rb +65 -0
  139. data/spec/masamune/filesystem_spec.rb +1421 -0
  140. data/spec/masamune/helpers/postgres_spec.rb +95 -0
  141. data/spec/masamune/schema/catalog_spec.rb +613 -0
  142. data/spec/masamune/schema/column_spec.rb +696 -0
  143. data/spec/masamune/schema/dimension_spec.rb +137 -0
  144. data/spec/masamune/schema/event_spec.rb +75 -0
  145. data/spec/masamune/schema/fact_spec.rb +117 -0
  146. data/spec/masamune/schema/map_spec.rb +593 -0
  147. data/spec/masamune/schema/row_spec.rb +28 -0
  148. data/spec/masamune/schema/store_spec.rb +49 -0
  149. data/spec/masamune/schema/table_spec.rb +395 -0
  150. data/spec/masamune/string_format_spec.rb +60 -0
  151. data/spec/masamune/tasks/elastic_mapreduce_thor_spec.rb +57 -0
  152. data/spec/masamune/tasks/hive_thor_spec.rb +75 -0
  153. data/spec/masamune/tasks/postgres_thor_spec.rb +42 -0
  154. data/spec/masamune/tasks/shell_thor_spec.rb +51 -0
  155. data/spec/masamune/template_spec.rb +77 -0
  156. data/spec/masamune/thor_spec.rb +238 -0
  157. data/spec/masamune/transform/bulk_upsert.dimension_spec.rb +200 -0
  158. data/spec/masamune/transform/consolidate_dimension_spec.rb +62 -0
  159. data/spec/masamune/transform/deduplicate_dimension_spec.rb +84 -0
  160. data/spec/masamune/transform/define_event_view_spec.rb +84 -0
  161. data/spec/masamune/transform/define_schema_spec.rb +83 -0
  162. data/spec/masamune/transform/define_table.dimension_spec.rb +306 -0
  163. data/spec/masamune/transform/define_table.fact_spec.rb +291 -0
  164. data/spec/masamune/transform/define_table.table_spec.rb +525 -0
  165. data/spec/masamune/transform/insert_reference_values.dimension_spec.rb +111 -0
  166. data/spec/masamune/transform/insert_reference_values.fact_spec.rb +149 -0
  167. data/spec/masamune/transform/load_dimension_spec.rb +76 -0
  168. data/spec/masamune/transform/load_fact_spec.rb +89 -0
  169. data/spec/masamune/transform/relabel_dimension_spec.rb +102 -0
  170. data/spec/masamune/transform/rollup_fact_spec.rb +333 -0
  171. data/spec/masamune/transform/snapshot_dimension_spec.rb +103 -0
  172. data/spec/masamune/transform/stage_dimension_spec.rb +115 -0
  173. data/spec/masamune/transform/stage_fact_spec.rb +204 -0
  174. data/spec/masamune_spec.rb +32 -0
  175. data/spec/spec_helper.rb +41 -0
  176. data/spec/support/masamune/example_group.rb +36 -0
  177. data/spec/support/masamune/mock_command.rb +99 -0
  178. data/spec/support/masamune/mock_delegate.rb +51 -0
  179. data/spec/support/masamune/mock_filesystem.rb +96 -0
  180. data/spec/support/masamune/thor_mute.rb +35 -0
  181. data/spec/support/rspec/example/action_example_group.rb +34 -0
  182. data/spec/support/rspec/example/task_example_group.rb +80 -0
  183. data/spec/support/rspec/example/transform_example_group.rb +36 -0
  184. data/spec/support/shared_examples/postgres_common_examples.rb +53 -0
  185. metadata +462 -0
@@ -0,0 +1,133 @@
1
+ # The MIT License (MIT)
2
+ #
3
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ module Masamune::Schema
24
+ class Row
25
# Attribute defaults applied by #initialize. Key order matters: #initialize
# assigns them in this order, and assigning :parent triggers normalization
# that reads :values and :strict.
DEFAULT_ATTRIBUTES =
{
  id:      nil,
  values:  {},
  default: false,
  strict:  true,
  parent:  nil,
  debug:   false
}

# One reader/writer pair per default attribute (several writers are
# redefined below with custom behavior).
attr_accessor(*DEFAULT_ATTRIBUTES.keys)
38
+
39
# Builds a row from an options hash.
#
# opts - attribute overrides merged over DEFAULT_ATTRIBUTES. String keys are
#        symbolized in place, so the caller's hash is mutated.
def initialize(opts = {})
  opts.symbolize_keys!
  attributes = DEFAULT_ATTRIBUTES.merge(opts)
  attributes.each_pair { |attribute, setting| public_send("#{attribute}=", setting) }
  # A row flagged as default but given no id is identified as :default.
  self.id ||= :default if default
end
46
+
47
# Stores the id as a Symbol. A nil/false id is ignored, leaving any
# previously assigned id untouched.
def id=(id)
  return unless id
  @id = id.to_sym
end
50
+
51
# Stores a copy of the given values with symbolized keys.
def values=(values)
  symbolized = values.symbolize_keys
  @values = symbolized
end
54
+
55
# Assigns the owning table and, when one is given, re-keys and casts the
# row's values against that table's columns.
def parent=(parent)
  @parent = parent
  return unless @parent
  normalize_values!
end
59
+
60
# Name of the SQL function that aliases this row, or nil when the row has
# no id.
#
# column - optional column; when given the name is "<id>_<column name>()",
#          otherwise "<id>_<parent name>_<parent surrogate key name>()".
def name(column = nil)
  return unless @id
  qualifier = column ? column.name : "#{parent.name}_#{parent.surrogate_key.name}"
  "#{@id}_#{qualifier}()"
end
68
+
69
# Natural key columns of the parent for which this row supplies a value
# that the column does not consider a SQL function.
def natural_keys
  parent.natural_keys.select do |column|
    values.key?(column.name) && !column.sql_function?(values[column.name])
  end
end
74
+
75
# One "<key> = <sql literal>" fragment per value, with the literal rendered
# by the matching parent column.
def insert_constraints
  values.map do |key, value|
    literal = parent.columns[key].sql_value(value)
    "#{key} = #{literal}"
  end
end
78
+
79
# Keys of the row's values, in insertion order.
def insert_columns
  values.each_key.to_a
end
82
+
83
# SQL literal for every value, rendered by the matching parent column.
def insert_values
  values.map do |key, value|
    column = parent.columns[key]
    column.sql_value(value)
  end
end
86
+
87
# The row's values as a hash with indifferent (string or symbol) access.
def to_hash
  indifferent = values.with_indifferent_access
  indifferent
end
90
+
91
# Header names for serialized output: the keys of the row's values.
def headers
  values.each_key.to_a
end
94
+
95
# CSV representation of every value, in value order, rendered by the column
# recorded for that key when the values were normalized.
def serialize
  values.map { |key, value| @columns[key].csv_value(value) }
end
102
+
103
# SQL literal of this row's value for the given column.
def sql_value(column)
  raw = values[column.name]
  column.sql_value(raw)
end
106
+
107
# Set of columns that report a required value but whose value in this row
# is nil.
def missing_required_columns
  values.each_with_object(Set.new) do |(key, value), missing|
    column = @columns[key]
    missing << column if column.required_value? && value.nil?
  end
end
115
+
116
+ private
117
+
118
# Re-keys the row's values by resolved column name and casts each value with
# the column's ruby_value. Resolved columns are remembered in @columns.
#
# Entries with a nil key are skipped. Keys the parent cannot resolve raise
# ArgumentError when the row is strict and are dropped otherwise.
def normalize_values!
  @columns = {}
  normalized = {}
  values.each_pair do |key, raw|
    next unless key
    column = parent.dereference_column_name(key)
    if column
      @columns[column.name] = column
      normalized[column.name] = column.ruby_value(raw)
    elsif strict
      raise ArgumentError, "#{@values} contains undefined columns #{key}"
    end
  end
  @values = normalized
end
132
+ end
133
+ end
@@ -0,0 +1,115 @@
1
+ # The MIT License (MIT)
2
+ #
3
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ require 'active_support/core_ext/hash'
24
+
25
+ module Masamune::Schema
26
+ class Store
27
+ include Masamune::HasEnvironment
28
+
29
# Attribute types that method_missing accepts as the trailing segment of a
# dynamic accessor name. This must be a word array: a plain %(...) literal
# builds a single String, which turns include? into a substring match.
SUPPORTED_ATTRIBUTES = %w(table dimension fact file event).freeze
30
+
31
# Attribute defaults for a store. Callable defaults are invoked with the
# store itself by #initialize, so :type must stay first: the lambdas read it.
DEFAULT_ATTRIBUTES =
{
  type:          nil,
  format:        ->(store) { store.type == :postgres ? :csv : :tsv },
  json_encoding: ->(store) { store.type == :postgres ? :quoted : :raw },
  headers:       ->(store) { store.type == :postgres ? true : false },
  debug:         false
}

# One reader/writer pair per default attribute.
attr_accessor(*DEFAULT_ATTRIBUTES.keys)

# Registries assigned by #initialize.
attr_accessor :tables, :dimensions, :facts, :files, :events, :references
50
+
51
# Store types accepted by #initialize.
def self.types
  [:postgres, :hive, :files]
end
56
+
57
# Builds a store bound to an environment.
#
# environment - assigned through the environment= writer.
# opts        - attribute overrides; :type is mandatory and must be one of
#               Store.types. String keys are symbolized in place.
#
# Raises ArgumentError when :type is missing or unknown.
def initialize(environment, opts = {})
  self.environment = environment
  opts.symbolize_keys!
  raise ArgumentError, 'required parameter type: missing' unless opts.key?(:type)
  raise ArgumentError, "unknown type: '#{opts[:type]}'" unless self.class.types.include?(opts[:type])

  DEFAULT_ATTRIBUTES.merge(opts).each_pair do |attribute, setting|
    # Callable settings are resolved against the partially initialized store.
    resolved = setting.respond_to?(:call) ? setting.call(self) : setting
    public_send("#{attribute}=", resolved)
  end

  %w(tables dimensions facts files events references).each do |registry|
    instance_variable_set("@#{registry}", {}.with_indifferent_access)
  end
  @extra = []
end
74
+
75
# Dynamic lookup of registered schema objects.
#
# For a :files store the method name is used directly as a key into files.
# Otherwise the name is split on "_": the last segment selects the registry
# (pluralized, e.g. "dimension" -> dimensions) and the remaining segments,
# re-joined with "_", form the key.
#
# Raises ArgumentError when the last segment is not in SUPPORTED_ATTRIBUTES;
# the call is never forwarded to super.
def method_missing(method, *args, &block)
  return files[method] if type == :files

  *attribute_name, attribute_type = method.to_s.split('_')
  raise ArgumentError, "unknown attribute type '#{attribute_type}'" unless SUPPORTED_ATTRIBUTES.include?(attribute_type)
  registry = self.send(attribute_type.pluralize)
  registry[attribute_name.join('_')]
end
84
+
85
# Builds a Column from an id of the form "column" or "reference.column".
#
# id      - column id; when dotted, the segment before the last names an
#           entry in references.
# options - extra Column options; the caller's hash is not modified.
#
# Raises ArgumentError when a reference is named but not registered.
def dereference_column(id, options = {})
  column_id, reference_id = id.to_s.split(/\./).reverse
  column_options = options.merge(id: column_id)

  if reference_id
    reference = references[reference_id]
    raise ArgumentError, "dimension #{reference_id} not defined" unless reference
    column_options[:reference] = reference
  end

  Masamune::Schema::Column.new(column_options)
end
98
+
99
# Extra files registered on the store.
#
# order - nil returns the backing array itself; :pre or :post returns a
#         de-duplicated selection. A file whose basename starts with digits
#         and "_" is :post when that number is positive and :pre otherwise;
#         any other file is :pre.
def extra(order = nil)
  return @extra unless order

  chosen = @extra.select do |path|
    basename = File.basename(path)
    if basename =~ /\A\d+_/
      sequence = basename.split('_').first.to_i
      sequence > 0 ? order == :post : order == :pre
    else
      order == :pre
    end
  end
  chosen.uniq
end
114
+ end
115
+ end
@@ -0,0 +1,308 @@
1
+ # The MIT License (MIT)
2
+ #
3
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ module Masamune::Schema
24
+ class Table
25
+ include Masamune::LastElement
26
+
27
# Set of child tables, created empty by #initialize.
attr_reader :children

# Attribute defaults applied by #initialize, assigned in this key order
# (the writers for :references and :columns read :name, :type and :parent).
DEFAULT_ATTRIBUTES =
{
  id:         nil,
  name:       nil,
  type:       :table,
  store:      nil,
  parent:     nil,
  suffix:     nil,
  implicit:   false,
  references: {},
  columns:    {},
  rows:       [],
  inherit:    false,
  debug:      false
}

# One reader/writer pair per default attribute (several are redefined below).
attr_accessor(*DEFAULT_ATTRIBUTES.keys)
48
+
49
# Builds a table from an options hash.
#
# opts - attribute overrides merged over DEFAULT_ATTRIBUTES; :id is
#        mandatory. String keys are symbolized in place.
#
# Raises ArgumentError when :id is not supplied.
def initialize(opts = {})
  opts.symbolize_keys!
  raise ArgumentError, 'required parameter id: missing' unless opts.key?(:id)

  DEFAULT_ATTRIBUTES.merge(opts).each_pair do |attribute, setting|
    public_send("#{attribute}=", setting)
  end
  @children = Set.new
end
57
+
58
# Stores the table id as a Symbol.
def id=(id)
  symbol = id.to_sym
  @id = symbol
end
61
+
62
# Replaces the table's references, indexing them by reference id.
#
# instance - a Hash (its values are used) or a list of TableReference
#            objects; nil entries are dropped.
#
# Raises ArgumentError for any entry that is not a TableReference.
def references=(instance)
  @references = {}
  candidates = instance.is_a?(Hash) ? instance.values : instance
  candidates.compact.each do |reference|
    raise ArgumentError, "table #{name} contains invalid table references" unless reference.is_a?(TableReference)
    @references[reference.id] = reference
  end
end
70
+
71
# Replaces the table's columns.
#
# instance - a Hash (its values are used) or a list of Column objects; nil
#            entries are dropped. Each column is duplicated, re-parented to
#            this table and stored under its name.
#
# When none of the given columns is a surrogate key, or none carries a
# reference, the corresponding implicit columns are initialized first.
#
# Raises ArgumentError when an entry is not a Column or uses a reserved id.
def columns=(instance)
  @columns = {}
  columns = (instance.is_a?(Hash) ? instance.values : instance).compact

  # Validate types before anything calls Column methods on the entries;
  # otherwise a foreign object fails with NoMethodError and this error is
  # never reached.
  raise ArgumentError, "table #{name} contains invalid columns" unless columns.all? { |column| column.is_a?(Column) }
  raise ArgumentError, "table #{name} contains reserved columns" if columns.any? { |column| reserved_column_ids.include?(column.id) }

  initialize_surrogate_key_column! unless columns.any? { |column| column.surrogate_key }
  initialize_reference_columns! unless columns.any? { |column| column.reference }
  columns.each do |column|
    copy = column.dup
    @columns[column.name] = copy
    copy.parent = self
  end
end
84
+
85
# Replaces the table's rows with duplicates of the given rows, each
# re-parented to this table. The caller's row objects are left untouched.
def rows=(rows)
  @rows = []
  rows.each do |row|
    copy = row.dup
    @rows << copy
    copy.parent = self
  end
end
92
+
93
# Explicit name when one was assigned, otherwise the id and suffix joined
# with "_".
def name
  return @name if @name
  [id, suffix].compact.join('_')
end
96
+
97
# Name suffix: the parent's suffix segments, this table's type and its own
# suffix, with blanks and repeated segments removed, joined with "_".
def suffix
  inherited = parent ? parent.suffix.split('_') : []
  segments = inherited + [type.to_s, @suffix]
  segments.compact.uniq.join('_')
end
100
+
101
# True when this is a stage table.
def temporary?
  stage = (type == :stage)
  stage
end
104
+
105
# First column flagged as surrogate key, or nil when there is none.
def surrogate_key
  columns.each_value.find(&:surrogate_key)
end
108
+
109
# Columns flagged as natural keys, in definition order.
def natural_keys
  columns.each_value.select(&:natural_key)
end
112
+
113
# All column objects of the table, in definition order.
def defined_columns
  columns.each_value.to_a
end
116
+ method_with_last_element :defined_columns
117
+
118
# Unique constraints as [column_names, short digest] pairs; always empty
# for temporary tables.
def unique_constraints
  return [] if temporary?
  unique_constraints_map.each_value.map do |column_names|
    [column_names, short_md5(column_names)]
  end
end
124
+
125
# TODO: Add optional USING
# TODO: Default to GIN for array columns
#
# Index definitions as [column_names, unique?, short digest] triples, where
# unique? tells whether the same set of columns also forms a unique
# constraint.
def index_columns
  index_column_map.each_value.map do |column_names|
    unique = reverse_unique_constraints_map.key?(column_names.sort)
    [column_names, unique, short_md5(column_names)]
  end
end
132
+
133
# Columns whose unique attribute is truthy; empty for temporary tables.
# NOTE(review): unique_constraints_map iterates column.unique with #each, so
# it looks like a collection. If it can be an empty array (truthy in Ruby),
# this filter keeps every column - confirm against Column#unique.
def unique_columns
  temporary? ? {} : columns.select { |_name, column| column.unique }
end
137
+
138
# Columns of type :enum; empty for temporary tables.
def enum_columns
  temporary? ? {} : columns.select { |_name, column| column.type == :enum }
end
142
+
143
# Columns of type :sequence that carry no reference; empty for temporary
# tables.
def sequence_columns
  return {} if temporary?
  columns.select do |_name, column|
    column.reference.nil? && column.type == :sequence
  end
end
147
+
148
# Columns that carry a reference, in definition order.
def reference_columns
  columns.each_value.select(&:reference)
end
151
+
152
# Columns whose reference exists and is flagged as a foreign key.
def foreign_key_columns
  columns.each_value.select do |column|
    reference = column.reference
    reference && column.reference.foreign_key
  end
end
155
+
156
# Rows that have at least one truthy insert value.
def insert_rows
  rows.select do |row|
    row.insert_values.any?
  end
end
159
+
160
# Rows that report a name.
def aliased_rows
  rows.select(&:name)
end
163
+
164
# References flagged with insert, keyed as in references.
def insert_references
  references.select { |_id, reference| reference.insert }
end
167
+
168
# Columns whose id is one of the reserved column ids.
def reserved_columns
  columns.select do |_name, column|
    reserved_column_ids.include?(column.id)
  end
end
171
+
172
# Columns whose id is not one of the reserved column ids.
def unreserved_columns
  columns.reject do |_name, column|
    reserved_column_ids.include?(column.id)
  end
end
175
+
176
# Builds a stage table derived from this table. Results are memoized per
# options hash.
#
# options - :columns  column names to carry over (takes precedence)
#           :target   table whose columns' compact names are carried over
#           :suffix   appended to this table's id to form the stage id
#           :table    table used as parent and as class of the result
#                     (default: self)
#           :type     type of the new table (default: :stage)
#           :inherit  passed through to the new table (default: true)
def stage_table(options = {})
  selected = options[:columns]
  selected ||= options[:target].columns.values.map(&:compact_name) if options[:target]
  selected ||= []

  stage_id = [id, options[:suffix]].compact.join('_')
  base = options[:table] || self
  stage_type = options[:type] || :stage
  inherit_reserved = options.fetch(:inherit, true)

  @stage_tables ||= {}
  @stage_tables[options] ||= base.class.new(
    id: stage_id,
    type: stage_type,
    store: store,
    columns: stage_table_columns(base, selected, inherit_reserved),
    references: stage_table_references(base, selected),
    parent: base,
    inherit: inherit_reserved
  )
end
186
+
187
# Maps each of this table's columns to the columns of +other+ it
# references. Columns without a match are absent from the result; the
# returned hash creates an empty list for keys that are looked up later.
def shared_columns(other)
  shared = Hash.new { |hash, key| hash[key] = [] }
  columns.each_value do |column|
    other.columns.each_value do |candidate|
      shared[column] << candidate if column.references?(candidate)
    end
  end
  shared
end
196
+
197
# Resolves a (possibly reference-qualified) column name to a column.
#
# When the name points at a known reference, a duplicate of the referenced
# column bound to that reference is returned, or nil if the reference has no
# such column. Otherwise the table's own column of that name is returned
# (nil when missing).
def dereference_column_name(name)
  reference_name, column_name = Column.dereference_column_name(name)
  reference = references[reference_name]
  if reference
    column = reference.columns[column_name]
    dereference_column(column.dup, reference) if column
  else
    columns[column_name]
  end
end
207
+
208
# Binds a column to a reference: clears its surrogate key flag, assigns the
# reference and returns the same (mutated) column.
def dereference_column(column, reference)
  column.tap do |bound|
    bound.surrogate_key = false
    bound.reference = reference
  end
end
213
+
214
# Ids of columns reserved by the parent table.
#
# Returns the parent's reserved ids when this table inherits from a parent,
# and an empty list otherwise. A table flagged as inheriting but without a
# parent has nothing to inherit, so it also yields an empty list instead of
# failing on nil.
def reserved_column_ids
  return [] unless inherit && parent
  parent.reserved_column_ids
end
217
+
218
+ private
219
+
220
# Columns to copy into a stage table, keyed by the requested name.
#
# parent   - table whose reserved column ids are excluded when inheriting.
# selected - column names to resolve; all own column names when empty.
# inherit  - when true, columns reserved by +parent+ are skipped.
#
# Unresolvable names and this table's own surrogate key are skipped too.
def stage_table_columns(parent, selected = [], inherit = true)
  names = selected.empty? ? columns.keys : selected
  names.each_with_object({}) do |name, result|
    column = dereference_column_name(name)
    next unless column
    next if inherit && parent.reserved_column_ids.include?(column.id)
    next if column.parent == self && column.surrogate_key
    result[name] = column
  end
end
236
+
237
# References to copy into a stage table, keyed by the requested name.
#
# parent   - accepted for symmetry with stage_table_columns; not used here.
# selected - names to resolve; all reference keys when empty.
#
# Only names resolving to a column that does not belong to this table
# contribute, each mapped to that column's reference.
def stage_table_references(parent, selected = [])
  names = selected.empty? ? references.keys : selected
  names.each_with_object({}) do |name, result|
    column = dereference_column_name(name)
    next unless column
    next if column.parent == self
    result[name] = column.reference
  end
end
248
+
249
# Adds the implicit integer 'id' surrogate key column. Only tables of type
# :table get one; for any other type this is a no-op returning nil.
def initialize_surrogate_key_column!
  return unless type == :table
  initialize_column! id: 'id', type: :integer, surrogate_key: true
end
255
+
256
# Adds the implicit columns implied by the table's references.
#
# A denormalized reference contributes one indexed column per unreserved
# column of the reference (surrogate keys and ignored columns excluded).
# Any other reference flagged as foreign key contributes a single indexed
# foreign key column. Default, null and natural_key settings are taken from
# the reference.
def initialize_reference_columns!
  references.map do |_id, reference|
    if reference.denormalize
      reference.unreserved_columns.each do |_name, column|
        next if column.surrogate_key || column.ignore
        initialize_column!(
          id: column.id,
          type: column.type,
          reference: reference,
          default: reference.default,
          index: true,
          null: reference.null,
          natural_key: reference.natural_key
        )
      end
    elsif reference.foreign_key
      # FIXME column.reference should point to reference.surrogate_key, only allow column references to Columns
      initialize_column!(
        id: reference.foreign_key_name,
        type: reference.foreign_key_type,
        reference: reference,
        default: reference.default,
        index: true,
        null: reference.null,
        natural_key: reference.natural_key
      )
    end
  end
end
270
+
271
# Creates a column owned by this table from the given options and registers
# it under its symbolized name. Returns the new column.
def initialize_column!(options = {})
  owned = options.merge(parent: self)
  column = Masamune::Schema::Column.new(owned)
  @columns[column.name.to_sym] = column
end
275
+
276
# Memoized map of index id => names of the columns taking part in that
# index, ordered by ascending number of columns.
def index_column_map
  @index_column_map ||= begin
    grouped = Hash.new { |hash, key| hash[key] = [] }
    columns.each_value do |column|
      column.index.each { |index| grouped[index] << column.name }
    end
    Hash[grouped.sort_by { |_index, names| names.length }]
  end
end
287
+
288
# Memoized map of unique constraint id => names of the columns taking part
# in that constraint, ordered by ascending number of columns.
def unique_constraints_map
  @unique_constraints_map ||= begin
    grouped = Hash.new { |hash, key| hash[key] = [] }
    columns.each_value do |column|
      column.unique.each { |unique| grouped[unique] << column.name }
    end
    Hash[grouped.sort_by { |_unique, names| names.length }]
  end
end
299
+
300
# Memoized inverse of unique_constraints_map: sorted column names =>
# constraint id.
def reverse_unique_constraints_map
  @reverse_unique_constraints_map ||=
    unique_constraints_map.each_with_object({}) do |(constraint, names), reversed|
      reversed[names.sort] = constraint
    end
end
303
+
304
# First seven hex characters of the MD5 digest of the arguments joined with
# "_" (nested arrays are flattened by the join).
def short_md5(*a)
  joined = a.join('_')
  Digest::MD5.hexdigest(joined).slice(0, 7)
end
307
+ end
308
+ end