masamune 0.13.8 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. checksums.yaml +4 -4
  2. data/lib/masamune.rb +8 -5
  3. data/lib/masamune/actions.rb +1 -13
  4. data/lib/masamune/actions/data_flow.rb +2 -1
  5. data/lib/masamune/actions/date_parse.rb +0 -1
  6. data/lib/masamune/actions/elastic_mapreduce.rb +0 -2
  7. data/lib/masamune/actions/filesystem.rb +0 -2
  8. data/lib/masamune/actions/hive.rb +0 -2
  9. data/lib/masamune/actions/invoke_parallel.rb +2 -1
  10. data/lib/masamune/actions/postgres.rb +0 -1
  11. data/lib/masamune/actions/s3cmd.rb +2 -0
  12. data/lib/masamune/actions/transform.rb +0 -2
  13. data/lib/masamune/after_initialize_callbacks.rb +0 -2
  14. data/lib/masamune/commands.rb +1 -11
  15. data/lib/masamune/commands/postgres.rb +1 -0
  16. data/lib/masamune/commands/postgres_admin.rb +2 -0
  17. data/lib/masamune/configuration.rb +2 -0
  18. data/lib/masamune/data_plan/engine.rb +2 -0
  19. data/lib/masamune/filesystem.rb +2 -0
  20. data/lib/masamune/helpers.rb +1 -1
  21. data/lib/masamune/last_element.rb +0 -2
  22. data/lib/masamune/schema/dimension.rb +1 -3
  23. data/lib/masamune/schema/store.rb +2 -0
  24. data/lib/masamune/schema/table.rb +2 -0
  25. data/lib/masamune/template.rb +4 -1
  26. data/lib/masamune/thor.rb +1 -1
  27. data/lib/masamune/transform.rb +1 -21
  28. data/lib/masamune/transform/bulk_upsert.rb +1 -22
  29. data/lib/masamune/transform/common.rb +27 -0
  30. data/lib/masamune/transform/common/denormalize_table.rb +90 -0
  31. data/lib/masamune/transform/deduplicate_dimension.rb +1 -41
  32. data/lib/masamune/transform/define_table.rb +1 -113
  33. data/lib/masamune/transform/denormalize_table.rb +1 -50
  34. data/lib/masamune/transform/hive.rb +27 -0
  35. data/lib/masamune/transform/{define_schema.hql.erb → hive/define_schema.hql.erb} +0 -0
  36. data/lib/masamune/transform/{define_table.hql.erb → hive/define_table.hql.erb} +0 -0
  37. data/lib/masamune/transform/hive/define_table.rb +46 -0
  38. data/lib/masamune/transform/{denormalize_table.hql.erb → hive/denormalize_table.hql.erb} +0 -0
  39. data/lib/masamune/transform/hive/denormalize_table.rb +27 -0
  40. data/lib/masamune/transform/insert_reference_values.rb +1 -30
  41. data/lib/masamune/transform/operator.rb +36 -37
  42. data/lib/masamune/transform/postgres.rb +27 -0
  43. data/lib/masamune/transform/{bulk_upsert.psql.erb → postgres/bulk_upsert.psql.erb} +0 -0
  44. data/lib/masamune/transform/postgres/bulk_upsert.rb +62 -0
  45. data/lib/masamune/transform/{deduplicate_dimension.psql.erb → postgres/deduplicate_dimension.psql.erb} +1 -7
  46. data/lib/masamune/transform/postgres/deduplicate_dimension.rb +79 -0
  47. data/lib/masamune/transform/{define_foreign_key.psql.erb → postgres/define_foreign_key.psql.erb} +0 -0
  48. data/lib/masamune/transform/{define_index.psql.erb → postgres/define_index.psql.erb} +0 -0
  49. data/lib/masamune/transform/{define_inheritance.psql.erb → postgres/define_inheritance.psql.erb} +0 -0
  50. data/lib/masamune/transform/{define_schema.psql.erb → postgres/define_schema.psql.erb} +0 -0
  51. data/lib/masamune/transform/{define_table.psql.erb → postgres/define_table.psql.erb} +0 -0
  52. data/lib/masamune/transform/postgres/define_table.rb +142 -0
  53. data/lib/masamune/transform/{define_unique.psql.erb → postgres/define_unique.psql.erb} +0 -0
  54. data/lib/masamune/transform/{denormalize_table.psql.erb → postgres/denormalize_table.psql.erb} +0 -0
  55. data/lib/masamune/transform/postgres/denormalize_table.rb +27 -0
  56. data/lib/masamune/transform/{insert_reference_values.psql.erb → postgres/insert_reference_values.psql.erb} +1 -1
  57. data/lib/masamune/transform/postgres/insert_reference_values.rb +69 -0
  58. data/lib/masamune/transform/{relabel_dimension.psql.erb → postgres/relabel_dimension.psql.erb} +4 -1
  59. data/lib/masamune/transform/postgres/relabel_dimension.rb +45 -0
  60. data/lib/masamune/transform/{replace_table.psql.erb → postgres/replace_table.psql.erb} +0 -0
  61. data/lib/masamune/transform/{rollup_fact.psql.erb → postgres/rollup_fact.psql.erb} +0 -0
  62. data/lib/masamune/transform/postgres/rollup_fact.rb +123 -0
  63. data/lib/masamune/transform/{snapshot_dimension.psql.erb → postgres/snapshot_dimension.psql.erb} +3 -10
  64. data/lib/masamune/transform/postgres/snapshot_dimension.rb +83 -0
  65. data/lib/masamune/transform/{stage_dimension.psql.erb → postgres/stage_dimension.psql.erb} +0 -0
  66. data/lib/masamune/transform/postgres/stage_dimension.rb +90 -0
  67. data/lib/masamune/transform/{stage_fact.psql.erb → postgres/stage_fact.psql.erb} +0 -0
  68. data/lib/masamune/transform/postgres/stage_fact.rb +134 -0
  69. data/lib/masamune/transform/relabel_dimension.rb +1 -9
  70. data/lib/masamune/transform/rollup_fact.rb +1 -86
  71. data/lib/masamune/transform/snapshot_dimension.rb +1 -44
  72. data/lib/masamune/transform/stage_dimension.rb +1 -53
  73. data/lib/masamune/transform/stage_fact.rb +1 -96
  74. data/lib/masamune/version.rb +1 -1
  75. data/spec/masamune/template_spec.rb +1 -1
  76. data/spec/masamune/transform/bulk_upsert.dimension_spec.rb +1 -3
  77. data/spec/masamune/transform/deduplicate_dimension_spec.rb +1 -7
  78. data/spec/masamune/transform/define_table.dimension_spec.rb +0 -14
  79. data/spec/masamune/transform/denormalize_table_spec.rb +34 -0
  80. data/spec/masamune/transform/relabel_dimension_spec.rb +6 -1
  81. data/spec/masamune/transform/snapshot_dimension_spec.rb +3 -10
  82. metadata +37 -21
@@ -25,47 +25,7 @@ module Masamune::Transform
25
25
  extend ActiveSupport::Concern
26
26
 
27
27
  def deduplicate_dimension(source, target)
28
- Operator.new(__method__, source: source, target: target, presenters: { postgres: Postgres })
29
- end
30
-
31
- private
32
-
33
- class Postgres < SimpleDelegator
34
- def insert_columns(source = nil)
35
- consolidated_columns.map { |_, column| column.name }
36
- end
37
-
38
- def insert_view_values(coalesce: false)
39
- consolidated_columns.map do |_, column|
40
- if !column.default.nil? && coalesce
41
- "COALESCE(#{column.name}, #{column.sql_value(column.default)}) AS #{column.name}"
42
- else
43
- column.name
44
- end
45
- end
46
- end
47
-
48
- def duplicate_value_conditions(window)
49
- [].tap do |result|
50
- consolidated_columns.map do |_, column|
51
- if column.null
52
- result << "((LAG(#{column.name}) OVER #{window} = #{column.name}) OR (LAG(#{column.name}) OVER #{window} IS NULL AND #{column.name} IS NULL))"
53
- else
54
- result << "(LAG(#{column.name}) OVER #{window} = #{column.name})"
55
- end
56
- end
57
- end
58
- end
59
-
60
- def window(*extra)
61
- (columns.values.select { |column| extra.delete(column.name) || column.natural_key || column.auto_reference }.map(&:name) + extra).uniq
62
- end
63
-
64
- private
65
-
66
- def consolidated_columns
67
- unreserved_columns.reject { |_, column| column.surrogate_key }
68
- end
28
+ Operator.new(__method__, source: source, target: target)
69
29
  end
70
30
  end
71
31
  end
@@ -26,121 +26,9 @@ module Masamune::Transform
26
26
 
27
27
  def define_table(target, files = [], section = nil)
28
28
  return if target.implicit
29
- Operator.new(__method__, target: target, files: Masamune::Schema::Map.convert_files(files), section: section, helper: Helper, presenters: { postgres: Postgres, hive: Hive }).tap do |operator|
29
+ Operator.new(__method__, target: target, files: files, section: section).tap do |operator|
30
30
  logger.debug("#{target.id}\n" + operator.to_s) if target.debug
31
31
  end
32
32
  end
33
-
34
- class Helper < SimpleDelegator
35
- def files
36
- locals[:files]
37
- end
38
-
39
- def section
40
- locals[:section] || :all
41
- end
42
-
43
- def define_types?
44
- !post_section?
45
- end
46
-
47
- def define_tables?
48
- !post_section?
49
- end
50
-
51
- def define_functions?
52
- !post_section?
53
- end
54
-
55
- def define_sequences?
56
- !post_section?
57
- end
58
-
59
- def define_primary_keys?
60
- !pre_section? && !(target.temporary? || target.primary_keys.empty?)
61
- end
62
-
63
- def define_inheritance?
64
- return false unless target.inherited?
65
- return false if pre_section?
66
- return true if post_section?
67
- !target.delay_indexes?
68
- end
69
-
70
- def define_indexes?
71
- return false if pre_section?
72
- return true if post_section?
73
- !target.delay_indexes?
74
- end
75
-
76
- def define_foreign_keys?
77
- return false if pre_section?
78
- return true if post_section?
79
- !target.delay_foreign_keys?
80
- end
81
-
82
- def define_unique_constraints?
83
- return false if pre_section?
84
- return true if post_section?
85
- !target.delay_unique_constraints?
86
- end
87
-
88
- def insert_rows?
89
- !post_section?
90
- end
91
-
92
- def load_files?
93
- all_section?
94
- end
95
-
96
- def perform_analyze?
97
- return false if pre_section?
98
- return true if post_section?
99
- files.any? || target.insert_rows.any?
100
- end
101
-
102
- private
103
-
104
- def all_section?
105
- section == :all
106
- end
107
-
108
- def pre_section?
109
- section == :pre
110
- end
111
-
112
- def post_section?
113
- section == :post
114
- end
115
- end
116
-
117
- class Postgres < SimpleDelegator
118
- def children
119
- super.map { |child| self.class.new(child) }
120
- end
121
-
122
- def inherited?
123
- type == :fact && inheritance_constraints
124
- end
125
-
126
- def delay_indexes?
127
- type == :fact
128
- end
129
-
130
- def delay_foreign_keys?
131
- type == :fact
132
- end
133
-
134
- def delay_unique_constraints?
135
- type == :fact
136
- end
137
- end
138
-
139
- class Hive < SimpleDelegator
140
- def partition_by
141
- return unless partitions.any?
142
- partitions.map { |_, column| "#{column.name} #{column.hql_type}" }.join(', ')
143
- end
144
- end
145
33
  end
146
34
  end
@@ -32,56 +32,7 @@ module Masamune::Transform
32
32
  columns -= ['last_modified_at']
33
33
  columns.uniq!
34
34
  order_by = options[:order] || columns
35
- Operator.new(__method__, target: target, columns: columns, order_by: order_by, presenters: { postgres: Common, hive: Common })
36
- end
37
-
38
- private
39
-
40
- class Common < SimpleDelegator
41
- include Masamune::LastElement
42
-
43
- def select_columns(column_names)
44
- column_names.map do |column_name|
45
- next unless column = dereference_column_name(column_name)
46
- if column.reference
47
- if column.reference.implicit || column.reference.degenerate
48
- "#{column.name} AS #{column.name}"
49
- else
50
- "#{column.foreign_key_name} AS #{column.name}"
51
- end
52
- else
53
- column.qualified_name
54
- end
55
- end.compact
56
- end
57
- method_with_last_element :select_columns
58
-
59
- def join_alias(reference)
60
- reference.label ? "#{reference.name} AS #{[reference.label, reference.name].compact.join('_')}" : reference.name
61
- end
62
-
63
- def join_conditions(column_names)
64
- {}.tap do |conditions|
65
- column_names.each do |column_name|
66
- next unless column = dereference_column_name(column_name)
67
- next unless column.reference
68
- next if column.reference.degenerate
69
- adjacent_reference = references[column.reference.id]
70
- next unless adjacent_reference
71
- adjacent_column = columns[adjacent_reference.foreign_key_name]
72
- next unless adjacent_column
73
- conditions[join_alias(column.reference)] = "#{column.reference.surrogate_key.qualified_name(column.reference.label)} = #{adjacent_column.qualified_name}"
74
- end
75
- end
76
- end
77
-
78
- def order_by_columns(column_names)
79
- column_names.map do |column_name|
80
- next unless column = dereference_column_name(column_name)
81
- column.name
82
- end.compact
83
- end
84
- method_with_last_element :order_by_columns
35
+ Operator.new(__method__, target: target, columns: columns, order_by: order_by)
85
36
  end
86
37
  end
87
38
  end
@@ -0,0 +1,27 @@
1
+ # The MIT License (MIT)
2
+ #
3
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ module Masamune::Transform
24
+ module Hive
25
+ Dir["#{File.dirname(__FILE__)}/hive/*.rb"].each { |f| require f }
26
+ end
27
+ end
@@ -0,0 +1,46 @@
1
+ # The MIT License (MIT)
2
+ #
3
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ module Masamune::Transform::Hive
24
+ class DefineTable
25
+ def initialize(options = {})
26
+ @target = options[:target]
27
+ end
28
+
29
+ def locals
30
+ { target: target }
31
+ end
32
+
33
+ def target
34
+ TargetPresenter.new(@target)
35
+ end
36
+
37
+ private
38
+
39
+ class TargetPresenter < SimpleDelegator
40
+ def partition_by
41
+ return unless partitions.any?
42
+ partitions.map { |_, column| "#{column.name} #{column.hql_type}" }.join(', ')
43
+ end
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,27 @@
1
+ # The MIT License (MIT)
2
+ #
3
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ require 'masamune/transform/common/denormalize_table'
24
+
25
+ module Masamune::Transform::Hive
26
+ DenormalizeTable = Masamune::Transform::Common::DenormalizeTable
27
+ end
@@ -20,8 +20,6 @@
20
20
  # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
21
  # THE SOFTWARE.
22
22
 
23
- require 'masamune/transform/bulk_upsert'
24
-
25
23
  module Masamune::Transform
26
24
  module InsertReferenceValues
27
25
  extend ActiveSupport::Concern
@@ -29,36 +27,9 @@ module Masamune::Transform
29
27
  def insert_reference_values(source, target)
30
28
  operators = []
31
29
  target.insert_references.each do |_, reference|
32
- operators << Operator.new(__method__, source: source, target: reference, presenters: { postgres: Postgres })
30
+ operators << Operator.new(__method__, source: source, target: reference)
33
31
  end
34
32
  Operator.new *operators
35
33
  end
36
-
37
- private
38
-
39
- class Postgres < SimpleDelegator
40
- include BulkUpsert
41
- include Masamune::LastElement
42
-
43
- def insert_columns(source)
44
- source.shared_columns(stage_table).map { |_, columns| columns.first.name }
45
- end
46
-
47
- def insert_values(source)
48
- source.shared_columns(stage_table).map do |column, _|
49
- if column.adjacent.try(:default)
50
- "COALESCE(#{column.name}, #{column.adjacent.sql_value(column.adjacent.default)})"
51
- else
52
- column.name
53
- end
54
- end
55
- end
56
- method_with_last_element :insert_values
57
-
58
- def insert_constraints(source)
59
- source.shared_columns(stage_table).reject { |column, _| column.null || column.default || column.adjacent.try(:default) }.map { |column, _| "#{column.name} IS NOT NULL"}
60
- end
61
- method_with_last_element :insert_constraints
62
- end
63
34
  end
64
35
  end
@@ -25,29 +25,7 @@ module Masamune::Transform
25
25
  def initialize(*args)
26
26
  options = args.last.is_a?(Hash) ? args.pop : {}
27
27
  @templates = args
28
- @source = options.delete(:source)
29
- @target = options.delete(:target)
30
- @presenters = options.delete(:presenters) || {}
31
- @helper = options.delete(:helper)
32
- @locals = options
33
- end
34
-
35
- def source
36
- return unless @source
37
- @presenters.key?(source_store.try(:type)) ? @presenters[source_store.try(:type)].new(@source) : @source
38
- end
39
-
40
- def target
41
- return unless @target
42
- @presenters.key?(target_store.try(:type)) ? @presenters[target_store.try(:type)].new(@target) : @target
43
- end
44
-
45
- def helper
46
- (@helper || SimpleDelegator).new(self)
47
- end
48
-
49
- def locals
50
- @locals
28
+ @options = options
51
29
  end
52
30
 
53
31
  def to_s
@@ -72,34 +50,55 @@ module Masamune::Transform
72
50
 
73
51
  private
74
52
 
75
- def source_store
76
- return @source if @source.is_a?(Masamune::Schema::Store)
77
- @source.try(:store)
53
+ def template_eval(template)
54
+ return File.read(template) if File.exists?(template.to_s) && template.to_s !~ /erb\Z/
55
+ template_file = File.exists?(template.to_s) ? template : template_file(template)
56
+ if template_helper(template)
57
+ Masamune::Template.render_to_string(template_file, template_helper(template).new(@options).locals)
58
+ else
59
+ Masamune::Template.render_to_string(template_file, @options)
60
+ end
78
61
  end
79
62
 
80
- def target_store
81
- return @target if @target.is_a?(Masamune::Schema::Store)
82
- @target.try(:store)
63
+ def template_helper(template_name)
64
+ "Masamune::Transform::#{template_type.to_s.camelize}::#{template_name.to_s.camelize}".constantize
65
+ rescue NameError
83
66
  end
84
67
 
85
- def template_eval(template)
86
- return File.read(template) if File.exists?(template.to_s) && template.to_s !~ /erb\Z/
87
- template_file = File.exists?(template.to_s) ? template : template_file(template)
88
- Masamune::Template.render_to_string(template_file, @locals.merge(source: source, target: target, helper: helper))
68
+ def template_file(template_name)
69
+ File.expand_path(File.join(__FILE__, '..', template_dir, "#{template_name}.#{template_suffix}.erb"))
89
70
  end
90
71
 
91
- def template_file(template_prefix)
92
- File.expand_path(File.join(__FILE__, '..', "#{template_prefix}.#{template_suffix}.erb"))
72
+ def template_type
73
+ @options.values.map do |value|
74
+ case value
75
+ when Masamune::Schema::Store
76
+ value.type
77
+ when Masamune::Schema::Table
78
+ value.store.type
79
+ end
80
+ end.first
81
+ end
82
+
83
+ def template_dir
84
+ case template_type
85
+ when :postgres
86
+ 'postgres'
87
+ when :hive
88
+ 'hive'
89
+ else
90
+ raise ArgumentError, "Unknown template_dir for #{template_type}"
91
+ end
93
92
  end
94
93
 
95
94
  def template_suffix
96
- case (target_store || source_store).try(:type)
95
+ case template_type
97
96
  when :postgres
98
97
  'psql'
99
98
  when :hive
100
99
  'hql'
101
100
  else
102
- 'txt'
101
+ raise ArgumentError, "Unknown template_suffix for #{template_type}"
103
102
  end
104
103
  end
105
104
  end