masamune 0.13.8 → 0.14.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (82)
  1. checksums.yaml +4 -4
  2. data/lib/masamune.rb +8 -5
  3. data/lib/masamune/actions.rb +1 -13
  4. data/lib/masamune/actions/data_flow.rb +2 -1
  5. data/lib/masamune/actions/date_parse.rb +0 -1
  6. data/lib/masamune/actions/elastic_mapreduce.rb +0 -2
  7. data/lib/masamune/actions/filesystem.rb +0 -2
  8. data/lib/masamune/actions/hive.rb +0 -2
  9. data/lib/masamune/actions/invoke_parallel.rb +2 -1
  10. data/lib/masamune/actions/postgres.rb +0 -1
  11. data/lib/masamune/actions/s3cmd.rb +2 -0
  12. data/lib/masamune/actions/transform.rb +0 -2
  13. data/lib/masamune/after_initialize_callbacks.rb +0 -2
  14. data/lib/masamune/commands.rb +1 -11
  15. data/lib/masamune/commands/postgres.rb +1 -0
  16. data/lib/masamune/commands/postgres_admin.rb +2 -0
  17. data/lib/masamune/configuration.rb +2 -0
  18. data/lib/masamune/data_plan/engine.rb +2 -0
  19. data/lib/masamune/filesystem.rb +2 -0
  20. data/lib/masamune/helpers.rb +1 -1
  21. data/lib/masamune/last_element.rb +0 -2
  22. data/lib/masamune/schema/dimension.rb +1 -3
  23. data/lib/masamune/schema/store.rb +2 -0
  24. data/lib/masamune/schema/table.rb +2 -0
  25. data/lib/masamune/template.rb +4 -1
  26. data/lib/masamune/thor.rb +1 -1
  27. data/lib/masamune/transform.rb +1 -21
  28. data/lib/masamune/transform/bulk_upsert.rb +1 -22
  29. data/lib/masamune/transform/common.rb +27 -0
  30. data/lib/masamune/transform/common/denormalize_table.rb +90 -0
  31. data/lib/masamune/transform/deduplicate_dimension.rb +1 -41
  32. data/lib/masamune/transform/define_table.rb +1 -113
  33. data/lib/masamune/transform/denormalize_table.rb +1 -50
  34. data/lib/masamune/transform/hive.rb +27 -0
  35. data/lib/masamune/transform/{define_schema.hql.erb → hive/define_schema.hql.erb} +0 -0
  36. data/lib/masamune/transform/{define_table.hql.erb → hive/define_table.hql.erb} +0 -0
  37. data/lib/masamune/transform/hive/define_table.rb +46 -0
  38. data/lib/masamune/transform/{denormalize_table.hql.erb → hive/denormalize_table.hql.erb} +0 -0
  39. data/lib/masamune/transform/hive/denormalize_table.rb +27 -0
  40. data/lib/masamune/transform/insert_reference_values.rb +1 -30
  41. data/lib/masamune/transform/operator.rb +36 -37
  42. data/lib/masamune/transform/postgres.rb +27 -0
  43. data/lib/masamune/transform/{bulk_upsert.psql.erb → postgres/bulk_upsert.psql.erb} +0 -0
  44. data/lib/masamune/transform/postgres/bulk_upsert.rb +62 -0
  45. data/lib/masamune/transform/{deduplicate_dimension.psql.erb → postgres/deduplicate_dimension.psql.erb} +1 -7
  46. data/lib/masamune/transform/postgres/deduplicate_dimension.rb +79 -0
  47. data/lib/masamune/transform/{define_foreign_key.psql.erb → postgres/define_foreign_key.psql.erb} +0 -0
  48. data/lib/masamune/transform/{define_index.psql.erb → postgres/define_index.psql.erb} +0 -0
  49. data/lib/masamune/transform/{define_inheritance.psql.erb → postgres/define_inheritance.psql.erb} +0 -0
  50. data/lib/masamune/transform/{define_schema.psql.erb → postgres/define_schema.psql.erb} +0 -0
  51. data/lib/masamune/transform/{define_table.psql.erb → postgres/define_table.psql.erb} +0 -0
  52. data/lib/masamune/transform/postgres/define_table.rb +142 -0
  53. data/lib/masamune/transform/{define_unique.psql.erb → postgres/define_unique.psql.erb} +0 -0
  54. data/lib/masamune/transform/{denormalize_table.psql.erb → postgres/denormalize_table.psql.erb} +0 -0
  55. data/lib/masamune/transform/postgres/denormalize_table.rb +27 -0
  56. data/lib/masamune/transform/{insert_reference_values.psql.erb → postgres/insert_reference_values.psql.erb} +1 -1
  57. data/lib/masamune/transform/postgres/insert_reference_values.rb +69 -0
  58. data/lib/masamune/transform/{relabel_dimension.psql.erb → postgres/relabel_dimension.psql.erb} +4 -1
  59. data/lib/masamune/transform/postgres/relabel_dimension.rb +45 -0
  60. data/lib/masamune/transform/{replace_table.psql.erb → postgres/replace_table.psql.erb} +0 -0
  61. data/lib/masamune/transform/{rollup_fact.psql.erb → postgres/rollup_fact.psql.erb} +0 -0
  62. data/lib/masamune/transform/postgres/rollup_fact.rb +123 -0
  63. data/lib/masamune/transform/{snapshot_dimension.psql.erb → postgres/snapshot_dimension.psql.erb} +3 -10
  64. data/lib/masamune/transform/postgres/snapshot_dimension.rb +83 -0
  65. data/lib/masamune/transform/{stage_dimension.psql.erb → postgres/stage_dimension.psql.erb} +0 -0
  66. data/lib/masamune/transform/postgres/stage_dimension.rb +90 -0
  67. data/lib/masamune/transform/{stage_fact.psql.erb → postgres/stage_fact.psql.erb} +0 -0
  68. data/lib/masamune/transform/postgres/stage_fact.rb +134 -0
  69. data/lib/masamune/transform/relabel_dimension.rb +1 -9
  70. data/lib/masamune/transform/rollup_fact.rb +1 -86
  71. data/lib/masamune/transform/snapshot_dimension.rb +1 -44
  72. data/lib/masamune/transform/stage_dimension.rb +1 -53
  73. data/lib/masamune/transform/stage_fact.rb +1 -96
  74. data/lib/masamune/version.rb +1 -1
  75. data/spec/masamune/template_spec.rb +1 -1
  76. data/spec/masamune/transform/bulk_upsert.dimension_spec.rb +1 -3
  77. data/spec/masamune/transform/deduplicate_dimension_spec.rb +1 -7
  78. data/spec/masamune/transform/define_table.dimension_spec.rb +0 -14
  79. data/spec/masamune/transform/denormalize_table_spec.rb +34 -0
  80. data/spec/masamune/transform/relabel_dimension_spec.rb +6 -1
  81. data/spec/masamune/transform/snapshot_dimension_spec.rb +3 -10
  82. metadata +37 -21
@@ -25,47 +25,7 @@ module Masamune::Transform
25
25
  extend ActiveSupport::Concern
26
26
 
27
27
  def deduplicate_dimension(source, target)
28
- Operator.new(__method__, source: source, target: target, presenters: { postgres: Postgres })
29
- end
30
-
31
- private
32
-
33
- class Postgres < SimpleDelegator
34
- def insert_columns(source = nil)
35
- consolidated_columns.map { |_, column| column.name }
36
- end
37
-
38
- def insert_view_values(coalesce: false)
39
- consolidated_columns.map do |_, column|
40
- if !column.default.nil? && coalesce
41
- "COALESCE(#{column.name}, #{column.sql_value(column.default)}) AS #{column.name}"
42
- else
43
- column.name
44
- end
45
- end
46
- end
47
-
48
- def duplicate_value_conditions(window)
49
- [].tap do |result|
50
- consolidated_columns.map do |_, column|
51
- if column.null
52
- result << "((LAG(#{column.name}) OVER #{window} = #{column.name}) OR (LAG(#{column.name}) OVER #{window} IS NULL AND #{column.name} IS NULL))"
53
- else
54
- result << "(LAG(#{column.name}) OVER #{window} = #{column.name})"
55
- end
56
- end
57
- end
58
- end
59
-
60
- def window(*extra)
61
- (columns.values.select { |column| extra.delete(column.name) || column.natural_key || column.auto_reference }.map(&:name) + extra).uniq
62
- end
63
-
64
- private
65
-
66
- def consolidated_columns
67
- unreserved_columns.reject { |_, column| column.surrogate_key }
68
- end
28
+ Operator.new(__method__, source: source, target: target)
69
29
  end
70
30
  end
71
31
  end
@@ -26,121 +26,9 @@ module Masamune::Transform
26
26
 
27
27
  def define_table(target, files = [], section = nil)
28
28
  return if target.implicit
29
- Operator.new(__method__, target: target, files: Masamune::Schema::Map.convert_files(files), section: section, helper: Helper, presenters: { postgres: Postgres, hive: Hive }).tap do |operator|
29
+ Operator.new(__method__, target: target, files: files, section: section).tap do |operator|
30
30
  logger.debug("#{target.id}\n" + operator.to_s) if target.debug
31
31
  end
32
32
  end
33
-
34
- class Helper < SimpleDelegator
35
- def files
36
- locals[:files]
37
- end
38
-
39
- def section
40
- locals[:section] || :all
41
- end
42
-
43
- def define_types?
44
- !post_section?
45
- end
46
-
47
- def define_tables?
48
- !post_section?
49
- end
50
-
51
- def define_functions?
52
- !post_section?
53
- end
54
-
55
- def define_sequences?
56
- !post_section?
57
- end
58
-
59
- def define_primary_keys?
60
- !pre_section? && !(target.temporary? || target.primary_keys.empty?)
61
- end
62
-
63
- def define_inheritance?
64
- return false unless target.inherited?
65
- return false if pre_section?
66
- return true if post_section?
67
- !target.delay_indexes?
68
- end
69
-
70
- def define_indexes?
71
- return false if pre_section?
72
- return true if post_section?
73
- !target.delay_indexes?
74
- end
75
-
76
- def define_foreign_keys?
77
- return false if pre_section?
78
- return true if post_section?
79
- !target.delay_foreign_keys?
80
- end
81
-
82
- def define_unique_constraints?
83
- return false if pre_section?
84
- return true if post_section?
85
- !target.delay_unique_constraints?
86
- end
87
-
88
- def insert_rows?
89
- !post_section?
90
- end
91
-
92
- def load_files?
93
- all_section?
94
- end
95
-
96
- def perform_analyze?
97
- return false if pre_section?
98
- return true if post_section?
99
- files.any? || target.insert_rows.any?
100
- end
101
-
102
- private
103
-
104
- def all_section?
105
- section == :all
106
- end
107
-
108
- def pre_section?
109
- section == :pre
110
- end
111
-
112
- def post_section?
113
- section == :post
114
- end
115
- end
116
-
117
- class Postgres < SimpleDelegator
118
- def children
119
- super.map { |child| self.class.new(child) }
120
- end
121
-
122
- def inherited?
123
- type == :fact && inheritance_constraints
124
- end
125
-
126
- def delay_indexes?
127
- type == :fact
128
- end
129
-
130
- def delay_foreign_keys?
131
- type == :fact
132
- end
133
-
134
- def delay_unique_constraints?
135
- type == :fact
136
- end
137
- end
138
-
139
- class Hive < SimpleDelegator
140
- def partition_by
141
- return unless partitions.any?
142
- partitions.map { |_, column| "#{column.name} #{column.hql_type}" }.join(', ')
143
- end
144
- end
145
33
  end
146
34
  end
@@ -32,56 +32,7 @@ module Masamune::Transform
32
32
  columns -= ['last_modified_at']
33
33
  columns.uniq!
34
34
  order_by = options[:order] || columns
35
- Operator.new(__method__, target: target, columns: columns, order_by: order_by, presenters: { postgres: Common, hive: Common })
36
- end
37
-
38
- private
39
-
40
- class Common < SimpleDelegator
41
- include Masamune::LastElement
42
-
43
- def select_columns(column_names)
44
- column_names.map do |column_name|
45
- next unless column = dereference_column_name(column_name)
46
- if column.reference
47
- if column.reference.implicit || column.reference.degenerate
48
- "#{column.name} AS #{column.name}"
49
- else
50
- "#{column.foreign_key_name} AS #{column.name}"
51
- end
52
- else
53
- column.qualified_name
54
- end
55
- end.compact
56
- end
57
- method_with_last_element :select_columns
58
-
59
- def join_alias(reference)
60
- reference.label ? "#{reference.name} AS #{[reference.label, reference.name].compact.join('_')}" : reference.name
61
- end
62
-
63
- def join_conditions(column_names)
64
- {}.tap do |conditions|
65
- column_names.each do |column_name|
66
- next unless column = dereference_column_name(column_name)
67
- next unless column.reference
68
- next if column.reference.degenerate
69
- adjacent_reference = references[column.reference.id]
70
- next unless adjacent_reference
71
- adjacent_column = columns[adjacent_reference.foreign_key_name]
72
- next unless adjacent_column
73
- conditions[join_alias(column.reference)] = "#{column.reference.surrogate_key.qualified_name(column.reference.label)} = #{adjacent_column.qualified_name}"
74
- end
75
- end
76
- end
77
-
78
- def order_by_columns(column_names)
79
- column_names.map do |column_name|
80
- next unless column = dereference_column_name(column_name)
81
- column.name
82
- end.compact
83
- end
84
- method_with_last_element :order_by_columns
35
+ Operator.new(__method__, target: target, columns: columns, order_by: order_by)
85
36
  end
86
37
  end
87
38
  end
@@ -0,0 +1,27 @@
1
+ # The MIT License (MIT)
2
+ #
3
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ module Masamune::Transform
24
+ module Hive
25
+ Dir["#{File.dirname(__FILE__)}/hive/*.rb"].each { |f| require f }
26
+ end
27
+ end
@@ -0,0 +1,46 @@
1
+ # The MIT License (MIT)
2
+ #
3
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ module Masamune::Transform::Hive
24
+ class DefineTable
25
+ def initialize(options = {})
26
+ @target = options[:target]
27
+ end
28
+
29
+ def locals
30
+ { target: target }
31
+ end
32
+
33
+ def target
34
+ TargetPresenter.new(@target)
35
+ end
36
+
37
+ private
38
+
39
+ class TargetPresenter < SimpleDelegator
40
+ def partition_by
41
+ return unless partitions.any?
42
+ partitions.map { |_, column| "#{column.name} #{column.hql_type}" }.join(', ')
43
+ end
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,27 @@
1
+ # The MIT License (MIT)
2
+ #
3
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ require 'masamune/transform/common/denormalize_table'
24
+
25
+ module Masamune::Transform::Hive
26
+ DenormalizeTable = Masamune::Transform::Common::DenormalizeTable
27
+ end
@@ -20,8 +20,6 @@
20
20
  # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
21
  # THE SOFTWARE.
22
22
 
23
- require 'masamune/transform/bulk_upsert'
24
-
25
23
  module Masamune::Transform
26
24
  module InsertReferenceValues
27
25
  extend ActiveSupport::Concern
@@ -29,36 +27,9 @@ module Masamune::Transform
29
27
  def insert_reference_values(source, target)
30
28
  operators = []
31
29
  target.insert_references.each do |_, reference|
32
- operators << Operator.new(__method__, source: source, target: reference, presenters: { postgres: Postgres })
30
+ operators << Operator.new(__method__, source: source, target: reference)
33
31
  end
34
32
  Operator.new *operators
35
33
  end
36
-
37
- private
38
-
39
- class Postgres < SimpleDelegator
40
- include BulkUpsert
41
- include Masamune::LastElement
42
-
43
- def insert_columns(source)
44
- source.shared_columns(stage_table).map { |_, columns| columns.first.name }
45
- end
46
-
47
- def insert_values(source)
48
- source.shared_columns(stage_table).map do |column, _|
49
- if column.adjacent.try(:default)
50
- "COALESCE(#{column.name}, #{column.adjacent.sql_value(column.adjacent.default)})"
51
- else
52
- column.name
53
- end
54
- end
55
- end
56
- method_with_last_element :insert_values
57
-
58
- def insert_constraints(source)
59
- source.shared_columns(stage_table).reject { |column, _| column.null || column.default || column.adjacent.try(:default) }.map { |column, _| "#{column.name} IS NOT NULL"}
60
- end
61
- method_with_last_element :insert_constraints
62
- end
63
34
  end
64
35
  end
@@ -25,29 +25,7 @@ module Masamune::Transform
25
25
  def initialize(*args)
26
26
  options = args.last.is_a?(Hash) ? args.pop : {}
27
27
  @templates = args
28
- @source = options.delete(:source)
29
- @target = options.delete(:target)
30
- @presenters = options.delete(:presenters) || {}
31
- @helper = options.delete(:helper)
32
- @locals = options
33
- end
34
-
35
- def source
36
- return unless @source
37
- @presenters.key?(source_store.try(:type)) ? @presenters[source_store.try(:type)].new(@source) : @source
38
- end
39
-
40
- def target
41
- return unless @target
42
- @presenters.key?(target_store.try(:type)) ? @presenters[target_store.try(:type)].new(@target) : @target
43
- end
44
-
45
- def helper
46
- (@helper || SimpleDelegator).new(self)
47
- end
48
-
49
- def locals
50
- @locals
28
+ @options = options
51
29
  end
52
30
 
53
31
  def to_s
@@ -72,34 +50,55 @@ module Masamune::Transform
72
50
 
73
51
  private
74
52
 
75
- def source_store
76
- return @source if @source.is_a?(Masamune::Schema::Store)
77
- @source.try(:store)
53
+ def template_eval(template)
54
+ return File.read(template) if File.exists?(template.to_s) && template.to_s !~ /erb\Z/
55
+ template_file = File.exists?(template.to_s) ? template : template_file(template)
56
+ if template_helper(template)
57
+ Masamune::Template.render_to_string(template_file, template_helper(template).new(@options).locals)
58
+ else
59
+ Masamune::Template.render_to_string(template_file, @options)
60
+ end
78
61
  end
79
62
 
80
- def target_store
81
- return @target if @target.is_a?(Masamune::Schema::Store)
82
- @target.try(:store)
63
+ def template_helper(template_name)
64
+ "Masamune::Transform::#{template_type.to_s.camelize}::#{template_name.to_s.camelize}".constantize
65
+ rescue NameError
83
66
  end
84
67
 
85
- def template_eval(template)
86
- return File.read(template) if File.exists?(template.to_s) && template.to_s !~ /erb\Z/
87
- template_file = File.exists?(template.to_s) ? template : template_file(template)
88
- Masamune::Template.render_to_string(template_file, @locals.merge(source: source, target: target, helper: helper))
68
+ def template_file(template_name)
69
+ File.expand_path(File.join(__FILE__, '..', template_dir, "#{template_name}.#{template_suffix}.erb"))
89
70
  end
90
71
 
91
- def template_file(template_prefix)
92
- File.expand_path(File.join(__FILE__, '..', "#{template_prefix}.#{template_suffix}.erb"))
72
+ def template_type
73
+ @options.values.map do |value|
74
+ case value
75
+ when Masamune::Schema::Store
76
+ value.type
77
+ when Masamune::Schema::Table
78
+ value.store.type
79
+ end
80
+ end.first
81
+ end
82
+
83
+ def template_dir
84
+ case template_type
85
+ when :postgres
86
+ 'postgres'
87
+ when :hive
88
+ 'hive'
89
+ else
90
+ raise ArgumentError, "Unknown template_dir for #{template_type}"
91
+ end
93
92
  end
94
93
 
95
94
  def template_suffix
96
- case (target_store || source_store).try(:type)
95
+ case template_type
97
96
  when :postgres
98
97
  'psql'
99
98
  when :hive
100
99
  'hql'
101
100
  else
102
- 'txt'
101
+ raise ArgumentError, "Unknown template_suffix for #{template_type}"
103
102
  end
104
103
  end
105
104
  end