masamune 0.11.9 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. checksums.yaml +4 -4
  2. data/lib/masamune/actions/transform.rb +31 -16
  3. data/lib/masamune/schema.rb +0 -1
  4. data/lib/masamune/schema/catalog.rb +2 -10
  5. data/lib/masamune/schema/column.rb +16 -30
  6. data/lib/masamune/schema/dimension.rb +2 -9
  7. data/lib/masamune/schema/fact.rb +0 -4
  8. data/lib/masamune/schema/map.rb +1 -1
  9. data/lib/masamune/schema/row.rb +3 -3
  10. data/lib/masamune/schema/store.rb +1 -3
  11. data/lib/masamune/schema/table.rb +28 -2
  12. data/lib/masamune/transform.rb +0 -1
  13. data/lib/masamune/transform/define_schema.rb +0 -6
  14. data/lib/masamune/transform/define_table.hql.erb +7 -6
  15. data/lib/masamune/transform/define_table.rb +1 -0
  16. data/lib/masamune/transform/{define_event_view.hql.erb → denormalize_table.hql.erb} +8 -26
  17. data/lib/masamune/transform/denormalize_table.psql.erb +1 -1
  18. data/lib/masamune/transform/denormalize_table.rb +13 -4
  19. data/lib/masamune/transform/snapshot_dimension.rb +1 -1
  20. data/lib/masamune/transform/stage_fact.rb +1 -1
  21. data/lib/masamune/version.rb +1 -1
  22. data/spec/masamune/actions/transform_spec.rb +50 -18
  23. data/spec/masamune/schema/catalog_spec.rb +0 -53
  24. data/spec/masamune/schema/column_spec.rb +9 -41
  25. data/spec/masamune/schema/fact_spec.rb +3 -1
  26. data/spec/masamune/schema/map_spec.rb +187 -189
  27. data/spec/masamune/schema/table_spec.rb +8 -0
  28. data/spec/masamune/transform/bulk_upsert.dimension_spec.rb +4 -6
  29. data/spec/masamune/transform/define_schema_spec.rb +5 -6
  30. data/spec/masamune/transform/define_table.dimension_spec.rb +101 -2
  31. data/spec/masamune/transform/define_table.fact_spec.rb +2 -0
  32. data/spec/masamune/transform/denormalize_table_spec.rb +170 -14
  33. data/spec/masamune/transform/snapshot_dimension_spec.rb +1 -1
  34. data/spec/masamune/transform/stage_dimension_spec.rb +3 -3
  35. metadata +3 -9
  36. data/lib/masamune/schema/event.rb +0 -121
  37. data/lib/masamune/transform/define_event_view.rb +0 -60
  38. data/spec/masamune/schema/event_spec.rb +0 -75
  39. data/spec/masamune/transform/define_event_view_spec.rb +0 -84
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e71d5026f78a0babee52533490fe3d79da306451
4
- data.tar.gz: 4e78ab8efb0c9697984707dd4f242b061a50b858
3
+ metadata.gz: a12e8b8ce6cb92c5d551e9024053d50ff59f3cd6
4
+ data.tar.gz: 72bdc252c7b9b6f9e787b211eae46bc3edb69358
5
5
  SHA512:
6
- metadata.gz: 16462d0f049ee2caac1f540dbe2266b268b58f601762edb46d69d7b3db676ff9af173d212bf20c3526f4c40307bed4b9d02ccce4c01adde2c5891e73c421c581
7
- data.tar.gz: 1f7fc256a7561808e36390f0bad66c6add14f89b71d36d3ea91e8650ed5f5c719b7cfdc851991e4f4c6676c7206b6e513a842a83bf2b797085978cc00d2f799b
6
+ metadata.gz: 96be3850b76ec158f8a13be46ec2a5108c9ec01f99987f0a9414602b4072b768e072b72906a28ac9b8aa101940ece9e0f1255c2bcf25bc6cdf2726d9c7783c36
7
+ data.tar.gz: 31863312d8fdbbad89e1e0a57fcbbe51b754feca838891d1f02cefe09f547da431f6089e36e6dafc56aa20cabea8a4d0cec432b0c0b07e11c496714668388942
@@ -49,21 +49,10 @@ module Masamune::Actions
49
49
  FILE_MODE = 0777 - File.umask
50
50
 
51
51
  def load_dimension(source_files, source, target)
52
- output = Tempfile.new('masamune')
53
- FileUtils.chmod(FILE_MODE, output.path)
54
-
55
- if source.respond_to?(:map) and map = source.map(to: target)
56
- result = map.apply(source_files, output)
57
- else
58
- output = source_files
59
- result = source
52
+ optional_apply_map(source_files, source, target) do |intermediate_files, intermediate|
53
+ transform = Wrapper.load_dimension(intermediate_files, intermediate, target)
54
+ postgres file: transform.to_file, debug: (source.debug || target.debug || intermediate.debug)
60
55
  end
61
-
62
- transform = Wrapper.load_dimension(output, result, target)
63
- logger.debug(File.read(output)) if (source.debug || map.debug)
64
- postgres file: transform.to_file, debug: (source.debug || target.debug || map.debug)
65
- ensure
66
- output.unlink
67
56
  end
68
57
 
69
58
  def consolidate_dimension(target)
@@ -77,13 +66,39 @@ module Masamune::Actions
77
66
  end
78
67
 
79
68
  def load_fact(source_files, source, target, date)
80
- transform = Wrapper.load_fact(source_files, source, target, date)
81
- postgres file: transform.to_file, debug: (source.debug || target.debug)
69
+ optional_apply_map(source_files, source, target) do |intermediate_files, intermediate|
70
+ transform = Wrapper.load_fact(intermediate_files, intermediate, target, date)
71
+ postgres file: transform.to_file, debug: (source.debug || target.debug || intermediate.debug)
72
+ end
82
73
  end
83
74
 
84
75
  def rollup_fact(source, target, date)
85
76
  transform = Wrapper.rollup_fact(source, target, date)
86
77
  postgres file: transform.to_file, debug: (source.debug || target.debug)
87
78
  end
79
+
80
+ private
81
+
82
+ def optional_apply_map(source_files, source, target, &block)
83
+ if source.respond_to?(:map) and map = source.map(to: target)
84
+ apply_map(map, source_files, source, target, &block)
85
+ else
86
+ yield source_files, source
87
+ end
88
+ end
89
+
90
+ def apply_map(map, source_files, source, target)
91
+ Tempfile.open('masamune') do |output|
92
+ begin
93
+ FileUtils.chmod(FILE_MODE, output.path)
94
+ result = map.apply(source_files, output)
95
+ result.debug = map.debug
96
+ logger.debug(File.read(output)) if (source.debug || result.debug)
97
+ yield output, result
98
+ ensure
99
+ output.unlink
100
+ end
101
+ end
102
+ end
88
103
  end
89
104
  end
@@ -30,7 +30,6 @@ module Masamune
30
30
  require 'masamune/schema/fact'
31
31
  require 'masamune/schema/column'
32
32
  require 'masamune/schema/row'
33
- require 'masamune/schema/event'
34
33
  require 'masamune/schema/map'
35
34
  end
36
35
  end
@@ -173,7 +173,7 @@ module Masamune::Schema
173
173
  end
174
174
 
175
175
  def file(id, options = {})
176
- format_options = options.extract!(:format, :headers)
176
+ format_options = options.extract!(:format, :headers, :json_encoding)
177
177
  @context.push(options)
178
178
  yield if block_given?
179
179
  store = HasFormat.new(@context, format_options)
@@ -182,14 +182,6 @@ module Masamune::Schema
182
182
  @context.pop
183
183
  end
184
184
 
185
- def event(id, options = {})
186
- @context.push(options)
187
- yield if block_given?
188
- @context.events[id] = HasMap.new Masamune::Schema::Event.new(@context.options.merge(id: id))
189
- ensure
190
- @context.pop
191
- end
192
-
193
185
  def attribute(id, options = {})
194
186
  @context.options[:attributes] << Masamune::Schema::Event::Attribute.new(options.merge(id: id))
195
187
  end
@@ -200,7 +192,7 @@ module Masamune::Schema
200
192
  raise ArgumentError, "invalid map, from: is missing" unless from && from.try(:id)
201
193
  raise ArgumentError, "invalid map from: '#{from.id}', to: is missing" unless to
202
194
  @context.push(options)
203
- @context.options[:function] = block.to_proc
195
+ @context.options[:function] = block.to_proc if block
204
196
  from.maps[to] ||= Masamune::Schema::Map.new(@context.options.merge(source: from, target: to))
205
197
  ensure
206
198
  @context.pop
@@ -133,7 +133,7 @@ module Masamune::Schema
133
133
  def compact_name
134
134
  if reference
135
135
  # XXX once columns only reference columns, this can be cleaned up
136
- if @id == reference.surrogate_key.reference_name(reference.label)
136
+ if reference.surrogate_key && @id == reference.surrogate_key.reference_name(reference.label)
137
137
  "#{reference.id}.#{reference.surrogate_key.id}".to_sym
138
138
  else
139
139
  "#{reference.id}.#{@id}".to_sym
@@ -184,12 +184,12 @@ module Masamune::Schema
184
184
  array_value? ? "#{elem}[]" : elem
185
185
  end
186
186
 
187
- def hql_type
187
+ def hql_type(for_surrogate_key = false)
188
188
  elem =
189
189
  case type
190
190
  when :integer
191
- 'INT'
192
- when :string
191
+ for_surrogate_key ? 'STRING' : 'INT'
192
+ when :string, :enum, :key_value, :timestamp
193
193
  'STRING'
194
194
  else
195
195
  sql_type
@@ -264,7 +264,9 @@ module Masamune::Schema
264
264
  when Date, DateTime
265
265
  value.to_time
266
266
  when String
267
- if value =~ /\A\d+\z/
267
+ if value.blank?
268
+ nil
269
+ elsif value =~ /\A\d+\z/
268
270
  Time.at(value.to_i)
269
271
  else
270
272
  Time.parse(value)
@@ -281,7 +283,7 @@ module Masamune::Schema
281
283
  when Hash
282
284
  value
283
285
  when String
284
- ruby_key_value(YAML.load(value))
286
+ YAML.load(value)
285
287
  when nil
286
288
  {}
287
289
  end
@@ -290,7 +292,7 @@ module Masamune::Schema
290
292
  when Hash
291
293
  value
292
294
  when String
293
- ruby_key_value(JSON.load(value))
295
+ JSON.load(value)
294
296
  when nil
295
297
  {}
296
298
  end
@@ -366,6 +368,10 @@ module Masamune::Schema
366
368
  [name, sql_type(surrogate_key), *sql_constraints, reference_constraint, sql_default].compact.join(' ')
367
369
  end
368
370
 
371
+ def as_hql
372
+ [name, hql_type(surrogate_key)].compact.join(' ')
373
+ end
374
+
369
375
  def as_hash
370
376
  {id: id}.tap do |hash|
371
377
  DEFAULT_ATTRIBUTES.keys.each do |attr|
@@ -462,11 +468,9 @@ module Masamune::Schema
462
468
  end
463
469
 
464
470
  def required_value?
465
- if reference
466
- !(reference.null || reference.default)
467
- else
468
- surrogate_key || natural_key || !(null || default)
469
- end
471
+ return false if reference && (reference.null || !reference.default.nil?)
472
+ return false if null || !default.nil?
473
+ true
470
474
  end
471
475
 
472
476
  private
@@ -495,24 +499,6 @@ module Masamune::Schema
495
499
  end
496
500
  end
497
501
 
498
- def ruby_key_value(hash)
499
- case sub_type
500
- when :boolean
501
- Hash[hash.map { |key, value| ruby_boolean_key_value(key, value) }.compact]
502
- else
503
- hash
504
- end
505
- end
506
-
507
- def ruby_boolean_key_value(key, value)
508
- case value
509
- when true, '1', 1
510
- [key, true]
511
- when false, '0', 0
512
- [key, false]
513
- end
514
- end
515
-
516
502
  def csv_array(value)
517
503
  case value
518
504
  when Array
@@ -77,15 +77,8 @@ module Masamune::Schema
77
77
  next if column.surrogate_key
78
78
  next if reserved_column_ids.include?(column.id)
79
79
 
80
- if column.type == :key_value
81
- column_now, column_was = column.dup, column.dup
82
- column_now.id, column_was.id = "#{column.id}_now", "#{column.id}_was"
83
- column_now.strict, column_was.strict = false, false
84
- [column_now, column_was]
85
- else
86
- column.dup.tap do |column_copy|
87
- column_copy.strict = false unless column.surrogate_key || column.natural_key || (column.reference && column.reference.surrogate_key.auto)
88
- end
80
+ column.dup.tap do |column_copy|
81
+ column_copy.strict = false unless column.surrogate_key || column.natural_key || (column.reference && column.reference.surrogate_key.auto)
89
82
  end
90
83
  end.flatten
91
84
  end
@@ -82,10 +82,6 @@ module Masamune::Schema
82
82
  @partition_tables[partition_range] ||= self.class.new(id: @id, store: store, columns: partition_table_columns, parent: self, range: partition_range, grain: grain, inherit: true)
83
83
  end
84
84
 
85
- def partitions
86
- columns.select { |_, column| column.partition }
87
- end
88
-
89
85
  def measures
90
86
  columns.select { |_, column| column.measure }
91
87
  end
@@ -239,7 +239,7 @@ module Masamune::Schema
239
239
  def default_row(columns)
240
240
  {}.tap do |row|
241
241
  columns.each do |_, column|
242
- row[column.name] = column.default_ruby_value
242
+ row[column.compact_name] = column.default_ruby_value
243
243
  end
244
244
  end
245
245
  end
@@ -89,7 +89,7 @@ module Masamune::Schema
89
89
  end
90
90
 
91
91
  def headers
92
- values.keys
92
+ @columns.map { |_, column| column.name }
93
93
  end
94
94
 
95
95
  def serialize
@@ -121,8 +121,8 @@ module Masamune::Schema
121
121
  values.each do |key, value|
122
122
  next unless key
123
123
  if column = parent.dereference_column_name(key)
124
- @columns[column.name] = column
125
- result[column.name] = column.ruby_value(value)
124
+ @columns[column.compact_name] = column
125
+ result[column.compact_name] = column.ruby_value(value)
126
126
  elsif strict
127
127
  raise ArgumentError, "#{@values} contains undefined columns #{key}"
128
128
  end
@@ -26,7 +26,7 @@ module Masamune::Schema
26
26
  class Store
27
27
  include Masamune::HasEnvironment
28
28
 
29
- SUPPORTED_ATTRIBUTES = %(table dimension fact file event)
29
+ SUPPORTED_ATTRIBUTES = %(table dimension fact file)
30
30
 
31
31
  DEFAULT_ATTRIBUTES =
32
32
  {
@@ -45,7 +45,6 @@ module Masamune::Schema
45
45
  attr_accessor :dimensions
46
46
  attr_accessor :facts
47
47
  attr_accessor :files
48
- attr_accessor :events
49
48
  attr_accessor :references
50
49
 
51
50
  class << self
@@ -67,7 +66,6 @@ module Masamune::Schema
67
66
  @dimensions = {}.with_indifferent_access
68
67
  @facts = {}.with_indifferent_access
69
68
  @files = {}.with_indifferent_access
70
- @events = {}.with_indifferent_access
71
69
  @references = {}.with_indifferent_access
72
70
  @extra = []
73
71
  end
@@ -39,7 +39,8 @@ module Masamune::Schema
39
39
  columns: {},
40
40
  rows: [],
41
41
  inherit: false,
42
- debug: false
42
+ debug: false,
43
+ properties: {}
43
44
  }
44
45
 
45
46
  DEFAULT_ATTRIBUTES.keys.each do |attr|
@@ -111,7 +112,7 @@ module Masamune::Schema
111
112
  end
112
113
 
113
114
  def defined_columns
114
- columns.values
115
+ columns.values.reject { |column| column.partition }
115
116
  end
116
117
  method_with_last_element :defined_columns
117
118
 
@@ -154,6 +155,10 @@ module Masamune::Schema
154
155
  columns.values.select { | column| column.reference && column.reference.foreign_key }
155
156
  end
156
157
 
158
+ def partitions
159
+ columns.select { |_, column| column.partition }
160
+ end
161
+
157
162
  def insert_rows
158
163
  rows.select { |row| row.insert_values.any? }
159
164
  end
@@ -174,6 +179,27 @@ module Masamune::Schema
174
179
  columns.reject { |_, column| reserved_column_ids.include?(column.id) }
175
180
  end
176
181
 
182
+ def denormalized_columns
183
+ columns.map do |_, column|
184
+ next if column.surrogate_key || column.ignore
185
+ if column.reference
186
+ column.reference.natural_keys.any? ? column.reference.natural_keys : column.reference.denormalized_columns
187
+ else
188
+ column
189
+ end
190
+ end.flatten.compact
191
+ end
192
+
193
+ def denormalized_column_names
194
+ denormalized_columns.map do |column|
195
+ if column.parent == self
196
+ column.name.to_s
197
+ else
198
+ [column.parent.id, column.name].join('.')
199
+ end
200
+ end
201
+ end
202
+
177
203
  def stage_table(options = {})
178
204
  selected = options[:columns] if options[:columns]
179
205
  selected ||= options[:target].columns.values.map(&:compact_name) if options[:target]
@@ -25,7 +25,6 @@ module Masamune
25
25
  require 'masamune/transform/operator'
26
26
 
27
27
  require 'masamune/transform/define_table'
28
- require 'masamune/transform/define_event_view'
29
28
  require 'masamune/transform/define_schema'
30
29
  require 'masamune/transform/denormalize_table'
31
30
 
@@ -21,12 +21,10 @@
21
21
  # THE SOFTWARE.
22
22
 
23
23
  require 'masamune/transform/define_table'
24
- require 'masamune/transform/define_event_view'
25
24
 
26
25
  module Masamune::Transform
27
26
  module DefineSchema
28
27
  include DefineTable
29
- include DefineEventView
30
28
 
31
29
  extend ActiveSupport::Concern
32
30
 
@@ -44,10 +42,6 @@ module Masamune::Transform
44
42
  operators << define_table(fact)
45
43
  end
46
44
 
47
- context.events.each do |_, event|
48
- operators << define_event_view(event)
49
- end
50
-
51
45
  operators += context.extra(:post)
52
46
 
53
47
  Operator.new __method__, *operators, source: context
@@ -22,13 +22,14 @@
22
22
 
23
23
  CREATE TABLE IF NOT EXISTS <%= target.name %>
24
24
  (
25
- <%- target.reference_columns.each do |column| -%>
26
- <%= column.name %> <%= column.hql_type %>,
25
+ <%- target.defined_columns.each do |column, last| -%>
26
+ <%= column.as_hql %><%= ',' unless last %>
27
27
  <%- end -%>
28
- <%- target.measures.each do |_, measure| -%>
29
- <%= measure.name %> <%= measure.hql_type %>,
30
- <%- end -%>
31
- <%= target.time_key.name %> <%= target.time_key.hql_type %>
32
28
  )
29
+ <%- if target.partition_by -%>
33
30
  PARTITIONED BY (<%= target.partition_by %>)
31
+ <%- end -%>
32
+ <%- if target.properties[:format] == :tsv -%>
33
+ ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
34
+ <%- end -%>
34
35
  TBLPROPERTIES ('serialization.null.format' = '');
@@ -33,6 +33,7 @@ module Masamune::Transform
33
33
 
34
34
  class Hive < SimpleDelegator
35
35
  def partition_by
36
+ return unless partitions.any?
36
37
  partitions.map { |_, column| "#{column.name} #{column.hql_type}" }.join(', ')
37
38
  end
38
39
  end
@@ -20,32 +20,14 @@
20
20
  -- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
21
  -- THE SOFTWARE.
22
22
 
23
- DROP VIEW IF EXISTS <%= target.view_name %>;
24
- CREATE VIEW IF NOT EXISTS <%= target.view_name %> (
25
- uuid,
26
- type,
27
- <%- target.view_columns.each do |value| -%>
28
- <%= value %>,
23
+ SELECT
24
+ <%- target.select_columns(columns).each do |column, last| -%>
25
+ <%= column %><%= ',' unless last %>
29
26
  <%- end -%>
30
- delta,
31
- created_at,
32
- y, m, d ,h
33
- ) PARTITIONED ON (y, m, d, h) AS
34
- SELECT DISTINCT
35
- uuid,
36
- type,
37
- <%- target.view_values.each do |value| -%>
38
- <%= value %>,
39
- <%- end -%>
40
- IF(type = '<%= target.update_type %>', 1, 0) AS delta,
41
- ctime_iso8601 AS created_at,
42
- y, m, d ,h
43
27
  FROM
44
- events
45
- LATERAL VIEW
46
- json_tuple(events.json, <%= target.view_columns.map { |value| "'#{value}'" }.join(', ') %>) event_data AS <%= target.view_columns.join(', ') %>
47
- WHERE
48
- type = '<%= target.create_type %>' OR
49
- type = '<%= target.update_type %>' OR
50
- type = '<%= target.delete_type %>'
28
+ <%= target.name %>
29
+ ORDER BY
30
+ <%- target.order_by_columns(order_by).each do |column, last| -%>
31
+ <%= column %><%= ',' unless last %>
32
+ <%- end -%>
51
33
  ;