masamune 0.11.9 → 0.12.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39) hide show
  1. checksums.yaml +4 -4
  2. data/lib/masamune/actions/transform.rb +31 -16
  3. data/lib/masamune/schema.rb +0 -1
  4. data/lib/masamune/schema/catalog.rb +2 -10
  5. data/lib/masamune/schema/column.rb +16 -30
  6. data/lib/masamune/schema/dimension.rb +2 -9
  7. data/lib/masamune/schema/fact.rb +0 -4
  8. data/lib/masamune/schema/map.rb +1 -1
  9. data/lib/masamune/schema/row.rb +3 -3
  10. data/lib/masamune/schema/store.rb +1 -3
  11. data/lib/masamune/schema/table.rb +28 -2
  12. data/lib/masamune/transform.rb +0 -1
  13. data/lib/masamune/transform/define_schema.rb +0 -6
  14. data/lib/masamune/transform/define_table.hql.erb +7 -6
  15. data/lib/masamune/transform/define_table.rb +1 -0
  16. data/lib/masamune/transform/{define_event_view.hql.erb → denormalize_table.hql.erb} +8 -26
  17. data/lib/masamune/transform/denormalize_table.psql.erb +1 -1
  18. data/lib/masamune/transform/denormalize_table.rb +13 -4
  19. data/lib/masamune/transform/snapshot_dimension.rb +1 -1
  20. data/lib/masamune/transform/stage_fact.rb +1 -1
  21. data/lib/masamune/version.rb +1 -1
  22. data/spec/masamune/actions/transform_spec.rb +50 -18
  23. data/spec/masamune/schema/catalog_spec.rb +0 -53
  24. data/spec/masamune/schema/column_spec.rb +9 -41
  25. data/spec/masamune/schema/fact_spec.rb +3 -1
  26. data/spec/masamune/schema/map_spec.rb +187 -189
  27. data/spec/masamune/schema/table_spec.rb +8 -0
  28. data/spec/masamune/transform/bulk_upsert.dimension_spec.rb +4 -6
  29. data/spec/masamune/transform/define_schema_spec.rb +5 -6
  30. data/spec/masamune/transform/define_table.dimension_spec.rb +101 -2
  31. data/spec/masamune/transform/define_table.fact_spec.rb +2 -0
  32. data/spec/masamune/transform/denormalize_table_spec.rb +170 -14
  33. data/spec/masamune/transform/snapshot_dimension_spec.rb +1 -1
  34. data/spec/masamune/transform/stage_dimension_spec.rb +3 -3
  35. metadata +3 -9
  36. data/lib/masamune/schema/event.rb +0 -121
  37. data/lib/masamune/transform/define_event_view.rb +0 -60
  38. data/spec/masamune/schema/event_spec.rb +0 -75
  39. data/spec/masamune/transform/define_event_view_spec.rb +0 -84
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e71d5026f78a0babee52533490fe3d79da306451
4
- data.tar.gz: 4e78ab8efb0c9697984707dd4f242b061a50b858
3
+ metadata.gz: a12e8b8ce6cb92c5d551e9024053d50ff59f3cd6
4
+ data.tar.gz: 72bdc252c7b9b6f9e787b211eae46bc3edb69358
5
5
  SHA512:
6
- metadata.gz: 16462d0f049ee2caac1f540dbe2266b268b58f601762edb46d69d7b3db676ff9af173d212bf20c3526f4c40307bed4b9d02ccce4c01adde2c5891e73c421c581
7
- data.tar.gz: 1f7fc256a7561808e36390f0bad66c6add14f89b71d36d3ea91e8650ed5f5c719b7cfdc851991e4f4c6676c7206b6e513a842a83bf2b797085978cc00d2f799b
6
+ metadata.gz: 96be3850b76ec158f8a13be46ec2a5108c9ec01f99987f0a9414602b4072b768e072b72906a28ac9b8aa101940ece9e0f1255c2bcf25bc6cdf2726d9c7783c36
7
+ data.tar.gz: 31863312d8fdbbad89e1e0a57fcbbe51b754feca838891d1f02cefe09f547da431f6089e36e6dafc56aa20cabea8a4d0cec432b0c0b07e11c496714668388942
@@ -49,21 +49,10 @@ module Masamune::Actions
49
49
  FILE_MODE = 0777 - File.umask
50
50
 
51
51
  def load_dimension(source_files, source, target)
52
- output = Tempfile.new('masamune')
53
- FileUtils.chmod(FILE_MODE, output.path)
54
-
55
- if source.respond_to?(:map) and map = source.map(to: target)
56
- result = map.apply(source_files, output)
57
- else
58
- output = source_files
59
- result = source
52
+ optional_apply_map(source_files, source, target) do |intermediate_files, intermediate|
53
+ transform = Wrapper.load_dimension(intermediate_files, intermediate, target)
54
+ postgres file: transform.to_file, debug: (source.debug || target.debug || intermediate.debug)
60
55
  end
61
-
62
- transform = Wrapper.load_dimension(output, result, target)
63
- logger.debug(File.read(output)) if (source.debug || map.debug)
64
- postgres file: transform.to_file, debug: (source.debug || target.debug || map.debug)
65
- ensure
66
- output.unlink
67
56
  end
68
57
 
69
58
  def consolidate_dimension(target)
@@ -77,13 +66,39 @@ module Masamune::Actions
77
66
  end
78
67
 
79
68
  def load_fact(source_files, source, target, date)
80
- transform = Wrapper.load_fact(source_files, source, target, date)
81
- postgres file: transform.to_file, debug: (source.debug || target.debug)
69
+ optional_apply_map(source_files, source, target) do |intermediate_files, intermediate|
70
+ transform = Wrapper.load_fact(intermediate_files, intermediate, target, date)
71
+ postgres file: transform.to_file, debug: (source.debug || target.debug || intermediate.debug)
72
+ end
82
73
  end
83
74
 
84
75
  def rollup_fact(source, target, date)
85
76
  transform = Wrapper.rollup_fact(source, target, date)
86
77
  postgres file: transform.to_file, debug: (source.debug || target.debug)
87
78
  end
79
+
80
+ private
81
+
82
+ def optional_apply_map(source_files, source, target, &block)
83
+ if source.respond_to?(:map) and map = source.map(to: target)
84
+ apply_map(map, source_files, source, target, &block)
85
+ else
86
+ yield source_files, source
87
+ end
88
+ end
89
+
90
+ def apply_map(map, source_files, source, target)
91
+ Tempfile.open('masamune') do |output|
92
+ begin
93
+ FileUtils.chmod(FILE_MODE, output.path)
94
+ result = map.apply(source_files, output)
95
+ result.debug = map.debug
96
+ logger.debug(File.read(output)) if (source.debug || result.debug)
97
+ yield output, result
98
+ ensure
99
+ output.unlink
100
+ end
101
+ end
102
+ end
88
103
  end
89
104
  end
@@ -30,7 +30,6 @@ module Masamune
30
30
  require 'masamune/schema/fact'
31
31
  require 'masamune/schema/column'
32
32
  require 'masamune/schema/row'
33
- require 'masamune/schema/event'
34
33
  require 'masamune/schema/map'
35
34
  end
36
35
  end
@@ -173,7 +173,7 @@ module Masamune::Schema
173
173
  end
174
174
 
175
175
  def file(id, options = {})
176
- format_options = options.extract!(:format, :headers)
176
+ format_options = options.extract!(:format, :headers, :json_encoding)
177
177
  @context.push(options)
178
178
  yield if block_given?
179
179
  store = HasFormat.new(@context, format_options)
@@ -182,14 +182,6 @@ module Masamune::Schema
182
182
  @context.pop
183
183
  end
184
184
 
185
- def event(id, options = {})
186
- @context.push(options)
187
- yield if block_given?
188
- @context.events[id] = HasMap.new Masamune::Schema::Event.new(@context.options.merge(id: id))
189
- ensure
190
- @context.pop
191
- end
192
-
193
185
  def attribute(id, options = {})
194
186
  @context.options[:attributes] << Masamune::Schema::Event::Attribute.new(options.merge(id: id))
195
187
  end
@@ -200,7 +192,7 @@ module Masamune::Schema
200
192
  raise ArgumentError, "invalid map, from: is missing" unless from && from.try(:id)
201
193
  raise ArgumentError, "invalid map from: '#{from.id}', to: is missing" unless to
202
194
  @context.push(options)
203
- @context.options[:function] = block.to_proc
195
+ @context.options[:function] = block.to_proc if block
204
196
  from.maps[to] ||= Masamune::Schema::Map.new(@context.options.merge(source: from, target: to))
205
197
  ensure
206
198
  @context.pop
@@ -133,7 +133,7 @@ module Masamune::Schema
133
133
  def compact_name
134
134
  if reference
135
135
  # XXX once columns only reference columns, this can be cleaned up
136
- if @id == reference.surrogate_key.reference_name(reference.label)
136
+ if reference.surrogate_key && @id == reference.surrogate_key.reference_name(reference.label)
137
137
  "#{reference.id}.#{reference.surrogate_key.id}".to_sym
138
138
  else
139
139
  "#{reference.id}.#{@id}".to_sym
@@ -184,12 +184,12 @@ module Masamune::Schema
184
184
  array_value? ? "#{elem}[]" : elem
185
185
  end
186
186
 
187
- def hql_type
187
+ def hql_type(for_surrogate_key = false)
188
188
  elem =
189
189
  case type
190
190
  when :integer
191
- 'INT'
192
- when :string
191
+ for_surrogate_key ? 'STRING' : 'INT'
192
+ when :string, :enum, :key_value, :timestamp
193
193
  'STRING'
194
194
  else
195
195
  sql_type
@@ -264,7 +264,9 @@ module Masamune::Schema
264
264
  when Date, DateTime
265
265
  value.to_time
266
266
  when String
267
- if value =~ /\A\d+\z/
267
+ if value.blank?
268
+ nil
269
+ elsif value =~ /\A\d+\z/
268
270
  Time.at(value.to_i)
269
271
  else
270
272
  Time.parse(value)
@@ -281,7 +283,7 @@ module Masamune::Schema
281
283
  when Hash
282
284
  value
283
285
  when String
284
- ruby_key_value(YAML.load(value))
286
+ YAML.load(value)
285
287
  when nil
286
288
  {}
287
289
  end
@@ -290,7 +292,7 @@ module Masamune::Schema
290
292
  when Hash
291
293
  value
292
294
  when String
293
- ruby_key_value(JSON.load(value))
295
+ JSON.load(value)
294
296
  when nil
295
297
  {}
296
298
  end
@@ -366,6 +368,10 @@ module Masamune::Schema
366
368
  [name, sql_type(surrogate_key), *sql_constraints, reference_constraint, sql_default].compact.join(' ')
367
369
  end
368
370
 
371
+ def as_hql
372
+ [name, hql_type(surrogate_key)].compact.join(' ')
373
+ end
374
+
369
375
  def as_hash
370
376
  {id: id}.tap do |hash|
371
377
  DEFAULT_ATTRIBUTES.keys.each do |attr|
@@ -462,11 +468,9 @@ module Masamune::Schema
462
468
  end
463
469
 
464
470
  def required_value?
465
- if reference
466
- !(reference.null || reference.default)
467
- else
468
- surrogate_key || natural_key || !(null || default)
469
- end
471
+ return false if reference && (reference.null || !reference.default.nil?)
472
+ return false if null || !default.nil?
473
+ true
470
474
  end
471
475
 
472
476
  private
@@ -495,24 +499,6 @@ module Masamune::Schema
495
499
  end
496
500
  end
497
501
 
498
- def ruby_key_value(hash)
499
- case sub_type
500
- when :boolean
501
- Hash[hash.map { |key, value| ruby_boolean_key_value(key, value) }.compact]
502
- else
503
- hash
504
- end
505
- end
506
-
507
- def ruby_boolean_key_value(key, value)
508
- case value
509
- when true, '1', 1
510
- [key, true]
511
- when false, '0', 0
512
- [key, false]
513
- end
514
- end
515
-
516
502
  def csv_array(value)
517
503
  case value
518
504
  when Array
@@ -77,15 +77,8 @@ module Masamune::Schema
77
77
  next if column.surrogate_key
78
78
  next if reserved_column_ids.include?(column.id)
79
79
 
80
- if column.type == :key_value
81
- column_now, column_was = column.dup, column.dup
82
- column_now.id, column_was.id = "#{column.id}_now", "#{column.id}_was"
83
- column_now.strict, column_was.strict = false, false
84
- [column_now, column_was]
85
- else
86
- column.dup.tap do |column_copy|
87
- column_copy.strict = false unless column.surrogate_key || column.natural_key || (column.reference && column.reference.surrogate_key.auto)
88
- end
80
+ column.dup.tap do |column_copy|
81
+ column_copy.strict = false unless column.surrogate_key || column.natural_key || (column.reference && column.reference.surrogate_key.auto)
89
82
  end
90
83
  end.flatten
91
84
  end
@@ -82,10 +82,6 @@ module Masamune::Schema
82
82
  @partition_tables[partition_range] ||= self.class.new(id: @id, store: store, columns: partition_table_columns, parent: self, range: partition_range, grain: grain, inherit: true)
83
83
  end
84
84
 
85
- def partitions
86
- columns.select { |_, column| column.partition }
87
- end
88
-
89
85
  def measures
90
86
  columns.select { |_, column| column.measure }
91
87
  end
@@ -239,7 +239,7 @@ module Masamune::Schema
239
239
  def default_row(columns)
240
240
  {}.tap do |row|
241
241
  columns.each do |_, column|
242
- row[column.name] = column.default_ruby_value
242
+ row[column.compact_name] = column.default_ruby_value
243
243
  end
244
244
  end
245
245
  end
@@ -89,7 +89,7 @@ module Masamune::Schema
89
89
  end
90
90
 
91
91
  def headers
92
- values.keys
92
+ @columns.map { |_, column| column.name }
93
93
  end
94
94
 
95
95
  def serialize
@@ -121,8 +121,8 @@ module Masamune::Schema
121
121
  values.each do |key, value|
122
122
  next unless key
123
123
  if column = parent.dereference_column_name(key)
124
- @columns[column.name] = column
125
- result[column.name] = column.ruby_value(value)
124
+ @columns[column.compact_name] = column
125
+ result[column.compact_name] = column.ruby_value(value)
126
126
  elsif strict
127
127
  raise ArgumentError, "#{@values} contains undefined columns #{key}"
128
128
  end
@@ -26,7 +26,7 @@ module Masamune::Schema
26
26
  class Store
27
27
  include Masamune::HasEnvironment
28
28
 
29
- SUPPORTED_ATTRIBUTES = %(table dimension fact file event)
29
+ SUPPORTED_ATTRIBUTES = %(table dimension fact file)
30
30
 
31
31
  DEFAULT_ATTRIBUTES =
32
32
  {
@@ -45,7 +45,6 @@ module Masamune::Schema
45
45
  attr_accessor :dimensions
46
46
  attr_accessor :facts
47
47
  attr_accessor :files
48
- attr_accessor :events
49
48
  attr_accessor :references
50
49
 
51
50
  class << self
@@ -67,7 +66,6 @@ module Masamune::Schema
67
66
  @dimensions = {}.with_indifferent_access
68
67
  @facts = {}.with_indifferent_access
69
68
  @files = {}.with_indifferent_access
70
- @events = {}.with_indifferent_access
71
69
  @references = {}.with_indifferent_access
72
70
  @extra = []
73
71
  end
@@ -39,7 +39,8 @@ module Masamune::Schema
39
39
  columns: {},
40
40
  rows: [],
41
41
  inherit: false,
42
- debug: false
42
+ debug: false,
43
+ properties: {}
43
44
  }
44
45
 
45
46
  DEFAULT_ATTRIBUTES.keys.each do |attr|
@@ -111,7 +112,7 @@ module Masamune::Schema
111
112
  end
112
113
 
113
114
  def defined_columns
114
- columns.values
115
+ columns.values.reject { |column| column.partition }
115
116
  end
116
117
  method_with_last_element :defined_columns
117
118
 
@@ -154,6 +155,10 @@ module Masamune::Schema
154
155
  columns.values.select { | column| column.reference && column.reference.foreign_key }
155
156
  end
156
157
 
158
+ def partitions
159
+ columns.select { |_, column| column.partition }
160
+ end
161
+
157
162
  def insert_rows
158
163
  rows.select { |row| row.insert_values.any? }
159
164
  end
@@ -174,6 +179,27 @@ module Masamune::Schema
174
179
  columns.reject { |_, column| reserved_column_ids.include?(column.id) }
175
180
  end
176
181
 
182
+ def denormalized_columns
183
+ columns.map do |_, column|
184
+ next if column.surrogate_key || column.ignore
185
+ if column.reference
186
+ column.reference.natural_keys.any? ? column.reference.natural_keys : column.reference.denormalized_columns
187
+ else
188
+ column
189
+ end
190
+ end.flatten.compact
191
+ end
192
+
193
+ def denormalized_column_names
194
+ denormalized_columns.map do |column|
195
+ if column.parent == self
196
+ column.name.to_s
197
+ else
198
+ [column.parent.id, column.name].join('.')
199
+ end
200
+ end
201
+ end
202
+
177
203
  def stage_table(options = {})
178
204
  selected = options[:columns] if options[:columns]
179
205
  selected ||= options[:target].columns.values.map(&:compact_name) if options[:target]
@@ -25,7 +25,6 @@ module Masamune
25
25
  require 'masamune/transform/operator'
26
26
 
27
27
  require 'masamune/transform/define_table'
28
- require 'masamune/transform/define_event_view'
29
28
  require 'masamune/transform/define_schema'
30
29
  require 'masamune/transform/denormalize_table'
31
30
 
@@ -21,12 +21,10 @@
21
21
  # THE SOFTWARE.
22
22
 
23
23
  require 'masamune/transform/define_table'
24
- require 'masamune/transform/define_event_view'
25
24
 
26
25
  module Masamune::Transform
27
26
  module DefineSchema
28
27
  include DefineTable
29
- include DefineEventView
30
28
 
31
29
  extend ActiveSupport::Concern
32
30
 
@@ -44,10 +42,6 @@ module Masamune::Transform
44
42
  operators << define_table(fact)
45
43
  end
46
44
 
47
- context.events.each do |_, event|
48
- operators << define_event_view(event)
49
- end
50
-
51
45
  operators += context.extra(:post)
52
46
 
53
47
  Operator.new __method__, *operators, source: context
@@ -22,13 +22,14 @@
22
22
 
23
23
  CREATE TABLE IF NOT EXISTS <%= target.name %>
24
24
  (
25
- <%- target.reference_columns.each do |column| -%>
26
- <%= column.name %> <%= column.hql_type %>,
25
+ <%- target.defined_columns.each do |column, last| -%>
26
+ <%= column.as_hql %><%= ',' unless last %>
27
27
  <%- end -%>
28
- <%- target.measures.each do |_, measure| -%>
29
- <%= measure.name %> <%= measure.hql_type %>,
30
- <%- end -%>
31
- <%= target.time_key.name %> <%= target.time_key.hql_type %>
32
28
  )
29
+ <%- if target.partition_by -%>
33
30
  PARTITIONED BY (<%= target.partition_by %>)
31
+ <%- end -%>
32
+ <%- if target.properties[:format] == :tsv -%>
33
+ ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
34
+ <%- end -%>
34
35
  TBLPROPERTIES ('serialization.null.format' = '');
@@ -33,6 +33,7 @@ module Masamune::Transform
33
33
 
34
34
  class Hive < SimpleDelegator
35
35
  def partition_by
36
+ return unless partitions.any?
36
37
  partitions.map { |_, column| "#{column.name} #{column.hql_type}" }.join(', ')
37
38
  end
38
39
  end
@@ -20,32 +20,14 @@
20
20
  -- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
21
  -- THE SOFTWARE.
22
22
 
23
- DROP VIEW IF EXISTS <%= target.view_name %>;
24
- CREATE VIEW IF NOT EXISTS <%= target.view_name %> (
25
- uuid,
26
- type,
27
- <%- target.view_columns.each do |value| -%>
28
- <%= value %>,
23
+ SELECT
24
+ <%- target.select_columns(columns).each do |column, last| -%>
25
+ <%= column %><%= ',' unless last %>
29
26
  <%- end -%>
30
- delta,
31
- created_at,
32
- y, m, d ,h
33
- ) PARTITIONED ON (y, m, d, h) AS
34
- SELECT DISTINCT
35
- uuid,
36
- type,
37
- <%- target.view_values.each do |value| -%>
38
- <%= value %>,
39
- <%- end -%>
40
- IF(type = '<%= target.update_type %>', 1, 0) AS delta,
41
- ctime_iso8601 AS created_at,
42
- y, m, d ,h
43
27
  FROM
44
- events
45
- LATERAL VIEW
46
- json_tuple(events.json, <%= target.view_columns.map { |value| "'#{value}'" }.join(', ') %>) event_data AS <%= target.view_columns.join(', ') %>
47
- WHERE
48
- type = '<%= target.create_type %>' OR
49
- type = '<%= target.update_type %>' OR
50
- type = '<%= target.delete_type %>'
28
+ <%= target.name %>
29
+ ORDER BY
30
+ <%- target.order_by_columns(order_by).each do |column, last| -%>
31
+ <%= column %><%= ',' unless last %>
32
+ <%- end -%>
51
33
  ;