masamune 0.12.3 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. checksums.yaml +4 -4
  2. data/bin/masamune-dump +4 -0
  3. data/lib/masamune/schema/column.rb +2 -20
  4. data/lib/masamune/schema/dimension.rb +12 -11
  5. data/lib/masamune/schema/fact.rb +10 -1
  6. data/lib/masamune/schema/map.rb +3 -3
  7. data/lib/masamune/schema/row.rb +1 -1
  8. data/lib/masamune/schema/table.rb +55 -23
  9. data/lib/masamune/schema/table_reference.rb +5 -0
  10. data/lib/masamune/tasks/dump_thor.rb +58 -0
  11. data/lib/masamune/tasks/shell_thor.rb +0 -19
  12. data/lib/masamune/template.rb +1 -2
  13. data/lib/masamune/transform/define_foreign_key.psql.erb +39 -0
  14. data/lib/masamune/transform/define_index.psql.erb +7 -3
  15. data/lib/masamune/transform/define_schema.rb +3 -3
  16. data/lib/masamune/transform/define_table.psql.erb +24 -3
  17. data/lib/masamune/transform/define_table.rb +16 -2
  18. data/lib/masamune/transform/define_unique.psql.erb +1 -1
  19. data/lib/masamune/transform/denormalize_table.rb +5 -1
  20. data/lib/masamune/transform/replace_table.psql.erb +9 -13
  21. data/lib/masamune/transform/stage_fact.rb +16 -10
  22. data/lib/masamune/version.rb +1 -1
  23. data/spec/masamune/schema/map_spec.rb +1 -1
  24. data/spec/masamune/tasks/dump_thor_spec.rb +42 -0
  25. data/spec/masamune/tasks/shell_thor_spec.rb +0 -11
  26. data/spec/masamune/template_spec.rb +5 -0
  27. data/spec/masamune/transform/define_table.dimension_spec.rb +81 -52
  28. data/spec/masamune/transform/define_table.fact_spec.rb +27 -63
  29. data/spec/masamune/transform/define_table.table_spec.rb +397 -32
  30. data/spec/masamune/transform/denormalize_table_spec.rb +20 -0
  31. data/spec/masamune/transform/rollup_fact_spec.rb +54 -54
  32. data/spec/masamune/transform/stage_fact_spec.rb +57 -34
  33. metadata +9 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 18fd59e8bd668c7efc320ad374ef1bc96c384184
4
- data.tar.gz: f12fa656c4136942e1d4a5a917788c2d692b7e8f
3
+ metadata.gz: 77bd1d9e8351f259a7d946ac7b2abd11b4c14408
4
+ data.tar.gz: deb47f8efaadd8b90b5825f79970e46833bbb030
5
5
  SHA512:
6
- metadata.gz: f83bd5adbc40df655a01e4a390d6435fd5bc6e26eeae21ea13898a31931ee6b878540988a0c2de02b082f7e5f86e21e53152e9f704df396f1cda7e8dba1a3cd2
7
- data.tar.gz: af6239a670e87aa6757b6dea3c96c467b4243b7b86cbd3ce35ae46dd5e7ff1362751ede5cb1c7a9e686716ef410e5e976fe2ebe65e5c948717de24713ed44445
6
+ metadata.gz: 784416ce6169e2c3f2a9a14465580417ecfc6bdd73e238643022f608a9a04ede66d0b670dfce06e0956b14d3d96a8edbcbe0018ee76bdd0e1ccd4c2bb9f4b37a
7
+ data.tar.gz: 6b1cb22550d392cb6bf6cf36258d21b28e7c323dd6bab5616b4066380e315bcb289c725d52a36f65cd182293c1a28d68f31e928a642d5429ce1f024945f2d81a
data/bin/masamune-dump ADDED
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
+ $: << File.expand_path('../../lib/', __FILE__)
3
+ require 'masamune/tasks/dump_thor'
4
+ Masamune::Tasks::DumpThor.start(ARGV)
data/lib/masamune/schema/column.rb CHANGED
@@ -85,7 +85,6 @@ module Masamune::Schema
85
85
 
86
86
  def index=(value)
87
87
  @index ||= Set.new
88
- @index.clear
89
88
  @index +=
90
89
  case value
91
90
  when true
@@ -101,14 +100,8 @@ module Masamune::Schema
101
100
  end
102
101
  end
103
102
 
104
- def unique
105
- self.unique = 'natural' if natural_key
106
- @unique
107
- end
108
-
109
103
  def unique=(value)
110
104
  @unique ||= Set.new
111
- @unique.clear
112
105
  @unique +=
113
106
  case value
114
107
  when true
@@ -125,7 +118,7 @@ module Masamune::Schema
125
118
  end
126
119
 
127
120
  def foreign_key_name
128
- "#{reference.name}.#{@id}".to_sym if reference
121
+ "#{[reference.label, reference.name].compact.join('_')}.#{@id}".to_sym if reference
129
122
  end
130
123
 
131
124
  def compact_name
@@ -363,7 +356,7 @@ module Masamune::Schema
363
356
  end
364
357
 
365
358
  def as_psql
366
- [name, sql_type(surrogate_key), *sql_constraints, reference_constraint, sql_default].compact.join(' ')
359
+ [name, sql_type(surrogate_key), *sql_constraints, sql_default].compact.join(' ')
367
360
  end
368
361
 
369
362
  def as_hql
@@ -378,16 +371,6 @@ module Masamune::Schema
378
371
  end
379
372
  end
380
373
 
381
- # TODO: Add ELEMENT REFERENCES
382
- def reference_constraint
383
- return if parent && parent.temporary?
384
- return if degenerate?
385
- return if array_value?
386
- if reference && reference.surrogate_key.type == type
387
- "REFERENCES #{reference.name}(#{reference.surrogate_key.name})"
388
- end
389
- end
390
-
391
374
  class << self
392
375
  def dereference_column_name(name)
393
376
  return unless name
@@ -477,7 +460,6 @@ module Masamune::Schema
477
460
  def sql_constraints
478
461
  [].tap do |constraints|
479
462
  constraints << 'NOT NULL' unless null || surrogate_key || !strict || parent.temporary? || degenerate?
480
- constraints << "PRIMARY KEY" if surrogate_key
481
463
  end
482
464
  end
483
465
 
data/lib/masamune/schema/dimension.rb CHANGED
@@ -91,27 +91,28 @@ module Masamune::Schema
91
91
  end
92
92
 
93
93
  def initialize_dimension_columns!
94
+ # TODO assign index for load_fact
94
95
  case type
95
96
  when :one, :date
96
97
  initialize_column! id: 'last_modified_at', type: :timestamp, default: 'NOW()'
97
98
  when :two
98
- initialize_column! id: 'start_at', type: :timestamp, default: 'TO_TIMESTAMP(0)', index: true, unique: 'natural'
99
- initialize_column! id: 'end_at', type: :timestamp, null: true, index: true
100
- initialize_column! id: 'version', type: :integer, default: 1, null: true, index: true
99
+ initialize_column! id: 'start_at', type: :timestamp, default: 'TO_TIMESTAMP(0)', index: [:start_at, :natural], unique: :natural
100
+ initialize_column! id: 'end_at', type: :timestamp, null: true, index: :end_at
101
+ initialize_column! id: 'version', type: :integer, default: 1, null: true
101
102
  initialize_column! id: 'last_modified_at', type: :timestamp, default: 'NOW()'
102
103
  when :four
103
104
  children << ledger_table
104
105
  # FIXME derive type from from parent
105
- initialize_column! id: 'parent_id', type: :integer, null: true, reference: ledger_table
106
- initialize_column! id: 'record_id', type: :integer, null: true, reference: ledger_table
107
- initialize_column! id: 'start_at', type: :timestamp, default: 'TO_TIMESTAMP(0)', index: true, unique: 'natural'
108
- initialize_column! id: 'end_at', type: :timestamp, null: true, index: true
109
- initialize_column! id: 'version', type: :integer, default: 1, null: true, index: true
106
+ initialize_column! id: 'parent_id', type: :integer, null: true, reference: TableReference.new(ledger_table)
107
+ initialize_column! id: 'record_id', type: :integer, null: true, reference: TableReference.new(ledger_table)
108
+ initialize_column! id: 'start_at', type: :timestamp, default: 'TO_TIMESTAMP(0)', index: [:start_at, :natural], unique: :natural
109
+ initialize_column! id: 'end_at', type: :timestamp, null: true, index: :end_at
110
+ initialize_column! id: 'version', type: :integer, default: 1, null: true
110
111
  initialize_column! id: 'last_modified_at', type: :timestamp, default: 'NOW()'
111
112
  when :ledger
112
- initialize_column! id: 'source_kind', type: :string, unique: 'natural'
113
- initialize_column! id: 'source_uuid', type: :string, unique: 'natural'
114
- initialize_column! id: 'start_at', type: :timestamp, index: true, unique: 'natural'
113
+ initialize_column! id: 'source_kind', type: :string, index: :natural, unique: :natural
114
+ initialize_column! id: 'source_uuid', type: :string, index: :natural, unique: :natural
115
+ initialize_column! id: 'start_at', type: :timestamp, index: :natural, unique: :natural
115
116
  initialize_column! id: 'last_modified_at', type: :timestamp, default: 'NOW()'
116
117
  initialize_column! id: 'delta', type: :integer
117
118
  when :stage
data/lib/masamune/schema/fact.rb CHANGED
@@ -34,8 +34,13 @@ module Masamune::Schema
34
34
  @partition = opts.delete(:partition)
35
35
  super opts.reverse_merge(type: :fact)
36
36
  initialize_fact_columns!
37
- foreign_key_columns.each do |column|
37
+ reference_columns.each do |column|
38
+ column.index.clear
38
39
  column.index << column.name
40
+ if type == :stage
41
+ column.index << "#{column.name}_time_key"
42
+ time_key.index << "#{column.name}_time_key"
43
+ end
39
44
  end
40
45
  time_key.index << time_key.name
41
46
  end
@@ -59,6 +64,10 @@ module Masamune::Schema
59
64
  columns.select { |_, column| column && column.reference && column.reference.type == :date }.values.first
60
65
  end
61
66
 
67
+ def primary_keys
68
+ []
69
+ end
70
+
62
71
  def time_key
63
72
  columns.values.detect { |column| column.id == :time_key }
64
73
  end
data/lib/masamune/schema/map.rb CHANGED
@@ -112,7 +112,7 @@ module Masamune::Schema
112
112
  @csv ||= CSV.new(@io, options.merge(headers: row.headers, write_headers: write_headers))
113
113
  if row.missing_required_columns.any?
114
114
  missing_required_column_names = row.missing_required_columns.map(&:name)
115
- @store.logger.warn("row '#{row.to_hash}' is missing required columns '#{missing_required_column_names.join(',')}', skipping")
115
+ @store.logger.warn("row '#{row.to_hash}' is missing required columns '#{missing_required_column_names.join(', ')}', skipping")
116
116
  else
117
117
  @csv << row.serialize if append?(row.serialize)
118
118
  end
@@ -258,8 +258,8 @@ module Masamune::Schema
258
258
  Array.wrap(function.call(input)).each do |output|
259
259
  yield output
260
260
  end
261
- rescue
262
- @store.logger.warn("failed to process '#{input}' for #{target.name}, skipping")
261
+ rescue => e
262
+ @store.logger.warn("failed to process row for #{target.name}, skipping: #{e.message}")
263
263
  end
264
264
  end
265
265
  end
data/lib/masamune/schema/row.rb CHANGED
@@ -60,7 +60,7 @@ module Masamune::Schema
60
60
  def name(column = nil)
61
61
  return unless @id
62
62
  if column
63
- "#{@id}_#{column.name}()"
63
+ "#{@id}_#{parent.name}_#{column.name}()"
64
64
  else
65
65
  "#{@id}_#{parent.name}_#{parent.surrogate_key.name}()"
66
66
  end
data/lib/masamune/schema/table.rb CHANGED
@@ -75,11 +75,9 @@ module Masamune::Schema
75
75
  raise ArgumentError, "table #{name} contains reserved columns" if columns.any? { |column| reserved_column_ids.include?(column.id) }
76
76
 
77
77
  initialize_surrogate_key_column! unless columns.any? { |column| column.surrogate_key }
78
- initialize_reference_columns! unless columns.any? { |column| column.reference }
78
+ initialize_reference_columns! unless columns.any? { |column| column.reference }
79
79
  columns.each do |column|
80
- raise ArgumentError, "table #{name} contains invalid columns" unless column.is_a?(Column)
81
- @columns[column.name] = column.dup
82
- @columns[column.name].parent = self
80
+ initialize_column!(column)
83
81
  end
84
82
  end
85
83
 
@@ -107,6 +105,10 @@ module Masamune::Schema
107
105
  columns.values.detect { |column| column.surrogate_key }
108
106
  end
109
107
 
108
+ def primary_keys
109
+ [*auto_surrogate_keys, surrogate_key].compact
110
+ end
111
+
110
112
  def natural_keys
111
113
  columns.values.select { |column| column.natural_key }
112
114
  end
@@ -119,16 +121,14 @@ module Masamune::Schema
119
121
  def unique_constraints
120
122
  return [] if temporary?
121
123
  unique_constraints_map.map do |_, column_names|
122
- [column_names, short_md5(column_names)]
124
+ [column_names, short_md5(column_names.to_a)]
123
125
  end.uniq
124
126
  end
125
127
 
126
- # TODO: Add optional USING
127
- # TODO: Default to GIN for array columns
128
128
  def index_columns
129
129
  index_column_map.map do |_, column_names|
130
130
  unique_index = reverse_unique_constraints_map.key?(column_names.sort)
131
- [column_names, unique_index, short_md5(column_names)]
131
+ [column_names, unique_index, short_md5(column_names.to_a)]
132
132
  end.uniq
133
133
  end
134
134
 
@@ -152,7 +152,7 @@ module Masamune::Schema
152
152
  end
153
153
 
154
154
  def foreign_key_columns
155
- columns.values.select { | column| column.reference && column.reference.foreign_key }
155
+ columns.values.select { | column| !column.degenerate? && column.reference && column.reference.foreign_key }
156
156
  end
157
157
 
158
158
  def partitions
@@ -180,22 +180,28 @@ module Masamune::Schema
180
180
  end
181
181
 
182
182
  def denormalized_columns
183
+ return to_enum(__method__).to_a.flatten.compact unless block_given?
183
184
  columns.map do |_, column|
184
185
  next if column.surrogate_key || column.ignore
185
186
  if column.reference
186
- column.reference.natural_keys.any? ? column.reference.natural_keys : column.reference.denormalized_columns
187
+ (column.reference.natural_keys.any? ? column.reference.natural_keys : column.reference.denormalized_columns).each do |join_column|
188
+ yield [column.reference, join_column]
189
+ end
187
190
  else
188
- column
191
+ yield [nil, column]
189
192
  end
190
- end.flatten.compact
193
+ end
191
194
  end
192
195
 
193
196
  def denormalized_column_names
194
- denormalized_columns.map do |column|
197
+ return to_enum(__method__).to_a unless block_given?
198
+ denormalized_columns do |reference, column|
195
199
  if column.parent == self
196
- column.name.to_s
200
+ yield column.name.to_s
201
+ elsif reference
202
+ yield [reference.id, column.name].compact.join('.')
197
203
  else
198
- [column.parent.id, column.name].join('.')
204
+ yield [column.parent.id, column.name].compact.join('.')
199
205
  end
200
206
  end
201
207
  end
@@ -247,6 +253,24 @@ module Masamune::Schema
247
253
  Integer('0x' + Digest::MD5.hexdigest(name)) % (1 << 63)
248
254
  end
249
255
 
256
+ def auto_surrogate_keys
257
+ columns.values.select { |column| column.reference && column.reference.surrogate_key.auto }.uniq.compact
258
+ end
259
+
260
+ def foreign_key_constraints
261
+ return [] if temporary?
262
+ foreign_key_columns.map do |column|
263
+ if column.reference.auto_surrogate_keys == auto_surrogate_keys
264
+ column_names = [*column.reference.auto_surrogate_keys.map(&:name), column.name].compact
265
+ reference_column_names = [*column.reference.auto_surrogate_keys.map(&:name), column.reference.surrogate_key.name].compact
266
+ else
267
+ column_names = [column.name]
268
+ reference_column_names = [column.reference.surrogate_key.name]
269
+ end
270
+ [short_md5(column_names), column_names, column.reference.name, reference_column_names]
271
+ end.compact
272
+ end
273
+
250
274
  private
251
275
 
252
276
  def stage_table_columns(parent, selected = [], inherit = true)
@@ -291,18 +315,22 @@ module Masamune::Schema
291
315
  reference.unreserved_columns.each do |_, column|
292
316
  next if column.surrogate_key
293
317
  next if column.ignore
294
- initialize_column! id: column.id, type: column.type, reference: reference, default: reference.default, index: true, null: reference.null, natural_key: reference.natural_key
318
+ initialize_column! id: column.id, type: column.type, reference: reference, default: reference.default, null: reference.null, natural_key: reference.natural_key
295
319
  end
296
320
  elsif reference.foreign_key
297
321
  # FIXME column.reference should point to reference.surrogate_key, only allow column references to Columns
298
- initialize_column! id: reference.foreign_key_name, type: reference.foreign_key_type, reference: reference, default: reference.default, index: true, null: reference.null, natural_key: reference.natural_key
322
+ initialize_column! id: reference.foreign_key_name, type: reference.foreign_key_type, reference: reference, default: reference.default, null: reference.null, natural_key: reference.natural_key
299
323
  end
300
324
  end
301
325
  end
302
326
 
303
- def initialize_column!(options = {})
304
- column = Masamune::Schema::Column.new(options.merge(parent: self))
305
- @columns[column.name.to_sym] = column
327
+ def initialize_column!(column_or_options)
328
+ column = column_or_options.is_a?(Column) ? column_or_options.dup : Column.new(column_or_options.merge(parent: self))
329
+ column_key = column.name.to_sym
330
+ @columns[column_key] = column
331
+ @columns[column_key].parent = self
332
+ @columns[column_key].index += [column_key, :natural] if column.natural_key
333
+ @columns[column_key].unique << :natural if column.natural_key
306
334
  end
307
335
 
308
336
  def index_column_map
@@ -311,9 +339,10 @@ module Masamune::Schema
311
339
  columns.each do |_, column|
312
340
  column.index.each do |index|
313
341
  map[index] << column.name
342
+ map[index].uniq!
314
343
  end
315
344
  end
316
- Hash[map.sort_by { |k, v| v.length }]
345
+ Hash[map.sort_by { |k, v| [v.length, k.to_s] }]
317
346
  end
318
347
  end
319
348
 
@@ -321,11 +350,14 @@ module Masamune::Schema
321
350
  @unique_constraints_map ||= begin
322
351
  map = Hash.new { |h,k| h[k] = [] }
323
352
  columns.each do |_, column|
353
+ next if column.auto_reference
324
354
  column.unique.each do |unique|
355
+ map[unique] += auto_surrogate_keys.map(&:name)
325
356
  map[unique] << column.name
357
+ map[unique].uniq!
326
358
  end
327
359
  end unless temporary?
328
- Hash[map.sort_by { |k, v| v.length }]
360
+ Hash[map.sort_by { |k, v| [v.length, k.to_s] }]
329
361
  end
330
362
  end
331
363
 
@@ -334,7 +366,7 @@ module Masamune::Schema
334
366
  end
335
367
 
336
368
  def short_md5(*a)
337
- Digest::MD5.hexdigest(a.join('_'))[0..6]
369
+ Digest::MD5.hexdigest(a.compact.sort.uniq.join('_'))[0..6]
338
370
  end
339
371
  end
340
372
  end
data/lib/masamune/schema/table_reference.rb CHANGED
@@ -31,6 +31,7 @@ module Masamune::Schema
31
31
  natural_key: false,
32
32
  denormalize: false,
33
33
  multiple: false,
34
+ through: [],
34
35
  degenerate: false
35
36
  }
36
37
 
@@ -72,5 +73,9 @@ module Masamune::Schema
72
73
  default_row.name(column)
73
74
  end
74
75
  end
76
+
77
+ def through=(columns)
78
+ @through = Array.wrap(columns)
79
+ end
75
80
  end
76
81
  end
data/lib/masamune/tasks/dump_thor.rb ADDED
@@ -0,0 +1,58 @@
1
+ # The MIT License (MIT)
2
+ #
3
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ require 'masamune'
24
+ require 'thor'
25
+
26
+ module Masamune::Tasks
27
+ class DumpThor < Thor
28
+ include Masamune::Thor
29
+ include Masamune::Transform::DefineSchema
30
+
31
+ # FIXME need to add an unnecessary namespace until this issue is fixed:
32
+ # https://github.com/wycats/thor/pull/247
33
+ namespace :dump
34
+ skip_lock!
35
+
36
+ desc 'dump', 'Dump schema'
37
+ method_option :type, :enum => ['psql', 'hql'], :desc => 'Schema type', :default => 'psql'
38
+ method_option :with_index, :type => :boolean, :desc => 'Dump schema with indexes', :default => true
39
+ method_option :with_foreign_key, :type => :boolean, :desc => 'Dump schema with foreign key constraints', :default => true
40
+ method_option :with_unique_constraint, :type => :boolean, :desc => 'Dump schema with uniqueness constraints', :default => true
41
+ def dump_exec
42
+ print_catalog
43
+ exit
44
+ end
45
+ default_task :dump_exec
46
+
47
+ private
48
+
49
+ def print_catalog
50
+ case options[:type]
51
+ when 'psql'
52
+ puts define_schema(catalog, :postgres, options.slice(:with_index, :with_foreign_key, :with_unique_constraint).to_h.symbolize_keys)
53
+ when 'hql'
54
+ puts define_schema(catalog, :hive)
55
+ end
56
+ end
57
+ end
58
+ end
data/lib/masamune/tasks/shell_thor.rb CHANGED
@@ -28,7 +28,6 @@ module Masamune::Tasks
28
28
  class ShellThor < Thor
29
29
  include Masamune::Thor
30
30
  include Masamune::Actions::DataFlow
31
- include Masamune::Transform::DefineSchema
32
31
 
33
32
  # FIXME need to add an unnecessary namespace until this issue is fixed:
34
33
  # https://github.com/wycats/thor/pull/247
@@ -36,29 +35,11 @@ module Masamune::Tasks
36
35
  skip_lock!
37
36
 
38
37
  desc 'shell', 'Launch an interactive shell'
39
- method_option :dump, :type => :boolean, :desc => 'Dump SQL schema', :default => false
40
- method_option :type, :enum => ['psql', 'hql'], :desc => 'Schema type', :default => 'psql'
41
38
  method_option :prompt, :desc => 'Set shell prompt', :default => 'masamune'
42
39
  class_option :start, :aliases => '-a', :desc => 'Start time', default: '1 month ago'
43
40
  def shell_exec
44
- if options[:dump]
45
- print_catalog
46
- exit
47
- end
48
-
49
41
  Pry.start self, prompt: proc { options[:prompt] + '> ' }
50
42
  end
51
43
  default_task :shell_exec
52
-
53
- private
54
-
55
- def print_catalog
56
- case options[:type]
57
- when 'psql'
58
- puts define_schema(catalog, :postgres)
59
- when 'hql'
60
- puts define_schema(catalog, :hive)
61
- end
62
- end
63
44
  end
64
45
  end