masamune 0.12.3 → 0.13.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (33) hide show
  1. checksums.yaml +4 -4
  2. data/bin/masamune-dump +4 -0
  3. data/lib/masamune/schema/column.rb +2 -20
  4. data/lib/masamune/schema/dimension.rb +12 -11
  5. data/lib/masamune/schema/fact.rb +10 -1
  6. data/lib/masamune/schema/map.rb +3 -3
  7. data/lib/masamune/schema/row.rb +1 -1
  8. data/lib/masamune/schema/table.rb +55 -23
  9. data/lib/masamune/schema/table_reference.rb +5 -0
  10. data/lib/masamune/tasks/dump_thor.rb +58 -0
  11. data/lib/masamune/tasks/shell_thor.rb +0 -19
  12. data/lib/masamune/template.rb +1 -2
  13. data/lib/masamune/transform/define_foreign_key.psql.erb +39 -0
  14. data/lib/masamune/transform/define_index.psql.erb +7 -3
  15. data/lib/masamune/transform/define_schema.rb +3 -3
  16. data/lib/masamune/transform/define_table.psql.erb +24 -3
  17. data/lib/masamune/transform/define_table.rb +16 -2
  18. data/lib/masamune/transform/define_unique.psql.erb +1 -1
  19. data/lib/masamune/transform/denormalize_table.rb +5 -1
  20. data/lib/masamune/transform/replace_table.psql.erb +9 -13
  21. data/lib/masamune/transform/stage_fact.rb +16 -10
  22. data/lib/masamune/version.rb +1 -1
  23. data/spec/masamune/schema/map_spec.rb +1 -1
  24. data/spec/masamune/tasks/dump_thor_spec.rb +42 -0
  25. data/spec/masamune/tasks/shell_thor_spec.rb +0 -11
  26. data/spec/masamune/template_spec.rb +5 -0
  27. data/spec/masamune/transform/define_table.dimension_spec.rb +81 -52
  28. data/spec/masamune/transform/define_table.fact_spec.rb +27 -63
  29. data/spec/masamune/transform/define_table.table_spec.rb +397 -32
  30. data/spec/masamune/transform/denormalize_table_spec.rb +20 -0
  31. data/spec/masamune/transform/rollup_fact_spec.rb +54 -54
  32. data/spec/masamune/transform/stage_fact_spec.rb +57 -34
  33. metadata +9 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 18fd59e8bd668c7efc320ad374ef1bc96c384184
4
- data.tar.gz: f12fa656c4136942e1d4a5a917788c2d692b7e8f
3
+ metadata.gz: 77bd1d9e8351f259a7d946ac7b2abd11b4c14408
4
+ data.tar.gz: deb47f8efaadd8b90b5825f79970e46833bbb030
5
5
  SHA512:
6
- metadata.gz: f83bd5adbc40df655a01e4a390d6435fd5bc6e26eeae21ea13898a31931ee6b878540988a0c2de02b082f7e5f86e21e53152e9f704df396f1cda7e8dba1a3cd2
7
- data.tar.gz: af6239a670e87aa6757b6dea3c96c467b4243b7b86cbd3ce35ae46dd5e7ff1362751ede5cb1c7a9e686716ef410e5e976fe2ebe65e5c948717de24713ed44445
6
+ metadata.gz: 784416ce6169e2c3f2a9a14465580417ecfc6bdd73e238643022f608a9a04ede66d0b670dfce06e0956b14d3d96a8edbcbe0018ee76bdd0e1ccd4c2bb9f4b37a
7
+ data.tar.gz: 6b1cb22550d392cb6bf6cf36258d21b28e7c323dd6bab5616b4066380e315bcb289c725d52a36f65cd182293c1a28d68f31e928a642d5429ce1f024945f2d81a
data/bin/masamune-dump ADDED
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
+ $: << File.expand_path('../../lib/', __FILE__)
3
+ require 'masamune/tasks/dump_thor'
4
+ Masamune::Tasks::DumpThor.start(ARGV)
@@ -85,7 +85,6 @@ module Masamune::Schema
85
85
 
86
86
  def index=(value)
87
87
  @index ||= Set.new
88
- @index.clear
89
88
  @index +=
90
89
  case value
91
90
  when true
@@ -101,14 +100,8 @@ module Masamune::Schema
101
100
  end
102
101
  end
103
102
 
104
- def unique
105
- self.unique = 'natural' if natural_key
106
- @unique
107
- end
108
-
109
103
  def unique=(value)
110
104
  @unique ||= Set.new
111
- @unique.clear
112
105
  @unique +=
113
106
  case value
114
107
  when true
@@ -125,7 +118,7 @@ module Masamune::Schema
125
118
  end
126
119
 
127
120
  def foreign_key_name
128
- "#{reference.name}.#{@id}".to_sym if reference
121
+ "#{[reference.label, reference.name].compact.join('_')}.#{@id}".to_sym if reference
129
122
  end
130
123
 
131
124
  def compact_name
@@ -363,7 +356,7 @@ module Masamune::Schema
363
356
  end
364
357
 
365
358
  def as_psql
366
- [name, sql_type(surrogate_key), *sql_constraints, reference_constraint, sql_default].compact.join(' ')
359
+ [name, sql_type(surrogate_key), *sql_constraints, sql_default].compact.join(' ')
367
360
  end
368
361
 
369
362
  def as_hql
@@ -378,16 +371,6 @@ module Masamune::Schema
378
371
  end
379
372
  end
380
373
 
381
- # TODO: Add ELEMENT REFERENCES
382
- def reference_constraint
383
- return if parent && parent.temporary?
384
- return if degenerate?
385
- return if array_value?
386
- if reference && reference.surrogate_key.type == type
387
- "REFERENCES #{reference.name}(#{reference.surrogate_key.name})"
388
- end
389
- end
390
-
391
374
  class << self
392
375
  def dereference_column_name(name)
393
376
  return unless name
@@ -477,7 +460,6 @@ module Masamune::Schema
477
460
  def sql_constraints
478
461
  [].tap do |constraints|
479
462
  constraints << 'NOT NULL' unless null || surrogate_key || !strict || parent.temporary? || degenerate?
480
- constraints << "PRIMARY KEY" if surrogate_key
481
463
  end
482
464
  end
483
465
 
@@ -91,27 +91,28 @@ module Masamune::Schema
91
91
  end
92
92
 
93
93
  def initialize_dimension_columns!
94
+ # TODO assign index for load_fact
94
95
  case type
95
96
  when :one, :date
96
97
  initialize_column! id: 'last_modified_at', type: :timestamp, default: 'NOW()'
97
98
  when :two
98
- initialize_column! id: 'start_at', type: :timestamp, default: 'TO_TIMESTAMP(0)', index: true, unique: 'natural'
99
- initialize_column! id: 'end_at', type: :timestamp, null: true, index: true
100
- initialize_column! id: 'version', type: :integer, default: 1, null: true, index: true
99
+ initialize_column! id: 'start_at', type: :timestamp, default: 'TO_TIMESTAMP(0)', index: [:start_at, :natural], unique: :natural
100
+ initialize_column! id: 'end_at', type: :timestamp, null: true, index: :end_at
101
+ initialize_column! id: 'version', type: :integer, default: 1, null: true
101
102
  initialize_column! id: 'last_modified_at', type: :timestamp, default: 'NOW()'
102
103
  when :four
103
104
  children << ledger_table
104
105
  # FIXME derive type from from parent
105
- initialize_column! id: 'parent_id', type: :integer, null: true, reference: ledger_table
106
- initialize_column! id: 'record_id', type: :integer, null: true, reference: ledger_table
107
- initialize_column! id: 'start_at', type: :timestamp, default: 'TO_TIMESTAMP(0)', index: true, unique: 'natural'
108
- initialize_column! id: 'end_at', type: :timestamp, null: true, index: true
109
- initialize_column! id: 'version', type: :integer, default: 1, null: true, index: true
106
+ initialize_column! id: 'parent_id', type: :integer, null: true, reference: TableReference.new(ledger_table)
107
+ initialize_column! id: 'record_id', type: :integer, null: true, reference: TableReference.new(ledger_table)
108
+ initialize_column! id: 'start_at', type: :timestamp, default: 'TO_TIMESTAMP(0)', index: [:start_at, :natural], unique: :natural
109
+ initialize_column! id: 'end_at', type: :timestamp, null: true, index: :end_at
110
+ initialize_column! id: 'version', type: :integer, default: 1, null: true
110
111
  initialize_column! id: 'last_modified_at', type: :timestamp, default: 'NOW()'
111
112
  when :ledger
112
- initialize_column! id: 'source_kind', type: :string, unique: 'natural'
113
- initialize_column! id: 'source_uuid', type: :string, unique: 'natural'
114
- initialize_column! id: 'start_at', type: :timestamp, index: true, unique: 'natural'
113
+ initialize_column! id: 'source_kind', type: :string, index: :natural, unique: :natural
114
+ initialize_column! id: 'source_uuid', type: :string, index: :natural, unique: :natural
115
+ initialize_column! id: 'start_at', type: :timestamp, index: :natural, unique: :natural
115
116
  initialize_column! id: 'last_modified_at', type: :timestamp, default: 'NOW()'
116
117
  initialize_column! id: 'delta', type: :integer
117
118
  when :stage
@@ -34,8 +34,13 @@ module Masamune::Schema
34
34
  @partition = opts.delete(:partition)
35
35
  super opts.reverse_merge(type: :fact)
36
36
  initialize_fact_columns!
37
- foreign_key_columns.each do |column|
37
+ reference_columns.each do |column|
38
+ column.index.clear
38
39
  column.index << column.name
40
+ if type == :stage
41
+ column.index << "#{column.name}_time_key"
42
+ time_key.index << "#{column.name}_time_key"
43
+ end
39
44
  end
40
45
  time_key.index << time_key.name
41
46
  end
@@ -59,6 +64,10 @@ module Masamune::Schema
59
64
  columns.select { |_, column| column && column.reference && column.reference.type == :date }.values.first
60
65
  end
61
66
 
67
+ def primary_keys
68
+ []
69
+ end
70
+
62
71
  def time_key
63
72
  columns.values.detect { |column| column.id == :time_key }
64
73
  end
@@ -112,7 +112,7 @@ module Masamune::Schema
112
112
  @csv ||= CSV.new(@io, options.merge(headers: row.headers, write_headers: write_headers))
113
113
  if row.missing_required_columns.any?
114
114
  missing_required_column_names = row.missing_required_columns.map(&:name)
115
- @store.logger.warn("row '#{row.to_hash}' is missing required columns '#{missing_required_column_names.join(',')}', skipping")
115
+ @store.logger.warn("row '#{row.to_hash}' is missing required columns '#{missing_required_column_names.join(', ')}', skipping")
116
116
  else
117
117
  @csv << row.serialize if append?(row.serialize)
118
118
  end
@@ -258,8 +258,8 @@ module Masamune::Schema
258
258
  Array.wrap(function.call(input)).each do |output|
259
259
  yield output
260
260
  end
261
- rescue
262
- @store.logger.warn("failed to process '#{input}' for #{target.name}, skipping")
261
+ rescue => e
262
+ @store.logger.warn("failed to process row for #{target.name}, skipping: #{e.message}")
263
263
  end
264
264
  end
265
265
  end
@@ -60,7 +60,7 @@ module Masamune::Schema
60
60
  def name(column = nil)
61
61
  return unless @id
62
62
  if column
63
- "#{@id}_#{column.name}()"
63
+ "#{@id}_#{parent.name}_#{column.name}()"
64
64
  else
65
65
  "#{@id}_#{parent.name}_#{parent.surrogate_key.name}()"
66
66
  end
@@ -75,11 +75,9 @@ module Masamune::Schema
75
75
  raise ArgumentError, "table #{name} contains reserved columns" if columns.any? { |column| reserved_column_ids.include?(column.id) }
76
76
 
77
77
  initialize_surrogate_key_column! unless columns.any? { |column| column.surrogate_key }
78
- initialize_reference_columns! unless columns.any? { |column| column.reference }
78
+ initialize_reference_columns! unless columns.any? { |column| column.reference }
79
79
  columns.each do |column|
80
- raise ArgumentError, "table #{name} contains invalid columns" unless column.is_a?(Column)
81
- @columns[column.name] = column.dup
82
- @columns[column.name].parent = self
80
+ initialize_column!(column)
83
81
  end
84
82
  end
85
83
 
@@ -107,6 +105,10 @@ module Masamune::Schema
107
105
  columns.values.detect { |column| column.surrogate_key }
108
106
  end
109
107
 
108
+ def primary_keys
109
+ [*auto_surrogate_keys, surrogate_key].compact
110
+ end
111
+
110
112
  def natural_keys
111
113
  columns.values.select { |column| column.natural_key }
112
114
  end
@@ -119,16 +121,14 @@ module Masamune::Schema
119
121
  def unique_constraints
120
122
  return [] if temporary?
121
123
  unique_constraints_map.map do |_, column_names|
122
- [column_names, short_md5(column_names)]
124
+ [column_names, short_md5(column_names.to_a)]
123
125
  end.uniq
124
126
  end
125
127
 
126
- # TODO: Add optional USING
127
- # TODO: Default to GIN for array columns
128
128
  def index_columns
129
129
  index_column_map.map do |_, column_names|
130
130
  unique_index = reverse_unique_constraints_map.key?(column_names.sort)
131
- [column_names, unique_index, short_md5(column_names)]
131
+ [column_names, unique_index, short_md5(column_names.to_a)]
132
132
  end.uniq
133
133
  end
134
134
 
@@ -152,7 +152,7 @@ module Masamune::Schema
152
152
  end
153
153
 
154
154
  def foreign_key_columns
155
- columns.values.select { | column| column.reference && column.reference.foreign_key }
155
+ columns.values.select { | column| !column.degenerate? && column.reference && column.reference.foreign_key }
156
156
  end
157
157
 
158
158
  def partitions
@@ -180,22 +180,28 @@ module Masamune::Schema
180
180
  end
181
181
 
182
182
  def denormalized_columns
183
+ return to_enum(__method__).to_a.flatten.compact unless block_given?
183
184
  columns.map do |_, column|
184
185
  next if column.surrogate_key || column.ignore
185
186
  if column.reference
186
- column.reference.natural_keys.any? ? column.reference.natural_keys : column.reference.denormalized_columns
187
+ (column.reference.natural_keys.any? ? column.reference.natural_keys : column.reference.denormalized_columns).each do |join_column|
188
+ yield [column.reference, join_column]
189
+ end
187
190
  else
188
- column
191
+ yield [nil, column]
189
192
  end
190
- end.flatten.compact
193
+ end
191
194
  end
192
195
 
193
196
  def denormalized_column_names
194
- denormalized_columns.map do |column|
197
+ return to_enum(__method__).to_a unless block_given?
198
+ denormalized_columns do |reference, column|
195
199
  if column.parent == self
196
- column.name.to_s
200
+ yield column.name.to_s
201
+ elsif reference
202
+ yield [reference.id, column.name].compact.join('.')
197
203
  else
198
- [column.parent.id, column.name].join('.')
204
+ yield [column.parent.id, column.name].compact.join('.')
199
205
  end
200
206
  end
201
207
  end
@@ -247,6 +253,24 @@ module Masamune::Schema
247
253
  Integer('0x' + Digest::MD5.hexdigest(name)) % (1 << 63)
248
254
  end
249
255
 
256
+ def auto_surrogate_keys
257
+ columns.values.select { |column| column.reference && column.reference.surrogate_key.auto }.uniq.compact
258
+ end
259
+
260
+ def foreign_key_constraints
261
+ return [] if temporary?
262
+ foreign_key_columns.map do |column|
263
+ if column.reference.auto_surrogate_keys == auto_surrogate_keys
264
+ column_names = [*column.reference.auto_surrogate_keys.map(&:name), column.name].compact
265
+ reference_column_names = [*column.reference.auto_surrogate_keys.map(&:name), column.reference.surrogate_key.name].compact
266
+ else
267
+ column_names = [column.name]
268
+ reference_column_names = [column.reference.surrogate_key.name]
269
+ end
270
+ [short_md5(column_names), column_names, column.reference.name, reference_column_names]
271
+ end.compact
272
+ end
273
+
250
274
  private
251
275
 
252
276
  def stage_table_columns(parent, selected = [], inherit = true)
@@ -291,18 +315,22 @@ module Masamune::Schema
291
315
  reference.unreserved_columns.each do |_, column|
292
316
  next if column.surrogate_key
293
317
  next if column.ignore
294
- initialize_column! id: column.id, type: column.type, reference: reference, default: reference.default, index: true, null: reference.null, natural_key: reference.natural_key
318
+ initialize_column! id: column.id, type: column.type, reference: reference, default: reference.default, null: reference.null, natural_key: reference.natural_key
295
319
  end
296
320
  elsif reference.foreign_key
297
321
  # FIXME column.reference should point to reference.surrogate_key, only allow column references to Columns
298
- initialize_column! id: reference.foreign_key_name, type: reference.foreign_key_type, reference: reference, default: reference.default, index: true, null: reference.null, natural_key: reference.natural_key
322
+ initialize_column! id: reference.foreign_key_name, type: reference.foreign_key_type, reference: reference, default: reference.default, null: reference.null, natural_key: reference.natural_key
299
323
  end
300
324
  end
301
325
  end
302
326
 
303
- def initialize_column!(options = {})
304
- column = Masamune::Schema::Column.new(options.merge(parent: self))
305
- @columns[column.name.to_sym] = column
327
+ def initialize_column!(column_or_options)
328
+ column = column_or_options.is_a?(Column) ? column_or_options.dup : Column.new(column_or_options.merge(parent: self))
329
+ column_key = column.name.to_sym
330
+ @columns[column_key] = column
331
+ @columns[column_key].parent = self
332
+ @columns[column_key].index += [column_key, :natural] if column.natural_key
333
+ @columns[column_key].unique << :natural if column.natural_key
306
334
  end
307
335
 
308
336
  def index_column_map
@@ -311,9 +339,10 @@ module Masamune::Schema
311
339
  columns.each do |_, column|
312
340
  column.index.each do |index|
313
341
  map[index] << column.name
342
+ map[index].uniq!
314
343
  end
315
344
  end
316
- Hash[map.sort_by { |k, v| v.length }]
345
+ Hash[map.sort_by { |k, v| [v.length, k.to_s] }]
317
346
  end
318
347
  end
319
348
 
@@ -321,11 +350,14 @@ module Masamune::Schema
321
350
  @unique_constraints_map ||= begin
322
351
  map = Hash.new { |h,k| h[k] = [] }
323
352
  columns.each do |_, column|
353
+ next if column.auto_reference
324
354
  column.unique.each do |unique|
355
+ map[unique] += auto_surrogate_keys.map(&:name)
325
356
  map[unique] << column.name
357
+ map[unique].uniq!
326
358
  end
327
359
  end unless temporary?
328
- Hash[map.sort_by { |k, v| v.length }]
360
+ Hash[map.sort_by { |k, v| [v.length, k.to_s] }]
329
361
  end
330
362
  end
331
363
 
@@ -334,7 +366,7 @@ module Masamune::Schema
334
366
  end
335
367
 
336
368
  def short_md5(*a)
337
- Digest::MD5.hexdigest(a.join('_'))[0..6]
369
+ Digest::MD5.hexdigest(a.compact.sort.uniq.join('_'))[0..6]
338
370
  end
339
371
  end
340
372
  end
@@ -31,6 +31,7 @@ module Masamune::Schema
31
31
  natural_key: false,
32
32
  denormalize: false,
33
33
  multiple: false,
34
+ through: [],
34
35
  degenerate: false
35
36
  }
36
37
 
@@ -72,5 +73,9 @@ module Masamune::Schema
72
73
  default_row.name(column)
73
74
  end
74
75
  end
76
+
77
+ def through=(columns)
78
+ @through = Array.wrap(columns)
79
+ end
75
80
  end
76
81
  end
@@ -0,0 +1,58 @@
1
+ # The MIT License (MIT)
2
+ #
3
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ require 'masamune'
24
+ require 'thor'
25
+
26
+ module Masamune::Tasks
27
+ class DumpThor < Thor
28
+ include Masamune::Thor
29
+ include Masamune::Transform::DefineSchema
30
+
31
+ # FIXME need to add an unnecessary namespace until this issue is fixed:
32
+ # https://github.com/wycats/thor/pull/247
33
+ namespace :dump
34
+ skip_lock!
35
+
36
+ desc 'dump', 'Dump schema'
37
+ method_option :type, :enum => ['psql', 'hql'], :desc => 'Schema type', :default => 'psql'
38
+ method_option :with_index, :type => :boolean, :desc => 'Dump schema with indexes', :default => true
39
+ method_option :with_foreign_key, :type => :boolean, :desc => 'Dump schema with foreign key constraints', :default => true
40
+ method_option :with_unique_constraint, :type => :boolean, :desc => 'Dump schema with uniqueness constraints', :default => true
41
+ def dump_exec
42
+ print_catalog
43
+ exit
44
+ end
45
+ default_task :dump_exec
46
+
47
+ private
48
+
49
+ def print_catalog
50
+ case options[:type]
51
+ when 'psql'
52
+ puts define_schema(catalog, :postgres, options.slice(:with_index, :with_foreign_key, :with_unique_constraint).to_h.symbolize_keys)
53
+ when 'hql'
54
+ puts define_schema(catalog, :hive)
55
+ end
56
+ end
57
+ end
58
+ end
@@ -28,7 +28,6 @@ module Masamune::Tasks
28
28
  class ShellThor < Thor
29
29
  include Masamune::Thor
30
30
  include Masamune::Actions::DataFlow
31
- include Masamune::Transform::DefineSchema
32
31
 
33
32
  # FIXME need to add an unnecessary namespace until this issue is fixed:
34
33
  # https://github.com/wycats/thor/pull/247
@@ -36,29 +35,11 @@ module Masamune::Tasks
36
35
  skip_lock!
37
36
 
38
37
  desc 'shell', 'Launch an interactive shell'
39
- method_option :dump, :type => :boolean, :desc => 'Dump SQL schema', :default => false
40
- method_option :type, :enum => ['psql', 'hql'], :desc => 'Schema type', :default => 'psql'
41
38
  method_option :prompt, :desc => 'Set shell prompt', :default => 'masamune'
42
39
  class_option :start, :aliases => '-a', :desc => 'Start time', default: '1 month ago'
43
40
  def shell_exec
44
- if options[:dump]
45
- print_catalog
46
- exit
47
- end
48
-
49
41
  Pry.start self, prompt: proc { options[:prompt] + '> ' }
50
42
  end
51
43
  default_task :shell_exec
52
-
53
- private
54
-
55
- def print_catalog
56
- case options[:type]
57
- when 'psql'
58
- puts define_schema(catalog, :postgres)
59
- when 'hql'
60
- puts define_schema(catalog, :hive)
61
- end
62
- end
63
44
  end
64
45
  end