masamune 0.11.9 → 0.12.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/masamune/actions/transform.rb +31 -16
- data/lib/masamune/schema.rb +0 -1
- data/lib/masamune/schema/catalog.rb +2 -10
- data/lib/masamune/schema/column.rb +16 -30
- data/lib/masamune/schema/dimension.rb +2 -9
- data/lib/masamune/schema/fact.rb +0 -4
- data/lib/masamune/schema/map.rb +1 -1
- data/lib/masamune/schema/row.rb +3 -3
- data/lib/masamune/schema/store.rb +1 -3
- data/lib/masamune/schema/table.rb +28 -2
- data/lib/masamune/transform.rb +0 -1
- data/lib/masamune/transform/define_schema.rb +0 -6
- data/lib/masamune/transform/define_table.hql.erb +7 -6
- data/lib/masamune/transform/define_table.rb +1 -0
- data/lib/masamune/transform/{define_event_view.hql.erb → denormalize_table.hql.erb} +8 -26
- data/lib/masamune/transform/denormalize_table.psql.erb +1 -1
- data/lib/masamune/transform/denormalize_table.rb +13 -4
- data/lib/masamune/transform/snapshot_dimension.rb +1 -1
- data/lib/masamune/transform/stage_fact.rb +1 -1
- data/lib/masamune/version.rb +1 -1
- data/spec/masamune/actions/transform_spec.rb +50 -18
- data/spec/masamune/schema/catalog_spec.rb +0 -53
- data/spec/masamune/schema/column_spec.rb +9 -41
- data/spec/masamune/schema/fact_spec.rb +3 -1
- data/spec/masamune/schema/map_spec.rb +187 -189
- data/spec/masamune/schema/table_spec.rb +8 -0
- data/spec/masamune/transform/bulk_upsert.dimension_spec.rb +4 -6
- data/spec/masamune/transform/define_schema_spec.rb +5 -6
- data/spec/masamune/transform/define_table.dimension_spec.rb +101 -2
- data/spec/masamune/transform/define_table.fact_spec.rb +2 -0
- data/spec/masamune/transform/denormalize_table_spec.rb +170 -14
- data/spec/masamune/transform/snapshot_dimension_spec.rb +1 -1
- data/spec/masamune/transform/stage_dimension_spec.rb +3 -3
- metadata +3 -9
- data/lib/masamune/schema/event.rb +0 -121
- data/lib/masamune/transform/define_event_view.rb +0 -60
- data/spec/masamune/schema/event_spec.rb +0 -75
- data/spec/masamune/transform/define_event_view_spec.rb +0 -84
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a12e8b8ce6cb92c5d551e9024053d50ff59f3cd6
|
4
|
+
data.tar.gz: 72bdc252c7b9b6f9e787b211eae46bc3edb69358
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 96be3850b76ec158f8a13be46ec2a5108c9ec01f99987f0a9414602b4072b768e072b72906a28ac9b8aa101940ece9e0f1255c2bcf25bc6cdf2726d9c7783c36
|
7
|
+
data.tar.gz: 31863312d8fdbbad89e1e0a57fcbbe51b754feca838891d1f02cefe09f547da431f6089e36e6dafc56aa20cabea8a4d0cec432b0c0b07e11c496714668388942
|
@@ -49,21 +49,10 @@ module Masamune::Actions
|
|
49
49
|
FILE_MODE = 0777 - File.umask
|
50
50
|
|
51
51
|
def load_dimension(source_files, source, target)
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
if source.respond_to?(:map) and map = source.map(to: target)
|
56
|
-
result = map.apply(source_files, output)
|
57
|
-
else
|
58
|
-
output = source_files
|
59
|
-
result = source
|
52
|
+
optional_apply_map(source_files, source, target) do |intermediate_files, intermediate|
|
53
|
+
transform = Wrapper.load_dimension(intermediate_files, intermediate, target)
|
54
|
+
postgres file: transform.to_file, debug: (source.debug || target.debug || intermediate.debug)
|
60
55
|
end
|
61
|
-
|
62
|
-
transform = Wrapper.load_dimension(output, result, target)
|
63
|
-
logger.debug(File.read(output)) if (source.debug || map.debug)
|
64
|
-
postgres file: transform.to_file, debug: (source.debug || target.debug || map.debug)
|
65
|
-
ensure
|
66
|
-
output.unlink
|
67
56
|
end
|
68
57
|
|
69
58
|
def consolidate_dimension(target)
|
@@ -77,13 +66,39 @@ module Masamune::Actions
|
|
77
66
|
end
|
78
67
|
|
79
68
|
def load_fact(source_files, source, target, date)
|
80
|
-
|
81
|
-
|
69
|
+
optional_apply_map(source_files, source, target) do |intermediate_files, intermediate|
|
70
|
+
transform = Wrapper.load_fact(intermediate_files, intermediate, target, date)
|
71
|
+
postgres file: transform.to_file, debug: (source.debug || target.debug || intermediate.debug)
|
72
|
+
end
|
82
73
|
end
|
83
74
|
|
84
75
|
def rollup_fact(source, target, date)
|
85
76
|
transform = Wrapper.rollup_fact(source, target, date)
|
86
77
|
postgres file: transform.to_file, debug: (source.debug || target.debug)
|
87
78
|
end
|
79
|
+
|
80
|
+
private
|
81
|
+
|
82
|
+
def optional_apply_map(source_files, source, target, &block)
|
83
|
+
if source.respond_to?(:map) and map = source.map(to: target)
|
84
|
+
apply_map(map, source_files, source, target, &block)
|
85
|
+
else
|
86
|
+
yield source_files, source
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
def apply_map(map, source_files, source, target)
|
91
|
+
Tempfile.open('masamune') do |output|
|
92
|
+
begin
|
93
|
+
FileUtils.chmod(FILE_MODE, output.path)
|
94
|
+
result = map.apply(source_files, output)
|
95
|
+
result.debug = map.debug
|
96
|
+
logger.debug(File.read(output)) if (source.debug || result.debug)
|
97
|
+
yield output, result
|
98
|
+
ensure
|
99
|
+
output.unlink
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
88
103
|
end
|
89
104
|
end
|
data/lib/masamune/schema.rb
CHANGED
@@ -173,7 +173,7 @@ module Masamune::Schema
|
|
173
173
|
end
|
174
174
|
|
175
175
|
def file(id, options = {})
|
176
|
-
format_options = options.extract!(:format, :headers)
|
176
|
+
format_options = options.extract!(:format, :headers, :json_encoding)
|
177
177
|
@context.push(options)
|
178
178
|
yield if block_given?
|
179
179
|
store = HasFormat.new(@context, format_options)
|
@@ -182,14 +182,6 @@ module Masamune::Schema
|
|
182
182
|
@context.pop
|
183
183
|
end
|
184
184
|
|
185
|
-
def event(id, options = {})
|
186
|
-
@context.push(options)
|
187
|
-
yield if block_given?
|
188
|
-
@context.events[id] = HasMap.new Masamune::Schema::Event.new(@context.options.merge(id: id))
|
189
|
-
ensure
|
190
|
-
@context.pop
|
191
|
-
end
|
192
|
-
|
193
185
|
def attribute(id, options = {})
|
194
186
|
@context.options[:attributes] << Masamune::Schema::Event::Attribute.new(options.merge(id: id))
|
195
187
|
end
|
@@ -200,7 +192,7 @@ module Masamune::Schema
|
|
200
192
|
raise ArgumentError, "invalid map, from: is missing" unless from && from.try(:id)
|
201
193
|
raise ArgumentError, "invalid map from: '#{from.id}', to: is missing" unless to
|
202
194
|
@context.push(options)
|
203
|
-
@context.options[:function] = block.to_proc
|
195
|
+
@context.options[:function] = block.to_proc if block
|
204
196
|
from.maps[to] ||= Masamune::Schema::Map.new(@context.options.merge(source: from, target: to))
|
205
197
|
ensure
|
206
198
|
@context.pop
|
@@ -133,7 +133,7 @@ module Masamune::Schema
|
|
133
133
|
def compact_name
|
134
134
|
if reference
|
135
135
|
# XXX once columns only reference columns, this can be cleaned up
|
136
|
-
if @id == reference.surrogate_key.reference_name(reference.label)
|
136
|
+
if reference.surrogate_key && @id == reference.surrogate_key.reference_name(reference.label)
|
137
137
|
"#{reference.id}.#{reference.surrogate_key.id}".to_sym
|
138
138
|
else
|
139
139
|
"#{reference.id}.#{@id}".to_sym
|
@@ -184,12 +184,12 @@ module Masamune::Schema
|
|
184
184
|
array_value? ? "#{elem}[]" : elem
|
185
185
|
end
|
186
186
|
|
187
|
-
def hql_type
|
187
|
+
def hql_type(for_surrogate_key = false)
|
188
188
|
elem =
|
189
189
|
case type
|
190
190
|
when :integer
|
191
|
-
'INT'
|
192
|
-
when :string
|
191
|
+
for_surrogate_key ? 'STRING' : 'INT'
|
192
|
+
when :string, :enum, :key_value, :timestamp
|
193
193
|
'STRING'
|
194
194
|
else
|
195
195
|
sql_type
|
@@ -264,7 +264,9 @@ module Masamune::Schema
|
|
264
264
|
when Date, DateTime
|
265
265
|
value.to_time
|
266
266
|
when String
|
267
|
-
if value
|
267
|
+
if value.blank?
|
268
|
+
nil
|
269
|
+
elsif value =~ /\A\d+\z/
|
268
270
|
Time.at(value.to_i)
|
269
271
|
else
|
270
272
|
Time.parse(value)
|
@@ -281,7 +283,7 @@ module Masamune::Schema
|
|
281
283
|
when Hash
|
282
284
|
value
|
283
285
|
when String
|
284
|
-
|
286
|
+
YAML.load(value)
|
285
287
|
when nil
|
286
288
|
{}
|
287
289
|
end
|
@@ -290,7 +292,7 @@ module Masamune::Schema
|
|
290
292
|
when Hash
|
291
293
|
value
|
292
294
|
when String
|
293
|
-
|
295
|
+
JSON.load(value)
|
294
296
|
when nil
|
295
297
|
{}
|
296
298
|
end
|
@@ -366,6 +368,10 @@ module Masamune::Schema
|
|
366
368
|
[name, sql_type(surrogate_key), *sql_constraints, reference_constraint, sql_default].compact.join(' ')
|
367
369
|
end
|
368
370
|
|
371
|
+
def as_hql
|
372
|
+
[name, hql_type(surrogate_key)].compact.join(' ')
|
373
|
+
end
|
374
|
+
|
369
375
|
def as_hash
|
370
376
|
{id: id}.tap do |hash|
|
371
377
|
DEFAULT_ATTRIBUTES.keys.each do |attr|
|
@@ -462,11 +468,9 @@ module Masamune::Schema
|
|
462
468
|
end
|
463
469
|
|
464
470
|
def required_value?
|
465
|
-
if reference
|
466
|
-
|
467
|
-
|
468
|
-
surrogate_key || natural_key || !(null || default)
|
469
|
-
end
|
471
|
+
return false if reference && (reference.null || !reference.default.nil?)
|
472
|
+
return false if null || !default.nil?
|
473
|
+
true
|
470
474
|
end
|
471
475
|
|
472
476
|
private
|
@@ -495,24 +499,6 @@ module Masamune::Schema
|
|
495
499
|
end
|
496
500
|
end
|
497
501
|
|
498
|
-
def ruby_key_value(hash)
|
499
|
-
case sub_type
|
500
|
-
when :boolean
|
501
|
-
Hash[hash.map { |key, value| ruby_boolean_key_value(key, value) }.compact]
|
502
|
-
else
|
503
|
-
hash
|
504
|
-
end
|
505
|
-
end
|
506
|
-
|
507
|
-
def ruby_boolean_key_value(key, value)
|
508
|
-
case value
|
509
|
-
when true, '1', 1
|
510
|
-
[key, true]
|
511
|
-
when false, '0', 0
|
512
|
-
[key, false]
|
513
|
-
end
|
514
|
-
end
|
515
|
-
|
516
502
|
def csv_array(value)
|
517
503
|
case value
|
518
504
|
when Array
|
@@ -77,15 +77,8 @@ module Masamune::Schema
|
|
77
77
|
next if column.surrogate_key
|
78
78
|
next if reserved_column_ids.include?(column.id)
|
79
79
|
|
80
|
-
|
81
|
-
|
82
|
-
column_now.id, column_was.id = "#{column.id}_now", "#{column.id}_was"
|
83
|
-
column_now.strict, column_was.strict = false, false
|
84
|
-
[column_now, column_was]
|
85
|
-
else
|
86
|
-
column.dup.tap do |column_copy|
|
87
|
-
column_copy.strict = false unless column.surrogate_key || column.natural_key || (column.reference && column.reference.surrogate_key.auto)
|
88
|
-
end
|
80
|
+
column.dup.tap do |column_copy|
|
81
|
+
column_copy.strict = false unless column.surrogate_key || column.natural_key || (column.reference && column.reference.surrogate_key.auto)
|
89
82
|
end
|
90
83
|
end.flatten
|
91
84
|
end
|
data/lib/masamune/schema/fact.rb
CHANGED
@@ -82,10 +82,6 @@ module Masamune::Schema
|
|
82
82
|
@partition_tables[partition_range] ||= self.class.new(id: @id, store: store, columns: partition_table_columns, parent: self, range: partition_range, grain: grain, inherit: true)
|
83
83
|
end
|
84
84
|
|
85
|
-
def partitions
|
86
|
-
columns.select { |_, column| column.partition }
|
87
|
-
end
|
88
|
-
|
89
85
|
def measures
|
90
86
|
columns.select { |_, column| column.measure }
|
91
87
|
end
|
data/lib/masamune/schema/map.rb
CHANGED
data/lib/masamune/schema/row.rb
CHANGED
@@ -89,7 +89,7 @@ module Masamune::Schema
|
|
89
89
|
end
|
90
90
|
|
91
91
|
def headers
|
92
|
-
|
92
|
+
@columns.map { |_, column| column.name }
|
93
93
|
end
|
94
94
|
|
95
95
|
def serialize
|
@@ -121,8 +121,8 @@ module Masamune::Schema
|
|
121
121
|
values.each do |key, value|
|
122
122
|
next unless key
|
123
123
|
if column = parent.dereference_column_name(key)
|
124
|
-
@columns[column.
|
125
|
-
result[column.
|
124
|
+
@columns[column.compact_name] = column
|
125
|
+
result[column.compact_name] = column.ruby_value(value)
|
126
126
|
elsif strict
|
127
127
|
raise ArgumentError, "#{@values} contains undefined columns #{key}"
|
128
128
|
end
|
@@ -26,7 +26,7 @@ module Masamune::Schema
|
|
26
26
|
class Store
|
27
27
|
include Masamune::HasEnvironment
|
28
28
|
|
29
|
-
SUPPORTED_ATTRIBUTES = %(table dimension fact file
|
29
|
+
SUPPORTED_ATTRIBUTES = %(table dimension fact file)
|
30
30
|
|
31
31
|
DEFAULT_ATTRIBUTES =
|
32
32
|
{
|
@@ -45,7 +45,6 @@ module Masamune::Schema
|
|
45
45
|
attr_accessor :dimensions
|
46
46
|
attr_accessor :facts
|
47
47
|
attr_accessor :files
|
48
|
-
attr_accessor :events
|
49
48
|
attr_accessor :references
|
50
49
|
|
51
50
|
class << self
|
@@ -67,7 +66,6 @@ module Masamune::Schema
|
|
67
66
|
@dimensions = {}.with_indifferent_access
|
68
67
|
@facts = {}.with_indifferent_access
|
69
68
|
@files = {}.with_indifferent_access
|
70
|
-
@events = {}.with_indifferent_access
|
71
69
|
@references = {}.with_indifferent_access
|
72
70
|
@extra = []
|
73
71
|
end
|
@@ -39,7 +39,8 @@ module Masamune::Schema
|
|
39
39
|
columns: {},
|
40
40
|
rows: [],
|
41
41
|
inherit: false,
|
42
|
-
debug: false
|
42
|
+
debug: false,
|
43
|
+
properties: {}
|
43
44
|
}
|
44
45
|
|
45
46
|
DEFAULT_ATTRIBUTES.keys.each do |attr|
|
@@ -111,7 +112,7 @@ module Masamune::Schema
|
|
111
112
|
end
|
112
113
|
|
113
114
|
def defined_columns
|
114
|
-
columns.values
|
115
|
+
columns.values.reject { |column| column.partition }
|
115
116
|
end
|
116
117
|
method_with_last_element :defined_columns
|
117
118
|
|
@@ -154,6 +155,10 @@ module Masamune::Schema
|
|
154
155
|
columns.values.select { | column| column.reference && column.reference.foreign_key }
|
155
156
|
end
|
156
157
|
|
158
|
+
def partitions
|
159
|
+
columns.select { |_, column| column.partition }
|
160
|
+
end
|
161
|
+
|
157
162
|
def insert_rows
|
158
163
|
rows.select { |row| row.insert_values.any? }
|
159
164
|
end
|
@@ -174,6 +179,27 @@ module Masamune::Schema
|
|
174
179
|
columns.reject { |_, column| reserved_column_ids.include?(column.id) }
|
175
180
|
end
|
176
181
|
|
182
|
+
def denormalized_columns
|
183
|
+
columns.map do |_, column|
|
184
|
+
next if column.surrogate_key || column.ignore
|
185
|
+
if column.reference
|
186
|
+
column.reference.natural_keys.any? ? column.reference.natural_keys : column.reference.denormalized_columns
|
187
|
+
else
|
188
|
+
column
|
189
|
+
end
|
190
|
+
end.flatten.compact
|
191
|
+
end
|
192
|
+
|
193
|
+
def denormalized_column_names
|
194
|
+
denormalized_columns.map do |column|
|
195
|
+
if column.parent == self
|
196
|
+
column.name.to_s
|
197
|
+
else
|
198
|
+
[column.parent.id, column.name].join('.')
|
199
|
+
end
|
200
|
+
end
|
201
|
+
end
|
202
|
+
|
177
203
|
def stage_table(options = {})
|
178
204
|
selected = options[:columns] if options[:columns]
|
179
205
|
selected ||= options[:target].columns.values.map(&:compact_name) if options[:target]
|
data/lib/masamune/transform.rb
CHANGED
@@ -21,12 +21,10 @@
|
|
21
21
|
# THE SOFTWARE.
|
22
22
|
|
23
23
|
require 'masamune/transform/define_table'
|
24
|
-
require 'masamune/transform/define_event_view'
|
25
24
|
|
26
25
|
module Masamune::Transform
|
27
26
|
module DefineSchema
|
28
27
|
include DefineTable
|
29
|
-
include DefineEventView
|
30
28
|
|
31
29
|
extend ActiveSupport::Concern
|
32
30
|
|
@@ -44,10 +42,6 @@ module Masamune::Transform
|
|
44
42
|
operators << define_table(fact)
|
45
43
|
end
|
46
44
|
|
47
|
-
context.events.each do |_, event|
|
48
|
-
operators << define_event_view(event)
|
49
|
-
end
|
50
|
-
|
51
45
|
operators += context.extra(:post)
|
52
46
|
|
53
47
|
Operator.new __method__, *operators, source: context
|
@@ -22,13 +22,14 @@
|
|
22
22
|
|
23
23
|
CREATE TABLE IF NOT EXISTS <%= target.name %>
|
24
24
|
(
|
25
|
-
<%- target.
|
26
|
-
<%= column.
|
25
|
+
<%- target.defined_columns.each do |column, last| -%>
|
26
|
+
<%= column.as_hql %><%= ',' unless last %>
|
27
27
|
<%- end -%>
|
28
|
-
<%- target.measures.each do |_, measure| -%>
|
29
|
-
<%= measure.name %> <%= measure.hql_type %>,
|
30
|
-
<%- end -%>
|
31
|
-
<%= target.time_key.name %> <%= target.time_key.hql_type %>
|
32
28
|
)
|
29
|
+
<%- if target.partition_by -%>
|
33
30
|
PARTITIONED BY (<%= target.partition_by %>)
|
31
|
+
<%- end -%>
|
32
|
+
<%- if target.properties[:format] == :tsv -%>
|
33
|
+
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
|
34
|
+
<%- end -%>
|
34
35
|
TBLPROPERTIES ('serialization.null.format' = '');
|
@@ -20,32 +20,14 @@
|
|
20
20
|
-- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
21
|
-- THE SOFTWARE.
|
22
22
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
type,
|
27
|
-
<%- target.view_columns.each do |value| -%>
|
28
|
-
<%= value %>,
|
23
|
+
SELECT
|
24
|
+
<%- target.select_columns(columns).each do |column, last| -%>
|
25
|
+
<%= column %><%= ',' unless last %>
|
29
26
|
<%- end -%>
|
30
|
-
delta,
|
31
|
-
created_at,
|
32
|
-
y, m, d ,h
|
33
|
-
) PARTITIONED ON (y, m, d, h) AS
|
34
|
-
SELECT DISTINCT
|
35
|
-
uuid,
|
36
|
-
type,
|
37
|
-
<%- target.view_values.each do |value| -%>
|
38
|
-
<%= value %>,
|
39
|
-
<%- end -%>
|
40
|
-
IF(type = '<%= target.update_type %>', 1, 0) AS delta,
|
41
|
-
ctime_iso8601 AS created_at,
|
42
|
-
y, m, d ,h
|
43
27
|
FROM
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
type = '<%= target.update_type %>' OR
|
50
|
-
type = '<%= target.delete_type %>'
|
28
|
+
<%= target.name %>
|
29
|
+
ORDER BY
|
30
|
+
<%- target.order_by_columns(order_by).each do |column, last| -%>
|
31
|
+
<%= column %><%= ',' unless last %>
|
32
|
+
<%- end -%>
|
51
33
|
;
|