masamune 0.11.9 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/masamune/actions/transform.rb +31 -16
- data/lib/masamune/schema.rb +0 -1
- data/lib/masamune/schema/catalog.rb +2 -10
- data/lib/masamune/schema/column.rb +16 -30
- data/lib/masamune/schema/dimension.rb +2 -9
- data/lib/masamune/schema/fact.rb +0 -4
- data/lib/masamune/schema/map.rb +1 -1
- data/lib/masamune/schema/row.rb +3 -3
- data/lib/masamune/schema/store.rb +1 -3
- data/lib/masamune/schema/table.rb +28 -2
- data/lib/masamune/transform.rb +0 -1
- data/lib/masamune/transform/define_schema.rb +0 -6
- data/lib/masamune/transform/define_table.hql.erb +7 -6
- data/lib/masamune/transform/define_table.rb +1 -0
- data/lib/masamune/transform/{define_event_view.hql.erb → denormalize_table.hql.erb} +8 -26
- data/lib/masamune/transform/denormalize_table.psql.erb +1 -1
- data/lib/masamune/transform/denormalize_table.rb +13 -4
- data/lib/masamune/transform/snapshot_dimension.rb +1 -1
- data/lib/masamune/transform/stage_fact.rb +1 -1
- data/lib/masamune/version.rb +1 -1
- data/spec/masamune/actions/transform_spec.rb +50 -18
- data/spec/masamune/schema/catalog_spec.rb +0 -53
- data/spec/masamune/schema/column_spec.rb +9 -41
- data/spec/masamune/schema/fact_spec.rb +3 -1
- data/spec/masamune/schema/map_spec.rb +187 -189
- data/spec/masamune/schema/table_spec.rb +8 -0
- data/spec/masamune/transform/bulk_upsert.dimension_spec.rb +4 -6
- data/spec/masamune/transform/define_schema_spec.rb +5 -6
- data/spec/masamune/transform/define_table.dimension_spec.rb +101 -2
- data/spec/masamune/transform/define_table.fact_spec.rb +2 -0
- data/spec/masamune/transform/denormalize_table_spec.rb +170 -14
- data/spec/masamune/transform/snapshot_dimension_spec.rb +1 -1
- data/spec/masamune/transform/stage_dimension_spec.rb +3 -3
- metadata +3 -9
- data/lib/masamune/schema/event.rb +0 -121
- data/lib/masamune/transform/define_event_view.rb +0 -60
- data/spec/masamune/schema/event_spec.rb +0 -75
- data/spec/masamune/transform/define_event_view_spec.rb +0 -84
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a12e8b8ce6cb92c5d551e9024053d50ff59f3cd6
|
4
|
+
data.tar.gz: 72bdc252c7b9b6f9e787b211eae46bc3edb69358
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 96be3850b76ec158f8a13be46ec2a5108c9ec01f99987f0a9414602b4072b768e072b72906a28ac9b8aa101940ece9e0f1255c2bcf25bc6cdf2726d9c7783c36
|
7
|
+
data.tar.gz: 31863312d8fdbbad89e1e0a57fcbbe51b754feca838891d1f02cefe09f547da431f6089e36e6dafc56aa20cabea8a4d0cec432b0c0b07e11c496714668388942
|
@@ -49,21 +49,10 @@ module Masamune::Actions
|
|
49
49
|
FILE_MODE = 0777 - File.umask
|
50
50
|
|
51
51
|
def load_dimension(source_files, source, target)
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
if source.respond_to?(:map) and map = source.map(to: target)
|
56
|
-
result = map.apply(source_files, output)
|
57
|
-
else
|
58
|
-
output = source_files
|
59
|
-
result = source
|
52
|
+
optional_apply_map(source_files, source, target) do |intermediate_files, intermediate|
|
53
|
+
transform = Wrapper.load_dimension(intermediate_files, intermediate, target)
|
54
|
+
postgres file: transform.to_file, debug: (source.debug || target.debug || intermediate.debug)
|
60
55
|
end
|
61
|
-
|
62
|
-
transform = Wrapper.load_dimension(output, result, target)
|
63
|
-
logger.debug(File.read(output)) if (source.debug || map.debug)
|
64
|
-
postgres file: transform.to_file, debug: (source.debug || target.debug || map.debug)
|
65
|
-
ensure
|
66
|
-
output.unlink
|
67
56
|
end
|
68
57
|
|
69
58
|
def consolidate_dimension(target)
|
@@ -77,13 +66,39 @@ module Masamune::Actions
|
|
77
66
|
end
|
78
67
|
|
79
68
|
def load_fact(source_files, source, target, date)
|
80
|
-
|
81
|
-
|
69
|
+
optional_apply_map(source_files, source, target) do |intermediate_files, intermediate|
|
70
|
+
transform = Wrapper.load_fact(intermediate_files, intermediate, target, date)
|
71
|
+
postgres file: transform.to_file, debug: (source.debug || target.debug || intermediate.debug)
|
72
|
+
end
|
82
73
|
end
|
83
74
|
|
84
75
|
def rollup_fact(source, target, date)
|
85
76
|
transform = Wrapper.rollup_fact(source, target, date)
|
86
77
|
postgres file: transform.to_file, debug: (source.debug || target.debug)
|
87
78
|
end
|
79
|
+
|
80
|
+
private
|
81
|
+
|
82
|
+
def optional_apply_map(source_files, source, target, &block)
|
83
|
+
if source.respond_to?(:map) and map = source.map(to: target)
|
84
|
+
apply_map(map, source_files, source, target, &block)
|
85
|
+
else
|
86
|
+
yield source_files, source
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
def apply_map(map, source_files, source, target)
|
91
|
+
Tempfile.open('masamune') do |output|
|
92
|
+
begin
|
93
|
+
FileUtils.chmod(FILE_MODE, output.path)
|
94
|
+
result = map.apply(source_files, output)
|
95
|
+
result.debug = map.debug
|
96
|
+
logger.debug(File.read(output)) if (source.debug || result.debug)
|
97
|
+
yield output, result
|
98
|
+
ensure
|
99
|
+
output.unlink
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
88
103
|
end
|
89
104
|
end
|
data/lib/masamune/schema.rb
CHANGED
@@ -173,7 +173,7 @@ module Masamune::Schema
|
|
173
173
|
end
|
174
174
|
|
175
175
|
def file(id, options = {})
|
176
|
-
format_options = options.extract!(:format, :headers)
|
176
|
+
format_options = options.extract!(:format, :headers, :json_encoding)
|
177
177
|
@context.push(options)
|
178
178
|
yield if block_given?
|
179
179
|
store = HasFormat.new(@context, format_options)
|
@@ -182,14 +182,6 @@ module Masamune::Schema
|
|
182
182
|
@context.pop
|
183
183
|
end
|
184
184
|
|
185
|
-
def event(id, options = {})
|
186
|
-
@context.push(options)
|
187
|
-
yield if block_given?
|
188
|
-
@context.events[id] = HasMap.new Masamune::Schema::Event.new(@context.options.merge(id: id))
|
189
|
-
ensure
|
190
|
-
@context.pop
|
191
|
-
end
|
192
|
-
|
193
185
|
def attribute(id, options = {})
|
194
186
|
@context.options[:attributes] << Masamune::Schema::Event::Attribute.new(options.merge(id: id))
|
195
187
|
end
|
@@ -200,7 +192,7 @@ module Masamune::Schema
|
|
200
192
|
raise ArgumentError, "invalid map, from: is missing" unless from && from.try(:id)
|
201
193
|
raise ArgumentError, "invalid map from: '#{from.id}', to: is missing" unless to
|
202
194
|
@context.push(options)
|
203
|
-
@context.options[:function] = block.to_proc
|
195
|
+
@context.options[:function] = block.to_proc if block
|
204
196
|
from.maps[to] ||= Masamune::Schema::Map.new(@context.options.merge(source: from, target: to))
|
205
197
|
ensure
|
206
198
|
@context.pop
|
@@ -133,7 +133,7 @@ module Masamune::Schema
|
|
133
133
|
def compact_name
|
134
134
|
if reference
|
135
135
|
# XXX once columns only reference columns, this can be cleaned up
|
136
|
-
if @id == reference.surrogate_key.reference_name(reference.label)
|
136
|
+
if reference.surrogate_key && @id == reference.surrogate_key.reference_name(reference.label)
|
137
137
|
"#{reference.id}.#{reference.surrogate_key.id}".to_sym
|
138
138
|
else
|
139
139
|
"#{reference.id}.#{@id}".to_sym
|
@@ -184,12 +184,12 @@ module Masamune::Schema
|
|
184
184
|
array_value? ? "#{elem}[]" : elem
|
185
185
|
end
|
186
186
|
|
187
|
-
def hql_type
|
187
|
+
def hql_type(for_surrogate_key = false)
|
188
188
|
elem =
|
189
189
|
case type
|
190
190
|
when :integer
|
191
|
-
'INT'
|
192
|
-
when :string
|
191
|
+
for_surrogate_key ? 'STRING' : 'INT'
|
192
|
+
when :string, :enum, :key_value, :timestamp
|
193
193
|
'STRING'
|
194
194
|
else
|
195
195
|
sql_type
|
@@ -264,7 +264,9 @@ module Masamune::Schema
|
|
264
264
|
when Date, DateTime
|
265
265
|
value.to_time
|
266
266
|
when String
|
267
|
-
if value
|
267
|
+
if value.blank?
|
268
|
+
nil
|
269
|
+
elsif value =~ /\A\d+\z/
|
268
270
|
Time.at(value.to_i)
|
269
271
|
else
|
270
272
|
Time.parse(value)
|
@@ -281,7 +283,7 @@ module Masamune::Schema
|
|
281
283
|
when Hash
|
282
284
|
value
|
283
285
|
when String
|
284
|
-
|
286
|
+
YAML.load(value)
|
285
287
|
when nil
|
286
288
|
{}
|
287
289
|
end
|
@@ -290,7 +292,7 @@ module Masamune::Schema
|
|
290
292
|
when Hash
|
291
293
|
value
|
292
294
|
when String
|
293
|
-
|
295
|
+
JSON.load(value)
|
294
296
|
when nil
|
295
297
|
{}
|
296
298
|
end
|
@@ -366,6 +368,10 @@ module Masamune::Schema
|
|
366
368
|
[name, sql_type(surrogate_key), *sql_constraints, reference_constraint, sql_default].compact.join(' ')
|
367
369
|
end
|
368
370
|
|
371
|
+
def as_hql
|
372
|
+
[name, hql_type(surrogate_key)].compact.join(' ')
|
373
|
+
end
|
374
|
+
|
369
375
|
def as_hash
|
370
376
|
{id: id}.tap do |hash|
|
371
377
|
DEFAULT_ATTRIBUTES.keys.each do |attr|
|
@@ -462,11 +468,9 @@ module Masamune::Schema
|
|
462
468
|
end
|
463
469
|
|
464
470
|
def required_value?
|
465
|
-
if reference
|
466
|
-
|
467
|
-
|
468
|
-
surrogate_key || natural_key || !(null || default)
|
469
|
-
end
|
471
|
+
return false if reference && (reference.null || !reference.default.nil?)
|
472
|
+
return false if null || !default.nil?
|
473
|
+
true
|
470
474
|
end
|
471
475
|
|
472
476
|
private
|
@@ -495,24 +499,6 @@ module Masamune::Schema
|
|
495
499
|
end
|
496
500
|
end
|
497
501
|
|
498
|
-
def ruby_key_value(hash)
|
499
|
-
case sub_type
|
500
|
-
when :boolean
|
501
|
-
Hash[hash.map { |key, value| ruby_boolean_key_value(key, value) }.compact]
|
502
|
-
else
|
503
|
-
hash
|
504
|
-
end
|
505
|
-
end
|
506
|
-
|
507
|
-
def ruby_boolean_key_value(key, value)
|
508
|
-
case value
|
509
|
-
when true, '1', 1
|
510
|
-
[key, true]
|
511
|
-
when false, '0', 0
|
512
|
-
[key, false]
|
513
|
-
end
|
514
|
-
end
|
515
|
-
|
516
502
|
def csv_array(value)
|
517
503
|
case value
|
518
504
|
when Array
|
@@ -77,15 +77,8 @@ module Masamune::Schema
|
|
77
77
|
next if column.surrogate_key
|
78
78
|
next if reserved_column_ids.include?(column.id)
|
79
79
|
|
80
|
-
|
81
|
-
|
82
|
-
column_now.id, column_was.id = "#{column.id}_now", "#{column.id}_was"
|
83
|
-
column_now.strict, column_was.strict = false, false
|
84
|
-
[column_now, column_was]
|
85
|
-
else
|
86
|
-
column.dup.tap do |column_copy|
|
87
|
-
column_copy.strict = false unless column.surrogate_key || column.natural_key || (column.reference && column.reference.surrogate_key.auto)
|
88
|
-
end
|
80
|
+
column.dup.tap do |column_copy|
|
81
|
+
column_copy.strict = false unless column.surrogate_key || column.natural_key || (column.reference && column.reference.surrogate_key.auto)
|
89
82
|
end
|
90
83
|
end.flatten
|
91
84
|
end
|
data/lib/masamune/schema/fact.rb
CHANGED
@@ -82,10 +82,6 @@ module Masamune::Schema
|
|
82
82
|
@partition_tables[partition_range] ||= self.class.new(id: @id, store: store, columns: partition_table_columns, parent: self, range: partition_range, grain: grain, inherit: true)
|
83
83
|
end
|
84
84
|
|
85
|
-
def partitions
|
86
|
-
columns.select { |_, column| column.partition }
|
87
|
-
end
|
88
|
-
|
89
85
|
def measures
|
90
86
|
columns.select { |_, column| column.measure }
|
91
87
|
end
|
data/lib/masamune/schema/map.rb
CHANGED
data/lib/masamune/schema/row.rb
CHANGED
@@ -89,7 +89,7 @@ module Masamune::Schema
|
|
89
89
|
end
|
90
90
|
|
91
91
|
def headers
|
92
|
-
|
92
|
+
@columns.map { |_, column| column.name }
|
93
93
|
end
|
94
94
|
|
95
95
|
def serialize
|
@@ -121,8 +121,8 @@ module Masamune::Schema
|
|
121
121
|
values.each do |key, value|
|
122
122
|
next unless key
|
123
123
|
if column = parent.dereference_column_name(key)
|
124
|
-
@columns[column.
|
125
|
-
result[column.
|
124
|
+
@columns[column.compact_name] = column
|
125
|
+
result[column.compact_name] = column.ruby_value(value)
|
126
126
|
elsif strict
|
127
127
|
raise ArgumentError, "#{@values} contains undefined columns #{key}"
|
128
128
|
end
|
@@ -26,7 +26,7 @@ module Masamune::Schema
|
|
26
26
|
class Store
|
27
27
|
include Masamune::HasEnvironment
|
28
28
|
|
29
|
-
SUPPORTED_ATTRIBUTES = %(table dimension fact file
|
29
|
+
SUPPORTED_ATTRIBUTES = %(table dimension fact file)
|
30
30
|
|
31
31
|
DEFAULT_ATTRIBUTES =
|
32
32
|
{
|
@@ -45,7 +45,6 @@ module Masamune::Schema
|
|
45
45
|
attr_accessor :dimensions
|
46
46
|
attr_accessor :facts
|
47
47
|
attr_accessor :files
|
48
|
-
attr_accessor :events
|
49
48
|
attr_accessor :references
|
50
49
|
|
51
50
|
class << self
|
@@ -67,7 +66,6 @@ module Masamune::Schema
|
|
67
66
|
@dimensions = {}.with_indifferent_access
|
68
67
|
@facts = {}.with_indifferent_access
|
69
68
|
@files = {}.with_indifferent_access
|
70
|
-
@events = {}.with_indifferent_access
|
71
69
|
@references = {}.with_indifferent_access
|
72
70
|
@extra = []
|
73
71
|
end
|
@@ -39,7 +39,8 @@ module Masamune::Schema
|
|
39
39
|
columns: {},
|
40
40
|
rows: [],
|
41
41
|
inherit: false,
|
42
|
-
debug: false
|
42
|
+
debug: false,
|
43
|
+
properties: {}
|
43
44
|
}
|
44
45
|
|
45
46
|
DEFAULT_ATTRIBUTES.keys.each do |attr|
|
@@ -111,7 +112,7 @@ module Masamune::Schema
|
|
111
112
|
end
|
112
113
|
|
113
114
|
def defined_columns
|
114
|
-
columns.values
|
115
|
+
columns.values.reject { |column| column.partition }
|
115
116
|
end
|
116
117
|
method_with_last_element :defined_columns
|
117
118
|
|
@@ -154,6 +155,10 @@ module Masamune::Schema
|
|
154
155
|
columns.values.select { | column| column.reference && column.reference.foreign_key }
|
155
156
|
end
|
156
157
|
|
158
|
+
def partitions
|
159
|
+
columns.select { |_, column| column.partition }
|
160
|
+
end
|
161
|
+
|
157
162
|
def insert_rows
|
158
163
|
rows.select { |row| row.insert_values.any? }
|
159
164
|
end
|
@@ -174,6 +179,27 @@ module Masamune::Schema
|
|
174
179
|
columns.reject { |_, column| reserved_column_ids.include?(column.id) }
|
175
180
|
end
|
176
181
|
|
182
|
+
def denormalized_columns
|
183
|
+
columns.map do |_, column|
|
184
|
+
next if column.surrogate_key || column.ignore
|
185
|
+
if column.reference
|
186
|
+
column.reference.natural_keys.any? ? column.reference.natural_keys : column.reference.denormalized_columns
|
187
|
+
else
|
188
|
+
column
|
189
|
+
end
|
190
|
+
end.flatten.compact
|
191
|
+
end
|
192
|
+
|
193
|
+
def denormalized_column_names
|
194
|
+
denormalized_columns.map do |column|
|
195
|
+
if column.parent == self
|
196
|
+
column.name.to_s
|
197
|
+
else
|
198
|
+
[column.parent.id, column.name].join('.')
|
199
|
+
end
|
200
|
+
end
|
201
|
+
end
|
202
|
+
|
177
203
|
def stage_table(options = {})
|
178
204
|
selected = options[:columns] if options[:columns]
|
179
205
|
selected ||= options[:target].columns.values.map(&:compact_name) if options[:target]
|
data/lib/masamune/transform.rb
CHANGED
@@ -21,12 +21,10 @@
|
|
21
21
|
# THE SOFTWARE.
|
22
22
|
|
23
23
|
require 'masamune/transform/define_table'
|
24
|
-
require 'masamune/transform/define_event_view'
|
25
24
|
|
26
25
|
module Masamune::Transform
|
27
26
|
module DefineSchema
|
28
27
|
include DefineTable
|
29
|
-
include DefineEventView
|
30
28
|
|
31
29
|
extend ActiveSupport::Concern
|
32
30
|
|
@@ -44,10 +42,6 @@ module Masamune::Transform
|
|
44
42
|
operators << define_table(fact)
|
45
43
|
end
|
46
44
|
|
47
|
-
context.events.each do |_, event|
|
48
|
-
operators << define_event_view(event)
|
49
|
-
end
|
50
|
-
|
51
45
|
operators += context.extra(:post)
|
52
46
|
|
53
47
|
Operator.new __method__, *operators, source: context
|
@@ -22,13 +22,14 @@
|
|
22
22
|
|
23
23
|
CREATE TABLE IF NOT EXISTS <%= target.name %>
|
24
24
|
(
|
25
|
-
<%- target.
|
26
|
-
<%= column.
|
25
|
+
<%- target.defined_columns.each do |column, last| -%>
|
26
|
+
<%= column.as_hql %><%= ',' unless last %>
|
27
27
|
<%- end -%>
|
28
|
-
<%- target.measures.each do |_, measure| -%>
|
29
|
-
<%= measure.name %> <%= measure.hql_type %>,
|
30
|
-
<%- end -%>
|
31
|
-
<%= target.time_key.name %> <%= target.time_key.hql_type %>
|
32
28
|
)
|
29
|
+
<%- if target.partition_by -%>
|
33
30
|
PARTITIONED BY (<%= target.partition_by %>)
|
31
|
+
<%- end -%>
|
32
|
+
<%- if target.properties[:format] == :tsv -%>
|
33
|
+
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
|
34
|
+
<%- end -%>
|
34
35
|
TBLPROPERTIES ('serialization.null.format' = '');
|
@@ -20,32 +20,14 @@
|
|
20
20
|
-- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
21
|
-- THE SOFTWARE.
|
22
22
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
type,
|
27
|
-
<%- target.view_columns.each do |value| -%>
|
28
|
-
<%= value %>,
|
23
|
+
SELECT
|
24
|
+
<%- target.select_columns(columns).each do |column, last| -%>
|
25
|
+
<%= column %><%= ',' unless last %>
|
29
26
|
<%- end -%>
|
30
|
-
delta,
|
31
|
-
created_at,
|
32
|
-
y, m, d ,h
|
33
|
-
) PARTITIONED ON (y, m, d, h) AS
|
34
|
-
SELECT DISTINCT
|
35
|
-
uuid,
|
36
|
-
type,
|
37
|
-
<%- target.view_values.each do |value| -%>
|
38
|
-
<%= value %>,
|
39
|
-
<%- end -%>
|
40
|
-
IF(type = '<%= target.update_type %>', 1, 0) AS delta,
|
41
|
-
ctime_iso8601 AS created_at,
|
42
|
-
y, m, d ,h
|
43
27
|
FROM
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
type = '<%= target.update_type %>' OR
|
50
|
-
type = '<%= target.delete_type %>'
|
28
|
+
<%= target.name %>
|
29
|
+
ORDER BY
|
30
|
+
<%- target.order_by_columns(order_by).each do |column, last| -%>
|
31
|
+
<%= column %><%= ',' unless last %>
|
32
|
+
<%- end -%>
|
51
33
|
;
|