masamune 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +54 -0
- data/Rakefile +15 -0
- data/bin/masamune-elastic-mapreduce +4 -0
- data/bin/masamune-hive +4 -0
- data/bin/masamune-psql +4 -0
- data/bin/masamune-shell +4 -0
- data/lib/masamune.rb +56 -0
- data/lib/masamune/accumulate.rb +60 -0
- data/lib/masamune/actions.rb +38 -0
- data/lib/masamune/actions/data_flow.rb +131 -0
- data/lib/masamune/actions/date_parse.rb +75 -0
- data/lib/masamune/actions/elastic_mapreduce.rb +68 -0
- data/lib/masamune/actions/execute.rb +52 -0
- data/lib/masamune/actions/filesystem.rb +37 -0
- data/lib/masamune/actions/hadoop_filesystem.rb +40 -0
- data/lib/masamune/actions/hadoop_streaming.rb +41 -0
- data/lib/masamune/actions/hive.rb +74 -0
- data/lib/masamune/actions/postgres.rb +76 -0
- data/lib/masamune/actions/postgres_admin.rb +34 -0
- data/lib/masamune/actions/s3cmd.rb +44 -0
- data/lib/masamune/actions/transform.rb +89 -0
- data/lib/masamune/after_initialize_callbacks.rb +55 -0
- data/lib/masamune/cached_filesystem.rb +110 -0
- data/lib/masamune/commands.rb +37 -0
- data/lib/masamune/commands/elastic_mapreduce.rb +119 -0
- data/lib/masamune/commands/hadoop_filesystem.rb +57 -0
- data/lib/masamune/commands/hadoop_streaming.rb +116 -0
- data/lib/masamune/commands/hive.rb +178 -0
- data/lib/masamune/commands/interactive.rb +37 -0
- data/lib/masamune/commands/postgres.rb +128 -0
- data/lib/masamune/commands/postgres_admin.rb +72 -0
- data/lib/masamune/commands/postgres_common.rb +33 -0
- data/lib/masamune/commands/retry_with_backoff.rb +60 -0
- data/lib/masamune/commands/s3cmd.rb +70 -0
- data/lib/masamune/commands/shell.rb +202 -0
- data/lib/masamune/configuration.rb +195 -0
- data/lib/masamune/data_plan.rb +31 -0
- data/lib/masamune/data_plan/builder.rb +66 -0
- data/lib/masamune/data_plan/elem.rb +190 -0
- data/lib/masamune/data_plan/engine.rb +162 -0
- data/lib/masamune/data_plan/rule.rb +292 -0
- data/lib/masamune/data_plan/set.rb +176 -0
- data/lib/masamune/environment.rb +164 -0
- data/lib/masamune/filesystem.rb +567 -0
- data/lib/masamune/has_environment.rb +40 -0
- data/lib/masamune/helpers.rb +27 -0
- data/lib/masamune/helpers/postgres.rb +84 -0
- data/lib/masamune/io.rb +33 -0
- data/lib/masamune/last_element.rb +53 -0
- data/lib/masamune/method_logger.rb +41 -0
- data/lib/masamune/multi_io.rb +39 -0
- data/lib/masamune/schema.rb +36 -0
- data/lib/masamune/schema/catalog.rb +233 -0
- data/lib/masamune/schema/column.rb +527 -0
- data/lib/masamune/schema/dimension.rb +133 -0
- data/lib/masamune/schema/event.rb +121 -0
- data/lib/masamune/schema/fact.rb +133 -0
- data/lib/masamune/schema/map.rb +265 -0
- data/lib/masamune/schema/row.rb +133 -0
- data/lib/masamune/schema/store.rb +115 -0
- data/lib/masamune/schema/table.rb +308 -0
- data/lib/masamune/schema/table_reference.rb +76 -0
- data/lib/masamune/spec_helper.rb +23 -0
- data/lib/masamune/string_format.rb +34 -0
- data/lib/masamune/tasks/elastic_mapreduce_thor.rb +60 -0
- data/lib/masamune/tasks/hive_thor.rb +55 -0
- data/lib/masamune/tasks/postgres_thor.rb +47 -0
- data/lib/masamune/tasks/shell_thor.rb +63 -0
- data/lib/masamune/template.rb +77 -0
- data/lib/masamune/thor.rb +186 -0
- data/lib/masamune/thor_loader.rb +38 -0
- data/lib/masamune/topological_hash.rb +34 -0
- data/lib/masamune/transform.rb +47 -0
- data/lib/masamune/transform/bulk_upsert.psql.erb +64 -0
- data/lib/masamune/transform/bulk_upsert.rb +52 -0
- data/lib/masamune/transform/consolidate_dimension.rb +54 -0
- data/lib/masamune/transform/deduplicate_dimension.psql.erb +52 -0
- data/lib/masamune/transform/deduplicate_dimension.rb +53 -0
- data/lib/masamune/transform/define_event_view.hql.erb +51 -0
- data/lib/masamune/transform/define_event_view.rb +60 -0
- data/lib/masamune/transform/define_index.psql.erb +34 -0
- data/lib/masamune/transform/define_schema.hql.erb +23 -0
- data/lib/masamune/transform/define_schema.psql.erb +79 -0
- data/lib/masamune/transform/define_schema.rb +56 -0
- data/lib/masamune/transform/define_table.hql.erb +34 -0
- data/lib/masamune/transform/define_table.psql.erb +95 -0
- data/lib/masamune/transform/define_table.rb +40 -0
- data/lib/masamune/transform/define_unique.psql.erb +30 -0
- data/lib/masamune/transform/insert_reference_values.psql.erb +43 -0
- data/lib/masamune/transform/insert_reference_values.rb +64 -0
- data/lib/masamune/transform/load_dimension.rb +47 -0
- data/lib/masamune/transform/load_fact.rb +45 -0
- data/lib/masamune/transform/operator.rb +96 -0
- data/lib/masamune/transform/relabel_dimension.psql.erb +76 -0
- data/lib/masamune/transform/relabel_dimension.rb +39 -0
- data/lib/masamune/transform/rollup_fact.psql.erb +79 -0
- data/lib/masamune/transform/rollup_fact.rb +149 -0
- data/lib/masamune/transform/snapshot_dimension.psql.erb +75 -0
- data/lib/masamune/transform/snapshot_dimension.rb +74 -0
- data/lib/masamune/transform/stage_dimension.psql.erb +39 -0
- data/lib/masamune/transform/stage_dimension.rb +83 -0
- data/lib/masamune/transform/stage_fact.psql.erb +80 -0
- data/lib/masamune/transform/stage_fact.rb +111 -0
- data/lib/masamune/version.rb +25 -0
- data/spec/fixtures/aggregate.sql.erb +25 -0
- data/spec/fixtures/comment.sql.erb +27 -0
- data/spec/fixtures/invalid.sql.erb +23 -0
- data/spec/fixtures/relative.sql.erb +23 -0
- data/spec/fixtures/simple.sql.erb +28 -0
- data/spec/fixtures/whitespace.sql.erb +30 -0
- data/spec/masamune/actions/elastic_mapreduce_spec.rb +108 -0
- data/spec/masamune/actions/execute_spec.rb +50 -0
- data/spec/masamune/actions/hadoop_filesystem_spec.rb +44 -0
- data/spec/masamune/actions/hadoop_streaming_spec.rb +74 -0
- data/spec/masamune/actions/hive_spec.rb +117 -0
- data/spec/masamune/actions/postgres_admin_spec.rb +58 -0
- data/spec/masamune/actions/postgres_spec.rb +134 -0
- data/spec/masamune/actions/s3cmd_spec.rb +44 -0
- data/spec/masamune/actions/transform_spec.rb +144 -0
- data/spec/masamune/after_initialization_callbacks_spec.rb +61 -0
- data/spec/masamune/cached_filesystem_spec.rb +167 -0
- data/spec/masamune/commands/hadoop_filesystem_spec.rb +50 -0
- data/spec/masamune/commands/hadoop_streaming_spec.rb +106 -0
- data/spec/masamune/commands/hive_spec.rb +117 -0
- data/spec/masamune/commands/postgres_admin_spec.rb +69 -0
- data/spec/masamune/commands/postgres_spec.rb +100 -0
- data/spec/masamune/commands/retry_with_backoff_spec.rb +116 -0
- data/spec/masamune/commands/s3cmd_spec.rb +50 -0
- data/spec/masamune/commands/shell_spec.rb +101 -0
- data/spec/masamune/configuration_spec.rb +102 -0
- data/spec/masamune/data_plan/builder_spec.rb +91 -0
- data/spec/masamune/data_plan/elem_spec.rb +102 -0
- data/spec/masamune/data_plan/engine_spec.rb +356 -0
- data/spec/masamune/data_plan/rule_spec.rb +407 -0
- data/spec/masamune/data_plan/set_spec.rb +517 -0
- data/spec/masamune/environment_spec.rb +65 -0
- data/spec/masamune/filesystem_spec.rb +1421 -0
- data/spec/masamune/helpers/postgres_spec.rb +95 -0
- data/spec/masamune/schema/catalog_spec.rb +613 -0
- data/spec/masamune/schema/column_spec.rb +696 -0
- data/spec/masamune/schema/dimension_spec.rb +137 -0
- data/spec/masamune/schema/event_spec.rb +75 -0
- data/spec/masamune/schema/fact_spec.rb +117 -0
- data/spec/masamune/schema/map_spec.rb +593 -0
- data/spec/masamune/schema/row_spec.rb +28 -0
- data/spec/masamune/schema/store_spec.rb +49 -0
- data/spec/masamune/schema/table_spec.rb +395 -0
- data/spec/masamune/string_format_spec.rb +60 -0
- data/spec/masamune/tasks/elastic_mapreduce_thor_spec.rb +57 -0
- data/spec/masamune/tasks/hive_thor_spec.rb +75 -0
- data/spec/masamune/tasks/postgres_thor_spec.rb +42 -0
- data/spec/masamune/tasks/shell_thor_spec.rb +51 -0
- data/spec/masamune/template_spec.rb +77 -0
- data/spec/masamune/thor_spec.rb +238 -0
- data/spec/masamune/transform/bulk_upsert.dimension_spec.rb +200 -0
- data/spec/masamune/transform/consolidate_dimension_spec.rb +62 -0
- data/spec/masamune/transform/deduplicate_dimension_spec.rb +84 -0
- data/spec/masamune/transform/define_event_view_spec.rb +84 -0
- data/spec/masamune/transform/define_schema_spec.rb +83 -0
- data/spec/masamune/transform/define_table.dimension_spec.rb +306 -0
- data/spec/masamune/transform/define_table.fact_spec.rb +291 -0
- data/spec/masamune/transform/define_table.table_spec.rb +525 -0
- data/spec/masamune/transform/insert_reference_values.dimension_spec.rb +111 -0
- data/spec/masamune/transform/insert_reference_values.fact_spec.rb +149 -0
- data/spec/masamune/transform/load_dimension_spec.rb +76 -0
- data/spec/masamune/transform/load_fact_spec.rb +89 -0
- data/spec/masamune/transform/relabel_dimension_spec.rb +102 -0
- data/spec/masamune/transform/rollup_fact_spec.rb +333 -0
- data/spec/masamune/transform/snapshot_dimension_spec.rb +103 -0
- data/spec/masamune/transform/stage_dimension_spec.rb +115 -0
- data/spec/masamune/transform/stage_fact_spec.rb +204 -0
- data/spec/masamune_spec.rb +32 -0
- data/spec/spec_helper.rb +41 -0
- data/spec/support/masamune/example_group.rb +36 -0
- data/spec/support/masamune/mock_command.rb +99 -0
- data/spec/support/masamune/mock_delegate.rb +51 -0
- data/spec/support/masamune/mock_filesystem.rb +96 -0
- data/spec/support/masamune/thor_mute.rb +35 -0
- data/spec/support/rspec/example/action_example_group.rb +34 -0
- data/spec/support/rspec/example/task_example_group.rb +80 -0
- data/spec/support/rspec/example/transform_example_group.rb +36 -0
- data/spec/support/shared_examples/postgres_common_examples.rb +53 -0
- metadata +462 -0
@@ -0,0 +1,133 @@
|
|
1
|
+
# The MIT License (MIT)
|
2
|
+
#
|
3
|
+
# Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
# of this software and associated documentation files (the "Software"), to deal
|
7
|
+
# in the Software without restriction, including without limitation the rights
|
8
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
# copies of the Software, and to permit persons to whom the Software is
|
10
|
+
# furnished to do so, subject to the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
13
|
+
# all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
# THE SOFTWARE.
|
22
|
+
|
23
|
+
module Masamune::Schema
|
24
|
+
# Dimension table definition layered on Table. After the inherited
# initializer has run, bookkeeping columns matching the dimension +type+
# (:mini, :one, :two, :four, :ledger, :date, :stage) are added.
class Dimension < Table
  # Runs the inherited initializer with the same arguments, then adds the
  # type-specific bookkeeping columns.
  #
  # @param opts [Hash] forwarded unchanged to the superclass initializer
  def initialize(opts = {})
    super
    initialize_dimension_columns!
  end

  # Suffix for this dimension: 'type' for :mini, 'dimension' for :one, :two,
  # :four and :date, and the stringified type for anything else. When a
  # parent is present its suffix is prepended, joined with '_'.
  #
  # @return [String]
  def suffix
    own_suffix =
      case type
      when :mini then 'type'
      when :one, :two, :four, :date then 'dimension'
      else type.to_s
      end
    return own_suffix unless parent
    [parent.suffix, own_suffix].compact.join('_')
  end

  # @return the column whose id is :start_at, or nil when there is none
  def start_key
    columns.values.find { |candidate| candidate.id == :start_at }
  end

  # @return the column whose id is :end_at, or nil when there is none
  def end_key
    columns.values.find { |candidate| candidate.id == :end_at }
  end

  # @return the column whose id is :version, or nil when there is none
  def version_key
    columns.values.find { |candidate| candidate.id == :version }
  end

  # Companion table of type :ledger that shares this dimension's id, store
  # and references and has this dimension as its parent. Built on first use
  # and memoized.
  def ledger_table
    @ledger_table ||= self.class.new(
      id: id,
      type: :ledger,
      store: store,
      columns: ledger_table_columns,
      references: references.values,
      parent: self
    )
  end

  # Ids of the bookkeeping columns for this dimension type; types not listed
  # here defer to the superclass.
  #
  # @return [Array<Symbol>]
  def reserved_column_ids
    case type
    when :one, :date then [:last_modified_at]
    when :two then [:start_at, :end_at, :version, :last_modified_at]
    when :four then [:parent_id, :record_id, :start_at, :end_at, :version, :last_modified_at]
    when :ledger then [:source_kind, :source_uuid, :start_at, :last_modified_at, :delta]
    else super
    end
  end

  private

  # Column list handed to the ledger table. Surrogate-key and reserved
  # columns are skipped (each leaves a nil entry in the result), :key_value
  # columns are split into non-strict "<id>_now" / "<id>_was" copies, and
  # every other column is copied, with +strict+ switched off unless it is a
  # natural key or references a table whose surrogate key is auto.
  def ledger_table_columns
    columns.values.flat_map do |column|
      if column.surrogate_key || reserved_column_ids.include?(column.id)
        nil
      elsif column.type == :key_value
        ['now', 'was'].map do |tense|
          column.dup.tap do |variant|
            variant.id = "#{column.id}_#{tense}"
            variant.strict = false
          end
        end
      else
        column.dup.tap do |column_copy|
          keep_strict = column.surrogate_key || column.natural_key ||
                        (column.reference && column.reference.surrogate_key.auto)
          column_copy.strict = false unless keep_strict
        end
      end
    end
  end

  # Adds the integer 'id' surrogate key for the dimension types that carry
  # one; does nothing for any other type.
  def initialize_surrogate_key_column!
    return unless [:mini, :one, :two, :four, :ledger, :date].include?(type)
    initialize_column! id: 'id', type: :integer, surrogate_key: true
  end

  # Adds the bookkeeping columns that belong to the current dimension type.
  def initialize_dimension_columns!
    case type
    when :one, :date
      initialize_last_modified_at_column!
    when :two
      initialize_history_columns!
    when :four
      children << ledger_table
      # FIXME: derive type from parent
      ['parent_id', 'record_id'].each do |ledger_key|
        initialize_column! id: ledger_key, type: :integer, null: true, reference: ledger_table
      end
      initialize_history_columns!
    when :ledger
      initialize_column! id: 'source_kind', type: :string, unique: 'natural'
      initialize_column! id: 'source_uuid', type: :string, unique: 'natural'
      initialize_column! id: 'start_at', type: :timestamp, index: true, unique: 'natural'
      initialize_last_modified_at_column!
      initialize_column! id: 'delta', type: :integer
    when :stage
      # A stage table only copies its parent's reserved columns when asked to inherit.
      parent.reserved_columns.each { |_, column| initialize_column! column.as_hash } if inherit
    end
  end

  # Adds the 'last_modified_at' timestamp column defaulting to NOW().
  def initialize_last_modified_at_column!
    initialize_column! id: 'last_modified_at', type: :timestamp, default: 'NOW()'
  end

  # Adds the start_at / end_at / version / last_modified_at columns shared by
  # the :two and :four dimension types.
  def initialize_history_columns!
    initialize_column! id: 'start_at', type: :timestamp, default: 'TO_TIMESTAMP(0)', index: true, unique: 'natural'
    initialize_column! id: 'end_at', type: :timestamp, null: true, index: true
    initialize_column! id: 'version', type: :integer, default: 1, null: true, index: true
    initialize_last_modified_at_column!
  end
end
|
133
|
+
end
|
@@ -0,0 +1,121 @@
|
|
1
|
+
# The MIT License (MIT)
|
2
|
+
#
|
3
|
+
# Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
# of this software and associated documentation files (the "Software"), to deal
|
7
|
+
# in the Software without restriction, including without limitation the rights
|
8
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
# copies of the Software, and to permit persons to whom the Software is
|
10
|
+
# furnished to do so, subject to the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
13
|
+
# all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
# THE SOFTWARE.
|
22
|
+
|
23
|
+
module Masamune::Schema
|
24
|
+
# Describes an event: an id, a store, and a set of attributes that expand
# into columns alongside the fixed :uuid, :type, :delta and :created_at
# columns.
class Event
  # One attribute of an event. An immutable attribute becomes a single
  # column; a mutable one becomes a "<id>_now" / "<id>_was" pair.
  class Attribute
    attr_accessor :id
    attr_accessor :type
    attr_accessor :array
    attr_accessor :immutable

    # @param opts [Hash] :id (required), :type (default :integer),
    #   :array (default false), :immutable (default false); keys are
    #   symbolized in place
    # @raise [ArgumentError] when :id is not supplied
    def initialize(opts = {})
      opts.symbolize_keys!
      raise ArgumentError, 'required parameter id: missing' unless opts.key?(:id)
      self.id = opts[:id].to_sym
      self.type = opts.fetch(:type, :integer).to_sym
      self.array = opts.fetch(:array, false)
      self.immutable = opts.fetch(:immutable, false)
    end

    # Yields one [column_id, Column] pair per column this attribute expands
    # to, each Column having +event+ as its parent.
    #
    # @param event the object passed to Column.new as parent:
    def as_columns(event, &block)
      column_ids = immutable ? [id] : [:"#{id}_now", :"#{id}_was"]
      column_ids.each do |column_id|
        yield [column_id, Column.new(id: column_id, type: type, array: array, parent: event)]
      end
    end
  end

  DEFAULT_ATTRIBUTES =
  {
    id: nil,
    store: nil,
    attributes: [],
    debug: false
  }

  DEFAULT_ATTRIBUTES.keys.each do |attr|
    attr_accessor attr
  end

  # @param opts [Hash] any of the DEFAULT_ATTRIBUTES keys; :id is required.
  #   Keys are symbolized in place and each value is assigned through the
  #   matching writer.
  # @raise [ArgumentError] when :id is not supplied
  def initialize(opts = {})
    opts.symbolize_keys!
    raise ArgumentError, 'required parameter id: missing' unless opts.key?(:id)
    DEFAULT_ATTRIBUTES.merge(opts).each do |name, value|
      public_send("#{name}=", value)
    end
  end

  # @return [String] @name when set, otherwise id and suffix joined by '_'
  def name
    @name || [id, suffix].compact.join('_')
  end

  # @return [String] always 'event'
  def suffix
    'event'
  end

  # Stores the attributes indexed by their id and discards any memoized
  # column set, so that #columns reflects the new attributes instead of
  # whatever was computed before the assignment.
  #
  # @param attributes [Enumerable] objects responding to #id
  def attributes=(attributes)
    @columns = nil
    @attributes = {}
    attributes.each do |attribute|
      @attributes[attribute.id] = attribute
    end
  end

  # Columns of the event, in order: :uuid, :type, the columns produced by
  # each attribute, :delta, :created_at. Memoized until attributes change.
  #
  # @return [Hash] column id => Column
  def columns
    @columns ||= {}.tap do |columns|
      columns[:uuid] = Column.new id: :uuid, type: :uuid, parent: self
      columns[:type] = Column.new id: :type, type: :string, parent: self
      attributes.each do |_, attribute|
        attribute.as_columns(self) do |id, column|
          columns[id] = column
        end
      end
      columns[:delta] = Column.new id: :delta, type: :integer, parent: self
      columns[:created_at] = Column.new id: :created_at, type: :timestamp, parent: self
    end
  end

  # @return [Array<Symbol>] ids of the fixed columns every event carries
  def reserved_column_ids
    @reserved_column_ids ||= [:uuid, :type, :delta, :created_at]
  end

  # @return [Hash] the columns whose id is not a reserved column id
  def unreserved_columns
    columns.reject { |_, column| reserved_column_ids.include?(column.id) }
  end

  # @return [String] "<id>_create"
  def create_type
    @create_type ||= "#{id}_create"
  end

  # @return [String] "<id>_update"
  def update_type
    @update_type ||= "#{id}_update"
  end

  # @return [String] "<id>_delete"
  def delete_type
    @delete_type ||= "#{id}_delete"
  end

  # @param name [#to_sym] column name
  # @return the column stored under that name, or nil
  def dereference_column_name(name)
    columns[name.to_sym]
  end
end
|
121
|
+
end
|
@@ -0,0 +1,133 @@
|
|
1
|
+
# The MIT License (MIT)
|
2
|
+
#
|
3
|
+
# Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
# of this software and associated documentation files (the "Software"), to deal
|
7
|
+
# in the Software without restriction, including without limitation the rights
|
8
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
# copies of the Software, and to permit persons to whom the Software is
|
10
|
+
# furnished to do so, subject to the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
13
|
+
# all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
# THE SOFTWARE.
|
22
|
+
|
23
|
+
module Masamune::Schema
|
24
|
+
# Fact table definition layered on Table, adding a grain, an optional
# partition pattern and an optional range.
class Fact < Table
  SUPPORTED_GRAINS = [:transaction, :hourly, :daily, :monthly]

  # The writer for grain is defined explicitly below (it validates), so only
  # the reader is generated here.
  attr_reader :grain
  attr_accessor :partition
  attr_accessor :range

  # Extracts :grain and :partition, hands the remaining options to the
  # superclass with type defaulting to :fact, adds the fact bookkeeping
  # columns, and appends each foreign key column's name (and the time key's
  # name) to that column's index.
  #
  # The options are symbolized into a copy first, so the caller's hash is
  # neither re-keyed nor stripped of :grain / :partition.
  #
  # @param opts [Hash] table options plus :grain and :partition
  # @raise [ArgumentError] when :grain is not one of SUPPORTED_GRAINS
  def initialize(opts = {})
    opts = opts.symbolize_keys
    self.grain = opts.delete(:grain)
    @partition = opts.delete(:partition)
    super opts.reverse_merge(type: :fact)
    initialize_fact_columns!
    foreign_key_columns.each do |column|
      column.index << column.name
    end
    time_key.index << time_key.name
  end

  # @return [Symbol] the raw id and the grain (when set) joined by '_'
  def id
    [@id, grain].compact.join('_').to_sym
  end

  # Sets the grain. A nil/false value is ignored and leaves any existing
  # grain in place.
  #
  # @raise [ArgumentError] when the grain is not in SUPPORTED_GRAINS
  def grain=(grain = nil)
    return unless grain
    raise ArgumentError, "unknown grain '#{grain}'" unless SUPPORTED_GRAINS.include?(grain.to_sym)
    @grain = grain.to_sym
  end

  # The inherited suffix split on '_', followed by the range's suffix when a
  # range is set; nils and duplicate parts are dropped before re-joining.
  #
  # @return [String]
  def suffix
    inherited = super
    [*inherited.split('_'), range.try(:suffix)].compact.uniq.join('_')
  end

  # @return the first column that references a table of type :date, or nil
  def date_column
    columns.select { |_, column| column && column.reference && column.reference.type == :date }.values.first
  end

  # @return the column whose id is :time_key, or nil
  def time_key
    columns.values.detect { |column| column.id == :time_key }
  end

  # The inherited stage table, re-labelled with this fact's raw id, store,
  # range and grain, and with uniqueness switched off on every column.
  def stage_table(*a)
    super.tap do |stage|
      stage.id = @id
      stage.store = store
      stage.range = range
      stage.grain = grain
      stage.columns.each do |_, column|
        column.unique = false
      end
    end
  end

  # Child fact for the partition range that the partition rule binds +date+
  # to. One table is built per distinct range and then reused.
  def partition_table(date)
    partition_range = partition_rule.bind_date(date)
    @partition_tables ||= {}
    @partition_tables[partition_range] ||= self.class.new(id: @id, store: store, columns: partition_table_columns, parent: self, range: partition_range, grain: grain, inherit: true)
  end

  # @return [Hash] the columns flagged as partition columns
  def partitions
    columns.select { |_, column| column.partition }
  end

  # @return [Hash] the columns flagged as measures
  def measures
    columns.select { |_, column| column.measure }
  end

  # @return [String, nil] a CHECK clause bounding time_key to the range's
  #   start (inclusive) and stop (exclusive) as integer times; nil when no
  #   range is set
  def constraints
    return unless range
    "CHECK (time_key >= #{range.start_time.to_i} AND time_key < #{range.stop_time.to_i})"
  end

  # @return [Array<Symbol>] ids of the bookkeeping columns for type :fact;
  #   other types defer to the superclass
  def reserved_column_ids
    case type
    when :fact
      [:time_key, :last_modified_at]
    else
      super
    end
  end

  private

  # Deliberately empty: a fact defines no surrogate key column.
  def initialize_surrogate_key_column!
  end

  # Adds time_key (and last_modified_at, except on a :hive store) for type
  # :fact; for type :stage copies the parent's reserved columns when
  # +inherit+ is set.
  def initialize_fact_columns!
    case type
    when :fact
      initialize_column! id: 'time_key', type: :integer, index: true
      initialize_column! id: 'last_modified_at', type: :timestamp, default: 'NOW()' unless store.type == :hive
    when :stage
      if inherit
        parent.reserved_columns.each do |_, column|
          initialize_column! column.as_hash
        end
      end
    end
  end

  # Memoized data plan rule built from the table name and partition pattern;
  # used by #partition_table to bind a date to a range.
  def partition_rule
    @partition_rule ||= Masamune::DataPlan::Rule.new(nil, :tmp, :target, table: name, partition: @partition)
  end

  # @return [Array] copies of the unreserved columns
  def partition_table_columns
    unreserved_columns.map { |_, column| column.dup }
  end
end
|
133
|
+
end
|
@@ -0,0 +1,265 @@
|
|
1
|
+
# The MIT License (MIT)
|
2
|
+
#
|
3
|
+
# Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
# of this software and associated documentation files (the "Software"), to deal
|
7
|
+
# in the Software without restriction, including without limitation the rights
|
8
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
# copies of the Software, and to permit persons to whom the Software is
|
10
|
+
# furnished to do so, subject to the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
13
|
+
# all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
# THE SOFTWARE.
|
22
|
+
|
23
|
+
require 'csv'
|
24
|
+
|
25
|
+
module Masamune::Schema
|
26
|
+
# Maps rows read from a source onto the stage form of a target table by
# passing every input row through +function+.
class Map
  # IO wrapper whose #gets re-encodes each delimited line so that raw JSON
  # objects embedded in it become quoted fields. Lines are passed through
  # untouched when the store's json_encoding is :quoted.
  class JSONEncoder < SimpleDelegator
    # @param io the underlying IO-like object lines are read from
    # @param store object responding to #json_encoding and #format
    def initialize(io, store)
      super io
      @store = store
    end

    # Reads the next line from the wrapped IO.
    #
    # @return [String, nil] nil at end of input; the raw line when encoding
    #   is skipped; otherwise the stripped line with every field quoted and
    #   re-joined by the separator (no trailing newline)
    def gets(*a)
      line = __getobj__.gets(*a)
      return unless line
      return line if skip?
      encode(line, separator).join(separator)
    end

    private

    def skip?
      @store.json_encoding == :quoted
    end

    # Splits +line+ on +separator+, ignoring separators that occur inside
    # curly braces, and quotes each resulting field.
    #
    # Brace nesting is counted rather than tracked with a flag so that a
    # nested object such as {"a":{"b":1},"c":2} stays one field; an
    # unmatched '}' never drives the depth below zero.
    #
    # @return [Array<String>] the quoted fields
    def encode(line, separator)
      fields = []
      buffer = ''
      depth = 0
      line.strip.each_char do |char|
        case char
        when '{'
          depth += 1
          buffer << char
        when '}'
          depth -= 1 if depth > 0
          buffer << char
        when separator
          if depth > 0
            buffer << char
          else
            fields << quote(buffer)
            buffer = ''
          end
        else
          buffer << char
        end
      end
      fields << quote(buffer)
      fields.compact
    end

    # Wraps +buffer+ in double quotes, doubling embedded quotes; a buffer
    # that already starts and ends with a double quote is returned as is.
    def quote(buffer)
      return buffer if buffer =~ /\A".*"\z/
      %Q{"#{buffer.gsub('"', '""')}"}
    end

    # Tab for a :tsv store, comma otherwise.
    def separator
      @separator ||= (@store.format == :tsv ? "\t" : ',')
    end
  end

  # Reads rows from, or appends rows to, an IO bound with #bind, using the
  # given table's columns and store settings.
  class Buffer
    extend Forwardable

    def_delegators :@io, :flush, :path

    # @param table object responding to #store, #columns and #name
    # @param options [Hash] :distinct => true drops repeated serialized rows
    def initialize(table, options = {})
      @table = table
      @store = table.store
      @lines = 0
      @options = options
    end

    # Attaches the IO used by #each / #append and drops any CSV writer that
    # was built for a previously bound IO.
    def bind(io)
      @io = io.set_encoding('binary', 'UTF-8', undef: :replace)
      @csv = nil
    end

    # Parses the bound IO and yields one value per data line: the row as a
    # hash, or nil when the row could not be built. Lines whose text starts
    # with '#' are not yielded.
    #
    # @raise [RuntimeError] when no IO has been bound
    def each(&block)
      raise 'must call Buffer#bind first' unless @io
      CSV.parse(JSONEncoder.new(@io, @store), options.merge(headers: @store.headers || @table.columns.keys)) do |data|
        next if data.to_s =~ /\A#/
        yield safe_row(data)
      end
    end

    # Serializes +data+ as a row of the table and writes it to the bound IO.
    # Rows missing required columns are logged and not written; with the
    # :distinct option a serialized row already seen is not written again.
    # Every call counts as a line, written or not.
    #
    # @raise [RuntimeError] when no IO has been bound
    def append(data)
      raise 'must call Buffer#bind first' unless @io
      row = Masamune::Schema::Row.new(parent: @table, values: data.to_hash)
      write_headers = @store.headers && @lines < 1
      @csv ||= CSV.new(@io, options.merge(headers: row.headers, write_headers: write_headers))
      missing_required_columns = row.missing_required_columns
      if missing_required_columns.any?
        missing_required_column_names = missing_required_columns.map(&:name)
        @store.logger.warn("row '#{row.to_hash}' is missing required columns '#{missing_required_column_names.join(',')}', skipping")
      else
        # Serialize once and reuse it for both the distinct check and the write.
        serialized = row.serialize
        @csv << serialized if append?(serialized)
      end
      @lines += 1
    end

    private

    # CSV options shared by reading and writing.
    def options
      {skip_blanks: true}.tap do |opts|
        opts[:col_sep] = "\t" if @store.format == :tsv
      end
    end

    # Builds a non-strict row from +data+ and returns it as a hash. On
    # failure a warning is logged and nil is returned explicitly, so the
    # caller never receives the logger's return value as if it were a row.
    def safe_row(data)
      row = Masamune::Schema::Row.new(parent: @table, values: data.to_hash, strict: false)
      row.to_hash
    rescue
      @store.logger.warn("failed to parse '#{data.to_hash}' for #{@table.name}, skipping")
      nil
    end

    # True when +elem+ should be written: always without :distinct,
    # otherwise only the first time the element is seen.
    def append?(elem)
      return true unless @options[:distinct]
      @seen ||= Set.new
      @seen.add?(elem)
    end
  end

  DEFAULT_ATTRIBUTES =
  {
    source: nil,
    target: nil,
    columns: nil,
    store: nil,
    function: ->(row) { row },
    distinct: false,
    debug: false
  }

  DEFAULT_ATTRIBUTES.keys.each do |attr|
    attr_accessor attr
  end

  # @param opts [Hash] any of the DEFAULT_ATTRIBUTES keys; :source and
  #   :target are required. Keys are symbolized in place and each value is
  #   assigned through the matching writer.
  # @raise [ArgumentError] when :source or :target is not supplied
  def initialize(opts = {})
    opts.symbolize_keys!
    raise ArgumentError, 'required parameter source: missing' unless opts.key?(:source)
    raise ArgumentError, 'required parameter target: missing' unless opts.key?(:target)
    DEFAULT_ATTRIBUTES.merge(opts).each do |name, value|
      public_send("#{name}=", value)
    end
  end

  def source=(source)
    @source = source
  end

  # FIXME: avoid implicit conversions
  # A target of type :four is replaced by its ledger table.
  def target=(target)
    @target = target.type == :four ? target.ledger_table : target
  end

  # Column names of the intermediate table, taken from the keys of the first
  # output that +function+ produces for a row of source column defaults.
  #
  # @raise [ArgumentError] when the function yields no output for that row
  def intermediate_columns
    output = function.call(default_row(source.columns))
    example = Array.wrap(output).first
    raise ArgumentError, "function for map between '#{source.name}' and '#{target.name}' does not return output for default input" unless example
    example.keys
  end

  # Stage table of the target restricted to the explicit +columns+, or to
  # the columns derived from the function when none were given.
  def intermediate
    target.stage_table(columns: columns || intermediate_columns, inherit: false)
  end

  # Runs every row of every input file through the function and appends the
  # results to +output_file+.
  #
  # @param input_files a single file/path, or an Array or Set of them
  # @param output_file an IO, StringIO, Tempfile or path String
  # @return the intermediate table describing the output
  def apply(input_files, output_file)
    input_buffer = Buffer.new(source)
    output_buffer = Buffer.new(intermediate, distinct: distinct)
    self.class.convert_files(input_files).each do |input_file|
      open_stream(input_file, 'r') do |input_stream|
        input_buffer.bind(input_stream)
        open_stream(output_file, 'a+') do |output_stream|
          output_buffer.bind(output_stream)
          apply_buffer(input_buffer, output_buffer)
        end
      end
    end
    intermediate
  end

  # Yields an IO for +file+: an IO/StringIO is flushed and yielded as is; a
  # String or Tempfile is opened with +mode+ for the duration of the block.
  # Any other kind of object is ignored and nothing is yielded.
  def open_stream(file, mode, &block)
    case file
    when IO, StringIO
      file.flush
      yield file
    when String, Tempfile
      File.open(file, mode) do |io|
        yield io
      end
    end
  end

  class << self
    # Reduces a file-like object to its path, flushing pending writes first
    # so that the data is visible to whoever opens the path. Objects without
    # #path are returned unchanged.
    def convert_file(file)
      return file unless file.respond_to?(:path)
      file.flush if flushable?(file)
      file.path
    end

    # @param files a single file/path, or an Array or Set of them
    # @return [Array] one converted entry per input
    def convert_files(files)
      case files
      when Set, Array
        files.map { |file| convert_file(file) }
      else
        [convert_file(files)]
      end
    end

    private

    # Whether +file+ can be flushed right now. Objects exposing #open? are
    # asked directly. File and Tempfile expose #closed? instead, so for those
    # "not closed" is treated as open; without this they would never be
    # flushed before their path is handed out.
    def flushable?(file)
      return false unless file.respond_to?(:flush)
      return file.open? if file.respond_to?(:open?)
      file.respond_to?(:closed?) && !file.closed?
    end
  end

  private

  # Hash of column name => default ruby value for the given columns.
  def default_row(columns)
    columns.each_with_object({}) do |(_, column), row|
      row[column.name] = column.default_ruby_value
    end
  end

  # Feeds every input row through the function, appends each output to the
  # output buffer, then flushes it.
  def apply_buffer(input_buffer, output_buffer)
    input_buffer.each do |input|
      safe_apply_function(input) do |output|
        output_buffer.append output
      end
    end
    output_buffer.flush
  end

  # Calls the function on +input+ and yields each output (a single result is
  # treated as a one-element list). A nil/false input is skipped; any
  # StandardError raised along the way is logged as a warning.
  def safe_apply_function(input, &block)
    return unless input
    Array.wrap(function.call(input)).each do |output|
      yield output
    end
  rescue
    @store.logger.warn("failed to process '#{input}' for #{target.name}, skipping")
  end
end
|
265
|
+
end
|