masamune 0.14.0 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +10 -1
- data/Rakefile +37 -0
- data/bin/masamune-dump +22 -0
- data/bin/masamune-elastic-mapreduce +22 -0
- data/bin/masamune-hive +22 -0
- data/bin/masamune-psql +22 -0
- data/bin/masamune-shell +22 -0
- data/lib/masamune/cached_filesystem.rb +1 -1
- data/lib/masamune/commands/shell.rb +1 -1
- data/lib/masamune/configuration.rb +5 -7
- data/lib/masamune/data_plan/elem.rb +15 -7
- data/lib/masamune/data_plan/engine.rb +2 -0
- data/lib/masamune/data_plan/rule.rb +16 -5
- data/lib/masamune/data_plan/set.rb +8 -8
- data/lib/masamune/filesystem.rb +12 -6
- data/lib/masamune/schema/catalog.rb +6 -6
- data/lib/masamune/schema/column.rb +1 -1
- data/lib/masamune/schema/map.rb +6 -2
- data/lib/masamune/schema/store.rb +31 -3
- data/lib/masamune/tasks/shell_thor.rb +1 -1
- data/lib/masamune/thor.rb +12 -4
- data/lib/masamune/version.rb +1 -1
- data/lib/masamune.rb +0 -1
- data/spec/masamune/actions/elastic_mapreduce_spec.rb +0 -2
- data/spec/masamune/actions/execute_spec.rb +0 -2
- data/spec/masamune/actions/hadoop_filesystem_spec.rb +0 -2
- data/spec/masamune/actions/hadoop_streaming_spec.rb +0 -2
- data/spec/masamune/actions/hive_spec.rb +0 -2
- data/spec/masamune/actions/invoke_parallel_spec.rb +0 -2
- data/spec/masamune/actions/postgres_admin_spec.rb +0 -2
- data/spec/masamune/actions/postgres_spec.rb +0 -2
- data/spec/masamune/actions/s3cmd_spec.rb +0 -2
- data/spec/masamune/actions/transform_spec.rb +0 -2
- data/spec/masamune/after_initialization_callbacks_spec.rb +0 -2
- data/spec/masamune/cached_filesystem_spec.rb +0 -2
- data/spec/masamune/commands/hadoop_filesystem_spec.rb +0 -2
- data/spec/masamune/commands/hadoop_streaming_spec.rb +0 -2
- data/spec/masamune/commands/hive_spec.rb +0 -2
- data/spec/masamune/commands/postgres_admin_spec.rb +0 -2
- data/spec/masamune/commands/postgres_spec.rb +0 -2
- data/spec/masamune/commands/retry_with_backoff_spec.rb +0 -2
- data/spec/masamune/commands/s3cmd_spec.rb +0 -2
- data/spec/masamune/commands/shell_spec.rb +0 -2
- data/spec/masamune/configuration_spec.rb +12 -2
- data/spec/masamune/data_plan/builder_spec.rb +0 -2
- data/spec/masamune/data_plan/elem_spec.rb +73 -5
- data/spec/masamune/data_plan/engine_spec.rb +0 -2
- data/spec/masamune/data_plan/rule_spec.rb +51 -6
- data/spec/masamune/data_plan/set_spec.rb +2 -5
- data/spec/masamune/environment_spec.rb +0 -2
- data/spec/masamune/filesystem_spec.rb +33 -4
- data/spec/masamune/helpers/postgres_spec.rb +0 -2
- data/spec/masamune/rspec/job_fixture_spec.rb +365 -0
- data/spec/masamune/rspec/shared_example_group_spec.rb +73 -0
- data/spec/masamune/schema/catalog_spec.rb +14 -2
- data/spec/masamune/schema/column_spec.rb +0 -2
- data/spec/masamune/schema/dimension_spec.rb +0 -2
- data/spec/masamune/schema/fact_spec.rb +0 -2
- data/spec/masamune/schema/map_spec.rb +51 -2
- data/spec/masamune/schema/row_spec.rb +0 -2
- data/spec/masamune/schema/store_spec.rb +23 -2
- data/spec/masamune/schema/table_spec.rb +0 -2
- data/spec/masamune/string_format_spec.rb +0 -2
- data/spec/masamune/tasks/dump_thor_spec.rb +0 -3
- data/spec/masamune/tasks/elastic_mapreduce_thor_spec.rb +0 -3
- data/spec/masamune/tasks/hive_thor_spec.rb +0 -3
- data/spec/masamune/tasks/postgres_thor_spec.rb +0 -3
- data/spec/masamune/tasks/shell_thor_spec.rb +0 -3
- data/spec/masamune/template_spec.rb +0 -2
- data/spec/masamune/thor_spec.rb +53 -8
- data/spec/masamune/transform/bulk_upsert.dimension_spec.rb +0 -2
- data/spec/masamune/transform/consolidate_dimension_spec.rb +0 -2
- data/spec/masamune/transform/deduplicate_dimension_spec.rb +0 -2
- data/spec/masamune/transform/define_schema_spec.rb +0 -2
- data/spec/masamune/transform/define_table.dimension_spec.rb +0 -2
- data/spec/masamune/transform/define_table.fact_spec.rb +0 -2
- data/spec/masamune/transform/define_table.table_spec.rb +0 -2
- data/spec/masamune/transform/denormalize_table_spec.rb +0 -2
- data/spec/masamune/transform/insert_reference_values.dimension_spec.rb +0 -2
- data/spec/masamune/transform/insert_reference_values.fact_spec.rb +0 -2
- data/spec/masamune/transform/load_dimension_spec.rb +0 -2
- data/spec/masamune/transform/load_fact_spec.rb +0 -2
- data/spec/masamune/transform/relabel_dimension_spec.rb +0 -2
- data/spec/masamune/transform/rollup_fact_spec.rb +0 -2
- data/spec/masamune/transform/snapshot_dimension_spec.rb +0 -2
- data/spec/masamune/transform/stage_dimension_spec.rb +0 -2
- data/spec/masamune/transform/stage_fact_spec.rb +0 -2
- data/spec/masamune_spec.rb +0 -2
- data/spec/spec_helper.rb +2 -0
- data/spec/support/masamune/job_example_group.rb +62 -0
- data/spec/support/masamune/job_fixture.rb +137 -0
- data/spec/support/masamune/shared_example_group.rb +203 -0
- data/spec/support/masamune/step_example_group.rb +68 -0
- data/spec/support/masamune/step_fixture.rb +91 -0
- data/{lib/masamune/thor_loader.rb → spec/support/masamune/task_example_group.rb} +33 -10
- data/spec/support/rspec/example/action_example_group.rb +1 -1
- metadata +32 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7abad0d40705fd8c85b74398cf3ce198d6e1e0a3
|
4
|
+
data.tar.gz: 049aeb65a25a01b8dca0a9db21d7e543e9eadae5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5a8ef99944a527c5d0fde7b4875fc1d6541dd830060372fb47752a8a915f8d6bb7a3c359ad54c2592b2939c1bbc0ac1ea2c486f34c75fb31c7551524ef278963
|
7
|
+
data.tar.gz: 3d44f7583a6048a70010c124b8f8c881163196b285ced0fbfe78cb0bc3858bcd3768bc3b3528f0bd8a8a9043060c8fb33f4a813bb472ffa3a751cd71bd0fbbbb
|
data/README.md
CHANGED
@@ -26,7 +26,7 @@ class ExampleThor < Thor
|
|
26
26
|
# Describe a Data Processing Job
|
27
27
|
desc 'extract_logs', 'Organize log files by YYYY-MM-DD'
|
28
28
|
|
29
|
-
target fs.path(:target_dir, '%Y-%m-%d'
|
29
|
+
target fs.path(:target_dir, '%Y-%m-%d')
|
30
30
|
source fs.path(:source_dir, '%Y%m%d*.log')
|
31
31
|
def extract_logs
|
32
32
|
targets.missing.each do |target|
|
@@ -45,6 +45,15 @@ Execute your dataflow with the goal of processing all data from the start of the
|
|
45
45
|
thor extract_logs --start '1 year ago'
|
46
46
|
```
|
47
47
|
|
48
|
+
Testing
|
49
|
+
---------------
|
50
|
+
```
|
51
|
+
rake spec # Run Rspec unit code examples
|
52
|
+
rake spec:acceptance # Run Rspec acceptance code examples
|
53
|
+
rake spec:all # Run All Rspec code examples
|
54
|
+
rake spec:unit # Run Rspec unit code examples
|
55
|
+
```
|
56
|
+
|
48
57
|
Contributing
|
49
58
|
---------------
|
50
59
|
|
data/Rakefile
CHANGED
@@ -1,4 +1,25 @@
|
|
1
1
|
#!/usr/bin/env rake
|
2
|
+
# The MIT License (MIT)
|
3
|
+
#
|
4
|
+
# Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
|
5
|
+
#
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
7
|
+
# of this software and associated documentation files (the "Software"), to deal
|
8
|
+
# in the Software without restriction, including without limitation the rights
|
9
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
10
|
+
# copies of the Software, and to permit persons to whom the Software is
|
11
|
+
# furnished to do so, subject to the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be included in
|
14
|
+
# all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
17
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
18
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
19
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
20
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
21
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
22
|
+
# THE SOFTWARE.
|
2
23
|
|
3
24
|
begin
|
4
25
|
require 'bundler/setup'
|
@@ -10,6 +31,22 @@ Bundler::GemHelper.install_tasks
|
|
10
31
|
|
11
32
|
require 'rspec/core/rake_task'
|
12
33
|
|
34
|
+
desc 'Run Rspec unit code examples'
|
13
35
|
RSpec::Core::RakeTask.new(:spec)
|
14
36
|
|
37
|
+
namespace :spec do
|
38
|
+
desc 'Run Rspec unit code examples'
|
39
|
+
RSpec::Core::RakeTask.new(:unit) do |spec|
|
40
|
+
spec.pattern = "spec/**/*_spec.rb"
|
41
|
+
end
|
42
|
+
|
43
|
+
desc 'Run Rspec acceptance code examples'
|
44
|
+
RSpec::Core::RakeTask.new(:acceptance) do |spec|
|
45
|
+
spec.pattern = "examples/**/*_spec.rb"
|
46
|
+
end
|
47
|
+
|
48
|
+
desc 'Run All Rspec code examples'
|
49
|
+
task all: [:unit, :acceptance]
|
50
|
+
end
|
51
|
+
|
15
52
|
task :default => :spec
|
data/bin/masamune-dump
CHANGED
@@ -1,4 +1,26 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# The MIT License (MIT)
|
3
|
+
#
|
4
|
+
# Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
|
5
|
+
#
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
7
|
+
# of this software and associated documentation files (the "Software"), to deal
|
8
|
+
# in the Software without restriction, including without limitation the rights
|
9
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
10
|
+
# copies of the Software, and to permit persons to whom the Software is
|
11
|
+
# furnished to do so, subject to the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be included in
|
14
|
+
# all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
17
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
18
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
19
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
20
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
21
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
22
|
+
# THE SOFTWARE.
|
23
|
+
|
2
24
|
$: << File.expand_path('../../lib/', __FILE__)
|
3
25
|
require 'masamune/tasks/dump_thor'
|
4
26
|
Masamune::Tasks::DumpThor.start(ARGV)
|
data/bin/masamune-elastic-mapreduce
CHANGED
@@ -1,4 +1,26 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# The MIT License (MIT)
|
3
|
+
#
|
4
|
+
# Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
|
5
|
+
#
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
7
|
+
# of this software and associated documentation files (the "Software"), to deal
|
8
|
+
# in the Software without restriction, including without limitation the rights
|
9
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
10
|
+
# copies of the Software, and to permit persons to whom the Software is
|
11
|
+
# furnished to do so, subject to the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be included in
|
14
|
+
# all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
17
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
18
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
19
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
20
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
21
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
22
|
+
# THE SOFTWARE.
|
23
|
+
|
2
24
|
$: << File.expand_path('../../lib/', __FILE__)
|
3
25
|
require 'masamune/tasks/elastic_mapreduce_thor'
|
4
26
|
Masamune::Tasks::ElasticMapreduceThor.start(ARGV)
|
data/bin/masamune-hive
CHANGED
@@ -1,4 +1,26 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# The MIT License (MIT)
|
3
|
+
#
|
4
|
+
# Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
|
5
|
+
#
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
7
|
+
# of this software and associated documentation files (the "Software"), to deal
|
8
|
+
# in the Software without restriction, including without limitation the rights
|
9
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
10
|
+
# copies of the Software, and to permit persons to whom the Software is
|
11
|
+
# furnished to do so, subject to the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be included in
|
14
|
+
# all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
17
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
18
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
19
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
20
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
21
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
22
|
+
# THE SOFTWARE.
|
23
|
+
|
2
24
|
$: << File.expand_path('../../lib/', __FILE__)
|
3
25
|
require 'masamune/tasks/hive_thor'
|
4
26
|
Masamune::Tasks::HiveThor.start(ARGV)
|
data/bin/masamune-psql
CHANGED
@@ -1,4 +1,26 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# The MIT License (MIT)
|
3
|
+
#
|
4
|
+
# Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
|
5
|
+
#
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
7
|
+
# of this software and associated documentation files (the "Software"), to deal
|
8
|
+
# in the Software without restriction, including without limitation the rights
|
9
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
10
|
+
# copies of the Software, and to permit persons to whom the Software is
|
11
|
+
# furnished to do so, subject to the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be included in
|
14
|
+
# all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
17
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
18
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
19
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
20
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
21
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
22
|
+
# THE SOFTWARE.
|
23
|
+
|
2
24
|
$: << File.expand_path('../../lib/', __FILE__)
|
3
25
|
require 'masamune/tasks/postgres_thor'
|
4
26
|
Masamune::Tasks::PostgresThor.start(ARGV)
|
data/bin/masamune-shell
CHANGED
@@ -1,4 +1,26 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# The MIT License (MIT)
|
3
|
+
#
|
4
|
+
# Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
|
5
|
+
#
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
7
|
+
# of this software and associated documentation files (the "Software"), to deal
|
8
|
+
# in the Software without restriction, including without limitation the rights
|
9
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
10
|
+
# copies of the Software, and to permit persons to whom the Software is
|
11
|
+
# furnished to do so, subject to the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be included in
|
14
|
+
# all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
17
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
18
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
19
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
20
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
21
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
22
|
+
# THE SOFTWARE.
|
23
|
+
|
2
24
|
$: << File.expand_path('../../lib/', __FILE__)
|
3
25
|
require 'masamune/tasks/shell_thor'
|
4
26
|
Masamune::Tasks::ShellThor.start(ARGV)
|
data/lib/masamune/configuration.rb
CHANGED
@@ -36,7 +36,6 @@ class Masamune::Configuration
|
|
36
36
|
attr_accessor :quiet
|
37
37
|
attr_accessor :verbose
|
38
38
|
attr_accessor :debug
|
39
|
-
attr_accessor :no_op
|
40
39
|
attr_accessor :dry_run
|
41
40
|
attr_accessor :lock
|
42
41
|
attr_accessor :retries
|
@@ -56,7 +55,6 @@ class Masamune::Configuration
|
|
56
55
|
self.quiet = false
|
57
56
|
self.verbose = false
|
58
57
|
self.debug = false
|
59
|
-
self.no_op = false
|
60
58
|
self.dry_run = false
|
61
59
|
self.lock = nil
|
62
60
|
self.retries = 3
|
@@ -70,9 +68,10 @@ class Masamune::Configuration
|
|
70
68
|
end
|
71
69
|
end
|
72
70
|
|
73
|
-
def load(
|
71
|
+
def load(path)
|
74
72
|
@load_once ||= begin
|
75
|
-
|
73
|
+
config_file = filesystem.eval_path(path)
|
74
|
+
load_yaml_erb_file(config_file).each_pair do |command, value|
|
76
75
|
if COMMANDS.include?(command)
|
77
76
|
send("#{command}=", value)
|
78
77
|
elsif command == 'paths'
|
@@ -82,7 +81,7 @@ class Masamune::Configuration
|
|
82
81
|
self.params.merge! value
|
83
82
|
end
|
84
83
|
end
|
85
|
-
logger.debug("Loaded configuration #{
|
84
|
+
logger.debug("Loaded configuration #{config_file}")
|
86
85
|
load_catalog(configuration.postgres.fetch(:schema_files, []) + configuration.hive.fetch(:schema_files, []))
|
87
86
|
true
|
88
87
|
end
|
@@ -132,8 +131,7 @@ class Masamune::Configuration
|
|
132
131
|
opts << '--quiet' if quiet
|
133
132
|
opts << '--verbose' if verbose
|
134
133
|
opts << '--debug' if debug
|
135
|
-
opts << '--
|
136
|
-
opts << '--dry_run' if dry_run
|
134
|
+
opts << '--dry-run' if dry_run
|
137
135
|
opts
|
138
136
|
end
|
139
137
|
|
data/lib/masamune/data_plan/elem.rb
CHANGED
@@ -47,10 +47,8 @@ class Masamune::DataPlan::Elem
|
|
47
47
|
def exists?
|
48
48
|
if rule.for_path?
|
49
49
|
rule.engine.filesystem.exists?(path)
|
50
|
-
elsif rule.for_table_with_partition?
|
51
|
-
rule.engine.postgres_helper.table_exists?(table)
|
52
50
|
elsif rule.for_table?
|
53
|
-
table
|
51
|
+
rule.engine.postgres_helper.table_exists?(table)
|
54
52
|
end
|
55
53
|
end
|
56
54
|
|
@@ -70,14 +68,16 @@ class Masamune::DataPlan::Elem
|
|
70
68
|
|
71
69
|
def explode
|
72
70
|
return Set.new(to_enum(__method__)) unless block_given?
|
73
|
-
if rule.for_path?
|
71
|
+
if rule.for_path? && rule.free?
|
74
72
|
file_glob = path
|
75
73
|
file_glob += '/' unless path.include?('*') || path.include?('.')
|
76
74
|
file_glob += '*' unless path.include?('*')
|
77
|
-
rule.engine.filesystem.glob(file_glob)
|
75
|
+
rule.engine.filesystem.glob(file_glob) do |new_path|
|
78
76
|
yield rule.bind_input(new_path)
|
79
77
|
end
|
80
|
-
elsif rule.
|
78
|
+
elsif rule.for_path? && rule.bound?
|
79
|
+
yield self if exists?
|
80
|
+
elsif rule.for_table?
|
81
81
|
yield self if exists?
|
82
82
|
end
|
83
83
|
end
|
@@ -136,6 +136,10 @@ class Masamune::DataPlan::Elem
|
|
136
136
|
@options[:glob]
|
137
137
|
end
|
138
138
|
|
139
|
+
def rest
|
140
|
+
@options[:rest]
|
141
|
+
end
|
142
|
+
|
139
143
|
def next(i = 1)
|
140
144
|
self.class.new(@rule, start_time.advance(@rule.time_step => +1*i), @options)
|
141
145
|
end
|
@@ -184,6 +188,10 @@ class Masamune::DataPlan::Elem
|
|
184
188
|
private
|
185
189
|
|
186
190
|
def strftime_format
|
187
|
-
@strftime_format ||=
|
191
|
+
@strftime_format ||= begin
|
192
|
+
format = @rule.strftime_format.dup
|
193
|
+
format.sub!('*', glob || rest) if glob || rest
|
194
|
+
format
|
195
|
+
end
|
188
196
|
end
|
189
197
|
end
|
data/lib/masamune/data_plan/engine.rb
CHANGED
@@ -121,6 +121,8 @@ class Masamune::DataPlan::Engine
|
|
121
121
|
def prepare(rule, options = {})
|
122
122
|
@targets[rule].merge options.fetch(:targets, [])
|
123
123
|
@sources[rule].merge options.fetch(:sources, [])
|
124
|
+
@target_rules[rule].try(:prepare)
|
125
|
+
@source_rules[rule].try(:prepare)
|
124
126
|
|
125
127
|
constrain_max_depth(rule) do
|
126
128
|
sources(rule).group_by { |source| rule_for_target(source.input) }.each do |derived_rule, sources|
|
data/lib/masamune/data_plan/rule.rb
CHANGED
@@ -40,6 +40,10 @@ class Masamune::DataPlan::Rule
|
|
40
40
|
@options = options
|
41
41
|
end
|
42
42
|
|
43
|
+
def prepare
|
44
|
+
pattern
|
45
|
+
end
|
46
|
+
|
43
47
|
def for_targets?
|
44
48
|
@type == :target
|
45
49
|
end
|
@@ -91,7 +95,7 @@ class Masamune::DataPlan::Rule
|
|
91
95
|
def pattern
|
92
96
|
@pattern ||= begin
|
93
97
|
if for_path?
|
94
|
-
|
98
|
+
engine.filesystem.eval_path(path)
|
95
99
|
elsif for_table_with_partition?
|
96
100
|
[table , partition].join('_')
|
97
101
|
elsif for_table?
|
@@ -104,9 +108,17 @@ class Masamune::DataPlan::Rule
|
|
104
108
|
@options.fetch(:primary, true)
|
105
109
|
end
|
106
110
|
|
111
|
+
def free?
|
112
|
+
pattern.include?('%') || pattern.include?('*')
|
113
|
+
end
|
114
|
+
|
115
|
+
def bound?
|
116
|
+
!free?
|
117
|
+
end
|
118
|
+
|
107
119
|
def matches?(input)
|
108
120
|
matched_pattern = match_data_hash(matcher.match(input))
|
109
|
-
matched_pattern.present? && matched_pattern[:rest].blank?
|
121
|
+
matched_pattern.present? && (matched_pattern[:rest].blank? || matched_pattern[:rest].include?('*'))
|
110
122
|
end
|
111
123
|
|
112
124
|
def bind_date_or_time(input = nil)
|
@@ -288,9 +300,8 @@ class Masamune::DataPlan::Rule
|
|
288
300
|
end
|
289
301
|
end
|
290
302
|
|
291
|
-
def matched_extra(matched_data)
|
292
|
-
|
293
|
-
{glob: matched_data[:glob]}.reject { |_,v| v == '*' }
|
303
|
+
def matched_extra(matched_data = {})
|
304
|
+
matched_data.slice(:glob, :rest).reject { |_, v| v.blank? || v == '*' }
|
294
305
|
end
|
295
306
|
|
296
307
|
def options_for_elem
|
data/lib/masamune/data_plan/set.rb
CHANGED
@@ -47,7 +47,7 @@ class Masamune::DataPlan::Set < Set
|
|
47
47
|
def missing
|
48
48
|
return self.class.new(rule, to_enum(__method__)) unless block_given?
|
49
49
|
self.each do |elem|
|
50
|
-
yield elem if elem.explode.
|
50
|
+
yield elem if elem.explode.none?
|
51
51
|
end
|
52
52
|
end
|
53
53
|
|
@@ -90,13 +90,13 @@ class Masamune::DataPlan::Set < Set
|
|
90
90
|
return Masamune::DataPlan::Set::EMPTY if empty? || @rule.for_sources?
|
91
91
|
return self.class.new(rule, to_enum(__method__)) unless block_given?
|
92
92
|
set = Set.new
|
93
|
-
missing
|
93
|
+
missing do |target|
|
94
94
|
yield target if set.add?(target)
|
95
95
|
end
|
96
|
-
incomplete
|
96
|
+
incomplete do |target|
|
97
97
|
yield target if set.add?(target)
|
98
98
|
end
|
99
|
-
stale
|
99
|
+
stale do |target|
|
100
100
|
yield target if set.add?(target)
|
101
101
|
end
|
102
102
|
end
|
@@ -105,7 +105,7 @@ class Masamune::DataPlan::Set < Set
|
|
105
105
|
return Masamune::DataPlan::Set::EMPTY if empty? || @rule.for_sources?
|
106
106
|
return self.class.new(rule, to_enum(__method__)) unless block_given?
|
107
107
|
set = Set.new
|
108
|
-
actionable
|
108
|
+
actionable do |target|
|
109
109
|
yield target if set.add?(target) && target.sources.existing.any?
|
110
110
|
end
|
111
111
|
end
|
@@ -124,7 +124,7 @@ class Masamune::DataPlan::Set < Set
|
|
124
124
|
return Masamune::DataPlan::Set::EMPTY if empty? || @rule.for_targets?
|
125
125
|
return self.class.new(self.first.targets.rule, to_enum(__method__)) unless block_given?
|
126
126
|
self.each do |elem|
|
127
|
-
elem.targets
|
127
|
+
elem.targets do |target|
|
128
128
|
yield target
|
129
129
|
end
|
130
130
|
end
|
@@ -134,7 +134,7 @@ class Masamune::DataPlan::Set < Set
|
|
134
134
|
return Masamune::DataPlan::Set::EMPTY if empty? || @rule.for_sources?
|
135
135
|
return self.class.new(self.first.sources.rule, to_enum(__method__)) unless block_given?
|
136
136
|
self.each do |elem|
|
137
|
-
elem.sources
|
137
|
+
elem.sources do |source|
|
138
138
|
yield source
|
139
139
|
end
|
140
140
|
end
|
@@ -171,6 +171,6 @@ class Masamune::DataPlan::Set < Set
|
|
171
171
|
def target_stale?(source, target)
|
172
172
|
target.last_modified_at != Masamune::DataPlan::Elem::MISSING_MODIFIED_AT &&
|
173
173
|
source.last_modified_at != Masamune::DataPlan::Elem::MISSING_MODIFIED_AT &&
|
174
|
-
source.last_modified_at
|
174
|
+
source.last_modified_at > target.last_modified_at
|
175
175
|
end
|
176
176
|
end
|
data/lib/masamune/filesystem.rb
CHANGED
@@ -51,13 +51,14 @@ module Masamune
|
|
51
51
|
self
|
52
52
|
end
|
53
53
|
|
54
|
-
def get_path(symbol, *
|
54
|
+
def get_path(symbol, *args)
|
55
|
+
options = args.last.is_a?(Hash) ? args.pop : {}
|
55
56
|
lazy_path = lambda do |fs|
|
56
57
|
fs.has_path?(symbol) or raise "Path :#{symbol} not defined"
|
57
58
|
path, options = fs.paths[symbol]
|
58
59
|
|
59
60
|
mkdir!(path) if options[:mkdir]
|
60
|
-
expand_params(fs,
|
61
|
+
expand_params(fs, args.any? ? File.join(path, args) : path)
|
61
62
|
end
|
62
63
|
|
63
64
|
if eager_load_paths?
|
@@ -76,6 +77,10 @@ module Masamune
|
|
76
77
|
@paths
|
77
78
|
end
|
78
79
|
|
80
|
+
def eval_path(path)
|
81
|
+
path.respond_to?(:call) ? path.call(self) : path
|
82
|
+
end
|
83
|
+
|
79
84
|
def expand_params(fs, path)
|
80
85
|
new_path = path.dup
|
81
86
|
fs.environment.configuration.params.each do |key, value|
|
@@ -190,7 +195,7 @@ module Masamune
|
|
190
195
|
yield OpenStruct.new(name: name, mtime: Time.parse("#{date} #{time} +0000").at_beginning_of_minute.utc, size: size.to_i)
|
191
196
|
end
|
192
197
|
when :local
|
193
|
-
Dir.glob(pattern.gsub(%r{
|
198
|
+
Dir.glob(pattern.gsub(%r{/\*(\.\w+)?\Z}, '/**/*\1')) do |file|
|
194
199
|
stat = File.stat(file)
|
195
200
|
yield OpenStruct.new(name: file, mtime: stat.mtime.at_beginning_of_minute.utc, size: stat.size.to_i)
|
196
201
|
end
|
@@ -216,7 +221,8 @@ module Masamune
|
|
216
221
|
when :s3
|
217
222
|
touch! *dir_set.map { |dir| File.join(dir, '.not_empty') }
|
218
223
|
when :local
|
219
|
-
|
224
|
+
missing_dir_set = dir_set.reject { |dir| File.exists?(dir) }
|
225
|
+
FileUtils.mkdir_p(missing_dir_set, file_util_args) if missing_dir_set.any?
|
220
226
|
end
|
221
227
|
end
|
222
228
|
end
|
@@ -243,7 +249,7 @@ module Masamune
|
|
243
249
|
yield q(pattern, name)
|
244
250
|
end
|
245
251
|
when :local
|
246
|
-
Dir.glob(pattern.gsub(%r{
|
252
|
+
Dir.glob(pattern.gsub(%r{/\*(\.\w+)?\Z}, '/**/*\1')) do |file|
|
247
253
|
yield file
|
248
254
|
end
|
249
255
|
end
|
@@ -473,7 +479,7 @@ module Masamune
|
|
473
479
|
end
|
474
480
|
|
475
481
|
def file_util_args
|
476
|
-
{noop: configuration.
|
482
|
+
{noop: configuration.dry_run, verbose: configuration.verbose}
|
477
483
|
end
|
478
484
|
|
479
485
|
def qualify_file(dir, file)
|
data/lib/masamune/schema/catalog.rb
CHANGED
@@ -198,21 +198,21 @@ module Masamune::Schema
|
|
198
198
|
@context.pop
|
199
199
|
end
|
200
200
|
|
201
|
-
def load(
|
202
|
-
case
|
201
|
+
def load(schema_file)
|
202
|
+
case schema_file
|
203
203
|
when /\.rb\Z/
|
204
|
-
instance_eval(File.read(
|
204
|
+
instance_eval(File.read(schema_file), schema_file)
|
205
205
|
when /\.psql(\.erb)?\Z/
|
206
|
-
@stores[:postgres].extra <<
|
206
|
+
@stores[:postgres].extra << schema_file
|
207
207
|
when /\.hql(\.erb)?\Z/
|
208
|
-
@stores[:hive].extra <<
|
208
|
+
@stores[:hive].extra << schema_file
|
209
209
|
end
|
210
210
|
end
|
211
211
|
|
212
212
|
private
|
213
213
|
|
214
214
|
def dereference_column(id, options = {})
|
215
|
-
store_id = id.split(/\./).reverse.last.try(:to_sym)
|
215
|
+
store_id = id.to_s.split(/\./).reverse.last.try(:to_sym)
|
216
216
|
context = store_id && @stores.key?(store_id) ? @stores[store_id] : @context
|
217
217
|
context.dereference_column(id, options)
|
218
218
|
end
|
data/lib/masamune/schema/column.rb
CHANGED
@@ -298,7 +298,7 @@ module Masamune::Schema
|
|
298
298
|
|
299
299
|
def default_ruby_value
|
300
300
|
return [] if array_value?
|
301
|
-
return {} if hash_value?
|
301
|
+
return HashWithIndifferentAccess.new { |h,k| h[k] = HashWithIndifferentAccess.new(&h.default_proc) } if hash_value?
|
302
302
|
case type
|
303
303
|
when :date
|
304
304
|
Date.new(0)
|
data/lib/masamune/schema/map.rb
CHANGED
@@ -76,7 +76,11 @@ module Masamune::Schema
|
|
76
76
|
end
|
77
77
|
|
78
78
|
def separator
|
79
|
-
@separator ||=
|
79
|
+
@separator ||=
|
80
|
+
case @store.format
|
81
|
+
when :tsv then "\t"
|
82
|
+
when :csv then ','
|
83
|
+
end
|
80
84
|
end
|
81
85
|
end
|
82
86
|
|
@@ -237,7 +241,7 @@ module Masamune::Schema
|
|
237
241
|
private
|
238
242
|
|
239
243
|
def default_row(columns)
|
240
|
-
{}.tap do |row|
|
244
|
+
{}.with_indifferent_access.tap do |row|
|
241
245
|
columns.each do |_, column|
|
242
246
|
row[column.compact_name] = column.default_ruby_value
|
243
247
|
end
|
data/lib/masamune/schema/store.rb
CHANGED
@@ -33,9 +33,9 @@ module Masamune::Schema
|
|
33
33
|
DEFAULT_ATTRIBUTES =
|
34
34
|
{
|
35
35
|
type: nil,
|
36
|
-
format: ->(store) { store
|
37
|
-
json_encoding: ->(store) { store
|
38
|
-
headers: ->(store) { store
|
36
|
+
format: ->(store) { default_format(store) },
|
37
|
+
json_encoding: ->(store) { default_json_encoding(store) },
|
38
|
+
headers: ->(store) { default_headers(store) },
|
39
39
|
debug: false
|
40
40
|
}
|
41
41
|
|
@@ -111,5 +111,33 @@ module Masamune::Schema
|
|
111
111
|
end
|
112
112
|
result.to_a
|
113
113
|
end
|
114
|
+
|
115
|
+
private
|
116
|
+
|
117
|
+
class << self
|
118
|
+
def default_format(store)
|
119
|
+
case store.type
|
120
|
+
when :postgres then :csv
|
121
|
+
when :hive then :tsv
|
122
|
+
else :raw
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
def default_headers(store)
|
127
|
+
return false if store.format == :raw
|
128
|
+
case store.type
|
129
|
+
when :postgres then true
|
130
|
+
else false
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
def default_json_encoding(store)
|
135
|
+
return :raw if store.format == :raw
|
136
|
+
case store.type
|
137
|
+
when :postgres then :quoted
|
138
|
+
else :raw
|
139
|
+
end
|
140
|
+
end
|
141
|
+
end
|
114
142
|
end
|
115
143
|
end
|
data/lib/masamune/tasks/shell_thor.rb
CHANGED
@@ -27,7 +27,7 @@ require 'pry'
|
|
27
27
|
module Masamune::Tasks
|
28
28
|
class ShellThor < Thor
|
29
29
|
include Masamune::Thor
|
30
|
-
include Masamune::Actions::
|
30
|
+
include Masamune::Actions::DateParse
|
31
31
|
|
32
32
|
# FIXME need to add an unnecessary namespace until this issue is fixed:
|
33
33
|
# https://github.com/wycats/thor/pull/247
|