masamune 0.14.0 → 0.15.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +10 -1
- data/Rakefile +37 -0
- data/bin/masamune-dump +22 -0
- data/bin/masamune-elastic-mapreduce +22 -0
- data/bin/masamune-hive +22 -0
- data/bin/masamune-psql +22 -0
- data/bin/masamune-shell +22 -0
- data/lib/masamune/cached_filesystem.rb +1 -1
- data/lib/masamune/commands/shell.rb +1 -1
- data/lib/masamune/configuration.rb +5 -7
- data/lib/masamune/data_plan/elem.rb +15 -7
- data/lib/masamune/data_plan/engine.rb +2 -0
- data/lib/masamune/data_plan/rule.rb +16 -5
- data/lib/masamune/data_plan/set.rb +8 -8
- data/lib/masamune/filesystem.rb +12 -6
- data/lib/masamune/schema/catalog.rb +6 -6
- data/lib/masamune/schema/column.rb +1 -1
- data/lib/masamune/schema/map.rb +6 -2
- data/lib/masamune/schema/store.rb +31 -3
- data/lib/masamune/tasks/shell_thor.rb +1 -1
- data/lib/masamune/thor.rb +12 -4
- data/lib/masamune/version.rb +1 -1
- data/lib/masamune.rb +0 -1
- data/spec/masamune/actions/elastic_mapreduce_spec.rb +0 -2
- data/spec/masamune/actions/execute_spec.rb +0 -2
- data/spec/masamune/actions/hadoop_filesystem_spec.rb +0 -2
- data/spec/masamune/actions/hadoop_streaming_spec.rb +0 -2
- data/spec/masamune/actions/hive_spec.rb +0 -2
- data/spec/masamune/actions/invoke_parallel_spec.rb +0 -2
- data/spec/masamune/actions/postgres_admin_spec.rb +0 -2
- data/spec/masamune/actions/postgres_spec.rb +0 -2
- data/spec/masamune/actions/s3cmd_spec.rb +0 -2
- data/spec/masamune/actions/transform_spec.rb +0 -2
- data/spec/masamune/after_initialization_callbacks_spec.rb +0 -2
- data/spec/masamune/cached_filesystem_spec.rb +0 -2
- data/spec/masamune/commands/hadoop_filesystem_spec.rb +0 -2
- data/spec/masamune/commands/hadoop_streaming_spec.rb +0 -2
- data/spec/masamune/commands/hive_spec.rb +0 -2
- data/spec/masamune/commands/postgres_admin_spec.rb +0 -2
- data/spec/masamune/commands/postgres_spec.rb +0 -2
- data/spec/masamune/commands/retry_with_backoff_spec.rb +0 -2
- data/spec/masamune/commands/s3cmd_spec.rb +0 -2
- data/spec/masamune/commands/shell_spec.rb +0 -2
- data/spec/masamune/configuration_spec.rb +12 -2
- data/spec/masamune/data_plan/builder_spec.rb +0 -2
- data/spec/masamune/data_plan/elem_spec.rb +73 -5
- data/spec/masamune/data_plan/engine_spec.rb +0 -2
- data/spec/masamune/data_plan/rule_spec.rb +51 -6
- data/spec/masamune/data_plan/set_spec.rb +2 -5
- data/spec/masamune/environment_spec.rb +0 -2
- data/spec/masamune/filesystem_spec.rb +33 -4
- data/spec/masamune/helpers/postgres_spec.rb +0 -2
- data/spec/masamune/rspec/job_fixture_spec.rb +365 -0
- data/spec/masamune/rspec/shared_example_group_spec.rb +73 -0
- data/spec/masamune/schema/catalog_spec.rb +14 -2
- data/spec/masamune/schema/column_spec.rb +0 -2
- data/spec/masamune/schema/dimension_spec.rb +0 -2
- data/spec/masamune/schema/fact_spec.rb +0 -2
- data/spec/masamune/schema/map_spec.rb +51 -2
- data/spec/masamune/schema/row_spec.rb +0 -2
- data/spec/masamune/schema/store_spec.rb +23 -2
- data/spec/masamune/schema/table_spec.rb +0 -2
- data/spec/masamune/string_format_spec.rb +0 -2
- data/spec/masamune/tasks/dump_thor_spec.rb +0 -3
- data/spec/masamune/tasks/elastic_mapreduce_thor_spec.rb +0 -3
- data/spec/masamune/tasks/hive_thor_spec.rb +0 -3
- data/spec/masamune/tasks/postgres_thor_spec.rb +0 -3
- data/spec/masamune/tasks/shell_thor_spec.rb +0 -3
- data/spec/masamune/template_spec.rb +0 -2
- data/spec/masamune/thor_spec.rb +53 -8
- data/spec/masamune/transform/bulk_upsert.dimension_spec.rb +0 -2
- data/spec/masamune/transform/consolidate_dimension_spec.rb +0 -2
- data/spec/masamune/transform/deduplicate_dimension_spec.rb +0 -2
- data/spec/masamune/transform/define_schema_spec.rb +0 -2
- data/spec/masamune/transform/define_table.dimension_spec.rb +0 -2
- data/spec/masamune/transform/define_table.fact_spec.rb +0 -2
- data/spec/masamune/transform/define_table.table_spec.rb +0 -2
- data/spec/masamune/transform/denormalize_table_spec.rb +0 -2
- data/spec/masamune/transform/insert_reference_values.dimension_spec.rb +0 -2
- data/spec/masamune/transform/insert_reference_values.fact_spec.rb +0 -2
- data/spec/masamune/transform/load_dimension_spec.rb +0 -2
- data/spec/masamune/transform/load_fact_spec.rb +0 -2
- data/spec/masamune/transform/relabel_dimension_spec.rb +0 -2
- data/spec/masamune/transform/rollup_fact_spec.rb +0 -2
- data/spec/masamune/transform/snapshot_dimension_spec.rb +0 -2
- data/spec/masamune/transform/stage_dimension_spec.rb +0 -2
- data/spec/masamune/transform/stage_fact_spec.rb +0 -2
- data/spec/masamune_spec.rb +0 -2
- data/spec/spec_helper.rb +2 -0
- data/spec/support/masamune/job_example_group.rb +62 -0
- data/spec/support/masamune/job_fixture.rb +137 -0
- data/spec/support/masamune/shared_example_group.rb +203 -0
- data/spec/support/masamune/step_example_group.rb +68 -0
- data/spec/support/masamune/step_fixture.rb +91 -0
- data/{lib/masamune/thor_loader.rb → spec/support/masamune/task_example_group.rb} +33 -10
- data/spec/support/rspec/example/action_example_group.rb +1 -1
- metadata +32 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7abad0d40705fd8c85b74398cf3ce198d6e1e0a3
|
4
|
+
data.tar.gz: 049aeb65a25a01b8dca0a9db21d7e543e9eadae5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5a8ef99944a527c5d0fde7b4875fc1d6541dd830060372fb47752a8a915f8d6bb7a3c359ad54c2592b2939c1bbc0ac1ea2c486f34c75fb31c7551524ef278963
|
7
|
+
data.tar.gz: 3d44f7583a6048a70010c124b8f8c881163196b285ced0fbfe78cb0bc3858bcd3768bc3b3528f0bd8a8a9043060c8fb33f4a813bb472ffa3a751cd71bd0fbbbb
|
data/README.md
CHANGED
@@ -26,7 +26,7 @@ class ExampleThor < Thor
|
|
26
26
|
# Describe a Data Processing Job
|
27
27
|
desc 'extract_logs', 'Organize log files by YYYY-MM-DD'
|
28
28
|
|
29
|
-
target fs.path(:target_dir, '%Y-%m-%d'
|
29
|
+
target fs.path(:target_dir, '%Y-%m-%d')
|
30
30
|
source fs.path(:source_dir, '%Y%m%d*.log')
|
31
31
|
def extract_logs
|
32
32
|
targets.missing.each do |target|
|
@@ -45,6 +45,15 @@ Execute your dataflow with the goal of processing all data from the start of the
|
|
45
45
|
thor extract_logs --start '1 year ago'
|
46
46
|
```
|
47
47
|
|
48
|
+
Testing
|
49
|
+
---------------
|
50
|
+
```
|
51
|
+
rake spec # Run Rspec unit code examples
|
52
|
+
rake spec:acceptance # Run Rspec acceptance code examples
|
53
|
+
rake spec:all # Run All Rspec code examples
|
54
|
+
rake spec:unit # Run Rspec unit code examples
|
55
|
+
```
|
56
|
+
|
48
57
|
Contributing
|
49
58
|
---------------
|
50
59
|
|
data/Rakefile
CHANGED
@@ -1,4 +1,25 @@
|
|
1
1
|
#!/usr/bin/env rake
|
2
|
+
# The MIT License (MIT)
|
3
|
+
#
|
4
|
+
# Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
|
5
|
+
#
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
7
|
+
# of this software and associated documentation files (the "Software"), to deal
|
8
|
+
# in the Software without restriction, including without limitation the rights
|
9
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
10
|
+
# copies of the Software, and to permit persons to whom the Software is
|
11
|
+
# furnished to do so, subject to the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be included in
|
14
|
+
# all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
17
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
18
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
19
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
20
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
21
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
22
|
+
# THE SOFTWARE.
|
2
23
|
|
3
24
|
begin
|
4
25
|
require 'bundler/setup'
|
@@ -10,6 +31,22 @@ Bundler::GemHelper.install_tasks
|
|
10
31
|
|
11
32
|
require 'rspec/core/rake_task'
|
12
33
|
|
34
|
+
desc 'Run Rspec unit code examples'
|
13
35
|
RSpec::Core::RakeTask.new(:spec)
|
14
36
|
|
37
|
+
namespace :spec do
|
38
|
+
desc 'Run Rspec unit code examples'
|
39
|
+
RSpec::Core::RakeTask.new(:unit) do |spec|
|
40
|
+
spec.pattern = "spec/**/*_spec.rb"
|
41
|
+
end
|
42
|
+
|
43
|
+
desc 'Run Rspec acceptance code examples'
|
44
|
+
RSpec::Core::RakeTask.new(:acceptance) do |spec|
|
45
|
+
spec.pattern = "examples/**/*_spec.rb"
|
46
|
+
end
|
47
|
+
|
48
|
+
desc 'Run All Rspec code examples'
|
49
|
+
task all: [:unit, :acceptance]
|
50
|
+
end
|
51
|
+
|
15
52
|
task :default => :spec
|
data/bin/masamune-dump
CHANGED
@@ -1,4 +1,26 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# The MIT License (MIT)
|
3
|
+
#
|
4
|
+
# Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
|
5
|
+
#
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
7
|
+
# of this software and associated documentation files (the "Software"), to deal
|
8
|
+
# in the Software without restriction, including without limitation the rights
|
9
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
10
|
+
# copies of the Software, and to permit persons to whom the Software is
|
11
|
+
# furnished to do so, subject to the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be included in
|
14
|
+
# all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
17
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
18
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
19
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
20
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
21
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
22
|
+
# THE SOFTWARE.
|
23
|
+
|
2
24
|
$: << File.expand_path('../../lib/', __FILE__)
|
3
25
|
require 'masamune/tasks/dump_thor'
|
4
26
|
Masamune::Tasks::DumpThor.start(ARGV)
|
@@ -1,4 +1,26 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# The MIT License (MIT)
|
3
|
+
#
|
4
|
+
# Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
|
5
|
+
#
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
7
|
+
# of this software and associated documentation files (the "Software"), to deal
|
8
|
+
# in the Software without restriction, including without limitation the rights
|
9
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
10
|
+
# copies of the Software, and to permit persons to whom the Software is
|
11
|
+
# furnished to do so, subject to the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be included in
|
14
|
+
# all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
17
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
18
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
19
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
20
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
21
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
22
|
+
# THE SOFTWARE.
|
23
|
+
|
2
24
|
$: << File.expand_path('../../lib/', __FILE__)
|
3
25
|
require 'masamune/tasks/elastic_mapreduce_thor'
|
4
26
|
Masamune::Tasks::ElasticMapreduceThor.start(ARGV)
|
data/bin/masamune-hive
CHANGED
@@ -1,4 +1,26 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# The MIT License (MIT)
|
3
|
+
#
|
4
|
+
# Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
|
5
|
+
#
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
7
|
+
# of this software and associated documentation files (the "Software"), to deal
|
8
|
+
# in the Software without restriction, including without limitation the rights
|
9
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
10
|
+
# copies of the Software, and to permit persons to whom the Software is
|
11
|
+
# furnished to do so, subject to the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be included in
|
14
|
+
# all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
17
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
18
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
19
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
20
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
21
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
22
|
+
# THE SOFTWARE.
|
23
|
+
|
2
24
|
$: << File.expand_path('../../lib/', __FILE__)
|
3
25
|
require 'masamune/tasks/hive_thor'
|
4
26
|
Masamune::Tasks::HiveThor.start(ARGV)
|
data/bin/masamune-psql
CHANGED
@@ -1,4 +1,26 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# The MIT License (MIT)
|
3
|
+
#
|
4
|
+
# Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
|
5
|
+
#
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
7
|
+
# of this software and associated documentation files (the "Software"), to deal
|
8
|
+
# in the Software without restriction, including without limitation the rights
|
9
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
10
|
+
# copies of the Software, and to permit persons to whom the Software is
|
11
|
+
# furnished to do so, subject to the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be included in
|
14
|
+
# all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
17
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
18
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
19
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
20
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
21
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
22
|
+
# THE SOFTWARE.
|
23
|
+
|
2
24
|
$: << File.expand_path('../../lib/', __FILE__)
|
3
25
|
require 'masamune/tasks/postgres_thor'
|
4
26
|
Masamune::Tasks::PostgresThor.start(ARGV)
|
data/bin/masamune-shell
CHANGED
@@ -1,4 +1,26 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# The MIT License (MIT)
|
3
|
+
#
|
4
|
+
# Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
|
5
|
+
#
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
7
|
+
# of this software and associated documentation files (the "Software"), to deal
|
8
|
+
# in the Software without restriction, including without limitation the rights
|
9
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
10
|
+
# copies of the Software, and to permit persons to whom the Software is
|
11
|
+
# furnished to do so, subject to the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be included in
|
14
|
+
# all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
17
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
18
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
19
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
20
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
21
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
22
|
+
# THE SOFTWARE.
|
23
|
+
|
2
24
|
$: << File.expand_path('../../lib/', __FILE__)
|
3
25
|
require 'masamune/tasks/shell_thor'
|
4
26
|
Masamune::Tasks::ShellThor.start(ARGV)
|
@@ -36,7 +36,6 @@ class Masamune::Configuration
|
|
36
36
|
attr_accessor :quiet
|
37
37
|
attr_accessor :verbose
|
38
38
|
attr_accessor :debug
|
39
|
-
attr_accessor :no_op
|
40
39
|
attr_accessor :dry_run
|
41
40
|
attr_accessor :lock
|
42
41
|
attr_accessor :retries
|
@@ -56,7 +55,6 @@ class Masamune::Configuration
|
|
56
55
|
self.quiet = false
|
57
56
|
self.verbose = false
|
58
57
|
self.debug = false
|
59
|
-
self.no_op = false
|
60
58
|
self.dry_run = false
|
61
59
|
self.lock = nil
|
62
60
|
self.retries = 3
|
@@ -70,9 +68,10 @@ class Masamune::Configuration
|
|
70
68
|
end
|
71
69
|
end
|
72
70
|
|
73
|
-
def load(
|
71
|
+
def load(path)
|
74
72
|
@load_once ||= begin
|
75
|
-
|
73
|
+
config_file = filesystem.eval_path(path)
|
74
|
+
load_yaml_erb_file(config_file).each_pair do |command, value|
|
76
75
|
if COMMANDS.include?(command)
|
77
76
|
send("#{command}=", value)
|
78
77
|
elsif command == 'paths'
|
@@ -82,7 +81,7 @@ class Masamune::Configuration
|
|
82
81
|
self.params.merge! value
|
83
82
|
end
|
84
83
|
end
|
85
|
-
logger.debug("Loaded configuration #{
|
84
|
+
logger.debug("Loaded configuration #{config_file}")
|
86
85
|
load_catalog(configuration.postgres.fetch(:schema_files, []) + configuration.hive.fetch(:schema_files, []))
|
87
86
|
true
|
88
87
|
end
|
@@ -132,8 +131,7 @@ class Masamune::Configuration
|
|
132
131
|
opts << '--quiet' if quiet
|
133
132
|
opts << '--verbose' if verbose
|
134
133
|
opts << '--debug' if debug
|
135
|
-
opts << '--
|
136
|
-
opts << '--dry_run' if dry_run
|
134
|
+
opts << '--dry-run' if dry_run
|
137
135
|
opts
|
138
136
|
end
|
139
137
|
|
@@ -47,10 +47,8 @@ class Masamune::DataPlan::Elem
|
|
47
47
|
def exists?
|
48
48
|
if rule.for_path?
|
49
49
|
rule.engine.filesystem.exists?(path)
|
50
|
-
elsif rule.for_table_with_partition?
|
51
|
-
rule.engine.postgres_helper.table_exists?(table)
|
52
50
|
elsif rule.for_table?
|
53
|
-
table
|
51
|
+
rule.engine.postgres_helper.table_exists?(table)
|
54
52
|
end
|
55
53
|
end
|
56
54
|
|
@@ -70,14 +68,16 @@ class Masamune::DataPlan::Elem
|
|
70
68
|
|
71
69
|
def explode
|
72
70
|
return Set.new(to_enum(__method__)) unless block_given?
|
73
|
-
if rule.for_path?
|
71
|
+
if rule.for_path? && rule.free?
|
74
72
|
file_glob = path
|
75
73
|
file_glob += '/' unless path.include?('*') || path.include?('.')
|
76
74
|
file_glob += '*' unless path.include?('*')
|
77
|
-
rule.engine.filesystem.glob(file_glob)
|
75
|
+
rule.engine.filesystem.glob(file_glob) do |new_path|
|
78
76
|
yield rule.bind_input(new_path)
|
79
77
|
end
|
80
|
-
elsif rule.
|
78
|
+
elsif rule.for_path? && rule.bound?
|
79
|
+
yield self if exists?
|
80
|
+
elsif rule.for_table?
|
81
81
|
yield self if exists?
|
82
82
|
end
|
83
83
|
end
|
@@ -136,6 +136,10 @@ class Masamune::DataPlan::Elem
|
|
136
136
|
@options[:glob]
|
137
137
|
end
|
138
138
|
|
139
|
+
def rest
|
140
|
+
@options[:rest]
|
141
|
+
end
|
142
|
+
|
139
143
|
def next(i = 1)
|
140
144
|
self.class.new(@rule, start_time.advance(@rule.time_step => +1*i), @options)
|
141
145
|
end
|
@@ -184,6 +188,10 @@ class Masamune::DataPlan::Elem
|
|
184
188
|
private
|
185
189
|
|
186
190
|
def strftime_format
|
187
|
-
@strftime_format ||=
|
191
|
+
@strftime_format ||= begin
|
192
|
+
format = @rule.strftime_format.dup
|
193
|
+
format.sub!('*', glob || rest) if glob || rest
|
194
|
+
format
|
195
|
+
end
|
188
196
|
end
|
189
197
|
end
|
@@ -121,6 +121,8 @@ class Masamune::DataPlan::Engine
|
|
121
121
|
def prepare(rule, options = {})
|
122
122
|
@targets[rule].merge options.fetch(:targets, [])
|
123
123
|
@sources[rule].merge options.fetch(:sources, [])
|
124
|
+
@target_rules[rule].try(:prepare)
|
125
|
+
@source_rules[rule].try(:prepare)
|
124
126
|
|
125
127
|
constrain_max_depth(rule) do
|
126
128
|
sources(rule).group_by { |source| rule_for_target(source.input) }.each do |derived_rule, sources|
|
@@ -40,6 +40,10 @@ class Masamune::DataPlan::Rule
|
|
40
40
|
@options = options
|
41
41
|
end
|
42
42
|
|
43
|
+
def prepare
|
44
|
+
pattern
|
45
|
+
end
|
46
|
+
|
43
47
|
def for_targets?
|
44
48
|
@type == :target
|
45
49
|
end
|
@@ -91,7 +95,7 @@ class Masamune::DataPlan::Rule
|
|
91
95
|
def pattern
|
92
96
|
@pattern ||= begin
|
93
97
|
if for_path?
|
94
|
-
|
98
|
+
engine.filesystem.eval_path(path)
|
95
99
|
elsif for_table_with_partition?
|
96
100
|
[table , partition].join('_')
|
97
101
|
elsif for_table?
|
@@ -104,9 +108,17 @@ class Masamune::DataPlan::Rule
|
|
104
108
|
@options.fetch(:primary, true)
|
105
109
|
end
|
106
110
|
|
111
|
+
def free?
|
112
|
+
pattern.include?('%') || pattern.include?('*')
|
113
|
+
end
|
114
|
+
|
115
|
+
def bound?
|
116
|
+
!free?
|
117
|
+
end
|
118
|
+
|
107
119
|
def matches?(input)
|
108
120
|
matched_pattern = match_data_hash(matcher.match(input))
|
109
|
-
matched_pattern.present? && matched_pattern[:rest].blank?
|
121
|
+
matched_pattern.present? && (matched_pattern[:rest].blank? || matched_pattern[:rest].include?('*'))
|
110
122
|
end
|
111
123
|
|
112
124
|
def bind_date_or_time(input = nil)
|
@@ -288,9 +300,8 @@ class Masamune::DataPlan::Rule
|
|
288
300
|
end
|
289
301
|
end
|
290
302
|
|
291
|
-
def matched_extra(matched_data)
|
292
|
-
|
293
|
-
{glob: matched_data[:glob]}.reject { |_,v| v == '*' }
|
303
|
+
def matched_extra(matched_data = {})
|
304
|
+
matched_data.slice(:glob, :rest).reject { |_, v| v.blank? || v == '*' }
|
294
305
|
end
|
295
306
|
|
296
307
|
def options_for_elem
|
@@ -47,7 +47,7 @@ class Masamune::DataPlan::Set < Set
|
|
47
47
|
def missing
|
48
48
|
return self.class.new(rule, to_enum(__method__)) unless block_given?
|
49
49
|
self.each do |elem|
|
50
|
-
yield elem if elem.explode.
|
50
|
+
yield elem if elem.explode.none?
|
51
51
|
end
|
52
52
|
end
|
53
53
|
|
@@ -90,13 +90,13 @@ class Masamune::DataPlan::Set < Set
|
|
90
90
|
return Masamune::DataPlan::Set::EMPTY if empty? || @rule.for_sources?
|
91
91
|
return self.class.new(rule, to_enum(__method__)) unless block_given?
|
92
92
|
set = Set.new
|
93
|
-
missing
|
93
|
+
missing do |target|
|
94
94
|
yield target if set.add?(target)
|
95
95
|
end
|
96
|
-
incomplete
|
96
|
+
incomplete do |target|
|
97
97
|
yield target if set.add?(target)
|
98
98
|
end
|
99
|
-
stale
|
99
|
+
stale do |target|
|
100
100
|
yield target if set.add?(target)
|
101
101
|
end
|
102
102
|
end
|
@@ -105,7 +105,7 @@ class Masamune::DataPlan::Set < Set
|
|
105
105
|
return Masamune::DataPlan::Set::EMPTY if empty? || @rule.for_sources?
|
106
106
|
return self.class.new(rule, to_enum(__method__)) unless block_given?
|
107
107
|
set = Set.new
|
108
|
-
actionable
|
108
|
+
actionable do |target|
|
109
109
|
yield target if set.add?(target) && target.sources.existing.any?
|
110
110
|
end
|
111
111
|
end
|
@@ -124,7 +124,7 @@ class Masamune::DataPlan::Set < Set
|
|
124
124
|
return Masamune::DataPlan::Set::EMPTY if empty? || @rule.for_targets?
|
125
125
|
return self.class.new(self.first.targets.rule, to_enum(__method__)) unless block_given?
|
126
126
|
self.each do |elem|
|
127
|
-
elem.targets
|
127
|
+
elem.targets do |target|
|
128
128
|
yield target
|
129
129
|
end
|
130
130
|
end
|
@@ -134,7 +134,7 @@ class Masamune::DataPlan::Set < Set
|
|
134
134
|
return Masamune::DataPlan::Set::EMPTY if empty? || @rule.for_sources?
|
135
135
|
return self.class.new(self.first.sources.rule, to_enum(__method__)) unless block_given?
|
136
136
|
self.each do |elem|
|
137
|
-
elem.sources
|
137
|
+
elem.sources do |source|
|
138
138
|
yield source
|
139
139
|
end
|
140
140
|
end
|
@@ -171,6 +171,6 @@ class Masamune::DataPlan::Set < Set
|
|
171
171
|
def target_stale?(source, target)
|
172
172
|
target.last_modified_at != Masamune::DataPlan::Elem::MISSING_MODIFIED_AT &&
|
173
173
|
source.last_modified_at != Masamune::DataPlan::Elem::MISSING_MODIFIED_AT &&
|
174
|
-
source.last_modified_at
|
174
|
+
source.last_modified_at > target.last_modified_at
|
175
175
|
end
|
176
176
|
end
|
data/lib/masamune/filesystem.rb
CHANGED
@@ -51,13 +51,14 @@ module Masamune
|
|
51
51
|
self
|
52
52
|
end
|
53
53
|
|
54
|
-
def get_path(symbol, *
|
54
|
+
def get_path(symbol, *args)
|
55
|
+
options = args.last.is_a?(Hash) ? args.pop : {}
|
55
56
|
lazy_path = lambda do |fs|
|
56
57
|
fs.has_path?(symbol) or raise "Path :#{symbol} not defined"
|
57
58
|
path, options = fs.paths[symbol]
|
58
59
|
|
59
60
|
mkdir!(path) if options[:mkdir]
|
60
|
-
expand_params(fs,
|
61
|
+
expand_params(fs, args.any? ? File.join(path, args) : path)
|
61
62
|
end
|
62
63
|
|
63
64
|
if eager_load_paths?
|
@@ -76,6 +77,10 @@ module Masamune
|
|
76
77
|
@paths
|
77
78
|
end
|
78
79
|
|
80
|
+
def eval_path(path)
|
81
|
+
path.respond_to?(:call) ? path.call(self) : path
|
82
|
+
end
|
83
|
+
|
79
84
|
def expand_params(fs, path)
|
80
85
|
new_path = path.dup
|
81
86
|
fs.environment.configuration.params.each do |key, value|
|
@@ -190,7 +195,7 @@ module Masamune
|
|
190
195
|
yield OpenStruct.new(name: name, mtime: Time.parse("#{date} #{time} +0000").at_beginning_of_minute.utc, size: size.to_i)
|
191
196
|
end
|
192
197
|
when :local
|
193
|
-
Dir.glob(pattern.gsub(%r{
|
198
|
+
Dir.glob(pattern.gsub(%r{/\*(\.\w+)?\Z}, '/**/*\1')) do |file|
|
194
199
|
stat = File.stat(file)
|
195
200
|
yield OpenStruct.new(name: file, mtime: stat.mtime.at_beginning_of_minute.utc, size: stat.size.to_i)
|
196
201
|
end
|
@@ -216,7 +221,8 @@ module Masamune
|
|
216
221
|
when :s3
|
217
222
|
touch! *dir_set.map { |dir| File.join(dir, '.not_empty') }
|
218
223
|
when :local
|
219
|
-
|
224
|
+
missing_dir_set = dir_set.reject { |dir| File.exists?(dir) }
|
225
|
+
FileUtils.mkdir_p(missing_dir_set, file_util_args) if missing_dir_set.any?
|
220
226
|
end
|
221
227
|
end
|
222
228
|
end
|
@@ -243,7 +249,7 @@ module Masamune
|
|
243
249
|
yield q(pattern, name)
|
244
250
|
end
|
245
251
|
when :local
|
246
|
-
Dir.glob(pattern.gsub(%r{
|
252
|
+
Dir.glob(pattern.gsub(%r{/\*(\.\w+)?\Z}, '/**/*\1')) do |file|
|
247
253
|
yield file
|
248
254
|
end
|
249
255
|
end
|
@@ -473,7 +479,7 @@ module Masamune
|
|
473
479
|
end
|
474
480
|
|
475
481
|
def file_util_args
|
476
|
-
{noop: configuration.
|
482
|
+
{noop: configuration.dry_run, verbose: configuration.verbose}
|
477
483
|
end
|
478
484
|
|
479
485
|
def qualify_file(dir, file)
|
@@ -198,21 +198,21 @@ module Masamune::Schema
|
|
198
198
|
@context.pop
|
199
199
|
end
|
200
200
|
|
201
|
-
def load(
|
202
|
-
case
|
201
|
+
def load(schema_file)
|
202
|
+
case schema_file
|
203
203
|
when /\.rb\Z/
|
204
|
-
instance_eval(File.read(
|
204
|
+
instance_eval(File.read(schema_file), schema_file)
|
205
205
|
when /\.psql(\.erb)?\Z/
|
206
|
-
@stores[:postgres].extra <<
|
206
|
+
@stores[:postgres].extra << schema_file
|
207
207
|
when /\.hql(\.erb)?\Z/
|
208
|
-
@stores[:hive].extra <<
|
208
|
+
@stores[:hive].extra << schema_file
|
209
209
|
end
|
210
210
|
end
|
211
211
|
|
212
212
|
private
|
213
213
|
|
214
214
|
def dereference_column(id, options = {})
|
215
|
-
store_id = id.split(/\./).reverse.last.try(:to_sym)
|
215
|
+
store_id = id.to_s.split(/\./).reverse.last.try(:to_sym)
|
216
216
|
context = store_id && @stores.key?(store_id) ? @stores[store_id] : @context
|
217
217
|
context.dereference_column(id, options)
|
218
218
|
end
|
@@ -298,7 +298,7 @@ module Masamune::Schema
|
|
298
298
|
|
299
299
|
def default_ruby_value
|
300
300
|
return [] if array_value?
|
301
|
-
return {} if hash_value?
|
301
|
+
return HashWithIndifferentAccess.new { |h,k| h[k] = HashWithIndifferentAccess.new(&h.default_proc) } if hash_value?
|
302
302
|
case type
|
303
303
|
when :date
|
304
304
|
Date.new(0)
|
data/lib/masamune/schema/map.rb
CHANGED
@@ -76,7 +76,11 @@ module Masamune::Schema
|
|
76
76
|
end
|
77
77
|
|
78
78
|
def separator
|
79
|
-
@separator ||=
|
79
|
+
@separator ||=
|
80
|
+
case @store.format
|
81
|
+
when :tsv then "\t"
|
82
|
+
when :csv then ','
|
83
|
+
end
|
80
84
|
end
|
81
85
|
end
|
82
86
|
|
@@ -237,7 +241,7 @@ module Masamune::Schema
|
|
237
241
|
private
|
238
242
|
|
239
243
|
def default_row(columns)
|
240
|
-
{}.tap do |row|
|
244
|
+
{}.with_indifferent_access.tap do |row|
|
241
245
|
columns.each do |_, column|
|
242
246
|
row[column.compact_name] = column.default_ruby_value
|
243
247
|
end
|
@@ -33,9 +33,9 @@ module Masamune::Schema
|
|
33
33
|
DEFAULT_ATTRIBUTES =
|
34
34
|
{
|
35
35
|
type: nil,
|
36
|
-
format: ->(store) { store
|
37
|
-
json_encoding: ->(store) { store
|
38
|
-
headers: ->(store) { store
|
36
|
+
format: ->(store) { default_format(store) },
|
37
|
+
json_encoding: ->(store) { default_json_encoding(store) },
|
38
|
+
headers: ->(store) { default_headers(store) },
|
39
39
|
debug: false
|
40
40
|
}
|
41
41
|
|
@@ -111,5 +111,33 @@ module Masamune::Schema
|
|
111
111
|
end
|
112
112
|
result.to_a
|
113
113
|
end
|
114
|
+
|
115
|
+
private
|
116
|
+
|
117
|
+
class << self
|
118
|
+
def default_format(store)
|
119
|
+
case store.type
|
120
|
+
when :postgres then :csv
|
121
|
+
when :hive then :tsv
|
122
|
+
else :raw
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
def default_headers(store)
|
127
|
+
return false if store.format == :raw
|
128
|
+
case store.type
|
129
|
+
when :postgres then true
|
130
|
+
else false
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
def default_json_encoding(store)
|
135
|
+
return :raw if store.format == :raw
|
136
|
+
case store.type
|
137
|
+
when :postgres then :quoted
|
138
|
+
else :raw
|
139
|
+
end
|
140
|
+
end
|
141
|
+
end
|
114
142
|
end
|
115
143
|
end
|
@@ -27,7 +27,7 @@ require 'pry'
|
|
27
27
|
module Masamune::Tasks
|
28
28
|
class ShellThor < Thor
|
29
29
|
include Masamune::Thor
|
30
|
-
include Masamune::Actions::
|
30
|
+
include Masamune::Actions::DateParse
|
31
31
|
|
32
32
|
# FIXME need to add an unnecessary namespace until this issue is fixed:
|
33
33
|
# https://github.com/wycats/thor/pull/247
|