masamune 0.14.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. checksums.yaml +4 -4
  2. data/README.md +10 -1
  3. data/Rakefile +37 -0
  4. data/bin/masamune-dump +22 -0
  5. data/bin/masamune-elastic-mapreduce +22 -0
  6. data/bin/masamune-hive +22 -0
  7. data/bin/masamune-psql +22 -0
  8. data/bin/masamune-shell +22 -0
  9. data/lib/masamune/cached_filesystem.rb +1 -1
  10. data/lib/masamune/commands/shell.rb +1 -1
  11. data/lib/masamune/configuration.rb +5 -7
  12. data/lib/masamune/data_plan/elem.rb +15 -7
  13. data/lib/masamune/data_plan/engine.rb +2 -0
  14. data/lib/masamune/data_plan/rule.rb +16 -5
  15. data/lib/masamune/data_plan/set.rb +8 -8
  16. data/lib/masamune/filesystem.rb +12 -6
  17. data/lib/masamune/schema/catalog.rb +6 -6
  18. data/lib/masamune/schema/column.rb +1 -1
  19. data/lib/masamune/schema/map.rb +6 -2
  20. data/lib/masamune/schema/store.rb +31 -3
  21. data/lib/masamune/tasks/shell_thor.rb +1 -1
  22. data/lib/masamune/thor.rb +12 -4
  23. data/lib/masamune/version.rb +1 -1
  24. data/lib/masamune.rb +0 -1
  25. data/spec/masamune/actions/elastic_mapreduce_spec.rb +0 -2
  26. data/spec/masamune/actions/execute_spec.rb +0 -2
  27. data/spec/masamune/actions/hadoop_filesystem_spec.rb +0 -2
  28. data/spec/masamune/actions/hadoop_streaming_spec.rb +0 -2
  29. data/spec/masamune/actions/hive_spec.rb +0 -2
  30. data/spec/masamune/actions/invoke_parallel_spec.rb +0 -2
  31. data/spec/masamune/actions/postgres_admin_spec.rb +0 -2
  32. data/spec/masamune/actions/postgres_spec.rb +0 -2
  33. data/spec/masamune/actions/s3cmd_spec.rb +0 -2
  34. data/spec/masamune/actions/transform_spec.rb +0 -2
  35. data/spec/masamune/after_initialization_callbacks_spec.rb +0 -2
  36. data/spec/masamune/cached_filesystem_spec.rb +0 -2
  37. data/spec/masamune/commands/hadoop_filesystem_spec.rb +0 -2
  38. data/spec/masamune/commands/hadoop_streaming_spec.rb +0 -2
  39. data/spec/masamune/commands/hive_spec.rb +0 -2
  40. data/spec/masamune/commands/postgres_admin_spec.rb +0 -2
  41. data/spec/masamune/commands/postgres_spec.rb +0 -2
  42. data/spec/masamune/commands/retry_with_backoff_spec.rb +0 -2
  43. data/spec/masamune/commands/s3cmd_spec.rb +0 -2
  44. data/spec/masamune/commands/shell_spec.rb +0 -2
  45. data/spec/masamune/configuration_spec.rb +12 -2
  46. data/spec/masamune/data_plan/builder_spec.rb +0 -2
  47. data/spec/masamune/data_plan/elem_spec.rb +73 -5
  48. data/spec/masamune/data_plan/engine_spec.rb +0 -2
  49. data/spec/masamune/data_plan/rule_spec.rb +51 -6
  50. data/spec/masamune/data_plan/set_spec.rb +2 -5
  51. data/spec/masamune/environment_spec.rb +0 -2
  52. data/spec/masamune/filesystem_spec.rb +33 -4
  53. data/spec/masamune/helpers/postgres_spec.rb +0 -2
  54. data/spec/masamune/rspec/job_fixture_spec.rb +365 -0
  55. data/spec/masamune/rspec/shared_example_group_spec.rb +73 -0
  56. data/spec/masamune/schema/catalog_spec.rb +14 -2
  57. data/spec/masamune/schema/column_spec.rb +0 -2
  58. data/spec/masamune/schema/dimension_spec.rb +0 -2
  59. data/spec/masamune/schema/fact_spec.rb +0 -2
  60. data/spec/masamune/schema/map_spec.rb +51 -2
  61. data/spec/masamune/schema/row_spec.rb +0 -2
  62. data/spec/masamune/schema/store_spec.rb +23 -2
  63. data/spec/masamune/schema/table_spec.rb +0 -2
  64. data/spec/masamune/string_format_spec.rb +0 -2
  65. data/spec/masamune/tasks/dump_thor_spec.rb +0 -3
  66. data/spec/masamune/tasks/elastic_mapreduce_thor_spec.rb +0 -3
  67. data/spec/masamune/tasks/hive_thor_spec.rb +0 -3
  68. data/spec/masamune/tasks/postgres_thor_spec.rb +0 -3
  69. data/spec/masamune/tasks/shell_thor_spec.rb +0 -3
  70. data/spec/masamune/template_spec.rb +0 -2
  71. data/spec/masamune/thor_spec.rb +53 -8
  72. data/spec/masamune/transform/bulk_upsert.dimension_spec.rb +0 -2
  73. data/spec/masamune/transform/consolidate_dimension_spec.rb +0 -2
  74. data/spec/masamune/transform/deduplicate_dimension_spec.rb +0 -2
  75. data/spec/masamune/transform/define_schema_spec.rb +0 -2
  76. data/spec/masamune/transform/define_table.dimension_spec.rb +0 -2
  77. data/spec/masamune/transform/define_table.fact_spec.rb +0 -2
  78. data/spec/masamune/transform/define_table.table_spec.rb +0 -2
  79. data/spec/masamune/transform/denormalize_table_spec.rb +0 -2
  80. data/spec/masamune/transform/insert_reference_values.dimension_spec.rb +0 -2
  81. data/spec/masamune/transform/insert_reference_values.fact_spec.rb +0 -2
  82. data/spec/masamune/transform/load_dimension_spec.rb +0 -2
  83. data/spec/masamune/transform/load_fact_spec.rb +0 -2
  84. data/spec/masamune/transform/relabel_dimension_spec.rb +0 -2
  85. data/spec/masamune/transform/rollup_fact_spec.rb +0 -2
  86. data/spec/masamune/transform/snapshot_dimension_spec.rb +0 -2
  87. data/spec/masamune/transform/stage_dimension_spec.rb +0 -2
  88. data/spec/masamune/transform/stage_fact_spec.rb +0 -2
  89. data/spec/masamune_spec.rb +0 -2
  90. data/spec/spec_helper.rb +2 -0
  91. data/spec/support/masamune/job_example_group.rb +62 -0
  92. data/spec/support/masamune/job_fixture.rb +137 -0
  93. data/spec/support/masamune/shared_example_group.rb +203 -0
  94. data/spec/support/masamune/step_example_group.rb +68 -0
  95. data/spec/support/masamune/step_fixture.rb +91 -0
  96. data/{lib/masamune/thor_loader.rb → spec/support/masamune/task_example_group.rb} +33 -10
  97. data/spec/support/rspec/example/action_example_group.rb +1 -1
  98. metadata +32 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: feca9f373a42ecd946e9682b08213c3357ce7437
4
- data.tar.gz: 51cf1a4d353fb1f607299937e60c66437213d35c
3
+ metadata.gz: 7abad0d40705fd8c85b74398cf3ce198d6e1e0a3
4
+ data.tar.gz: 049aeb65a25a01b8dca0a9db21d7e543e9eadae5
5
5
  SHA512:
6
- metadata.gz: 2e1d3c79fbe068bb79cb653cb746845dbaf7c33bc4525727cfa569b40407578b970418ee701ecb4ed0ed15004fb48d183c14398c03e6267fa8f5429d0184d0cf
7
- data.tar.gz: f62cd40539a9ac2036ee312e9abf9055792463f3340faaed9857bd2dfb16d06cf3fe12e32ba41bc1b3676b23e512641abcaf07c929fe3e0e70bc5b4240d002ea
6
+ metadata.gz: 5a8ef99944a527c5d0fde7b4875fc1d6541dd830060372fb47752a8a915f8d6bb7a3c359ad54c2592b2939c1bbc0ac1ea2c486f34c75fb31c7551524ef278963
7
+ data.tar.gz: 3d44f7583a6048a70010c124b8f8c881163196b285ced0fbfe78cb0bc3858bcd3768bc3b3528f0bd8a8a9043060c8fb33f4a813bb472ffa3a751cd71bd0fbbbb
data/README.md CHANGED
@@ -26,7 +26,7 @@ class ExampleThor < Thor
26
26
  # Describe a Data Processing Job
27
27
  desc 'extract_logs', 'Organize log files by YYYY-MM-DD'
28
28
 
29
- target fs.path(:target_dir, '%Y-%m-%d', mkdir: true)
29
+ target fs.path(:target_dir, '%Y-%m-%d')
30
30
  source fs.path(:source_dir, '%Y%m%d*.log')
31
31
  def extract_logs
32
32
  targets.missing.each do |target|
@@ -45,6 +45,15 @@ Execute your dataflow with the goal of processing all data from the start of the
45
45
  thor extract_logs --start '1 year ago'
46
46
  ```
47
47
 
48
+ Testing
49
+ ---------------
50
+ ```
51
+ rake spec # Run Rspec unit code examples
52
+ rake spec:acceptance # Run Rspec acceptance code examples
53
+ rake spec:all # Run All Rspec code examples
54
+ rake spec:unit # Run Rspec unit code examples
55
+ ```
56
+
48
57
  Contributing
49
58
  ---------------
50
59
 
data/Rakefile CHANGED
@@ -1,4 +1,25 @@
1
1
  #!/usr/bin/env rake
2
+ # The MIT License (MIT)
3
+ #
4
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
5
+ #
6
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ # of this software and associated documentation files (the "Software"), to deal
8
+ # in the Software without restriction, including without limitation the rights
9
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ # copies of the Software, and to permit persons to whom the Software is
11
+ # furnished to do so, subject to the following conditions:
12
+ #
13
+ # The above copyright notice and this permission notice shall be included in
14
+ # all copies or substantial portions of the Software.
15
+ #
16
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ # THE SOFTWARE.
2
23
 
3
24
  begin
4
25
  require 'bundler/setup'
@@ -10,6 +31,22 @@ Bundler::GemHelper.install_tasks
10
31
 
11
32
  require 'rspec/core/rake_task'
12
33
 
34
+ desc 'Run Rspec unit code examples'
13
35
  RSpec::Core::RakeTask.new(:spec)
14
36
 
37
+ namespace :spec do
38
+ desc 'Run Rspec unit code examples'
39
+ RSpec::Core::RakeTask.new(:unit) do |spec|
40
+ spec.pattern = "spec/**/*_spec.rb"
41
+ end
42
+
43
+ desc 'Run Rspec acceptance code examples'
44
+ RSpec::Core::RakeTask.new(:acceptance) do |spec|
45
+ spec.pattern = "examples/**/*_spec.rb"
46
+ end
47
+
48
+ desc 'Run All Rspec code examples'
49
+ task all: [:unit, :acceptance]
50
+ end
51
+
15
52
  task :default => :spec
data/bin/masamune-dump CHANGED
@@ -1,4 +1,26 @@
1
1
  #!/usr/bin/env ruby
2
+ # The MIT License (MIT)
3
+ #
4
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
5
+ #
6
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ # of this software and associated documentation files (the "Software"), to deal
8
+ # in the Software without restriction, including without limitation the rights
9
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ # copies of the Software, and to permit persons to whom the Software is
11
+ # furnished to do so, subject to the following conditions:
12
+ #
13
+ # The above copyright notice and this permission notice shall be included in
14
+ # all copies or substantial portions of the Software.
15
+ #
16
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ # THE SOFTWARE.
23
+
2
24
  $: << File.expand_path('../../lib/', __FILE__)
3
25
  require 'masamune/tasks/dump_thor'
4
26
  Masamune::Tasks::DumpThor.start(ARGV)
data/bin/masamune-elastic-mapreduce CHANGED
@@ -1,4 +1,26 @@
1
1
  #!/usr/bin/env ruby
2
+ # The MIT License (MIT)
3
+ #
4
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
5
+ #
6
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ # of this software and associated documentation files (the "Software"), to deal
8
+ # in the Software without restriction, including without limitation the rights
9
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ # copies of the Software, and to permit persons to whom the Software is
11
+ # furnished to do so, subject to the following conditions:
12
+ #
13
+ # The above copyright notice and this permission notice shall be included in
14
+ # all copies or substantial portions of the Software.
15
+ #
16
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ # THE SOFTWARE.
23
+
2
24
  $: << File.expand_path('../../lib/', __FILE__)
3
25
  require 'masamune/tasks/elastic_mapreduce_thor'
4
26
  Masamune::Tasks::ElasticMapreduceThor.start(ARGV)
data/bin/masamune-hive CHANGED
@@ -1,4 +1,26 @@
1
1
  #!/usr/bin/env ruby
2
+ # The MIT License (MIT)
3
+ #
4
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
5
+ #
6
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ # of this software and associated documentation files (the "Software"), to deal
8
+ # in the Software without restriction, including without limitation the rights
9
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ # copies of the Software, and to permit persons to whom the Software is
11
+ # furnished to do so, subject to the following conditions:
12
+ #
13
+ # The above copyright notice and this permission notice shall be included in
14
+ # all copies or substantial portions of the Software.
15
+ #
16
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ # THE SOFTWARE.
23
+
2
24
  $: << File.expand_path('../../lib/', __FILE__)
3
25
  require 'masamune/tasks/hive_thor'
4
26
  Masamune::Tasks::HiveThor.start(ARGV)
data/bin/masamune-psql CHANGED
@@ -1,4 +1,26 @@
1
1
  #!/usr/bin/env ruby
2
+ # The MIT License (MIT)
3
+ #
4
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
5
+ #
6
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ # of this software and associated documentation files (the "Software"), to deal
8
+ # in the Software without restriction, including without limitation the rights
9
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ # copies of the Software, and to permit persons to whom the Software is
11
+ # furnished to do so, subject to the following conditions:
12
+ #
13
+ # The above copyright notice and this permission notice shall be included in
14
+ # all copies or substantial portions of the Software.
15
+ #
16
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ # THE SOFTWARE.
23
+
2
24
  $: << File.expand_path('../../lib/', __FILE__)
3
25
  require 'masamune/tasks/postgres_thor'
4
26
  Masamune::Tasks::PostgresThor.start(ARGV)
data/bin/masamune-shell CHANGED
@@ -1,4 +1,26 @@
1
1
  #!/usr/bin/env ruby
2
+ # The MIT License (MIT)
3
+ #
4
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
5
+ #
6
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ # of this software and associated documentation files (the "Software"), to deal
8
+ # in the Software without restriction, including without limitation the rights
9
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ # copies of the Software, and to permit persons to whom the Software is
11
+ # furnished to do so, subject to the following conditions:
12
+ #
13
+ # The above copyright notice and this permission notice shall be included in
14
+ # all copies or substantial portions of the Software.
15
+ #
16
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ # THE SOFTWARE.
23
+
2
24
  $: << File.expand_path('../../lib/', __FILE__)
3
25
  require 'masamune/tasks/shell_thor'
4
26
  Masamune::Tasks::ShellThor.start(ARGV)
data/lib/masamune/cached_filesystem.rb CHANGED
@@ -33,7 +33,7 @@ module Masamune
33
33
  end
34
34
 
35
35
  def exists?(file)
36
- @cache.key?(file) || glob(file).include?(file) || @cache.key?(file)
36
+ @cache[file].any? || glob(file).include?(file) || @cache[file].any?
37
37
  end
38
38
 
39
39
  def glob(file_or_glob)
data/lib/masamune/commands/shell.rb CHANGED
@@ -75,7 +75,7 @@ module Masamune::Commands
75
75
  end
76
76
 
77
77
  def around_execute(&block)
78
- if configuration.no_op && !safe
78
+ if configuration.dry_run && !safe
79
79
  return OpenStruct.new(:success? => true)
80
80
  end
81
81
 
data/lib/masamune/configuration.rb CHANGED
@@ -36,7 +36,6 @@ class Masamune::Configuration
36
36
  attr_accessor :quiet
37
37
  attr_accessor :verbose
38
38
  attr_accessor :debug
39
- attr_accessor :no_op
40
39
  attr_accessor :dry_run
41
40
  attr_accessor :lock
42
41
  attr_accessor :retries
@@ -56,7 +55,6 @@ class Masamune::Configuration
56
55
  self.quiet = false
57
56
  self.verbose = false
58
57
  self.debug = false
59
- self.no_op = false
60
58
  self.dry_run = false
61
59
  self.lock = nil
62
60
  self.retries = 3
@@ -70,9 +68,10 @@ class Masamune::Configuration
70
68
  end
71
69
  end
72
70
 
73
- def load(file)
71
+ def load(path)
74
72
  @load_once ||= begin
75
- load_yaml_erb_file(file).each_pair do |command, value|
73
+ config_file = filesystem.eval_path(path)
74
+ load_yaml_erb_file(config_file).each_pair do |command, value|
76
75
  if COMMANDS.include?(command)
77
76
  send("#{command}=", value)
78
77
  elsif command == 'paths'
@@ -82,7 +81,7 @@ class Masamune::Configuration
82
81
  self.params.merge! value
83
82
  end
84
83
  end
85
- logger.debug("Loaded configuration #{file}")
84
+ logger.debug("Loaded configuration #{config_file}")
86
85
  load_catalog(configuration.postgres.fetch(:schema_files, []) + configuration.hive.fetch(:schema_files, []))
87
86
  true
88
87
  end
@@ -132,8 +131,7 @@ class Masamune::Configuration
132
131
  opts << '--quiet' if quiet
133
132
  opts << '--verbose' if verbose
134
133
  opts << '--debug' if debug
135
- opts << '--no_op' if no_op
136
- opts << '--dry_run' if dry_run
134
+ opts << '--dry-run' if dry_run
137
135
  opts
138
136
  end
139
137
 
data/lib/masamune/data_plan/elem.rb CHANGED
@@ -47,10 +47,8 @@ class Masamune::DataPlan::Elem
47
47
  def exists?
48
48
  if rule.for_path?
49
49
  rule.engine.filesystem.exists?(path)
50
- elsif rule.for_table_with_partition?
51
- rule.engine.postgres_helper.table_exists?(table)
52
50
  elsif rule.for_table?
53
- table
51
+ rule.engine.postgres_helper.table_exists?(table)
54
52
  end
55
53
  end
56
54
 
@@ -70,14 +68,16 @@ class Masamune::DataPlan::Elem
70
68
 
71
69
  def explode
72
70
  return Set.new(to_enum(__method__)) unless block_given?
73
- if rule.for_path?
71
+ if rule.for_path? && rule.free?
74
72
  file_glob = path
75
73
  file_glob += '/' unless path.include?('*') || path.include?('.')
76
74
  file_glob += '*' unless path.include?('*')
77
- rule.engine.filesystem.glob(file_glob).each do |new_path|
75
+ rule.engine.filesystem.glob(file_glob) do |new_path|
78
76
  yield rule.bind_input(new_path)
79
77
  end
80
- elsif rule.for_table_with_partition?
78
+ elsif rule.for_path? && rule.bound?
79
+ yield self if exists?
80
+ elsif rule.for_table?
81
81
  yield self if exists?
82
82
  end
83
83
  end
@@ -136,6 +136,10 @@ class Masamune::DataPlan::Elem
136
136
  @options[:glob]
137
137
  end
138
138
 
139
+ def rest
140
+ @options[:rest]
141
+ end
142
+
139
143
  def next(i = 1)
140
144
  self.class.new(@rule, start_time.advance(@rule.time_step => +1*i), @options)
141
145
  end
@@ -184,6 +188,10 @@ class Masamune::DataPlan::Elem
184
188
  private
185
189
 
186
190
  def strftime_format
187
- @strftime_format ||= glob ? @rule.strftime_format.sub('*', glob) : @rule.strftime_format
191
+ @strftime_format ||= begin
192
+ format = @rule.strftime_format.dup
193
+ format.sub!('*', glob || rest) if glob || rest
194
+ format
195
+ end
188
196
  end
189
197
  end
data/lib/masamune/data_plan/engine.rb CHANGED
@@ -121,6 +121,8 @@ class Masamune::DataPlan::Engine
121
121
  def prepare(rule, options = {})
122
122
  @targets[rule].merge options.fetch(:targets, [])
123
123
  @sources[rule].merge options.fetch(:sources, [])
124
+ @target_rules[rule].try(:prepare)
125
+ @source_rules[rule].try(:prepare)
124
126
 
125
127
  constrain_max_depth(rule) do
126
128
  sources(rule).group_by { |source| rule_for_target(source.input) }.each do |derived_rule, sources|
data/lib/masamune/data_plan/rule.rb CHANGED
@@ -40,6 +40,10 @@ class Masamune::DataPlan::Rule
40
40
  @options = options
41
41
  end
42
42
 
43
+ def prepare
44
+ pattern
45
+ end
46
+
43
47
  def for_targets?
44
48
  @type == :target
45
49
  end
@@ -91,7 +95,7 @@ class Masamune::DataPlan::Rule
91
95
  def pattern
92
96
  @pattern ||= begin
93
97
  if for_path?
94
- path.respond_to?(:call) ? path.call(engine.filesystem) : path
98
+ engine.filesystem.eval_path(path)
95
99
  elsif for_table_with_partition?
96
100
  [table , partition].join('_')
97
101
  elsif for_table?
@@ -104,9 +108,17 @@ class Masamune::DataPlan::Rule
104
108
  @options.fetch(:primary, true)
105
109
  end
106
110
 
111
+ def free?
112
+ pattern.include?('%') || pattern.include?('*')
113
+ end
114
+
115
+ def bound?
116
+ !free?
117
+ end
118
+
107
119
  def matches?(input)
108
120
  matched_pattern = match_data_hash(matcher.match(input))
109
- matched_pattern.present? && matched_pattern[:rest].blank?
121
+ matched_pattern.present? && (matched_pattern[:rest].blank? || matched_pattern[:rest].include?('*'))
110
122
  end
111
123
 
112
124
  def bind_date_or_time(input = nil)
@@ -288,9 +300,8 @@ class Masamune::DataPlan::Rule
288
300
  end
289
301
  end
290
302
 
291
- def matched_extra(matched_data)
292
- return {} unless matched_data.has_key?(:glob)
293
- {glob: matched_data[:glob]}.reject { |_,v| v == '*' }
303
+ def matched_extra(matched_data = {})
304
+ matched_data.slice(:glob, :rest).reject { |_, v| v.blank? || v == '*' }
294
305
  end
295
306
 
296
307
  def options_for_elem
data/lib/masamune/data_plan/set.rb CHANGED
@@ -47,7 +47,7 @@ class Masamune::DataPlan::Set < Set
47
47
  def missing
48
48
  return self.class.new(rule, to_enum(__method__)) unless block_given?
49
49
  self.each do |elem|
50
- yield elem if elem.explode.count < 1
50
+ yield elem if elem.explode.none?
51
51
  end
52
52
  end
53
53
 
@@ -90,13 +90,13 @@ class Masamune::DataPlan::Set < Set
90
90
  return Masamune::DataPlan::Set::EMPTY if empty? || @rule.for_sources?
91
91
  return self.class.new(rule, to_enum(__method__)) unless block_given?
92
92
  set = Set.new
93
- missing.each do |target|
93
+ missing do |target|
94
94
  yield target if set.add?(target)
95
95
  end
96
- incomplete.each do |target|
96
+ incomplete do |target|
97
97
  yield target if set.add?(target)
98
98
  end
99
- stale.each do |target|
99
+ stale do |target|
100
100
  yield target if set.add?(target)
101
101
  end
102
102
  end
@@ -105,7 +105,7 @@ class Masamune::DataPlan::Set < Set
105
105
  return Masamune::DataPlan::Set::EMPTY if empty? || @rule.for_sources?
106
106
  return self.class.new(rule, to_enum(__method__)) unless block_given?
107
107
  set = Set.new
108
- actionable.each do |target|
108
+ actionable do |target|
109
109
  yield target if set.add?(target) && target.sources.existing.any?
110
110
  end
111
111
  end
@@ -124,7 +124,7 @@ class Masamune::DataPlan::Set < Set
124
124
  return Masamune::DataPlan::Set::EMPTY if empty? || @rule.for_targets?
125
125
  return self.class.new(self.first.targets.rule, to_enum(__method__)) unless block_given?
126
126
  self.each do |elem|
127
- elem.targets.each do |target|
127
+ elem.targets do |target|
128
128
  yield target
129
129
  end
130
130
  end
@@ -134,7 +134,7 @@ class Masamune::DataPlan::Set < Set
134
134
  return Masamune::DataPlan::Set::EMPTY if empty? || @rule.for_sources?
135
135
  return self.class.new(self.first.sources.rule, to_enum(__method__)) unless block_given?
136
136
  self.each do |elem|
137
- elem.sources.each do |source|
137
+ elem.sources do |source|
138
138
  yield source
139
139
  end
140
140
  end
@@ -171,6 +171,6 @@ class Masamune::DataPlan::Set < Set
171
171
  def target_stale?(source, target)
172
172
  target.last_modified_at != Masamune::DataPlan::Elem::MISSING_MODIFIED_AT &&
173
173
  source.last_modified_at != Masamune::DataPlan::Elem::MISSING_MODIFIED_AT &&
174
- source.last_modified_at >= target.last_modified_at
174
+ source.last_modified_at > target.last_modified_at
175
175
  end
176
176
  end
data/lib/masamune/filesystem.rb CHANGED
@@ -51,13 +51,14 @@ module Masamune
51
51
  self
52
52
  end
53
53
 
54
- def get_path(symbol, *extra)
54
+ def get_path(symbol, *args)
55
+ options = args.last.is_a?(Hash) ? args.pop : {}
55
56
  lazy_path = lambda do |fs|
56
57
  fs.has_path?(symbol) or raise "Path :#{symbol} not defined"
57
58
  path, options = fs.paths[symbol]
58
59
 
59
60
  mkdir!(path) if options[:mkdir]
60
- expand_params(fs, extra.any? ? File.join(path, extra) : path)
61
+ expand_params(fs, args.any? ? File.join(path, args) : path)
61
62
  end
62
63
 
63
64
  if eager_load_paths?
@@ -76,6 +77,10 @@ module Masamune
76
77
  @paths
77
78
  end
78
79
 
80
+ def eval_path(path)
81
+ path.respond_to?(:call) ? path.call(self) : path
82
+ end
83
+
79
84
  def expand_params(fs, path)
80
85
  new_path = path.dup
81
86
  fs.environment.configuration.params.each do |key, value|
@@ -190,7 +195,7 @@ module Masamune
190
195
  yield OpenStruct.new(name: name, mtime: Time.parse("#{date} #{time} +0000").at_beginning_of_minute.utc, size: size.to_i)
191
196
  end
192
197
  when :local
193
- Dir.glob(pattern.gsub(%r{/\*\Z}, '/**/*')) do |file|
198
+ Dir.glob(pattern.gsub(%r{/\*(\.\w+)?\Z}, '/**/*\1')) do |file|
194
199
  stat = File.stat(file)
195
200
  yield OpenStruct.new(name: file, mtime: stat.mtime.at_beginning_of_minute.utc, size: stat.size.to_i)
196
201
  end
@@ -216,7 +221,8 @@ module Masamune
216
221
  when :s3
217
222
  touch! *dir_set.map { |dir| File.join(dir, '.not_empty') }
218
223
  when :local
219
- FileUtils.mkdir_p(dir_set, file_util_args)
224
+ missing_dir_set = dir_set.reject { |dir| File.exists?(dir) }
225
+ FileUtils.mkdir_p(missing_dir_set, file_util_args) if missing_dir_set.any?
220
226
  end
221
227
  end
222
228
  end
@@ -243,7 +249,7 @@ module Masamune
243
249
  yield q(pattern, name)
244
250
  end
245
251
  when :local
246
- Dir.glob(pattern.gsub(%r{/\*\Z}, '/**/*')) do |file|
252
+ Dir.glob(pattern.gsub(%r{/\*(\.\w+)?\Z}, '/**/*\1')) do |file|
247
253
  yield file
248
254
  end
249
255
  end
@@ -473,7 +479,7 @@ module Masamune
473
479
  end
474
480
 
475
481
  def file_util_args
476
- {noop: configuration.no_op, verbose: configuration.verbose}
482
+ {noop: configuration.dry_run, verbose: configuration.verbose}
477
483
  end
478
484
 
479
485
  def qualify_file(dir, file)
data/lib/masamune/schema/catalog.rb CHANGED
@@ -198,21 +198,21 @@ module Masamune::Schema
198
198
  @context.pop
199
199
  end
200
200
 
201
- def load(file)
202
- case file
201
+ def load(schema_file)
202
+ case schema_file
203
203
  when /\.rb\Z/
204
- instance_eval(File.read(file), file)
204
+ instance_eval(File.read(schema_file), schema_file)
205
205
  when /\.psql(\.erb)?\Z/
206
- @stores[:postgres].extra << file
206
+ @stores[:postgres].extra << schema_file
207
207
  when /\.hql(\.erb)?\Z/
208
- @stores[:hive].extra << file
208
+ @stores[:hive].extra << schema_file
209
209
  end
210
210
  end
211
211
 
212
212
  private
213
213
 
214
214
  def dereference_column(id, options = {})
215
- store_id = id.split(/\./).reverse.last.try(:to_sym)
215
+ store_id = id.to_s.split(/\./).reverse.last.try(:to_sym)
216
216
  context = store_id && @stores.key?(store_id) ? @stores[store_id] : @context
217
217
  context.dereference_column(id, options)
218
218
  end
data/lib/masamune/schema/column.rb CHANGED
@@ -298,7 +298,7 @@ module Masamune::Schema
298
298
 
299
299
  def default_ruby_value
300
300
  return [] if array_value?
301
- return {} if hash_value?
301
+ return HashWithIndifferentAccess.new { |h,k| h[k] = HashWithIndifferentAccess.new(&h.default_proc) } if hash_value?
302
302
  case type
303
303
  when :date
304
304
  Date.new(0)
data/lib/masamune/schema/map.rb CHANGED
@@ -76,7 +76,11 @@ module Masamune::Schema
76
76
  end
77
77
 
78
78
  def separator
79
- @separator ||= (@store.format == :tsv ? "\t" : ',')
79
+ @separator ||=
80
+ case @store.format
81
+ when :tsv then "\t"
82
+ when :csv then ','
83
+ end
80
84
  end
81
85
  end
82
86
 
@@ -237,7 +241,7 @@ module Masamune::Schema
237
241
  private
238
242
 
239
243
  def default_row(columns)
240
- {}.tap do |row|
244
+ {}.with_indifferent_access.tap do |row|
241
245
  columns.each do |_, column|
242
246
  row[column.compact_name] = column.default_ruby_value
243
247
  end
data/lib/masamune/schema/store.rb CHANGED
@@ -33,9 +33,9 @@ module Masamune::Schema
33
33
  DEFAULT_ATTRIBUTES =
34
34
  {
35
35
  type: nil,
36
- format: ->(store) { store.type == :postgres ? :csv : :tsv },
37
- json_encoding: ->(store) { store.type == :postgres ? :quoted : :raw },
38
- headers: ->(store) { store.type == :postgres ? true : false },
36
+ format: ->(store) { default_format(store) },
37
+ json_encoding: ->(store) { default_json_encoding(store) },
38
+ headers: ->(store) { default_headers(store) },
39
39
  debug: false
40
40
  }
41
41
 
@@ -111,5 +111,33 @@ module Masamune::Schema
111
111
  end
112
112
  result.to_a
113
113
  end
114
+
115
+ private
116
+
117
+ class << self
118
+ def default_format(store)
119
+ case store.type
120
+ when :postgres then :csv
121
+ when :hive then :tsv
122
+ else :raw
123
+ end
124
+ end
125
+
126
+ def default_headers(store)
127
+ return false if store.format == :raw
128
+ case store.type
129
+ when :postgres then true
130
+ else false
131
+ end
132
+ end
133
+
134
+ def default_json_encoding(store)
135
+ return :raw if store.format == :raw
136
+ case store.type
137
+ when :postgres then :quoted
138
+ else :raw
139
+ end
140
+ end
141
+ end
114
142
  end
115
143
  end
data/lib/masamune/tasks/shell_thor.rb CHANGED
@@ -27,7 +27,7 @@ require 'pry'
27
27
  module Masamune::Tasks
28
28
  class ShellThor < Thor
29
29
  include Masamune::Thor
30
- include Masamune::Actions::DataFlow
30
+ include Masamune::Actions::DateParse
31
31
 
32
32
  # FIXME need to add an unnecessary namespace until this issue is fixed:
33
33
  # https://github.com/wycats/thor/pull/247