masamune 0.15.5 → 0.15.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e5057258a406f7f889d685a6060ca42ecd5e39ab
4
- data.tar.gz: c4fa4ad80317574acaeebbe8017fc4d4ed3ff20b
3
+ metadata.gz: 5b70cf7812702e987cc6c1f72d7f50cbb3ddde9b
4
+ data.tar.gz: 2d487a59b8da26e186f2eefab0f7067b5e9f950f
5
5
  SHA512:
6
- metadata.gz: fcac62a5623326ed3a090de28f7355b9e6bc77a36b9959483bf53214c579ab2bffcb3cdfda3c1710b44b7fbfcb026720e7bdbc34877201bfaa3c1f06f20f5b44
7
- data.tar.gz: c69d42b73d6aeb0bb7c6d0cdfcdae0330e31fc8e29192dc36143c3606f085d162a7c37fbcc2b6d631fc3d2d6dab7cf565ee9a1655afc2692c9c707fd35312291
6
+ metadata.gz: 91777fd4da7e0928d678ce4574f91305da2673e8bbbbeed65ce892ac612808552afa68689906f70a54c8f61d4f592b821ecb4ae9e67588bb988a6dc38e2a07b7
7
+ data.tar.gz: 76a2f706860eae74756e2c4b7a04e596913f3c0189e591d5c5f250681fcada14177be815f4033bede60ba1fa75a70a073d7a3bb91bac6692ab21c1ee04d8df3b
@@ -22,6 +22,9 @@
22
22
 
23
23
  module Masamune
24
24
  class CachedFilesystem < SimpleDelegator
25
+ MAX_DEPTH = 10
26
+ EMPTY_SET = Set.new
27
+
25
28
  def initialize(filesystem)
26
29
  super filesystem
27
30
  @filesystem = filesystem
@@ -29,34 +32,55 @@ module Masamune
29
32
  end
30
33
 
31
34
  def clear!
32
- @cache = Hash.new { |h,k| h[k] = Set.new }
35
+ @cache = PathCache.new(@filesystem)
33
36
  end
34
37
 
35
38
  def exists?(file)
36
- @cache[file].any? || glob(file).include?(file) || @cache[file].any?
39
+ glob(file, max_depth: 0).include?(file) || @cache.any?(file)
37
40
  end
38
41
 
39
- def glob(file_or_glob)
40
- return Set.new(to_enum(:glob, file_or_glob)) unless block_given?
41
- glob_stat(file_or_glob) do |entry|
42
- yield entry.name unless entry.name == dirname(file_or_glob)
42
+ def glob(file_or_glob, options = {})
43
+ return Set.new(to_enum(:glob, file_or_glob, options)) unless block_given?
44
+ glob_stat(file_or_glob, options) do |entry|
45
+ yield entry.name
43
46
  end
44
47
  end
45
48
 
46
49
  def stat(file_or_dir)
47
50
  raise ArgumentError, 'cannot contain wildcard' if file_or_dir.include?('*')
48
- result = Set.new
49
- glob_stat(file_or_dir) do |entry|
50
- result << entry
51
- end
52
- result += @cache[file_or_dir]
51
+ result = glob_stat(file_or_dir, recursive: true)
53
52
  return unless result.any?
54
- return result.first if result.size == 1
55
53
  max_time = result.map { |stat| stat.try(:mtime) }.compact.max
56
54
  sum_size = result.map { |stat| stat.try(:size) }.compact.reduce(:+)
57
55
  OpenStruct.new(name: file_or_dir, mtime: max_time, size: sum_size)
58
56
  end
59
57
 
58
+ def glob_stat(file_or_glob, options = {}, &block)
59
+ return Set.new(to_enum(:glob_stat, file_or_glob, options)) unless block_given?
60
+ return if file_or_glob.blank?
61
+ return if root_path?(file_or_glob)
62
+ depth = options.fetch(:depth, 0)
63
+ max_depth = options.fetch(:max_depth, 0)
64
+ return if depth > MAX_DEPTH || depth > max_depth
65
+
66
+ glob_stat(dirname(file_or_glob), depth: depth + 1, max_depth: max_depth, &block)
67
+
68
+ dirname = dirname(file_or_glob)
69
+ unless @cache.any?(dirname)
70
+ pattern = root_path?(dirname) ? file_or_glob : File.join(dirname, '*')
71
+ @filesystem.glob_stat(pattern) do |entry|
72
+ @cache.put(entry.name, entry)
73
+ end
74
+ end
75
+
76
+ file_regexp = glob_to_regexp(file_or_glob, options)
77
+ @cache.get(dirname).each do |entry|
78
+ next if entry.name == dirname
79
+ next unless entry.name =~ file_regexp
80
+ yield entry
81
+ end if depth == 0
82
+ end
83
+
60
84
  # FIXME cache eviction policy can be more precise
61
85
  [:touch!, :mkdir!, :copy_file_to_file, :copy_file_to_dir, :copy_dir, :remove_file, :remove_dir, :move_file_to_file, :move_file_to_dir, :move_dir, :write].each do |method|
62
86
  define_method(method) do |*args|
@@ -67,42 +91,59 @@ module Masamune
67
91
 
68
92
  private
69
93
 
70
- MAX_DEPTH = 10
71
- CACHE_DEPTH = 1
72
- EMPTY_SET = Set.new
94
+ class PathCache
95
+ def initialize(filesystem)
96
+ @filesystem = filesystem
97
+ @cache = {}
98
+ end
73
99
 
74
- def glob_stat(file_or_glob, options = {}, &block)
75
- return if file_or_glob.blank?
76
- return if root_path?(file_or_glob)
77
- depth = options.fetch(:depth, 0)
78
- return if depth > MAX_DEPTH || depth > CACHE_DEPTH
100
+ def put(path, entry)
101
+ return unless path
102
+ return if @filesystem.root_path?(path)
103
+ put(File.join(@filesystem.dirname(path), '.'), OpenStruct.new(name: @filesystem.dirname(path)))
104
+ paths = path_split(path)
105
+ elems = paths.reverse.inject(entry) { |a, n| { n => a } }
106
+ @cache.deep_merge!(elems)
107
+ end
79
108
 
80
- glob_stat(dirname(file_or_glob), depth: depth + 1, &block)
109
+ def get(path)
110
+ return unless path
111
+ paths = path_split(path)
112
+ elem = paths.inject(@cache) { |level, path| level.is_a?(Hash) ? level.fetch(path, {}) : level }
113
+ emit(elem)
114
+ rescue KeyError
115
+ EMPTY_SET
116
+ end
81
117
 
82
- dirname = dirname(file_or_glob)
83
- unless @cache.key?(dirname)
84
- pattern = root_path?(dirname) ? file_or_glob : File.join(dirname, '*')
85
- @filesystem.glob_stat(pattern) do |entry|
86
- recursive_paths(dirname, entry.name) do |path|
87
- @cache[path] << entry
88
- end
118
+ def any?(path)
119
+ if elem = get(path)
120
+ elem.any? { |entry| entry.name.start_with?(path) }
121
+ else
122
+ false
89
123
  end
90
124
  end
91
- @cache[dirname] ||= EMPTY_SET
92
125
 
93
- file_regexp = glob_to_regexp(file_or_glob)
94
- @cache[dirname].each do |entry|
95
- yield entry if entry.name =~ file_regexp
96
- end if depth == 0
97
- end
126
+ private
98
127
 
99
- def recursive_paths(root, path, options = {}, &block)
100
- depth = options.fetch(:depth, 0)
101
- return if depth > MAX_DEPTH
102
- return if root == path
103
- yield path
104
- yield dirname(path)
105
- recursive_paths(root, dirname(path), depth: depth + 1, &block)
128
+ def emit(elem)
129
+ return Set.new(to_enum(:emit, elem)).flatten unless block_given?
130
+ case elem
131
+ when Array, Set
132
+ elem.each do |e|
133
+ yield emit(e)
134
+ end
135
+ when Hash
136
+ elem.values.each do |e|
137
+ yield emit(e)
138
+ end
139
+ else
140
+ yield elem
141
+ end
142
+ end
143
+
144
+ def path_split(path)
145
+ path.split('/').reject { |x| x.blank? }
146
+ end
106
147
  end
107
148
  end
108
149
  end
@@ -70,9 +70,8 @@ class Masamune::DataPlan::Elem
70
70
  return Set.new(to_enum(__method__)) unless block_given?
71
71
  if rule.for_path? && rule.free?
72
72
  file_glob = path
73
- file_glob += '/' unless path.include?('*') || path.include?('.')
74
- file_glob += '*' unless path.include?('*')
75
- rule.engine.filesystem.glob(file_glob) do |new_path|
73
+ file_glob += '/*' unless path.include?('*') || path.include?('.')
74
+ rule.engine.filesystem.glob(file_glob, max_depth: rule.cache_depth) do |new_path|
76
75
  yield rule.bind_input(new_path)
77
76
  end
78
77
  elsif rule.for_path? && rule.bound?
@@ -250,6 +250,17 @@ class Masamune::DataPlan::Rule
250
250
  self.class.new(engine, name, type, options.merge(path: new_pattern))
251
251
  end
252
252
 
253
+ def cache_depth
254
+ case time_step
255
+ when :hour, :hours
256
+ 2
257
+ when :day, :days
258
+ 1
259
+ else
260
+ 0
261
+ end
262
+ end
263
+
253
264
  private
254
265
 
255
266
  def time_step_to_format(step)
@@ -111,9 +111,9 @@ module Masamune
111
111
  parent_paths.each do |part|
112
112
  tmp << part
113
113
  current_path = prefix + File.join(tmp)
114
- break if current_path == path
115
114
  result << current_path
116
115
  end
116
+ result.pop
117
117
  result
118
118
  end
119
119
 
@@ -129,7 +129,7 @@ module Masamune
129
129
  end
130
130
 
131
131
  def dirname(path)
132
- parent_paths(path).last || path
132
+ parent_paths(path).last || remote_prefix(path) || local_prefix(path)
133
133
  end
134
134
 
135
135
  def basename(path)
@@ -174,8 +174,8 @@ module Masamune
174
174
  end
175
175
  end
176
176
 
177
- def glob_stat(pattern)
178
- return Set.new(to_enum(:glob_stat, pattern)) unless block_given?
177
+ def glob_stat(pattern, options = {})
178
+ return Set.new(to_enum(:glob_stat, pattern, options)) unless block_given?
179
179
  case type(pattern)
180
180
  when :hdfs
181
181
  hadoop_fs('-ls', '-R', pattern, safe: true) do |line|
@@ -206,7 +206,6 @@ module Masamune
206
206
  raise ArgumentError, 'cannot contain wildcard' if file_or_dir.include?('*')
207
207
  result = glob_stat(file_or_dir)
208
208
  return unless result.any?
209
- return result.first if result.size == 1
210
209
  max_time = result.map { |stat| stat.try(:mtime) }.compact.max
211
210
  sum_size = result.map { |stat| stat.try(:size) }.compact.reduce(:+)
212
211
  OpenStruct.new(name: file_or_dir, mtime: max_time, size: sum_size)
@@ -227,8 +226,8 @@ module Masamune
227
226
  end
228
227
  end
229
228
 
230
- def glob(pattern)
231
- return Set.new(to_enum(:glob, pattern)) unless block_given?
229
+ def glob(pattern, options = {})
230
+ return Set.new(to_enum(:glob, pattern, options)) unless block_given?
232
231
  case type(pattern)
233
232
  when :hdfs
234
233
  file_glob, file_regexp = glob_split(pattern)
@@ -462,7 +461,12 @@ module Masamune
462
461
  dir[%r{\Ahdfs://}]
463
462
  end
464
463
 
465
- def local_prefix(file)
464
+ def local_prefix(dir)
465
+ dir[%r{\A/}] ||
466
+ '.'
467
+ end
468
+
469
+ def local_file_prefix(file)
466
470
  return file if remote_prefix(file)
467
471
  "file://#{file}"
468
472
  end
@@ -529,7 +533,7 @@ module Masamune
529
533
  when [:hdfs, :hdfs]
530
534
  hadoop_fs('-cp', src, dst)
531
535
  when [:hdfs, :local]
532
- hadoop_fs('-copyToLocal', src, local_prefix(dst))
536
+ hadoop_fs('-copyToLocal', src, local_file_prefix(dst))
533
537
  when [:hdfs, :s3]
534
538
  hadoop_fs('-cp', src, s3n(dst))
535
539
  when [:s3, :s3]
@@ -541,7 +545,7 @@ module Masamune
541
545
  when [:local, :local]
542
546
  FileUtils.cp(src, dst, file_util_args)
543
547
  when [:local, :hdfs]
544
- hadoop_fs('-copyFromLocal', local_prefix(src), dst)
548
+ hadoop_fs('-copyFromLocal', local_file_prefix(src), dst)
545
549
  when [:local, :s3]
546
550
  s3cmd('put', src, s3b(dst, dir: dir))
547
551
  end
@@ -553,7 +557,7 @@ module Masamune
553
557
  hadoop_fs('-mv', src, dst)
554
558
  when [:hdfs, :local]
555
559
  # NOTE: moveToLocal: Option '-moveToLocal' is not implemented yet
556
- hadoop_fs('-copyToLocal', src, local_prefix(dst))
560
+ hadoop_fs('-copyToLocal', src, local_file_prefix(dst))
557
561
  hadoop_fs('-rm', src)
558
562
  when [:hdfs, :s3]
559
563
  copy_file_to_file(src, s3n(dst, dir: dir))
@@ -569,7 +573,7 @@ module Masamune
569
573
  FileUtils.mv(src, dst, file_util_args)
570
574
  FileUtils.chmod(FILE_MODE, dst, file_util_args)
571
575
  when [:local, :hdfs]
572
- hadoop_fs('-moveFromLocal', local_prefix(src), dst)
576
+ hadoop_fs('-moveFromLocal', local_file_prefix(src), dst)
573
577
  when [:local, :s3]
574
578
  s3cmd('put', src, s3b(dst, dir: dir))
575
579
  FileUtils.rm(src, file_util_args)
@@ -21,5 +21,5 @@
21
21
  # THE SOFTWARE.
22
22
 
23
23
  module Masamune
24
- VERSION = '0.15.5'
24
+ VERSION = '0.15.6'
25
25
  end
@@ -27,8 +27,9 @@ describe Masamune::CachedFilesystem do
27
27
  context 'when path is present, top down traversal' do
28
28
  before do
29
29
  filesystem.touch!('/a/b/c/1.txt', '/a/b/c/2.txt', '/a/b/c/3.txt')
30
- expect(filesystem).to receive(:glob_stat).with('/a/b/*').once.and_call_original
31
- expect(filesystem).to receive(:glob_stat).with('/a').once.and_call_original
30
+ expect(filesystem).to receive(:glob_stat).with('/a/b/c/*').once.and_call_original
31
+ expect(filesystem).to receive(:glob_stat).with('/a/b/*').never
32
+ expect(filesystem).to receive(:glob_stat).with('/a/*').never
32
33
  expect(filesystem).to receive(:glob_stat).with('/*').never
33
34
  end
34
35
 
@@ -39,7 +40,13 @@ describe Masamune::CachedFilesystem do
39
40
  expect(cached_filesystem.exists?('/a/b/c/4.txt')).to eq(false)
40
41
  expect(cached_filesystem.exists?('/a/b/c')).to eq(true)
41
42
  expect(cached_filesystem.glob('/a/b/c/*').count).to eq(3)
43
+ expect(cached_filesystem.glob('/a/b/c/*')).to include '/a/b/c/1.txt'
44
+ expect(cached_filesystem.glob('/a/b/c/*')).to include '/a/b/c/2.txt'
45
+ expect(cached_filesystem.glob('/a/b/c/*')).to include '/a/b/c/3.txt'
42
46
  expect(cached_filesystem.glob('/a/b/c/*.txt').count).to eq(3)
47
+ expect(cached_filesystem.glob('/a/b/c/*.txt')).to include '/a/b/c/1.txt'
48
+ expect(cached_filesystem.glob('/a/b/c/*.txt')).to include '/a/b/c/2.txt'
49
+ expect(cached_filesystem.glob('/a/b/c/*.txt')).to include '/a/b/c/3.txt'
43
50
  expect(cached_filesystem.stat('/a/b/c/1.txt')).to_not be_nil
44
51
  expect(cached_filesystem.stat('/a/b/c/2.txt')).to_not be_nil
45
52
  expect(cached_filesystem.stat('/a/b/c/3.txt')).to_not be_nil
@@ -53,7 +60,8 @@ describe Masamune::CachedFilesystem do
53
60
  context 'when path is present, bottom up traversal' do
54
61
  before do
55
62
  filesystem.touch!('/a/b/c/1.txt', '/a/b/c/2.txt', '/a/b/c/3.txt')
56
- expect(filesystem).to receive(:glob_stat).with('/a/*').once.and_call_original
63
+ expect(filesystem).to receive(:glob_stat).with('/a/b/*').once.and_call_original
64
+ expect(filesystem).to receive(:glob_stat).with('/a/*').never
57
65
  expect(filesystem).to receive(:glob_stat).with('/*').never
58
66
  end
59
67
 
@@ -78,8 +86,9 @@ describe Masamune::CachedFilesystem do
78
86
  context 'when path is present, checking for similar non existant paths' do
79
87
  before do
80
88
  filesystem.touch!('/y=2013/m=1/d=22/00000')
81
- expect(filesystem).to receive(:glob_stat).with('/y=2013/m=1/*').once.and_call_original
82
- expect(filesystem).to receive(:glob_stat).with('/y=2013/*').once.and_call_original
89
+ expect(filesystem).to receive(:glob_stat).with('/y=2013/m=1/d=22/*').once.and_call_original
90
+ expect(filesystem).to receive(:glob_stat).with('/y=2013/m=1/*').never
91
+ expect(filesystem).to receive(:glob_stat).with('/y=2013/*').never
83
92
  expect(filesystem).to receive(:glob_stat).with('/*').never
84
93
  end
85
94
 
@@ -87,8 +96,11 @@ describe Masamune::CachedFilesystem do
87
96
  expect(cached_filesystem.exists?('/y=2013/m=1/d=22/00000')).to eq(true)
88
97
  expect(cached_filesystem.exists?('/y=2013/m=1/d=22')).to eq(true)
89
98
  expect(cached_filesystem.exists?('/y=2013/m=1/d=2')).to eq(false)
90
- expect(cached_filesystem.glob('/y=2013/m=1/*').count).to eq(1)
99
+ expect(cached_filesystem.glob('/y=2013/m=1/*').count).to eq(2)
100
+ expect(cached_filesystem.glob('/y=2013/m=1/*')).to include('/y=2013/m=1/d=22')
101
+ expect(cached_filesystem.glob('/y=2013/m=1/*')).to include('/y=2013/m=1/d=22/00000')
91
102
  expect(cached_filesystem.glob('/y=2013/m=1/d=22/*').count).to eq(1)
103
+ expect(cached_filesystem.glob('/y=2013/m=1/d=22/*')).to include('/y=2013/m=1/d=22/00000')
92
104
  expect(cached_filesystem.stat('/y=2013/m=1/d=22/00000')).not_to be_nil
93
105
  expect(cached_filesystem.stat('/y=2013/m=1/d=22')).not_to be_nil
94
106
  expect(cached_filesystem.stat('/y=2013/m=1')).not_to be_nil
@@ -116,9 +128,15 @@ describe Masamune::CachedFilesystem do
116
128
  expect(cached_filesystem.glob('/logs/*').count).to eq(3)
117
129
  expect(cached_filesystem.glob('/logs/*.txt').count).to eq(3)
118
130
  expect(cached_filesystem.glob('/logs/box1_*.txt').count).to eq(1)
131
+ expect(cached_filesystem.glob('/logs/box1_*.txt')).to include('/logs/box1_123.txt')
119
132
  expect(cached_filesystem.glob('/logs/box2_*.txt').count).to eq(1)
133
+ expect(cached_filesystem.glob('/logs/box2_*.txt')).to include('/logs/box2_123.txt')
120
134
  expect(cached_filesystem.glob('/logs/box3_*.txt').count).to eq(1)
135
+ expect(cached_filesystem.glob('/logs/box3_*.txt')).to include('/logs/box3_123.txt')
121
136
  expect(cached_filesystem.glob('/logs/box*.txt').count).to eq(3)
137
+ expect(cached_filesystem.glob('/logs/box*.txt')).to include('/logs/box2_123.txt')
138
+ expect(cached_filesystem.glob('/logs/box*.txt')).to include('/logs/box1_123.txt')
139
+ expect(cached_filesystem.glob('/logs/box*.txt')).to include('/logs/box3_123.txt')
122
140
  expect(cached_filesystem.glob('/logs/box*.csv').count).to eq(0)
123
141
  expect(cached_filesystem.glob('/logs/box').count).to eq(0)
124
142
  expect(cached_filesystem.glob('/logs/box/*').count).to eq(0)
@@ -138,7 +156,7 @@ describe Masamune::CachedFilesystem do
138
156
  before do
139
157
  filesystem.touch!('/a/b/c')
140
158
  expect(filesystem).to receive(:glob_stat).with('/a/b/*').once.and_call_original
141
- expect(filesystem).to receive(:glob_stat).with('/a').once.and_call_original
159
+ expect(filesystem).to receive(:glob_stat).with('/a').never
142
160
  expect(filesystem).to receive(:glob_stat).with('/*').never
143
161
  end
144
162
 
@@ -149,11 +167,17 @@ describe Masamune::CachedFilesystem do
149
167
  expect(cached_filesystem.exists?('/a/b/c')).to eq(true)
150
168
  expect(cached_filesystem.exists?('/a/b')).to eq(true)
151
169
  expect(cached_filesystem.exists?('/a')).to eq(true)
152
- expect(cached_filesystem.glob('/a').count).to eq(0)
153
- expect(cached_filesystem.glob('/a/*').count).to eq(1)
154
- expect(cached_filesystem.glob('/a/b').count).to eq(0)
170
+ expect(cached_filesystem.glob('/a').count).to eq(1)
171
+ expect(cached_filesystem.glob('/a')).to include '/a'
172
+ expect(cached_filesystem.glob('/a/*').count).to eq(2)
173
+ expect(cached_filesystem.glob('/a/*')).to include '/a/b'
174
+ expect(cached_filesystem.glob('/a/*')).to include '/a/b/c'
175
+ expect(cached_filesystem.glob('/a/b').count).to eq(1)
176
+ expect(cached_filesystem.glob('/a/b')).to include '/a/b'
155
177
  expect(cached_filesystem.glob('/a/b/*').count).to eq(1)
178
+ expect(cached_filesystem.glob('/a/b/*')).to include '/a/b/c'
156
179
  expect(cached_filesystem.glob('/a/b/c').count).to eq(1)
180
+ expect(cached_filesystem.glob('/a/b/c')).to include '/a/b/c'
157
181
  expect(cached_filesystem.glob('/a/b/c/*').count).to eq(0)
158
182
  expect(cached_filesystem.glob('/a/b/c/*.txt').count).to eq(0)
159
183
  expect(cached_filesystem.stat('/a/b/c/1.txt')).to be_nil
@@ -164,4 +188,118 @@ describe Masamune::CachedFilesystem do
164
188
  expect(cached_filesystem.stat('/a')).to_not be_nil
165
189
  end
166
190
  end
191
+
192
+ describe '#glob_stat' do
193
+ before do
194
+ filesystem.touch!('/a/b/c/1.txt', '/a/b/c/2.txt', '/a/b/c/3.txt')
195
+ end
196
+
197
+ context 'without options' do
198
+ before do
199
+ expect(filesystem).to receive(:glob_stat).with('/a/b/c/*').once.and_call_original
200
+ expect(filesystem).to receive(:glob_stat).with('/a/b/*').never
201
+ expect(filesystem).to receive(:glob_stat).with('/a/*').never
202
+ expect(filesystem).to receive(:glob_stat).with('/*').never
203
+ end
204
+ it do
205
+ expect(cached_filesystem.glob_stat('/a/b/c/1.txt').count).to eq(1)
206
+ end
207
+ end
208
+
209
+ context 'with max_depth=1' do
210
+ before do
211
+ expect(filesystem).to receive(:glob_stat).with('/a/b/c/*').never
212
+ expect(filesystem).to receive(:glob_stat).with('/a/b/*').once.and_call_original
213
+ expect(filesystem).to receive(:glob_stat).with('/a/*').never
214
+ expect(filesystem).to receive(:glob_stat).with('/*').never
215
+ end
216
+ it { expect(cached_filesystem.glob_stat('/a/b/c/1.txt', max_depth: 1).count).to eq(1) }
217
+ end
218
+
219
+ context 'with max_depth=2' do
220
+ before do
221
+ expect(filesystem).to receive(:glob_stat).with('/a/b/c/*').never
222
+ expect(filesystem).to receive(:glob_stat).with('/a/b/*').never
223
+ expect(filesystem).to receive(:glob_stat).with('/a/*').once.and_call_original
224
+ expect(filesystem).to receive(:glob_stat).with('/*').never
225
+ end
226
+ it { expect(cached_filesystem.glob_stat('/a/b/c/1.txt', max_depth: 2).count).to eq(1) }
227
+ end
228
+
229
+ context 'with max_depth=3' do
230
+ before do
231
+ expect(filesystem).to receive(:glob_stat).with('/a/b/c/*').never
232
+ expect(filesystem).to receive(:glob_stat).with('/a/b/*').never
233
+ expect(filesystem).to receive(:glob_stat).with('/a/*').once.and_call_original
234
+ expect(filesystem).to receive(:glob_stat).with('/*').never
235
+ end
236
+ it { expect(cached_filesystem.glob_stat('/a/b/c/1.txt', max_depth: 3).count).to eq(1) }
237
+ end
238
+ end
239
+
240
+ describe Masamune::CachedFilesystem::PathCache do
241
+ let(:instance) { described_class.new(filesystem) }
242
+
243
+ before do
244
+ instance.put('/a/b/c/1.txt', OpenStruct.new(name: '/a/b/c/1.txt'))
245
+ instance.put('/a/b/c/2.txt', OpenStruct.new(name: '/a/b/c/2.txt'))
246
+ instance.put('/a/b/c/3.txt', OpenStruct.new(name: '/a/b/c/3.txt'))
247
+ end
248
+
249
+ subject(:result) do
250
+ instance.get(path)
251
+ end
252
+
253
+ context 'with nil' do
254
+ let(:path) { nil }
255
+
256
+ it { is_expected.to be_nil }
257
+ end
258
+
259
+ context 'with existing file path' do
260
+ let(:path) { '/a/b/c/1.txt' }
261
+
262
+ it 'returns existing file' do
263
+ expect(result).to include(OpenStruct.new(name: '/a/b/c/1.txt'))
264
+ expect(result.count).to eq(1)
265
+ end
266
+ end
267
+
268
+ context 'with existing directory path' do
269
+ let(:path) { '/a/b/c' }
270
+
271
+ it 'returns matching files' do
272
+ expect(result).to include(OpenStruct.new(name: '/a/b/c/1.txt'))
273
+ expect(result).to include(OpenStruct.new(name: '/a/b/c/2.txt'))
274
+ expect(result).to include(OpenStruct.new(name: '/a/b/c/3.txt'))
275
+ expect(result).to include(OpenStruct.new(name: '/a/b/c'))
276
+ expect(result.count).to eq(4)
277
+ end
278
+ end
279
+
280
+ context 'with existing directory path (nested)' do
281
+ let(:path) { '/a/b' }
282
+
283
+ it 'returns matching files' do
284
+ expect(result).to include(OpenStruct.new(name: '/a/b/c/1.txt'))
285
+ expect(result).to include(OpenStruct.new(name: '/a/b/c/2.txt'))
286
+ expect(result).to include(OpenStruct.new(name: '/a/b/c/3.txt'))
287
+ expect(result).to include(OpenStruct.new(name: '/a/b/c'))
288
+ expect(result).to include(OpenStruct.new(name: '/a/b'))
289
+ expect(result.count).to eq(5)
290
+ end
291
+ end
292
+
293
+ context 'with missing file path' do
294
+ let(:path) { '/a/b/c/4.txt' }
295
+
296
+ it { is_expected.to be_empty }
297
+ end
298
+
299
+ context 'with missing directory path' do
300
+ let(:path) { '/a/b/d' }
301
+
302
+ it { is_expected.to be_empty }
303
+ end
304
+ end
167
305
  end
@@ -212,7 +212,7 @@ shared_examples_for 'Filesystem' do
212
212
 
213
213
  context 'with local blank' do
214
214
  let(:path) { '' }
215
- it { is_expected.to be_blank }
215
+ it { is_expected.to eq('.') }
216
216
  end
217
217
 
218
218
  context 'with local path with slash' do
@@ -220,6 +220,11 @@ shared_examples_for 'Filesystem' do
220
220
  it { is_expected.to eq('/a/b') }
221
221
  end
222
222
 
223
+ context 'with local file without slash' do
224
+ let(:path) { 'a' }
225
+ it { is_expected.to eq('.') }
226
+ end
227
+
223
228
  context 'with local path without slash' do
224
229
  let(:path) { 'a/b/c' }
225
230
  it { is_expected.to eq('a/b') }
@@ -227,7 +232,12 @@ shared_examples_for 'Filesystem' do
227
232
 
228
233
  context 'with local relative path' do
229
234
  let(:path) { '/a/b/../c' }
230
- it { is_expected.to eq('/a/c') }
235
+ it { is_expected.to eq('/a') }
236
+ end
237
+
238
+ context 'with local another relative path' do
239
+ let(:path) { '/a/b/.' }
240
+ it { is_expected.to eq('/a') }
231
241
  end
232
242
 
233
243
  context 'with s3 bucket with blank' do
@@ -237,7 +247,7 @@ shared_examples_for 'Filesystem' do
237
247
 
238
248
  context 'with s3 bucket with slash' do
239
249
  let(:path) { 's3://bucket/' }
240
- it { is_expected.to eq('s3://bucket/') }
250
+ it { is_expected.to eq('s3://bucket') }
241
251
  end
242
252
 
243
253
  context 'with s3 bucket with path' do
@@ -247,7 +257,7 @@ shared_examples_for 'Filesystem' do
247
257
 
248
258
  context 'with s3 bucket with relative path' do
249
259
  let(:path) { 's3://bucket/a/b/../c' }
250
- it { is_expected.to eq('s3://bucket/a/c') }
260
+ it { is_expected.to eq('s3://bucket/a') }
251
261
  end
252
262
 
253
263
  context 'with hdfs directory with path' do
@@ -262,7 +272,7 @@ shared_examples_for 'Filesystem' do
262
272
 
263
273
  context 'with hdfs directory with relative path' do
264
274
  let(:path) { 'hdfs:///a/b/../c' }
265
- it { is_expected.to eq('hdfs:///a/c') }
275
+ it { is_expected.to eq('hdfs:///a') }
266
276
  end
267
277
  end
268
278
 
@@ -362,7 +372,7 @@ shared_examples_for 'Filesystem' do
362
372
 
363
373
  context 'hdfs missing file' do
364
374
  before do
365
- expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.dirname(old_dir) + '/*', safe: true).at_most(:once).
375
+ expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.join(old_dir, '/*'), safe: true).at_most(:once).
366
376
  and_yield("drwxrwxrwt - root wheel 68 2015-02-24 12:09 #{old_dir}")
367
377
  expect(filesystem).to receive(:hadoop_fs).with('-test', '-e', 'file://' + new_file, safe: true).at_most(:once).and_return(mock_failure)
368
378
  end
@@ -385,7 +395,7 @@ shared_examples_for 'Filesystem' do
385
395
 
386
396
  context 'hdfs existing file' do
387
397
  before do
388
- expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.dirname(old_dir) + '/*', safe: true).at_most(:once).
398
+ expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.join(old_dir, '/*'), safe: true).at_most(:once).
389
399
  and_yield("drwxrwxrwt - root wheel 68 2015-02-24 12:09 #{old_dir}").
390
400
  and_yield("drwxrwxrwt - root wheel 68 2015-02-24 12:09 #{old_file}")
391
401
  expect(filesystem).to receive(:hadoop_fs).with('-test', '-e', 'file://' + old_file, safe: true).at_most(:once).and_return(mock_success)
@@ -434,7 +444,7 @@ shared_examples_for 'Filesystem' do
434
444
 
435
445
  context 'hdfs missing file' do
436
446
  before do
437
- expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.dirname(old_dir) + '/*', safe: true).at_most(:once).
447
+ expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.join(old_dir, '/*'), safe: true).at_most(:once).
438
448
  and_yield('')
439
449
  expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + old_dir + '/*', safe: true).at_most(:once).
440
450
  and_yield('')
@@ -489,7 +499,7 @@ shared_examples_for 'Filesystem' do
489
499
 
490
500
  context 'hdfs existing file' do
491
501
  before do
492
- expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.dirname(old_dir) + '/*', safe: true).at_most(:once).
502
+ expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.join(old_dir, '/*'), safe: true).at_most(:once).
493
503
  and_yield("drwxrwxrwt - root wheel 68 2015-02-24 12:09 #{old_dir}").
494
504
  and_yield("drwxrwxrwt - root wheel 68 2015-02-24 12:09 #{old_file}")
495
505
  expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + old_file, safe: true).at_most(:once).
@@ -549,7 +559,7 @@ shared_examples_for 'Filesystem' do
549
559
 
550
560
  describe '#name' do
551
561
  subject { stat.name }
552
- it { is_expected.to eq('s3://bucket/dir/file.txt') }
562
+ it { is_expected.to eq('s3://bucket/dir') }
553
563
  end
554
564
 
555
565
  describe '#mtime' do
@@ -0,0 +1,129 @@
1
+ # The MIT License (MIT)
2
+ #
3
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ describe Masamune::MockFilesystem do
24
+ let(:instance) { described_class.new }
25
+
26
+ describe '#glob' do
27
+ before do
28
+ instance.touch!('/a/b/c/1.txt', '/a/b/c/2.txt', '/a/b/c/3.txt')
29
+ end
30
+
31
+ subject(:result) { instance.glob(input) }
32
+
33
+ context 'with glob for existing file' do
34
+ let(:input) { '/a/b/c/1.txt' }
35
+
36
+ it 'contains single matching file' do
37
+ expect(result).to include('/a/b/c/1.txt')
38
+ expect(result.count).to eq(1)
39
+ end
40
+ end
41
+
42
+ context 'with glob for existing files' do
43
+ let(:input) { '/a/b/c/*' }
44
+
45
+ it 'contains all matching files' do
46
+ expect(result).to include('/a/b/c/1.txt')
47
+ expect(result).to include('/a/b/c/2.txt')
48
+ expect(result).to include('/a/b/c/3.txt')
49
+ expect(result.count).to eq(3)
50
+ end
51
+ end
52
+
53
+ context 'with glob for existing files (recursive)' do
54
+ let(:input) { '/a/b/*' }
55
+
56
+ it 'contains all matching files and directory' do
57
+ expect(result).to include('/a/b/c')
58
+ expect(result).to include('/a/b/c/1.txt')
59
+ expect(result).to include('/a/b/c/2.txt')
60
+ expect(result).to include('/a/b/c/3.txt')
61
+ expect(result.count).to eq(4)
62
+ end
63
+ end
64
+
65
+ context 'with glob for missing file' do
66
+ let(:input) { '/a/b/c/4.txt' }
67
+
68
+ it { expect(result).to be_empty }
69
+ end
70
+
71
+ context 'with glob for missing directory' do
72
+ let(:input) { '/a/b/d/*' }
73
+
74
+ it { expect(result).to be_empty }
75
+ end
76
+ end
77
+
78
+ describe '#glob_stat' do
79
+ before do
80
+ instance.touch!('/a/b/c/1.txt', '/a/b/c/2.txt', '/a/b/c/3.txt')
81
+ end
82
+
83
+ subject(:result) { instance.glob_stat(input).map(&:name) }
84
+
85
+ context 'with glob for existing file' do
86
+ let(:input) { '/a/b/c/1.txt' }
87
+
88
+ it 'contains single matching file' do
89
+ expect(result).to include('/a/b/c/1.txt')
90
+ expect(result.count).to eq(1)
91
+ end
92
+ end
93
+
94
+ context 'with glob for existing files' do
95
+ let(:input) { '/a/b/c/*' }
96
+
97
+ it 'contains all matching files' do
98
+ expect(result).to include('/a/b/c/1.txt')
99
+ expect(result).to include('/a/b/c/2.txt')
100
+ expect(result).to include('/a/b/c/3.txt')
101
+ expect(result.count).to eq(3)
102
+ end
103
+ end
104
+
105
+ context 'with glob for existing files (recursive)' do
106
+ let(:input) { '/a/b/*' }
107
+
108
+ it 'contains all matching files and directory' do
109
+ expect(result).to include('/a/b/c')
110
+ expect(result).to include('/a/b/c/1.txt')
111
+ expect(result).to include('/a/b/c/2.txt')
112
+ expect(result).to include('/a/b/c/3.txt')
113
+ expect(result.count).to eq(4)
114
+ end
115
+ end
116
+
117
+ context 'with glob for missing file' do
118
+ let(:input) { '/a/b/c/4.txt' }
119
+
120
+ it { expect(result).to be_empty }
121
+ end
122
+
123
+ context 'with glob for missing directory' do
124
+ let(:input) { '/a/b/d/*' }
125
+
126
+ it { expect(result).to be_empty }
127
+ end
128
+ end
129
+ end
@@ -32,19 +32,24 @@ class Masamune::MockFilesystem < Delegator
32
32
  def touch!(*args)
33
33
  opts = args.last.is_a?(Hash) ? args.pop : {}
34
34
  args.each do |file|
35
+ parent_paths(file).each do |parent|
36
+ @files[parent] = OpenStruct.new(opts.merge(name: parent))
37
+ end
35
38
  @files[file] = OpenStruct.new(opts.merge(name: file))
36
39
  end
37
40
  end
38
41
 
39
42
  def exists?(file)
40
- @files.keys.include?(file)
43
+ @files.keys.any? { |path| file == path || path.start_with?(File.join(file, '/')) }
41
44
  end
42
45
 
43
- def glob(pattern)
44
- return Set.new(to_enum(:glob, pattern)) unless block_given?
46
+ def glob(pattern, options = {})
47
+ return Set.new(to_enum(:glob, pattern, options)) unless block_given?
45
48
  file_regexp = glob_to_regexp(pattern)
46
49
  @files.keys.each do |name|
47
- yield name if name =~ file_regexp
50
+ next if name == dirname(pattern)
51
+ next unless name =~ file_regexp
52
+ yield name
48
53
  end
49
54
  end
50
55
 
@@ -55,11 +60,13 @@ class Masamune::MockFilesystem < Delegator
55
60
  end
56
61
  end
57
62
 
58
- def glob_stat(pattern)
59
- return Set.new(to_enum(:glob_stat, pattern)) unless block_given?
63
+ def glob_stat(pattern, options = {})
64
+ return Set.new(to_enum(:glob_stat, pattern, options)) unless block_given?
60
65
  file_regexp = glob_to_regexp(pattern, recursive: true)
61
66
  @files.each do |name, stat|
62
- yield stat if name =~ file_regexp
67
+ next if stat.name == dirname(pattern)
68
+ next unless stat.name =~ file_regexp
69
+ yield stat
63
70
  end
64
71
  end
65
72
 
@@ -118,7 +118,6 @@ module Masamune::SharedExampleGroup
118
118
  expect_data = expect_data.split("\n").sort.join("\n")
119
119
  end
120
120
  yield [actual_data, output_file, expect_data]
121
- FileUtils.rm(output_file) if File.exists?(output_file)
122
121
  end
123
122
  end
124
123
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: masamune
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.15.5
4
+ version: 0.15.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michael Andrews
@@ -365,6 +365,7 @@ files:
365
365
  - spec/masamune/filesystem_spec.rb
366
366
  - spec/masamune/helpers/postgres_spec.rb
367
367
  - spec/masamune/rspec/job_fixture_spec.rb
368
+ - spec/masamune/rspec/mock_filesystem_spec.rb
368
369
  - spec/masamune/rspec/shared_example_group_spec.rb
369
370
  - spec/masamune/schema/catalog_spec.rb
370
371
  - spec/masamune/schema/column_spec.rb
@@ -476,6 +477,7 @@ test_files:
476
477
  - spec/masamune/filesystem_spec.rb
477
478
  - spec/masamune/helpers/postgres_spec.rb
478
479
  - spec/masamune/rspec/job_fixture_spec.rb
480
+ - spec/masamune/rspec/mock_filesystem_spec.rb
479
481
  - spec/masamune/rspec/shared_example_group_spec.rb
480
482
  - spec/masamune/schema/catalog_spec.rb
481
483
  - spec/masamune/schema/column_spec.rb