masamune 0.15.5 → 0.15.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e5057258a406f7f889d685a6060ca42ecd5e39ab
4
- data.tar.gz: c4fa4ad80317574acaeebbe8017fc4d4ed3ff20b
3
+ metadata.gz: 5b70cf7812702e987cc6c1f72d7f50cbb3ddde9b
4
+ data.tar.gz: 2d487a59b8da26e186f2eefab0f7067b5e9f950f
5
5
  SHA512:
6
- metadata.gz: fcac62a5623326ed3a090de28f7355b9e6bc77a36b9959483bf53214c579ab2bffcb3cdfda3c1710b44b7fbfcb026720e7bdbc34877201bfaa3c1f06f20f5b44
7
- data.tar.gz: c69d42b73d6aeb0bb7c6d0cdfcdae0330e31fc8e29192dc36143c3606f085d162a7c37fbcc2b6d631fc3d2d6dab7cf565ee9a1655afc2692c9c707fd35312291
6
+ metadata.gz: 91777fd4da7e0928d678ce4574f91305da2673e8bbbbeed65ce892ac612808552afa68689906f70a54c8f61d4f592b821ecb4ae9e67588bb988a6dc38e2a07b7
7
+ data.tar.gz: 76a2f706860eae74756e2c4b7a04e596913f3c0189e591d5c5f250681fcada14177be815f4033bede60ba1fa75a70a073d7a3bb91bac6692ab21c1ee04d8df3b
@@ -22,6 +22,9 @@
22
22
 
23
23
  module Masamune
24
24
  class CachedFilesystem < SimpleDelegator
25
+ MAX_DEPTH = 10
26
+ EMPTY_SET = Set.new
27
+
25
28
  def initialize(filesystem)
26
29
  super filesystem
27
30
  @filesystem = filesystem
@@ -29,34 +32,55 @@ module Masamune
29
32
  end
30
33
 
31
34
  def clear!
32
- @cache = Hash.new { |h,k| h[k] = Set.new }
35
+ @cache = PathCache.new(@filesystem)
33
36
  end
34
37
 
35
38
  def exists?(file)
36
- @cache[file].any? || glob(file).include?(file) || @cache[file].any?
39
+ glob(file, max_depth: 0).include?(file) || @cache.any?(file)
37
40
  end
38
41
 
39
- def glob(file_or_glob)
40
- return Set.new(to_enum(:glob, file_or_glob)) unless block_given?
41
- glob_stat(file_or_glob) do |entry|
42
- yield entry.name unless entry.name == dirname(file_or_glob)
42
+ def glob(file_or_glob, options = {})
43
+ return Set.new(to_enum(:glob, file_or_glob, options)) unless block_given?
44
+ glob_stat(file_or_glob, options) do |entry|
45
+ yield entry.name
43
46
  end
44
47
  end
45
48
 
46
49
  def stat(file_or_dir)
47
50
  raise ArgumentError, 'cannot contain wildcard' if file_or_dir.include?('*')
48
- result = Set.new
49
- glob_stat(file_or_dir) do |entry|
50
- result << entry
51
- end
52
- result += @cache[file_or_dir]
51
+ result = glob_stat(file_or_dir, recursive: true)
53
52
  return unless result.any?
54
- return result.first if result.size == 1
55
53
  max_time = result.map { |stat| stat.try(:mtime) }.compact.max
56
54
  sum_size = result.map { |stat| stat.try(:size) }.compact.reduce(:+)
57
55
  OpenStruct.new(name: file_or_dir, mtime: max_time, size: sum_size)
58
56
  end
59
57
 
58
+ def glob_stat(file_or_glob, options = {}, &block)
59
+ return Set.new(to_enum(:glob_stat, file_or_glob, options)) unless block_given?
60
+ return if file_or_glob.blank?
61
+ return if root_path?(file_or_glob)
62
+ depth = options.fetch(:depth, 0)
63
+ max_depth = options.fetch(:max_depth, 0)
64
+ return if depth > MAX_DEPTH || depth > max_depth
65
+
66
+ glob_stat(dirname(file_or_glob), depth: depth + 1, max_depth: max_depth, &block)
67
+
68
+ dirname = dirname(file_or_glob)
69
+ unless @cache.any?(dirname)
70
+ pattern = root_path?(dirname) ? file_or_glob : File.join(dirname, '*')
71
+ @filesystem.glob_stat(pattern) do |entry|
72
+ @cache.put(entry.name, entry)
73
+ end
74
+ end
75
+
76
+ file_regexp = glob_to_regexp(file_or_glob, options)
77
+ @cache.get(dirname).each do |entry|
78
+ next if entry.name == dirname
79
+ next unless entry.name =~ file_regexp
80
+ yield entry
81
+ end if depth == 0
82
+ end
83
+
60
84
  # FIXME cache eviction policy can be more precise
61
85
  [:touch!, :mkdir!, :copy_file_to_file, :copy_file_to_dir, :copy_dir, :remove_file, :remove_dir, :move_file_to_file, :move_file_to_dir, :move_dir, :write].each do |method|
62
86
  define_method(method) do |*args|
@@ -67,42 +91,59 @@ module Masamune
67
91
 
68
92
  private
69
93
 
70
- MAX_DEPTH = 10
71
- CACHE_DEPTH = 1
72
- EMPTY_SET = Set.new
94
+ class PathCache
95
+ def initialize(filesystem)
96
+ @filesystem = filesystem
97
+ @cache = {}
98
+ end
73
99
 
74
- def glob_stat(file_or_glob, options = {}, &block)
75
- return if file_or_glob.blank?
76
- return if root_path?(file_or_glob)
77
- depth = options.fetch(:depth, 0)
78
- return if depth > MAX_DEPTH || depth > CACHE_DEPTH
100
+ def put(path, entry)
101
+ return unless path
102
+ return if @filesystem.root_path?(path)
103
+ put(File.join(@filesystem.dirname(path), '.'), OpenStruct.new(name: @filesystem.dirname(path)))
104
+ paths = path_split(path)
105
+ elems = paths.reverse.inject(entry) { |a, n| { n => a } }
106
+ @cache.deep_merge!(elems)
107
+ end
79
108
 
80
- glob_stat(dirname(file_or_glob), depth: depth + 1, &block)
109
+ def get(path)
110
+ return unless path
111
+ paths = path_split(path)
112
+ elem = paths.inject(@cache) { |level, path| level.is_a?(Hash) ? level.fetch(path, {}) : level }
113
+ emit(elem)
114
+ rescue KeyError
115
+ EMPTY_SET
116
+ end
81
117
 
82
- dirname = dirname(file_or_glob)
83
- unless @cache.key?(dirname)
84
- pattern = root_path?(dirname) ? file_or_glob : File.join(dirname, '*')
85
- @filesystem.glob_stat(pattern) do |entry|
86
- recursive_paths(dirname, entry.name) do |path|
87
- @cache[path] << entry
88
- end
118
+ def any?(path)
119
+ if elem = get(path)
120
+ elem.any? { |entry| entry.name.start_with?(path) }
121
+ else
122
+ false
89
123
  end
90
124
  end
91
- @cache[dirname] ||= EMPTY_SET
92
125
 
93
- file_regexp = glob_to_regexp(file_or_glob)
94
- @cache[dirname].each do |entry|
95
- yield entry if entry.name =~ file_regexp
96
- end if depth == 0
97
- end
126
+ private
98
127
 
99
- def recursive_paths(root, path, options = {}, &block)
100
- depth = options.fetch(:depth, 0)
101
- return if depth > MAX_DEPTH
102
- return if root == path
103
- yield path
104
- yield dirname(path)
105
- recursive_paths(root, dirname(path), depth: depth + 1, &block)
128
+ def emit(elem)
129
+ return Set.new(to_enum(:emit, elem)).flatten unless block_given?
130
+ case elem
131
+ when Array, Set
132
+ elem.each do |e|
133
+ yield emit(e)
134
+ end
135
+ when Hash
136
+ elem.values.each do |e|
137
+ yield emit(e)
138
+ end
139
+ else
140
+ yield elem
141
+ end
142
+ end
143
+
144
+ def path_split(path)
145
+ path.split('/').reject { |x| x.blank? }
146
+ end
106
147
  end
107
148
  end
108
149
  end
@@ -70,9 +70,8 @@ class Masamune::DataPlan::Elem
70
70
  return Set.new(to_enum(__method__)) unless block_given?
71
71
  if rule.for_path? && rule.free?
72
72
  file_glob = path
73
- file_glob += '/' unless path.include?('*') || path.include?('.')
74
- file_glob += '*' unless path.include?('*')
75
- rule.engine.filesystem.glob(file_glob) do |new_path|
73
+ file_glob += '/*' unless path.include?('*') || path.include?('.')
74
+ rule.engine.filesystem.glob(file_glob, max_depth: rule.cache_depth) do |new_path|
76
75
  yield rule.bind_input(new_path)
77
76
  end
78
77
  elsif rule.for_path? && rule.bound?
@@ -250,6 +250,17 @@ class Masamune::DataPlan::Rule
250
250
  self.class.new(engine, name, type, options.merge(path: new_pattern))
251
251
  end
252
252
 
253
+ def cache_depth
254
+ case time_step
255
+ when :hour, :hours
256
+ 2
257
+ when :day, :days
258
+ 1
259
+ else
260
+ 0
261
+ end
262
+ end
263
+
253
264
  private
254
265
 
255
266
  def time_step_to_format(step)
@@ -111,9 +111,9 @@ module Masamune
111
111
  parent_paths.each do |part|
112
112
  tmp << part
113
113
  current_path = prefix + File.join(tmp)
114
- break if current_path == path
115
114
  result << current_path
116
115
  end
116
+ result.pop
117
117
  result
118
118
  end
119
119
 
@@ -129,7 +129,7 @@ module Masamune
129
129
  end
130
130
 
131
131
  def dirname(path)
132
- parent_paths(path).last || path
132
+ parent_paths(path).last || remote_prefix(path) || local_prefix(path)
133
133
  end
134
134
 
135
135
  def basename(path)
@@ -174,8 +174,8 @@ module Masamune
174
174
  end
175
175
  end
176
176
 
177
- def glob_stat(pattern)
178
- return Set.new(to_enum(:glob_stat, pattern)) unless block_given?
177
+ def glob_stat(pattern, options = {})
178
+ return Set.new(to_enum(:glob_stat, pattern, options)) unless block_given?
179
179
  case type(pattern)
180
180
  when :hdfs
181
181
  hadoop_fs('-ls', '-R', pattern, safe: true) do |line|
@@ -206,7 +206,6 @@ module Masamune
206
206
  raise ArgumentError, 'cannot contain wildcard' if file_or_dir.include?('*')
207
207
  result = glob_stat(file_or_dir)
208
208
  return unless result.any?
209
- return result.first if result.size == 1
210
209
  max_time = result.map { |stat| stat.try(:mtime) }.compact.max
211
210
  sum_size = result.map { |stat| stat.try(:size) }.compact.reduce(:+)
212
211
  OpenStruct.new(name: file_or_dir, mtime: max_time, size: sum_size)
@@ -227,8 +226,8 @@ module Masamune
227
226
  end
228
227
  end
229
228
 
230
- def glob(pattern)
231
- return Set.new(to_enum(:glob, pattern)) unless block_given?
229
+ def glob(pattern, options = {})
230
+ return Set.new(to_enum(:glob, pattern, options)) unless block_given?
232
231
  case type(pattern)
233
232
  when :hdfs
234
233
  file_glob, file_regexp = glob_split(pattern)
@@ -462,7 +461,12 @@ module Masamune
462
461
  dir[%r{\Ahdfs://}]
463
462
  end
464
463
 
465
- def local_prefix(file)
464
+ def local_prefix(dir)
465
+ dir[%r{\A/}] ||
466
+ '.'
467
+ end
468
+
469
+ def local_file_prefix(file)
466
470
  return file if remote_prefix(file)
467
471
  "file://#{file}"
468
472
  end
@@ -529,7 +533,7 @@ module Masamune
529
533
  when [:hdfs, :hdfs]
530
534
  hadoop_fs('-cp', src, dst)
531
535
  when [:hdfs, :local]
532
- hadoop_fs('-copyToLocal', src, local_prefix(dst))
536
+ hadoop_fs('-copyToLocal', src, local_file_prefix(dst))
533
537
  when [:hdfs, :s3]
534
538
  hadoop_fs('-cp', src, s3n(dst))
535
539
  when [:s3, :s3]
@@ -541,7 +545,7 @@ module Masamune
541
545
  when [:local, :local]
542
546
  FileUtils.cp(src, dst, file_util_args)
543
547
  when [:local, :hdfs]
544
- hadoop_fs('-copyFromLocal', local_prefix(src), dst)
548
+ hadoop_fs('-copyFromLocal', local_file_prefix(src), dst)
545
549
  when [:local, :s3]
546
550
  s3cmd('put', src, s3b(dst, dir: dir))
547
551
  end
@@ -553,7 +557,7 @@ module Masamune
553
557
  hadoop_fs('-mv', src, dst)
554
558
  when [:hdfs, :local]
555
559
  # NOTE: moveToLocal: Option '-moveToLocal' is not implemented yet
556
- hadoop_fs('-copyToLocal', src, local_prefix(dst))
560
+ hadoop_fs('-copyToLocal', src, local_file_prefix(dst))
557
561
  hadoop_fs('-rm', src)
558
562
  when [:hdfs, :s3]
559
563
  copy_file_to_file(src, s3n(dst, dir: dir))
@@ -569,7 +573,7 @@ module Masamune
569
573
  FileUtils.mv(src, dst, file_util_args)
570
574
  FileUtils.chmod(FILE_MODE, dst, file_util_args)
571
575
  when [:local, :hdfs]
572
- hadoop_fs('-moveFromLocal', local_prefix(src), dst)
576
+ hadoop_fs('-moveFromLocal', local_file_prefix(src), dst)
573
577
  when [:local, :s3]
574
578
  s3cmd('put', src, s3b(dst, dir: dir))
575
579
  FileUtils.rm(src, file_util_args)
@@ -21,5 +21,5 @@
21
21
  # THE SOFTWARE.
22
22
 
23
23
  module Masamune
24
- VERSION = '0.15.5'
24
+ VERSION = '0.15.6'
25
25
  end
@@ -27,8 +27,9 @@ describe Masamune::CachedFilesystem do
27
27
  context 'when path is present, top down traversal' do
28
28
  before do
29
29
  filesystem.touch!('/a/b/c/1.txt', '/a/b/c/2.txt', '/a/b/c/3.txt')
30
- expect(filesystem).to receive(:glob_stat).with('/a/b/*').once.and_call_original
31
- expect(filesystem).to receive(:glob_stat).with('/a').once.and_call_original
30
+ expect(filesystem).to receive(:glob_stat).with('/a/b/c/*').once.and_call_original
31
+ expect(filesystem).to receive(:glob_stat).with('/a/b/*').never
32
+ expect(filesystem).to receive(:glob_stat).with('/a/*').never
32
33
  expect(filesystem).to receive(:glob_stat).with('/*').never
33
34
  end
34
35
 
@@ -39,7 +40,13 @@ describe Masamune::CachedFilesystem do
39
40
  expect(cached_filesystem.exists?('/a/b/c/4.txt')).to eq(false)
40
41
  expect(cached_filesystem.exists?('/a/b/c')).to eq(true)
41
42
  expect(cached_filesystem.glob('/a/b/c/*').count).to eq(3)
43
+ expect(cached_filesystem.glob('/a/b/c/*')).to include '/a/b/c/1.txt'
44
+ expect(cached_filesystem.glob('/a/b/c/*')).to include '/a/b/c/2.txt'
45
+ expect(cached_filesystem.glob('/a/b/c/*')).to include '/a/b/c/3.txt'
42
46
  expect(cached_filesystem.glob('/a/b/c/*.txt').count).to eq(3)
47
+ expect(cached_filesystem.glob('/a/b/c/*.txt')).to include '/a/b/c/1.txt'
48
+ expect(cached_filesystem.glob('/a/b/c/*.txt')).to include '/a/b/c/2.txt'
49
+ expect(cached_filesystem.glob('/a/b/c/*.txt')).to include '/a/b/c/3.txt'
43
50
  expect(cached_filesystem.stat('/a/b/c/1.txt')).to_not be_nil
44
51
  expect(cached_filesystem.stat('/a/b/c/2.txt')).to_not be_nil
45
52
  expect(cached_filesystem.stat('/a/b/c/3.txt')).to_not be_nil
@@ -53,7 +60,8 @@ describe Masamune::CachedFilesystem do
53
60
  context 'when path is present, bottom up traversal' do
54
61
  before do
55
62
  filesystem.touch!('/a/b/c/1.txt', '/a/b/c/2.txt', '/a/b/c/3.txt')
56
- expect(filesystem).to receive(:glob_stat).with('/a/*').once.and_call_original
63
+ expect(filesystem).to receive(:glob_stat).with('/a/b/*').once.and_call_original
64
+ expect(filesystem).to receive(:glob_stat).with('/a/*').never
57
65
  expect(filesystem).to receive(:glob_stat).with('/*').never
58
66
  end
59
67
 
@@ -78,8 +86,9 @@ describe Masamune::CachedFilesystem do
78
86
  context 'when path is present, checking for similar non existant paths' do
79
87
  before do
80
88
  filesystem.touch!('/y=2013/m=1/d=22/00000')
81
- expect(filesystem).to receive(:glob_stat).with('/y=2013/m=1/*').once.and_call_original
82
- expect(filesystem).to receive(:glob_stat).with('/y=2013/*').once.and_call_original
89
+ expect(filesystem).to receive(:glob_stat).with('/y=2013/m=1/d=22/*').once.and_call_original
90
+ expect(filesystem).to receive(:glob_stat).with('/y=2013/m=1/*').never
91
+ expect(filesystem).to receive(:glob_stat).with('/y=2013/*').never
83
92
  expect(filesystem).to receive(:glob_stat).with('/*').never
84
93
  end
85
94
 
@@ -87,8 +96,11 @@ describe Masamune::CachedFilesystem do
87
96
  expect(cached_filesystem.exists?('/y=2013/m=1/d=22/00000')).to eq(true)
88
97
  expect(cached_filesystem.exists?('/y=2013/m=1/d=22')).to eq(true)
89
98
  expect(cached_filesystem.exists?('/y=2013/m=1/d=2')).to eq(false)
90
- expect(cached_filesystem.glob('/y=2013/m=1/*').count).to eq(1)
99
+ expect(cached_filesystem.glob('/y=2013/m=1/*').count).to eq(2)
100
+ expect(cached_filesystem.glob('/y=2013/m=1/*')).to include('/y=2013/m=1/d=22')
101
+ expect(cached_filesystem.glob('/y=2013/m=1/*')).to include('/y=2013/m=1/d=22/00000')
91
102
  expect(cached_filesystem.glob('/y=2013/m=1/d=22/*').count).to eq(1)
103
+ expect(cached_filesystem.glob('/y=2013/m=1/d=22/*')).to include('/y=2013/m=1/d=22/00000')
92
104
  expect(cached_filesystem.stat('/y=2013/m=1/d=22/00000')).not_to be_nil
93
105
  expect(cached_filesystem.stat('/y=2013/m=1/d=22')).not_to be_nil
94
106
  expect(cached_filesystem.stat('/y=2013/m=1')).not_to be_nil
@@ -116,9 +128,15 @@ describe Masamune::CachedFilesystem do
116
128
  expect(cached_filesystem.glob('/logs/*').count).to eq(3)
117
129
  expect(cached_filesystem.glob('/logs/*.txt').count).to eq(3)
118
130
  expect(cached_filesystem.glob('/logs/box1_*.txt').count).to eq(1)
131
+ expect(cached_filesystem.glob('/logs/box1_*.txt')).to include('/logs/box1_123.txt')
119
132
  expect(cached_filesystem.glob('/logs/box2_*.txt').count).to eq(1)
133
+ expect(cached_filesystem.glob('/logs/box2_*.txt')).to include('/logs/box2_123.txt')
120
134
  expect(cached_filesystem.glob('/logs/box3_*.txt').count).to eq(1)
135
+ expect(cached_filesystem.glob('/logs/box3_*.txt')).to include('/logs/box3_123.txt')
121
136
  expect(cached_filesystem.glob('/logs/box*.txt').count).to eq(3)
137
+ expect(cached_filesystem.glob('/logs/box*.txt')).to include('/logs/box2_123.txt')
138
+ expect(cached_filesystem.glob('/logs/box*.txt')).to include('/logs/box1_123.txt')
139
+ expect(cached_filesystem.glob('/logs/box*.txt')).to include('/logs/box3_123.txt')
122
140
  expect(cached_filesystem.glob('/logs/box*.csv').count).to eq(0)
123
141
  expect(cached_filesystem.glob('/logs/box').count).to eq(0)
124
142
  expect(cached_filesystem.glob('/logs/box/*').count).to eq(0)
@@ -138,7 +156,7 @@ describe Masamune::CachedFilesystem do
138
156
  before do
139
157
  filesystem.touch!('/a/b/c')
140
158
  expect(filesystem).to receive(:glob_stat).with('/a/b/*').once.and_call_original
141
- expect(filesystem).to receive(:glob_stat).with('/a').once.and_call_original
159
+ expect(filesystem).to receive(:glob_stat).with('/a').never
142
160
  expect(filesystem).to receive(:glob_stat).with('/*').never
143
161
  end
144
162
 
@@ -149,11 +167,17 @@ describe Masamune::CachedFilesystem do
149
167
  expect(cached_filesystem.exists?('/a/b/c')).to eq(true)
150
168
  expect(cached_filesystem.exists?('/a/b')).to eq(true)
151
169
  expect(cached_filesystem.exists?('/a')).to eq(true)
152
- expect(cached_filesystem.glob('/a').count).to eq(0)
153
- expect(cached_filesystem.glob('/a/*').count).to eq(1)
154
- expect(cached_filesystem.glob('/a/b').count).to eq(0)
170
+ expect(cached_filesystem.glob('/a').count).to eq(1)
171
+ expect(cached_filesystem.glob('/a')).to include '/a'
172
+ expect(cached_filesystem.glob('/a/*').count).to eq(2)
173
+ expect(cached_filesystem.glob('/a/*')).to include '/a/b'
174
+ expect(cached_filesystem.glob('/a/*')).to include '/a/b/c'
175
+ expect(cached_filesystem.glob('/a/b').count).to eq(1)
176
+ expect(cached_filesystem.glob('/a/b')).to include '/a/b'
155
177
  expect(cached_filesystem.glob('/a/b/*').count).to eq(1)
178
+ expect(cached_filesystem.glob('/a/b/*')).to include '/a/b/c'
156
179
  expect(cached_filesystem.glob('/a/b/c').count).to eq(1)
180
+ expect(cached_filesystem.glob('/a/b/c')).to include '/a/b/c'
157
181
  expect(cached_filesystem.glob('/a/b/c/*').count).to eq(0)
158
182
  expect(cached_filesystem.glob('/a/b/c/*.txt').count).to eq(0)
159
183
  expect(cached_filesystem.stat('/a/b/c/1.txt')).to be_nil
@@ -164,4 +188,118 @@ describe Masamune::CachedFilesystem do
164
188
  expect(cached_filesystem.stat('/a')).to_not be_nil
165
189
  end
166
190
  end
191
+
192
+ describe '#glob_stat' do
193
+ before do
194
+ filesystem.touch!('/a/b/c/1.txt', '/a/b/c/2.txt', '/a/b/c/3.txt')
195
+ end
196
+
197
+ context 'without options' do
198
+ before do
199
+ expect(filesystem).to receive(:glob_stat).with('/a/b/c/*').once.and_call_original
200
+ expect(filesystem).to receive(:glob_stat).with('/a/b/*').never
201
+ expect(filesystem).to receive(:glob_stat).with('/a/*').never
202
+ expect(filesystem).to receive(:glob_stat).with('/*').never
203
+ end
204
+ it do
205
+ expect(cached_filesystem.glob_stat('/a/b/c/1.txt').count).to eq(1)
206
+ end
207
+ end
208
+
209
+ context 'with max_depth=1' do
210
+ before do
211
+ expect(filesystem).to receive(:glob_stat).with('/a/b/c/*').never
212
+ expect(filesystem).to receive(:glob_stat).with('/a/b/*').once.and_call_original
213
+ expect(filesystem).to receive(:glob_stat).with('/a/*').never
214
+ expect(filesystem).to receive(:glob_stat).with('/*').never
215
+ end
216
+ it { expect(cached_filesystem.glob_stat('/a/b/c/1.txt', max_depth: 1).count).to eq(1) }
217
+ end
218
+
219
+ context 'with max_depth=2' do
220
+ before do
221
+ expect(filesystem).to receive(:glob_stat).with('/a/b/c/*').never
222
+ expect(filesystem).to receive(:glob_stat).with('/a/b/*').never
223
+ expect(filesystem).to receive(:glob_stat).with('/a/*').once.and_call_original
224
+ expect(filesystem).to receive(:glob_stat).with('/*').never
225
+ end
226
+ it { expect(cached_filesystem.glob_stat('/a/b/c/1.txt', max_depth: 2).count).to eq(1) }
227
+ end
228
+
229
+ context 'with max_depth=3' do
230
+ before do
231
+ expect(filesystem).to receive(:glob_stat).with('/a/b/c/*').never
232
+ expect(filesystem).to receive(:glob_stat).with('/a/b/*').never
233
+ expect(filesystem).to receive(:glob_stat).with('/a/*').once.and_call_original
234
+ expect(filesystem).to receive(:glob_stat).with('/*').never
235
+ end
236
+ it { expect(cached_filesystem.glob_stat('/a/b/c/1.txt', max_depth: 3).count).to eq(1) }
237
+ end
238
+ end
239
+
240
+ describe Masamune::CachedFilesystem::PathCache do
241
+ let(:instance) { described_class.new(filesystem) }
242
+
243
+ before do
244
+ instance.put('/a/b/c/1.txt', OpenStruct.new(name: '/a/b/c/1.txt'))
245
+ instance.put('/a/b/c/2.txt', OpenStruct.new(name: '/a/b/c/2.txt'))
246
+ instance.put('/a/b/c/3.txt', OpenStruct.new(name: '/a/b/c/3.txt'))
247
+ end
248
+
249
+ subject(:result) do
250
+ instance.get(path)
251
+ end
252
+
253
+ context 'with nil' do
254
+ let(:path) { nil }
255
+
256
+ it { is_expected.to be_nil }
257
+ end
258
+
259
+ context 'with existing file path' do
260
+ let(:path) { '/a/b/c/1.txt' }
261
+
262
+ it 'returns existing file' do
263
+ expect(result).to include(OpenStruct.new(name: '/a/b/c/1.txt'))
264
+ expect(result.count).to eq(1)
265
+ end
266
+ end
267
+
268
+ context 'with existing directory path' do
269
+ let(:path) { '/a/b/c' }
270
+
271
+ it 'returns matching files' do
272
+ expect(result).to include(OpenStruct.new(name: '/a/b/c/1.txt'))
273
+ expect(result).to include(OpenStruct.new(name: '/a/b/c/2.txt'))
274
+ expect(result).to include(OpenStruct.new(name: '/a/b/c/3.txt'))
275
+ expect(result).to include(OpenStruct.new(name: '/a/b/c'))
276
+ expect(result.count).to eq(4)
277
+ end
278
+ end
279
+
280
+ context 'with existing directory path (nested)' do
281
+ let(:path) { '/a/b' }
282
+
283
+ it 'returns matching files' do
284
+ expect(result).to include(OpenStruct.new(name: '/a/b/c/1.txt'))
285
+ expect(result).to include(OpenStruct.new(name: '/a/b/c/2.txt'))
286
+ expect(result).to include(OpenStruct.new(name: '/a/b/c/3.txt'))
287
+ expect(result).to include(OpenStruct.new(name: '/a/b/c'))
288
+ expect(result).to include(OpenStruct.new(name: '/a/b'))
289
+ expect(result.count).to eq(5)
290
+ end
291
+ end
292
+
293
+ context 'with missing file path' do
294
+ let(:path) { '/a/b/c/4.txt' }
295
+
296
+ it { is_expected.to be_empty }
297
+ end
298
+
299
+ context 'with missing directory path' do
300
+ let(:path) { '/a/b/d' }
301
+
302
+ it { is_expected.to be_empty }
303
+ end
304
+ end
167
305
  end
@@ -212,7 +212,7 @@ shared_examples_for 'Filesystem' do
212
212
 
213
213
  context 'with local blank' do
214
214
  let(:path) { '' }
215
- it { is_expected.to be_blank }
215
+ it { is_expected.to eq('.') }
216
216
  end
217
217
 
218
218
  context 'with local path with slash' do
@@ -220,6 +220,11 @@ shared_examples_for 'Filesystem' do
220
220
  it { is_expected.to eq('/a/b') }
221
221
  end
222
222
 
223
+ context 'with local file without slash' do
224
+ let(:path) { 'a' }
225
+ it { is_expected.to eq('.') }
226
+ end
227
+
223
228
  context 'with local path without slash' do
224
229
  let(:path) { 'a/b/c' }
225
230
  it { is_expected.to eq('a/b') }
@@ -227,7 +232,12 @@ shared_examples_for 'Filesystem' do
227
232
 
228
233
  context 'with local relative path' do
229
234
  let(:path) { '/a/b/../c' }
230
- it { is_expected.to eq('/a/c') }
235
+ it { is_expected.to eq('/a') }
236
+ end
237
+
238
+ context 'with local another relative path' do
239
+ let(:path) { '/a/b/.' }
240
+ it { is_expected.to eq('/a') }
231
241
  end
232
242
 
233
243
  context 'with s3 bucket with blank' do
@@ -237,7 +247,7 @@ shared_examples_for 'Filesystem' do
237
247
 
238
248
  context 'with s3 bucket with slash' do
239
249
  let(:path) { 's3://bucket/' }
240
- it { is_expected.to eq('s3://bucket/') }
250
+ it { is_expected.to eq('s3://bucket') }
241
251
  end
242
252
 
243
253
  context 'with s3 bucket with path' do
@@ -247,7 +257,7 @@ shared_examples_for 'Filesystem' do
247
257
 
248
258
  context 'with s3 bucket with relative path' do
249
259
  let(:path) { 's3://bucket/a/b/../c' }
250
- it { is_expected.to eq('s3://bucket/a/c') }
260
+ it { is_expected.to eq('s3://bucket/a') }
251
261
  end
252
262
 
253
263
  context 'with hdfs directory with path' do
@@ -262,7 +272,7 @@ shared_examples_for 'Filesystem' do
262
272
 
263
273
  context 'with hdfs directory with relative path' do
264
274
  let(:path) { 'hdfs:///a/b/../c' }
265
- it { is_expected.to eq('hdfs:///a/c') }
275
+ it { is_expected.to eq('hdfs:///a') }
266
276
  end
267
277
  end
268
278
 
@@ -362,7 +372,7 @@ shared_examples_for 'Filesystem' do
362
372
 
363
373
  context 'hdfs missing file' do
364
374
  before do
365
- expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.dirname(old_dir) + '/*', safe: true).at_most(:once).
375
+ expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.join(old_dir, '/*'), safe: true).at_most(:once).
366
376
  and_yield("drwxrwxrwt - root wheel 68 2015-02-24 12:09 #{old_dir}")
367
377
  expect(filesystem).to receive(:hadoop_fs).with('-test', '-e', 'file://' + new_file, safe: true).at_most(:once).and_return(mock_failure)
368
378
  end
@@ -385,7 +395,7 @@ shared_examples_for 'Filesystem' do
385
395
 
386
396
  context 'hdfs existing file' do
387
397
  before do
388
- expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.dirname(old_dir) + '/*', safe: true).at_most(:once).
398
+ expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.join(old_dir, '/*'), safe: true).at_most(:once).
389
399
  and_yield("drwxrwxrwt - root wheel 68 2015-02-24 12:09 #{old_dir}").
390
400
  and_yield("drwxrwxrwt - root wheel 68 2015-02-24 12:09 #{old_file}")
391
401
  expect(filesystem).to receive(:hadoop_fs).with('-test', '-e', 'file://' + old_file, safe: true).at_most(:once).and_return(mock_success)
@@ -434,7 +444,7 @@ shared_examples_for 'Filesystem' do
434
444
 
435
445
  context 'hdfs missing file' do
436
446
  before do
437
- expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.dirname(old_dir) + '/*', safe: true).at_most(:once).
447
+ expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.join(old_dir, '/*'), safe: true).at_most(:once).
438
448
  and_yield('')
439
449
  expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + old_dir + '/*', safe: true).at_most(:once).
440
450
  and_yield('')
@@ -489,7 +499,7 @@ shared_examples_for 'Filesystem' do
489
499
 
490
500
  context 'hdfs existing file' do
491
501
  before do
492
- expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.dirname(old_dir) + '/*', safe: true).at_most(:once).
502
+ expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.join(old_dir, '/*'), safe: true).at_most(:once).
493
503
  and_yield("drwxrwxrwt - root wheel 68 2015-02-24 12:09 #{old_dir}").
494
504
  and_yield("drwxrwxrwt - root wheel 68 2015-02-24 12:09 #{old_file}")
495
505
  expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + old_file, safe: true).at_most(:once).
@@ -549,7 +559,7 @@ shared_examples_for 'Filesystem' do
549
559
 
550
560
  describe '#name' do
551
561
  subject { stat.name }
552
- it { is_expected.to eq('s3://bucket/dir/file.txt') }
562
+ it { is_expected.to eq('s3://bucket/dir') }
553
563
  end
554
564
 
555
565
  describe '#mtime' do
@@ -0,0 +1,129 @@
1
+ # The MIT License (MIT)
2
+ #
3
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ describe Masamune::MockFilesystem do
24
+ let(:instance) { described_class.new }
25
+
26
+ describe '#glob' do
27
+ before do
28
+ instance.touch!('/a/b/c/1.txt', '/a/b/c/2.txt', '/a/b/c/3.txt')
29
+ end
30
+
31
+ subject(:result) { instance.glob(input) }
32
+
33
+ context 'with glob for existing file' do
34
+ let(:input) { '/a/b/c/1.txt' }
35
+
36
+ it 'contains single matching file' do
37
+ expect(result).to include('/a/b/c/1.txt')
38
+ expect(result.count).to eq(1)
39
+ end
40
+ end
41
+
42
+ context 'with glob for existing files' do
43
+ let(:input) { '/a/b/c/*' }
44
+
45
+ it 'contains all matching files' do
46
+ expect(result).to include('/a/b/c/1.txt')
47
+ expect(result).to include('/a/b/c/2.txt')
48
+ expect(result).to include('/a/b/c/3.txt')
49
+ expect(result.count).to eq(3)
50
+ end
51
+ end
52
+
53
+ context 'with glob for existing files (recursive)' do
54
+ let(:input) { '/a/b/*' }
55
+
56
+ it 'contains all matching files and directory' do
57
+ expect(result).to include('/a/b/c')
58
+ expect(result).to include('/a/b/c/1.txt')
59
+ expect(result).to include('/a/b/c/2.txt')
60
+ expect(result).to include('/a/b/c/3.txt')
61
+ expect(result.count).to eq(4)
62
+ end
63
+ end
64
+
65
+ context 'with glob for missing file' do
66
+ let(:input) { '/a/b/c/4.txt' }
67
+
68
+ it { expect(result).to be_empty }
69
+ end
70
+
71
+ context 'with glob for missing directory' do
72
+ let(:input) { '/a/b/d/*' }
73
+
74
+ it { expect(result).to be_empty }
75
+ end
76
+ end
77
+
78
+ describe '#glob_stat' do
79
+ before do
80
+ instance.touch!('/a/b/c/1.txt', '/a/b/c/2.txt', '/a/b/c/3.txt')
81
+ end
82
+
83
+ subject(:result) { instance.glob_stat(input).map(&:name) }
84
+
85
+ context 'with glob for existing file' do
86
+ let(:input) { '/a/b/c/1.txt' }
87
+
88
+ it 'contains single matching file' do
89
+ expect(result).to include('/a/b/c/1.txt')
90
+ expect(result.count).to eq(1)
91
+ end
92
+ end
93
+
94
+ context 'with glob for existing files' do
95
+ let(:input) { '/a/b/c/*' }
96
+
97
+ it 'contains all matching files' do
98
+ expect(result).to include('/a/b/c/1.txt')
99
+ expect(result).to include('/a/b/c/2.txt')
100
+ expect(result).to include('/a/b/c/3.txt')
101
+ expect(result.count).to eq(3)
102
+ end
103
+ end
104
+
105
+ context 'with glob for existing files (recursive)' do
106
+ let(:input) { '/a/b/*' }
107
+
108
+ it 'contains all matching files and directory' do
109
+ expect(result).to include('/a/b/c')
110
+ expect(result).to include('/a/b/c/1.txt')
111
+ expect(result).to include('/a/b/c/2.txt')
112
+ expect(result).to include('/a/b/c/3.txt')
113
+ expect(result.count).to eq(4)
114
+ end
115
+ end
116
+
117
+ context 'with glob for missing file' do
118
+ let(:input) { '/a/b/c/4.txt' }
119
+
120
+ it { expect(result).to be_empty }
121
+ end
122
+
123
+ context 'with glob for missing directory' do
124
+ let(:input) { '/a/b/d/*' }
125
+
126
+ it { expect(result).to be_empty }
127
+ end
128
+ end
129
+ end
@@ -32,19 +32,24 @@ class Masamune::MockFilesystem < Delegator
32
32
  def touch!(*args)
33
33
  opts = args.last.is_a?(Hash) ? args.pop : {}
34
34
  args.each do |file|
35
+ parent_paths(file).each do |parent|
36
+ @files[parent] = OpenStruct.new(opts.merge(name: parent))
37
+ end
35
38
  @files[file] = OpenStruct.new(opts.merge(name: file))
36
39
  end
37
40
  end
38
41
 
39
42
  def exists?(file)
40
- @files.keys.include?(file)
43
+ @files.keys.any? { |path| file == path || path.start_with?(File.join(file, '/')) }
41
44
  end
42
45
 
43
- def glob(pattern)
44
- return Set.new(to_enum(:glob, pattern)) unless block_given?
46
+ def glob(pattern, options = {})
47
+ return Set.new(to_enum(:glob, pattern, options)) unless block_given?
45
48
  file_regexp = glob_to_regexp(pattern)
46
49
  @files.keys.each do |name|
47
- yield name if name =~ file_regexp
50
+ next if name == dirname(pattern)
51
+ next unless name =~ file_regexp
52
+ yield name
48
53
  end
49
54
  end
50
55
 
@@ -55,11 +60,13 @@ class Masamune::MockFilesystem < Delegator
55
60
  end
56
61
  end
57
62
 
58
- def glob_stat(pattern)
59
- return Set.new(to_enum(:glob_stat, pattern)) unless block_given?
63
+ def glob_stat(pattern, options = {})
64
+ return Set.new(to_enum(:glob_stat, pattern, options)) unless block_given?
60
65
  file_regexp = glob_to_regexp(pattern, recursive: true)
61
66
  @files.each do |name, stat|
62
- yield stat if name =~ file_regexp
67
+ next if stat.name == dirname(pattern)
68
+ next unless stat.name =~ file_regexp
69
+ yield stat
63
70
  end
64
71
  end
65
72
 
@@ -118,7 +118,6 @@ module Masamune::SharedExampleGroup
118
118
  expect_data = expect_data.split("\n").sort.join("\n")
119
119
  end
120
120
  yield [actual_data, output_file, expect_data]
121
- FileUtils.rm(output_file) if File.exists?(output_file)
122
121
  end
123
122
  end
124
123
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: masamune
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.15.5
4
+ version: 0.15.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michael Andrews
@@ -365,6 +365,7 @@ files:
365
365
  - spec/masamune/filesystem_spec.rb
366
366
  - spec/masamune/helpers/postgres_spec.rb
367
367
  - spec/masamune/rspec/job_fixture_spec.rb
368
+ - spec/masamune/rspec/mock_filesystem_spec.rb
368
369
  - spec/masamune/rspec/shared_example_group_spec.rb
369
370
  - spec/masamune/schema/catalog_spec.rb
370
371
  - spec/masamune/schema/column_spec.rb
@@ -476,6 +477,7 @@ test_files:
476
477
  - spec/masamune/filesystem_spec.rb
477
478
  - spec/masamune/helpers/postgres_spec.rb
478
479
  - spec/masamune/rspec/job_fixture_spec.rb
480
+ - spec/masamune/rspec/mock_filesystem_spec.rb
479
481
  - spec/masamune/rspec/shared_example_group_spec.rb
480
482
  - spec/masamune/schema/catalog_spec.rb
481
483
  - spec/masamune/schema/column_spec.rb