masamune 0.15.5 → 0.15.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/masamune/cached_filesystem.rb +82 -41
- data/lib/masamune/data_plan/elem.rb +2 -3
- data/lib/masamune/data_plan/rule.rb +11 -0
- data/lib/masamune/filesystem.rb +16 -12
- data/lib/masamune/version.rb +1 -1
- data/spec/masamune/cached_filesystem_spec.rb +148 -10
- data/spec/masamune/filesystem_spec.rb +20 -10
- data/spec/masamune/rspec/mock_filesystem_spec.rb +129 -0
- data/spec/support/masamune/mock_filesystem.rb +14 -7
- data/spec/support/masamune/shared_example_group.rb +0 -1
- metadata +3 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5b70cf7812702e987cc6c1f72d7f50cbb3ddde9b
|
4
|
+
data.tar.gz: 2d487a59b8da26e186f2eefab0f7067b5e9f950f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 91777fd4da7e0928d678ce4574f91305da2673e8bbbbeed65ce892ac612808552afa68689906f70a54c8f61d4f592b821ecb4ae9e67588bb988a6dc38e2a07b7
|
7
|
+
data.tar.gz: 76a2f706860eae74756e2c4b7a04e596913f3c0189e591d5c5f250681fcada14177be815f4033bede60ba1fa75a70a073d7a3bb91bac6692ab21c1ee04d8df3b
|
data/lib/masamune/cached_filesystem.rb
CHANGED
@@ -22,6 +22,9 @@
|
|
22
22
|
|
23
23
|
module Masamune
|
24
24
|
class CachedFilesystem < SimpleDelegator
|
25
|
+
MAX_DEPTH = 10
|
26
|
+
EMPTY_SET = Set.new
|
27
|
+
|
25
28
|
def initialize(filesystem)
|
26
29
|
super filesystem
|
27
30
|
@filesystem = filesystem
|
@@ -29,34 +32,55 @@ module Masamune
|
|
29
32
|
end
|
30
33
|
|
31
34
|
def clear!
|
32
|
-
@cache =
|
35
|
+
@cache = PathCache.new(@filesystem)
|
33
36
|
end
|
34
37
|
|
35
38
|
def exists?(file)
|
36
|
-
|
39
|
+
glob(file, max_depth: 0).include?(file) || @cache.any?(file)
|
37
40
|
end
|
38
41
|
|
39
|
-
def glob(file_or_glob)
|
40
|
-
return Set.new(to_enum(:glob, file_or_glob)) unless block_given?
|
41
|
-
glob_stat(file_or_glob) do |entry|
|
42
|
-
yield entry.name
|
42
|
+
def glob(file_or_glob, options = {})
|
43
|
+
return Set.new(to_enum(:glob, file_or_glob, options)) unless block_given?
|
44
|
+
glob_stat(file_or_glob, options) do |entry|
|
45
|
+
yield entry.name
|
43
46
|
end
|
44
47
|
end
|
45
48
|
|
46
49
|
def stat(file_or_dir)
|
47
50
|
raise ArgumentError, 'cannot contain wildcard' if file_or_dir.include?('*')
|
48
|
-
result =
|
49
|
-
glob_stat(file_or_dir) do |entry|
|
50
|
-
result << entry
|
51
|
-
end
|
52
|
-
result += @cache[file_or_dir]
|
51
|
+
result = glob_stat(file_or_dir, recursive: true)
|
53
52
|
return unless result.any?
|
54
|
-
return result.first if result.size == 1
|
55
53
|
max_time = result.map { |stat| stat.try(:mtime) }.compact.max
|
56
54
|
sum_size = result.map { |stat| stat.try(:size) }.compact.reduce(:+)
|
57
55
|
OpenStruct.new(name: file_or_dir, mtime: max_time, size: sum_size)
|
58
56
|
end
|
59
57
|
|
58
|
+
def glob_stat(file_or_glob, options = {}, &block)
|
59
|
+
return Set.new(to_enum(:glob_stat, file_or_glob, options)) unless block_given?
|
60
|
+
return if file_or_glob.blank?
|
61
|
+
return if root_path?(file_or_glob)
|
62
|
+
depth = options.fetch(:depth, 0)
|
63
|
+
max_depth = options.fetch(:max_depth, 0)
|
64
|
+
return if depth > MAX_DEPTH || depth > max_depth
|
65
|
+
|
66
|
+
glob_stat(dirname(file_or_glob), depth: depth + 1, max_depth: max_depth, &block)
|
67
|
+
|
68
|
+
dirname = dirname(file_or_glob)
|
69
|
+
unless @cache.any?(dirname)
|
70
|
+
pattern = root_path?(dirname) ? file_or_glob : File.join(dirname, '*')
|
71
|
+
@filesystem.glob_stat(pattern) do |entry|
|
72
|
+
@cache.put(entry.name, entry)
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
file_regexp = glob_to_regexp(file_or_glob, options)
|
77
|
+
@cache.get(dirname).each do |entry|
|
78
|
+
next if entry.name == dirname
|
79
|
+
next unless entry.name =~ file_regexp
|
80
|
+
yield entry
|
81
|
+
end if depth == 0
|
82
|
+
end
|
83
|
+
|
60
84
|
# FIXME cache eviction policy can be more precise
|
61
85
|
[:touch!, :mkdir!, :copy_file_to_file, :copy_file_to_dir, :copy_dir, :remove_file, :remove_dir, :move_file_to_file, :move_file_to_dir, :move_dir, :write].each do |method|
|
62
86
|
define_method(method) do |*args|
|
@@ -67,42 +91,59 @@ module Masamune
|
|
67
91
|
|
68
92
|
private
|
69
93
|
|
70
|
-
|
71
|
-
|
72
|
-
|
94
|
+
class PathCache
|
95
|
+
def initialize(filesystem)
|
96
|
+
@filesystem = filesystem
|
97
|
+
@cache = {}
|
98
|
+
end
|
73
99
|
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
100
|
+
def put(path, entry)
|
101
|
+
return unless path
|
102
|
+
return if @filesystem.root_path?(path)
|
103
|
+
put(File.join(@filesystem.dirname(path), '.'), OpenStruct.new(name: @filesystem.dirname(path)))
|
104
|
+
paths = path_split(path)
|
105
|
+
elems = paths.reverse.inject(entry) { |a, n| { n => a } }
|
106
|
+
@cache.deep_merge!(elems)
|
107
|
+
end
|
79
108
|
|
80
|
-
|
109
|
+
def get(path)
|
110
|
+
return unless path
|
111
|
+
paths = path_split(path)
|
112
|
+
elem = paths.inject(@cache) { |level, path| level.is_a?(Hash) ? level.fetch(path, {}) : level }
|
113
|
+
emit(elem)
|
114
|
+
rescue KeyError
|
115
|
+
EMPTY_SET
|
116
|
+
end
|
81
117
|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
@cache[path] << entry
|
88
|
-
end
|
118
|
+
def any?(path)
|
119
|
+
if elem = get(path)
|
120
|
+
elem.any? { |entry| entry.name.start_with?(path) }
|
121
|
+
else
|
122
|
+
false
|
89
123
|
end
|
90
124
|
end
|
91
|
-
@cache[dirname] ||= EMPTY_SET
|
92
125
|
|
93
|
-
|
94
|
-
@cache[dirname].each do |entry|
|
95
|
-
yield entry if entry.name =~ file_regexp
|
96
|
-
end if depth == 0
|
97
|
-
end
|
126
|
+
private
|
98
127
|
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
128
|
+
def emit(elem)
|
129
|
+
return Set.new(to_enum(:emit, elem)).flatten unless block_given?
|
130
|
+
case elem
|
131
|
+
when Array, Set
|
132
|
+
elem.each do |e|
|
133
|
+
yield emit(e)
|
134
|
+
end
|
135
|
+
when Hash
|
136
|
+
elem.values.each do |e|
|
137
|
+
yield emit(e)
|
138
|
+
end
|
139
|
+
else
|
140
|
+
yield elem
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
144
|
+
def path_split(path)
|
145
|
+
path.split('/').reject { |x| x.blank? }
|
146
|
+
end
|
106
147
|
end
|
107
148
|
end
|
108
149
|
end
|
data/lib/masamune/data_plan/elem.rb
CHANGED
@@ -70,9 +70,8 @@ class Masamune::DataPlan::Elem
|
|
70
70
|
return Set.new(to_enum(__method__)) unless block_given?
|
71
71
|
if rule.for_path? && rule.free?
|
72
72
|
file_glob = path
|
73
|
-
file_glob += '
|
74
|
-
file_glob
|
75
|
-
rule.engine.filesystem.glob(file_glob) do |new_path|
|
73
|
+
file_glob += '/*' unless path.include?('*') || path.include?('.')
|
74
|
+
rule.engine.filesystem.glob(file_glob, max_depth: rule.cache_depth) do |new_path|
|
76
75
|
yield rule.bind_input(new_path)
|
77
76
|
end
|
78
77
|
elsif rule.for_path? && rule.bound?
|
data/lib/masamune/data_plan/rule.rb
CHANGED
@@ -250,6 +250,17 @@ class Masamune::DataPlan::Rule
|
|
250
250
|
self.class.new(engine, name, type, options.merge(path: new_pattern))
|
251
251
|
end
|
252
252
|
|
253
|
+
def cache_depth
|
254
|
+
case time_step
|
255
|
+
when :hour, :hours
|
256
|
+
2
|
257
|
+
when :day, :days
|
258
|
+
1
|
259
|
+
else
|
260
|
+
0
|
261
|
+
end
|
262
|
+
end
|
263
|
+
|
253
264
|
private
|
254
265
|
|
255
266
|
def time_step_to_format(step)
|
data/lib/masamune/filesystem.rb
CHANGED
@@ -111,9 +111,9 @@ module Masamune
|
|
111
111
|
parent_paths.each do |part|
|
112
112
|
tmp << part
|
113
113
|
current_path = prefix + File.join(tmp)
|
114
|
-
break if current_path == path
|
115
114
|
result << current_path
|
116
115
|
end
|
116
|
+
result.pop
|
117
117
|
result
|
118
118
|
end
|
119
119
|
|
@@ -129,7 +129,7 @@ module Masamune
|
|
129
129
|
end
|
130
130
|
|
131
131
|
def dirname(path)
|
132
|
-
parent_paths(path).last || path
|
132
|
+
parent_paths(path).last || remote_prefix(path) || local_prefix(path)
|
133
133
|
end
|
134
134
|
|
135
135
|
def basename(path)
|
@@ -174,8 +174,8 @@ module Masamune
|
|
174
174
|
end
|
175
175
|
end
|
176
176
|
|
177
|
-
def glob_stat(pattern)
|
178
|
-
return Set.new(to_enum(:glob_stat, pattern)) unless block_given?
|
177
|
+
def glob_stat(pattern, options = {})
|
178
|
+
return Set.new(to_enum(:glob_stat, pattern, options)) unless block_given?
|
179
179
|
case type(pattern)
|
180
180
|
when :hdfs
|
181
181
|
hadoop_fs('-ls', '-R', pattern, safe: true) do |line|
|
@@ -206,7 +206,6 @@ module Masamune
|
|
206
206
|
raise ArgumentError, 'cannot contain wildcard' if file_or_dir.include?('*')
|
207
207
|
result = glob_stat(file_or_dir)
|
208
208
|
return unless result.any?
|
209
|
-
return result.first if result.size == 1
|
210
209
|
max_time = result.map { |stat| stat.try(:mtime) }.compact.max
|
211
210
|
sum_size = result.map { |stat| stat.try(:size) }.compact.reduce(:+)
|
212
211
|
OpenStruct.new(name: file_or_dir, mtime: max_time, size: sum_size)
|
@@ -227,8 +226,8 @@ module Masamune
|
|
227
226
|
end
|
228
227
|
end
|
229
228
|
|
230
|
-
def glob(pattern)
|
231
|
-
return Set.new(to_enum(:glob, pattern)) unless block_given?
|
229
|
+
def glob(pattern, options = {})
|
230
|
+
return Set.new(to_enum(:glob, pattern, options)) unless block_given?
|
232
231
|
case type(pattern)
|
233
232
|
when :hdfs
|
234
233
|
file_glob, file_regexp = glob_split(pattern)
|
@@ -462,7 +461,12 @@ module Masamune
|
|
462
461
|
dir[%r{\Ahdfs://}]
|
463
462
|
end
|
464
463
|
|
465
|
-
def local_prefix(
|
464
|
+
def local_prefix(dir)
|
465
|
+
dir[%r{\A/}] ||
|
466
|
+
'.'
|
467
|
+
end
|
468
|
+
|
469
|
+
def local_file_prefix(file)
|
466
470
|
return file if remote_prefix(file)
|
467
471
|
"file://#{file}"
|
468
472
|
end
|
@@ -529,7 +533,7 @@ module Masamune
|
|
529
533
|
when [:hdfs, :hdfs]
|
530
534
|
hadoop_fs('-cp', src, dst)
|
531
535
|
when [:hdfs, :local]
|
532
|
-
hadoop_fs('-copyToLocal', src,
|
536
|
+
hadoop_fs('-copyToLocal', src, local_file_prefix(dst))
|
533
537
|
when [:hdfs, :s3]
|
534
538
|
hadoop_fs('-cp', src, s3n(dst))
|
535
539
|
when [:s3, :s3]
|
@@ -541,7 +545,7 @@ module Masamune
|
|
541
545
|
when [:local, :local]
|
542
546
|
FileUtils.cp(src, dst, file_util_args)
|
543
547
|
when [:local, :hdfs]
|
544
|
-
hadoop_fs('-copyFromLocal',
|
548
|
+
hadoop_fs('-copyFromLocal', local_file_prefix(src), dst)
|
545
549
|
when [:local, :s3]
|
546
550
|
s3cmd('put', src, s3b(dst, dir: dir))
|
547
551
|
end
|
@@ -553,7 +557,7 @@ module Masamune
|
|
553
557
|
hadoop_fs('-mv', src, dst)
|
554
558
|
when [:hdfs, :local]
|
555
559
|
# NOTE: moveToLocal: Option '-moveToLocal' is not implemented yet
|
556
|
-
hadoop_fs('-copyToLocal', src,
|
560
|
+
hadoop_fs('-copyToLocal', src, local_file_prefix(dst))
|
557
561
|
hadoop_fs('-rm', src)
|
558
562
|
when [:hdfs, :s3]
|
559
563
|
copy_file_to_file(src, s3n(dst, dir: dir))
|
@@ -569,7 +573,7 @@ module Masamune
|
|
569
573
|
FileUtils.mv(src, dst, file_util_args)
|
570
574
|
FileUtils.chmod(FILE_MODE, dst, file_util_args)
|
571
575
|
when [:local, :hdfs]
|
572
|
-
hadoop_fs('-moveFromLocal',
|
576
|
+
hadoop_fs('-moveFromLocal', local_file_prefix(src), dst)
|
573
577
|
when [:local, :s3]
|
574
578
|
s3cmd('put', src, s3b(dst, dir: dir))
|
575
579
|
FileUtils.rm(src, file_util_args)
|
data/lib/masamune/version.rb
CHANGED
data/spec/masamune/cached_filesystem_spec.rb
CHANGED
@@ -27,8 +27,9 @@ describe Masamune::CachedFilesystem do
|
|
27
27
|
context 'when path is present, top down traversal' do
|
28
28
|
before do
|
29
29
|
filesystem.touch!('/a/b/c/1.txt', '/a/b/c/2.txt', '/a/b/c/3.txt')
|
30
|
-
expect(filesystem).to receive(:glob_stat).with('/a/b/*').once.and_call_original
|
31
|
-
expect(filesystem).to receive(:glob_stat).with('/a').
|
30
|
+
expect(filesystem).to receive(:glob_stat).with('/a/b/c/*').once.and_call_original
|
31
|
+
expect(filesystem).to receive(:glob_stat).with('/a/b/*').never
|
32
|
+
expect(filesystem).to receive(:glob_stat).with('/a/*').never
|
32
33
|
expect(filesystem).to receive(:glob_stat).with('/*').never
|
33
34
|
end
|
34
35
|
|
@@ -39,7 +40,13 @@ describe Masamune::CachedFilesystem do
|
|
39
40
|
expect(cached_filesystem.exists?('/a/b/c/4.txt')).to eq(false)
|
40
41
|
expect(cached_filesystem.exists?('/a/b/c')).to eq(true)
|
41
42
|
expect(cached_filesystem.glob('/a/b/c/*').count).to eq(3)
|
43
|
+
expect(cached_filesystem.glob('/a/b/c/*')).to include '/a/b/c/1.txt'
|
44
|
+
expect(cached_filesystem.glob('/a/b/c/*')).to include '/a/b/c/2.txt'
|
45
|
+
expect(cached_filesystem.glob('/a/b/c/*')).to include '/a/b/c/3.txt'
|
42
46
|
expect(cached_filesystem.glob('/a/b/c/*.txt').count).to eq(3)
|
47
|
+
expect(cached_filesystem.glob('/a/b/c/*.txt')).to include '/a/b/c/1.txt'
|
48
|
+
expect(cached_filesystem.glob('/a/b/c/*.txt')).to include '/a/b/c/2.txt'
|
49
|
+
expect(cached_filesystem.glob('/a/b/c/*.txt')).to include '/a/b/c/3.txt'
|
43
50
|
expect(cached_filesystem.stat('/a/b/c/1.txt')).to_not be_nil
|
44
51
|
expect(cached_filesystem.stat('/a/b/c/2.txt')).to_not be_nil
|
45
52
|
expect(cached_filesystem.stat('/a/b/c/3.txt')).to_not be_nil
|
@@ -53,7 +60,8 @@ describe Masamune::CachedFilesystem do
|
|
53
60
|
context 'when path is present, bottom up traversal' do
|
54
61
|
before do
|
55
62
|
filesystem.touch!('/a/b/c/1.txt', '/a/b/c/2.txt', '/a/b/c/3.txt')
|
56
|
-
expect(filesystem).to receive(:glob_stat).with('/a/*').once.and_call_original
|
63
|
+
expect(filesystem).to receive(:glob_stat).with('/a/b/*').once.and_call_original
|
64
|
+
expect(filesystem).to receive(:glob_stat).with('/a/*').never
|
57
65
|
expect(filesystem).to receive(:glob_stat).with('/*').never
|
58
66
|
end
|
59
67
|
|
@@ -78,8 +86,9 @@ describe Masamune::CachedFilesystem do
|
|
78
86
|
context 'when path is present, checking for similar non existant paths' do
|
79
87
|
before do
|
80
88
|
filesystem.touch!('/y=2013/m=1/d=22/00000')
|
81
|
-
expect(filesystem).to receive(:glob_stat).with('/y=2013/m=1/*').once.and_call_original
|
82
|
-
expect(filesystem).to receive(:glob_stat).with('/y=2013/*').
|
89
|
+
expect(filesystem).to receive(:glob_stat).with('/y=2013/m=1/d=22/*').once.and_call_original
|
90
|
+
expect(filesystem).to receive(:glob_stat).with('/y=2013/m=1/*').never
|
91
|
+
expect(filesystem).to receive(:glob_stat).with('/y=2013/*').never
|
83
92
|
expect(filesystem).to receive(:glob_stat).with('/*').never
|
84
93
|
end
|
85
94
|
|
@@ -87,8 +96,11 @@ describe Masamune::CachedFilesystem do
|
|
87
96
|
expect(cached_filesystem.exists?('/y=2013/m=1/d=22/00000')).to eq(true)
|
88
97
|
expect(cached_filesystem.exists?('/y=2013/m=1/d=22')).to eq(true)
|
89
98
|
expect(cached_filesystem.exists?('/y=2013/m=1/d=2')).to eq(false)
|
90
|
-
expect(cached_filesystem.glob('/y=2013/m=1/*').count).to eq(
|
99
|
+
expect(cached_filesystem.glob('/y=2013/m=1/*').count).to eq(2)
|
100
|
+
expect(cached_filesystem.glob('/y=2013/m=1/*')).to include('/y=2013/m=1/d=22')
|
101
|
+
expect(cached_filesystem.glob('/y=2013/m=1/*')).to include('/y=2013/m=1/d=22/00000')
|
91
102
|
expect(cached_filesystem.glob('/y=2013/m=1/d=22/*').count).to eq(1)
|
103
|
+
expect(cached_filesystem.glob('/y=2013/m=1/d=22/*')).to include('/y=2013/m=1/d=22/00000')
|
92
104
|
expect(cached_filesystem.stat('/y=2013/m=1/d=22/00000')).not_to be_nil
|
93
105
|
expect(cached_filesystem.stat('/y=2013/m=1/d=22')).not_to be_nil
|
94
106
|
expect(cached_filesystem.stat('/y=2013/m=1')).not_to be_nil
|
@@ -116,9 +128,15 @@ describe Masamune::CachedFilesystem do
|
|
116
128
|
expect(cached_filesystem.glob('/logs/*').count).to eq(3)
|
117
129
|
expect(cached_filesystem.glob('/logs/*.txt').count).to eq(3)
|
118
130
|
expect(cached_filesystem.glob('/logs/box1_*.txt').count).to eq(1)
|
131
|
+
expect(cached_filesystem.glob('/logs/box1_*.txt')).to include('/logs/box1_123.txt')
|
119
132
|
expect(cached_filesystem.glob('/logs/box2_*.txt').count).to eq(1)
|
133
|
+
expect(cached_filesystem.glob('/logs/box2_*.txt')).to include('/logs/box2_123.txt')
|
120
134
|
expect(cached_filesystem.glob('/logs/box3_*.txt').count).to eq(1)
|
135
|
+
expect(cached_filesystem.glob('/logs/box3_*.txt')).to include('/logs/box3_123.txt')
|
121
136
|
expect(cached_filesystem.glob('/logs/box*.txt').count).to eq(3)
|
137
|
+
expect(cached_filesystem.glob('/logs/box*.txt')).to include('/logs/box2_123.txt')
|
138
|
+
expect(cached_filesystem.glob('/logs/box*.txt')).to include('/logs/box1_123.txt')
|
139
|
+
expect(cached_filesystem.glob('/logs/box*.txt')).to include('/logs/box3_123.txt')
|
122
140
|
expect(cached_filesystem.glob('/logs/box*.csv').count).to eq(0)
|
123
141
|
expect(cached_filesystem.glob('/logs/box').count).to eq(0)
|
124
142
|
expect(cached_filesystem.glob('/logs/box/*').count).to eq(0)
|
@@ -138,7 +156,7 @@ describe Masamune::CachedFilesystem do
|
|
138
156
|
before do
|
139
157
|
filesystem.touch!('/a/b/c')
|
140
158
|
expect(filesystem).to receive(:glob_stat).with('/a/b/*').once.and_call_original
|
141
|
-
expect(filesystem).to receive(:glob_stat).with('/a').
|
159
|
+
expect(filesystem).to receive(:glob_stat).with('/a').never
|
142
160
|
expect(filesystem).to receive(:glob_stat).with('/*').never
|
143
161
|
end
|
144
162
|
|
@@ -149,11 +167,17 @@ describe Masamune::CachedFilesystem do
|
|
149
167
|
expect(cached_filesystem.exists?('/a/b/c')).to eq(true)
|
150
168
|
expect(cached_filesystem.exists?('/a/b')).to eq(true)
|
151
169
|
expect(cached_filesystem.exists?('/a')).to eq(true)
|
152
|
-
expect(cached_filesystem.glob('/a').count).to eq(
|
153
|
-
expect(cached_filesystem.glob('/a
|
154
|
-
expect(cached_filesystem.glob('/a
|
170
|
+
expect(cached_filesystem.glob('/a').count).to eq(1)
|
171
|
+
expect(cached_filesystem.glob('/a')).to include '/a'
|
172
|
+
expect(cached_filesystem.glob('/a/*').count).to eq(2)
|
173
|
+
expect(cached_filesystem.glob('/a/*')).to include '/a/b'
|
174
|
+
expect(cached_filesystem.glob('/a/*')).to include '/a/b/c'
|
175
|
+
expect(cached_filesystem.glob('/a/b').count).to eq(1)
|
176
|
+
expect(cached_filesystem.glob('/a/b')).to include '/a/b'
|
155
177
|
expect(cached_filesystem.glob('/a/b/*').count).to eq(1)
|
178
|
+
expect(cached_filesystem.glob('/a/b/*')).to include '/a/b/c'
|
156
179
|
expect(cached_filesystem.glob('/a/b/c').count).to eq(1)
|
180
|
+
expect(cached_filesystem.glob('/a/b/c')).to include '/a/b/c'
|
157
181
|
expect(cached_filesystem.glob('/a/b/c/*').count).to eq(0)
|
158
182
|
expect(cached_filesystem.glob('/a/b/c/*.txt').count).to eq(0)
|
159
183
|
expect(cached_filesystem.stat('/a/b/c/1.txt')).to be_nil
|
@@ -164,4 +188,118 @@ describe Masamune::CachedFilesystem do
|
|
164
188
|
expect(cached_filesystem.stat('/a')).to_not be_nil
|
165
189
|
end
|
166
190
|
end
|
191
|
+
|
192
|
+
describe '#glob_stat' do
|
193
|
+
before do
|
194
|
+
filesystem.touch!('/a/b/c/1.txt', '/a/b/c/2.txt', '/a/b/c/3.txt')
|
195
|
+
end
|
196
|
+
|
197
|
+
context 'without options' do
|
198
|
+
before do
|
199
|
+
expect(filesystem).to receive(:glob_stat).with('/a/b/c/*').once.and_call_original
|
200
|
+
expect(filesystem).to receive(:glob_stat).with('/a/b/*').never
|
201
|
+
expect(filesystem).to receive(:glob_stat).with('/a/*').never
|
202
|
+
expect(filesystem).to receive(:glob_stat).with('/*').never
|
203
|
+
end
|
204
|
+
it do
|
205
|
+
expect(cached_filesystem.glob_stat('/a/b/c/1.txt').count).to eq(1)
|
206
|
+
end
|
207
|
+
end
|
208
|
+
|
209
|
+
context 'with max_depth=1' do
|
210
|
+
before do
|
211
|
+
expect(filesystem).to receive(:glob_stat).with('/a/b/c/*').never
|
212
|
+
expect(filesystem).to receive(:glob_stat).with('/a/b/*').once.and_call_original
|
213
|
+
expect(filesystem).to receive(:glob_stat).with('/a/*').never
|
214
|
+
expect(filesystem).to receive(:glob_stat).with('/*').never
|
215
|
+
end
|
216
|
+
it { expect(cached_filesystem.glob_stat('/a/b/c/1.txt', max_depth: 1).count).to eq(1) }
|
217
|
+
end
|
218
|
+
|
219
|
+
context 'with max_depth=2' do
|
220
|
+
before do
|
221
|
+
expect(filesystem).to receive(:glob_stat).with('/a/b/c/*').never
|
222
|
+
expect(filesystem).to receive(:glob_stat).with('/a/b/*').never
|
223
|
+
expect(filesystem).to receive(:glob_stat).with('/a/*').once.and_call_original
|
224
|
+
expect(filesystem).to receive(:glob_stat).with('/*').never
|
225
|
+
end
|
226
|
+
it { expect(cached_filesystem.glob_stat('/a/b/c/1.txt', max_depth: 2).count).to eq(1) }
|
227
|
+
end
|
228
|
+
|
229
|
+
context 'with max_depth=3' do
|
230
|
+
before do
|
231
|
+
expect(filesystem).to receive(:glob_stat).with('/a/b/c/*').never
|
232
|
+
expect(filesystem).to receive(:glob_stat).with('/a/b/*').never
|
233
|
+
expect(filesystem).to receive(:glob_stat).with('/a/*').once.and_call_original
|
234
|
+
expect(filesystem).to receive(:glob_stat).with('/*').never
|
235
|
+
end
|
236
|
+
it { expect(cached_filesystem.glob_stat('/a/b/c/1.txt', max_depth: 3).count).to eq(1) }
|
237
|
+
end
|
238
|
+
end
|
239
|
+
|
240
|
+
describe Masamune::CachedFilesystem::PathCache do
|
241
|
+
let(:instance) { described_class.new(filesystem) }
|
242
|
+
|
243
|
+
before do
|
244
|
+
instance.put('/a/b/c/1.txt', OpenStruct.new(name: '/a/b/c/1.txt'))
|
245
|
+
instance.put('/a/b/c/2.txt', OpenStruct.new(name: '/a/b/c/2.txt'))
|
246
|
+
instance.put('/a/b/c/3.txt', OpenStruct.new(name: '/a/b/c/3.txt'))
|
247
|
+
end
|
248
|
+
|
249
|
+
subject(:result) do
|
250
|
+
instance.get(path)
|
251
|
+
end
|
252
|
+
|
253
|
+
context 'with nil' do
|
254
|
+
let(:path) { nil }
|
255
|
+
|
256
|
+
it { is_expected.to be_nil }
|
257
|
+
end
|
258
|
+
|
259
|
+
context 'with existing file path' do
|
260
|
+
let(:path) { '/a/b/c/1.txt' }
|
261
|
+
|
262
|
+
it 'returns existing file' do
|
263
|
+
expect(result).to include(OpenStruct.new(name: '/a/b/c/1.txt'))
|
264
|
+
expect(result.count).to eq(1)
|
265
|
+
end
|
266
|
+
end
|
267
|
+
|
268
|
+
context 'with existing directory path' do
|
269
|
+
let(:path) { '/a/b/c' }
|
270
|
+
|
271
|
+
it 'returns matching files' do
|
272
|
+
expect(result).to include(OpenStruct.new(name: '/a/b/c/1.txt'))
|
273
|
+
expect(result).to include(OpenStruct.new(name: '/a/b/c/2.txt'))
|
274
|
+
expect(result).to include(OpenStruct.new(name: '/a/b/c/3.txt'))
|
275
|
+
expect(result).to include(OpenStruct.new(name: '/a/b/c'))
|
276
|
+
expect(result.count).to eq(4)
|
277
|
+
end
|
278
|
+
end
|
279
|
+
|
280
|
+
context 'with existing directory path (nested)' do
|
281
|
+
let(:path) { '/a/b' }
|
282
|
+
|
283
|
+
it 'returns matching files' do
|
284
|
+
expect(result).to include(OpenStruct.new(name: '/a/b/c/1.txt'))
|
285
|
+
expect(result).to include(OpenStruct.new(name: '/a/b/c/2.txt'))
|
286
|
+
expect(result).to include(OpenStruct.new(name: '/a/b/c/3.txt'))
|
287
|
+
expect(result).to include(OpenStruct.new(name: '/a/b/c'))
|
288
|
+
expect(result).to include(OpenStruct.new(name: '/a/b'))
|
289
|
+
expect(result.count).to eq(5)
|
290
|
+
end
|
291
|
+
end
|
292
|
+
|
293
|
+
context 'with missing file path' do
|
294
|
+
let(:path) { '/a/b/c/4.txt' }
|
295
|
+
|
296
|
+
it { is_expected.to be_empty }
|
297
|
+
end
|
298
|
+
|
299
|
+
context 'with missing directory path' do
|
300
|
+
let(:path) { '/a/b/d' }
|
301
|
+
|
302
|
+
it { is_expected.to be_empty }
|
303
|
+
end
|
304
|
+
end
|
167
305
|
end
|
data/spec/masamune/filesystem_spec.rb
CHANGED
@@ -212,7 +212,7 @@ shared_examples_for 'Filesystem' do
|
|
212
212
|
|
213
213
|
context 'with local blank' do
|
214
214
|
let(:path) { '' }
|
215
|
-
it { is_expected.to
|
215
|
+
it { is_expected.to eq('.') }
|
216
216
|
end
|
217
217
|
|
218
218
|
context 'with local path with slash' do
|
@@ -220,6 +220,11 @@ shared_examples_for 'Filesystem' do
|
|
220
220
|
it { is_expected.to eq('/a/b') }
|
221
221
|
end
|
222
222
|
|
223
|
+
context 'with local file without slash' do
|
224
|
+
let(:path) { 'a' }
|
225
|
+
it { is_expected.to eq('.') }
|
226
|
+
end
|
227
|
+
|
223
228
|
context 'with local path without slash' do
|
224
229
|
let(:path) { 'a/b/c' }
|
225
230
|
it { is_expected.to eq('a/b') }
|
@@ -227,7 +232,12 @@ shared_examples_for 'Filesystem' do
|
|
227
232
|
|
228
233
|
context 'with local relative path' do
|
229
234
|
let(:path) { '/a/b/../c' }
|
230
|
-
it { is_expected.to eq('/a
|
235
|
+
it { is_expected.to eq('/a') }
|
236
|
+
end
|
237
|
+
|
238
|
+
context 'with local another relative path' do
|
239
|
+
let(:path) { '/a/b/.' }
|
240
|
+
it { is_expected.to eq('/a') }
|
231
241
|
end
|
232
242
|
|
233
243
|
context 'with s3 bucket with blank' do
|
@@ -237,7 +247,7 @@ shared_examples_for 'Filesystem' do
|
|
237
247
|
|
238
248
|
context 'with s3 bucket with slash' do
|
239
249
|
let(:path) { 's3://bucket/' }
|
240
|
-
it { is_expected.to eq('s3://bucket
|
250
|
+
it { is_expected.to eq('s3://bucket') }
|
241
251
|
end
|
242
252
|
|
243
253
|
context 'with s3 bucket with path' do
|
@@ -247,7 +257,7 @@ shared_examples_for 'Filesystem' do
|
|
247
257
|
|
248
258
|
context 'with s3 bucket with relative path' do
|
249
259
|
let(:path) { 's3://bucket/a/b/../c' }
|
250
|
-
it { is_expected.to eq('s3://bucket/a
|
260
|
+
it { is_expected.to eq('s3://bucket/a') }
|
251
261
|
end
|
252
262
|
|
253
263
|
context 'with hdfs directory with path' do
|
@@ -262,7 +272,7 @@ shared_examples_for 'Filesystem' do
|
|
262
272
|
|
263
273
|
context 'with hdfs directory with relative path' do
|
264
274
|
let(:path) { 'hdfs:///a/b/../c' }
|
265
|
-
it { is_expected.to eq('hdfs:///a
|
275
|
+
it { is_expected.to eq('hdfs:///a') }
|
266
276
|
end
|
267
277
|
end
|
268
278
|
|
@@ -362,7 +372,7 @@ shared_examples_for 'Filesystem' do
|
|
362
372
|
|
363
373
|
context 'hdfs missing file' do
|
364
374
|
before do
|
365
|
-
expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.
|
375
|
+
expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.join(old_dir, '/*'), safe: true).at_most(:once).
|
366
376
|
and_yield("drwxrwxrwt - root wheel 68 2015-02-24 12:09 #{old_dir}")
|
367
377
|
expect(filesystem).to receive(:hadoop_fs).with('-test', '-e', 'file://' + new_file, safe: true).at_most(:once).and_return(mock_failure)
|
368
378
|
end
|
@@ -385,7 +395,7 @@ shared_examples_for 'Filesystem' do
|
|
385
395
|
|
386
396
|
context 'hdfs existing file' do
|
387
397
|
before do
|
388
|
-
expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.
|
398
|
+
expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.join(old_dir, '/*'), safe: true).at_most(:once).
|
389
399
|
and_yield("drwxrwxrwt - root wheel 68 2015-02-24 12:09 #{old_dir}").
|
390
400
|
and_yield("drwxrwxrwt - root wheel 68 2015-02-24 12:09 #{old_file}")
|
391
401
|
expect(filesystem).to receive(:hadoop_fs).with('-test', '-e', 'file://' + old_file, safe: true).at_most(:once).and_return(mock_success)
|
@@ -434,7 +444,7 @@ shared_examples_for 'Filesystem' do
|
|
434
444
|
|
435
445
|
context 'hdfs missing file' do
|
436
446
|
before do
|
437
|
-
expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.
|
447
|
+
expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.join(old_dir, '/*'), safe: true).at_most(:once).
|
438
448
|
and_yield('')
|
439
449
|
expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + old_dir + '/*', safe: true).at_most(:once).
|
440
450
|
and_yield('')
|
@@ -489,7 +499,7 @@ shared_examples_for 'Filesystem' do
|
|
489
499
|
|
490
500
|
context 'hdfs existing file' do
|
491
501
|
before do
|
492
|
-
expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.
|
502
|
+
expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.join(old_dir, '/*'), safe: true).at_most(:once).
|
493
503
|
and_yield("drwxrwxrwt - root wheel 68 2015-02-24 12:09 #{old_dir}").
|
494
504
|
and_yield("drwxrwxrwt - root wheel 68 2015-02-24 12:09 #{old_file}")
|
495
505
|
expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + old_file, safe: true).at_most(:once).
|
@@ -549,7 +559,7 @@ shared_examples_for 'Filesystem' do
|
|
549
559
|
|
550
560
|
describe '#name' do
|
551
561
|
subject { stat.name }
|
552
|
-
it { is_expected.to eq('s3://bucket/dir
|
562
|
+
it { is_expected.to eq('s3://bucket/dir') }
|
553
563
|
end
|
554
564
|
|
555
565
|
describe '#mtime' do
|
data/spec/masamune/rspec/mock_filesystem_spec.rb
ADDED
@@ -0,0 +1,129 @@
|
|
1
|
+
# The MIT License (MIT)
|
2
|
+
#
|
3
|
+
# Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
# of this software and associated documentation files (the "Software"), to deal
|
7
|
+
# in the Software without restriction, including without limitation the rights
|
8
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
# copies of the Software, and to permit persons to whom the Software is
|
10
|
+
# furnished to do so, subject to the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
13
|
+
# all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
# THE SOFTWARE.
|
22
|
+
|
23
|
+
describe Masamune::MockFilesystem do
|
24
|
+
let(:instance) { described_class.new }
|
25
|
+
|
26
|
+
describe '#glob' do
|
27
|
+
before do
|
28
|
+
instance.touch!('/a/b/c/1.txt', '/a/b/c/2.txt', '/a/b/c/3.txt')
|
29
|
+
end
|
30
|
+
|
31
|
+
subject(:result) { instance.glob(input) }
|
32
|
+
|
33
|
+
context 'with glob for existing file' do
|
34
|
+
let(:input) { '/a/b/c/1.txt' }
|
35
|
+
|
36
|
+
it 'contains single matching file' do
|
37
|
+
expect(result).to include('/a/b/c/1.txt')
|
38
|
+
expect(result.count).to eq(1)
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
context 'with glob for existing files' do
|
43
|
+
let(:input) { '/a/b/c/*' }
|
44
|
+
|
45
|
+
it 'contains all matching files' do
|
46
|
+
expect(result).to include('/a/b/c/1.txt')
|
47
|
+
expect(result).to include('/a/b/c/2.txt')
|
48
|
+
expect(result).to include('/a/b/c/3.txt')
|
49
|
+
expect(result.count).to eq(3)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
context 'with glob for existing files (recursive)' do
|
54
|
+
let(:input) { '/a/b/*' }
|
55
|
+
|
56
|
+
it 'contains all matching files and directory' do
|
57
|
+
expect(result).to include('/a/b/c')
|
58
|
+
expect(result).to include('/a/b/c/1.txt')
|
59
|
+
expect(result).to include('/a/b/c/2.txt')
|
60
|
+
expect(result).to include('/a/b/c/3.txt')
|
61
|
+
expect(result.count).to eq(4)
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
context 'with glob for missing file' do
|
66
|
+
let(:input) { '/a/b/c/4.txt' }
|
67
|
+
|
68
|
+
it { expect(result).to be_empty }
|
69
|
+
end
|
70
|
+
|
71
|
+
context 'with glob for missing directory' do
|
72
|
+
let(:input) { '/a/b/d/*' }
|
73
|
+
|
74
|
+
it { expect(result).to be_empty }
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
describe '#glob_stat' do
|
79
|
+
before do
|
80
|
+
instance.touch!('/a/b/c/1.txt', '/a/b/c/2.txt', '/a/b/c/3.txt')
|
81
|
+
end
|
82
|
+
|
83
|
+
subject(:result) { instance.glob_stat(input).map(&:name) }
|
84
|
+
|
85
|
+
context 'with glob for existing file' do
|
86
|
+
let(:input) { '/a/b/c/1.txt' }
|
87
|
+
|
88
|
+
it 'contains single matching file' do
|
89
|
+
expect(result).to include('/a/b/c/1.txt')
|
90
|
+
expect(result.count).to eq(1)
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
context 'with glob for existing files' do
|
95
|
+
let(:input) { '/a/b/c/*' }
|
96
|
+
|
97
|
+
it 'contains all matching files' do
|
98
|
+
expect(result).to include('/a/b/c/1.txt')
|
99
|
+
expect(result).to include('/a/b/c/2.txt')
|
100
|
+
expect(result).to include('/a/b/c/3.txt')
|
101
|
+
expect(result.count).to eq(3)
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
context 'with glob for existing files (recursive)' do
|
106
|
+
let(:input) { '/a/b/*' }
|
107
|
+
|
108
|
+
it 'contains all matching files and directory' do
|
109
|
+
expect(result).to include('/a/b/c')
|
110
|
+
expect(result).to include('/a/b/c/1.txt')
|
111
|
+
expect(result).to include('/a/b/c/2.txt')
|
112
|
+
expect(result).to include('/a/b/c/3.txt')
|
113
|
+
expect(result.count).to eq(4)
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
context 'with glob for missing file' do
|
118
|
+
let(:input) { '/a/b/c/4.txt' }
|
119
|
+
|
120
|
+
it { expect(result).to be_empty }
|
121
|
+
end
|
122
|
+
|
123
|
+
context 'with glob for missing directory' do
|
124
|
+
let(:input) { '/a/b/d/*' }
|
125
|
+
|
126
|
+
it { expect(result).to be_empty }
|
127
|
+
end
|
128
|
+
end
|
129
|
+
end
|
data/spec/support/masamune/mock_filesystem.rb
CHANGED
@@ -32,19 +32,24 @@ class Masamune::MockFilesystem < Delegator
|
|
32
32
|
def touch!(*args)
|
33
33
|
opts = args.last.is_a?(Hash) ? args.pop : {}
|
34
34
|
args.each do |file|
|
35
|
+
parent_paths(file).each do |parent|
|
36
|
+
@files[parent] = OpenStruct.new(opts.merge(name: parent))
|
37
|
+
end
|
35
38
|
@files[file] = OpenStruct.new(opts.merge(name: file))
|
36
39
|
end
|
37
40
|
end
|
38
41
|
|
39
42
|
def exists?(file)
|
40
|
-
@files.keys.
|
43
|
+
@files.keys.any? { |path| file == path || path.start_with?(File.join(file, '/')) }
|
41
44
|
end
|
42
45
|
|
43
|
-
def glob(pattern)
|
44
|
-
return Set.new(to_enum(:glob, pattern)) unless block_given?
|
46
|
+
def glob(pattern, options = {})
|
47
|
+
return Set.new(to_enum(:glob, pattern, options)) unless block_given?
|
45
48
|
file_regexp = glob_to_regexp(pattern)
|
46
49
|
@files.keys.each do |name|
|
47
|
-
|
50
|
+
next if name == dirname(pattern)
|
51
|
+
next unless name =~ file_regexp
|
52
|
+
yield name
|
48
53
|
end
|
49
54
|
end
|
50
55
|
|
@@ -55,11 +60,13 @@ class Masamune::MockFilesystem < Delegator
|
|
55
60
|
end
|
56
61
|
end
|
57
62
|
|
58
|
-
def glob_stat(pattern)
|
59
|
-
return Set.new(to_enum(:glob_stat, pattern)) unless block_given?
|
63
|
+
def glob_stat(pattern, options = {})
|
64
|
+
return Set.new(to_enum(:glob_stat, pattern, options)) unless block_given?
|
60
65
|
file_regexp = glob_to_regexp(pattern, recursive: true)
|
61
66
|
@files.each do |name, stat|
|
62
|
-
|
67
|
+
next if stat.name == dirname(pattern)
|
68
|
+
next unless stat.name =~ file_regexp
|
69
|
+
yield stat
|
63
70
|
end
|
64
71
|
end
|
65
72
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: masamune
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.15.
|
4
|
+
version: 0.15.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Michael Andrews
|
@@ -365,6 +365,7 @@ files:
|
|
365
365
|
- spec/masamune/filesystem_spec.rb
|
366
366
|
- spec/masamune/helpers/postgres_spec.rb
|
367
367
|
- spec/masamune/rspec/job_fixture_spec.rb
|
368
|
+
- spec/masamune/rspec/mock_filesystem_spec.rb
|
368
369
|
- spec/masamune/rspec/shared_example_group_spec.rb
|
369
370
|
- spec/masamune/schema/catalog_spec.rb
|
370
371
|
- spec/masamune/schema/column_spec.rb
|
@@ -476,6 +477,7 @@ test_files:
|
|
476
477
|
- spec/masamune/filesystem_spec.rb
|
477
478
|
- spec/masamune/helpers/postgres_spec.rb
|
478
479
|
- spec/masamune/rspec/job_fixture_spec.rb
|
480
|
+
- spec/masamune/rspec/mock_filesystem_spec.rb
|
479
481
|
- spec/masamune/rspec/shared_example_group_spec.rb
|
480
482
|
- spec/masamune/schema/catalog_spec.rb
|
481
483
|
- spec/masamune/schema/column_spec.rb
|