masamune 0.15.5 → 0.15.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/masamune/cached_filesystem.rb +82 -41
- data/lib/masamune/data_plan/elem.rb +2 -3
- data/lib/masamune/data_plan/rule.rb +11 -0
- data/lib/masamune/filesystem.rb +16 -12
- data/lib/masamune/version.rb +1 -1
- data/spec/masamune/cached_filesystem_spec.rb +148 -10
- data/spec/masamune/filesystem_spec.rb +20 -10
- data/spec/masamune/rspec/mock_filesystem_spec.rb +129 -0
- data/spec/support/masamune/mock_filesystem.rb +14 -7
- data/spec/support/masamune/shared_example_group.rb +0 -1
- metadata +3 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5b70cf7812702e987cc6c1f72d7f50cbb3ddde9b
|
4
|
+
data.tar.gz: 2d487a59b8da26e186f2eefab0f7067b5e9f950f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 91777fd4da7e0928d678ce4574f91305da2673e8bbbbeed65ce892ac612808552afa68689906f70a54c8f61d4f592b821ecb4ae9e67588bb988a6dc38e2a07b7
|
7
|
+
data.tar.gz: 76a2f706860eae74756e2c4b7a04e596913f3c0189e591d5c5f250681fcada14177be815f4033bede60ba1fa75a70a073d7a3bb91bac6692ab21c1ee04d8df3b
|
data/lib/masamune/cached_filesystem.rb
CHANGED
@@ -22,6 +22,9 @@
|
|
22
22
|
|
23
23
|
module Masamune
|
24
24
|
class CachedFilesystem < SimpleDelegator
|
25
|
+
MAX_DEPTH = 10
|
26
|
+
EMPTY_SET = Set.new
|
27
|
+
|
25
28
|
def initialize(filesystem)
|
26
29
|
super filesystem
|
27
30
|
@filesystem = filesystem
|
@@ -29,34 +32,55 @@ module Masamune
|
|
29
32
|
end
|
30
33
|
|
31
34
|
def clear!
|
32
|
-
@cache =
|
35
|
+
@cache = PathCache.new(@filesystem)
|
33
36
|
end
|
34
37
|
|
35
38
|
def exists?(file)
|
36
|
-
|
39
|
+
glob(file, max_depth: 0).include?(file) || @cache.any?(file)
|
37
40
|
end
|
38
41
|
|
39
|
-
def glob(file_or_glob)
|
40
|
-
return Set.new(to_enum(:glob, file_or_glob)) unless block_given?
|
41
|
-
glob_stat(file_or_glob) do |entry|
|
42
|
-
yield entry.name
|
42
|
+
def glob(file_or_glob, options = {})
|
43
|
+
return Set.new(to_enum(:glob, file_or_glob, options)) unless block_given?
|
44
|
+
glob_stat(file_or_glob, options) do |entry|
|
45
|
+
yield entry.name
|
43
46
|
end
|
44
47
|
end
|
45
48
|
|
46
49
|
def stat(file_or_dir)
|
47
50
|
raise ArgumentError, 'cannot contain wildcard' if file_or_dir.include?('*')
|
48
|
-
result =
|
49
|
-
glob_stat(file_or_dir) do |entry|
|
50
|
-
result << entry
|
51
|
-
end
|
52
|
-
result += @cache[file_or_dir]
|
51
|
+
result = glob_stat(file_or_dir, recursive: true)
|
53
52
|
return unless result.any?
|
54
|
-
return result.first if result.size == 1
|
55
53
|
max_time = result.map { |stat| stat.try(:mtime) }.compact.max
|
56
54
|
sum_size = result.map { |stat| stat.try(:size) }.compact.reduce(:+)
|
57
55
|
OpenStruct.new(name: file_or_dir, mtime: max_time, size: sum_size)
|
58
56
|
end
|
59
57
|
|
58
|
+
def glob_stat(file_or_glob, options = {}, &block)
|
59
|
+
return Set.new(to_enum(:glob_stat, file_or_glob, options)) unless block_given?
|
60
|
+
return if file_or_glob.blank?
|
61
|
+
return if root_path?(file_or_glob)
|
62
|
+
depth = options.fetch(:depth, 0)
|
63
|
+
max_depth = options.fetch(:max_depth, 0)
|
64
|
+
return if depth > MAX_DEPTH || depth > max_depth
|
65
|
+
|
66
|
+
glob_stat(dirname(file_or_glob), depth: depth + 1, max_depth: max_depth, &block)
|
67
|
+
|
68
|
+
dirname = dirname(file_or_glob)
|
69
|
+
unless @cache.any?(dirname)
|
70
|
+
pattern = root_path?(dirname) ? file_or_glob : File.join(dirname, '*')
|
71
|
+
@filesystem.glob_stat(pattern) do |entry|
|
72
|
+
@cache.put(entry.name, entry)
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
file_regexp = glob_to_regexp(file_or_glob, options)
|
77
|
+
@cache.get(dirname).each do |entry|
|
78
|
+
next if entry.name == dirname
|
79
|
+
next unless entry.name =~ file_regexp
|
80
|
+
yield entry
|
81
|
+
end if depth == 0
|
82
|
+
end
|
83
|
+
|
60
84
|
# FIXME cache eviction policy can be more precise
|
61
85
|
[:touch!, :mkdir!, :copy_file_to_file, :copy_file_to_dir, :copy_dir, :remove_file, :remove_dir, :move_file_to_file, :move_file_to_dir, :move_dir, :write].each do |method|
|
62
86
|
define_method(method) do |*args|
|
@@ -67,42 +91,59 @@ module Masamune
|
|
67
91
|
|
68
92
|
private
|
69
93
|
|
70
|
-
|
71
|
-
|
72
|
-
|
94
|
+
class PathCache
|
95
|
+
def initialize(filesystem)
|
96
|
+
@filesystem = filesystem
|
97
|
+
@cache = {}
|
98
|
+
end
|
73
99
|
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
100
|
+
def put(path, entry)
|
101
|
+
return unless path
|
102
|
+
return if @filesystem.root_path?(path)
|
103
|
+
put(File.join(@filesystem.dirname(path), '.'), OpenStruct.new(name: @filesystem.dirname(path)))
|
104
|
+
paths = path_split(path)
|
105
|
+
elems = paths.reverse.inject(entry) { |a, n| { n => a } }
|
106
|
+
@cache.deep_merge!(elems)
|
107
|
+
end
|
79
108
|
|
80
|
-
|
109
|
+
def get(path)
|
110
|
+
return unless path
|
111
|
+
paths = path_split(path)
|
112
|
+
elem = paths.inject(@cache) { |level, path| level.is_a?(Hash) ? level.fetch(path, {}) : level }
|
113
|
+
emit(elem)
|
114
|
+
rescue KeyError
|
115
|
+
EMPTY_SET
|
116
|
+
end
|
81
117
|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
@cache[path] << entry
|
88
|
-
end
|
118
|
+
def any?(path)
|
119
|
+
if elem = get(path)
|
120
|
+
elem.any? { |entry| entry.name.start_with?(path) }
|
121
|
+
else
|
122
|
+
false
|
89
123
|
end
|
90
124
|
end
|
91
|
-
@cache[dirname] ||= EMPTY_SET
|
92
125
|
|
93
|
-
|
94
|
-
@cache[dirname].each do |entry|
|
95
|
-
yield entry if entry.name =~ file_regexp
|
96
|
-
end if depth == 0
|
97
|
-
end
|
126
|
+
private
|
98
127
|
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
128
|
+
def emit(elem)
|
129
|
+
return Set.new(to_enum(:emit, elem)).flatten unless block_given?
|
130
|
+
case elem
|
131
|
+
when Array, Set
|
132
|
+
elem.each do |e|
|
133
|
+
yield emit(e)
|
134
|
+
end
|
135
|
+
when Hash
|
136
|
+
elem.values.each do |e|
|
137
|
+
yield emit(e)
|
138
|
+
end
|
139
|
+
else
|
140
|
+
yield elem
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
144
|
+
def path_split(path)
|
145
|
+
path.split('/').reject { |x| x.blank? }
|
146
|
+
end
|
106
147
|
end
|
107
148
|
end
|
108
149
|
end
|
data/lib/masamune/data_plan/elem.rb
CHANGED
@@ -70,9 +70,8 @@ class Masamune::DataPlan::Elem
|
|
70
70
|
return Set.new(to_enum(__method__)) unless block_given?
|
71
71
|
if rule.for_path? && rule.free?
|
72
72
|
file_glob = path
|
73
|
-
file_glob += '
|
74
|
-
file_glob
|
75
|
-
rule.engine.filesystem.glob(file_glob) do |new_path|
|
73
|
+
file_glob += '/*' unless path.include?('*') || path.include?('.')
|
74
|
+
rule.engine.filesystem.glob(file_glob, max_depth: rule.cache_depth) do |new_path|
|
76
75
|
yield rule.bind_input(new_path)
|
77
76
|
end
|
78
77
|
elsif rule.for_path? && rule.bound?
|
data/lib/masamune/data_plan/rule.rb
CHANGED
@@ -250,6 +250,17 @@ class Masamune::DataPlan::Rule
|
|
250
250
|
self.class.new(engine, name, type, options.merge(path: new_pattern))
|
251
251
|
end
|
252
252
|
|
253
|
+
def cache_depth
|
254
|
+
case time_step
|
255
|
+
when :hour, :hours
|
256
|
+
2
|
257
|
+
when :day, :days
|
258
|
+
1
|
259
|
+
else
|
260
|
+
0
|
261
|
+
end
|
262
|
+
end
|
263
|
+
|
253
264
|
private
|
254
265
|
|
255
266
|
def time_step_to_format(step)
|
data/lib/masamune/filesystem.rb
CHANGED
@@ -111,9 +111,9 @@ module Masamune
|
|
111
111
|
parent_paths.each do |part|
|
112
112
|
tmp << part
|
113
113
|
current_path = prefix + File.join(tmp)
|
114
|
-
break if current_path == path
|
115
114
|
result << current_path
|
116
115
|
end
|
116
|
+
result.pop
|
117
117
|
result
|
118
118
|
end
|
119
119
|
|
@@ -129,7 +129,7 @@ module Masamune
|
|
129
129
|
end
|
130
130
|
|
131
131
|
def dirname(path)
|
132
|
-
parent_paths(path).last || path
|
132
|
+
parent_paths(path).last || remote_prefix(path) || local_prefix(path)
|
133
133
|
end
|
134
134
|
|
135
135
|
def basename(path)
|
@@ -174,8 +174,8 @@ module Masamune
|
|
174
174
|
end
|
175
175
|
end
|
176
176
|
|
177
|
-
def glob_stat(pattern)
|
178
|
-
return Set.new(to_enum(:glob_stat, pattern)) unless block_given?
|
177
|
+
def glob_stat(pattern, options = {})
|
178
|
+
return Set.new(to_enum(:glob_stat, pattern, options)) unless block_given?
|
179
179
|
case type(pattern)
|
180
180
|
when :hdfs
|
181
181
|
hadoop_fs('-ls', '-R', pattern, safe: true) do |line|
|
@@ -206,7 +206,6 @@ module Masamune
|
|
206
206
|
raise ArgumentError, 'cannot contain wildcard' if file_or_dir.include?('*')
|
207
207
|
result = glob_stat(file_or_dir)
|
208
208
|
return unless result.any?
|
209
|
-
return result.first if result.size == 1
|
210
209
|
max_time = result.map { |stat| stat.try(:mtime) }.compact.max
|
211
210
|
sum_size = result.map { |stat| stat.try(:size) }.compact.reduce(:+)
|
212
211
|
OpenStruct.new(name: file_or_dir, mtime: max_time, size: sum_size)
|
@@ -227,8 +226,8 @@ module Masamune
|
|
227
226
|
end
|
228
227
|
end
|
229
228
|
|
230
|
-
def glob(pattern)
|
231
|
-
return Set.new(to_enum(:glob, pattern)) unless block_given?
|
229
|
+
def glob(pattern, options = {})
|
230
|
+
return Set.new(to_enum(:glob, pattern, options)) unless block_given?
|
232
231
|
case type(pattern)
|
233
232
|
when :hdfs
|
234
233
|
file_glob, file_regexp = glob_split(pattern)
|
@@ -462,7 +461,12 @@ module Masamune
|
|
462
461
|
dir[%r{\Ahdfs://}]
|
463
462
|
end
|
464
463
|
|
465
|
-
def local_prefix(
|
464
|
+
def local_prefix(dir)
|
465
|
+
dir[%r{\A/}] ||
|
466
|
+
'.'
|
467
|
+
end
|
468
|
+
|
469
|
+
def local_file_prefix(file)
|
466
470
|
return file if remote_prefix(file)
|
467
471
|
"file://#{file}"
|
468
472
|
end
|
@@ -529,7 +533,7 @@ module Masamune
|
|
529
533
|
when [:hdfs, :hdfs]
|
530
534
|
hadoop_fs('-cp', src, dst)
|
531
535
|
when [:hdfs, :local]
|
532
|
-
hadoop_fs('-copyToLocal', src,
|
536
|
+
hadoop_fs('-copyToLocal', src, local_file_prefix(dst))
|
533
537
|
when [:hdfs, :s3]
|
534
538
|
hadoop_fs('-cp', src, s3n(dst))
|
535
539
|
when [:s3, :s3]
|
@@ -541,7 +545,7 @@ module Masamune
|
|
541
545
|
when [:local, :local]
|
542
546
|
FileUtils.cp(src, dst, file_util_args)
|
543
547
|
when [:local, :hdfs]
|
544
|
-
hadoop_fs('-copyFromLocal',
|
548
|
+
hadoop_fs('-copyFromLocal', local_file_prefix(src), dst)
|
545
549
|
when [:local, :s3]
|
546
550
|
s3cmd('put', src, s3b(dst, dir: dir))
|
547
551
|
end
|
@@ -553,7 +557,7 @@ module Masamune
|
|
553
557
|
hadoop_fs('-mv', src, dst)
|
554
558
|
when [:hdfs, :local]
|
555
559
|
# NOTE: moveToLocal: Option '-moveToLocal' is not implemented yet
|
556
|
-
hadoop_fs('-copyToLocal', src,
|
560
|
+
hadoop_fs('-copyToLocal', src, local_file_prefix(dst))
|
557
561
|
hadoop_fs('-rm', src)
|
558
562
|
when [:hdfs, :s3]
|
559
563
|
copy_file_to_file(src, s3n(dst, dir: dir))
|
@@ -569,7 +573,7 @@ module Masamune
|
|
569
573
|
FileUtils.mv(src, dst, file_util_args)
|
570
574
|
FileUtils.chmod(FILE_MODE, dst, file_util_args)
|
571
575
|
when [:local, :hdfs]
|
572
|
-
hadoop_fs('-moveFromLocal',
|
576
|
+
hadoop_fs('-moveFromLocal', local_file_prefix(src), dst)
|
573
577
|
when [:local, :s3]
|
574
578
|
s3cmd('put', src, s3b(dst, dir: dir))
|
575
579
|
FileUtils.rm(src, file_util_args)
|
data/lib/masamune/version.rb
CHANGED
data/spec/masamune/cached_filesystem_spec.rb
CHANGED
@@ -27,8 +27,9 @@ describe Masamune::CachedFilesystem do
|
|
27
27
|
context 'when path is present, top down traversal' do
|
28
28
|
before do
|
29
29
|
filesystem.touch!('/a/b/c/1.txt', '/a/b/c/2.txt', '/a/b/c/3.txt')
|
30
|
-
expect(filesystem).to receive(:glob_stat).with('/a/b/*').once.and_call_original
|
31
|
-
expect(filesystem).to receive(:glob_stat).with('/a').
|
30
|
+
expect(filesystem).to receive(:glob_stat).with('/a/b/c/*').once.and_call_original
|
31
|
+
expect(filesystem).to receive(:glob_stat).with('/a/b/*').never
|
32
|
+
expect(filesystem).to receive(:glob_stat).with('/a/*').never
|
32
33
|
expect(filesystem).to receive(:glob_stat).with('/*').never
|
33
34
|
end
|
34
35
|
|
@@ -39,7 +40,13 @@ describe Masamune::CachedFilesystem do
|
|
39
40
|
expect(cached_filesystem.exists?('/a/b/c/4.txt')).to eq(false)
|
40
41
|
expect(cached_filesystem.exists?('/a/b/c')).to eq(true)
|
41
42
|
expect(cached_filesystem.glob('/a/b/c/*').count).to eq(3)
|
43
|
+
expect(cached_filesystem.glob('/a/b/c/*')).to include '/a/b/c/1.txt'
|
44
|
+
expect(cached_filesystem.glob('/a/b/c/*')).to include '/a/b/c/2.txt'
|
45
|
+
expect(cached_filesystem.glob('/a/b/c/*')).to include '/a/b/c/3.txt'
|
42
46
|
expect(cached_filesystem.glob('/a/b/c/*.txt').count).to eq(3)
|
47
|
+
expect(cached_filesystem.glob('/a/b/c/*.txt')).to include '/a/b/c/1.txt'
|
48
|
+
expect(cached_filesystem.glob('/a/b/c/*.txt')).to include '/a/b/c/2.txt'
|
49
|
+
expect(cached_filesystem.glob('/a/b/c/*.txt')).to include '/a/b/c/3.txt'
|
43
50
|
expect(cached_filesystem.stat('/a/b/c/1.txt')).to_not be_nil
|
44
51
|
expect(cached_filesystem.stat('/a/b/c/2.txt')).to_not be_nil
|
45
52
|
expect(cached_filesystem.stat('/a/b/c/3.txt')).to_not be_nil
|
@@ -53,7 +60,8 @@ describe Masamune::CachedFilesystem do
|
|
53
60
|
context 'when path is present, bottom up traversal' do
|
54
61
|
before do
|
55
62
|
filesystem.touch!('/a/b/c/1.txt', '/a/b/c/2.txt', '/a/b/c/3.txt')
|
56
|
-
expect(filesystem).to receive(:glob_stat).with('/a/*').once.and_call_original
|
63
|
+
expect(filesystem).to receive(:glob_stat).with('/a/b/*').once.and_call_original
|
64
|
+
expect(filesystem).to receive(:glob_stat).with('/a/*').never
|
57
65
|
expect(filesystem).to receive(:glob_stat).with('/*').never
|
58
66
|
end
|
59
67
|
|
@@ -78,8 +86,9 @@ describe Masamune::CachedFilesystem do
|
|
78
86
|
context 'when path is present, checking for similar non existant paths' do
|
79
87
|
before do
|
80
88
|
filesystem.touch!('/y=2013/m=1/d=22/00000')
|
81
|
-
expect(filesystem).to receive(:glob_stat).with('/y=2013/m=1/*').once.and_call_original
|
82
|
-
expect(filesystem).to receive(:glob_stat).with('/y=2013/*').
|
89
|
+
expect(filesystem).to receive(:glob_stat).with('/y=2013/m=1/d=22/*').once.and_call_original
|
90
|
+
expect(filesystem).to receive(:glob_stat).with('/y=2013/m=1/*').never
|
91
|
+
expect(filesystem).to receive(:glob_stat).with('/y=2013/*').never
|
83
92
|
expect(filesystem).to receive(:glob_stat).with('/*').never
|
84
93
|
end
|
85
94
|
|
@@ -87,8 +96,11 @@ describe Masamune::CachedFilesystem do
|
|
87
96
|
expect(cached_filesystem.exists?('/y=2013/m=1/d=22/00000')).to eq(true)
|
88
97
|
expect(cached_filesystem.exists?('/y=2013/m=1/d=22')).to eq(true)
|
89
98
|
expect(cached_filesystem.exists?('/y=2013/m=1/d=2')).to eq(false)
|
90
|
-
expect(cached_filesystem.glob('/y=2013/m=1/*').count).to eq(
|
99
|
+
expect(cached_filesystem.glob('/y=2013/m=1/*').count).to eq(2)
|
100
|
+
expect(cached_filesystem.glob('/y=2013/m=1/*')).to include('/y=2013/m=1/d=22')
|
101
|
+
expect(cached_filesystem.glob('/y=2013/m=1/*')).to include('/y=2013/m=1/d=22/00000')
|
91
102
|
expect(cached_filesystem.glob('/y=2013/m=1/d=22/*').count).to eq(1)
|
103
|
+
expect(cached_filesystem.glob('/y=2013/m=1/d=22/*')).to include('/y=2013/m=1/d=22/00000')
|
92
104
|
expect(cached_filesystem.stat('/y=2013/m=1/d=22/00000')).not_to be_nil
|
93
105
|
expect(cached_filesystem.stat('/y=2013/m=1/d=22')).not_to be_nil
|
94
106
|
expect(cached_filesystem.stat('/y=2013/m=1')).not_to be_nil
|
@@ -116,9 +128,15 @@ describe Masamune::CachedFilesystem do
|
|
116
128
|
expect(cached_filesystem.glob('/logs/*').count).to eq(3)
|
117
129
|
expect(cached_filesystem.glob('/logs/*.txt').count).to eq(3)
|
118
130
|
expect(cached_filesystem.glob('/logs/box1_*.txt').count).to eq(1)
|
131
|
+
expect(cached_filesystem.glob('/logs/box1_*.txt')).to include('/logs/box1_123.txt')
|
119
132
|
expect(cached_filesystem.glob('/logs/box2_*.txt').count).to eq(1)
|
133
|
+
expect(cached_filesystem.glob('/logs/box2_*.txt')).to include('/logs/box2_123.txt')
|
120
134
|
expect(cached_filesystem.glob('/logs/box3_*.txt').count).to eq(1)
|
135
|
+
expect(cached_filesystem.glob('/logs/box3_*.txt')).to include('/logs/box3_123.txt')
|
121
136
|
expect(cached_filesystem.glob('/logs/box*.txt').count).to eq(3)
|
137
|
+
expect(cached_filesystem.glob('/logs/box*.txt')).to include('/logs/box2_123.txt')
|
138
|
+
expect(cached_filesystem.glob('/logs/box*.txt')).to include('/logs/box1_123.txt')
|
139
|
+
expect(cached_filesystem.glob('/logs/box*.txt')).to include('/logs/box3_123.txt')
|
122
140
|
expect(cached_filesystem.glob('/logs/box*.csv').count).to eq(0)
|
123
141
|
expect(cached_filesystem.glob('/logs/box').count).to eq(0)
|
124
142
|
expect(cached_filesystem.glob('/logs/box/*').count).to eq(0)
|
@@ -138,7 +156,7 @@ describe Masamune::CachedFilesystem do
|
|
138
156
|
before do
|
139
157
|
filesystem.touch!('/a/b/c')
|
140
158
|
expect(filesystem).to receive(:glob_stat).with('/a/b/*').once.and_call_original
|
141
|
-
expect(filesystem).to receive(:glob_stat).with('/a').
|
159
|
+
expect(filesystem).to receive(:glob_stat).with('/a').never
|
142
160
|
expect(filesystem).to receive(:glob_stat).with('/*').never
|
143
161
|
end
|
144
162
|
|
@@ -149,11 +167,17 @@ describe Masamune::CachedFilesystem do
|
|
149
167
|
expect(cached_filesystem.exists?('/a/b/c')).to eq(true)
|
150
168
|
expect(cached_filesystem.exists?('/a/b')).to eq(true)
|
151
169
|
expect(cached_filesystem.exists?('/a')).to eq(true)
|
152
|
-
expect(cached_filesystem.glob('/a').count).to eq(
|
153
|
-
expect(cached_filesystem.glob('/a
|
154
|
-
expect(cached_filesystem.glob('/a
|
170
|
+
expect(cached_filesystem.glob('/a').count).to eq(1)
|
171
|
+
expect(cached_filesystem.glob('/a')).to include '/a'
|
172
|
+
expect(cached_filesystem.glob('/a/*').count).to eq(2)
|
173
|
+
expect(cached_filesystem.glob('/a/*')).to include '/a/b'
|
174
|
+
expect(cached_filesystem.glob('/a/*')).to include '/a/b/c'
|
175
|
+
expect(cached_filesystem.glob('/a/b').count).to eq(1)
|
176
|
+
expect(cached_filesystem.glob('/a/b')).to include '/a/b'
|
155
177
|
expect(cached_filesystem.glob('/a/b/*').count).to eq(1)
|
178
|
+
expect(cached_filesystem.glob('/a/b/*')).to include '/a/b/c'
|
156
179
|
expect(cached_filesystem.glob('/a/b/c').count).to eq(1)
|
180
|
+
expect(cached_filesystem.glob('/a/b/c')).to include '/a/b/c'
|
157
181
|
expect(cached_filesystem.glob('/a/b/c/*').count).to eq(0)
|
158
182
|
expect(cached_filesystem.glob('/a/b/c/*.txt').count).to eq(0)
|
159
183
|
expect(cached_filesystem.stat('/a/b/c/1.txt')).to be_nil
|
@@ -164,4 +188,118 @@ describe Masamune::CachedFilesystem do
|
|
164
188
|
expect(cached_filesystem.stat('/a')).to_not be_nil
|
165
189
|
end
|
166
190
|
end
|
191
|
+
|
192
|
+
describe '#glob_stat' do
|
193
|
+
before do
|
194
|
+
filesystem.touch!('/a/b/c/1.txt', '/a/b/c/2.txt', '/a/b/c/3.txt')
|
195
|
+
end
|
196
|
+
|
197
|
+
context 'without options' do
|
198
|
+
before do
|
199
|
+
expect(filesystem).to receive(:glob_stat).with('/a/b/c/*').once.and_call_original
|
200
|
+
expect(filesystem).to receive(:glob_stat).with('/a/b/*').never
|
201
|
+
expect(filesystem).to receive(:glob_stat).with('/a/*').never
|
202
|
+
expect(filesystem).to receive(:glob_stat).with('/*').never
|
203
|
+
end
|
204
|
+
it do
|
205
|
+
expect(cached_filesystem.glob_stat('/a/b/c/1.txt').count).to eq(1)
|
206
|
+
end
|
207
|
+
end
|
208
|
+
|
209
|
+
context 'with max_depth=1' do
|
210
|
+
before do
|
211
|
+
expect(filesystem).to receive(:glob_stat).with('/a/b/c/*').never
|
212
|
+
expect(filesystem).to receive(:glob_stat).with('/a/b/*').once.and_call_original
|
213
|
+
expect(filesystem).to receive(:glob_stat).with('/a/*').never
|
214
|
+
expect(filesystem).to receive(:glob_stat).with('/*').never
|
215
|
+
end
|
216
|
+
it { expect(cached_filesystem.glob_stat('/a/b/c/1.txt', max_depth: 1).count).to eq(1) }
|
217
|
+
end
|
218
|
+
|
219
|
+
context 'with max_depth=2' do
|
220
|
+
before do
|
221
|
+
expect(filesystem).to receive(:glob_stat).with('/a/b/c/*').never
|
222
|
+
expect(filesystem).to receive(:glob_stat).with('/a/b/*').never
|
223
|
+
expect(filesystem).to receive(:glob_stat).with('/a/*').once.and_call_original
|
224
|
+
expect(filesystem).to receive(:glob_stat).with('/*').never
|
225
|
+
end
|
226
|
+
it { expect(cached_filesystem.glob_stat('/a/b/c/1.txt', max_depth: 2).count).to eq(1) }
|
227
|
+
end
|
228
|
+
|
229
|
+
context 'with max_depth=3' do
|
230
|
+
before do
|
231
|
+
expect(filesystem).to receive(:glob_stat).with('/a/b/c/*').never
|
232
|
+
expect(filesystem).to receive(:glob_stat).with('/a/b/*').never
|
233
|
+
expect(filesystem).to receive(:glob_stat).with('/a/*').once.and_call_original
|
234
|
+
expect(filesystem).to receive(:glob_stat).with('/*').never
|
235
|
+
end
|
236
|
+
it { expect(cached_filesystem.glob_stat('/a/b/c/1.txt', max_depth: 3).count).to eq(1) }
|
237
|
+
end
|
238
|
+
end
|
239
|
+
|
240
|
+
describe Masamune::CachedFilesystem::PathCache do
|
241
|
+
let(:instance) { described_class.new(filesystem) }
|
242
|
+
|
243
|
+
before do
|
244
|
+
instance.put('/a/b/c/1.txt', OpenStruct.new(name: '/a/b/c/1.txt'))
|
245
|
+
instance.put('/a/b/c/2.txt', OpenStruct.new(name: '/a/b/c/2.txt'))
|
246
|
+
instance.put('/a/b/c/3.txt', OpenStruct.new(name: '/a/b/c/3.txt'))
|
247
|
+
end
|
248
|
+
|
249
|
+
subject(:result) do
|
250
|
+
instance.get(path)
|
251
|
+
end
|
252
|
+
|
253
|
+
context 'with nil' do
|
254
|
+
let(:path) { nil }
|
255
|
+
|
256
|
+
it { is_expected.to be_nil }
|
257
|
+
end
|
258
|
+
|
259
|
+
context 'with existing file path' do
|
260
|
+
let(:path) { '/a/b/c/1.txt' }
|
261
|
+
|
262
|
+
it 'returns existing file' do
|
263
|
+
expect(result).to include(OpenStruct.new(name: '/a/b/c/1.txt'))
|
264
|
+
expect(result.count).to eq(1)
|
265
|
+
end
|
266
|
+
end
|
267
|
+
|
268
|
+
context 'with existing directory path' do
|
269
|
+
let(:path) { '/a/b/c' }
|
270
|
+
|
271
|
+
it 'returns matching files' do
|
272
|
+
expect(result).to include(OpenStruct.new(name: '/a/b/c/1.txt'))
|
273
|
+
expect(result).to include(OpenStruct.new(name: '/a/b/c/2.txt'))
|
274
|
+
expect(result).to include(OpenStruct.new(name: '/a/b/c/3.txt'))
|
275
|
+
expect(result).to include(OpenStruct.new(name: '/a/b/c'))
|
276
|
+
expect(result.count).to eq(4)
|
277
|
+
end
|
278
|
+
end
|
279
|
+
|
280
|
+
context 'with existing directory path (nested)' do
|
281
|
+
let(:path) { '/a/b' }
|
282
|
+
|
283
|
+
it 'returns matching files' do
|
284
|
+
expect(result).to include(OpenStruct.new(name: '/a/b/c/1.txt'))
|
285
|
+
expect(result).to include(OpenStruct.new(name: '/a/b/c/2.txt'))
|
286
|
+
expect(result).to include(OpenStruct.new(name: '/a/b/c/3.txt'))
|
287
|
+
expect(result).to include(OpenStruct.new(name: '/a/b/c'))
|
288
|
+
expect(result).to include(OpenStruct.new(name: '/a/b'))
|
289
|
+
expect(result.count).to eq(5)
|
290
|
+
end
|
291
|
+
end
|
292
|
+
|
293
|
+
context 'with missing file path' do
|
294
|
+
let(:path) { '/a/b/c/4.txt' }
|
295
|
+
|
296
|
+
it { is_expected.to be_empty }
|
297
|
+
end
|
298
|
+
|
299
|
+
context 'with missing directory path' do
|
300
|
+
let(:path) { '/a/b/d' }
|
301
|
+
|
302
|
+
it { is_expected.to be_empty }
|
303
|
+
end
|
304
|
+
end
|
167
305
|
end
|
data/spec/masamune/filesystem_spec.rb
CHANGED
@@ -212,7 +212,7 @@ shared_examples_for 'Filesystem' do
|
|
212
212
|
|
213
213
|
context 'with local blank' do
|
214
214
|
let(:path) { '' }
|
215
|
-
it { is_expected.to
|
215
|
+
it { is_expected.to eq('.') }
|
216
216
|
end
|
217
217
|
|
218
218
|
context 'with local path with slash' do
|
@@ -220,6 +220,11 @@ shared_examples_for 'Filesystem' do
|
|
220
220
|
it { is_expected.to eq('/a/b') }
|
221
221
|
end
|
222
222
|
|
223
|
+
context 'with local file without slash' do
|
224
|
+
let(:path) { 'a' }
|
225
|
+
it { is_expected.to eq('.') }
|
226
|
+
end
|
227
|
+
|
223
228
|
context 'with local path without slash' do
|
224
229
|
let(:path) { 'a/b/c' }
|
225
230
|
it { is_expected.to eq('a/b') }
|
@@ -227,7 +232,12 @@ shared_examples_for 'Filesystem' do
|
|
227
232
|
|
228
233
|
context 'with local relative path' do
|
229
234
|
let(:path) { '/a/b/../c' }
|
230
|
-
it { is_expected.to eq('/a
|
235
|
+
it { is_expected.to eq('/a') }
|
236
|
+
end
|
237
|
+
|
238
|
+
context 'with local another relative path' do
|
239
|
+
let(:path) { '/a/b/.' }
|
240
|
+
it { is_expected.to eq('/a') }
|
231
241
|
end
|
232
242
|
|
233
243
|
context 'with s3 bucket with blank' do
|
@@ -237,7 +247,7 @@ shared_examples_for 'Filesystem' do
|
|
237
247
|
|
238
248
|
context 'with s3 bucket with slash' do
|
239
249
|
let(:path) { 's3://bucket/' }
|
240
|
-
it { is_expected.to eq('s3://bucket
|
250
|
+
it { is_expected.to eq('s3://bucket') }
|
241
251
|
end
|
242
252
|
|
243
253
|
context 'with s3 bucket with path' do
|
@@ -247,7 +257,7 @@ shared_examples_for 'Filesystem' do
|
|
247
257
|
|
248
258
|
context 'with s3 bucket with relative path' do
|
249
259
|
let(:path) { 's3://bucket/a/b/../c' }
|
250
|
-
it { is_expected.to eq('s3://bucket/a
|
260
|
+
it { is_expected.to eq('s3://bucket/a') }
|
251
261
|
end
|
252
262
|
|
253
263
|
context 'with hdfs directory with path' do
|
@@ -262,7 +272,7 @@ shared_examples_for 'Filesystem' do
|
|
262
272
|
|
263
273
|
context 'with hdfs directory with relative path' do
|
264
274
|
let(:path) { 'hdfs:///a/b/../c' }
|
265
|
-
it { is_expected.to eq('hdfs:///a
|
275
|
+
it { is_expected.to eq('hdfs:///a') }
|
266
276
|
end
|
267
277
|
end
|
268
278
|
|
@@ -362,7 +372,7 @@ shared_examples_for 'Filesystem' do
|
|
362
372
|
|
363
373
|
context 'hdfs missing file' do
|
364
374
|
before do
|
365
|
-
expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.
|
375
|
+
expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.join(old_dir, '/*'), safe: true).at_most(:once).
|
366
376
|
and_yield("drwxrwxrwt - root wheel 68 2015-02-24 12:09 #{old_dir}")
|
367
377
|
expect(filesystem).to receive(:hadoop_fs).with('-test', '-e', 'file://' + new_file, safe: true).at_most(:once).and_return(mock_failure)
|
368
378
|
end
|
@@ -385,7 +395,7 @@ shared_examples_for 'Filesystem' do
|
|
385
395
|
|
386
396
|
context 'hdfs existing file' do
|
387
397
|
before do
|
388
|
-
expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.
|
398
|
+
expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.join(old_dir, '/*'), safe: true).at_most(:once).
|
389
399
|
and_yield("drwxrwxrwt - root wheel 68 2015-02-24 12:09 #{old_dir}").
|
390
400
|
and_yield("drwxrwxrwt - root wheel 68 2015-02-24 12:09 #{old_file}")
|
391
401
|
expect(filesystem).to receive(:hadoop_fs).with('-test', '-e', 'file://' + old_file, safe: true).at_most(:once).and_return(mock_success)
|
@@ -434,7 +444,7 @@ shared_examples_for 'Filesystem' do
|
|
434
444
|
|
435
445
|
context 'hdfs missing file' do
|
436
446
|
before do
|
437
|
-
expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.
|
447
|
+
expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.join(old_dir, '/*'), safe: true).at_most(:once).
|
438
448
|
and_yield('')
|
439
449
|
expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + old_dir + '/*', safe: true).at_most(:once).
|
440
450
|
and_yield('')
|
@@ -489,7 +499,7 @@ shared_examples_for 'Filesystem' do
|
|
489
499
|
|
490
500
|
context 'hdfs existing file' do
|
491
501
|
before do
|
492
|
-
expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.
|
502
|
+
expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.join(old_dir, '/*'), safe: true).at_most(:once).
|
493
503
|
and_yield("drwxrwxrwt - root wheel 68 2015-02-24 12:09 #{old_dir}").
|
494
504
|
and_yield("drwxrwxrwt - root wheel 68 2015-02-24 12:09 #{old_file}")
|
495
505
|
expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + old_file, safe: true).at_most(:once).
|
@@ -549,7 +559,7 @@ shared_examples_for 'Filesystem' do
|
|
549
559
|
|
550
560
|
describe '#name' do
|
551
561
|
subject { stat.name }
|
552
|
-
it { is_expected.to eq('s3://bucket/dir
|
562
|
+
it { is_expected.to eq('s3://bucket/dir') }
|
553
563
|
end
|
554
564
|
|
555
565
|
describe '#mtime' do
|
data/spec/masamune/rspec/mock_filesystem_spec.rb
ADDED
@@ -0,0 +1,129 @@
|
|
1
|
+
# The MIT License (MIT)
|
2
|
+
#
|
3
|
+
# Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
# of this software and associated documentation files (the "Software"), to deal
|
7
|
+
# in the Software without restriction, including without limitation the rights
|
8
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
# copies of the Software, and to permit persons to whom the Software is
|
10
|
+
# furnished to do so, subject to the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
13
|
+
# all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
# THE SOFTWARE.
|
22
|
+
|
23
|
+
describe Masamune::MockFilesystem do
|
24
|
+
let(:instance) { described_class.new }
|
25
|
+
|
26
|
+
describe '#glob' do
|
27
|
+
before do
|
28
|
+
instance.touch!('/a/b/c/1.txt', '/a/b/c/2.txt', '/a/b/c/3.txt')
|
29
|
+
end
|
30
|
+
|
31
|
+
subject(:result) { instance.glob(input) }
|
32
|
+
|
33
|
+
context 'with glob for existing file' do
|
34
|
+
let(:input) { '/a/b/c/1.txt' }
|
35
|
+
|
36
|
+
it 'contains single matching file' do
|
37
|
+
expect(result).to include('/a/b/c/1.txt')
|
38
|
+
expect(result.count).to eq(1)
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
context 'with glob for existing files' do
|
43
|
+
let(:input) { '/a/b/c/*' }
|
44
|
+
|
45
|
+
it 'contains all matching files' do
|
46
|
+
expect(result).to include('/a/b/c/1.txt')
|
47
|
+
expect(result).to include('/a/b/c/2.txt')
|
48
|
+
expect(result).to include('/a/b/c/3.txt')
|
49
|
+
expect(result.count).to eq(3)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
context 'with glob for existing files (recursive)' do
|
54
|
+
let(:input) { '/a/b/*' }
|
55
|
+
|
56
|
+
it 'contains all matching files and directory' do
|
57
|
+
expect(result).to include('/a/b/c')
|
58
|
+
expect(result).to include('/a/b/c/1.txt')
|
59
|
+
expect(result).to include('/a/b/c/2.txt')
|
60
|
+
expect(result).to include('/a/b/c/3.txt')
|
61
|
+
expect(result.count).to eq(4)
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
context 'with glob for missing file' do
|
66
|
+
let(:input) { '/a/b/c/4.txt' }
|
67
|
+
|
68
|
+
it { expect(result).to be_empty }
|
69
|
+
end
|
70
|
+
|
71
|
+
context 'with glob for missing directory' do
|
72
|
+
let(:input) { '/a/b/d/*' }
|
73
|
+
|
74
|
+
it { expect(result).to be_empty }
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
describe '#glob_stat' do
|
79
|
+
before do
|
80
|
+
instance.touch!('/a/b/c/1.txt', '/a/b/c/2.txt', '/a/b/c/3.txt')
|
81
|
+
end
|
82
|
+
|
83
|
+
subject(:result) { instance.glob_stat(input).map(&:name) }
|
84
|
+
|
85
|
+
context 'with glob for existing file' do
|
86
|
+
let(:input) { '/a/b/c/1.txt' }
|
87
|
+
|
88
|
+
it 'contains single matching file' do
|
89
|
+
expect(result).to include('/a/b/c/1.txt')
|
90
|
+
expect(result.count).to eq(1)
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
context 'with glob for existing files' do
|
95
|
+
let(:input) { '/a/b/c/*' }
|
96
|
+
|
97
|
+
it 'contains all matching files' do
|
98
|
+
expect(result).to include('/a/b/c/1.txt')
|
99
|
+
expect(result).to include('/a/b/c/2.txt')
|
100
|
+
expect(result).to include('/a/b/c/3.txt')
|
101
|
+
expect(result.count).to eq(3)
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
context 'with glob for existing files (recursive)' do
|
106
|
+
let(:input) { '/a/b/*' }
|
107
|
+
|
108
|
+
it 'contains all matching files and directory' do
|
109
|
+
expect(result).to include('/a/b/c')
|
110
|
+
expect(result).to include('/a/b/c/1.txt')
|
111
|
+
expect(result).to include('/a/b/c/2.txt')
|
112
|
+
expect(result).to include('/a/b/c/3.txt')
|
113
|
+
expect(result.count).to eq(4)
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
context 'with glob for missing file' do
|
118
|
+
let(:input) { '/a/b/c/4.txt' }
|
119
|
+
|
120
|
+
it { expect(result).to be_empty }
|
121
|
+
end
|
122
|
+
|
123
|
+
context 'with glob for missing directory' do
|
124
|
+
let(:input) { '/a/b/d/*' }
|
125
|
+
|
126
|
+
it { expect(result).to be_empty }
|
127
|
+
end
|
128
|
+
end
|
129
|
+
end
|
data/spec/support/masamune/mock_filesystem.rb
CHANGED
@@ -32,19 +32,24 @@ class Masamune::MockFilesystem < Delegator
|
|
32
32
|
def touch!(*args)
|
33
33
|
opts = args.last.is_a?(Hash) ? args.pop : {}
|
34
34
|
args.each do |file|
|
35
|
+
parent_paths(file).each do |parent|
|
36
|
+
@files[parent] = OpenStruct.new(opts.merge(name: parent))
|
37
|
+
end
|
35
38
|
@files[file] = OpenStruct.new(opts.merge(name: file))
|
36
39
|
end
|
37
40
|
end
|
38
41
|
|
39
42
|
def exists?(file)
|
40
|
-
@files.keys.
|
43
|
+
@files.keys.any? { |path| file == path || path.start_with?(File.join(file, '/')) }
|
41
44
|
end
|
42
45
|
|
43
|
-
def glob(pattern)
|
44
|
-
return Set.new(to_enum(:glob, pattern)) unless block_given?
|
46
|
+
def glob(pattern, options = {})
|
47
|
+
return Set.new(to_enum(:glob, pattern, options)) unless block_given?
|
45
48
|
file_regexp = glob_to_regexp(pattern)
|
46
49
|
@files.keys.each do |name|
|
47
|
-
|
50
|
+
next if name == dirname(pattern)
|
51
|
+
next unless name =~ file_regexp
|
52
|
+
yield name
|
48
53
|
end
|
49
54
|
end
|
50
55
|
|
@@ -55,11 +60,13 @@ class Masamune::MockFilesystem < Delegator
|
|
55
60
|
end
|
56
61
|
end
|
57
62
|
|
58
|
-
def glob_stat(pattern)
|
59
|
-
return Set.new(to_enum(:glob_stat, pattern)) unless block_given?
|
63
|
+
def glob_stat(pattern, options = {})
|
64
|
+
return Set.new(to_enum(:glob_stat, pattern, options)) unless block_given?
|
60
65
|
file_regexp = glob_to_regexp(pattern, recursive: true)
|
61
66
|
@files.each do |name, stat|
|
62
|
-
|
67
|
+
next if stat.name == dirname(pattern)
|
68
|
+
next unless stat.name =~ file_regexp
|
69
|
+
yield stat
|
63
70
|
end
|
64
71
|
end
|
65
72
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: masamune
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.15.
|
4
|
+
version: 0.15.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Michael Andrews
|
@@ -365,6 +365,7 @@ files:
|
|
365
365
|
- spec/masamune/filesystem_spec.rb
|
366
366
|
- spec/masamune/helpers/postgres_spec.rb
|
367
367
|
- spec/masamune/rspec/job_fixture_spec.rb
|
368
|
+
- spec/masamune/rspec/mock_filesystem_spec.rb
|
368
369
|
- spec/masamune/rspec/shared_example_group_spec.rb
|
369
370
|
- spec/masamune/schema/catalog_spec.rb
|
370
371
|
- spec/masamune/schema/column_spec.rb
|
@@ -476,6 +477,7 @@ test_files:
|
|
476
477
|
- spec/masamune/filesystem_spec.rb
|
477
478
|
- spec/masamune/helpers/postgres_spec.rb
|
478
479
|
- spec/masamune/rspec/job_fixture_spec.rb
|
480
|
+
- spec/masamune/rspec/mock_filesystem_spec.rb
|
479
481
|
- spec/masamune/rspec/shared_example_group_spec.rb
|
480
482
|
- spec/masamune/schema/catalog_spec.rb
|
481
483
|
- spec/masamune/schema/column_spec.rb
|