arvados 1.3.0.20181129194931 → 1.3.0.20190122164002
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/arvados/keep.rb +32 -13
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 10109a46431c756a3146c97e6e7d64d8dbf72ef8
|
4
|
+
data.tar.gz: f52dd28f113f69ea56b53984e24f02f9b32656ac
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2d6cba2cd3e8ba606bbc5ce28695bb8129c2367ebdfa719257959fce83adaf967b694289383b2ee0ec24491df7199996906671eb3b5417dc68546792dfc4644c
|
7
|
+
data.tar.gz: d25379406f3ae7199598b87093afa2527387e90a082c564a93ecaec8f77e4d05177cc3c074369b26464e36ef40c0b8a2e46d309126fd7c239756135b0a550e7e
|
data/lib/arvados/keep.rb
CHANGED
@@ -101,8 +101,14 @@ module Keep
|
|
101
101
|
end
|
102
102
|
|
103
103
|
class Manifest
|
104
|
-
|
105
|
-
|
104
|
+
STREAM_TOKEN_REGEXP = /^([^\000-\040\\]|\\[0-3][0-7][0-7])+$/
|
105
|
+
STREAM_NAME_REGEXP = /^(\.)(\/[^\/]+)*$/
|
106
|
+
|
107
|
+
EMPTY_DIR_TOKEN_REGEXP = /^0:0:\.$/ # The exception when a file can have '.' as a name
|
108
|
+
FILE_TOKEN_REGEXP = /^[[:digit:]]+:[[:digit:]]+:([^\000-\040\\]|\\[0-3][0-7][0-7])+$/
|
109
|
+
FILE_NAME_REGEXP = /^[[:digit:]]+:[[:digit:]]+:([^\/]+(\/[^\/]+)*)$/
|
110
|
+
|
111
|
+
NON_8BIT_ENCODED_CHAR = /[^\\]\\[4-7][0-7][0-7]/
|
106
112
|
|
107
113
|
# Class to parse a manifest text and provide common views of that data.
|
108
114
|
def initialize(manifest_text)
|
@@ -131,7 +137,9 @@ module Keep
|
|
131
137
|
end
|
132
138
|
end
|
133
139
|
|
134
|
-
def unescape(s)
|
140
|
+
def self.unescape(s)
|
141
|
+
return nil if s.nil?
|
142
|
+
|
135
143
|
# Parse backslash escapes in a Keep manifest stream or file name.
|
136
144
|
s.gsub(/\\(\\|[0-7]{3})/) do |_|
|
137
145
|
case $1
|
@@ -143,6 +151,10 @@ module Keep
|
|
143
151
|
end
|
144
152
|
end
|
145
153
|
|
154
|
+
def unescape(s)
|
155
|
+
self.class.unescape(s)
|
156
|
+
end
|
157
|
+
|
146
158
|
def split_file_token token
|
147
159
|
start_pos, filesize, filename = token.split(':', 3)
|
148
160
|
if filename.nil?
|
@@ -162,15 +174,15 @@ module Keep
|
|
162
174
|
elsif in_file_tokens or not Locator.valid? token
|
163
175
|
in_file_tokens = true
|
164
176
|
|
165
|
-
|
177
|
+
start_pos, file_size, file_name = split_file_token(token)
|
166
178
|
stream_name_adjuster = ''
|
167
|
-
if
|
168
|
-
|
169
|
-
stream_name_adjuster =
|
170
|
-
|
179
|
+
if file_name.include?('/') # '/' in filename
|
180
|
+
dirname, sep, basename = file_name.rpartition('/')
|
181
|
+
stream_name_adjuster = sep + dirname # /dir_parts
|
182
|
+
file_name = basename
|
171
183
|
end
|
172
184
|
|
173
|
-
yield [stream_name + stream_name_adjuster
|
185
|
+
yield [stream_name + stream_name_adjuster, start_pos, file_size, file_name]
|
174
186
|
end
|
175
187
|
end
|
176
188
|
end
|
@@ -197,10 +209,13 @@ module Keep
|
|
197
209
|
# files. This can help you avoid parsing the entire manifest if you
|
198
210
|
# just want to check if a small number of files are specified.
|
199
211
|
if stop_after.nil? or not @files.nil?
|
200
|
-
|
212
|
+
# Avoid counting empty dir placeholders
|
213
|
+
return files.reject{|_, name, size| name == '.' and size == 0}.size
|
201
214
|
end
|
202
215
|
seen_files = {}
|
203
|
-
each_file_spec do |streamname, _,
|
216
|
+
each_file_spec do |streamname, _, filesize, filename|
|
217
|
+
# Avoid counting empty dir placeholders
|
218
|
+
next if filename == "." and filesize == 0
|
204
219
|
seen_files[[streamname, filename]] = true
|
205
220
|
return stop_after if (seen_files.size >= stop_after)
|
206
221
|
end
|
@@ -250,7 +265,9 @@ module Keep
|
|
250
265
|
count = 0
|
251
266
|
|
252
267
|
word = words.shift
|
253
|
-
|
268
|
+
raise ArgumentError.new "Manifest invalid for stream #{line_count}: >8-bit encoded chars not allowed on stream token #{word.inspect}" if word =~ NON_8BIT_ENCODED_CHAR
|
269
|
+
unescaped_word = unescape(word)
|
270
|
+
count += 1 if word =~ STREAM_TOKEN_REGEXP and unescaped_word =~ STREAM_NAME_REGEXP and unescaped_word !~ /\/\.\.?(\/|$)/
|
254
271
|
raise ArgumentError.new "Manifest invalid for stream #{line_count}: missing or invalid stream name #{word.inspect if word}" if count != 1
|
255
272
|
|
256
273
|
count = 0
|
@@ -262,7 +279,9 @@ module Keep
|
|
262
279
|
raise ArgumentError.new "Manifest invalid for stream #{line_count}: missing or invalid locator #{word.inspect if word}" if count == 0
|
263
280
|
|
264
281
|
count = 0
|
265
|
-
|
282
|
+
raise ArgumentError.new "Manifest invalid for stream #{line_count}: >8-bit encoded chars not allowed on file token #{word.inspect}" if word =~ NON_8BIT_ENCODED_CHAR
|
283
|
+
while unescape(word) =~ EMPTY_DIR_TOKEN_REGEXP or
|
284
|
+
(word =~ FILE_TOKEN_REGEXP and unescape(word) =~ FILE_NAME_REGEXP and ($~[1].split('/') & ['..', '.']).empty?)
|
266
285
|
word = words.shift
|
267
286
|
count += 1
|
268
287
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: arvados
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.0.
|
4
|
+
version: 1.3.0.20190122164002
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Arvados Authors
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-01-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -118,7 +118,7 @@ dependencies:
|
|
118
118
|
- - ">="
|
119
119
|
- !ruby/object:Gem::Version
|
120
120
|
version: 0.1.5
|
121
|
-
description: Arvados client library, git commit
|
121
|
+
description: Arvados client library, git commit 7337b18bf7b6996a7fe4df0aba5356a03bda452d
|
122
122
|
email: gem-dev@curoverse.com
|
123
123
|
executables: []
|
124
124
|
extensions: []
|