arvados 1.3.1.20181129194931 → 1.3.1.20190122164002
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/arvados/keep.rb +32 -13
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 25ac83805db00c44064eb97894356c9b8ea23d16
|
4
|
+
data.tar.gz: 026e2819b842fa224330d1c4e003286311ec4246
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1671d023aada7472cb66571ca0d5c7bbf460cbc08a441c2731999cb4b5117ae30d83ac6d54b801e68a4d2ca86569983bff6986321b9bb03c61651cb019451d0c
|
7
|
+
data.tar.gz: d42b9e36979cea15e04120950d30617c50b0f8c4926e70059e8b369daa45dccc2b54826e00533338446b277f7ec7fca73ffb83aba6720f968bad8975524afa5b
|
data/lib/arvados/keep.rb
CHANGED
@@ -101,8 +101,14 @@ module Keep
|
|
101
101
|
end
|
102
102
|
|
103
103
|
class Manifest
|
104
|
-
|
105
|
-
|
104
|
+
STREAM_TOKEN_REGEXP = /^([^\000-\040\\]|\\[0-3][0-7][0-7])+$/
|
105
|
+
STREAM_NAME_REGEXP = /^(\.)(\/[^\/]+)*$/
|
106
|
+
|
107
|
+
EMPTY_DIR_TOKEN_REGEXP = /^0:0:\.$/ # The exception when a file can have '.' as a name
|
108
|
+
FILE_TOKEN_REGEXP = /^[[:digit:]]+:[[:digit:]]+:([^\000-\040\\]|\\[0-3][0-7][0-7])+$/
|
109
|
+
FILE_NAME_REGEXP = /^[[:digit:]]+:[[:digit:]]+:([^\/]+(\/[^\/]+)*)$/
|
110
|
+
|
111
|
+
NON_8BIT_ENCODED_CHAR = /[^\\]\\[4-7][0-7][0-7]/
|
106
112
|
|
107
113
|
# Class to parse a manifest text and provide common views of that data.
|
108
114
|
def initialize(manifest_text)
|
@@ -131,7 +137,9 @@ module Keep
|
|
131
137
|
end
|
132
138
|
end
|
133
139
|
|
134
|
-
def unescape(s)
|
140
|
+
def self.unescape(s)
|
141
|
+
return nil if s.nil?
|
142
|
+
|
135
143
|
# Parse backslash escapes in a Keep manifest stream or file name.
|
136
144
|
s.gsub(/\\(\\|[0-7]{3})/) do |_|
|
137
145
|
case $1
|
@@ -143,6 +151,10 @@ module Keep
|
|
143
151
|
end
|
144
152
|
end
|
145
153
|
|
154
|
+
def unescape(s)
|
155
|
+
self.class.unescape(s)
|
156
|
+
end
|
157
|
+
|
146
158
|
def split_file_token token
|
147
159
|
start_pos, filesize, filename = token.split(':', 3)
|
148
160
|
if filename.nil?
|
@@ -162,15 +174,15 @@ module Keep
|
|
162
174
|
elsif in_file_tokens or not Locator.valid? token
|
163
175
|
in_file_tokens = true
|
164
176
|
|
165
|
-
|
177
|
+
start_pos, file_size, file_name = split_file_token(token)
|
166
178
|
stream_name_adjuster = ''
|
167
|
-
if
|
168
|
-
|
169
|
-
stream_name_adjuster =
|
170
|
-
|
179
|
+
if file_name.include?('/') # '/' in filename
|
180
|
+
dirname, sep, basename = file_name.rpartition('/')
|
181
|
+
stream_name_adjuster = sep + dirname # /dir_parts
|
182
|
+
file_name = basename
|
171
183
|
end
|
172
184
|
|
173
|
-
yield [stream_name + stream_name_adjuster
|
185
|
+
yield [stream_name + stream_name_adjuster, start_pos, file_size, file_name]
|
174
186
|
end
|
175
187
|
end
|
176
188
|
end
|
@@ -197,10 +209,13 @@ module Keep
|
|
197
209
|
# files. This can help you avoid parsing the entire manifest if you
|
198
210
|
# just want to check if a small number of files are specified.
|
199
211
|
if stop_after.nil? or not @files.nil?
|
200
|
-
|
212
|
+
# Avoid counting empty dir placeholders
|
213
|
+
return files.reject{|_, name, size| name == '.' and size == 0}.size
|
201
214
|
end
|
202
215
|
seen_files = {}
|
203
|
-
each_file_spec do |streamname, _,
|
216
|
+
each_file_spec do |streamname, _, filesize, filename|
|
217
|
+
# Avoid counting empty dir placeholders
|
218
|
+
next if filename == "." and filesize == 0
|
204
219
|
seen_files[[streamname, filename]] = true
|
205
220
|
return stop_after if (seen_files.size >= stop_after)
|
206
221
|
end
|
@@ -250,7 +265,9 @@ module Keep
|
|
250
265
|
count = 0
|
251
266
|
|
252
267
|
word = words.shift
|
253
|
-
|
268
|
+
raise ArgumentError.new "Manifest invalid for stream #{line_count}: >8-bit encoded chars not allowed on stream token #{word.inspect}" if word =~ NON_8BIT_ENCODED_CHAR
|
269
|
+
unescaped_word = unescape(word)
|
270
|
+
count += 1 if word =~ STREAM_TOKEN_REGEXP and unescaped_word =~ STREAM_NAME_REGEXP and unescaped_word !~ /\/\.\.?(\/|$)/
|
254
271
|
raise ArgumentError.new "Manifest invalid for stream #{line_count}: missing or invalid stream name #{word.inspect if word}" if count != 1
|
255
272
|
|
256
273
|
count = 0
|
@@ -262,7 +279,9 @@ module Keep
|
|
262
279
|
raise ArgumentError.new "Manifest invalid for stream #{line_count}: missing or invalid locator #{word.inspect if word}" if count == 0
|
263
280
|
|
264
281
|
count = 0
|
265
|
-
|
282
|
+
raise ArgumentError.new "Manifest invalid for stream #{line_count}: >8-bit encoded chars not allowed on file token #{word.inspect}" if word =~ NON_8BIT_ENCODED_CHAR
|
283
|
+
while unescape(word) =~ EMPTY_DIR_TOKEN_REGEXP or
|
284
|
+
(word =~ FILE_TOKEN_REGEXP and unescape(word) =~ FILE_NAME_REGEXP and ($~[1].split('/') & ['..', '.']).empty?)
|
266
285
|
word = words.shift
|
267
286
|
count += 1
|
268
287
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: arvados
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.1.
|
4
|
+
version: 1.3.1.20190122164002
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Arvados Authors
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-01-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -118,7 +118,7 @@ dependencies:
|
|
118
118
|
- - ">="
|
119
119
|
- !ruby/object:Gem::Version
|
120
120
|
version: 0.1.5
|
121
|
-
description: Arvados client library, git commit
|
121
|
+
description: Arvados client library, git commit 7337b18bf7b6996a7fe4df0aba5356a03bda452d
|
122
122
|
email: gem-dev@curoverse.com
|
123
123
|
executables: []
|
124
124
|
extensions: []
|