arvados 0.1.20150313191637 → 0.1.20150407214339

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/arvados/collection.rb +513 -0
  3. metadata +4 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f7777c7fd0f5fae868ca7e621a86dd806d69b5d1
4
- data.tar.gz: 09b5ef89cac47dd878b60d84e00992067aafc48f
3
+ metadata.gz: ddce2313bdd7705ea2f920e375829dd4dbc8a4c1
4
+ data.tar.gz: 1a64729b3c2fd824e91362c5035fc23aeb49f4ed
5
5
  SHA512:
6
- metadata.gz: 007a16243ec11d714042476f9e2a89e26cad56b4c97b49f18c72dedbf5ee56931ddb5413b30393e2aaac9071c5c42f10638af999d15e5240fae9264e699bcb27
7
- data.tar.gz: 0ed415e276280b11c7abddb09259d018e25bf3a3916fa8b0b13d9cd2e2af5ae3cdafd4eef05479d2eb313ba682968b3bb7de4f20b095c6674da7d3a9fdd490f1
6
+ metadata.gz: b37a1d8bb49b4eee935ba2945bb972a66aa463beb12b781114ce29a4576da0968234f44aefe9a0c3e1c81496bd30082301d0cc15eac7c689fa15206467a6bc4a
7
+ data.tar.gz: 2d5c9b2ae9f2c169fe57cdf9652d4ec34d45d32bceffad9578a454395c16a0848c4cd3a049242f19d6bfa4ab84e10df371ca68dfc7016bcbd8c550e6ef344fa2
@@ -0,0 +1,513 @@
1
+ require "arvados/keep"
2
+
3
+ module Arv
4
+ class Collection
5
# Build the in-memory tree for a collection from its manifest text.
#
# Every manifest line yields a stream name, a list of block locators and a
# list of file tokens; each file token is turned into a segment that is
# attached to the file at the matching path under the root.
#
# Raises ArgumentError when a line has an empty stream name, no locators,
# or no file tokens.
def initialize(manifest_text="")
  @manifest_text = manifest_text
  @modified = false
  @root = CollectionRoot.new
  manifest = Keep::Manifest.new(manifest_text)
  manifest.each_line do |stream_root, locators, file_specs|
    if [stream_root, locators, file_specs].any?(&:empty?)
      raise ArgumentError.new("manifest text includes malformed line")
    end
    loc_list = LocatorList.new(locators)
    file_specs.each do |spec|
      file_start, file_len, file_path = manifest.split_file_token(spec)
      target_file = @root.file_at(normalize_path(stream_root, file_path))
      target_file.add_segment(loc_list.segment(file_start, file_len))
    end
  end
end
22
+
23
# Return the manifest text for this collection.
#
# The cached text is reused when present; otherwise it is regenerated from
# the item tree and cached.
def manifest_text
  @manifest_text || (@manifest_text = @root.manifest_text)
end
26
+
27
# Report the modification flag: false after construction and after
# `unmodified`, true once a copy or remove operation has succeeded.
def modified?
  @modified
end
30
+
31
# Clear the modification flag. Returns self so calls can be chained.
def unmodified
  tap { @modified = false }
end
35
+
36
# Replace the cached manifest text with the text generated from the item
# tree. Returns self so calls can be chained.
def normalize
  tap { @manifest_text = @root.manifest_text }
end
40
+
41
# Recursively copy `source` to `target` using the :merge copy method.
#
# `source_collection` is the collection to read `source` from; nil means
# this collection. A trailing "/" on `source` is stripped before the
# lookup and turns off the "copy into an existing target stream" step.
def cp_r(source, target, source_collection=nil)
  copy(:merge, source.chomp("/"), target, source_collection,
       {descend_target: !source.end_with?("/")})
end
45
+
46
# Move the item at `source` to `target`: copy it with :add_copy and remove
# the source from the pre-copy hook, i.e. only after the copy has been
# checked and is known to be possible.
def rename(source, target)
  copy(:add_copy, source, target) do
    rm_r(source)
  end
end
49
+
50
# Remove the item at path `source` without the recursive option, so a
# stream at that path is refused by the underlying delete.
def rm(source)
  remove(source)
end
53
+
54
# Remove the item at path `source`; a stream is removed together with
# everything it contains.
def rm_r(source)
  recursive_opts = {recursive: true}
  remove(source, recursive_opts)
end
57
+
58
+ protected
59
+
60
# Join and normalize `parts` into one path, then ask the root for the
# stream that contains that path and the name of the item inside it.
def find(*parts)
  full_path = normalize_path(*parts)
  @root.find(full_path)
end
63
+
64
+ private
65
+
66
# Record that the tree has changed: drop the cached manifest text so it is
# regenerated on demand, and raise the modification flag. Returns self.
def modified
  tap do
    @manifest_text = nil
    @modified = true
  end
end
71
+
72
# Join `parts` into a single path rooted at ".".
#
# A joined path that is "." or already starts with "./" is returned as is;
# anything else gets a "./" prefix.
# Raises ArgumentError when the joined path is empty.
def normalize_path(*parts)
  path = File.join(*parts)
  raise ArgumentError.new("empty path") if path.empty?
  return path if path == "." || path.start_with?("./")
  "./#{path}"
end
82
+
83
# Shared implementation of cp_r and rename.
#
# Looks up the item at `source` in `source_collection` (this collection
# when nil) and the destination stream for `target`, then copies the item
# by sending `copy_method` (:merge or :add_copy) to the destination stream.
# The matching "check_can_<copy_method>" method is sent first.
#
# Unless opts[:descend_target] is false, an existing stream at `target` is
# preferred as the destination and the item keeps its source name.
#
# A given block runs as a pre-copy hook after all checks have passed.
# Returns self.
def copy(copy_method, source, target, source_collection=nil, opts={})
  source_collection = self if source_collection.nil?
  src_stream, src_tail = source_collection.find(source)
  dst_stream, dst_tail = find(target)
  # Copying an item onto itself is a no-op.
  same_place = source_collection.equal?(self) &&
               (src_stream.path == dst_stream.path) &&
               (src_tail == dst_tail)
  return self if same_place

  src_item = src_stream[src_tail]
  dst_tail ||= src_tail
  check_method = :"check_can_#{copy_method}"
  target_name = nil
  if opts.fetch(:descend_target, true)
    begin
      # When `target` names something we can copy into, the item lands
      # inside it under its source name.
      tail_stream = dst_stream[dst_tail]
      tail_stream.send(check_method, src_item, src_tail)
      dst_stream = tail_stream
      target_name = src_tail
    rescue Errno::ENOENT, Errno::ENOTDIR
      # `target` is missing or cannot be copied into; write to `target`
      # itself below.
    end
  end
  if target_name.nil?
    dst_stream.send(check_method, src_item, dst_tail)
    target_name = dst_tail
  end
  # Every check has passed, so the pre-copy hook may run now.
  if block_given?
    yield
    # The hook may have removed the destination stream (rename removes the
    # source here), so look it up again by path.
    dst_stream = @root.stream_at(dst_stream.path)
  end
  dst_stream.send(copy_method, src_item, target_name)
  modified
end
127
+
128
# Delete the item at `path` from the stream that contains it, passing
# `opts` (e.g. recursive: true) through to the stream's delete method.
# Marks the collection as modified and returns self.
def remove(path, opts={})
  parent, leaf_name = find(path)
  parent.delete(leaf_name, opts)
  modified
end
133
+
134
# A slice of file data: `length` bytes that begin `start_pos` bytes into
# the concatenated data of the blocks listed in `locators`.
LocatorSegment = Struct.new(:locators, :start_pos, :length)
135
+
136
# The byte range that one block occupies inside a stream, remembering the
# locator string it was built from.
#
# The range is end-inclusive: it runs from `start` through `start` plus
# the block size parsed from the locator.
class LocatorRange < Range
  attr_reader :locator

  def initialize(loc_s, start)
    @locator = loc_s
    block_size = Keep::Locator.parse(loc_s).size.to_i
    super(start, start + block_size, false)
  end
end
145
+
146
# LocatorList efficiently builds LocatorSegments from a stream manifest.
#
# The locators of one stream are laid end to end as LocatorRanges, so any
# byte offset in the stream can be mapped back to the block that holds it.
class LocatorList
  # locators: the block locator strings of one stream, in manifest order.
  def initialize(locators)
    next_start = 0
    @ranges = locators.map do |loc_s|
      new_range = LocatorRange.new(loc_s, next_start)
      next_start = new_range.end
      new_range
    end
  end

  # Return a LocatorSegment that captures `length` bytes from `start_pos`.
  #
  # The segment lists only the blocks that hold those bytes, and its
  # start_pos is relative to the first listed block.
  # Raises RangeError when the requested bytes are not in the stream.
  def segment(start_pos, length)
    start_index = search_for_byte(start_pos)
    if length == 0
      end_index = start_index
    else
      end_index = search_for_byte(start_pos + length - 1, start_index)
    end
    seg_ranges = @ranges[start_index..end_index]
    LocatorSegment.new(seg_ranges.map(&:locator),
                       start_pos - seg_ranges.first.begin,
                       length)
  end

  private

  # Do a binary search for byte `target` in the list of locators, starting
  # from `start_index`. Return the index of the range in @ranges that
  # contains the byte.
  def search_for_byte(target, start_index=0)
    lo = start_index
    hi = @ranges.size
    last_index = @ranges.size - 1
    loop do
      ii = (lo + hi) / 2
      range = @ranges[ii]
      # No range at this index (e.g. an empty locator list).
      raise RangeError.new("%i not in segment" % target) if range.nil?
      # Ranges are end-inclusive, so an offset on a block boundary matches
      # both neighbours. The byte belongs to the later block; a match on a
      # range's end only counts for the final range, where it marks the end
      # of the stream (needed for zero-length files placed there).
      if range.include?(target) && (target < range.end || ii == last_index)
        return ii
      elsif ii == lo
        raise RangeError.new("%i not in segment" % target)
      elsif target < range.begin
        hi = ii
      else
        lo = ii
      end
    end
  end
end
194
+
195
# Common base for the nodes of a collection tree. Each node knows its full
# path and its name, which is the last component of that path.
class CollectionItem
  attr_reader :path
  attr_reader :name

  def initialize(path)
    @path, @name = path, File.basename(path)
  end
end
203
+
204
# A file in a collection tree: an ordered list of segments that together
# make up the file's content.
class CollectionFile < CollectionItem
  class << self
    # Noun used for this kind of item in error messages.
    def human_name
      "file"
    end
  end

  def initialize(path)
    super
    @segments = []
  end

  def file?
    true
  end

  # A file never contains other items.
  def leaf?
    true
  end

  # Append `segment` to the end of the file's content.
  def add_segment(segment)
    @segments.push(segment)
  end

  # Yield each segment in the order it was added.
  def each_segment(&block)
    @segments.each(&block)
  end

  # Nothing can be copied into a file, so this always raises
  # Errno::ENOTDIR carrying the file's path.
  def check_can_add_copy(src_item, name)
    raise Errno::ENOTDIR, path
  end

  alias check_can_merge check_can_add_copy

  # Return a new file at `copy_path` that refers to the same segments.
  def copy_named(copy_path)
    self.class.new(copy_path).tap do |duplicate|
      each_segment { |segment| duplicate.add_segment(segment) }
    end
  end
end
242
+
243
# A stream (directory) in a collection tree: a mapping from names to the
# files and substreams it contains.
class CollectionStream < CollectionItem
  def initialize(path)
    super
    @items = {}
  end

  # Noun used for this kind of item in error messages.
  def self.human_name
    "stream"
  end

  def file?
    false
  end

  # A stream counts as a leaf only while it contains nothing.
  def leaf?
    items.empty?
  end

  # Return the item named `key`, or raise Errno::ENOENT if there is none.
  def [](key)
    items[key] or
      raise Errno::ENOENT.new("%p not found in %p" % [key, path])
  end

  # Remove the item called `name` from this stream.
  #
  # Files are always removed; a substream is removed only when
  # opts[:recursive] is set. Raises Errno::ENOENT when `name` does not
  # exist and Errno::EISDIR (naming the substream) when a substream is
  # given without the recursive option.
  def delete(name, opts={})
    item = self[name]
    if item.file? or opts[:recursive]
      items.delete(name)
    else
      # Name the stream that could not be removed rather than its parent,
      # as the other Errno errors raised by this class do.
      raise Errno::EISDIR.new(item.path)
    end
  end

  # Given a POSIX-style path, return the CollectionStream that contains
  # the object at that path, and the name of the object inside it.
  # Raises Errno::ENOENT when an intermediate component does not exist.
  def find(find_path)
    components = find_path.split("/")
    tail = components.pop
    [components.reduce(self, :[]), tail]
  end

  # Return the substream at `find_path` relative to this stream, creating
  # any missing streams along the way. Raises ArgumentError when a
  # component exists but is not a stream.
  def stream_at(find_path)
    key, rest = find_path.split("/", 2)
    next_stream = get_or_new(key, CollectionStream)
    if rest.nil?
      next_stream
    else
      next_stream.stream_at(rest)
    end
  end

  # Return the file at `find_path` relative to this stream, creating the
  # file and any missing parent streams. Raises ArgumentError when a
  # component exists with the wrong type.
  def file_at(find_path)
    stream_path, _, file_name = find_path.rpartition("/")
    if stream_path.empty?
      get_or_new(file_name, CollectionFile)
    else
      stream_at(stream_path).file_at(file_name)
    end
  end

  # Return a string with the normalized manifest text for this stream,
  # including all substreams. Files and substreams are each emitted in
  # sorted name order; this stream's own line comes first.
  def manifest_text
    file_keys, stream_keys = items.keys.sort.partition do |key|
      items[key].file?
    end
    my_line = StreamManifest.new(path)
    file_keys.each do |file_name|
      my_line.add_file(items[file_name])
    end
    sub_lines = stream_keys.map do |sub_name|
      items[sub_name].manifest_text
    end
    my_line.to_s + sub_lines.join("")
  end

  # Check that `src_item` may replace whatever is stored at `key`.
  # Raises Errno::ENOTDIR on a type mismatch and Errno::ENOTEMPTY when the
  # existing item is a stream that still has contents.
  def check_can_add_copy(src_item, key)
    if existing = check_can_merge(src_item, key) and not existing.leaf?
      raise Errno::ENOTEMPTY.new(existing.path)
    end
  end

  # Check that `src_item` may be merged into `key`: an existing item there
  # must have the same class. Returns the existing item (or nil).
  def check_can_merge(src_item, key)
    if existing = items[key] and (existing.class != src_item.class)
      raise Errno::ENOTDIR.new(existing.path)
    end
    existing
  end

  # Store a copy of `src_item` under `key`, replacing anything there.
  def add_copy(src_item, key)
    self[key] = src_item.copy_named("#{path}/#{key}")
  end

  # Do a recursive copy of the collection item `src_item` to destination
  # `key`. If a simple copy is safe, do that; otherwise, recursively
  # merge the contents of the stream `src_item` into the stream at `key`.
  def merge(src_item, key)
    begin
      check_can_add_copy(src_item, key)
      add_copy(src_item, key)
    rescue Errno::ENOTEMPTY
      dest = self[key]
      error = nil
      # Copy as much as possible, then raise any error encountered.
      # Start with streams for a depth-first merge.
      src_items = src_item.items.each_pair.sort_by do |_, sub_item|
        (sub_item.file?) ? 1 : 0
      end
      src_items.each do |sub_key, sub_item|
        begin
          dest.merge(sub_item, sub_key)
        rescue Errno::ENOTDIR => error
          # Remember the failure and keep merging the remaining items.
        end
      end
      raise error unless error.nil?
    end
  end

  # Return a deep copy of this stream rooted at `copy_path`.
  def copy_named(copy_path)
    copy = self.class.new(copy_path)
    items.each_pair do |key, item|
      copy.add_copy(item, key)
    end
    copy
  end

  protected

  # Protected so that other streams can read it during a merge.
  attr_reader :items

  private

  def []=(key, item)
    items[key] = item
  end

  # Return the collection item at `key` and ensure that it's a `klass`.
  # If `key` does not exist, create a new `klass` there.
  # If the value for `key` is not a `klass`, raise an ArgumentError.
  def get_or_new(key, klass)
    item = items[key]
    if item.nil?
      self[key] = klass.new("#{path}/#{key}")
    elsif not item.is_a?(klass)
      raise ArgumentError.
        new("in stream %p, %p is a %s, not a %s" %
            [path, key, items[key].class.human_name, klass.human_name])
    else
      item
    end
  end
end
396
+
397
# The top of a collection tree. It has an empty path, always holds a "."
# stream, and refuses to have anything else written directly into it.
class CollectionRoot < CollectionStream
  def initialize
    super("")
    setup
  end

  # Delete as a stream would, then put a fresh, empty "." stream back in
  # place. If the delete raises, the existing "." stream is left alone.
  def delete(name, opts={})
    super
    setup
  end

  # Only names that already exist at the root can be merge targets; any
  # other key raises ArgumentError.
  def check_can_merge(src_item, key)
    raise_root_write_error(key) unless items.include?(key)
    super
  end

  private

  # Install an empty "." stream.
  def setup
    items["."] = CollectionStream.new(".")
  end

  def raise_root_write_error(key)
    raise ArgumentError, "can't write to %p at collection root" % key
  end

  # Assigning items at the root is never allowed.
  def []=(key, item)
    raise_root_write_error(key)
  end
end
431
+
432
# Build a manifest text for a single stream, without substreams.
# The manifest includes files in the order they're added. If you want
# a normalized manifest, add files in lexical order by name.
class StreamManifest
  # name: the stream name that starts the manifest line.
  def initialize(name)
    @name = name
    # Locator string => LocatorRange giving its position in this stream.
    @loc_ranges = {}
    # Stream offset at which the next new locator will be placed.
    @loc_range_start = 0
    @file_specs = []
  end

  # Add every segment of `coll_file` to the manifest line. `coll_file`
  # must respond to `name` and `each_segment`.
  def add_file(coll_file)
    coll_file.each_segment do |segment|
      extend_locator_ranges(segment.locators)
      extend_file_specs(coll_file.name, segment)
    end
  end

  # Return the manifest line (newline-terminated), or an empty string when
  # no file segments have been added.
  def to_s
    if @file_specs.empty?
      ""
    else
      "%s %s %s\n" % [escape_name(@name),
                      @loc_ranges.keys.join(" "),
                      @file_specs.join(" ")]
    end
  end

  private

  # Give every locator not seen before a position at the end of the stream.
  def extend_locator_ranges(locators)
    locators.each do |loc_s|
      # Test membership on each iteration rather than filtering up front:
      # a locator repeated within one list must be registered only once,
      # otherwise its range is overwritten with an offset past the data
      # that the manifest line actually lists.
      next if @loc_ranges.include?(loc_s)
      new_range = LocatorRange.new(loc_s, @loc_range_start)
      @loc_ranges[loc_s] = new_range
      @loc_range_start = new_range.end
    end
  end

  # Given a filename and a LocatorSegment, add the smallest possible array
  # of file spec strings to @file_specs that builds the file from
  # available locators.
  def extend_file_specs(filename, segment)
    filename = escape_name(filename)
    start_pos = segment.start_pos
    length = segment.length
    start_loc = segment.locators.first
    prev_loc = start_loc
    # Build a list of file specs by iterating through the segment's
    # locators and preparing a file spec for each contiguous range.
    segment.locators.drop(1).each do |loc_s|
      range = @loc_ranges[loc_s]
      if range.begin != @loc_ranges[prev_loc].end
        # This block does not directly follow the previous one in the
        # stream, so close the current run and start a new one here.
        range_start, range_length =
          start_and_length_at(start_loc, prev_loc, start_pos, length)
        @file_specs << "#{range_start}:#{range_length}:#{filename}"
        start_pos = 0
        length -= range_length
        start_loc = loc_s
      end
      prev_loc = loc_s
    end
    range_start, range_length =
      start_and_length_at(start_loc, prev_loc, start_pos, length)
    @file_specs << "#{range_start}:#{range_length}:#{filename}"
  end

  # Double every backslash and replace each whitespace character with a
  # backslash followed by its three-digit octal byte value.
  def escape_name(name)
    name.gsub(/\\/, "\\\\\\\\").gsub(/\s/) do |s|
      s.each_byte.map { |c| "\\%03o" % c }.join("")
    end
  end

  # Return [stream offset, byte count] for a run of blocks from
  # `start_key` through `end_key`, beginning `start_pos` bytes into the
  # first block and taking at most `length` bytes.
  def start_and_length_at(start_key, end_key, start_pos, length)
    range_begin = @loc_ranges[start_key].begin + start_pos
    range_length = [@loc_ranges[end_key].end - range_begin, length].min
    [range_begin, range_length]
  end
end
512
+ end
513
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arvados
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.20150313191637
4
+ version: 0.1.20150407214339
5
5
  platform: ruby
6
6
  authors:
7
7
  - Arvados Authors
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-03-13 00:00:00.000000000 Z
11
+ date: 2015-04-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: google-api-client
@@ -104,13 +104,14 @@ dependencies:
104
104
  - - "<"
105
105
  - !ruby/object:Gem::Version
106
106
  version: 1.0.0
107
- description: Arvados client library, git commit 71c4fdc3352ad5ca34c2f260fe43fa1150868a04
107
+ description: Arvados client library, git commit 83759d40b868f7583cffcfa986bb78ff9c9f6c42
108
108
  email: gem-dev@curoverse.com
109
109
  executables: []
110
110
  extensions: []
111
111
  extra_rdoc_files: []
112
112
  files:
113
113
  - lib/arvados.rb
114
+ - lib/arvados/collection.rb
114
115
  - lib/arvados/google_api_client.rb
115
116
  - lib/arvados/keep.rb
116
117
  homepage: https://arvados.org