zip-container 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,491 @@
1
+ # Copyright (c) 2013 The University of Manchester, UK.
2
+ #
3
+ # All rights reserved.
4
+ #
5
+ # Redistribution and use in source and binary forms, with or without
6
+ # modification, are permitted provided that the following conditions are met:
7
+ #
8
+ # * Redistributions of source code must retain the above copyright notice,
9
+ # this list of conditions and the following disclaimer.
10
+ #
11
+ # * Redistributions in binary form must reproduce the above copyright notice,
12
+ # this list of conditions and the following disclaimer in the documentation
13
+ # and/or other materials provided with the distribution.
14
+ #
15
+ # * Neither the names of The University of Manchester nor the names of its
16
+ # contributors may be used to endorse or promote products derived from this
17
+ # software without specific prior written permission.
18
+ #
19
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29
+ # POSSIBILITY OF SUCH DAMAGE.
30
+ #
31
+ # Author: Robert Haines
32
+
33
+ require 'forwardable'
34
+ require 'zip/zipfilesystem'
35
+
36
+ module ZipContainer
37
+
38
+ # This class represents a ZipContainer file in PK Zip format. See the
39
+ # {OCF}[http://www.idpf.org/epub/30/spec/epub30-ocf.html] and
40
+ # {UCF}[https://learn.adobe.com/wiki/display/PDFNAV/Universal+Container+Format]
41
+ # specifications for more details.
42
+ #
43
+ # This class provides most of the facilities of the <tt>Zip::ZipFile</tt>
44
+ # class in the rubyzip gem. Please also consult the
45
+ # {rubyzip documentation}[http://rubydoc.info/gems/rubyzip/0.9.9/frames]
46
+ # alongside these pages.
47
+ #
48
+ # There are code examples available with the source code of this library.
49
+ class Container
50
+ include ReservedNames
51
+ include ManagedEntries
52
+
53
+ extend Forwardable
54
+ def_delegators :@zipfile, :comment, :comment=, :commit_required?, :each,
55
+ :entries, :extract, :find_entry, :get_entry, :get_input_stream, :glob,
56
+ :name, :read, :size
57
+
58
+ private_class_method :new
59
+
60
+ # The mime-type of this ZipContainer file.
61
+ attr_reader :mimetype
62
+
63
+ # :stopdoc:
64
+ # The reserved mimetype file name for standard ZipContainer documents.
65
+ MIMETYPE_FILE = "mimetype"
66
+
67
def initialize(document)
  # Open the archive, then insist on a well-formed mimetype entry before
  # anything else is read from it (check_mimetype! raises otherwise).
  @zipfile = open_document(document)
  check_mimetype!

  @mimetype = read_mimetype
  @on_disk = true

  # Out of the box the mimetype file is the only reserved entry name.
  register_reserved_name(MIMETYPE_FILE)

  # Prepare the managed-entry bookkeeping.
  # NOTE(review): presumably supplied by the ManagedEntries mixin - confirm.
  initialize_managed_entries

  # Hand +self+ (not the raw zip file) to rubyzip's filesystem helpers so
  # that calls made through #dir and #file are routed via this object and
  # therefore see the reserved names defined here.
  name_mapper = ::Zip::ZipFileSystem::ZipFileNameMapper.new(self)
  @fs_dir = ::Zip::ZipFileSystem::ZipFsDir.new(name_mapper)
  @fs_file = ::Zip::ZipFileSystem::ZipFsFile.new(name_mapper)

  # The two helpers need to know about each other.
  @fs_dir.file = @fs_file
  @fs_file.dir = @fs_dir
end
88
+ # :startdoc:
89
+
90
+ # :call-seq:
91
+ # Container.create(filename, mimetype) -> document
92
+ # Container.create(filename, mimetype) {|document| ...}
93
+ #
94
+ # Create a new ZipContainer file on disk with the specified mimetype.
95
def self.create(filename, mimetype, &block)
  # Write a fresh archive whose only entry is the mimetype file, stored
  # without compression (the layout check_mimetype! later insists on).
  ::Zip::ZipOutputStream.open(filename) do |out|
    out.put_next_entry(MIMETYPE_FILE, nil, nil, ::Zip::ZipEntry::STORED)
    out.write mimetype
  end

  # Re-open what was just written as a container object.
  container = new(filename)
  return container unless block_given?

  # Block form: always close (i.e. commit) once the caller is finished,
  # even if the block raises.
  begin
    yield container
  ensure
    container.close
  end

  container
end
114
+
115
+ # :call-seq:
116
+ # Container.each_entry(filename) -> Enumerator
117
+ # Container.each_entry(filename) {|entry| ...}
118
+ #
119
+ # Iterate over the entries in the ZipContainer file. The entry objects
120
+ # returned by this method are Zip::ZipEntry objects. Please see the
121
+ # rubyzip documentation for details.
122
def self.each_entry(filename, &block)
  container = new(filename)

  # Without a block, hand back whatever the container's own #each returns
  # when it is not given a block. The container cannot be closed here
  # because that result may still need it.
  return container.each unless block_given?

  # With a block, iterate exactly once and always close afterwards. The
  # previous implementation fell through after the ensure and called #each
  # a second time, returning a stray Enumerator over a container that had
  # just been closed.
  begin
    container.each(&block)
  ensure
    container.close
  end
end
135
+
136
+ # :call-seq:
137
+ # Container.open(filename) -> document
138
+ # Container.open(filename) {|document| ...}
139
+ #
140
+ # Open an existing ZipContainer file from disk. It will be checked for
141
+ # conformance upon first access.
142
def self.open(filename, &block)
  container = new(filename)

  # Plain form: the caller is responsible for closing the container.
  return container unless block_given?

  # Block form: close (i.e. commit) when the block finishes, even if it
  # raises.
  begin
    yield container
  ensure
    container.close
  end

  container
end
155
+
156
+ # :call-seq:
157
+ # Container.verify(filename) -> boolean
158
+ #
159
+ # Verify that the specified ZipContainer file conforms to the
160
+ # specification. This method returns +false+ if there are any problems at
161
+ # all with the file (including if it cannot be found).
162
def self.verify(filename)
  # Any StandardError - from opening the file or from the verification
  # itself - is deliberately treated as "does not conform".
  new(filename).verify!
  true
rescue
  false
end
171
+
172
+ # :call-seq:
173
+ # Container.verify!(filename)
174
+ #
175
+ # Verify that the specified ZipContainer file conforms to the
176
+ # specification. This method raises exceptions when errors are found or if
177
+ # there is something fundamentally wrong with the file itself (e.g. it
178
+ # cannot be found).
179
def self.verify!(filename)
  # Errors from opening the file and from the verification both propagate
  # to the caller.
  container = new(filename)
  container.verify!
end
182
+
183
+ # :call-seq:
184
+ # add(entry, src_path, &continue_on_exists_proc)
185
+ #
186
+ # Convenience method for adding the contents of a file to the ZipContainer
187
+ # file. If asked to add a file with a reserved name, such as the special
188
+ # mimetype header file, this method will raise a
189
+ # ReservedNameClashError.
190
+ #
191
+ # See the rubyzip documentation for details of the
192
+ # +continue_on_exists_proc+ parameter.
193
def add(entry, src_path, &continue_on_exists_proc)
  # Refuse names that are reserved or that belong to a managed directory.
  clash = reserved_entry?(entry) || managed_directory?(entry)
  raise ReservedNameClashError, entry.to_s if clash

  @zipfile.add(entry, src_path, &continue_on_exists_proc)
end
200
+
201
+ # :call-seq:
202
+ # commit -> boolean
203
+ # close -> boolean
204
+ #
205
+ # Commits changes that have been made since the previous commit to the
206
+ # ZipContainer file. Returns +true+ if anything was actually done, +false+
207
+ # otherwise.
208
def commit
  # Nothing to do unless there are pending changes AND there is a file on
  # disk to write them to. Always answer with a real boolean: previously
  # this returned nil when a commit was required but the container was not
  # on disk, and otherwise leaked whatever the underlying commit returned.
  return false unless commit_required? && on_disk?

  @zipfile.commit
  true
end
215
+
216
+ alias :close :commit
217
+
218
+ # :call-seq:
219
+ # dir -> Zip::ZipFsDir
220
+ #
221
+ # Returns an object which can be used like ruby's built in +Dir+ (class)
222
+ # object, except that it works on the ZipContainer file on which this
223
+ # method is invoked.
224
+ #
225
+ # See the rubyzip documentation for details.
226
def dir
  # The ZipFsDir helper built in #initialize.
  @fs_dir
end
229
+
230
+ # :call-seq:
231
+ # file -> Zip::ZipFsFile
232
+ #
233
+ # Returns an object which can be used like ruby's built in +File+ (class)
234
+ # object, except that it works on the ZipContainer file on which this
235
+ # method is invoked.
236
+ #
237
+ # See the rubyzip documentation for details.
238
def file
  # The ZipFsFile helper built in #initialize.
  @fs_file
end
241
+
242
+ # :call-seq:
243
+ # get_output_stream(entry, permission = nil) -> stream
244
+ # get_output_stream(entry, permission = nil) {|stream| ...}
245
+ #
246
+ # Returns an output stream to the specified entry. If a block is passed
247
+ # the stream object is passed to the block and the stream is automatically
248
+ # closed afterwards just as with ruby's built-in +File.open+ method.
249
+ #
250
+ # See the rubyzip documentation for details of the +permission+
251
+ # parameter.
252
def get_output_stream(entry, permission = nil, &block)
  # Writing to a reserved name, or over a managed directory, is refused.
  clash = reserved_entry?(entry) || managed_directory?(entry)
  raise ReservedNameClashError, entry.to_s if clash

  @zipfile.get_output_stream(entry, permission, &block)
end
259
+
260
+ # :call-seq:
261
+ # in_memory? -> boolean
262
+ #
263
+ # Is this ZipContainer file memory resident as opposed to stored on disk?
264
def in_memory?
  # Simply the negation of the on-disk flag set in #initialize.
  !@on_disk
end
267
+
268
+ # :call-seq:
269
+ # mkdir(name, permission = 0755)
270
+ #
271
+ # Creates a directory in the ZipContainer file. If asked to create a
272
+ # directory with a name reserved for use by a file this method will raise
273
+ # a ReservedNameClashError.
274
+ #
275
+ # The new directory will be created with the supplied unix-style
276
+ # permissions. The default (+0755+) is owner read, write and list; group
277
+ # read and list; and world read and list.
278
def mkdir(name, permission = 0755)
  # A directory may not take a reserved name or the name of a managed file.
  clash = reserved_entry?(name) || managed_file?(name)
  raise ReservedNameClashError, name if clash

  @zipfile.mkdir(name, permission)
end
285
+
286
+ # :call-seq:
287
+ # on_disk? -> boolean
288
+ #
289
+ # Is this ZipContainer file stored on disk as opposed to memory resident?
290
def on_disk?
  # Flag set in #initialize.
  @on_disk
end
293
+
294
+ # :call-seq:
295
+ # remove(entry)
296
+ #
297
+ # Removes the specified entry from the ZipContainer file. If asked to
298
+ # remove any reserved files such as the special mimetype header file this
299
+ # method will do nothing.
300
def remove(entry)
  # Reserved entries are silently left alone (nil is returned).
  @zipfile.remove(entry) unless reserved_entry?(entry)
end
304
+
305
+ # :call-seq:
306
+ # rename(entry, new_name, &continue_on_exists_proc)
307
+ #
308
+ # Renames the specified entry in the ZipContainer file. If asked to rename
309
+ # any reserved files such as the special mimetype header file this method
310
+ # will do nothing. If asked to rename a file _to_ one of the reserved
311
+ # names a ReservedNameClashError is raised.
312
+ #
313
+ # See the rubyzip documentation for details of the
314
+ # +continue_on_exists_proc+ parameter.
315
def rename(entry, new_name, &continue_on_exists_proc)
  # Renaming a reserved entry is silently ignored...
  return if reserved_entry?(entry)

  # ...whereas renaming something *to* a reserved name is an error.
  target_reserved = reserved_entry?(new_name)
  raise ReservedNameClashError, new_name if target_reserved

  @zipfile.rename(entry, new_name, &continue_on_exists_proc)
end
321
+
322
+ # :call-seq:
323
+ # replace(entry, src_path)
324
+ #
325
+ # Replaces the specified entry of the ZipContainer file with the contents
326
+ # of +src_path+ (from the file system). If asked to replace any reserved
327
+ # files such as the special mimetype header file this method will do
328
+ # nothing.
329
def replace(entry, src_path)
  # Reserved entries are silently left alone (nil is returned).
  @zipfile.replace(entry, src_path) unless reserved_entry?(entry)
end
333
+
334
+ # :call-seq:
335
+ # to_s -> String
336
+ #
337
+ # Return a textual summary of this ZipContainer file.
338
def to_s
  # The underlying zip file's own description followed by the mimetype.
  "#{@zipfile} - #{@mimetype}"
end
341
+
342
+ # :call-seq:
343
+ # verify!
344
+ #
345
+ # Verify the contents of this ZipContainer file. All managed files and
346
+ # directories are checked to make sure that they exist, if required.
347
def verify!
  # All of the checking is done by verify_managed_entries!.
  # NOTE(review): presumably supplied by the ManagedEntries mixin - confirm.
  verify_managed_entries!
end
350
+
351
+ private
352
+
353
def open_document(document)
  # Wrap the given document in a rubyzip ZipFile.
  ::Zip::ZipFile.new(document)
end
356
+
357
def check_mimetype!
  mimetype_entry = @zipfile.find_entry(MIMETYPE_FILE)

  # Work out which rule (if any) is broken. The checks run in this order:
  # the entry must exist, it must sit at offset 0 of the archive and it
  # must be stored without compression.
  problem =
    if mimetype_entry.nil?
      "'mimetype' file is missing."
    elsif mimetype_entry.localHeaderOffset != 0
      "'mimetype' file is not at offset 0 in the archive."
    elsif mimetype_entry.compression_method != ::Zip::ZipEntry::STORED
      "'mimetype' file is compressed."
    end

  raise MalformedZipContainerError.new(problem) if problem

  true
end
371
+
372
def read_mimetype
  # The container's mimetype is the content of its mimetype entry.
  @zipfile.read(MIMETYPE_FILE)
end
375
+
376
+ public
377
+
378
+ # Lots of extra docs out of the way at the end here...
379
+
380
+ ##
381
+ # :method: comment
382
+ # :call-seq:
383
+ # comment -> String
384
+ #
385
+ # Returns the ZipContainer file comment, if it has one.
386
+
387
+ ##
388
+ # :method: comment=
389
+ # :call-seq:
390
+ # comment = comment
391
+ #
392
+ # Set the ZipContainer file comment to the new value.
393
+
394
+ ##
395
+ # :method: commit_required?
396
+ # :call-seq:
397
+ # commit_required? -> boolean
398
+ #
399
+ # Returns +true+ if any changes have been made to this ZipContainer file
400
+ # since the last commit, +false+ otherwise.
401
+
402
+ ##
403
+ # :method: each
404
+ # :call-seq:
405
+ # each -> Enumerator
406
+ # each {|entry| ...}
407
+ #
408
+ # Iterate over the entries in the ZipContainer file. The entry objects
409
+ # returned by this method are Zip::ZipEntry objects. Please see the
410
+ # rubyzip documentation for details.
411
+
412
+ ##
413
+ # :method: entries
414
+ # :call-seq:
415
+ # entries -> Enumerable
416
+ #
417
+ # Returns an Enumerable containing all the entries in the ZipContainer
418
+ # file. The entry objects returned by this method are Zip::ZipEntry
419
+ # objects. Please see the rubyzip documentation for details.
420
+
421
+ ##
422
+ # :method: extract
423
+ # :call-seq:
424
+ # extract(entry, dest_path, &on_exists_proc)
425
+ #
426
+ # Extracts the specified entry of the ZipContainer file to +dest_path+.
427
+ #
428
+ # See the rubyzip documentation for details of the +on_exists_proc+
429
+ # parameter.
430
+
431
+ ##
432
+ # :method: find_entry
433
+ # :call-seq:
434
+ # find_entry(entry) -> Zip::ZipEntry
435
+ #
436
+ # Searches for entries within the ZipContainer file with the specified
437
+ # name. Returns +nil+ if no entry is found. See also +get_entry+.
438
+
439
+ ##
440
+ # :method: get_entry
441
+ # :call-seq:
442
+ # get_entry(entry) -> Zip::ZipEntry
443
+ #
444
+ # Searches for an entry within the ZipContainer file in a similar manner
445
+ # to +find_entry+, but raises +Errno::ENOENT+ if no entry is found.
446
+
447
+ ##
448
+ # :method: get_input_stream
449
+ # :call-seq:
450
+ # get_input_stream(entry) -> stream
451
+ # get_input_stream(entry) {|stream| ...}
452
+ #
453
+ # Returns an input stream to the specified entry. If a block is passed the
454
+ # stream object is passed to the block and the stream is automatically
455
+ # closed afterwards just as with ruby's built in +File.open+ method.
456
+
457
+ ##
458
+ # :method: glob
459
+ # :call-seq:
460
+ # glob(*args) -> Array of Zip::ZipEntry
461
+ # glob(*args) {|entry| ...}
462
+ #
463
+ # Searches for entries within the ZipContainer file that match the given
464
+ # glob.
465
+ #
466
+ # See the rubyzip documentation for details of the parameters that can be
467
+ # passed in.
468
+
469
+ ##
470
+ # :method: name
471
+ # :call-seq:
472
+ # name -> String
473
+ #
474
+ # Returns the filename of this ZipContainer file.
475
+
476
+ ##
477
+ # :method: read
478
+ # :call-seq:
479
+ # read(entry) -> String
480
+ #
481
+ # Returns a string containing the contents of the specified entry.
482
+
483
+ ##
484
+ # :method: size
485
+ # :call-seq:
486
+ # size -> int
487
+ #
488
+ # Returns the number of entries in the ZipContainer file.
489
+
490
+ end
491
+ end