chd 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (109) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +30 -0
  3. data/chd.gemspec +29 -0
  4. data/ext/chd.c +1008 -0
  5. data/ext/extconf.rb +60 -0
  6. data/lib/chd/cd.rb +272 -0
  7. data/lib/chd/metadata.rb +196 -0
  8. data/lib/chd/version.rb +4 -0
  9. data/lib/chd.rb +21 -0
  10. data/libchdr/CMakeLists.txt +104 -0
  11. data/libchdr/LICENSE.txt +24 -0
  12. data/libchdr/README.md +7 -0
  13. data/libchdr/deps/lzma-19.00/CMakeLists.txt +33 -0
  14. data/libchdr/deps/lzma-19.00/LICENSE +3 -0
  15. data/libchdr/deps/lzma-19.00/include/7zTypes.h +375 -0
  16. data/libchdr/deps/lzma-19.00/include/Alloc.h +51 -0
  17. data/libchdr/deps/lzma-19.00/include/Bra.h +64 -0
  18. data/libchdr/deps/lzma-19.00/include/Compiler.h +33 -0
  19. data/libchdr/deps/lzma-19.00/include/CpuArch.h +336 -0
  20. data/libchdr/deps/lzma-19.00/include/Delta.h +19 -0
  21. data/libchdr/deps/lzma-19.00/include/LzFind.h +121 -0
  22. data/libchdr/deps/lzma-19.00/include/LzHash.h +57 -0
  23. data/libchdr/deps/lzma-19.00/include/Lzma86.h +111 -0
  24. data/libchdr/deps/lzma-19.00/include/LzmaDec.h +234 -0
  25. data/libchdr/deps/lzma-19.00/include/LzmaEnc.h +76 -0
  26. data/libchdr/deps/lzma-19.00/include/LzmaLib.h +131 -0
  27. data/libchdr/deps/lzma-19.00/include/Precomp.h +10 -0
  28. data/libchdr/deps/lzma-19.00/include/Sort.h +18 -0
  29. data/libchdr/deps/lzma-19.00/lzma-history.txt +446 -0
  30. data/libchdr/deps/lzma-19.00/lzma.txt +328 -0
  31. data/libchdr/deps/lzma-19.00/lzma.vcxproj +543 -0
  32. data/libchdr/deps/lzma-19.00/lzma.vcxproj.filters +17 -0
  33. data/libchdr/deps/lzma-19.00/src/Alloc.c +455 -0
  34. data/libchdr/deps/lzma-19.00/src/Bra86.c +82 -0
  35. data/libchdr/deps/lzma-19.00/src/BraIA64.c +53 -0
  36. data/libchdr/deps/lzma-19.00/src/CpuArch.c +218 -0
  37. data/libchdr/deps/lzma-19.00/src/Delta.c +64 -0
  38. data/libchdr/deps/lzma-19.00/src/LzFind.c +1127 -0
  39. data/libchdr/deps/lzma-19.00/src/Lzma86Dec.c +54 -0
  40. data/libchdr/deps/lzma-19.00/src/LzmaDec.c +1185 -0
  41. data/libchdr/deps/lzma-19.00/src/LzmaEnc.c +1330 -0
  42. data/libchdr/deps/lzma-19.00/src/Sort.c +141 -0
  43. data/libchdr/deps/zlib-1.2.11/CMakeLists.txt +29 -0
  44. data/libchdr/deps/zlib-1.2.11/ChangeLog +1515 -0
  45. data/libchdr/deps/zlib-1.2.11/FAQ +368 -0
  46. data/libchdr/deps/zlib-1.2.11/INDEX +68 -0
  47. data/libchdr/deps/zlib-1.2.11/Makefile +5 -0
  48. data/libchdr/deps/zlib-1.2.11/Makefile.in +410 -0
  49. data/libchdr/deps/zlib-1.2.11/README +115 -0
  50. data/libchdr/deps/zlib-1.2.11/adler32.c +186 -0
  51. data/libchdr/deps/zlib-1.2.11/compress.c +86 -0
  52. data/libchdr/deps/zlib-1.2.11/configure +921 -0
  53. data/libchdr/deps/zlib-1.2.11/crc32.c +442 -0
  54. data/libchdr/deps/zlib-1.2.11/crc32.h +441 -0
  55. data/libchdr/deps/zlib-1.2.11/deflate.c +2163 -0
  56. data/libchdr/deps/zlib-1.2.11/deflate.h +349 -0
  57. data/libchdr/deps/zlib-1.2.11/doc/algorithm.txt +209 -0
  58. data/libchdr/deps/zlib-1.2.11/doc/rfc1950.txt +619 -0
  59. data/libchdr/deps/zlib-1.2.11/doc/rfc1951.txt +955 -0
  60. data/libchdr/deps/zlib-1.2.11/doc/rfc1952.txt +675 -0
  61. data/libchdr/deps/zlib-1.2.11/doc/txtvsbin.txt +107 -0
  62. data/libchdr/deps/zlib-1.2.11/gzclose.c +25 -0
  63. data/libchdr/deps/zlib-1.2.11/gzguts.h +218 -0
  64. data/libchdr/deps/zlib-1.2.11/gzlib.c +637 -0
  65. data/libchdr/deps/zlib-1.2.11/gzread.c +654 -0
  66. data/libchdr/deps/zlib-1.2.11/gzwrite.c +665 -0
  67. data/libchdr/deps/zlib-1.2.11/infback.c +640 -0
  68. data/libchdr/deps/zlib-1.2.11/inffast.c +323 -0
  69. data/libchdr/deps/zlib-1.2.11/inffast.h +11 -0
  70. data/libchdr/deps/zlib-1.2.11/inffixed.h +94 -0
  71. data/libchdr/deps/zlib-1.2.11/inflate.c +1561 -0
  72. data/libchdr/deps/zlib-1.2.11/inflate.h +125 -0
  73. data/libchdr/deps/zlib-1.2.11/inftrees.c +304 -0
  74. data/libchdr/deps/zlib-1.2.11/inftrees.h +62 -0
  75. data/libchdr/deps/zlib-1.2.11/make_vms.com +867 -0
  76. data/libchdr/deps/zlib-1.2.11/treebuild.xml +116 -0
  77. data/libchdr/deps/zlib-1.2.11/trees.c +1203 -0
  78. data/libchdr/deps/zlib-1.2.11/trees.h +128 -0
  79. data/libchdr/deps/zlib-1.2.11/uncompr.c +93 -0
  80. data/libchdr/deps/zlib-1.2.11/zconf.h +534 -0
  81. data/libchdr/deps/zlib-1.2.11/zconf.h.cmakein +536 -0
  82. data/libchdr/deps/zlib-1.2.11/zconf.h.in +534 -0
  83. data/libchdr/deps/zlib-1.2.11/zlib.3 +149 -0
  84. data/libchdr/deps/zlib-1.2.11/zlib.3.pdf +0 -0
  85. data/libchdr/deps/zlib-1.2.11/zlib.h +1912 -0
  86. data/libchdr/deps/zlib-1.2.11/zlib.map +94 -0
  87. data/libchdr/deps/zlib-1.2.11/zlib.pc.cmakein +13 -0
  88. data/libchdr/deps/zlib-1.2.11/zlib.pc.in +13 -0
  89. data/libchdr/deps/zlib-1.2.11/zlib2ansi +152 -0
  90. data/libchdr/deps/zlib-1.2.11/zutil.c +325 -0
  91. data/libchdr/deps/zlib-1.2.11/zutil.h +271 -0
  92. data/libchdr/include/dr_libs/dr_flac.h +12280 -0
  93. data/libchdr/include/libchdr/bitstream.h +43 -0
  94. data/libchdr/include/libchdr/cdrom.h +110 -0
  95. data/libchdr/include/libchdr/chd.h +427 -0
  96. data/libchdr/include/libchdr/chdconfig.h +10 -0
  97. data/libchdr/include/libchdr/coretypes.h +60 -0
  98. data/libchdr/include/libchdr/flac.h +50 -0
  99. data/libchdr/include/libchdr/huffman.h +90 -0
  100. data/libchdr/pkg-config.pc.in +10 -0
  101. data/libchdr/src/libchdr_bitstream.c +125 -0
  102. data/libchdr/src/libchdr_cdrom.c +415 -0
  103. data/libchdr/src/libchdr_chd.c +2744 -0
  104. data/libchdr/src/libchdr_flac.c +302 -0
  105. data/libchdr/src/libchdr_huffman.c +545 -0
  106. data/libchdr/src/link.T +5 -0
  107. data/libchdr/tests/CMakeLists.txt +2 -0
  108. data/libchdr/tests/benchmark.c +52 -0
  109. metadata +183 -0
@@ -0,0 +1,675 @@
1
+
2
+
3
+
4
+
5
+
6
+
7
+ Network Working Group P. Deutsch
8
+ Request for Comments: 1952 Aladdin Enterprises
9
+ Category: Informational May 1996
10
+
11
+
12
+ GZIP file format specification version 4.3
13
+
14
+ Status of This Memo
15
+
16
+ This memo provides information for the Internet community. This memo
17
+ does not specify an Internet standard of any kind. Distribution of
18
+ this memo is unlimited.
19
+
20
+ IESG Note:
21
+
22
+ The IESG takes no position on the validity of any Intellectual
23
+ Property Rights statements contained in this document.
24
+
25
+ Notices
26
+
27
+ Copyright (c) 1996 L. Peter Deutsch
28
+
29
+ Permission is granted to copy and distribute this document for any
30
+ purpose and without charge, including translations into other
31
+ languages and incorporation into compilations, provided that the
32
+ copyright notice and this notice are preserved, and that any
33
+ substantive changes or deletions from the original are clearly
34
+ marked.
35
+
36
+ A pointer to the latest version of this and related documentation in
37
+ HTML format can be found at the URL
38
+ <ftp://ftp.uu.net/graphics/png/documents/zlib/zdoc-index.html>.
39
+
40
+ Abstract
41
+
42
+ This specification defines a lossless compressed data format that is
43
+ compatible with the widely used GZIP utility. The format includes a
44
+ cyclic redundancy check value for detecting data corruption. The
45
+ format presently uses the DEFLATE method of compression but can be
46
+ easily extended to use other compression methods. The format can be
47
+ implemented readily in a manner not covered by patents.
48
+
49
+
50
+
51
+
52
+
53
+
54
+
55
+
56
+
57
+
58
+ Deutsch Informational [Page 1]
59
+
60
+ RFC 1952 GZIP File Format Specification May 1996
61
+
62
+
63
+ Table of Contents
64
+
65
+ 1. Introduction ................................................... 2
66
+ 1.1. Purpose ................................................... 2
67
+ 1.2. Intended audience ......................................... 3
68
+ 1.3. Scope ..................................................... 3
69
+ 1.4. Compliance ................................................ 3
70
+ 1.5. Definitions of terms and conventions used ................. 3
71
+ 1.6. Changes from previous versions ............................ 3
72
+ 2. Detailed specification ......................................... 4
73
+ 2.1. Overall conventions ....................................... 4
74
+ 2.2. File format ............................................... 5
75
+ 2.3. Member format ............................................. 5
76
+ 2.3.1. Member header and trailer ........................... 6
77
+ 2.3.1.1. Extra field ................................... 8
78
+ 2.3.1.2. Compliance .................................... 9
79
+ 3. References .................................................. 9
80
+ 4. Security Considerations .................................... 10
81
+ 5. Acknowledgements ........................................... 10
82
+ 6. Author's Address ........................................... 10
83
+ 7. Appendix: Jean-Loup Gailly's gzip utility .................. 11
84
+ 8. Appendix: Sample CRC Code .................................. 11
85
+
86
+ 1. Introduction
87
+
88
+ 1.1. Purpose
89
+
90
+ The purpose of this specification is to define a lossless
91
+ compressed data format that:
92
+
93
+ * Is independent of CPU type, operating system, file system,
94
+ and character set, and hence can be used for interchange;
95
+ * Can compress or decompress a data stream (as opposed to a
96
+ randomly accessible file) to produce another data stream,
97
+ using only an a priori bounded amount of intermediate
98
+ storage, and hence can be used in data communications or
99
+ similar structures such as Unix filters;
100
+ * Compresses data with efficiency comparable to the best
101
+ currently available general-purpose compression methods,
102
+ and in particular considerably better than the "compress"
103
+ program;
104
+ * Can be implemented readily in a manner not covered by
105
+ patents, and hence can be practiced freely;
106
+ * Is compatible with the file format produced by the current
107
+ widely used gzip utility, in that conforming decompressors
108
+ will be able to read data produced by the existing gzip
109
+ compressor.
110
+
111
+
112
+
113
+
114
+ Deutsch Informational [Page 2]
115
+
116
+ RFC 1952 GZIP File Format Specification May 1996
117
+
118
+
119
+ The data format defined by this specification does not attempt to:
120
+
121
+ * Provide random access to compressed data;
122
+ * Compress specialized data (e.g., raster graphics) as well as
123
+ the best currently available specialized algorithms.
124
+
125
+ 1.2. Intended audience
126
+
127
+ This specification is intended for use by implementors of software
128
+ to compress data into gzip format and/or decompress data from gzip
129
+ format.
130
+
131
+ The text of the specification assumes a basic background in
132
+ programming at the level of bits and other primitive data
133
+ representations.
134
+
135
+ 1.3. Scope
136
+
137
+ The specification specifies a compression method and a file format
138
+ (the latter assuming only that a file can store a sequence of
139
+ arbitrary bytes). It does not specify any particular interface to
140
+ a file system or anything about character sets or encodings
141
+ (except for file names and comments, which are optional).
142
+
143
+ 1.4. Compliance
144
+
145
+ Unless otherwise indicated below, a compliant decompressor must be
146
+ able to accept and decompress any file that conforms to all the
147
+ specifications presented here; a compliant compressor must produce
148
+ files that conform to all the specifications presented here. The
149
+ material in the appendices is not part of the specification per se
150
+ and is not relevant to compliance.
151
+
152
+ 1.5. Definitions of terms and conventions used
153
+
154
+ byte: 8 bits stored or transmitted as a unit (same as an octet).
155
+ (For this specification, a byte is exactly 8 bits, even on
156
+ machines which store a character on a number of bits different
157
+ from 8.) See below for the numbering of bits within a byte.
158
+
159
+ 1.6. Changes from previous versions
160
+
161
+ There have been no technical changes to the gzip format since
162
+ version 4.1 of this specification. In version 4.2, some
163
+ terminology was changed, and the sample CRC code was rewritten for
164
+ clarity and to eliminate the requirement for the caller to do pre-
165
+ and post-conditioning. Version 4.3 is a conversion of the
166
+ specification to RFC style.
167
+
168
+
169
+
170
+ Deutsch Informational [Page 3]
171
+
172
+ RFC 1952 GZIP File Format Specification May 1996
173
+
174
+
175
+ 2. Detailed specification
176
+
177
+ 2.1. Overall conventions
178
+
179
+ In the diagrams below, a box like this:
180
+
181
+ +---+
182
+ | | <-- the vertical bars might be missing
183
+ +---+
184
+
185
+ represents one byte; a box like this:
186
+
187
+ +==============+
188
+ | |
189
+ +==============+
190
+
191
+ represents a variable number of bytes.
192
+
193
+ Bytes stored within a computer do not have a "bit order", since
194
+ they are always treated as a unit. However, a byte considered as
195
+ an integer between 0 and 255 does have a most- and least-
196
+ significant bit, and since we write numbers with the most-
197
+ significant digit on the left, we also write bytes with the most-
198
+ significant bit on the left. In the diagrams below, we number the
199
+ bits of a byte so that bit 0 is the least-significant bit, i.e.,
200
+ the bits are numbered:
201
+
202
+ +--------+
203
+ |76543210|
204
+ +--------+
205
+
206
+ This document does not address the issue of the order in which
207
+ bits of a byte are transmitted on a bit-sequential medium, since
208
+ the data format described here is byte- rather than bit-oriented.
209
+
210
+ Within a computer, a number may occupy multiple bytes. All
211
+ multi-byte numbers in the format described here are stored with
212
+ the least-significant byte first (at the lower memory address).
213
+ For example, the decimal number 520 is stored as:
214
+
215
+ 0 1
216
+ +--------+--------+
217
+ |00001000|00000010|
218
+ +--------+--------+
219
+ ^ ^
220
+ | |
221
+ | + more significant byte = 2 x 256
222
+ + less significant byte = 8
223
+
224
+
225
+
226
+ Deutsch Informational [Page 4]
227
+
228
+ RFC 1952 GZIP File Format Specification May 1996
229
+
230
+
231
+ 2.2. File format
232
+
233
+ A gzip file consists of a series of "members" (compressed data
234
+ sets). The format of each member is specified in the following
235
+ section. The members simply appear one after another in the file,
236
+ with no additional information before, between, or after them.
237
+
238
+ 2.3. Member format
239
+
240
+ Each member has the following structure:
241
+
242
+ +---+---+---+---+---+---+---+---+---+---+
243
+ |ID1|ID2|CM |FLG| MTIME |XFL|OS | (more-->)
244
+ +---+---+---+---+---+---+---+---+---+---+
245
+
246
+ (if FLG.FEXTRA set)
247
+
248
+ +---+---+=================================+
249
+ | XLEN |...XLEN bytes of "extra field"...| (more-->)
250
+ +---+---+=================================+
251
+
252
+ (if FLG.FNAME set)
253
+
254
+ +=========================================+
255
+ |...original file name, zero-terminated...| (more-->)
256
+ +=========================================+
257
+
258
+ (if FLG.FCOMMENT set)
259
+
260
+ +===================================+
261
+ |...file comment, zero-terminated...| (more-->)
262
+ +===================================+
263
+
264
+ (if FLG.FHCRC set)
265
+
266
+ +---+---+
267
+ | CRC16 |
268
+ +---+---+
269
+
270
+ +=======================+
271
+ |...compressed blocks...| (more-->)
272
+ +=======================+
273
+
274
+ 0 1 2 3 4 5 6 7
275
+ +---+---+---+---+---+---+---+---+
276
+ | CRC32 | ISIZE |
277
+ +---+---+---+---+---+---+---+---+
278
+
279
+
280
+
281
+
282
+ Deutsch Informational [Page 5]
283
+
284
+ RFC 1952 GZIP File Format Specification May 1996
285
+
286
+
287
+ 2.3.1. Member header and trailer
288
+
289
+ ID1 (IDentification 1)
290
+ ID2 (IDentification 2)
291
+ These have the fixed values ID1 = 31 (0x1f, \037), ID2 = 139
292
+ (0x8b, \213), to identify the file as being in gzip format.
293
+
294
+ CM (Compression Method)
295
+ This identifies the compression method used in the file. CM
296
+ = 0-7 are reserved. CM = 8 denotes the "deflate"
297
+ compression method, which is the one customarily used by
298
+ gzip and which is documented elsewhere.
299
+
300
+ FLG (FLaGs)
301
+ This flag byte is divided into individual bits as follows:
302
+
303
+ bit 0 FTEXT
304
+ bit 1 FHCRC
305
+ bit 2 FEXTRA
306
+ bit 3 FNAME
307
+ bit 4 FCOMMENT
308
+ bit 5 reserved
309
+ bit 6 reserved
310
+ bit 7 reserved
311
+
312
+ If FTEXT is set, the file is probably ASCII text. This is
313
+ an optional indication, which the compressor may set by
314
+ checking a small amount of the input data to see whether any
315
+ non-ASCII characters are present. In case of doubt, FTEXT
316
+ is cleared, indicating binary data. For systems which have
317
+ different file formats for ASCII text and binary data, the
318
+ decompressor can use FTEXT to choose the appropriate format.
319
+ We deliberately do not specify the algorithm used to set
320
+ this bit, since a compressor always has the option of
321
+ leaving it cleared and a decompressor always has the option
322
+ of ignoring it and letting some other program handle issues
323
+ of data conversion.
324
+
325
+ If FHCRC is set, a CRC16 for the gzip header is present,
326
+ immediately before the compressed data. The CRC16 consists
327
+ of the two least significant bytes of the CRC32 for all
328
+ bytes of the gzip header up to and not including the CRC16.
329
+ [The FHCRC bit was never set by versions of gzip up to
330
+ 1.2.4, even though it was documented with a different
331
+ meaning in gzip 1.2.4.]
332
+
333
+ If FEXTRA is set, optional extra fields are present, as
334
+ described in a following section.
335
+
336
+
337
+
338
+ Deutsch Informational [Page 6]
339
+
340
+ RFC 1952 GZIP File Format Specification May 1996
341
+
342
+
343
+ If FNAME is set, an original file name is present,
344
+ terminated by a zero byte. The name must consist of ISO
345
+ 8859-1 (LATIN-1) characters; on operating systems using
346
+ EBCDIC or any other character set for file names, the name
347
+ must be translated to the ISO LATIN-1 character set. This
348
+ is the original name of the file being compressed, with any
349
+ directory components removed, and, if the file being
350
+ compressed is on a file system with case insensitive names,
351
+ forced to lower case. There is no original file name if the
352
+ data was compressed from a source other than a named file;
353
+ for example, if the source was stdin on a Unix system, there
354
+ is no file name.
355
+
356
+ If FCOMMENT is set, a zero-terminated file comment is
357
+ present. This comment is not interpreted; it is only
358
+ intended for human consumption. The comment must consist of
359
+ ISO 8859-1 (LATIN-1) characters. Line breaks should be
360
+ denoted by a single line feed character (10 decimal).
361
+
362
+ Reserved FLG bits must be zero.
363
+
364
+ MTIME (Modification TIME)
365
+ This gives the most recent modification time of the original
366
+ file being compressed. The time is in Unix format, i.e.,
367
+ seconds since 00:00:00 GMT, Jan. 1, 1970. (Note that this
368
+ may cause problems for MS-DOS and other systems that use
369
+ local rather than Universal time.) If the compressed data
370
+ did not come from a file, MTIME is set to the time at which
371
+ compression started. MTIME = 0 means no time stamp is
372
+ available.
373
+
374
+ XFL (eXtra FLags)
375
+ These flags are available for use by specific compression
376
+ methods. The "deflate" method (CM = 8) sets these flags as
377
+ follows:
378
+
379
+ XFL = 2 - compressor used maximum compression,
380
+ slowest algorithm
381
+ XFL = 4 - compressor used fastest algorithm
382
+
383
+ OS (Operating System)
384
+ This identifies the type of file system on which compression
385
+ took place. This may be useful in determining end-of-line
386
+ convention for text files. The currently defined values are
387
+ as follows:
388
+
389
+
390
+
391
+
392
+
393
+
394
+ Deutsch Informational [Page 7]
395
+
396
+ RFC 1952 GZIP File Format Specification May 1996
397
+
398
+
399
+ 0 - FAT filesystem (MS-DOS, OS/2, NT/Win32)
400
+ 1 - Amiga
401
+ 2 - VMS (or OpenVMS)
402
+ 3 - Unix
403
+ 4 - VM/CMS
404
+ 5 - Atari TOS
405
+ 6 - HPFS filesystem (OS/2, NT)
406
+ 7 - Macintosh
407
+ 8 - Z-System
408
+ 9 - CP/M
409
+ 10 - TOPS-20
410
+ 11 - NTFS filesystem (NT)
411
+ 12 - QDOS
412
+ 13 - Acorn RISCOS
413
+ 255 - unknown
414
+
415
+ XLEN (eXtra LENgth)
416
+ If FLG.FEXTRA is set, this gives the length of the optional
417
+ extra field. See below for details.
418
+
419
+ CRC32 (CRC-32)
420
+ This contains a Cyclic Redundancy Check value of the
421
+ uncompressed data computed according to the CRC-32 algorithm
422
+ used in the ISO 3309 standard and in section 8.1.1.6.2 of
423
+ ITU-T recommendation V.42. (See http://www.iso.ch for
424
+ ordering ISO documents. See gopher://info.itu.ch for an
425
+ online version of ITU-T V.42.)
426
+
427
+ ISIZE (Input SIZE)
428
+ This contains the size of the original (uncompressed) input
429
+ data modulo 2^32.
430
+
431
+ 2.3.1.1. Extra field
432
+
433
+ If the FLG.FEXTRA bit is set, an "extra field" is present in
434
+ the header, with total length XLEN bytes. It consists of a
435
+ series of subfields, each of the form:
436
+
437
+ +---+---+---+---+==================================+
438
+ |SI1|SI2| LEN |... LEN bytes of subfield data ...|
439
+ +---+---+---+---+==================================+
440
+
441
+ SI1 and SI2 provide a subfield ID, typically two ASCII letters
442
+ with some mnemonic value. Jean-Loup Gailly
443
+ <gzip@prep.ai.mit.edu> is maintaining a registry of subfield
444
+ IDs; please send him any subfield ID you wish to use. Subfield
445
+ IDs with SI2 = 0 are reserved for future use. The following
446
+ IDs are currently defined:
447
+
448
+
449
+
450
+ Deutsch Informational [Page 8]
451
+
452
+ RFC 1952 GZIP File Format Specification May 1996
453
+
454
+
455
+ SI1 SI2 Data
456
+ ---------- ---------- ----
457
+ 0x41 ('A') 0x70 ('P') Apollo file type information
458
+
459
+ LEN gives the length of the subfield data, excluding the 4
460
+ initial bytes.
461
+
462
+ 2.3.1.2. Compliance
463
+
464
+ A compliant compressor must produce files with correct ID1,
465
+ ID2, CM, CRC32, and ISIZE, but may set all the other fields in
466
+ the fixed-length part of the header to default values (255 for
467
+ OS, 0 for all others). The compressor must set all reserved
468
+ bits to zero.
469
+
470
+ A compliant decompressor must check ID1, ID2, and CM, and
471
+ provide an error indication if any of these have incorrect
472
+ values. It must examine FEXTRA/XLEN, FNAME, FCOMMENT and FHCRC
473
+ at least so it can skip over the optional fields if they are
474
+ present. It need not examine any other part of the header or
475
+ trailer; in particular, a decompressor may ignore FTEXT and OS
476
+ and always produce binary output, and still be compliant. A
477
+ compliant decompressor must give an error indication if any
478
+ reserved bit is non-zero, since such a bit could indicate the
479
+ presence of a new field that would cause subsequent data to be
480
+ interpreted incorrectly.
481
+
482
+ 3. References
483
+
484
+ [1] "Information Processing - 8-bit single-byte coded graphic
485
+ character sets - Part 1: Latin alphabet No.1" (ISO 8859-1:1987).
486
+ The ISO 8859-1 (Latin-1) character set is a superset of 7-bit
487
+ ASCII. Files defining this character set are available as
488
+ iso_8859-1.* in ftp://ftp.uu.net/graphics/png/documents/
489
+
490
+ [2] ISO 3309
491
+
492
+ [3] ITU-T recommendation V.42
493
+
494
+ [4] Deutsch, L.P.,"DEFLATE Compressed Data Format Specification",
495
+ available in ftp://ftp.uu.net/pub/archiving/zip/doc/
496
+
497
+ [5] Gailly, J.-L., GZIP documentation, available as gzip-*.tar in
498
+ ftp://prep.ai.mit.edu/pub/gnu/
499
+
500
+ [6] Sarwate, D.V., "Computation of Cyclic Redundancy Checks via Table
501
+ Look-Up", Communications of the ACM, 31(8), pp.1008-1013.
502
+
503
+
504
+
505
+
506
+ Deutsch Informational [Page 9]
507
+
508
+ RFC 1952 GZIP File Format Specification May 1996
509
+
510
+
511
+ [7] Schwaderer, W.D., "CRC Calculation", April 85 PC Tech Journal,
512
+ pp.118-133.
513
+
514
+ [8] ftp://ftp.adelaide.edu.au/pub/rocksoft/papers/crc_v3.txt,
515
+ describing the CRC concept.
516
+
517
+ 4. Security Considerations
518
+
519
+ Any data compression method involves the reduction of redundancy in
520
+ the data. Consequently, any corruption of the data is likely to have
521
+ severe effects and be difficult to correct. Uncompressed text, on
522
+ the other hand, will probably still be readable despite the presence
523
+ of some corrupted bytes.
524
+
525
+ It is recommended that systems using this data format provide some
526
+ means of validating the integrity of the compressed data, such as by
527
+ setting and checking the CRC-32 check value.
528
+
529
+ 5. Acknowledgements
530
+
531
+ Trademarks cited in this document are the property of their
532
+ respective owners.
533
+
534
+ Jean-Loup Gailly designed the gzip format and wrote, with Mark Adler,
535
+ the related software described in this specification. Glenn
536
+ Randers-Pehrson converted this document to RFC and HTML format.
537
+
538
+ 6. Author's Address
539
+
540
+ L. Peter Deutsch
541
+ Aladdin Enterprises
542
+ 203 Santa Margarita Ave.
543
+ Menlo Park, CA 94025
544
+
545
+ Phone: (415) 322-0103 (AM only)
546
+ FAX: (415) 322-1734
547
+ EMail: <ghost@aladdin.com>
548
+
549
+ Questions about the technical content of this specification can be
550
+ sent by email to:
551
+
552
+ Jean-Loup Gailly <gzip@prep.ai.mit.edu> and
553
+ Mark Adler <madler@alumni.caltech.edu>
554
+
555
+ Editorial comments on this specification can be sent by email to:
556
+
557
+ L. Peter Deutsch <ghost@aladdin.com> and
558
+ Glenn Randers-Pehrson <randeg@alumni.rpi.edu>
559
+
560
+
561
+
562
+ Deutsch Informational [Page 10]
563
+
564
+ RFC 1952 GZIP File Format Specification May 1996
565
+
566
+
567
+ 7. Appendix: Jean-Loup Gailly's gzip utility
568
+
569
+ The most widely used implementation of gzip compression, and the
570
+ original documentation on which this specification is based, were
571
+ created by Jean-Loup Gailly <gzip@prep.ai.mit.edu>. Since this
572
+ implementation is a de facto standard, we mention some more of its
573
+ features here. Again, the material in this section is not part of
574
+ the specification per se, and implementations need not follow it to
575
+ be compliant.
576
+
577
+ When compressing or decompressing a file, gzip preserves the
578
+ protection, ownership, and modification time attributes on the local
579
+ file system, since there is no provision for representing protection
580
+ attributes in the gzip file format itself. Since the file format
581
+ includes a modification time, the gzip decompressor provides a
582
+ command line switch that assigns the modification time from the file,
583
+ rather than the local modification time of the compressed input, to
584
+ the decompressed output.
585
+
586
+ 8. Appendix: Sample CRC Code
587
+
588
+ The following sample code represents a practical implementation of
589
+ the CRC (Cyclic Redundancy Check). (See also ISO 3309 and ITU-T V.42
590
+ for a formal specification.)
591
+
592
+ The sample code is in the ANSI C programming language. Non-C users
593
+ may find it easier to read with these hints:
594
+
595
+ & Bitwise AND operator.
596
+ ^ Bitwise exclusive-OR operator.
597
+ >> Bitwise right shift operator. When applied to an
598
+ unsigned quantity, as here, right shift inserts zero
599
+ bit(s) at the left.
600
+ ! Logical NOT operator.
601
+ ++ "n++" increments the variable n.
602
+ 0xNNN 0x introduces a hexadecimal (base 16) constant.
603
+ Suffix L indicates a long value (at least 32 bits).
604
+
605
+ /* Table of CRCs of all 8-bit messages. */
606
+ unsigned long crc_table[256];
607
+
608
+ /* Flag: has the table been computed? Initially false. */
609
+ int crc_table_computed = 0;
610
+
611
+ /* Make the table for a fast CRC. */
612
+ void make_crc_table(void)
613
+ {
614
+ unsigned long c;
615
+
616
+
617
+
618
+ Deutsch Informational [Page 11]
619
+
620
+ RFC 1952 GZIP File Format Specification May 1996
621
+
622
+
623
+ int n, k;
624
+ for (n = 0; n < 256; n++) {
625
+ c = (unsigned long) n;
626
+ for (k = 0; k < 8; k++) {
627
+ if (c & 1) {
628
+ c = 0xedb88320L ^ (c >> 1);
629
+ } else {
630
+ c = c >> 1;
631
+ }
632
+ }
633
+ crc_table[n] = c;
634
+ }
635
+ crc_table_computed = 1;
636
+ }
637
+
638
+ /*
639
+ Update a running crc with the bytes buf[0..len-1] and return
640
+ the updated crc. The crc should be initialized to zero. Pre- and
641
+ post-conditioning (one's complement) is performed within this
642
+ function so it shouldn't be done by the caller. Usage example:
643
+
644
+ unsigned long crc = 0L;
645
+
646
+ while (read_buffer(buffer, length) != EOF) {
647
+ crc = update_crc(crc, buffer, length);
648
+ }
649
+ if (crc != original_crc) error();
650
+ */
651
+ unsigned long update_crc(unsigned long crc,
652
+ unsigned char *buf, int len)
653
+ {
654
+ unsigned long c = crc ^ 0xffffffffL;
655
+ int n;
656
+
657
+ if (!crc_table_computed)
658
+ make_crc_table();
659
+ for (n = 0; n < len; n++) {
660
+ c = crc_table[(c ^ buf[n]) & 0xff] ^ (c >> 8);
661
+ }
662
+ return c ^ 0xffffffffL;
663
+ }
664
+
665
+ /* Return the CRC of the bytes buf[0..len-1]. */
666
+ unsigned long crc(unsigned char *buf, int len)
667
+ {
668
+ return update_crc(0L, buf, len);
669
+ }
670
+
671
+
672
+
673
+
674
+ Deutsch Informational [Page 12]
675
+