@davidsouther/jiffies 2026.24.0 → 2026.24.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226) hide show
  1. package/lib/esm/assert.d.ts +26 -0
  2. package/lib/esm/assert.js +38 -0
  3. package/lib/esm/awaitable.js +1 -0
  4. package/lib/esm/case.d.ts +1 -0
  5. package/lib/esm/case.js +5 -0
  6. package/lib/esm/components/accordion.d.ts +5 -0
  7. package/lib/esm/components/accordion.js +9 -0
  8. package/lib/esm/components/alert.d.ts +7 -0
  9. package/lib/esm/components/alert.js +31 -0
  10. package/lib/esm/components/button_bar.d.ts +8 -0
  11. package/lib/esm/components/button_bar.js +25 -0
  12. package/lib/esm/components/card.d.ts +8 -0
  13. package/lib/esm/components/card.js +31 -0
  14. package/lib/esm/components/children.d.ts +2 -0
  15. package/{src/components/children.ts → lib/esm/components/children.js} +2 -6
  16. package/lib/esm/components/form.d.ts +5 -0
  17. package/lib/esm/components/form.js +13 -0
  18. package/{src/components/index.ts → lib/esm/components/index.d.ts} +2 -15
  19. package/lib/esm/components/index.js +10 -0
  20. package/lib/esm/components/inline_edit.d.ts +12 -0
  21. package/lib/esm/components/inline_edit.js +48 -0
  22. package/lib/esm/components/link.d.ts +5 -0
  23. package/lib/esm/components/link.js +11 -0
  24. package/lib/esm/components/logger.d.ts +6 -0
  25. package/lib/esm/components/logger.js +22 -0
  26. package/lib/esm/components/modal.d.ts +2 -0
  27. package/{src/components/modal.ts → lib/esm/components/modal.js} +3 -8
  28. package/lib/esm/components/nav.d.ts +11 -0
  29. package/lib/esm/components/nav.js +27 -0
  30. package/lib/esm/components/property.d.ts +9 -0
  31. package/lib/esm/components/property.js +16 -0
  32. package/lib/esm/components/select.d.ts +10 -0
  33. package/lib/esm/components/select.js +3 -0
  34. package/lib/esm/components/tabs.d.ts +20 -0
  35. package/lib/esm/components/tabs.js +45 -0
  36. package/lib/esm/components/virtual_scroll.d.ts +42 -0
  37. package/lib/esm/components/virtual_scroll.js +94 -0
  38. package/lib/esm/debounce.d.ts +1 -0
  39. package/lib/esm/debounce.js +11 -0
  40. package/lib/esm/diff.d.ts +15 -0
  41. package/lib/esm/diff.js +50 -0
  42. package/lib/esm/display.d.ts +5 -0
  43. package/lib/esm/display.js +11 -0
  44. package/lib/esm/dom/css/border.d.ts +11 -0
  45. package/lib/esm/dom/css/border.js +27 -0
  46. package/lib/esm/dom/css/constants.d.ts +31 -0
  47. package/lib/esm/dom/css/constants.js +28 -0
  48. package/lib/esm/dom/css/core.d.ts +5 -0
  49. package/lib/esm/dom/css/core.js +24 -0
  50. package/lib/esm/dom/css/fstyle.d.ts +5 -0
  51. package/lib/esm/dom/css/fstyle.js +32 -0
  52. package/lib/esm/dom/css/sizing.d.ts +5 -0
  53. package/lib/esm/dom/css/sizing.js +10 -0
  54. package/lib/esm/dom/dom.d.ts +36 -0
  55. package/lib/esm/dom/dom.js +217 -0
  56. package/lib/esm/dom/fc.d.ts +10 -0
  57. package/lib/esm/dom/fc.js +32 -0
  58. package/lib/esm/dom/form/form.app.d.ts +1 -0
  59. package/lib/esm/dom/form/form.app.js +19 -0
  60. package/lib/esm/dom/form/form.d.ts +27 -0
  61. package/lib/esm/dom/form/form.js +65 -0
  62. package/lib/esm/dom/html.d.ts +112 -0
  63. package/{src/dom/html.ts → lib/esm/dom/html.js} +2 -14
  64. package/lib/esm/dom/hydrate.d.ts +39 -0
  65. package/lib/esm/dom/hydrate.js +187 -0
  66. package/lib/esm/dom/index.js +2 -0
  67. package/lib/esm/dom/navigation/index.d.ts +76 -0
  68. package/lib/esm/dom/navigation/index.js +292 -0
  69. package/lib/esm/dom/observable.d.ts +2 -0
  70. package/lib/esm/dom/observable.js +6 -0
  71. package/lib/esm/dom/provide.d.ts +3 -0
  72. package/lib/esm/dom/provide.js +7 -0
  73. package/lib/esm/dom/render.d.ts +8 -0
  74. package/lib/esm/dom/render.js +28 -0
  75. package/lib/esm/dom/router/link.d.ts +6 -0
  76. package/lib/esm/dom/router/link.js +3 -0
  77. package/lib/esm/dom/router/router.d.ts +13 -0
  78. package/lib/esm/dom/router/router.js +52 -0
  79. package/lib/esm/dom/svg.d.ts +64 -0
  80. package/{src/dom/svg.ts → lib/esm/dom/svg.js} +2 -19
  81. package/lib/esm/dom/types/css.d.ts +6590 -0
  82. package/lib/esm/dom/types/css.js +1 -0
  83. package/lib/esm/dom/types/dom.js +1 -0
  84. package/lib/esm/dom/types/html.d.ts +614 -0
  85. package/lib/esm/dom/types/html.js +1 -0
  86. package/lib/esm/dom/xml.d.ts +1 -0
  87. package/lib/esm/dom/xml.js +4 -0
  88. package/lib/esm/equal.d.ts +11 -0
  89. package/lib/esm/equal.js +43 -0
  90. package/lib/esm/fs.d.ts +72 -0
  91. package/lib/esm/fs.js +227 -0
  92. package/lib/esm/fs_node.d.ts +15 -0
  93. package/lib/esm/fs_node.js +45 -0
  94. package/lib/esm/generator.d.ts +1 -0
  95. package/lib/esm/generator.js +10 -0
  96. package/lib/esm/lock.d.ts +1 -0
  97. package/lib/esm/lock.js +23 -0
  98. package/lib/esm/log.d.ts +69 -0
  99. package/lib/esm/log.js +211 -0
  100. package/lib/esm/observable/event.d.ts +35 -0
  101. package/lib/esm/observable/event.js +46 -0
  102. package/lib/esm/observable/observable.d.ts +134 -0
  103. package/lib/esm/observable/observable.js +349 -0
  104. package/lib/esm/range.d.ts +1 -0
  105. package/lib/esm/range.js +7 -0
  106. package/lib/esm/result.d.ts +31 -0
  107. package/lib/esm/result.js +66 -0
  108. package/lib/esm/safe.d.ts +1 -0
  109. package/lib/esm/safe.js +10 -0
  110. package/lib/esm/server/http/apps.d.ts +5 -0
  111. package/lib/esm/server/http/apps.js +23 -0
  112. package/lib/esm/server/http/css.d.ts +5 -0
  113. package/lib/esm/server/http/css.js +43 -0
  114. package/lib/esm/server/http/index.d.ts +16 -0
  115. package/lib/esm/server/http/index.js +78 -0
  116. package/lib/esm/server/http/response.d.ts +4 -0
  117. package/lib/esm/server/http/response.js +43 -0
  118. package/lib/esm/server/http/sitemap.d.ts +2 -0
  119. package/lib/esm/server/http/sitemap.js +22 -0
  120. package/lib/esm/server/http/static.d.ts +2 -0
  121. package/lib/esm/server/http/static.js +22 -0
  122. package/lib/esm/server/http/typescript.d.ts +5 -0
  123. package/lib/esm/server/http/typescript.js +40 -0
  124. package/lib/esm/server/live-reload.d.ts +46 -0
  125. package/lib/esm/server/live-reload.js +161 -0
  126. package/lib/esm/server/main.d.ts +2 -0
  127. package/{src/server/main.ts → lib/esm/server/main.js} +8 -15
  128. package/lib/esm/server/ws/frame.d.ts +2 -0
  129. package/lib/esm/server/ws/frame.js +35 -0
  130. package/lib/esm/server/ws/handshake.d.ts +4 -0
  131. package/lib/esm/server/ws/handshake.js +32 -0
  132. package/lib/esm/server/ws/index.d.ts +14 -0
  133. package/lib/esm/server/ws/index.js +68 -0
  134. package/lib/esm/ssg/bundle.d.ts +14 -0
  135. package/lib/esm/ssg/bundle.js +73 -0
  136. package/lib/esm/ssg/copy-public.d.ts +6 -0
  137. package/lib/esm/ssg/copy-public.js +34 -0
  138. package/lib/esm/ssg/discover.d.ts +15 -0
  139. package/lib/esm/ssg/discover.js +117 -0
  140. package/lib/esm/ssg/main.d.ts +2 -0
  141. package/lib/esm/ssg/main.js +122 -0
  142. package/lib/esm/ssg/rewrite.d.ts +9 -0
  143. package/{src/ssg/rewrite.ts → lib/esm/ssg/rewrite.js} +6 -9
  144. package/lib/esm/ssg/ssg.d.ts +26 -0
  145. package/lib/esm/ssg/ssg.js +84 -0
  146. package/lib/esm/transpile.d.mts +3 -0
  147. package/lib/esm/transpile.mjs +12 -0
  148. package/package.json +11 -7
  149. package/src/404.html +0 -14
  150. package/src/assert.ts +0 -56
  151. package/src/case.ts +0 -5
  152. package/src/components/_notes +0 -33
  153. package/src/components/accordion.ts +0 -25
  154. package/src/components/alert.ts +0 -47
  155. package/src/components/button_bar.ts +0 -42
  156. package/src/components/card.ts +0 -54
  157. package/src/components/form.ts +0 -25
  158. package/src/components/inline_edit.ts +0 -78
  159. package/src/components/link.ts +0 -22
  160. package/src/components/logger.ts +0 -35
  161. package/src/components/nav.ts +0 -42
  162. package/src/components/property.ts +0 -32
  163. package/src/components/select.ts +0 -22
  164. package/src/components/tabs.ts +0 -82
  165. package/src/components/virtual_scroll.ts +0 -199
  166. package/src/debounce.ts +0 -14
  167. package/src/diff.ts +0 -82
  168. package/src/display.ts +0 -18
  169. package/src/dom/README.md +0 -107
  170. package/src/dom/SKILL.md +0 -201
  171. package/src/dom/css/border.ts +0 -47
  172. package/src/dom/css/constants.ts +0 -34
  173. package/src/dom/css/core.ts +0 -28
  174. package/src/dom/css/fstyle.ts +0 -42
  175. package/src/dom/css/sizing.ts +0 -11
  176. package/src/dom/dom.ts +0 -327
  177. package/src/dom/fc.ts +0 -81
  178. package/src/dom/form/form.app.ts +0 -44
  179. package/src/dom/form/form.ts +0 -151
  180. package/src/dom/form/index.html +0 -15
  181. package/src/dom/hydrate.ts +0 -206
  182. package/src/dom/navigation/index.ts +0 -349
  183. package/src/dom/observable.ts +0 -11
  184. package/src/dom/provide.ts +0 -11
  185. package/src/dom/render.ts +0 -41
  186. package/src/dom/router/link.ts +0 -14
  187. package/src/dom/router/router.ts +0 -72
  188. package/src/dom/types/css.ts +0 -10088
  189. package/src/dom/types/html.ts +0 -629
  190. package/src/dom/xml.ts +0 -11
  191. package/src/equal.ts +0 -66
  192. package/src/favicon.ico +0 -0
  193. package/src/fs.ts +0 -300
  194. package/src/fs_node.ts +0 -57
  195. package/src/generator.ts +0 -12
  196. package/src/hooks/_notes +0 -6
  197. package/src/lock.ts +0 -23
  198. package/src/log.ts +0 -307
  199. package/src/observable/_notes +0 -26
  200. package/src/observable/event.ts +0 -93
  201. package/src/observable/observable.ts +0 -484
  202. package/src/range.ts +0 -7
  203. package/src/result.ts +0 -107
  204. package/src/safe.ts +0 -12
  205. package/src/server/http/apps.ts +0 -26
  206. package/src/server/http/css.ts +0 -49
  207. package/src/server/http/index.ts +0 -127
  208. package/src/server/http/response.ts +0 -60
  209. package/src/server/http/sitemap.ts +0 -24
  210. package/src/server/http/static.ts +0 -28
  211. package/src/server/http/typescript.ts +0 -46
  212. package/src/server/live-reload.ts +0 -208
  213. package/src/server/ws/frame.ts +0 -36
  214. package/src/server/ws/handshake.ts +0 -42
  215. package/src/server/ws/index.ts +0 -100
  216. package/src/ssg/bundle.ts +0 -85
  217. package/src/ssg/copy-public.ts +0 -44
  218. package/src/ssg/discover.ts +0 -143
  219. package/src/ssg/main.ts +0 -168
  220. package/src/ssg/ssg.ts +0 -134
  221. package/src/transpile.mjs +0 -16
  222. package/src/zip/spec.txt +0 -3260
  223. package/tsconfig.json +0 -34
  224. /package/{src/awaitable.ts → lib/esm/awaitable.d.ts} +0 -0
  225. /package/{src/dom/index.ts → lib/esm/dom/index.d.ts} +0 -0
  226. /package/{src/dom/types/dom.ts → lib/esm/dom/types/dom.d.ts} +0 -0
package/src/zip/spec.txt DELETED
@@ -1,3260 +0,0 @@
1
- File: APPNOTE.TXT - .ZIP File Format Specification
2
- Version: 6.3.9
3
- Status: FINAL - replaces version 6.3.8
4
- Revised: July 15, 2020
5
- Copyright (c) 1989 - 2014, 2018, 2019, 2020 PKWARE Inc., All Rights Reserved.
6
-
7
-
8
- 4.0 ZIP Files
9
- -------------
10
-
11
- 4.1 What is a ZIP file
12
- ----------------------
13
-
14
- 4.1.1 ZIP files MAY be identified by the standard .ZIP file extension
15
-
16
- 4.1.2 ZIP files SHOULD contain at least one file and MAY contain
17
- multiple files.
18
-
19
- 4.1.3 Data compression MAY be used to reduce the size of files
20
- placed into a ZIP file, but is not required. This format supports the
21
- use of multiple data compression algorithms. When compression is used,
22
- one of the documented compression algorithms MUST be used.
23
- Compression method 8 (Deflate) is the method used by default by most
24
- ZIP compatible application programs.
25
-
26
- 4.1.5 Data integrity MUST be provided for each file using CRC32.
27
-
28
- 4.1.7 Files MAY be placed within a ZIP file uncompressed or stored.
29
- The term "stored" as used in the context of this document means the file
30
- is copied into the ZIP file uncompressed.
31
-
32
- 4.1.8 Each data file placed into a ZIP file MAY be compressed or stored,
33
- independent of how other data files in the same ZIP file are archived.
34
-
35
- 4.1.12 ZIP files MAY be placed within other ZIP files.
36
-
37
- 4.2 ZIP Metadata
38
- ----------------
39
-
40
- 4.2.1 ZIP files are identified by metadata consisting of defined record types
41
- containing the storage information necessary for maintaining the files
42
- placed into a ZIP file. Each record type MUST be identified using a header
43
- signature that identifies the record type. Signature values begin with the
44
- two byte constant marker of 0x4b50, representing the characters "PK".
45
-
46
-
47
- 4.3 General Format of a .ZIP file
48
- ---------------------------------
49
-
50
- 4.3.1 A ZIP file MUST contain an "end of central directory record". A ZIP
51
- file containing only an "end of central directory record" is considered an
52
- empty ZIP file. Files MAY be added or replaced within a ZIP file, or deleted.
53
- A ZIP file MUST have only one "end of central directory record". Other
54
- records defined in this specification MAY be used as needed to support
55
- storage requirements for individual ZIP files.
56
-
57
- 4.3.2 Each file placed into a ZIP file MUST be preceded by a "local
58
- file header" record for that file. Each "local file header" MUST be
59
- accompanied by a corresponding "central directory header" record within
60
- the central directory section of the ZIP file.
61
-
62
- 4.3.3 Files MAY be stored in arbitrary order within a ZIP file. A ZIP
63
- file MAY span multiple volumes or it MAY be split into user-defined
64
- segment sizes. All values MUST be stored in little-endian byte order unless
65
- otherwise specified in this document for a specific data element.
66
-
67
- 4.3.4 Compression MUST NOT be applied to a "local file header", an "encryption
68
- header", or an "end of central directory record". Individual "central
69
- directory records" MUST NOT be compressed, but the aggregate of all central
70
- directory records MAY be compressed.
71
-
72
- 4.3.6 Overall .ZIP file format:
73
-
74
- [local file header 1]
75
- [file data 1]
76
- .
77
- .
78
- .
79
- [local file header n]
80
- [file data n]
81
- [central directory header 1]
82
- .
83
- .
84
- .
85
- [central directory header n]
86
- [end of central directory record]
87
-
88
-
89
- 4.3.7 Local file header:
90
-
91
- local file header signature 4 bytes (0x04034b50)
92
- version needed to extract 2 bytes (0x0014)
93
- general purpose bit flag 2 bytes
94
- compression method 2 bytes
95
- last mod file time 2 bytes
96
- last mod file date 2 bytes
97
- crc-32 4 bytes
98
- compressed size 4 bytes
99
- uncompressed size 4 bytes
100
- file name length 2 bytes
101
- extra field length 2 bytes (0x0000)
102
-
103
- file name (variable size)
104
- extra field (variable size)
105
-
106
- 4.3.8 File data
107
-
108
- Immediately following the local header for a file
109
- SHOULD be placed the compressed or stored data for the file.
110
- If the file is encrypted, the encryption header for the file
111
- SHOULD be placed after the local header and before the file
112
- data. The series of [local file header][encryption header]
113
- [file data][data descriptor] repeats for each file in the
114
- .ZIP archive.
115
-
116
- Zero-byte files, directories, and other file types that
117
- contain no content MUST NOT include file data.
118
-
119
- 4.3.12 Central directory structure:
120
-
121
- [central directory header 1]
122
- .
123
- .
124
- .
125
- [central directory header n]
126
- [digital signature]
127
-
128
- File header:
129
-
130
- central file header signature 4 bytes (0x02014b50)
131
- version made by 2 bytes
132
- version needed to extract 2 bytes
133
- general purpose bit flag 2 bytes
134
- compression method 2 bytes
135
- last mod file time 2 bytes
136
- last mod file date 2 bytes
137
- crc-32 4 bytes
138
- compressed size 4 bytes
139
- uncompressed size 4 bytes
140
- file name length 2 bytes
141
- extra field length 2 bytes
142
- file comment length 2 bytes
143
- disk number start 2 bytes
144
- internal file attributes 2 bytes
145
- external file attributes 4 bytes
146
- relative offset of local header 4 bytes
147
-
148
- file name (variable size)
149
- extra field (variable size)
150
- file comment (variable size)
151
-
152
- 4.3.16 End of central directory record:
153
-
154
- end of central dir signature 4 bytes (0x06054b50)
155
- number of this disk 2 bytes
156
- number of the disk with the
157
- start of the central directory 2 bytes
158
- total number of entries in the
159
- central directory on this disk 2 bytes
160
- total number of entries in
161
- the central directory 2 bytes
162
- size of the central directory 4 bytes
163
- offset of start of central
164
- directory with respect to
165
- the starting disk number 4 bytes
166
- .ZIP file comment length 2 bytes
167
- .ZIP file comment (variable size)
168
-
169
- 4.4 Explanation of fields
170
- --------------------------
171
-
172
- 4.4.1 General notes on fields
173
-
174
- 4.4.1.1 All fields unless otherwise noted are unsigned and stored
175
- in Intel low-byte:high-byte, low-word:high-word order.
176
-
177
- 4.4.1.2 String fields are not null terminated, since the length
178
- is given explicitly.
179
-
180
- 4.4.1.3 The entries in the central directory MAY NOT necessarily
181
- be in the same order that files appear in the .ZIP file.
182
-
183
- 4.4.1.4 If one of the fields in the end of central directory
184
- record is too small to hold required data, the field SHOULD be
185
- set to -1 (0xFFFF or 0xFFFFFFFF) and the ZIP64 format record
186
- SHOULD be created.
187
-
188
- 4.4.1.5 The end of central directory record and the Zip64 end
189
- of central directory locator record MUST reside on the same
190
- disk when splitting or spanning an archive.
191
-
192
- 4.4.2 version made by (2 bytes)
193
-
194
- 4.4.2.1 The upper byte indicates the compatibility of the file
195
- attribute information. If the external file attributes
196
- are compatible with MS-DOS and can be read by PKZIP for
197
- DOS version 2.04g then this value will be zero. If these
198
- attributes are not compatible, then this value will
199
- identify the host system on which the attributes are
200
- compatible. Software can use this information to determine
201
- the line record format for text files etc.
202
-
203
- 4.4.2.2 The current mappings are:
204
-
205
- 0 - MS-DOS and OS/2 (FAT / VFAT / FAT32 file systems)
206
- 1 - Amiga 2 - OpenVMS
207
- 3 - UNIX 4 - VM/CMS
208
- 5 - Atari ST 6 - OS/2 H.P.F.S.
209
- 7 - Macintosh 8 - Z-System
210
- 9 - CP/M 10 - Windows NTFS
211
- 11 - MVS (OS/390 - Z/OS) 12 - VSE
212
- 13 - Acorn Risc 14 - VFAT
213
- 15 - alternate MVS 16 - BeOS
214
- 17 - Tandem 18 - OS/400
215
- 19 - OS X (Darwin) 20 thru 255 - unused
216
-
217
- 4.4.2.3 The lower byte indicates the ZIP specification version
218
- (the version of this document) supported by the software
219
- used to encode the file. The value/10 indicates the major
220
- version number, and the value mod 10 is the minor version
221
- number.
222
-
223
- 4.4.3 version needed to extract (2 bytes)
224
-
225
- 4.4.3.1 The minimum supported ZIP specification version needed
226
- to extract the file, mapped as above. This value is based on
227
- the specific format features a ZIP program MUST support to
228
- be able to extract the file. If multiple features are
229
- applied to a file, the minimum version MUST be set to the
230
- feature having the highest value. New features or feature
231
- changes affecting the published format specification will be
232
- implemented using higher version numbers than the last
233
- published value to avoid conflict.
234
-
235
- 4.4.3.2 Current minimum feature versions are as defined below:
236
-
237
- 1.0 - Default value
238
- 1.1 - File is a volume label
239
- 2.0 - File is a folder (directory)
240
- 2.0 - File is compressed using Deflate compression
241
-
242
-
243
-
244
- 4.4.4 general purpose bit flag: (2 bytes)
245
-
246
- Bit 0: If set, indicates that the file is encrypted.
247
-
248
- (For Method 6 - Imploding)
249
- Bit 1: If the compression method used was type 6,
250
- Imploding, then this bit, if set, indicates
251
- an 8K sliding dictionary was used. If clear,
252
- then a 4K sliding dictionary was used.
253
-
254
- Bit 2: If the compression method used was type 6,
255
- Imploding, then this bit, if set, indicates
256
- 3 Shannon-Fano trees were used to encode the
257
- sliding dictionary output. If clear, then 2
258
- Shannon-Fano trees were used.
259
-
260
- (For Methods 8 and 9 - Deflating)
261
- Bit 2 Bit 1
262
- 0 0 Normal (-en) compression option was used.
263
- 0 1 Maximum (-exx/-ex) compression option was used.
264
- 1 0 Fast (-ef) compression option was used.
265
- 1 1 Super Fast (-es) compression option was used.
266
-
267
- (For Method 14 - LZMA)
268
- Bit 1: If the compression method used was type 14,
269
- LZMA, then this bit, if set, indicates
270
- an end-of-stream (EOS) marker is used to
271
- mark the end of the compressed data stream.
272
- If clear, then an EOS marker is not present
273
- and the compressed data size must be known
274
- to extract.
275
-
276
- Note: Bits 1 and 2 are undefined if the compression
277
- method is any other.
278
-
279
- Bit 3: If this bit is set, the fields crc-32, compressed
280
- size and uncompressed size are set to zero in the
281
- local header. The correct values are put in the
282
- data descriptor immediately following the compressed
283
- data. (Note: PKZIP version 2.04g for DOS only
284
- recognizes this bit for method 8 compression, newer
285
- versions of PKZIP recognize this bit for any
286
- compression method.)
287
-
288
- Bit 4: Reserved for use with method 8, for enhanced
289
- deflating.
290
-
291
- Bit 5: If this bit is set, this indicates that the file is
292
- compressed patched data. (Note: Requires PKZIP
293
- version 2.70 or greater)
294
-
295
- Bit 6: Strong encryption. If this bit is set, you MUST
296
- set the version needed to extract value to at least
297
- 50 and you MUST also set bit 0. If AES encryption
298
- is used, the version needed to extract value MUST
299
- be at least 51. See the section describing the Strong
300
- Encryption Specification for details. Refer to the
301
- section in this document entitled "Incorporating PKWARE
302
- Proprietary Technology into Your Product" for more
303
- information.
304
-
305
- Bit 7: Currently unused.
306
-
307
- Bit 8: Currently unused.
308
-
309
- Bit 9: Currently unused.
310
-
311
- Bit 10: Currently unused.
312
-
313
- Bit 11: Language encoding flag (EFS). If this bit is set,
314
- the filename and comment fields for this file
315
- MUST be encoded using UTF-8. (see APPENDIX D)
316
-
317
- Bit 12: Reserved by PKWARE for enhanced compression.
318
-
319
- Bit 13: Set when encrypting the Central Directory to indicate
320
- selected data values in the Local Header are masked to
321
- hide their actual values. See the section describing
322
- the Strong Encryption Specification for details. Refer
323
- to the section in this document entitled "Incorporating
324
- PKWARE Proprietary Technology into Your Product" for
325
- more information.
326
-
327
- Bit 14: Reserved by PKWARE for alternate streams.
328
-
329
- Bit 15: Reserved by PKWARE.
330
-
331
- 4.4.5 compression method: (2 bytes)
332
-
333
- 0 - The file is stored (no compression)
334
- 1 - The file is Shrunk
335
- 2 - The file is Reduced with compression factor 1
336
- 3 - The file is Reduced with compression factor 2
337
- 4 - The file is Reduced with compression factor 3
338
- 5 - The file is Reduced with compression factor 4
339
- 6 - The file is Imploded
340
- 7 - Reserved for Tokenizing compression algorithm
341
- 8 - The file is Deflated
342
- 9 - Enhanced Deflating using Deflate64(tm)
343
- 10 - PKWARE Data Compression Library Imploding (old IBM TERSE)
344
- 11 - Reserved by PKWARE
345
- 12 - File is compressed using BZIP2 algorithm
346
- 13 - Reserved by PKWARE
347
- 14 - LZMA
348
- 15 - Reserved by PKWARE
349
- 16 - IBM z/OS CMPSC Compression
350
- 17 - Reserved by PKWARE
351
- 18 - File is compressed using IBM TERSE (new)
352
- 19 - IBM LZ77 z Architecture
353
- 20 - deprecated (use method 93 for zstd)
354
- 93 - Zstandard (zstd) Compression
355
- 94 - MP3 Compression
356
- 95 - XZ Compression
357
- 96 - JPEG variant
358
- 97 - WavPack compressed data
359
- 98 - PPMd version I, Rev 1
360
- 99 - AE-x encryption marker (see APPENDIX E)
361
-
362
- 4.4.5.1 Methods 1-6 are legacy algorithms and are no longer
363
- recommended for use when compressing files.
364
-
365
- 4.4.6 date and time fields: (2 bytes each)
366
-
367
- The date and time are encoded in standard MS-DOS format.
368
- If input came from standard input, the date and time are
369
- those at which compression was started for this data.
370
- If encrypting the central directory and general purpose bit
371
- flag 13 is set indicating masking, the value stored in the
372
- Local Header will be zero. MS-DOS time format is different
373
- from more commonly used computer time formats such as
374
- UTC. For example, MS-DOS uses year values relative to 1980
375
- and 2 second precision.
376
-
377
- 4.4.7 CRC-32: (4 bytes)
378
-
379
- The CRC-32 algorithm was generously contributed by
380
- David Schwaderer and can be found in his excellent
381
- book "C Programmers Guide to NetBIOS" published by
382
- Howard W. Sams & Co. Inc. The 'magic number' for
383
- the CRC is 0xdebb20e3. The proper CRC pre and post
384
- conditioning is used, meaning that the CRC register
385
- is pre-conditioned with all ones (a starting value
386
- of 0xffffffff) and the value is post-conditioned by
387
- taking the one's complement of the CRC residual.
388
- If bit 3 of the general purpose flag is set, this
389
- field is set to zero in the local header and the correct
390
- value is put in the data descriptor and in the central
391
- directory. When encrypting the central directory, if the
392
- local header is not in ZIP64 format and general purpose
393
- bit flag 13 is set indicating masking, the value stored
394
- in the Local Header will be zero.
395
-
396
- 4.4.8 compressed size: (4 bytes)
397
- 4.4.9 uncompressed size: (4 bytes)
398
-
399
- The size of the file compressed (4.4.8) and uncompressed,
400
- (4.4.9) respectively. When a decryption header is present it
401
- will be placed in front of the file data and the value of the
402
- compressed file size will include the bytes of the decryption
403
- header. If bit 3 of the general purpose bit flag is set,
404
- these fields are set to zero in the local header and the
405
- correct values are put in the data descriptor and
406
- in the central directory. If an archive is in ZIP64 format
407
- and the value in this field is 0xFFFFFFFF, the size will be
408
- in the corresponding 8 byte ZIP64 extended information
409
- extra field. When encrypting the central directory, if the
410
- local header is not in ZIP64 format and general purpose bit
411
- flag 13 is set indicating masking, the value stored for the
412
- uncompressed size in the Local Header will be zero.
413
-
414
- 4.4.10 file name length: (2 bytes)
415
- 4.4.11 extra field length: (2 bytes)
416
- 4.4.12 file comment length: (2 bytes)
417
-
418
- The length of the file name, extra field, and comment
419
- fields respectively. The combined length of any
420
- directory record and these three fields SHOULD NOT
421
- generally exceed 65,535 bytes. If input came from standard
422
- input, the file name length is set to zero.
423
-
424
-
425
- 4.4.13 disk number start: (2 bytes)
426
-
427
- The number of the disk on which this file begins. If an
428
- archive is in ZIP64 format and the value in this field is
429
- 0xFFFF, the size will be in the corresponding 4 byte zip64
430
- extended information extra field.
431
-
432
- 4.4.14 internal file attributes: (2 bytes)
433
-
434
- Bits 1 and 2 are reserved for use by PKWARE.
435
-
436
- 4.4.14.1 The lowest bit of this field indicates, if set,
437
- that the file is apparently an ASCII or text file. If not
438
- set, that the file apparently contains binary data.
439
- The remaining bits are unused in version 1.0.
440
-
441
- 4.4.14.2 The 0x0002 bit of this field indicates, if set, that
442
- a 4 byte variable record length control field precedes each
443
- logical record indicating the length of the record. The
444
- record length control field is stored in little-endian byte
445
- order. This flag is independent of text control characters,
446
- and if used in conjunction with text data, includes any
447
- control characters in the total length of the record. This
448
- value is provided for mainframe data transfer support.
449
-
450
- 4.4.15 external file attributes: (4 bytes)
451
-
452
- The mapping of the external attributes is
453
- host-system dependent (see 'version made by'). For
454
- MS-DOS, the low order byte is the MS-DOS directory
455
- attribute byte. If input came from standard input, this
456
- field is set to zero.
457
-
458
- 4.4.16 relative offset of local header: (4 bytes)
459
-
460
- This is the offset from the start of the first disk on
461
- which this file appears, to where the local header SHOULD
462
- be found. If an archive is in ZIP64 format and the value
463
- in this field is 0xFFFFFFFF, the size will be in the
464
- corresponding 8 byte zip64 extended information extra field.
465
-
466
- 4.4.17 file name: (Variable)
467
-
468
- 4.4.17.1 The name of the file, with optional relative path.
469
- The path stored MUST NOT contain a drive or
470
- device letter, or a leading slash. All slashes
471
- MUST be forward slashes '/' as opposed to
472
- backwards slashes '\' for compatibility with Amiga
473
- and UNIX file systems etc. If input came from standard
474
- input, there is no file name field.
475
-
476
- 4.4.17.2 If using the Central Directory Encryption Feature and
477
- general purpose bit flag 13 is set indicating masking, the file
478
- name stored in the Local Header will not be the actual file name.
479
- A masking value consisting of a unique hexadecimal value will
480
- be stored. This value will be sequentially incremented for each
481
- file in the archive. See the section on the Strong Encryption
482
- Specification for details on retrieving the encrypted file name.
483
- Refer to the section in this document entitled "Incorporating PKWARE
484
- Proprietary Technology into Your Product" for more information.
485
-
486
-
487
- 4.4.18 file comment: (Variable)
488
-
489
- The comment for this file.
490
-
491
- 4.4.19 number of this disk: (2 bytes)
492
-
493
- The number of this disk, which contains central
494
- directory end record. If an archive is in ZIP64 format
495
- and the value in this field is 0xFFFF, the size will
496
- be in the corresponding 4 byte zip64 end of central
497
- directory field.
498
-
499
-
500
- 4.4.20 number of the disk with the start of the central
501
- directory: (2 bytes)
502
-
503
- The number of the disk on which the central
504
- directory starts. If an archive is in ZIP64 format
505
- and the value in this field is 0xFFFF, the size will
506
- be in the corresponding 4 byte zip64 end of central
507
- directory field.
508
-
509
- 4.4.21 total number of entries in the central dir on
510
- this disk: (2 bytes)
511
-
512
- The number of central directory entries on this disk.
513
- If an archive is in ZIP64 format and the value in
514
- this field is 0xFFFF, the size will be in the
515
- corresponding 8 byte zip64 end of central
516
- directory field.
517
-
518
- 4.4.22 total number of entries in the central dir: (2 bytes)
519
-
520
- The total number of files in the .ZIP file. If an
521
- archive is in ZIP64 format and the value in this field
522
- is 0xFFFF, the size will be in the corresponding 8 byte
523
- zip64 end of central directory field.
524
-
525
- 4.4.23 size of the central directory: (4 bytes)
526
-
527
- The size (in bytes) of the entire central directory.
528
- If an archive is in ZIP64 format and the value in
529
- this field is 0xFFFFFFFF, the size will be in the
530
- corresponding 8 byte zip64 end of central
531
- directory field.
532
-
533
- 4.4.24 offset of start of central directory with respect to
534
- the starting disk number: (4 bytes)
535
-
536
- Offset of the start of the central directory on the
537
- disk on which the central directory starts. If an
538
- archive is in ZIP64 format and the value in this
539
- field is 0xFFFFFFFF, the value will be in the
540
- corresponding 8 byte zip64 end of central
541
- directory field.
542
-
543
- 4.4.25 .ZIP file comment length: (2 bytes)
544
-
545
- The length of the comment for this .ZIP file.
546
-
547
- 4.4.26 .ZIP file comment: (Variable)
548
-
549
- The comment for this .ZIP file. ZIP file comment data
550
- is stored unsecured. No encryption or data authentication
551
- is applied to this area at this time. Confidential information
552
- SHOULD NOT be stored in this section.
553
-
554
- 4.4.27 zip64 extensible data sector (variable size)
555
-
556
- (currently reserved for use by PKWARE)
557
-
558
-
559
- 4.4.28 extra field: (Variable)
560
-
561
- This SHOULD be used for storage expansion. If additional
562
- information needs to be stored within a ZIP file for special
563
- application or platform needs, it SHOULD be stored here.
564
- Programs supporting earlier versions of this specification can
565
- then safely skip the file, and find the next file or header.
566
- This field will be 0 length in version 1.0.
567
-
568
- Existing extra fields are defined in the section
569
- Extensible data fields that follows.
570
-
571
- 4.5 Extensible data fields
572
- --------------------------
573
-
574
- 4.5.1 In order to allow different programs and different types
575
- of information to be stored in the 'extra' field in .ZIP
576
- files, the following structure MUST be used for all
577
- programs storing data in this field:
578
-
579
- header1+data1 + header2+data2 . . .
580
-
581
- Each header MUST consist of:
582
-
583
- Header ID - 2 bytes
584
- Data Size - 2 bytes
585
-
586
- Note: all fields stored in Intel low-byte/high-byte order.
587
-
588
- The Header ID field indicates the type of data that is in
589
- the following data block.
590
-
591
- Header IDs of 0 thru 31 are reserved for use by PKWARE.
592
- The remaining IDs can be used by third party vendors for
593
- proprietary usage.
594
-
595
- 4.5.2 The current Header ID mappings defined by PKWARE are:
596
-
597
- 0x0001 Zip64 extended information extra field
598
- 0x0007 AV Info
599
- 0x0008 Reserved for extended language encoding data (PFS)
600
- (see APPENDIX D)
601
- 0x0009 OS/2
602
- 0x000a NTFS
603
- 0x000c OpenVMS
604
- 0x000d UNIX
605
- 0x000e Reserved for file stream and fork descriptors
606
- 0x000f Patch Descriptor
607
- 0x0014 PKCS#7 Store for X.509 Certificates
608
- 0x0015 X.509 Certificate ID and Signature for
609
- individual file
610
- 0x0016 X.509 Certificate ID for Central Directory
611
- 0x0017 Strong Encryption Header
612
- 0x0018 Record Management Controls
613
- 0x0019 PKCS#7 Encryption Recipient Certificate List
614
- 0x0020 Reserved for Timestamp record
615
- 0x0021 Policy Decryption Key Record
616
- 0x0022 Smartcrypt Key Provider Record
617
- 0x0023 Smartcrypt Policy Key Data Record
618
- 0x0065 IBM S/390 (Z390), AS/400 (I400) attributes
619
- - uncompressed
620
- 0x0066 Reserved for IBM S/390 (Z390), AS/400 (I400)
621
- attributes - compressed
622
- 0x4690 POSZIP 4690 (reserved)
623
-
624
-
625
- 4.5.3 -Zip64 Extended Information Extra Field (0x0001):
626
-
627
- The following is the layout of the zip64 extended
628
- information "extra" block. If one of the size or
629
- offset fields in the Local or Central directory
630
- record is too small to hold the required data,
631
- a Zip64 extended information record is created.
632
- The order of the fields in the zip64 extended
633
- information record is fixed, but the fields MUST
634
- only appear if the corresponding Local or Central
635
- directory record field is set to 0xFFFF or 0xFFFFFFFF.
636
-
637
- Note: all fields stored in Intel low-byte/high-byte order.
638
-
639
- Value Size Description
640
- ----- ---- -----------
641
- (ZIP64) 0x0001 2 bytes Tag for this "extra" block type
642
- Size 2 bytes Size of this "extra" block
643
- Original
644
- Size 8 bytes Original uncompressed file size
645
- Compressed
646
- Size 8 bytes Size of compressed data
647
- Relative Header
648
- Offset 8 bytes Offset of local header record
649
- Disk Start
650
- Number 4 bytes Number of the disk on which
651
- this file starts
652
-
653
- This entry in the Local header MUST include BOTH original
654
- and compressed file size fields. If encrypting the
655
- central directory and bit 13 of the general purpose bit
656
- flag is set indicating masking, the value stored in the
657
- Local Header for the original file size will be zero.
658
-
659
-
660
- 4.5.4 -OS/2 Extra Field (0x0009):
661
-
662
- The following is the layout of the OS/2 attributes "extra"
663
- block. (Last Revision 09/05/95)
664
-
665
- Note: all fields stored in Intel low-byte/high-byte order.
666
-
667
- Value Size Description
668
- ----- ---- -----------
669
- (OS/2) 0x0009 2 bytes Tag for this "extra" block type
670
- TSize 2 bytes Size for the following data block
671
- BSize 4 bytes Uncompressed Block Size
672
- CType 2 bytes Compression type
673
- EACRC 4 bytes CRC value for uncompressed block
674
- (var) variable Compressed block
675
-
676
- The OS/2 extended attribute structure (FEA2LIST) is
677
- compressed and then stored in its entirety within this
678
- structure. There will only ever be one "block" of data in
679
- VarFields[].
680
-
681
- 4.5.5 -NTFS Extra Field (0x000a):
682
-
683
- The following is the layout of the NTFS attributes
684
- "extra" block. (Note: At this time the Mtime, Atime
685
- and Ctime values MAY be used on any WIN32 system.)
686
-
687
- Note: all fields stored in Intel low-byte/high-byte order.
688
-
689
- Value Size Description
690
- ----- ---- -----------
691
- (NTFS) 0x000a 2 bytes Tag for this "extra" block type
692
- TSize 2 bytes Size of the total "extra" block
693
- Reserved 4 bytes Reserved for future use
694
- Tag1 2 bytes NTFS attribute tag value #1
695
- Size1 2 bytes Size of attribute #1, in bytes
696
- (var) Size1 Attribute #1 data
697
- .
698
- .
699
- .
700
- TagN 2 bytes NTFS attribute tag value #N
701
- SizeN 2 bytes Size of attribute #N, in bytes
702
- (var) SizeN Attribute #N data
703
-
704
- For NTFS, values for Tag1 through TagN are as follows:
705
- (currently only one set of attributes is defined for NTFS)
706
-
707
- Tag Size Description
708
- ----- ---- -----------
709
- 0x0001 2 bytes Tag for attribute #1
710
- Size1 2 bytes Size of attribute #1, in bytes
711
- Mtime 8 bytes File last modification time
712
- Atime 8 bytes File last access time
713
- Ctime 8 bytes File creation time
714
-
715
- 4.5.6 -OpenVMS Extra Field (0x000c):
716
-
717
- The following is the layout of the OpenVMS attributes
718
- "extra" block.
719
-
720
- Note: all fields stored in Intel low-byte/high-byte order.
721
-
722
- Value Size Description
723
- ----- ---- -----------
724
- (VMS) 0x000c 2 bytes Tag for this "extra" block type
725
- TSize 2 bytes Size of the total "extra" block
726
- CRC 4 bytes 32-bit CRC for remainder of the block
727
- Tag1 2 bytes OpenVMS attribute tag value #1
728
- Size1 2 bytes Size of attribute #1, in bytes
729
- (var) Size1 Attribute #1 data
730
- .
731
- .
732
- .
733
- TagN 2 bytes OpenVMS attribute tag value #N
734
- SizeN 2 bytes Size of attribute #N, in bytes
735
- (var) SizeN Attribute #N data
736
-
737
- OpenVMS Extra Field Rules:
738
-
739
- 4.5.6.1. There will be one or more attributes present, which
740
- will each be preceded by the above TagX & SizeX values.
741
- These values are identical to the ATR$C_XXXX and ATR$S_XXXX
742
- constants which are defined in ATR.H under OpenVMS C. Neither
743
- of these values will ever be zero.
744
-
745
- 4.5.6.2. No word alignment or padding is performed.
746
-
747
- 4.5.6.3. A well-behaved PKZIP/OpenVMS program SHOULD NOT produce
748
- more than one sub-block with the same TagX value. Also, there MUST
749
- NOT be more than one "extra" block of type 0x000c in a particular
750
- directory record.
751
-
752
- 4.5.7 -UNIX Extra Field (0x000d):
753
-
754
- The following is the layout of the UNIX "extra" block.
755
- Note: all fields are stored in Intel low-byte/high-byte
756
- order.
757
-
758
- Value Size Description
759
- ----- ---- -----------
760
- (UNIX) 0x000d 2 bytes Tag for this "extra" block type
761
- TSize 2 bytes Size for the following data block
762
- Atime 4 bytes File last access time
763
- Mtime 4 bytes File last modification time
764
- Uid 2 bytes File user ID
765
- Gid 2 bytes File group ID
766
- (var) variable Variable length data field
767
-
768
- The variable length data field will contain file type
769
- specific data. Currently the only values allowed are
770
- the original "linked to" file names for hard or symbolic
771
- links, and the major and minor device node numbers for
772
- character and block device nodes. Since device nodes
773
- cannot be either symbolic or hard links, only one set of
774
- variable length data is stored. Link files will have the
775
- name of the original file stored. This name is NOT NULL
776
- terminated. Its size can be determined by checking TSize -
777
- 12. Device entries will have eight bytes stored as two 4
778
- byte entries (in little endian format). The first entry
779
- will be the major device number, and the second the minor
780
- device number.
781
-
782
- 4.5.8 -PATCH Descriptor Extra Field (0x000f):
783
-
784
- 4.5.8.1 The following is the layout of the Patch Descriptor
785
- "extra" block.
786
-
787
- Note: all fields stored in Intel low-byte/high-byte order.
788
-
789
- Value Size Description
790
- ----- ---- -----------
791
- (Patch) 0x000f 2 bytes Tag for this "extra" block type
792
- TSize 2 bytes Size of the total "extra" block
793
- Version 2 bytes Version of the descriptor
794
- Flags 4 bytes Actions and reactions (see below)
795
- OldSize 4 bytes Size of the file about to be patched
796
- OldCRC 4 bytes 32-bit CRC of the file to be patched
797
- NewSize 4 bytes Size of the resulting file
798
- NewCRC 4 bytes 32-bit CRC of the resulting file
799
-
800
- 4.5.8.2 Actions and reactions
801
-
802
- Bits Description
803
- ---- ----------------
804
- 0 Use for auto detection
805
- 1 Treat as a self-patch
806
- 2-3 RESERVED
807
- 4-5 Action (see below)
808
- 6-7 RESERVED
809
- 8-9 Reaction (see below) to absent file
810
- 10-11 Reaction (see below) to newer file
811
- 12-13 Reaction (see below) to unknown file
812
- 14-15 RESERVED
813
- 16-31 RESERVED
814
-
815
- 4.5.8.2.1 Actions
816
-
817
- Action Value
818
- ------ -----
819
- none 0
820
- add 1
821
- delete 2
822
- patch 3
823
-
824
- 4.5.8.2.2 Reactions
825
-
826
- Reaction Value
827
- -------- -----
828
- ask 0
829
- skip 1
830
- ignore 2
831
- fail 3
832
-
833
- 4.5.8.3 Patch support is provided by PKPatchMaker(tm) technology
834
- and is covered under U.S. Patents and Patents Pending. The use or
835
- implementation in a product of certain technological aspects set
836
- forth in the current APPNOTE, including those with regard to
837
- strong encryption or patching requires a license from PKWARE.
838
- Refer to the section in this document entitled "Incorporating
839
- PKWARE Proprietary Technology into Your Product" for more
840
- information.
841
-
842
- 4.5.9 -PKCS#7 Store for X.509 Certificates (0x0014):
843
-
844
- This field MUST contain information about each of the certificates
845
- files MAY be signed with. When the Central Directory Encryption
846
- feature is enabled for a ZIP file, this record will appear in
847
- the Archive Extra Data Record, otherwise it will appear in the
848
- first central directory record and will be ignored in any
849
- other record.
850
-
851
-
852
- Note: all fields stored in Intel low-byte/high-byte order.
853
-
854
- Value Size Description
855
- ----- ---- -----------
856
- (Store) 0x0014 2 bytes Tag for this "extra" block type
857
- TSize 2 bytes Size of the store data
858
- TData TSize Data about the store
859
-
860
-
861
- 4.5.10 -X.509 Certificate ID and Signature for individual file (0x0015):
862
-
863
- This field contains the information about which certificate in
864
- the PKCS#7 store was used to sign a particular file. It also
865
- contains the signature data. This field can appear multiple
866
- times, but can only appear once per certificate.
867
-
868
- Note: all fields stored in Intel low-byte/high-byte order.
869
-
870
- Value Size Description
871
- ----- ---- -----------
872
- (CID) 0x0015 2 bytes Tag for this "extra" block type
873
- TSize 2 bytes Size of data that follows
874
- TData TSize Signature Data
875
-
876
- 4.5.11 -X.509 Certificate ID and Signature for central directory (0x0016):
877
-
878
- This field contains the information about which certificate in
879
- the PKCS#7 store was used to sign the central directory structure.
880
- When the Central Directory Encryption feature is enabled for a
881
- ZIP file, this record will appear in the Archive Extra Data Record,
882
- otherwise it will appear in the first central directory record.
883
-
884
- Note: all fields stored in Intel low-byte/high-byte order.
885
-
886
- Value Size Description
887
- ----- ---- -----------
888
- (CDID) 0x0016 2 bytes Tag for this "extra" block type
889
- TSize 2 bytes Size of data that follows
890
- TData TSize Data
891
-
892
- 4.5.12 -Strong Encryption Header (0x0017):
893
-
894
- Value Size Description
895
- ----- ---- -----------
896
- 0x0017 2 bytes Tag for this "extra" block type
897
- TSize 2 bytes Size of data that follows
898
- Format 2 bytes Format definition for this record
899
- AlgID 2 bytes Encryption algorithm identifier
900
- Bitlen 2 bytes Bit length of encryption key
901
- Flags 2 bytes Processing flags
902
- CertData TSize-8 Certificate decryption extra field data
903
- (refer to the explanation for CertData
904
- in the section describing the
905
- Certificate Processing Method under
906
- the Strong Encryption Specification)
907
-
908
- See the section describing the Strong Encryption Specification
909
- for details. Refer to the section in this document entitled
910
- "Incorporating PKWARE Proprietary Technology into Your Product"
911
- for more information.
912
-
913
- 4.5.13 -Record Management Controls (0x0018):
914
-
915
- Value Size Description
916
- ----- ---- -----------
917
- (Rec-CTL) 0x0018 2 bytes Tag for this "extra" block type
918
- CSize 2 bytes Size of total extra block data
919
- Tag1 2 bytes Record control attribute 1
920
- Size1 2 bytes Size of attribute 1, in bytes
921
- Data1 Size1 Attribute 1 data
922
- .
923
- .
924
- .
925
- TagN 2 bytes Record control attribute N
926
- SizeN 2 bytes Size of attribute N, in bytes
927
- DataN SizeN Attribute N data
928
-
929
-
930
- 4.5.14 -PKCS#7 Encryption Recipient Certificate List (0x0019):
931
-
932
- This field MAY contain information about each of the certificates
933
- used in encryption processing and it can be used to identify who is
934
- allowed to decrypt encrypted files. This field SHOULD only appear
935
- in the archive extra data record. This field is not required and
936
- serves only to aid archive modifications by preserving public
937
- encryption key data. Individual security requirements may dictate
938
- that this data be omitted to deter information exposure.
939
-
940
- Note: all fields stored in Intel low-byte/high-byte order.
941
-
942
- Value Size Description
943
- ----- ---- -----------
944
- (CStore) 0x0019 2 bytes Tag for this "extra" block type
945
- TSize 2 bytes Size of the store data
946
- TData TSize Data about the store
947
-
948
- TData:
949
-
950
- Value Size Description
951
- ----- ---- -----------
952
- Version 2 bytes Format version number - MUST be 0x0001 at this time
953
- CStore (var) PKCS#7 data blob
954
-
955
- See the section describing the Strong Encryption Specification
956
- for details. Refer to the section in this document entitled
957
- "Incorporating PKWARE Proprietary Technology into Your Product"
958
- for more information.
959
-
960
- 4.5.15 -MVS Extra Field (0x0065):
961
-
962
- The following is the layout of the MVS "extra" block.
963
- Note: Some fields are stored in Big Endian format.
964
- All text is in EBCDIC format unless otherwise specified.
965
- Value Size Description
966
- ----- ---- -----------
967
- (MVS) 0x0065 2 bytes Tag for this "extra" block type
968
- TSize 2 bytes Size for the following data block
969
- ID 4 bytes EBCDIC "Z390" 0xE9F3F9F0 or
970
- "T4MV" for TargetFour
971
- (var) TSize-4 Attribute data (see APPENDIX B)
972
-
973
-
974
- 4.5.16 -OS/400 Extra Field (0x0065):
975
-
976
- The following is the layout of the OS/400 "extra" block.
977
- Note: Some fields are stored in Big Endian format.
978
- All text is in EBCDIC format unless otherwise specified.
979
-
980
- Value Size Description
981
- ----- ---- -----------
982
- (OS400) 0x0065 2 bytes Tag for this "extra" block type
983
- TSize 2 bytes Size for the following data block
984
- ID 4 bytes EBCDIC "I400" 0xC9F4F0F0 or
985
- "T4MV" for TargetFour
986
- (var) TSize-4 Attribute data (see APPENDIX A)
987
-
988
- 4.5.17 -Policy Decryption Key Record Extra Field (0x0021):
989
-
990
- The following is the layout of the Policy Decryption Key "extra" block.
991
- TData is a variable length, variable content field. It holds
992
- information about encryptions and/or encryption key sources.
993
- Contact PKWARE for information on current TData structures.
994
- Information in this "extra" block may alternatively be placed
995
- within comment fields. Refer to the section in this document
996
- entitled "Incorporating PKWARE Proprietary Technology into Your
997
- Product" for more information.
998
-
999
- Value Size Description
1000
- ----- ---- -----------
1001
- 0x0021 2 bytes Tag for this "extra" block type
1002
- TSize 2 bytes Size for the following data block
1003
- TData TSize Data about the key
1004
-
1005
- 4.5.18 -Key Provider Record Extra Field (0x0022):
1006
-
1007
- The following is the layout of the Key Provider "extra" block.
1008
- TData is a variable length, variable content field. It holds
1009
- information about encryptions and/or encryption key sources.
1010
- Contact PKWARE for information on current TData structures.
1011
- Information in this "extra" block may alternatively be placed
1012
- within comment fields. Refer to the section in this document
1013
- entitled "Incorporating PKWARE Proprietary Technology into Your
1014
- Product" for more information.
1015
-
1016
- Value Size Description
1017
- ----- ---- -----------
1018
- 0x0022 2 bytes Tag for this "extra" block type
1019
- TSize 2 bytes Size for the following data block
1020
- TData TSize Data about the key
1021
-
1022
- 4.5.19 -Policy Key Data Record Extra Field (0x0023):
1023
-
1024
- The following is the layout of the Policy Key Data "extra" block.
1025
- TData is a variable length, variable content field. It holds
1026
- information about encryptions and/or encryption key sources.
1027
- Contact PKWARE for information on current TData structures.
1028
- Information in this "extra" block may alternatively be placed
1029
- within comment fields. Refer to the section in this document
1030
- entitled "Incorporating PKWARE Proprietary Technology into Your
1031
- Product" for more information.
1032
-
1033
- Value Size Description
1034
- ----- ---- -----------
1035
- 0x0023 2 bytes Tag for this "extra" block type
1036
- TSize 2 bytes Size for the following data block
1037
- TData TSize Data about the key
1038
-
1039
- 4.6 Third Party Mappings
1040
- ------------------------
1041
-
1042
- 4.6.1 Third party mappings commonly used are:
1043
-
1044
- 0x07c8 Macintosh
1045
- 0x2605 ZipIt Macintosh
1046
- 0x2705 ZipIt Macintosh 1.3.5+
1047
- 0x2805 ZipIt Macintosh 1.3.5+
1048
- 0x334d Info-ZIP Macintosh
1049
- 0x4341 Acorn/SparkFS
1050
- 0x4453 Windows NT security descriptor (binary ACL)
1051
- 0x4704 VM/CMS
1052
- 0x470f MVS
1053
- 0x4b46 FWKCS MD5 (see below)
1054
- 0x4c41 OS/2 access control list (text ACL)
1055
- 0x4d49 Info-ZIP OpenVMS
1056
- 0x4f4c Xceed original location extra field
1057
- 0x5356 AOS/VS (ACL)
1058
- 0x5455 extended timestamp
1059
- 0x554e Xceed unicode extra field
1060
- 0x5855 Info-ZIP UNIX (original, also OS/2, NT, etc)
1061
- 0x6375 Info-ZIP Unicode Comment Extra Field
1062
- 0x6542 BeOS/BeBox
1063
- 0x7075 Info-ZIP Unicode Path Extra Field
1064
- 0x756e ASi UNIX
1065
- 0x7855 Info-ZIP UNIX (new)
1066
- 0xa11e Data Stream Alignment (Apache Commons-Compress)
1067
- 0xa220 Microsoft Open Packaging Growth Hint
1068
- 0xfd4a SMS/QDOS
1069
- 0x9901 AE-x encryption structure (see APPENDIX E)
1070
- 0x9902 unknown
1071
-
1072
-
1073
- Detailed descriptions of Extra Fields defined by third
1074
- party mappings will be documented as information on
1075
- these data structures is made available to PKWARE.
1076
- PKWARE does not guarantee the accuracy of any published
1077
- third party data.
1078
-
1079
- 4.6.2 Third-party Extra Fields MUST include a Header ID using
1080
- the format defined in the section of this document
1081
- titled Extensible Data Fields (section 4.5).
1082
-
1083
- The Data Size field indicates the size of the following
1084
- data block. Programs can use this value to skip to the
1085
- next header block, passing over any data blocks that are
1086
- not of interest.
1087
-
1088
- Note: As stated above, the size of the entire .ZIP file
1089
- header, including the file name, comment, and extra
1090
- field SHOULD NOT exceed 64K in size.
1091
-
1092
- 4.6.3 In case two different programs appropriate the same
1093
- Header ID value, it is strongly recommended that each
1094
- program SHOULD place a unique signature of at least two bytes in
1095
- size (and preferably 4 bytes or bigger) at the start of
1096
- each data area. Every program SHOULD verify that its
1097
- unique signature is present, in addition to the Header ID
1098
- value being correct, before assuming that it is a block of
1099
- known type.
1100
-
1101
- Third-party Mappings:
1102
-
1103
- 4.6.4 -ZipIt Macintosh Extra Field (long) (0x2605):
1104
-
1105
- The following is the layout of the ZipIt extra block
1106
- for Macintosh. The local-header and central-header versions
1107
- are identical. This block MUST be present if the file is
1108
- stored MacBinary-encoded and it SHOULD NOT be used if the file
1109
- is not stored MacBinary-encoded.
1110
-
1111
- Value Size Description
1112
- ----- ---- -----------
1113
- (Mac2) 0x2605 Short tag for this extra block type
1114
- TSize Short total data size for this block
1115
- "ZPIT" beLong extra-field signature
1116
- FnLen Byte length of FileName
1117
- FileName variable full Macintosh filename
1118
- FileType Byte[4] four-byte Mac file type string
1119
- Creator Byte[4] four-byte Mac creator string
1120
-
1121
-
1122
- 4.6.5 -ZipIt Macintosh Extra Field (short, for files) (0x2705):
1123
-
1124
- The following is the layout of a shortened variant of the
1125
- ZipIt extra block for Macintosh (without "full name" entry).
1126
- This variant is used by ZipIt 1.3.5 and newer for entries of
1127
- files (not directories) that do not have a MacBinary encoded
1128
- file. The local-header and central-header versions are identical.
1129
-
1130
- Value Size Description
1131
- ----- ---- -----------
1132
- (Mac2b) 0x2705 Short tag for this extra block type
1133
- TSize Short total data size for this block (12)
1134
- "ZPIT" beLong extra-field signature
1135
- FileType Byte[4] four-byte Mac file type string
1136
- Creator Byte[4] four-byte Mac creator string
1137
- fdFlags beShort attributes from FInfo.frFlags,
1138
- MAY be omitted
1139
- 0x0000 beShort reserved, MAY be omitted
1140
-
1141
-
1142
- 4.6.6 -ZipIt Macintosh Extra Field (short, for directories) (0x2805):
1143
-
1144
- The following is the layout of a shortened variant of the
1145
- ZipIt extra block for Macintosh used only for directory
1146
- entries. This variant is used by ZipIt 1.3.5 and newer to
1147
- save some optional Mac-specific information about directories.
1148
- The local-header and central-header versions are identical.
1149
-
1150
- Value Size Description
1151
- ----- ---- -----------
1152
- (Mac2c) 0x2805 Short tag for this extra block type
1153
- TSize Short total data size for this block (12)
1154
- "ZPIT" beLong extra-field signature
1155
- frFlags beShort attributes from DInfo.frFlags, MAY
1156
- be omitted
1157
- View beShort ZipIt view flag, MAY be omitted
1158
-
1159
-
1160
- The View field specifies ZipIt-internal settings as follows:
1161
-
1162
- Bits of the Flags:
1163
- bit 0 if set, the folder is shown expanded (open)
1164
- when the archive contents are viewed in ZipIt.
1165
- bits 1-15 reserved, zero;
1166
-
1167
-
1168
- 4.6.7 -FWKCS MD5 Extra Field (0x4b46):
1169
-
1170
- The FWKCS Contents_Signature System, used in
1171
- automatically identifying files independent of file name,
1172
- optionally adds and uses an extra field to support the
1173
- rapid creation of an enhanced contents_signature:
1174
-
1175
- Header ID = 0x4b46
1176
- Data Size = 0x0013
1177
- Preface = 'M','D','5'
1178
- followed by 16 bytes containing the uncompressed file's
1179
- 128_bit MD5 hash(1), low byte first.
1180
-
1181
- When FWKCS revises a .ZIP file central directory to add
1182
- this extra field for a file, it also replaces the
1183
- central directory entry for that file's uncompressed
1184
- file length with a measured value.
1185
-
1186
- FWKCS provides an option to strip this extra field, if
1187
- present, from a .ZIP file central directory. In adding
1188
- this extra field, FWKCS preserves .ZIP file Authenticity
1189
- Verification; if stripping this extra field, FWKCS
1190
- preserves all versions of AV through PKZIP version 2.04g.
1191
-
1192
- FWKCS, and FWKCS Contents_Signature System, are
1193
- trademarks of Frederick W. Kantor.
1194
-
1195
- (1) R. Rivest, RFC1321.TXT, MIT Laboratory for Computer
1196
- Science and RSA Data Security, Inc., April 1992.
1197
- ll.76-77: "The MD5 algorithm is being placed in the
1198
- public domain for review and possible adoption as a
1199
- standard."
1200
-
1201
-
1202
- 4.6.8 -Info-ZIP Unicode Comment Extra Field (0x6375):
1203
-
1204
- Stores the UTF-8 version of the file comment as stored in the
1205
- central directory header. (Last Revision 20070912)
1206
-
1207
- Value Size Description
1208
- ----- ---- -----------
1209
- (UCom) 0x6375 Short tag for this extra block type ("uc")
1210
- TSize Short total data size for this block
1211
- Version 1 byte version of this extra field, currently 1
1212
- ComCRC32 4 bytes Comment Field CRC32 Checksum
1213
- UnicodeCom Variable UTF-8 version of the entry comment
1214
-
1215
- Currently Version is set to the number 1. If there is a need
1216
- to change this field, the version will be incremented. Changes
1217
- MAY NOT be backward compatible so this extra field SHOULD NOT be
1218
- used if the version is not recognized.
1219
-
1220
- The ComCRC32 is the standard zip CRC32 checksum of the File Comment
1221
- field in the central directory header. This is used to verify that
1222
- the comment field has not changed since the Unicode Comment extra field
1223
- was created. This can happen if a utility changes the File Comment
1224
- field but does not update the UTF-8 Comment extra field. If the CRC
1225
- check fails, this Unicode Comment extra field SHOULD be ignored and
1226
- the File Comment field in the header SHOULD be used instead.
1227
-
1228
- The UnicodeCom field is the UTF-8 version of the File Comment field
1229
- in the header. As UnicodeCom is defined to be UTF-8, no UTF-8 byte
1230
- order mark (BOM) is used. The length of this field is determined by
1231
- subtracting the size of the previous fields from TSize. If both the
1232
- File Name and Comment fields are UTF-8, the new General Purpose Bit
1233
- Flag, bit 11 (Language encoding flag (EFS)), can be used to indicate
1234
- both the header File Name and Comment fields are UTF-8 and, in this
1235
- case, the Unicode Path and Unicode Comment extra fields are not
1236
- needed and SHOULD NOT be created. Note that, for backward
1237
- compatibility, bit 11 SHOULD only be used if the native character set
1238
- of the paths and comments being zipped up are already in UTF-8. It is
1239
- expected that the same file comment storage method, either general
1240
- purpose bit 11 or extra fields, be used in both the Local and Central
1241
- Directory Header for a file.
1242
-
1243
-
1244
- 4.6.9 -Info-ZIP Unicode Path Extra Field (0x7075):
1245
-
1246
- Stores the UTF-8 version of the file name field as stored in the
1247
- local header and central directory header. (Last Revision 20070912)
1248
-
1249
- Value Size Description
1250
- ----- ---- -----------
1251
- (UPath) 0x7075 Short tag for this extra block type ("up")
1252
- TSize Short total data size for this block
1253
- Version 1 byte version of this extra field, currently 1
1254
- NameCRC32 4 bytes File Name Field CRC32 Checksum
1255
- UnicodeName Variable UTF-8 version of the entry File Name
1256
-
1257
- Currently Version is set to the number 1. If there is a need
1258
- to change this field, the version will be incremented. Changes
1259
- MAY NOT be backward compatible so this extra field SHOULD NOT be
1260
- used if the version is not recognized.
1261
-
1262
- The NameCRC32 is the standard zip CRC32 checksum of the File Name
1263
- field in the header. This is used to verify that the header
1264
- File Name field has not changed since the Unicode Path extra field
1265
- was created. This can happen if a utility renames the File Name but
1266
- does not update the UTF-8 path extra field. If the CRC check fails,
1267
- this UTF-8 Path Extra Field SHOULD be ignored and the File Name field
1268
- in the header SHOULD be used instead.
1269
-
1270
- The UnicodeName is the UTF-8 version of the contents of the File Name
1271
- field in the header. As UnicodeName is defined to be UTF-8, no UTF-8
1272
- byte order mark (BOM) is used. The length of this field is determined
1273
- by subtracting the size of the previous fields from TSize. If both
1274
- the File Name and Comment fields are UTF-8, the new General Purpose
1275
- Bit Flag, bit 11 (Language encoding flag (EFS)), can be used to
1276
- indicate that both the header File Name and Comment fields are UTF-8
1277
- and, in this case, the Unicode Path and Unicode Comment extra fields
1278
- are not needed and SHOULD NOT be created. Note that, for backward
1279
- compatibility, bit 11 SHOULD only be used if the native character set
1280
- of the paths and comments being zipped up are already in UTF-8. It is
1281
- expected that the same file name storage method, either general
1282
- purpose bit 11 or extra fields, be used in both the Local and Central
1283
- Directory Header for a file.
1284
-
1285
-
1286
- 4.6.10 -Microsoft Open Packaging Growth Hint (0xa220):
1287
-
1288
- Value Size Description
1289
- ----- ---- -----------
1290
- 0xa220 Short tag for this extra block type
1291
- TSize Short size of Sig + PadVal + Padding
1292
- Sig Short verification signature (A028)
1293
- PadVal Short Initial padding value
1294
- Padding variable filled with NULL characters
1295
-
1296
- 4.6.11 -Data Stream Alignment (Apache Commons-Compress) (0xa11e):
1297
-
1298
- (per Zbynek Vyskovsky) Defines alignment of data stream of this
1299
- entry within the zip archive. Additionally, indicates whether the
1300
- compression method should be kept when re-compressing the zip file.
1301
-
1302
- The purpose of this extra field is to align specific resources to
1303
- word or page boundaries so they can be easily mapped into memory.
1304
-
1305
- Value Size Description
1306
- ----- ---- -----------
1307
- 0xa11e Short tag for this extra block type
1308
- TSize Short total data size for this block (2+padding)
1309
- alignment Short required alignment and indicator
1310
- 0x00 Variable padding
1311
-
1312
- The alignment field (lower 15 bits) defines the minimal alignment
1313
- required by the data stream. Bit 15 of alignment field indicates
1314
- whether the compression method of this entry can be changed when
1315
- recompressing the zip file. The value 0 means the compression method
1316
- should not be changed. The value 1 indicates the compression method
1317
- may be changed. The padding field contains padding to ensure the correct
1318
- alignment. It can be changed at any time when the offset or required
1319
- alignment changes. (see https://issues.apache.org/jira/browse/COMPRESS-391)
1320
-
1321
-
1322
- 4.7 Manifest Files
1323
- ------------------
1324
-
1325
- 4.7.1 Applications using ZIP files MAY have a need for additional
1326
- information that MUST be included with the files placed into
1327
- a ZIP file. Application specific information that cannot be
1328
- stored using the defined ZIP storage records SHOULD be stored
1329
- using the extensible Extra Field convention defined in this
1330
- document. However, some applications MAY use a manifest
1331
- file as a means for storing additional information. One
1332
- example is the META-INF/MANIFEST.MF file used in ZIP formatted
1333
- files having the .JAR extension (JAR files).
1334
-
1335
- 4.7.2 A manifest file is a file created for the application process
1336
- that requires this information. A manifest file MAY be of any
1337
- file type required by the defining application process. It is
1338
- placed within the same ZIP file as files to which this information
1339
- applies. By convention, this file is typically the first file placed
1340
- into the ZIP file and it MAY include a defined directory path.
1341
-
1342
- 4.7.3 Manifest files MAY be compressed or encrypted as needed for
1343
- application processing of the files inside the ZIP files.
1344
-
1345
- Manifest files are outside of the scope of this specification.
1346
-
1347
-
1348
- 5.0 Explanation of compression methods
1349
- --------------------------------------
1350
-
1351
-
1352
- 5.1 UnShrinking - Method 1
1353
- --------------------------
1354
-
1355
- 5.1.1 Shrinking is a Dynamic Ziv-Lempel-Welch compression algorithm
1356
- with partial clearing. The initial code size is 9 bits, and the
1357
- maximum code size is 13 bits. Shrinking differs from conventional
1358
- Dynamic Ziv-Lempel-Welch implementations in several respects:
1359
-
1360
- 5.1.2 The code size is controlled by the compressor, and is
1361
- not automatically increased when codes larger than the current
1362
- code size are created (but not necessarily used). When
1363
- the decompressor encounters the code sequence 256
1364
- (decimal) followed by 1, it SHOULD increase the code size
1365
- read from the input stream to the next bit size. No
1366
- blocking of the codes is performed, so the next code at
1367
- the increased size SHOULD be read from the input stream
1368
- immediately after where the previous code at the smaller
1369
- bit size was read. Again, the decompressor SHOULD NOT
1370
- increase the code size used until the sequence 256,1 is
1371
- encountered.
1372
-
1373
- 5.1.3 When the table becomes full, total clearing is not
1374
- performed. Rather, when the compressor emits the code
1375
- sequence 256,2 (decimal), the decompressor SHOULD clear
1376
- all leaf nodes from the Ziv-Lempel tree, and continue to
1377
- use the current code size. The nodes that are cleared
1378
- from the Ziv-Lempel tree are then re-used, with the lowest
1379
- code value re-used first, and the highest code value
1380
- re-used last. The compressor can emit the sequence 256,2
1381
- at any time.
1382
-
1383
- 5.2 Expanding - Methods 2-5
1384
- ---------------------------
1385
-
1386
- 5.2.1 The Reducing algorithm is actually a combination of two
1387
- distinct algorithms. The first algorithm compresses repeated
1388
- byte sequences, and the second algorithm takes the compressed
1389
- stream from the first algorithm and applies a probabilistic
1390
- compression method.
1391
-
1392
- 5.2.2 The probabilistic compression stores an array of 'follower
1393
- sets' S(j), for j=0 to 255, corresponding to each possible
1394
- ASCII character. Each set contains between 0 and 32
1395
- characters, to be denoted as S(j)[0],...,S(j)[m], where m<32.
1396
- The sets are stored at the beginning of the data area for a
1397
- Reduced file, in reverse order, with S(255) first, and S(0)
1398
- last.
1399
-
1400
- 5.2.3 The sets are encoded as { N(j), S(j)[0],...,S(j)[N(j)-1] },
1401
- where N(j) is the size of set S(j). N(j) can be 0, in which
1402
- case the follower set for S(j) is empty. Each N(j) value is
1403
- encoded in 6 bits, followed by N(j) eight bit character values
1404
- corresponding to S(j)[0] to S(j)[N(j)-1] respectively. If
1405
- N(j) is 0, then no values for S(j) are stored, and the value
1406
- for N(j-1) immediately follows.
1407
-
1408
- 5.2.4 Immediately after the follower sets, is the compressed data
1409
- stream. The compressed data stream can be interpreted for the
1410
- probabilistic decompression as follows:
1411
-
1412
- let Last-Character <- 0.
1413
- loop until done
1414
- if the follower set S(Last-Character) is empty then
1415
- read 8 bits from the input stream, and copy this
1416
- value to the output stream.
1417
- otherwise if the follower set S(Last-Character) is non-empty then
1418
- read 1 bit from the input stream.
1419
- if this bit is not zero then
1420
- read 8 bits from the input stream, and copy this
1421
- value to the output stream.
1422
- otherwise if this bit is zero then
1423
- read B(N(Last-Character)) bits from the input
1424
- stream, and assign this value to I.
1425
- Copy the value of S(Last-Character)[I] to the
1426
- output stream.
1427
-
1428
- assign the last value placed on the output stream to
1429
- Last-Character.
1430
- end loop
1431
-
1432
- B(N(j)) is defined as the minimal number of bits required to
1433
- encode the value N(j)-1.
1434
-
1435
- 5.2.5 The decompressed stream from above can then be expanded to
1436
- re-create the original file as follows:
1437
-
1438
- let State <- 0.
1439
-
1440
- loop until done
1441
- read 8 bits from the input stream into C.
1442
- case State of
1443
- 0: if C is not equal to DLE (144 decimal) then
1444
- copy C to the output stream.
1445
- otherwise if C is equal to DLE then
1446
- let State <- 1.
1447
-
1448
- 1: if C is non-zero then
1449
- let V <- C.
1450
- let Len <- L(V)
1451
- let State <- F(Len).
1452
- otherwise if C is zero then
1453
- copy the value 144 (decimal) to the output stream.
1454
- let State <- 0
1455
-
1456
- 2: let Len <- Len + C
1457
- let State <- 3.
1458
-
1459
- 3: move backwards D(V,C) bytes in the output stream
1460
- (if this position is before the start of the output
1461
- stream, then assume that all the data before the
1462
- start of the output stream is filled with zeros).
1463
- copy Len+3 bytes from this position to the output stream.
1464
- let State <- 0.
1465
- end case
1466
- end loop
1467
-
1468
- The functions F, L, and D are dependent on the 'compression
1469
- factor', 1 through 4, and are defined as follows:
1470
-
1471
- For compression factor 1:
1472
- L(X) equals the lower 7 bits of X.
1473
- F(X) equals 2 if X equals 127 otherwise F(X) equals 3.
1474
- D(X,Y) equals the (upper 1 bit of X) * 256 + Y + 1.
1475
- For compression factor 2:
1476
- L(X) equals the lower 6 bits of X.
1477
- F(X) equals 2 if X equals 63 otherwise F(X) equals 3.
1478
- D(X,Y) equals the (upper 2 bits of X) * 256 + Y + 1.
1479
- For compression factor 3:
1480
- L(X) equals the lower 5 bits of X.
1481
- F(X) equals 2 if X equals 31 otherwise F(X) equals 3.
1482
- D(X,Y) equals the (upper 3 bits of X) * 256 + Y + 1.
1483
- For compression factor 4:
1484
- L(X) equals the lower 4 bits of X.
1485
- F(X) equals 2 if X equals 15 otherwise F(X) equals 3.
1486
- D(X,Y) equals the (upper 4 bits of X) * 256 + Y + 1.
1487
-
1488
- 5.3 Imploding - Method 6
1489
- ------------------------
1490
-
1491
- 5.3.1 The Imploding algorithm is actually a combination of two
1492
- distinct algorithms. The first algorithm compresses repeated byte
1493
- sequences using a sliding dictionary. The second algorithm is
1494
- used to compress the encoding of the sliding dictionary output,
1495
- using multiple Shannon-Fano trees.
1496
-
1497
- 5.3.2 The Imploding algorithm can use a 4K or 8K sliding dictionary
1498
- size. The dictionary size used can be determined by bit 1 in the
1499
- general purpose flag word; a 0 bit indicates a 4K dictionary
1500
- while a 1 bit indicates an 8K dictionary.
1501
-
1502
- 5.3.3 The Shannon-Fano trees are stored at the start of the
1503
- compressed file. The number of trees stored is defined by bit 2 in
1504
- the general purpose flag word; a 0 bit indicates two trees stored,
1505
- a 1 bit indicates three trees are stored. If 3 trees are stored,
1506
- the first Shannon-Fano tree represents the encoding of the
1507
- Literal characters, the second tree represents the encoding of
1508
- the Length information, the third represents the encoding of the
1509
- Distance information. When 2 Shannon-Fano trees are stored, the
1510
- Length tree is stored first, followed by the Distance tree.
1511
-
1512
- 5.3.4 The Literal Shannon-Fano tree, if present, is used to represent
1513
- the entire ASCII character set, and contains 256 values. This
1514
- tree is used to compress any data not compressed by the sliding
1515
- dictionary algorithm. When this tree is present, the Minimum
1516
- Match Length for the sliding dictionary is 3. If this tree is
1517
- not present, the Minimum Match Length is 2.
1518
-
1519
- 5.3.5 The Length Shannon-Fano tree is used to compress the Length
1520
- part of the (length,distance) pairs from the sliding dictionary
1521
- output. The Length tree contains 64 values, ranging from the
1522
- Minimum Match Length, to 63 plus the Minimum Match Length.
1523
-
1524
- 5.3.6 The Distance Shannon-Fano tree is used to compress the Distance
1525
- part of the (length,distance) pairs from the sliding dictionary
1526
- output. The Distance tree contains 64 values, ranging from 0 to
1527
- 63, representing the upper 6 bits of the distance value. The
1528
- distance values themselves will be between 0 and the sliding
1529
- dictionary size, either 4K or 8K.
1530
-
1531
- 5.3.7 The Shannon-Fano trees themselves are stored in a compressed
1532
- format. The first byte of the tree data represents the number of
1533
- bytes of data representing the (compressed) Shannon-Fano tree
1534
- minus 1. The remaining bytes represent the Shannon-Fano tree
1535
- data encoded as:
1536
-
1537
- High 4 bits: Number of values at this bit length + 1. (1 - 16)
1538
- Low 4 bits: Bit Length needed to represent value + 1. (1 - 16)
1539
-
1540
- 5.3.8 The Shannon-Fano codes can be constructed from the bit lengths
1541
- using the following algorithm:
1542
-
1543
- 1) Sort the Bit Lengths in ascending order, while retaining the
1544
- order of the original lengths stored in the file.
1545
-
1546
- 2) Generate the Shannon-Fano trees:
1547
-
1548
- Code <- 0
1549
- CodeIncrement <- 0
1550
- LastBitLength <- 0
1551
- i <- number of Shannon-Fano codes - 1 (either 255 or 63)
1552
-
1553
- loop while i >= 0
1554
- Code = Code + CodeIncrement
1555
- if BitLength(i) <> LastBitLength then
1556
- LastBitLength=BitLength(i)
1557
- CodeIncrement = 1 shifted left (16 - LastBitLength)
1558
- ShannonCode(i) = Code
1559
- i <- i - 1
1560
- end loop
1561
-
1562
- 3) Reverse the order of all the bits in the above ShannonCode()
1563
- vector, so that the most significant bit becomes the least
1564
- significant bit. For example, the value 0x1234 (hex) would
1565
- become 0x2C48 (hex).
1566
-
1567
- 4) Restore the order of Shannon-Fano codes as originally stored
1568
- within the file.
1569
-
1570
- Example:
1571
-
1572
- This example will show the encoding of a Shannon-Fano tree
1573
- of size 8. Notice that the actual Shannon-Fano trees used
1574
- for Imploding are either 64 or 256 entries in size.
1575
-
1576
- Example: 0x02, 0x42, 0x01, 0x13
1577
-
1578
- The first byte indicates 3 values in this table. Decoding the
1579
- bytes:
1580
- 0x42 = 5 codes of 3 bits long
1581
- 0x01 = 1 code of 2 bits long
1582
- 0x13 = 2 codes of 4 bits long
1583
-
1584
- This would generate the original bit length array of:
1585
- (3, 3, 3, 3, 3, 2, 4, 4)
1586
-
1587
- There are 8 codes in this table for the values 0 thru 7. Using
1588
- the algorithm to obtain the Shannon-Fano codes produces:
1589
-
1590
- Reversed Order Original
1591
- Val Sorted Constructed Code Value Restored Length
1592
- --- ------ ----------------- -------- -------- ------
1593
- 0: 2 1100000000000000 11 101 3
1594
- 1: 3 1010000000000000 101 001 3
1595
- 2: 3 1000000000000000 001 110 3
1596
- 3: 3 0110000000000000 110 010 3
1597
- 4: 3 0100000000000000 010 100 3
1598
- 5: 3 0010000000000000 100 11 2
1599
- 6: 4 0001000000000000 1000 1000 4
1600
- 7: 4 0000000000000000 0000 0000 4
1601
-
1602
- The values in the Val, Order Restored and Original Length columns
1603
- now represent the Shannon-Fano encoding tree that can be used for
1604
- decoding the Shannon-Fano encoded data. How to parse the
1605
- variable length Shannon-Fano values from the data stream is beyond
1606
- the scope of this document. (See the references listed at the end of
1607
- this document for more information.) However, traditional decoding
1608
- schemes used for Huffman variable length decoding, such as the
1609
- Greenlaw algorithm, can be successfully applied.
1610
-
1611
- 5.3.9 The compressed data stream begins immediately after the
1612
- compressed Shannon-Fano data. The compressed data stream can be
1613
- interpreted as follows:
1614
-
1615
- loop until done
1616
- read 1 bit from input stream.
1617
-
1618
- if this bit is non-zero then (encoded data is literal data)
1619
- if Literal Shannon-Fano tree is present
1620
- read and decode character using Literal Shannon-Fano tree.
1621
- otherwise
1622
- read 8 bits from input stream.
1623
- copy character to the output stream.
1624
- otherwise (encoded data is sliding dictionary match)
1625
- if 8K dictionary size
1626
- read 7 bits for offset Distance (lower 7 bits of offset).
1627
- otherwise
1628
- read 6 bits for offset Distance (lower 6 bits of offset).
1629
-
1630
- using the Distance Shannon-Fano tree, read and decode the
1631
- upper 6 bits of the Distance value.
1632
-
1633
- using the Length Shannon-Fano tree, read and decode
1634
- the Length value.
1635
-
1636
- Length <- Length + Minimum Match Length
1637
-
1638
- if Length = 63 + Minimum Match Length
1639
- read 8 bits from the input stream,
1640
- add this value to Length.
1641
-
1642
- move backwards Distance+1 bytes in the output stream, and
1643
- copy Length characters from this position to the output
1644
- stream. (if this position is before the start of the output
1645
- stream, then assume that all the data before the start of
1646
- the output stream is filled with zeros).
1647
- end loop
1648
-
1649
- 5.4 Tokenizing - Method 7
1650
- -------------------------
1651
-
1652
- 5.4.1 This method is not used by PKZIP.
1653
-
1654
- 5.5 Deflating - Method 8
1655
- ------------------------
1656
-
1657
- 5.5.1 The Deflate algorithm is similar to the Implode algorithm using
1658
- a sliding dictionary of up to 32K with secondary compression
1659
- from Huffman/Shannon-Fano codes.
1660
-
1661
- 5.5.2 The compressed data is stored in blocks with a header describing
1662
- the block and the Huffman codes used in the data block. The header
1663
- format is as follows:
1664
-
1665
- Bit 0: Last Block bit This bit is set to 1 if this is the last
1666
- compressed block in the data.
1667
- Bits 1-2: Block type
1668
- 00 (0) - Block is stored - All stored data is byte aligned.
1669
- Skip bits until next byte, then next word = block
1670
- length, followed by the ones' complement of the block
1671
- length word. Remaining data in block is the stored
1672
- data.
1673
-
1674
- 01 (1) - Use fixed Huffman codes for literal and distance codes.
1675
- Lit Code Bits Dist Code Bits
1676
- --------- ---- --------- ----
1677
- 0 - 143 8 0 - 31 5
1678
- 144 - 255 9
1679
- 256 - 279 7
1680
- 280 - 287 8
1681
-
1682
- Literal codes 286-287 and distance codes 30-31 are
1683
- never used but participate in the Huffman construction.
1684
-
1685
- 10 (2) - Dynamic Huffman codes. (See expanding Huffman codes)
1686
-
1687
- 11 (3) - Reserved - Flag an "Error in compressed data" if seen.
1688
-
1689
- 5.5.3 Expanding Huffman Codes
1690
-
1691
- If the data block is stored with dynamic Huffman codes, the Huffman
1692
- codes are sent in the following compressed format:
1693
-
1694
- 5 Bits: # of Literal codes sent - 257 (257 - 286)
1695
- All other codes are never sent.
1696
- 5 Bits: # of Dist codes - 1 (1 - 32)
1697
- 4 Bits: # of Bit Length codes - 4 (4 - 19)
1698
-
1699
- The Huffman codes are sent as bit lengths and the codes are built as
1700
- described in the implode algorithm. The bit lengths themselves are
1701
- compressed with Huffman codes. There are 19 bit length codes:
1702
-
1703
- 0 - 15: Represent bit lengths of 0 - 15
1704
- 16: Copy the previous bit length 3 - 6 times.
1705
- The next 2 bits indicate repeat length (0 = 3, ... ,3 = 6)
1706
- Example: Codes 8, 16 (+2 bits 11), 16 (+2 bits 10) will
1707
- expand to 12 bit lengths of 8 (1 + 6 + 5)
1708
- 17: Repeat a bit length of 0 for 3 - 10 times. (3 bits of length)
1709
- 18: Repeat a bit length of 0 for 11 - 138 times (7 bits of length)
1710
-
1711
- The lengths of the bit length codes are sent packed 3 bits per value
1712
- (0 - 7) in the following order:
1713
-
1714
- 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
1715
-
1716
- The Huffman codes SHOULD be built as described in the Implode algorithm
1717
- except codes are assigned starting at the shortest bit length, i.e. the
1718
- shortest code SHOULD be all 0's rather than all 1's. Also, codes with
1719
- a bit length of zero do not participate in the tree construction. The
1720
- codes are then used to decode the bit lengths for the literal and
1721
- distance tables.
1722
-
1723
- The bit lengths for the literal tables are sent first with the number
1724
- of entries sent described by the 5 bits sent earlier. There are up
1725
- to 286 literal characters; the first 256 represent the respective 8
1726
- bit character, code 256 represents the End-Of-Block code, the remaining
1727
- 29 codes represent copy lengths of 3 thru 258. There are up to 30
1728
- distance codes representing distances from 1 thru 32k as described
1729
- below.
1730
-
1731
- Length Codes
1732
- ------------
1733
- Extra Extra Extra Extra
1734
- Code Bits Length Code Bits Lengths Code Bits Lengths Code Bits Length(s)
1735
- ---- ---- ------ ---- ---- ------- ---- ---- ------- ---- ---- ---------
1736
- 257 0 3 265 1 11,12 273 3 35-42 281 5 131-162
1737
- 258 0 4 266 1 13,14 274 3 43-50 282 5 163-194
1738
- 259 0 5 267 1 15,16 275 3 51-58 283 5 195-226
1739
- 260 0 6 268 1 17,18 276 3 59-66 284 5 227-257
1740
- 261 0 7 269 2 19-22 277 4 67-82 285 0 258
1741
- 262 0 8 270 2 23-26 278 4 83-98
1742
- 263 0 9 271 2 27-30 279 4 99-114
1743
- 264 0 10 272 2 31-34 280 4 115-130
1744
-
1745
- Distance Codes
1746
- --------------
1747
- Extra Extra Extra Extra
1748
- Code Bits Dist Code Bits Dist Code Bits Distance Code Bits Distance
1749
- ---- ---- ---- ---- ---- ------ ---- ---- -------- ---- ---- --------
1750
- 0 0 1 8 3 17-24 16 7 257-384 24 11 4097-6144
1751
- 1 0 2 9 3 25-32 17 7 385-512 25 11 6145-8192
1752
- 2 0 3 10 4 33-48 18 8 513-768 26 12 8193-12288
1753
- 3 0 4 11 4 49-64 19 8 769-1024 27 12 12289-16384
1754
- 4 1 5,6 12 5 65-96 20 9 1025-1536 28 13 16385-24576
1755
- 5 1 7,8 13 5 97-128 21 9 1537-2048 29 13 24577-32768
1756
- 6 2 9-12 14 6 129-192 22 10 2049-3072
1757
- 7 2 13-16 15 6 193-256 23 10 3073-4096
1758
-
1759
- 5.5.4 The compressed data stream begins immediately after the
1760
- compressed header data. The compressed data stream can be
1761
- interpreted as follows:
1762
-
1763
- do
1764
- read header from input stream.
1765
-
1766
- if stored block
1767
- skip bits until byte aligned
1768
- read count and 1's complement of count
1769
- copy count bytes data block
1770
- otherwise
1771
- loop until end of block code sent
1772
- decode literal character from input stream
1773
- if literal < 256
1774
- copy character to the output stream
1775
- otherwise
1776
- if literal = end of block
1777
- break from loop
1778
- otherwise
1779
- decode distance from input stream
1780
-
1781
- move backwards distance bytes in the output stream, and
1782
- copy length characters from this position to the output
1783
- stream.
1784
- end loop
1785
- while not last block
1786
-
1787
- if data descriptor exists
1788
- skip bits until byte aligned
1789
- read crc and sizes
1790
- endif
1791
-
1792
- 5.6 Enhanced Deflating - Method 9
1793
- ---------------------------------
1794
-
1795
- 5.6.1 The Enhanced Deflating algorithm is similar to Deflate but uses
1796
- a sliding dictionary of up to 64K. Deflate64(tm) is supported
1797
- by the Deflate extractor.
1798
-
1799
- 5.7 BZIP2 - Method 12
1800
- ---------------------
1801
-
1802
- 5.7.1 BZIP2 is an open-source data compression algorithm developed by
1803
- Julian Seward. Information and source code for this algorithm
1804
- can be found on the internet.
1805
-
1806
- 5.8 LZMA - Method 14
1807
- ---------------------
1808
-
1809
- 5.8.1 LZMA is a block-oriented, general purpose data compression
1810
- algorithm developed and maintained by Igor Pavlov. It is a derivative
1811
- of LZ77 that utilizes Markov chains and a range coder. Information and
1812
- source code for this algorithm can be found on the internet. Consult
1813
- with the author of this algorithm for information on terms or
1814
- restrictions on use.
1815
-
1816
- Support for LZMA within the ZIP format is defined as follows:
1817
-
1818
- 5.8.2 The Compression method field within the ZIP Local and Central
1819
- Header records will be set to the value 14 to indicate data was
1820
- compressed using LZMA.
1821
-
1822
- 5.8.3 The Version needed to extract field within the ZIP Local and
1823
- Central Header records will be set to 6.3 to indicate the minimum
1824
- ZIP format version supporting this feature.
1825
-
1826
- 5.8.4 File data compressed using the LZMA algorithm MUST be placed
1827
- immediately following the Local Header for the file. If a standard
1828
- ZIP encryption header is required, it will follow the Local Header
1829
- and will precede the LZMA compressed file data segment. The location
1830
- of LZMA compressed data segment within the ZIP format will be as shown:
1831
-
1832
- [local header file 1]
1833
- [encryption header file 1]
1834
- [LZMA compressed data segment for file 1]
1835
- [data descriptor 1]
1836
- [local header file 2]
1837
-
1838
- 5.8.5 The encryption header and data descriptor records MAY
1839
- be conditionally present. The LZMA Compressed Data Segment
1840
- will consist of an LZMA Properties Header followed by the
1841
- LZMA Compressed Data as shown:
1842
-
1843
- [LZMA properties header for file 1]
1844
- [LZMA compressed data for file 1]
1845
-
1846
- 5.8.6 The LZMA Compressed Data will be stored as provided by the
1847
- LZMA compression library. Compressed size, uncompressed size and
1848
- other file characteristics about the file being compressed MUST be
1849
- stored in standard ZIP storage format.
1850
-
1851
- 5.8.7 The LZMA Properties Header will store specific data required
1852
- to decompress the LZMA compressed Data. This data is set by the
1853
- LZMA compression engine using the function WriteCoderProperties()
1854
- as documented within the LZMA SDK.
1855
-
1856
- 5.8.8 Storage fields for the property information within the LZMA
1857
- Properties Header are as follows:
1858
-
1859
- LZMA Version Information 2 bytes
1860
- LZMA Properties Size 2 bytes
1861
- LZMA Properties Data variable, defined by "LZMA Properties Size"
1862
-
1863
- 5.8.8.1 LZMA Version Information - this field identifies which version
1864
- of the LZMA SDK was used to compress a file. The first byte will
1865
- store the major version number of the LZMA SDK and the second
1866
- byte will store the minor number.
1867
-
1868
- 5.8.8.2 LZMA Properties Size - this field defines the size of the
1869
- remaining property data. Typically this size SHOULD be determined by
1870
- the version of the SDK. This size field is included as a convenience
1871
- and to help avoid any ambiguity arising in the future due
1872
- to changes in this compression algorithm.
1873
-
1874
- 5.8.8.3 LZMA Property Data - this variable sized field records the
1875
- required values for the decompressor as defined by the LZMA SDK.
1876
- The data stored in this field SHOULD be obtained using the
1877
- WriteCoderProperties() in the version of the SDK defined by
1878
- the "LZMA Version Information" field.
1879
-
1880
- 5.8.8.4 The layout of the "LZMA Properties Data" field is a function of
1881
- the LZMA compression algorithm. It is possible that this layout MAY be
1882
- changed by the author over time. The data layout in version 4.3 of the
1883
- LZMA SDK defines a 5 byte array that uses 4 bytes to store the dictionary
1884
- size in little-endian order. This is preceded by a single packed byte as
1885
- the first element of the array that contains the following fields:
1886
-
1887
- PosStateBits
1888
- LiteralPosStateBits
1889
- LiteralContextBits
1890
-
1891
- Refer to the LZMA documentation for a more detailed explanation of
1892
- these fields.
1893
-
1894
- 5.8.9 Data compressed with method 14, LZMA, MAY include an end-of-stream
1895
- (EOS) marker ending the compressed data stream. This marker is not
1896
- required, but its use is highly recommended to facilitate processing
1897
- and implementers SHOULD include the EOS marker whenever possible.
1898
- When the EOS marker is used, general purpose bit 1 MUST be set. If
1899
- general purpose bit 1 is not set, the EOS marker is not present.
1900
-
1901
- 5.9 WavPack - Method 97
1902
- -----------------------
1903
-
1904
- 5.9.1 Information describing the use of compression method 97 is
1905
- provided by WinZIP International, LLC. This method relies on the
1906
- open source WavPack audio compression utility developed by David Bryant.
1907
- Information on WavPack is available at www.wavpack.com. Please consult
1908
- with the author of this algorithm for information on terms and
1909
- restrictions on use.
1910
-
1911
- 5.9.2 WavPack data for a file begins immediately after the end of the
1912
- local header data. This data is the output from WavPack compression
1913
- routines. Within the ZIP file, the use of WavPack compression is
1914
- indicated by setting the compression method field to a value of 97
1915
- in both the local header and the central directory header. The Version
1916
- needed to extract and version made by fields use the same values as are
1917
- used for data compressed using the Deflate algorithm.
1918
-
1919
- 5.9.3 An implementation note for storing digital sample data when using
1920
- WavPack compression within ZIP files is that all of the bytes of
1921
- the sample data SHOULD be compressed. This includes any unused
1922
- bits up to the byte boundary. An example is a 2 byte sample that
1923
- uses only 12 bits for the sample data with 4 unused bits. If only
1924
- 12 bits are passed as the sample size to the WavPack routines, the 4
1925
- unused bits will be set to 0 on extraction regardless of their original
1926
- state. To avoid this, the full 16 bits of the sample data size
1927
- SHOULD be provided.
1928
-
1929
- 5.10 PPMd - Method 98
1930
- ---------------------
1931
-
1932
- 5.10.1 PPMd is a data compression algorithm developed by Dmitry Shkarin
1933
- which includes a carryless rangecoder developed by Dmitry Subbotin.
1934
- This algorithm is based on predictive phrase matching on multiple
1935
- order contexts. Information and source code for this algorithm
1936
- can be found on the internet. Consult with the author of this
1937
- algorithm for information on terms or restrictions on use.
1938
-
1939
- 5.10.2 Support for PPMd within the ZIP format currently is provided only
1940
- for version I, revision 1 of the algorithm. Storage requirements
1941
- for using this algorithm are as follows:
1942
-
1943
- 5.10.3 Parameters needed to control the algorithm are stored in the two
1944
- bytes immediately preceding the compressed data. These bytes are
1945
- used to store the following fields:
1946
-
1947
- Model order - sets the maximum model order, default is 8, possible
1948
- values are from 2 to 16 inclusive
1949
-
1950
- Sub-allocator size - sets the size of sub-allocator in MB, default is 50,
1951
- possible values are from 1MB to 256MB inclusive
1952
-
1953
- Model restoration method - sets the method used to restart context
1954
- model at memory insufficiency, values are:
1955
-
1956
- 0 - restarts model from scratch - default
1957
- 1 - cut off model - decreases performance by as much as 2x
1958
- 2 - freeze context tree - not recommended
1959
-
1960
- 5.10.4 An example for packing these fields into the 2 byte storage field is
1961
- illustrated below. These values are stored in Intel low-byte/high-byte
1962
- order.
1963
-
1964
- wPPMd = (Model order - 1) +
1965
- ((Sub-allocator size - 1) << 4) +
1966
- (Model restoration method << 12)
1967
-
1968
-
1969
- 5.11 AE-x Encryption marker - Method 99
1970
- -------------------------------------------
1971
-
1972
- 5.12 JPEG variant - Method 96
1973
- -------------------------------------------
1974
-
1975
- 5.13 PKWARE Data Compression Library Imploding - Method 10
1976
- -----------------------------------------------------------
1977
-
1978
- 5.14 Reserved - Method 11
1979
- -------------------------------------------
1980
-
1981
- 5.15 Reserved - Method 13
1982
- -------------------------------------------
1983
-
1984
- 5.16 Reserved - Method 15
1985
- -------------------------------------------
1986
-
1987
- 5.17 IBM z/OS CMPSC Compression - Method 16
1988
- -------------------------------------------
1989
-
1990
- Method 16 utilizes the IBM hardware compression facility available
1991
- on most IBM mainframes. Hardware compression can significantly
1992
- increase the speed of data compression. This method uses a variant
1993
- of the LZ78 algorithm. CMPSC hardware compression is performed
1994
- using the COMPRESSION CALL instruction.
1995
-
1996
- ZIP archives can be created using this method only on mainframes
1997
- supporting the CP instruction. Extraction MAY occur on any
1998
- platform supporting this compression algorithm. Use of this
1999
- algorithm requires creation of a compression dictionary and
2000
- an expansion dictionary. The expansion dictionary MUST be
2001
- placed into the ZIP archive for use on the system where
2002
- extraction will occur.
2003
-
2004
- Additional information on this compression algorithm and dictionaries
2005
- can be found in the IBM provided document titled IBM ESA/390 Data
2006
- Compression (SA22-7208-01). Storage requirements for using CMPSC
2007
- compression are as follows.
2008
-
2009
- The format for the compressed data stream placed into the ZIP
2010
- archive following the Local Header is:
2011
-
2012
- [dictionary header]
2013
- [expansion dictionary]
2014
- [CMPSC compressed data]
2015
-
2016
- If encryption is used to encrypt a file compressed with CMPSC, these
2017
- sections MUST be encrypted as a single entity.
2018
-
2019
- The format of the dictionary header is:
2020
-
2021
- Value Size Description
2022
- ----- ---- -----------
2023
- Version 1 byte 1
2024
- Flags/Symsize 1 byte Processing flags and
2025
- symbol size
2026
- DictionaryLen 4 bytes Length of the
2027
- expansion dictionary
2028
-
2029
- Explanation of processing flags and symbol size:
2030
-
2031
- The high 4 bits are used to store the processing flags. The low
2032
- 4 bits represent the size of a symbol, in bits (values range
2033
- from 9-13). Flag values are defined below.
2034
-
2035
- 0x80 - expansion dictionary
2036
- 0x40 - expansion dictionary is compressed using Deflate
2037
- 0x20 - Reserved
2038
- 0x10 - Reserved
2039
-
2040
-
2041
- 5.18 Reserved - Method 17
2042
- -------------------------------------------
2043
-
2044
- 5.19 IBM TERSE - Method 18
2045
- -------------------------------------------
2046
-
2047
- 5.20 IBM LZ77 z Architecture - Method 19
2048
- -----------------------------------------
2049
-
2050
- 6.0 Traditional PKWARE Encryption
2051
- ----------------------------------
2052
-
2053
- 6.0.1 The following information discusses the decryption steps
2054
- required to support traditional PKWARE encryption. This
2055
- form of encryption is considered weak by today's standards
2056
- and its use is recommended only for situations with
2057
- low security needs or for compatibility with older .ZIP
2058
- applications.
2059
-
2060
- 6.1 Traditional PKWARE Decryption
2061
- ---------------------------------
2062
-
2063
- 6.1.1 PKWARE is grateful to Mr. Roger Schlafly for his expert
2064
- contribution towards the development of PKWARE's traditional
2065
- encryption.
2066
-
2067
- 6.1.2 PKZIP encrypts the compressed data stream. Encrypted files
2068
- MUST be decrypted before they can be extracted to their original
2069
- form.
2070
-
2071
- 6.1.3 Each encrypted file has an extra 12 bytes stored at the start
2072
- of the data area defining the encryption header for that file. The
2073
- encryption header is originally set to random values, and then
2074
- itself encrypted, using three, 32-bit keys. The key values are
2075
- initialized using the supplied encryption password. After each byte
2076
- is encrypted, the keys are then updated using pseudo-random number
2077
- generation techniques in combination with the same CRC-32 algorithm
2078
- used in PKZIP and described elsewhere in this document.
2079
-
2080
- 6.1.4 The following are the basic steps required to decrypt a file:
2081
-
2082
- 1) Initialize the three 32-bit keys with the password.
2083
- 2) Read and decrypt the 12-byte encryption header, further
2084
- initializing the encryption keys.
2085
- 3) Read and decrypt the compressed data stream using the
2086
- encryption keys.
2087
-
2088
- 6.1.5 Initializing the encryption keys
2089
-
2090
- Key(0) <- 305419896
2091
- Key(1) <- 591751049
2092
- Key(2) <- 878082192
2093
-
2094
- loop for i <- 0 to length(password)-1
2095
- update_keys(password(i))
2096
- end loop
2097
-
2098
- Where update_keys() is defined as:
2099
-
2100
- update_keys(char):
2101
- Key(0) <- crc32(key(0),char)
2102
- Key(1) <- Key(1) + (Key(0) & 000000ffH)
2103
- Key(1) <- Key(1) * 134775813 + 1
2104
- Key(2) <- crc32(key(2),key(1) >> 24)
2105
- end update_keys
2106
-
2107
- Where crc32(old_crc,char) is a routine that given a CRC value and a
2108
- character, returns an updated CRC value after applying the CRC-32
2109
- algorithm described elsewhere in this document.
2110
-
2111
- 6.1.6 Decrypting the encryption header
2112
-
2113
- The purpose of this step is to further initialize the encryption
2114
- keys, based on random data, to render a plaintext attack on the
2115
- data ineffective.
2116
-
2117
- Read the 12-byte encryption header into Buffer, in locations
2118
- Buffer(0) thru Buffer(11).
2119
-
2120
- loop for i <- 0 to 11
2121
- C <- buffer(i) ^ decrypt_byte()
2122
- update_keys(C)
2123
- buffer(i) <- C
2124
- end loop
2125
-
2126
- Where decrypt_byte() is defined as:
2127
-
2128
- unsigned char decrypt_byte()
2129
- local unsigned short temp
2130
- temp <- Key(2) | 2
2131
- decrypt_byte <- (temp * (temp ^ 1)) >> 8
2132
- end decrypt_byte
2133
-
2134
- After the header is decrypted, the last 1 or 2 bytes in Buffer
2135
- SHOULD be the high-order word/byte of the CRC for the file being
2136
- decrypted, stored in Intel low-byte/high-byte order. Versions of
2137
- PKZIP prior to 2.0 used a 2 byte CRC check; a 1 byte CRC check is
2138
- used on versions after 2.0. This can be used to test if the password
2139
- supplied is correct or not.
2140
-
2141
- 6.1.7 Decrypting the compressed data stream
2142
-
2143
- The compressed data stream can be decrypted as follows:
2144
-
2145
- loop until done
2146
- read a character into C
2147
- Temp <- C ^ decrypt_byte()
2148
- update_keys(Temp)
2149
- output Temp
2150
- end loop
2151
-
2152
-
2153
- 7.0 Strong Encryption Specification
2154
- -----------------------------------
2155
-
2156
- 7.0.1 Portions of the Strong Encryption technology defined in this
2157
- specification are covered under patents and pending patent applications.
2158
- Refer to the section in this document entitled "Incorporating
2159
- PKWARE Proprietary Technology into Your Product" for more information.
2160
-
2161
- 7.1 Strong Encryption Overview
2162
- ------------------------------
2163
-
2164
- 7.1.1 Version 5.x of this specification introduced support for strong
2165
- encryption algorithms. These algorithms can be used with either
2166
- a password or an X.509v3 digital certificate to encrypt each file.
2167
- This format specification supports either password or certificate
2168
- based encryption to meet the security needs of today, to enable
2169
- interoperability between users within both PKI and non-PKI
2170
- environments, and to ensure interoperability between different
2171
- computing platforms that are running a ZIP program.
2172
-
2173
- 7.1.2 Password based encryption is the most common form of encryption
2174
- people are familiar with. However, inherent weaknesses with
2175
- passwords (e.g. susceptibility to dictionary/brute force attack)
2176
- as well as password management and support issues make certificate
2177
- based encryption a more secure and scalable option. Industry
2178
- efforts and support are defining and moving towards more advanced
2179
- security solutions built around X.509v3 digital certificates and
2180
- Public Key Infrastructures (PKI) because of the greater scalability,
2181
- administrative options, and more robust security over traditional
2182
- password based encryption.
2183
-
2184
- 7.1.3 Most standard encryption algorithms are supported with this
2185
- specification. Reference implementations for many of these
2186
- algorithms are available from either commercial or open source
2187
- distributors. Readily available cryptographic toolkits make
2188
- implementation of the encryption features straightforward.
2189
- This document is not intended to provide a treatise on data
2190
- encryption principles or theory. Its purpose is to document the
2191
- data structures required for implementing interoperable data
2192
- encryption within the .ZIP format. It is strongly recommended that
2193
- you have a good understanding of data encryption before reading
2194
- further.
2195
-
2196
- 7.1.4 The algorithms introduced in Version 5.0 of this specification
2197
- include:
2198
-
2199
- RC2 40 bit, 64 bit, and 128 bit
2200
- RC4 40 bit, 64 bit, and 128 bit
2201
- DES
2202
- 3DES 112 bit and 168 bit
2203
-
2204
- Version 5.1 adds support for the following:
2205
-
2206
- AES 128 bit, 192 bit, and 256 bit
2207
-
2208
-
2209
- 7.1.5 Version 6.1 introduces encryption data changes to support
2210
- interoperability with Smartcard and USB Token certificate storage
2211
- methods which do not support the OAEP strengthening standard.
2212
-
2213
- 7.1.6 Version 6.2 introduces support for encrypting metadata by compressing
2214
- and encrypting the central directory data structure to reduce information
2215
- leakage. Information leakage can occur in legacy ZIP applications
2216
- through exposure of information about a file even though that file is
2217
- stored encrypted. The information exposed consists of file
2218
- characteristics stored within the records and fields defined by this
2219
- specification. This includes data such as a file's name, its original
2220
- size, timestamp and CRC32 value.
2221
-
2222
- 7.1.7 Version 6.3 introduces support for encrypting data using the Blowfish
2223
- and Twofish algorithms. These are symmetric block ciphers developed
2224
- by Bruce Schneier. Blowfish supports using a variable length key from
2225
- 32 to 448 bits. Block size is 64 bits. Implementations SHOULD use 16
2226
- rounds and the only mode supported within ZIP files is CBC. Twofish
2227
- supports key sizes 128, 192 and 256 bits. Block size is 128 bits.
2228
- Implementations SHOULD use 16 rounds and the only mode supported within
2229
- ZIP files is CBC. Information and source code for both Blowfish and
2230
- Twofish algorithms can be found on the internet. Consult with the author
2231
- of these algorithms for information on terms or restrictions on use.
2232
-
2233
- 7.1.8 Central Directory Encryption provides greater protection against
2234
- information leakage by encrypting the Central Directory structure and
2235
- by masking key values that are replicated in the unencrypted Local
2236
- Header. ZIP compatible programs that cannot interpret an encrypted
2237
- Central Directory structure cannot rely on the data in the corresponding
2238
- Local Header for decompression information.
2239
-
2240
- 7.1.9 Extra Field records that MAY contain information about a file that SHOULD
2241
- not be exposed SHOULD NOT be stored in the Local Header and SHOULD only
2242
- be written to the Central Directory where they can be encrypted. This
2243
- design currently does not support streaming. Information in the End of
2244
- Central Directory record, the Zip64 End of Central Directory Locator,
2245
- and the Zip64 End of Central Directory records are not encrypted. Access
2246
- to view data on files within a ZIP file with an encrypted Central Directory
2247
- requires the appropriate password or private key for decryption prior to
2248
- viewing any files, or any information about the files, in the archive.
2249
-
2250
- 7.1.10 Older ZIP compatible programs not familiar with the Central Directory
2251
- Encryption feature will no longer be able to recognize the Central
2252
- Directory and MAY assume the ZIP file is corrupt. Programs that
2253
- attempt streaming access using Local Headers will see invalid
2254
- information for each file. Central Directory Encryption need not be
2255
- used for every ZIP file. Its use is recommended for greater security.
2256
- ZIP files not using Central Directory Encryption SHOULD operate as
2257
- in the past.
2258
-
2259
- 7.1.11 This strong encryption feature specification is intended to provide for
2260
- scalable, cross-platform encryption needs ranging from simple password
2261
- encryption to authenticated public/private key encryption.
2262
-
2263
- 7.1.12 Encryption provides data confidentiality and privacy. It is
2264
- recommended that you combine X.509 digital signing with encryption
2265
- to add authentication and non-repudiation.
2266
-
2267
-
2268
- 7.2 Single Password Symmetric Encryption Method
2269
- -----------------------------------------------
2270
-
2271
- 7.2.1 The Single Password Symmetric Encryption Method using strong
2272
- encryption algorithms operates similarly to the traditional
2273
- PKWARE encryption defined in this format. Additional data
2274
- structures are added to support the processing needs of the
2275
- strong algorithms.
2276
-
2277
- The Strong Encryption data structures are:
2278
-
2279
- 7.2.2 General Purpose Bits - Bits 0 and 6 of the General Purpose bit
2280
- flag in both local and central header records. Both bits set
2281
- indicates strong encryption. Bit 13, when set indicates the Central
2282
- Directory is encrypted and that selected fields in the Local Header
2283
- are masked to hide their actual value.
2284
-
2285
-
2286
- 7.2.3 Extra Field 0x0017 in central header only.
2287
-
2288
- Fields to consider in this record are:
2289
-
2290
- 7.2.3.1 Format - the data format identifier for this record. The only
2291
- value allowed at this time is the integer value 2.
2292
-
2293
- 7.2.3.2 AlgId - integer identifier of the encryption algorithm from the
2294
- following range
2295
-
2296
- 0x6601 - DES
2297
- 0x6602 - RC2 (version needed to extract < 5.2)
2298
- 0x6603 - 3DES 168
2299
- 0x6609 - 3DES 112
2300
- 0x660E - AES 128
2301
- 0x660F - AES 192
2302
- 0x6610 - AES 256
2303
- 0x6702 - RC2 (version needed to extract >= 5.2)
2304
- 0x6720 - Blowfish
2305
- 0x6721 - Twofish
2306
- 0x6801 - RC4
2307
- 0xFFFF - Unknown algorithm
2308
-
2309
- 7.2.3.3 Bitlen - Explicit bit length of key
2310
-
2311
- 32 - 448 bits
2312
-
2313
- 7.2.3.4 Flags - Processing flags needed for decryption
2314
-
2315
- 0x0001 - Password is required to decrypt
2316
- 0x0002 - Certificates only
2317
- 0x0003 - Password or certificate required to decrypt
2318
-
2319
- Values > 0x0003 reserved for certificate processing
2320
-
2321
-
2322
- 7.2.4 Decryption header record preceding compressed file data.
2323
-
2324
- -Decryption Header:
2325
-
2326
- Value Size Description
2327
- ----- ---- -----------
2328
- IVSize 2 bytes Size of initialization vector (IV)
2329
- IVData IVSize Initialization vector for this file
2330
- Size 4 bytes Size of remaining decryption header data
2331
- Format 2 bytes Format definition for this record
2332
- AlgID 2 bytes Encryption algorithm identifier
2333
- Bitlen 2 bytes Bit length of encryption key
2334
- Flags 2 bytes Processing flags
2335
- ErdSize 2 bytes Size of Encrypted Random Data
2336
- ErdData ErdSize Encrypted Random Data
2337
- Reserved1 4 bytes Reserved certificate processing data
2338
- Reserved2 (var) Reserved for certificate processing data
2339
- VSize 2 bytes Size of password validation data
2340
- VData VSize-4 Password validation data
2341
- VCRC32 4 bytes Standard ZIP CRC32 of password validation data
2342
-
2343
- 7.2.4.1 IVData - The size of the IV SHOULD match the algorithm block size.
2344
- The IVData can be completely random data. If the size of
2345
- the randomly generated data does not match the block size
2346
- it SHOULD be complemented with zeros or truncated as
2347
- necessary. If IVSize is 0, then IV = CRC32 + Uncompressed
2348
- File Size (as a 64 bit little-endian, unsigned integer value).
2349
-
2350
- 7.2.4.2 Format - the data format identifier for this record. The only
2351
- value allowed at this time is the integer value 3.
2352
-
2353
- 7.2.4.3 AlgId - integer identifier of the encryption algorithm from the
2354
- following range
2355
-
2356
- 0x6601 - DES
2357
- 0x6602 - RC2 (version needed to extract < 5.2)
2358
- 0x6603 - 3DES 168
2359
- 0x6609 - 3DES 112
2360
- 0x660E - AES 128
2361
- 0x660F - AES 192
2362
- 0x6610 - AES 256
2363
- 0x6702 - RC2 (version needed to extract >= 5.2)
2364
- 0x6720 - Blowfish
2365
- 0x6721 - Twofish
2366
- 0x6801 - RC4
2367
- 0xFFFF - Unknown algorithm
2368
-
2369
- 7.2.4.4 Bitlen - Explicit bit length of key
2370
-
2371
- 32 - 448 bits
2372
-
2373
- 7.2.4.5 Flags - Processing flags needed for decryption
2374
-
2375
- 0x0001 - Password is required to decrypt
2376
- 0x0002 - Certificates only
2377
- 0x0003 - Password or certificate required to decrypt
2378
-
2379
- Values > 0x0003 reserved for certificate processing
2380
-
2381
- 7.2.4.6 ErdData - Encrypted random data is used to store random data that
2382
- is used to generate a file session key for encrypting
2383
- each file. SHA1 is used to calculate hash data used to
2384
- derive keys. File session keys are derived from a master
2385
- session key generated from the user-supplied password.
2386
- If the Flags field in the decryption header contains
2387
- the value 0x4000, then the ErdData field MUST be
2388
- decrypted using 3DES. If the value 0x4000 is not set,
2389
- then the ErdData field MUST be decrypted using AlgId.
2390
-
2391
-
2392
- 7.2.4.7 Reserved1 - Reserved for certificate processing, if value is
2393
- zero, then Reserved2 data is absent. See the explanation
2394
- under the Certificate Processing Method for details on
2395
- this data structure.
2396
-
2397
- 7.2.4.8 Reserved2 - If present, the size of the Reserved2 data structure
2398
- is located by skipping the first 4 bytes of this field
2399
- and using the next 2 bytes as the remaining size. See
2400
- the explanation under the Certificate Processing Method
2401
- for details on this data structure.
2402
-
2403
- 7.2.4.9 VSize - This size value will always include the 4 bytes of the
2404
- VCRC32 data and will be greater than 4 bytes.
2405
-
2406
- 7.2.4.10 VData - Random data for password validation. This data is VSize
2407
- in length and VSize MUST be a multiple of the encryption
2408
- block size. VCRC32 is a checksum value of VData.
2409
- VData and VCRC32 are stored encrypted and start the
2410
- stream of encrypted data for a file.
2411
-
2412
-
2413
- 7.2.5 Useful Tips
2414
-
2415
- 7.2.5.1 Strong Encryption is always applied to a file after compression. The
2416
- block oriented algorithms all operate in Cipher Block Chaining (CBC)
2417
- mode. The block size used for AES encryption is 16. All other block
2418
- algorithms use a block size of 8. Two IDs are defined for RC2 to
2419
- account for a discrepancy found in the implementation of the RC2
2420
- algorithm in the cryptographic library on Windows XP SP1 and all
2421
- earlier versions of Windows. It is recommended that zero length files
2422
- not be encrypted, however programs SHOULD be prepared to extract them
2423
- if they are found within a ZIP file.
2424
-
2425
- 7.2.5.2 A pseudo-code representation of the encryption process is as follows:
2426
-
2427
- Password = GetUserPassword()
2428
- MasterSessionKey = DeriveKey(SHA1(Password))
2429
- RD = CryptographicStrengthRandomData()
2430
- For Each File
2431
- IV = CryptographicStrengthRandomData()
2432
- VData = CryptographicStrengthRandomData()
2433
- VCRC32 = CRC32(VData)
2434
- FileSessionKey = DeriveKey(SHA1(IV + RD))
2435
- ErdData = Encrypt(RD,MasterSessionKey,IV)
2436
- Encrypt(VData + VCRC32 + FileData, FileSessionKey,IV)
2437
- Done
2438
-
2439
- 7.2.5.3 The function names and parameter requirements will depend on
2440
- the choice of the cryptographic toolkit selected. Almost any
2441
- toolkit supporting the reference implementations for each
2442
- algorithm can be used. The RSA BSAFE(r), OpenSSL, and Microsoft
2443
- CryptoAPI libraries are all known to work well.
2444
-
2445
-
2446
- 7.3 Single Password - Central Directory Encryption
2447
- --------------------------------------------------
2448
-
2449
- 7.3.1 Central Directory Encryption is achieved within the .ZIP format by
2450
- encrypting the Central Directory structure. This encapsulates the metadata
2451
- most often used for processing .ZIP files. Additional metadata is stored for
2452
- redundancy in the Local Header for each file. The process of concealing
2453
- metadata by encrypting the Central Directory does not protect the data within
2454
- the Local Header. To avoid information leakage from the exposed metadata
2455
- in the Local Header, the fields containing information about a file are masked.
2456
-
2457
- 7.3.2 Local Header
2458
-
2459
- Masking replaces the true content of the fields for a file in the Local
2460
- Header with false information. When masked, the Local Header is not
2461
- suitable for streaming access and the options for data recovery of damaged
2462
- archives are reduced. Extra Data fields that MAY contain confidential
2463
- data SHOULD NOT be stored within the Local Header. The value set into
2464
- the Version needed to extract field SHOULD be the correct value needed to
2465
- extract the file without regard to Central Directory Encryption. The fields
2466
- within the Local Header targeted for masking when the Central Directory is
2467
- encrypted are:
2468
-
2469
- Field Name Mask Value
2470
- ------------------ ---------------------------
2471
- compression method 0
2472
- last mod file time 0
2473
- last mod file date 0
2474
- crc-32 0
2475
- compressed size 0
2476
- uncompressed size 0
2477
- file name (variable size) Base 16 value from the
2478
- range 1 - 0xFFFFFFFFFFFFFFFF
2479
- represented as a string whose
2480
- size will be set into the
2481
- file name length field
2482
-
2483
- The Base 16 value assigned as a masked file name is simply a sequentially
2484
- incremented value for each file starting with 1 for the first file.
2485
- Modifications to a ZIP file MAY cause different values to be stored for
2486
- each file. For compatibility, the file name field in the Local Header
2487
- SHOULD NOT be left blank. As of Version 6.2 of this specification,
2488
- the Compression Method and Compressed Size fields are not yet masked.
2489
- Fields having a value of 0xFFFF or 0xFFFFFFFF for the ZIP64 format
2490
- SHOULD NOT be masked.
2491
-
2492
- 7.3.3 Encrypting the Central Directory
2493
-
2494
- Encryption of the Central Directory does not include encryption of the
2495
- Central Directory Signature data, the Zip64 End of Central Directory
2496
- record, the Zip64 End of Central Directory Locator, or the End
2497
- of Central Directory record. The ZIP file comment data is never
2498
- encrypted.
2499
-
2500
- Before encrypting the Central Directory, it MAY optionally be compressed.
2501
- Compression is not required, but for storage efficiency it is assumed
2502
- this structure will be compressed before encrypting. Similarly, this
2503
- specification supports compressing the Central Directory without
2504
- requiring that it also be encrypted. Early implementations of this
2505
- feature will assume the encryption method applied to files matches the
2506
- encryption applied to the Central Directory.
2507
-
2508
- Encryption of the Central Directory is done in a manner similar to
2509
- that of file encryption. The encrypted data is preceded by a
2510
- decryption header. The decryption header is known as the Archive
2511
- Decryption Header. The fields of this record are identical to
2512
- the decryption header preceding each encrypted file. The location
2513
- of the Archive Decryption Header is determined by the value in the
2514
- Start of the Central Directory field in the Zip64 End of Central
2515
- Directory record. When the Central Directory is encrypted, the
2516
- Zip64 End of Central Directory record will always be present.
2517
-
2518
- The layout of the Zip64 End of Central Directory record for all
2519
- versions starting with 6.2 of this specification will follow the
2520
- Version 2 format. The Version 2 format is as follows:
2521
-
2522
- The leading fixed size fields within the Version 1 format for this
2523
- record remain unchanged. The record signature for both Version 1
2524
- and Version 2 will be 0x06064b50. Immediately following the last
2525
- byte of the field known as the Offset of Start of Central
2526
- Directory With Respect to the Starting Disk Number will begin the
2527
- new fields defining Version 2 of this record.
2528
-
2529
- 7.3.4 New fields for Version 2
2530
-
2531
- Note: all fields stored in Intel low-byte/high-byte order.
2532
-
2533
- Value Size Description
2534
- ----- ---- -----------
2535
- Compression Method 2 bytes Method used to compress the
2536
- Central Directory
2537
- Compressed Size 8 bytes Size of the compressed data
2538
- Original Size 8 bytes Original uncompressed size
2539
- AlgId 2 bytes Encryption algorithm ID
2540
- BitLen 2 bytes Encryption key length
2541
- Flags 2 bytes Encryption flags
2542
- HashID 2 bytes Hash algorithm identifier
2543
- Hash Length 2 bytes Length of hash data
2544
- Hash Data (variable) Hash data
2545
-
2546
- The Compression Method accepts the same range of values as the
2547
- corresponding field in the Central Header.
2548
-
2549
- The Compressed Size and Original Size values will not include the
2550
- data of the Central Directory Signature which is compressed or
2551
- encrypted.
2552
-
2553
- The AlgId, BitLen, and Flags fields accept the same range of values
2554
- as the corresponding fields within the 0x0017 record.
2555
-
2556
- Hash ID identifies the algorithm used to hash the Central Directory
2557
- data. This data does not have to be hashed, in which case the
2558
- values for both the HashID and Hash Length will be 0. Possible
2559
- values for HashID are:
2560
-
2561
- Value Algorithm
2562
- ------ ---------
2563
- 0x0000 none
2564
- 0x0001 CRC32
2565
- 0x8003 MD5
2566
- 0x8004 SHA1
2567
- 0x8007 RIPEMD160
2568
- 0x800C SHA256
2569
- 0x800D SHA384
2570
- 0x800E SHA512
2571
-
2572
- 7.3.5 When the Central Directory data is signed, the same hash algorithm
2573
- used to hash the Central Directory for signing SHOULD be used.
2574
- This is recommended for processing efficiency, however, it is
2575
- permissible for any of the above algorithms to be used independent
2576
- of the signing process.
2577
-
2578
- The Hash Data will contain the hash data for the Central Directory.
2579
- The length of this data will vary depending on the algorithm used.
2580
-
2581
- The Version Needed to Extract SHOULD be set to 62.
2582
-
2583
- The value for the Total Number of Entries on the Current Disk will
2584
- be 0. These records will no longer support random access when
2585
- encrypting the Central Directory.
2586
-
2587
- 7.3.6 When the Central Directory is compressed and/or encrypted, the
2588
- End of Central Directory record will store the value 0xFFFFFFFF
2589
- as the value for the Total Number of Entries in the Central
2590
- Directory. The value stored in the Total Number of Entries in
2591
- the Central Directory on this Disk field will be 0. The actual
2592
- values will be stored in the equivalent fields of the Zip64
2593
- End of Central Directory record.
2594
-
2595
- 7.3.7 Decrypting and decompressing the Central Directory is accomplished
2596
- in the same manner as decrypting and decompressing a file.
2597
-
2598
- 7.4 Certificate Processing Method
2599
- ---------------------------------
2600
-
2601
- The Certificate Processing Method for ZIP file encryption
2602
- defines the following additional data fields:
2603
-
2604
- 7.4.1 Certificate Flag Values
2605
-
2606
- Additional processing flags that can be present in the Flags field of both
2607
- the 0x0017 field of the central directory Extra Field and the Decryption
2608
- header record preceding compressed file data are:
2609
-
2610
- 0x0007 - reserved for future use
2611
- 0x000F - reserved for future use
2612
- 0x0100 - Indicates non-OAEP key wrapping was used. If this
2613
- field is set, the version needed to extract MUST
2614
- be at least 61. This means OAEP key wrapping is not
2615
- used when generating a Master Session Key using
2616
- ErdData.
2617
- 0x4000 - ErdData MUST be decrypted using 3DES-168, otherwise use the
2618
- same algorithm used for encrypting the file contents.
2619
- 0x8000 - reserved for future use
2620
-
2621
-
2622
- 7.4.2 CertData - Extra Field 0x0017 record certificate data structure
2623
-
2624
- The data structure used to store certificate data within the section
2625
- of the Extra Field defined by the CertData field of the 0x0017
2626
- record are as shown:
2627
-
2628
- Value Size Description
2629
- ----- ---- -----------
2630
- RCount 4 bytes Number of recipients.
2631
- HashAlg 2 bytes Hash algorithm identifier
2632
- HSize 2 bytes Hash size
2633
- SRList (var) Simple list of recipients hashed public keys
2634
-
2635
-
2636
- RCount This defines the number of intended recipients whose
2637
- public keys were used for encryption. This identifies
2638
- the number of elements in the SRList.
2639
-
2640
- HashAlg This defines the hash algorithm used to calculate
2641
- the public key hash of each public key used
2642
- for encryption. This field currently supports
2643
- only the following value for SHA-1
2644
-
2645
- 0x8004 - SHA1
2646
-
2647
- HSize This defines the size of a hashed public key.
2648
-
2649
- SRList This is a variable length list of the hashed
2650
- public keys for each intended recipient. Each
2651
- element in this list is HSize. The total size of
2652
- SRList is determined using RCount * HSize.
2653
-
2654
-
2655
- 7.4.3 Reserved1 - Certificate Decryption Header Reserved1 Data
2656
-
2657
- Value Size Description
2658
- ----- ---- -----------
2659
- RCount 4 bytes Number of recipients.
2660
-
2661
- RCount This defines the number of intended recipients whose
2662
- public keys were used for encryption. This defines
2663
- the number of elements in the REList field defined below.
2664
-
2665
-
2666
- 7.4.4 Reserved2 - Certificate Decryption Header Reserved2 Data Structures
2667
-
2668
-
2669
- Value Size Description
2670
- ----- ---- -----------
2671
- HashAlg 2 bytes Hash algorithm identifier
2672
- HSize 2 bytes Hash size
2673
- REList (var) List of recipient data elements
2674
-
2675
-
2676
- HashAlg This defines the hash algorithm used to calculate
2677
- the public key hash of each public key used
2678
- for encryption. This field currently supports
2679
- only the following value for SHA-1
2680
-
2681
- 0x8004 - SHA1
2682
-
2683
- HSize This defines the size of a hashed public key
2684
- defined in REHData.
2685
-
2686
- REList This is a variable length list of recipient data.
2687
- Each element in this list consists of a Recipient
2688
- Element data structure as follows:
2689
-
2690
-
2691
- Recipient Element (REList) Data Structure:
2692
-
2693
- Value Size Description
2694
- ----- ---- -----------
2695
- RESize 2 bytes Size of REHData + REKData
2696
- REHData HSize Hash of recipients public key
2697
- REKData (var) Simple key blob
2698
-
2699
-
2700
- RESize This defines the size of an individual REList
2701
- element. This value is the combined size of the
2702
- REHData field + REKData field. REHData is defined by
2703
- HSize. REKData is variable and can be calculated
2704
- for each REList element using RESize and HSize.
2705
-
2706
- REHData Hashed public key for this recipient.
2707
-
2708
- REKData Simple Key Blob. The format of this data structure
2709
- is identical to that defined in the Microsoft
2710
- CryptoAPI and generated using the CryptExportKey()
2711
- function. The version of the Simple Key Blob
2712
- supported at this time is 0x02 as defined by
2713
- Microsoft.
2714
-
2715
- 7.5 Certificate Processing - Central Directory Encryption
2716
- ---------------------------------------------------------
2717
-
2718
- 7.5.1 Central Directory Encryption using Digital Certificates will
2719
- operate in a manner similar to that of Single Password Central
2720
- Directory Encryption. This record will only be present when there
2721
- is data to place into it. Currently, data is placed into this
2722
- record when digital certificates are used for either encrypting
2723
- or signing the files within a ZIP file. When only password
2724
- encryption is used with no certificate encryption or digital
2725
- signing, this record is not currently needed. When present, this
2726
- record will appear before the start of the actual Central Directory
2727
- data structure and will be located immediately after the Archive
2728
- Decryption Header if the Central Directory is encrypted.
2729
-
2730
- 7.5.2 The Archive Extra Data record will be used to store the following
2731
- information. Additional data MAY be added in future versions.
2732
-
2733
- Extra Data Fields:
2734
-
2735
- 0x0014 - PKCS#7 Store for X.509 Certificates
2736
- 0x0016 - X.509 Certificate ID and Signature for central directory
2737
- 0x0019 - PKCS#7 Encryption Recipient Certificate List
2738
-
2739
- The 0x0014 and 0x0016 Extra Data records otherwise would be
2740
- located in the first record of the Central Directory for digital
2741
- certificate processing. When encrypting or compressing the Central
2742
- Directory, the 0x0014 and 0x0016 records MUST be located in the
2743
- Archive Extra Data record and they SHOULD NOT remain in the first
2744
- Central Directory record. The Archive Extra Data record will also
2745
- be used to store the 0x0019 data.
2746
-
2747
- 7.5.3 When present, the size of the Archive Extra Data record will be
2748
- included in the size of the Central Directory. The data of the
2749
- Archive Extra Data record will also be compressed and encrypted
2750
- along with the Central Directory data structure.
2751
-
2752
- 7.6 Certificate Processing Differences
2753
- --------------------------------------
2754
-
2755
- 7.6.1 The Certificate Processing Method of encryption differs from the
2756
- Single Password Symmetric Encryption Method as follows. Instead
2757
- of using a user-defined password to generate a master session key,
2758
- cryptographically random data is used. The key material is then
2759
- wrapped using standard key-wrapping techniques. This key material
2760
- is wrapped using the public key of each recipient that will need
2761
- to decrypt the file using their corresponding private key.
2762
-
2763
- 7.6.2 This specification currently assumes digital certificates will follow
2764
- the X.509 V3 format for 1024 bit and higher RSA format digital
2765
- certificates. Implementation of this Certificate Processing Method
2766
- requires supporting logic for key access and management. This logic
2767
- is outside the scope of this specification.
2768
-
2769
- 7.7 OAEP Processing with Certificate-based Encryption
2770
- -----------------------------------------------------
2771
-
2772
- 7.7.1 OAEP stands for Optimal Asymmetric Encryption Padding. It is a
2773
- strengthening technique used for small encoded items such as decryption
2774
- keys. This is commonly applied in cryptographic key-wrapping techniques
2775
- and is supported by PKCS #1. Versions 5.0 and 6.0 of this specification
2776
- were designed to support OAEP key-wrapping for certificate-based
2777
- decryption keys for additional security.
2778
-
2779
- 7.7.2 Support for private keys stored on Smartcards or Tokens introduced
2780
- a conflict with this OAEP logic. Most card and token products do
2781
- not support the additional strengthening applied to OAEP key-wrapped
2782
- data. In order to resolve this conflict, versions 6.1 and above of this
2783
- specification will no longer support OAEP when encrypting using
2784
- digital certificates.
2785
-
2786
- 7.7.3 Versions of PKZIP available during initial development of the
2787
- certificate processing method set a value of 61 into the
2788
- version needed to extract field for a file. This indicates that
2789
- non-OAEP key wrapping is used. This affects certificate encryption
2790
- only, and password encryption functions SHOULD NOT be affected by
2791
- this value. This means values of 61 MAY be found on files encrypted
2792
- with certificates only, or on files encrypted with both password
2793
- encryption and certificate encryption. Files encrypted with both
2794
- methods can safely be decrypted using the password methods documented.
2795
-
2796
- 7.8 Additional Encryption/Decryption Data Records
2797
- -----------------------------------------------------
2798
-
2799
- 7.8.1 Additional information MAY be stored within a ZIP file in support
2800
- of the strong password and certificate encryption methods defined above.
2801
- These include, but are not limited to the following record types.
2802
-
2803
- 0x0021 Policy Decryption Key Record
2804
- 0x0022 Smartcrypt Key Provider Record
2805
- 0x0023 Smartcrypt Policy Key Data Record
2806
-
2807
- 8.0 Splitting and Spanning ZIP files
2808
- -------------------------------------
2809
-
2810
- 8.1 Spanned ZIP files
2811
-
2812
- 8.1.1 Spanning is the process of segmenting a ZIP file across
2813
- multiple removable media. This support has typically only
2814
- been provided for DOS formatted floppy diskettes.
2815
-
2816
- 8.2 Split ZIP files
2817
-
2818
- 8.2.1 File splitting is a newer derivation of spanning.
2819
- Splitting follows the same segmentation process as
2820
- spanning, however, it does not require writing each
2821
- segment to a unique removable medium and instead supports
2822
- placing all pieces onto local or non-removable locations
2823
- such as file systems, local drives, folders, etc.
2824
-
2825
- 8.3 File Naming Differences
2826
-
2827
- 8.3.1 A key difference between spanned and split ZIP files is
2828
- that all pieces of a spanned ZIP file have the same name.
2829
- Since each piece is written to a separate volume, no name
2830
- collisions occur and each segment can reuse the original
2831
- .ZIP file name given to the archive.
2832
-
2833
- 8.3.2 Sequence ordering for DOS spanned archives uses the DOS
2834
- volume label to determine segment numbers. Volume labels
2835
- for each segment are written using the form PKBACK#xxx,
2836
- where xxx is the segment number written as a decimal
2837
- value from 001 - nnn.
2838
-
2839
- 8.3.3 Split ZIP files are typically written to the same location
2840
- and are subject to name collisions if the spanned name
2841
- format is used since each segment will reside on the same
2842
- drive. To avoid name collisions, split archives are named
2843
- as follows.
2844
-
2845
- Segment 1 = filename.z01
2846
- Segment n-1 = filename.z(n-1)
2847
- Segment n = filename.zip
2848
-
2849
- 8.3.4 The .ZIP extension is used on the last segment to support
2850
- quickly reading the central directory. The segment number
2851
- n SHOULD be a decimal value.
2852
-
2853
- 8.4 Spanned Self-extracting ZIP Files
2854
-
2855
- 8.4.1 Spanned ZIP files MAY be PKSFX Self-extracting ZIP files.
2856
- PKSFX files MAY also be split, however, in this case
2857
- the first segment MUST be named filename.exe. The first
2858
- segment of a split PKSFX archive MUST be large enough to
2859
- include the entire executable program.
2860
-
2861
- 8.5 Capacities and Markers
2862
-
2863
- 8.5.1 Capacities for split archives are as follows:
2864
-
2865
- Maximum number of segments = 4,294,967,295 - 1
2866
- Maximum .ZIP segment size = 4,294,967,295 bytes
2867
- Minimum segment size = 64K
2868
- Maximum PKSFX segment size = 2,147,483,647 bytes
2869
-
2870
- 8.5.2 Segment sizes MAY be different; however, by convention, all
2871
- segment sizes SHOULD be the same with the exception of the
2872
- last, which MAY be smaller. Local and central directory
2873
- header records MUST NOT be split across a segment boundary.
2874
- When writing a header record, if the number of bytes remaining
2875
- within a segment is less than the size of the header record,
2876
- end the current segment and write the header at the start
2877
- of the next segment. The central directory MAY span segment
2878
- boundaries, but no single record in the central directory
2879
- SHOULD be split across segments.
2880
-
2881
- 8.5.3 Spanned/Split archives created using PKZIP for Windows
2882
- (V2.50 or greater), PKZIP Command Line (V2.50 or greater),
2883
- or PKZIP Explorer will include a special spanning
2884
- signature as the first 4 bytes of the first segment of
2885
- the archive. This signature (0x08074b50) will be
2886
- followed immediately by the local header signature for
2887
- the first file in the archive.
2888
-
2889
- 8.5.4 A special spanning marker MAY also appear in spanned/split
2890
- archives if the spanning or splitting process starts but
2891
- only requires one segment. In this case the 0x08074b50
2892
- signature will be replaced with the temporary spanning
2893
- marker signature of 0x30304b50. Split archives can
2894
- only be uncompressed by other versions of PKZIP that
2895
- know how to create a split archive.
2896
-
2897
- 8.5.5 The signature value 0x08074b50 is also used by some
2898
- ZIP implementations as a marker for the Data Descriptor
2899
- record. Conflict in this alternate assignment can be
2900
- avoided by checking the position of the signature
2901
- within the ZIP file to determine the use for which it
2902
- is intended.
2903
-
2904
- 9.0 Change Process
2905
- ------------------
2906
-
2907
- 9.1 In order for the .ZIP file format to remain a viable technology, this
2908
- specification SHOULD be considered as open for periodic review and
2909
- revision. Although this format was originally designed with a
2910
- certain level of extensibility, not all changes in technology
2911
- (present or future) were or will be necessarily considered in its
2912
- design.
2913
-
2914
- 9.2 If your application requires new definitions to the
2915
- extensible sections in this format, or if you would like to
2916
- submit new data structures or new capabilities, please forward
2917
- your request to zipformat@pkware.com. All submissions will be
2918
- reviewed by the ZIP File Specification Committee for possible
2919
- inclusion into future versions of this specification.
2920
-
2921
- 9.3 Periodic revisions to this specification will be published as
2922
- DRAFT or as FINAL status to ensure interoperability. We encourage
2923
- comments and feedback that MAY help improve clarity or content.
2924
-
2925
-
2926
- 10.0 Incorporating PKWARE Proprietary Technology into Your Product
2927
- ------------------------------------------------------------------
2928
-
2929
- 10.1 The Use or Implementation in a product of APPNOTE technological
2930
- components pertaining to either strong encryption or patching requires
2931
- a separate, executed license agreement from PKWARE. Please contact
2932
- PKWARE at zipformat@pkware.com or +1-414-289-9788 with regard to
2933
- acquiring such a license.
2934
-
2935
- 10.2 Additional information regarding PKWARE proprietary technology is
2936
- available at http://www.pkware.com/appnote.
2937
-
2938
- 11.0 Acknowledgements
2939
- ---------------------
2940
-
2941
- In addition to the above mentioned contributors to PKZIP and PKUNZIP,
2942
- PKWARE would like to extend special thanks to Robert Mahoney for
2943
- suggesting the extension .ZIP for this software.
2944
-
2945
- 12.0 References
2946
- ---------------
2947
-
2948
- Fiala, Edward R., and Greene, Daniel H., "Data compression with
2949
- finite windows", Communications of the ACM, Volume 32, Number 4,
2950
- April 1989, pages 490-505.
2951
-
2952
- Held, Gilbert, "Data Compression, Techniques and Applications,
2953
- Hardware and Software Considerations", John Wiley & Sons, 1987.
2954
-
2955
- Huffman, D.A., "A method for the construction of minimum-redundancy
2956
- codes", Proceedings of the IRE, Volume 40, Number 9, September 1952,
2957
- pages 1098-1101.
2958
-
2959
- Nelson, Mark, "LZW Data Compression", Dr. Dobbs Journal, Volume 14,
2960
- Number 10, October 1989, pages 29-37.
2961
-
2962
- Nelson, Mark, "The Data Compression Book", M&T Books, 1991.
2963
-
2964
- Storer, James A., "Data Compression, Methods and Theory",
2965
- Computer Science Press, 1988.
2966
-
2967
- Welch, Terry, "A Technique for High-Performance Data Compression",
2968
- IEEE Computer, Volume 17, Number 6, June 1984, pages 8-19.
2969
-
2970
- Ziv, J. and Lempel, A., "A universal algorithm for sequential data
2971
- compression", Communications of the ACM, Volume 30, Number 6,
2972
- June 1987, pages 520-540.
2973
-
2974
- Ziv, J. and Lempel, A., "Compression of individual sequences via
2975
- variable-rate coding", IEEE Transactions on Information Theory,
2976
- Volume 24, Number 5, September 1978, pages 530-536.
2977
-
2978
-
2979
- APPENDIX A - AS/400 Extra Field (0x0065) Attribute Definitions
2980
- --------------------------------------------------------------
2981
-
2982
- A.1 Field Definition Structure:
2983
-
2984
- a. field length including length 2 bytes Big Endian
2985
- b. field code 2 bytes
2986
- c. data x bytes
2987
-
2988
- A.2 Field Code Description
2989
-
2990
- 4001 Source type i.e. CLP etc
2991
- 4002 The text description of the library
2992
- 4003 The text description of the file
2993
- 4004 The text description of the member
2994
- 4005 x'F0' or 0 is PF-DTA, x'F1' or 1 is PF_SRC
2995
- 4007 Database Type Code 1 byte
2996
- 4008 Database file and fields definition
2997
- 4009 GZIP file type 2 bytes
2998
- 400B IFS code page 2 bytes
2999
- 400C IFS Time of last file status change 4 bytes
3000
- 400D IFS Access Time 4 bytes
3001
- 400E IFS Modification time 4 bytes
3002
- 005C Length of the records in the file 2 bytes
3003
- 0068 GZIP two words 8 bytes
3004
-
3005
- APPENDIX B - z/OS Extra Field (0x0065) Attribute Definitions
3006
- ------------------------------------------------------------
3007
-
3008
- B.1 Field Definition Structure:
3009
-
3010
- a. field length including length 2 bytes Big Endian
3011
- b. field code 2 bytes
3012
- c. data x bytes
3013
-
3014
- B.2 Field Code Description
3015
-
3016
- 0001 File Type 2 bytes
3017
- 0002 NonVSAM Record Format 1 byte
3018
- 0003 Reserved
3019
- 0004 NonVSAM Block Size 2 bytes Big Endian
3020
- 0005 Primary Space Allocation 3 bytes Big Endian
3021
- 0006 Secondary Space Allocation 3 bytes Big Endian
3022
- 0007 Space Allocation Type 1 byte flag
3023
- 0008 Modification Date Retired with PKZIP 5.0 +
3024
- 0009 Expiration Date Retired with PKZIP 5.0 +
3025
- 000A PDS Directory Block Allocation 3 bytes Big Endian binary value
3026
- 000B NonVSAM Volume List variable
3027
- 000C UNIT Reference Retired with PKZIP 5.0 +
3028
- 000D DF/SMS Management Class 8 bytes EBCDIC Text Value
3029
- 000E DF/SMS Storage Class 8 bytes EBCDIC Text Value
3030
- 000F DF/SMS Data Class 8 bytes EBCDIC Text Value
3031
- 0010 PDS/PDSE Member Info. 30 bytes
3032
- 0011 VSAM sub-filetype 2 bytes
3033
- 0012 VSAM LRECL 13 bytes EBCDIC "(num_avg num_max)"
3034
- 0013 VSAM Cluster Name Retired with PKZIP 5.0 +
3035
- 0014 VSAM KSDS Key Information 13 bytes EBCDIC "(num_length num_position)"
3036
- 0015 VSAM Average LRECL 5 bytes EBCDIC num_value padded with blanks
3037
- 0016 VSAM Maximum LRECL 5 bytes EBCDIC num_value padded with blanks
3038
- 0017 VSAM KSDS Key Length 5 bytes EBCDIC num_value padded with blanks
3039
- 0018 VSAM KSDS Key Position 5 bytes EBCDIC num_value padded with blanks
3040
- 0019 VSAM Data Name 1-44 bytes EBCDIC text string
3041
- 001A VSAM KSDS Index Name 1-44 bytes EBCDIC text string
3042
- 001B VSAM Catalog Name 1-44 bytes EBCDIC text string
3043
- 001C VSAM Data Space Type 9 bytes EBCDIC text string
3044
- 001D VSAM Data Space Primary 9 bytes EBCDIC num_value left-justified
3045
- 001E VSAM Data Space Secondary 9 bytes EBCDIC num_value left-justified
3046
- 001F VSAM Data Volume List variable EBCDIC text list of 6-character Volume IDs
3047
- 0020 VSAM Data Buffer Space 8 bytes EBCDIC num_value left-justified
3048
- 0021 VSAM Data CISIZE 5 bytes EBCDIC num_value left-justified
3049
- 0022 VSAM Erase Flag 1 byte flag
3050
- 0023 VSAM Free CI % 3 bytes EBCDIC num_value left-justified
3051
- 0024 VSAM Free CA % 3 bytes EBCDIC num_value left-justified
3052
- 0025 VSAM Index Volume List variable EBCDIC text list of 6-character Volume IDs
3053
- 0026 VSAM Ordered Flag 1 byte flag
3054
- 0027 VSAM REUSE Flag 1 byte flag
3055
- 0028 VSAM SPANNED Flag 1 byte flag
3056
- 0029 VSAM Recovery Flag 1 byte flag
3057
- 002A VSAM WRITECHK Flag 1 byte flag
3058
- 002B VSAM Cluster/Data SHROPTS 3 bytes EBCDIC "n,y"
3059
- 002C VSAM Index SHROPTS 3 bytes EBCDIC "n,y"
3060
- 002D VSAM Index Space Type 9 bytes EBCDIC text string
3061
- 002E VSAM Index Space Primary 9 bytes EBCDIC num_value left-justified
3062
- 002F VSAM Index Space Secondary 9 bytes EBCDIC num_value left-justified
3063
- 0030 VSAM Index CISIZE 5 bytes EBCDIC num_value left-justified
3064
- 0031 VSAM Index IMBED 1 byte flag
3065
- 0032 VSAM Index Ordered Flag 1 byte flag
3066
- 0033 VSAM REPLICATE Flag 1 byte flag
3067
- 0034 VSAM Index REUSE Flag 1 byte flag
3068
- 0035 VSAM Index WRITECHK Flag 1 byte flag Retired with PKZIP 5.0 +
3069
- 0036 VSAM Owner 8 bytes EBCDIC text string
3070
- 0037 VSAM Index Owner 8 bytes EBCDIC text string
3071
- 0038 Reserved
3072
- 0039 Reserved
3073
- 003A Reserved
3074
- 003B Reserved
3075
- 003C Reserved
3076
- 003D Reserved
3077
- 003E Reserved
3078
- 003F Reserved
3079
- 0040 Reserved
3080
- 0041 Reserved
3081
- 0042 Reserved
3082
- 0043 Reserved
3083
- 0044 Reserved
3084
- 0045 Reserved
3085
- 0046 Reserved
3086
- 0047 Reserved
3087
- 0048 Reserved
3088
- 0049 Reserved
3089
- 004A Reserved
3090
- 004B Reserved
3091
- 004C Reserved
3092
- 004D Reserved
3093
- 004E Reserved
3094
- 004F Reserved
3095
- 0050 Reserved
3096
- 0051 Reserved
3097
- 0052 Reserved
3098
- 0053 Reserved
3099
- 0054 Reserved
3100
- 0055 Reserved
3101
- 0056 Reserved
3102
- 0057 Reserved
3103
- 0058 PDS/PDSE Member TTR Info. 6 bytes Big Endian
3104
- 0059 PDS 1st LMOD Text TTR 3 bytes Big Endian
3105
- 005A PDS LMOD EP Rec # 4 bytes Big Endian
3106
- 005B Reserved
3107
- 005C Max Length of records 2 bytes Big Endian
3108
- 005D PDSE Flag 1 byte flag
3109
- 005E Reserved
3110
- 005F Reserved
3111
- 0060 Reserved
3112
- 0061 Reserved
3113
- 0062 Reserved
3114
- 0063 Reserved
3115
- 0064 Reserved
3116
- 0065 Last Date Referenced 4 bytes Packed Hex "yyyymmdd"
3117
- 0066 Date Created 4 bytes Packed Hex "yyyymmdd"
3118
- 0068 GZIP two words 8 bytes
3119
- 0071 Extended NOTE Location 12 bytes Big Endian
3120
- 0072 Archive device UNIT 6 bytes EBCDIC
3121
- 0073 Archive 1st Volume 6 bytes EBCDIC
3122
- 0074 Archive 1st VOL File Seq# 2 bytes Binary
3123
- 0075 Native I/O Flags 2 bytes
3124
- 0081 Unix File Type 1 byte enumerated
3125
- 0082 Unix File Format 1 byte enumerated
3126
- 0083 Unix File Character Set Tag Info 4 bytes
3127
- 0090 ZIP Environmental Processing Info 4 bytes
3128
- 0091 EAV EATTR Flags 1 byte
3129
- 0092 DSNTYPE Flags 1 byte
3130
- 0093 Total Space Allocation (Cyls) 4 bytes Big Endian
3131
- 009D NONVSAM DSORG 2 bytes
3132
- 009E Program Virtual Object Info 3 bytes
3133
- 009F Encapsulated file Info 9 bytes
3134
- 400C Unix File Creation Time 4 bytes
3135
- 400D Unix File Access Time 4 bytes
3136
- 400E Unix File Modification time 4 bytes
3137
- 4101 IBMCMPSC Compression Info variable
3138
- 4102 IBMCMPSC Compression Size 8 bytes Big Endian
3139
-
3140
- APPENDIX C - Zip64 Extensible Data Sector Mappings
3141
- ---------------------------------------------------
3142
-
3143
- -Z390 Extra Field:
3144
-
3145
- The following is the general layout of the attributes for the
3146
- ZIP 64 "extra" block for extended tape operations.
3147
-
3148
- Note: some fields are stored in Big Endian format. All text is
3149
- in EBCDIC format unless otherwise specified.
3150
-
3151
- Value Size Description
3152
- ----- ---- -----------
3153
- (Z390) 0x0065 2 bytes Tag for this "extra" block type
3154
- Size 4 bytes Size for the following data block
3155
- Tag 4 bytes EBCDIC "Z390"
3156
- Length71 2 bytes Big Endian
3157
- Subcode71 2 bytes Enote type code
3158
- FMEPos 1 byte
3159
- Length72 2 bytes Big Endian
3160
- Subcode72 2 bytes Unit type code
3161
- Unit 1 byte Unit
3162
- Length73 2 bytes Big Endian
3163
- Subcode73 2 bytes Volume1 type code
3164
- FirstVol 1 byte Volume
3165
- Length74 2 bytes Big Endian
3166
- Subcode74 2 bytes FirstVol file sequence
3167
- FileSeq 2 bytes Sequence
3168
-
3169
- APPENDIX D - Language Encoding (EFS)
3170
- ------------------------------------
3171
-
3172
- D.1 The ZIP format has historically supported only the original IBM PC character
3173
- encoding set, commonly referred to as IBM Code Page 437. This limits storing
3174
- file name characters to only those within the original MS-DOS range of values
3175
- and does not properly support file names in other character encodings, or
3176
- languages. To address this limitation, this specification will support the
3177
- following change.
3178
-
3179
- D.2 If general purpose bit 11 is unset, the file name and comment SHOULD conform
3180
- to the original ZIP character encoding. If general purpose bit 11 is set, the
3181
- filename and comment MUST support The Unicode Standard, Version 4.1.0 or
3182
- greater using the character encoding form defined by the UTF-8 storage
3183
- specification. The Unicode Standard is published by The Unicode
3184
- Consortium (www.unicode.org). UTF-8 encoded data stored within ZIP files
3185
- is expected to not include a byte order mark (BOM).
3186
-
3187
- D.3 Applications MAY choose to supplement this file name storage through the use
3188
- of the 0x0008 Extra Field. Storage for this optional field is currently
3189
- undefined, however it will be used to allow storing extended information
3190
- on source or target encoding that MAY further assist applications with file
3191
- name, or file content encoding tasks. Please contact PKWARE with any
3192
- requirements on how this field SHOULD be used.
3193
-
3194
- D.4 The 0x0008 Extra Field storage MAY be used with either setting for general
3195
- purpose bit 11. Examples of the intended usage for this field are to store
3196
- whether "modified-UTF-8" (JAVA) is used, or UTF-8-MAC. Similarly, other
3197
- commonly used character encoding (code page) designations can be indicated
3198
- through this field. Formalized values for use of the 0x0008 record remain
3199
- undefined at this time. The definition for the layout of the 0x0008 field
3200
- will be published when available. Use of the 0x0008 Extra Field provides
3201
- for storing data within a ZIP file in an encoding other than IBM Code
3202
- Page 437 or UTF-8.
3203
-
3204
- D.5 General purpose bit 11 will not imply any encoding of file content or
3205
- password. Values defining character encoding for file content or
3206
- password MUST be stored within the 0x0008 Extended Language Encoding
3207
- Extra Field.
3208
-
3209
- D.6 Ed Gordon of the Info-ZIP group has defined a pair of "extra field" records
3210
- that can be used to store UTF-8 file name and file comment fields. These
3211
- records can be used for cases when the general purpose bit 11 method
3212
- for storing UTF-8 data in the standard file name and comment fields is
3213
- not desirable. A common case for this alternate method is if backward
3214
- compatibility with older programs is required.
3215
-
3216
- D.7 Definitions for the record structure of these fields are included above
3217
- in the section on 3rd party mappings for "extra field" records. These
3218
- records are identified by Header IDs 0x6375 (Info-ZIP Unicode Comment
3219
- Extra Field) and 0x7075 (Info-ZIP Unicode Path Extra Field).
3220
-
3221
- D.8 The choice of which storage method to use when writing a ZIP file is left
3222
- to the implementation. Developers SHOULD expect that a ZIP file MAY
3223
- contain either method and SHOULD provide support for reading data in
3224
- either format. Use of general purpose bit 11 reduces storage requirements
3225
- for file name data by not requiring additional "extra field" data for
3226
- each file, but can result in older ZIP programs not being able to extract
3227
- files. Use of the 0x6375 and 0x7075 records will result in a ZIP file
3228
- that SHOULD always be readable by older ZIP programs, but requires more
3229
- storage per file to write file name and/or file comment fields.
3230
-
3231
- APPENDIX E - AE-x encryption marker
3232
- -----------------------------------
3233
-
3234
- E.1 AE-x defines an alternate password-based encryption method used
3235
- in ZIP files that is based on a file encryption utility developed by
3236
- Dr. Brian Gladman. Information on Dr. Gladman's method is available at
3237
-
3238
- http://www.gladman.me.uk/cryptography_technology/fileencrypt/
3239
-
3240
- E.2 AE-x uses AES with CTR (counter mode) and HMAC-SHA1. It defines
3241
- encryption using key sizes of 128 bits or 256 bits. It does not
3242
- restrict support for decrypting 192 bits.
3243
-
3244
- E.3 This method uses the standard ZIP encryption bit (bit 0)
3245
- of the general purpose bit flag (section 4.4.4) to indicate a
3246
- file is encrypted.
3247
-
3248
- E.4 The compression method field (section 4.4.5) is set to 99
3249
- to indicate a file has been encrypted using this method.
3250
-
3251
- E.5 The actual compression method is stored in an extra field
3252
- structure identified by a Header ID of 0x9901. Information on this
3253
- record structure can be found at http://www.winzip.com/aes_info.htm.
3254
-
3255
- E.6 Two versions are defined for the 0x9901 structure.
3256
-
3257
- E.6.1 Version 1 stores the file CRC value in the CRC-32 field
3258
- (section 4.4.7).
3259
-
3260
- E.6.2 Version 2 stores a value of 0 in the CRC-32 field.