libmspack 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/.travis.yml +5 -0
- data/.yardopts +1 -0
- data/Gemfile +4 -0
- data/README.md +75 -0
- data/Rakefile +22 -0
- data/UNLICENSE +24 -0
- data/ext/Rakefile +16 -0
- data/ext/i386-windows/libmspack.dll +0 -0
- data/ext/libmspack/AUTHORS +12 -0
- data/ext/libmspack/COPYING.LIB +504 -0
- data/ext/libmspack/ChangeLog +491 -0
- data/ext/libmspack/Makefile.am +100 -0
- data/ext/libmspack/NEWS +0 -0
- data/ext/libmspack/README +130 -0
- data/ext/libmspack/TODO +8 -0
- data/ext/libmspack/cleanup.sh +9 -0
- data/ext/libmspack/configure.ac +50 -0
- data/ext/libmspack/debian/changelog +6 -0
- data/ext/libmspack/debian/control +14 -0
- data/ext/libmspack/debian/rules +101 -0
- data/ext/libmspack/doc/Doxyfile.in +22 -0
- data/ext/libmspack/doc/Makefile.in +14 -0
- data/ext/libmspack/doc/szdd_kwaj_format.html +331 -0
- data/ext/libmspack/libmspack.pc.in +10 -0
- data/ext/libmspack/mspack/cab.h +127 -0
- data/ext/libmspack/mspack/cabc.c +24 -0
- data/ext/libmspack/mspack/cabd.c +1444 -0
- data/ext/libmspack/mspack/chm.h +122 -0
- data/ext/libmspack/mspack/chmc.c +24 -0
- data/ext/libmspack/mspack/chmd.c +1392 -0
- data/ext/libmspack/mspack/crc32.c +95 -0
- data/ext/libmspack/mspack/crc32.h +17 -0
- data/ext/libmspack/mspack/des.h +15 -0
- data/ext/libmspack/mspack/hlp.h +33 -0
- data/ext/libmspack/mspack/hlpc.c +24 -0
- data/ext/libmspack/mspack/hlpd.c +24 -0
- data/ext/libmspack/mspack/kwaj.h +118 -0
- data/ext/libmspack/mspack/kwajc.c +24 -0
- data/ext/libmspack/mspack/kwajd.c +561 -0
- data/ext/libmspack/mspack/lit.h +35 -0
- data/ext/libmspack/mspack/litc.c +24 -0
- data/ext/libmspack/mspack/litd.c +24 -0
- data/ext/libmspack/mspack/lzss.h +66 -0
- data/ext/libmspack/mspack/lzssd.c +93 -0
- data/ext/libmspack/mspack/lzx.h +221 -0
- data/ext/libmspack/mspack/lzxc.c +18 -0
- data/ext/libmspack/mspack/lzxd.c +895 -0
- data/ext/libmspack/mspack/mspack.def +28 -0
- data/ext/libmspack/mspack/mspack.h +2353 -0
- data/ext/libmspack/mspack/mszip.h +126 -0
- data/ext/libmspack/mspack/mszipc.c +18 -0
- data/ext/libmspack/mspack/mszipd.c +514 -0
- data/ext/libmspack/mspack/oab.h +60 -0
- data/ext/libmspack/mspack/oabc.c +24 -0
- data/ext/libmspack/mspack/oabd.c +408 -0
- data/ext/libmspack/mspack/qtm.h +128 -0
- data/ext/libmspack/mspack/qtmc.c +18 -0
- data/ext/libmspack/mspack/qtmd.c +489 -0
- data/ext/libmspack/mspack/readbits.h +207 -0
- data/ext/libmspack/mspack/readhuff.h +173 -0
- data/ext/libmspack/mspack/sha.h +15 -0
- data/ext/libmspack/mspack/system.c +239 -0
- data/ext/libmspack/mspack/system.h +124 -0
- data/ext/libmspack/mspack/szdd.h +39 -0
- data/ext/libmspack/mspack/szddc.c +24 -0
- data/ext/libmspack/mspack/szddd.c +247 -0
- data/ext/libmspack/rebuild.sh +8 -0
- data/ext/libmspack/test/cabd_c10 +19 -0
- data/ext/libmspack/test/cabd_compare +34 -0
- data/ext/libmspack/test/cabd_md5.c +161 -0
- data/ext/libmspack/test/cabd_memory.c +179 -0
- data/ext/libmspack/test/cabd_test.c +386 -0
- data/ext/libmspack/test/cabrip.c +81 -0
- data/ext/libmspack/test/chmd_compare +38 -0
- data/ext/libmspack/test/chmd_find.c +95 -0
- data/ext/libmspack/test/chmd_md5.c +67 -0
- data/ext/libmspack/test/chmd_order.c +144 -0
- data/ext/libmspack/test/chminfo.c +284 -0
- data/ext/libmspack/test/chmx.c +216 -0
- data/ext/libmspack/test/error.h +22 -0
- data/ext/libmspack/test/expand.c +79 -0
- data/ext/libmspack/test/md5.c +457 -0
- data/ext/libmspack/test/md5.h +165 -0
- data/ext/libmspack/test/md5_fh.h +123 -0
- data/ext/libmspack/test/msdecompile_md5 +24 -0
- data/ext/libmspack/test/msexpand_md5 +39 -0
- data/ext/libmspack/test/multifh.c +435 -0
- data/ext/libmspack/test/oabx.c +41 -0
- data/ext/libmspack/test/test_files/cabd/1.pl +84 -0
- data/ext/libmspack/test/test_files/cabd/2.pl +75 -0
- data/ext/libmspack/test/test_files/cabd/bad_folderindex.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/bad_nofiles.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/bad_nofolders.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/bad_signature.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/multi_basic_pt1.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/multi_basic_pt2.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/multi_basic_pt3.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/multi_basic_pt4.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/multi_basic_pt5.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/normal_255c_filename.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/normal_2files_1folder.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/partial_nodata.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/partial_nofiles.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/partial_nofolder.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/partial_shortextheader.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/partial_shortfile1.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/partial_shortfile2.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/partial_shortfolder.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/partial_shortheader.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/partial_str_nofname.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/partial_str_noninfo.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/partial_str_nonname.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/partial_str_nopinfo.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/partial_str_nopname.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/partial_str_shortfname.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/partial_str_shortninfo.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/partial_str_shortnname.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/partial_str_shortpinfo.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/partial_str_shortpname.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/reserve_---.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/reserve_--D.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/reserve_-F-.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/reserve_-FD.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/reserve_H--.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/reserve_H-D.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/reserve_HF-.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/reserve_HFD.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/search_basic.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/search_tricky1.cab +0 -0
- data/ext/libmspack/winbuild.sh +26 -0
- data/ext/x86_64-windows/libmspack.dll +0 -0
- data/lib/libmspack/constants.rb +9 -0
- data/lib/libmspack/exceptions.rb +12 -0
- data/lib/libmspack/mscab.rb +722 -0
- data/lib/libmspack/mschm.rb +301 -0
- data/lib/libmspack/mshlp.rb +15 -0
- data/lib/libmspack/mskwaj.rb +124 -0
- data/lib/libmspack/mslit.rb +18 -0
- data/lib/libmspack/msoab.rb +36 -0
- data/lib/libmspack/mspack.rb +208 -0
- data/lib/libmspack/msszdd.rb +81 -0
- data/lib/libmspack/system.rb +84 -0
- data/lib/libmspack/version.rb +4 -0
- data/lib/libmspack.rb +121 -0
- data/libmspack.gemspec +33 -0
- data/spec/libmspack_spec.rb +26 -0
- data/spec/spec_helper.rb +5 -0
- metadata +309 -0
@@ -0,0 +1,331 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
3
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
4
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
5
|
+
<head>
|
6
|
+
<style type="text/css">
|
7
|
+
dt {
|
8
|
+
font-weight:bold;
|
9
|
+
}
|
10
|
+
pre {
|
11
|
+
background-color:#F9F9F9;
|
12
|
+
border:1px dashed #2F6FAB;
|
13
|
+
color:black;
|
14
|
+
padding:1em;
|
15
|
+
}
|
16
|
+
table.wikitable {
|
17
|
+
background:none repeat scroll 0 0 #F9F9F9;
|
18
|
+
border:1px solid #AAAAAA;
|
19
|
+
border-collapse:collapse;
|
20
|
+
margin:1em 1em 1em 0;
|
21
|
+
}
|
22
|
+
.wikitable th, .wikitable td {
|
23
|
+
border:1px solid #AAAAAA;
|
24
|
+
padding:0.2em;
|
25
|
+
}
|
26
|
+
.wikitable th {
|
27
|
+
background:none repeat scroll 0 0 #F2F2F2;
|
28
|
+
text-align:center;
|
29
|
+
}
|
30
|
+
.wikitable caption {
|
31
|
+
font-weight:bold;
|
32
|
+
}
|
33
|
+
.c.source-c .de1, .c.source-c .de2 {font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;}
|
34
|
+
.c.source-c {font-family:monospace;}
|
35
|
+
.c.source-c .imp {font-weight: bold; color: red;}
|
36
|
+
.c.source-c li, .c.source-c .li1 {font-weight: normal; vertical-align:top;}
|
37
|
+
.c.source-c .ln {width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;}
|
38
|
+
.c.source-c .li2 {font-weight: bold; vertical-align:top;}
|
39
|
+
.c.source-c .kw1 {color: #b1b100;}
|
40
|
+
.c.source-c .kw2 {color: #000000; font-weight: bold;}
|
41
|
+
.c.source-c .kw3 {color: #000066;}
|
42
|
+
.c.source-c .kw4 {color: #993333;}
|
43
|
+
.c.source-c .co1 {color: #666666; font-style: italic;}
|
44
|
+
.c.source-c .co2 {color: #339933;}
|
45
|
+
.c.source-c .coMULTI {color: #808080; font-style: italic;}
|
46
|
+
.c.source-c .es0 {color: #000099; font-weight: bold;}
|
47
|
+
.c.source-c .es1 {color: #000099; font-weight: bold;}
|
48
|
+
.c.source-c .es2 {color: #660099; font-weight: bold;}
|
49
|
+
.c.source-c .es3 {color: #660099; font-weight: bold;}
|
50
|
+
.c.source-c .es4 {color: #660099; font-weight: bold;}
|
51
|
+
.c.source-c .es5 {color: #006699; font-weight: bold;}
|
52
|
+
.c.source-c .br0 {color: #009900;}
|
53
|
+
.c.source-c .sy0 {color: #339933;}
|
54
|
+
.c.source-c .st0 {color: #ff0000;}
|
55
|
+
.c.source-c .nu0 {color: #0000dd;}
|
56
|
+
.c.source-c .nu6 {color: #208080;}
|
57
|
+
.c.source-c .nu8 {color: #208080;}
|
58
|
+
.c.source-c .nu12 {color: #208080;}
|
59
|
+
.c.source-c .nu16 {color:#800080;}
|
60
|
+
.c.source-c .nu17 {color:#800080;}
|
61
|
+
.c.source-c .nu18 {color:#800080;}
|
62
|
+
.c.source-c .nu19 {color:#800080;}
|
63
|
+
.c.source-c .me1 {color: #202020;}
|
64
|
+
.c.source-c .me2 {color: #202020;}
|
65
|
+
.c.source-c .ln-xtra, .c.source-c li.ln-xtra, .c.source-c div.ln-xtra {background-color: #ffc;}
|
66
|
+
.c.source-c span.xtra { display:block; }
|
67
|
+
</style>
|
68
|
+
<meta name="author" content="Stuart Caie" />
|
69
|
+
<title>COMPRESS.EXE file formats: SZDD and KWAJ</title>
|
70
|
+
</head>
|
71
|
+
<body>
|
72
|
+
<h1>COMPRESS.EXE file formats: SZDD and KWAJ</h1>
|
73
|
+
|
74
|
+
<p>This document describes the <b>SZDD</b> and <b>KWAJ</b> file
|
75
|
+
formats which are implemented in the MS-DOS commands
|
76
|
+
<tt>COMPRESS.EXE</tt> and <tt>EXPAND.EXE</tt>.</p>
|
77
|
+
|
78
|
+
<p>Both formats compress a single file to another single file,
|
79
|
+
replacing the last character in the filename with an underscore or
|
80
|
+
dollar character, e.g. <tt>README.TXT</tt> becomes <tt>README.TX_</tt>
|
81
|
+
or <tt>README.TX$</tt>.</p>
|
82
|
+
|
83
|
+
<a name="SZDD_file_format"><h2>SZDD file format</h2></a>
|
84
|
+
|
85
|
+
<p>An SZDD file begins with this fixed header:</p>
|
86
|
+
|
87
|
+
<table class="wikitable">
|
88
|
+
<caption>SZDD header format</caption>
|
89
|
+
<tr><th>Offset</th><th>Length</th><th>Description</th></tr>
|
90
|
+
<tr><td>0x00</td><td>8</td><td>"SZDD" signature: 0x53,0x5A,0x44,0x44,0x88,0xF0,0x27,0x33</td></tr>
|
91
|
+
<tr><td>0x08</td><td>1</td><td>Compression mode: only "A" (0x41) is valid here</td></tr>
|
92
|
+
<tr><td>0x09</td><td>1</td><td>The character missing from the end of the filename (0=unknown)</td></tr>
|
93
|
+
<tr><td>0x0A</td><td>4</td><td>The integer length of the file when unpacked</td></tr>
|
94
|
+
</table>
|
95
|
+
|
96
|
+
<p>The header is immediately followed by the compressed data. The
|
97
|
+
following pseudocode explains how to unpack this data; it's a form of
|
98
|
+
the LZSS algorithm.</p>
|
99
|
+
|
100
|
+
<table class="wikitable">
|
101
|
+
<caption>SZDD decompression pseudocode</caption>
|
102
|
+
<tr><td>
|
103
|
+
<div dir="ltr" style="text-align: left;"><div class="c source-c" style="font-family:monospace;"><pre class="de1"><span class="kw4">char</span> window<span class="br0">[</span><span class="nu0">4096</span><span class="br0">]</span><span class="sy0">;</span>
|
104
|
+
<span class="kw4">int</span> pos <span class="sy0">=</span> <span class="nu0">4096</span> <span class="sy0">-</span> <span class="nu0">16</span><span class="sy0">;</span>
|
105
|
+
memset<span class="br0">(</span>window<span class="sy0">,</span> <span class="nu12">0x20</span><span class="sy0">,</span> <span class="nu0">4096</span><span class="br0">)</span><span class="sy0">;</span> <span class="coMULTI">/* window initially full of spaces */</span>
|
106
|
+
<span class="kw1">for</span> <span class="br0">(</span><span class="sy0">;;</span><span class="br0">)</span> <span class="br0">{</span>
|
107
|
+
<span class="kw4">int</span> control <span class="sy0">=</span> GETBYTE<span class="br0">(</span><span class="br0">)</span><span class="sy0">;</span>
|
108
|
+
<span class="kw1">if</span> <span class="br0">(</span>control <span class="sy0">==</span> EOF<span class="br0">)</span> <span class="kw2">break</span><span class="sy0">;</span> <span class="coMULTI">/* exit if no more to read */</span>
|
109
|
+
<span class="kw1">for</span> <span class="br0">(</span><span class="kw4">int</span> cbit <span class="sy0">=</span> <span class="nu12">0x01</span><span class="sy0">;</span> cbit <span class="sy0">&amp;</span> <span class="nu12">0xFF</span><span class="sy0">;</span> cbit <span class="sy0">&lt;&lt;=</span> <span class="nu0">1</span><span class="br0">)</span> <span class="br0">{</span>
|
110
|
+
<span class="kw1">if</span> <span class="br0">(</span>control <span class="sy0">&amp;</span> cbit<span class="br0">)</span> <span class="br0">{</span>
|
111
|
+
<span class="coMULTI">/* literal */</span>
|
112
|
+
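PUTBYTE<span class="br0">(</span>window<span class="br0">[</span>pos<span class="sy0">++</span><span class="br0">]</span> <span class="sy0">=</span> GETBYTE<span class="br0">(</span><span class="br0">)</span><span class="br0">)</span><span class="sy0">;</span> pos <span class="sy0">&amp;=</span> <span class="nu0">4095</span><span class="sy0">;</span> <span class="coMULTI">/* wrap within the 4096 byte window */</span>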
|
113
|
+
<span class="br0">}</span>
|
114
|
+
<span class="kw1">else</span> <span class="br0">{</span>
|
115
|
+
<span class="coMULTI">/* match */</span>
|
116
|
+
<span class="kw4">int</span> matchpos <span class="sy0">=</span> GETBYTE<span class="br0">(</span><span class="br0">)</span><span class="sy0">;</span>
|
117
|
+
<span class="kw4">int</span> matchlen <span class="sy0">=</span> GETBYTE<span class="br0">(</span><span class="br0">)</span><span class="sy0">;</span>
|
118
|
+
matchpos <span class="sy0">|=</span> <span class="br0">(</span>matchlen <span class="sy0">&amp;</span> <span class="nu12">0xF0</span><span class="br0">)</span> <span class="sy0">&lt;&lt;</span> <span class="nu0">4</span><span class="sy0">;</span>
|
119
|
+
matchlen <span class="sy0">=</span> <span class="br0">(</span>matchlen <span class="sy0">&amp;</span> <span class="nu12">0x0F</span><span class="br0">)</span> <span class="sy0">+</span> <span class="nu0">3</span><span class="sy0">;</span>
|
120
|
+
<span class="kw1">while</span> <span class="br0">(</span>matchlen<span class="sy0">--</span><span class="br0">)</span> <span class="br0">{</span>
|
121
|
+
PUTBYTE<span class="br0">(</span>window<span class="br0">[</span>pos<span class="sy0">++</span><span class="br0">]</span> <span class="sy0">=</span> window<span class="br0">[</span>matchpos<span class="sy0">++</span><span class="br0">]</span><span class="br0">)</span><span class="sy0">;</span>
|
122
|
+
pos <span class="sy0">&amp;=</span> <span class="nu0">4095</span><span class="sy0">;</span> matchpos <span class="sy0">&amp;=</span> <span class="nu0">4095</span><span class="sy0">;</span>
|
123
|
+
<span class="br0">}</span>
|
124
|
+
<span class="br0">}</span>
|
125
|
+
<span class="br0">}</span>
|
126
|
+
<span class="br0">}</span></pre></div></div>
|
127
|
+
</td></tr></table>
|
128
|
+
|
129
|
+
<p>There is also a variant SZDD format seen in the installation
|
130
|
+
package for QBasic 4.5, so I call it the QBasic variant. It has a
|
131
|
+
different header and the <tt>pos</tt> variable in the pseudocode above
|
132
|
+
is set to <tt>4096-18</tt> instead of <tt>4096-16</tt>.</p>
|
133
|
+
|
134
|
+
<table class="wikitable">
|
135
|
+
<caption>QBasic SZDD variant header format</caption>
|
136
|
+
<tr><th>Offset</th><th>Length</th><th>Description</th></tr>
|
137
|
+
<tr><td>0x00</td><td>8</td><td>"SZ" signature: 0x53,0x5A,0x20,0x88,0xF0,0x27,0x33,0xD1</td></tr>
|
138
|
+
<tr><td>0x08</td><td>4</td><td>The integer length of the file when unpacked</td></tr></table>
|
139
|
+
|
140
|
+
<a name="KWAJ_file_format"><h2>KWAJ file format</h2></a>
|
141
|
+
|
142
|
+
<p>A KWAJ file begins with this fixed header:</p>
|
143
|
+
|
144
|
+
<table class="wikitable">
|
145
|
+
<caption>KWAJ header format</caption>
|
146
|
+
<tr><th>Offset</th><th>Length</th><th>Description</th></tr>
|
147
|
+
<tr><td>0x00</td><td>8</td><td>"KWAJ" signature: 0x4B,0x57,0x41,0x4A,0x88,0xF0,0x27,0xD1</td></tr>
|
148
|
+
<tr><td>0x08</td><td>2</td><td>compression method (0-4)</td></tr>
|
149
|
+
<tr><td>0x0A</td><td>2</td><td>file offset of compressed data</td></tr>
|
150
|
+
<tr><td>0x0C</td><td>2</td><td>header flags to mark header extensions</td></tr>
|
151
|
+
</table>
|
152
|
+
|
153
|
+
<a name="Compression_methods"><h3>Compression methods</h3></a>
|
154
|
+
|
155
|
+
<p>The "compression method" field indicates the type of data
|
156
|
+
compression used:</p>
|
157
|
+
|
158
|
+
<ol start="0">
|
159
|
+
<li>No compression</li>
|
160
|
+
<li>No compression, data is XORed with byte 0xFF</li>
|
161
|
+
<li>The same compression method as regular SZDD</li>
|
162
|
+
<li>LZ + Huffman "Jeff Johnson" compression</li>
|
163
|
+
<li>MS-ZIP</li>
|
164
|
+
</ol>
|
165
|
+
|
166
|
+
<a name="Header_extensions"><h3>Header extensions</h3></a>
|
167
|
+
|
168
|
+
<p>Header extensions immediately follow the header.</p>
|
169
|
+
|
170
|
+
<p>If you don't care about the header extensions, use the file offset
|
171
|
+
to skip to the compressed data.</p>
|
172
|
+
|
173
|
+
<p>The header extensions appear in this order:</p>
|
174
|
+
|
175
|
+
<dl>
|
176
|
+
<dt>When header flags bit 0 is set</dt><dd>4 bytes: decompressed length of file</dd>
|
177
|
+
<dt>When header flags bit 1 is set</dt><dd>2 bytes: unknown purpose</dd>
|
178
|
+
<dt>When header flags bit 2 is set</dt><dd>2 bytes: length of data, followed by that many bytes of (unknown purpose) data</dd>
|
179
|
+
<dt>When header flags bit 3 is set</dt><dd>1-9 bytes: null-terminated string with max length 8: file name</dd>
|
180
|
+
<dt>When header flags bit 4 is set</dt><dd>1-4 bytes: null-terminated string with max length 3: file extension</dd>
|
181
|
+
<dt>When header flags bit 5 is set</dt><dd>2 bytes: length of data, followed by that many bytes of (arbitrary text) data</dd>
|
182
|
+
</dl>
|
183
|
+
|
184
|
+
<a name="KWAJ_compression_method_3"><h3>KWAJ compression method 3</h3></a>
|
185
|
+
|
186
|
+
<p>Compression method 3 is unique to the KWAJ format. It's an
|
187
|
+
LZ+Huffman algorithm created by Jeff Johnson.</p>
|
188
|
+
|
189
|
+
<p>Bits are always read from MSB to LSB, one byte at a time.</p>
|
190
|
+
|
191
|
+
<p>There are three parts:</p>
|
192
|
+
|
193
|
+
<ol>
|
194
|
+
<li>The data starts off with 6 nybbles; 4 bits each. Each nybble is
|
195
|
+
between 0-3 and is the encoding type of the 5 huffman length lists to
|
196
|
+
follow. The 6th nybble is just padding.</li>
|
197
|
+
<li>Then follow 5 huffman code length lists.</li>
|
198
|
+
<li>Then follows the compressed data, which is a mix of huffman
|
199
|
+
symbols and raw bits.</li>
|
200
|
+
</ol>
|
201
|
+
|
202
|
+
<a name="Huffman_code_length_lists"><h4>Huffman code length lists</h4></a>
|
203
|
+
|
204
|
+
<p>KWAJ uses 5 huffman trees. They always have the same number of
|
205
|
+
symbols in them. They are, in order:</p>
|
206
|
+
|
207
|
+
<ol>
|
208
|
+
<li>16 symbol tree (0-15) to store match run lengths (MATCHLEN)</li>
|
209
|
+
<li>16 symbol tree (0-15) to store match run lengths immediately following a short literal run (MATCHLEN2)</li>
|
210
|
+
<li>32 symbol tree (0-31) to store literal run lengths (LITLEN)</li>
|
211
|
+
<li>64 symbol tree (0-63) to store the upper 6 bits of match distances (OFFSET)</li>
|
212
|
+
<li>256 symbol tree (0-255) to store literals (LITERAL)</li>
|
213
|
+
</ol>
|
214
|
+
|
215
|
+
<p>Canonical huffman codes are used, which means you simply need to
|
216
|
+
know how many symbols are in each huffman tree (given above), and how long
|
217
|
+
each huffman symbol is.</p>
|
218
|
+
|
219
|
+
<p>How the symbol lengths are encoded depends on the encoding type, as
|
220
|
+
given by the 6 nybbles at the start of the compressed data.</p>
|
221
|
+
|
222
|
+
<p>Symbol lengths are read in ascending order, and the number of
|
223
|
+
symbols to read is implied by which tree you're defining.</p>
|
224
|
+
|
225
|
+
<dl>
|
226
|
+
<dt>Huffman code length list, encoding type 0</dt>
|
227
|
+
<dd>All symbols have the same length, implied by the number of symbols in the tree:
|
228
|
+
<ul>
|
229
|
+
<li>16 symbols -> all symbols are length 4</li>
|
230
|
+
<li>32 symbols -> all symbols are length 5</li>
|
231
|
+
<li>64 symbols -> all symbols are length 6</li>
|
232
|
+
<li>256 symbols -> all symbols are length 8</li>
|
233
|
+
</ul>
|
234
|
+
</dd>
|
235
|
+
<dd>You don't need to read anything.</dd>
|
236
|
+
</dl>
|
237
|
+
|
238
|
+
<dl>
|
239
|
+
<dt>Huffman code length list, encoding type 1</dt>
|
240
|
+
<dd>A run-length encoding is used:
|
241
|
+
<ul>
|
242
|
+
<li>read 4 bits for the first symbol length (0-15)</li>
|
243
|
+
<li>LOOP:
|
244
|
+
<ul>
|
245
|
+
<li>read 1 bit == 0 if symbol length is the same as the previous, OTHERWISE:</li>
|
246
|
+
<li>read 1 bit == 0 if symbol length is previous + 1, OTHERWISE:</li>
|
247
|
+
<li>read 4 bits for symbol length (0-15)</li>
|
248
|
+
</ul>
|
249
|
+
</li>
|
250
|
+
</ul>
|
251
|
+
</dd>
|
252
|
+
</dl>
|
253
|
+
|
254
|
+
<dl>
|
255
|
+
<dt>Huffman code length list, encoding type 2</dt>
|
256
|
+
<dd>Another run-length encoding is used:
|
257
|
+
<ul>
|
258
|
+
<li>read 4 bits for the first symbol length (0-15)</li>
|
259
|
+
<li>LOOP:
|
260
|
+
<ul>
|
261
|
+
<li> read 2 bits as selector (0-3):
|
262
|
+
<ul>
|
263
|
+
<li> selector == 3: read 4 bits for symbol length, OTHERWISE:</li>
|
264
|
+
<li> symbol length is previous symbol + (selector-1), i.e. -1, 0 or +1</li>
|
265
|
+
</ul>
|
266
|
+
</li>
|
267
|
+
</ul>
|
268
|
+
</li>
|
269
|
+
</ul>
|
270
|
+
</dd>
|
271
|
+
</dl>
|
272
|
+
|
273
|
+
<dl>
|
274
|
+
<dt>Huffman code length list, encoding type 3</dt>
|
275
|
+
<dd>There is no compression. Read 4 bits per symbol (0-15).</dd>
|
276
|
+
</dl>
|
277
|
+
|
278
|
+
<a name="Compressed_data"><h4>Compressed data</h4></a>
|
279
|
+
|
280
|
+
<p>At this point, the compressed data begins.</p>
|
281
|
+
|
282
|
+
<p>We have a 4096 byte ring buffer, initially filled with byte 0x20
|
283
|
+
(ASCII space). Unlike the SZDD format, the starting position in the
|
284
|
+
buffer is irrelevant, as match positions are stored relative to the
|
285
|
+
current position in the window, not as absolute positions in the
|
286
|
+
window.</p>
|
287
|
+
|
288
|
+
<p>Pseudo-code:</p>
|
289
|
+
<pre>
|
290
|
+
ring buffer position = 4096-17
|
291
|
+
selected table = MATCHLEN
|
292
|
+
LOOP:
|
293
|
+
code = read huffman code using selected table (MATCHLEN or MATCHLEN2)
|
294
|
+
if EOF reached, exit loop
|
295
|
+
if code > 0, this is a match:
|
296
|
+
match length = code + 2
|
297
|
+
x = read huffman code using OFFSET table
|
298
|
+
y = read 6 bits
|
299
|
+
match offset = current ring buffer position - (x&lt;&lt;6 | y)
|
300
|
+
copy match as output and into the ring buffer
|
301
|
+
selected table = MATCHLEN
|
302
|
+
if code == 0, this is a run of literals:
|
303
|
+
x = read huffman code using LITLEN table
|
304
|
+
if x != 31, selected table = MATCHLEN2
|
305
|
+
read {x+1} literals using LITERAL huffman table, copy as output and into the ring buffer
|
306
|
+
</pre>
|
307
|
+
|
308
|
+
<a name="MSZIP"><h2>MS-ZIP</h2></a>
|
309
|
+
|
310
|
+
KWAJ type 4 compression is called MS-ZIP, because it is almost
|
311
|
+
identical to the MS-ZIP compression found in Microsoft Cabinet files.
|
312
|
+
|
313
|
+
Each 32768 bytes of data is compressed independently using Phil
|
314
|
+
Katz's DEFLATE algorithm. However, the history window is shared
|
315
|
+
between blocks, so they must be unpacked in order.
|
316
|
+
The format of each block is as follows:
|
317
|
+
|
318
|
+
<table class="wikitable">
|
319
|
+
<caption>KWAJ MS-ZIP block format</caption>
|
320
|
+
<tr><th>Offset</th><th>Length</th><th>Description</th></tr>
|
321
|
+
<tr><td>0</td><td>2</td><td>Compressed length of this block (n).
|
322
|
+
Stored in Intel byte order.
|
323
|
+
Doesn't include these two bytes.</td></tr>
|
324
|
+
<tr><td>2</td><td>2</td><td>"CK" in ASCII (0x43, 0x4B)</td></tr>
|
325
|
+
<tr><td>4</td><td>n-2</td><td>Data compressed in DEFLATE format</td></tr>
|
326
|
+
</table>
|
327
|
+
|
328
|
+
The final block will unpack to 1-32768 bytes. It will be followed by two
|
329
|
+
zero bytes.
|
330
|
+
|
331
|
+
</body></html>
|
@@ -0,0 +1,127 @@
|
|
1
|
+
/* This file is part of libmspack.
|
2
|
+
* (C) 2003-2004 Stuart Caie.
|
3
|
+
*
|
4
|
+
* libmspack is free software; you can redistribute it and/or modify it under
|
5
|
+
* the terms of the GNU Lesser General Public License (LGPL) version 2.1
|
6
|
+
*
|
7
|
+
* For further details, see the file COPYING.LIB distributed with libmspack
|
8
|
+
*/
|
9
|
+
|
10
|
+
#ifndef MSPACK_CAB_H
|
11
|
+
#define MSPACK_CAB_H 1
|
12
|
+
|
13
|
+
#include <mszip.h>
|
14
|
+
#include <qtm.h>
|
15
|
+
#include <lzx.h>
|
16
|
+
|
17
|
+
/* generic CAB definitions */
|
18
|
+
|
19
|
+
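/* structure offsets: byte offsets of each field within the CAB header, header extension, folder, file and data block records; each *_SIZEOF is the size of that record's fixed-length part */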
|
20
|
+
#define cfhead_Signature (0x00)
|
21
|
+
#define cfhead_CabinetSize (0x08)
|
22
|
+
#define cfhead_FileOffset (0x10)
|
23
|
+
#define cfhead_MinorVersion (0x18)
|
24
|
+
#define cfhead_MajorVersion (0x19)
|
25
|
+
#define cfhead_NumFolders (0x1A)
|
26
|
+
#define cfhead_NumFiles (0x1C)
|
27
|
+
#define cfhead_Flags (0x1E)
|
28
|
+
#define cfhead_SetID (0x20)
|
29
|
+
#define cfhead_CabinetIndex (0x22)
|
30
|
+
#define cfhead_SIZEOF (0x24)
|
31
|
+
#define cfheadext_HeaderReserved (0x00)
|
32
|
+
#define cfheadext_FolderReserved (0x02)
|
33
|
+
#define cfheadext_DataReserved (0x03)
|
34
|
+
#define cfheadext_SIZEOF (0x04)
|
35
|
+
#define cffold_DataOffset (0x00)
|
36
|
+
#define cffold_NumBlocks (0x04)
|
37
|
+
#define cffold_CompType (0x06)
|
38
|
+
#define cffold_SIZEOF (0x08)
|
39
|
+
#define cffile_UncompressedSize (0x00)
|
40
|
+
#define cffile_FolderOffset (0x04)
|
41
|
+
#define cffile_FolderIndex (0x08)
|
42
|
+
#define cffile_Date (0x0A)
|
43
|
+
#define cffile_Time (0x0C)
|
44
|
+
#define cffile_Attribs (0x0E)
|
45
|
+
#define cffile_SIZEOF (0x10)
|
46
|
+
#define cfdata_CheckSum (0x00)
|
47
|
+
#define cfdata_CompressedSize (0x04)
|
48
|
+
#define cfdata_UncompressedSize (0x06)
|
49
|
+
#define cfdata_SIZEOF (0x08)
|
50
|
+
|
51
|
+
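/* flags: cffoldCOMPTYPE_* are compression type values (compare after masking with cffoldCOMPTYPE_MASK), cfhead* are header flag bits, and cffileCONTINUED_* are special folder index values for files that continue from/to another cabinet */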
|
52
|
+
#define cffoldCOMPTYPE_MASK (0x000f)
|
53
|
+
#define cffoldCOMPTYPE_NONE (0x0000)
|
54
|
+
#define cffoldCOMPTYPE_MSZIP (0x0001)
|
55
|
+
#define cffoldCOMPTYPE_QUANTUM (0x0002)
|
56
|
+
#define cffoldCOMPTYPE_LZX (0x0003)
|
57
|
+
#define cfheadPREV_CABINET (0x0001)
|
58
|
+
#define cfheadNEXT_CABINET (0x0002)
|
59
|
+
#define cfheadRESERVE_PRESENT (0x0004)
|
60
|
+
#define cffileCONTINUED_FROM_PREV (0xFFFD)
|
61
|
+
#define cffileCONTINUED_TO_NEXT (0xFFFE)
|
62
|
+
#define cffileCONTINUED_PREV_AND_NEXT (0xFFFF)
|
63
|
+
|
64
|
+
/* CAB data blocks are <= 32768 bytes in uncompressed form. Uncompressed
|
65
|
+
* blocks have zero growth. MSZIP guarantees that it won't grow above
|
66
|
+
* uncompressed size by more than 12 bytes. LZX guarantees it won't grow
|
67
|
+
* more than 6144 bytes. Quantum has no documentation, but the largest
|
68
|
+
* block seen in the wild is 337 bytes above uncompressed size.
|
69
|
+
*/
|
70
|
+
#define CAB_BLOCKMAX (32768)
|
71
|
+
#define CAB_INPUTMAX (CAB_BLOCKMAX+6144)
|
72
|
+
|
73
|
+
/* CAB compression definitions */
|
74
|
+
|
75
|
+
struct mscab_compressor_p { /* CAB compressor object; only placeholder members so far */
|
76
|
+
struct mscab_compressor base; /* type declared elsewhere (presumably mspack.h) -- TODO confirm */
|
77
|
+
struct mspack_system *system; /* presumably the mspack_system given at creation -- TODO confirm */
|
78
|
+
/* todo: no further compressor state has been defined yet */
|
79
|
+
};
|
80
|
+
|
81
|
+
/* CAB decompression definitions */
|
82
|
+
|
83
|
+
struct mscabd_decompress_state { /* extraction state: current folder/position, chosen decompressor, one buffered input block */
|
84
|
+
struct mscabd_folder_p *folder; /* current folder we're extracting from */
|
85
|
+
struct mscabd_folder_data *data; /* current folder split we're in */
|
86
|
+
unsigned int offset; /* uncompressed offset within folder */
|
87
|
+
unsigned int block; /* which block are we decompressing? */
|
88
|
+
struct mspack_system sys; /* special I/O code for decompressor (held by value, not a pointer) */
|
89
|
+
int comp_type; /* type of compression used by folder */
|
90
|
+
int (*decompress)(void *, off_t); /* decompressor code; presumably invoked with 'state' below -- TODO confirm */
|
91
|
+
void *state; /* decompressor state */
|
92
|
+
struct mscabd_cabinet_p *incab; /* cabinet where input data comes from */
|
93
|
+
struct mspack_file *infh; /* input file handle */
|
94
|
+
struct mspack_file *outfh; /* output file handle */
|
95
|
+
unsigned char *i_ptr, *i_end; /* input data consumed, end */
|
96
|
+
unsigned char input[CAB_INPUTMAX]; /* one input block of data; CAB_INPUTMAX is CAB_BLOCKMAX+6144 bytes */
|
97
|
+
};
|
98
|
+
|
99
|
+
struct mscab_decompressor_p { /* CAB decompressor object */
|
100
|
+
struct mscab_decompressor base; /* type declared elsewhere (presumably mspack.h) -- TODO confirm */
|
101
|
+
struct mscabd_decompress_state *d; /* decompression state (struct mscabd_decompress_state) */
|
102
|
+
struct mspack_system *system; /* presumably the mspack_system given at creation -- TODO confirm */
|
103
|
+
int param[3]; /* !!! MATCH THIS TO NUM OF PARAMS IN MSPACK.H !!! */
|
104
|
+
int error, read_error; /* presumably last error codes; exact semantics not visible here -- TODO confirm */
|
105
|
+
};
|
106
|
+
|
107
|
+
struct mscabd_cabinet_p { /* cabinet record: 'base' plus data-block bookkeeping */
|
108
|
+
struct mscabd_cabinet base; /* type declared elsewhere (presumably mspack.h) -- TODO confirm */
|
109
|
+
off_t blocks_off; /* offset to data blocks */
|
110
|
+
int block_resv; /* reserved space in data blocks */
|
111
|
+
};
|
112
|
+
|
113
|
+
/* there is one of these for every cabinet a folder spans */
|
114
|
+
struct mscabd_folder_data { /* one span of a folder; spans are chained through 'next' */
|
115
|
+
struct mscabd_folder_data *next; /* next span of the same folder; presumably NULL at the end -- TODO confirm */
|
116
|
+
struct mscabd_cabinet_p *cab; /* cabinet file of this folder span */
|
117
|
+
off_t offset; /* cabinet offset of first datablock */
|
118
|
+
};
|
119
|
+
|
120
|
+
struct mscabd_folder_p { /* folder record: 'base' plus data location and merge bookkeeping */
|
121
|
+
struct mscabd_folder base; /* type declared elsewhere (presumably mspack.h) -- TODO confirm */
|
122
|
+
struct mscabd_folder_data data; /* where are the data blocks? (first span, embedded by value) */
|
123
|
+
struct mscabd_file *merge_prev; /* first file needing backwards merge */
|
124
|
+
struct mscabd_file *merge_next; /* first file needing forwards merge */
|
125
|
+
};
|
126
|
+
|
127
|
+
#endif
|
@@ -0,0 +1,24 @@
|
|
1
|
+
/* This file is part of libmspack.
|
2
|
+
* (C) 2003-2004 Stuart Caie.
|
3
|
+
*
|
4
|
+
* libmspack is free software; you can redistribute it and/or modify it under
|
5
|
+
* the terms of the GNU Lesser General Public License (LGPL) version 2.1
|
6
|
+
*
|
7
|
+
* For further details, see the file COPYING.LIB distributed with libmspack
|
8
|
+
*/
|
9
|
+
|
10
|
+
/* CAB compression implementation */
|
11
|
+
|
12
|
+
#include <system.h>
|
13
|
+
#include <cab.h>
|
14
|
+
|
15
|
+
struct mscab_compressor *
|
16
|
+
mspack_create_cab_compressor(struct mspack_system *sys)
|
17
|
+
{
|
18
|
+
/* todo: CAB compression is not implemented; sys is unused and no compressor is ever created */
|
19
|
+
return NULL; /* always NULL, so callers never receive a usable compressor */
|
20
|
+
}
|
21
|
+
|
22
|
+
void mspack_destroy_cab_compressor(struct mscab_compressor *self) {
|
23
|
+
/* todo: not implemented; currently a no-op that leaves self untouched */
|
24
|
+
}
|