libmspack 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/.travis.yml +5 -0
- data/.yardopts +1 -0
- data/Gemfile +4 -0
- data/README.md +75 -0
- data/Rakefile +22 -0
- data/UNLICENSE +24 -0
- data/ext/Rakefile +16 -0
- data/ext/i386-windows/libmspack.dll +0 -0
- data/ext/libmspack/AUTHORS +12 -0
- data/ext/libmspack/COPYING.LIB +504 -0
- data/ext/libmspack/ChangeLog +491 -0
- data/ext/libmspack/Makefile.am +100 -0
- data/ext/libmspack/NEWS +0 -0
- data/ext/libmspack/README +130 -0
- data/ext/libmspack/TODO +8 -0
- data/ext/libmspack/cleanup.sh +9 -0
- data/ext/libmspack/configure.ac +50 -0
- data/ext/libmspack/debian/changelog +6 -0
- data/ext/libmspack/debian/control +14 -0
- data/ext/libmspack/debian/rules +101 -0
- data/ext/libmspack/doc/Doxyfile.in +22 -0
- data/ext/libmspack/doc/Makefile.in +14 -0
- data/ext/libmspack/doc/szdd_kwaj_format.html +331 -0
- data/ext/libmspack/libmspack.pc.in +10 -0
- data/ext/libmspack/mspack/cab.h +127 -0
- data/ext/libmspack/mspack/cabc.c +24 -0
- data/ext/libmspack/mspack/cabd.c +1444 -0
- data/ext/libmspack/mspack/chm.h +122 -0
- data/ext/libmspack/mspack/chmc.c +24 -0
- data/ext/libmspack/mspack/chmd.c +1392 -0
- data/ext/libmspack/mspack/crc32.c +95 -0
- data/ext/libmspack/mspack/crc32.h +17 -0
- data/ext/libmspack/mspack/des.h +15 -0
- data/ext/libmspack/mspack/hlp.h +33 -0
- data/ext/libmspack/mspack/hlpc.c +24 -0
- data/ext/libmspack/mspack/hlpd.c +24 -0
- data/ext/libmspack/mspack/kwaj.h +118 -0
- data/ext/libmspack/mspack/kwajc.c +24 -0
- data/ext/libmspack/mspack/kwajd.c +561 -0
- data/ext/libmspack/mspack/lit.h +35 -0
- data/ext/libmspack/mspack/litc.c +24 -0
- data/ext/libmspack/mspack/litd.c +24 -0
- data/ext/libmspack/mspack/lzss.h +66 -0
- data/ext/libmspack/mspack/lzssd.c +93 -0
- data/ext/libmspack/mspack/lzx.h +221 -0
- data/ext/libmspack/mspack/lzxc.c +18 -0
- data/ext/libmspack/mspack/lzxd.c +895 -0
- data/ext/libmspack/mspack/mspack.def +28 -0
- data/ext/libmspack/mspack/mspack.h +2353 -0
- data/ext/libmspack/mspack/mszip.h +126 -0
- data/ext/libmspack/mspack/mszipc.c +18 -0
- data/ext/libmspack/mspack/mszipd.c +514 -0
- data/ext/libmspack/mspack/oab.h +60 -0
- data/ext/libmspack/mspack/oabc.c +24 -0
- data/ext/libmspack/mspack/oabd.c +408 -0
- data/ext/libmspack/mspack/qtm.h +128 -0
- data/ext/libmspack/mspack/qtmc.c +18 -0
- data/ext/libmspack/mspack/qtmd.c +489 -0
- data/ext/libmspack/mspack/readbits.h +207 -0
- data/ext/libmspack/mspack/readhuff.h +173 -0
- data/ext/libmspack/mspack/sha.h +15 -0
- data/ext/libmspack/mspack/system.c +239 -0
- data/ext/libmspack/mspack/system.h +124 -0
- data/ext/libmspack/mspack/szdd.h +39 -0
- data/ext/libmspack/mspack/szddc.c +24 -0
- data/ext/libmspack/mspack/szddd.c +247 -0
- data/ext/libmspack/rebuild.sh +8 -0
- data/ext/libmspack/test/cabd_c10 +19 -0
- data/ext/libmspack/test/cabd_compare +34 -0
- data/ext/libmspack/test/cabd_md5.c +161 -0
- data/ext/libmspack/test/cabd_memory.c +179 -0
- data/ext/libmspack/test/cabd_test.c +386 -0
- data/ext/libmspack/test/cabrip.c +81 -0
- data/ext/libmspack/test/chmd_compare +38 -0
- data/ext/libmspack/test/chmd_find.c +95 -0
- data/ext/libmspack/test/chmd_md5.c +67 -0
- data/ext/libmspack/test/chmd_order.c +144 -0
- data/ext/libmspack/test/chminfo.c +284 -0
- data/ext/libmspack/test/chmx.c +216 -0
- data/ext/libmspack/test/error.h +22 -0
- data/ext/libmspack/test/expand.c +79 -0
- data/ext/libmspack/test/md5.c +457 -0
- data/ext/libmspack/test/md5.h +165 -0
- data/ext/libmspack/test/md5_fh.h +123 -0
- data/ext/libmspack/test/msdecompile_md5 +24 -0
- data/ext/libmspack/test/msexpand_md5 +39 -0
- data/ext/libmspack/test/multifh.c +435 -0
- data/ext/libmspack/test/oabx.c +41 -0
- data/ext/libmspack/test/test_files/cabd/1.pl +84 -0
- data/ext/libmspack/test/test_files/cabd/2.pl +75 -0
- data/ext/libmspack/test/test_files/cabd/bad_folderindex.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/bad_nofiles.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/bad_nofolders.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/bad_signature.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/multi_basic_pt1.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/multi_basic_pt2.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/multi_basic_pt3.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/multi_basic_pt4.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/multi_basic_pt5.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/normal_255c_filename.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/normal_2files_1folder.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/partial_nodata.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/partial_nofiles.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/partial_nofolder.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/partial_shortextheader.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/partial_shortfile1.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/partial_shortfile2.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/partial_shortfolder.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/partial_shortheader.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/partial_str_nofname.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/partial_str_noninfo.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/partial_str_nonname.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/partial_str_nopinfo.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/partial_str_nopname.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/partial_str_shortfname.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/partial_str_shortninfo.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/partial_str_shortnname.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/partial_str_shortpinfo.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/partial_str_shortpname.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/reserve_---.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/reserve_--D.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/reserve_-F-.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/reserve_-FD.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/reserve_H--.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/reserve_H-D.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/reserve_HF-.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/reserve_HFD.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/search_basic.cab +0 -0
- data/ext/libmspack/test/test_files/cabd/search_tricky1.cab +0 -0
- data/ext/libmspack/winbuild.sh +26 -0
- data/ext/x86_64-windows/libmspack.dll +0 -0
- data/lib/libmspack/constants.rb +9 -0
- data/lib/libmspack/exceptions.rb +12 -0
- data/lib/libmspack/mscab.rb +722 -0
- data/lib/libmspack/mschm.rb +301 -0
- data/lib/libmspack/mshlp.rb +15 -0
- data/lib/libmspack/mskwaj.rb +124 -0
- data/lib/libmspack/mslit.rb +18 -0
- data/lib/libmspack/msoab.rb +36 -0
- data/lib/libmspack/mspack.rb +208 -0
- data/lib/libmspack/msszdd.rb +81 -0
- data/lib/libmspack/system.rb +84 -0
- data/lib/libmspack/version.rb +4 -0
- data/lib/libmspack.rb +121 -0
- data/libmspack.gemspec +33 -0
- data/spec/libmspack_spec.rb +26 -0
- data/spec/spec_helper.rb +5 -0
- metadata +309 -0
|
@@ -0,0 +1,331 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
|
2
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
|
3
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
|
4
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
|
5
|
+
<head>
|
|
6
|
+
<style type="text/css">
|
|
7
|
+
dt {
|
|
8
|
+
font-weight:bold;
|
|
9
|
+
}
|
|
10
|
+
pre {
|
|
11
|
+
background-color:#F9F9F9;
|
|
12
|
+
border:1px dashed #2F6FAB;
|
|
13
|
+
color:black;
|
|
14
|
+
padding:1em;
|
|
15
|
+
}
|
|
16
|
+
table.wikitable {
|
|
17
|
+
background:none repeat scroll 0 0 #F9F9F9;
|
|
18
|
+
border:1px solid #AAAAAA;
|
|
19
|
+
border-collapse:collapse;
|
|
20
|
+
margin:1em 1em 1em 0;
|
|
21
|
+
}
|
|
22
|
+
.wikitable th, .wikitable td {
|
|
23
|
+
border:1px solid #AAAAAA;
|
|
24
|
+
padding:0.2em;
|
|
25
|
+
}
|
|
26
|
+
.wikitable th {
|
|
27
|
+
background:none repeat scroll 0 0 #F2F2F2;
|
|
28
|
+
text-align:center;
|
|
29
|
+
}
|
|
30
|
+
.wikitable caption {
|
|
31
|
+
font-weight:bold;
|
|
32
|
+
}
|
|
33
|
+
.c.source-c .de1, .c.source-c .de2 {font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;}
|
|
34
|
+
.c.source-c {font-family:monospace;}
|
|
35
|
+
.c.source-c .imp {font-weight: bold; color: red;}
|
|
36
|
+
.c.source-c li, .c.source-c .li1 {font-weight: normal; vertical-align:top;}
|
|
37
|
+
.c.source-c .ln {width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;}
|
|
38
|
+
.c.source-c .li2 {font-weight: bold; vertical-align:top;}
|
|
39
|
+
.c.source-c .kw1 {color: #b1b100;}
|
|
40
|
+
.c.source-c .kw2 {color: #000000; font-weight: bold;}
|
|
41
|
+
.c.source-c .kw3 {color: #000066;}
|
|
42
|
+
.c.source-c .kw4 {color: #993333;}
|
|
43
|
+
.c.source-c .co1 {color: #666666; font-style: italic;}
|
|
44
|
+
.c.source-c .co2 {color: #339933;}
|
|
45
|
+
.c.source-c .coMULTI {color: #808080; font-style: italic;}
|
|
46
|
+
.c.source-c .es0 {color: #000099; font-weight: bold;}
|
|
47
|
+
.c.source-c .es1 {color: #000099; font-weight: bold;}
|
|
48
|
+
.c.source-c .es2 {color: #660099; font-weight: bold;}
|
|
49
|
+
.c.source-c .es3 {color: #660099; font-weight: bold;}
|
|
50
|
+
.c.source-c .es4 {color: #660099; font-weight: bold;}
|
|
51
|
+
.c.source-c .es5 {color: #006699; font-weight: bold;}
|
|
52
|
+
.c.source-c .br0 {color: #009900;}
|
|
53
|
+
.c.source-c .sy0 {color: #339933;}
|
|
54
|
+
.c.source-c .st0 {color: #ff0000;}
|
|
55
|
+
.c.source-c .nu0 {color: #0000dd;}
|
|
56
|
+
.c.source-c .nu6 {color: #208080;}
|
|
57
|
+
.c.source-c .nu8 {color: #208080;}
|
|
58
|
+
.c.source-c .nu12 {color: #208080;}
|
|
59
|
+
.c.source-c .nu16 {color:#800080;}
|
|
60
|
+
.c.source-c .nu17 {color:#800080;}
|
|
61
|
+
.c.source-c .nu18 {color:#800080;}
|
|
62
|
+
.c.source-c .nu19 {color:#800080;}
|
|
63
|
+
.c.source-c .me1 {color: #202020;}
|
|
64
|
+
.c.source-c .me2 {color: #202020;}
|
|
65
|
+
.c.source-c .ln-xtra, .c.source-c li.ln-xtra, .c.source-c div.ln-xtra {background-color: #ffc;}
|
|
66
|
+
.c.source-c span.xtra { display:block; }
|
|
67
|
+
</style>
|
|
68
|
+
<meta name="author" content="Stuart Caie" />
|
|
69
|
+
<title>COMPRESS.EXE file formats: SZDD and KWAJ</title>
|
|
70
|
+
</head>
|
|
71
|
+
<body>
|
|
72
|
+
<h1>COMPRESS.EXE file formats: SZDD and KWAJ</h1>
|
|
73
|
+
|
|
74
|
+
<p>This document describes the <b>SZDD</b> and <b>KWAJ</b> file
|
|
75
|
+
formats which are implemented in the MS-DOS commands
|
|
76
|
+
<tt>COMPRESS.EXE</tt> and <tt>EXPAND.EXE</tt>.</p>
|
|
77
|
+
|
|
78
|
+
<p>Both formats compress a single file to another single file,
|
|
79
|
+
replacing the last character in the filename with an underscore or
|
|
80
|
+
dollar character, e.g. <tt>README.TXT</tt> becomes <tt>README.TX_</tt>
|
|
81
|
+
or <tt>README.TX$</tt>.</p>
|
|
82
|
+
|
|
83
|
+
<a name="SZDD_file_format"><h2>SZDD file format</h2></a>
|
|
84
|
+
|
|
85
|
+
<p>An SZDD file begins with this fixed header:</p>
|
|
86
|
+
|
|
87
|
+
<table class="wikitable">
|
|
88
|
+
<caption>SZDD header format</caption>
|
|
89
|
+
<tr><th>Offset</th><th>Length</th><th>Description</th></tr>
|
|
90
|
+
<tr><td>0x00</td><td>8</td><td>"SZDD" signature: 0x53,0x5A,0x44,0x44,0x88,0xF0,0x27,0x33</td></tr>
|
|
91
|
+
<tr><td>0x08</td><td>1</td><td>Compression mode: only "A" (0x41) is valid here</td></tr>
|
|
92
|
+
<tr><td>0x09</td><td>1</td><td>The character missing from the end of the filename (0=unknown)</td></tr>
|
|
93
|
+
<tr><td>0x0A</td><td>4</td><td>The integer length of the file when unpacked</td></tr>
|
|
94
|
+
</table>
|
|
95
|
+
|
|
96
|
+
<p>The header is immediately followed by the compressed data. The
|
|
97
|
+
following pseudocode explains how to unpack this data; it's a form of
|
|
98
|
+
the LZSS algorithm.</p>
|
|
99
|
+
|
|
100
|
+
<table class="wikitable">
|
|
101
|
+
<caption>SZDD decompression pseudocode</caption>
|
|
102
|
+
<tr><td>
|
|
103
|
+
<div dir="ltr" style="text-align: left;"><div class="c source-c" style="font-family:monospace;"><pre class="de1"><span class="kw4">char</span> window<span class="br0">[</span><span class="nu0">4096</span><span class="br0">]</span><span class="sy0">;</span>
|
|
104
|
+
<span class="kw4">int</span> pos <span class="sy0">=</span> <span class="nu0">4096</span> <span class="sy0">-</span> <span class="nu0">16</span><span class="sy0">;</span>
|
|
105
|
+
memset<span class="br0">(</span>window<span class="sy0">,</span> <span class="nu12">0x20</span><span class="sy0">,</span> <span class="nu0">4096</span><span class="br0">)</span><span class="sy0">;</span> <span class="coMULTI">/* window initially full of spaces */</span>
|
|
106
|
+
<span class="kw1">for</span> <span class="br0">(</span><span class="sy0">;;</span><span class="br0">)</span> <span class="br0">{</span>
|
|
107
|
+
<span class="kw4">int</span> control <span class="sy0">=</span> GETBYTE<span class="br0">(</span><span class="br0">)</span><span class="sy0">;</span>
|
|
108
|
+
<span class="kw1">if</span> <span class="br0">(</span>control <span class="sy0">==</span> EOF<span class="br0">)</span> <span class="kw2">break</span><span class="sy0">;</span> <span class="coMULTI">/* exit if no more to read */</span>
|
|
109
|
+
<span class="kw1">for</span> <span class="br0">(</span><span class="kw4">int</span> cbit <span class="sy0">=</span> <span class="nu12">0x01</span><span class="sy0">;</span> cbit <span class="sy0">&</span> <span class="nu12">0xFF</span><span class="sy0">;</span> cbit <span class="sy0"><<=</span> <span class="nu0">1</span><span class="br0">)</span> <span class="br0">{</span>
|
|
110
|
+
<span class="kw1">if</span> <span class="br0">(</span>control <span class="sy0">&</span> cbit<span class="br0">)</span> <span class="br0">{</span>
|
|
111
|
+
<span class="coMULTI">/* literal */</span>
|
|
112
|
+
PUTBYTE<span class="br0">(</span>window<span class="br0">[</span>pos<span class="sy0">++</span><span class="br0">]</span> <span class="sy0">=</span> GETBYTE<span class="br0">(</span><span class="br0">)</span><span class="br0">)</span><span class="sy0">;</span>
|
|
113
|
+
<span class="br0">}</span>
|
|
114
|
+
<span class="kw1">else</span> <span class="br0">{</span>
|
|
115
|
+
<span class="coMULTI">/* match */</span>
|
|
116
|
+
<span class="kw4">int</span> matchpos <span class="sy0">=</span> GETBYTE<span class="br0">(</span><span class="br0">)</span><span class="sy0">;</span>
|
|
117
|
+
<span class="kw4">int</span> matchlen <span class="sy0">=</span> GETBYTE<span class="br0">(</span><span class="br0">)</span><span class="sy0">;</span>
|
|
118
|
+
matchpos <span class="sy0">|=</span> <span class="br0">(</span>matchlen <span class="sy0">&</span> <span class="nu12">0xF0</span><span class="br0">)</span> <span class="sy0"><<</span> <span class="nu0">4</span><span class="sy0">;</span>
|
|
119
|
+
matchlen <span class="sy0">=</span> <span class="br0">(</span>matchlen <span class="sy0">&</span> <span class="nu12">0x0F</span><span class="br0">)</span> <span class="sy0">+</span> <span class="nu0">3</span><span class="sy0">;</span>
|
|
120
|
+
<span class="kw1">while</span> <span class="br0">(</span>matchlen<span class="sy0">--</span><span class="br0">)</span> <span class="br0">{</span>
|
|
121
|
+
PUTBYTE<span class="br0">(</span>window<span class="br0">[</span>pos<span class="sy0">++</span><span class="br0">]</span> <span class="sy0">=</span> window<span class="br0">[</span>matchpos<span class="sy0">++</span><span class="br0">]</span><span class="br0">)</span><span class="sy0">;</span>
|
|
122
|
+
pos <span class="sy0">&=</span> <span class="nu0">4095</span><span class="sy0">;</span> matchpos <span class="sy0">&=</span> <span class="nu0">4095</span><span class="sy0">;</span>
|
|
123
|
+
<span class="br0">}</span>
|
|
124
|
+
<span class="br0">}</span>
|
|
125
|
+
<span class="br0">}</span>
|
|
126
|
+
<span class="br0">}</span></pre></div></div>
|
|
127
|
+
</td></tr></table>
|
|
128
|
+
|
|
129
|
+
<p>There is also a variant SZDD format seen in the installation
|
|
130
|
+
package for QBasic 4.5, so I call it the QBasic variant. It has a
|
|
131
|
+
different header and the <tt>pos</tt> variable in the pseudocode above
|
|
132
|
+
is set to <tt>4096-18</tt> instead of <tt>4096-16</tt>.</p>
|
|
133
|
+
|
|
134
|
+
<table class="wikitable">
|
|
135
|
+
<caption>QBasic SZDD variant header format</caption>
|
|
136
|
+
<tr><th>Offset</th><th>Length</th><th>Description</th></tr>
|
|
137
|
+
<tr><td>0x00</td><td>8</td><td>"SZ" signature: 0x53,0x5A,0x20,0x88,0xF0,0x27,0x33,0xD1</td></tr>
|
|
138
|
+
<tr><td>0x08</td><td>4</td><td>The integer length of the file when unpacked</td></tr></table>
|
|
139
|
+
|
|
140
|
+
<a name="KWAJ_file_format"><h2>KWAJ file format</h2></a>
|
|
141
|
+
|
|
142
|
+
<p>A KWAJ file begins with this fixed header:</p>
|
|
143
|
+
|
|
144
|
+
<table class="wikitable">
|
|
145
|
+
<caption>KWAJ header format</caption>
|
|
146
|
+
<tr><th>Offset</th><th>Length</th><th>Description</th></tr>
|
|
147
|
+
<tr><td>0x00</td><td>8</td><td>"KWAJ" signature: 0x4B,0x57,0x41,0x4A,0x88,0xF0,0x27,0xD1</td></tr>
|
|
148
|
+
<tr><td>0x08</td><td>2</td><td>compression method (0-4)</td></tr>
|
|
149
|
+
<tr><td>0x0A</td><td>2</td><td>file offset of compressed data</td></tr>
|
|
150
|
+
<tr><td>0x0C</td><td>2</td><td>header flags to mark header extensions</td></tr>
|
|
151
|
+
</table>
|
|
152
|
+
|
|
153
|
+
<a name="Compression_methods"><h3>Compression methods</h3></a>
|
|
154
|
+
|
|
155
|
+
<p>The "compression method" field indicates the type of data
|
|
156
|
+
compression used:</p>
|
|
157
|
+
|
|
158
|
+
<ol start="0">
|
|
159
|
+
<li>No compression</li>
|
|
160
|
+
<li>No compression, data is XORed with byte 0xFF</li>
|
|
161
|
+
<li>The same compression method as regular SZDD</li>
|
|
162
|
+
<li>LZ + Huffman "Jeff Johnson" compression</li>
|
|
163
|
+
<li>MS-ZIP</li>
|
|
164
|
+
</ol>
|
|
165
|
+
|
|
166
|
+
<a name="Header_extensions"><h3>Header extensions</h3></a>
|
|
167
|
+
|
|
168
|
+
<p>Header extensions immediately follow the header.</p>
|
|
169
|
+
|
|
170
|
+
<p>If you don't care about the header extensions, use the file offset
|
|
171
|
+
to skip to the compressed data.</p>
|
|
172
|
+
|
|
173
|
+
<p>The header extensions appear in this order:</p>
|
|
174
|
+
|
|
175
|
+
<dl>
|
|
176
|
+
<dt>When header flags bit 0 is set</dt><dd>4 bytes: decompressed length of file</dd>
|
|
177
|
+
<dt>When header flags bit 1 is set</dt><dd>2 bytes: unknown purpose</dd>
|
|
178
|
+
<dt>When header flags bit 2 is set</dt><dd>2 bytes: length of data, followed by that many bytes of (unknown purpose) data</dd>
|
|
179
|
+
<dt>When header flags bit 3 is set</dt><dd>1-9 bytes: null-terminated string with max length 8: file name</dd>
|
|
180
|
+
<dt>When header flags bit 4 is set</dt><dd>1-4 bytes: null-terminated string with max length 3: file extension</dd>
|
|
181
|
+
<dt>When header flags bit 5 is set</dt><dd>2 bytes: length of data, followed by that many bytes of (arbitrary text) data</dd>
|
|
182
|
+
</dl>
|
|
183
|
+
|
|
184
|
+
<a name="KWAJ_compression_method_3"><h3>KWAJ compression method 3</h3></a>
|
|
185
|
+
|
|
186
|
+
<p>Compression method 3 is unique to the KWAJ format. It's an
|
|
187
|
+
LZ+Huffman algorithm created by Jeff Johnson.</p>
|
|
188
|
+
|
|
189
|
+
<p>Bits are always read from MSB to LSB, one byte at a time.</p>
|
|
190
|
+
|
|
191
|
+
<p>There are three parts:</p>
|
|
192
|
+
|
|
193
|
+
<ol>
|
|
194
|
+
<li>The data starts off with 6 nybbles; 4 bits each. Each nybble is
|
|
195
|
+
between 0-3 and is the encoding type of the 5 huffman length lists to
|
|
196
|
+
follow. The 6th nybble is just padding.</li>
|
|
197
|
+
<li>Then follow 5 huffman code length lists.</li>
|
|
198
|
+
<li>Then follows the compressed data, which is a mix of huffman
|
|
199
|
+
symbols and raw bits.</li>
|
|
200
|
+
</ol>
|
|
201
|
+
|
|
202
|
+
<a name="Huffman_code_length_lists"><h4>Huffman code length lists</h4></a>
|
|
203
|
+
|
|
204
|
+
<p>KWAJ uses 5 huffman trees. They always have the same number of
|
|
205
|
+
symbols in them. They are, in order:</p>
|
|
206
|
+
|
|
207
|
+
<ol>
|
|
208
|
+
<li>16 symbol tree (0-15) to store match run lengths (MATCHLEN)</li>
|
|
209
|
+
<li>16 symbol tree (0-15) to store match run lengths immediately following a short literal run (MATCHLEN2)</li>
|
|
210
|
+
<li>32 symbol tree (0-31) to store literal run lengths (LITLEN)</li>
|
|
211
|
+
<li>64 symbol tree (0-63) to store the upper 6 bits of match distances (OFFSET)</li>
|
|
212
|
+
<li>256 symbol tree (0-255) to store literals (LITERAL)</li>
|
|
213
|
+
</ol>
|
|
214
|
+
|
|
215
|
+
<p>Canonical huffman codes are used, which means you simply need to
|
|
216
|
+
know how many symbols are in each huffman tree (given above), and how long
|
|
217
|
+
each huffman symbol is.</p>
|
|
218
|
+
|
|
219
|
+
<p>How the symbol lengths are encoded depends on the encoding type, as
|
|
220
|
+
given by the 6 nybbles at the start of the compressed data.</p>
|
|
221
|
+
|
|
222
|
+
<p>Symbol lengths are read in ascending order, and the number of
|
|
223
|
+
symbols to read is implied by which tree you're defining.</p>
|
|
224
|
+
|
|
225
|
+
<dl>
|
|
226
|
+
<dt>Huffman code length list, encoding type 0</dt>
|
|
227
|
+
<dd>All symbols have the same length, implied by the number of symbols in the tree:
|
|
228
|
+
<ul>
|
|
229
|
+
<li>16 symbols -> all symbols are length 4</li>
|
|
230
|
+
<li>32 symbols -> all symbols are length 5</li>
|
|
231
|
+
<li>64 symbols -> all symbols are length 6</li>
|
|
232
|
+
<li>256 symbols -> all symbols are length 8</li>
|
|
233
|
+
</ul>
|
|
234
|
+
</dd>
|
|
235
|
+
<dd>You don't need to read anything.</dd>
|
|
236
|
+
</dl>
|
|
237
|
+
|
|
238
|
+
<dl>
|
|
239
|
+
<dt>Huffman code length list, encoding type 1</dt>
|
|
240
|
+
<dd>A run-length encoding is used:
|
|
241
|
+
<ul>
|
|
242
|
+
<li>read 4 bits for the first symbol length (0-15)</li>
|
|
243
|
+
<li>LOOP:
|
|
244
|
+
<ul>
|
|
245
|
+
<li>read 1 bit == 0 if symbol length is the same as the previous, OTHERWISE:</li>
|
|
246
|
+
<li>read 1 bit == 0 if symbol length is previous + 1, OTHERWISE:</li>
|
|
247
|
+
<li>read 4 bits for symbol length (0-15)</li>
|
|
248
|
+
</ul>
|
|
249
|
+
</li>
|
|
250
|
+
</ul>
|
|
251
|
+
</dd>
|
|
252
|
+
</dl>
|
|
253
|
+
|
|
254
|
+
<dl>
|
|
255
|
+
<dt>Huffman code length list, encoding type 2</dt>
|
|
256
|
+
<dd>Another run-length encoding is used:
|
|
257
|
+
<ul>
|
|
258
|
+
<li>read 4 bits for the first symbol length (0-15)</li>
|
|
259
|
+
<li>LOOP:
|
|
260
|
+
<ul>
|
|
261
|
+
<li> read 2 bits as selector (0-3):
|
|
262
|
+
<ul>
|
|
263
|
+
<li> selector == 3: read 4 bits for symbol length, OTHERWISE:</li>
|
|
264
|
+
<li> symbol length is previous symbol length + (selector-1), i.e. -1, 0 or +1</li>
|
|
265
|
+
</ul>
|
|
266
|
+
</li>
|
|
267
|
+
</ul>
|
|
268
|
+
</li>
|
|
269
|
+
</ul>
|
|
270
|
+
</dd>
|
|
271
|
+
</dl>
|
|
272
|
+
|
|
273
|
+
<dl>
|
|
274
|
+
<dt>Huffman code length list, encoding type 3</dt>
|
|
275
|
+
<dd>There is no compression. Read 4 bits per symbol (0-15).</dd>
|
|
276
|
+
</dl>
|
|
277
|
+
|
|
278
|
+
<a name="Compressed_data"><h4>Compressed data</h4></a>
|
|
279
|
+
|
|
280
|
+
<p>At this point, the compressed data begins.</p>
|
|
281
|
+
|
|
282
|
+
<p>We have a 4096 byte ring buffer, initially filled with byte 0x20
|
|
283
|
+
(ASCII space). Unlike the SZDD format, the starting position in the
|
|
284
|
+
buffer is irrelevant, as match positions are stored relative to the
|
|
285
|
+
current position in the window, not as absolute positions in the
|
|
286
|
+
window.</p>
|
|
287
|
+
|
|
288
|
+
<p>Pseudo-code:</p>
|
|
289
|
+
<pre>
|
|
290
|
+
ring buffer position = 4096-17
|
|
291
|
+
selected table = MATCHLEN
|
|
292
|
+
LOOP:
|
|
293
|
+
code = read huffman code using selected table (MATCHLEN or MATCHLEN2)
|
|
294
|
+
if EOF reached, exit loop
|
|
295
|
+
if code > 0, this is a match:
|
|
296
|
+
match length = code + 2
|
|
297
|
+
x = read huffman code using OFFSET table
|
|
298
|
+
y = read 6 bits
|
|
299
|
+
match offset = current ring buffer position - (x&lt;&lt;6 | y)
|
|
300
|
+
copy match as output and into the ring buffer
|
|
301
|
+
selected table = MATCHLEN
|
|
302
|
+
if code == 0, this is a run of literals:
|
|
303
|
+
x = read huffman code using LITLEN table
|
|
304
|
+
if x != 31, selected table = MATCHLEN2
|
|
305
|
+
read {x+1} literals using LITERAL huffman table, copy as output and into the ring buffer
|
|
306
|
+
</pre>
|
|
307
|
+
|
|
308
|
+
<a name="MSZIP"><h2>MS-ZIP</h2></a>
|
|
309
|
+
|
|
310
|
+
KWAJ type 4 compression is called MS-ZIP, because it is almost
|
|
311
|
+
identical to the MS-ZIP compression found in Microsoft Cabinet files.
|
|
312
|
+
|
|
313
|
+
Each 32768 bytes of data is compressed independently using Phil
|
|
314
|
+
Katz's DEFLATE algorithm. However, the history window is shared
|
|
315
|
+
between blocks, so they must be unpacked in order.
|
|
316
|
+
The format of each block is as follows:
|
|
317
|
+
|
|
318
|
+
<table class="wikitable">
|
|
319
|
+
<caption>KWAJ MS-ZIP block format</caption>
|
|
320
|
+
<tr><th>Offset</th><th>Length</th><th>Description</th></tr>
|
|
321
|
+
<tr><td>0</td><td>2</td><td>Compressed length of this block (n).
|
|
322
|
+
Stored in Intel byte order.
|
|
323
|
+
Doesn't include these two bytes.</td></tr>
|
|
324
|
+
<tr><td>2</td><td>2</td><td>"CK" in ASCII (0x43, 0x4B)</td></tr>
|
|
325
|
+
<tr><td>4</td><td>n-2</td><td>Data compressed in DEFLATE format</td></tr>
|
|
326
|
+
</table>
|
|
327
|
+
|
|
328
|
+
The final block will unpack to 1-32768 bytes. It will be followed by two
|
|
329
|
+
zero bytes.
|
|
330
|
+
|
|
331
|
+
</body></html>
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
/* This file is part of libmspack.
|
|
2
|
+
* (C) 2003-2004 Stuart Caie.
|
|
3
|
+
*
|
|
4
|
+
* libmspack is free software; you can redistribute it and/or modify it under
|
|
5
|
+
* the terms of the GNU Lesser General Public License (LGPL) version 2.1
|
|
6
|
+
*
|
|
7
|
+
* For further details, see the file COPYING.LIB distributed with libmspack
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
#ifndef MSPACK_CAB_H
|
|
11
|
+
#define MSPACK_CAB_H 1
|
|
12
|
+
|
|
13
|
+
#include <mszip.h>
|
|
14
|
+
#include <qtm.h>
|
|
15
|
+
#include <lzx.h>
|
|
16
|
+
|
|
17
|
+
/* generic CAB definitions */
|
|
18
|
+
|
|
19
|
+
/* structure offsets */
|
|
20
|
+
#define cfhead_Signature (0x00)
|
|
21
|
+
#define cfhead_CabinetSize (0x08)
|
|
22
|
+
#define cfhead_FileOffset (0x10)
|
|
23
|
+
#define cfhead_MinorVersion (0x18)
|
|
24
|
+
#define cfhead_MajorVersion (0x19)
|
|
25
|
+
#define cfhead_NumFolders (0x1A)
|
|
26
|
+
#define cfhead_NumFiles (0x1C)
|
|
27
|
+
#define cfhead_Flags (0x1E)
|
|
28
|
+
#define cfhead_SetID (0x20)
|
|
29
|
+
#define cfhead_CabinetIndex (0x22)
|
|
30
|
+
#define cfhead_SIZEOF (0x24)
|
|
31
|
+
#define cfheadext_HeaderReserved (0x00)
|
|
32
|
+
#define cfheadext_FolderReserved (0x02)
|
|
33
|
+
#define cfheadext_DataReserved (0x03)
|
|
34
|
+
#define cfheadext_SIZEOF (0x04)
|
|
35
|
+
#define cffold_DataOffset (0x00)
|
|
36
|
+
#define cffold_NumBlocks (0x04)
|
|
37
|
+
#define cffold_CompType (0x06)
|
|
38
|
+
#define cffold_SIZEOF (0x08)
|
|
39
|
+
#define cffile_UncompressedSize (0x00)
|
|
40
|
+
#define cffile_FolderOffset (0x04)
|
|
41
|
+
#define cffile_FolderIndex (0x08)
|
|
42
|
+
#define cffile_Date (0x0A)
|
|
43
|
+
#define cffile_Time (0x0C)
|
|
44
|
+
#define cffile_Attribs (0x0E)
|
|
45
|
+
#define cffile_SIZEOF (0x10)
|
|
46
|
+
#define cfdata_CheckSum (0x00)
|
|
47
|
+
#define cfdata_CompressedSize (0x04)
|
|
48
|
+
#define cfdata_UncompressedSize (0x06)
|
|
49
|
+
#define cfdata_SIZEOF (0x08)
|
|
50
|
+
|
|
51
|
+
/* flags */
|
|
52
|
+
#define cffoldCOMPTYPE_MASK (0x000f)
|
|
53
|
+
#define cffoldCOMPTYPE_NONE (0x0000)
|
|
54
|
+
#define cffoldCOMPTYPE_MSZIP (0x0001)
|
|
55
|
+
#define cffoldCOMPTYPE_QUANTUM (0x0002)
|
|
56
|
+
#define cffoldCOMPTYPE_LZX (0x0003)
|
|
57
|
+
#define cfheadPREV_CABINET (0x0001)
|
|
58
|
+
#define cfheadNEXT_CABINET (0x0002)
|
|
59
|
+
#define cfheadRESERVE_PRESENT (0x0004)
|
|
60
|
+
#define cffileCONTINUED_FROM_PREV (0xFFFD)
|
|
61
|
+
#define cffileCONTINUED_TO_NEXT (0xFFFE)
|
|
62
|
+
#define cffileCONTINUED_PREV_AND_NEXT (0xFFFF)
|
|
63
|
+
|
|
64
|
+
/* CAB data blocks are <= 32768 bytes in uncompressed form. Uncompressed
|
|
65
|
+
* blocks have zero growth. MSZIP guarantees that it won't grow above
|
|
66
|
+
* uncompressed size by more than 12 bytes. LZX guarantees it won't grow
|
|
67
|
+
* more than 6144 bytes. Quantum has no documentation, but the largest
|
|
68
|
+
* block seen in the wild is 337 bytes above uncompressed size.
|
|
69
|
+
*/
|
|
70
|
+
#define CAB_BLOCKMAX (32768) /* largest uncompressed size of one CAB data block */
|
|
71
|
+
#define CAB_INPUTMAX (CAB_BLOCKMAX+6144) /* block max plus 6144, the largest growth listed above (LZX) */
|
|
72
|
+
|
|
73
|
+
/* CAB compression definitions */
|
|
74
|
+
|
|
75
|
+
struct mscab_compressor_p { /* CAB compressor object: a struct mscab_compressor plus extra fields; still a stub */
|
|
76
|
+
struct mscab_compressor base; /* embedded as the first member */
|
|
77
|
+
struct mspack_system *system; /* NOTE(review): presumably the mspack_system given at creation -- confirm */
|
|
78
|
+
/* todo: no further compressor state has been defined yet */
|
|
79
|
+
};
|
|
80
|
+
|
|
81
|
+
/* CAB decompression definitions */
|
|
82
|
+
|
|
83
|
+
struct mscabd_decompress_state { /* working state of a CAB extraction: position, I/O handles, one buffered input block */
|
|
84
|
+
struct mscabd_folder_p *folder; /* current folder we're extracting from */
|
|
85
|
+
struct mscabd_folder_data *data; /* current folder split we're in */
|
|
86
|
+
unsigned int offset; /* uncompressed offset within folder */
|
|
87
|
+
unsigned int block; /* which block are we decompressing? */
|
|
88
|
+
struct mspack_system sys; /* special I/O code for decompressor (a struct held by value, not a pointer) */
|
|
89
|
+
int comp_type; /* type of compression used by folder */
|
|
90
|
+
int (*decompress)(void *, off_t); /* decompressor code; NOTE(review): presumably called with 'state' and a byte count -- confirm in cabd.c */
|
|
91
|
+
void *state; /* decompressor state (opaque pointer) */
|
|
92
|
+
struct mscabd_cabinet_p *incab; /* cabinet where input data comes from */
|
|
93
|
+
struct mspack_file *infh; /* input file handle */
|
|
94
|
+
struct mspack_file *outfh; /* output file handle */
|
|
95
|
+
unsigned char *i_ptr, *i_end; /* i_ptr: input data consumed so far; i_end: end of input data */
|
|
96
|
+
unsigned char input[CAB_INPUTMAX]; /* one input block of data (room for CAB_INPUTMAX bytes) */
|
|
97
|
+
};
|
|
98
|
+
|
|
99
|
+
struct mscab_decompressor_p { /* CAB decompressor object: a struct mscab_decompressor plus extra fields */
|
|
100
|
+
struct mscab_decompressor base; /* embedded as the first member */
|
|
101
|
+
struct mscabd_decompress_state *d; /* pointer to the decompression state struct declared above */
|
|
102
|
+
struct mspack_system *system; /* NOTE(review): presumably the mspack_system used for I/O -- confirm in cabd.c */
|
|
103
|
+
int param[3]; /* !!! MATCH THIS TO NUM OF PARAMS IN MSPACK.H !!! */
|
|
104
|
+
int error, read_error; /* NOTE(review): presumably the last error code and a saved read error -- confirm in cabd.c */
|
|
105
|
+
};
|
|
106
|
+
|
|
107
|
+
struct mscabd_cabinet_p { /* a struct mscabd_cabinet plus data-block location details */
|
|
108
|
+
struct mscabd_cabinet base; /* embedded as the first member */
|
|
109
|
+
off_t blocks_off; /* offset to data blocks */
|
|
110
|
+
int block_resv; /* reserved space in data blocks */
|
|
111
|
+
};
|
|
112
|
+
|
|
113
|
+
/* there is one of these for every cabinet a folder spans */
|
|
114
|
+
struct mscabd_folder_data { /* describes one span of a folder; spans are chained through 'next' */
|
|
115
|
+
struct mscabd_folder_data *next; /* pointer to another struct mscabd_folder_data; NOTE(review): presumably NULL at the end of the chain -- confirm */
|
|
116
|
+
struct mscabd_cabinet_p *cab; /* cabinet file of this folder span */
|
|
117
|
+
off_t offset; /* cabinet offset of first datablock */
|
|
118
|
+
};
|
|
119
|
+
|
|
120
|
+
struct mscabd_folder_p { /* a struct mscabd_folder plus data-block location and merge bookkeeping */
|
|
121
|
+
struct mscabd_folder base; /* embedded as the first member */
|
|
122
|
+
struct mscabd_folder_data data; /* where are the data blocks? (stored inline; has its own 'next' pointer) */
|
|
123
|
+
struct mscabd_file *merge_prev; /* first file needing backwards merge */
|
|
124
|
+
struct mscabd_file *merge_next; /* first file needing forwards merge */
|
|
125
|
+
};
|
|
126
|
+
|
|
127
|
+
#endif
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/* This file is part of libmspack.
|
|
2
|
+
* (C) 2003-2004 Stuart Caie.
|
|
3
|
+
*
|
|
4
|
+
* libmspack is free software; you can redistribute it and/or modify it under
|
|
5
|
+
* the terms of the GNU Lesser General Public License (LGPL) version 2.1
|
|
6
|
+
*
|
|
7
|
+
* For further details, see the file COPYING.LIB distributed with libmspack
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
/* CAB compression implementation */
|
|
11
|
+
|
|
12
|
+
#include <system.h>
|
|
13
|
+
#include <cab.h>
|
|
14
|
+
|
|
15
|
+
struct mscab_compressor *
|
|
16
|
+
mspack_create_cab_compressor(struct mspack_system *sys)
|
|
17
|
+
{
|
|
18
|
+
/* todo: CAB compression is not implemented; 'sys' is unused and NULL is always returned */
|
|
19
|
+
return NULL;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
void mspack_destroy_cab_compressor(struct mscab_compressor *self) {
|
|
23
|
+
/* todo: not implemented; the body is empty, so 'self' is neither read nor freed */
|
|
24
|
+
}
|