libmspack 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (150) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +18 -0
  3. data/.travis.yml +5 -0
  4. data/.yardopts +1 -0
  5. data/Gemfile +4 -0
  6. data/README.md +75 -0
  7. data/Rakefile +22 -0
  8. data/UNLICENSE +24 -0
  9. data/ext/Rakefile +16 -0
  10. data/ext/i386-windows/libmspack.dll +0 -0
  11. data/ext/libmspack/AUTHORS +12 -0
  12. data/ext/libmspack/COPYING.LIB +504 -0
  13. data/ext/libmspack/ChangeLog +491 -0
  14. data/ext/libmspack/Makefile.am +100 -0
  15. data/ext/libmspack/NEWS +0 -0
  16. data/ext/libmspack/README +130 -0
  17. data/ext/libmspack/TODO +8 -0
  18. data/ext/libmspack/cleanup.sh +9 -0
  19. data/ext/libmspack/configure.ac +50 -0
  20. data/ext/libmspack/debian/changelog +6 -0
  21. data/ext/libmspack/debian/control +14 -0
  22. data/ext/libmspack/debian/rules +101 -0
  23. data/ext/libmspack/doc/Doxyfile.in +22 -0
  24. data/ext/libmspack/doc/Makefile.in +14 -0
  25. data/ext/libmspack/doc/szdd_kwaj_format.html +331 -0
  26. data/ext/libmspack/libmspack.pc.in +10 -0
  27. data/ext/libmspack/mspack/cab.h +127 -0
  28. data/ext/libmspack/mspack/cabc.c +24 -0
  29. data/ext/libmspack/mspack/cabd.c +1444 -0
  30. data/ext/libmspack/mspack/chm.h +122 -0
  31. data/ext/libmspack/mspack/chmc.c +24 -0
  32. data/ext/libmspack/mspack/chmd.c +1392 -0
  33. data/ext/libmspack/mspack/crc32.c +95 -0
  34. data/ext/libmspack/mspack/crc32.h +17 -0
  35. data/ext/libmspack/mspack/des.h +15 -0
  36. data/ext/libmspack/mspack/hlp.h +33 -0
  37. data/ext/libmspack/mspack/hlpc.c +24 -0
  38. data/ext/libmspack/mspack/hlpd.c +24 -0
  39. data/ext/libmspack/mspack/kwaj.h +118 -0
  40. data/ext/libmspack/mspack/kwajc.c +24 -0
  41. data/ext/libmspack/mspack/kwajd.c +561 -0
  42. data/ext/libmspack/mspack/lit.h +35 -0
  43. data/ext/libmspack/mspack/litc.c +24 -0
  44. data/ext/libmspack/mspack/litd.c +24 -0
  45. data/ext/libmspack/mspack/lzss.h +66 -0
  46. data/ext/libmspack/mspack/lzssd.c +93 -0
  47. data/ext/libmspack/mspack/lzx.h +221 -0
  48. data/ext/libmspack/mspack/lzxc.c +18 -0
  49. data/ext/libmspack/mspack/lzxd.c +895 -0
  50. data/ext/libmspack/mspack/mspack.def +28 -0
  51. data/ext/libmspack/mspack/mspack.h +2353 -0
  52. data/ext/libmspack/mspack/mszip.h +126 -0
  53. data/ext/libmspack/mspack/mszipc.c +18 -0
  54. data/ext/libmspack/mspack/mszipd.c +514 -0
  55. data/ext/libmspack/mspack/oab.h +60 -0
  56. data/ext/libmspack/mspack/oabc.c +24 -0
  57. data/ext/libmspack/mspack/oabd.c +408 -0
  58. data/ext/libmspack/mspack/qtm.h +128 -0
  59. data/ext/libmspack/mspack/qtmc.c +18 -0
  60. data/ext/libmspack/mspack/qtmd.c +489 -0
  61. data/ext/libmspack/mspack/readbits.h +207 -0
  62. data/ext/libmspack/mspack/readhuff.h +173 -0
  63. data/ext/libmspack/mspack/sha.h +15 -0
  64. data/ext/libmspack/mspack/system.c +239 -0
  65. data/ext/libmspack/mspack/system.h +124 -0
  66. data/ext/libmspack/mspack/szdd.h +39 -0
  67. data/ext/libmspack/mspack/szddc.c +24 -0
  68. data/ext/libmspack/mspack/szddd.c +247 -0
  69. data/ext/libmspack/rebuild.sh +8 -0
  70. data/ext/libmspack/test/cabd_c10 +19 -0
  71. data/ext/libmspack/test/cabd_compare +34 -0
  72. data/ext/libmspack/test/cabd_md5.c +161 -0
  73. data/ext/libmspack/test/cabd_memory.c +179 -0
  74. data/ext/libmspack/test/cabd_test.c +386 -0
  75. data/ext/libmspack/test/cabrip.c +81 -0
  76. data/ext/libmspack/test/chmd_compare +38 -0
  77. data/ext/libmspack/test/chmd_find.c +95 -0
  78. data/ext/libmspack/test/chmd_md5.c +67 -0
  79. data/ext/libmspack/test/chmd_order.c +144 -0
  80. data/ext/libmspack/test/chminfo.c +284 -0
  81. data/ext/libmspack/test/chmx.c +216 -0
  82. data/ext/libmspack/test/error.h +22 -0
  83. data/ext/libmspack/test/expand.c +79 -0
  84. data/ext/libmspack/test/md5.c +457 -0
  85. data/ext/libmspack/test/md5.h +165 -0
  86. data/ext/libmspack/test/md5_fh.h +123 -0
  87. data/ext/libmspack/test/msdecompile_md5 +24 -0
  88. data/ext/libmspack/test/msexpand_md5 +39 -0
  89. data/ext/libmspack/test/multifh.c +435 -0
  90. data/ext/libmspack/test/oabx.c +41 -0
  91. data/ext/libmspack/test/test_files/cabd/1.pl +84 -0
  92. data/ext/libmspack/test/test_files/cabd/2.pl +75 -0
  93. data/ext/libmspack/test/test_files/cabd/bad_folderindex.cab +0 -0
  94. data/ext/libmspack/test/test_files/cabd/bad_nofiles.cab +0 -0
  95. data/ext/libmspack/test/test_files/cabd/bad_nofolders.cab +0 -0
  96. data/ext/libmspack/test/test_files/cabd/bad_signature.cab +0 -0
  97. data/ext/libmspack/test/test_files/cabd/multi_basic_pt1.cab +0 -0
  98. data/ext/libmspack/test/test_files/cabd/multi_basic_pt2.cab +0 -0
  99. data/ext/libmspack/test/test_files/cabd/multi_basic_pt3.cab +0 -0
  100. data/ext/libmspack/test/test_files/cabd/multi_basic_pt4.cab +0 -0
  101. data/ext/libmspack/test/test_files/cabd/multi_basic_pt5.cab +0 -0
  102. data/ext/libmspack/test/test_files/cabd/normal_255c_filename.cab +0 -0
  103. data/ext/libmspack/test/test_files/cabd/normal_2files_1folder.cab +0 -0
  104. data/ext/libmspack/test/test_files/cabd/partial_nodata.cab +0 -0
  105. data/ext/libmspack/test/test_files/cabd/partial_nofiles.cab +0 -0
  106. data/ext/libmspack/test/test_files/cabd/partial_nofolder.cab +0 -0
  107. data/ext/libmspack/test/test_files/cabd/partial_shortextheader.cab +0 -0
  108. data/ext/libmspack/test/test_files/cabd/partial_shortfile1.cab +0 -0
  109. data/ext/libmspack/test/test_files/cabd/partial_shortfile2.cab +0 -0
  110. data/ext/libmspack/test/test_files/cabd/partial_shortfolder.cab +0 -0
  111. data/ext/libmspack/test/test_files/cabd/partial_shortheader.cab +0 -0
  112. data/ext/libmspack/test/test_files/cabd/partial_str_nofname.cab +0 -0
  113. data/ext/libmspack/test/test_files/cabd/partial_str_noninfo.cab +0 -0
  114. data/ext/libmspack/test/test_files/cabd/partial_str_nonname.cab +0 -0
  115. data/ext/libmspack/test/test_files/cabd/partial_str_nopinfo.cab +0 -0
  116. data/ext/libmspack/test/test_files/cabd/partial_str_nopname.cab +0 -0
  117. data/ext/libmspack/test/test_files/cabd/partial_str_shortfname.cab +0 -0
  118. data/ext/libmspack/test/test_files/cabd/partial_str_shortninfo.cab +0 -0
  119. data/ext/libmspack/test/test_files/cabd/partial_str_shortnname.cab +0 -0
  120. data/ext/libmspack/test/test_files/cabd/partial_str_shortpinfo.cab +0 -0
  121. data/ext/libmspack/test/test_files/cabd/partial_str_shortpname.cab +0 -0
  122. data/ext/libmspack/test/test_files/cabd/reserve_---.cab +0 -0
  123. data/ext/libmspack/test/test_files/cabd/reserve_--D.cab +0 -0
  124. data/ext/libmspack/test/test_files/cabd/reserve_-F-.cab +0 -0
  125. data/ext/libmspack/test/test_files/cabd/reserve_-FD.cab +0 -0
  126. data/ext/libmspack/test/test_files/cabd/reserve_H--.cab +0 -0
  127. data/ext/libmspack/test/test_files/cabd/reserve_H-D.cab +0 -0
  128. data/ext/libmspack/test/test_files/cabd/reserve_HF-.cab +0 -0
  129. data/ext/libmspack/test/test_files/cabd/reserve_HFD.cab +0 -0
  130. data/ext/libmspack/test/test_files/cabd/search_basic.cab +0 -0
  131. data/ext/libmspack/test/test_files/cabd/search_tricky1.cab +0 -0
  132. data/ext/libmspack/winbuild.sh +26 -0
  133. data/ext/x86_64-windows/libmspack.dll +0 -0
  134. data/lib/libmspack/constants.rb +9 -0
  135. data/lib/libmspack/exceptions.rb +12 -0
  136. data/lib/libmspack/mscab.rb +722 -0
  137. data/lib/libmspack/mschm.rb +301 -0
  138. data/lib/libmspack/mshlp.rb +15 -0
  139. data/lib/libmspack/mskwaj.rb +124 -0
  140. data/lib/libmspack/mslit.rb +18 -0
  141. data/lib/libmspack/msoab.rb +36 -0
  142. data/lib/libmspack/mspack.rb +208 -0
  143. data/lib/libmspack/msszdd.rb +81 -0
  144. data/lib/libmspack/system.rb +84 -0
  145. data/lib/libmspack/version.rb +4 -0
  146. data/lib/libmspack.rb +121 -0
  147. data/libmspack.gemspec +33 -0
  148. data/spec/libmspack_spec.rb +26 -0
  149. data/spec/spec_helper.rb +5 -0
  150. metadata +309 -0
@@ -0,0 +1,331 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
3
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
4
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
5
+ <head>
6
+ <style type="text/css">
7
+ dt {
8
+ font-weight:bold;
9
+ }
10
+ pre {
11
+ background-color:#F9F9F9;
12
+ border:1px dashed #2F6FAB;
13
+ color:black;
14
+ padding:1em;
15
+ }
16
+ table.wikitable {
17
+ background:none repeat scroll 0 0 #F9F9F9;
18
+ border:1px solid #AAAAAA;
19
+ border-collapse:collapse;
20
+ margin:1em 1em 1em 0;
21
+ }
22
+ .wikitable th, .wikitable td {
23
+ border:1px solid #AAAAAA;
24
+ padding:0.2em;
25
+ }
26
+ .wikitable th {
27
+ background:none repeat scroll 0 0 #F2F2F2;
28
+ text-align:center;
29
+ }
30
+ .wikitable caption {
31
+ font-weight:bold;
32
+ }
33
+ .c.source-c .de1, .c.source-c .de2 {font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;}
34
+ .c.source-c {font-family:monospace;}
35
+ .c.source-c .imp {font-weight: bold; color: red;}
36
+ .c.source-c li, .c.source-c .li1 {font-weight: normal; vertical-align:top;}
37
+ .c.source-c .ln {width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;}
38
+ .c.source-c .li2 {font-weight: bold; vertical-align:top;}
39
+ .c.source-c .kw1 {color: #b1b100;}
40
+ .c.source-c .kw2 {color: #000000; font-weight: bold;}
41
+ .c.source-c .kw3 {color: #000066;}
42
+ .c.source-c .kw4 {color: #993333;}
43
+ .c.source-c .co1 {color: #666666; font-style: italic;}
44
+ .c.source-c .co2 {color: #339933;}
45
+ .c.source-c .coMULTI {color: #808080; font-style: italic;}
46
+ .c.source-c .es0 {color: #000099; font-weight: bold;}
47
+ .c.source-c .es1 {color: #000099; font-weight: bold;}
48
+ .c.source-c .es2 {color: #660099; font-weight: bold;}
49
+ .c.source-c .es3 {color: #660099; font-weight: bold;}
50
+ .c.source-c .es4 {color: #660099; font-weight: bold;}
51
+ .c.source-c .es5 {color: #006699; font-weight: bold;}
52
+ .c.source-c .br0 {color: #009900;}
53
+ .c.source-c .sy0 {color: #339933;}
54
+ .c.source-c .st0 {color: #ff0000;}
55
+ .c.source-c .nu0 {color: #0000dd;}
56
+ .c.source-c .nu6 {color: #208080;}
57
+ .c.source-c .nu8 {color: #208080;}
58
+ .c.source-c .nu12 {color: #208080;}
59
+ .c.source-c .nu16 {color:#800080;}
60
+ .c.source-c .nu17 {color:#800080;}
61
+ .c.source-c .nu18 {color:#800080;}
62
+ .c.source-c .nu19 {color:#800080;}
63
+ .c.source-c .me1 {color: #202020;}
64
+ .c.source-c .me2 {color: #202020;}
65
+ .c.source-c .ln-xtra, .c.source-c li.ln-xtra, .c.source-c div.ln-xtra {background-color: #ffc;}
66
+ .c.source-c span.xtra { display:block; }
67
+ </style>
68
+ <meta name="author" content="Stuart Caie" />
69
+ <title>COMPRESS.EXE file formats: SZDD and KWAJ</title>
70
+ </head>
71
+ <body>
72
+ <h1>COMPRESS.EXE file formats: SZDD and KWAJ</h1>
73
+
74
+ <p>This document describes the <b>SZDD</b> and <b>KWAJ</b> file
75
+ formats which are implemented in the MS-DOS commands
76
+ <tt>COMPRESS.EXE</tt> and <tt>EXPAND.EXE</tt>.</p>
77
+
78
+ <p>Both formats compress a single file to another single file,
79
+ replacing the last character in the filename with an underscore or
80
+ dollar character, e.g. <tt>README.TXT</tt> becomes <tt>README.TX_</tt>
81
+ or <tt>README.TX$</tt>.</p>
82
+
83
+ <a name="SZDD_file_format"><h2>SZDD file format</h2></a>
84
+
85
+ <p>An SZDD file begins with this fixed header:</p>
86
+
87
+ <table class="wikitable">
88
+ <caption>SZDD header format</caption>
89
+ <tr><th>Offset</th><th>Length</th><th>Description</th></tr>
90
+ <tr><td>0x00</td><td>8</td><td>"SZDD" signature: 0x53,0x5A,0x44,0x44,0x88,0xF0,0x27,0x33</td></tr>
91
+ <tr><td>0x08</td><td>1</td><td>Compression mode: only "A" (0x41) is valid here</td></tr>
92
+ <tr><td>0x09</td><td>1</td><td>The character missing from the end of the filename (0=unknown)</td></tr>
93
+ <tr><td>0x0A</td><td>4</td><td>The integer length of the file when unpacked</td></tr>
94
+ </table>
95
+
96
+ <p>The header is immediately followed by the compressed data. The
97
+ following pseudocode explains how to unpack this data; it's a form of
98
+ the LZSS algorithm.</p>
99
+
100
+ <table class="wikitable">
101
+ <caption>SZDD decompression pseudocode</caption>
102
+ <tr><td>
103
+ <div dir="ltr" style="text-align: left;"><div class="c source-c" style="font-family:monospace;"><pre class="de1"><span class="kw4">char</span> window<span class="br0">&#91;</span><span class="nu0">4096</span><span class="br0">&#93;</span><span class="sy0">;</span>
104
+ <span class="kw4">int</span> pos <span class="sy0">=</span> <span class="nu0">4096</span> <span class="sy0">-</span> <span class="nu0">16</span><span class="sy0">;</span>
105
+ memset<span class="br0">&#40;</span>window<span class="sy0">,</span> <span class="nu12">0x20</span><span class="sy0">,</span> <span class="nu0">4096</span><span class="br0">&#41;</span><span class="sy0">;</span> <span class="coMULTI">/* window initially full of spaces */</span>
106
+ <span class="kw1">for</span> <span class="br0">&#40;</span><span class="sy0">;;</span><span class="br0">&#41;</span> <span class="br0">&#123;</span>
107
+ <span class="kw4">int</span> control <span class="sy0">=</span> GETBYTE<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="sy0">;</span>
108
+ <span class="kw1">if</span> <span class="br0">&#40;</span>control <span class="sy0">==</span> EOF<span class="br0">&#41;</span> <span class="kw2">break</span><span class="sy0">;</span> <span class="coMULTI">/* exit if no more to read */</span>
109
+ <span class="kw1">for</span> <span class="br0">&#40;</span><span class="kw4">int</span> cbit <span class="sy0">=</span> <span class="nu12">0x01</span><span class="sy0">;</span> cbit <span class="sy0">&amp;</span> <span class="nu12">0xFF</span><span class="sy0">;</span> cbit <span class="sy0">&lt;&lt;=</span> <span class="nu0">1</span><span class="br0">&#41;</span> <span class="br0">&#123;</span>
110
+ <span class="kw1">if</span> <span class="br0">&#40;</span>control <span class="sy0">&amp;</span> cbit<span class="br0">&#41;</span> <span class="br0">&#123;</span>
111
+ <span class="coMULTI">/* literal */</span>
112
+ PUTBYTE<span class="br0">&#40;</span>window<span class="br0">&#91;</span>pos<span class="sy0">++</span><span class="br0">&#93;</span> <span class="sy0">=</span> GETBYTE<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="br0">&#41;</span><span class="sy0">;</span> pos <span class="sy0">&amp;=</span> <span class="nu0">4095</span><span class="sy0">;</span>
113
+ <span class="br0">&#125;</span>
114
+ <span class="kw1">else</span> <span class="br0">&#123;</span>
115
+ <span class="coMULTI">/* match */</span>
116
+ <span class="kw4">int</span> matchpos <span class="sy0">=</span> GETBYTE<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="sy0">;</span>
117
+ <span class="kw4">int</span> matchlen <span class="sy0">=</span> GETBYTE<span class="br0">&#40;</span><span class="br0">&#41;</span><span class="sy0">;</span>
118
+ matchpos <span class="sy0">|=</span> <span class="br0">&#40;</span>matchlen <span class="sy0">&amp;</span> <span class="nu12">0xF0</span><span class="br0">&#41;</span> <span class="sy0">&lt;&lt;</span> <span class="nu0">4</span><span class="sy0">;</span>
119
+ matchlen <span class="sy0">=</span> <span class="br0">&#40;</span>matchlen <span class="sy0">&amp;</span> <span class="nu12">0x0F</span><span class="br0">&#41;</span> <span class="sy0">+</span> <span class="nu0">3</span><span class="sy0">;</span>
120
+ <span class="kw1">while</span> <span class="br0">&#40;</span>matchlen<span class="sy0">--</span><span class="br0">&#41;</span> <span class="br0">&#123;</span>
121
+ PUTBYTE<span class="br0">&#40;</span>window<span class="br0">&#91;</span>pos<span class="sy0">++</span><span class="br0">&#93;</span> <span class="sy0">=</span> window<span class="br0">&#91;</span>matchpos<span class="sy0">++</span><span class="br0">&#93;</span><span class="br0">&#41;</span><span class="sy0">;</span>
122
+ pos <span class="sy0">&amp;=</span> <span class="nu0">4095</span><span class="sy0">;</span> matchpos <span class="sy0">&amp;=</span> <span class="nu0">4095</span><span class="sy0">;</span>
123
+ <span class="br0">&#125;</span>
124
+ <span class="br0">&#125;</span>
125
+ <span class="br0">&#125;</span>
126
+ <span class="br0">&#125;</span></pre></div></div>
127
+ </td></tr></table>
128
+
129
+ <p>There is also a variant SZDD format seen in the installation
130
+ package for QBasic 4.5, so I call it the QBasic variant. It has a
131
+ different header and the <tt>pos</tt> variable in the pseudocode above
132
+ is set to <tt>4096-18</tt> instead of <tt>4096-16</tt>.</p>
133
+
134
+ <table class="wikitable">
135
+ <caption>QBasic SZDD variant header format</caption>
136
+ <tr><th>Offset</th><th>Length</th><th>Description</th></tr>
137
+ <tr><td>0x00</td><td>8</td><td>"SZ" signature: 0x53,0x5A,0x20,0x88,0xF0,0x27,0x33,0xD1</td></tr>
138
+ <tr><td>0x08</td><td>4</td><td>The integer length of the file when unpacked</td></tr></table>
139
+
140
+ <a name="KWAJ_file_format"><h2>KWAJ file format</h2></a>
141
+
142
+ <p>A KWAJ file begins with this fixed header:</p>
143
+
144
+ <table class="wikitable">
145
+ <caption>KWAJ header format</caption>
146
+ <tr><th>Offset</th><th>Length</th><th>Description</th></tr>
147
+ <tr><td>0x00</td><td>8</td><td>"KWAJ" signature: 0x4B,0x57,0x41,0x4A,0x88,0xF0,0x27,0xD1</td></tr>
148
+ <tr><td>0x08</td><td>2</td><td>compression method (0-4)</td></tr>
149
+ <tr><td>0x0A</td><td>2</td><td>file offset of compressed data</td></tr>
150
+ <tr><td>0x0C</td><td>2</td><td>header flags to mark header extensions</td></tr>
151
+ </table>
152
+
153
+ <a name="Compression_methods"><h3>Compression methods</h3></a>
154
+
155
+ <p>The "compression method" field indicates the type of data
156
+ compression used:</p>
157
+
158
+ <ol start="0">
159
+ <li>No compression</li>
160
+ <li>No compression, data is XORed with byte 0xFF</li>
161
+ <li>The same compression method as regular SZDD</li>
162
+ <li>LZ + Huffman "Jeff Johnson" compression</li>
163
+ <li>MS-ZIP</li>
164
+ </ol>
165
+
166
+ <a name="Header_extensions"><h3>Header extensions</h3></a>
167
+
168
+ <p>Header extensions immediately follow the header.</p>
169
+
170
+ <p>If you don't care about the header extensions, use the file offset
171
+ to skip to the compressed data.</p>
172
+
173
+ <p>The header extensions appear in this order:</p>
174
+
175
+ <dl>
176
+ <dt>When header flags bit 0 is set</dt><dd>4 bytes: decompressed length of file</dd>
177
+ <dt>When header flags bit 1 is set</dt><dd>2 bytes: unknown purpose</dd>
178
+ <dt>When header flags bit 2 is set</dt><dd>2 bytes: length of data, followed by that many bytes of (unknown purpose) data</dd>
179
+ <dt>When header flags bit 3 is set</dt><dd>1-9 bytes: null-terminated string with max length 8: file name</dd>
180
+ <dt>When header flags bit 4 is set</dt><dd>1-4 bytes: null-terminated string with max length 3: file extension</dd>
181
+ <dt>When header flags bit 5 is set</dt><dd>2 bytes: length of data, followed by that many bytes of (arbitrary text) data</dd>
182
+ </dl>
183
+
184
+ <a name="KWAJ_compression_method_3"><h3>KWAJ compression method 3</h3></a>
185
+
186
+ <p>Compression method 3 is unique to the KWAJ format. It's an
187
+ LZ+Huffman algorithm created by Jeff Johnson.</p>
188
+
189
+ <p>Bits are always read from MSB to LSB, one byte at a time.</p>
190
+
191
+ <p>There are three parts:</p>
192
+
193
+ <ol>
194
+ <li>The data starts off with 6 nybbles; 4 bits each. Each nybble is
195
+ between 0-3 and is the encoding type of the 5 huffman length lists to
196
+ follow. The 6th nybble is just padding.</li>
197
+ <li>Then follow 5 huffman code length lists.</li>
198
+ <li>Then follows the compressed data, which is a mix of huffman
199
+ symbols and raw bits.</li>
200
+ </ol>
201
+
202
+ <a name="Huffman_code_length_lists"><h4>Huffman code length lists</h4></a>
203
+
204
+ <p>KWAJ uses 5 huffman trees. They always have the same number of
205
+ symbols in them. They are, in order:</p>
206
+
207
+ <ol>
208
+ <li>16 symbol tree (0-15) to store match run lengths (MATCHLEN)</li>
209
+ <li>16 symbol tree (0-15) to store match run lengths immediately following a short literal run (MATCHLEN2)</li>
210
+ <li>32 symbol tree (0-31) to store literal run lengths (LITLEN)</li>
211
+ <li>64 symbol tree (0-63) to store the upper 6 bits of match distances (OFFSET)</li>
212
+ <li>256 symbol tree (0-255) to store literals (LITERAL)</li>
213
+ </ol>
214
+
215
+ <p>Canonical huffman codes are used, which means you simply need to
216
+ know how many symbols are in each huffman tree (given above), and how long
217
+ each huffman symbol is.</p>
218
+
219
+ <p>How the symbol lengths are encoded depends on the encoding type, as
220
+ given by the 6 nybbles at the start of the compressed data.</p>
221
+
222
+ <p>Symbol lengths are read in ascending order, and the number of
223
+ symbols to read is implied by which tree you're defining.</p>
224
+
225
+ <dl>
226
+ <dt>Huffman code length list, encoding type 0</dt>
227
+ <dd>All symbols have the same length, implied by the number of symbols in the tree:
228
+ <ul>
229
+ <li>16 symbols -&gt; all symbols are length 4</li>
230
+ <li>32 symbols -&gt; all symbols are length 5</li>
231
+ <li>64 symbols -&gt; all symbols are length 6</li>
232
+ <li>256 symbols -&gt; all symbols are length 8</li>
233
+ </ul>
234
+ </dd>
235
+ <dd>You don't need to read anything.</dd>
236
+ </dl>
237
+
238
+ <dl>
239
+ <dt>Huffman code length list, encoding type 1</dt>
240
+ <dd>A run-length encoding is used:
241
+ <ul>
242
+ <li>read 4 bits for the first symbol length (0-15)</li>
243
+ <li>LOOP:
244
+ <ul>
245
+ <li>read 1 bit == 0 if symbol length is the same as the previous, OTHERWISE:</li>
246
+ <li>read 1 bit == 0 if symbol length is previous + 1, OTHERWISE:</li>
247
+ <li>read 4 bits for symbol length (0-15)</li>
248
+ </ul>
249
+ </li>
250
+ </ul>
251
+ </dd>
252
+ </dl>
253
+
254
+ <dl>
255
+ <dt>Huffman code length list, encoding type 2</dt>
256
+ <dd>Another run-length encoding is used:
257
+ <ul>
258
+ <li>read 4 bits for the first symbol length (0-15)</li>
259
+ <li>LOOP:
260
+ <ul>
261
+ <li> read 2 bits as selector (0-3):
262
+ <ul>
263
+ <li> selector == 3: read 4 bits for symbol length, OTHERWISE:</li>
264
+ <li> symbol length is previous symbol length + (selector-1), i.e. -1, 0 or +1</li>
265
+ </ul>
266
+ </li>
267
+ </ul>
268
+ </li>
269
+ </ul>
270
+ </dd>
271
+ </dl>
272
+
273
+ <dl>
274
+ <dt>Huffman code length list, encoding type 3</dt>
275
+ <dd>There is no compression. Read 4 bits per symbol (0-15).</dd>
276
+ </dl>
277
+
278
+ <a name="Compressed_data"><h4>Compressed data</h4></a>
279
+
280
+ <p>At this point, the compressed data begins.</p>
281
+
282
+ <p>We have a 4096 byte ring buffer, initially filled with byte 0x20
283
+ (ASCII space). Unlike the SZDD format, the starting position in the
284
+ buffer is irrelevant, as match positions are stored relative to the
285
+ current position in the window, not as absolute positions in the
286
+ window.</p>
287
+
288
+ <p>Pseudo-code:</p>
289
+ <pre>
290
+ ring buffer position = 4096-17
291
+ selected table = MATCHLEN
292
+ LOOP:
293
+ code = read huffman code using selected table (MATCHLEN or MATCHLEN2)
294
+ if EOF reached, exit loop
295
+ if code &gt; 0, this is a match:
296
+ match length = code + 2
297
+ x = read huffman code using OFFSET table
298
+ y = read 6 bits
299
+ match offset = current ring buffer position - (x&lt;&lt;6 | y)
300
+ copy match as output and into the ring buffer
301
+ selected table = MATCHLEN
302
+ if code == 0, this is a run of literals:
303
+ x = read huffman code using LITLEN table
304
+ if x != 31, selected table = MATCHLEN2
305
+ read {x+1} literals using LITERAL huffman table, copy as output and into the ring buffer
306
+ </pre>
307
+
308
+ <a name="MSZIP"><h2>MS-ZIP</h2></a>
309
+
310
+ <p>KWAJ type 4 compression is called MS-ZIP, because it is almost
311
+ identical to the MS-ZIP compression found in Microsoft Cabinet files.</p>
312
+
313
+ <p>Each 32768 bytes of data is compressed independently using Phil
314
+ Katz's DEFLATE algorithm. However, the history window is shared
315
+ between blocks, so they must be unpacked in order.
316
+ The format of each block is as follows:</p>
317
+
318
+ <table class="wikitable">
319
+ <caption>KWAJ MS-ZIP block format</caption>
320
+ <tr><th>Offset</th><th>Length</th><th>Description</th></tr>
321
+ <tr><td>0</td><td>2</td><td>Compressed length of this block (n).
322
+ Stored in Intel byte order.
323
+ Doesn't include these two bytes.</td></tr>
324
+ <tr><td>2</td><td>2</td><td>"CK" in ASCII (0x43, 0x4B)</td></tr>
325
+ <tr><td>4</td><td>n-2</td><td>Data compressed in DEFLATE format</td></tr>
326
+ </table>
327
+
328
+ <p>The final block will unpack to 1-32768 bytes. It will be followed by two
329
+ zero bytes.</p>
330
+
331
+ </body></html>
@@ -0,0 +1,10 @@
1
+ prefix=@prefix@
2
+ exec_prefix=@exec_prefix@
3
+ libdir=@libdir@
4
+ includedir=@includedir@
5
+
6
+ Name: @PACKAGE@
7
+ Description: Compressors and decompressors for Microsoft formats
8
+ Version: @VERSION@
9
+ Libs: -L${libdir} -lmspack
10
+ Cflags: -I${includedir}
@@ -0,0 +1,127 @@
1
+ /* This file is part of libmspack.
2
+ * (C) 2003-2004 Stuart Caie.
3
+ *
4
+ * libmspack is free software; you can redistribute it and/or modify it under
5
+ * the terms of the GNU Lesser General Public License (LGPL) version 2.1
6
+ *
7
+ * For further details, see the file COPYING.LIB distributed with libmspack
8
+ */
9
+
10
+ #ifndef MSPACK_CAB_H
11
+ #define MSPACK_CAB_H 1
12
+
13
+ #include <mszip.h>
14
+ #include <qtm.h>
15
+ #include <lzx.h>
16
+
17
+ /* generic CAB definitions */
18
+
19
+ /* structure offsets: byte offset of each field from the start of its structure (cfhead, cfheadext, cffold, cffile, cfdata); each *_SIZEOF is that structure's total size in bytes */
20
+ #define cfhead_Signature (0x00)
21
+ #define cfhead_CabinetSize (0x08)
22
+ #define cfhead_FileOffset (0x10)
23
+ #define cfhead_MinorVersion (0x18)
24
+ #define cfhead_MajorVersion (0x19)
25
+ #define cfhead_NumFolders (0x1A)
26
+ #define cfhead_NumFiles (0x1C)
27
+ #define cfhead_Flags (0x1E)
28
+ #define cfhead_SetID (0x20)
29
+ #define cfhead_CabinetIndex (0x22)
30
+ #define cfhead_SIZEOF (0x24)
31
+ #define cfheadext_HeaderReserved (0x00)
32
+ #define cfheadext_FolderReserved (0x02)
33
+ #define cfheadext_DataReserved (0x03)
34
+ #define cfheadext_SIZEOF (0x04)
35
+ #define cffold_DataOffset (0x00)
36
+ #define cffold_NumBlocks (0x04)
37
+ #define cffold_CompType (0x06)
38
+ #define cffold_SIZEOF (0x08)
39
+ #define cffile_UncompressedSize (0x00)
40
+ #define cffile_FolderOffset (0x04)
41
+ #define cffile_FolderIndex (0x08)
42
+ #define cffile_Date (0x0A)
43
+ #define cffile_Time (0x0C)
44
+ #define cffile_Attribs (0x0E)
45
+ #define cffile_SIZEOF (0x10)
46
+ #define cfdata_CheckSum (0x00)
47
+ #define cfdata_CompressedSize (0x04)
48
+ #define cfdata_UncompressedSize (0x06)
49
+ #define cfdata_SIZEOF (0x08)
50
+
51
+ /* flags and special values: folder compression types (held in the low 4 bits, see cffoldCOMPTYPE_MASK), cfhead header flag bits, and cffile continuation markers (NOTE(review): presumably compared against the cffile_FolderIndex field -- confirm in cabd.c) */
52
+ #define cffoldCOMPTYPE_MASK (0x000f)
53
+ #define cffoldCOMPTYPE_NONE (0x0000)
54
+ #define cffoldCOMPTYPE_MSZIP (0x0001)
55
+ #define cffoldCOMPTYPE_QUANTUM (0x0002)
56
+ #define cffoldCOMPTYPE_LZX (0x0003)
57
+ #define cfheadPREV_CABINET (0x0001)
58
+ #define cfheadNEXT_CABINET (0x0002)
59
+ #define cfheadRESERVE_PRESENT (0x0004)
60
+ #define cffileCONTINUED_FROM_PREV (0xFFFD)
61
+ #define cffileCONTINUED_TO_NEXT (0xFFFE)
62
+ #define cffileCONTINUED_PREV_AND_NEXT (0xFFFF)
63
+
64
+ /* CAB data blocks are <= 32768 bytes in uncompressed form. Uncompressed
65
+ * blocks have zero growth. MSZIP guarantees that it won't grow above
66
+ * uncompressed size by more than 12 bytes. LZX guarantees it won't grow
67
+ * more than 6144 bytes. Quantum has no documentation, but the largest
68
+ * block seen in the wild is 337 bytes above uncompressed size.
69
+ */
70
+ #define CAB_BLOCKMAX (32768) /* largest uncompressed CAB data block, in bytes */
71
+ #define CAB_INPUTMAX (CAB_BLOCKMAX+6144) /* block size plus 6144 bytes: the LZX growth bound, the largest of the figures listed above */
72
+
73
+ /* CAB compression definitions */
74
+
75
+ struct mscab_compressor_p { /* CAB compressor state; embeds struct mscab_compressor as its first member */
76
+ struct mscab_compressor base;
77
+ struct mspack_system *system; /* NOTE(review): presumably the I/O system given at creation -- nothing visible here sets it */
78
+ /* todo: no compression-specific fields have been defined yet */
79
+ };
80
+
81
+ /* CAB decompression definitions */
82
+
83
+ struct mscabd_decompress_state { /* working state for extraction: current folder and split, I/O handles, chosen decompressor and one buffered input block */
84
+ struct mscabd_folder_p *folder; /* current folder we're extracting from */
85
+ struct mscabd_folder_data *data; /* current folder split we're in */
86
+ unsigned int offset; /* uncompressed offset within folder */
87
+ unsigned int block; /* which block are we decompressing? */
88
+ struct mspack_system sys; /* special I/O code for decompressor (held by value, not by pointer) */
89
+ int comp_type; /* type of compression used by folder */
90
+ int (*decompress)(void *, off_t); /* decompressor code; NOTE(review): presumably invoked with 'state' below and a byte count -- confirm in cabd.c */
91
+ void *state; /* decompressor state (opaque pointer) */
92
+ struct mscabd_cabinet_p *incab; /* cabinet where input data comes from */
93
+ struct mspack_file *infh; /* input file handle */
94
+ struct mspack_file *outfh; /* output file handle */
95
+ unsigned char *i_ptr, *i_end; /* input data consumed, end */
96
+ unsigned char input[CAB_INPUTMAX]; /* one input block of data, CAB_INPUTMAX bytes of storage */
97
+ };
98
+
99
+ struct mscab_decompressor_p { /* CAB decompressor object; embeds struct mscab_decompressor as its first member */
100
+ struct mscab_decompressor base;
101
+ struct mscabd_decompress_state *d; /* decompression working state (see struct mscabd_decompress_state) */
102
+ struct mspack_system *system; /* NOTE(review): presumably the I/O system this decompressor was created with -- confirm in cabd.c */
103
+ int param[3]; /* !!! MATCH THIS TO NUM OF PARAMS IN MSPACK.H !!! */
104
+ int error, read_error; /* NOTE(review): presumably the last error code and the last input-read error -- confirm in cabd.c */
105
+ };
106
+
107
+ struct mscabd_cabinet_p { /* cabinet record; embeds struct mscabd_cabinet as its first member and adds data-block details */
108
+ struct mscabd_cabinet base;
109
+ off_t blocks_off; /* offset to data blocks */
110
+ int block_resv; /* reserved space in data blocks */
111
+ };
112
+
113
+ /* there is one of these for every cabinet a folder spans */
114
+ struct mscabd_folder_data { /* one span of a folder's data within a single cabinet */
115
+ struct mscabd_folder_data *next; /* link to another span record; NOTE(review): presumably NULL on the last span -- confirm in cabd.c */
116
+ struct mscabd_cabinet_p *cab; /* cabinet file of this folder span */
117
+ off_t offset; /* cabinet offset of first datablock */
118
+ };
119
+
120
+ struct mscabd_folder_p { /* folder record; embeds struct mscabd_folder as its first member */
121
+ struct mscabd_folder base;
122
+ struct mscabd_folder_data data; /* where are the data blocks? (embedded by value, with its own 'next' link) */
123
+ struct mscabd_file *merge_prev; /* first file needing backwards merge */
124
+ struct mscabd_file *merge_next; /* first file needing forwards merge */
125
+ };
126
+
127
+ #endif
@@ -0,0 +1,24 @@
1
+ /* This file is part of libmspack.
2
+ * (C) 2003-2004 Stuart Caie.
3
+ *
4
+ * libmspack is free software; you can redistribute it and/or modify it under
5
+ * the terms of the GNU Lesser General Public License (LGPL) version 2.1
6
+ *
7
+ * For further details, see the file COPYING.LIB distributed with libmspack
8
+ */
9
+
10
+ /* CAB compression implementation */
11
+
12
+ #include <system.h>
13
+ #include <cab.h>
14
+
15
+ struct mscab_compressor *
16
+ mspack_create_cab_compressor(struct mspack_system *sys)
17
+ {
18
+ /* todo: CAB compression is not implemented; this stub ignores sys and always returns NULL */
19
+ return NULL;
20
+ }
21
+
22
+ void mspack_destroy_cab_compressor(struct mscab_compressor *self) {
23
+ /* todo: not implemented; currently a no-op that ignores self */
24
+ }