lzfse 0.0.1.pre.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,60 @@
1
+ /*
2
+ Copyright (c) 2015-2016, Apple Inc. All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
5
+
6
+ 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7
+
8
+ 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer
9
+ in the documentation and/or other materials provided with the distribution.
10
+
11
+ 3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived
12
+ from this software without specific prior written permission.
13
+
14
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
15
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
16
+ COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
18
+ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
19
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
20
+ */
21
+
22
+ #ifndef LZFSE_TUNABLES_H
23
+ #define LZFSE_TUNABLES_H
24
+
25
+ // Parameters controlling details of the LZ-style match search. These values
26
+ // may be modified to fine tune compression ratio vs. encoding speed, while
27
+ // keeping the compressed format compatible with LZFSE. Note that
28
+ // modifying them will also change the amount of work space required by
29
+ // the encoder. The values here are those used in the compression library
30
+ // on iOS and OS X.
31
+
32
+ // Number of bits for hash function to produce. Should be in the range
33
+ // [10, 16]. Larger values reduce the number of false positives found during
34
+ // the match search, and expand the history table, which may allow additional
35
+ // matches to be found, generally improving the achieved compression ratio.
36
+ // Larger values also increase the workspace size, and make it less likely
37
+ // that the history table will be present in cache, which reduces performance.
38
+ #define LZFSE_ENCODE_HASH_BITS 14
39
+
40
+ // Number of positions to store for each line in the history table. May
41
+ // be either 4 or 8. Using 8 doubles the size of the history table, which
42
+ // increases the chance of finding matches (thus improving compression ratio),
43
+ // but also increases the workspace size.
44
+ #define LZFSE_ENCODE_HASH_WIDTH 4
45
+
46
+ // Match length in bytes to cause immediate emission. Generally speaking,
47
+ // LZFSE maintains multiple candidate matches and waits to decide which match
48
+ // to emit until more information is available. When a match exceeds this
49
+ // threshold, it is emitted immediately. Thus, smaller values may give
50
+ // somewhat better performance, and larger values may give somewhat better
51
+ // compression ratios.
52
+ #define LZFSE_ENCODE_GOOD_MATCH 40
53
+
54
+ // When the source buffer is very small, LZFSE doesn't compress as well as
55
+ // some simpler algorithms. To maintain reasonable compression for these
56
+ // cases, we transition to use LZVN instead if the size of the source buffer
57
+ // is below this threshold.
58
+ #define LZFSE_ENCODE_LZVN_THRESHOLD 4096
59
+
60
+ #endif // LZFSE_TUNABLES_H
@@ -0,0 +1,711 @@
1
+ /*
2
+ Copyright (c) 2015-2016, Apple Inc. All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
5
+
6
+ 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7
+
8
+ 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer
9
+ in the documentation and/or other materials provided with the distribution.
10
+
11
+ 3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived
12
+ from this software without specific prior written permission.
13
+
14
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
15
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
16
+ COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
18
+ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
19
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
20
+ */
21
+
22
+ // LZVN low-level decoder
23
+
24
+ #include "lzvn_decode_base.h"
25
+
26
+ #if !defined(HAVE_LABELS_AS_VALUES)
27
+ # if defined(__GNUC__) || defined(__clang__)
28
+ # define HAVE_LABELS_AS_VALUES 1
29
+ # else
30
+ # define HAVE_LABELS_AS_VALUES 0
31
+ # endif
32
+ #endif
33
+
34
+ // Both the source and destination buffers are represented by a pointer and
35
+ // a length; they are *always* updated in concert using this macro; however
36
+ // many bytes the pointer is advanced, the length is decremented by the same
37
+ // amount. Thus, pointer + length always points to the byte one past the end
38
+ // of the buffer.
39
+ #define PTR_LEN_INC(_pointer, _length, _increment) \
40
+ (_pointer += _increment, _length -= _increment)
41
+
42
+ // Update state with current positions and distance, corresponding to the
43
+ // beginning of an instruction in both streams
44
+ #define UPDATE_GOOD \
45
+ (state->src = src_ptr, state->dst = dst_ptr, state->d_prev = D)
46
+
47
+ void lzvn_decode(lzvn_decoder_state *state) {
48
+ #if HAVE_LABELS_AS_VALUES
49
+ // Jump table for all instructions
50
+ static const void *opc_tbl[256] = {
51
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&eos, &&lrg_d,
52
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&nop, &&lrg_d,
53
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&nop, &&lrg_d,
54
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d,
55
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d,
56
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d,
57
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d,
58
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d,
59
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
60
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
61
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
62
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
63
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
64
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
65
+ &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef,
66
+ &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef,
67
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
68
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
69
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
70
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
71
+ &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d,
72
+ &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d,
73
+ &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d,
74
+ &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d,
75
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
76
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
77
+ &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef,
78
+ &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef,
79
+ &&lrg_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l,
80
+ &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l,
81
+ &&lrg_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m,
82
+ &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m};
83
+ #endif
84
+ size_t src_len = state->src_end - state->src;
85
+ size_t dst_len = state->dst_end - state->dst;
86
+ if (src_len == 0 || dst_len == 0)
87
+ return; // empty buffer
88
+
89
+ const unsigned char *src_ptr = state->src;
90
+ unsigned char *dst_ptr = state->dst;
91
+ size_t D = state->d_prev;
92
+ size_t M;
93
+ size_t L;
94
+ size_t opc_len;
95
+
96
+ // Do we have a partially expanded match saved in state?
97
+ if (state->L != 0 || state->M != 0) {
98
+ L = state->L;
99
+ M = state->M;
100
+ D = state->D;
101
+ opc_len = 0; // we already skipped the op
102
+ state->L = state->M = state->D = 0;
103
+ if (M == 0)
104
+ goto copy_literal;
105
+ if (L == 0)
106
+ goto copy_match;
107
+ goto copy_literal_and_match;
108
+ }
109
+
110
+ unsigned char opc = src_ptr[0];
111
+
112
+ #if HAVE_LABELS_AS_VALUES
113
+ goto *opc_tbl[opc];
114
+ #else
115
+ for (;;) {
116
+ switch (opc) {
117
+ #endif
118
+ // ===============================================================
119
+ // These four opcodes (sml_d, med_d, lrg_d, and pre_d) encode both a
120
+ // literal and a match. The bulk of their implementations are shared;
121
+ // each label here only does the work of setting the opcode length (not
122
+ // including any literal bytes), and extracting the literal length, match
123
+ // length, and match distance (except in pre_d). They then jump into the
124
+ // shared implementation to actually output the literal and match bytes.
125
+ //
126
+ // No error checking happens in the first stage, except for ensuring that
127
+ // the source has enough length to represent the full opcode before
128
+ // reading past the first byte.
129
+ sml_d:
130
+ #if !HAVE_LABELS_AS_VALUES
131
+ case 0:
132
+ case 1:
133
+ case 2:
134
+ case 3:
135
+ case 4:
136
+ case 5:
137
+ case 8:
138
+ case 9:
139
+ case 10:
140
+ case 11:
141
+ case 12:
142
+ case 13:
143
+ case 16:
144
+ case 17:
145
+ case 18:
146
+ case 19:
147
+ case 20:
148
+ case 21:
149
+ case 24:
150
+ case 25:
151
+ case 26:
152
+ case 27:
153
+ case 28:
154
+ case 29:
155
+ case 32:
156
+ case 33:
157
+ case 34:
158
+ case 35:
159
+ case 36:
160
+ case 37:
161
+ case 40:
162
+ case 41:
163
+ case 42:
164
+ case 43:
165
+ case 44:
166
+ case 45:
167
+ case 48:
168
+ case 49:
169
+ case 50:
170
+ case 51:
171
+ case 52:
172
+ case 53:
173
+ case 56:
174
+ case 57:
175
+ case 58:
176
+ case 59:
177
+ case 60:
178
+ case 61:
179
+ case 64:
180
+ case 65:
181
+ case 66:
182
+ case 67:
183
+ case 68:
184
+ case 69:
185
+ case 72:
186
+ case 73:
187
+ case 74:
188
+ case 75:
189
+ case 76:
190
+ case 77:
191
+ case 80:
192
+ case 81:
193
+ case 82:
194
+ case 83:
195
+ case 84:
196
+ case 85:
197
+ case 88:
198
+ case 89:
199
+ case 90:
200
+ case 91:
201
+ case 92:
202
+ case 93:
203
+ case 96:
204
+ case 97:
205
+ case 98:
206
+ case 99:
207
+ case 100:
208
+ case 101:
209
+ case 104:
210
+ case 105:
211
+ case 106:
212
+ case 107:
213
+ case 108:
214
+ case 109:
215
+ case 128:
216
+ case 129:
217
+ case 130:
218
+ case 131:
219
+ case 132:
220
+ case 133:
221
+ case 136:
222
+ case 137:
223
+ case 138:
224
+ case 139:
225
+ case 140:
226
+ case 141:
227
+ case 144:
228
+ case 145:
229
+ case 146:
230
+ case 147:
231
+ case 148:
232
+ case 149:
233
+ case 152:
234
+ case 153:
235
+ case 154:
236
+ case 155:
237
+ case 156:
238
+ case 157:
239
+ case 192:
240
+ case 193:
241
+ case 194:
242
+ case 195:
243
+ case 196:
244
+ case 197:
245
+ case 200:
246
+ case 201:
247
+ case 202:
248
+ case 203:
249
+ case 204:
250
+ case 205:
251
+ #endif
252
+ UPDATE_GOOD;
253
+ // "small distance": This opcode has the structure LLMMMDDD DDDDDDDD LITERAL
254
+ // where the length of literal (0-3 bytes) is encoded by the high 2 bits of
255
+ // the first byte. We first extract the literal length so we know how long
256
+ // the opcode is, then check that the source can hold both this opcode and
257
+ // at least one byte of the next (because any valid input stream must be
258
+ // terminated with an eos token).
259
+ opc_len = 2;
260
+ L = (size_t)extract(opc, 6, 2);
261
+ M = (size_t)extract(opc, 3, 3) + 3;
262
+ // We need to ensure that the source buffer is long enough that we can
263
+ // safely read this entire opcode, the literal that follows, and the first
264
+ // byte of the next opcode. Once we satisfy this requirement, we can
265
+ // safely unpack the match distance. A check similar to this one is
266
+ // present in all the opcode implementations.
267
+ if (src_len <= opc_len + L)
268
+ return; // source truncated
269
+ D = (size_t)extract(opc, 0, 3) << 8 | src_ptr[1];
270
+ goto copy_literal_and_match;
271
+
272
+ med_d:
273
+ #if !HAVE_LABELS_AS_VALUES
274
+ case 160:
275
+ case 161:
276
+ case 162:
277
+ case 163:
278
+ case 164:
279
+ case 165:
280
+ case 166:
281
+ case 167:
282
+ case 168:
283
+ case 169:
284
+ case 170:
285
+ case 171:
286
+ case 172:
287
+ case 173:
288
+ case 174:
289
+ case 175:
290
+ case 176:
291
+ case 177:
292
+ case 178:
293
+ case 179:
294
+ case 180:
295
+ case 181:
296
+ case 182:
297
+ case 183:
298
+ case 184:
299
+ case 185:
300
+ case 186:
301
+ case 187:
302
+ case 188:
303
+ case 189:
304
+ case 190:
305
+ case 191:
306
+ #endif
307
+ UPDATE_GOOD;
308
+ // "medium distance": This is a minor variant of the "small distance"
309
+ // encoding, where we will now use two extra bytes instead of one to encode
310
+ // the rest of the match length and distance. This allows an extra two bits
311
+ // for the match length, and an extra three bits for the match distance. The
312
+ // full structure of the opcode is 101LLMMM DDDDDDMM DDDDDDDD LITERAL.
313
+ opc_len = 3;
314
+ L = (size_t)extract(opc, 3, 2);
315
+ if (src_len <= opc_len + L)
316
+ return; // source truncated
317
+ uint16_t opc23 = load2(&src_ptr[1]);
318
+ M = (size_t)((extract(opc, 0, 3) << 2 | extract(opc23, 0, 2)) + 3);
319
+ D = (size_t)extract(opc23, 2, 14);
320
+ goto copy_literal_and_match;
321
+
322
+ lrg_d:
323
+ #if !HAVE_LABELS_AS_VALUES
324
+ case 7:
325
+ case 15:
326
+ case 23:
327
+ case 31:
328
+ case 39:
329
+ case 47:
330
+ case 55:
331
+ case 63:
332
+ case 71:
333
+ case 79:
334
+ case 87:
335
+ case 95:
336
+ case 103:
337
+ case 111:
338
+ case 135:
339
+ case 143:
340
+ case 151:
341
+ case 159:
342
+ case 199:
343
+ case 207:
344
+ #endif
345
+ UPDATE_GOOD;
346
+ // "large distance": This is another variant of the "small distance"
347
+ // encoding, where we will now use two extra bytes to encode the match
348
+ // distance, which allows distances up to 65535 to be represented. The full
349
+ // structure of the opcode is LLMMM111 DDDDDDDD DDDDDDDD LITERAL.
350
+ opc_len = 3;
351
+ L = (size_t)extract(opc, 6, 2);
352
+ M = (size_t)extract(opc, 3, 3) + 3;
353
+ if (src_len <= opc_len + L)
354
+ return; // source truncated
355
+ D = load2(&src_ptr[1]);
356
+ goto copy_literal_and_match;
357
+
358
+ pre_d:
359
+ #if !HAVE_LABELS_AS_VALUES
360
+ case 70:
361
+ case 78:
362
+ case 86:
363
+ case 94:
364
+ case 102:
365
+ case 110:
366
+ case 134:
367
+ case 142:
368
+ case 150:
369
+ case 158:
370
+ case 198:
371
+ case 206:
372
+ #endif
373
+ UPDATE_GOOD;
374
+ // "previous distance": This opcode has the structure LLMMM110, where the
375
+ // length of the literal (0-3 bytes) is encoded by the high 2 bits of the
376
+ // first byte. We first extract the literal length so we know how long
377
+ // the opcode is, then check that the source can hold both this opcode and
378
+ // at least one byte of the next (because any valid input stream must be
379
+ // terminated with an eos token).
380
+ opc_len = 1;
381
+ L = (size_t)extract(opc, 6, 2);
382
+ M = (size_t)extract(opc, 3, 3) + 3;
383
+ if (src_len <= opc_len + L)
384
+ return; // source truncated
385
+ goto copy_literal_and_match;
386
+
387
+ copy_literal_and_match:
388
+ // Common implementation of writing data for opcodes that have both a
389
+ // literal and a match. We begin by advancing the source pointer past
390
+ // the opcode, so that it points at the first literal byte (if L
391
+ // is non-zero; otherwise it points at the next opcode).
392
+ PTR_LEN_INC(src_ptr, src_len, opc_len);
393
+ // Now we copy the literal from the source pointer to the destination.
394
+ if (__builtin_expect(dst_len >= 4 && src_len >= 4, 1)) {
395
+ // The literal is 0-3 bytes; if we are not near the end of the buffer,
396
+ // we can safely just do a 4 byte copy (which is guaranteed to cover
397
+ // the complete literal, and may include some other bytes as well).
398
+ store4(dst_ptr, load4(src_ptr));
399
+ } else if (L <= dst_len) {
400
+ // We are too close to the end of either the input or output stream
401
+ // to be able to safely use a four-byte copy, but we will not exhaust
402
+ // either stream (we already know that the source will not be
403
+ // exhausted from checks in the individual opcode implementations,
404
+ // and we just tested that dst_len >= L). Thus, we need to do a
405
+ // byte-by-byte copy of the literal. This is slow, but it can only ever
406
+ // happen near the very end of a buffer, so it is not an important case to
407
+ // optimize.
408
+ for (size_t i = 0; i < L; ++i)
409
+ dst_ptr[i] = src_ptr[i];
410
+ } else {
411
+ // Destination truncated: fill DST, and save the remaining literal and the pending match
412
+
413
+ // Copy partial literal
414
+ for (size_t i = 0; i < dst_len; ++i)
415
+ dst_ptr[i] = src_ptr[i];
416
+ // Save state
417
+ state->src = src_ptr + dst_len;
418
+ state->dst = dst_ptr + dst_len;
419
+ state->L = L - dst_len;
420
+ state->M = M;
421
+ state->D = D;
422
+ return; // destination truncated
423
+ }
424
+ // Having completed the copy of the literal, we advance both the source
425
+ // and destination pointers by the number of literal bytes.
426
+ PTR_LEN_INC(dst_ptr, dst_len, L);
427
+ PTR_LEN_INC(src_ptr, src_len, L);
428
+ // Check if the match distance is valid; matches must not reference
429
+ // bytes that precede the start of the output buffer, nor can the match
430
+ // distance be zero.
431
+ if (D > dst_ptr - state->dst_begin || D == 0)
432
+ goto invalid_match_distance;
433
+ copy_match:
434
+ // Now we copy the match from dst_ptr - D to dst_ptr. It is important to keep
435
+ // in mind that we may have D < M, in which case the source and destination
436
+ // windows overlap in the copy. The semantics of the match copy are *not*
437
+ // those of memmove( ); if the buffers overlap it needs to behave as though
438
+ // we were copying byte-by-byte in increasing address order. If, for example,
439
+ // D is 1, the copy operation is equivalent to:
440
+ //
441
+ // memset(dst_ptr, dst_ptr[-1], M);
442
+ //
443
+ // i.e. it splats the previous byte. This means that we need to be very
444
+ // careful about using wide loads or stores to perform the copy operation.
445
+ if (__builtin_expect(dst_len >= M + 7 && D >= 8, 1)) {
446
+ // We are not near the end of the buffer, and the match distance
447
+ // is at least eight. Thus, we can safely loop using eight byte
448
+ // copies. The last of these may slop over the intended end of
449
+ // the match, but this is OK because we know we have a safety bound
450
+ // away from the end of the destination buffer.
451
+ for (size_t i = 0; i < M; i += 8)
452
+ store8(&dst_ptr[i], load8(&dst_ptr[i - D]));
453
+ } else if (M <= dst_len) {
454
+ // Either the match distance is too small, or we are too close to
455
+ // the end of the buffer to safely use eight byte copies. Fall back
456
+ // on a simple byte-by-byte implementation.
457
+ for (size_t i = 0; i < M; ++i)
458
+ dst_ptr[i] = dst_ptr[i - D];
459
+ } else {
460
+ // Destination truncated: fill DST, and store partial match
461
+
462
+ // Copy partial match
463
+ for (size_t i = 0; i < dst_len; ++i)
464
+ dst_ptr[i] = dst_ptr[i - D];
465
+ // Save state
466
+ state->src = src_ptr;
467
+ state->dst = dst_ptr + dst_len;
468
+ state->L = 0;
469
+ state->M = M - dst_len;
470
+ state->D = D;
471
+ return; // destination truncated
472
+ }
473
+ // Update the destination pointer and length to account for the bytes
474
+ // written by the match, then load the next opcode byte and branch to
475
+ // the appropriate implementation.
476
+ PTR_LEN_INC(dst_ptr, dst_len, M);
477
+ opc = src_ptr[0];
478
+ #if HAVE_LABELS_AS_VALUES
479
+ goto *opc_tbl[opc];
480
+ #else
481
+ break;
482
+ #endif
483
+
484
+ // ===============================================================
485
+ // Opcodes representing only a match (no literal).
486
+ // These two opcodes (lrg_m and sml_m) encode only a match. The match
487
+ // distance is carried over from the previous opcode, so all they need
488
+ // to encode is the match length. We are able to reuse the match copy
489
+ // sequence from the literal and match opcodes to perform the actual
490
+ // copy implementation.
491
+ sml_m:
492
+ #if !HAVE_LABELS_AS_VALUES
493
+ case 241:
494
+ case 242:
495
+ case 243:
496
+ case 244:
497
+ case 245:
498
+ case 246:
499
+ case 247:
500
+ case 248:
501
+ case 249:
502
+ case 250:
503
+ case 251:
504
+ case 252:
505
+ case 253:
506
+ case 254:
507
+ case 255:
508
+ #endif
509
+ UPDATE_GOOD;
510
+ // "small match": This opcode has no literal, and uses the previous match
511
+ // distance (i.e. it encodes only the match length), in a single byte as
512
+ // 1111MMMM.
513
+ opc_len = 1;
514
+ if (src_len <= opc_len)
515
+ return; // source truncated
516
+ M = (size_t)extract(opc, 0, 4);
517
+ PTR_LEN_INC(src_ptr, src_len, opc_len);
518
+ goto copy_match;
519
+
520
+ lrg_m:
521
+ #if !HAVE_LABELS_AS_VALUES
522
+ case 240:
523
+ #endif
524
+ UPDATE_GOOD;
525
+ // "large match": This opcode has no literal, and uses the previous match
526
+ // distance (i.e. it encodes only the match length). It is encoded in two
527
+ // bytes as 11110000 MMMMMMMM. Because matches smaller than 16 bytes can
528
+ // be represented by sml_m, there is an implicit bias of 16 on the match
529
+ // length; the representable values are [16,271].
530
+ opc_len = 2;
531
+ if (src_len <= opc_len)
532
+ return; // source truncated
533
+ M = src_ptr[1] + 16;
534
+ PTR_LEN_INC(src_ptr, src_len, opc_len);
535
+ goto copy_match;
536
+
537
+ // ===============================================================
538
+ // Opcodes representing only a literal (no match).
539
+ // These two opcodes (lrg_l and sml_l) encode only a literal. There is no
540
+ // match length or match distance to worry about (but we need to *not*
541
+ // touch D, as it must be preserved between opcodes).
542
+ sml_l:
543
+ #if !HAVE_LABELS_AS_VALUES
544
+ case 225:
545
+ case 226:
546
+ case 227:
547
+ case 228:
548
+ case 229:
549
+ case 230:
550
+ case 231:
551
+ case 232:
552
+ case 233:
553
+ case 234:
554
+ case 235:
555
+ case 236:
556
+ case 237:
557
+ case 238:
558
+ case 239:
559
+ #endif
560
+ UPDATE_GOOD;
561
+ // "small literal": This opcode has no match, and encodes only a literal
562
+ // of length up to 15 bytes. The format is 1110LLLL LITERAL.
563
+ opc_len = 1;
564
+ L = (size_t)extract(opc, 0, 4);
565
+ goto copy_literal;
566
+
567
+ lrg_l:
568
+ #if !HAVE_LABELS_AS_VALUES
569
+ case 224:
570
+ #endif
571
+ UPDATE_GOOD;
572
+ // "large literal": This opcode has no match; it encodes only a literal
573
+ // (the match distance D is left untouched). It is encoded in two
574
+ // bytes as 11100000 LLLLLLLL LITERAL. Because literals smaller than 16
575
+ // bytes can be represented by sml_l, there is an implicit bias of 16 on
576
+ // the literal length; the representable values are [16,271].
577
+ opc_len = 2;
578
+ if (src_len <= 2)
579
+ return; // source truncated
580
+ L = src_ptr[1] + 16;
581
+ goto copy_literal;
582
+
583
+ copy_literal:
584
+ // Check that the source buffer is large enough to hold the complete
585
+ // literal and at least the first byte of the next opcode. If so, advance
586
+ // the source pointer to point to the first byte of the literal and adjust
587
+ // the source length accordingly.
588
+ if (src_len <= opc_len + L)
589
+ return; // source truncated
590
+ PTR_LEN_INC(src_ptr, src_len, opc_len);
591
+ // Now we copy the literal from the source pointer to the destination.
592
+ if (dst_len >= L + 7 && src_len >= L + 7) {
593
+ // We are not near the end of the source or destination buffers; thus
594
+ // we can safely copy the literal using wide copies, without worrying
595
+ // about reading or writing past the end of either buffer.
596
+ for (size_t i = 0; i < L; i += 8)
597
+ store8(&dst_ptr[i], load8(&src_ptr[i]));
598
+ } else if (L <= dst_len) {
599
+ // We are too close to the end of either the input or output stream
600
+ // to be able to safely use an eight-byte copy. Instead we copy the
601
+ // literal byte-by-byte.
602
+ for (size_t i = 0; i < L; ++i)
603
+ dst_ptr[i] = src_ptr[i];
604
+ } else {
605
+ // Destination truncated: fill DST, and store partial literal
606
+
607
+ // Copy partial literal
608
+ for (size_t i = 0; i < dst_len; ++i)
609
+ dst_ptr[i] = src_ptr[i];
610
+ // Save state
611
+ state->src = src_ptr + dst_len;
612
+ state->dst = dst_ptr + dst_len;
613
+ state->L = L - dst_len;
614
+ state->M = 0;
615
+ state->D = D;
616
+ return; // destination truncated
617
+ }
618
+ // Having completed the copy of the literal, we advance both the source
619
+ // and destination pointers by the number of literal bytes.
620
+ PTR_LEN_INC(dst_ptr, dst_len, L);
621
+ PTR_LEN_INC(src_ptr, src_len, L);
622
+ // Load the first byte of the next opcode, and jump to its implementation.
623
+ opc = src_ptr[0];
624
+ #if HAVE_LABELS_AS_VALUES
625
+ goto *opc_tbl[opc];
626
+ #else
627
+ break;
628
+ #endif
629
+
630
+ // ===============================================================
631
+ // Other opcodes
632
+ nop:
633
+ #if !HAVE_LABELS_AS_VALUES
634
+ case 14:
635
+ case 22:
636
+ #endif
637
+ UPDATE_GOOD;
638
+ opc_len = 1;
639
+ if (src_len <= opc_len)
640
+ return; // source truncated
641
+ PTR_LEN_INC(src_ptr, src_len, opc_len);
642
+ opc = src_ptr[0];
643
+ #if HAVE_LABELS_AS_VALUES
644
+ goto *opc_tbl[opc];
645
+ #else
646
+ break;
647
+ #endif
648
+
649
+ eos:
650
+ #if !HAVE_LABELS_AS_VALUES
651
+ case 6:
652
+ #endif
653
+ opc_len = 8;
654
+ if (src_len < opc_len)
655
+ return; // source truncated (here we don't need an extra byte for next op
656
+ // code)
657
+ PTR_LEN_INC(src_ptr, src_len, opc_len);
658
+ state->end_of_stream = 1;
659
+ UPDATE_GOOD;
660
+ return; // end-of-stream
661
+
662
+ // ===============================================================
663
+ // Return on error
664
+ udef:
665
+ #if !HAVE_LABELS_AS_VALUES
666
+ case 30:
667
+ case 38:
668
+ case 46:
669
+ case 54:
670
+ case 62:
671
+ case 112:
672
+ case 113:
673
+ case 114:
674
+ case 115:
675
+ case 116:
676
+ case 117:
677
+ case 118:
678
+ case 119:
679
+ case 120:
680
+ case 121:
681
+ case 122:
682
+ case 123:
683
+ case 124:
684
+ case 125:
685
+ case 126:
686
+ case 127:
687
+ case 208:
688
+ case 209:
689
+ case 210:
690
+ case 211:
691
+ case 212:
692
+ case 213:
693
+ case 214:
694
+ case 215:
695
+ case 216:
696
+ case 217:
697
+ case 218:
698
+ case 219:
699
+ case 220:
700
+ case 221:
701
+ case 222:
702
+ case 223:
703
+ #endif
704
+ invalid_match_distance:
705
+
706
+ return; // we already updated state
707
+ #if !HAVE_LABELS_AS_VALUES
708
+ }
709
+ }
710
+ #endif
711
+ }