lzfse 0.0.1.pre.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,60 @@
1
+ /*
2
+ Copyright (c) 2015-2016, Apple Inc. All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
5
+
6
+ 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7
+
8
+ 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer
9
+ in the documentation and/or other materials provided with the distribution.
10
+
11
+ 3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived
12
+ from this software without specific prior written permission.
13
+
14
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
15
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
16
+ COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
18
+ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
19
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
20
+ */
21
+
22
#ifndef LZFSE_TUNABLES_H
#define LZFSE_TUNABLES_H

// Parameters controlling details of the LZ-style match search. These values
// may be modified to fine tune compression ratio vs. encoding speed, while
// keeping the compressed format compatible with LZFSE. Note that
// modifying them will also change the amount of work space required by
// the encoder. The values here are those used in the compression library
// on iOS and OS X.
//
// The documented constraints on each value are enforced at compile time so
// that an out-of-range edit fails the build instead of silently producing a
// misconfigured encoder.

// Number of bits for hash function to produce. Should be in the range
// [10, 16]. Larger values reduce the number of false positives found during
// the match search, and expand the history table, which may allow additional
// matches to be found, generally improving the achieved compression ratio.
// Larger values also increase the workspace size, and make it less likely
// that the history table will be present in cache, which reduces performance.
#define LZFSE_ENCODE_HASH_BITS 14
#if LZFSE_ENCODE_HASH_BITS < 10 || LZFSE_ENCODE_HASH_BITS > 16
#error "LZFSE_ENCODE_HASH_BITS must be in the range [10, 16]"
#endif

// Number of positions to store for each line in the history table. May
// be either 4 or 8. Using 8 doubles the size of the history table, which
// increases the chance of finding matches (thus improving compression ratio),
// but also increases the workspace size.
#define LZFSE_ENCODE_HASH_WIDTH 4
#if LZFSE_ENCODE_HASH_WIDTH != 4 && LZFSE_ENCODE_HASH_WIDTH != 8
#error "LZFSE_ENCODE_HASH_WIDTH must be either 4 or 8"
#endif

// Match length in bytes to cause immediate emission. Generally speaking,
// LZFSE maintains multiple candidate matches and waits to decide which match
// to emit until more information is available. When a match exceeds this
// threshold, it is emitted immediately. Thus, smaller values may give
// somewhat better performance, and larger values may give somewhat better
// compression ratios.
#define LZFSE_ENCODE_GOOD_MATCH 40

// When the source buffer is very small, LZFSE doesn't compress as well as
// some simpler algorithms. To maintain reasonable compression for these
// cases, we transition to use LZVN instead if the size of the source buffer
// is below this threshold.
#define LZFSE_ENCODE_LZVN_THRESHOLD 4096

#endif // LZFSE_TUNABLES_H
@@ -0,0 +1,711 @@
1
+ /*
2
+ Copyright (c) 2015-2016, Apple Inc. All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
5
+
6
+ 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7
+
8
+ 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer
9
+ in the documentation and/or other materials provided with the distribution.
10
+
11
+ 3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived
12
+ from this software without specific prior written permission.
13
+
14
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
15
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
16
+ COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
18
+ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
19
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
20
+ */
21
+
22
+ // LZVN low-level decoder
23
+
24
+ #include "lzvn_decode_base.h"
25
+
26
// Decide whether the decoder may use computed gotos (`&&label` and
// `goto *ptr`). A definition supplied by the build takes precedence; otherwise
// the feature is enabled when the compiler identifies itself as GCC or clang.
#ifndef HAVE_LABELS_AS_VALUES
#if defined(__GNUC__) || defined(__clang__)
#define HAVE_LABELS_AS_VALUES 1
#else
#define HAVE_LABELS_AS_VALUES 0
#endif
#endif

// A buffer is tracked as a (pointer, remaining length) pair. This macro is
// the only way the pair is moved: the pointer goes forward by `_increment`
// and the length shrinks by the same amount, so pointer + length keeps
// addressing the byte one past the end of the buffer.
// Note: `_increment` is evaluated twice; pass an expression without side
// effects.
#define PTR_LEN_INC(_pointer, _length, _increment)                             \
  ((_pointer) += (_increment), (_length) -= (_increment))

// Record the current source position, destination position and match
// distance in the decoder state. Used at the start of an instruction, when
// both streams sit on an instruction boundary. Expects `state`, `src_ptr`,
// `dst_ptr` and `D` to be in scope at the point of use.
#define UPDATE_GOOD                                                            \
  (state->src = src_ptr, state->dst = dst_ptr, state->d_prev = D)
46
+
47
+ void lzvn_decode(lzvn_decoder_state *state) {
48
+ #if HAVE_LABELS_AS_VALUES
49
+ // Jump table for all instructions
50
+ static const void *opc_tbl[256] = {
51
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&eos, &&lrg_d,
52
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&nop, &&lrg_d,
53
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&nop, &&lrg_d,
54
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d,
55
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d,
56
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d,
57
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d,
58
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d,
59
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
60
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
61
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
62
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
63
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
64
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
65
+ &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef,
66
+ &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef,
67
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
68
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
69
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
70
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
71
+ &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d,
72
+ &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d,
73
+ &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d,
74
+ &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d,
75
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
76
+ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
77
+ &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef,
78
+ &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef,
79
+ &&lrg_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l,
80
+ &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l,
81
+ &&lrg_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m,
82
+ &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m};
83
+ #endif
84
+ size_t src_len = state->src_end - state->src;
85
+ size_t dst_len = state->dst_end - state->dst;
86
+ if (src_len == 0 || dst_len == 0)
87
+ return; // empty buffer
88
+
89
+ const unsigned char *src_ptr = state->src;
90
+ unsigned char *dst_ptr = state->dst;
91
+ size_t D = state->d_prev;
92
+ size_t M;
93
+ size_t L;
94
+ size_t opc_len;
95
+
96
+ // Do we have a partially expanded match saved in state?
97
+ if (state->L != 0 || state->M != 0) {
98
+ L = state->L;
99
+ M = state->M;
100
+ D = state->D;
101
+ opc_len = 0; // we already skipped the op
102
+ state->L = state->M = state->D = 0;
103
+ if (M == 0)
104
+ goto copy_literal;
105
+ if (L == 0)
106
+ goto copy_match;
107
+ goto copy_literal_and_match;
108
+ }
109
+
110
+ unsigned char opc = src_ptr[0];
111
+
112
+ #if HAVE_LABELS_AS_VALUES
113
+ goto *opc_tbl[opc];
114
+ #else
115
+ for (;;) {
116
+ switch (opc) {
117
+ #endif
118
+ // ===============================================================
119
+ // These four opcodes (sml_d, med_d, lrg_d, and pre_d) encode both a
120
+ // literal and a match. The bulk of their implementations are shared;
121
+ // each label here only does the work of setting the opcode length (not
122
+ // including any literal bytes), and extracting the literal length, match
123
+ // length, and match distance (except in pre_d). They then jump into the
124
+ // shared implementation to actually output the literal and match bytes.
125
+ //
126
+ // No error checking happens in the first stage, except for ensuring that
127
+ // the source has enough length to represent the full opcode before
128
+ // reading past the first byte.
129
+ sml_d:
130
+ #if !HAVE_LABELS_AS_VALUES
131
+ case 0:
132
+ case 1:
133
+ case 2:
134
+ case 3:
135
+ case 4:
136
+ case 5:
137
+ case 8:
138
+ case 9:
139
+ case 10:
140
+ case 11:
141
+ case 12:
142
+ case 13:
143
+ case 16:
144
+ case 17:
145
+ case 18:
146
+ case 19:
147
+ case 20:
148
+ case 21:
149
+ case 24:
150
+ case 25:
151
+ case 26:
152
+ case 27:
153
+ case 28:
154
+ case 29:
155
+ case 32:
156
+ case 33:
157
+ case 34:
158
+ case 35:
159
+ case 36:
160
+ case 37:
161
+ case 40:
162
+ case 41:
163
+ case 42:
164
+ case 43:
165
+ case 44:
166
+ case 45:
167
+ case 48:
168
+ case 49:
169
+ case 50:
170
+ case 51:
171
+ case 52:
172
+ case 53:
173
+ case 56:
174
+ case 57:
175
+ case 58:
176
+ case 59:
177
+ case 60:
178
+ case 61:
179
+ case 64:
180
+ case 65:
181
+ case 66:
182
+ case 67:
183
+ case 68:
184
+ case 69:
185
+ case 72:
186
+ case 73:
187
+ case 74:
188
+ case 75:
189
+ case 76:
190
+ case 77:
191
+ case 80:
192
+ case 81:
193
+ case 82:
194
+ case 83:
195
+ case 84:
196
+ case 85:
197
+ case 88:
198
+ case 89:
199
+ case 90:
200
+ case 91:
201
+ case 92:
202
+ case 93:
203
+ case 96:
204
+ case 97:
205
+ case 98:
206
+ case 99:
207
+ case 100:
208
+ case 101:
209
+ case 104:
210
+ case 105:
211
+ case 106:
212
+ case 107:
213
+ case 108:
214
+ case 109:
215
+ case 128:
216
+ case 129:
217
+ case 130:
218
+ case 131:
219
+ case 132:
220
+ case 133:
221
+ case 136:
222
+ case 137:
223
+ case 138:
224
+ case 139:
225
+ case 140:
226
+ case 141:
227
+ case 144:
228
+ case 145:
229
+ case 146:
230
+ case 147:
231
+ case 148:
232
+ case 149:
233
+ case 152:
234
+ case 153:
235
+ case 154:
236
+ case 155:
237
+ case 156:
238
+ case 157:
239
+ case 192:
240
+ case 193:
241
+ case 194:
242
+ case 195:
243
+ case 196:
244
+ case 197:
245
+ case 200:
246
+ case 201:
247
+ case 202:
248
+ case 203:
249
+ case 204:
250
+ case 205:
251
+ #endif
252
+ UPDATE_GOOD;
253
+ // "small distance": This opcode has the structure LLMMMDDD DDDDDDDD LITERAL
254
+ // where the length of literal (0-3 bytes) is encoded by the high 2 bits of
255
+ // the first byte. We first extract the literal length so we know how long
256
+ // the opcode is, then check that the source can hold both this opcode and
257
+ // at least one byte of the next (because any valid input stream must be
258
+ // terminated with an eos token).
259
+ opc_len = 2;
260
+ L = (size_t)extract(opc, 6, 2);
261
+ M = (size_t)extract(opc, 3, 3) + 3;
262
+ // We need to ensure that the source buffer is long enough that we can
263
+ // safely read this entire opcode, the literal that follows, and the first
264
+ // byte of the next opcode. Once we satisfy this requirement, we can
265
+ // safely unpack the match distance. A check similar to this one is
266
+ // present in all the opcode implementations.
267
+ if (src_len <= opc_len + L)
268
+ return; // source truncated
269
+ D = (size_t)extract(opc, 0, 3) << 8 | src_ptr[1];
270
+ goto copy_literal_and_match;
271
+
272
+ med_d:
273
+ #if !HAVE_LABELS_AS_VALUES
274
+ case 160:
275
+ case 161:
276
+ case 162:
277
+ case 163:
278
+ case 164:
279
+ case 165:
280
+ case 166:
281
+ case 167:
282
+ case 168:
283
+ case 169:
284
+ case 170:
285
+ case 171:
286
+ case 172:
287
+ case 173:
288
+ case 174:
289
+ case 175:
290
+ case 176:
291
+ case 177:
292
+ case 178:
293
+ case 179:
294
+ case 180:
295
+ case 181:
296
+ case 182:
297
+ case 183:
298
+ case 184:
299
+ case 185:
300
+ case 186:
301
+ case 187:
302
+ case 188:
303
+ case 189:
304
+ case 190:
305
+ case 191:
306
+ #endif
307
+ UPDATE_GOOD;
308
+ // "medium distance": This is a minor variant of the "small distance"
309
+ // encoding, where we will now use two extra bytes instead of one to encode
310
+ // the restof the match length and distance. This allows an extra two bits
311
+ // for the match length, and an extra three bits for the match distance. The
312
+ // full structure of the opcode is 101LLMMM DDDDDDMM DDDDDDDD LITERAL.
313
+ opc_len = 3;
314
+ L = (size_t)extract(opc, 3, 2);
315
+ if (src_len <= opc_len + L)
316
+ return; // source truncated
317
+ uint16_t opc23 = load2(&src_ptr[1]);
318
+ M = (size_t)((extract(opc, 0, 3) << 2 | extract(opc23, 0, 2)) + 3);
319
+ D = (size_t)extract(opc23, 2, 14);
320
+ goto copy_literal_and_match;
321
+
322
+ lrg_d:
323
+ #if !HAVE_LABELS_AS_VALUES
324
+ case 7:
325
+ case 15:
326
+ case 23:
327
+ case 31:
328
+ case 39:
329
+ case 47:
330
+ case 55:
331
+ case 63:
332
+ case 71:
333
+ case 79:
334
+ case 87:
335
+ case 95:
336
+ case 103:
337
+ case 111:
338
+ case 135:
339
+ case 143:
340
+ case 151:
341
+ case 159:
342
+ case 199:
343
+ case 207:
344
+ #endif
345
+ UPDATE_GOOD;
346
+ // "large distance": This is another variant of the "small distance"
347
+ // encoding, where we will now use two extra bytes to encode the match
348
+ // distance, which allows distances up to 65535 to be represented. The full
349
+ // structure of the opcode is LLMMM111 DDDDDDDD DDDDDDDD LITERAL.
350
+ opc_len = 3;
351
+ L = (size_t)extract(opc, 6, 2);
352
+ M = (size_t)extract(opc, 3, 3) + 3;
353
+ if (src_len <= opc_len + L)
354
+ return; // source truncated
355
+ D = load2(&src_ptr[1]);
356
+ goto copy_literal_and_match;
357
+
358
+ pre_d:
359
+ #if !HAVE_LABELS_AS_VALUES
360
+ case 70:
361
+ case 78:
362
+ case 86:
363
+ case 94:
364
+ case 102:
365
+ case 110:
366
+ case 134:
367
+ case 142:
368
+ case 150:
369
+ case 158:
370
+ case 198:
371
+ case 206:
372
+ #endif
373
+ UPDATE_GOOD;
374
+ // "previous distance": This opcode has the structure LLMMM110, where the
375
+ // length of the literal (0-3 bytes) is encoded by the high 2 bits of the
376
+ // first byte. We first extract the literal length so we know how long
377
+ // the opcode is, then check that the source can hold both this opcode and
378
+ // at least one byte of the next (because any valid input stream must be
379
+ // terminated with an eos token).
380
+ opc_len = 1;
381
+ L = (size_t)extract(opc, 6, 2);
382
+ M = (size_t)extract(opc, 3, 3) + 3;
383
+ if (src_len <= opc_len + L)
384
+ return; // source truncated
385
+ goto copy_literal_and_match;
386
+
387
+ copy_literal_and_match:
388
+ // Common implementation of writing data for opcodes that have both a
389
+ // literal and a match. We begin by advancing the source pointer past
390
+ // the opcode, so that it points at the first literal byte (if L
391
+ // is non-zero; otherwise it points at the next opcode).
392
+ PTR_LEN_INC(src_ptr, src_len, opc_len);
393
+ // Now we copy the literal from the source pointer to the destination.
394
+ if (__builtin_expect(dst_len >= 4 && src_len >= 4, 1)) {
395
+ // The literal is 0-3 bytes; if we are not near the end of the buffer,
396
+ // we can safely just do a 4 byte copy (which is guaranteed to cover
397
+ // the complete literal, and may include some other bytes as well).
398
+ store4(dst_ptr, load4(src_ptr));
399
+ } else if (L <= dst_len) {
400
+ // We are too close to the end of either the input or output stream
401
+ // to be able to safely use a four-byte copy, but we will not exhaust
402
+ // either stream (we already know that the source will not be
403
+ // exhausted from checks in the individual opcode implementations,
404
+ // and we just tested that dst_len > L). Thus, we need to do a
405
+ // byte-by-byte copy of the literal. This is slow, but it can only ever
406
+ // happen near the very end of a buffer, so it is not an important case to
407
+ // optimize.
408
+ for (size_t i = 0; i < L; ++i)
409
+ dst_ptr[i] = src_ptr[i];
410
+ } else {
411
+ // Destination truncated: fill DST, and store partial match
412
+
413
+ // Copy partial literal
414
+ for (size_t i = 0; i < dst_len; ++i)
415
+ dst_ptr[i] = src_ptr[i];
416
+ // Save state
417
+ state->src = src_ptr + dst_len;
418
+ state->dst = dst_ptr + dst_len;
419
+ state->L = L - dst_len;
420
+ state->M = M;
421
+ state->D = D;
422
+ return; // destination truncated
423
+ }
424
+ // Having completed the copy of the literal, we advance both the source
425
+ // and destination pointers by the number of literal bytes.
426
+ PTR_LEN_INC(dst_ptr, dst_len, L);
427
+ PTR_LEN_INC(src_ptr, src_len, L);
428
+ // Check if the match distance is valid; matches must not reference
429
+ // bytes that preceed the start of the output buffer, nor can the match
430
+ // distance be zero.
431
+ if (D > dst_ptr - state->dst_begin || D == 0)
432
+ goto invalid_match_distance;
433
+ copy_match:
434
+ // Now we copy the match from dst_ptr - D to dst_ptr. It is important to keep
435
+ // in mind that we may have D < M, in which case the source and destination
436
+ // windows overlap in the copy. The semantics of the match copy are *not*
437
+ // those of memmove( ); if the buffers overlap it needs to behave as though
438
+ // we were copying byte-by-byte in increasing address order. If, for example,
439
+ // D is 1, the copy operation is equivalent to:
440
+ //
441
+ // memset(dst_ptr, dst_ptr[-1], M);
442
+ //
443
+ // i.e. it splats the previous byte. This means that we need to be very
444
+ // careful about using wide loads or stores to perform the copy operation.
445
+ if (__builtin_expect(dst_len >= M + 7 && D >= 8, 1)) {
446
+ // We are not near the end of the buffer, and the match distance
447
+ // is at least eight. Thus, we can safely loop using eight byte
448
+ // copies. The last of these may slop over the intended end of
449
+ // the match, but this is OK because we know we have a safety bound
450
+ // away from the end of the destination buffer.
451
+ for (size_t i = 0; i < M; i += 8)
452
+ store8(&dst_ptr[i], load8(&dst_ptr[i - D]));
453
+ } else if (M <= dst_len) {
454
+ // Either the match distance is too small, or we are too close to
455
+ // the end of the buffer to safely use eight byte copies. Fall back
456
+ // on a simple byte-by-byte implementation.
457
+ for (size_t i = 0; i < M; ++i)
458
+ dst_ptr[i] = dst_ptr[i - D];
459
+ } else {
460
+ // Destination truncated: fill DST, and store partial match
461
+
462
+ // Copy partial match
463
+ for (size_t i = 0; i < dst_len; ++i)
464
+ dst_ptr[i] = dst_ptr[i - D];
465
+ // Save state
466
+ state->src = src_ptr;
467
+ state->dst = dst_ptr + dst_len;
468
+ state->L = 0;
469
+ state->M = M - dst_len;
470
+ state->D = D;
471
+ return; // destination truncated
472
+ }
473
+ // Update the destination pointer and length to account for the bytes
474
+ // written by the match, then load the next opcode byte and branch to
475
+ // the appropriate implementation.
476
+ PTR_LEN_INC(dst_ptr, dst_len, M);
477
+ opc = src_ptr[0];
478
+ #if HAVE_LABELS_AS_VALUES
479
+ goto *opc_tbl[opc];
480
+ #else
481
+ break;
482
+ #endif
483
+
484
+ // ===============================================================
485
+ // Opcodes representing only a match (no literal).
486
+ // These two opcodes (lrg_m and sml_m) encode only a match. The match
487
+ // distance is carried over from the previous opcode, so all they need
488
+ // to encode is the match length. We are able to reuse the match copy
489
+ // sequence from the literal and match opcodes to perform the actual
490
+ // copy implementation.
491
+ sml_m:
492
+ #if !HAVE_LABELS_AS_VALUES
493
+ case 241:
494
+ case 242:
495
+ case 243:
496
+ case 244:
497
+ case 245:
498
+ case 246:
499
+ case 247:
500
+ case 248:
501
+ case 249:
502
+ case 250:
503
+ case 251:
504
+ case 252:
505
+ case 253:
506
+ case 254:
507
+ case 255:
508
+ #endif
509
+ UPDATE_GOOD;
510
+ // "small match": This opcode has no literal, and uses the previous match
511
+ // distance (i.e. it encodes only the match length), in a single byte as
512
+ // 1111MMMM.
513
+ opc_len = 1;
514
+ if (src_len <= opc_len)
515
+ return; // source truncated
516
+ M = (size_t)extract(opc, 0, 4);
517
+ PTR_LEN_INC(src_ptr, src_len, opc_len);
518
+ goto copy_match;
519
+
520
+ lrg_m:
521
+ #if !HAVE_LABELS_AS_VALUES
522
+ case 240:
523
+ #endif
524
+ UPDATE_GOOD;
525
+ // "large match": This opcode has no literal, and uses the previous match
526
+ // distance (i.e. it encodes only the match length). It is encoded in two
527
+ // bytes as 11110000 MMMMMMMM. Because matches smaller than 16 bytes can
528
+ // be represented by sml_m, there is an implicit bias of 16 on the match
529
+ // length; the representable values are [16,271].
530
+ opc_len = 2;
531
+ if (src_len <= opc_len)
532
+ return; // source truncated
533
+ M = src_ptr[1] + 16;
534
+ PTR_LEN_INC(src_ptr, src_len, opc_len);
535
+ goto copy_match;
536
+
537
+ // ===============================================================
538
+ // Opcodes representing only a literal (no match).
539
+ // These two opcodes (lrg_l and sml_l) encode only a literal. There is no
540
+ // match length or match distance to worry about (but we need to *not*
541
+ // touch D, as it must be preserved between opcodes).
542
+ sml_l:
543
+ #if !HAVE_LABELS_AS_VALUES
544
+ case 225:
545
+ case 226:
546
+ case 227:
547
+ case 228:
548
+ case 229:
549
+ case 230:
550
+ case 231:
551
+ case 232:
552
+ case 233:
553
+ case 234:
554
+ case 235:
555
+ case 236:
556
+ case 237:
557
+ case 238:
558
+ case 239:
559
+ #endif
560
+ UPDATE_GOOD;
561
+ // "small literal": This opcode has no match, and encodes only a literal
562
+ // of length up to 15 bytes. The format is 1110LLLL LITERAL.
563
+ opc_len = 1;
564
+ L = (size_t)extract(opc, 0, 4);
565
+ goto copy_literal;
566
+
567
+ lrg_l:
568
+ #if !HAVE_LABELS_AS_VALUES
569
+ case 224:
570
+ #endif
571
+ UPDATE_GOOD;
572
+ // "large literal": This opcode has no match, and uses the previous match
573
+ // distance (i.e. it encodes only the match length). It is encoded in two
574
+ // bytes as 11100000 LLLLLLLL LITERAL. Because literals smaller than 16
575
+ // bytes can be represented by sml_l, there is an implicit bias of 16 on
576
+ // the literal length; the representable values are [16,271].
577
+ opc_len = 2;
578
+ if (src_len <= 2)
579
+ return; // source truncated
580
+ L = src_ptr[1] + 16;
581
+ goto copy_literal;
582
+
583
+ copy_literal:
584
+ // Check that the source buffer is large enough to hold the complete
585
+ // literal and at least the first byte of the next opcode. If so, advance
586
+ // the source pointer to point to the first byte of the literal and adjust
587
+ // the source length accordingly.
588
+ if (src_len <= opc_len + L)
589
+ return; // source truncated
590
+ PTR_LEN_INC(src_ptr, src_len, opc_len);
591
+ // Now we copy the literal from the source pointer to the destination.
592
+ if (dst_len >= L + 7 && src_len >= L + 7) {
593
+ // We are not near the end of the source or destination buffers; thus
594
+ // we can safely copy the literal using wide copies, without worrying
595
+ // about reading or writing past the end of either buffer.
596
+ for (size_t i = 0; i < L; i += 8)
597
+ store8(&dst_ptr[i], load8(&src_ptr[i]));
598
+ } else if (L <= dst_len) {
599
+ // We are too close to the end of either the input or output stream
600
+ // to be able to safely use an eight-byte copy. Instead we copy the
601
+ // literal byte-by-byte.
602
+ for (size_t i = 0; i < L; ++i)
603
+ dst_ptr[i] = src_ptr[i];
604
+ } else {
605
+ // Destination truncated: fill DST, and store partial match
606
+
607
+ // Copy partial literal
608
+ for (size_t i = 0; i < dst_len; ++i)
609
+ dst_ptr[i] = src_ptr[i];
610
+ // Save state
611
+ state->src = src_ptr + dst_len;
612
+ state->dst = dst_ptr + dst_len;
613
+ state->L = L - dst_len;
614
+ state->M = 0;
615
+ state->D = D;
616
+ return; // destination truncated
617
+ }
618
+ // Having completed the copy of the literal, we advance both the source
619
+ // and destination pointers by the number of literal bytes.
620
+ PTR_LEN_INC(dst_ptr, dst_len, L);
621
+ PTR_LEN_INC(src_ptr, src_len, L);
622
+ // Load the first byte of the next opcode, and jump to its implementation.
623
+ opc = src_ptr[0];
624
+ #if HAVE_LABELS_AS_VALUES
625
+ goto *opc_tbl[opc];
626
+ #else
627
+ break;
628
+ #endif
629
+
630
+ // ===============================================================
631
+ // Other opcodes
632
+ nop:
633
+ #if !HAVE_LABELS_AS_VALUES
634
+ case 14:
635
+ case 22:
636
+ #endif
637
+ UPDATE_GOOD;
638
+ opc_len = 1;
639
+ if (src_len <= opc_len)
640
+ return; // source truncated
641
+ PTR_LEN_INC(src_ptr, src_len, opc_len);
642
+ opc = src_ptr[0];
643
+ #if HAVE_LABELS_AS_VALUES
644
+ goto *opc_tbl[opc];
645
+ #else
646
+ break;
647
+ #endif
648
+
649
+ eos:
650
+ #if !HAVE_LABELS_AS_VALUES
651
+ case 6:
652
+ #endif
653
+ opc_len = 8;
654
+ if (src_len < opc_len)
655
+ return; // source truncated (here we don't need an extra byte for next op
656
+ // code)
657
+ PTR_LEN_INC(src_ptr, src_len, opc_len);
658
+ state->end_of_stream = 1;
659
+ UPDATE_GOOD;
660
+ return; // end-of-stream
661
+
662
+ // ===============================================================
663
+ // Return on error
664
+ udef:
665
+ #if !HAVE_LABELS_AS_VALUES
666
+ case 30:
667
+ case 38:
668
+ case 46:
669
+ case 54:
670
+ case 62:
671
+ case 112:
672
+ case 113:
673
+ case 114:
674
+ case 115:
675
+ case 116:
676
+ case 117:
677
+ case 118:
678
+ case 119:
679
+ case 120:
680
+ case 121:
681
+ case 122:
682
+ case 123:
683
+ case 124:
684
+ case 125:
685
+ case 126:
686
+ case 127:
687
+ case 208:
688
+ case 209:
689
+ case 210:
690
+ case 211:
691
+ case 212:
692
+ case 213:
693
+ case 214:
694
+ case 215:
695
+ case 216:
696
+ case 217:
697
+ case 218:
698
+ case 219:
699
+ case 220:
700
+ case 221:
701
+ case 222:
702
+ case 223:
703
+ #endif
704
+ invalid_match_distance:
705
+
706
+ return; // we already updated state
707
+ #if !HAVE_LABELS_AS_VALUES
708
+ }
709
+ }
710
+ #endif
711
+ }