zopfli 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,42 @@
1
+ /*
2
+ Copyright 2011 Google Inc. All Rights Reserved.
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+
16
+ Author: lode.vandevenne@gmail.com (Lode Vandevenne)
17
+ Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
18
+ */
19
+
20
+ #ifndef ZOPFLI_KATAJAINEN_H_
21
+ #define ZOPFLI_KATAJAINEN_H_
22
+
23
+ #include <string.h>
24
+
25
+ /*
26
+ Outputs minimum-redundancy length-limited code bitlengths for symbols with the
27
+ given counts. The bitlengths are limited by maxbits.
28
+
29
+ The output is tailored for DEFLATE: symbols that never occur, get a bit length
30
+ of 0, and if only a single symbol occurs at least once, its bitlength will be 1,
31
+ and not 0 as would theoretically be needed for a single symbol.
32
+
33
+ frequencies: The amount of occurances of each symbol.
34
+ n: The amount of symbols.
35
+ maxbits: Maximum bit length, inclusive.
36
+ bitlengths: Output, the bitlengths for the symbol prefix codes.
37
+ return: 0 for OK, non-0 for error.
38
+ */
39
+ int ZopfliLengthLimitedCodeLengths(
40
+ const size_t* frequencies, int n, int maxbits, unsigned* bitlengths);
41
+
42
+ #endif /* ZOPFLI_KATAJAINEN_H_ */
@@ -0,0 +1,468 @@
1
+ /*
2
+ Copyright 2011 Google Inc. All Rights Reserved.
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+
16
+ Author: lode.vandevenne@gmail.com (Lode Vandevenne)
17
+ Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
18
+ */
19
+
20
+ #include "lz77.h"
21
+ #include "util.h"
22
+
23
+ #include <assert.h>
24
+ #include <stdio.h>
25
+ #include <stdlib.h>
26
+
27
+ void ZopfliInitLZ77Store(ZopfliLZ77Store* store) {
28
+ store->size = 0;
29
+ store->litlens = 0;
30
+ store->dists = 0;
31
+ }
32
+
33
+ void ZopfliCleanLZ77Store(ZopfliLZ77Store* store) {
34
+ free(store->litlens);
35
+ free(store->dists);
36
+ }
37
+
38
+ void ZopfliCopyLZ77Store(
39
+ const ZopfliLZ77Store* source, ZopfliLZ77Store* dest) {
40
+ size_t i;
41
+ ZopfliCleanLZ77Store(dest);
42
+ dest->litlens =
43
+ (unsigned short*)malloc(sizeof(*dest->litlens) * source->size);
44
+ dest->dists = (unsigned short*)malloc(sizeof(*dest->dists) * source->size);
45
+
46
+ if (!dest->litlens || !dest->dists) exit(-1); /* Allocation failed. */
47
+
48
+ dest->size = source->size;
49
+ for (i = 0; i < source->size; i++) {
50
+ dest->litlens[i] = source->litlens[i];
51
+ dest->dists[i] = source->dists[i];
52
+ }
53
+ }
54
+
55
+ /*
56
+ Appends the length and distance to the LZ77 arrays of the ZopfliLZ77Store.
57
+ context must be a ZopfliLZ77Store*.
58
+ */
59
+ void ZopfliStoreLitLenDist(unsigned short length, unsigned short dist,
60
+ ZopfliLZ77Store* store) {
61
+ size_t size2 = store->size; /* Needed for using ZOPFLI_APPEND_DATA twice. */
62
+ ZOPFLI_APPEND_DATA(length, &store->litlens, &store->size);
63
+ ZOPFLI_APPEND_DATA(dist, &store->dists, &size2);
64
+ }
65
+
66
+ /*
67
+ Gets the value of the length given the distance. Typically, the value of the
68
+ length is the length, but if the distance is very long, decrease the value of
69
+ the length a bit to make up for the fact that long distances use large amounts
70
+ of extra bits.
71
+ */
72
+ static int GetLengthValue(int length, int distance) {
73
+ /*
74
+ At distance > 1024, using length 3 is no longer good, due to the large amount
75
+ of extra bits for the distance code. distance > 1024 uses 9+ extra bits, and
76
+ this seems to be the sweet spot.
77
+ */
78
+ return distance > 1024 ? length - 1 : length;
79
+ }
80
+
81
+ void ZopfliVerifyLenDist(const unsigned char* data, size_t datasize, size_t pos,
82
+ unsigned short dist, unsigned short length) {
83
+
84
+ /* TODO(lode): make this only run in a debug compile, it's for assert only. */
85
+ size_t i;
86
+
87
+ assert(pos + length <= datasize);
88
+ for (i = 0; i < length; i++) {
89
+ if (data[pos - dist + i] != data[pos + i]) {
90
+ assert(data[pos - dist + i] == data[pos + i]);
91
+ break;
92
+ }
93
+ }
94
+ }
95
+
96
+ /*
97
+ Finds how long the match of scan and match is. Can be used to find how many
98
+ bytes starting from scan, and from match, are equal. Returns the last byte
99
+ after scan, which is still equal to the correspondinb byte after match.
100
+ scan is the position to compare
101
+ match is the earlier position to compare.
102
+ end is the last possible byte, beyond which to stop looking.
103
+ safe_end is a few (8) bytes before end, for comparing multiple bytes at once.
104
+ */
105
+ static const unsigned char* GetMatch(const unsigned char* scan,
106
+ const unsigned char* match,
107
+ const unsigned char* end,
108
+ const unsigned char* safe_end) {
109
+
110
+ if (sizeof(size_t) == 8) {
111
+ /* 8 checks at once per array bounds check (size_t is 64-bit). */
112
+ while (scan < safe_end && *((size_t*)scan) == *((size_t*)match)) {
113
+ scan += 8;
114
+ match += 8;
115
+ }
116
+ } else if (sizeof(unsigned int) == 4) {
117
+ /* 4 checks at once per array bounds check (unsigned int is 32-bit). */
118
+ while (scan < safe_end
119
+ && *((unsigned int*)scan) == *((unsigned int*)match)) {
120
+ scan += 4;
121
+ match += 4;
122
+ }
123
+ } else {
124
+ /* do 8 checks at once per array bounds check. */
125
+ while (scan < safe_end && *scan == *match && *++scan == *++match
126
+ && *++scan == *++match && *++scan == *++match
127
+ && *++scan == *++match && *++scan == *++match
128
+ && *++scan == *++match && *++scan == *++match) {
129
+ scan++; match++;
130
+ }
131
+ }
132
+
133
+ /* The remaining few bytes. */
134
+ while (scan != end && *scan == *match) {
135
+ scan++; match++;
136
+ }
137
+
138
+ return scan;
139
+ }
140
+
141
+ #ifdef ZOPFLI_LONGEST_MATCH_CACHE
142
+ /*
143
+ Gets distance, length and sublen values from the cache if possible.
144
+ Returns 1 if it got the values from the cache, 0 if not.
145
+ Updates the limit value to a smaller one if possible with more limited
146
+ information from the cache.
147
+ */
148
+ static int TryGetFromLongestMatchCache(ZopfliBlockState* s,
149
+ size_t pos, size_t* limit,
150
+ unsigned short* sublen, unsigned short* distance, unsigned short* length) {
151
+ /* The LMC cache starts at the beginning of the block rather than the
152
+ beginning of the whole array. */
153
+ size_t lmcpos = pos - s->blockstart;
154
+
155
+ /* Length > 0 and dist 0 is invalid combination, which indicates on purpose
156
+ that this cache value is not filled in yet. */
157
+ unsigned char cache_available = s->lmc && (s->lmc->length[lmcpos] == 0 ||
158
+ s->lmc->dist[lmcpos] != 0);
159
+ unsigned char limit_ok_for_cache = cache_available &&
160
+ (*limit == ZOPFLI_MAX_MATCH || s->lmc->length[lmcpos] <= *limit ||
161
+ (sublen && ZopfliMaxCachedSublen(s->lmc,
162
+ lmcpos, s->lmc->length[lmcpos]) >= *limit));
163
+
164
+ if (s->lmc && limit_ok_for_cache && cache_available) {
165
+ if (!sublen || s->lmc->length[lmcpos]
166
+ <= ZopfliMaxCachedSublen(s->lmc, lmcpos, s->lmc->length[lmcpos])) {
167
+ *length = s->lmc->length[lmcpos];
168
+ if (*length > *limit) *length = *limit;
169
+ if (sublen) {
170
+ ZopfliCacheToSublen(s->lmc, lmcpos, *length, sublen);
171
+ *distance = sublen[*length];
172
+ if (*limit == ZOPFLI_MAX_MATCH && *length >= ZOPFLI_MIN_MATCH) {
173
+ assert(sublen[*length] == s->lmc->dist[lmcpos]);
174
+ }
175
+ } else {
176
+ *distance = s->lmc->dist[lmcpos];
177
+ }
178
+ return 1;
179
+ }
180
+ /* Can't use much of the cache, since the "sublens" need to be calculated,
181
+ but at least we already know when to stop. */
182
+ *limit = s->lmc->length[lmcpos];
183
+ }
184
+
185
+ return 0;
186
+ }
187
+
188
+ /*
189
+ Stores the found sublen, distance and length in the longest match cache, if
190
+ possible.
191
+ */
192
+ static void StoreInLongestMatchCache(ZopfliBlockState* s,
193
+ size_t pos, size_t limit,
194
+ const unsigned short* sublen,
195
+ unsigned short distance, unsigned short length) {
196
+ /* The LMC cache starts at the beginning of the block rather than the
197
+ beginning of the whole array. */
198
+ size_t lmcpos = pos - s->blockstart;
199
+
200
+ /* Length > 0 and dist 0 is invalid combination, which indicates on purpose
201
+ that this cache value is not filled in yet. */
202
+ unsigned char cache_available = s->lmc && (s->lmc->length[lmcpos] == 0 ||
203
+ s->lmc->dist[lmcpos] != 0);
204
+
205
+ if (s->lmc && limit == ZOPFLI_MAX_MATCH && sublen && !cache_available) {
206
+ assert(s->lmc->length[lmcpos] == 1 && s->lmc->dist[lmcpos] == 0);
207
+ s->lmc->dist[lmcpos] = length < ZOPFLI_MIN_MATCH ? 0 : distance;
208
+ s->lmc->length[lmcpos] = length < ZOPFLI_MIN_MATCH ? 0 : length;
209
+ assert(!(s->lmc->length[lmcpos] == 1 && s->lmc->dist[lmcpos] == 0));
210
+ ZopfliSublenToCache(sublen, lmcpos, length, s->lmc);
211
+ }
212
+ }
213
+ #endif
214
+
215
+ void ZopfliFindLongestMatch(ZopfliBlockState* s, const ZopfliHash* h,
216
+ const unsigned char* array,
217
+ size_t pos, size_t size, size_t limit,
218
+ unsigned short* sublen, unsigned short* distance, unsigned short* length) {
219
+ unsigned short hpos = pos & ZOPFLI_WINDOW_MASK, p, pp;
220
+ unsigned short bestdist = 0;
221
+ unsigned short bestlength = 1;
222
+ const unsigned char* scan;
223
+ const unsigned char* match;
224
+ const unsigned char* arrayend;
225
+ const unsigned char* arrayend_safe;
226
+ #if ZOPFLI_MAX_CHAIN_HITS < ZOPFLI_WINDOW_SIZE
227
+ int chain_counter = ZOPFLI_MAX_CHAIN_HITS; /* For quitting early. */
228
+ #endif
229
+
230
+ unsigned dist = 0; /* Not unsigned short on purpose. */
231
+
232
+ int* hhead = h->head;
233
+ unsigned short* hprev = h->prev;
234
+ int* hhashval = h->hashval;
235
+ int hval = h->val;
236
+
237
+ #ifdef ZOPFLI_LONGEST_MATCH_CACHE
238
+ if (TryGetFromLongestMatchCache(s, pos, &limit, sublen, distance, length)) {
239
+ assert(pos + *length <= size);
240
+ return;
241
+ }
242
+ #endif
243
+
244
+ assert(limit <= ZOPFLI_MAX_MATCH);
245
+ assert(limit >= ZOPFLI_MIN_MATCH);
246
+ assert(pos < size);
247
+
248
+ if (size - pos < ZOPFLI_MIN_MATCH) {
249
+ /* The rest of the code assumes there are at least ZOPFLI_MIN_MATCH bytes to
250
+ try. */
251
+ *length = 0;
252
+ *distance = 0;
253
+ return;
254
+ }
255
+
256
+ if (pos + limit > size) {
257
+ limit = size - pos;
258
+ }
259
+ arrayend = &array[pos] + limit;
260
+ arrayend_safe = arrayend - 8;
261
+
262
+ assert(hval < 65536);
263
+
264
+ pp = hhead[hval]; /* During the whole loop, p == hprev[pp]. */
265
+ p = hprev[pp];
266
+
267
+ assert(pp == hpos);
268
+
269
+ dist = p < pp ? pp - p : ((ZOPFLI_WINDOW_SIZE - p) + pp);
270
+
271
+ /* Go through all distances. */
272
+ while (dist < ZOPFLI_WINDOW_SIZE) {
273
+ unsigned short currentlength = 0;
274
+
275
+ assert(p < ZOPFLI_WINDOW_SIZE);
276
+ assert(p == hprev[pp]);
277
+ assert(hhashval[p] == hval);
278
+
279
+ if (dist > 0) {
280
+ assert(pos < size);
281
+ assert(dist <= pos);
282
+ scan = &array[pos];
283
+ match = &array[pos - dist];
284
+
285
+ /* Testing the byte at position bestlength first, goes slightly faster. */
286
+ if (pos + bestlength >= size
287
+ || *(scan + bestlength) == *(match + bestlength)) {
288
+
289
+ #ifdef ZOPFLI_HASH_SAME
290
+ unsigned short same0 = h->same[pos & ZOPFLI_WINDOW_MASK];
291
+ if (same0 > 2 && *scan == *match) {
292
+ unsigned short same1 = h->same[(pos - dist) & ZOPFLI_WINDOW_MASK];
293
+ unsigned short same = same0 < same1 ? same0 : same1;
294
+ if (same > limit) same = limit;
295
+ scan += same;
296
+ match += same;
297
+ }
298
+ #endif
299
+ scan = GetMatch(scan, match, arrayend, arrayend_safe);
300
+ currentlength = scan - &array[pos]; /* The found length. */
301
+ }
302
+
303
+ if (currentlength > bestlength) {
304
+ if (sublen) {
305
+ unsigned short j;
306
+ for (j = bestlength + 1; j <= currentlength; j++) {
307
+ sublen[j] = dist;
308
+ }
309
+ }
310
+ bestdist = dist;
311
+ bestlength = currentlength;
312
+ if (currentlength >= limit) break;
313
+ }
314
+ }
315
+
316
+
317
+ #ifdef ZOPFLI_HASH_SAME_HASH
318
+ /* Switch to the other hash once this will be more efficient. */
319
+ if (hhead != h->head2 && bestlength >= h->same[hpos] &&
320
+ h->val2 == h->hashval2[p]) {
321
+ /* Now use the hash that encodes the length and first byte. */
322
+ hhead = h->head2;
323
+ hprev = h->prev2;
324
+ hhashval = h->hashval2;
325
+ hval = h->val2;
326
+ }
327
+ #endif
328
+
329
+ pp = p;
330
+ p = hprev[p];
331
+ if (p == pp) break; /* Uninited prev value. */
332
+
333
+ dist += p < pp ? pp - p : ((ZOPFLI_WINDOW_SIZE - p) + pp);
334
+
335
+ #if ZOPFLI_MAX_CHAIN_HITS < ZOPFLI_WINDOW_SIZE
336
+ chain_counter--;
337
+ if (chain_counter <= 0) break;
338
+ #endif
339
+ }
340
+
341
+ #ifdef ZOPFLI_LONGEST_MATCH_CACHE
342
+ StoreInLongestMatchCache(s, pos, limit, sublen, bestdist, bestlength);
343
+ #endif
344
+
345
+ assert(bestlength <= limit);
346
+
347
+ *distance = bestdist;
348
+ *length = bestlength;
349
+ assert(pos + *length <= size);
350
+ }
351
+
352
+ void ZopfliLZ77Greedy(ZopfliBlockState* s, const unsigned char* in,
353
+ size_t instart, size_t inend,
354
+ ZopfliLZ77Store* store) {
355
+ size_t i = 0, j;
356
+ unsigned short leng;
357
+ unsigned short dist;
358
+ int lengvalue;
359
+ size_t windowstart = instart > ZOPFLI_WINDOW_SIZE
360
+ ? instart - ZOPFLI_WINDOW_SIZE : 0;
361
+ unsigned short dummysublen[259];
362
+
363
+ ZopfliHash hash;
364
+ ZopfliHash* h = &hash;
365
+
366
+ #ifdef ZOPFLI_LAZY_MATCHING
367
+ /* Lazy matching. */
368
+ unsigned prev_length = 0;
369
+ unsigned prev_match = 0;
370
+ int prevlengvalue;
371
+ int match_available = 0;
372
+ #endif
373
+
374
+ if (instart == inend) return;
375
+
376
+ ZopfliInitHash(ZOPFLI_WINDOW_SIZE, h);
377
+ ZopfliWarmupHash(in, windowstart, inend, h);
378
+ for (i = windowstart; i < instart; i++) {
379
+ ZopfliUpdateHash(in, i, inend, h);
380
+ }
381
+
382
+ for (i = instart; i < inend; i++) {
383
+ ZopfliUpdateHash(in, i, inend, h);
384
+
385
+ ZopfliFindLongestMatch(s, h, in, i, inend, ZOPFLI_MAX_MATCH, dummysublen,
386
+ &dist, &leng);
387
+ lengvalue = GetLengthValue(leng, dist);
388
+
389
+ #ifdef ZOPFLI_LAZY_MATCHING
390
+ /* Lazy matching. */
391
+ prevlengvalue = GetLengthValue(prev_length, prev_match);
392
+ if (match_available) {
393
+ match_available = 0;
394
+ if (lengvalue > prevlengvalue + 1) {
395
+ ZopfliStoreLitLenDist(in[i - 1], 0, store);
396
+ if (lengvalue >= ZOPFLI_MIN_MATCH && lengvalue < ZOPFLI_MAX_MATCH) {
397
+ match_available = 1;
398
+ prev_length = leng;
399
+ prev_match = dist;
400
+ continue;
401
+ }
402
+ } else {
403
+ /* Add previous to output. */
404
+ leng = prev_length;
405
+ dist = prev_match;
406
+ lengvalue = prevlengvalue;
407
+ /* Add to output. */
408
+ ZopfliVerifyLenDist(in, inend, i - 1, dist, leng);
409
+ ZopfliStoreLitLenDist(leng, dist, store);
410
+ for (j = 2; j < leng; j++) {
411
+ assert(i < inend);
412
+ i++;
413
+ ZopfliUpdateHash(in, i, inend, h);
414
+ }
415
+ continue;
416
+ }
417
+ }
418
+ else if (lengvalue >= ZOPFLI_MIN_MATCH && leng < ZOPFLI_MAX_MATCH) {
419
+ match_available = 1;
420
+ prev_length = leng;
421
+ prev_match = dist;
422
+ continue;
423
+ }
424
+ /* End of lazy matching. */
425
+ #endif
426
+
427
+ /* Add to output. */
428
+ if (lengvalue >= ZOPFLI_MIN_MATCH) {
429
+ ZopfliVerifyLenDist(in, inend, i, dist, leng);
430
+ ZopfliStoreLitLenDist(leng, dist, store);
431
+ } else {
432
+ leng = 1;
433
+ ZopfliStoreLitLenDist(in[i], 0, store);
434
+ }
435
+ for (j = 1; j < leng; j++) {
436
+ assert(i < inend);
437
+ i++;
438
+ ZopfliUpdateHash(in, i, inend, h);
439
+ }
440
+ }
441
+
442
+ ZopfliCleanHash(h);
443
+ }
444
+
445
+ void ZopfliLZ77Counts(const unsigned short* litlens,
446
+ const unsigned short* dists,
447
+ size_t start, size_t end,
448
+ size_t* ll_count, size_t* d_count) {
449
+ size_t i;
450
+
451
+ for (i = 0; i < 288; i++) {
452
+ ll_count[i] = 0;
453
+ }
454
+ for (i = 0; i < 32; i++) {
455
+ d_count[i] = 0;
456
+ }
457
+
458
+ for (i = start; i < end; i++) {
459
+ if (dists[i] == 0) {
460
+ ll_count[litlens[i]]++;
461
+ } else {
462
+ ll_count[ZopfliGetLengthSymbol(litlens[i])]++;
463
+ d_count[ZopfliGetDistSymbol(dists[i])]++;
464
+ }
465
+ }
466
+
467
+ ll_count[256] = 1; /* End symbol. */
468
+ }