zopfli 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,126 @@
1
+ /*
2
+ Copyright 2011 Google Inc. All Rights Reserved.
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+
16
+ Author: lode.vandevenne@gmail.com (Lode Vandevenne)
17
+ Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
18
+ */
19
+
20
+ /*
21
+ Functions for basic LZ77 compression and utilities for the "squeeze" LZ77
22
+ compression.
23
+ */
24
+
25
+ #ifndef ZOPFLI_LZ77_H_
26
+ #define ZOPFLI_LZ77_H_
27
+
28
+ #include <stdlib.h>
29
+
30
+ #include "cache.h"
31
+ #include "hash.h"
32
+ #include "zopfli.h"
33
+
34
+ /*
35
+ Stores lit/length and dist pairs for LZ77.
36
+ litlens: Contains the literal symbols or length values.
37
+ dists: Indicates the distance, or 0 to indicate that there is no distance and
38
+ litlens contains a literal instead of a length.
39
+ litlens and dists both have the same size.
40
+ */
41
+ typedef struct ZopfliLZ77Store {
42
+ unsigned short* litlens; /* Lit or len. */
43
+ unsigned short* dists; /* If 0: indicates literal in corresponding litlens,
44
+ if > 0: length in corresponding litlens, this is the distance. */
45
+ size_t size;
46
+ } ZopfliLZ77Store;
47
+
48
+ void ZopfliInitLZ77Store(ZopfliLZ77Store* store);
49
+ void ZopfliCleanLZ77Store(ZopfliLZ77Store* store);
50
+ void ZopfliCopyLZ77Store(const ZopfliLZ77Store* source, ZopfliLZ77Store* dest);
51
+ void ZopfliStoreLitLenDist(unsigned short length, unsigned short dist,
52
+ ZopfliLZ77Store* store);
53
+
54
+ /*
55
+ Some state information for compressing a block.
56
+ This is currently a bit under-used (with mainly only the longest match cache),
57
+ but is kept for easy future expansion.
58
+ */
59
+ typedef struct ZopfliBlockState {
60
+ const ZopfliOptions* options;
61
+
62
+ #ifdef ZOPFLI_LONGEST_MATCH_CACHE
63
+ /* Cache for length/distance pairs found so far. */
64
+ ZopfliLongestMatchCache* lmc;
65
+ #endif
66
+
67
+ /* The start (inclusive) and end (not inclusive) of the current block. */
68
+ size_t blockstart;
69
+ size_t blockend;
70
+ } ZopfliBlockState;
71
+
72
+ /*
73
+ Finds the longest match (length and corresponding distance) for LZ77
74
+ compression.
75
+ Even when not using "sublen", it can be more efficient to provide an array,
76
+ because only then the caching is used.
77
+ array: the data
78
+ pos: position in the data to find the match for
79
+ size: size of the data
80
+ limit: limit length to maximum this value (default should be 258). This allows
81
+ finding a shorter dist for that length (= less extra bits). Must be
82
+ in the range [ZOPFLI_MIN_MATCH, ZOPFLI_MAX_MATCH].
83
+ sublen: output array of 259 elements, or null. Has, for each length, the
84
+ smallest distance required to reach this length. Only 256 of its 259 values
85
+ are used, the first 3 are ignored (the shortest length is 3. It is purely
86
+ for convenience that the array is made 3 longer).
87
+ */
88
+ void ZopfliFindLongestMatch(
89
+ ZopfliBlockState *s, const ZopfliHash* h, const unsigned char* array,
90
+ size_t pos, size_t size, size_t limit,
91
+ unsigned short* sublen, unsigned short* distance, unsigned short* length);
92
+
93
+ /*
94
+ Verifies if length and dist are indeed valid, only used for assertion.
95
+ */
96
+ void ZopfliVerifyLenDist(const unsigned char* data, size_t datasize, size_t pos,
97
+ unsigned short dist, unsigned short length);
98
+
99
+ /*
100
+ Counts the number of literal, length and distance symbols in the given lz77
101
+ arrays.
102
+ litlens: lz77 lit/lengths
103
+ dists: ll77 distances
104
+ start: where to begin counting in litlens and dists
105
+ end: where to stop counting in litlens and dists (not inclusive)
106
+ ll_count: count of each lit/len symbol, must have size 288 (see deflate
107
+ standard)
108
+ d_count: count of each dist symbol, must have size 32 (see deflate standard)
109
+ */
110
+ void ZopfliLZ77Counts(const unsigned short* litlens,
111
+ const unsigned short* dists,
112
+ size_t start, size_t end,
113
+ size_t* ll_count, size_t* d_count);
114
+
115
+ /*
116
+ Does LZ77 using an algorithm similar to gzip, with lazy matching, rather than
117
+ with the slow but better "squeeze" implementation.
118
+ The result is placed in the ZopfliLZ77Store.
119
+ If instart is larger than 0, it uses values before instart as starting
120
+ dictionary.
121
+ */
122
+ void ZopfliLZ77Greedy(ZopfliBlockState* s, const unsigned char* in,
123
+ size_t instart, size_t inend,
124
+ ZopfliLZ77Store* store);
125
+
126
+ #endif /* ZOPFLI_LZ77_H_ */
@@ -0,0 +1,5 @@
1
+ make:
2
+ gcc *.c -O2 -W -Wall -Wextra -ansi -pedantic -lm -o zopfli
3
+
4
+ debug:
5
+ gcc *.c -g3 -lm -o zopfli
@@ -0,0 +1,543 @@
1
+ /*
2
+ Copyright 2011 Google Inc. All Rights Reserved.
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+
16
+ Author: lode.vandevenne@gmail.com (Lode Vandevenne)
17
+ Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
18
+ */
19
+
20
+ #include "squeeze.h"
21
+
22
+ #include <assert.h>
23
+ #include <math.h>
24
+ #include <stdio.h>
25
+
26
+ #include "blocksplitter.h"
27
+ #include "deflate.h"
28
+ #include "tree.h"
29
+ #include "util.h"
30
+
31
+ typedef struct SymbolStats {
32
+ /* The literal and length symbols. */
33
+ size_t litlens[288];
34
+ /* The 32 unique dist symbols, not the 32768 possible dists. */
35
+ size_t dists[32];
36
+
37
+ double ll_symbols[288]; /* Length of each lit/len symbol in bits. */
38
+ double d_symbols[32]; /* Length of each dist symbol in bits. */
39
+ } SymbolStats;
40
+
41
+ /* Sets everything to 0. */
42
+ static void InitStats(SymbolStats* stats) {
43
+ memset(stats->litlens, 0, 288 * sizeof(stats->litlens[0]));
44
+ memset(stats->dists, 0, 32 * sizeof(stats->dists[0]));
45
+
46
+ memset(stats->ll_symbols, 0, 288 * sizeof(stats->ll_symbols[0]));
47
+ memset(stats->d_symbols, 0, 32 * sizeof(stats->d_symbols[0]));
48
+ }
49
+
50
+ static void CopyStats(SymbolStats* source, SymbolStats* dest) {
51
+ memcpy(dest->litlens, source->litlens, 288 * sizeof(dest->litlens[0]));
52
+ memcpy(dest->dists, source->dists, 32 * sizeof(dest->dists[0]));
53
+
54
+ memcpy(dest->ll_symbols, source->ll_symbols,
55
+ 288 * sizeof(dest->ll_symbols[0]));
56
+ memcpy(dest->d_symbols, source->d_symbols, 32 * sizeof(dest->d_symbols[0]));
57
+ }
58
+
59
+ /* Adds the bit lengths. */
60
+ static void AddWeighedStatFreqs(const SymbolStats* stats1, double w1,
61
+ const SymbolStats* stats2, double w2,
62
+ SymbolStats* result) {
63
+ size_t i;
64
+ for (i = 0; i < 288; i++) {
65
+ result->litlens[i] =
66
+ (size_t) (stats1->litlens[i] * w1 + stats2->litlens[i] * w2);
67
+ }
68
+ for (i = 0; i < 32; i++) {
69
+ result->dists[i] =
70
+ (size_t) (stats1->dists[i] * w1 + stats2->dists[i] * w2);
71
+ }
72
+ result->litlens[256] = 1; /* End symbol. */
73
+ }
74
+
75
+ typedef struct RanState {
76
+ unsigned int m_w, m_z;
77
+ } RanState;
78
+
79
+ static void InitRanState(RanState* state) {
80
+ state->m_w = 1;
81
+ state->m_z = 2;
82
+ }
83
+
84
+ /* Get random number: "Multiply-With-Carry" generator of G. Marsaglia */
85
+ static unsigned int Ran(RanState* state) {
86
+ state->m_z = 36969 * (state->m_z & 65535) + (state->m_z >> 16);
87
+ state->m_w = 18000 * (state->m_w & 65535) + (state->m_w >> 16);
88
+ return (state->m_z << 16) + state->m_w; /* 32-bit result. */
89
+ }
90
+
91
+ static void RandomizeFreqs(RanState* state, size_t* freqs, int n) {
92
+ int i;
93
+ for (i = 0; i < n; i++) {
94
+ if ((Ran(state) >> 4) % 3 == 0) freqs[i] = freqs[Ran(state) % n];
95
+ }
96
+ }
97
+
98
+ static void RandomizeStatFreqs(RanState* state, SymbolStats* stats) {
99
+ RandomizeFreqs(state, stats->litlens, 288);
100
+ RandomizeFreqs(state, stats->dists, 32);
101
+ stats->litlens[256] = 1; /* End symbol. */
102
+ }
103
+
104
+ static void ClearStatFreqs(SymbolStats* stats) {
105
+ size_t i;
106
+ for (i = 0; i < 288; i++) stats->litlens[i] = 0;
107
+ for (i = 0; i < 32; i++) stats->dists[i] = 0;
108
+ }
109
+
110
+ /*
111
+ Function that calculates a cost based on a model for the given LZ77 symbol.
112
+ litlen: means literal symbol if dist is 0, length otherwise.
113
+ */
114
+ typedef double CostModelFun(unsigned litlen, unsigned dist, void* context);
115
+
116
+ /*
117
+ Cost model which should exactly match fixed tree.
118
+ type: CostModelFun
119
+ */
120
+ static double GetCostFixed(unsigned litlen, unsigned dist, void* unused) {
121
+ (void)unused;
122
+ if (dist == 0) {
123
+ if (litlen <= 143) return 8;
124
+ else return 9;
125
+ } else {
126
+ int dbits = ZopfliGetDistExtraBits(dist);
127
+ int lbits = ZopfliGetLengthExtraBits(litlen);
128
+ int lsym = ZopfliGetLengthSymbol(litlen);
129
+ double cost = 0;
130
+ if (lsym <= 279) cost += 7;
131
+ else cost += 8;
132
+ cost += 5; /* Every dist symbol has length 5. */
133
+ return cost + dbits + lbits;
134
+ }
135
+ }
136
+
137
+ /*
138
+ Cost model based on symbol statistics.
139
+ type: CostModelFun
140
+ */
141
+ static double GetCostStat(unsigned litlen, unsigned dist, void* context) {
142
+ SymbolStats* stats = (SymbolStats*)context;
143
+ if (dist == 0) {
144
+ return stats->ll_symbols[litlen];
145
+ } else {
146
+ int lsym = ZopfliGetLengthSymbol(litlen);
147
+ int lbits = ZopfliGetLengthExtraBits(litlen);
148
+ int dsym = ZopfliGetDistSymbol(dist);
149
+ int dbits = ZopfliGetDistExtraBits(dist);
150
+ return stats->ll_symbols[lsym] + lbits + stats->d_symbols[dsym] + dbits;
151
+ }
152
+ }
153
+
154
+ /*
155
+ Finds the minimum possible cost this cost model can return for valid length and
156
+ distance symbols.
157
+ */
158
+ static double GetCostModelMinCost(CostModelFun* costmodel, void* costcontext) {
159
+ double mincost;
160
+ int bestlength = 0; /* length that has lowest cost in the cost model */
161
+ int bestdist = 0; /* distance that has lowest cost in the cost model */
162
+ int i;
163
+ /*
164
+ Table of distances that have a different distance symbol in the deflate
165
+ specification. Each value is the first distance that has a new symbol. Only
166
+ different symbols affect the cost model so only these need to be checked.
167
+ See RFC 1951 section 3.2.5. Compressed blocks (length and distance codes).
168
+ */
169
+ static const int dsymbols[30] = {
170
+ 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513,
171
+ 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577
172
+ };
173
+
174
+ mincost = ZOPFLI_LARGE_FLOAT;
175
+ for (i = 3; i < 259; i++) {
176
+ double c = costmodel(i, 1, costcontext);
177
+ if (c < mincost) {
178
+ bestlength = i;
179
+ mincost = c;
180
+ }
181
+ }
182
+
183
+ mincost = ZOPFLI_LARGE_FLOAT;
184
+ for (i = 0; i < 30; i++) {
185
+ double c = costmodel(3, dsymbols[i], costcontext);
186
+ if (c < mincost) {
187
+ bestdist = dsymbols[i];
188
+ mincost = c;
189
+ }
190
+ }
191
+
192
+ return costmodel(bestlength, bestdist, costcontext);
193
+ }
194
+
195
+ /*
196
+ Performs the forward pass for "squeeze". Gets the most optimal length to reach
197
+ every byte from a previous byte, using cost calculations.
198
+ s: the ZopfliBlockState
199
+ in: the input data array
200
+ instart: where to start
201
+ inend: where to stop (not inclusive)
202
+ costmodel: function to calculate the cost of some lit/len/dist pair.
203
+ costcontext: abstract context for the costmodel function
204
+ length_array: output array of size (inend - instart) which will receive the best
205
+ length to reach this byte from a previous byte.
206
+ returns the cost that was, according to the costmodel, needed to get to the end.
207
+ */
208
+ static double GetBestLengths(ZopfliBlockState *s,
209
+ const unsigned char* in,
210
+ size_t instart, size_t inend,
211
+ CostModelFun* costmodel, void* costcontext,
212
+ unsigned short* length_array) {
213
+ /* Best cost to get here so far. */
214
+ size_t blocksize = inend - instart;
215
+ float* costs;
216
+ size_t i = 0, k;
217
+ unsigned short leng;
218
+ unsigned short dist;
219
+ unsigned short sublen[259];
220
+ size_t windowstart = instart > ZOPFLI_WINDOW_SIZE
221
+ ? instart - ZOPFLI_WINDOW_SIZE : 0;
222
+ ZopfliHash hash;
223
+ ZopfliHash* h = &hash;
224
+ double result;
225
+ double mincost = GetCostModelMinCost(costmodel, costcontext);
226
+
227
+ if (instart == inend) return 0;
228
+
229
+ costs = (float*)malloc(sizeof(float) * (blocksize + 1));
230
+ if (!costs) exit(-1); /* Allocation failed. */
231
+
232
+ ZopfliInitHash(ZOPFLI_WINDOW_SIZE, h);
233
+ ZopfliWarmupHash(in, windowstart, inend, h);
234
+ for (i = windowstart; i < instart; i++) {
235
+ ZopfliUpdateHash(in, i, inend, h);
236
+ }
237
+
238
+ for (i = 1; i < blocksize + 1; i++) costs[i] = ZOPFLI_LARGE_FLOAT;
239
+ costs[0] = 0; /* Because it's the start. */
240
+ length_array[0] = 0;
241
+
242
+ for (i = instart; i < inend; i++) {
243
+ size_t j = i - instart; /* Index in the costs array and length_array. */
244
+ ZopfliUpdateHash(in, i, inend, h);
245
+
246
+ #ifdef ZOPFLI_SHORTCUT_LONG_REPETITIONS
247
+ /* If we're in a long repetition of the same character and have more than
248
+ ZOPFLI_MAX_MATCH characters before and after our position. */
249
+ if (h->same[i & ZOPFLI_WINDOW_MASK] > ZOPFLI_MAX_MATCH * 2
250
+ && i > instart + ZOPFLI_MAX_MATCH + 1
251
+ && i + ZOPFLI_MAX_MATCH * 2 + 1 < inend
252
+ && h->same[(i - ZOPFLI_MAX_MATCH) & ZOPFLI_WINDOW_MASK]
253
+ > ZOPFLI_MAX_MATCH) {
254
+ double symbolcost = costmodel(ZOPFLI_MAX_MATCH, 1, costcontext);
255
+ /* Set the length to reach each one to ZOPFLI_MAX_MATCH, and the cost to
256
+ the cost corresponding to that length. Doing this, we skip
257
+ ZOPFLI_MAX_MATCH values to avoid calling ZopfliFindLongestMatch. */
258
+ for (k = 0; k < ZOPFLI_MAX_MATCH; k++) {
259
+ costs[j + ZOPFLI_MAX_MATCH] = costs[j] + symbolcost;
260
+ length_array[j + ZOPFLI_MAX_MATCH] = ZOPFLI_MAX_MATCH;
261
+ i++;
262
+ j++;
263
+ ZopfliUpdateHash(in, i, inend, h);
264
+ }
265
+ }
266
+ #endif
267
+
268
+ ZopfliFindLongestMatch(s, h, in, i, inend, ZOPFLI_MAX_MATCH, sublen,
269
+ &dist, &leng);
270
+
271
+ /* Literal. */
272
+ if (i + 1 <= inend) {
273
+ double newCost = costs[j] + costmodel(in[i], 0, costcontext);
274
+ assert(newCost >= 0);
275
+ if (newCost < costs[j + 1]) {
276
+ costs[j + 1] = newCost;
277
+ length_array[j + 1] = 1;
278
+ }
279
+ }
280
+ /* Lengths. */
281
+ for (k = 3; k <= leng && i + k <= inend; k++) {
282
+ double newCost;
283
+
284
+ /* Calling the cost model is expensive, avoid this if we are already at
285
+ the minimum possible cost that it can return. */
286
+ if (costs[j + k] - costs[j] <= mincost) continue;
287
+
288
+ newCost = costs[j] + costmodel(k, sublen[k], costcontext);
289
+ assert(newCost >= 0);
290
+ if (newCost < costs[j + k]) {
291
+ assert(k <= ZOPFLI_MAX_MATCH);
292
+ costs[j + k] = newCost;
293
+ length_array[j + k] = k;
294
+ }
295
+ }
296
+ }
297
+
298
+ assert(costs[blocksize] >= 0);
299
+ result = costs[blocksize];
300
+
301
+ ZopfliCleanHash(h);
302
+ free(costs);
303
+
304
+ return result;
305
+ }
306
+
307
+ /*
308
+ Calculates the optimal path of lz77 lengths to use, from the calculated
309
+ length_array. The length_array must contain the optimal length to reach that
310
+ byte. The path will be filled with the lengths to use, so its data size will be
311
+ the amount of lz77 symbols.
312
+ */
313
+ static void TraceBackwards(size_t size, const unsigned short* length_array,
314
+ unsigned short** path, size_t* pathsize) {
315
+ size_t index = size;
316
+ if (size == 0) return;
317
+ for (;;) {
318
+ ZOPFLI_APPEND_DATA(length_array[index], path, pathsize);
319
+ assert(length_array[index] <= index);
320
+ assert(length_array[index] <= ZOPFLI_MAX_MATCH);
321
+ assert(length_array[index] != 0);
322
+ index -= length_array[index];
323
+ if (index == 0) break;
324
+ }
325
+
326
+ /* Mirror result. */
327
+ for (index = 0; index < *pathsize / 2; index++) {
328
+ unsigned short temp = (*path)[index];
329
+ (*path)[index] = (*path)[*pathsize - index - 1];
330
+ (*path)[*pathsize - index - 1] = temp;
331
+ }
332
+ }
333
+
334
+ static void FollowPath(ZopfliBlockState* s,
335
+ const unsigned char* in, size_t instart, size_t inend,
336
+ unsigned short* path, size_t pathsize,
337
+ ZopfliLZ77Store* store) {
338
+ size_t i, j, pos = 0;
339
+ size_t windowstart = instart > ZOPFLI_WINDOW_SIZE
340
+ ? instart - ZOPFLI_WINDOW_SIZE : 0;
341
+
342
+ size_t total_length_test = 0;
343
+
344
+ ZopfliHash hash;
345
+ ZopfliHash* h = &hash;
346
+
347
+ if (instart == inend) return;
348
+
349
+ ZopfliInitHash(ZOPFLI_WINDOW_SIZE, h);
350
+ ZopfliWarmupHash(in, windowstart, inend, h);
351
+ for (i = windowstart; i < instart; i++) {
352
+ ZopfliUpdateHash(in, i, inend, h);
353
+ }
354
+
355
+ pos = instart;
356
+ for (i = 0; i < pathsize; i++) {
357
+ unsigned short length = path[i];
358
+ unsigned short dummy_length;
359
+ unsigned short dist;
360
+ assert(pos < inend);
361
+
362
+ ZopfliUpdateHash(in, pos, inend, h);
363
+
364
+ /* Add to output. */
365
+ if (length >= ZOPFLI_MIN_MATCH) {
366
+ /* Get the distance by recalculating longest match. The found length
367
+ should match the length from the path. */
368
+ ZopfliFindLongestMatch(s, h, in, pos, inend, length, 0,
369
+ &dist, &dummy_length);
370
+ assert(!(dummy_length != length && length > 2 && dummy_length > 2));
371
+ ZopfliVerifyLenDist(in, inend, pos, dist, length);
372
+ ZopfliStoreLitLenDist(length, dist, store);
373
+ total_length_test += length;
374
+ } else {
375
+ length = 1;
376
+ ZopfliStoreLitLenDist(in[pos], 0, store);
377
+ total_length_test++;
378
+ }
379
+
380
+
381
+ assert(pos + length <= inend);
382
+ for (j = 1; j < length; j++) {
383
+ ZopfliUpdateHash(in, pos + j, inend, h);
384
+ }
385
+
386
+ pos += length;
387
+ }
388
+
389
+ ZopfliCleanHash(h);
390
+ }
391
+
392
+ /* Calculates the entropy of the statistics */
393
+ static void CalculateStatistics(SymbolStats* stats) {
394
+ ZopfliCalculateEntropy(stats->litlens, 288, stats->ll_symbols);
395
+ ZopfliCalculateEntropy(stats->dists, 32, stats->d_symbols);
396
+ }
397
+
398
+ /* Appends the symbol statistics from the store. */
399
+ static void GetStatistics(const ZopfliLZ77Store* store, SymbolStats* stats) {
400
+ size_t i;
401
+ for (i = 0; i < store->size; i++) {
402
+ if (store->dists[i] == 0) {
403
+ stats->litlens[store->litlens[i]]++;
404
+ } else {
405
+ stats->litlens[ZopfliGetLengthSymbol(store->litlens[i])]++;
406
+ stats->dists[ZopfliGetDistSymbol(store->dists[i])]++;
407
+ }
408
+ }
409
+ stats->litlens[256] = 1; /* End symbol. */
410
+
411
+ CalculateStatistics(stats);
412
+ }
413
+
414
+ /*
415
+ Does a single run for ZopfliLZ77Optimal. For good compression, repeated runs
416
+ with updated statistics should be performed.
417
+
418
+ s: the block state
419
+ in: the input data array
420
+ instart: where to start
421
+ inend: where to stop (not inclusive)
422
+ path: pointer to dynamically allocated memory to store the path
423
+ pathsize: pointer to the size of the dynamic path array
424
+ length_array: array if size (inend - instart) used to store lengths
425
+ costmodel: function to use as the cost model for this squeeze run
426
+ costcontext: abstract context for the costmodel function
427
+ store: place to output the LZ77 data
428
+ returns the cost that was, according to the costmodel, needed to get to the end.
429
+ This is not the actual cost.
430
+ */
431
+ static double LZ77OptimalRun(ZopfliBlockState* s,
432
+ const unsigned char* in, size_t instart, size_t inend,
433
+ unsigned short** path, size_t* pathsize,
434
+ unsigned short* length_array, CostModelFun* costmodel,
435
+ void* costcontext, ZopfliLZ77Store* store) {
436
+ double cost = GetBestLengths(
437
+ s, in, instart, inend, costmodel, costcontext, length_array);
438
+ free(*path);
439
+ *path = 0;
440
+ *pathsize = 0;
441
+ TraceBackwards(inend - instart, length_array, path, pathsize);
442
+ FollowPath(s, in, instart, inend, *path, *pathsize, store);
443
+ assert(cost < ZOPFLI_LARGE_FLOAT);
444
+ return cost;
445
+ }
446
+
447
+ void ZopfliLZ77Optimal(ZopfliBlockState *s,
448
+ const unsigned char* in, size_t instart, size_t inend,
449
+ ZopfliLZ77Store* store) {
450
+ /* Dist to get to here with smallest cost. */
451
+ size_t blocksize = inend - instart;
452
+ unsigned short* length_array =
453
+ (unsigned short*)malloc(sizeof(unsigned short) * (blocksize + 1));
454
+ unsigned short* path = 0;
455
+ size_t pathsize = 0;
456
+ ZopfliLZ77Store currentstore;
457
+ SymbolStats stats, beststats, laststats;
458
+ int i;
459
+ double cost;
460
+ double bestcost = ZOPFLI_LARGE_FLOAT;
461
+ double lastcost = 0;
462
+ /* Try randomizing the costs a bit once the size stabilizes. */
463
+ RanState ran_state;
464
+ int lastrandomstep = -1;
465
+
466
+ if (!length_array) exit(-1); /* Allocation failed. */
467
+
468
+ InitRanState(&ran_state);
469
+ InitStats(&stats);
470
+ ZopfliInitLZ77Store(&currentstore);
471
+
472
+ /* Do regular deflate, then loop multiple shortest path runs, each time using
473
+ the statistics of the previous run. */
474
+
475
+ /* Initial run. */
476
+ ZopfliLZ77Greedy(s, in, instart, inend, &currentstore);
477
+ GetStatistics(&currentstore, &stats);
478
+
479
+ /* Repeat statistics with each time the cost model from the previous stat
480
+ run. */
481
+ for (i = 0; i < s->options->numiterations; i++) {
482
+ ZopfliCleanLZ77Store(&currentstore);
483
+ ZopfliInitLZ77Store(&currentstore);
484
+ LZ77OptimalRun(s, in, instart, inend, &path, &pathsize,
485
+ length_array, GetCostStat, (void*)&stats,
486
+ &currentstore);
487
+ cost = ZopfliCalculateBlockSize(currentstore.litlens, currentstore.dists,
488
+ 0, currentstore.size, 2);
489
+ if (cost < bestcost) {
490
+ /* Copy to the output store. */
491
+ ZopfliCopyLZ77Store(&currentstore, store);
492
+ CopyStats(&stats, &beststats);
493
+ bestcost = cost;
494
+ }
495
+ CopyStats(&stats, &laststats);
496
+ ClearStatFreqs(&stats);
497
+ GetStatistics(&currentstore, &stats);
498
+ if (lastrandomstep != -1) {
499
+ /* This makes it converge slower but better. Do it only once the
500
+ randomness kicks in so that if the user does few iterations, it gives a
501
+ better result sooner. */
502
+ AddWeighedStatFreqs(&stats, 1.0, &laststats, 0.5, &stats);
503
+ CalculateStatistics(&stats);
504
+ }
505
+ if (i > 5 && cost == lastcost) {
506
+ CopyStats(&beststats, &stats);
507
+ RandomizeStatFreqs(&ran_state, &stats);
508
+ CalculateStatistics(&stats);
509
+ lastrandomstep = i;
510
+ }
511
+ lastcost = cost;
512
+ }
513
+
514
+ free(length_array);
515
+ free(path);
516
+ ZopfliCleanLZ77Store(&currentstore);
517
+ }
518
+
519
+ void ZopfliLZ77OptimalFixed(ZopfliBlockState *s,
520
+ const unsigned char* in,
521
+ size_t instart, size_t inend,
522
+ ZopfliLZ77Store* store)
523
+ {
524
+ /* Dist to get to here with smallest cost. */
525
+ size_t blocksize = inend - instart;
526
+ unsigned short* length_array =
527
+ (unsigned short*)malloc(sizeof(unsigned short) * (blocksize + 1));
528
+ unsigned short* path = 0;
529
+ size_t pathsize = 0;
530
+
531
+ if (!length_array) exit(-1); /* Allocation failed. */
532
+
533
+ s->blockstart = instart;
534
+ s->blockend = inend;
535
+
536
+ /* Shortest path for fixed tree This one should give the shortest possible
537
+ result for fixed tree, no repeated runs are needed since the tree is known. */
538
+ LZ77OptimalRun(s, in, instart, inend, &path, &pathsize,
539
+ length_array, GetCostFixed, 0, store);
540
+
541
+ free(length_array);
542
+ free(path);
543
+ }