zopfli-bin 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.gitmodules +3 -0
  4. data/.testguardrc +1 -0
  5. data/Gemfile +17 -0
  6. data/Gemfile.lock +111 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.rdoc +19 -0
  9. data/Rakefile +39 -0
  10. data/VERSION +1 -0
  11. data/ext/Makefile +8 -0
  12. data/ext/extconf.rb +4 -0
  13. data/lib/zopfli-bin.rb +5 -0
  14. data/lib/zopfli/bin.rb +34 -0
  15. data/test/helper.rb +19 -0
  16. data/test/test_zopfli-bin.rb +33 -0
  17. data/vendor/zopfli/CONTRIBUTORS +7 -0
  18. data/vendor/zopfli/COPYING +201 -0
  19. data/vendor/zopfli/Makefile +37 -0
  20. data/vendor/zopfli/README +32 -0
  21. data/vendor/zopfli/README.zopflipng +35 -0
  22. data/vendor/zopfli/src/zopfli/blocksplitter.c +342 -0
  23. data/vendor/zopfli/src/zopfli/blocksplitter.h +77 -0
  24. data/vendor/zopfli/src/zopfli/cache.c +119 -0
  25. data/vendor/zopfli/src/zopfli/cache.h +66 -0
  26. data/vendor/zopfli/src/zopfli/deflate.c +866 -0
  27. data/vendor/zopfli/src/zopfli/deflate.h +86 -0
  28. data/vendor/zopfli/src/zopfli/gzip_container.c +117 -0
  29. data/vendor/zopfli/src/zopfli/gzip_container.h +50 -0
  30. data/vendor/zopfli/src/zopfli/hash.c +135 -0
  31. data/vendor/zopfli/src/zopfli/hash.h +70 -0
  32. data/vendor/zopfli/src/zopfli/katajainen.c +251 -0
  33. data/vendor/zopfli/src/zopfli/katajainen.h +42 -0
  34. data/vendor/zopfli/src/zopfli/lz77.c +482 -0
  35. data/vendor/zopfli/src/zopfli/lz77.h +129 -0
  36. data/vendor/zopfli/src/zopfli/squeeze.c +546 -0
  37. data/vendor/zopfli/src/zopfli/squeeze.h +60 -0
  38. data/vendor/zopfli/src/zopfli/tree.c +101 -0
  39. data/vendor/zopfli/src/zopfli/tree.h +51 -0
  40. data/vendor/zopfli/src/zopfli/util.c +213 -0
  41. data/vendor/zopfli/src/zopfli/util.h +175 -0
  42. data/vendor/zopfli/src/zopfli/zlib_container.c +79 -0
  43. data/vendor/zopfli/src/zopfli/zlib_container.h +50 -0
  44. data/vendor/zopfli/src/zopfli/zopfli.h +97 -0
  45. data/vendor/zopfli/src/zopfli/zopfli_bin.c +203 -0
  46. data/vendor/zopfli/src/zopfli/zopfli_lib.c +42 -0
  47. data/vendor/zopfli/src/zopflipng/lodepng/lodepng.cpp +6260 -0
  48. data/vendor/zopfli/src/zopflipng/lodepng/lodepng.h +1716 -0
  49. data/vendor/zopfli/src/zopflipng/lodepng/lodepng_util.cpp +656 -0
  50. data/vendor/zopfli/src/zopflipng/lodepng/lodepng_util.h +151 -0
  51. data/vendor/zopfli/src/zopflipng/zopflipng_bin.cc +407 -0
  52. data/vendor/zopfli/src/zopflipng/zopflipng_lib.cc +425 -0
  53. data/vendor/zopfli/src/zopflipng/zopflipng_lib.h +79 -0
  54. data/zopfli-bin.gemspec +119 -0
  55. metadata +225 -0
@@ -0,0 +1,129 @@
1
+ /*
2
+ Copyright 2011 Google Inc. All Rights Reserved.
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+
16
+ Author: lode.vandevenne@gmail.com (Lode Vandevenne)
17
+ Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
18
+ */
19
+
20
+ /*
21
+ Functions for basic LZ77 compression and utilities for the "squeeze" LZ77
22
+ compression.
23
+ */
24
+
25
+ #ifndef ZOPFLI_LZ77_H_
26
+ #define ZOPFLI_LZ77_H_
27
+
28
+ #include <stdlib.h>
29
+
30
+ #include "cache.h"
31
+ #include "hash.h"
32
+ #include "zopfli.h"
33
+
34
/*
Stores lit/length and dist pairs for LZ77.
Parameter litlens: Contains the literal symbols or length values.
Parameter dists: Contains the distances. A value is 0 to indicate that there is
no dist and the corresponding litlens value is a literal instead of a length.
Parameter size: The size of both the litlens and dists arrays.
The memory can best be managed by using ZopfliInitLZ77Store to initialize it,
ZopfliCleanLZ77Store to destroy it, and ZopfliStoreLitLenDist to append values.

The two arrays are parallel: entry i of litlens and entry i of dists together
describe one LZ77 symbol.
*/
typedef struct ZopfliLZ77Store {
  unsigned short* litlens;  /* Lit or len. */
  unsigned short* dists;  /* If 0: indicates literal in corresponding litlens,
      if > 0: length in corresponding litlens, this is the distance. */
  size_t size;  /* Number of entries in both litlens and dists. */
} ZopfliLZ77Store;
50
+
51
+ void ZopfliInitLZ77Store(ZopfliLZ77Store* store);
52
+ void ZopfliCleanLZ77Store(ZopfliLZ77Store* store);
53
+ void ZopfliCopyLZ77Store(const ZopfliLZ77Store* source, ZopfliLZ77Store* dest);
54
+ void ZopfliStoreLitLenDist(unsigned short length, unsigned short dist,
55
+ ZopfliLZ77Store* store);
56
+
57
/*
Some state information for compressing a block.
This is currently a bit under-used (with mainly only the longest match cache),
but is kept for easy future expansion.
*/
typedef struct ZopfliBlockState {
  /* Compression options; not modifiable through this pointer. */
  const ZopfliOptions* options;

#ifdef ZOPFLI_LONGEST_MATCH_CACHE
  /* Cache for length/distance pairs found so far. This member only exists when
  ZOPFLI_LONGEST_MATCH_CACHE is defined. */
  ZopfliLongestMatchCache* lmc;
#endif

  /* The start (inclusive) and end (not inclusive) of the current block. */
  size_t blockstart;
  size_t blockend;
} ZopfliBlockState;
74
+
75
+ /*
76
+ Finds the longest match (length and corresponding distance) for LZ77
77
+ compression.
78
+ Even when not using "sublen", it can be more efficient to provide an array,
79
+ because only then the caching is used.
80
+ array: the data
81
+ pos: position in the data to find the match for
82
+ size: size of the data
83
+ limit: limit length to maximum this value (default should be 258). This allows
84
+ finding a shorter dist for that length (= less extra bits). Must be
85
+ in the range [ZOPFLI_MIN_MATCH, ZOPFLI_MAX_MATCH].
86
+ sublen: output array of 259 elements, or null. Has, for each length, the
87
+ smallest distance required to reach this length. Only 256 of its 259 values
88
+ are used, the first 3 are ignored (the shortest length is 3. It is purely
89
+ for convenience that the array is made 3 longer).
90
+ */
91
+ void ZopfliFindLongestMatch(
92
+ ZopfliBlockState *s, const ZopfliHash* h, const unsigned char* array,
93
+ size_t pos, size_t size, size_t limit,
94
+ unsigned short* sublen, unsigned short* distance, unsigned short* length);
95
+
96
+ /*
97
+ Verifies if length and dist are indeed valid, only used for assertion.
98
+ */
99
+ void ZopfliVerifyLenDist(const unsigned char* data, size_t datasize, size_t pos,
100
+ unsigned short dist, unsigned short length);
101
+
102
+ /*
103
+ Counts the number of literal, length and distance symbols in the given lz77
104
+ arrays.
105
+ litlens: lz77 lit/lengths
106
+ dists: lz77 distances
107
+ start: where to begin counting in litlens and dists
108
+ end: where to stop counting in litlens and dists (not inclusive)
109
+ ll_count: count of each lit/len symbol, must have size 288 (see deflate
110
+ standard)
111
+ d_count: count of each dist symbol, must have size 32 (see deflate standard)
112
+ */
113
+ void ZopfliLZ77Counts(const unsigned short* litlens,
114
+ const unsigned short* dists,
115
+ size_t start, size_t end,
116
+ size_t* ll_count, size_t* d_count);
117
+
118
+ /*
119
+ Does LZ77 using an algorithm similar to gzip, with lazy matching, rather than
120
+ with the slow but better "squeeze" implementation.
121
+ The result is placed in the ZopfliLZ77Store.
122
+ If instart is larger than 0, it uses values before instart as starting
123
+ dictionary.
124
+ */
125
+ void ZopfliLZ77Greedy(ZopfliBlockState* s, const unsigned char* in,
126
+ size_t instart, size_t inend,
127
+ ZopfliLZ77Store* store);
128
+
129
+ #endif /* ZOPFLI_LZ77_H_ */
@@ -0,0 +1,546 @@
1
+ /*
2
+ Copyright 2011 Google Inc. All Rights Reserved.
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+
16
+ Author: lode.vandevenne@gmail.com (Lode Vandevenne)
17
+ Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
18
+ */
19
+
20
+ #include "squeeze.h"
21
+
22
+ #include <assert.h>
23
+ #include <math.h>
24
+ #include <stdio.h>
25
+
26
+ #include "blocksplitter.h"
27
+ #include "deflate.h"
28
+ #include "tree.h"
29
+ #include "util.h"
30
+
31
/*
Symbol statistics used as a cost model for the squeeze runs: how often each
deflate symbol occurred, and the bit length attributed to each symbol.
*/
typedef struct SymbolStats {
  /* The literal and length symbols. */
  size_t litlens[288];
  /* The 32 unique dist symbols, not the 32768 possible dists. */
  size_t dists[32];

  double ll_symbols[288];  /* Length of each lit/len symbol in bits. */
  double d_symbols[32];  /* Length of each dist symbol in bits. */
} SymbolStats;

/*
Sets everything to 0: both count tables and both bit length tables.
The sizes are taken from the arrays themselves so they cannot drift from the
struct definition.
*/
static void InitStats(SymbolStats* stats) {
  memset(stats->litlens, 0, sizeof(stats->litlens));
  memset(stats->dists, 0, sizeof(stats->dists));

  memset(stats->ll_symbols, 0, sizeof(stats->ll_symbols));
  memset(stats->d_symbols, 0, sizeof(stats->d_symbols));
}

/*
Copies all four tables of source into dest.
source: statistics to copy from; only read, hence const.
dest: statistics to overwrite; must not overlap source (memcpy is used).
*/
static void CopyStats(const SymbolStats* source, SymbolStats* dest) {
  memcpy(dest->litlens, source->litlens, sizeof(dest->litlens));
  memcpy(dest->dists, source->dists, sizeof(dest->dists));

  memcpy(dest->ll_symbols, source->ll_symbols, sizeof(dest->ll_symbols));
  memcpy(dest->d_symbols, source->d_symbols, sizeof(dest->d_symbols));
}
58
+
59
+ /* Adds the bit lengths. */
60
+ static void AddWeighedStatFreqs(const SymbolStats* stats1, double w1,
61
+ const SymbolStats* stats2, double w2,
62
+ SymbolStats* result) {
63
+ size_t i;
64
+ for (i = 0; i < 288; i++) {
65
+ result->litlens[i] =
66
+ (size_t) (stats1->litlens[i] * w1 + stats2->litlens[i] * w2);
67
+ }
68
+ for (i = 0; i < 32; i++) {
69
+ result->dists[i] =
70
+ (size_t) (stats1->dists[i] * w1 + stats2->dists[i] * w2);
71
+ }
72
+ result->litlens[256] = 1; /* End symbol. */
73
+ }
74
+
75
/* State of the two multiply-with-carry streams combined by Ran. */
typedef struct RanState {
  unsigned int m_w, m_z;
} RanState;

/* Seeds the generator with fixed values, so the sequence is always the same. */
static void InitRanState(RanState* state) {
  state->m_z = 2;
  state->m_w = 1;
}

/* Get random number: "Multiply-With-Carry" generator of G. Marsaglia */
static unsigned int Ran(RanState* state) {
  unsigned int z = state->m_z;
  unsigned int w = state->m_w;

  /* Advance both streams: low 16 bits times the multiplier, plus the carry
  kept in the upper bits. */
  z = 36969 * (z & 65535) + (z >> 16);
  w = 18000 * (w & 65535) + (w >> 16);

  state->m_z = z;
  state->m_w = w;
  return (z << 16) + w;  /* 32-bit result. */
}
90
+
91
+ static void RandomizeFreqs(RanState* state, size_t* freqs, int n) {
92
+ int i;
93
+ for (i = 0; i < n; i++) {
94
+ if ((Ran(state) >> 4) % 3 == 0) freqs[i] = freqs[Ran(state) % n];
95
+ }
96
+ }
97
+
98
+ static void RandomizeStatFreqs(RanState* state, SymbolStats* stats) {
99
+ RandomizeFreqs(state, stats->litlens, 288);
100
+ RandomizeFreqs(state, stats->dists, 32);
101
+ stats->litlens[256] = 1; /* End symbol. */
102
+ }
103
+
104
+ static void ClearStatFreqs(SymbolStats* stats) {
105
+ size_t i;
106
+ for (i = 0; i < 288; i++) stats->litlens[i] = 0;
107
+ for (i = 0; i < 32; i++) stats->dists[i] = 0;
108
+ }
109
+
110
+ /*
111
+ Function that calculates a cost based on a model for the given LZ77 symbol.
112
+ litlen: means literal symbol if dist is 0, length otherwise.
113
+ */
114
+ typedef double CostModelFun(unsigned litlen, unsigned dist, void* context);
115
+
116
/*
Cost model which should exactly match fixed tree.
type: CostModelFun
litlen: literal value if dist is 0, match length otherwise
dist: 0 for a literal, otherwise the match distance
unused: ignored context pointer
returns the cost in bits: 8 or 9 for a literal; for a match, 7 or 8 for the
length symbol plus 5 for the dist symbol plus the extra bits of both.
*/
static double GetCostFixed(unsigned litlen, unsigned dist, void* unused) {
  int dist_extra, len_extra, len_symbol;
  (void)unused;

  if (dist == 0) {
    return litlen <= 143 ? 8 : 9;
  }

  dist_extra = ZopfliGetDistExtraBits(dist);
  len_extra = ZopfliGetLengthExtraBits(litlen);
  len_symbol = ZopfliGetLengthSymbol(litlen);

  /* Length symbols up to 279 take 7 bits, the rest 8. Every dist symbol has
  length 5. */
  return (len_symbol <= 279 ? 7.0 : 8.0) + 5 + dist_extra + len_extra;
}
136
+
137
+ /*
138
+ Cost model based on symbol statistics.
139
+ type: CostModelFun
140
+ */
141
+ static double GetCostStat(unsigned litlen, unsigned dist, void* context) {
142
+ SymbolStats* stats = (SymbolStats*)context;
143
+ if (dist == 0) {
144
+ return stats->ll_symbols[litlen];
145
+ } else {
146
+ int lsym = ZopfliGetLengthSymbol(litlen);
147
+ int lbits = ZopfliGetLengthExtraBits(litlen);
148
+ int dsym = ZopfliGetDistSymbol(dist);
149
+ int dbits = ZopfliGetDistExtraBits(dist);
150
+ return stats->ll_symbols[lsym] + lbits + stats->d_symbols[dsym] + dbits;
151
+ }
152
+ }
153
+
154
+ /*
155
+ Finds the minimum possible cost this cost model can return for valid length and
156
+ distance symbols.
157
+ */
158
+ static double GetCostModelMinCost(CostModelFun* costmodel, void* costcontext) {
159
+ double mincost;
160
+ int bestlength = 0; /* length that has lowest cost in the cost model */
161
+ int bestdist = 0; /* distance that has lowest cost in the cost model */
162
+ int i;
163
+ /*
164
+ Table of distances that have a different distance symbol in the deflate
165
+ specification. Each value is the first distance that has a new symbol. Only
166
+ different symbols affect the cost model so only these need to be checked.
167
+ See RFC 1951 section 3.2.5. Compressed blocks (length and distance codes).
168
+ */
169
+ static const int dsymbols[30] = {
170
+ 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513,
171
+ 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577
172
+ };
173
+
174
+ mincost = ZOPFLI_LARGE_FLOAT;
175
+ for (i = 3; i < 259; i++) {
176
+ double c = costmodel(i, 1, costcontext);
177
+ if (c < mincost) {
178
+ bestlength = i;
179
+ mincost = c;
180
+ }
181
+ }
182
+
183
+ mincost = ZOPFLI_LARGE_FLOAT;
184
+ for (i = 0; i < 30; i++) {
185
+ double c = costmodel(3, dsymbols[i], costcontext);
186
+ if (c < mincost) {
187
+ bestdist = dsymbols[i];
188
+ mincost = c;
189
+ }
190
+ }
191
+
192
+ return costmodel(bestlength, bestdist, costcontext);
193
+ }
194
+
195
/*
Performs the forward pass for "squeeze". Gets the most optimal length to reach
every byte from a previous byte, using cost calculations.
s: the ZopfliBlockState
in: the input data array
instart: where to start
inend: where to stop (not inclusive)
costmodel: function to calculate the cost of some lit/len/dist pair.
costcontext: abstract context for the costmodel function
length_array: output array which will receive the best length to reach each
    byte from a previous byte. Indices 0 up to and including (inend - instart)
    are written, so it must hold (inend - instart) + 1 entries.
returns the cost that was, according to the costmodel, needed to get to the end.
Returns 0 without touching length_array when the range is empty. Calls
exit(-1) if the temporary cost array cannot be allocated.
*/
static double GetBestLengths(ZopfliBlockState *s,
                             const unsigned char* in,
                             size_t instart, size_t inend,
                             CostModelFun* costmodel, void* costcontext,
                             unsigned short* length_array) {
  size_t blocksize = inend - instart;
  /* Best cost to get here so far. Entry j is the cost to reach byte
  instart + j; stored as float although costs are computed as double. */
  float* costs;
  size_t i = 0, k;
  unsigned short leng;
  unsigned short dist;
  /* Filled by ZopfliFindLongestMatch; read below as the distance to use for a
  match of length k. */
  unsigned short sublen[259];
  /* Bytes from here up to instart are fed to the hash before the block. */
  size_t windowstart = instart > ZOPFLI_WINDOW_SIZE
      ? instart - ZOPFLI_WINDOW_SIZE : 0;
  ZopfliHash hash;
  ZopfliHash* h = &hash;
  double result;
  /* Lower bound used below to skip cost model calls. Note that it is computed
  before the empty-range check. */
  double mincost = GetCostModelMinCost(costmodel, costcontext);

  if (instart == inend) return 0;

  costs = (float*)malloc(sizeof(float) * (blocksize + 1));
  if (!costs) exit(-1);  /* Allocation failed. */

  ZopfliInitHash(ZOPFLI_WINDOW_SIZE, h);
  ZopfliWarmupHash(in, windowstart, inend, h);
  for (i = windowstart; i < instart; i++) {
    ZopfliUpdateHash(in, i, inend, h);
  }

  /* Every position starts out as unreachable, except the start itself. */
  for (i = 1; i < blocksize + 1; i++) costs[i] = ZOPFLI_LARGE_FLOAT;
  costs[0] = 0;  /* Because it's the start. */
  length_array[0] = 0;

  for (i = instart; i < inend; i++) {
    size_t j = i - instart;  /* Index in the costs array and length_array. */
    ZopfliUpdateHash(in, i, inend, h);

#ifdef ZOPFLI_SHORTCUT_LONG_REPETITIONS
    /* If we're in a long repetition of the same character and have more than
    ZOPFLI_MAX_MATCH characters before and after our position. */
    if (h->same[i & ZOPFLI_WINDOW_MASK] > ZOPFLI_MAX_MATCH * 2
        && i > instart + ZOPFLI_MAX_MATCH + 1
        && i + ZOPFLI_MAX_MATCH * 2 + 1 < inend
        && h->same[(i - ZOPFLI_MAX_MATCH) & ZOPFLI_WINDOW_MASK]
            > ZOPFLI_MAX_MATCH) {
      double symbolcost = costmodel(ZOPFLI_MAX_MATCH, 1, costcontext);
      /* Set the length to reach each one to ZOPFLI_MAX_MATCH, and the cost to
      the cost corresponding to that length. Doing this, we skip
      ZOPFLI_MAX_MATCH values to avoid calling ZopfliFindLongestMatch. */
      for (k = 0; k < ZOPFLI_MAX_MATCH; k++) {
        costs[j + ZOPFLI_MAX_MATCH] = costs[j] + symbolcost;
        length_array[j + ZOPFLI_MAX_MATCH] = ZOPFLI_MAX_MATCH;
        /* Both the absolute position and the relative index advance; the
        hash must see every skipped byte. */
        i++;
        j++;
        ZopfliUpdateHash(in, i, inend, h);
      }
    }
#endif

    ZopfliFindLongestMatch(s, h, in, i, inend, ZOPFLI_MAX_MATCH, sublen,
                           &dist, &leng);

    /* Literal. */
    if (i + 1 <= inend) {
      double newCost = costs[j] + costmodel(in[i], 0, costcontext);
      assert(newCost >= 0);
      if (newCost < costs[j + 1]) {
        costs[j + 1] = newCost;
        length_array[j + 1] = 1;
      }
    }
    /* Lengths. */
    for (k = 3; k <= leng && i + k <= inend; k++) {
      double newCost;

      /* Calling the cost model is expensive, avoid this if we are already at
      the minimum possible cost that it can return. */
      if (costs[j + k] - costs[j] <= mincost) continue;

      newCost = costs[j] + costmodel(k, sublen[k], costcontext);
      assert(newCost >= 0);
      if (newCost < costs[j + k]) {
        assert(k <= ZOPFLI_MAX_MATCH);
        costs[j + k] = newCost;
        length_array[j + k] = k;
      }
    }
  }

  assert(costs[blocksize] >= 0);
  /* Read the result before the cost array is released. */
  result = costs[blocksize];

  ZopfliCleanHash(h);
  free(costs);

  return result;
}
306
+
307
+ /*
308
+ Calculates the optimal path of lz77 lengths to use, from the calculated
309
+ length_array. The length_array must contain the optimal length to reach that
310
+ byte. The path will be filled with the lengths to use, so its data size will be
311
+ the amount of lz77 symbols.
312
+ */
313
+ static void TraceBackwards(size_t size, const unsigned short* length_array,
314
+ unsigned short** path, size_t* pathsize) {
315
+ size_t index = size;
316
+ if (size == 0) return;
317
+ for (;;) {
318
+ ZOPFLI_APPEND_DATA(length_array[index], path, pathsize);
319
+ assert(length_array[index] <= index);
320
+ assert(length_array[index] <= ZOPFLI_MAX_MATCH);
321
+ assert(length_array[index] != 0);
322
+ index -= length_array[index];
323
+ if (index == 0) break;
324
+ }
325
+
326
+ /* Mirror result. */
327
+ for (index = 0; index < *pathsize / 2; index++) {
328
+ unsigned short temp = (*path)[index];
329
+ (*path)[index] = (*path)[*pathsize - index - 1];
330
+ (*path)[*pathsize - index - 1] = temp;
331
+ }
332
+ }
333
+
334
+ static void FollowPath(ZopfliBlockState* s,
335
+ const unsigned char* in, size_t instart, size_t inend,
336
+ unsigned short* path, size_t pathsize,
337
+ ZopfliLZ77Store* store) {
338
+ size_t i, j, pos = 0;
339
+ size_t windowstart = instart > ZOPFLI_WINDOW_SIZE
340
+ ? instart - ZOPFLI_WINDOW_SIZE : 0;
341
+
342
+ size_t total_length_test = 0;
343
+
344
+ ZopfliHash hash;
345
+ ZopfliHash* h = &hash;
346
+
347
+ if (instart == inend) return;
348
+
349
+ ZopfliInitHash(ZOPFLI_WINDOW_SIZE, h);
350
+ ZopfliWarmupHash(in, windowstart, inend, h);
351
+ for (i = windowstart; i < instart; i++) {
352
+ ZopfliUpdateHash(in, i, inend, h);
353
+ }
354
+
355
+ pos = instart;
356
+ for (i = 0; i < pathsize; i++) {
357
+ unsigned short length = path[i];
358
+ unsigned short dummy_length;
359
+ unsigned short dist;
360
+ assert(pos < inend);
361
+
362
+ ZopfliUpdateHash(in, pos, inend, h);
363
+
364
+ /* Add to output. */
365
+ if (length >= ZOPFLI_MIN_MATCH) {
366
+ /* Get the distance by recalculating longest match. The found length
367
+ should match the length from the path. */
368
+ ZopfliFindLongestMatch(s, h, in, pos, inend, length, 0,
369
+ &dist, &dummy_length);
370
+ assert(!(dummy_length != length && length > 2 && dummy_length > 2));
371
+ ZopfliVerifyLenDist(in, inend, pos, dist, length);
372
+ ZopfliStoreLitLenDist(length, dist, store);
373
+ total_length_test += length;
374
+ } else {
375
+ length = 1;
376
+ ZopfliStoreLitLenDist(in[pos], 0, store);
377
+ total_length_test++;
378
+ }
379
+
380
+
381
+ assert(pos + length <= inend);
382
+ for (j = 1; j < length; j++) {
383
+ ZopfliUpdateHash(in, pos + j, inend, h);
384
+ }
385
+
386
+ pos += length;
387
+ }
388
+
389
+ ZopfliCleanHash(h);
390
+ }
391
+
392
+ /* Calculates the entropy of the statistics */
393
+ static void CalculateStatistics(SymbolStats* stats) {
394
+ ZopfliCalculateEntropy(stats->litlens, 288, stats->ll_symbols);
395
+ ZopfliCalculateEntropy(stats->dists, 32, stats->d_symbols);
396
+ }
397
+
398
+ /* Appends the symbol statistics from the store. */
399
+ static void GetStatistics(const ZopfliLZ77Store* store, SymbolStats* stats) {
400
+ size_t i;
401
+ for (i = 0; i < store->size; i++) {
402
+ if (store->dists[i] == 0) {
403
+ stats->litlens[store->litlens[i]]++;
404
+ } else {
405
+ stats->litlens[ZopfliGetLengthSymbol(store->litlens[i])]++;
406
+ stats->dists[ZopfliGetDistSymbol(store->dists[i])]++;
407
+ }
408
+ }
409
+ stats->litlens[256] = 1; /* End symbol. */
410
+
411
+ CalculateStatistics(stats);
412
+ }
413
+
414
+ /*
415
+ Does a single run for ZopfliLZ77Optimal. For good compression, repeated runs
416
+ with updated statistics should be performed.
417
+
418
+ s: the block state
419
+ in: the input data array
420
+ instart: where to start
421
+ inend: where to stop (not inclusive)
422
+ path: pointer to dynamically allocated memory to store the path
423
+ pathsize: pointer to the size of the dynamic path array
424
+ length_array: array if size (inend - instart) used to store lengths
425
+ costmodel: function to use as the cost model for this squeeze run
426
+ costcontext: abstract context for the costmodel function
427
+ store: place to output the LZ77 data
428
+ returns the cost that was, according to the costmodel, needed to get to the end.
429
+ This is not the actual cost.
430
+ */
431
+ static double LZ77OptimalRun(ZopfliBlockState* s,
432
+ const unsigned char* in, size_t instart, size_t inend,
433
+ unsigned short** path, size_t* pathsize,
434
+ unsigned short* length_array, CostModelFun* costmodel,
435
+ void* costcontext, ZopfliLZ77Store* store) {
436
+ double cost = GetBestLengths(
437
+ s, in, instart, inend, costmodel, costcontext, length_array);
438
+ free(*path);
439
+ *path = 0;
440
+ *pathsize = 0;
441
+ TraceBackwards(inend - instart, length_array, path, pathsize);
442
+ FollowPath(s, in, instart, inend, *path, *pathsize, store);
443
+ assert(cost < ZOPFLI_LARGE_FLOAT);
444
+ return cost;
445
+ }
446
+
447
+ void ZopfliLZ77Optimal(ZopfliBlockState *s,
448
+ const unsigned char* in, size_t instart, size_t inend,
449
+ ZopfliLZ77Store* store) {
450
+ /* Dist to get to here with smallest cost. */
451
+ size_t blocksize = inend - instart;
452
+ unsigned short* length_array =
453
+ (unsigned short*)malloc(sizeof(unsigned short) * (blocksize + 1));
454
+ unsigned short* path = 0;
455
+ size_t pathsize = 0;
456
+ ZopfliLZ77Store currentstore;
457
+ SymbolStats stats, beststats, laststats;
458
+ int i;
459
+ double cost;
460
+ double bestcost = ZOPFLI_LARGE_FLOAT;
461
+ double lastcost = 0;
462
+ /* Try randomizing the costs a bit once the size stabilizes. */
463
+ RanState ran_state;
464
+ int lastrandomstep = -1;
465
+
466
+ if (!length_array) exit(-1); /* Allocation failed. */
467
+
468
+ InitRanState(&ran_state);
469
+ InitStats(&stats);
470
+ ZopfliInitLZ77Store(&currentstore);
471
+
472
+ /* Do regular deflate, then loop multiple shortest path runs, each time using
473
+ the statistics of the previous run. */
474
+
475
+ /* Initial run. */
476
+ ZopfliLZ77Greedy(s, in, instart, inend, &currentstore);
477
+ GetStatistics(&currentstore, &stats);
478
+
479
+ /* Repeat statistics with each time the cost model from the previous stat
480
+ run. */
481
+ for (i = 0; i < s->options->numiterations; i++) {
482
+ ZopfliCleanLZ77Store(&currentstore);
483
+ ZopfliInitLZ77Store(&currentstore);
484
+ LZ77OptimalRun(s, in, instart, inend, &path, &pathsize,
485
+ length_array, GetCostStat, (void*)&stats,
486
+ &currentstore);
487
+ cost = ZopfliCalculateBlockSize(currentstore.litlens, currentstore.dists,
488
+ 0, currentstore.size, 2);
489
+ if (s->options->verbose_more || (s->options->verbose && cost < bestcost)) {
490
+ fprintf(stderr, "Iteration %d: %d bit\n", i, (int) cost);
491
+ }
492
+ if (cost < bestcost) {
493
+ /* Copy to the output store. */
494
+ ZopfliCopyLZ77Store(&currentstore, store);
495
+ CopyStats(&stats, &beststats);
496
+ bestcost = cost;
497
+ }
498
+ CopyStats(&stats, &laststats);
499
+ ClearStatFreqs(&stats);
500
+ GetStatistics(&currentstore, &stats);
501
+ if (lastrandomstep != -1) {
502
+ /* This makes it converge slower but better. Do it only once the
503
+ randomness kicks in so that if the user does few iterations, it gives a
504
+ better result sooner. */
505
+ AddWeighedStatFreqs(&stats, 1.0, &laststats, 0.5, &stats);
506
+ CalculateStatistics(&stats);
507
+ }
508
+ if (i > 5 && cost == lastcost) {
509
+ CopyStats(&beststats, &stats);
510
+ RandomizeStatFreqs(&ran_state, &stats);
511
+ CalculateStatistics(&stats);
512
+ lastrandomstep = i;
513
+ }
514
+ lastcost = cost;
515
+ }
516
+
517
+ free(length_array);
518
+ free(path);
519
+ ZopfliCleanLZ77Store(&currentstore);
520
+ }
521
+
522
+ void ZopfliLZ77OptimalFixed(ZopfliBlockState *s,
523
+ const unsigned char* in,
524
+ size_t instart, size_t inend,
525
+ ZopfliLZ77Store* store)
526
+ {
527
+ /* Dist to get to here with smallest cost. */
528
+ size_t blocksize = inend - instart;
529
+ unsigned short* length_array =
530
+ (unsigned short*)malloc(sizeof(unsigned short) * (blocksize + 1));
531
+ unsigned short* path = 0;
532
+ size_t pathsize = 0;
533
+
534
+ if (!length_array) exit(-1); /* Allocation failed. */
535
+
536
+ s->blockstart = instart;
537
+ s->blockend = inend;
538
+
539
+ /* Shortest path for fixed tree This one should give the shortest possible
540
+ result for fixed tree, no repeated runs are needed since the tree is known. */
541
+ LZ77OptimalRun(s, in, instart, inend, &path, &pathsize,
542
+ length_array, GetCostFixed, 0, store);
543
+
544
+ free(length_array);
545
+ free(path);
546
+ }