zopfli-bin 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.gitmodules +3 -0
  4. data/.testguardrc +1 -0
  5. data/Gemfile +17 -0
  6. data/Gemfile.lock +111 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.rdoc +19 -0
  9. data/Rakefile +39 -0
  10. data/VERSION +1 -0
  11. data/ext/Makefile +8 -0
  12. data/ext/extconf.rb +4 -0
  13. data/lib/zopfli-bin.rb +5 -0
  14. data/lib/zopfli/bin.rb +34 -0
  15. data/test/helper.rb +19 -0
  16. data/test/test_zopfli-bin.rb +33 -0
  17. data/vendor/zopfli/CONTRIBUTORS +7 -0
  18. data/vendor/zopfli/COPYING +201 -0
  19. data/vendor/zopfli/Makefile +37 -0
  20. data/vendor/zopfli/README +32 -0
  21. data/vendor/zopfli/README.zopflipng +35 -0
  22. data/vendor/zopfli/src/zopfli/blocksplitter.c +342 -0
  23. data/vendor/zopfli/src/zopfli/blocksplitter.h +77 -0
  24. data/vendor/zopfli/src/zopfli/cache.c +119 -0
  25. data/vendor/zopfli/src/zopfli/cache.h +66 -0
  26. data/vendor/zopfli/src/zopfli/deflate.c +866 -0
  27. data/vendor/zopfli/src/zopfli/deflate.h +86 -0
  28. data/vendor/zopfli/src/zopfli/gzip_container.c +117 -0
  29. data/vendor/zopfli/src/zopfli/gzip_container.h +50 -0
  30. data/vendor/zopfli/src/zopfli/hash.c +135 -0
  31. data/vendor/zopfli/src/zopfli/hash.h +70 -0
  32. data/vendor/zopfli/src/zopfli/katajainen.c +251 -0
  33. data/vendor/zopfli/src/zopfli/katajainen.h +42 -0
  34. data/vendor/zopfli/src/zopfli/lz77.c +482 -0
  35. data/vendor/zopfli/src/zopfli/lz77.h +129 -0
  36. data/vendor/zopfli/src/zopfli/squeeze.c +546 -0
  37. data/vendor/zopfli/src/zopfli/squeeze.h +60 -0
  38. data/vendor/zopfli/src/zopfli/tree.c +101 -0
  39. data/vendor/zopfli/src/zopfli/tree.h +51 -0
  40. data/vendor/zopfli/src/zopfli/util.c +213 -0
  41. data/vendor/zopfli/src/zopfli/util.h +175 -0
  42. data/vendor/zopfli/src/zopfli/zlib_container.c +79 -0
  43. data/vendor/zopfli/src/zopfli/zlib_container.h +50 -0
  44. data/vendor/zopfli/src/zopfli/zopfli.h +97 -0
  45. data/vendor/zopfli/src/zopfli/zopfli_bin.c +203 -0
  46. data/vendor/zopfli/src/zopfli/zopfli_lib.c +42 -0
  47. data/vendor/zopfli/src/zopflipng/lodepng/lodepng.cpp +6260 -0
  48. data/vendor/zopfli/src/zopflipng/lodepng/lodepng.h +1716 -0
  49. data/vendor/zopfli/src/zopflipng/lodepng/lodepng_util.cpp +656 -0
  50. data/vendor/zopfli/src/zopflipng/lodepng/lodepng_util.h +151 -0
  51. data/vendor/zopfli/src/zopflipng/zopflipng_bin.cc +407 -0
  52. data/vendor/zopfli/src/zopflipng/zopflipng_lib.cc +425 -0
  53. data/vendor/zopfli/src/zopflipng/zopflipng_lib.h +79 -0
  54. data/zopfli-bin.gemspec +119 -0
  55. metadata +225 -0
@@ -0,0 +1,129 @@
1
+ /*
2
+ Copyright 2011 Google Inc. All Rights Reserved.
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+
16
+ Author: lode.vandevenne@gmail.com (Lode Vandevenne)
17
+ Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
18
+ */
19
+
20
+ /*
21
+ Functions for basic LZ77 compression and utilities for the "squeeze" LZ77
22
+ compression.
23
+ */
24
+
25
+ #ifndef ZOPFLI_LZ77_H_
26
+ #define ZOPFLI_LZ77_H_
27
+
28
+ #include <stdlib.h>
29
+
30
+ #include "cache.h"
31
+ #include "hash.h"
32
+ #include "zopfli.h"
33
+
34
+ /*
35
+ Stores lit/length and dist pairs for LZ77.
36
+ Parameter litlens: Contains the literal symbols or length values.
37
+ Parameter dists: Contains the distances. A value is 0 to indicate that there is
38
+ no dist and the corresponding litlens value is a literal instead of a length.
39
+ Parameter size: The size of both the litlens and dists arrays.
40
+ The memory can best be managed by using ZopfliInitLZ77Store to initialize it,
41
+ ZopfliCleanLZ77Store to destroy it, and ZopfliStoreLitLenDist to append values.
42
+
43
+ */
44
+ typedef struct ZopfliLZ77Store {
45
+ unsigned short* litlens; /* Lit or len. */
46
+ unsigned short* dists; /* If 0: indicates literal in corresponding litlens,
47
+ if > 0: length in corresponding litlens, this is the distance. */
48
+ size_t size; /* Number of entries in both litlens and dists. */
49
+ } ZopfliLZ77Store;
50
+
51
+ void ZopfliInitLZ77Store(ZopfliLZ77Store* store);
52
+ void ZopfliCleanLZ77Store(ZopfliLZ77Store* store);
53
+ void ZopfliCopyLZ77Store(const ZopfliLZ77Store* source, ZopfliLZ77Store* dest);
54
+ void ZopfliStoreLitLenDist(unsigned short length, unsigned short dist,
55
+ ZopfliLZ77Store* store);
56
+
57
+ /*
58
+ Some state information for compressing a block.
59
+ This is currently a bit under-used (with mainly only the longest match cache),
60
+ but is kept for easy future expansion.
61
+ */
62
+ typedef struct ZopfliBlockState {
63
+ const ZopfliOptions* options; /* Options for this run; read-only through this pointer. */
64
+
65
+ #ifdef ZOPFLI_LONGEST_MATCH_CACHE
66
+ /* Cache for length/distance pairs found so far. */
67
+ ZopfliLongestMatchCache* lmc;
68
+ #endif
69
+
70
+ /* The start (inclusive) and end (not inclusive) of the current block. */
71
+ size_t blockstart;
72
+ size_t blockend;
73
+ } ZopfliBlockState;
74
+
75
+ /*
76
+ Finds the longest match (length and corresponding distance) for LZ77
77
+ compression.
78
+ Even when not using "sublen", it can be more efficient to provide an array,
79
+ because only then the caching is used.
80
+ array: the data
81
+ pos: position in the data to find the match for
82
+ size: size of the data
83
+ limit: limit length to maximum this value (default should be 258). This allows
84
+ finding a shorter dist for that length (= less extra bits). Must be
85
+ in the range [ZOPFLI_MIN_MATCH, ZOPFLI_MAX_MATCH].
86
+ sublen: output array of 259 elements, or null. Has, for each length, the
87
+ smallest distance required to reach this length. Only 256 of its 259 values
88
+ are used, the first 3 are ignored (the shortest length is 3. It is purely
89
+ for convenience that the array is made 3 longer).
90
+ */
91
+ void ZopfliFindLongestMatch(
92
+ ZopfliBlockState *s, const ZopfliHash* h, const unsigned char* array,
93
+ size_t pos, size_t size, size_t limit,
94
+ unsigned short* sublen, unsigned short* distance, unsigned short* length);
95
+
96
+ /*
97
+ Verifies if length and dist are indeed valid, only used for assertion.
98
+ */
99
+ void ZopfliVerifyLenDist(const unsigned char* data, size_t datasize, size_t pos,
100
+ unsigned short dist, unsigned short length);
101
+
102
+ /*
103
+ Counts the number of literal, length and distance symbols in the given lz77
104
+ arrays.
105
+ litlens: lz77 lit/lengths
106
+ dists: lz77 distances
107
+ start: where to begin counting in litlens and dists
108
+ end: where to stop counting in litlens and dists (not inclusive)
109
+ ll_count: count of each lit/len symbol, must have size 288 (see deflate
110
+ standard)
111
+ d_count: count of each dist symbol, must have size 32 (see deflate standard)
112
+ */
113
+ void ZopfliLZ77Counts(const unsigned short* litlens,
114
+ const unsigned short* dists,
115
+ size_t start, size_t end,
116
+ size_t* ll_count, size_t* d_count);
117
+
118
+ /*
119
+ Does LZ77 using an algorithm similar to gzip, with lazy matching, rather than
120
+ with the slow but better "squeeze" implementation.
121
+ The result is placed in the ZopfliLZ77Store.
122
+ If instart is larger than 0, it uses values before instart as starting
123
+ dictionary.
124
+ */
125
+ void ZopfliLZ77Greedy(ZopfliBlockState* s, const unsigned char* in,
126
+ size_t instart, size_t inend,
127
+ ZopfliLZ77Store* store);
128
+
129
+ #endif /* ZOPFLI_LZ77_H_ */
@@ -0,0 +1,546 @@
1
+ /*
2
+ Copyright 2011 Google Inc. All Rights Reserved.
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+
16
+ Author: lode.vandevenne@gmail.com (Lode Vandevenne)
17
+ Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
18
+ */
19
+
20
+ #include "squeeze.h"
21
+
22
+ #include <assert.h>
23
+ #include <math.h>
24
+ #include <stdio.h>
25
+
26
+ #include "blocksplitter.h"
27
+ #include "deflate.h"
28
+ #include "tree.h"
29
+ #include "util.h"
30
+
31
/*
Symbol statistics used by the statistical cost model.
litlens and dists hold occurrence counts per symbol; ll_symbols and d_symbols
hold the matching symbol lengths in bits.
*/
typedef struct SymbolStats {
  /* The literal and length symbols. */
  size_t litlens[288];
  /* The 32 unique dist symbols, not the 32768 possible dists. */
  size_t dists[32];

  double ll_symbols[288];  /* Length of each lit/len symbol in bits. */
  double d_symbols[32];  /* Length of each dist symbol in bits. */
} SymbolStats;

/* Sets every count and every bit length in stats to 0. */
static void InitStats(SymbolStats* stats) {
  memset(stats->litlens, 0, sizeof(stats->litlens));
  memset(stats->dists, 0, sizeof(stats->dists));

  memset(stats->ll_symbols, 0, sizeof(stats->ll_symbols));
  memset(stats->d_symbols, 0, sizeof(stats->d_symbols));
}

/*
Copies all four arrays of source into dest. source is only read, so it is
const-qualified; callers holding a non-const pointer are unaffected.
*/
static void CopyStats(const SymbolStats* source, SymbolStats* dest) {
  memcpy(dest->litlens, source->litlens, sizeof(dest->litlens));
  memcpy(dest->dists, source->dists, sizeof(dest->dists));

  memcpy(dest->ll_symbols, source->ll_symbols, sizeof(dest->ll_symbols));
  memcpy(dest->d_symbols, source->d_symbols, sizeof(dest->d_symbols));
}

/*
Blends the symbol counts (not the bit lengths) of two statistics:
  result count = stats1 count * w1 + stats2 count * w2, converted to size_t.
Only litlens and dists of result are written; ll_symbols and d_symbols are left
untouched. The count of the end symbol (256) is forced to 1.
result may be the same object as stats1 or stats2: every element is read before
the element at the same index is written.
*/
static void AddWeighedStatFreqs(const SymbolStats* stats1, double w1,
                                const SymbolStats* stats2, double w2,
                                SymbolStats* result) {
  size_t i;
  for (i = 0; i < 288; i++) {
    result->litlens[i] =
        (size_t) (stats1->litlens[i] * w1 + stats2->litlens[i] * w2);
  }
  for (i = 0; i < 32; i++) {
    result->dists[i] =
        (size_t) (stats1->dists[i] * w1 + stats2->dists[i] * w2);
  }
  result->litlens[256] = 1;  /* End symbol. */
}
74
+
75
/* The two state words of the pseudo random generator implemented by Ran. */
typedef struct RanState {
  unsigned int m_w, m_z;
} RanState;

/* Puts the generator in its fixed start state, so sequences are repeatable. */
static void InitRanState(RanState* state) {
  state->m_z = 2;
  state->m_w = 1;
}

/*
Returns the next pseudo random number and advances the state.
"Multiply-With-Carry" generator of G. Marsaglia.
*/
static unsigned int Ran(RanState* state) {
  const unsigned int z = state->m_z;
  const unsigned int w = state->m_w;

  state->m_z = 36969 * (z & 65535) + (z >> 16);
  state->m_w = 18000 * (w & 65535) + (w >> 16);

  /* Both words contribute to the 32-bit result. */
  return (state->m_z << 16) + state->m_w;
}
90
+
91
+ static void RandomizeFreqs(RanState* state, size_t* freqs, int n) {
92
+ int i;
93
+ for (i = 0; i < n; i++) {
94
+ if ((Ran(state) >> 4) % 3 == 0) freqs[i] = freqs[Ran(state) % n];
95
+ }
96
+ }
97
+
98
+ static void RandomizeStatFreqs(RanState* state, SymbolStats* stats) {
99
+ RandomizeFreqs(state, stats->litlens, 288);
100
+ RandomizeFreqs(state, stats->dists, 32);
101
+ stats->litlens[256] = 1; /* End symbol. */
102
+ }
103
+
104
+ static void ClearStatFreqs(SymbolStats* stats) {
105
+ size_t i;
106
+ for (i = 0; i < 288; i++) stats->litlens[i] = 0;
107
+ for (i = 0; i < 32; i++) stats->dists[i] = 0;
108
+ }
109
+
110
+ /*
111
+ Function that calculates a cost based on a model for the given LZ77 symbol.
112
+ litlen: means literal symbol if dist is 0, length otherwise.
+ dist: 0 for a literal, otherwise the distance belonging to the length.
+ context: opaque pointer handed through unchanged to the cost model.
+ Returns the cost of the symbol according to the model.
113
+ */
114
+ typedef double CostModelFun(unsigned litlen, unsigned dist, void* context);
115
+
116
/*
Cost model which should exactly match fixed tree.
type: CostModelFun
Literals cost 8 bits up to symbol 143 and 9 bits above that. A length/dist pair
costs 7 or 8 bits for the length symbol, 5 bits for the dist symbol, plus the
extra bits of both. The context argument is not used.
*/
static double GetCostFixed(unsigned litlen, unsigned dist, void* unused) {
  int dbits;
  int lbits;
  int lsym;
  double symbolbits;

  (void)unused;

  if (dist == 0) {
    return litlen <= 143 ? 8 : 9;
  }

  dbits = ZopfliGetDistExtraBits(dist);
  lbits = ZopfliGetLengthExtraBits(litlen);
  lsym = ZopfliGetLengthSymbol(litlen);

  symbolbits = lsym <= 279 ? 7 : 8;
  symbolbits += 5;  /* Every dist symbol has length 5. */
  return symbolbits + dbits + lbits;
}
136
+
137
+ /*
138
+ Cost model based on symbol statistics.
139
+ type: CostModelFun
140
+ */
141
+ static double GetCostStat(unsigned litlen, unsigned dist, void* context) {
142
+ SymbolStats* stats = (SymbolStats*)context;
143
+ if (dist == 0) {
144
+ return stats->ll_symbols[litlen];
145
+ } else {
146
+ int lsym = ZopfliGetLengthSymbol(litlen);
147
+ int lbits = ZopfliGetLengthExtraBits(litlen);
148
+ int dsym = ZopfliGetDistSymbol(dist);
149
+ int dbits = ZopfliGetDistExtraBits(dist);
150
+ return stats->ll_symbols[lsym] + lbits + stats->d_symbols[dsym] + dbits;
151
+ }
152
+ }
153
+
154
+ /*
155
+ Finds the minimum possible cost this cost model can return for valid length and
156
+ distance symbols.
157
+ */
158
+ static double GetCostModelMinCost(CostModelFun* costmodel, void* costcontext) {
159
+ double mincost;
160
+ int bestlength = 0; /* length that has lowest cost in the cost model */
161
+ int bestdist = 0; /* distance that has lowest cost in the cost model */
162
+ int i;
163
+ /*
164
+ Table of distances that have a different distance symbol in the deflate
165
+ specification. Each value is the first distance that has a new symbol. Only
166
+ different symbols affect the cost model so only these need to be checked.
167
+ See RFC 1951 section 3.2.5. Compressed blocks (length and distance codes).
168
+ */
169
+ static const int dsymbols[30] = {
170
+ 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513,
171
+ 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577
172
+ };
173
+
174
+ mincost = ZOPFLI_LARGE_FLOAT;
175
+ for (i = 3; i < 259; i++) {
176
+ double c = costmodel(i, 1, costcontext);
177
+ if (c < mincost) {
178
+ bestlength = i;
179
+ mincost = c;
180
+ }
181
+ }
182
+
183
+ mincost = ZOPFLI_LARGE_FLOAT;
184
+ for (i = 0; i < 30; i++) {
185
+ double c = costmodel(3, dsymbols[i], costcontext);
186
+ if (c < mincost) {
187
+ bestdist = dsymbols[i];
188
+ mincost = c;
189
+ }
190
+ }
191
+
192
+ return costmodel(bestlength, bestdist, costcontext);
193
+ }
194
+
195
+ /*
196
+ Performs the forward pass for "squeeze". Gets the most optimal length to reach
197
+ every byte from a previous byte, using cost calculations.
198
+ s: the ZopfliBlockState
199
+ in: the input data array
200
+ instart: where to start
201
+ inend: where to stop (not inclusive)
202
+ costmodel: function to calculate the cost of some lit/len/dist pair.
203
+ costcontext: abstract context for the costmodel function
204
+ length_array: output array of size (inend - instart) which will receive the best
205
+ length to reach this byte from a previous byte.
206
+ returns the cost that was, according to the costmodel, needed to get to the end.
207
+ */
208
+ static double GetBestLengths(ZopfliBlockState *s,
209
+ const unsigned char* in,
210
+ size_t instart, size_t inend,
211
+ CostModelFun* costmodel, void* costcontext,
212
+ unsigned short* length_array) {
213
+ /* Best cost to get here so far. */
214
+ size_t blocksize = inend - instart;
215
+ float* costs;
216
+ size_t i = 0, k;
217
+ unsigned short leng;
218
+ unsigned short dist;
219
+ unsigned short sublen[259];
220
+ size_t windowstart = instart > ZOPFLI_WINDOW_SIZE
221
+ ? instart - ZOPFLI_WINDOW_SIZE : 0;
222
+ ZopfliHash hash;
223
+ ZopfliHash* h = &hash;
224
+ double result;
225
+ double mincost = GetCostModelMinCost(costmodel, costcontext);
226
+
227
+ if (instart == inend) return 0;
228
+
229
+ costs = (float*)malloc(sizeof(float) * (blocksize + 1));
230
+ if (!costs) exit(-1); /* Allocation failed. */
231
+
232
+ ZopfliInitHash(ZOPFLI_WINDOW_SIZE, h);
233
+ ZopfliWarmupHash(in, windowstart, inend, h);
234
+ for (i = windowstart; i < instart; i++) {
235
+ ZopfliUpdateHash(in, i, inend, h);
236
+ }
237
+
238
+ for (i = 1; i < blocksize + 1; i++) costs[i] = ZOPFLI_LARGE_FLOAT;
239
+ costs[0] = 0; /* Because it's the start. */
240
+ length_array[0] = 0;
241
+
242
+ for (i = instart; i < inend; i++) {
243
+ size_t j = i - instart; /* Index in the costs array and length_array. */
244
+ ZopfliUpdateHash(in, i, inend, h);
245
+
246
+ #ifdef ZOPFLI_SHORTCUT_LONG_REPETITIONS
247
+ /* If we're in a long repetition of the same character and have more than
248
+ ZOPFLI_MAX_MATCH characters before and after our position. */
249
+ if (h->same[i & ZOPFLI_WINDOW_MASK] > ZOPFLI_MAX_MATCH * 2
250
+ && i > instart + ZOPFLI_MAX_MATCH + 1
251
+ && i + ZOPFLI_MAX_MATCH * 2 + 1 < inend
252
+ && h->same[(i - ZOPFLI_MAX_MATCH) & ZOPFLI_WINDOW_MASK]
253
+ > ZOPFLI_MAX_MATCH) {
254
+ double symbolcost = costmodel(ZOPFLI_MAX_MATCH, 1, costcontext);
255
+ /* Set the length to reach each one to ZOPFLI_MAX_MATCH, and the cost to
256
+ the cost corresponding to that length. Doing this, we skip
257
+ ZOPFLI_MAX_MATCH values to avoid calling ZopfliFindLongestMatch. */
258
+ for (k = 0; k < ZOPFLI_MAX_MATCH; k++) {
259
+ costs[j + ZOPFLI_MAX_MATCH] = costs[j] + symbolcost;
260
+ length_array[j + ZOPFLI_MAX_MATCH] = ZOPFLI_MAX_MATCH;
261
+ i++;
262
+ j++;
263
+ ZopfliUpdateHash(in, i, inend, h);
264
+ }
265
+ }
266
+ #endif
267
+
268
+ ZopfliFindLongestMatch(s, h, in, i, inend, ZOPFLI_MAX_MATCH, sublen,
269
+ &dist, &leng);
270
+
271
+ /* Literal. */
272
+ if (i + 1 <= inend) {
273
+ double newCost = costs[j] + costmodel(in[i], 0, costcontext);
274
+ assert(newCost >= 0);
275
+ if (newCost < costs[j + 1]) {
276
+ costs[j + 1] = newCost;
277
+ length_array[j + 1] = 1;
278
+ }
279
+ }
280
+ /* Lengths. */
281
+ for (k = 3; k <= leng && i + k <= inend; k++) {
282
+ double newCost;
283
+
284
+ /* Calling the cost model is expensive, avoid this if we are already at
285
+ the minimum possible cost that it can return. */
286
+ if (costs[j + k] - costs[j] <= mincost) continue;
287
+
288
+ newCost = costs[j] + costmodel(k, sublen[k], costcontext);
289
+ assert(newCost >= 0);
290
+ if (newCost < costs[j + k]) {
291
+ assert(k <= ZOPFLI_MAX_MATCH);
292
+ costs[j + k] = newCost;
293
+ length_array[j + k] = k;
294
+ }
295
+ }
296
+ }
297
+
298
+ assert(costs[blocksize] >= 0);
299
+ result = costs[blocksize];
300
+
301
+ ZopfliCleanHash(h);
302
+ free(costs);
303
+
304
+ return result;
305
+ }
306
+
307
+ /*
308
+ Calculates the optimal path of lz77 lengths to use, from the calculated
309
+ length_array. The length_array must contain the optimal length to reach that
310
+ byte. The path will be filled with the lengths to use, so its data size will be
311
+ the amount of lz77 symbols.
312
+ */
313
+ static void TraceBackwards(size_t size, const unsigned short* length_array,
314
+ unsigned short** path, size_t* pathsize) {
315
+ size_t index = size;
316
+ if (size == 0) return;
317
+ for (;;) {
318
+ ZOPFLI_APPEND_DATA(length_array[index], path, pathsize);
319
+ assert(length_array[index] <= index);
320
+ assert(length_array[index] <= ZOPFLI_MAX_MATCH);
321
+ assert(length_array[index] != 0);
322
+ index -= length_array[index];
323
+ if (index == 0) break;
324
+ }
325
+
326
+ /* Mirror result. */
327
+ for (index = 0; index < *pathsize / 2; index++) {
328
+ unsigned short temp = (*path)[index];
329
+ (*path)[index] = (*path)[*pathsize - index - 1];
330
+ (*path)[*pathsize - index - 1] = temp;
331
+ }
332
+ }
333
+
334
+ static void FollowPath(ZopfliBlockState* s,
335
+ const unsigned char* in, size_t instart, size_t inend,
336
+ unsigned short* path, size_t pathsize,
337
+ ZopfliLZ77Store* store) {
338
+ size_t i, j, pos = 0;
339
+ size_t windowstart = instart > ZOPFLI_WINDOW_SIZE
340
+ ? instart - ZOPFLI_WINDOW_SIZE : 0;
341
+
342
+ size_t total_length_test = 0;
343
+
344
+ ZopfliHash hash;
345
+ ZopfliHash* h = &hash;
346
+
347
+ if (instart == inend) return;
348
+
349
+ ZopfliInitHash(ZOPFLI_WINDOW_SIZE, h);
350
+ ZopfliWarmupHash(in, windowstart, inend, h);
351
+ for (i = windowstart; i < instart; i++) {
352
+ ZopfliUpdateHash(in, i, inend, h);
353
+ }
354
+
355
+ pos = instart;
356
+ for (i = 0; i < pathsize; i++) {
357
+ unsigned short length = path[i];
358
+ unsigned short dummy_length;
359
+ unsigned short dist;
360
+ assert(pos < inend);
361
+
362
+ ZopfliUpdateHash(in, pos, inend, h);
363
+
364
+ /* Add to output. */
365
+ if (length >= ZOPFLI_MIN_MATCH) {
366
+ /* Get the distance by recalculating longest match. The found length
367
+ should match the length from the path. */
368
+ ZopfliFindLongestMatch(s, h, in, pos, inend, length, 0,
369
+ &dist, &dummy_length);
370
+ assert(!(dummy_length != length && length > 2 && dummy_length > 2));
371
+ ZopfliVerifyLenDist(in, inend, pos, dist, length);
372
+ ZopfliStoreLitLenDist(length, dist, store);
373
+ total_length_test += length;
374
+ } else {
375
+ length = 1;
376
+ ZopfliStoreLitLenDist(in[pos], 0, store);
377
+ total_length_test++;
378
+ }
379
+
380
+
381
+ assert(pos + length <= inend);
382
+ for (j = 1; j < length; j++) {
383
+ ZopfliUpdateHash(in, pos + j, inend, h);
384
+ }
385
+
386
+ pos += length;
387
+ }
388
+
389
+ ZopfliCleanHash(h);
390
+ }
391
+
392
+ /* Calculates the entropy of the statistics */
393
+ static void CalculateStatistics(SymbolStats* stats) {
394
+ ZopfliCalculateEntropy(stats->litlens, 288, stats->ll_symbols);
395
+ ZopfliCalculateEntropy(stats->dists, 32, stats->d_symbols);
396
+ }
397
+
398
+ /* Appends the symbol statistics from the store. */
399
+ static void GetStatistics(const ZopfliLZ77Store* store, SymbolStats* stats) {
400
+ size_t i;
401
+ for (i = 0; i < store->size; i++) {
402
+ if (store->dists[i] == 0) {
403
+ stats->litlens[store->litlens[i]]++;
404
+ } else {
405
+ stats->litlens[ZopfliGetLengthSymbol(store->litlens[i])]++;
406
+ stats->dists[ZopfliGetDistSymbol(store->dists[i])]++;
407
+ }
408
+ }
409
+ stats->litlens[256] = 1; /* End symbol. */
410
+
411
+ CalculateStatistics(stats);
412
+ }
413
+
414
+ /*
415
+ Does a single run for ZopfliLZ77Optimal. For good compression, repeated runs
416
+ with updated statistics should be performed.
417
+
418
+ s: the block state
419
+ in: the input data array
420
+ instart: where to start
421
+ inend: where to stop (not inclusive)
422
+ path: pointer to dynamically allocated memory to store the path
423
+ pathsize: pointer to the size of the dynamic path array
424
+ length_array: array if size (inend - instart) used to store lengths
425
+ costmodel: function to use as the cost model for this squeeze run
426
+ costcontext: abstract context for the costmodel function
427
+ store: place to output the LZ77 data
428
+ returns the cost that was, according to the costmodel, needed to get to the end.
429
+ This is not the actual cost.
430
+ */
431
+ static double LZ77OptimalRun(ZopfliBlockState* s,
432
+ const unsigned char* in, size_t instart, size_t inend,
433
+ unsigned short** path, size_t* pathsize,
434
+ unsigned short* length_array, CostModelFun* costmodel,
435
+ void* costcontext, ZopfliLZ77Store* store) {
436
+ double cost = GetBestLengths(
437
+ s, in, instart, inend, costmodel, costcontext, length_array);
438
+ free(*path);
439
+ *path = 0;
440
+ *pathsize = 0;
441
+ TraceBackwards(inend - instart, length_array, path, pathsize);
442
+ FollowPath(s, in, instart, inend, *path, *pathsize, store);
443
+ assert(cost < ZOPFLI_LARGE_FLOAT);
444
+ return cost;
445
+ }
446
+
447
+ void ZopfliLZ77Optimal(ZopfliBlockState *s,
448
+ const unsigned char* in, size_t instart, size_t inend,
449
+ ZopfliLZ77Store* store) {
450
+ /* Dist to get to here with smallest cost. */
451
+ size_t blocksize = inend - instart;
452
+ unsigned short* length_array =
453
+ (unsigned short*)malloc(sizeof(unsigned short) * (blocksize + 1));
454
+ unsigned short* path = 0;
455
+ size_t pathsize = 0;
456
+ ZopfliLZ77Store currentstore;
457
+ SymbolStats stats, beststats, laststats;
458
+ int i;
459
+ double cost;
460
+ double bestcost = ZOPFLI_LARGE_FLOAT;
461
+ double lastcost = 0;
462
+ /* Try randomizing the costs a bit once the size stabilizes. */
463
+ RanState ran_state;
464
+ int lastrandomstep = -1;
465
+
466
+ if (!length_array) exit(-1); /* Allocation failed. */
467
+
468
+ InitRanState(&ran_state);
469
+ InitStats(&stats);
470
+ ZopfliInitLZ77Store(&currentstore);
471
+
472
+ /* Do regular deflate, then loop multiple shortest path runs, each time using
473
+ the statistics of the previous run. */
474
+
475
+ /* Initial run. */
476
+ ZopfliLZ77Greedy(s, in, instart, inend, &currentstore);
477
+ GetStatistics(&currentstore, &stats);
478
+
479
+ /* Repeat statistics with each time the cost model from the previous stat
480
+ run. */
481
+ for (i = 0; i < s->options->numiterations; i++) {
482
+ ZopfliCleanLZ77Store(&currentstore);
483
+ ZopfliInitLZ77Store(&currentstore);
484
+ LZ77OptimalRun(s, in, instart, inend, &path, &pathsize,
485
+ length_array, GetCostStat, (void*)&stats,
486
+ &currentstore);
487
+ cost = ZopfliCalculateBlockSize(currentstore.litlens, currentstore.dists,
488
+ 0, currentstore.size, 2);
489
+ if (s->options->verbose_more || (s->options->verbose && cost < bestcost)) {
490
+ fprintf(stderr, "Iteration %d: %d bit\n", i, (int) cost);
491
+ }
492
+ if (cost < bestcost) {
493
+ /* Copy to the output store. */
494
+ ZopfliCopyLZ77Store(&currentstore, store);
495
+ CopyStats(&stats, &beststats);
496
+ bestcost = cost;
497
+ }
498
+ CopyStats(&stats, &laststats);
499
+ ClearStatFreqs(&stats);
500
+ GetStatistics(&currentstore, &stats);
501
+ if (lastrandomstep != -1) {
502
+ /* This makes it converge slower but better. Do it only once the
503
+ randomness kicks in so that if the user does few iterations, it gives a
504
+ better result sooner. */
505
+ AddWeighedStatFreqs(&stats, 1.0, &laststats, 0.5, &stats);
506
+ CalculateStatistics(&stats);
507
+ }
508
+ if (i > 5 && cost == lastcost) {
509
+ CopyStats(&beststats, &stats);
510
+ RandomizeStatFreqs(&ran_state, &stats);
511
+ CalculateStatistics(&stats);
512
+ lastrandomstep = i;
513
+ }
514
+ lastcost = cost;
515
+ }
516
+
517
+ free(length_array);
518
+ free(path);
519
+ ZopfliCleanLZ77Store(&currentstore);
520
+ }
521
+
522
+ void ZopfliLZ77OptimalFixed(ZopfliBlockState *s,
523
+ const unsigned char* in,
524
+ size_t instart, size_t inend,
525
+ ZopfliLZ77Store* store)
526
+ {
527
+ /* Dist to get to here with smallest cost. */
528
+ size_t blocksize = inend - instart;
529
+ unsigned short* length_array =
530
+ (unsigned short*)malloc(sizeof(unsigned short) * (blocksize + 1));
531
+ unsigned short* path = 0;
532
+ size_t pathsize = 0;
533
+
534
+ if (!length_array) exit(-1); /* Allocation failed. */
535
+
536
+ s->blockstart = instart;
537
+ s->blockend = inend;
538
+
539
+ /* Shortest path for fixed tree This one should give the shortest possible
540
+ result for fixed tree, no repeated runs are needed since the tree is known. */
541
+ LZ77OptimalRun(s, in, instart, inend, &path, &pathsize,
542
+ length_array, GetCostFixed, 0, store);
543
+
544
+ free(length_array);
545
+ free(path);
546
+ }