zopfli 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.gitignore +17 -0
- data/.gitmodules +3 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +34 -0
- data/Rakefile +25 -0
- data/ext/extconf.rb +43 -0
- data/ext/zopfli.c +38 -0
- data/lib/zopfli/version.rb +3 -0
- data/test/fixtures/alice29.txt +3609 -0
- data/test/test_zopfli_deflate.rb +24 -0
- data/vendor/zopfli/CONTRIBUTORS +6 -0
- data/vendor/zopfli/COPYING +201 -0
- data/vendor/zopfli/README +24 -0
- data/vendor/zopfli/blocksplitter.c +344 -0
- data/vendor/zopfli/blocksplitter.h +77 -0
- data/vendor/zopfli/cache.c +119 -0
- data/vendor/zopfli/cache.h +66 -0
- data/vendor/zopfli/deflate.c +697 -0
- data/vendor/zopfli/deflate.h +77 -0
- data/vendor/zopfli/gzip_container.c +117 -0
- data/vendor/zopfli/gzip_container.h +42 -0
- data/vendor/zopfli/hash.c +135 -0
- data/vendor/zopfli/hash.h +70 -0
- data/vendor/zopfli/katajainen.c +251 -0
- data/vendor/zopfli/katajainen.h +42 -0
- data/vendor/zopfli/lz77.c +468 -0
- data/vendor/zopfli/lz77.h +126 -0
- data/vendor/zopfli/makefile +5 -0
- data/vendor/zopfli/squeeze.c +543 -0
- data/vendor/zopfli/squeeze.h +60 -0
- data/vendor/zopfli/tree.c +101 -0
- data/vendor/zopfli/tree.h +51 -0
- data/vendor/zopfli/util.c +202 -0
- data/vendor/zopfli/util.h +175 -0
- data/vendor/zopfli/zlib_container.c +79 -0
- data/vendor/zopfli/zlib_container.h +42 -0
- data/vendor/zopfli/zopfli.h +71 -0
- data/vendor/zopfli/zopfli_bin.c +204 -0
- data/vendor/zopfli/zopfli_lib.c +37 -0
- data/zopfli.gemspec +49 -0
- metadata +130 -0
@@ -0,0 +1,126 @@
|
|
1
|
+
/*
|
2
|
+
Copyright 2011 Google Inc. All Rights Reserved.
|
3
|
+
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
you may not use this file except in compliance with the License.
|
6
|
+
You may obtain a copy of the License at
|
7
|
+
|
8
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
See the License for the specific language governing permissions and
|
14
|
+
limitations under the License.
|
15
|
+
|
16
|
+
Author: lode.vandevenne@gmail.com (Lode Vandevenne)
|
17
|
+
Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
|
18
|
+
*/
|
19
|
+
|
20
|
+
/*
|
21
|
+
Functions for basic LZ77 compression and utilities for the "squeeze" LZ77
|
22
|
+
compression.
|
23
|
+
*/
|
24
|
+
|
25
|
+
#ifndef ZOPFLI_LZ77_H_
|
26
|
+
#define ZOPFLI_LZ77_H_
|
27
|
+
|
28
|
+
#include <stdlib.h>
|
29
|
+
|
30
|
+
#include "cache.h"
|
31
|
+
#include "hash.h"
|
32
|
+
#include "zopfli.h"
|
33
|
+
|
34
|
+
/*
Stores lit/length and dist pairs for LZ77.
litlens and dists are parallel arrays: entry i of both together describes one
LZ77 symbol.
litlens: contains the literal symbols or length values.
dists: contains the distance, or 0 to indicate that there is no distance and
  the corresponding litlens entry is a literal instead of a length.
size: the number of entries in litlens and in dists (both have the same size).
*/
typedef struct ZopfliLZ77Store {
  unsigned short* litlens;  /* Lit or len. */
  unsigned short* dists;  /* If 0: indicates literal in corresponding litlens,
      if > 0: length in corresponding litlens, this is the distance. */
  size_t size;
} ZopfliLZ77Store;
|
47
|
+
|
48
|
+
/*
Lifecycle helpers for ZopfliLZ77Store. Only the declarations are visible here;
the notes below describe how callers use them and should be confirmed against
the implementation.
ZopfliInitLZ77Store: called on a store before it is first used, and again after
  ZopfliCleanLZ77Store when a store is reused (presumably resets it to empty).
ZopfliCleanLZ77Store: called when a store is no longer needed (presumably
  releases the litlens and dists arrays).
ZopfliCopyLZ77Store: copies source into dest.
ZopfliStoreLitLenDist: adds one symbol to store. Callers pass (literal byte, 0)
  for a literal and (match length, distance) for a match.
*/
void ZopfliInitLZ77Store(ZopfliLZ77Store* store);
void ZopfliCleanLZ77Store(ZopfliLZ77Store* store);
void ZopfliCopyLZ77Store(const ZopfliLZ77Store* source, ZopfliLZ77Store* dest);
void ZopfliStoreLitLenDist(unsigned short length, unsigned short dist,
                           ZopfliLZ77Store* store);
|
53
|
+
|
54
|
+
/*
|
55
|
+
Some state information for compressing a block.
|
56
|
+
This is currently a bit under-used (with mainly only the longest match cache),
|
57
|
+
but is kept for easy future expansion.
|
58
|
+
*/
|
59
|
+
typedef struct ZopfliBlockState {
|
60
|
+
const ZopfliOptions* options;
|
61
|
+
|
62
|
+
#ifdef ZOPFLI_LONGEST_MATCH_CACHE
|
63
|
+
/* Cache for length/distance pairs found so far. */
|
64
|
+
ZopfliLongestMatchCache* lmc;
|
65
|
+
#endif
|
66
|
+
|
67
|
+
/* The start (inclusive) and end (not inclusive) of the current block. */
|
68
|
+
size_t blockstart;
|
69
|
+
size_t blockend;
|
70
|
+
} ZopfliBlockState;
|
71
|
+
|
72
|
+
/*
|
73
|
+
Finds the longest match (length and corresponding distance) for LZ77
|
74
|
+
compression.
|
75
|
+
Even when not using "sublen", it can be more efficient to provide an array,
|
76
|
+
because only then the caching is used.
|
77
|
+
array: the data
|
78
|
+
pos: position in the data to find the match for
|
79
|
+
size: size of the data
|
80
|
+
limit: limit length to maximum this value (default should be 258). This allows
|
81
|
+
finding a shorter dist for that length (= less extra bits). Must be
|
82
|
+
in the range [ZOPFLI_MIN_MATCH, ZOPFLI_MAX_MATCH].
|
83
|
+
sublen: output array of 259 elements, or null. Has, for each length, the
|
84
|
+
smallest distance required to reach this length. Only 256 of its 259 values
|
85
|
+
are used, the first 3 are ignored (the shortest length is 3. It is purely
|
86
|
+
for convenience that the array is made 3 longer).
|
87
|
+
*/
|
88
|
+
void ZopfliFindLongestMatch(
|
89
|
+
ZopfliBlockState *s, const ZopfliHash* h, const unsigned char* array,
|
90
|
+
size_t pos, size_t size, size_t limit,
|
91
|
+
unsigned short* sublen, unsigned short* distance, unsigned short* length);
|
92
|
+
|
93
|
+
/*
Verifies if length and dist are indeed valid, only used for assertion.
data: the input data
datasize: size of the data (callers pass the end position of the input)
pos: position in data where the match starts
dist: the distance to verify
length: the length to verify
*/
void ZopfliVerifyLenDist(const unsigned char* data, size_t datasize, size_t pos,
                         unsigned short dist, unsigned short length);
|
98
|
+
|
99
|
+
/*
Counts the number of literal, length and distance symbols in the given lz77
arrays.
litlens: lz77 lit/lengths
dists: lz77 distances
start: where to begin counting in litlens and dists
end: where to stop counting in litlens and dists (not inclusive)
ll_count: count of each lit/len symbol, must have size 288 (see deflate
    standard)
d_count: count of each dist symbol, must have size 32 (see deflate standard)
*/
void ZopfliLZ77Counts(const unsigned short* litlens,
                      const unsigned short* dists,
                      size_t start, size_t end,
                      size_t* ll_count, size_t* d_count);
|
114
|
+
|
115
|
+
/*
|
116
|
+
Does LZ77 using an algorithm similar to gzip, with lazy matching, rather than
|
117
|
+
with the slow but better "squeeze" implementation.
|
118
|
+
The result is placed in the ZopfliLZ77Store.
|
119
|
+
If instart is larger than 0, it uses values before instart as starting
|
120
|
+
dictionary.
|
121
|
+
*/
|
122
|
+
void ZopfliLZ77Greedy(ZopfliBlockState* s, const unsigned char* in,
|
123
|
+
size_t instart, size_t inend,
|
124
|
+
ZopfliLZ77Store* store);
|
125
|
+
|
126
|
+
#endif /* ZOPFLI_LZ77_H_ */
|
@@ -0,0 +1,543 @@
|
|
1
|
+
/*
|
2
|
+
Copyright 2011 Google Inc. All Rights Reserved.
|
3
|
+
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
you may not use this file except in compliance with the License.
|
6
|
+
You may obtain a copy of the License at
|
7
|
+
|
8
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
See the License for the specific language governing permissions and
|
14
|
+
limitations under the License.
|
15
|
+
|
16
|
+
Author: lode.vandevenne@gmail.com (Lode Vandevenne)
|
17
|
+
Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
|
18
|
+
*/
|
19
|
+
|
20
|
+
#include "squeeze.h"
|
21
|
+
|
22
|
+
#include <assert.h>
|
23
|
+
#include <math.h>
|
24
|
+
#include <stdio.h>
|
25
|
+
|
26
|
+
#include "blocksplitter.h"
|
27
|
+
#include "deflate.h"
|
28
|
+
#include "tree.h"
|
29
|
+
#include "util.h"
|
30
|
+
|
31
|
+
/*
Symbol statistics used by the statistical cost model: how often each deflate
symbol occurs, and the bit cost derived from those frequencies.
*/
typedef struct SymbolStats {
  /* The literal and length symbols. */
  size_t litlens[288];
  /* The 32 unique dist symbols, not the 32768 possible dists. */
  size_t dists[32];

  double ll_symbols[288];  /* Length of each lit/len symbol in bits. */
  double d_symbols[32];  /* Length of each dist symbol in bits. */
} SymbolStats;

/* Sets everything (frequencies and bit lengths) to 0. */
static void InitStats(SymbolStats* stats) {
  memset(stats->litlens, 0, sizeof(stats->litlens));
  memset(stats->dists, 0, sizeof(stats->dists));

  memset(stats->ll_symbols, 0, sizeof(stats->ll_symbols));
  memset(stats->d_symbols, 0, sizeof(stats->d_symbols));
}

/*
Copies all four arrays of source into dest. source is not modified.
source and dest must not be the same object, since memcpy is used.
*/
static void CopyStats(const SymbolStats* source, SymbolStats* dest) {
  memcpy(dest->litlens, source->litlens, sizeof(dest->litlens));
  memcpy(dest->dists, source->dists, sizeof(dest->dists));

  memcpy(dest->ll_symbols, source->ll_symbols, sizeof(dest->ll_symbols));
  memcpy(dest->d_symbols, source->d_symbols, sizeof(dest->d_symbols));
}

/*
Combines the symbol frequencies (not the bit lengths) of two statistics:
result frequency = stats1 frequency * w1 + stats2 frequency * w2, truncated to
an integer. The end symbol (256) is then forced to frequency 1.
ll_symbols and d_symbols of result are left untouched, so the caller has to
recalculate them afterwards.
result may be the same object as stats1 or stats2: every element is read before
it is written.
*/
static void AddWeighedStatFreqs(const SymbolStats* stats1, double w1,
                                const SymbolStats* stats2, double w2,
                                SymbolStats* result) {
  size_t i;
  for (i = 0; i < 288; i++) {
    result->litlens[i] =
        (size_t) (stats1->litlens[i] * w1 + stats2->litlens[i] * w2);
  }
  for (i = 0; i < 32; i++) {
    result->dists[i] =
        (size_t) (stats1->dists[i] * w1 + stats2->dists[i] * w2);
  }
  result->litlens[256] = 1;  /* End symbol. */
}
|
74
|
+
|
75
|
+
/* State of the pseudo random number generator used to perturb statistics. */
typedef struct RanState {
  unsigned int m_w, m_z;
} RanState;

/* Seeds the generator with fixed values, so every run gives the same sequence. */
static void InitRanState(RanState* state) {
  state->m_w = 1;
  state->m_z = 2;
}

/*
Get random number: "Multiply-With-Carry" generator of G. Marsaglia.
Advances both halves of the state and returns the next value.
The "32-bit result" relies on unsigned int being 32 bits wide; with a wider
unsigned int the returned values would differ.
*/
static unsigned int Ran(RanState* state) {
  state->m_z = 36969 * (state->m_z & 65535) + (state->m_z >> 16);
  state->m_w = 18000 * (state->m_w & 65535) + (state->m_w >> 16);
  return (state->m_z << 16) + state->m_w;  /* 32-bit result. */
}
|
90
|
+
|
91
|
+
/*
Randomly perturbs an array of n frequencies: each entry is, with a chance of
about one in three, overwritten with the value of a randomly chosen entry of
the same array. Entries are processed in order, so a copied value may itself
already have been overwritten earlier in the same call.
state: the random generator state, advanced once or twice per entry
freqs: the frequencies to modify in place
n: number of entries in freqs
*/
static void RandomizeFreqs(RanState* state, size_t* freqs, int n) {
  int i;
  for (i = 0; i < n; i++) {
    if ((Ran(state) >> 4) % 3 == 0) freqs[i] = freqs[Ran(state) % n];
  }
}
|
97
|
+
|
98
|
+
/*
Randomly perturbs the lit/len and dist frequencies of stats, then forces the
end symbol (256) back to frequency 1. The bit lengths (ll_symbols, d_symbols)
are not updated here; the caller recalculates them.
*/
static void RandomizeStatFreqs(RanState* state, SymbolStats* stats) {
  RandomizeFreqs(state, stats->litlens, 288);
  RandomizeFreqs(state, stats->dists, 32);
  stats->litlens[256] = 1;  /* End symbol. */
}
|
103
|
+
|
104
|
+
/*
Sets all lit/len and dist frequencies of stats to 0. Unlike InitStats, the bit
lengths (ll_symbols, d_symbols) are kept.
*/
static void ClearStatFreqs(SymbolStats* stats) {
  size_t i;
  for (i = 0; i < 288; i++) stats->litlens[i] = 0;
  for (i = 0; i < 32; i++) stats->dists[i] = 0;
}
|
109
|
+
|
110
|
+
/*
Function that calculates a cost based on a model for the given LZ77 symbol.
litlen: means literal symbol if dist is 0, length otherwise.
dist: the distance, or 0 for a literal.
context: opaque data for the model, passed through unchanged by the callers.
returns the cost of the symbol; the models in this file return a cost in bits.
*/
typedef double CostModelFun(unsigned litlen, unsigned dist, void* context);
|
115
|
+
|
116
|
+
/*
Cost model which should exactly match fixed tree.
type: CostModelFun
litlen: literal symbol if dist is 0, match length otherwise
dist: match distance, or 0 for a literal
unused: the context, ignored by this model
returns the number of bits the symbol takes with the fixed deflate tree:
  8 or 9 bits for a literal; for a match 7 or 8 bits for the length symbol,
  5 bits for the dist symbol, plus the extra bits of both.
*/
static double GetCostFixed(unsigned litlen, unsigned dist, void* unused) {
  (void)unused;
  if (dist == 0) {
    if (litlen <= 143) return 8;
    else return 9;
  } else {
    int dbits = ZopfliGetDistExtraBits(dist);
    int lbits = ZopfliGetLengthExtraBits(litlen);
    int lsym = ZopfliGetLengthSymbol(litlen);
    double cost = 0;
    if (lsym <= 279) cost += 7;
    else cost += 8;
    cost += 5;  /* Every dist symbol has length 5. */
    return cost + dbits + lbits;
  }
}
|
136
|
+
|
137
|
+
/*
|
138
|
+
Cost model based on symbol statistics.
|
139
|
+
type: CostModelFun
|
140
|
+
*/
|
141
|
+
static double GetCostStat(unsigned litlen, unsigned dist, void* context) {
|
142
|
+
SymbolStats* stats = (SymbolStats*)context;
|
143
|
+
if (dist == 0) {
|
144
|
+
return stats->ll_symbols[litlen];
|
145
|
+
} else {
|
146
|
+
int lsym = ZopfliGetLengthSymbol(litlen);
|
147
|
+
int lbits = ZopfliGetLengthExtraBits(litlen);
|
148
|
+
int dsym = ZopfliGetDistSymbol(dist);
|
149
|
+
int dbits = ZopfliGetDistExtraBits(dist);
|
150
|
+
return stats->ll_symbols[lsym] + lbits + stats->d_symbols[dsym] + dbits;
|
151
|
+
}
|
152
|
+
}
|
153
|
+
|
154
|
+
/*
|
155
|
+
Finds the minimum possible cost this cost model can return for valid length and
|
156
|
+
distance symbols.
|
157
|
+
*/
|
158
|
+
static double GetCostModelMinCost(CostModelFun* costmodel, void* costcontext) {
|
159
|
+
double mincost;
|
160
|
+
int bestlength = 0; /* length that has lowest cost in the cost model */
|
161
|
+
int bestdist = 0; /* distance that has lowest cost in the cost model */
|
162
|
+
int i;
|
163
|
+
/*
|
164
|
+
Table of distances that have a different distance symbol in the deflate
|
165
|
+
specification. Each value is the first distance that has a new symbol. Only
|
166
|
+
different symbols affect the cost model so only these need to be checked.
|
167
|
+
See RFC 1951 section 3.2.5. Compressed blocks (length and distance codes).
|
168
|
+
*/
|
169
|
+
static const int dsymbols[30] = {
|
170
|
+
1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513,
|
171
|
+
769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577
|
172
|
+
};
|
173
|
+
|
174
|
+
mincost = ZOPFLI_LARGE_FLOAT;
|
175
|
+
for (i = 3; i < 259; i++) {
|
176
|
+
double c = costmodel(i, 1, costcontext);
|
177
|
+
if (c < mincost) {
|
178
|
+
bestlength = i;
|
179
|
+
mincost = c;
|
180
|
+
}
|
181
|
+
}
|
182
|
+
|
183
|
+
mincost = ZOPFLI_LARGE_FLOAT;
|
184
|
+
for (i = 0; i < 30; i++) {
|
185
|
+
double c = costmodel(3, dsymbols[i], costcontext);
|
186
|
+
if (c < mincost) {
|
187
|
+
bestdist = dsymbols[i];
|
188
|
+
mincost = c;
|
189
|
+
}
|
190
|
+
}
|
191
|
+
|
192
|
+
return costmodel(bestlength, bestdist, costcontext);
|
193
|
+
}
|
194
|
+
|
195
|
+
/*
|
196
|
+
Performs the forward pass for "squeeze". Gets the most optimal length to reach
|
197
|
+
every byte from a previous byte, using cost calculations.
|
198
|
+
s: the ZopfliBlockState
|
199
|
+
in: the input data array
|
200
|
+
instart: where to start
|
201
|
+
inend: where to stop (not inclusive)
|
202
|
+
costmodel: function to calculate the cost of some lit/len/dist pair.
|
203
|
+
costcontext: abstract context for the costmodel function
|
204
|
+
length_array: output array of size (inend - instart) which will receive the best
|
205
|
+
length to reach this byte from a previous byte.
|
206
|
+
returns the cost that was, according to the costmodel, needed to get to the end.
|
207
|
+
*/
|
208
|
+
static double GetBestLengths(ZopfliBlockState *s,
|
209
|
+
const unsigned char* in,
|
210
|
+
size_t instart, size_t inend,
|
211
|
+
CostModelFun* costmodel, void* costcontext,
|
212
|
+
unsigned short* length_array) {
|
213
|
+
/* Best cost to get here so far. */
|
214
|
+
size_t blocksize = inend - instart;
|
215
|
+
float* costs;
|
216
|
+
size_t i = 0, k;
|
217
|
+
unsigned short leng;
|
218
|
+
unsigned short dist;
|
219
|
+
unsigned short sublen[259];
|
220
|
+
size_t windowstart = instart > ZOPFLI_WINDOW_SIZE
|
221
|
+
? instart - ZOPFLI_WINDOW_SIZE : 0;
|
222
|
+
ZopfliHash hash;
|
223
|
+
ZopfliHash* h = &hash;
|
224
|
+
double result;
|
225
|
+
double mincost = GetCostModelMinCost(costmodel, costcontext);
|
226
|
+
|
227
|
+
if (instart == inend) return 0;
|
228
|
+
|
229
|
+
costs = (float*)malloc(sizeof(float) * (blocksize + 1));
|
230
|
+
if (!costs) exit(-1); /* Allocation failed. */
|
231
|
+
|
232
|
+
ZopfliInitHash(ZOPFLI_WINDOW_SIZE, h);
|
233
|
+
ZopfliWarmupHash(in, windowstart, inend, h);
|
234
|
+
for (i = windowstart; i < instart; i++) {
|
235
|
+
ZopfliUpdateHash(in, i, inend, h);
|
236
|
+
}
|
237
|
+
|
238
|
+
for (i = 1; i < blocksize + 1; i++) costs[i] = ZOPFLI_LARGE_FLOAT;
|
239
|
+
costs[0] = 0; /* Because it's the start. */
|
240
|
+
length_array[0] = 0;
|
241
|
+
|
242
|
+
for (i = instart; i < inend; i++) {
|
243
|
+
size_t j = i - instart; /* Index in the costs array and length_array. */
|
244
|
+
ZopfliUpdateHash(in, i, inend, h);
|
245
|
+
|
246
|
+
#ifdef ZOPFLI_SHORTCUT_LONG_REPETITIONS
|
247
|
+
/* If we're in a long repetition of the same character and have more than
|
248
|
+
ZOPFLI_MAX_MATCH characters before and after our position. */
|
249
|
+
if (h->same[i & ZOPFLI_WINDOW_MASK] > ZOPFLI_MAX_MATCH * 2
|
250
|
+
&& i > instart + ZOPFLI_MAX_MATCH + 1
|
251
|
+
&& i + ZOPFLI_MAX_MATCH * 2 + 1 < inend
|
252
|
+
&& h->same[(i - ZOPFLI_MAX_MATCH) & ZOPFLI_WINDOW_MASK]
|
253
|
+
> ZOPFLI_MAX_MATCH) {
|
254
|
+
double symbolcost = costmodel(ZOPFLI_MAX_MATCH, 1, costcontext);
|
255
|
+
/* Set the length to reach each one to ZOPFLI_MAX_MATCH, and the cost to
|
256
|
+
the cost corresponding to that length. Doing this, we skip
|
257
|
+
ZOPFLI_MAX_MATCH values to avoid calling ZopfliFindLongestMatch. */
|
258
|
+
for (k = 0; k < ZOPFLI_MAX_MATCH; k++) {
|
259
|
+
costs[j + ZOPFLI_MAX_MATCH] = costs[j] + symbolcost;
|
260
|
+
length_array[j + ZOPFLI_MAX_MATCH] = ZOPFLI_MAX_MATCH;
|
261
|
+
i++;
|
262
|
+
j++;
|
263
|
+
ZopfliUpdateHash(in, i, inend, h);
|
264
|
+
}
|
265
|
+
}
|
266
|
+
#endif
|
267
|
+
|
268
|
+
ZopfliFindLongestMatch(s, h, in, i, inend, ZOPFLI_MAX_MATCH, sublen,
|
269
|
+
&dist, &leng);
|
270
|
+
|
271
|
+
/* Literal. */
|
272
|
+
if (i + 1 <= inend) {
|
273
|
+
double newCost = costs[j] + costmodel(in[i], 0, costcontext);
|
274
|
+
assert(newCost >= 0);
|
275
|
+
if (newCost < costs[j + 1]) {
|
276
|
+
costs[j + 1] = newCost;
|
277
|
+
length_array[j + 1] = 1;
|
278
|
+
}
|
279
|
+
}
|
280
|
+
/* Lengths. */
|
281
|
+
for (k = 3; k <= leng && i + k <= inend; k++) {
|
282
|
+
double newCost;
|
283
|
+
|
284
|
+
/* Calling the cost model is expensive, avoid this if we are already at
|
285
|
+
the minimum possible cost that it can return. */
|
286
|
+
if (costs[j + k] - costs[j] <= mincost) continue;
|
287
|
+
|
288
|
+
newCost = costs[j] + costmodel(k, sublen[k], costcontext);
|
289
|
+
assert(newCost >= 0);
|
290
|
+
if (newCost < costs[j + k]) {
|
291
|
+
assert(k <= ZOPFLI_MAX_MATCH);
|
292
|
+
costs[j + k] = newCost;
|
293
|
+
length_array[j + k] = k;
|
294
|
+
}
|
295
|
+
}
|
296
|
+
}
|
297
|
+
|
298
|
+
assert(costs[blocksize] >= 0);
|
299
|
+
result = costs[blocksize];
|
300
|
+
|
301
|
+
ZopfliCleanHash(h);
|
302
|
+
free(costs);
|
303
|
+
|
304
|
+
return result;
|
305
|
+
}
|
306
|
+
|
307
|
+
/*
|
308
|
+
Calculates the optimal path of lz77 lengths to use, from the calculated
|
309
|
+
length_array. The length_array must contain the optimal length to reach that
|
310
|
+
byte. The path will be filled with the lengths to use, so its data size will be
|
311
|
+
the amount of lz77 symbols.
|
312
|
+
*/
|
313
|
+
static void TraceBackwards(size_t size, const unsigned short* length_array,
|
314
|
+
unsigned short** path, size_t* pathsize) {
|
315
|
+
size_t index = size;
|
316
|
+
if (size == 0) return;
|
317
|
+
for (;;) {
|
318
|
+
ZOPFLI_APPEND_DATA(length_array[index], path, pathsize);
|
319
|
+
assert(length_array[index] <= index);
|
320
|
+
assert(length_array[index] <= ZOPFLI_MAX_MATCH);
|
321
|
+
assert(length_array[index] != 0);
|
322
|
+
index -= length_array[index];
|
323
|
+
if (index == 0) break;
|
324
|
+
}
|
325
|
+
|
326
|
+
/* Mirror result. */
|
327
|
+
for (index = 0; index < *pathsize / 2; index++) {
|
328
|
+
unsigned short temp = (*path)[index];
|
329
|
+
(*path)[index] = (*path)[*pathsize - index - 1];
|
330
|
+
(*path)[*pathsize - index - 1] = temp;
|
331
|
+
}
|
332
|
+
}
|
333
|
+
|
334
|
+
static void FollowPath(ZopfliBlockState* s,
|
335
|
+
const unsigned char* in, size_t instart, size_t inend,
|
336
|
+
unsigned short* path, size_t pathsize,
|
337
|
+
ZopfliLZ77Store* store) {
|
338
|
+
size_t i, j, pos = 0;
|
339
|
+
size_t windowstart = instart > ZOPFLI_WINDOW_SIZE
|
340
|
+
? instart - ZOPFLI_WINDOW_SIZE : 0;
|
341
|
+
|
342
|
+
size_t total_length_test = 0;
|
343
|
+
|
344
|
+
ZopfliHash hash;
|
345
|
+
ZopfliHash* h = &hash;
|
346
|
+
|
347
|
+
if (instart == inend) return;
|
348
|
+
|
349
|
+
ZopfliInitHash(ZOPFLI_WINDOW_SIZE, h);
|
350
|
+
ZopfliWarmupHash(in, windowstart, inend, h);
|
351
|
+
for (i = windowstart; i < instart; i++) {
|
352
|
+
ZopfliUpdateHash(in, i, inend, h);
|
353
|
+
}
|
354
|
+
|
355
|
+
pos = instart;
|
356
|
+
for (i = 0; i < pathsize; i++) {
|
357
|
+
unsigned short length = path[i];
|
358
|
+
unsigned short dummy_length;
|
359
|
+
unsigned short dist;
|
360
|
+
assert(pos < inend);
|
361
|
+
|
362
|
+
ZopfliUpdateHash(in, pos, inend, h);
|
363
|
+
|
364
|
+
/* Add to output. */
|
365
|
+
if (length >= ZOPFLI_MIN_MATCH) {
|
366
|
+
/* Get the distance by recalculating longest match. The found length
|
367
|
+
should match the length from the path. */
|
368
|
+
ZopfliFindLongestMatch(s, h, in, pos, inend, length, 0,
|
369
|
+
&dist, &dummy_length);
|
370
|
+
assert(!(dummy_length != length && length > 2 && dummy_length > 2));
|
371
|
+
ZopfliVerifyLenDist(in, inend, pos, dist, length);
|
372
|
+
ZopfliStoreLitLenDist(length, dist, store);
|
373
|
+
total_length_test += length;
|
374
|
+
} else {
|
375
|
+
length = 1;
|
376
|
+
ZopfliStoreLitLenDist(in[pos], 0, store);
|
377
|
+
total_length_test++;
|
378
|
+
}
|
379
|
+
|
380
|
+
|
381
|
+
assert(pos + length <= inend);
|
382
|
+
for (j = 1; j < length; j++) {
|
383
|
+
ZopfliUpdateHash(in, pos + j, inend, h);
|
384
|
+
}
|
385
|
+
|
386
|
+
pos += length;
|
387
|
+
}
|
388
|
+
|
389
|
+
ZopfliCleanHash(h);
|
390
|
+
}
|
391
|
+
|
392
|
+
/* Calculates the entropy of the statistics */
|
393
|
+
static void CalculateStatistics(SymbolStats* stats) {
|
394
|
+
ZopfliCalculateEntropy(stats->litlens, 288, stats->ll_symbols);
|
395
|
+
ZopfliCalculateEntropy(stats->dists, 32, stats->d_symbols);
|
396
|
+
}
|
397
|
+
|
398
|
+
/* Appends the symbol statistics from the store. */
|
399
|
+
static void GetStatistics(const ZopfliLZ77Store* store, SymbolStats* stats) {
|
400
|
+
size_t i;
|
401
|
+
for (i = 0; i < store->size; i++) {
|
402
|
+
if (store->dists[i] == 0) {
|
403
|
+
stats->litlens[store->litlens[i]]++;
|
404
|
+
} else {
|
405
|
+
stats->litlens[ZopfliGetLengthSymbol(store->litlens[i])]++;
|
406
|
+
stats->dists[ZopfliGetDistSymbol(store->dists[i])]++;
|
407
|
+
}
|
408
|
+
}
|
409
|
+
stats->litlens[256] = 1; /* End symbol. */
|
410
|
+
|
411
|
+
CalculateStatistics(stats);
|
412
|
+
}
|
413
|
+
|
414
|
+
/*
|
415
|
+
Does a single run for ZopfliLZ77Optimal. For good compression, repeated runs
|
416
|
+
with updated statistics should be performed.
|
417
|
+
|
418
|
+
s: the block state
|
419
|
+
in: the input data array
|
420
|
+
instart: where to start
|
421
|
+
inend: where to stop (not inclusive)
|
422
|
+
path: pointer to dynamically allocated memory to store the path
|
423
|
+
pathsize: pointer to the size of the dynamic path array
|
424
|
+
length_array: array if size (inend - instart) used to store lengths
|
425
|
+
costmodel: function to use as the cost model for this squeeze run
|
426
|
+
costcontext: abstract context for the costmodel function
|
427
|
+
store: place to output the LZ77 data
|
428
|
+
returns the cost that was, according to the costmodel, needed to get to the end.
|
429
|
+
This is not the actual cost.
|
430
|
+
*/
|
431
|
+
static double LZ77OptimalRun(ZopfliBlockState* s,
|
432
|
+
const unsigned char* in, size_t instart, size_t inend,
|
433
|
+
unsigned short** path, size_t* pathsize,
|
434
|
+
unsigned short* length_array, CostModelFun* costmodel,
|
435
|
+
void* costcontext, ZopfliLZ77Store* store) {
|
436
|
+
double cost = GetBestLengths(
|
437
|
+
s, in, instart, inend, costmodel, costcontext, length_array);
|
438
|
+
free(*path);
|
439
|
+
*path = 0;
|
440
|
+
*pathsize = 0;
|
441
|
+
TraceBackwards(inend - instart, length_array, path, pathsize);
|
442
|
+
FollowPath(s, in, instart, inend, *path, *pathsize, store);
|
443
|
+
assert(cost < ZOPFLI_LARGE_FLOAT);
|
444
|
+
return cost;
|
445
|
+
}
|
446
|
+
|
447
|
+
void ZopfliLZ77Optimal(ZopfliBlockState *s,
|
448
|
+
const unsigned char* in, size_t instart, size_t inend,
|
449
|
+
ZopfliLZ77Store* store) {
|
450
|
+
/* Dist to get to here with smallest cost. */
|
451
|
+
size_t blocksize = inend - instart;
|
452
|
+
unsigned short* length_array =
|
453
|
+
(unsigned short*)malloc(sizeof(unsigned short) * (blocksize + 1));
|
454
|
+
unsigned short* path = 0;
|
455
|
+
size_t pathsize = 0;
|
456
|
+
ZopfliLZ77Store currentstore;
|
457
|
+
SymbolStats stats, beststats, laststats;
|
458
|
+
int i;
|
459
|
+
double cost;
|
460
|
+
double bestcost = ZOPFLI_LARGE_FLOAT;
|
461
|
+
double lastcost = 0;
|
462
|
+
/* Try randomizing the costs a bit once the size stabilizes. */
|
463
|
+
RanState ran_state;
|
464
|
+
int lastrandomstep = -1;
|
465
|
+
|
466
|
+
if (!length_array) exit(-1); /* Allocation failed. */
|
467
|
+
|
468
|
+
InitRanState(&ran_state);
|
469
|
+
InitStats(&stats);
|
470
|
+
ZopfliInitLZ77Store(¤tstore);
|
471
|
+
|
472
|
+
/* Do regular deflate, then loop multiple shortest path runs, each time using
|
473
|
+
the statistics of the previous run. */
|
474
|
+
|
475
|
+
/* Initial run. */
|
476
|
+
ZopfliLZ77Greedy(s, in, instart, inend, ¤tstore);
|
477
|
+
GetStatistics(¤tstore, &stats);
|
478
|
+
|
479
|
+
/* Repeat statistics with each time the cost model from the previous stat
|
480
|
+
run. */
|
481
|
+
for (i = 0; i < s->options->numiterations; i++) {
|
482
|
+
ZopfliCleanLZ77Store(¤tstore);
|
483
|
+
ZopfliInitLZ77Store(¤tstore);
|
484
|
+
LZ77OptimalRun(s, in, instart, inend, &path, &pathsize,
|
485
|
+
length_array, GetCostStat, (void*)&stats,
|
486
|
+
¤tstore);
|
487
|
+
cost = ZopfliCalculateBlockSize(currentstore.litlens, currentstore.dists,
|
488
|
+
0, currentstore.size, 2);
|
489
|
+
if (cost < bestcost) {
|
490
|
+
/* Copy to the output store. */
|
491
|
+
ZopfliCopyLZ77Store(¤tstore, store);
|
492
|
+
CopyStats(&stats, &beststats);
|
493
|
+
bestcost = cost;
|
494
|
+
}
|
495
|
+
CopyStats(&stats, &laststats);
|
496
|
+
ClearStatFreqs(&stats);
|
497
|
+
GetStatistics(¤tstore, &stats);
|
498
|
+
if (lastrandomstep != -1) {
|
499
|
+
/* This makes it converge slower but better. Do it only once the
|
500
|
+
randomness kicks in so that if the user does few iterations, it gives a
|
501
|
+
better result sooner. */
|
502
|
+
AddWeighedStatFreqs(&stats, 1.0, &laststats, 0.5, &stats);
|
503
|
+
CalculateStatistics(&stats);
|
504
|
+
}
|
505
|
+
if (i > 5 && cost == lastcost) {
|
506
|
+
CopyStats(&beststats, &stats);
|
507
|
+
RandomizeStatFreqs(&ran_state, &stats);
|
508
|
+
CalculateStatistics(&stats);
|
509
|
+
lastrandomstep = i;
|
510
|
+
}
|
511
|
+
lastcost = cost;
|
512
|
+
}
|
513
|
+
|
514
|
+
free(length_array);
|
515
|
+
free(path);
|
516
|
+
ZopfliCleanLZ77Store(¤tstore);
|
517
|
+
}
|
518
|
+
|
519
|
+
void ZopfliLZ77OptimalFixed(ZopfliBlockState *s,
|
520
|
+
const unsigned char* in,
|
521
|
+
size_t instart, size_t inend,
|
522
|
+
ZopfliLZ77Store* store)
|
523
|
+
{
|
524
|
+
/* Dist to get to here with smallest cost. */
|
525
|
+
size_t blocksize = inend - instart;
|
526
|
+
unsigned short* length_array =
|
527
|
+
(unsigned short*)malloc(sizeof(unsigned short) * (blocksize + 1));
|
528
|
+
unsigned short* path = 0;
|
529
|
+
size_t pathsize = 0;
|
530
|
+
|
531
|
+
if (!length_array) exit(-1); /* Allocation failed. */
|
532
|
+
|
533
|
+
s->blockstart = instart;
|
534
|
+
s->blockend = inend;
|
535
|
+
|
536
|
+
/* Shortest path for fixed tree This one should give the shortest possible
|
537
|
+
result for fixed tree, no repeated runs are needed since the tree is known. */
|
538
|
+
LZ77OptimalRun(s, in, instart, inend, &path, &pathsize,
|
539
|
+
length_array, GetCostFixed, 0, store);
|
540
|
+
|
541
|
+
free(length_array);
|
542
|
+
free(path);
|
543
|
+
}
|