zopfli-bin 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.gitmodules +3 -0
- data/.testguardrc +1 -0
- data/Gemfile +17 -0
- data/Gemfile.lock +111 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +19 -0
- data/Rakefile +39 -0
- data/VERSION +1 -0
- data/ext/Makefile +8 -0
- data/ext/extconf.rb +4 -0
- data/lib/zopfli-bin.rb +5 -0
- data/lib/zopfli/bin.rb +34 -0
- data/test/helper.rb +19 -0
- data/test/test_zopfli-bin.rb +33 -0
- data/vendor/zopfli/CONTRIBUTORS +7 -0
- data/vendor/zopfli/COPYING +201 -0
- data/vendor/zopfli/Makefile +37 -0
- data/vendor/zopfli/README +32 -0
- data/vendor/zopfli/README.zopflipng +35 -0
- data/vendor/zopfli/src/zopfli/blocksplitter.c +342 -0
- data/vendor/zopfli/src/zopfli/blocksplitter.h +77 -0
- data/vendor/zopfli/src/zopfli/cache.c +119 -0
- data/vendor/zopfli/src/zopfli/cache.h +66 -0
- data/vendor/zopfli/src/zopfli/deflate.c +866 -0
- data/vendor/zopfli/src/zopfli/deflate.h +86 -0
- data/vendor/zopfli/src/zopfli/gzip_container.c +117 -0
- data/vendor/zopfli/src/zopfli/gzip_container.h +50 -0
- data/vendor/zopfli/src/zopfli/hash.c +135 -0
- data/vendor/zopfli/src/zopfli/hash.h +70 -0
- data/vendor/zopfli/src/zopfli/katajainen.c +251 -0
- data/vendor/zopfli/src/zopfli/katajainen.h +42 -0
- data/vendor/zopfli/src/zopfli/lz77.c +482 -0
- data/vendor/zopfli/src/zopfli/lz77.h +129 -0
- data/vendor/zopfli/src/zopfli/squeeze.c +546 -0
- data/vendor/zopfli/src/zopfli/squeeze.h +60 -0
- data/vendor/zopfli/src/zopfli/tree.c +101 -0
- data/vendor/zopfli/src/zopfli/tree.h +51 -0
- data/vendor/zopfli/src/zopfli/util.c +213 -0
- data/vendor/zopfli/src/zopfli/util.h +175 -0
- data/vendor/zopfli/src/zopfli/zlib_container.c +79 -0
- data/vendor/zopfli/src/zopfli/zlib_container.h +50 -0
- data/vendor/zopfli/src/zopfli/zopfli.h +97 -0
- data/vendor/zopfli/src/zopfli/zopfli_bin.c +203 -0
- data/vendor/zopfli/src/zopfli/zopfli_lib.c +42 -0
- data/vendor/zopfli/src/zopflipng/lodepng/lodepng.cpp +6260 -0
- data/vendor/zopfli/src/zopflipng/lodepng/lodepng.h +1716 -0
- data/vendor/zopfli/src/zopflipng/lodepng/lodepng_util.cpp +656 -0
- data/vendor/zopfli/src/zopflipng/lodepng/lodepng_util.h +151 -0
- data/vendor/zopfli/src/zopflipng/zopflipng_bin.cc +407 -0
- data/vendor/zopfli/src/zopflipng/zopflipng_lib.cc +425 -0
- data/vendor/zopfli/src/zopflipng/zopflipng_lib.h +79 -0
- data/zopfli-bin.gemspec +119 -0
- metadata +225 -0
@@ -0,0 +1,129 @@
|
|
1
|
+
/*
|
2
|
+
Copyright 2011 Google Inc. All Rights Reserved.
|
3
|
+
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
you may not use this file except in compliance with the License.
|
6
|
+
You may obtain a copy of the License at
|
7
|
+
|
8
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
See the License for the specific language governing permissions and
|
14
|
+
limitations under the License.
|
15
|
+
|
16
|
+
Author: lode.vandevenne@gmail.com (Lode Vandevenne)
|
17
|
+
Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
|
18
|
+
*/
|
19
|
+
|
20
|
+
/*
|
21
|
+
Functions for basic LZ77 compression and utilities for the "squeeze" LZ77
|
22
|
+
compression.
|
23
|
+
*/
|
24
|
+
|
25
|
+
#ifndef ZOPFLI_LZ77_H_
|
26
|
+
#define ZOPFLI_LZ77_H_
|
27
|
+
|
28
|
+
#include <stdlib.h>
|
29
|
+
|
30
|
+
#include "cache.h"
|
31
|
+
#include "hash.h"
|
32
|
+
#include "zopfli.h"
|
33
|
+
|
34
|
+
/*
Holds LZ77 output as two parallel arrays of lit/length and dist values.
litlens: for each entry, either a literal symbol or a match length.
dists: for each entry, the match distance. A value of 0 means there is no dist
    and the corresponding litlens value is a literal instead of a length.
size: the number of entries in both the litlens and dists arrays.
The memory is best managed with ZopfliInitLZ77Store to initialize it,
ZopfliCleanLZ77Store to destroy it, and ZopfliStoreLitLenDist to append values.
*/
typedef struct ZopfliLZ77Store {
  /* Literal symbol or match length of each entry. */
  unsigned short* litlens;
  /* 0: the corresponding litlens entry is a literal.
  > 0: the corresponding litlens entry is a length and this is its distance. */
  unsigned short* dists;
  size_t size;
} ZopfliLZ77Store;
|
50
|
+
|
51
|
+
void ZopfliInitLZ77Store(ZopfliLZ77Store* store);
|
52
|
+
void ZopfliCleanLZ77Store(ZopfliLZ77Store* store);
|
53
|
+
void ZopfliCopyLZ77Store(const ZopfliLZ77Store* source, ZopfliLZ77Store* dest);
|
54
|
+
void ZopfliStoreLitLenDist(unsigned short length, unsigned short dist,
|
55
|
+
ZopfliLZ77Store* store);
|
56
|
+
|
57
|
+
/*
|
58
|
+
Some state information for compressing a block.
|
59
|
+
This is currently a bit under-used (with mainly only the longest match cache),
|
60
|
+
but is kept for easy future expansion.
|
61
|
+
*/
|
62
|
+
typedef struct ZopfliBlockState {
|
63
|
+
const ZopfliOptions* options;
|
64
|
+
|
65
|
+
#ifdef ZOPFLI_LONGEST_MATCH_CACHE
|
66
|
+
/* Cache for length/distance pairs found so far. */
|
67
|
+
ZopfliLongestMatchCache* lmc;
|
68
|
+
#endif
|
69
|
+
|
70
|
+
/* The start (inclusive) and end (not inclusive) of the current block. */
|
71
|
+
size_t blockstart;
|
72
|
+
size_t blockend;
|
73
|
+
} ZopfliBlockState;
|
74
|
+
|
75
|
+
/*
|
76
|
+
Finds the longest match (length and corresponding distance) for LZ77
|
77
|
+
compression.
|
78
|
+
Even when not using "sublen", it can be more efficient to provide an array,
|
79
|
+
because only then the caching is used.
|
80
|
+
array: the data
|
81
|
+
pos: position in the data to find the match for
|
82
|
+
size: size of the data
|
83
|
+
limit: limit length to maximum this value (default should be 258). This allows
|
84
|
+
finding a shorter dist for that length (= less extra bits). Must be
|
85
|
+
in the range [ZOPFLI_MIN_MATCH, ZOPFLI_MAX_MATCH].
|
86
|
+
sublen: output array of 259 elements, or null. Has, for each length, the
|
87
|
+
smallest distance required to reach this length. Only 256 of its 259 values
|
88
|
+
are used, the first 3 are ignored (the shortest length is 3. It is purely
|
89
|
+
for convenience that the array is made 3 longer).
|
90
|
+
*/
|
91
|
+
void ZopfliFindLongestMatch(
|
92
|
+
ZopfliBlockState *s, const ZopfliHash* h, const unsigned char* array,
|
93
|
+
size_t pos, size_t size, size_t limit,
|
94
|
+
unsigned short* sublen, unsigned short* distance, unsigned short* length);
|
95
|
+
|
96
|
+
/*
|
97
|
+
Verifies if length and dist are indeed valid, only used for assertion.
|
98
|
+
*/
|
99
|
+
void ZopfliVerifyLenDist(const unsigned char* data, size_t datasize, size_t pos,
|
100
|
+
unsigned short dist, unsigned short length);
|
101
|
+
|
102
|
+
/*
|
103
|
+
Counts the number of literal, length and distance symbols in the given lz77
|
104
|
+
arrays.
|
105
|
+
litlens: lz77 lit/lengths
|
106
|
+
dists: lz77 distances
|
107
|
+
start: where to begin counting in litlens and dists
|
108
|
+
end: where to stop counting in litlens and dists (not inclusive)
|
109
|
+
ll_count: count of each lit/len symbol, must have size 288 (see deflate
|
110
|
+
standard)
|
111
|
+
d_count: count of each dist symbol, must have size 32 (see deflate standard)
|
112
|
+
*/
|
113
|
+
void ZopfliLZ77Counts(const unsigned short* litlens,
|
114
|
+
const unsigned short* dists,
|
115
|
+
size_t start, size_t end,
|
116
|
+
size_t* ll_count, size_t* d_count);
|
117
|
+
|
118
|
+
/*
|
119
|
+
Does LZ77 using an algorithm similar to gzip, with lazy matching, rather than
|
120
|
+
with the slow but better "squeeze" implementation.
|
121
|
+
The result is placed in the ZopfliLZ77Store.
|
122
|
+
If instart is larger than 0, it uses values before instart as starting
|
123
|
+
dictionary.
|
124
|
+
*/
|
125
|
+
void ZopfliLZ77Greedy(ZopfliBlockState* s, const unsigned char* in,
|
126
|
+
size_t instart, size_t inend,
|
127
|
+
ZopfliLZ77Store* store);
|
128
|
+
|
129
|
+
#endif /* ZOPFLI_LZ77_H_ */
|
@@ -0,0 +1,546 @@
|
|
1
|
+
/*
|
2
|
+
Copyright 2011 Google Inc. All Rights Reserved.
|
3
|
+
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
you may not use this file except in compliance with the License.
|
6
|
+
You may obtain a copy of the License at
|
7
|
+
|
8
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
See the License for the specific language governing permissions and
|
14
|
+
limitations under the License.
|
15
|
+
|
16
|
+
Author: lode.vandevenne@gmail.com (Lode Vandevenne)
|
17
|
+
Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
|
18
|
+
*/
|
19
|
+
|
20
|
+
#include "squeeze.h"
|
21
|
+
|
22
|
+
#include <assert.h>
|
23
|
+
#include <math.h>
|
24
|
+
#include <stdio.h>
|
25
|
+
|
26
|
+
#include "blocksplitter.h"
|
27
|
+
#include "deflate.h"
|
28
|
+
#include "tree.h"
|
29
|
+
#include "util.h"
|
30
|
+
|
31
|
+
/*
Symbol frequencies of an LZ77 run together with the per-symbol bit costs
derived from them.
*/
typedef struct SymbolStats {
  /* Frequency of each literal and length symbol. */
  size_t litlens[288];
  /* Frequency of each of the 32 unique dist symbols (not of the 32768 possible
  dists). */
  size_t dists[32];

  /* Length of each lit/len symbol in bits. */
  double ll_symbols[288];
  /* Length of each dist symbol in bits. */
  double d_symbols[32];
} SymbolStats;
|
40
|
+
|
41
|
+
/* Sets everything to 0. */
|
42
|
+
static void InitStats(SymbolStats* stats) {
|
43
|
+
memset(stats->litlens, 0, 288 * sizeof(stats->litlens[0]));
|
44
|
+
memset(stats->dists, 0, 32 * sizeof(stats->dists[0]));
|
45
|
+
|
46
|
+
memset(stats->ll_symbols, 0, 288 * sizeof(stats->ll_symbols[0]));
|
47
|
+
memset(stats->d_symbols, 0, 32 * sizeof(stats->d_symbols[0]));
|
48
|
+
}
|
49
|
+
|
50
|
+
/* Copies all four arrays (frequencies and bit lengths) from source to dest. */
static void CopyStats(SymbolStats* source, SymbolStats* dest) {
  /* Symbol frequencies. */
  memcpy(dest->litlens, source->litlens, sizeof(dest->litlens));
  memcpy(dest->dists, source->dists, sizeof(dest->dists));

  /* Symbol bit lengths. */
  memcpy(dest->ll_symbols, source->ll_symbols, sizeof(dest->ll_symbols));
  memcpy(dest->d_symbols, source->d_symbols, sizeof(dest->d_symbols));
}
|
58
|
+
|
59
|
+
/* Adds the bit lengths. */
|
60
|
+
static void AddWeighedStatFreqs(const SymbolStats* stats1, double w1,
|
61
|
+
const SymbolStats* stats2, double w2,
|
62
|
+
SymbolStats* result) {
|
63
|
+
size_t i;
|
64
|
+
for (i = 0; i < 288; i++) {
|
65
|
+
result->litlens[i] =
|
66
|
+
(size_t) (stats1->litlens[i] * w1 + stats2->litlens[i] * w2);
|
67
|
+
}
|
68
|
+
for (i = 0; i < 32; i++) {
|
69
|
+
result->dists[i] =
|
70
|
+
(size_t) (stats1->dists[i] * w1 + stats2->dists[i] * w2);
|
71
|
+
}
|
72
|
+
result->litlens[256] = 1; /* End symbol. */
|
73
|
+
}
|
74
|
+
|
75
|
+
/* State of the pseudo random number generator: its two running values. */
typedef struct RanState {
  unsigned int m_w, m_z;
} RanState;

/* Resets the generator to its fixed seed, so every run produces the same
sequence. */
static void InitRanState(RanState* state) {
  state->m_z = 2;
  state->m_w = 1;
}

/* Get random number: "Multiply-With-Carry" generator of G. Marsaglia.
Advances both running values and combines them into the returned number. */
static unsigned int Ran(RanState* state) {
  const unsigned int next_z =
      36969 * (state->m_z & 65535) + (state->m_z >> 16);
  const unsigned int next_w =
      18000 * (state->m_w & 65535) + (state->m_w >> 16);

  state->m_z = next_z;
  state->m_w = next_w;

  /* 32-bit result. */
  return (next_z << 16) + next_w;
}
|
90
|
+
|
91
|
+
/*
Perturbs the n frequencies in freqs: for every position one random number
decides (a one in three chance) whether the entry is overwritten, and if so a
second random number picks the entry whose current value is copied into it.
*/
static void RandomizeFreqs(RanState* state, size_t* freqs, int n) {
  int pos = 0;
  while (pos < n) {
    if ((Ran(state) >> 4) % 3 == 0) {
      freqs[pos] = freqs[Ran(state) % n];
    }
    pos++;
  }
}
|
97
|
+
|
98
|
+
/* Randomly perturbs the lit/len frequencies, then the dist frequencies, and
finally restores the end symbol frequency to 1. */
static void RandomizeStatFreqs(RanState* state, SymbolStats* stats) {
  RandomizeFreqs(state, stats->litlens, 288);
  RandomizeFreqs(state, stats->dists, 32);

  /* End symbol. */
  stats->litlens[256] = 1;
}
|
103
|
+
|
104
|
+
/* Resets both frequency tables to zero. The bit length tables (ll_symbols and
d_symbols) are not touched. */
static void ClearStatFreqs(SymbolStats* stats) {
  memset(stats->litlens, 0, sizeof(stats->litlens));
  memset(stats->dists, 0, sizeof(stats->dists));
}
|
109
|
+
|
110
|
+
/*
Signature of a cost model: returns the modelled cost of one LZ77 symbol.
litlen: the literal symbol if dist is 0, the match length otherwise.
dist: the match distance, or 0 for a literal.
context: opaque data for the specific cost model.
*/
typedef double CostModelFun(unsigned litlen, unsigned dist, void* context);
|
115
|
+
|
116
|
+
/*
Cost model which should exactly match the fixed tree.
type: CostModelFun
The context argument is not used.
*/
static double GetCostFixed(unsigned litlen, unsigned dist, void* unused) {
  int dbits, lbits, lsym;
  double symbolbits;
  (void)unused;

  /* Literal: 8 bits up to symbol 143, 9 bits above. */
  if (dist == 0) return litlen <= 143 ? 8 : 9;

  dbits = ZopfliGetDistExtraBits(dist);
  lbits = ZopfliGetLengthExtraBits(litlen);
  lsym = ZopfliGetLengthSymbol(litlen);

  /* Length symbol takes 7 bits up to symbol 279 and 8 bits above; every dist
  symbol has length 5. */
  symbolbits = (lsym <= 279 ? 7 : 8) + 5;
  return symbolbits + dbits + lbits;
}
|
136
|
+
|
137
|
+
/*
|
138
|
+
Cost model based on symbol statistics.
|
139
|
+
type: CostModelFun
|
140
|
+
*/
|
141
|
+
static double GetCostStat(unsigned litlen, unsigned dist, void* context) {
|
142
|
+
SymbolStats* stats = (SymbolStats*)context;
|
143
|
+
if (dist == 0) {
|
144
|
+
return stats->ll_symbols[litlen];
|
145
|
+
} else {
|
146
|
+
int lsym = ZopfliGetLengthSymbol(litlen);
|
147
|
+
int lbits = ZopfliGetLengthExtraBits(litlen);
|
148
|
+
int dsym = ZopfliGetDistSymbol(dist);
|
149
|
+
int dbits = ZopfliGetDistExtraBits(dist);
|
150
|
+
return stats->ll_symbols[lsym] + lbits + stats->d_symbols[dsym] + dbits;
|
151
|
+
}
|
152
|
+
}
|
153
|
+
|
154
|
+
/*
|
155
|
+
Finds the minimum possible cost this cost model can return for valid length and
|
156
|
+
distance symbols.
|
157
|
+
*/
|
158
|
+
static double GetCostModelMinCost(CostModelFun* costmodel, void* costcontext) {
|
159
|
+
double mincost;
|
160
|
+
int bestlength = 0; /* length that has lowest cost in the cost model */
|
161
|
+
int bestdist = 0; /* distance that has lowest cost in the cost model */
|
162
|
+
int i;
|
163
|
+
/*
|
164
|
+
Table of distances that have a different distance symbol in the deflate
|
165
|
+
specification. Each value is the first distance that has a new symbol. Only
|
166
|
+
different symbols affect the cost model so only these need to be checked.
|
167
|
+
See RFC 1951 section 3.2.5. Compressed blocks (length and distance codes).
|
168
|
+
*/
|
169
|
+
static const int dsymbols[30] = {
|
170
|
+
1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513,
|
171
|
+
769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577
|
172
|
+
};
|
173
|
+
|
174
|
+
mincost = ZOPFLI_LARGE_FLOAT;
|
175
|
+
for (i = 3; i < 259; i++) {
|
176
|
+
double c = costmodel(i, 1, costcontext);
|
177
|
+
if (c < mincost) {
|
178
|
+
bestlength = i;
|
179
|
+
mincost = c;
|
180
|
+
}
|
181
|
+
}
|
182
|
+
|
183
|
+
mincost = ZOPFLI_LARGE_FLOAT;
|
184
|
+
for (i = 0; i < 30; i++) {
|
185
|
+
double c = costmodel(3, dsymbols[i], costcontext);
|
186
|
+
if (c < mincost) {
|
187
|
+
bestdist = dsymbols[i];
|
188
|
+
mincost = c;
|
189
|
+
}
|
190
|
+
}
|
191
|
+
|
192
|
+
return costmodel(bestlength, bestdist, costcontext);
|
193
|
+
}
|
194
|
+
|
195
|
+
/*
|
196
|
+
Performs the forward pass for "squeeze". Gets the most optimal length to reach
|
197
|
+
every byte from a previous byte, using cost calculations.
|
198
|
+
s: the ZopfliBlockState
|
199
|
+
in: the input data array
|
200
|
+
instart: where to start
|
201
|
+
inend: where to stop (not inclusive)
|
202
|
+
costmodel: function to calculate the cost of some lit/len/dist pair.
|
203
|
+
costcontext: abstract context for the costmodel function
|
204
|
+
length_array: output array of size (inend - instart) which will receive the best
|
205
|
+
length to reach this byte from a previous byte.
|
206
|
+
returns the cost that was, according to the costmodel, needed to get to the end.
|
207
|
+
*/
|
208
|
+
static double GetBestLengths(ZopfliBlockState *s,
|
209
|
+
const unsigned char* in,
|
210
|
+
size_t instart, size_t inend,
|
211
|
+
CostModelFun* costmodel, void* costcontext,
|
212
|
+
unsigned short* length_array) {
|
213
|
+
/* Best cost to get here so far. */
|
214
|
+
size_t blocksize = inend - instart;
|
215
|
+
float* costs;
|
216
|
+
size_t i = 0, k;
|
217
|
+
unsigned short leng;
|
218
|
+
unsigned short dist;
|
219
|
+
unsigned short sublen[259];
|
220
|
+
size_t windowstart = instart > ZOPFLI_WINDOW_SIZE
|
221
|
+
? instart - ZOPFLI_WINDOW_SIZE : 0;
|
222
|
+
ZopfliHash hash;
|
223
|
+
ZopfliHash* h = &hash;
|
224
|
+
double result;
|
225
|
+
double mincost = GetCostModelMinCost(costmodel, costcontext);
|
226
|
+
|
227
|
+
if (instart == inend) return 0;
|
228
|
+
|
229
|
+
costs = (float*)malloc(sizeof(float) * (blocksize + 1));
|
230
|
+
if (!costs) exit(-1); /* Allocation failed. */
|
231
|
+
|
232
|
+
ZopfliInitHash(ZOPFLI_WINDOW_SIZE, h);
|
233
|
+
ZopfliWarmupHash(in, windowstart, inend, h);
|
234
|
+
for (i = windowstart; i < instart; i++) {
|
235
|
+
ZopfliUpdateHash(in, i, inend, h);
|
236
|
+
}
|
237
|
+
|
238
|
+
for (i = 1; i < blocksize + 1; i++) costs[i] = ZOPFLI_LARGE_FLOAT;
|
239
|
+
costs[0] = 0; /* Because it's the start. */
|
240
|
+
length_array[0] = 0;
|
241
|
+
|
242
|
+
for (i = instart; i < inend; i++) {
|
243
|
+
size_t j = i - instart; /* Index in the costs array and length_array. */
|
244
|
+
ZopfliUpdateHash(in, i, inend, h);
|
245
|
+
|
246
|
+
#ifdef ZOPFLI_SHORTCUT_LONG_REPETITIONS
|
247
|
+
/* If we're in a long repetition of the same character and have more than
|
248
|
+
ZOPFLI_MAX_MATCH characters before and after our position. */
|
249
|
+
if (h->same[i & ZOPFLI_WINDOW_MASK] > ZOPFLI_MAX_MATCH * 2
|
250
|
+
&& i > instart + ZOPFLI_MAX_MATCH + 1
|
251
|
+
&& i + ZOPFLI_MAX_MATCH * 2 + 1 < inend
|
252
|
+
&& h->same[(i - ZOPFLI_MAX_MATCH) & ZOPFLI_WINDOW_MASK]
|
253
|
+
> ZOPFLI_MAX_MATCH) {
|
254
|
+
double symbolcost = costmodel(ZOPFLI_MAX_MATCH, 1, costcontext);
|
255
|
+
/* Set the length to reach each one to ZOPFLI_MAX_MATCH, and the cost to
|
256
|
+
the cost corresponding to that length. Doing this, we skip
|
257
|
+
ZOPFLI_MAX_MATCH values to avoid calling ZopfliFindLongestMatch. */
|
258
|
+
for (k = 0; k < ZOPFLI_MAX_MATCH; k++) {
|
259
|
+
costs[j + ZOPFLI_MAX_MATCH] = costs[j] + symbolcost;
|
260
|
+
length_array[j + ZOPFLI_MAX_MATCH] = ZOPFLI_MAX_MATCH;
|
261
|
+
i++;
|
262
|
+
j++;
|
263
|
+
ZopfliUpdateHash(in, i, inend, h);
|
264
|
+
}
|
265
|
+
}
|
266
|
+
#endif
|
267
|
+
|
268
|
+
ZopfliFindLongestMatch(s, h, in, i, inend, ZOPFLI_MAX_MATCH, sublen,
|
269
|
+
&dist, &leng);
|
270
|
+
|
271
|
+
/* Literal. */
|
272
|
+
if (i + 1 <= inend) {
|
273
|
+
double newCost = costs[j] + costmodel(in[i], 0, costcontext);
|
274
|
+
assert(newCost >= 0);
|
275
|
+
if (newCost < costs[j + 1]) {
|
276
|
+
costs[j + 1] = newCost;
|
277
|
+
length_array[j + 1] = 1;
|
278
|
+
}
|
279
|
+
}
|
280
|
+
/* Lengths. */
|
281
|
+
for (k = 3; k <= leng && i + k <= inend; k++) {
|
282
|
+
double newCost;
|
283
|
+
|
284
|
+
/* Calling the cost model is expensive, avoid this if we are already at
|
285
|
+
the minimum possible cost that it can return. */
|
286
|
+
if (costs[j + k] - costs[j] <= mincost) continue;
|
287
|
+
|
288
|
+
newCost = costs[j] + costmodel(k, sublen[k], costcontext);
|
289
|
+
assert(newCost >= 0);
|
290
|
+
if (newCost < costs[j + k]) {
|
291
|
+
assert(k <= ZOPFLI_MAX_MATCH);
|
292
|
+
costs[j + k] = newCost;
|
293
|
+
length_array[j + k] = k;
|
294
|
+
}
|
295
|
+
}
|
296
|
+
}
|
297
|
+
|
298
|
+
assert(costs[blocksize] >= 0);
|
299
|
+
result = costs[blocksize];
|
300
|
+
|
301
|
+
ZopfliCleanHash(h);
|
302
|
+
free(costs);
|
303
|
+
|
304
|
+
return result;
|
305
|
+
}
|
306
|
+
|
307
|
+
/*
|
308
|
+
Calculates the optimal path of lz77 lengths to use, from the calculated
|
309
|
+
length_array. The length_array must contain the optimal length to reach that
|
310
|
+
byte. The path will be filled with the lengths to use, so its data size will be
|
311
|
+
the amount of lz77 symbols.
|
312
|
+
*/
|
313
|
+
static void TraceBackwards(size_t size, const unsigned short* length_array,
|
314
|
+
unsigned short** path, size_t* pathsize) {
|
315
|
+
size_t index = size;
|
316
|
+
if (size == 0) return;
|
317
|
+
for (;;) {
|
318
|
+
ZOPFLI_APPEND_DATA(length_array[index], path, pathsize);
|
319
|
+
assert(length_array[index] <= index);
|
320
|
+
assert(length_array[index] <= ZOPFLI_MAX_MATCH);
|
321
|
+
assert(length_array[index] != 0);
|
322
|
+
index -= length_array[index];
|
323
|
+
if (index == 0) break;
|
324
|
+
}
|
325
|
+
|
326
|
+
/* Mirror result. */
|
327
|
+
for (index = 0; index < *pathsize / 2; index++) {
|
328
|
+
unsigned short temp = (*path)[index];
|
329
|
+
(*path)[index] = (*path)[*pathsize - index - 1];
|
330
|
+
(*path)[*pathsize - index - 1] = temp;
|
331
|
+
}
|
332
|
+
}
|
333
|
+
|
334
|
+
static void FollowPath(ZopfliBlockState* s,
|
335
|
+
const unsigned char* in, size_t instart, size_t inend,
|
336
|
+
unsigned short* path, size_t pathsize,
|
337
|
+
ZopfliLZ77Store* store) {
|
338
|
+
size_t i, j, pos = 0;
|
339
|
+
size_t windowstart = instart > ZOPFLI_WINDOW_SIZE
|
340
|
+
? instart - ZOPFLI_WINDOW_SIZE : 0;
|
341
|
+
|
342
|
+
size_t total_length_test = 0;
|
343
|
+
|
344
|
+
ZopfliHash hash;
|
345
|
+
ZopfliHash* h = &hash;
|
346
|
+
|
347
|
+
if (instart == inend) return;
|
348
|
+
|
349
|
+
ZopfliInitHash(ZOPFLI_WINDOW_SIZE, h);
|
350
|
+
ZopfliWarmupHash(in, windowstart, inend, h);
|
351
|
+
for (i = windowstart; i < instart; i++) {
|
352
|
+
ZopfliUpdateHash(in, i, inend, h);
|
353
|
+
}
|
354
|
+
|
355
|
+
pos = instart;
|
356
|
+
for (i = 0; i < pathsize; i++) {
|
357
|
+
unsigned short length = path[i];
|
358
|
+
unsigned short dummy_length;
|
359
|
+
unsigned short dist;
|
360
|
+
assert(pos < inend);
|
361
|
+
|
362
|
+
ZopfliUpdateHash(in, pos, inend, h);
|
363
|
+
|
364
|
+
/* Add to output. */
|
365
|
+
if (length >= ZOPFLI_MIN_MATCH) {
|
366
|
+
/* Get the distance by recalculating longest match. The found length
|
367
|
+
should match the length from the path. */
|
368
|
+
ZopfliFindLongestMatch(s, h, in, pos, inend, length, 0,
|
369
|
+
&dist, &dummy_length);
|
370
|
+
assert(!(dummy_length != length && length > 2 && dummy_length > 2));
|
371
|
+
ZopfliVerifyLenDist(in, inend, pos, dist, length);
|
372
|
+
ZopfliStoreLitLenDist(length, dist, store);
|
373
|
+
total_length_test += length;
|
374
|
+
} else {
|
375
|
+
length = 1;
|
376
|
+
ZopfliStoreLitLenDist(in[pos], 0, store);
|
377
|
+
total_length_test++;
|
378
|
+
}
|
379
|
+
|
380
|
+
|
381
|
+
assert(pos + length <= inend);
|
382
|
+
for (j = 1; j < length; j++) {
|
383
|
+
ZopfliUpdateHash(in, pos + j, inend, h);
|
384
|
+
}
|
385
|
+
|
386
|
+
pos += length;
|
387
|
+
}
|
388
|
+
|
389
|
+
ZopfliCleanHash(h);
|
390
|
+
}
|
391
|
+
|
392
|
+
/* Calculates the entropy of the statistics */
|
393
|
+
static void CalculateStatistics(SymbolStats* stats) {
|
394
|
+
ZopfliCalculateEntropy(stats->litlens, 288, stats->ll_symbols);
|
395
|
+
ZopfliCalculateEntropy(stats->dists, 32, stats->d_symbols);
|
396
|
+
}
|
397
|
+
|
398
|
+
/* Appends the symbol statistics from the store. */
|
399
|
+
static void GetStatistics(const ZopfliLZ77Store* store, SymbolStats* stats) {
|
400
|
+
size_t i;
|
401
|
+
for (i = 0; i < store->size; i++) {
|
402
|
+
if (store->dists[i] == 0) {
|
403
|
+
stats->litlens[store->litlens[i]]++;
|
404
|
+
} else {
|
405
|
+
stats->litlens[ZopfliGetLengthSymbol(store->litlens[i])]++;
|
406
|
+
stats->dists[ZopfliGetDistSymbol(store->dists[i])]++;
|
407
|
+
}
|
408
|
+
}
|
409
|
+
stats->litlens[256] = 1; /* End symbol. */
|
410
|
+
|
411
|
+
CalculateStatistics(stats);
|
412
|
+
}
|
413
|
+
|
414
|
+
/*
|
415
|
+
Does a single run for ZopfliLZ77Optimal. For good compression, repeated runs
|
416
|
+
with updated statistics should be performed.
|
417
|
+
|
418
|
+
s: the block state
|
419
|
+
in: the input data array
|
420
|
+
instart: where to start
|
421
|
+
inend: where to stop (not inclusive)
|
422
|
+
path: pointer to dynamically allocated memory to store the path
|
423
|
+
pathsize: pointer to the size of the dynamic path array
|
424
|
+
length_array: array if size (inend - instart) used to store lengths
|
425
|
+
costmodel: function to use as the cost model for this squeeze run
|
426
|
+
costcontext: abstract context for the costmodel function
|
427
|
+
store: place to output the LZ77 data
|
428
|
+
returns the cost that was, according to the costmodel, needed to get to the end.
|
429
|
+
This is not the actual cost.
|
430
|
+
*/
|
431
|
+
static double LZ77OptimalRun(ZopfliBlockState* s,
|
432
|
+
const unsigned char* in, size_t instart, size_t inend,
|
433
|
+
unsigned short** path, size_t* pathsize,
|
434
|
+
unsigned short* length_array, CostModelFun* costmodel,
|
435
|
+
void* costcontext, ZopfliLZ77Store* store) {
|
436
|
+
double cost = GetBestLengths(
|
437
|
+
s, in, instart, inend, costmodel, costcontext, length_array);
|
438
|
+
free(*path);
|
439
|
+
*path = 0;
|
440
|
+
*pathsize = 0;
|
441
|
+
TraceBackwards(inend - instart, length_array, path, pathsize);
|
442
|
+
FollowPath(s, in, instart, inend, *path, *pathsize, store);
|
443
|
+
assert(cost < ZOPFLI_LARGE_FLOAT);
|
444
|
+
return cost;
|
445
|
+
}
|
446
|
+
|
447
|
+
void ZopfliLZ77Optimal(ZopfliBlockState *s,
|
448
|
+
const unsigned char* in, size_t instart, size_t inend,
|
449
|
+
ZopfliLZ77Store* store) {
|
450
|
+
/* Dist to get to here with smallest cost. */
|
451
|
+
size_t blocksize = inend - instart;
|
452
|
+
unsigned short* length_array =
|
453
|
+
(unsigned short*)malloc(sizeof(unsigned short) * (blocksize + 1));
|
454
|
+
unsigned short* path = 0;
|
455
|
+
size_t pathsize = 0;
|
456
|
+
ZopfliLZ77Store currentstore;
|
457
|
+
SymbolStats stats, beststats, laststats;
|
458
|
+
int i;
|
459
|
+
double cost;
|
460
|
+
double bestcost = ZOPFLI_LARGE_FLOAT;
|
461
|
+
double lastcost = 0;
|
462
|
+
/* Try randomizing the costs a bit once the size stabilizes. */
|
463
|
+
RanState ran_state;
|
464
|
+
int lastrandomstep = -1;
|
465
|
+
|
466
|
+
if (!length_array) exit(-1); /* Allocation failed. */
|
467
|
+
|
468
|
+
InitRanState(&ran_state);
|
469
|
+
InitStats(&stats);
|
470
|
+
ZopfliInitLZ77Store(¤tstore);
|
471
|
+
|
472
|
+
/* Do regular deflate, then loop multiple shortest path runs, each time using
|
473
|
+
the statistics of the previous run. */
|
474
|
+
|
475
|
+
/* Initial run. */
|
476
|
+
ZopfliLZ77Greedy(s, in, instart, inend, ¤tstore);
|
477
|
+
GetStatistics(¤tstore, &stats);
|
478
|
+
|
479
|
+
/* Repeat statistics with each time the cost model from the previous stat
|
480
|
+
run. */
|
481
|
+
for (i = 0; i < s->options->numiterations; i++) {
|
482
|
+
ZopfliCleanLZ77Store(¤tstore);
|
483
|
+
ZopfliInitLZ77Store(¤tstore);
|
484
|
+
LZ77OptimalRun(s, in, instart, inend, &path, &pathsize,
|
485
|
+
length_array, GetCostStat, (void*)&stats,
|
486
|
+
¤tstore);
|
487
|
+
cost = ZopfliCalculateBlockSize(currentstore.litlens, currentstore.dists,
|
488
|
+
0, currentstore.size, 2);
|
489
|
+
if (s->options->verbose_more || (s->options->verbose && cost < bestcost)) {
|
490
|
+
fprintf(stderr, "Iteration %d: %d bit\n", i, (int) cost);
|
491
|
+
}
|
492
|
+
if (cost < bestcost) {
|
493
|
+
/* Copy to the output store. */
|
494
|
+
ZopfliCopyLZ77Store(¤tstore, store);
|
495
|
+
CopyStats(&stats, &beststats);
|
496
|
+
bestcost = cost;
|
497
|
+
}
|
498
|
+
CopyStats(&stats, &laststats);
|
499
|
+
ClearStatFreqs(&stats);
|
500
|
+
GetStatistics(¤tstore, &stats);
|
501
|
+
if (lastrandomstep != -1) {
|
502
|
+
/* This makes it converge slower but better. Do it only once the
|
503
|
+
randomness kicks in so that if the user does few iterations, it gives a
|
504
|
+
better result sooner. */
|
505
|
+
AddWeighedStatFreqs(&stats, 1.0, &laststats, 0.5, &stats);
|
506
|
+
CalculateStatistics(&stats);
|
507
|
+
}
|
508
|
+
if (i > 5 && cost == lastcost) {
|
509
|
+
CopyStats(&beststats, &stats);
|
510
|
+
RandomizeStatFreqs(&ran_state, &stats);
|
511
|
+
CalculateStatistics(&stats);
|
512
|
+
lastrandomstep = i;
|
513
|
+
}
|
514
|
+
lastcost = cost;
|
515
|
+
}
|
516
|
+
|
517
|
+
free(length_array);
|
518
|
+
free(path);
|
519
|
+
ZopfliCleanLZ77Store(¤tstore);
|
520
|
+
}
|
521
|
+
|
522
|
+
void ZopfliLZ77OptimalFixed(ZopfliBlockState *s,
|
523
|
+
const unsigned char* in,
|
524
|
+
size_t instart, size_t inend,
|
525
|
+
ZopfliLZ77Store* store)
|
526
|
+
{
|
527
|
+
/* Dist to get to here with smallest cost. */
|
528
|
+
size_t blocksize = inend - instart;
|
529
|
+
unsigned short* length_array =
|
530
|
+
(unsigned short*)malloc(sizeof(unsigned short) * (blocksize + 1));
|
531
|
+
unsigned short* path = 0;
|
532
|
+
size_t pathsize = 0;
|
533
|
+
|
534
|
+
if (!length_array) exit(-1); /* Allocation failed. */
|
535
|
+
|
536
|
+
s->blockstart = instart;
|
537
|
+
s->blockend = inend;
|
538
|
+
|
539
|
+
/* Shortest path for fixed tree This one should give the shortest possible
|
540
|
+
result for fixed tree, no repeated runs are needed since the tree is known. */
|
541
|
+
LZ77OptimalRun(s, in, instart, inend, &path, &pathsize,
|
542
|
+
length_array, GetCostFixed, 0, store);
|
543
|
+
|
544
|
+
free(length_array);
|
545
|
+
free(path);
|
546
|
+
}
|