zopfli 0.0.2 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. checksums.yaml +6 -14
  2. data/.gitmodules +1 -1
  3. data/.travis.yml +23 -0
  4. data/Gemfile +2 -0
  5. data/README.md +6 -1
  6. data/Rakefile +8 -10
  7. data/ext/extconf.rb +2 -1
  8. data/ext/zopfli.c +39 -20
  9. data/lib/zopfli/version.rb +1 -1
  10. data/smoke.sh +9 -0
  11. data/{test → spec}/fixtures/alice29.txt +0 -0
  12. data/spec/spec_helper.rb +2 -0
  13. data/spec/zopfli_spec.rb +68 -0
  14. data/vendor/zopfli/{blocksplitter.c → src/zopfli/blocksplitter.c} +41 -53
  15. data/vendor/zopfli/{blocksplitter.h → src/zopfli/blocksplitter.h} +2 -6
  16. data/vendor/zopfli/{cache.c → src/zopfli/cache.c} +6 -0
  17. data/vendor/zopfli/{cache.h → src/zopfli/cache.h} +0 -0
  18. data/vendor/zopfli/src/zopfli/deflate.c +931 -0
  19. data/vendor/zopfli/{deflate.h → src/zopfli/deflate.h} +17 -2
  20. data/vendor/zopfli/src/zopfli/gzip_container.c +124 -0
  21. data/vendor/zopfli/{gzip_container.h → src/zopfli/gzip_container.h} +8 -0
  22. data/vendor/zopfli/{hash.c → src/zopfli/hash.c} +18 -10
  23. data/vendor/zopfli/{hash.h → src/zopfli/hash.h} +10 -7
  24. data/vendor/zopfli/{katajainen.c → src/zopfli/katajainen.c} +73 -62
  25. data/vendor/zopfli/{katajainen.h → src/zopfli/katajainen.h} +1 -1
  26. data/vendor/zopfli/{lz77.c → src/zopfli/lz77.c} +190 -42
  27. data/vendor/zopfli/{lz77.h → src/zopfli/lz77.h} +39 -23
  28. data/vendor/zopfli/{squeeze.c → src/zopfli/squeeze.c} +75 -61
  29. data/vendor/zopfli/{squeeze.h → src/zopfli/squeeze.h} +1 -0
  30. data/vendor/zopfli/{util.c → src/zopfli/symbols.h} +49 -23
  31. data/vendor/zopfli/{tree.c → src/zopfli/tree.c} +0 -0
  32. data/vendor/zopfli/{tree.h → src/zopfli/tree.h} +0 -0
  33. data/vendor/zopfli/src/zopfli/util.c +35 -0
  34. data/vendor/zopfli/{util.h → src/zopfli/util.h} +6 -23
  35. data/vendor/zopfli/{zlib_container.c → src/zopfli/zlib_container.c} +1 -1
  36. data/vendor/zopfli/{zlib_container.h → src/zopfli/zlib_container.h} +8 -0
  37. data/vendor/zopfli/{zopfli.h → src/zopfli/zopfli.h} +10 -4
  38. data/vendor/zopfli/{zopfli_bin.c → src/zopfli/zopfli_bin.c} +31 -15
  39. data/vendor/zopfli/{zopfli_lib.c → src/zopfli/zopfli_lib.c} +1 -2
  40. data/zopfli.gemspec +9 -28
  41. metadata +51 -50
  42. data/test/test_zopfli_deflate.rb +0 -47
  43. data/vendor/zopfli/CONTRIBUTORS +0 -6
  44. data/vendor/zopfli/README +0 -25
  45. data/vendor/zopfli/deflate.c +0 -698
  46. data/vendor/zopfli/gzip_container.c +0 -117
  47. data/vendor/zopfli/makefile +0 -5
@@ -30,7 +30,7 @@ The output is tailored for DEFLATE: symbols that never occur, get a bit length
30
30
  of 0, and if only a single symbol occurs at least once, its bitlength will be 1,
31
31
  and not 0 as would theoretically be needed for a single symbol.
32
32
 
33
- frequencies: The amount of occurances of each symbol.
33
+ frequencies: The amount of occurrences of each symbol.
34
34
  n: The amount of symbols.
35
35
  maxbits: Maximum bit length, inclusive.
36
36
  bitlengths: Output, the bitlengths for the symbol prefix codes.
@@ -18,37 +18,76 @@ Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
18
18
  */
19
19
 
20
20
  #include "lz77.h"
21
+ #include "symbols.h"
21
22
  #include "util.h"
22
23
 
23
24
  #include <assert.h>
24
25
  #include <stdio.h>
25
26
  #include <stdlib.h>
26
27
 
27
- void ZopfliInitLZ77Store(ZopfliLZ77Store* store) {
28
+ void ZopfliInitLZ77Store(const unsigned char* data, ZopfliLZ77Store* store) {
28
29
  store->size = 0;
29
30
  store->litlens = 0;
30
31
  store->dists = 0;
32
+ store->pos = 0;
33
+ store->data = data;
34
+ store->ll_symbol = 0;
35
+ store->d_symbol = 0;
36
+ store->ll_counts = 0;
37
+ store->d_counts = 0;
31
38
  }
32
39
 
33
40
  void ZopfliCleanLZ77Store(ZopfliLZ77Store* store) {
34
41
  free(store->litlens);
35
42
  free(store->dists);
43
+ free(store->pos);
44
+ free(store->ll_symbol);
45
+ free(store->d_symbol);
46
+ free(store->ll_counts);
47
+ free(store->d_counts);
48
+ }
49
+
50
+ static size_t CeilDiv(size_t a, size_t b) {
51
+ return (a + b - 1) / b;
36
52
  }
37
53
 
38
54
  void ZopfliCopyLZ77Store(
39
55
  const ZopfliLZ77Store* source, ZopfliLZ77Store* dest) {
40
56
  size_t i;
57
+ size_t llsize = ZOPFLI_NUM_LL * CeilDiv(source->size, ZOPFLI_NUM_LL);
58
+ size_t dsize = ZOPFLI_NUM_D * CeilDiv(source->size, ZOPFLI_NUM_D);
41
59
  ZopfliCleanLZ77Store(dest);
60
+ ZopfliInitLZ77Store(source->data, dest);
42
61
  dest->litlens =
43
62
  (unsigned short*)malloc(sizeof(*dest->litlens) * source->size);
44
63
  dest->dists = (unsigned short*)malloc(sizeof(*dest->dists) * source->size);
45
-
46
- if (!dest->litlens || !dest->dists) exit(-1); /* Allocation failed. */
64
+ dest->pos = (size_t*)malloc(sizeof(*dest->pos) * source->size);
65
+ dest->ll_symbol =
66
+ (unsigned short*)malloc(sizeof(*dest->ll_symbol) * source->size);
67
+ dest->d_symbol =
68
+ (unsigned short*)malloc(sizeof(*dest->d_symbol) * source->size);
69
+ dest->ll_counts = (size_t*)malloc(sizeof(*dest->ll_counts) * llsize);
70
+ dest->d_counts = (size_t*)malloc(sizeof(*dest->d_counts) * dsize);
71
+
72
+ /* Allocation failed. */
73
+ if (!dest->litlens || !dest->dists) exit(-1);
74
+ if (!dest->pos) exit(-1);
75
+ if (!dest->ll_symbol || !dest->d_symbol) exit(-1);
76
+ if (!dest->ll_counts || !dest->d_counts) exit(-1);
47
77
 
48
78
  dest->size = source->size;
49
79
  for (i = 0; i < source->size; i++) {
50
80
  dest->litlens[i] = source->litlens[i];
51
81
  dest->dists[i] = source->dists[i];
82
+ dest->pos[i] = source->pos[i];
83
+ dest->ll_symbol[i] = source->ll_symbol[i];
84
+ dest->d_symbol[i] = source->d_symbol[i];
85
+ }
86
+ for (i = 0; i < llsize; i++) {
87
+ dest->ll_counts[i] = source->ll_counts[i];
88
+ }
89
+ for (i = 0; i < dsize; i++) {
90
+ dest->d_counts[i] = source->d_counts[i];
52
91
  }
53
92
  }
54
93
 
@@ -57,10 +96,149 @@ Appends the length and distance to the LZ77 arrays of the ZopfliLZ77Store.
57
96
  context must be a ZopfliLZ77Store*.
58
97
  */
59
98
  void ZopfliStoreLitLenDist(unsigned short length, unsigned short dist,
60
- ZopfliLZ77Store* store) {
61
- size_t size2 = store->size; /* Needed for using ZOPFLI_APPEND_DATA twice. */
99
+ size_t pos, ZopfliLZ77Store* store) {
100
+ size_t i;
101
+ /* Needed for using ZOPFLI_APPEND_DATA multiple times. */
102
+ size_t origsize = store->size;
103
+ size_t llstart = ZOPFLI_NUM_LL * (origsize / ZOPFLI_NUM_LL);
104
+ size_t dstart = ZOPFLI_NUM_D * (origsize / ZOPFLI_NUM_D);
105
+
106
+ /* Every time the index wraps around, a new cumulative histogram is made: we're
107
+ keeping one histogram value per LZ77 symbol rather than a full histogram for
108
+ each to save memory. */
109
+ if (origsize % ZOPFLI_NUM_LL == 0) {
110
+ size_t llsize = origsize;
111
+ for (i = 0; i < ZOPFLI_NUM_LL; i++) {
112
+ ZOPFLI_APPEND_DATA(
113
+ origsize == 0 ? 0 : store->ll_counts[origsize - ZOPFLI_NUM_LL + i],
114
+ &store->ll_counts, &llsize);
115
+ }
116
+ }
117
+ if (origsize % ZOPFLI_NUM_D == 0) {
118
+ size_t dsize = origsize;
119
+ for (i = 0; i < ZOPFLI_NUM_D; i++) {
120
+ ZOPFLI_APPEND_DATA(
121
+ origsize == 0 ? 0 : store->d_counts[origsize - ZOPFLI_NUM_D + i],
122
+ &store->d_counts, &dsize);
123
+ }
124
+ }
125
+
62
126
  ZOPFLI_APPEND_DATA(length, &store->litlens, &store->size);
63
- ZOPFLI_APPEND_DATA(dist, &store->dists, &size2);
127
+ store->size = origsize;
128
+ ZOPFLI_APPEND_DATA(dist, &store->dists, &store->size);
129
+ store->size = origsize;
130
+ ZOPFLI_APPEND_DATA(pos, &store->pos, &store->size);
131
+ assert(length < 259);
132
+
133
+ if (dist == 0) {
134
+ store->size = origsize;
135
+ ZOPFLI_APPEND_DATA(length, &store->ll_symbol, &store->size);
136
+ store->size = origsize;
137
+ ZOPFLI_APPEND_DATA(0, &store->d_symbol, &store->size);
138
+ store->ll_counts[llstart + length]++;
139
+ } else {
140
+ store->size = origsize;
141
+ ZOPFLI_APPEND_DATA(ZopfliGetLengthSymbol(length),
142
+ &store->ll_symbol, &store->size);
143
+ store->size = origsize;
144
+ ZOPFLI_APPEND_DATA(ZopfliGetDistSymbol(dist),
145
+ &store->d_symbol, &store->size);
146
+ store->ll_counts[llstart + ZopfliGetLengthSymbol(length)]++;
147
+ store->d_counts[dstart + ZopfliGetDistSymbol(dist)]++;
148
+ }
149
+ }
150
+
151
+ void ZopfliAppendLZ77Store(const ZopfliLZ77Store* store,
152
+ ZopfliLZ77Store* target) {
153
+ size_t i;
154
+ for (i = 0; i < store->size; i++) {
155
+ ZopfliStoreLitLenDist(store->litlens[i], store->dists[i],
156
+ store->pos[i], target);
157
+ }
158
+ }
159
+
160
+ size_t ZopfliLZ77GetByteRange(const ZopfliLZ77Store* lz77,
161
+ size_t lstart, size_t lend) {
162
+ size_t l = lend - 1;
163
+ if (lstart == lend) return 0;
164
+ return lz77->pos[l] + ((lz77->dists[l] == 0) ?
165
+ 1 : lz77->litlens[l]) - lz77->pos[lstart];
166
+ }
167
+
168
+ static void ZopfliLZ77GetHistogramAt(const ZopfliLZ77Store* lz77, size_t lpos,
169
+ size_t* ll_counts, size_t* d_counts) {
170
+ /* The real histogram is created by using the histogram for this chunk, but
171
+ all superfluous values of this chunk subtracted. */
172
+ size_t llpos = ZOPFLI_NUM_LL * (lpos / ZOPFLI_NUM_LL);
173
+ size_t dpos = ZOPFLI_NUM_D * (lpos / ZOPFLI_NUM_D);
174
+ size_t i;
175
+ for (i = 0; i < ZOPFLI_NUM_LL; i++) {
176
+ ll_counts[i] = lz77->ll_counts[llpos + i];
177
+ }
178
+ for (i = lpos + 1; i < llpos + ZOPFLI_NUM_LL && i < lz77->size; i++) {
179
+ ll_counts[lz77->ll_symbol[i]]--;
180
+ }
181
+ for (i = 0; i < ZOPFLI_NUM_D; i++) {
182
+ d_counts[i] = lz77->d_counts[dpos + i];
183
+ }
184
+ for (i = lpos + 1; i < dpos + ZOPFLI_NUM_D && i < lz77->size; i++) {
185
+ if (lz77->dists[i] != 0) d_counts[lz77->d_symbol[i]]--;
186
+ }
187
+ }
188
+
189
+ void ZopfliLZ77GetHistogram(const ZopfliLZ77Store* lz77,
190
+ size_t lstart, size_t lend,
191
+ size_t* ll_counts, size_t* d_counts) {
192
+ size_t i;
193
+ if (lstart + ZOPFLI_NUM_LL * 3 > lend) {
194
+ memset(ll_counts, 0, sizeof(*ll_counts) * ZOPFLI_NUM_LL);
195
+ memset(d_counts, 0, sizeof(*d_counts) * ZOPFLI_NUM_D);
196
+ for (i = lstart; i < lend; i++) {
197
+ ll_counts[lz77->ll_symbol[i]]++;
198
+ if (lz77->dists[i] != 0) d_counts[lz77->d_symbol[i]]++;
199
+ }
200
+ } else {
201
+ /* Subtract the cumulative histograms at the end and the start to get the
202
+ histogram for this range. */
203
+ ZopfliLZ77GetHistogramAt(lz77, lend - 1, ll_counts, d_counts);
204
+ if (lstart > 0) {
205
+ size_t ll_counts2[ZOPFLI_NUM_LL];
206
+ size_t d_counts2[ZOPFLI_NUM_D];
207
+ ZopfliLZ77GetHistogramAt(lz77, lstart - 1, ll_counts2, d_counts2);
208
+
209
+ for (i = 0; i < ZOPFLI_NUM_LL; i++) {
210
+ ll_counts[i] -= ll_counts2[i];
211
+ }
212
+ for (i = 0; i < ZOPFLI_NUM_D; i++) {
213
+ d_counts[i] -= d_counts2[i];
214
+ }
215
+ }
216
+ }
217
+ }
218
+
219
+ void ZopfliInitBlockState(const ZopfliOptions* options,
220
+ size_t blockstart, size_t blockend, int add_lmc,
221
+ ZopfliBlockState* s) {
222
+ s->options = options;
223
+ s->blockstart = blockstart;
224
+ s->blockend = blockend;
225
+ #ifdef ZOPFLI_LONGEST_MATCH_CACHE
226
+ if (add_lmc) {
227
+ s->lmc = (ZopfliLongestMatchCache*)malloc(sizeof(ZopfliLongestMatchCache));
228
+ ZopfliInitCache(blockend - blockstart, s->lmc);
229
+ } else {
230
+ s->lmc = 0;
231
+ }
232
+ #endif
233
+ }
234
+
235
+ void ZopfliCleanBlockState(ZopfliBlockState* s) {
236
+ #ifdef ZOPFLI_LONGEST_MATCH_CACHE
237
+ if (s->lmc) {
238
+ ZopfliCleanCache(s->lmc);
239
+ free(s->lmc);
240
+ }
241
+ #endif
64
242
  }
65
243
 
66
244
  /*
@@ -365,7 +543,7 @@ void ZopfliFindLongestMatch(ZopfliBlockState* s, const ZopfliHash* h,
365
543
 
366
544
  void ZopfliLZ77Greedy(ZopfliBlockState* s, const unsigned char* in,
367
545
  size_t instart, size_t inend,
368
- ZopfliLZ77Store* store) {
546
+ ZopfliLZ77Store* store, ZopfliHash* h) {
369
547
  size_t i = 0, j;
370
548
  unsigned short leng;
371
549
  unsigned short dist;
@@ -374,9 +552,6 @@ void ZopfliLZ77Greedy(ZopfliBlockState* s, const unsigned char* in,
374
552
  ? instart - ZOPFLI_WINDOW_SIZE : 0;
375
553
  unsigned short dummysublen[259];
376
554
 
377
- ZopfliHash hash;
378
- ZopfliHash* h = &hash;
379
-
380
555
  #ifdef ZOPFLI_LAZY_MATCHING
381
556
  /* Lazy matching. */
382
557
  unsigned prev_length = 0;
@@ -387,7 +562,7 @@ void ZopfliLZ77Greedy(ZopfliBlockState* s, const unsigned char* in,
387
562
 
388
563
  if (instart == inend) return;
389
564
 
390
- ZopfliInitHash(ZOPFLI_WINDOW_SIZE, h);
565
+ ZopfliResetHash(ZOPFLI_WINDOW_SIZE, h);
391
566
  ZopfliWarmupHash(in, windowstart, inend, h);
392
567
  for (i = windowstart; i < instart; i++) {
393
568
  ZopfliUpdateHash(in, i, inend, h);
@@ -406,7 +581,7 @@ void ZopfliLZ77Greedy(ZopfliBlockState* s, const unsigned char* in,
406
581
  if (match_available) {
407
582
  match_available = 0;
408
583
  if (lengthscore > prevlengthscore + 1) {
409
- ZopfliStoreLitLenDist(in[i - 1], 0, store);
584
+ ZopfliStoreLitLenDist(in[i - 1], 0, i - 1, store);
410
585
  if (lengthscore >= ZOPFLI_MIN_MATCH && leng < ZOPFLI_MAX_MATCH) {
411
586
  match_available = 1;
412
587
  prev_length = leng;
@@ -420,7 +595,7 @@ void ZopfliLZ77Greedy(ZopfliBlockState* s, const unsigned char* in,
420
595
  lengthscore = prevlengthscore;
421
596
  /* Add to output. */
422
597
  ZopfliVerifyLenDist(in, inend, i - 1, dist, leng);
423
- ZopfliStoreLitLenDist(leng, dist, store);
598
+ ZopfliStoreLitLenDist(leng, dist, i - 1, store);
424
599
  for (j = 2; j < leng; j++) {
425
600
  assert(i < inend);
426
601
  i++;
@@ -441,10 +616,10 @@ void ZopfliLZ77Greedy(ZopfliBlockState* s, const unsigned char* in,
441
616
  /* Add to output. */
442
617
  if (lengthscore >= ZOPFLI_MIN_MATCH) {
443
618
  ZopfliVerifyLenDist(in, inend, i, dist, leng);
444
- ZopfliStoreLitLenDist(leng, dist, store);
619
+ ZopfliStoreLitLenDist(leng, dist, i, store);
445
620
  } else {
446
621
  leng = 1;
447
- ZopfliStoreLitLenDist(in[i], 0, store);
622
+ ZopfliStoreLitLenDist(in[i], 0, i, store);
448
623
  }
449
624
  for (j = 1; j < leng; j++) {
450
625
  assert(i < inend);
@@ -452,31 +627,4 @@ void ZopfliLZ77Greedy(ZopfliBlockState* s, const unsigned char* in,
452
627
  ZopfliUpdateHash(in, i, inend, h);
453
628
  }
454
629
  }
455
-
456
- ZopfliCleanHash(h);
457
- }
458
-
459
- void ZopfliLZ77Counts(const unsigned short* litlens,
460
- const unsigned short* dists,
461
- size_t start, size_t end,
462
- size_t* ll_count, size_t* d_count) {
463
- size_t i;
464
-
465
- for (i = 0; i < 288; i++) {
466
- ll_count[i] = 0;
467
- }
468
- for (i = 0; i < 32; i++) {
469
- d_count[i] = 0;
470
- }
471
-
472
- for (i = start; i < end; i++) {
473
- if (dists[i] == 0) {
474
- ll_count[litlens[i]]++;
475
- } else {
476
- ll_count[ZopfliGetLengthSymbol(litlens[i])]++;
477
- d_count[ZopfliGetDistSymbol(dists[i])]++;
478
- }
479
- }
480
-
481
- ll_count[256] = 1; /* End symbol. */
482
630
  }
@@ -33,23 +33,50 @@ compression.
33
33
 
34
34
  /*
35
35
  Stores lit/length and dist pairs for LZ77.
36
- litlens: Contains the literal symbols or length values.
37
- dists: Indicates the distance, or 0 to indicate that there is no distance and
38
- litlens contains a literal instead of a length.
39
- litlens and dists both have the same size.
36
+ Parameter litlens: Contains the literal symbols or length values.
37
+ Parameter dists: Contains the distances. A value is 0 to indicate that there is
38
+ no dist and the corresponding litlens value is a literal instead of a length.
39
+ Parameter size: The size of both the litlens and dists arrays.
40
+ The memory can best be managed by using ZopfliInitLZ77Store to initialize it,
41
+ ZopfliCleanLZ77Store to destroy it, and ZopfliStoreLitLenDist to append values.
42
+
40
43
  */
41
44
  typedef struct ZopfliLZ77Store {
42
45
  unsigned short* litlens; /* Lit or len. */
43
46
  unsigned short* dists; /* If 0: indicates literal in corresponding litlens,
44
47
  if > 0: length in corresponding litlens, this is the distance. */
45
48
  size_t size;
49
+
50
+ const unsigned char* data; /* original data */
51
+ size_t* pos; /* position in data where this LZ77 command begins */
52
+
53
+ unsigned short* ll_symbol;
54
+ unsigned short* d_symbol;
55
+
56
+ /* Cumulative histograms wrapping around per chunk. Each chunk has the amount
57
+ of distinct symbols as length, so using 1 value per LZ77 symbol, we have a
58
+ precise histogram at every N symbols, and the rest can be calculated by
59
+ looping through the actual symbols of this chunk. */
60
+ size_t* ll_counts;
61
+ size_t* d_counts;
46
62
  } ZopfliLZ77Store;
47
63
 
48
- void ZopfliInitLZ77Store(ZopfliLZ77Store* store);
64
+ void ZopfliInitLZ77Store(const unsigned char* data, ZopfliLZ77Store* store);
49
65
  void ZopfliCleanLZ77Store(ZopfliLZ77Store* store);
50
66
  void ZopfliCopyLZ77Store(const ZopfliLZ77Store* source, ZopfliLZ77Store* dest);
51
67
  void ZopfliStoreLitLenDist(unsigned short length, unsigned short dist,
52
- ZopfliLZ77Store* store);
68
+ size_t pos, ZopfliLZ77Store* store);
69
+ void ZopfliAppendLZ77Store(const ZopfliLZ77Store* store,
70
+ ZopfliLZ77Store* target);
71
+ /* Gets the amount of raw bytes that this range of LZ77 symbols spans. */
72
+ size_t ZopfliLZ77GetByteRange(const ZopfliLZ77Store* lz77,
73
+ size_t lstart, size_t lend);
74
+ /* Gets the histogram of lit/len and dist symbols in the given range, using the
75
+ cumulative histograms, so faster than adding one by one for large range. Does
76
+ not add the one end symbol of value 256. */
77
+ void ZopfliLZ77GetHistogram(const ZopfliLZ77Store* lz77,
78
+ size_t lstart, size_t lend,
79
+ size_t* ll_counts, size_t* d_counts);
53
80
 
54
81
  /*
55
82
  Some state information for compressing a block.
@@ -69,6 +96,11 @@ typedef struct ZopfliBlockState {
69
96
  size_t blockend;
70
97
  } ZopfliBlockState;
71
98
 
99
+ void ZopfliInitBlockState(const ZopfliOptions* options,
100
+ size_t blockstart, size_t blockend, int add_lmc,
101
+ ZopfliBlockState* s);
102
+ void ZopfliCleanBlockState(ZopfliBlockState* s);
103
+
72
104
  /*
73
105
  Finds the longest match (length and corresponding distance) for LZ77
74
106
  compression.
@@ -96,22 +128,6 @@ Verifies if length and dist are indeed valid, only used for assertion.
96
128
  void ZopfliVerifyLenDist(const unsigned char* data, size_t datasize, size_t pos,
97
129
  unsigned short dist, unsigned short length);
98
130
 
99
- /*
100
- Counts the number of literal, length and distance symbols in the given lz77
101
- arrays.
102
- litlens: lz77 lit/lengths
103
- dists: ll77 distances
104
- start: where to begin counting in litlens and dists
105
- end: where to stop counting in litlens and dists (not inclusive)
106
- ll_count: count of each lit/len symbol, must have size 288 (see deflate
107
- standard)
108
- d_count: count of each dist symbol, must have size 32 (see deflate standard)
109
- */
110
- void ZopfliLZ77Counts(const unsigned short* litlens,
111
- const unsigned short* dists,
112
- size_t start, size_t end,
113
- size_t* ll_count, size_t* d_count);
114
-
115
131
  /*
116
132
  Does LZ77 using an algorithm similar to gzip, with lazy matching, rather than
117
133
  with the slow but better "squeeze" implementation.
@@ -121,6 +137,6 @@ dictionary.
121
137
  */
122
138
  void ZopfliLZ77Greedy(ZopfliBlockState* s, const unsigned char* in,
123
139
  size_t instart, size_t inend,
124
- ZopfliLZ77Store* store);
140
+ ZopfliLZ77Store* store, ZopfliHash* h);
125
141
 
126
142
  #endif /* ZOPFLI_LZ77_H_ */
@@ -25,35 +25,40 @@ Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
25
25
 
26
26
  #include "blocksplitter.h"
27
27
  #include "deflate.h"
28
+ #include "symbols.h"
28
29
  #include "tree.h"
29
30
  #include "util.h"
30
31
 
31
32
  typedef struct SymbolStats {
32
33
  /* The literal and length symbols. */
33
- size_t litlens[288];
34
+ size_t litlens[ZOPFLI_NUM_LL];
34
35
  /* The 32 unique dist symbols, not the 32768 possible dists. */
35
- size_t dists[32];
36
+ size_t dists[ZOPFLI_NUM_D];
36
37
 
37
- double ll_symbols[288]; /* Length of each lit/len symbol in bits. */
38
- double d_symbols[32]; /* Length of each dist symbol in bits. */
38
+ /* Length of each lit/len symbol in bits. */
39
+ double ll_symbols[ZOPFLI_NUM_LL];
40
+ /* Length of each dist symbol in bits. */
41
+ double d_symbols[ZOPFLI_NUM_D];
39
42
  } SymbolStats;
40
43
 
41
44
  /* Sets everything to 0. */
42
45
  static void InitStats(SymbolStats* stats) {
43
- memset(stats->litlens, 0, 288 * sizeof(stats->litlens[0]));
44
- memset(stats->dists, 0, 32 * sizeof(stats->dists[0]));
46
+ memset(stats->litlens, 0, ZOPFLI_NUM_LL * sizeof(stats->litlens[0]));
47
+ memset(stats->dists, 0, ZOPFLI_NUM_D * sizeof(stats->dists[0]));
45
48
 
46
- memset(stats->ll_symbols, 0, 288 * sizeof(stats->ll_symbols[0]));
47
- memset(stats->d_symbols, 0, 32 * sizeof(stats->d_symbols[0]));
49
+ memset(stats->ll_symbols, 0, ZOPFLI_NUM_LL * sizeof(stats->ll_symbols[0]));
50
+ memset(stats->d_symbols, 0, ZOPFLI_NUM_D * sizeof(stats->d_symbols[0]));
48
51
  }
49
52
 
50
53
  static void CopyStats(SymbolStats* source, SymbolStats* dest) {
51
- memcpy(dest->litlens, source->litlens, 288 * sizeof(dest->litlens[0]));
52
- memcpy(dest->dists, source->dists, 32 * sizeof(dest->dists[0]));
54
+ memcpy(dest->litlens, source->litlens,
55
+ ZOPFLI_NUM_LL * sizeof(dest->litlens[0]));
56
+ memcpy(dest->dists, source->dists, ZOPFLI_NUM_D * sizeof(dest->dists[0]));
53
57
 
54
58
  memcpy(dest->ll_symbols, source->ll_symbols,
55
- 288 * sizeof(dest->ll_symbols[0]));
56
- memcpy(dest->d_symbols, source->d_symbols, 32 * sizeof(dest->d_symbols[0]));
59
+ ZOPFLI_NUM_LL * sizeof(dest->ll_symbols[0]));
60
+ memcpy(dest->d_symbols, source->d_symbols,
61
+ ZOPFLI_NUM_D * sizeof(dest->d_symbols[0]));
57
62
  }
58
63
 
59
64
  /* Adds the bit lengths. */
@@ -61,11 +66,11 @@ static void AddWeighedStatFreqs(const SymbolStats* stats1, double w1,
61
66
  const SymbolStats* stats2, double w2,
62
67
  SymbolStats* result) {
63
68
  size_t i;
64
- for (i = 0; i < 288; i++) {
69
+ for (i = 0; i < ZOPFLI_NUM_LL; i++) {
65
70
  result->litlens[i] =
66
71
  (size_t) (stats1->litlens[i] * w1 + stats2->litlens[i] * w2);
67
72
  }
68
- for (i = 0; i < 32; i++) {
73
+ for (i = 0; i < ZOPFLI_NUM_D; i++) {
69
74
  result->dists[i] =
70
75
  (size_t) (stats1->dists[i] * w1 + stats2->dists[i] * w2);
71
76
  }
@@ -96,15 +101,15 @@ static void RandomizeFreqs(RanState* state, size_t* freqs, int n) {
96
101
  }
97
102
 
98
103
  static void RandomizeStatFreqs(RanState* state, SymbolStats* stats) {
99
- RandomizeFreqs(state, stats->litlens, 288);
100
- RandomizeFreqs(state, stats->dists, 32);
104
+ RandomizeFreqs(state, stats->litlens, ZOPFLI_NUM_LL);
105
+ RandomizeFreqs(state, stats->dists, ZOPFLI_NUM_D);
101
106
  stats->litlens[256] = 1; /* End symbol. */
102
107
  }
103
108
 
104
109
  static void ClearStatFreqs(SymbolStats* stats) {
105
110
  size_t i;
106
- for (i = 0; i < 288; i++) stats->litlens[i] = 0;
107
- for (i = 0; i < 32; i++) stats->dists[i] = 0;
111
+ for (i = 0; i < ZOPFLI_NUM_LL; i++) stats->litlens[i] = 0;
112
+ for (i = 0; i < ZOPFLI_NUM_D; i++) stats->dists[i] = 0;
108
113
  }
109
114
 
110
115
  /*
@@ -126,7 +131,7 @@ static double GetCostFixed(unsigned litlen, unsigned dist, void* unused) {
126
131
  int dbits = ZopfliGetDistExtraBits(dist);
127
132
  int lbits = ZopfliGetLengthExtraBits(litlen);
128
133
  int lsym = ZopfliGetLengthSymbol(litlen);
129
- double cost = 0;
134
+ int cost = 0;
130
135
  if (lsym <= 279) cost += 7;
131
136
  else cost += 8;
132
137
  cost += 5; /* Every dist symbol has length 5. */
@@ -147,7 +152,7 @@ static double GetCostStat(unsigned litlen, unsigned dist, void* context) {
147
152
  int lbits = ZopfliGetLengthExtraBits(litlen);
148
153
  int dsym = ZopfliGetDistSymbol(dist);
149
154
  int dbits = ZopfliGetDistExtraBits(dist);
150
- return stats->ll_symbols[lsym] + lbits + stats->d_symbols[dsym] + dbits;
155
+ return lbits + dbits + stats->ll_symbols[lsym] + stats->d_symbols[dsym];
151
156
  }
152
157
  }
153
158
 
@@ -192,6 +197,10 @@ static double GetCostModelMinCost(CostModelFun* costmodel, void* costcontext) {
192
197
  return costmodel(bestlength, bestdist, costcontext);
193
198
  }
194
199
 
200
+ static size_t zopfli_min(size_t a, size_t b) {
201
+ return a < b ? a : b;
202
+ }
203
+
195
204
  /*
196
205
  Performs the forward pass for "squeeze". Gets the most optimal length to reach
197
206
  every byte from a previous byte, using cost calculations.
@@ -209,27 +218,23 @@ static double GetBestLengths(ZopfliBlockState *s,
209
218
  const unsigned char* in,
210
219
  size_t instart, size_t inend,
211
220
  CostModelFun* costmodel, void* costcontext,
212
- unsigned short* length_array) {
221
+ unsigned short* length_array,
222
+ ZopfliHash* h, float* costs) {
213
223
  /* Best cost to get here so far. */
214
224
  size_t blocksize = inend - instart;
215
- float* costs;
216
- size_t i = 0, k;
225
+ size_t i = 0, k, kend;
217
226
  unsigned short leng;
218
227
  unsigned short dist;
219
228
  unsigned short sublen[259];
220
229
  size_t windowstart = instart > ZOPFLI_WINDOW_SIZE
221
230
  ? instart - ZOPFLI_WINDOW_SIZE : 0;
222
- ZopfliHash hash;
223
- ZopfliHash* h = &hash;
224
231
  double result;
225
232
  double mincost = GetCostModelMinCost(costmodel, costcontext);
233
+ double mincostaddcostj;
226
234
 
227
235
  if (instart == inend) return 0;
228
236
 
229
- costs = (float*)malloc(sizeof(float) * (blocksize + 1));
230
- if (!costs) exit(-1); /* Allocation failed. */
231
-
232
- ZopfliInitHash(ZOPFLI_WINDOW_SIZE, h);
237
+ ZopfliResetHash(ZOPFLI_WINDOW_SIZE, h);
233
238
  ZopfliWarmupHash(in, windowstart, inend, h);
234
239
  for (i = windowstart; i < instart; i++) {
235
240
  ZopfliUpdateHash(in, i, inend, h);
@@ -270,7 +275,7 @@ static double GetBestLengths(ZopfliBlockState *s,
270
275
 
271
276
  /* Literal. */
272
277
  if (i + 1 <= inend) {
273
- double newCost = costs[j] + costmodel(in[i], 0, costcontext);
278
+ double newCost = costmodel(in[i], 0, costcontext) + costs[j];
274
279
  assert(newCost >= 0);
275
280
  if (newCost < costs[j + 1]) {
276
281
  costs[j + 1] = newCost;
@@ -278,14 +283,16 @@ static double GetBestLengths(ZopfliBlockState *s,
278
283
  }
279
284
  }
280
285
  /* Lengths. */
281
- for (k = 3; k <= leng && i + k <= inend; k++) {
286
+ kend = zopfli_min(leng, inend-i);
287
+ mincostaddcostj = mincost + costs[j];
288
+ for (k = 3; k <= kend; k++) {
282
289
  double newCost;
283
290
 
284
291
  /* Calling the cost model is expensive, avoid this if we are already at
285
292
  the minimum possible cost that it can return. */
286
- if (costs[j + k] - costs[j] <= mincost) continue;
293
+ if (costs[j + k] <= mincostaddcostj) continue;
287
294
 
288
- newCost = costs[j] + costmodel(k, sublen[k], costcontext);
295
+ newCost = costmodel(k, sublen[k], costcontext) + costs[j];
289
296
  assert(newCost >= 0);
290
297
  if (newCost < costs[j + k]) {
291
298
  assert(k <= ZOPFLI_MAX_MATCH);
@@ -298,9 +305,6 @@ static double GetBestLengths(ZopfliBlockState *s,
298
305
  assert(costs[blocksize] >= 0);
299
306
  result = costs[blocksize];
300
307
 
301
- ZopfliCleanHash(h);
302
- free(costs);
303
-
304
308
  return result;
305
309
  }
306
310
 
@@ -334,19 +338,16 @@ static void TraceBackwards(size_t size, const unsigned short* length_array,
334
338
  static void FollowPath(ZopfliBlockState* s,
335
339
  const unsigned char* in, size_t instart, size_t inend,
336
340
  unsigned short* path, size_t pathsize,
337
- ZopfliLZ77Store* store) {
341
+ ZopfliLZ77Store* store, ZopfliHash *h) {
338
342
  size_t i, j, pos = 0;
339
343
  size_t windowstart = instart > ZOPFLI_WINDOW_SIZE
340
344
  ? instart - ZOPFLI_WINDOW_SIZE : 0;
341
345
 
342
346
  size_t total_length_test = 0;
343
347
 
344
- ZopfliHash hash;
345
- ZopfliHash* h = &hash;
346
-
347
348
  if (instart == inend) return;
348
349
 
349
- ZopfliInitHash(ZOPFLI_WINDOW_SIZE, h);
350
+ ZopfliResetHash(ZOPFLI_WINDOW_SIZE, h);
350
351
  ZopfliWarmupHash(in, windowstart, inend, h);
351
352
  for (i = windowstart; i < instart; i++) {
352
353
  ZopfliUpdateHash(in, i, inend, h);
@@ -369,11 +370,11 @@ static void FollowPath(ZopfliBlockState* s,
369
370
  &dist, &dummy_length);
370
371
  assert(!(dummy_length != length && length > 2 && dummy_length > 2));
371
372
  ZopfliVerifyLenDist(in, inend, pos, dist, length);
372
- ZopfliStoreLitLenDist(length, dist, store);
373
+ ZopfliStoreLitLenDist(length, dist, pos, store);
373
374
  total_length_test += length;
374
375
  } else {
375
376
  length = 1;
376
- ZopfliStoreLitLenDist(in[pos], 0, store);
377
+ ZopfliStoreLitLenDist(in[pos], 0, pos, store);
377
378
  total_length_test++;
378
379
  }
379
380
 
@@ -385,14 +386,12 @@ static void FollowPath(ZopfliBlockState* s,
385
386
 
386
387
  pos += length;
387
388
  }
388
-
389
- ZopfliCleanHash(h);
390
389
  }
391
390
 
392
391
  /* Calculates the entropy of the statistics */
393
392
  static void CalculateStatistics(SymbolStats* stats) {
394
- ZopfliCalculateEntropy(stats->litlens, 288, stats->ll_symbols);
395
- ZopfliCalculateEntropy(stats->dists, 32, stats->d_symbols);
393
+ ZopfliCalculateEntropy(stats->litlens, ZOPFLI_NUM_LL, stats->ll_symbols);
394
+ ZopfliCalculateEntropy(stats->dists, ZOPFLI_NUM_D, stats->d_symbols);
396
395
  }
397
396
 
398
397
  /* Appends the symbol statistics from the store. */
@@ -414,14 +413,13 @@ static void GetStatistics(const ZopfliLZ77Store* store, SymbolStats* stats) {
414
413
  /*
415
414
  Does a single run for ZopfliLZ77Optimal. For good compression, repeated runs
416
415
  with updated statistics should be performed.
417
-
418
416
  s: the block state
419
417
  in: the input data array
420
418
  instart: where to start
421
419
  inend: where to stop (not inclusive)
422
420
  path: pointer to dynamically allocated memory to store the path
423
421
  pathsize: pointer to the size of the dynamic path array
424
- length_array: array if size (inend - instart) used to store lengths
422
+ length_array: array of size (inend - instart) used to store lengths
425
423
  costmodel: function to use as the cost model for this squeeze run
426
424
  costcontext: abstract context for the costmodel function
427
425
  store: place to output the LZ77 data
@@ -432,20 +430,22 @@ static double LZ77OptimalRun(ZopfliBlockState* s,
432
430
  const unsigned char* in, size_t instart, size_t inend,
433
431
  unsigned short** path, size_t* pathsize,
434
432
  unsigned short* length_array, CostModelFun* costmodel,
435
- void* costcontext, ZopfliLZ77Store* store) {
436
- double cost = GetBestLengths(
437
- s, in, instart, inend, costmodel, costcontext, length_array);
433
+ void* costcontext, ZopfliLZ77Store* store,
434
+ ZopfliHash* h, float* costs) {
435
+ double cost = GetBestLengths(s, in, instart, inend, costmodel,
436
+ costcontext, length_array, h, costs);
438
437
  free(*path);
439
438
  *path = 0;
440
439
  *pathsize = 0;
441
440
  TraceBackwards(inend - instart, length_array, path, pathsize);
442
- FollowPath(s, in, instart, inend, *path, *pathsize, store);
441
+ FollowPath(s, in, instart, inend, *path, *pathsize, store, h);
443
442
  assert(cost < ZOPFLI_LARGE_FLOAT);
444
443
  return cost;
445
444
  }
446
445
 
447
446
  void ZopfliLZ77Optimal(ZopfliBlockState *s,
448
447
  const unsigned char* in, size_t instart, size_t inend,
448
+ int numiterations,
449
449
  ZopfliLZ77Store* store) {
450
450
  /* Dist to get to here with smallest cost. */
451
451
  size_t blocksize = inend - instart;
@@ -454,8 +454,11 @@ void ZopfliLZ77Optimal(ZopfliBlockState *s,
454
454
  unsigned short* path = 0;
455
455
  size_t pathsize = 0;
456
456
  ZopfliLZ77Store currentstore;
457
+ ZopfliHash hash;
458
+ ZopfliHash* h = &hash;
457
459
  SymbolStats stats, beststats, laststats;
458
460
  int i;
461
+ float* costs = (float*)malloc(sizeof(float) * (blocksize + 1));
459
462
  double cost;
460
463
  double bestcost = ZOPFLI_LARGE_FLOAT;
461
464
  double lastcost = 0;
@@ -463,29 +466,30 @@ void ZopfliLZ77Optimal(ZopfliBlockState *s,
463
466
  RanState ran_state;
464
467
  int lastrandomstep = -1;
465
468
 
469
+ if (!costs) exit(-1); /* Allocation failed. */
466
470
  if (!length_array) exit(-1); /* Allocation failed. */
467
471
 
468
472
  InitRanState(&ran_state);
469
473
  InitStats(&stats);
470
- ZopfliInitLZ77Store(&currentstore);
474
+ ZopfliInitLZ77Store(in, &currentstore);
475
+ ZopfliAllocHash(ZOPFLI_WINDOW_SIZE, h);
471
476
 
472
477
  /* Do regular deflate, then loop multiple shortest path runs, each time using
473
478
  the statistics of the previous run. */
474
479
 
475
480
  /* Initial run. */
476
- ZopfliLZ77Greedy(s, in, instart, inend, &currentstore);
481
+ ZopfliLZ77Greedy(s, in, instart, inend, &currentstore, h);
477
482
  GetStatistics(&currentstore, &stats);
478
483
 
479
484
  /* Repeat statistics, each time using the cost model from the previous stat
480
485
  run. */
481
- for (i = 0; i < s->options->numiterations; i++) {
486
+ for (i = 0; i < numiterations; i++) {
482
487
  ZopfliCleanLZ77Store(&currentstore);
483
- ZopfliInitLZ77Store(&currentstore);
488
+ ZopfliInitLZ77Store(in, &currentstore);
484
489
  LZ77OptimalRun(s, in, instart, inend, &path, &pathsize,
485
490
  length_array, GetCostStat, (void*)&stats,
486
- &currentstore);
487
- cost = ZopfliCalculateBlockSize(currentstore.litlens, currentstore.dists,
488
- 0, currentstore.size, 2);
491
+ &currentstore, h, costs);
492
+ cost = ZopfliCalculateBlockSize(&currentstore, 0, currentstore.size, 2);
489
493
  if (s->options->verbose_more || (s->options->verbose && cost < bestcost)) {
490
494
  fprintf(stderr, "Iteration %d: %d bit\n", i, (int) cost);
491
495
  }
@@ -516,7 +520,9 @@ void ZopfliLZ77Optimal(ZopfliBlockState *s,
516
520
 
517
521
  free(length_array);
518
522
  free(path);
523
+ free(costs);
519
524
  ZopfliCleanLZ77Store(&currentstore);
525
+ ZopfliCleanHash(h);
520
526
  }
521
527
 
522
528
  void ZopfliLZ77OptimalFixed(ZopfliBlockState *s,
@@ -530,17 +536,25 @@ void ZopfliLZ77OptimalFixed(ZopfliBlockState *s,
530
536
  (unsigned short*)malloc(sizeof(unsigned short) * (blocksize + 1));
531
537
  unsigned short* path = 0;
532
538
  size_t pathsize = 0;
539
+ ZopfliHash hash;
540
+ ZopfliHash* h = &hash;
541
+ float* costs = (float*)malloc(sizeof(float) * (blocksize + 1));
533
542
 
543
+ if (!costs) exit(-1); /* Allocation failed. */
534
544
  if (!length_array) exit(-1); /* Allocation failed. */
535
545
 
546
+ ZopfliAllocHash(ZOPFLI_WINDOW_SIZE, h);
547
+
536
548
  s->blockstart = instart;
537
549
  s->blockend = inend;
538
550
 
539
551
  /* Shortest path for fixed tree. This one should give the shortest possible
540
552
  result for fixed tree, no repeated runs are needed since the tree is known. */
541
553
  LZ77OptimalRun(s, in, instart, inend, &path, &pathsize,
542
- length_array, GetCostFixed, 0, store);
554
+ length_array, GetCostFixed, 0, store, h, costs);
543
555
 
544
556
  free(length_array);
545
557
  free(path);
558
+ free(costs);
559
+ ZopfliCleanHash(h);
546
560
  }