zopfli 0.0.2 → 0.0.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. checksums.yaml +6 -14
  2. data/.gitmodules +1 -1
  3. data/.travis.yml +23 -0
  4. data/Gemfile +2 -0
  5. data/README.md +6 -1
  6. data/Rakefile +8 -10
  7. data/ext/extconf.rb +2 -1
  8. data/ext/zopfli.c +39 -20
  9. data/lib/zopfli/version.rb +1 -1
  10. data/smoke.sh +9 -0
  11. data/{test → spec}/fixtures/alice29.txt +0 -0
  12. data/spec/spec_helper.rb +2 -0
  13. data/spec/zopfli_spec.rb +68 -0
  14. data/vendor/zopfli/{blocksplitter.c → src/zopfli/blocksplitter.c} +41 -53
  15. data/vendor/zopfli/{blocksplitter.h → src/zopfli/blocksplitter.h} +2 -6
  16. data/vendor/zopfli/{cache.c → src/zopfli/cache.c} +6 -0
  17. data/vendor/zopfli/{cache.h → src/zopfli/cache.h} +0 -0
  18. data/vendor/zopfli/src/zopfli/deflate.c +931 -0
  19. data/vendor/zopfli/{deflate.h → src/zopfli/deflate.h} +17 -2
  20. data/vendor/zopfli/src/zopfli/gzip_container.c +124 -0
  21. data/vendor/zopfli/{gzip_container.h → src/zopfli/gzip_container.h} +8 -0
  22. data/vendor/zopfli/{hash.c → src/zopfli/hash.c} +18 -10
  23. data/vendor/zopfli/{hash.h → src/zopfli/hash.h} +10 -7
  24. data/vendor/zopfli/{katajainen.c → src/zopfli/katajainen.c} +73 -62
  25. data/vendor/zopfli/{katajainen.h → src/zopfli/katajainen.h} +1 -1
  26. data/vendor/zopfli/{lz77.c → src/zopfli/lz77.c} +190 -42
  27. data/vendor/zopfli/{lz77.h → src/zopfli/lz77.h} +39 -23
  28. data/vendor/zopfli/{squeeze.c → src/zopfli/squeeze.c} +75 -61
  29. data/vendor/zopfli/{squeeze.h → src/zopfli/squeeze.h} +1 -0
  30. data/vendor/zopfli/{util.c → src/zopfli/symbols.h} +49 -23
  31. data/vendor/zopfli/{tree.c → src/zopfli/tree.c} +0 -0
  32. data/vendor/zopfli/{tree.h → src/zopfli/tree.h} +0 -0
  33. data/vendor/zopfli/src/zopfli/util.c +35 -0
  34. data/vendor/zopfli/{util.h → src/zopfli/util.h} +6 -23
  35. data/vendor/zopfli/{zlib_container.c → src/zopfli/zlib_container.c} +1 -1
  36. data/vendor/zopfli/{zlib_container.h → src/zopfli/zlib_container.h} +8 -0
  37. data/vendor/zopfli/{zopfli.h → src/zopfli/zopfli.h} +10 -4
  38. data/vendor/zopfli/{zopfli_bin.c → src/zopfli/zopfli_bin.c} +31 -15
  39. data/vendor/zopfli/{zopfli_lib.c → src/zopfli/zopfli_lib.c} +1 -2
  40. data/zopfli.gemspec +9 -28
  41. metadata +51 -50
  42. data/test/test_zopfli_deflate.rb +0 -47
  43. data/vendor/zopfli/CONTRIBUTORS +0 -6
  44. data/vendor/zopfli/README +0 -25
  45. data/vendor/zopfli/deflate.c +0 -698
  46. data/vendor/zopfli/gzip_container.c +0 -117
  47. data/vendor/zopfli/makefile +0 -5
@@ -30,7 +30,7 @@ The output is tailored for DEFLATE: symbols that never occur, get a bit length
30
30
  of 0, and if only a single symbol occurs at least once, its bitlength will be 1,
31
31
  and not 0 as would theoretically be needed for a single symbol.
32
32
 
33
- frequencies: The amount of occurances of each symbol.
33
+ frequencies: The amount of occurrences of each symbol.
34
34
  n: The amount of symbols.
35
35
  maxbits: Maximum bit length, inclusive.
36
36
  bitlengths: Output, the bitlengths for the symbol prefix codes.
@@ -18,37 +18,76 @@ Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
18
18
  */
19
19
 
20
20
  #include "lz77.h"
21
+ #include "symbols.h"
21
22
  #include "util.h"
22
23
 
23
24
  #include <assert.h>
24
25
  #include <stdio.h>
25
26
  #include <stdlib.h>
26
27
 
27
- void ZopfliInitLZ77Store(ZopfliLZ77Store* store) {
28
+ void ZopfliInitLZ77Store(const unsigned char* data, ZopfliLZ77Store* store) {
28
29
  store->size = 0;
29
30
  store->litlens = 0;
30
31
  store->dists = 0;
32
+ store->pos = 0;
33
+ store->data = data;
34
+ store->ll_symbol = 0;
35
+ store->d_symbol = 0;
36
+ store->ll_counts = 0;
37
+ store->d_counts = 0;
31
38
  }
32
39
 
33
40
  void ZopfliCleanLZ77Store(ZopfliLZ77Store* store) {
34
41
  free(store->litlens);
35
42
  free(store->dists);
43
+ free(store->pos);
44
+ free(store->ll_symbol);
45
+ free(store->d_symbol);
46
+ free(store->ll_counts);
47
+ free(store->d_counts);
48
+ }
49
+
50
+ static size_t CeilDiv(size_t a, size_t b) {
51
+ return (a + b - 1) / b;
36
52
  }
37
53
 
38
54
  void ZopfliCopyLZ77Store(
39
55
  const ZopfliLZ77Store* source, ZopfliLZ77Store* dest) {
40
56
  size_t i;
57
+ size_t llsize = ZOPFLI_NUM_LL * CeilDiv(source->size, ZOPFLI_NUM_LL);
58
+ size_t dsize = ZOPFLI_NUM_D * CeilDiv(source->size, ZOPFLI_NUM_D);
41
59
  ZopfliCleanLZ77Store(dest);
60
+ ZopfliInitLZ77Store(source->data, dest);
42
61
  dest->litlens =
43
62
  (unsigned short*)malloc(sizeof(*dest->litlens) * source->size);
44
63
  dest->dists = (unsigned short*)malloc(sizeof(*dest->dists) * source->size);
45
-
46
- if (!dest->litlens || !dest->dists) exit(-1); /* Allocation failed. */
64
+ dest->pos = (size_t*)malloc(sizeof(*dest->pos) * source->size);
65
+ dest->ll_symbol =
66
+ (unsigned short*)malloc(sizeof(*dest->ll_symbol) * source->size);
67
+ dest->d_symbol =
68
+ (unsigned short*)malloc(sizeof(*dest->d_symbol) * source->size);
69
+ dest->ll_counts = (size_t*)malloc(sizeof(*dest->ll_counts) * llsize);
70
+ dest->d_counts = (size_t*)malloc(sizeof(*dest->d_counts) * dsize);
71
+
72
+ /* Allocation failed. */
73
+ if (!dest->litlens || !dest->dists) exit(-1);
74
+ if (!dest->pos) exit(-1);
75
+ if (!dest->ll_symbol || !dest->d_symbol) exit(-1);
76
+ if (!dest->ll_counts || !dest->d_counts) exit(-1);
47
77
 
48
78
  dest->size = source->size;
49
79
  for (i = 0; i < source->size; i++) {
50
80
  dest->litlens[i] = source->litlens[i];
51
81
  dest->dists[i] = source->dists[i];
82
+ dest->pos[i] = source->pos[i];
83
+ dest->ll_symbol[i] = source->ll_symbol[i];
84
+ dest->d_symbol[i] = source->d_symbol[i];
85
+ }
86
+ for (i = 0; i < llsize; i++) {
87
+ dest->ll_counts[i] = source->ll_counts[i];
88
+ }
89
+ for (i = 0; i < dsize; i++) {
90
+ dest->d_counts[i] = source->d_counts[i];
52
91
  }
53
92
  }
54
93
 
@@ -57,10 +96,149 @@ Appends the length and distance to the LZ77 arrays of the ZopfliLZ77Store.
57
96
  context must be a ZopfliLZ77Store*.
58
97
  */
59
98
  void ZopfliStoreLitLenDist(unsigned short length, unsigned short dist,
60
- ZopfliLZ77Store* store) {
61
- size_t size2 = store->size; /* Needed for using ZOPFLI_APPEND_DATA twice. */
99
+ size_t pos, ZopfliLZ77Store* store) {
100
+ size_t i;
101
+ /* Needed for using ZOPFLI_APPEND_DATA multiple times. */
102
+ size_t origsize = store->size;
103
+ size_t llstart = ZOPFLI_NUM_LL * (origsize / ZOPFLI_NUM_LL);
104
+ size_t dstart = ZOPFLI_NUM_D * (origsize / ZOPFLI_NUM_D);
105
+
106
+ /* Every time the index wraps around, a new cumulative histogram is made: we're
107
+ keeping one histogram value per LZ77 symbol rather than a full histogram for
108
+ each to save memory. */
109
+ if (origsize % ZOPFLI_NUM_LL == 0) {
110
+ size_t llsize = origsize;
111
+ for (i = 0; i < ZOPFLI_NUM_LL; i++) {
112
+ ZOPFLI_APPEND_DATA(
113
+ origsize == 0 ? 0 : store->ll_counts[origsize - ZOPFLI_NUM_LL + i],
114
+ &store->ll_counts, &llsize);
115
+ }
116
+ }
117
+ if (origsize % ZOPFLI_NUM_D == 0) {
118
+ size_t dsize = origsize;
119
+ for (i = 0; i < ZOPFLI_NUM_D; i++) {
120
+ ZOPFLI_APPEND_DATA(
121
+ origsize == 0 ? 0 : store->d_counts[origsize - ZOPFLI_NUM_D + i],
122
+ &store->d_counts, &dsize);
123
+ }
124
+ }
125
+
62
126
  ZOPFLI_APPEND_DATA(length, &store->litlens, &store->size);
63
- ZOPFLI_APPEND_DATA(dist, &store->dists, &size2);
127
+ store->size = origsize;
128
+ ZOPFLI_APPEND_DATA(dist, &store->dists, &store->size);
129
+ store->size = origsize;
130
+ ZOPFLI_APPEND_DATA(pos, &store->pos, &store->size);
131
+ assert(length < 259);
132
+
133
+ if (dist == 0) {
134
+ store->size = origsize;
135
+ ZOPFLI_APPEND_DATA(length, &store->ll_symbol, &store->size);
136
+ store->size = origsize;
137
+ ZOPFLI_APPEND_DATA(0, &store->d_symbol, &store->size);
138
+ store->ll_counts[llstart + length]++;
139
+ } else {
140
+ store->size = origsize;
141
+ ZOPFLI_APPEND_DATA(ZopfliGetLengthSymbol(length),
142
+ &store->ll_symbol, &store->size);
143
+ store->size = origsize;
144
+ ZOPFLI_APPEND_DATA(ZopfliGetDistSymbol(dist),
145
+ &store->d_symbol, &store->size);
146
+ store->ll_counts[llstart + ZopfliGetLengthSymbol(length)]++;
147
+ store->d_counts[dstart + ZopfliGetDistSymbol(dist)]++;
148
+ }
149
+ }
150
+
151
+ void ZopfliAppendLZ77Store(const ZopfliLZ77Store* store,
152
+ ZopfliLZ77Store* target) {
153
+ size_t i;
154
+ for (i = 0; i < store->size; i++) {
155
+ ZopfliStoreLitLenDist(store->litlens[i], store->dists[i],
156
+ store->pos[i], target);
157
+ }
158
+ }
159
+
160
+ size_t ZopfliLZ77GetByteRange(const ZopfliLZ77Store* lz77,
161
+ size_t lstart, size_t lend) {
162
+ size_t l = lend - 1;
163
+ if (lstart == lend) return 0;
164
+ return lz77->pos[l] + ((lz77->dists[l] == 0) ?
165
+ 1 : lz77->litlens[l]) - lz77->pos[lstart];
166
+ }
167
+
168
+ static void ZopfliLZ77GetHistogramAt(const ZopfliLZ77Store* lz77, size_t lpos,
169
+ size_t* ll_counts, size_t* d_counts) {
170
+ /* The real histogram is created by using the histogram for this chunk, but
171
+ all superfluous values of this chunk subtracted. */
172
+ size_t llpos = ZOPFLI_NUM_LL * (lpos / ZOPFLI_NUM_LL);
173
+ size_t dpos = ZOPFLI_NUM_D * (lpos / ZOPFLI_NUM_D);
174
+ size_t i;
175
+ for (i = 0; i < ZOPFLI_NUM_LL; i++) {
176
+ ll_counts[i] = lz77->ll_counts[llpos + i];
177
+ }
178
+ for (i = lpos + 1; i < llpos + ZOPFLI_NUM_LL && i < lz77->size; i++) {
179
+ ll_counts[lz77->ll_symbol[i]]--;
180
+ }
181
+ for (i = 0; i < ZOPFLI_NUM_D; i++) {
182
+ d_counts[i] = lz77->d_counts[dpos + i];
183
+ }
184
+ for (i = lpos + 1; i < dpos + ZOPFLI_NUM_D && i < lz77->size; i++) {
185
+ if (lz77->dists[i] != 0) d_counts[lz77->d_symbol[i]]--;
186
+ }
187
+ }
188
+
189
+ void ZopfliLZ77GetHistogram(const ZopfliLZ77Store* lz77,
190
+ size_t lstart, size_t lend,
191
+ size_t* ll_counts, size_t* d_counts) {
192
+ size_t i;
193
+ if (lstart + ZOPFLI_NUM_LL * 3 > lend) {
194
+ memset(ll_counts, 0, sizeof(*ll_counts) * ZOPFLI_NUM_LL);
195
+ memset(d_counts, 0, sizeof(*d_counts) * ZOPFLI_NUM_D);
196
+ for (i = lstart; i < lend; i++) {
197
+ ll_counts[lz77->ll_symbol[i]]++;
198
+ if (lz77->dists[i] != 0) d_counts[lz77->d_symbol[i]]++;
199
+ }
200
+ } else {
201
+ /* Subtract the cumulative histograms at the end and the start to get the
202
+ histogram for this range. */
203
+ ZopfliLZ77GetHistogramAt(lz77, lend - 1, ll_counts, d_counts);
204
+ if (lstart > 0) {
205
+ size_t ll_counts2[ZOPFLI_NUM_LL];
206
+ size_t d_counts2[ZOPFLI_NUM_D];
207
+ ZopfliLZ77GetHistogramAt(lz77, lstart - 1, ll_counts2, d_counts2);
208
+
209
+ for (i = 0; i < ZOPFLI_NUM_LL; i++) {
210
+ ll_counts[i] -= ll_counts2[i];
211
+ }
212
+ for (i = 0; i < ZOPFLI_NUM_D; i++) {
213
+ d_counts[i] -= d_counts2[i];
214
+ }
215
+ }
216
+ }
217
+ }
218
+
219
+ void ZopfliInitBlockState(const ZopfliOptions* options,
220
+ size_t blockstart, size_t blockend, int add_lmc,
221
+ ZopfliBlockState* s) {
222
+ s->options = options;
223
+ s->blockstart = blockstart;
224
+ s->blockend = blockend;
225
+ #ifdef ZOPFLI_LONGEST_MATCH_CACHE
226
+ if (add_lmc) {
227
+ s->lmc = (ZopfliLongestMatchCache*)malloc(sizeof(ZopfliLongestMatchCache));
228
+ ZopfliInitCache(blockend - blockstart, s->lmc);
229
+ } else {
230
+ s->lmc = 0;
231
+ }
232
+ #endif
233
+ }
234
+
235
+ void ZopfliCleanBlockState(ZopfliBlockState* s) {
236
+ #ifdef ZOPFLI_LONGEST_MATCH_CACHE
237
+ if (s->lmc) {
238
+ ZopfliCleanCache(s->lmc);
239
+ free(s->lmc);
240
+ }
241
+ #endif
64
242
  }
65
243
 
66
244
  /*
@@ -365,7 +543,7 @@ void ZopfliFindLongestMatch(ZopfliBlockState* s, const ZopfliHash* h,
365
543
 
366
544
  void ZopfliLZ77Greedy(ZopfliBlockState* s, const unsigned char* in,
367
545
  size_t instart, size_t inend,
368
- ZopfliLZ77Store* store) {
546
+ ZopfliLZ77Store* store, ZopfliHash* h) {
369
547
  size_t i = 0, j;
370
548
  unsigned short leng;
371
549
  unsigned short dist;
@@ -374,9 +552,6 @@ void ZopfliLZ77Greedy(ZopfliBlockState* s, const unsigned char* in,
374
552
  ? instart - ZOPFLI_WINDOW_SIZE : 0;
375
553
  unsigned short dummysublen[259];
376
554
 
377
- ZopfliHash hash;
378
- ZopfliHash* h = &hash;
379
-
380
555
  #ifdef ZOPFLI_LAZY_MATCHING
381
556
  /* Lazy matching. */
382
557
  unsigned prev_length = 0;
@@ -387,7 +562,7 @@ void ZopfliLZ77Greedy(ZopfliBlockState* s, const unsigned char* in,
387
562
 
388
563
  if (instart == inend) return;
389
564
 
390
- ZopfliInitHash(ZOPFLI_WINDOW_SIZE, h);
565
+ ZopfliResetHash(ZOPFLI_WINDOW_SIZE, h);
391
566
  ZopfliWarmupHash(in, windowstart, inend, h);
392
567
  for (i = windowstart; i < instart; i++) {
393
568
  ZopfliUpdateHash(in, i, inend, h);
@@ -406,7 +581,7 @@ void ZopfliLZ77Greedy(ZopfliBlockState* s, const unsigned char* in,
406
581
  if (match_available) {
407
582
  match_available = 0;
408
583
  if (lengthscore > prevlengthscore + 1) {
409
- ZopfliStoreLitLenDist(in[i - 1], 0, store);
584
+ ZopfliStoreLitLenDist(in[i - 1], 0, i - 1, store);
410
585
  if (lengthscore >= ZOPFLI_MIN_MATCH && leng < ZOPFLI_MAX_MATCH) {
411
586
  match_available = 1;
412
587
  prev_length = leng;
@@ -420,7 +595,7 @@ void ZopfliLZ77Greedy(ZopfliBlockState* s, const unsigned char* in,
420
595
  lengthscore = prevlengthscore;
421
596
  /* Add to output. */
422
597
  ZopfliVerifyLenDist(in, inend, i - 1, dist, leng);
423
- ZopfliStoreLitLenDist(leng, dist, store);
598
+ ZopfliStoreLitLenDist(leng, dist, i - 1, store);
424
599
  for (j = 2; j < leng; j++) {
425
600
  assert(i < inend);
426
601
  i++;
@@ -441,10 +616,10 @@ void ZopfliLZ77Greedy(ZopfliBlockState* s, const unsigned char* in,
441
616
  /* Add to output. */
442
617
  if (lengthscore >= ZOPFLI_MIN_MATCH) {
443
618
  ZopfliVerifyLenDist(in, inend, i, dist, leng);
444
- ZopfliStoreLitLenDist(leng, dist, store);
619
+ ZopfliStoreLitLenDist(leng, dist, i, store);
445
620
  } else {
446
621
  leng = 1;
447
- ZopfliStoreLitLenDist(in[i], 0, store);
622
+ ZopfliStoreLitLenDist(in[i], 0, i, store);
448
623
  }
449
624
  for (j = 1; j < leng; j++) {
450
625
  assert(i < inend);
@@ -452,31 +627,4 @@ void ZopfliLZ77Greedy(ZopfliBlockState* s, const unsigned char* in,
452
627
  ZopfliUpdateHash(in, i, inend, h);
453
628
  }
454
629
  }
455
-
456
- ZopfliCleanHash(h);
457
- }
458
-
459
- void ZopfliLZ77Counts(const unsigned short* litlens,
460
- const unsigned short* dists,
461
- size_t start, size_t end,
462
- size_t* ll_count, size_t* d_count) {
463
- size_t i;
464
-
465
- for (i = 0; i < 288; i++) {
466
- ll_count[i] = 0;
467
- }
468
- for (i = 0; i < 32; i++) {
469
- d_count[i] = 0;
470
- }
471
-
472
- for (i = start; i < end; i++) {
473
- if (dists[i] == 0) {
474
- ll_count[litlens[i]]++;
475
- } else {
476
- ll_count[ZopfliGetLengthSymbol(litlens[i])]++;
477
- d_count[ZopfliGetDistSymbol(dists[i])]++;
478
- }
479
- }
480
-
481
- ll_count[256] = 1; /* End symbol. */
482
630
  }
@@ -33,23 +33,50 @@ compression.
33
33
 
34
34
  /*
35
35
  Stores lit/length and dist pairs for LZ77.
36
- litlens: Contains the literal symbols or length values.
37
- dists: Indicates the distance, or 0 to indicate that there is no distance and
38
- litlens contains a literal instead of a length.
39
- litlens and dists both have the same size.
36
+ Parameter litlens: Contains the literal symbols or length values.
37
+ Parameter dists: Contains the distances. A value is 0 to indicate that there is
38
+ no dist and the corresponding litlens value is a literal instead of a length.
39
+ Parameter size: The size of both the litlens and dists arrays.
40
+ The memory can best be managed by using ZopfliInitLZ77Store to initialize it,
41
+ ZopfliCleanLZ77Store to destroy it, and ZopfliStoreLitLenDist to append values.
42
+
40
43
  */
41
44
  typedef struct ZopfliLZ77Store {
42
45
  unsigned short* litlens; /* Lit or len. */
43
46
  unsigned short* dists; /* If 0: indicates literal in corresponding litlens,
44
47
  if > 0: length in corresponding litlens, this is the distance. */
45
48
  size_t size;
49
+
50
+ const unsigned char* data; /* original data */
51
+ size_t* pos; /* position in data where this LZ77 command begins */
52
+
53
+ unsigned short* ll_symbol;
54
+ unsigned short* d_symbol;
55
+
56
+ /* Cumulative histograms wrapping around per chunk. Each chunk has the amount
57
+ of distinct symbols as length, so using 1 value per LZ77 symbol, we have a
58
+ precise histogram at every N symbols, and the rest can be calculated by
59
+ looping through the actual symbols of this chunk. */
60
+ size_t* ll_counts;
61
+ size_t* d_counts;
46
62
  } ZopfliLZ77Store;
47
63
 
48
- void ZopfliInitLZ77Store(ZopfliLZ77Store* store);
64
+ void ZopfliInitLZ77Store(const unsigned char* data, ZopfliLZ77Store* store);
49
65
  void ZopfliCleanLZ77Store(ZopfliLZ77Store* store);
50
66
  void ZopfliCopyLZ77Store(const ZopfliLZ77Store* source, ZopfliLZ77Store* dest);
51
67
  void ZopfliStoreLitLenDist(unsigned short length, unsigned short dist,
52
- ZopfliLZ77Store* store);
68
+ size_t pos, ZopfliLZ77Store* store);
69
+ void ZopfliAppendLZ77Store(const ZopfliLZ77Store* store,
70
+ ZopfliLZ77Store* target);
71
+ /* Gets the amount of raw bytes that this range of LZ77 symbols spans. */
72
+ size_t ZopfliLZ77GetByteRange(const ZopfliLZ77Store* lz77,
73
+ size_t lstart, size_t lend);
74
+ /* Gets the histogram of lit/len and dist symbols in the given range, using the
75
+ cumulative histograms, so faster than adding one by one for large range. Does
76
+ not add the one end symbol of value 256. */
77
+ void ZopfliLZ77GetHistogram(const ZopfliLZ77Store* lz77,
78
+ size_t lstart, size_t lend,
79
+ size_t* ll_counts, size_t* d_counts);
53
80
 
54
81
  /*
55
82
  Some state information for compressing a block.
@@ -69,6 +96,11 @@ typedef struct ZopfliBlockState {
69
96
  size_t blockend;
70
97
  } ZopfliBlockState;
71
98
 
99
+ void ZopfliInitBlockState(const ZopfliOptions* options,
100
+ size_t blockstart, size_t blockend, int add_lmc,
101
+ ZopfliBlockState* s);
102
+ void ZopfliCleanBlockState(ZopfliBlockState* s);
103
+
72
104
  /*
73
105
  Finds the longest match (length and corresponding distance) for LZ77
74
106
  compression.
@@ -96,22 +128,6 @@ Verifies if length and dist are indeed valid, only used for assertion.
96
128
  void ZopfliVerifyLenDist(const unsigned char* data, size_t datasize, size_t pos,
97
129
  unsigned short dist, unsigned short length);
98
130
 
99
- /*
100
- Counts the number of literal, length and distance symbols in the given lz77
101
- arrays.
102
- litlens: lz77 lit/lengths
103
- dists: ll77 distances
104
- start: where to begin counting in litlens and dists
105
- end: where to stop counting in litlens and dists (not inclusive)
106
- ll_count: count of each lit/len symbol, must have size 288 (see deflate
107
- standard)
108
- d_count: count of each dist symbol, must have size 32 (see deflate standard)
109
- */
110
- void ZopfliLZ77Counts(const unsigned short* litlens,
111
- const unsigned short* dists,
112
- size_t start, size_t end,
113
- size_t* ll_count, size_t* d_count);
114
-
115
131
  /*
116
132
  Does LZ77 using an algorithm similar to gzip, with lazy matching, rather than
117
133
  with the slow but better "squeeze" implementation.
@@ -121,6 +137,6 @@ dictionary.
121
137
  */
122
138
  void ZopfliLZ77Greedy(ZopfliBlockState* s, const unsigned char* in,
123
139
  size_t instart, size_t inend,
124
- ZopfliLZ77Store* store);
140
+ ZopfliLZ77Store* store, ZopfliHash* h);
125
141
 
126
142
  #endif /* ZOPFLI_LZ77_H_ */
@@ -25,35 +25,40 @@ Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
25
25
 
26
26
  #include "blocksplitter.h"
27
27
  #include "deflate.h"
28
+ #include "symbols.h"
28
29
  #include "tree.h"
29
30
  #include "util.h"
30
31
 
31
32
  typedef struct SymbolStats {
32
33
  /* The literal and length symbols. */
33
- size_t litlens[288];
34
+ size_t litlens[ZOPFLI_NUM_LL];
34
35
  /* The 32 unique dist symbols, not the 32768 possible dists. */
35
- size_t dists[32];
36
+ size_t dists[ZOPFLI_NUM_D];
36
37
 
37
- double ll_symbols[288]; /* Length of each lit/len symbol in bits. */
38
- double d_symbols[32]; /* Length of each dist symbol in bits. */
38
+ /* Length of each lit/len symbol in bits. */
39
+ double ll_symbols[ZOPFLI_NUM_LL];
40
+ /* Length of each dist symbol in bits. */
41
+ double d_symbols[ZOPFLI_NUM_D];
39
42
  } SymbolStats;
40
43
 
41
44
  /* Sets everything to 0. */
42
45
  static void InitStats(SymbolStats* stats) {
43
- memset(stats->litlens, 0, 288 * sizeof(stats->litlens[0]));
44
- memset(stats->dists, 0, 32 * sizeof(stats->dists[0]));
46
+ memset(stats->litlens, 0, ZOPFLI_NUM_LL * sizeof(stats->litlens[0]));
47
+ memset(stats->dists, 0, ZOPFLI_NUM_D * sizeof(stats->dists[0]));
45
48
 
46
- memset(stats->ll_symbols, 0, 288 * sizeof(stats->ll_symbols[0]));
47
- memset(stats->d_symbols, 0, 32 * sizeof(stats->d_symbols[0]));
49
+ memset(stats->ll_symbols, 0, ZOPFLI_NUM_LL * sizeof(stats->ll_symbols[0]));
50
+ memset(stats->d_symbols, 0, ZOPFLI_NUM_D * sizeof(stats->d_symbols[0]));
48
51
  }
49
52
 
50
53
  static void CopyStats(SymbolStats* source, SymbolStats* dest) {
51
- memcpy(dest->litlens, source->litlens, 288 * sizeof(dest->litlens[0]));
52
- memcpy(dest->dists, source->dists, 32 * sizeof(dest->dists[0]));
54
+ memcpy(dest->litlens, source->litlens,
55
+ ZOPFLI_NUM_LL * sizeof(dest->litlens[0]));
56
+ memcpy(dest->dists, source->dists, ZOPFLI_NUM_D * sizeof(dest->dists[0]));
53
57
 
54
58
  memcpy(dest->ll_symbols, source->ll_symbols,
55
- 288 * sizeof(dest->ll_symbols[0]));
56
- memcpy(dest->d_symbols, source->d_symbols, 32 * sizeof(dest->d_symbols[0]));
59
+ ZOPFLI_NUM_LL * sizeof(dest->ll_symbols[0]));
60
+ memcpy(dest->d_symbols, source->d_symbols,
61
+ ZOPFLI_NUM_D * sizeof(dest->d_symbols[0]));
57
62
  }
58
63
 
59
64
  /* Adds the bit lengths. */
@@ -61,11 +66,11 @@ static void AddWeighedStatFreqs(const SymbolStats* stats1, double w1,
61
66
  const SymbolStats* stats2, double w2,
62
67
  SymbolStats* result) {
63
68
  size_t i;
64
- for (i = 0; i < 288; i++) {
69
+ for (i = 0; i < ZOPFLI_NUM_LL; i++) {
65
70
  result->litlens[i] =
66
71
  (size_t) (stats1->litlens[i] * w1 + stats2->litlens[i] * w2);
67
72
  }
68
- for (i = 0; i < 32; i++) {
73
+ for (i = 0; i < ZOPFLI_NUM_D; i++) {
69
74
  result->dists[i] =
70
75
  (size_t) (stats1->dists[i] * w1 + stats2->dists[i] * w2);
71
76
  }
@@ -96,15 +101,15 @@ static void RandomizeFreqs(RanState* state, size_t* freqs, int n) {
96
101
  }
97
102
 
98
103
  static void RandomizeStatFreqs(RanState* state, SymbolStats* stats) {
99
- RandomizeFreqs(state, stats->litlens, 288);
100
- RandomizeFreqs(state, stats->dists, 32);
104
+ RandomizeFreqs(state, stats->litlens, ZOPFLI_NUM_LL);
105
+ RandomizeFreqs(state, stats->dists, ZOPFLI_NUM_D);
101
106
  stats->litlens[256] = 1; /* End symbol. */
102
107
  }
103
108
 
104
109
  static void ClearStatFreqs(SymbolStats* stats) {
105
110
  size_t i;
106
- for (i = 0; i < 288; i++) stats->litlens[i] = 0;
107
- for (i = 0; i < 32; i++) stats->dists[i] = 0;
111
+ for (i = 0; i < ZOPFLI_NUM_LL; i++) stats->litlens[i] = 0;
112
+ for (i = 0; i < ZOPFLI_NUM_D; i++) stats->dists[i] = 0;
108
113
  }
109
114
 
110
115
  /*
@@ -126,7 +131,7 @@ static double GetCostFixed(unsigned litlen, unsigned dist, void* unused) {
126
131
  int dbits = ZopfliGetDistExtraBits(dist);
127
132
  int lbits = ZopfliGetLengthExtraBits(litlen);
128
133
  int lsym = ZopfliGetLengthSymbol(litlen);
129
- double cost = 0;
134
+ int cost = 0;
130
135
  if (lsym <= 279) cost += 7;
131
136
  else cost += 8;
132
137
  cost += 5; /* Every dist symbol has length 5. */
@@ -147,7 +152,7 @@ static double GetCostStat(unsigned litlen, unsigned dist, void* context) {
147
152
  int lbits = ZopfliGetLengthExtraBits(litlen);
148
153
  int dsym = ZopfliGetDistSymbol(dist);
149
154
  int dbits = ZopfliGetDistExtraBits(dist);
150
- return stats->ll_symbols[lsym] + lbits + stats->d_symbols[dsym] + dbits;
155
+ return lbits + dbits + stats->ll_symbols[lsym] + stats->d_symbols[dsym];
151
156
  }
152
157
  }
153
158
 
@@ -192,6 +197,10 @@ static double GetCostModelMinCost(CostModelFun* costmodel, void* costcontext) {
192
197
  return costmodel(bestlength, bestdist, costcontext);
193
198
  }
194
199
 
200
+ static size_t zopfli_min(size_t a, size_t b) {
201
+ return a < b ? a : b;
202
+ }
203
+
195
204
  /*
196
205
  Performs the forward pass for "squeeze". Gets the most optimal length to reach
197
206
  every byte from a previous byte, using cost calculations.
@@ -209,27 +218,23 @@ static double GetBestLengths(ZopfliBlockState *s,
209
218
  const unsigned char* in,
210
219
  size_t instart, size_t inend,
211
220
  CostModelFun* costmodel, void* costcontext,
212
- unsigned short* length_array) {
221
+ unsigned short* length_array,
222
+ ZopfliHash* h, float* costs) {
213
223
  /* Best cost to get here so far. */
214
224
  size_t blocksize = inend - instart;
215
- float* costs;
216
- size_t i = 0, k;
225
+ size_t i = 0, k, kend;
217
226
  unsigned short leng;
218
227
  unsigned short dist;
219
228
  unsigned short sublen[259];
220
229
  size_t windowstart = instart > ZOPFLI_WINDOW_SIZE
221
230
  ? instart - ZOPFLI_WINDOW_SIZE : 0;
222
- ZopfliHash hash;
223
- ZopfliHash* h = &hash;
224
231
  double result;
225
232
  double mincost = GetCostModelMinCost(costmodel, costcontext);
233
+ double mincostaddcostj;
226
234
 
227
235
  if (instart == inend) return 0;
228
236
 
229
- costs = (float*)malloc(sizeof(float) * (blocksize + 1));
230
- if (!costs) exit(-1); /* Allocation failed. */
231
-
232
- ZopfliInitHash(ZOPFLI_WINDOW_SIZE, h);
237
+ ZopfliResetHash(ZOPFLI_WINDOW_SIZE, h);
233
238
  ZopfliWarmupHash(in, windowstart, inend, h);
234
239
  for (i = windowstart; i < instart; i++) {
235
240
  ZopfliUpdateHash(in, i, inend, h);
@@ -270,7 +275,7 @@ static double GetBestLengths(ZopfliBlockState *s,
270
275
 
271
276
  /* Literal. */
272
277
  if (i + 1 <= inend) {
273
- double newCost = costs[j] + costmodel(in[i], 0, costcontext);
278
+ double newCost = costmodel(in[i], 0, costcontext) + costs[j];
274
279
  assert(newCost >= 0);
275
280
  if (newCost < costs[j + 1]) {
276
281
  costs[j + 1] = newCost;
@@ -278,14 +283,16 @@ static double GetBestLengths(ZopfliBlockState *s,
278
283
  }
279
284
  }
280
285
  /* Lengths. */
281
- for (k = 3; k <= leng && i + k <= inend; k++) {
286
+ kend = zopfli_min(leng, inend-i);
287
+ mincostaddcostj = mincost + costs[j];
288
+ for (k = 3; k <= kend; k++) {
282
289
  double newCost;
283
290
 
284
291
  /* Calling the cost model is expensive, avoid this if we are already at
285
292
  the minimum possible cost that it can return. */
286
- if (costs[j + k] - costs[j] <= mincost) continue;
293
+ if (costs[j + k] <= mincostaddcostj) continue;
287
294
 
288
- newCost = costs[j] + costmodel(k, sublen[k], costcontext);
295
+ newCost = costmodel(k, sublen[k], costcontext) + costs[j];
289
296
  assert(newCost >= 0);
290
297
  if (newCost < costs[j + k]) {
291
298
  assert(k <= ZOPFLI_MAX_MATCH);
@@ -298,9 +305,6 @@ static double GetBestLengths(ZopfliBlockState *s,
298
305
  assert(costs[blocksize] >= 0);
299
306
  result = costs[blocksize];
300
307
 
301
- ZopfliCleanHash(h);
302
- free(costs);
303
-
304
308
  return result;
305
309
  }
306
310
 
@@ -334,19 +338,16 @@ static void TraceBackwards(size_t size, const unsigned short* length_array,
334
338
  static void FollowPath(ZopfliBlockState* s,
335
339
  const unsigned char* in, size_t instart, size_t inend,
336
340
  unsigned short* path, size_t pathsize,
337
- ZopfliLZ77Store* store) {
341
+ ZopfliLZ77Store* store, ZopfliHash *h) {
338
342
  size_t i, j, pos = 0;
339
343
  size_t windowstart = instart > ZOPFLI_WINDOW_SIZE
340
344
  ? instart - ZOPFLI_WINDOW_SIZE : 0;
341
345
 
342
346
  size_t total_length_test = 0;
343
347
 
344
- ZopfliHash hash;
345
- ZopfliHash* h = &hash;
346
-
347
348
  if (instart == inend) return;
348
349
 
349
- ZopfliInitHash(ZOPFLI_WINDOW_SIZE, h);
350
+ ZopfliResetHash(ZOPFLI_WINDOW_SIZE, h);
350
351
  ZopfliWarmupHash(in, windowstart, inend, h);
351
352
  for (i = windowstart; i < instart; i++) {
352
353
  ZopfliUpdateHash(in, i, inend, h);
@@ -369,11 +370,11 @@ static void FollowPath(ZopfliBlockState* s,
369
370
  &dist, &dummy_length);
370
371
  assert(!(dummy_length != length && length > 2 && dummy_length > 2));
371
372
  ZopfliVerifyLenDist(in, inend, pos, dist, length);
372
- ZopfliStoreLitLenDist(length, dist, store);
373
+ ZopfliStoreLitLenDist(length, dist, pos, store);
373
374
  total_length_test += length;
374
375
  } else {
375
376
  length = 1;
376
- ZopfliStoreLitLenDist(in[pos], 0, store);
377
+ ZopfliStoreLitLenDist(in[pos], 0, pos, store);
377
378
  total_length_test++;
378
379
  }
379
380
 
@@ -385,14 +386,12 @@ static void FollowPath(ZopfliBlockState* s,
385
386
 
386
387
  pos += length;
387
388
  }
388
-
389
- ZopfliCleanHash(h);
390
389
  }
391
390
 
392
391
  /* Calculates the entropy of the statistics */
393
392
  static void CalculateStatistics(SymbolStats* stats) {
394
- ZopfliCalculateEntropy(stats->litlens, 288, stats->ll_symbols);
395
- ZopfliCalculateEntropy(stats->dists, 32, stats->d_symbols);
393
+ ZopfliCalculateEntropy(stats->litlens, ZOPFLI_NUM_LL, stats->ll_symbols);
394
+ ZopfliCalculateEntropy(stats->dists, ZOPFLI_NUM_D, stats->d_symbols);
396
395
  }
397
396
 
398
397
  /* Appends the symbol statistics from the store. */
@@ -414,14 +413,13 @@ static void GetStatistics(const ZopfliLZ77Store* store, SymbolStats* stats) {
414
413
  /*
415
414
  Does a single run for ZopfliLZ77Optimal. For good compression, repeated runs
416
415
  with updated statistics should be performed.
417
-
418
416
  s: the block state
419
417
  in: the input data array
420
418
  instart: where to start
421
419
  inend: where to stop (not inclusive)
422
420
  path: pointer to dynamically allocated memory to store the path
423
421
  pathsize: pointer to the size of the dynamic path array
424
- length_array: array if size (inend - instart) used to store lengths
422
+ length_array: array of size (inend - instart) used to store lengths
425
423
  costmodel: function to use as the cost model for this squeeze run
426
424
  costcontext: abstract context for the costmodel function
427
425
  store: place to output the LZ77 data
@@ -432,20 +430,22 @@ static double LZ77OptimalRun(ZopfliBlockState* s,
432
430
  const unsigned char* in, size_t instart, size_t inend,
433
431
  unsigned short** path, size_t* pathsize,
434
432
  unsigned short* length_array, CostModelFun* costmodel,
435
- void* costcontext, ZopfliLZ77Store* store) {
436
- double cost = GetBestLengths(
437
- s, in, instart, inend, costmodel, costcontext, length_array);
433
+ void* costcontext, ZopfliLZ77Store* store,
434
+ ZopfliHash* h, float* costs) {
435
+ double cost = GetBestLengths(s, in, instart, inend, costmodel,
436
+ costcontext, length_array, h, costs);
438
437
  free(*path);
439
438
  *path = 0;
440
439
  *pathsize = 0;
441
440
  TraceBackwards(inend - instart, length_array, path, pathsize);
442
- FollowPath(s, in, instart, inend, *path, *pathsize, store);
441
+ FollowPath(s, in, instart, inend, *path, *pathsize, store, h);
443
442
  assert(cost < ZOPFLI_LARGE_FLOAT);
444
443
  return cost;
445
444
  }
446
445
 
447
446
  void ZopfliLZ77Optimal(ZopfliBlockState *s,
448
447
  const unsigned char* in, size_t instart, size_t inend,
448
+ int numiterations,
449
449
  ZopfliLZ77Store* store) {
450
450
  /* Dist to get to here with smallest cost. */
451
451
  size_t blocksize = inend - instart;
@@ -454,8 +454,11 @@ void ZopfliLZ77Optimal(ZopfliBlockState *s,
454
454
  unsigned short* path = 0;
455
455
  size_t pathsize = 0;
456
456
  ZopfliLZ77Store currentstore;
457
+ ZopfliHash hash;
458
+ ZopfliHash* h = &hash;
457
459
  SymbolStats stats, beststats, laststats;
458
460
  int i;
461
+ float* costs = (float*)malloc(sizeof(float) * (blocksize + 1));
459
462
  double cost;
460
463
  double bestcost = ZOPFLI_LARGE_FLOAT;
461
464
  double lastcost = 0;
@@ -463,29 +466,30 @@ void ZopfliLZ77Optimal(ZopfliBlockState *s,
463
466
  RanState ran_state;
464
467
  int lastrandomstep = -1;
465
468
 
469
+ if (!costs) exit(-1); /* Allocation failed. */
466
470
  if (!length_array) exit(-1); /* Allocation failed. */
467
471
 
468
472
  InitRanState(&ran_state);
469
473
  InitStats(&stats);
470
- ZopfliInitLZ77Store(&currentstore);
474
+ ZopfliInitLZ77Store(in, &currentstore);
475
+ ZopfliAllocHash(ZOPFLI_WINDOW_SIZE, h);
471
476
 
472
477
  /* Do regular deflate, then loop multiple shortest path runs, each time using
473
478
  the statistics of the previous run. */
474
479
 
475
480
  /* Initial run. */
476
- ZopfliLZ77Greedy(s, in, instart, inend, &currentstore);
481
+ ZopfliLZ77Greedy(s, in, instart, inend, &currentstore, h);
477
482
  GetStatistics(&currentstore, &stats);
478
483
 
479
484
  /* Repeat statistics, each time with the cost model from the previous stat
480
485
  run. */
481
- for (i = 0; i < s->options->numiterations; i++) {
486
+ for (i = 0; i < numiterations; i++) {
482
487
  ZopfliCleanLZ77Store(&currentstore);
483
- ZopfliInitLZ77Store(&currentstore);
488
+ ZopfliInitLZ77Store(in, &currentstore);
484
489
  LZ77OptimalRun(s, in, instart, inend, &path, &pathsize,
485
490
  length_array, GetCostStat, (void*)&stats,
486
- &currentstore);
487
- cost = ZopfliCalculateBlockSize(currentstore.litlens, currentstore.dists,
488
- 0, currentstore.size, 2);
491
+ &currentstore, h, costs);
492
+ cost = ZopfliCalculateBlockSize(&currentstore, 0, currentstore.size, 2);
489
493
  if (s->options->verbose_more || (s->options->verbose && cost < bestcost)) {
490
494
  fprintf(stderr, "Iteration %d: %d bit\n", i, (int) cost);
491
495
  }
@@ -516,7 +520,9 @@ void ZopfliLZ77Optimal(ZopfliBlockState *s,
516
520
 
517
521
  free(length_array);
518
522
  free(path);
523
+ free(costs);
519
524
  ZopfliCleanLZ77Store(&currentstore);
525
+ ZopfliCleanHash(h);
520
526
  }
521
527
 
522
528
  void ZopfliLZ77OptimalFixed(ZopfliBlockState *s,
@@ -530,17 +536,25 @@ void ZopfliLZ77OptimalFixed(ZopfliBlockState *s,
530
536
  (unsigned short*)malloc(sizeof(unsigned short) * (blocksize + 1));
531
537
  unsigned short* path = 0;
532
538
  size_t pathsize = 0;
539
+ ZopfliHash hash;
540
+ ZopfliHash* h = &hash;
541
+ float* costs = (float*)malloc(sizeof(float) * (blocksize + 1));
533
542
 
543
+ if (!costs) exit(-1); /* Allocation failed. */
534
544
  if (!length_array) exit(-1); /* Allocation failed. */
535
545
 
546
+ ZopfliAllocHash(ZOPFLI_WINDOW_SIZE, h);
547
+
536
548
  s->blockstart = instart;
537
549
  s->blockend = inend;
538
550
 
539
551
  /* Shortest path for fixed tree. This one should give the shortest possible
540
552
  result for fixed tree, no repeated runs are needed since the tree is known. */
541
553
  LZ77OptimalRun(s, in, instart, inend, &path, &pathsize,
542
- length_array, GetCostFixed, 0, store);
554
+ length_array, GetCostFixed, 0, store, h, costs);
543
555
 
544
556
  free(length_array);
545
557
  free(path);
558
+ free(costs);
559
+ ZopfliCleanHash(h);
546
560
  }