extlzham 0.0.1.PROTOTYPE

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.md +27 -0
  3. data/README.md +21 -0
  4. data/Rakefile +143 -0
  5. data/contrib/lzham/LICENSE +22 -0
  6. data/contrib/lzham/README.md +209 -0
  7. data/contrib/lzham/include/lzham.h +781 -0
  8. data/contrib/lzham/lzhamcomp/lzham_comp.h +38 -0
  9. data/contrib/lzham/lzhamcomp/lzham_lzbase.cpp +244 -0
  10. data/contrib/lzham/lzhamcomp/lzham_lzbase.h +45 -0
  11. data/contrib/lzham/lzhamcomp/lzham_lzcomp.cpp +608 -0
  12. data/contrib/lzham/lzhamcomp/lzham_lzcomp_internal.cpp +1966 -0
  13. data/contrib/lzham/lzhamcomp/lzham_lzcomp_internal.h +472 -0
  14. data/contrib/lzham/lzhamcomp/lzham_lzcomp_state.cpp +1413 -0
  15. data/contrib/lzham/lzhamcomp/lzham_match_accel.cpp +562 -0
  16. data/contrib/lzham/lzhamcomp/lzham_match_accel.h +146 -0
  17. data/contrib/lzham/lzhamcomp/lzham_null_threading.h +97 -0
  18. data/contrib/lzham/lzhamcomp/lzham_pthreads_threading.cpp +229 -0
  19. data/contrib/lzham/lzhamcomp/lzham_pthreads_threading.h +520 -0
  20. data/contrib/lzham/lzhamcomp/lzham_threading.h +12 -0
  21. data/contrib/lzham/lzhamcomp/lzham_win32_threading.cpp +220 -0
  22. data/contrib/lzham/lzhamcomp/lzham_win32_threading.h +368 -0
  23. data/contrib/lzham/lzhamdecomp/lzham_assert.cpp +66 -0
  24. data/contrib/lzham/lzhamdecomp/lzham_assert.h +40 -0
  25. data/contrib/lzham/lzhamdecomp/lzham_checksum.cpp +73 -0
  26. data/contrib/lzham/lzhamdecomp/lzham_checksum.h +13 -0
  27. data/contrib/lzham/lzhamdecomp/lzham_config.h +23 -0
  28. data/contrib/lzham/lzhamdecomp/lzham_core.h +264 -0
  29. data/contrib/lzham/lzhamdecomp/lzham_decomp.h +37 -0
  30. data/contrib/lzham/lzhamdecomp/lzham_helpers.h +54 -0
  31. data/contrib/lzham/lzhamdecomp/lzham_huffman_codes.cpp +262 -0
  32. data/contrib/lzham/lzhamdecomp/lzham_huffman_codes.h +14 -0
  33. data/contrib/lzham/lzhamdecomp/lzham_lzdecomp.cpp +1527 -0
  34. data/contrib/lzham/lzhamdecomp/lzham_lzdecompbase.cpp +131 -0
  35. data/contrib/lzham/lzhamdecomp/lzham_lzdecompbase.h +89 -0
  36. data/contrib/lzham/lzhamdecomp/lzham_math.h +142 -0
  37. data/contrib/lzham/lzhamdecomp/lzham_mem.cpp +284 -0
  38. data/contrib/lzham/lzhamdecomp/lzham_mem.h +112 -0
  39. data/contrib/lzham/lzhamdecomp/lzham_platform.cpp +157 -0
  40. data/contrib/lzham/lzhamdecomp/lzham_platform.h +284 -0
  41. data/contrib/lzham/lzhamdecomp/lzham_prefix_coding.cpp +351 -0
  42. data/contrib/lzham/lzhamdecomp/lzham_prefix_coding.h +146 -0
  43. data/contrib/lzham/lzhamdecomp/lzham_symbol_codec.cpp +1484 -0
  44. data/contrib/lzham/lzhamdecomp/lzham_symbol_codec.h +556 -0
  45. data/contrib/lzham/lzhamdecomp/lzham_timer.cpp +147 -0
  46. data/contrib/lzham/lzhamdecomp/lzham_timer.h +99 -0
  47. data/contrib/lzham/lzhamdecomp/lzham_traits.h +141 -0
  48. data/contrib/lzham/lzhamdecomp/lzham_types.h +97 -0
  49. data/contrib/lzham/lzhamdecomp/lzham_utils.h +58 -0
  50. data/contrib/lzham/lzhamdecomp/lzham_vector.cpp +75 -0
  51. data/contrib/lzham/lzhamdecomp/lzham_vector.h +588 -0
  52. data/contrib/lzham/lzhamlib/lzham_lib.cpp +179 -0
  53. data/examples/basic.rb +48 -0
  54. data/ext/extconf.rb +26 -0
  55. data/ext/extlzham.c +741 -0
  56. data/gemstub.rb +22 -0
  57. data/lib/extlzham/version.rb +5 -0
  58. data/lib/extlzham.rb +153 -0
  59. metadata +135 -0
@@ -0,0 +1,1966 @@
1
+ // File: lzham_lzcomp_internal.cpp
2
+ // See Copyright Notice and license at the end of include/lzham.h
3
+ #include "lzham_core.h"
4
+ #include "lzham_lzcomp_internal.h"
5
+ #include "lzham_checksum.h"
6
+ #include "lzham_timer.h"
7
+ #include "lzham_lzbase.h"
8
+ #include <string.h>
9
+
10
+ // Update and print high-level coding statistics if set to 1.
11
+ // TODO: Add match distance coding statistics.
12
+ #define LZHAM_UPDATE_STATS 0
13
+
14
+ // Only parse on the main thread, for easier debugging.
15
+ #define LZHAM_FORCE_SINGLE_THREADED_PARSING 0
16
+
17
+ // Verify all computed match costs against the generic/slow state::get_cost() method.
18
+ #define LZHAM_VERIFY_MATCH_COSTS 0
19
+
20
+ // Set to 1 to force all blocks to be uncompressed (raw).
21
+ #define LZHAM_FORCE_ALL_RAW_BLOCKS 0
22
+
23
+ namespace lzham
24
+ {
25
+ static comp_settings s_level_settings[cCompressionLevelCount] =
26
+ {
27
+ // cCompressionLevelFastest
28
+ {
29
+ 8, // m_fast_bytes
30
+ true, // m_fast_adaptive_huffman_updating
31
+ 1, // m_match_accel_max_matches_per_probe
32
+ 2, // m_match_accel_max_probes
33
+ },
34
+ // cCompressionLevelFaster
35
+ {
36
+ 24, // m_fast_bytes
37
+ true, // m_fast_adaptive_huffman_updating
38
+ 6, // m_match_accel_max_matches_per_probe
39
+ 12, // m_match_accel_max_probes
40
+ },
41
+ // cCompressionLevelDefault
42
+ {
43
+ 32, // m_fast_bytes
44
+ false, // m_fast_adaptive_huffman_updating
45
+ UINT_MAX, // m_match_accel_max_matches_per_probe
46
+ 16, // m_match_accel_max_probes
47
+ },
48
+ // cCompressionLevelBetter
49
+ {
50
+ 48, // m_fast_bytes
51
+ false, // m_fast_adaptive_huffman_updating
52
+ UINT_MAX, // m_match_accel_max_matches_per_probe
53
+ 32, // m_match_accel_max_probes
54
+ },
55
+ // cCompressionLevelUber
56
+ {
57
+ 64, // m_fast_bytes
58
+ false, // m_fast_adaptive_huffman_updating
59
+ UINT_MAX, // m_match_accel_max_matches_per_probe
60
+ cMatchAccelMaxSupportedProbes, // m_match_accel_max_probes
61
+ }
62
+ };
63
+
64
+ lzcompressor::lzcompressor() :
65
+ m_src_size(-1),
66
+ m_src_adler32(0),
67
+ m_step(0),
68
+ m_block_start_dict_ofs(0),
69
+ m_block_index(0),
70
+ m_finished(false),
71
+ m_num_parse_threads(0),
72
+ m_parse_jobs_remaining(0),
73
+ m_parse_jobs_complete(0, 1),
74
+ m_block_history_size(0),
75
+ m_block_history_next(0)
76
+ {
77
+ LZHAM_VERIFY( ((uint32_ptr)this & (LZHAM_GET_ALIGNMENT(lzcompressor) - 1)) == 0);
78
+ }
79
+
80
+ bool lzcompressor::init_seed_bytes()
81
+ {
82
+ uint cur_seed_ofs = 0;
83
+
84
+ while (cur_seed_ofs < m_params.m_num_seed_bytes)
85
+ {
86
+ uint total_bytes_remaining = m_params.m_num_seed_bytes - cur_seed_ofs;
87
+ uint num_bytes_to_add = math::minimum(total_bytes_remaining, m_params.m_block_size);
88
+
89
+ if (!m_accel.add_bytes_begin(num_bytes_to_add, static_cast<const uint8*>(m_params.m_pSeed_bytes) + cur_seed_ofs))
90
+ return false;
91
+ m_accel.add_bytes_end();
92
+
93
+ m_accel.advance_bytes(num_bytes_to_add);
94
+
95
+ cur_seed_ofs += num_bytes_to_add;
96
+ }
97
+
98
+ return true;
99
+ }
100
+
101
+ bool lzcompressor::init(const init_params& params)
102
+ {
103
+ clear();
104
+
105
+ if ((params.m_dict_size_log2 < CLZBase::cMinDictSizeLog2) || (params.m_dict_size_log2 > CLZBase::cMaxDictSizeLog2))
106
+ return false;
107
+ if ((params.m_compression_level < 0) || (params.m_compression_level > cCompressionLevelCount))
108
+ return false;
109
+
110
+ m_params = params;
111
+ m_use_task_pool = (m_params.m_pTask_pool) && (m_params.m_pTask_pool->get_num_threads() != 0) && (m_params.m_max_helper_threads > 0);
112
+
113
+ if (!m_use_task_pool)
114
+ m_params.m_max_helper_threads = 0;
115
+
116
+ m_settings = s_level_settings[params.m_compression_level];
117
+
118
+ const uint dict_size = 1U << m_params.m_dict_size_log2;
119
+
120
+ if (params.m_num_seed_bytes)
121
+ {
122
+ if (!params.m_pSeed_bytes)
123
+ return false;
124
+ if (params.m_num_seed_bytes > dict_size)
125
+ return false;
126
+ }
127
+
128
+ uint max_block_size = dict_size / 8;
129
+ if (m_params.m_block_size > max_block_size)
130
+ {
131
+ m_params.m_block_size = max_block_size;
132
+ }
133
+
134
+ m_num_parse_threads = 1;
135
+
136
+ #if !LZHAM_FORCE_SINGLE_THREADED_PARSING
137
+ if (m_params.m_max_helper_threads > 0)
138
+ {
139
+ LZHAM_ASSUME(cMaxParseThreads >= 4);
140
+
141
+ if (m_params.m_block_size < 16384)
142
+ {
143
+ m_num_parse_threads = LZHAM_MIN(cMaxParseThreads, m_params.m_max_helper_threads + 1);
144
+ }
145
+ else
146
+ {
147
+ if ((m_params.m_max_helper_threads == 1) || (m_params.m_compression_level == cCompressionLevelFastest))
148
+ {
149
+ m_num_parse_threads = 1;
150
+ }
151
+ else if (m_params.m_max_helper_threads <= 3)
152
+ {
153
+ m_num_parse_threads = 2;
154
+ }
155
+ else if (m_params.m_max_helper_threads <= 7)
156
+ {
157
+ if ((m_params.m_lzham_compress_flags & LZHAM_COMP_FLAG_EXTREME_PARSING) && (m_params.m_compression_level == cCompressionLevelUber))
158
+ m_num_parse_threads = 4;
159
+ else
160
+ m_num_parse_threads = 2;
161
+ }
162
+ else
163
+ {
164
+ // 8-16
165
+ m_num_parse_threads = 4;
166
+ }
167
+ }
168
+ }
169
+ #endif
170
+
171
+ int num_parse_jobs = m_num_parse_threads - 1;
172
+ uint match_accel_helper_threads = LZHAM_MAX(0, (int)m_params.m_max_helper_threads - num_parse_jobs);
173
+
174
+ LZHAM_ASSERT(m_num_parse_threads >= 1);
175
+ LZHAM_ASSERT(m_num_parse_threads <= cMaxParseThreads);
176
+
177
+ if (!m_use_task_pool)
178
+ {
179
+ LZHAM_ASSERT(!match_accel_helper_threads && (m_num_parse_threads == 1));
180
+ }
181
+ else
182
+ {
183
+ LZHAM_ASSERT((match_accel_helper_threads + (m_num_parse_threads - 1)) <= m_params.m_max_helper_threads);
184
+ }
185
+
186
+ if (!m_accel.init(this, params.m_pTask_pool, match_accel_helper_threads, dict_size, m_settings.m_match_accel_max_matches_per_probe, false, m_settings.m_match_accel_max_probes))
187
+ return false;
188
+
189
+ init_position_slots(params.m_dict_size_log2);
190
+ init_slot_tabs();
191
+
192
+ //m_settings.m_fast_adaptive_huffman_updating
193
+ if (!m_state.init(*this, m_params.m_table_max_update_interval, m_params.m_table_update_interval_slow_rate))
194
+ return false;
195
+
196
+ if (!m_block_buf.try_reserve(m_params.m_block_size))
197
+ return false;
198
+
199
+ if (!m_comp_buf.try_reserve(m_params.m_block_size*2))
200
+ return false;
201
+
202
+ for (uint i = 0; i < m_num_parse_threads; i++)
203
+ {
204
+ //m_settings.m_fast_adaptive_huffman_updating
205
+ if (!m_parse_thread_state[i].m_initial_state.init(*this, m_params.m_table_max_update_interval, m_params.m_table_update_interval_slow_rate))
206
+ return false;
207
+ }
208
+
209
+ m_block_history_size = 0;
210
+ m_block_history_next = 0;
211
+
212
+ if (params.m_num_seed_bytes)
213
+ {
214
+ if (!init_seed_bytes())
215
+ return false;
216
+ }
217
+
218
+ if (!send_zlib_header())
219
+ return false;
220
+
221
+ m_src_size = 0;
222
+
223
+ return true;
224
+ }
225
+
226
+ // See http://www.gzip.org/zlib/rfc-zlib.html
227
+ // Method is set to 14 (LZHAM) and CINFO is (window_size - 15).
228
+ bool lzcompressor::send_zlib_header()
229
+ {
230
+ if ((m_params.m_lzham_compress_flags & LZHAM_COMP_FLAG_WRITE_ZLIB_STREAM) == 0)
231
+ return true;
232
+
233
+ // set CM (method) and CINFO (dictionary size) fields
234
+ int cmf = LZHAM_Z_LZHAM | ((m_params.m_dict_size_log2 - 15) << 4);
235
+
236
+ // set FLEVEL by mapping LZHAM's compression level to zlib's
237
+ int flg = 0;
238
+ switch (m_params.m_compression_level)
239
+ {
240
+ case LZHAM_COMP_LEVEL_FASTEST:
241
+ {
242
+ flg = 0 << 6;
243
+ break;
244
+ }
245
+ case LZHAM_COMP_LEVEL_FASTER:
246
+ {
247
+ flg = 1 << 6;
248
+ break;
249
+ }
250
+ case LZHAM_COMP_LEVEL_DEFAULT:
251
+ case LZHAM_COMP_LEVEL_BETTER:
252
+ {
253
+ flg = 2 << 6;
254
+ break;
255
+ }
256
+ default:
257
+ {
258
+ flg = 3 << 6;
259
+ break;
260
+ }
261
+ }
262
+
263
+ // set FDICT flag
264
+ if (m_params.m_pSeed_bytes)
265
+ flg |= 32;
266
+
267
+ int check = ((cmf << 8) + flg) % 31;
268
+ if (check)
269
+ flg += (31 - check);
270
+
271
+ LZHAM_ASSERT(0 == (((cmf << 8) + flg) % 31));
272
+ if (!m_comp_buf.try_push_back(static_cast<uint8>(cmf)))
273
+ return false;
274
+ if (!m_comp_buf.try_push_back(static_cast<uint8>(flg)))
275
+ return false;
276
+
277
+ if (m_params.m_pSeed_bytes)
278
+ {
279
+ // send adler32 of DICT
280
+ uint dict_adler32 = adler32(m_params.m_pSeed_bytes, m_params.m_num_seed_bytes);
281
+ for (uint i = 0; i < 4; i++)
282
+ {
283
+ if (!m_comp_buf.try_push_back(static_cast<uint8>(dict_adler32 >> 24)))
284
+ return false;
285
+ dict_adler32 <<= 8;
286
+ }
287
+ }
288
+
289
+ return true;
290
+ }
291
+
292
+ void lzcompressor::clear()
293
+ {
294
+ m_codec.clear();
295
+ m_src_size = -1;
296
+ m_src_adler32 = cInitAdler32;
297
+ m_block_buf.clear();
298
+ m_comp_buf.clear();
299
+
300
+ m_step = 0;
301
+ m_finished = false;
302
+ m_use_task_pool = false;
303
+ m_block_start_dict_ofs = 0;
304
+ m_block_index = 0;
305
+ m_state.clear();
306
+ m_num_parse_threads = 0;
307
+ m_parse_jobs_remaining = 0;
308
+
309
+ for (uint i = 0; i < cMaxParseThreads; i++)
310
+ {
311
+ parse_thread_state &parse_state = m_parse_thread_state[i];
312
+ parse_state.m_initial_state.clear();
313
+
314
+ for (uint j = 0; j <= cMaxParseGraphNodes; j++)
315
+ parse_state.m_nodes[j].clear();
316
+
317
+ parse_state.m_start_ofs = 0;
318
+ parse_state.m_bytes_to_match = 0;
319
+ parse_state.m_best_decisions.clear();
320
+ parse_state.m_issue_reset_state_partial = false;
321
+ parse_state.m_emit_decisions_backwards = false;
322
+ parse_state.m_failed = false;
323
+ }
324
+
325
+ m_block_history_size = 0;
326
+ m_block_history_next = 0;
327
+ }
328
+
329
+ bool lzcompressor::reset()
330
+ {
331
+ if (m_src_size < 0)
332
+ return false;
333
+
334
+ m_accel.reset();
335
+ m_codec.reset();
336
+ m_stats.clear();
337
+ m_src_size = 0;
338
+ m_src_adler32 = cInitAdler32;
339
+ m_block_buf.try_resize(0);
340
+ m_comp_buf.try_resize(0);
341
+
342
+ m_step = 0;
343
+ m_finished = false;
344
+ m_block_start_dict_ofs = 0;
345
+ m_block_index = 0;
346
+ m_state.reset();
347
+
348
+ m_block_history_size = 0;
349
+ m_block_history_next = 0;
350
+
351
+ if (m_params.m_num_seed_bytes)
352
+ {
353
+ if (!init_seed_bytes())
354
+ return false;
355
+ }
356
+
357
+ return send_zlib_header();
358
+ }
359
+
360
+ bool lzcompressor::code_decision(lzdecision lzdec, uint& cur_ofs, uint& bytes_to_match)
361
+ {
362
+ #ifdef LZHAM_LZDEBUG
363
+ if (!m_codec.encode_bits(CLZBase::cLZHAMDebugSyncMarkerValue, CLZBase::cLZHAMDebugSyncMarkerBits)) return false;
364
+ if (!m_codec.encode_bits(lzdec.is_match(), 1)) return false;
365
+ if (!m_codec.encode_bits(lzdec.get_len(), 17)) return false;
366
+ if (!m_codec.encode_bits(m_state.m_cur_state, 4)) return false;
367
+ #endif
368
+
369
+ #ifdef LZHAM_LZVERIFY
370
+ if (lzdec.is_match())
371
+ {
372
+ uint match_dist = lzdec.get_match_dist(m_state);
373
+
374
+ LZHAM_VERIFY(m_accel[cur_ofs] == m_accel[(cur_ofs - match_dist) & (m_accel.get_max_dict_size() - 1)]);
375
+ }
376
+ #endif
377
+
378
+ const uint len = lzdec.get_len();
379
+
380
+ if (!m_state.encode(m_codec, *this, m_accel, lzdec))
381
+ return false;
382
+
383
+ cur_ofs += len;
384
+ LZHAM_ASSERT(bytes_to_match >= len);
385
+ bytes_to_match -= len;
386
+
387
+ m_accel.advance_bytes(len);
388
+
389
+ m_step++;
390
+
391
+ return true;
392
+ }
393
+
394
+ bool lzcompressor::send_sync_block(lzham_flush_t flush_type)
395
+ {
396
+ m_codec.reset();
397
+
398
+ if (!m_codec.start_encoding(128))
399
+ return false;
400
+ #ifdef LZHAM_LZDEBUG
401
+ if (!m_codec.encode_bits(166, 12))
402
+ return false;
403
+ #endif
404
+ if (!m_codec.encode_bits(cSyncBlock, cBlockHeaderBits))
405
+ return false;
406
+
407
+ int flush_code = 0;
408
+ switch (flush_type)
409
+ {
410
+ case LZHAM_FULL_FLUSH:
411
+ flush_code = 2;
412
+ break;
413
+ case LZHAM_TABLE_FLUSH:
414
+ flush_code = 1;
415
+ break;
416
+ case LZHAM_SYNC_FLUSH:
417
+ flush_code = 3;
418
+ break;
419
+ case LZHAM_NO_FLUSH:
420
+ case LZHAM_FINISH:
421
+ flush_code = 0;
422
+ break;
423
+ }
424
+ if (!m_codec.encode_bits(flush_code, cBlockFlushTypeBits))
425
+ return false;
426
+
427
+ if (!m_codec.encode_align_to_byte())
428
+ return false;
429
+ if (!m_codec.encode_bits(0x0000, 16))
430
+ return false;
431
+ if (!m_codec.encode_bits(0xFFFF, 16))
432
+ return false;
433
+ if (!m_codec.stop_encoding(true))
434
+ return false;
435
+ if (!m_comp_buf.append(m_codec.get_encoding_buf()))
436
+ return false;
437
+
438
+ m_block_index++;
439
+ return true;
440
+ }
441
+
442
+ bool lzcompressor::flush(lzham_flush_t flush_type)
443
+ {
444
+ LZHAM_ASSERT(!m_finished);
445
+ if (m_finished)
446
+ return false;
447
+
448
+ bool status = true;
449
+ if (m_block_buf.size())
450
+ {
451
+ status = compress_block(m_block_buf.get_ptr(), m_block_buf.size());
452
+
453
+ m_block_buf.try_resize(0);
454
+ }
455
+
456
+ if (status)
457
+ {
458
+ status = send_sync_block(flush_type);
459
+
460
+ if (LZHAM_FULL_FLUSH == flush_type)
461
+ {
462
+ m_accel.flush();
463
+ m_state.reset();
464
+ }
465
+ }
466
+
467
+ lzham_flush_buffered_printf();
468
+
469
+ return status;
470
+ }
471
+
472
+ bool lzcompressor::put_bytes(const void* pBuf, uint buf_len)
473
+ {
474
+ LZHAM_ASSERT(!m_finished);
475
+ if (m_finished)
476
+ return false;
477
+
478
+ bool status = true;
479
+
480
+ if (!pBuf)
481
+ {
482
+ // Last block - flush whatever's left and send the final block.
483
+ if (m_block_buf.size())
484
+ {
485
+ status = compress_block(m_block_buf.get_ptr(), m_block_buf.size());
486
+
487
+ m_block_buf.try_resize(0);
488
+ }
489
+
490
+ if (status)
491
+ {
492
+ if (!send_final_block())
493
+ {
494
+ status = false;
495
+ }
496
+ }
497
+
498
+ m_finished = true;
499
+ }
500
+ else
501
+ {
502
+ // Compress blocks.
503
+ const uint8 *pSrcBuf = static_cast<const uint8*>(pBuf);
504
+ uint num_src_bytes_remaining = buf_len;
505
+
506
+ while (num_src_bytes_remaining)
507
+ {
508
+ const uint num_bytes_to_copy = LZHAM_MIN(num_src_bytes_remaining, m_params.m_block_size - m_block_buf.size());
509
+
510
+ if (num_bytes_to_copy == m_params.m_block_size)
511
+ {
512
+ LZHAM_ASSERT(!m_block_buf.size());
513
+
514
+ // Full-block available - compress in-place.
515
+ status = compress_block(pSrcBuf, num_bytes_to_copy);
516
+ }
517
+ else
518
+ {
519
+ // Less than a full block available - append to already accumulated bytes.
520
+ if (!m_block_buf.append(static_cast<const uint8 *>(pSrcBuf), num_bytes_to_copy))
521
+ return false;
522
+
523
+ LZHAM_ASSERT(m_block_buf.size() <= m_params.m_block_size);
524
+
525
+ if (m_block_buf.size() == m_params.m_block_size)
526
+ {
527
+ status = compress_block(m_block_buf.get_ptr(), m_block_buf.size());
528
+
529
+ m_block_buf.try_resize(0);
530
+ }
531
+ }
532
+
533
+ if (!status)
534
+ return false;
535
+
536
+ pSrcBuf += num_bytes_to_copy;
537
+ num_src_bytes_remaining -= num_bytes_to_copy;
538
+ }
539
+ }
540
+
541
+ lzham_flush_buffered_printf();
542
+
543
+ return status;
544
+ }
545
+
546
+ bool lzcompressor::send_final_block()
547
+ {
548
+ if (!m_codec.start_encoding(16))
549
+ return false;
550
+
551
+ #ifdef LZHAM_LZDEBUG
552
+ if (!m_codec.encode_bits(166, 12))
553
+ return false;
554
+ #endif
555
+
556
+ if (!m_block_index)
557
+ {
558
+ if (!send_configuration())
559
+ return false;
560
+ }
561
+
562
+ if (!m_codec.encode_bits(cEOFBlock, cBlockHeaderBits))
563
+ return false;
564
+
565
+ if (!m_codec.encode_align_to_byte())
566
+ return false;
567
+
568
+ if (!m_codec.encode_bits(m_src_adler32, 32))
569
+ return false;
570
+
571
+ if (!m_codec.stop_encoding(true))
572
+ return false;
573
+
574
+ if (m_comp_buf.empty())
575
+ {
576
+ m_comp_buf.swap(m_codec.get_encoding_buf());
577
+ }
578
+ else
579
+ {
580
+ if (!m_comp_buf.append(m_codec.get_encoding_buf()))
581
+ return false;
582
+ }
583
+
584
+ m_block_index++;
585
+
586
+ #if LZHAM_UPDATE_STATS
587
+ m_stats.print();
588
+ #endif
589
+
590
+ return true;
591
+ }
592
+
593
+ bool lzcompressor::send_configuration()
594
+ {
595
+ // TODO: Currently unused.
596
+ //if (!m_codec.encode_bits(m_settings.m_fast_adaptive_huffman_updating, 1))
597
+ // return false;
598
+ //if (!m_codec.encode_bits(0, 1))
599
+ // return false;
600
+
601
+ return true;
602
+ }
603
+
604
+ void lzcompressor::node::add_state(
605
+ int parent_index, int parent_state_index,
606
+ const lzdecision &lzdec, state &parent_state,
607
+ bit_cost_t total_cost,
608
+ uint total_complexity)
609
+ {
610
+ state_base trial_state;
611
+ parent_state.save_partial_state(trial_state);
612
+ trial_state.partial_advance(lzdec);
613
+
614
+ for (int i = m_num_node_states - 1; i >= 0; i--)
615
+ {
616
+ node_state &cur_node_state = m_node_states[i];
617
+ if (cur_node_state.m_saved_state == trial_state)
618
+ {
619
+ if ( (total_cost < cur_node_state.m_total_cost) ||
620
+ ((total_cost == cur_node_state.m_total_cost) && (total_complexity < cur_node_state.m_total_complexity)) )
621
+ {
622
+ cur_node_state.m_parent_index = static_cast<int16>(parent_index);
623
+ cur_node_state.m_parent_state_index = static_cast<int8>(parent_state_index);
624
+ cur_node_state.m_lzdec = lzdec;
625
+ cur_node_state.m_total_cost = total_cost;
626
+ cur_node_state.m_total_complexity = total_complexity;
627
+
628
+ while (i > 0)
629
+ {
630
+ if ((m_node_states[i].m_total_cost < m_node_states[i - 1].m_total_cost) ||
631
+ ((m_node_states[i].m_total_cost == m_node_states[i - 1].m_total_cost) && (m_node_states[i].m_total_complexity < m_node_states[i - 1].m_total_complexity)))
632
+ {
633
+ std::swap(m_node_states[i], m_node_states[i - 1]);
634
+ i--;
635
+ }
636
+ else
637
+ break;
638
+ }
639
+ }
640
+
641
+ return;
642
+ }
643
+ }
644
+
645
+ int insert_index;
646
+ for (insert_index = m_num_node_states; insert_index > 0; insert_index--)
647
+ {
648
+ node_state &cur_node_state = m_node_states[insert_index - 1];
649
+
650
+ if ( (total_cost > cur_node_state.m_total_cost) ||
651
+ ((total_cost == cur_node_state.m_total_cost) && (total_complexity >= cur_node_state.m_total_complexity)) )
652
+ {
653
+ break;
654
+ }
655
+ }
656
+
657
+ if (insert_index == cMaxNodeStates)
658
+ return;
659
+
660
+ uint num_behind = m_num_node_states - insert_index;
661
+ uint num_to_move = (m_num_node_states < cMaxNodeStates) ? num_behind : (num_behind - 1);
662
+ if (num_to_move)
663
+ {
664
+ LZHAM_ASSERT((insert_index + 1 + num_to_move) <= cMaxNodeStates);
665
+ memmove( &m_node_states[insert_index + 1], &m_node_states[insert_index], sizeof(node_state) * num_to_move);
666
+ }
667
+
668
+ node_state *pNew_node_state = &m_node_states[insert_index];
669
+ pNew_node_state->m_parent_index = static_cast<int16>(parent_index);
670
+ pNew_node_state->m_parent_state_index = static_cast<uint8>(parent_state_index);
671
+ pNew_node_state->m_lzdec = lzdec;
672
+ pNew_node_state->m_total_cost = total_cost;
673
+ pNew_node_state->m_total_complexity = total_complexity;
674
+ pNew_node_state->m_saved_state = trial_state;
675
+
676
+ m_num_node_states = LZHAM_MIN(m_num_node_states + 1, static_cast<uint>(cMaxNodeStates));
677
+
678
+ #ifdef LZHAM_LZVERIFY
679
+ for (uint i = 0; i < (m_num_node_states - 1); ++i)
680
+ {
681
+ node_state &a = m_node_states[i];
682
+ node_state &b = m_node_states[i + 1];
683
+ LZHAM_VERIFY(
684
+ (a.m_total_cost < b.m_total_cost) ||
685
+ ((a.m_total_cost == b.m_total_cost) && (a.m_total_complexity <= b.m_total_complexity)) );
686
+ }
687
+ #endif
688
+ }
689
+
690
+ // The "extreme" parser tracks the best node::cMaxNodeStates (4) candidate LZ decisions per lookahead character.
691
+ // This allows the compressor to make locally suboptimal decisions that ultimately result in a better parse.
692
+ // It assumes the input statistics are locally stationary over the input block to parse.
693
+ bool lzcompressor::extreme_parse(parse_thread_state &parse_state)
694
+ {
695
+ LZHAM_ASSERT(parse_state.m_bytes_to_match <= cMaxParseGraphNodes);
696
+
697
+ parse_state.m_failed = false;
698
+ parse_state.m_emit_decisions_backwards = true;
699
+
700
+ node *pNodes = parse_state.m_nodes;
701
+ for (uint i = 0; i <= cMaxParseGraphNodes; i++)
702
+ {
703
+ pNodes[i].clear();
704
+ }
705
+
706
+ state &approx_state = parse_state.m_initial_state;
707
+
708
+ pNodes[0].m_num_node_states = 1;
709
+ node_state &first_node_state = pNodes[0].m_node_states[0];
710
+ approx_state.save_partial_state(first_node_state.m_saved_state);
711
+ first_node_state.m_parent_index = -1;
712
+ first_node_state.m_parent_state_index = -1;
713
+ first_node_state.m_total_cost = 0;
714
+ first_node_state.m_total_complexity = 0;
715
+
716
+ const uint bytes_to_parse = parse_state.m_bytes_to_match;
717
+
718
+ const uint lookahead_start_ofs = m_accel.get_lookahead_pos() & m_accel.get_max_dict_size_mask();
719
+
720
+ uint cur_dict_ofs = parse_state.m_start_ofs;
721
+ uint cur_lookahead_ofs = cur_dict_ofs - lookahead_start_ofs;
722
+ uint cur_node_index = 0;
723
+
724
+ enum { cMaxFullMatches = cMatchAccelMaxSupportedProbes };
725
+ uint match_lens[cMaxFullMatches];
726
+ uint match_distances[cMaxFullMatches];
727
+
728
+ bit_cost_t lzdec_bitcosts[cMaxMatchLen + 1];
729
+
730
+ node prev_lit_node;
731
+ prev_lit_node.clear();
732
+
733
+ while (cur_node_index < bytes_to_parse)
734
+ {
735
+ node* pCur_node = &pNodes[cur_node_index];
736
+
737
+ const uint max_admissable_match_len = LZHAM_MIN(static_cast<uint>(CLZBase::cMaxMatchLen), bytes_to_parse - cur_node_index);
738
+ const uint find_dict_size = m_accel.get_cur_dict_size() + cur_lookahead_ofs;
739
+
740
+ const uint lit_pred0 = approx_state.get_pred_char(m_accel, cur_dict_ofs, 1);
741
+
742
+ const uint8* pLookahead = &m_accel.m_dict[cur_dict_ofs];
743
+
744
+ // full matches
745
+ uint max_full_match_len = 0;
746
+ uint num_full_matches = 0;
747
+ uint len2_match_dist = 0;
748
+
749
+ if (max_admissable_match_len >= CLZBase::cMinMatchLen)
750
+ {
751
+ const dict_match* pMatches = m_accel.find_matches(cur_lookahead_ofs);
752
+ if (pMatches)
753
+ {
754
+ for ( ; ; )
755
+ {
756
+ uint match_len = pMatches->get_len();
757
+ LZHAM_ASSERT((pMatches->get_dist() > 0) && (pMatches->get_dist() <= m_dict_size));
758
+ match_len = LZHAM_MIN(match_len, max_admissable_match_len);
759
+
760
+ if (match_len > max_full_match_len)
761
+ {
762
+ max_full_match_len = match_len;
763
+
764
+ match_lens[num_full_matches] = match_len;
765
+ match_distances[num_full_matches] = pMatches->get_dist();
766
+ num_full_matches++;
767
+ }
768
+
769
+ if (pMatches->is_last())
770
+ break;
771
+ pMatches++;
772
+ }
773
+ }
774
+
775
+ len2_match_dist = m_accel.get_len2_match(cur_lookahead_ofs);
776
+ }
777
+
778
+ for (uint cur_node_state_index = 0; cur_node_state_index < pCur_node->m_num_node_states; cur_node_state_index++)
779
+ {
780
+ node_state &cur_node_state = pCur_node->m_node_states[cur_node_state_index];
781
+
782
+ if (cur_node_index)
783
+ {
784
+ LZHAM_ASSERT(cur_node_state.m_parent_index >= 0);
785
+
786
+ approx_state.restore_partial_state(cur_node_state.m_saved_state);
787
+ }
788
+
789
+ uint is_match_model_index = LZHAM_IS_MATCH_MODEL_INDEX(approx_state.m_cur_state);
790
+
791
+ const bit_cost_t cur_node_total_cost = cur_node_state.m_total_cost;
792
+ const uint cur_node_total_complexity = cur_node_state.m_total_complexity;
793
+
794
+ // rep matches
795
+ uint match_hist_max_len = 0;
796
+ uint match_hist_min_match_len = 1;
797
+ for (uint rep_match_index = 0; rep_match_index < cMatchHistSize; rep_match_index++)
798
+ {
799
+ uint hist_match_len = 0;
800
+
801
+ uint dist = approx_state.m_match_hist[rep_match_index];
802
+ if (dist <= find_dict_size)
803
+ {
804
+ const uint comp_pos = static_cast<uint>((m_accel.m_lookahead_pos + cur_lookahead_ofs - dist) & m_accel.m_max_dict_size_mask);
805
+ const uint8* pComp = &m_accel.m_dict[comp_pos];
806
+
807
+ for (hist_match_len = 0; hist_match_len < max_admissable_match_len; hist_match_len++)
808
+ if (pComp[hist_match_len] != pLookahead[hist_match_len])
809
+ break;
810
+ }
811
+
812
+ if (hist_match_len >= match_hist_min_match_len)
813
+ {
814
+ match_hist_max_len = math::maximum(match_hist_max_len, hist_match_len);
815
+
816
+ approx_state.get_rep_match_costs(cur_dict_ofs, lzdec_bitcosts, rep_match_index, match_hist_min_match_len, hist_match_len, is_match_model_index);
817
+
818
+ uint rep_match_total_complexity = cur_node_total_complexity + (cRep0Complexity + rep_match_index);
819
+ for (uint l = match_hist_min_match_len; l <= hist_match_len; l++)
820
+ {
821
+ #if LZHAM_VERIFY_MATCH_COSTS
822
+ {
823
+ lzdecision actual_dec(cur_dict_ofs, l, -((int)rep_match_index + 1));
824
+ bit_cost_t actual_cost = approx_state.get_cost(*this, m_accel, actual_dec);
825
+ LZHAM_ASSERT(actual_cost == lzdec_bitcosts[l]);
826
+ }
827
+ #endif
828
+ node& dst_node = pCur_node[l];
829
+
830
+ bit_cost_t rep_match_total_cost = cur_node_total_cost + lzdec_bitcosts[l];
831
+
832
+ dst_node.add_state(cur_node_index, cur_node_state_index, lzdecision(cur_dict_ofs, l, -((int)rep_match_index + 1)), approx_state, rep_match_total_cost, rep_match_total_complexity);
833
+ }
834
+ }
835
+
836
+ match_hist_min_match_len = CLZBase::cMinMatchLen;
837
+ }
838
+
839
+ uint min_truncate_match_len = match_hist_max_len;
840
+
841
+ // nearest len2 match
842
+ if (len2_match_dist)
843
+ {
844
+ lzdecision lzdec(cur_dict_ofs, 2, len2_match_dist);
845
+ bit_cost_t actual_cost = approx_state.get_cost(*this, m_accel, lzdec);
846
+ pCur_node[2].add_state(cur_node_index, cur_node_state_index, lzdec, approx_state, cur_node_total_cost + actual_cost, cur_node_total_complexity + cShortMatchComplexity);
847
+
848
+ min_truncate_match_len = LZHAM_MAX(min_truncate_match_len, 2);
849
+ }
850
+
851
+ // full matches
852
+ if (max_full_match_len > min_truncate_match_len)
853
+ {
854
+ uint prev_max_match_len = LZHAM_MAX(1, min_truncate_match_len);
855
+ for (uint full_match_index = 0; full_match_index < num_full_matches; full_match_index++)
856
+ {
857
+ uint end_len = match_lens[full_match_index];
858
+ if (end_len <= min_truncate_match_len)
859
+ continue;
860
+
861
+ uint start_len = prev_max_match_len + 1;
862
+ uint match_dist = match_distances[full_match_index];
863
+
864
+ LZHAM_ASSERT(start_len <= end_len);
865
+
866
+ approx_state.get_full_match_costs(*this, cur_dict_ofs, lzdec_bitcosts, match_dist, start_len, end_len, is_match_model_index);
867
+
868
+ for (uint l = start_len; l <= end_len; l++)
869
+ {
870
+ uint match_complexity = (l >= cLongMatchComplexityLenThresh) ? cLongMatchComplexity : cShortMatchComplexity;
871
+
872
+ #if LZHAM_VERIFY_MATCH_COSTS
873
+ {
874
+ lzdecision actual_dec(cur_dict_ofs, l, match_dist);
875
+ bit_cost_t actual_cost = approx_state.get_cost(*this, m_accel, actual_dec);
876
+ LZHAM_ASSERT(actual_cost == lzdec_bitcosts[l]);
877
+ }
878
+ #endif
879
+ node& dst_node = pCur_node[l];
880
+
881
+ bit_cost_t match_total_cost = cur_node_total_cost + lzdec_bitcosts[l];
882
+ uint match_total_complexity = cur_node_total_complexity + match_complexity;
883
+
884
+ dst_node.add_state( cur_node_index, cur_node_state_index, lzdecision(cur_dict_ofs, l, match_dist), approx_state, match_total_cost, match_total_complexity);
885
+ }
886
+
887
+ prev_max_match_len = end_len;
888
+ }
889
+ }
890
+
891
+ // literal
892
+ bit_cost_t lit_cost = approx_state.get_lit_cost(*this, m_accel, cur_dict_ofs, lit_pred0, is_match_model_index);
893
+ bit_cost_t lit_total_cost = cur_node_total_cost + lit_cost;
894
+ uint lit_total_complexity = cur_node_total_complexity + cLitComplexity;
895
+ #if LZHAM_VERIFY_MATCH_COSTS
896
+ {
897
+ lzdecision actual_dec(cur_dict_ofs, 0, 0);
898
+ bit_cost_t actual_cost = approx_state.get_cost(*this, m_accel, actual_dec);
899
+ LZHAM_ASSERT(actual_cost == lit_cost);
900
+ }
901
+ #endif
902
+
903
+ pCur_node[1].add_state( cur_node_index, cur_node_state_index, lzdecision(cur_dict_ofs, 0, 0), approx_state, lit_total_cost, lit_total_complexity);
904
+
905
+ } // cur_node_state_index
906
+
907
+ cur_dict_ofs++;
908
+ cur_lookahead_ofs++;
909
+ cur_node_index++;
910
+ }
911
+
912
+ // Now get the optimal decisions by starting from the goal node.
913
+ // m_best_decisions is filled backwards.
914
+ if (!parse_state.m_best_decisions.try_reserve(bytes_to_parse))
915
+ {
916
+ parse_state.m_failed = true;
917
+ return false;
918
+ }
919
+
920
+ bit_cost_t lowest_final_cost = cBitCostMax; //math::cNearlyInfinite;
921
+ int node_state_index = 0;
922
+ node_state *pLast_node_states = pNodes[bytes_to_parse].m_node_states;
923
+ for (uint i = 0; i < pNodes[bytes_to_parse].m_num_node_states; i++)
924
+ {
925
+ if (pLast_node_states[i].m_total_cost < lowest_final_cost)
926
+ {
927
+ lowest_final_cost = pLast_node_states[i].m_total_cost;
928
+ node_state_index = i;
929
+ }
930
+ }
931
+
932
+ int node_index = bytes_to_parse;
933
+ lzdecision *pDst_dec = parse_state.m_best_decisions.get_ptr();
934
+ do
935
+ {
936
+ LZHAM_ASSERT((node_index >= 0) && (node_index <= (int)cMaxParseGraphNodes));
937
+
938
+ node& cur_node = pNodes[node_index];
939
+ const node_state &cur_node_state = cur_node.m_node_states[node_state_index];
940
+
941
+ *pDst_dec++ = cur_node_state.m_lzdec;
942
+
943
+ node_index = cur_node_state.m_parent_index;
944
+ node_state_index = cur_node_state.m_parent_state_index;
945
+
946
+ } while (node_index > 0);
947
+
948
+ parse_state.m_best_decisions.try_resize(static_cast<uint>(pDst_dec - parse_state.m_best_decisions.get_ptr()));
949
+
950
+ return true;
951
+ }
952
+
953
+ // Parsing notes:
954
+ // The regular "optimal" parser only tracks the single cheapest candidate LZ decision per lookahead character.
955
+ // This function finds the shortest path through an extremely dense node graph using a streamlined/simplified Dijkstra's algorithm with some coding heuristics.
956
+ // Graph edges are LZ "decisions", cost is measured in fractional bits needed to code each graph edge, and graph nodes are lookahead characters.
957
+ // There is no need to track visited/unvisted nodes, or find the next cheapest unvisted node in each iteration. The search always proceeds sequentially, visiting each lookahead character in turn from left/right.
958
+ // The major CPU expense of this function is the complexity of LZ decision cost evaluation, so a lot of implementation effort is spent here reducing this overhead.
959
+ // To simplify the problem, it assumes the input statistics are locally stationary over the input block to parse. (Otherwise, it would need to store, track, and update
960
+ // unique symbol statistics for each lookahead character, which would be very costly.)
961
+ // This function always sequentially pushes "forward" the unvisited node horizon. This horizon frequently collapses to a single node, which guarantees that the shortest path through the
962
+ // graph must pass through this node. LZMA tracks cumulative bitprices relative to this node, while LZHAM currently always tracks cumulative bitprices relative to the first node in the lookahead buffer.
963
+ // In very early versions of LZHAM the parse was much more understandable (straight Dijkstra with almost no bit price optimizations or coding heuristics).
964
+ bool lzcompressor::optimal_parse(parse_thread_state &parse_state)
965
+ {
966
+ LZHAM_ASSERT(parse_state.m_bytes_to_match <= cMaxParseGraphNodes);
967
+
968
+ parse_state.m_failed = false;
969
+ parse_state.m_emit_decisions_backwards = true;
970
+
971
+ node_state *pNodes = reinterpret_cast<node_state*>(parse_state.m_nodes);
972
+ pNodes[0].m_parent_index = -1;
973
+ pNodes[0].m_total_cost = 0;
974
+ pNodes[0].m_total_complexity = 0;
975
+
976
+ #if 0
977
+ for (uint i = 1; i <= cMaxParseGraphNodes; i++)
978
+ {
979
+ pNodes[i].clear();
980
+ }
981
+ #else
982
+ memset( &pNodes[1], 0xFF, cMaxParseGraphNodes * sizeof(node_state));
983
+ #endif
984
+
985
+ state &approx_state = parse_state.m_initial_state;
986
+
987
+ const uint bytes_to_parse = parse_state.m_bytes_to_match;
988
+
989
+ const uint lookahead_start_ofs = m_accel.get_lookahead_pos() & m_accel.get_max_dict_size_mask();
990
+
991
+ uint cur_dict_ofs = parse_state.m_start_ofs;
992
+ uint cur_lookahead_ofs = cur_dict_ofs - lookahead_start_ofs;
993
+ uint cur_node_index = 0;
994
+
995
+ enum { cMaxFullMatches = cMatchAccelMaxSupportedProbes };
996
+ uint match_lens[cMaxFullMatches];
997
+ uint match_distances[cMaxFullMatches];
998
+
999
+ bit_cost_t lzdec_bitcosts[cMaxMatchLen + 1];
1000
+
1001
+ while (cur_node_index < bytes_to_parse)
1002
+ {
1003
+ node_state* pCur_node = &pNodes[cur_node_index];
1004
+
1005
+ const uint max_admissable_match_len = LZHAM_MIN(static_cast<uint>(CLZBase::cMaxMatchLen), bytes_to_parse - cur_node_index);
1006
+ const uint find_dict_size = m_accel.m_cur_dict_size + cur_lookahead_ofs;
1007
+
1008
+ if (cur_node_index)
1009
+ {
1010
+ LZHAM_ASSERT(pCur_node->m_parent_index >= 0);
1011
+
1012
+ // Move to this node's state using the lowest cost LZ decision found.
1013
+ approx_state.restore_partial_state(pCur_node->m_saved_state);
1014
+ approx_state.partial_advance(pCur_node->m_lzdec);
1015
+ }
1016
+
1017
+ const bit_cost_t cur_node_total_cost = pCur_node->m_total_cost;
1018
+ // This assert includes a fudge factor - make sure we don't overflow our scaled costs.
1019
+ LZHAM_ASSERT((cBitCostMax - cur_node_total_cost) > (cBitCostScale * 64));
1020
+ const uint cur_node_total_complexity = pCur_node->m_total_complexity;
1021
+
1022
+ const uint lit_pred0 = approx_state.get_pred_char(m_accel, cur_dict_ofs, 1);
1023
+ uint is_match_model_index = LZHAM_IS_MATCH_MODEL_INDEX(approx_state.m_cur_state);
1024
+
1025
+ const uint8* pLookahead = &m_accel.m_dict[cur_dict_ofs];
1026
+
1027
+ // rep matches
1028
+ uint match_hist_max_len = 0;
1029
+ uint match_hist_min_match_len = 1;
1030
+ for (uint rep_match_index = 0; rep_match_index < cMatchHistSize; rep_match_index++)
1031
+ {
1032
+ uint hist_match_len = 0;
1033
+
1034
+ uint dist = approx_state.m_match_hist[rep_match_index];
1035
+ if (dist <= find_dict_size)
1036
+ {
1037
+ const uint comp_pos = static_cast<uint>((m_accel.m_lookahead_pos + cur_lookahead_ofs - dist) & m_accel.m_max_dict_size_mask);
1038
+ const uint8* pComp = &m_accel.m_dict[comp_pos];
1039
+
1040
+ for (hist_match_len = 0; hist_match_len < max_admissable_match_len; hist_match_len++)
1041
+ if (pComp[hist_match_len] != pLookahead[hist_match_len])
1042
+ break;
1043
+ }
1044
+
1045
+ if (hist_match_len >= match_hist_min_match_len)
1046
+ {
1047
+ match_hist_max_len = math::maximum(match_hist_max_len, hist_match_len);
1048
+
1049
+ approx_state.get_rep_match_costs(cur_dict_ofs, lzdec_bitcosts, rep_match_index, match_hist_min_match_len, hist_match_len, is_match_model_index);
1050
+
1051
+ uint rep_match_total_complexity = cur_node_total_complexity + (cRep0Complexity + rep_match_index);
1052
+ for (uint l = match_hist_min_match_len; l <= hist_match_len; l++)
1053
+ {
1054
+ #if LZHAM_VERIFY_MATCH_COSTS
1055
+ {
1056
+ lzdecision actual_dec(cur_dict_ofs, l, -((int)rep_match_index + 1));
1057
+ bit_cost_t actual_cost = approx_state.get_cost(*this, m_accel, actual_dec);
1058
+ LZHAM_ASSERT(actual_cost == lzdec_bitcosts[l]);
1059
+ }
1060
+ #endif
1061
+ node_state& dst_node = pCur_node[l];
1062
+
1063
+ bit_cost_t rep_match_total_cost = cur_node_total_cost + lzdec_bitcosts[l];
1064
+
1065
+ if ((rep_match_total_cost > dst_node.m_total_cost) || ((rep_match_total_cost == dst_node.m_total_cost) && (rep_match_total_complexity >= dst_node.m_total_complexity)))
1066
+ continue;
1067
+
1068
+ dst_node.m_total_cost = rep_match_total_cost;
1069
+ dst_node.m_total_complexity = rep_match_total_complexity;
1070
+ dst_node.m_parent_index = (uint16)cur_node_index;
1071
+ approx_state.save_partial_state(dst_node.m_saved_state);
1072
+ dst_node.m_lzdec.init(cur_dict_ofs, l, -((int)rep_match_index + 1));
1073
+ dst_node.m_lzdec.m_len = l;
1074
+ }
1075
+ }
1076
+
1077
+ match_hist_min_match_len = CLZBase::cMinMatchLen;
1078
+ }
1079
+
1080
+ uint max_match_len = match_hist_max_len;
1081
+
1082
+ if (max_match_len >= m_settings.m_fast_bytes)
1083
+ {
1084
+ cur_dict_ofs += max_match_len;
1085
+ cur_lookahead_ofs += max_match_len;
1086
+ cur_node_index += max_match_len;
1087
+ continue;
1088
+ }
1089
+
1090
+ // full matches
1091
+ if (max_admissable_match_len >= CLZBase::cMinMatchLen)
1092
+ {
1093
+ uint num_full_matches = 0;
1094
+
1095
+ if (match_hist_max_len < 2)
1096
+ {
1097
+ // Get the nearest len2 match if we didn't find a rep len2.
1098
+ uint len2_match_dist = m_accel.get_len2_match(cur_lookahead_ofs);
1099
+ if (len2_match_dist)
1100
+ {
1101
+ bit_cost_t cost = approx_state.get_len2_match_cost(*this, cur_dict_ofs, len2_match_dist, is_match_model_index);
1102
+
1103
+ #if LZHAM_VERIFY_MATCH_COSTS
1104
+ {
1105
+ lzdecision actual_dec(cur_dict_ofs, 2, len2_match_dist);
1106
+ bit_cost_t actual_cost = approx_state.get_cost(*this, m_accel, actual_dec);
1107
+ LZHAM_ASSERT(actual_cost == cost);
1108
+ }
1109
+ #endif
1110
+
1111
+ node_state& dst_node = pCur_node[2];
1112
+
1113
+ bit_cost_t match_total_cost = cur_node_total_cost + cost;
1114
+ uint match_total_complexity = cur_node_total_complexity + cShortMatchComplexity;
1115
+
1116
+ if ((match_total_cost < dst_node.m_total_cost) || ((match_total_cost == dst_node.m_total_cost) && (match_total_complexity < dst_node.m_total_complexity)))
1117
+ {
1118
+ dst_node.m_total_cost = match_total_cost;
1119
+ dst_node.m_total_complexity = match_total_complexity;
1120
+ dst_node.m_parent_index = (uint16)cur_node_index;
1121
+ approx_state.save_partial_state(dst_node.m_saved_state);
1122
+ dst_node.m_lzdec.init(cur_dict_ofs, 2, len2_match_dist);
1123
+ }
1124
+
1125
+ max_match_len = 2;
1126
+ }
1127
+ }
1128
+
1129
+ const uint min_truncate_match_len = max_match_len;
1130
+
1131
+ // Now get all full matches: the nearest matches at each match length. (Actually, we don't
1132
+ // always get the nearest match. The match finder favors those matches which have the lowest value
1133
+ // in the nibble of each match distance, all other things being equal, to help exploit how the lowest
1134
+ // nibble of match distances is separately coded.)
1135
+ const dict_match* pMatches = m_accel.find_matches(cur_lookahead_ofs);
1136
+ if (pMatches)
1137
+ {
1138
+ for ( ; ; )
1139
+ {
1140
+ uint match_len = pMatches->get_len();
1141
+ LZHAM_ASSERT((pMatches->get_dist() > 0) && (pMatches->get_dist() <= m_dict_size));
1142
+ match_len = LZHAM_MIN(match_len, max_admissable_match_len);
1143
+
1144
+ if (match_len > max_match_len)
1145
+ {
1146
+ max_match_len = match_len;
1147
+
1148
+ match_lens[num_full_matches] = match_len;
1149
+ match_distances[num_full_matches] = pMatches->get_dist();
1150
+ num_full_matches++;
1151
+ }
1152
+
1153
+ if (pMatches->is_last())
1154
+ break;
1155
+ pMatches++;
1156
+ }
1157
+ }
1158
+
1159
+ if (num_full_matches)
1160
+ {
1161
+ uint prev_max_match_len = LZHAM_MAX(1, min_truncate_match_len);
1162
+ for (uint full_match_index = 0; full_match_index < num_full_matches; full_match_index++)
1163
+ {
1164
+ uint start_len = prev_max_match_len + 1;
1165
+ uint end_len = match_lens[full_match_index];
1166
+ uint match_dist = match_distances[full_match_index];
1167
+
1168
+ LZHAM_ASSERT(start_len <= end_len);
1169
+
1170
+ approx_state.get_full_match_costs(*this, cur_dict_ofs, lzdec_bitcosts, match_dist, start_len, end_len, is_match_model_index);
1171
+
1172
+ for (uint l = start_len; l <= end_len; l++)
1173
+ {
1174
+ uint match_complexity = (l >= cLongMatchComplexityLenThresh) ? cLongMatchComplexity : cShortMatchComplexity;
1175
+
1176
+ #if LZHAM_VERIFY_MATCH_COSTS
1177
+ {
1178
+ lzdecision actual_dec(cur_dict_ofs, l, match_dist);
1179
+ bit_cost_t actual_cost = approx_state.get_cost(*this, m_accel, actual_dec);
1180
+ LZHAM_ASSERT(actual_cost == lzdec_bitcosts[l]);
1181
+ }
1182
+ #endif
1183
+ node_state& dst_node = pCur_node[l];
1184
+
1185
+ bit_cost_t match_total_cost = cur_node_total_cost + lzdec_bitcosts[l];
1186
+ uint match_total_complexity = cur_node_total_complexity + match_complexity;
1187
+
1188
+ if ((match_total_cost > dst_node.m_total_cost) || ((match_total_cost == dst_node.m_total_cost) && (match_total_complexity >= dst_node.m_total_complexity)))
1189
+ continue;
1190
+
1191
+ dst_node.m_total_cost = match_total_cost;
1192
+ dst_node.m_total_complexity = match_total_complexity;
1193
+ dst_node.m_parent_index = (uint16)cur_node_index;
1194
+ approx_state.save_partial_state(dst_node.m_saved_state);
1195
+ dst_node.m_lzdec.init(cur_dict_ofs, l, match_dist);
1196
+ }
1197
+
1198
+ prev_max_match_len = end_len;
1199
+ }
1200
+ }
1201
+ }
1202
+
1203
+ if (max_match_len >= m_settings.m_fast_bytes)
1204
+ {
1205
+ cur_dict_ofs += max_match_len;
1206
+ cur_lookahead_ofs += max_match_len;
1207
+ cur_node_index += max_match_len;
1208
+ continue;
1209
+ }
1210
+
1211
+ // literal
1212
+ bit_cost_t lit_cost = approx_state.get_lit_cost(*this, m_accel, cur_dict_ofs, lit_pred0, is_match_model_index);
1213
+ bit_cost_t lit_total_cost = cur_node_total_cost + lit_cost;
1214
+ uint lit_total_complexity = cur_node_total_complexity + cLitComplexity;
1215
+ #if LZHAM_VERIFY_MATCH_COSTS
1216
+ {
1217
+ lzdecision actual_dec(cur_dict_ofs, 0, 0);
1218
+ bit_cost_t actual_cost = approx_state.get_cost(*this, m_accel, actual_dec);
1219
+ LZHAM_ASSERT(actual_cost == lit_cost);
1220
+ }
1221
+ #endif
1222
+ if ((lit_total_cost < pCur_node[1].m_total_cost) || ((lit_total_cost == pCur_node[1].m_total_cost) && (lit_total_complexity < pCur_node[1].m_total_complexity)))
1223
+ {
1224
+ pCur_node[1].m_total_cost = lit_total_cost;
1225
+ pCur_node[1].m_total_complexity = lit_total_complexity;
1226
+ pCur_node[1].m_parent_index = (int16)cur_node_index;
1227
+ approx_state.save_partial_state(pCur_node[1].m_saved_state);
1228
+ pCur_node[1].m_lzdec.init(cur_dict_ofs, 0, 0);
1229
+ }
1230
+
1231
+ cur_dict_ofs++;
1232
+ cur_lookahead_ofs++;
1233
+ cur_node_index++;
1234
+
1235
+ } // graph search
1236
+
1237
+ // Now get the optimal decisions by starting from the goal node.
1238
+ // m_best_decisions is filled backwards.
1239
+ if (!parse_state.m_best_decisions.try_reserve(bytes_to_parse))
1240
+ {
1241
+ parse_state.m_failed = true;
1242
+ return false;
1243
+ }
1244
+
1245
+ int node_index = bytes_to_parse;
1246
+ lzdecision *pDst_dec = parse_state.m_best_decisions.get_ptr();
1247
+ do
1248
+ {
1249
+ LZHAM_ASSERT((node_index >= 0) && (node_index <= (int)cMaxParseGraphNodes));
1250
+ node_state& cur_node = pNodes[node_index];
1251
+
1252
+ *pDst_dec++ = cur_node.m_lzdec;
1253
+
1254
+ node_index = cur_node.m_parent_index;
1255
+
1256
+ } while (node_index > 0);
1257
+
1258
+ parse_state.m_best_decisions.try_resize(static_cast<uint>(pDst_dec - parse_state.m_best_decisions.get_ptr()));
1259
+
1260
+ return true;
1261
+ }
1262
+
1263
+ void lzcompressor::parse_job_callback(uint64 data, void* pData_ptr)
1264
+ {
1265
+ const uint parse_job_index = (uint)data;
1266
+ scoped_perf_section parse_job_timer(cVarArgs, "parse_job_callback %u", parse_job_index);
1267
+
1268
+ (void)pData_ptr;
1269
+
1270
+ parse_thread_state &parse_state = m_parse_thread_state[parse_job_index];
1271
+
1272
+ if ((m_params.m_lzham_compress_flags & LZHAM_COMP_FLAG_EXTREME_PARSING) && (m_params.m_compression_level == cCompressionLevelUber))
1273
+ extreme_parse(parse_state);
1274
+ else
1275
+ optimal_parse(parse_state);
1276
+
1277
+ LZHAM_MEMORY_EXPORT_BARRIER
1278
+
1279
+ if (atomic_decrement32(&m_parse_jobs_remaining) == 0)
1280
+ {
1281
+ m_parse_jobs_complete.release();
1282
+ }
1283
+ }
1284
+
1285
+ // ofs is the absolute dictionary offset, must be >= the lookahead offset.
1286
+ // TODO: Doesn't find len2 matches
1287
+ int lzcompressor::enumerate_lz_decisions(uint ofs, const state& cur_state, lzham::vector<lzpriced_decision>& decisions, uint min_match_len, uint max_match_len)
1288
+ {
1289
+ LZHAM_ASSERT(min_match_len >= 1);
1290
+
1291
+ uint start_ofs = m_accel.get_lookahead_pos() & m_accel.get_max_dict_size_mask();
1292
+ LZHAM_ASSERT(ofs >= start_ofs);
1293
+ const uint lookahead_ofs = ofs - start_ofs;
1294
+
1295
+ uint largest_index = 0;
1296
+ uint largest_len;
1297
+ bit_cost_t largest_cost;
1298
+
1299
+ if (min_match_len <= 1)
1300
+ {
1301
+ if (!decisions.try_resize(1))
1302
+ return -1;
1303
+
1304
+ lzpriced_decision& lit_dec = decisions[0];
1305
+ lit_dec.init(ofs, 0, 0, 0);
1306
+ lit_dec.m_cost = cur_state.get_cost(*this, m_accel, lit_dec);
1307
+ largest_cost = lit_dec.m_cost;
1308
+
1309
+ largest_len = 1;
1310
+ }
1311
+ else
1312
+ {
1313
+ if (!decisions.try_resize(0))
1314
+ return -1;
1315
+
1316
+ largest_len = 0;
1317
+ largest_cost = cBitCostMax;
1318
+ }
1319
+
1320
+ uint match_hist_max_len = 0;
1321
+
1322
+ // Add rep matches.
1323
+ for (uint i = 0; i < cMatchHistSize; i++)
1324
+ {
1325
+ uint hist_match_len = m_accel.get_match_len(lookahead_ofs, cur_state.m_match_hist[i], max_match_len);
1326
+ if (hist_match_len < min_match_len)
1327
+ continue;
1328
+
1329
+ if ( ((hist_match_len == 1) && (i == 0)) || (hist_match_len >= CLZBase::cMinMatchLen) )
1330
+ {
1331
+ match_hist_max_len = math::maximum(match_hist_max_len, hist_match_len);
1332
+
1333
+ lzpriced_decision dec(ofs, hist_match_len, -((int)i + 1));
1334
+ dec.m_cost = cur_state.get_cost(*this, m_accel, dec);
1335
+
1336
+ if (!decisions.try_push_back(dec))
1337
+ return -1;
1338
+
1339
+ if ( (hist_match_len > largest_len) || ((hist_match_len == largest_len) && (dec.m_cost < largest_cost)) )
1340
+ {
1341
+ largest_index = decisions.size() - 1;
1342
+ largest_len = hist_match_len;
1343
+ largest_cost = dec.m_cost;
1344
+ }
1345
+ }
1346
+ }
1347
+
1348
+ // Now add full matches.
1349
+ if ((max_match_len >= CLZBase::cMinMatchLen) && (match_hist_max_len < m_settings.m_fast_bytes))
1350
+ {
1351
+ const dict_match* pMatches = m_accel.find_matches(lookahead_ofs);
1352
+
1353
+ if (pMatches)
1354
+ {
1355
+ for ( ; ; )
1356
+ {
1357
+ uint match_len = math::minimum(pMatches->get_len(), max_match_len);
1358
+ LZHAM_ASSERT((pMatches->get_dist() > 0) && (pMatches->get_dist() <= m_dict_size));
1359
+
1360
+ // Full matches are very likely to be more expensive than rep matches of the same length, so don't bother evaluating them.
1361
+ if ((match_len >= min_match_len) && (match_len > match_hist_max_len))
1362
+ {
1363
+ if ((max_match_len > CLZBase::cMaxMatchLen) && (match_len == CLZBase::cMaxMatchLen))
1364
+ {
1365
+ match_len = m_accel.get_match_len(lookahead_ofs, pMatches->get_dist(), max_match_len, CLZBase::cMaxMatchLen);
1366
+ }
1367
+
1368
+ lzpriced_decision dec(ofs, match_len, pMatches->get_dist());
1369
+ dec.m_cost = cur_state.get_cost(*this, m_accel, dec);
1370
+
1371
+ if (!decisions.try_push_back(dec))
1372
+ return -1;
1373
+
1374
+ if ( (match_len > largest_len) || ((match_len == largest_len) && (dec.get_cost() < largest_cost)) )
1375
+ {
1376
+ largest_index = decisions.size() - 1;
1377
+ largest_len = match_len;
1378
+ largest_cost = dec.get_cost();
1379
+ }
1380
+ }
1381
+ if (pMatches->is_last())
1382
+ break;
1383
+ pMatches++;
1384
+ }
1385
+ }
1386
+ }
1387
+
1388
+ return largest_index;
1389
+ }
1390
+
1391
+ bool lzcompressor::greedy_parse(parse_thread_state &parse_state)
1392
+ {
1393
+ parse_state.m_failed = true;
1394
+ parse_state.m_emit_decisions_backwards = false;
1395
+
1396
+ const uint bytes_to_parse = parse_state.m_bytes_to_match;
1397
+
1398
+ const uint lookahead_start_ofs = m_accel.get_lookahead_pos() & m_accel.get_max_dict_size_mask();
1399
+
1400
+ uint cur_dict_ofs = parse_state.m_start_ofs;
1401
+ uint cur_lookahead_ofs = cur_dict_ofs - lookahead_start_ofs;
1402
+ uint cur_ofs = 0;
1403
+
1404
+ state &approx_state = parse_state.m_initial_state;
1405
+
1406
+ lzham::vector<lzpriced_decision> &decisions = parse_state.m_temp_decisions;
1407
+
1408
+ if (!decisions.try_reserve(384))
1409
+ return false;
1410
+
1411
+ if (!parse_state.m_best_decisions.try_resize(0))
1412
+ return false;
1413
+
1414
+ while (cur_ofs < bytes_to_parse)
1415
+ {
1416
+ const uint max_admissable_match_len = LZHAM_MIN(static_cast<uint>(CLZBase::cMaxHugeMatchLen), bytes_to_parse - cur_ofs);
1417
+
1418
+ int largest_dec_index = enumerate_lz_decisions(cur_dict_ofs, approx_state, decisions, 1, max_admissable_match_len);
1419
+ if (largest_dec_index < 0)
1420
+ return false;
1421
+
1422
+ const lzpriced_decision &dec = decisions[largest_dec_index];
1423
+
1424
+ if (!parse_state.m_best_decisions.try_push_back(dec))
1425
+ return false;
1426
+
1427
+ approx_state.partial_advance(dec);
1428
+
1429
+ uint match_len = dec.get_len();
1430
+ LZHAM_ASSERT(match_len <= max_admissable_match_len);
1431
+ cur_dict_ofs += match_len;
1432
+ cur_lookahead_ofs += match_len;
1433
+ cur_ofs += match_len;
1434
+
1435
+ if (parse_state.m_best_decisions.size() >= parse_state.m_max_greedy_decisions)
1436
+ {
1437
+ parse_state.m_greedy_parse_total_bytes_coded = cur_ofs;
1438
+ parse_state.m_greedy_parse_gave_up = true;
1439
+ return false;
1440
+ }
1441
+ }
1442
+
1443
+ parse_state.m_greedy_parse_total_bytes_coded = cur_ofs;
1444
+
1445
+ LZHAM_ASSERT(cur_ofs == bytes_to_parse);
1446
+
1447
+ parse_state.m_failed = false;
1448
+
1449
+ return true;
1450
+ }
1451
+
1452
+ bool lzcompressor::compress_block(const void* pBuf, uint buf_len)
1453
+ {
1454
+ uint cur_ofs = 0;
1455
+ uint bytes_remaining = buf_len;
1456
+ while (bytes_remaining)
1457
+ {
1458
+ uint bytes_to_compress = math::minimum(m_accel.get_max_add_bytes(), bytes_remaining);
1459
+ if (!compress_block_internal(static_cast<const uint8*>(pBuf) + cur_ofs, bytes_to_compress))
1460
+ return false;
1461
+
1462
+ cur_ofs += bytes_to_compress;
1463
+ bytes_remaining -= bytes_to_compress;
1464
+ }
1465
+ return true;
1466
+ }
1467
+
1468
+ void lzcompressor::update_block_history(uint comp_size, uint src_size, uint ratio, bool raw_block, bool reset_update_rate)
1469
+ {
1470
+ block_history& cur_block_history = m_block_history[m_block_history_next];
1471
+ m_block_history_next++;
1472
+ m_block_history_next %= cMaxBlockHistorySize;
1473
+
1474
+ cur_block_history.m_comp_size = comp_size;
1475
+ cur_block_history.m_src_size = src_size;
1476
+ cur_block_history.m_ratio = ratio;
1477
+ cur_block_history.m_raw_block = raw_block;
1478
+ cur_block_history.m_reset_update_rate = reset_update_rate;
1479
+
1480
+ m_block_history_size = LZHAM_MIN(m_block_history_size + 1, static_cast<uint>(cMaxBlockHistorySize));
1481
+ }
1482
+
1483
+ uint lzcompressor::get_recent_block_ratio()
1484
+ {
1485
+ if (!m_block_history_size)
1486
+ return 0;
1487
+
1488
+ uint64 total_scaled_ratio = 0;
1489
+ for (uint i = 0; i < m_block_history_size; i++)
1490
+ total_scaled_ratio += m_block_history[i].m_ratio;
1491
+ total_scaled_ratio /= m_block_history_size;
1492
+
1493
+ return static_cast<uint>(total_scaled_ratio);
1494
+ }
1495
+
1496
+ uint lzcompressor::get_min_block_ratio()
1497
+ {
1498
+ if (!m_block_history_size)
1499
+ return 0;
1500
+ uint min_scaled_ratio = UINT_MAX;
1501
+ for (uint i = 0; i < m_block_history_size; i++)
1502
+ min_scaled_ratio = LZHAM_MIN(m_block_history[i].m_ratio, min_scaled_ratio);
1503
+ return min_scaled_ratio;
1504
+ }
1505
+
1506
+ uint lzcompressor::get_max_block_ratio()
1507
+ {
1508
+ if (!m_block_history_size)
1509
+ return 0;
1510
+ uint max_scaled_ratio = 0;
1511
+ for (uint i = 0; i < m_block_history_size; i++)
1512
+ max_scaled_ratio = LZHAM_MAX(m_block_history[i].m_ratio, max_scaled_ratio);
1513
+ return max_scaled_ratio;
1514
+ }
1515
+
1516
+ uint lzcompressor::get_total_recent_reset_update_rate()
1517
+ {
1518
+ uint total_resets = 0;
1519
+ for (uint i = 0; i < m_block_history_size; i++)
1520
+ total_resets += m_block_history[i].m_reset_update_rate;
1521
+ return total_resets;
1522
+ }
1523
+
1524
+ bool lzcompressor::compress_block_internal(const void* pBuf, uint buf_len)
1525
+ {
1526
+ scoped_perf_section compress_block_timer(cVarArgs, "****** compress_block %u", m_block_index);
1527
+
1528
+ LZHAM_ASSERT(pBuf);
1529
+ LZHAM_ASSERT(buf_len <= m_params.m_block_size);
1530
+
1531
+ LZHAM_ASSERT(m_src_size >= 0);
1532
+ if (m_src_size < 0)
1533
+ return false;
1534
+
1535
+ m_src_size += buf_len;
1536
+
1537
+ // Important: Don't do any expensive work until after add_bytes_begin() is called, to increase parallelism.
1538
+ if (!m_accel.add_bytes_begin(buf_len, static_cast<const uint8*>(pBuf)))
1539
+ return false;
1540
+
1541
+ m_start_of_block_state = m_state;
1542
+
1543
+ m_src_adler32 = adler32(pBuf, buf_len, m_src_adler32);
1544
+
1545
+ m_block_start_dict_ofs = m_accel.get_lookahead_pos() & (m_accel.get_max_dict_size() - 1);
1546
+
1547
+ uint cur_dict_ofs = m_block_start_dict_ofs;
1548
+
1549
+ uint bytes_to_match = buf_len;
1550
+
1551
+ if (!m_codec.start_encoding((buf_len * 9) / 8))
1552
+ return false;
1553
+
1554
+ if (!m_block_index)
1555
+ {
1556
+ if (!send_configuration())
1557
+ return false;
1558
+ }
1559
+
1560
+ #ifdef LZHAM_LZDEBUG
1561
+ m_codec.encode_bits(166, 12);
1562
+ #endif
1563
+
1564
+ if (!m_codec.encode_bits(cCompBlock, cBlockHeaderBits))
1565
+ return false;
1566
+
1567
+ if (!m_codec.encode_arith_init())
1568
+ return false;
1569
+
1570
+ m_state.start_of_block(m_accel, cur_dict_ofs, m_block_index);
1571
+
1572
+ bool emit_reset_update_rate_command = false;
1573
+
1574
+ // Determine if it makes sense to reset the Huffman table update frequency back to their initial (maximum) rates.
1575
+ if ((m_block_history_size) && (m_params.m_lzham_compress_flags & LZHAM_COMP_FLAG_TRADEOFF_DECOMPRESSION_RATE_FOR_COMP_RATIO))
1576
+ {
1577
+ const block_history& prev_block_history = m_block_history[m_block_history_next ? (m_block_history_next - 1) : (cMaxBlockHistorySize - 1)];
1578
+
1579
+ if (prev_block_history.m_raw_block)
1580
+ emit_reset_update_rate_command = true;
1581
+ else if (get_total_recent_reset_update_rate() == 0)
1582
+ {
1583
+ if (get_recent_block_ratio() > (cBlockHistoryCompRatioScale * 95U / 100U))
1584
+ emit_reset_update_rate_command = true;
1585
+ else
1586
+ {
1587
+ uint recent_min_block_ratio = get_min_block_ratio();
1588
+ //uint recent_max_block_ratio = get_max_block_ratio();
1589
+
1590
+ // Compression ratio has recently dropped quite a bit - slam the table update rates back up.
1591
+ if (prev_block_history.m_ratio > (recent_min_block_ratio * 3U) / 2U)
1592
+ {
1593
+ //printf("Emitting reset: %u %u\n", prev_block_history.m_ratio, recent_min_block_ratio);
1594
+ emit_reset_update_rate_command = true;
1595
+ }
1596
+ }
1597
+ }
1598
+ }
1599
+
1600
+ if (emit_reset_update_rate_command)
1601
+ m_state.reset_update_rate();
1602
+
1603
+ m_codec.encode_bits(emit_reset_update_rate_command ? 1 : 0, cBlockFlushTypeBits);
1604
+
1605
+ //coding_stats initial_stats(m_stats);
1606
+
1607
+ uint initial_step = m_step;
1608
+
1609
+ while (bytes_to_match)
1610
+ {
1611
+ const uint cAvgAcceptableGreedyMatchLen = 384;
1612
+ if ((m_params.m_pSeed_bytes) && (bytes_to_match >= cAvgAcceptableGreedyMatchLen))
1613
+ {
1614
+ parse_thread_state &greedy_parse_state = m_parse_thread_state[cMaxParseThreads];
1615
+
1616
+ greedy_parse_state.m_initial_state = m_state;
1617
+ greedy_parse_state.m_initial_state.m_cur_ofs = cur_dict_ofs;
1618
+
1619
+ greedy_parse_state.m_issue_reset_state_partial = false;
1620
+ greedy_parse_state.m_start_ofs = cur_dict_ofs;
1621
+ greedy_parse_state.m_bytes_to_match = LZHAM_MIN(bytes_to_match, static_cast<uint>(CLZBase::cMaxHugeMatchLen));
1622
+
1623
+ greedy_parse_state.m_max_greedy_decisions = LZHAM_MAX((bytes_to_match / cAvgAcceptableGreedyMatchLen), 2);
1624
+ greedy_parse_state.m_greedy_parse_gave_up = false;
1625
+ greedy_parse_state.m_greedy_parse_total_bytes_coded = 0;
1626
+
1627
+ if (!greedy_parse(greedy_parse_state))
1628
+ {
1629
+ if (!greedy_parse_state.m_greedy_parse_gave_up)
1630
+ return false;
1631
+ }
1632
+
1633
+ uint num_greedy_decisions_to_code = 0;
1634
+
1635
+ const lzham::vector<lzdecision> &best_decisions = greedy_parse_state.m_best_decisions;
1636
+
1637
+ if (!greedy_parse_state.m_greedy_parse_gave_up)
1638
+ num_greedy_decisions_to_code = best_decisions.size();
1639
+ else
1640
+ {
1641
+ uint num_small_decisions = 0;
1642
+ uint total_match_len = 0;
1643
+ uint max_match_len = 0;
1644
+
1645
+ uint i;
1646
+ for (i = 0; i < best_decisions.size(); i++)
1647
+ {
1648
+ const lzdecision &dec = best_decisions[i];
1649
+ if (dec.get_len() <= CLZBase::cMaxMatchLen)
1650
+ {
1651
+ num_small_decisions++;
1652
+ if (num_small_decisions > 16)
1653
+ break;
1654
+ }
1655
+
1656
+ total_match_len += dec.get_len();
1657
+ max_match_len = LZHAM_MAX(max_match_len, dec.get_len());
1658
+ }
1659
+
1660
+ if (max_match_len > CLZBase::cMaxMatchLen)
1661
+ {
1662
+ if ((total_match_len / i) >= cAvgAcceptableGreedyMatchLen)
1663
+ {
1664
+ num_greedy_decisions_to_code = i;
1665
+ }
1666
+ }
1667
+ }
1668
+
1669
+ if (num_greedy_decisions_to_code)
1670
+ {
1671
+ for (uint i = 0; i < num_greedy_decisions_to_code; i++)
1672
+ {
1673
+ LZHAM_ASSERT(best_decisions[i].m_pos == (int)cur_dict_ofs);
1674
+ //LZHAM_ASSERT(i >= 0);
1675
+ LZHAM_ASSERT(i < best_decisions.size());
1676
+
1677
+ #if LZHAM_UPDATE_STATS
1678
+ bit_cost_t cost = m_state.get_cost(*this, m_accel, best_decisions[i]);
1679
+ m_stats.update(best_decisions[i], m_state, m_accel, cost);
1680
+ #endif
1681
+
1682
+ if (!code_decision(best_decisions[i], cur_dict_ofs, bytes_to_match))
1683
+ return false;
1684
+ }
1685
+
1686
+ if ((!greedy_parse_state.m_greedy_parse_gave_up) || (!bytes_to_match))
1687
+ continue;
1688
+ }
1689
+ }
1690
+
1691
+ uint num_parse_jobs = LZHAM_MIN(m_num_parse_threads, (bytes_to_match + cMaxParseGraphNodes - 1) / cMaxParseGraphNodes);
1692
+ if ((m_params.m_lzham_compress_flags & LZHAM_COMP_FLAG_DETERMINISTIC_PARSING) == 0)
1693
+ {
1694
+ if (m_use_task_pool && m_accel.get_max_helper_threads())
1695
+ {
1696
+ // Increase the number of active parse jobs as the match finder finishes up to keep CPU utilization up.
1697
+ num_parse_jobs += m_accel.get_num_completed_helper_threads();
1698
+ num_parse_jobs = LZHAM_MIN(num_parse_jobs, cMaxParseThreads);
1699
+ }
1700
+ }
1701
+ if (bytes_to_match < 1536)
1702
+ num_parse_jobs = 1;
1703
+
1704
+ // Reduce block size near the beginning of the file so statistical models get going a bit faster.
1705
+ bool force_small_block = false;
1706
+ if ((!m_block_index) && ((cur_dict_ofs - m_block_start_dict_ofs) < cMaxParseGraphNodes))
1707
+ {
1708
+ num_parse_jobs = 1;
1709
+ force_small_block = true;
1710
+ }
1711
+
1712
+ uint parse_thread_start_ofs = cur_dict_ofs;
1713
+ uint parse_thread_total_size = LZHAM_MIN(bytes_to_match, cMaxParseGraphNodes * num_parse_jobs);
1714
+ if (force_small_block)
1715
+ {
1716
+ parse_thread_total_size = LZHAM_MIN(parse_thread_total_size, 1536);
1717
+ }
1718
+
1719
+ uint parse_thread_remaining = parse_thread_total_size;
1720
+ for (uint parse_thread_index = 0; parse_thread_index < num_parse_jobs; parse_thread_index++)
1721
+ {
1722
+ parse_thread_state &parse_thread = m_parse_thread_state[parse_thread_index];
1723
+
1724
+ parse_thread.m_initial_state = m_state;
1725
+ parse_thread.m_initial_state.m_cur_ofs = parse_thread_start_ofs;
1726
+
1727
+ if (parse_thread_index > 0)
1728
+ {
1729
+ parse_thread.m_initial_state.reset_state_partial();
1730
+ parse_thread.m_issue_reset_state_partial = true;
1731
+ }
1732
+ else
1733
+ {
1734
+ parse_thread.m_issue_reset_state_partial = false;
1735
+ }
1736
+
1737
+ parse_thread.m_start_ofs = parse_thread_start_ofs;
1738
+ if (parse_thread_index == (num_parse_jobs - 1))
1739
+ parse_thread.m_bytes_to_match = parse_thread_remaining;
1740
+ else
1741
+ parse_thread.m_bytes_to_match = parse_thread_total_size / num_parse_jobs;
1742
+
1743
+ parse_thread.m_bytes_to_match = LZHAM_MIN(parse_thread.m_bytes_to_match, cMaxParseGraphNodes);
1744
+ LZHAM_ASSERT(parse_thread.m_bytes_to_match > 0);
1745
+
1746
+ parse_thread.m_max_greedy_decisions = UINT_MAX;
1747
+ parse_thread.m_greedy_parse_gave_up = false;
1748
+
1749
+ parse_thread_start_ofs += parse_thread.m_bytes_to_match;
1750
+ parse_thread_remaining -= parse_thread.m_bytes_to_match;
1751
+ }
1752
+
1753
+ {
1754
+ scoped_perf_section parse_timer("parsing");
1755
+
1756
+ if ((m_use_task_pool) && (num_parse_jobs > 1))
1757
+ {
1758
+ m_parse_jobs_remaining = num_parse_jobs;
1759
+
1760
+ {
1761
+ scoped_perf_section queue_task_timer("queuing parse tasks");
1762
+
1763
+ if (!m_params.m_pTask_pool->queue_multiple_object_tasks(this, &lzcompressor::parse_job_callback, 1, num_parse_jobs - 1))
1764
+ return false;
1765
+ }
1766
+
1767
+ parse_job_callback(0, NULL);
1768
+
1769
+ {
1770
+ scoped_perf_section wait_timer("waiting for jobs");
1771
+
1772
+ m_parse_jobs_complete.wait();
1773
+ }
1774
+ }
1775
+ else
1776
+ {
1777
+ m_parse_jobs_remaining = INT_MAX;
1778
+ for (uint parse_thread_index = 0; parse_thread_index < num_parse_jobs; parse_thread_index++)
1779
+ {
1780
+ parse_job_callback(parse_thread_index, NULL);
1781
+ }
1782
+ }
1783
+ }
1784
+
1785
+ {
1786
+ scoped_perf_section coding_timer("coding");
1787
+
1788
+ for (uint parse_thread_index = 0; parse_thread_index < num_parse_jobs; parse_thread_index++)
1789
+ {
1790
+ parse_thread_state &parse_thread = m_parse_thread_state[parse_thread_index];
1791
+ if (parse_thread.m_failed)
1792
+ return false;
1793
+
1794
+ const lzham::vector<lzdecision> &best_decisions = parse_thread.m_best_decisions;
1795
+
1796
+ if (parse_thread.m_issue_reset_state_partial)
1797
+ {
1798
+ if (!m_state.encode_reset_state_partial(m_codec, m_accel, cur_dict_ofs))
1799
+ return false;
1800
+ m_step++;
1801
+ }
1802
+
1803
+ if (best_decisions.size())
1804
+ {
1805
+ int i = 0;
1806
+ int end_dec_index = static_cast<int>(best_decisions.size()) - 1;
1807
+ int dec_step = 1;
1808
+ if (parse_thread.m_emit_decisions_backwards)
1809
+ {
1810
+ i = static_cast<int>(best_decisions.size()) - 1;
1811
+ end_dec_index = 0;
1812
+ dec_step = -1;
1813
+ LZHAM_ASSERT(best_decisions.back().m_pos == (int)parse_thread.m_start_ofs);
1814
+ }
1815
+ else
1816
+ {
1817
+ LZHAM_ASSERT(best_decisions.front().m_pos == (int)parse_thread.m_start_ofs);
1818
+ }
1819
+
1820
+ // Loop rearranged to avoid bad x64 codegen problem with MSVC2008.
1821
+ for ( ; ; )
1822
+ {
1823
+ LZHAM_ASSERT(best_decisions[i].m_pos == (int)cur_dict_ofs);
1824
+ LZHAM_ASSERT(i >= 0);
1825
+ LZHAM_ASSERT(i < (int)best_decisions.size());
1826
+
1827
+ #if LZHAM_UPDATE_STATS
1828
+ bit_cost_t cost = m_state.get_cost(*this, m_accel, best_decisions[i]);
1829
+ m_stats.update(best_decisions[i], m_state, m_accel, cost);
1830
+ //m_state.print(m_codec, *this, m_accel, best_decisions[i]);
1831
+ #endif
1832
+
1833
+ if (!code_decision(best_decisions[i], cur_dict_ofs, bytes_to_match))
1834
+ return false;
1835
+ if (i == end_dec_index)
1836
+ break;
1837
+ i += dec_step;
1838
+ }
1839
+
1840
+ LZHAM_NOTE_UNUSED(i);
1841
+ }
1842
+
1843
+ LZHAM_ASSERT(cur_dict_ofs == parse_thread.m_start_ofs + parse_thread.m_bytes_to_match);
1844
+
1845
+ } // parse_thread_index
1846
+
1847
+ }
1848
+ }
1849
+
1850
+ {
1851
+ scoped_perf_section add_bytes_timer("add_bytes_end");
1852
+ m_accel.add_bytes_end();
1853
+ }
1854
+
1855
+ if (!m_state.encode_eob(m_codec, m_accel, cur_dict_ofs))
1856
+ return false;
1857
+
1858
+ #ifdef LZHAM_LZDEBUG
1859
+ if (!m_codec.encode_bits(366, 12)) return false;
1860
+ #endif
1861
+
1862
+ {
1863
+ scoped_perf_section stop_encoding_timer("stop_encoding");
1864
+ if (!m_codec.stop_encoding(true)) return false;
1865
+ }
1866
+
1867
+ // Coded the entire block - now see if it makes more sense to just send a raw/uncompressed block.
1868
+
1869
+ uint compressed_size = m_codec.get_encoding_buf().size();
1870
+ LZHAM_NOTE_UNUSED(compressed_size);
1871
+
1872
+ bool used_raw_block = false;
1873
+
1874
+ #if !LZHAM_FORCE_ALL_RAW_BLOCKS
1875
+ #if (defined(LZHAM_DISABLE_RAW_BLOCKS) || defined(LZHAM_LZDEBUG))
1876
+ if (0)
1877
+ #else
1878
+ // TODO: Allow the user to control this threshold, i.e. if less than 1% then just store uncompressed.
1879
+ if (compressed_size >= buf_len)
1880
+ #endif
1881
+ #endif
1882
+ {
1883
+ // Failed to compress the block, so go back to our original state and just code a raw block.
1884
+ m_state = m_start_of_block_state;
1885
+ m_step = initial_step;
1886
+ //m_stats = initial_stats;
1887
+
1888
+ m_codec.reset();
1889
+
1890
+ if (!m_codec.start_encoding(buf_len + 16))
1891
+ return false;
1892
+
1893
+ if (!m_block_index)
1894
+ {
1895
+ if (!send_configuration())
1896
+ return false;
1897
+ }
1898
+
1899
+ #ifdef LZHAM_LZDEBUG
1900
+ if (!m_codec.encode_bits(166, 12))
1901
+ return false;
1902
+ #endif
1903
+
1904
+ if (!m_codec.encode_bits(cRawBlock, cBlockHeaderBits))
1905
+ return false;
1906
+
1907
+ LZHAM_ASSERT(buf_len <= 0x1000000);
1908
+ if (!m_codec.encode_bits(buf_len - 1, 24))
1909
+ return false;
1910
+
1911
+ // Write buf len check bits, to help increase the probability of detecting corrupted data earlier.
1912
+ uint buf_len0 = (buf_len - 1) & 0xFF;
1913
+ uint buf_len1 = ((buf_len - 1) >> 8) & 0xFF;
1914
+ uint buf_len2 = ((buf_len - 1) >> 16) & 0xFF;
1915
+ if (!m_codec.encode_bits((buf_len0 ^ buf_len1) ^ buf_len2, 8))
1916
+ return false;
1917
+
1918
+ if (!m_codec.encode_align_to_byte())
1919
+ return false;
1920
+
1921
+ const uint8* pSrc = m_accel.get_ptr(m_block_start_dict_ofs);
1922
+
1923
+ for (uint i = 0; i < buf_len; i++)
1924
+ {
1925
+ if (!m_codec.encode_bits(*pSrc++, 8))
1926
+ return false;
1927
+ }
1928
+
1929
+ if (!m_codec.stop_encoding(true))
1930
+ return false;
1931
+
1932
+ used_raw_block = true;
1933
+ emit_reset_update_rate_command = false;
1934
+ }
1935
+
1936
+ uint comp_size = m_codec.get_encoding_buf().size();
1937
+ uint scaled_ratio = (comp_size * cBlockHistoryCompRatioScale) / buf_len;
1938
+ update_block_history(comp_size, buf_len, scaled_ratio, used_raw_block, emit_reset_update_rate_command);
1939
+
1940
+ //printf("\n%u, %u, %u, %u\n", m_block_index, 500*emit_reset_update_rate_command, scaled_ratio, get_recent_block_ratio());
1941
+
1942
+ {
1943
+ scoped_perf_section append_timer("append");
1944
+
1945
+ if (m_comp_buf.empty())
1946
+ {
1947
+ m_comp_buf.swap(m_codec.get_encoding_buf());
1948
+ }
1949
+ else
1950
+ {
1951
+ if (!m_comp_buf.append(m_codec.get_encoding_buf()))
1952
+ return false;
1953
+ }
1954
+ }
1955
+ #if LZHAM_UPDATE_STATS
1956
+ LZHAM_VERIFY(m_stats.m_total_bytes == m_src_size);
1957
+ if (emit_reset_update_rate_command)
1958
+ m_stats.m_total_update_rate_resets++;
1959
+ #endif
1960
+
1961
+ m_block_index++;
1962
+
1963
+ return true;
1964
+ }
1965
+
1966
+ } // namespace lzham