extlzham 0.0.1.PROTOTYPE3-x86-mingw32
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE.md +27 -0
- data/README.md +74 -0
- data/Rakefile +152 -0
- data/contrib/lzham/LICENSE +22 -0
- data/contrib/lzham/README.md +209 -0
- data/contrib/lzham/include/lzham.h +781 -0
- data/contrib/lzham/lzhamcomp/lzham_comp.h +38 -0
- data/contrib/lzham/lzhamcomp/lzham_lzbase.cpp +244 -0
- data/contrib/lzham/lzhamcomp/lzham_lzbase.h +45 -0
- data/contrib/lzham/lzhamcomp/lzham_lzcomp.cpp +608 -0
- data/contrib/lzham/lzhamcomp/lzham_lzcomp_internal.cpp +1966 -0
- data/contrib/lzham/lzhamcomp/lzham_lzcomp_internal.h +472 -0
- data/contrib/lzham/lzhamcomp/lzham_lzcomp_state.cpp +1413 -0
- data/contrib/lzham/lzhamcomp/lzham_match_accel.cpp +562 -0
- data/contrib/lzham/lzhamcomp/lzham_match_accel.h +146 -0
- data/contrib/lzham/lzhamcomp/lzham_null_threading.h +97 -0
- data/contrib/lzham/lzhamcomp/lzham_pthreads_threading.cpp +229 -0
- data/contrib/lzham/lzhamcomp/lzham_pthreads_threading.h +520 -0
- data/contrib/lzham/lzhamcomp/lzham_threading.h +12 -0
- data/contrib/lzham/lzhamcomp/lzham_win32_threading.cpp +220 -0
- data/contrib/lzham/lzhamcomp/lzham_win32_threading.h +368 -0
- data/contrib/lzham/lzhamdecomp/lzham_assert.cpp +66 -0
- data/contrib/lzham/lzhamdecomp/lzham_assert.h +40 -0
- data/contrib/lzham/lzhamdecomp/lzham_checksum.cpp +73 -0
- data/contrib/lzham/lzhamdecomp/lzham_checksum.h +13 -0
- data/contrib/lzham/lzhamdecomp/lzham_config.h +23 -0
- data/contrib/lzham/lzhamdecomp/lzham_core.h +264 -0
- data/contrib/lzham/lzhamdecomp/lzham_decomp.h +37 -0
- data/contrib/lzham/lzhamdecomp/lzham_helpers.h +54 -0
- data/contrib/lzham/lzhamdecomp/lzham_huffman_codes.cpp +262 -0
- data/contrib/lzham/lzhamdecomp/lzham_huffman_codes.h +14 -0
- data/contrib/lzham/lzhamdecomp/lzham_lzdecomp.cpp +1527 -0
- data/contrib/lzham/lzhamdecomp/lzham_lzdecompbase.cpp +131 -0
- data/contrib/lzham/lzhamdecomp/lzham_lzdecompbase.h +89 -0
- data/contrib/lzham/lzhamdecomp/lzham_math.h +142 -0
- data/contrib/lzham/lzhamdecomp/lzham_mem.cpp +284 -0
- data/contrib/lzham/lzhamdecomp/lzham_mem.h +112 -0
- data/contrib/lzham/lzhamdecomp/lzham_platform.cpp +157 -0
- data/contrib/lzham/lzhamdecomp/lzham_platform.h +284 -0
- data/contrib/lzham/lzhamdecomp/lzham_prefix_coding.cpp +351 -0
- data/contrib/lzham/lzhamdecomp/lzham_prefix_coding.h +146 -0
- data/contrib/lzham/lzhamdecomp/lzham_symbol_codec.cpp +1484 -0
- data/contrib/lzham/lzhamdecomp/lzham_symbol_codec.h +556 -0
- data/contrib/lzham/lzhamdecomp/lzham_timer.cpp +147 -0
- data/contrib/lzham/lzhamdecomp/lzham_timer.h +99 -0
- data/contrib/lzham/lzhamdecomp/lzham_traits.h +141 -0
- data/contrib/lzham/lzhamdecomp/lzham_types.h +97 -0
- data/contrib/lzham/lzhamdecomp/lzham_utils.h +58 -0
- data/contrib/lzham/lzhamdecomp/lzham_vector.cpp +75 -0
- data/contrib/lzham/lzhamdecomp/lzham_vector.h +588 -0
- data/contrib/lzham/lzhamlib/lzham_lib.cpp +179 -0
- data/examples/basic.rb +48 -0
- data/ext/constants.c +64 -0
- data/ext/decoder.c +313 -0
- data/ext/depend +5 -0
- data/ext/encoder.c +372 -0
- data/ext/error.c +80 -0
- data/ext/extconf.rb +29 -0
- data/ext/extlzham.c +34 -0
- data/ext/extlzham.h +62 -0
- data/gemstub.rb +22 -0
- data/lib/2.0/extlzham.so +0 -0
- data/lib/2.1/extlzham.so +0 -0
- data/lib/2.2/extlzham.so +0 -0
- data/lib/extlzham.rb +158 -0
- data/lib/extlzham/version.rb +5 -0
- data/test/test_extlzham.rb +35 -0
- metadata +156 -0
@@ -0,0 +1,1966 @@
|
|
1
|
+
// File: lzham_lzcomp_internal.cpp
|
2
|
+
// See Copyright Notice and license at the end of include/lzham.h
|
3
|
+
#include "lzham_core.h"
|
4
|
+
#include "lzham_lzcomp_internal.h"
|
5
|
+
#include "lzham_checksum.h"
|
6
|
+
#include "lzham_timer.h"
|
7
|
+
#include "lzham_lzbase.h"
|
8
|
+
#include <string.h>
|
9
|
+
|
10
|
+
// Update and print high-level coding statistics if set to 1.
|
11
|
+
// TODO: Add match distance coding statistics.
|
12
|
+
#define LZHAM_UPDATE_STATS 0
|
13
|
+
|
14
|
+
// Only parse on the main thread, for easier debugging.
|
15
|
+
#define LZHAM_FORCE_SINGLE_THREADED_PARSING 0
|
16
|
+
|
17
|
+
// Verify all computed match costs against the generic/slow state::get_cost() method.
|
18
|
+
#define LZHAM_VERIFY_MATCH_COSTS 0
|
19
|
+
|
20
|
+
// Set to 1 to force all blocks to be uncompressed (raw).
|
21
|
+
#define LZHAM_FORCE_ALL_RAW_BLOCKS 0
|
22
|
+
|
23
|
+
namespace lzham
|
24
|
+
{
|
25
|
+
// Per-level tuning table, one entry per compression level, listed in level order.
// Column order within each entry:
//   m_fast_bytes,
//   m_fast_adaptive_huffman_updating,
//   m_match_accel_max_matches_per_probe,
//   m_match_accel_max_probes
static comp_settings s_level_settings[cCompressionLevelCount] =
{
   { 8,  true,  1,        2,  },                            // cCompressionLevelFastest
   { 24, true,  6,        12, },                            // cCompressionLevelFaster
   { 32, false, UINT_MAX, 16, },                            // cCompressionLevelDefault
   { 48, false, UINT_MAX, 32, },                            // cCompressionLevelBetter
   { 64, false, UINT_MAX, cMatchAccelMaxSupportedProbes, }  // cCompressionLevelUber
};
|
63
|
+
|
64
|
+
// Constructs an uninitialized compressor. m_src_size is set to -1, which reset()
// treats as "init() has not succeeded yet".
lzcompressor::lzcompressor() :
   m_src_size(-1),
   m_src_adler32(0),
   m_step(0),
   m_block_start_dict_ofs(0),
   m_block_index(0),
   m_finished(false),
   m_num_parse_threads(0),
   m_parse_jobs_remaining(0),
   m_parse_jobs_complete(0, 1),
   m_block_history_size(0),
   m_block_history_next(0)
{
   // The task-pool flag is otherwise only written by clear()/init(); give it a
   // defined value here so it is never read while indeterminate. It is assigned in
   // the body (not the initializer list) so member declaration order is irrelevant.
   m_use_task_pool = false;

   // The object must sit at the alignment its type requires.
   LZHAM_VERIFY( ((uint32_ptr)this & (LZHAM_GET_ALIGNMENT(lzcompressor) - 1)) == 0);
}
|
79
|
+
|
80
|
+
bool lzcompressor::init_seed_bytes()
|
81
|
+
{
|
82
|
+
uint cur_seed_ofs = 0;
|
83
|
+
|
84
|
+
while (cur_seed_ofs < m_params.m_num_seed_bytes)
|
85
|
+
{
|
86
|
+
uint total_bytes_remaining = m_params.m_num_seed_bytes - cur_seed_ofs;
|
87
|
+
uint num_bytes_to_add = math::minimum(total_bytes_remaining, m_params.m_block_size);
|
88
|
+
|
89
|
+
if (!m_accel.add_bytes_begin(num_bytes_to_add, static_cast<const uint8*>(m_params.m_pSeed_bytes) + cur_seed_ofs))
|
90
|
+
return false;
|
91
|
+
m_accel.add_bytes_end();
|
92
|
+
|
93
|
+
m_accel.advance_bytes(num_bytes_to_add);
|
94
|
+
|
95
|
+
cur_seed_ofs += num_bytes_to_add;
|
96
|
+
}
|
97
|
+
|
98
|
+
return true;
|
99
|
+
}
|
100
|
+
|
101
|
+
bool lzcompressor::init(const init_params& params)
|
102
|
+
{
|
103
|
+
clear();
|
104
|
+
|
105
|
+
if ((params.m_dict_size_log2 < CLZBase::cMinDictSizeLog2) || (params.m_dict_size_log2 > CLZBase::cMaxDictSizeLog2))
|
106
|
+
return false;
|
107
|
+
if ((params.m_compression_level < 0) || (params.m_compression_level > cCompressionLevelCount))
|
108
|
+
return false;
|
109
|
+
|
110
|
+
m_params = params;
|
111
|
+
m_use_task_pool = (m_params.m_pTask_pool) && (m_params.m_pTask_pool->get_num_threads() != 0) && (m_params.m_max_helper_threads > 0);
|
112
|
+
|
113
|
+
if (!m_use_task_pool)
|
114
|
+
m_params.m_max_helper_threads = 0;
|
115
|
+
|
116
|
+
m_settings = s_level_settings[params.m_compression_level];
|
117
|
+
|
118
|
+
const uint dict_size = 1U << m_params.m_dict_size_log2;
|
119
|
+
|
120
|
+
if (params.m_num_seed_bytes)
|
121
|
+
{
|
122
|
+
if (!params.m_pSeed_bytes)
|
123
|
+
return false;
|
124
|
+
if (params.m_num_seed_bytes > dict_size)
|
125
|
+
return false;
|
126
|
+
}
|
127
|
+
|
128
|
+
uint max_block_size = dict_size / 8;
|
129
|
+
if (m_params.m_block_size > max_block_size)
|
130
|
+
{
|
131
|
+
m_params.m_block_size = max_block_size;
|
132
|
+
}
|
133
|
+
|
134
|
+
m_num_parse_threads = 1;
|
135
|
+
|
136
|
+
#if !LZHAM_FORCE_SINGLE_THREADED_PARSING
|
137
|
+
if (m_params.m_max_helper_threads > 0)
|
138
|
+
{
|
139
|
+
LZHAM_ASSUME(cMaxParseThreads >= 4);
|
140
|
+
|
141
|
+
if (m_params.m_block_size < 16384)
|
142
|
+
{
|
143
|
+
m_num_parse_threads = LZHAM_MIN(cMaxParseThreads, m_params.m_max_helper_threads + 1);
|
144
|
+
}
|
145
|
+
else
|
146
|
+
{
|
147
|
+
if ((m_params.m_max_helper_threads == 1) || (m_params.m_compression_level == cCompressionLevelFastest))
|
148
|
+
{
|
149
|
+
m_num_parse_threads = 1;
|
150
|
+
}
|
151
|
+
else if (m_params.m_max_helper_threads <= 3)
|
152
|
+
{
|
153
|
+
m_num_parse_threads = 2;
|
154
|
+
}
|
155
|
+
else if (m_params.m_max_helper_threads <= 7)
|
156
|
+
{
|
157
|
+
if ((m_params.m_lzham_compress_flags & LZHAM_COMP_FLAG_EXTREME_PARSING) && (m_params.m_compression_level == cCompressionLevelUber))
|
158
|
+
m_num_parse_threads = 4;
|
159
|
+
else
|
160
|
+
m_num_parse_threads = 2;
|
161
|
+
}
|
162
|
+
else
|
163
|
+
{
|
164
|
+
// 8-16
|
165
|
+
m_num_parse_threads = 4;
|
166
|
+
}
|
167
|
+
}
|
168
|
+
}
|
169
|
+
#endif
|
170
|
+
|
171
|
+
int num_parse_jobs = m_num_parse_threads - 1;
|
172
|
+
uint match_accel_helper_threads = LZHAM_MAX(0, (int)m_params.m_max_helper_threads - num_parse_jobs);
|
173
|
+
|
174
|
+
LZHAM_ASSERT(m_num_parse_threads >= 1);
|
175
|
+
LZHAM_ASSERT(m_num_parse_threads <= cMaxParseThreads);
|
176
|
+
|
177
|
+
if (!m_use_task_pool)
|
178
|
+
{
|
179
|
+
LZHAM_ASSERT(!match_accel_helper_threads && (m_num_parse_threads == 1));
|
180
|
+
}
|
181
|
+
else
|
182
|
+
{
|
183
|
+
LZHAM_ASSERT((match_accel_helper_threads + (m_num_parse_threads - 1)) <= m_params.m_max_helper_threads);
|
184
|
+
}
|
185
|
+
|
186
|
+
if (!m_accel.init(this, params.m_pTask_pool, match_accel_helper_threads, dict_size, m_settings.m_match_accel_max_matches_per_probe, false, m_settings.m_match_accel_max_probes))
|
187
|
+
return false;
|
188
|
+
|
189
|
+
init_position_slots(params.m_dict_size_log2);
|
190
|
+
init_slot_tabs();
|
191
|
+
|
192
|
+
//m_settings.m_fast_adaptive_huffman_updating
|
193
|
+
if (!m_state.init(*this, m_params.m_table_max_update_interval, m_params.m_table_update_interval_slow_rate))
|
194
|
+
return false;
|
195
|
+
|
196
|
+
if (!m_block_buf.try_reserve(m_params.m_block_size))
|
197
|
+
return false;
|
198
|
+
|
199
|
+
if (!m_comp_buf.try_reserve(m_params.m_block_size*2))
|
200
|
+
return false;
|
201
|
+
|
202
|
+
for (uint i = 0; i < m_num_parse_threads; i++)
|
203
|
+
{
|
204
|
+
//m_settings.m_fast_adaptive_huffman_updating
|
205
|
+
if (!m_parse_thread_state[i].m_initial_state.init(*this, m_params.m_table_max_update_interval, m_params.m_table_update_interval_slow_rate))
|
206
|
+
return false;
|
207
|
+
}
|
208
|
+
|
209
|
+
m_block_history_size = 0;
|
210
|
+
m_block_history_next = 0;
|
211
|
+
|
212
|
+
if (params.m_num_seed_bytes)
|
213
|
+
{
|
214
|
+
if (!init_seed_bytes())
|
215
|
+
return false;
|
216
|
+
}
|
217
|
+
|
218
|
+
if (!send_zlib_header())
|
219
|
+
return false;
|
220
|
+
|
221
|
+
m_src_size = 0;
|
222
|
+
|
223
|
+
return true;
|
224
|
+
}
|
225
|
+
|
226
|
+
// See http://www.gzip.org/zlib/rfc-zlib.html
|
227
|
+
// Method is set to 14 (LZHAM) and CINFO is (window_size - 15).
|
228
|
+
bool lzcompressor::send_zlib_header()
|
229
|
+
{
|
230
|
+
if ((m_params.m_lzham_compress_flags & LZHAM_COMP_FLAG_WRITE_ZLIB_STREAM) == 0)
|
231
|
+
return true;
|
232
|
+
|
233
|
+
// set CM (method) and CINFO (dictionary size) fields
|
234
|
+
int cmf = LZHAM_Z_LZHAM | ((m_params.m_dict_size_log2 - 15) << 4);
|
235
|
+
|
236
|
+
// set FLEVEL by mapping LZHAM's compression level to zlib's
|
237
|
+
int flg = 0;
|
238
|
+
switch (m_params.m_compression_level)
|
239
|
+
{
|
240
|
+
case LZHAM_COMP_LEVEL_FASTEST:
|
241
|
+
{
|
242
|
+
flg = 0 << 6;
|
243
|
+
break;
|
244
|
+
}
|
245
|
+
case LZHAM_COMP_LEVEL_FASTER:
|
246
|
+
{
|
247
|
+
flg = 1 << 6;
|
248
|
+
break;
|
249
|
+
}
|
250
|
+
case LZHAM_COMP_LEVEL_DEFAULT:
|
251
|
+
case LZHAM_COMP_LEVEL_BETTER:
|
252
|
+
{
|
253
|
+
flg = 2 << 6;
|
254
|
+
break;
|
255
|
+
}
|
256
|
+
default:
|
257
|
+
{
|
258
|
+
flg = 3 << 6;
|
259
|
+
break;
|
260
|
+
}
|
261
|
+
}
|
262
|
+
|
263
|
+
// set FDICT flag
|
264
|
+
if (m_params.m_pSeed_bytes)
|
265
|
+
flg |= 32;
|
266
|
+
|
267
|
+
int check = ((cmf << 8) + flg) % 31;
|
268
|
+
if (check)
|
269
|
+
flg += (31 - check);
|
270
|
+
|
271
|
+
LZHAM_ASSERT(0 == (((cmf << 8) + flg) % 31));
|
272
|
+
if (!m_comp_buf.try_push_back(static_cast<uint8>(cmf)))
|
273
|
+
return false;
|
274
|
+
if (!m_comp_buf.try_push_back(static_cast<uint8>(flg)))
|
275
|
+
return false;
|
276
|
+
|
277
|
+
if (m_params.m_pSeed_bytes)
|
278
|
+
{
|
279
|
+
// send adler32 of DICT
|
280
|
+
uint dict_adler32 = adler32(m_params.m_pSeed_bytes, m_params.m_num_seed_bytes);
|
281
|
+
for (uint i = 0; i < 4; i++)
|
282
|
+
{
|
283
|
+
if (!m_comp_buf.try_push_back(static_cast<uint8>(dict_adler32 >> 24)))
|
284
|
+
return false;
|
285
|
+
dict_adler32 <<= 8;
|
286
|
+
}
|
287
|
+
}
|
288
|
+
|
289
|
+
return true;
|
290
|
+
}
|
291
|
+
|
292
|
+
void lzcompressor::clear()
|
293
|
+
{
|
294
|
+
m_codec.clear();
|
295
|
+
m_src_size = -1;
|
296
|
+
m_src_adler32 = cInitAdler32;
|
297
|
+
m_block_buf.clear();
|
298
|
+
m_comp_buf.clear();
|
299
|
+
|
300
|
+
m_step = 0;
|
301
|
+
m_finished = false;
|
302
|
+
m_use_task_pool = false;
|
303
|
+
m_block_start_dict_ofs = 0;
|
304
|
+
m_block_index = 0;
|
305
|
+
m_state.clear();
|
306
|
+
m_num_parse_threads = 0;
|
307
|
+
m_parse_jobs_remaining = 0;
|
308
|
+
|
309
|
+
for (uint i = 0; i < cMaxParseThreads; i++)
|
310
|
+
{
|
311
|
+
parse_thread_state &parse_state = m_parse_thread_state[i];
|
312
|
+
parse_state.m_initial_state.clear();
|
313
|
+
|
314
|
+
for (uint j = 0; j <= cMaxParseGraphNodes; j++)
|
315
|
+
parse_state.m_nodes[j].clear();
|
316
|
+
|
317
|
+
parse_state.m_start_ofs = 0;
|
318
|
+
parse_state.m_bytes_to_match = 0;
|
319
|
+
parse_state.m_best_decisions.clear();
|
320
|
+
parse_state.m_issue_reset_state_partial = false;
|
321
|
+
parse_state.m_emit_decisions_backwards = false;
|
322
|
+
parse_state.m_failed = false;
|
323
|
+
}
|
324
|
+
|
325
|
+
m_block_history_size = 0;
|
326
|
+
m_block_history_next = 0;
|
327
|
+
}
|
328
|
+
|
329
|
+
bool lzcompressor::reset()
|
330
|
+
{
|
331
|
+
if (m_src_size < 0)
|
332
|
+
return false;
|
333
|
+
|
334
|
+
m_accel.reset();
|
335
|
+
m_codec.reset();
|
336
|
+
m_stats.clear();
|
337
|
+
m_src_size = 0;
|
338
|
+
m_src_adler32 = cInitAdler32;
|
339
|
+
m_block_buf.try_resize(0);
|
340
|
+
m_comp_buf.try_resize(0);
|
341
|
+
|
342
|
+
m_step = 0;
|
343
|
+
m_finished = false;
|
344
|
+
m_block_start_dict_ofs = 0;
|
345
|
+
m_block_index = 0;
|
346
|
+
m_state.reset();
|
347
|
+
|
348
|
+
m_block_history_size = 0;
|
349
|
+
m_block_history_next = 0;
|
350
|
+
|
351
|
+
if (m_params.m_num_seed_bytes)
|
352
|
+
{
|
353
|
+
if (!init_seed_bytes())
|
354
|
+
return false;
|
355
|
+
}
|
356
|
+
|
357
|
+
return send_zlib_header();
|
358
|
+
}
|
359
|
+
|
360
|
+
// Encodes a single LZ decision with the main coder state, then moves forward by
// the decision's length: cur_ofs grows, bytes_to_match shrinks, the match
// accelerator advances, and m_step is incremented.
// Returns false if any encode call fails (nothing is advanced in that case).
bool lzcompressor::code_decision(lzdecision lzdec, uint& cur_ofs, uint& bytes_to_match)
{
#ifdef LZHAM_LZDEBUG
   // Debug builds prefix every decision with a sync marker and a short summary.
   if (!m_codec.encode_bits(CLZBase::cLZHAMDebugSyncMarkerValue, CLZBase::cLZHAMDebugSyncMarkerBits) ||
       !m_codec.encode_bits(lzdec.is_match(), 1) ||
       !m_codec.encode_bits(lzdec.get_len(), 17) ||
       !m_codec.encode_bits(m_state.m_cur_state, 4))
   {
      return false;
   }
#endif

#ifdef LZHAM_LZVERIFY
   if (lzdec.is_match())
   {
      uint match_dist = lzdec.get_match_dist(m_state);

      // The first byte of the match must equal the byte it claims to copy.
      LZHAM_VERIFY(m_accel[cur_ofs] == m_accel[(cur_ofs - match_dist) & (m_accel.get_max_dict_size() - 1)]);
   }
#endif

   // Capture the length before encoding.
   const uint decision_len = lzdec.get_len();

   if (!m_state.encode(m_codec, *this, m_accel, lzdec))
      return false;

   cur_ofs += decision_len;

   LZHAM_ASSERT(bytes_to_match >= len);
   bytes_to_match -= decision_len;

   m_accel.advance_bytes(decision_len);

   m_step++;

   return true;
}
|
393
|
+
|
394
|
+
// Emits a sync block: the block header, a flush-type code, then (byte aligned)
// the 16-bit values 0x0000 and 0xFFFF. The encoded bytes are appended to
// m_comp_buf and m_block_index is incremented.
// Returns false if any encoding or append step fails.
bool lzcompressor::send_sync_block(lzham_flush_t flush_type)
{
   m_codec.reset();

   if (!m_codec.start_encoding(128))
      return false;

#ifdef LZHAM_LZDEBUG
   if (!m_codec.encode_bits(166, 12))
      return false;
#endif

   if (!m_codec.encode_bits(cSyncBlock, cBlockHeaderBits))
      return false;

   // Translate the flush type to its on-wire code.
   // LZHAM_NO_FLUSH, LZHAM_FINISH and anything unrecognized keep code 0.
   int flush_code = 0;
   if (flush_type == LZHAM_FULL_FLUSH)
      flush_code = 2;
   else if (flush_type == LZHAM_TABLE_FLUSH)
      flush_code = 1;
   else if (flush_type == LZHAM_SYNC_FLUSH)
      flush_code = 3;

   if (!m_codec.encode_bits(flush_code, cBlockFlushTypeBits))
      return false;

   if (!m_codec.encode_align_to_byte() ||
       !m_codec.encode_bits(0x0000, 16) ||
       !m_codec.encode_bits(0xFFFF, 16))
   {
      return false;
   }

   if (!m_codec.stop_encoding(true))
      return false;

   if (!m_comp_buf.append(m_codec.get_encoding_buf()))
      return false;

   m_block_index++;
   return true;
}
|
441
|
+
|
442
|
+
// Compresses whatever is pending in m_block_buf, then emits a sync block of the
// requested type. For LZHAM_FULL_FLUSH the match accelerator is flushed and the
// coder state reset as well.
// Returns false if the stream is already finished or if any step fails.
bool lzcompressor::flush(lzham_flush_t flush_type)
{
   LZHAM_ASSERT(!m_finished);
   if (m_finished)
      return false;

   bool ok = true;

   // Drain the partially filled block first; the buffer is emptied either way.
   if (m_block_buf.size())
   {
      ok = compress_block(m_block_buf.get_ptr(), m_block_buf.size());

      m_block_buf.try_resize(0);
   }

   if (ok)
   {
      ok = send_sync_block(flush_type);

      // The full-flush reset is applied whether or not the sync block succeeded.
      if (LZHAM_FULL_FLUSH == flush_type)
      {
         m_accel.flush();
         m_state.reset();
      }
   }

   lzham_flush_buffered_printf();

   return ok;
}
|
471
|
+
|
472
|
+
bool lzcompressor::put_bytes(const void* pBuf, uint buf_len)
|
473
|
+
{
|
474
|
+
LZHAM_ASSERT(!m_finished);
|
475
|
+
if (m_finished)
|
476
|
+
return false;
|
477
|
+
|
478
|
+
bool status = true;
|
479
|
+
|
480
|
+
if (!pBuf)
|
481
|
+
{
|
482
|
+
// Last block - flush whatever's left and send the final block.
|
483
|
+
if (m_block_buf.size())
|
484
|
+
{
|
485
|
+
status = compress_block(m_block_buf.get_ptr(), m_block_buf.size());
|
486
|
+
|
487
|
+
m_block_buf.try_resize(0);
|
488
|
+
}
|
489
|
+
|
490
|
+
if (status)
|
491
|
+
{
|
492
|
+
if (!send_final_block())
|
493
|
+
{
|
494
|
+
status = false;
|
495
|
+
}
|
496
|
+
}
|
497
|
+
|
498
|
+
m_finished = true;
|
499
|
+
}
|
500
|
+
else
|
501
|
+
{
|
502
|
+
// Compress blocks.
|
503
|
+
const uint8 *pSrcBuf = static_cast<const uint8*>(pBuf);
|
504
|
+
uint num_src_bytes_remaining = buf_len;
|
505
|
+
|
506
|
+
while (num_src_bytes_remaining)
|
507
|
+
{
|
508
|
+
const uint num_bytes_to_copy = LZHAM_MIN(num_src_bytes_remaining, m_params.m_block_size - m_block_buf.size());
|
509
|
+
|
510
|
+
if (num_bytes_to_copy == m_params.m_block_size)
|
511
|
+
{
|
512
|
+
LZHAM_ASSERT(!m_block_buf.size());
|
513
|
+
|
514
|
+
// Full-block available - compress in-place.
|
515
|
+
status = compress_block(pSrcBuf, num_bytes_to_copy);
|
516
|
+
}
|
517
|
+
else
|
518
|
+
{
|
519
|
+
// Less than a full block available - append to already accumulated bytes.
|
520
|
+
if (!m_block_buf.append(static_cast<const uint8 *>(pSrcBuf), num_bytes_to_copy))
|
521
|
+
return false;
|
522
|
+
|
523
|
+
LZHAM_ASSERT(m_block_buf.size() <= m_params.m_block_size);
|
524
|
+
|
525
|
+
if (m_block_buf.size() == m_params.m_block_size)
|
526
|
+
{
|
527
|
+
status = compress_block(m_block_buf.get_ptr(), m_block_buf.size());
|
528
|
+
|
529
|
+
m_block_buf.try_resize(0);
|
530
|
+
}
|
531
|
+
}
|
532
|
+
|
533
|
+
if (!status)
|
534
|
+
return false;
|
535
|
+
|
536
|
+
pSrcBuf += num_bytes_to_copy;
|
537
|
+
num_src_bytes_remaining -= num_bytes_to_copy;
|
538
|
+
}
|
539
|
+
}
|
540
|
+
|
541
|
+
lzham_flush_buffered_printf();
|
542
|
+
|
543
|
+
return status;
|
544
|
+
}
|
545
|
+
|
546
|
+
bool lzcompressor::send_final_block()
|
547
|
+
{
|
548
|
+
if (!m_codec.start_encoding(16))
|
549
|
+
return false;
|
550
|
+
|
551
|
+
#ifdef LZHAM_LZDEBUG
|
552
|
+
if (!m_codec.encode_bits(166, 12))
|
553
|
+
return false;
|
554
|
+
#endif
|
555
|
+
|
556
|
+
if (!m_block_index)
|
557
|
+
{
|
558
|
+
if (!send_configuration())
|
559
|
+
return false;
|
560
|
+
}
|
561
|
+
|
562
|
+
if (!m_codec.encode_bits(cEOFBlock, cBlockHeaderBits))
|
563
|
+
return false;
|
564
|
+
|
565
|
+
if (!m_codec.encode_align_to_byte())
|
566
|
+
return false;
|
567
|
+
|
568
|
+
if (!m_codec.encode_bits(m_src_adler32, 32))
|
569
|
+
return false;
|
570
|
+
|
571
|
+
if (!m_codec.stop_encoding(true))
|
572
|
+
return false;
|
573
|
+
|
574
|
+
if (m_comp_buf.empty())
|
575
|
+
{
|
576
|
+
m_comp_buf.swap(m_codec.get_encoding_buf());
|
577
|
+
}
|
578
|
+
else
|
579
|
+
{
|
580
|
+
if (!m_comp_buf.append(m_codec.get_encoding_buf()))
|
581
|
+
return false;
|
582
|
+
}
|
583
|
+
|
584
|
+
m_block_index++;
|
585
|
+
|
586
|
+
#if LZHAM_UPDATE_STATS
|
587
|
+
m_stats.print();
|
588
|
+
#endif
|
589
|
+
|
590
|
+
return true;
|
591
|
+
}
|
592
|
+
|
593
|
+
// Placeholder for writing stream configuration bits; currently emits nothing
// and always succeeds.
bool lzcompressor::send_configuration()
{
   // TODO: Currently unused. The disabled code below would have written two
   // configuration bits:
   //
   //   if (!m_codec.encode_bits(m_settings.m_fast_adaptive_huffman_updating, 1))
   //      return false;
   //   if (!m_codec.encode_bits(0, 1))
   //      return false;

   return true;
}
|
603
|
+
|
604
|
+
void lzcompressor::node::add_state(
|
605
|
+
int parent_index, int parent_state_index,
|
606
|
+
const lzdecision &lzdec, state &parent_state,
|
607
|
+
bit_cost_t total_cost,
|
608
|
+
uint total_complexity)
|
609
|
+
{
|
610
|
+
state_base trial_state;
|
611
|
+
parent_state.save_partial_state(trial_state);
|
612
|
+
trial_state.partial_advance(lzdec);
|
613
|
+
|
614
|
+
for (int i = m_num_node_states - 1; i >= 0; i--)
|
615
|
+
{
|
616
|
+
node_state &cur_node_state = m_node_states[i];
|
617
|
+
if (cur_node_state.m_saved_state == trial_state)
|
618
|
+
{
|
619
|
+
if ( (total_cost < cur_node_state.m_total_cost) ||
|
620
|
+
((total_cost == cur_node_state.m_total_cost) && (total_complexity < cur_node_state.m_total_complexity)) )
|
621
|
+
{
|
622
|
+
cur_node_state.m_parent_index = static_cast<int16>(parent_index);
|
623
|
+
cur_node_state.m_parent_state_index = static_cast<int8>(parent_state_index);
|
624
|
+
cur_node_state.m_lzdec = lzdec;
|
625
|
+
cur_node_state.m_total_cost = total_cost;
|
626
|
+
cur_node_state.m_total_complexity = total_complexity;
|
627
|
+
|
628
|
+
while (i > 0)
|
629
|
+
{
|
630
|
+
if ((m_node_states[i].m_total_cost < m_node_states[i - 1].m_total_cost) ||
|
631
|
+
((m_node_states[i].m_total_cost == m_node_states[i - 1].m_total_cost) && (m_node_states[i].m_total_complexity < m_node_states[i - 1].m_total_complexity)))
|
632
|
+
{
|
633
|
+
std::swap(m_node_states[i], m_node_states[i - 1]);
|
634
|
+
i--;
|
635
|
+
}
|
636
|
+
else
|
637
|
+
break;
|
638
|
+
}
|
639
|
+
}
|
640
|
+
|
641
|
+
return;
|
642
|
+
}
|
643
|
+
}
|
644
|
+
|
645
|
+
int insert_index;
|
646
|
+
for (insert_index = m_num_node_states; insert_index > 0; insert_index--)
|
647
|
+
{
|
648
|
+
node_state &cur_node_state = m_node_states[insert_index - 1];
|
649
|
+
|
650
|
+
if ( (total_cost > cur_node_state.m_total_cost) ||
|
651
|
+
((total_cost == cur_node_state.m_total_cost) && (total_complexity >= cur_node_state.m_total_complexity)) )
|
652
|
+
{
|
653
|
+
break;
|
654
|
+
}
|
655
|
+
}
|
656
|
+
|
657
|
+
if (insert_index == cMaxNodeStates)
|
658
|
+
return;
|
659
|
+
|
660
|
+
uint num_behind = m_num_node_states - insert_index;
|
661
|
+
uint num_to_move = (m_num_node_states < cMaxNodeStates) ? num_behind : (num_behind - 1);
|
662
|
+
if (num_to_move)
|
663
|
+
{
|
664
|
+
LZHAM_ASSERT((insert_index + 1 + num_to_move) <= cMaxNodeStates);
|
665
|
+
memmove( &m_node_states[insert_index + 1], &m_node_states[insert_index], sizeof(node_state) * num_to_move);
|
666
|
+
}
|
667
|
+
|
668
|
+
node_state *pNew_node_state = &m_node_states[insert_index];
|
669
|
+
pNew_node_state->m_parent_index = static_cast<int16>(parent_index);
|
670
|
+
pNew_node_state->m_parent_state_index = static_cast<uint8>(parent_state_index);
|
671
|
+
pNew_node_state->m_lzdec = lzdec;
|
672
|
+
pNew_node_state->m_total_cost = total_cost;
|
673
|
+
pNew_node_state->m_total_complexity = total_complexity;
|
674
|
+
pNew_node_state->m_saved_state = trial_state;
|
675
|
+
|
676
|
+
m_num_node_states = LZHAM_MIN(m_num_node_states + 1, static_cast<uint>(cMaxNodeStates));
|
677
|
+
|
678
|
+
#ifdef LZHAM_LZVERIFY
|
679
|
+
for (uint i = 0; i < (m_num_node_states - 1); ++i)
|
680
|
+
{
|
681
|
+
node_state &a = m_node_states[i];
|
682
|
+
node_state &b = m_node_states[i + 1];
|
683
|
+
LZHAM_VERIFY(
|
684
|
+
(a.m_total_cost < b.m_total_cost) ||
|
685
|
+
((a.m_total_cost == b.m_total_cost) && (a.m_total_complexity <= b.m_total_complexity)) );
|
686
|
+
}
|
687
|
+
#endif
|
688
|
+
}
|
689
|
+
|
690
|
+
// The "extreme" parser tracks the best node::cMaxNodeStates (4) candidate LZ decisions per lookahead character.
|
691
|
+
// This allows the compressor to make locally suboptimal decisions that ultimately result in a better parse.
|
692
|
+
// It assumes the input statistics are locally stationary over the input block to parse.
|
693
|
+
bool lzcompressor::extreme_parse(parse_thread_state &parse_state)
|
694
|
+
{
|
695
|
+
LZHAM_ASSERT(parse_state.m_bytes_to_match <= cMaxParseGraphNodes);
|
696
|
+
|
697
|
+
parse_state.m_failed = false;
|
698
|
+
parse_state.m_emit_decisions_backwards = true;
|
699
|
+
|
700
|
+
node *pNodes = parse_state.m_nodes;
|
701
|
+
for (uint i = 0; i <= cMaxParseGraphNodes; i++)
|
702
|
+
{
|
703
|
+
pNodes[i].clear();
|
704
|
+
}
|
705
|
+
|
706
|
+
state &approx_state = parse_state.m_initial_state;
|
707
|
+
|
708
|
+
pNodes[0].m_num_node_states = 1;
|
709
|
+
node_state &first_node_state = pNodes[0].m_node_states[0];
|
710
|
+
approx_state.save_partial_state(first_node_state.m_saved_state);
|
711
|
+
first_node_state.m_parent_index = -1;
|
712
|
+
first_node_state.m_parent_state_index = -1;
|
713
|
+
first_node_state.m_total_cost = 0;
|
714
|
+
first_node_state.m_total_complexity = 0;
|
715
|
+
|
716
|
+
const uint bytes_to_parse = parse_state.m_bytes_to_match;
|
717
|
+
|
718
|
+
const uint lookahead_start_ofs = m_accel.get_lookahead_pos() & m_accel.get_max_dict_size_mask();
|
719
|
+
|
720
|
+
uint cur_dict_ofs = parse_state.m_start_ofs;
|
721
|
+
uint cur_lookahead_ofs = cur_dict_ofs - lookahead_start_ofs;
|
722
|
+
uint cur_node_index = 0;
|
723
|
+
|
724
|
+
enum { cMaxFullMatches = cMatchAccelMaxSupportedProbes };
|
725
|
+
uint match_lens[cMaxFullMatches];
|
726
|
+
uint match_distances[cMaxFullMatches];
|
727
|
+
|
728
|
+
bit_cost_t lzdec_bitcosts[cMaxMatchLen + 1];
|
729
|
+
|
730
|
+
node prev_lit_node;
|
731
|
+
prev_lit_node.clear();
|
732
|
+
|
733
|
+
while (cur_node_index < bytes_to_parse)
|
734
|
+
{
|
735
|
+
node* pCur_node = &pNodes[cur_node_index];
|
736
|
+
|
737
|
+
const uint max_admissable_match_len = LZHAM_MIN(static_cast<uint>(CLZBase::cMaxMatchLen), bytes_to_parse - cur_node_index);
|
738
|
+
const uint find_dict_size = m_accel.get_cur_dict_size() + cur_lookahead_ofs;
|
739
|
+
|
740
|
+
const uint lit_pred0 = approx_state.get_pred_char(m_accel, cur_dict_ofs, 1);
|
741
|
+
|
742
|
+
const uint8* pLookahead = &m_accel.m_dict[cur_dict_ofs];
|
743
|
+
|
744
|
+
// full matches
|
745
|
+
uint max_full_match_len = 0;
|
746
|
+
uint num_full_matches = 0;
|
747
|
+
uint len2_match_dist = 0;
|
748
|
+
|
749
|
+
if (max_admissable_match_len >= CLZBase::cMinMatchLen)
|
750
|
+
{
|
751
|
+
const dict_match* pMatches = m_accel.find_matches(cur_lookahead_ofs);
|
752
|
+
if (pMatches)
|
753
|
+
{
|
754
|
+
for ( ; ; )
|
755
|
+
{
|
756
|
+
uint match_len = pMatches->get_len();
|
757
|
+
LZHAM_ASSERT((pMatches->get_dist() > 0) && (pMatches->get_dist() <= m_dict_size));
|
758
|
+
match_len = LZHAM_MIN(match_len, max_admissable_match_len);
|
759
|
+
|
760
|
+
if (match_len > max_full_match_len)
|
761
|
+
{
|
762
|
+
max_full_match_len = match_len;
|
763
|
+
|
764
|
+
match_lens[num_full_matches] = match_len;
|
765
|
+
match_distances[num_full_matches] = pMatches->get_dist();
|
766
|
+
num_full_matches++;
|
767
|
+
}
|
768
|
+
|
769
|
+
if (pMatches->is_last())
|
770
|
+
break;
|
771
|
+
pMatches++;
|
772
|
+
}
|
773
|
+
}
|
774
|
+
|
775
|
+
len2_match_dist = m_accel.get_len2_match(cur_lookahead_ofs);
|
776
|
+
}
|
777
|
+
|
778
|
+
for (uint cur_node_state_index = 0; cur_node_state_index < pCur_node->m_num_node_states; cur_node_state_index++)
|
779
|
+
{
|
780
|
+
node_state &cur_node_state = pCur_node->m_node_states[cur_node_state_index];
|
781
|
+
|
782
|
+
if (cur_node_index)
|
783
|
+
{
|
784
|
+
LZHAM_ASSERT(cur_node_state.m_parent_index >= 0);
|
785
|
+
|
786
|
+
approx_state.restore_partial_state(cur_node_state.m_saved_state);
|
787
|
+
}
|
788
|
+
|
789
|
+
uint is_match_model_index = LZHAM_IS_MATCH_MODEL_INDEX(approx_state.m_cur_state);
|
790
|
+
|
791
|
+
const bit_cost_t cur_node_total_cost = cur_node_state.m_total_cost;
|
792
|
+
const uint cur_node_total_complexity = cur_node_state.m_total_complexity;
|
793
|
+
|
794
|
+
// rep matches
|
795
|
+
uint match_hist_max_len = 0;
|
796
|
+
uint match_hist_min_match_len = 1;
|
797
|
+
for (uint rep_match_index = 0; rep_match_index < cMatchHistSize; rep_match_index++)
|
798
|
+
{
|
799
|
+
uint hist_match_len = 0;
|
800
|
+
|
801
|
+
uint dist = approx_state.m_match_hist[rep_match_index];
|
802
|
+
if (dist <= find_dict_size)
|
803
|
+
{
|
804
|
+
const uint comp_pos = static_cast<uint>((m_accel.m_lookahead_pos + cur_lookahead_ofs - dist) & m_accel.m_max_dict_size_mask);
|
805
|
+
const uint8* pComp = &m_accel.m_dict[comp_pos];
|
806
|
+
|
807
|
+
for (hist_match_len = 0; hist_match_len < max_admissable_match_len; hist_match_len++)
|
808
|
+
if (pComp[hist_match_len] != pLookahead[hist_match_len])
|
809
|
+
break;
|
810
|
+
}
|
811
|
+
|
812
|
+
if (hist_match_len >= match_hist_min_match_len)
|
813
|
+
{
|
814
|
+
match_hist_max_len = math::maximum(match_hist_max_len, hist_match_len);
|
815
|
+
|
816
|
+
approx_state.get_rep_match_costs(cur_dict_ofs, lzdec_bitcosts, rep_match_index, match_hist_min_match_len, hist_match_len, is_match_model_index);
|
817
|
+
|
818
|
+
uint rep_match_total_complexity = cur_node_total_complexity + (cRep0Complexity + rep_match_index);
|
819
|
+
for (uint l = match_hist_min_match_len; l <= hist_match_len; l++)
|
820
|
+
{
|
821
|
+
#if LZHAM_VERIFY_MATCH_COSTS
|
822
|
+
{
|
823
|
+
lzdecision actual_dec(cur_dict_ofs, l, -((int)rep_match_index + 1));
|
824
|
+
bit_cost_t actual_cost = approx_state.get_cost(*this, m_accel, actual_dec);
|
825
|
+
LZHAM_ASSERT(actual_cost == lzdec_bitcosts[l]);
|
826
|
+
}
|
827
|
+
#endif
|
828
|
+
node& dst_node = pCur_node[l];
|
829
|
+
|
830
|
+
bit_cost_t rep_match_total_cost = cur_node_total_cost + lzdec_bitcosts[l];
|
831
|
+
|
832
|
+
dst_node.add_state(cur_node_index, cur_node_state_index, lzdecision(cur_dict_ofs, l, -((int)rep_match_index + 1)), approx_state, rep_match_total_cost, rep_match_total_complexity);
|
833
|
+
}
|
834
|
+
}
|
835
|
+
|
836
|
+
match_hist_min_match_len = CLZBase::cMinMatchLen;
|
837
|
+
}
|
838
|
+
|
839
|
+
uint min_truncate_match_len = match_hist_max_len;
|
840
|
+
|
841
|
+
// nearest len2 match
|
842
|
+
if (len2_match_dist)
|
843
|
+
{
|
844
|
+
lzdecision lzdec(cur_dict_ofs, 2, len2_match_dist);
|
845
|
+
bit_cost_t actual_cost = approx_state.get_cost(*this, m_accel, lzdec);
|
846
|
+
pCur_node[2].add_state(cur_node_index, cur_node_state_index, lzdec, approx_state, cur_node_total_cost + actual_cost, cur_node_total_complexity + cShortMatchComplexity);
|
847
|
+
|
848
|
+
min_truncate_match_len = LZHAM_MAX(min_truncate_match_len, 2);
|
849
|
+
}
|
850
|
+
|
851
|
+
// full matches
|
852
|
+
if (max_full_match_len > min_truncate_match_len)
|
853
|
+
{
|
854
|
+
uint prev_max_match_len = LZHAM_MAX(1, min_truncate_match_len);
|
855
|
+
for (uint full_match_index = 0; full_match_index < num_full_matches; full_match_index++)
|
856
|
+
{
|
857
|
+
uint end_len = match_lens[full_match_index];
|
858
|
+
if (end_len <= min_truncate_match_len)
|
859
|
+
continue;
|
860
|
+
|
861
|
+
uint start_len = prev_max_match_len + 1;
|
862
|
+
uint match_dist = match_distances[full_match_index];
|
863
|
+
|
864
|
+
LZHAM_ASSERT(start_len <= end_len);
|
865
|
+
|
866
|
+
approx_state.get_full_match_costs(*this, cur_dict_ofs, lzdec_bitcosts, match_dist, start_len, end_len, is_match_model_index);
|
867
|
+
|
868
|
+
for (uint l = start_len; l <= end_len; l++)
|
869
|
+
{
|
870
|
+
uint match_complexity = (l >= cLongMatchComplexityLenThresh) ? cLongMatchComplexity : cShortMatchComplexity;
|
871
|
+
|
872
|
+
#if LZHAM_VERIFY_MATCH_COSTS
|
873
|
+
{
|
874
|
+
lzdecision actual_dec(cur_dict_ofs, l, match_dist);
|
875
|
+
bit_cost_t actual_cost = approx_state.get_cost(*this, m_accel, actual_dec);
|
876
|
+
LZHAM_ASSERT(actual_cost == lzdec_bitcosts[l]);
|
877
|
+
}
|
878
|
+
#endif
|
879
|
+
node& dst_node = pCur_node[l];
|
880
|
+
|
881
|
+
bit_cost_t match_total_cost = cur_node_total_cost + lzdec_bitcosts[l];
|
882
|
+
uint match_total_complexity = cur_node_total_complexity + match_complexity;
|
883
|
+
|
884
|
+
dst_node.add_state( cur_node_index, cur_node_state_index, lzdecision(cur_dict_ofs, l, match_dist), approx_state, match_total_cost, match_total_complexity);
|
885
|
+
}
|
886
|
+
|
887
|
+
prev_max_match_len = end_len;
|
888
|
+
}
|
889
|
+
}
|
890
|
+
|
891
|
+
// literal
|
892
|
+
bit_cost_t lit_cost = approx_state.get_lit_cost(*this, m_accel, cur_dict_ofs, lit_pred0, is_match_model_index);
|
893
|
+
bit_cost_t lit_total_cost = cur_node_total_cost + lit_cost;
|
894
|
+
uint lit_total_complexity = cur_node_total_complexity + cLitComplexity;
|
895
|
+
#if LZHAM_VERIFY_MATCH_COSTS
|
896
|
+
{
|
897
|
+
lzdecision actual_dec(cur_dict_ofs, 0, 0);
|
898
|
+
bit_cost_t actual_cost = approx_state.get_cost(*this, m_accel, actual_dec);
|
899
|
+
LZHAM_ASSERT(actual_cost == lit_cost);
|
900
|
+
}
|
901
|
+
#endif
|
902
|
+
|
903
|
+
pCur_node[1].add_state( cur_node_index, cur_node_state_index, lzdecision(cur_dict_ofs, 0, 0), approx_state, lit_total_cost, lit_total_complexity);
|
904
|
+
|
905
|
+
} // cur_node_state_index
|
906
|
+
|
907
|
+
cur_dict_ofs++;
|
908
|
+
cur_lookahead_ofs++;
|
909
|
+
cur_node_index++;
|
910
|
+
}
|
911
|
+
|
912
|
+
// Now get the optimal decisions by starting from the goal node.
|
913
|
+
// m_best_decisions is filled backwards.
|
914
|
+
if (!parse_state.m_best_decisions.try_reserve(bytes_to_parse))
|
915
|
+
{
|
916
|
+
parse_state.m_failed = true;
|
917
|
+
return false;
|
918
|
+
}
|
919
|
+
|
920
|
+
bit_cost_t lowest_final_cost = cBitCostMax; //math::cNearlyInfinite;
|
921
|
+
int node_state_index = 0;
|
922
|
+
node_state *pLast_node_states = pNodes[bytes_to_parse].m_node_states;
|
923
|
+
for (uint i = 0; i < pNodes[bytes_to_parse].m_num_node_states; i++)
|
924
|
+
{
|
925
|
+
if (pLast_node_states[i].m_total_cost < lowest_final_cost)
|
926
|
+
{
|
927
|
+
lowest_final_cost = pLast_node_states[i].m_total_cost;
|
928
|
+
node_state_index = i;
|
929
|
+
}
|
930
|
+
}
|
931
|
+
|
932
|
+
int node_index = bytes_to_parse;
|
933
|
+
lzdecision *pDst_dec = parse_state.m_best_decisions.get_ptr();
|
934
|
+
do
|
935
|
+
{
|
936
|
+
LZHAM_ASSERT((node_index >= 0) && (node_index <= (int)cMaxParseGraphNodes));
|
937
|
+
|
938
|
+
node& cur_node = pNodes[node_index];
|
939
|
+
const node_state &cur_node_state = cur_node.m_node_states[node_state_index];
|
940
|
+
|
941
|
+
*pDst_dec++ = cur_node_state.m_lzdec;
|
942
|
+
|
943
|
+
node_index = cur_node_state.m_parent_index;
|
944
|
+
node_state_index = cur_node_state.m_parent_state_index;
|
945
|
+
|
946
|
+
} while (node_index > 0);
|
947
|
+
|
948
|
+
parse_state.m_best_decisions.try_resize(static_cast<uint>(pDst_dec - parse_state.m_best_decisions.get_ptr()));
|
949
|
+
|
950
|
+
return true;
|
951
|
+
}
|
952
|
+
|
953
|
+
// Parsing notes:
|
954
|
+
// The regular "optimal" parser only tracks the single cheapest candidate LZ decision per lookahead character.
|
955
|
+
// This function finds the shortest path through an extremely dense node graph using a streamlined/simplified Dijkstra's algorithm with some coding heuristics.
|
956
|
+
// Graph edges are LZ "decisions", cost is measured in fractional bits needed to code each graph edge, and graph nodes are lookahead characters.
|
957
|
+
// There is no need to track visited/unvisited nodes, or find the next cheapest unvisited node in each iteration. The search always proceeds sequentially, visiting each lookahead character in turn from left to right.
|
958
|
+
// The major CPU expense of this function is the complexity of LZ decision cost evaluation, so a lot of implementation effort is spent here reducing this overhead.
|
959
|
+
// To simplify the problem, it assumes the input statistics are locally stationary over the input block to parse. (Otherwise, it would need to store, track, and update
|
960
|
+
// unique symbol statistics for each lookahead character, which would be very costly.)
|
961
|
+
// This function always sequentially pushes "forward" the unvisited node horizon. This horizon frequently collapses to a single node, which guarantees that the shortest path through the
|
962
|
+
// graph must pass through this node. LZMA tracks cumulative bitprices relative to this node, while LZHAM currently always tracks cumulative bitprices relative to the first node in the lookahead buffer.
|
963
|
+
// In very early versions of LZHAM the parse was much more understandable (straight Dijkstra with almost no bit price optimizations or coding heuristics).
|
964
|
+
// Runs the single-candidate-per-node parse over parse_state.m_bytes_to_match bytes starting at
// parse_state.m_start_ofs. Each graph node keeps only the cheapest way found so far to reach it
// (total cost, then total complexity as the tie-breaker), together with the LZ decision and the
// saved partial coder state of its parent.
// On success the chosen decisions are written to parse_state.m_best_decisions in reverse order
// (m_emit_decisions_backwards is set to true) and true is returned. If reserving space for the
// decisions fails, parse_state.m_failed is set and false is returned.
bool lzcompressor::optimal_parse(parse_thread_state &parse_state)
{
   LZHAM_ASSERT(parse_state.m_bytes_to_match <= cMaxParseGraphNodes);

   parse_state.m_failed = false;
   parse_state.m_emit_decisions_backwards = true;

   // Node 0 is the start of the block: no parent, zero accumulated cost and complexity.
   node_state *pNodes = reinterpret_cast<node_state*>(parse_state.m_nodes);
   pNodes[0].m_parent_index = -1;
   pNodes[0].m_total_cost = 0;
   pNodes[0].m_total_complexity = 0;

#if 0
   for (uint i = 1; i <= cMaxParseGraphNodes; i++)
   {
      pNodes[i].clear();
   }
#else
   // Fill every remaining node with 0xFF bytes in one call instead of clearing them one by one.
   // NOTE(review): presumably this leaves each node's cost/complexity at its maximum value so any
   // real candidate compares as cheaper - confirm against node_state's field types.
   memset( &pNodes[1], 0xFF, cMaxParseGraphNodes * sizeof(node_state));
#endif

   // The initial state is advanced/restored in place as the search moves from node to node.
   state &approx_state = parse_state.m_initial_state;

   const uint bytes_to_parse = parse_state.m_bytes_to_match;

   const uint lookahead_start_ofs = m_accel.get_lookahead_pos() & m_accel.get_max_dict_size_mask();

   // cur_dict_ofs is the absolute dictionary offset, cur_lookahead_ofs the same position relative
   // to the start of the lookahead, and cur_node_index the position relative to the parsed range.
   uint cur_dict_ofs = parse_state.m_start_ofs;
   uint cur_lookahead_ofs = cur_dict_ofs - lookahead_start_ofs;
   uint cur_node_index = 0;

   enum { cMaxFullMatches = cMatchAccelMaxSupportedProbes };
   uint match_lens[cMaxFullMatches];
   uint match_distances[cMaxFullMatches];

   // Scratch table of per-length bit costs, indexed by match length.
   bit_cost_t lzdec_bitcosts[cMaxMatchLen + 1];

   while (cur_node_index < bytes_to_parse)
   {
      node_state* pCur_node = &pNodes[cur_node_index];

      // Matches may never extend past the end of the range being parsed.
      const uint max_admissable_match_len = LZHAM_MIN(static_cast<uint>(CLZBase::cMaxMatchLen), bytes_to_parse - cur_node_index);
      const uint find_dict_size = m_accel.m_cur_dict_size + cur_lookahead_ofs;

      if (cur_node_index)
      {
         LZHAM_ASSERT(pCur_node->m_parent_index >= 0);

         // Move to this node's state using the lowest cost LZ decision found.
         approx_state.restore_partial_state(pCur_node->m_saved_state);
         approx_state.partial_advance(pCur_node->m_lzdec);
      }

      const bit_cost_t cur_node_total_cost = pCur_node->m_total_cost;
      // This assert includes a fudge factor - make sure we don't overflow our scaled costs.
      LZHAM_ASSERT((cBitCostMax - cur_node_total_cost) > (cBitCostScale * 64));
      const uint cur_node_total_complexity = pCur_node->m_total_complexity;

      const uint lit_pred0 = approx_state.get_pred_char(m_accel, cur_dict_ofs, 1);
      uint is_match_model_index = LZHAM_IS_MATCH_MODEL_INDEX(approx_state.m_cur_state);

      const uint8* pLookahead = &m_accel.m_dict[cur_dict_ofs];

      // rep matches
      uint match_hist_max_len = 0;
      // Only the first history entry (rep0) is allowed a length-1 match; the minimum is raised to
      // CLZBase::cMinMatchLen at the end of the first loop iteration.
      uint match_hist_min_match_len = 1;
      for (uint rep_match_index = 0; rep_match_index < cMatchHistSize; rep_match_index++)
      {
         uint hist_match_len = 0;

         uint dist = approx_state.m_match_hist[rep_match_index];
         if (dist <= find_dict_size)
         {
            const uint comp_pos = static_cast<uint>((m_accel.m_lookahead_pos + cur_lookahead_ofs - dist) & m_accel.m_max_dict_size_mask);
            const uint8* pComp = &m_accel.m_dict[comp_pos];

            // Byte-by-byte comparison to measure how far this history distance matches.
            for (hist_match_len = 0; hist_match_len < max_admissable_match_len; hist_match_len++)
               if (pComp[hist_match_len] != pLookahead[hist_match_len])
                  break;
         }

         if (hist_match_len >= match_hist_min_match_len)
         {
            match_hist_max_len = math::maximum(match_hist_max_len, hist_match_len);

            // Fills lzdec_bitcosts[] for every length in [match_hist_min_match_len, hist_match_len].
            approx_state.get_rep_match_costs(cur_dict_ofs, lzdec_bitcosts, rep_match_index, match_hist_min_match_len, hist_match_len, is_match_model_index);

            uint rep_match_total_complexity = cur_node_total_complexity + (cRep0Complexity + rep_match_index);
            for (uint l = match_hist_min_match_len; l <= hist_match_len; l++)
            {
#if LZHAM_VERIFY_MATCH_COSTS
               {
                  lzdecision actual_dec(cur_dict_ofs, l, -((int)rep_match_index + 1));
                  bit_cost_t actual_cost = approx_state.get_cost(*this, m_accel, actual_dec);
                  LZHAM_ASSERT(actual_cost == lzdec_bitcosts[l]);
               }
#endif
               node_state& dst_node = pCur_node[l];

               bit_cost_t rep_match_total_cost = cur_node_total_cost + lzdec_bitcosts[l];

               // Keep the destination's existing path unless this one is strictly cheaper, or
               // equally cheap with strictly lower complexity.
               if ((rep_match_total_cost > dst_node.m_total_cost) || ((rep_match_total_cost == dst_node.m_total_cost) && (rep_match_total_complexity >= dst_node.m_total_complexity)))
                  continue;

               dst_node.m_total_cost = rep_match_total_cost;
               dst_node.m_total_complexity = rep_match_total_complexity;
               dst_node.m_parent_index = (uint16)cur_node_index;
               approx_state.save_partial_state(dst_node.m_saved_state);
               // Rep matches are recorded with a negative distance: -(history index + 1).
               dst_node.m_lzdec.init(cur_dict_ofs, l, -((int)rep_match_index + 1));
               dst_node.m_lzdec.m_len = l;
            }
         }

         match_hist_min_match_len = CLZBase::cMinMatchLen;
      }

      uint max_match_len = match_hist_max_len;

      // A rep match of at least m_fast_bytes is taken without evaluating anything else at this
      // position: jump straight to the node at the end of that match.
      if (max_match_len >= m_settings.m_fast_bytes)
      {
         cur_dict_ofs += max_match_len;
         cur_lookahead_ofs += max_match_len;
         cur_node_index += max_match_len;
         continue;
      }

      // full matches
      if (max_admissable_match_len >= CLZBase::cMinMatchLen)
      {
         uint num_full_matches = 0;

         if (match_hist_max_len < 2)
         {
            // Get the nearest len2 match if we didn't find a rep len2.
            uint len2_match_dist = m_accel.get_len2_match(cur_lookahead_ofs);
            if (len2_match_dist)
            {
               bit_cost_t cost = approx_state.get_len2_match_cost(*this, cur_dict_ofs, len2_match_dist, is_match_model_index);

#if LZHAM_VERIFY_MATCH_COSTS
               {
                  lzdecision actual_dec(cur_dict_ofs, 2, len2_match_dist);
                  bit_cost_t actual_cost = approx_state.get_cost(*this, m_accel, actual_dec);
                  LZHAM_ASSERT(actual_cost == cost);
               }
#endif

               node_state& dst_node = pCur_node[2];

               bit_cost_t match_total_cost = cur_node_total_cost + cost;
               uint match_total_complexity = cur_node_total_complexity + cShortMatchComplexity;

               if ((match_total_cost < dst_node.m_total_cost) || ((match_total_cost == dst_node.m_total_cost) && (match_total_complexity < dst_node.m_total_complexity)))
               {
                  dst_node.m_total_cost = match_total_cost;
                  dst_node.m_total_complexity = match_total_complexity;
                  dst_node.m_parent_index = (uint16)cur_node_index;
                  approx_state.save_partial_state(dst_node.m_saved_state);
                  dst_node.m_lzdec.init(cur_dict_ofs, 2, len2_match_dist);
               }

               max_match_len = 2;
            }
         }

         // Full matches are only priced for lengths above what rep/len2 matches already cover.
         const uint min_truncate_match_len = max_match_len;

         // Now get all full matches: the nearest matches at each match length. (Actually, we don't
         // always get the nearest match. The match finder favors those matches which have the lowest value
         // in the nibble of each match distance, all other things being equal, to help exploit how the lowest
         // nibble of match distances is separately coded.)
         const dict_match* pMatches = m_accel.find_matches(cur_lookahead_ofs);
         if (pMatches)
         {
            for ( ; ; )
            {
               uint match_len = pMatches->get_len();
               LZHAM_ASSERT((pMatches->get_dist() > 0) && (pMatches->get_dist() <= m_dict_size));
               match_len = LZHAM_MIN(match_len, max_admissable_match_len);

               // Only record a match if it is longer than everything seen so far at this position,
               // so match_lens[] ends up strictly increasing.
               if (match_len > max_match_len)
               {
                  max_match_len = match_len;

                  match_lens[num_full_matches] = match_len;
                  match_distances[num_full_matches] = pMatches->get_dist();
                  num_full_matches++;
               }

               if (pMatches->is_last())
                  break;
               pMatches++;
            }
         }

         if (num_full_matches)
         {
            uint prev_max_match_len = LZHAM_MAX(1, min_truncate_match_len);
            for (uint full_match_index = 0; full_match_index < num_full_matches; full_match_index++)
            {
               // Each recorded match is priced only for the lengths not covered by the previous one.
               uint start_len = prev_max_match_len + 1;
               uint end_len = match_lens[full_match_index];
               uint match_dist = match_distances[full_match_index];

               LZHAM_ASSERT(start_len <= end_len);

               approx_state.get_full_match_costs(*this, cur_dict_ofs, lzdec_bitcosts, match_dist, start_len, end_len, is_match_model_index);

               for (uint l = start_len; l <= end_len; l++)
               {
                  uint match_complexity = (l >= cLongMatchComplexityLenThresh) ? cLongMatchComplexity : cShortMatchComplexity;

#if LZHAM_VERIFY_MATCH_COSTS
                  {
                     lzdecision actual_dec(cur_dict_ofs, l, match_dist);
                     bit_cost_t actual_cost = approx_state.get_cost(*this, m_accel, actual_dec);
                     LZHAM_ASSERT(actual_cost == lzdec_bitcosts[l]);
                  }
#endif
                  node_state& dst_node = pCur_node[l];

                  bit_cost_t match_total_cost = cur_node_total_cost + lzdec_bitcosts[l];
                  uint match_total_complexity = cur_node_total_complexity + match_complexity;

                  // Same acceptance rule as for rep matches: strictly cheaper, or equal cost with
                  // strictly lower complexity.
                  if ((match_total_cost > dst_node.m_total_cost) || ((match_total_cost == dst_node.m_total_cost) && (match_total_complexity >= dst_node.m_total_complexity)))
                     continue;

                  dst_node.m_total_cost = match_total_cost;
                  dst_node.m_total_complexity = match_total_complexity;
                  dst_node.m_parent_index = (uint16)cur_node_index;
                  approx_state.save_partial_state(dst_node.m_saved_state);
                  dst_node.m_lzdec.init(cur_dict_ofs, l, match_dist);
               }

               prev_max_match_len = end_len;
            }
         }
      }

      // Same shortcut as after the rep matches, now also counting len2/full matches: skip the
      // literal and all intermediate nodes when a match of at least m_fast_bytes was found.
      if (max_match_len >= m_settings.m_fast_bytes)
      {
         cur_dict_ofs += max_match_len;
         cur_lookahead_ofs += max_match_len;
         cur_node_index += max_match_len;
         continue;
      }

      // literal
      bit_cost_t lit_cost = approx_state.get_lit_cost(*this, m_accel, cur_dict_ofs, lit_pred0, is_match_model_index);
      bit_cost_t lit_total_cost = cur_node_total_cost + lit_cost;
      uint lit_total_complexity = cur_node_total_complexity + cLitComplexity;
#if LZHAM_VERIFY_MATCH_COSTS
      {
         lzdecision actual_dec(cur_dict_ofs, 0, 0);
         bit_cost_t actual_cost = approx_state.get_cost(*this, m_accel, actual_dec);
         LZHAM_ASSERT(actual_cost == lit_cost);
      }
#endif
      // A literal always leads to the very next node; it is stored as a decision with len 0, dist 0.
      if ((lit_total_cost < pCur_node[1].m_total_cost) || ((lit_total_cost == pCur_node[1].m_total_cost) && (lit_total_complexity < pCur_node[1].m_total_complexity)))
      {
         pCur_node[1].m_total_cost = lit_total_cost;
         pCur_node[1].m_total_complexity = lit_total_complexity;
         pCur_node[1].m_parent_index = (int16)cur_node_index;
         approx_state.save_partial_state(pCur_node[1].m_saved_state);
         pCur_node[1].m_lzdec.init(cur_dict_ofs, 0, 0);
      }

      cur_dict_ofs++;
      cur_lookahead_ofs++;
      cur_node_index++;

   } // graph search

   // Now get the optimal decisions by starting from the goal node.
   // m_best_decisions is filled backwards.
   if (!parse_state.m_best_decisions.try_reserve(bytes_to_parse))
   {
      parse_state.m_failed = true;
      return false;
   }

   // Follow the parent links from the final node back towards node 0, emitting one decision per
   // node visited.
   int node_index = bytes_to_parse;
   lzdecision *pDst_dec = parse_state.m_best_decisions.get_ptr();
   do
   {
      LZHAM_ASSERT((node_index >= 0) && (node_index <= (int)cMaxParseGraphNodes));
      node_state& cur_node = pNodes[node_index];

      *pDst_dec++ = cur_node.m_lzdec;

      node_index = cur_node.m_parent_index;

   } while (node_index > 0);

   // Set the vector's size to the number of decisions actually written.
   parse_state.m_best_decisions.try_resize(static_cast<uint>(pDst_dec - parse_state.m_best_decisions.get_ptr()));

   return true;
}
|
1262
|
+
|
1263
|
+
// Task entry point for one parse job.
//   data      - index of the job; selects the entry of m_parse_thread_state to work on.
//   pData_ptr - unused.
// Runs extreme_parse() when extreme parsing is requested and the compression level is
// cCompressionLevelUber, otherwise optimal_parse(). Afterwards the remaining-job counter is
// decremented atomically, and the job that brings it to zero releases m_parse_jobs_complete.
void lzcompressor::parse_job_callback(uint64 data, void* pData_ptr)
{
   (void)pData_ptr;

   const uint job_index = (uint)data;
   scoped_perf_section parse_job_timer(cVarArgs, "parse_job_callback %u", job_index);

   parse_thread_state &job_state = m_parse_thread_state[job_index];

   const bool use_extreme_parser =
      (m_params.m_lzham_compress_flags & LZHAM_COMP_FLAG_EXTREME_PARSING) &&
      (m_params.m_compression_level == cCompressionLevelUber);

   if (use_extreme_parser)
   {
      extreme_parse(job_state);
   }
   else
   {
      optimal_parse(job_state);
   }

   // The barrier sits between the parser's writes and the counter update below.
   LZHAM_MEMORY_EXPORT_BARRIER

   const bool was_last_job = (atomic_decrement32(&m_parse_jobs_remaining) == 0);
   if (was_last_job)
      m_parse_jobs_complete.release();
}
|
1284
|
+
|
1285
|
+
// ofs is the absolute dictionary offset, must be >= the lookahead offset.
|
1286
|
+
// TODO: Doesn't find len2 matches
|
1287
|
+
// Fills `decisions` with the priced candidate LZ decisions at `ofs`:
//   - a literal (only when min_match_len <= 1), always stored at index 0,
//   - rep matches taken from cur_state.m_match_hist,
//   - full matches from the match finder that are longer than every rep match found.
// Returns the index of the longest candidate (equal lengths are resolved in favor of the lower
// cost), or -1 if resizing or appending to `decisions` fails.
int lzcompressor::enumerate_lz_decisions(uint ofs, const state& cur_state, lzham::vector<lzpriced_decision>& decisions, uint min_match_len, uint max_match_len)
{
   LZHAM_ASSERT(min_match_len >= 1);

   const uint window_start_ofs = m_accel.get_lookahead_pos() & m_accel.get_max_dict_size_mask();
   LZHAM_ASSERT(ofs >= window_start_ofs);
   const uint rel_ofs = ofs - window_start_ofs;

   uint best_index = 0;
   uint best_len;
   bit_cost_t best_cost;

   // The literal occupies slot 0 when it is admissible; otherwise the list starts out empty.
   const bool include_literal = (min_match_len <= 1);
   if (!decisions.try_resize(include_literal ? 1 : 0))
      return -1;

   if (include_literal)
   {
      lzpriced_decision& literal = decisions[0];
      literal.init(ofs, 0, 0, 0);
      literal.m_cost = cur_state.get_cost(*this, m_accel, literal);

      best_len = 1;
      best_cost = literal.m_cost;
   }
   else
   {
      best_len = 0;
      best_cost = cBitCostMax;
   }

   // Rep matches.
   uint longest_rep_len = 0;
   for (uint rep_index = 0; rep_index < cMatchHistSize; rep_index++)
   {
      const uint rep_len = m_accel.get_match_len(rel_ofs, cur_state.m_match_hist[rep_index], max_match_len);
      if (rep_len < min_match_len)
         continue;

      // A length-1 rep is only accepted for history slot 0; every other rep must reach cMinMatchLen.
      const bool usable = (rep_len >= CLZBase::cMinMatchLen) || ((rep_len == 1) && (rep_index == 0));
      if (!usable)
         continue;

      longest_rep_len = math::maximum(longest_rep_len, rep_len);

      // Rep matches carry a negative distance: -(history index + 1).
      lzpriced_decision rep_dec(ofs, rep_len, -((int)rep_index + 1));
      rep_dec.m_cost = cur_state.get_cost(*this, m_accel, rep_dec);

      if (!decisions.try_push_back(rep_dec))
         return -1;

      const bool is_new_best = (rep_len > best_len) || ((rep_len == best_len) && (rep_dec.m_cost < best_cost));
      if (is_new_best)
      {
         best_index = decisions.size() - 1;
         best_len = rep_len;
         best_cost = rep_dec.m_cost;
      }
   }

   // Full matches are skipped when no match of cMinMatchLen fits, or when a rep match already
   // reached m_fast_bytes.
   const bool search_full_matches = (max_match_len >= CLZBase::cMinMatchLen) && (longest_rep_len < m_settings.m_fast_bytes);
   if (!search_full_matches)
      return best_index;

   const dict_match* pFirst_match = m_accel.find_matches(rel_ofs);
   if (!pFirst_match)
      return best_index;

   for (const dict_match* pMatch = pFirst_match; ; pMatch++)
   {
      uint full_len = math::minimum(pMatch->get_len(), max_match_len);
      LZHAM_ASSERT((pMatch->get_dist() > 0) && (pMatch->get_dist() <= m_dict_size));

      // Full matches are very likely to be more expensive than rep matches of the same length, so don't bother evaluating them.
      if ((full_len >= min_match_len) && (full_len > longest_rep_len))
      {
         // The finder's reported length stops at cMaxMatchLen; when the caller allows more,
         // re-measure the match starting from that length.
         if ((max_match_len > CLZBase::cMaxMatchLen) && (full_len == CLZBase::cMaxMatchLen))
            full_len = m_accel.get_match_len(rel_ofs, pMatch->get_dist(), max_match_len, CLZBase::cMaxMatchLen);

         lzpriced_decision full_dec(ofs, full_len, pMatch->get_dist());
         full_dec.m_cost = cur_state.get_cost(*this, m_accel, full_dec);

         if (!decisions.try_push_back(full_dec))
            return -1;

         const bool is_new_best = (full_len > best_len) || ((full_len == best_len) && (full_dec.get_cost() < best_cost));
         if (is_new_best)
         {
            best_index = decisions.size() - 1;
            best_len = full_len;
            best_cost = full_dec.get_cost();
         }
      }

      if (pMatch->is_last())
         break;
   }

   return best_index;
}
|
1390
|
+
|
1391
|
+
// Greedy parse: at every position take the longest candidate returned by
// enumerate_lz_decisions() and append it to parse_state.m_best_decisions in forward order
// (m_emit_decisions_backwards is set to false).
//
// Returns true, with parse_state.m_failed cleared, once all m_bytes_to_match bytes are covered.
// Returns false with m_failed left set when:
//   - a vector reserve/resize/push fails or enumerate_lz_decisions() reports failure, or
//   - the number of decisions reaches m_max_greedy_decisions; in that case
//     m_greedy_parse_gave_up is set to true so the caller can tell it apart from a hard failure.
// m_greedy_parse_total_bytes_coded receives the number of bytes covered on success and on give-up.
bool lzcompressor::greedy_parse(parse_thread_state &parse_state)
{
   // Assume failure until the whole range has been parsed.
   parse_state.m_failed = true;
   parse_state.m_emit_decisions_backwards = false;

   const uint bytes_to_parse = parse_state.m_bytes_to_match;

   // cur_dict_ofs is the absolute dictionary offset; cur_ofs counts bytes covered so far.
   // (The lookahead-relative offset the original tracked here was never read and has been dropped.)
   uint cur_dict_ofs = parse_state.m_start_ofs;
   uint cur_ofs = 0;

   state &approx_state = parse_state.m_initial_state;

   lzham::vector<lzpriced_decision> &decisions = parse_state.m_temp_decisions;

   if (!decisions.try_reserve(384))
      return false;

   if (!parse_state.m_best_decisions.try_resize(0))
      return false;

   while (cur_ofs < bytes_to_parse)
   {
      const uint max_admissable_match_len = LZHAM_MIN(static_cast<uint>(CLZBase::cMaxHugeMatchLen), bytes_to_parse - cur_ofs);

      int largest_dec_index = enumerate_lz_decisions(cur_dict_ofs, approx_state, decisions, 1, max_admissable_match_len);
      if (largest_dec_index < 0)
         return false;

      const lzpriced_decision &dec = decisions[largest_dec_index];

      if (!parse_state.m_best_decisions.try_push_back(dec))
         return false;

      approx_state.partial_advance(dec);

      uint match_len = dec.get_len();
      LZHAM_ASSERT(match_len <= max_admissable_match_len);
      cur_dict_ofs += match_len;
      cur_ofs += match_len;

      // Too many decisions for the bytes covered: give up and let the caller decide what to do.
      if (parse_state.m_best_decisions.size() >= parse_state.m_max_greedy_decisions)
      {
         parse_state.m_greedy_parse_total_bytes_coded = cur_ofs;
         parse_state.m_greedy_parse_gave_up = true;
         return false;
      }
   }

   parse_state.m_greedy_parse_total_bytes_coded = cur_ofs;

   LZHAM_ASSERT(cur_ofs == bytes_to_parse);

   parse_state.m_failed = false;

   return true;
}
|
1451
|
+
|
1452
|
+
bool lzcompressor::compress_block(const void* pBuf, uint buf_len)
|
1453
|
+
{
|
1454
|
+
uint cur_ofs = 0;
|
1455
|
+
uint bytes_remaining = buf_len;
|
1456
|
+
while (bytes_remaining)
|
1457
|
+
{
|
1458
|
+
uint bytes_to_compress = math::minimum(m_accel.get_max_add_bytes(), bytes_remaining);
|
1459
|
+
if (!compress_block_internal(static_cast<const uint8*>(pBuf) + cur_ofs, bytes_to_compress))
|
1460
|
+
return false;
|
1461
|
+
|
1462
|
+
cur_ofs += bytes_to_compress;
|
1463
|
+
bytes_remaining -= bytes_to_compress;
|
1464
|
+
}
|
1465
|
+
return true;
|
1466
|
+
}
|
1467
|
+
|
1468
|
+
// Records the statistics of one block in the m_block_history ring: the entry at
// m_block_history_next is overwritten, the write cursor advances modulo cMaxBlockHistorySize, and
// m_block_history_size grows by one until it reaches cMaxBlockHistorySize.
void lzcompressor::update_block_history(uint comp_size, uint src_size, uint ratio, bool raw_block, bool reset_update_rate)
{
   // Take the slot under the cursor first, then move the cursor on with wrap-around.
   block_history& slot = m_block_history[m_block_history_next];
   m_block_history_next = (m_block_history_next + 1) % cMaxBlockHistorySize;

   slot.m_comp_size = comp_size;
   slot.m_src_size = src_size;
   slot.m_ratio = ratio;
   slot.m_raw_block = raw_block;
   slot.m_reset_update_rate = reset_update_rate;

   // The number of valid entries saturates at the ring's capacity.
   const uint history_cap = static_cast<uint>(cMaxBlockHistorySize);
   const uint grown_size = m_block_history_size + 1;
   m_block_history_size = (grown_size < history_cap) ? grown_size : history_cap;
}
|
1482
|
+
|
1483
|
+
// Returns the integer mean of m_ratio over the recorded block history entries, or 0 when the
// history is empty.
uint lzcompressor::get_recent_block_ratio()
{
   const uint num_entries = m_block_history_size;
   if (num_entries == 0)
      return 0;

   // Summed in 64 bits before dividing.
   uint64 ratio_sum = 0;
   for (uint remaining = num_entries; remaining-- > 0; )
      ratio_sum += m_block_history[remaining].m_ratio;

   return static_cast<uint>(ratio_sum / num_entries);
}
|
1495
|
+
|
1496
|
+
// Returns the smallest m_ratio among the recorded block history entries, or 0 when the history
// is empty.
uint lzcompressor::get_min_block_ratio()
{
   const uint num_entries = m_block_history_size;
   if (num_entries == 0)
      return 0;

   uint lowest_ratio = UINT_MAX;
   for (uint entry = 0; entry != num_entries; ++entry)
   {
      const uint entry_ratio = m_block_history[entry].m_ratio;
      if (entry_ratio < lowest_ratio)
         lowest_ratio = entry_ratio;
   }
   return lowest_ratio;
}
|
1505
|
+
|
1506
|
+
// Returns the largest m_ratio among the recorded block history entries, or 0 when the history
// is empty.
uint lzcompressor::get_max_block_ratio()
{
   // Starting from 0 also covers the empty-history case: the loop body never runs and 0 is returned.
   uint highest_ratio = 0;
   for (uint entry = 0; entry != m_block_history_size; ++entry)
   {
      const uint entry_ratio = m_block_history[entry].m_ratio;
      if (entry_ratio > highest_ratio)
         highest_ratio = entry_ratio;
   }
   return highest_ratio;
}
|
1515
|
+
|
1516
|
+
// Counts how many recorded block history entries have m_reset_update_rate set.
uint lzcompressor::get_total_recent_reset_update_rate()
{
   uint reset_count = 0;
   for (uint entry = 0; entry != m_block_history_size; ++entry)
   {
      // The flag is only ever stored from a bool, so each set entry contributes exactly one.
      if (m_block_history[entry].m_reset_update_rate)
         ++reset_count;
   }
   return reset_count;
}
|
1523
|
+
|
1524
|
+
bool lzcompressor::compress_block_internal(const void* pBuf, uint buf_len)
|
1525
|
+
{
|
1526
|
+
scoped_perf_section compress_block_timer(cVarArgs, "****** compress_block %u", m_block_index);
|
1527
|
+
|
1528
|
+
LZHAM_ASSERT(pBuf);
|
1529
|
+
LZHAM_ASSERT(buf_len <= m_params.m_block_size);
|
1530
|
+
|
1531
|
+
LZHAM_ASSERT(m_src_size >= 0);
|
1532
|
+
if (m_src_size < 0)
|
1533
|
+
return false;
|
1534
|
+
|
1535
|
+
m_src_size += buf_len;
|
1536
|
+
|
1537
|
+
// Important: Don't do any expensive work until after add_bytes_begin() is called, to increase parallelism.
|
1538
|
+
if (!m_accel.add_bytes_begin(buf_len, static_cast<const uint8*>(pBuf)))
|
1539
|
+
return false;
|
1540
|
+
|
1541
|
+
m_start_of_block_state = m_state;
|
1542
|
+
|
1543
|
+
m_src_adler32 = adler32(pBuf, buf_len, m_src_adler32);
|
1544
|
+
|
1545
|
+
m_block_start_dict_ofs = m_accel.get_lookahead_pos() & (m_accel.get_max_dict_size() - 1);
|
1546
|
+
|
1547
|
+
uint cur_dict_ofs = m_block_start_dict_ofs;
|
1548
|
+
|
1549
|
+
uint bytes_to_match = buf_len;
|
1550
|
+
|
1551
|
+
if (!m_codec.start_encoding((buf_len * 9) / 8))
|
1552
|
+
return false;
|
1553
|
+
|
1554
|
+
if (!m_block_index)
|
1555
|
+
{
|
1556
|
+
if (!send_configuration())
|
1557
|
+
return false;
|
1558
|
+
}
|
1559
|
+
|
1560
|
+
#ifdef LZHAM_LZDEBUG
|
1561
|
+
m_codec.encode_bits(166, 12);
|
1562
|
+
#endif
|
1563
|
+
|
1564
|
+
if (!m_codec.encode_bits(cCompBlock, cBlockHeaderBits))
|
1565
|
+
return false;
|
1566
|
+
|
1567
|
+
if (!m_codec.encode_arith_init())
|
1568
|
+
return false;
|
1569
|
+
|
1570
|
+
m_state.start_of_block(m_accel, cur_dict_ofs, m_block_index);
|
1571
|
+
|
1572
|
+
bool emit_reset_update_rate_command = false;
|
1573
|
+
|
1574
|
+
// Determine if it makes sense to reset the Huffman table update frequency back to their initial (maximum) rates.
|
1575
|
+
if ((m_block_history_size) && (m_params.m_lzham_compress_flags & LZHAM_COMP_FLAG_TRADEOFF_DECOMPRESSION_RATE_FOR_COMP_RATIO))
|
1576
|
+
{
|
1577
|
+
const block_history& prev_block_history = m_block_history[m_block_history_next ? (m_block_history_next - 1) : (cMaxBlockHistorySize - 1)];
|
1578
|
+
|
1579
|
+
if (prev_block_history.m_raw_block)
|
1580
|
+
emit_reset_update_rate_command = true;
|
1581
|
+
else if (get_total_recent_reset_update_rate() == 0)
|
1582
|
+
{
|
1583
|
+
if (get_recent_block_ratio() > (cBlockHistoryCompRatioScale * 95U / 100U))
|
1584
|
+
emit_reset_update_rate_command = true;
|
1585
|
+
else
|
1586
|
+
{
|
1587
|
+
uint recent_min_block_ratio = get_min_block_ratio();
|
1588
|
+
//uint recent_max_block_ratio = get_max_block_ratio();
|
1589
|
+
|
1590
|
+
// Compression ratio has recently dropped quite a bit - slam the table update rates back up.
|
1591
|
+
if (prev_block_history.m_ratio > (recent_min_block_ratio * 3U) / 2U)
|
1592
|
+
{
|
1593
|
+
//printf("Emitting reset: %u %u\n", prev_block_history.m_ratio, recent_min_block_ratio);
|
1594
|
+
emit_reset_update_rate_command = true;
|
1595
|
+
}
|
1596
|
+
}
|
1597
|
+
}
|
1598
|
+
}
|
1599
|
+
|
1600
|
+
if (emit_reset_update_rate_command)
|
1601
|
+
m_state.reset_update_rate();
|
1602
|
+
|
1603
|
+
m_codec.encode_bits(emit_reset_update_rate_command ? 1 : 0, cBlockFlushTypeBits);
|
1604
|
+
|
1605
|
+
//coding_stats initial_stats(m_stats);
|
1606
|
+
|
1607
|
+
uint initial_step = m_step;
|
1608
|
+
|
1609
|
+
while (bytes_to_match)
|
1610
|
+
{
|
1611
|
+
const uint cAvgAcceptableGreedyMatchLen = 384;
|
1612
|
+
if ((m_params.m_pSeed_bytes) && (bytes_to_match >= cAvgAcceptableGreedyMatchLen))
|
1613
|
+
{
|
1614
|
+
parse_thread_state &greedy_parse_state = m_parse_thread_state[cMaxParseThreads];
|
1615
|
+
|
1616
|
+
greedy_parse_state.m_initial_state = m_state;
|
1617
|
+
greedy_parse_state.m_initial_state.m_cur_ofs = cur_dict_ofs;
|
1618
|
+
|
1619
|
+
greedy_parse_state.m_issue_reset_state_partial = false;
|
1620
|
+
greedy_parse_state.m_start_ofs = cur_dict_ofs;
|
1621
|
+
greedy_parse_state.m_bytes_to_match = LZHAM_MIN(bytes_to_match, static_cast<uint>(CLZBase::cMaxHugeMatchLen));
|
1622
|
+
|
1623
|
+
greedy_parse_state.m_max_greedy_decisions = LZHAM_MAX((bytes_to_match / cAvgAcceptableGreedyMatchLen), 2);
|
1624
|
+
greedy_parse_state.m_greedy_parse_gave_up = false;
|
1625
|
+
greedy_parse_state.m_greedy_parse_total_bytes_coded = 0;
|
1626
|
+
|
1627
|
+
if (!greedy_parse(greedy_parse_state))
|
1628
|
+
{
|
1629
|
+
if (!greedy_parse_state.m_greedy_parse_gave_up)
|
1630
|
+
return false;
|
1631
|
+
}
|
1632
|
+
|
1633
|
+
uint num_greedy_decisions_to_code = 0;
|
1634
|
+
|
1635
|
+
const lzham::vector<lzdecision> &best_decisions = greedy_parse_state.m_best_decisions;
|
1636
|
+
|
1637
|
+
if (!greedy_parse_state.m_greedy_parse_gave_up)
|
1638
|
+
num_greedy_decisions_to_code = best_decisions.size();
|
1639
|
+
else
|
1640
|
+
{
|
1641
|
+
uint num_small_decisions = 0;
|
1642
|
+
uint total_match_len = 0;
|
1643
|
+
uint max_match_len = 0;
|
1644
|
+
|
1645
|
+
uint i;
|
1646
|
+
for (i = 0; i < best_decisions.size(); i++)
|
1647
|
+
{
|
1648
|
+
const lzdecision &dec = best_decisions[i];
|
1649
|
+
if (dec.get_len() <= CLZBase::cMaxMatchLen)
|
1650
|
+
{
|
1651
|
+
num_small_decisions++;
|
1652
|
+
if (num_small_decisions > 16)
|
1653
|
+
break;
|
1654
|
+
}
|
1655
|
+
|
1656
|
+
total_match_len += dec.get_len();
|
1657
|
+
max_match_len = LZHAM_MAX(max_match_len, dec.get_len());
|
1658
|
+
}
|
1659
|
+
|
1660
|
+
if (max_match_len > CLZBase::cMaxMatchLen)
|
1661
|
+
{
|
1662
|
+
if ((total_match_len / i) >= cAvgAcceptableGreedyMatchLen)
|
1663
|
+
{
|
1664
|
+
num_greedy_decisions_to_code = i;
|
1665
|
+
}
|
1666
|
+
}
|
1667
|
+
}
|
1668
|
+
|
1669
|
+
if (num_greedy_decisions_to_code)
|
1670
|
+
{
|
1671
|
+
for (uint i = 0; i < num_greedy_decisions_to_code; i++)
|
1672
|
+
{
|
1673
|
+
LZHAM_ASSERT(best_decisions[i].m_pos == (int)cur_dict_ofs);
|
1674
|
+
//LZHAM_ASSERT(i >= 0);
|
1675
|
+
LZHAM_ASSERT(i < best_decisions.size());
|
1676
|
+
|
1677
|
+
#if LZHAM_UPDATE_STATS
|
1678
|
+
bit_cost_t cost = m_state.get_cost(*this, m_accel, best_decisions[i]);
|
1679
|
+
m_stats.update(best_decisions[i], m_state, m_accel, cost);
|
1680
|
+
#endif
|
1681
|
+
|
1682
|
+
if (!code_decision(best_decisions[i], cur_dict_ofs, bytes_to_match))
|
1683
|
+
return false;
|
1684
|
+
}
|
1685
|
+
|
1686
|
+
if ((!greedy_parse_state.m_greedy_parse_gave_up) || (!bytes_to_match))
|
1687
|
+
continue;
|
1688
|
+
}
|
1689
|
+
}
|
1690
|
+
|
1691
|
+
uint num_parse_jobs = LZHAM_MIN(m_num_parse_threads, (bytes_to_match + cMaxParseGraphNodes - 1) / cMaxParseGraphNodes);
|
1692
|
+
if ((m_params.m_lzham_compress_flags & LZHAM_COMP_FLAG_DETERMINISTIC_PARSING) == 0)
|
1693
|
+
{
|
1694
|
+
if (m_use_task_pool && m_accel.get_max_helper_threads())
|
1695
|
+
{
|
1696
|
+
// Increase the number of active parse jobs as the match finder finishes up to keep CPU utilization up.
|
1697
|
+
num_parse_jobs += m_accel.get_num_completed_helper_threads();
|
1698
|
+
num_parse_jobs = LZHAM_MIN(num_parse_jobs, cMaxParseThreads);
|
1699
|
+
}
|
1700
|
+
}
|
1701
|
+
if (bytes_to_match < 1536)
|
1702
|
+
num_parse_jobs = 1;
|
1703
|
+
|
1704
|
+
// Reduce block size near the beginning of the file so statistical models get going a bit faster.
|
1705
|
+
bool force_small_block = false;
|
1706
|
+
if ((!m_block_index) && ((cur_dict_ofs - m_block_start_dict_ofs) < cMaxParseGraphNodes))
|
1707
|
+
{
|
1708
|
+
num_parse_jobs = 1;
|
1709
|
+
force_small_block = true;
|
1710
|
+
}
|
1711
|
+
|
1712
|
+
uint parse_thread_start_ofs = cur_dict_ofs;
|
1713
|
+
uint parse_thread_total_size = LZHAM_MIN(bytes_to_match, cMaxParseGraphNodes * num_parse_jobs);
|
1714
|
+
if (force_small_block)
|
1715
|
+
{
|
1716
|
+
parse_thread_total_size = LZHAM_MIN(parse_thread_total_size, 1536);
|
1717
|
+
}
|
1718
|
+
|
1719
|
+
uint parse_thread_remaining = parse_thread_total_size;
|
1720
|
+
for (uint parse_thread_index = 0; parse_thread_index < num_parse_jobs; parse_thread_index++)
|
1721
|
+
{
|
1722
|
+
parse_thread_state &parse_thread = m_parse_thread_state[parse_thread_index];
|
1723
|
+
|
1724
|
+
parse_thread.m_initial_state = m_state;
|
1725
|
+
parse_thread.m_initial_state.m_cur_ofs = parse_thread_start_ofs;
|
1726
|
+
|
1727
|
+
if (parse_thread_index > 0)
|
1728
|
+
{
|
1729
|
+
parse_thread.m_initial_state.reset_state_partial();
|
1730
|
+
parse_thread.m_issue_reset_state_partial = true;
|
1731
|
+
}
|
1732
|
+
else
|
1733
|
+
{
|
1734
|
+
parse_thread.m_issue_reset_state_partial = false;
|
1735
|
+
}
|
1736
|
+
|
1737
|
+
parse_thread.m_start_ofs = parse_thread_start_ofs;
|
1738
|
+
if (parse_thread_index == (num_parse_jobs - 1))
|
1739
|
+
parse_thread.m_bytes_to_match = parse_thread_remaining;
|
1740
|
+
else
|
1741
|
+
parse_thread.m_bytes_to_match = parse_thread_total_size / num_parse_jobs;
|
1742
|
+
|
1743
|
+
parse_thread.m_bytes_to_match = LZHAM_MIN(parse_thread.m_bytes_to_match, cMaxParseGraphNodes);
|
1744
|
+
LZHAM_ASSERT(parse_thread.m_bytes_to_match > 0);
|
1745
|
+
|
1746
|
+
parse_thread.m_max_greedy_decisions = UINT_MAX;
|
1747
|
+
parse_thread.m_greedy_parse_gave_up = false;
|
1748
|
+
|
1749
|
+
parse_thread_start_ofs += parse_thread.m_bytes_to_match;
|
1750
|
+
parse_thread_remaining -= parse_thread.m_bytes_to_match;
|
1751
|
+
}
|
1752
|
+
|
1753
|
+
{
|
1754
|
+
scoped_perf_section parse_timer("parsing");
|
1755
|
+
|
1756
|
+
if ((m_use_task_pool) && (num_parse_jobs > 1))
|
1757
|
+
{
|
1758
|
+
m_parse_jobs_remaining = num_parse_jobs;
|
1759
|
+
|
1760
|
+
{
|
1761
|
+
scoped_perf_section queue_task_timer("queuing parse tasks");
|
1762
|
+
|
1763
|
+
if (!m_params.m_pTask_pool->queue_multiple_object_tasks(this, &lzcompressor::parse_job_callback, 1, num_parse_jobs - 1))
|
1764
|
+
return false;
|
1765
|
+
}
|
1766
|
+
|
1767
|
+
parse_job_callback(0, NULL);
|
1768
|
+
|
1769
|
+
{
|
1770
|
+
scoped_perf_section wait_timer("waiting for jobs");
|
1771
|
+
|
1772
|
+
m_parse_jobs_complete.wait();
|
1773
|
+
}
|
1774
|
+
}
|
1775
|
+
else
|
1776
|
+
{
|
1777
|
+
m_parse_jobs_remaining = INT_MAX;
|
1778
|
+
for (uint parse_thread_index = 0; parse_thread_index < num_parse_jobs; parse_thread_index++)
|
1779
|
+
{
|
1780
|
+
parse_job_callback(parse_thread_index, NULL);
|
1781
|
+
}
|
1782
|
+
}
|
1783
|
+
}
|
1784
|
+
|
1785
|
+
{
|
1786
|
+
scoped_perf_section coding_timer("coding");
|
1787
|
+
|
1788
|
+
for (uint parse_thread_index = 0; parse_thread_index < num_parse_jobs; parse_thread_index++)
|
1789
|
+
{
|
1790
|
+
parse_thread_state &parse_thread = m_parse_thread_state[parse_thread_index];
|
1791
|
+
if (parse_thread.m_failed)
|
1792
|
+
return false;
|
1793
|
+
|
1794
|
+
const lzham::vector<lzdecision> &best_decisions = parse_thread.m_best_decisions;
|
1795
|
+
|
1796
|
+
if (parse_thread.m_issue_reset_state_partial)
|
1797
|
+
{
|
1798
|
+
if (!m_state.encode_reset_state_partial(m_codec, m_accel, cur_dict_ofs))
|
1799
|
+
return false;
|
1800
|
+
m_step++;
|
1801
|
+
}
|
1802
|
+
|
1803
|
+
if (best_decisions.size())
|
1804
|
+
{
|
1805
|
+
int i = 0;
|
1806
|
+
int end_dec_index = static_cast<int>(best_decisions.size()) - 1;
|
1807
|
+
int dec_step = 1;
|
1808
|
+
if (parse_thread.m_emit_decisions_backwards)
|
1809
|
+
{
|
1810
|
+
i = static_cast<int>(best_decisions.size()) - 1;
|
1811
|
+
end_dec_index = 0;
|
1812
|
+
dec_step = -1;
|
1813
|
+
LZHAM_ASSERT(best_decisions.back().m_pos == (int)parse_thread.m_start_ofs);
|
1814
|
+
}
|
1815
|
+
else
|
1816
|
+
{
|
1817
|
+
LZHAM_ASSERT(best_decisions.front().m_pos == (int)parse_thread.m_start_ofs);
|
1818
|
+
}
|
1819
|
+
|
1820
|
+
// Loop rearranged to avoid bad x64 codegen problem with MSVC2008.
|
1821
|
+
for ( ; ; )
|
1822
|
+
{
|
1823
|
+
LZHAM_ASSERT(best_decisions[i].m_pos == (int)cur_dict_ofs);
|
1824
|
+
LZHAM_ASSERT(i >= 0);
|
1825
|
+
LZHAM_ASSERT(i < (int)best_decisions.size());
|
1826
|
+
|
1827
|
+
#if LZHAM_UPDATE_STATS
|
1828
|
+
bit_cost_t cost = m_state.get_cost(*this, m_accel, best_decisions[i]);
|
1829
|
+
m_stats.update(best_decisions[i], m_state, m_accel, cost);
|
1830
|
+
//m_state.print(m_codec, *this, m_accel, best_decisions[i]);
|
1831
|
+
#endif
|
1832
|
+
|
1833
|
+
if (!code_decision(best_decisions[i], cur_dict_ofs, bytes_to_match))
|
1834
|
+
return false;
|
1835
|
+
if (i == end_dec_index)
|
1836
|
+
break;
|
1837
|
+
i += dec_step;
|
1838
|
+
}
|
1839
|
+
|
1840
|
+
LZHAM_NOTE_UNUSED(i);
|
1841
|
+
}
|
1842
|
+
|
1843
|
+
LZHAM_ASSERT(cur_dict_ofs == parse_thread.m_start_ofs + parse_thread.m_bytes_to_match);
|
1844
|
+
|
1845
|
+
} // parse_thread_index
|
1846
|
+
|
1847
|
+
}
|
1848
|
+
}
|
1849
|
+
|
1850
|
+
{
|
1851
|
+
scoped_perf_section add_bytes_timer("add_bytes_end");
|
1852
|
+
m_accel.add_bytes_end();
|
1853
|
+
}
|
1854
|
+
|
1855
|
+
if (!m_state.encode_eob(m_codec, m_accel, cur_dict_ofs))
|
1856
|
+
return false;
|
1857
|
+
|
1858
|
+
#ifdef LZHAM_LZDEBUG
|
1859
|
+
if (!m_codec.encode_bits(366, 12)) return false;
|
1860
|
+
#endif
|
1861
|
+
|
1862
|
+
{
|
1863
|
+
scoped_perf_section stop_encoding_timer("stop_encoding");
|
1864
|
+
if (!m_codec.stop_encoding(true)) return false;
|
1865
|
+
}
|
1866
|
+
|
1867
|
+
// Coded the entire block - now see if it makes more sense to just send a raw/uncompressed block.
|
1868
|
+
|
1869
|
+
uint compressed_size = m_codec.get_encoding_buf().size();
|
1870
|
+
LZHAM_NOTE_UNUSED(compressed_size);
|
1871
|
+
|
1872
|
+
bool used_raw_block = false;
|
1873
|
+
|
1874
|
+
#if !LZHAM_FORCE_ALL_RAW_BLOCKS
|
1875
|
+
#if (defined(LZHAM_DISABLE_RAW_BLOCKS) || defined(LZHAM_LZDEBUG))
|
1876
|
+
if (0)
|
1877
|
+
#else
|
1878
|
+
// TODO: Allow the user to control this threshold, i.e. if less than 1% then just store uncompressed.
|
1879
|
+
if (compressed_size >= buf_len)
|
1880
|
+
#endif
|
1881
|
+
#endif
|
1882
|
+
{
|
1883
|
+
// Failed to compress the block, so go back to our original state and just code a raw block.
|
1884
|
+
m_state = m_start_of_block_state;
|
1885
|
+
m_step = initial_step;
|
1886
|
+
//m_stats = initial_stats;
|
1887
|
+
|
1888
|
+
m_codec.reset();
|
1889
|
+
|
1890
|
+
if (!m_codec.start_encoding(buf_len + 16))
|
1891
|
+
return false;
|
1892
|
+
|
1893
|
+
if (!m_block_index)
|
1894
|
+
{
|
1895
|
+
if (!send_configuration())
|
1896
|
+
return false;
|
1897
|
+
}
|
1898
|
+
|
1899
|
+
#ifdef LZHAM_LZDEBUG
|
1900
|
+
if (!m_codec.encode_bits(166, 12))
|
1901
|
+
return false;
|
1902
|
+
#endif
|
1903
|
+
|
1904
|
+
if (!m_codec.encode_bits(cRawBlock, cBlockHeaderBits))
|
1905
|
+
return false;
|
1906
|
+
|
1907
|
+
LZHAM_ASSERT(buf_len <= 0x1000000);
|
1908
|
+
if (!m_codec.encode_bits(buf_len - 1, 24))
|
1909
|
+
return false;
|
1910
|
+
|
1911
|
+
// Write buf len check bits, to help increase the probability of detecting corrupted data earlier.
|
1912
|
+
uint buf_len0 = (buf_len - 1) & 0xFF;
|
1913
|
+
uint buf_len1 = ((buf_len - 1) >> 8) & 0xFF;
|
1914
|
+
uint buf_len2 = ((buf_len - 1) >> 16) & 0xFF;
|
1915
|
+
if (!m_codec.encode_bits((buf_len0 ^ buf_len1) ^ buf_len2, 8))
|
1916
|
+
return false;
|
1917
|
+
|
1918
|
+
if (!m_codec.encode_align_to_byte())
|
1919
|
+
return false;
|
1920
|
+
|
1921
|
+
const uint8* pSrc = m_accel.get_ptr(m_block_start_dict_ofs);
|
1922
|
+
|
1923
|
+
for (uint i = 0; i < buf_len; i++)
|
1924
|
+
{
|
1925
|
+
if (!m_codec.encode_bits(*pSrc++, 8))
|
1926
|
+
return false;
|
1927
|
+
}
|
1928
|
+
|
1929
|
+
if (!m_codec.stop_encoding(true))
|
1930
|
+
return false;
|
1931
|
+
|
1932
|
+
used_raw_block = true;
|
1933
|
+
emit_reset_update_rate_command = false;
|
1934
|
+
}
|
1935
|
+
|
1936
|
+
uint comp_size = m_codec.get_encoding_buf().size();
|
1937
|
+
uint scaled_ratio = (comp_size * cBlockHistoryCompRatioScale) / buf_len;
|
1938
|
+
update_block_history(comp_size, buf_len, scaled_ratio, used_raw_block, emit_reset_update_rate_command);
|
1939
|
+
|
1940
|
+
//printf("\n%u, %u, %u, %u\n", m_block_index, 500*emit_reset_update_rate_command, scaled_ratio, get_recent_block_ratio());
|
1941
|
+
|
1942
|
+
{
|
1943
|
+
scoped_perf_section append_timer("append");
|
1944
|
+
|
1945
|
+
if (m_comp_buf.empty())
|
1946
|
+
{
|
1947
|
+
m_comp_buf.swap(m_codec.get_encoding_buf());
|
1948
|
+
}
|
1949
|
+
else
|
1950
|
+
{
|
1951
|
+
if (!m_comp_buf.append(m_codec.get_encoding_buf()))
|
1952
|
+
return false;
|
1953
|
+
}
|
1954
|
+
}
|
1955
|
+
#if LZHAM_UPDATE_STATS
|
1956
|
+
LZHAM_VERIFY(m_stats.m_total_bytes == m_src_size);
|
1957
|
+
if (emit_reset_update_rate_command)
|
1958
|
+
m_stats.m_total_update_rate_resets++;
|
1959
|
+
#endif
|
1960
|
+
|
1961
|
+
m_block_index++;
|
1962
|
+
|
1963
|
+
return true;
|
1964
|
+
}
|
1965
|
+
|
1966
|
+
} // namespace lzham
|