extlzham 0.0.1.PROTOTYPE

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.md +27 -0
  3. data/README.md +21 -0
  4. data/Rakefile +143 -0
  5. data/contrib/lzham/LICENSE +22 -0
  6. data/contrib/lzham/README.md +209 -0
  7. data/contrib/lzham/include/lzham.h +781 -0
  8. data/contrib/lzham/lzhamcomp/lzham_comp.h +38 -0
  9. data/contrib/lzham/lzhamcomp/lzham_lzbase.cpp +244 -0
  10. data/contrib/lzham/lzhamcomp/lzham_lzbase.h +45 -0
  11. data/contrib/lzham/lzhamcomp/lzham_lzcomp.cpp +608 -0
  12. data/contrib/lzham/lzhamcomp/lzham_lzcomp_internal.cpp +1966 -0
  13. data/contrib/lzham/lzhamcomp/lzham_lzcomp_internal.h +472 -0
  14. data/contrib/lzham/lzhamcomp/lzham_lzcomp_state.cpp +1413 -0
  15. data/contrib/lzham/lzhamcomp/lzham_match_accel.cpp +562 -0
  16. data/contrib/lzham/lzhamcomp/lzham_match_accel.h +146 -0
  17. data/contrib/lzham/lzhamcomp/lzham_null_threading.h +97 -0
  18. data/contrib/lzham/lzhamcomp/lzham_pthreads_threading.cpp +229 -0
  19. data/contrib/lzham/lzhamcomp/lzham_pthreads_threading.h +520 -0
  20. data/contrib/lzham/lzhamcomp/lzham_threading.h +12 -0
  21. data/contrib/lzham/lzhamcomp/lzham_win32_threading.cpp +220 -0
  22. data/contrib/lzham/lzhamcomp/lzham_win32_threading.h +368 -0
  23. data/contrib/lzham/lzhamdecomp/lzham_assert.cpp +66 -0
  24. data/contrib/lzham/lzhamdecomp/lzham_assert.h +40 -0
  25. data/contrib/lzham/lzhamdecomp/lzham_checksum.cpp +73 -0
  26. data/contrib/lzham/lzhamdecomp/lzham_checksum.h +13 -0
  27. data/contrib/lzham/lzhamdecomp/lzham_config.h +23 -0
  28. data/contrib/lzham/lzhamdecomp/lzham_core.h +264 -0
  29. data/contrib/lzham/lzhamdecomp/lzham_decomp.h +37 -0
  30. data/contrib/lzham/lzhamdecomp/lzham_helpers.h +54 -0
  31. data/contrib/lzham/lzhamdecomp/lzham_huffman_codes.cpp +262 -0
  32. data/contrib/lzham/lzhamdecomp/lzham_huffman_codes.h +14 -0
  33. data/contrib/lzham/lzhamdecomp/lzham_lzdecomp.cpp +1527 -0
  34. data/contrib/lzham/lzhamdecomp/lzham_lzdecompbase.cpp +131 -0
  35. data/contrib/lzham/lzhamdecomp/lzham_lzdecompbase.h +89 -0
  36. data/contrib/lzham/lzhamdecomp/lzham_math.h +142 -0
  37. data/contrib/lzham/lzhamdecomp/lzham_mem.cpp +284 -0
  38. data/contrib/lzham/lzhamdecomp/lzham_mem.h +112 -0
  39. data/contrib/lzham/lzhamdecomp/lzham_platform.cpp +157 -0
  40. data/contrib/lzham/lzhamdecomp/lzham_platform.h +284 -0
  41. data/contrib/lzham/lzhamdecomp/lzham_prefix_coding.cpp +351 -0
  42. data/contrib/lzham/lzhamdecomp/lzham_prefix_coding.h +146 -0
  43. data/contrib/lzham/lzhamdecomp/lzham_symbol_codec.cpp +1484 -0
  44. data/contrib/lzham/lzhamdecomp/lzham_symbol_codec.h +556 -0
  45. data/contrib/lzham/lzhamdecomp/lzham_timer.cpp +147 -0
  46. data/contrib/lzham/lzhamdecomp/lzham_timer.h +99 -0
  47. data/contrib/lzham/lzhamdecomp/lzham_traits.h +141 -0
  48. data/contrib/lzham/lzhamdecomp/lzham_types.h +97 -0
  49. data/contrib/lzham/lzhamdecomp/lzham_utils.h +58 -0
  50. data/contrib/lzham/lzhamdecomp/lzham_vector.cpp +75 -0
  51. data/contrib/lzham/lzhamdecomp/lzham_vector.h +588 -0
  52. data/contrib/lzham/lzhamlib/lzham_lib.cpp +179 -0
  53. data/examples/basic.rb +48 -0
  54. data/ext/extconf.rb +26 -0
  55. data/ext/extlzham.c +741 -0
  56. data/gemstub.rb +22 -0
  57. data/lib/extlzham/version.rb +5 -0
  58. data/lib/extlzham.rb +153 -0
  59. metadata +135 -0
@@ -0,0 +1,562 @@
1
+ // File: lzham_match_accel.cpp
2
+ // See Copyright Notice and license at the end of include/lzham.h
3
+ #include "lzham_core.h"
4
+ #include "lzham_match_accel.h"
5
+ #include "lzham_timer.h"
6
+
7
+ namespace lzham
8
+ {
9
+ static inline uint32 hash2_to_12(uint c0, uint c1)
10
+ {
11
+ return c0 ^ (c1 << 4);
12
+ }
13
+
14
+ static inline uint32 hash3_to_16(uint c0, uint c1, uint c2)
15
+ {
16
+ return (c0 | (c1 << 8)) ^ (c2 << 4);
17
+ }
18
+
19
+ search_accelerator::search_accelerator() :
20
+ m_pLZBase(NULL),
21
+ m_pTask_pool(NULL),
22
+ m_max_helper_threads(0),
23
+ m_max_dict_size(0),
24
+ m_max_dict_size_mask(0),
25
+ m_lookahead_pos(0),
26
+ m_lookahead_size(0),
27
+ m_cur_dict_size(0),
28
+ m_fill_lookahead_pos(0),
29
+ m_fill_lookahead_size(0),
30
+ m_fill_dict_size(0),
31
+ m_max_probes(0),
32
+ m_max_matches(0),
33
+ m_all_matches(false),
34
+ m_next_match_ref(0),
35
+ m_num_completed_helper_threads(0)
36
+ {
37
+ }
38
+
39
+ bool search_accelerator::init(CLZBase* pLZBase, task_pool* pPool, uint max_helper_threads, uint max_dict_size, uint max_matches, bool all_matches, uint max_probes)
40
+ {
41
+ LZHAM_ASSERT(pLZBase);
42
+ LZHAM_ASSERT(max_dict_size && math::is_power_of_2(max_dict_size));
43
+ LZHAM_ASSERT(max_probes);
44
+
45
+ m_max_probes = LZHAM_MIN(cMatchAccelMaxSupportedProbes, max_probes);
46
+
47
+ m_pLZBase = pLZBase;
48
+ m_pTask_pool = max_helper_threads ? pPool : NULL;
49
+ m_max_helper_threads = m_pTask_pool ? max_helper_threads : 0;
50
+ m_max_matches = LZHAM_MIN(m_max_probes, max_matches);
51
+ m_all_matches = all_matches;
52
+
53
+ m_max_dict_size = max_dict_size;
54
+ m_max_dict_size_mask = m_max_dict_size - 1;
55
+ m_cur_dict_size = 0;
56
+ m_lookahead_size = 0;
57
+ m_lookahead_pos = 0;
58
+ m_fill_lookahead_pos = 0;
59
+ m_fill_lookahead_size = 0;
60
+ m_fill_dict_size = 0;
61
+ m_num_completed_helper_threads = 0;
62
+
63
+ if (!m_dict.try_resize_no_construct(max_dict_size + LZHAM_MIN(m_max_dict_size, static_cast<uint>(CLZBase::cMaxHugeMatchLen))))
64
+ return false;
65
+
66
+ if (!m_hash.try_resize_no_construct(cHashSize))
67
+ return false;
68
+
69
+ if (!m_nodes.try_resize_no_construct(max_dict_size))
70
+ return false;
71
+
72
+ memset(m_hash.get_ptr(), 0, m_hash.size_in_bytes());
73
+
74
+ return true;
75
+ }
76
+
77
+ void search_accelerator::reset()
78
+ {
79
+ m_cur_dict_size = 0;
80
+ m_lookahead_size = 0;
81
+ m_lookahead_pos = 0;
82
+ m_fill_lookahead_pos = 0;
83
+ m_fill_lookahead_size = 0;
84
+ m_fill_dict_size = 0;
85
+ m_num_completed_helper_threads = 0;
86
+
87
+ // Clearing the hash tables is only necessary for determinism (otherwise, it's possible the matches returned after a reset will depend on the data processes before the reset).
88
+ if (m_hash.size())
89
+ memset(m_hash.get_ptr(), 0, m_hash.size_in_bytes());
90
+ if (m_digram_hash.size())
91
+ memset(m_digram_hash.get_ptr(), 0, m_digram_hash.size_in_bytes());
92
+ }
93
+
94
+ void search_accelerator::flush()
95
+ {
96
+ m_cur_dict_size = 0;
97
+ }
98
+
99
+ uint search_accelerator::get_max_add_bytes() const
100
+ {
101
+ uint add_pos = static_cast<uint>(m_lookahead_pos & (m_max_dict_size - 1));
102
+ return m_max_dict_size - add_pos;
103
+ }
104
+
105
+ static uint8 g_hamming_dist[256] =
106
+ {
107
+ 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
108
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
109
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
110
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
111
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
112
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
113
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
114
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
115
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
116
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
117
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
118
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
119
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
120
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
121
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
122
+ 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
123
+ };
124
+
125
+ void search_accelerator::find_all_matches_callback(uint64 data, void* pData_ptr)
126
+ {
127
+ scoped_perf_section find_all_matches_timer("find_all_matches_callback");
128
+
129
+ LZHAM_NOTE_UNUSED(pData_ptr);
130
+ const uint thread_index = (uint)data;
131
+
132
+ dict_match temp_matches[cMatchAccelMaxSupportedProbes * 2];
133
+
134
+ uint fill_lookahead_pos = m_fill_lookahead_pos;
135
+ uint fill_dict_size = m_fill_dict_size;
136
+ uint fill_lookahead_size = m_fill_lookahead_size;
137
+
138
+ uint c0 = 0, c1 = 0;
139
+ if (fill_lookahead_size >= 2)
140
+ {
141
+ c0 = m_dict[fill_lookahead_pos & m_max_dict_size_mask];
142
+ c1 = m_dict[(fill_lookahead_pos & m_max_dict_size_mask) + 1];
143
+ }
144
+
145
+ const uint8* pDict = m_dict.get_ptr();
146
+
147
+ while (fill_lookahead_size >= 3)
148
+ {
149
+ uint insert_pos = fill_lookahead_pos & m_max_dict_size_mask;
150
+
151
+ uint c2 = pDict[insert_pos + 2];
152
+ uint h = hash3_to_16(c0, c1, c2);
153
+ c0 = c1;
154
+ c1 = c2;
155
+
156
+ LZHAM_ASSERT(!m_hash_thread_index.size() || (m_hash_thread_index[h] != UINT8_MAX));
157
+
158
+ // Only process those strings that this worker thread was assigned to - this allows us to manipulate multiple trees in parallel with no worries about synchronization.
159
+ if (m_hash_thread_index.size() && (m_hash_thread_index[h] != thread_index))
160
+ {
161
+ fill_lookahead_pos++;
162
+ fill_lookahead_size--;
163
+ fill_dict_size++;
164
+ continue;
165
+ }
166
+
167
+ dict_match* pDstMatch = temp_matches;
168
+
169
+ uint cur_pos = m_hash[h];
170
+ m_hash[h] = static_cast<uint>(fill_lookahead_pos);
171
+
172
+ uint *pLeft = &m_nodes[insert_pos].m_left;
173
+ uint *pRight = &m_nodes[insert_pos].m_right;
174
+
175
+ const uint max_match_len = LZHAM_MIN(static_cast<uint>(CLZBase::cMaxMatchLen), fill_lookahead_size);
176
+ uint best_match_len = 2;
177
+
178
+ const uint8* pIns = &pDict[insert_pos];
179
+
180
+ uint n = m_max_probes;
181
+ for ( ; ; )
182
+ {
183
+ uint delta_pos = fill_lookahead_pos - cur_pos;
184
+ if ((n-- == 0) || (!delta_pos) || (delta_pos >= fill_dict_size))
185
+ {
186
+ *pLeft = 0;
187
+ *pRight = 0;
188
+ break;
189
+ }
190
+
191
+ uint pos = cur_pos & m_max_dict_size_mask;
192
+ node *pNode = &m_nodes[pos];
193
+
194
+ // Unfortunately, the initial compare match_len must be 0 because of the way we hash and truncate matches at the end of each block.
195
+ uint match_len = 0;
196
+ const uint8* pComp = &pDict[pos];
197
+
198
+ #if LZHAM_PLATFORM_X360 || (LZHAM_USE_UNALIGNED_INT_LOADS == 0)
199
+ for ( ; match_len < max_match_len; match_len++)
200
+ if (pComp[match_len] != pIns[match_len])
201
+ break;
202
+ #else
203
+ // Compare a qword at a time for a bit more efficiency.
204
+ const uint64* pComp_end = reinterpret_cast<const uint64*>(pComp + max_match_len - 7);
205
+ const uint64* pComp_cur = reinterpret_cast<const uint64*>(pComp);
206
+ const uint64* pIns_cur = reinterpret_cast<const uint64*>(pIns);
207
+ while (pComp_cur < pComp_end)
208
+ {
209
+ if (*pComp_cur != *pIns_cur)
210
+ break;
211
+ pComp_cur++;
212
+ pIns_cur++;
213
+ }
214
+ uint alt_match_len = static_cast<uint>(reinterpret_cast<const uint8*>(pComp_cur) - reinterpret_cast<const uint8*>(pComp));
215
+ for ( ; alt_match_len < max_match_len; alt_match_len++)
216
+ if (pComp[alt_match_len] != pIns[alt_match_len])
217
+ break;
218
+ #ifdef LZVERIFY
219
+ for ( ; match_len < max_match_len; match_len++)
220
+ if (pComp[match_len] != pIns[match_len])
221
+ break;
222
+ LZHAM_VERIFY(alt_match_len == match_len);
223
+ #endif
224
+ match_len = alt_match_len;
225
+ #endif
226
+
227
+ if (match_len > best_match_len)
228
+ {
229
+ pDstMatch->m_len = static_cast<uint16>(match_len - CLZBase::cMinMatchLen);
230
+ pDstMatch->m_dist = delta_pos;
231
+ pDstMatch++;
232
+
233
+ best_match_len = match_len;
234
+
235
+ if (match_len == max_match_len)
236
+ {
237
+ *pLeft = pNode->m_left;
238
+ *pRight = pNode->m_right;
239
+ break;
240
+ }
241
+ }
242
+ else if (m_all_matches)
243
+ {
244
+ pDstMatch->m_len = static_cast<uint16>(match_len - CLZBase::cMinMatchLen);
245
+ pDstMatch->m_dist = delta_pos;
246
+ pDstMatch++;
247
+ }
248
+ else if ((best_match_len > 2) && (best_match_len == match_len))
249
+ {
250
+ uint bestMatchDist = pDstMatch[-1].m_dist;
251
+ uint compMatchDist = delta_pos;
252
+
253
+ uint bestMatchSlot, bestMatchSlotOfs;
254
+ m_pLZBase->compute_lzx_position_slot(bestMatchDist, bestMatchSlot, bestMatchSlotOfs);
255
+
256
+ uint compMatchSlot, compMatchOfs;
257
+ m_pLZBase->compute_lzx_position_slot(compMatchDist, compMatchSlot, compMatchOfs);
258
+
259
+ // If both matches uses the same match slot, choose the one with the offset containing the lowest nibble as these bits separately entropy coded.
260
+ // This could choose a match which is further away in the absolute sense, but closer in a coding sense.
261
+ if ( (compMatchSlot < bestMatchSlot) ||
262
+ ((compMatchSlot >= 8) && (compMatchSlot == bestMatchSlot) && ((compMatchOfs & 15) < (bestMatchSlotOfs & 15))) )
263
+ {
264
+ LZHAM_ASSERT((pDstMatch[-1].m_len + (uint)CLZBase::cMinMatchLen) == best_match_len);
265
+ pDstMatch[-1].m_dist = delta_pos;
266
+ }
267
+ else if ((match_len < max_match_len) && (compMatchSlot <= bestMatchSlot))
268
+ {
269
+ // Choose the match which has lowest hamming distance in the mismatch byte for a tiny win on binary files.
270
+ // TODO: This competes against the prev. optimization.
271
+ uint desired_mismatch_byte = pIns[match_len];
272
+
273
+ uint cur_mismatch_byte = pDict[(insert_pos - bestMatchDist + match_len) & m_max_dict_size_mask];
274
+ uint cur_mismatch_dist = g_hamming_dist[cur_mismatch_byte ^ desired_mismatch_byte];
275
+
276
+ uint new_mismatch_byte = pComp[match_len];
277
+ uint new_mismatch_dist = g_hamming_dist[new_mismatch_byte ^ desired_mismatch_byte];
278
+ if (new_mismatch_dist < cur_mismatch_dist)
279
+ {
280
+ LZHAM_ASSERT((pDstMatch[-1].m_len + (uint)CLZBase::cMinMatchLen) == best_match_len);
281
+ pDstMatch[-1].m_dist = delta_pos;
282
+ }
283
+ }
284
+ }
285
+
286
+ uint new_pos;
287
+ if (pComp[match_len] < pIns[match_len])
288
+ {
289
+ *pLeft = cur_pos;
290
+ pLeft = &pNode->m_right;
291
+ new_pos = pNode->m_right;
292
+ }
293
+ else
294
+ {
295
+ *pRight = cur_pos;
296
+ pRight = &pNode->m_left;
297
+ new_pos = pNode->m_left;
298
+ }
299
+ if (new_pos == cur_pos)
300
+ break;
301
+ cur_pos = new_pos;
302
+ }
303
+
304
+ const uint num_matches = (uint)(pDstMatch - temp_matches);
305
+
306
+ if (num_matches)
307
+ {
308
+ pDstMatch[-1].m_dist |= 0x80000000;
309
+
310
+ const uint num_matches_to_write = LZHAM_MIN(num_matches, m_max_matches);
311
+
312
+ const uint match_ref_ofs = static_cast<uint>(atomic_exchange_add(&m_next_match_ref, num_matches_to_write));
313
+
314
+ memcpy(&m_matches[match_ref_ofs],
315
+ temp_matches + (num_matches - num_matches_to_write),
316
+ sizeof(temp_matches[0]) * num_matches_to_write);
317
+
318
+ // FIXME: This is going to really hurt on platforms requiring export barriers.
319
+ LZHAM_MEMORY_EXPORT_BARRIER
320
+
321
+ atomic_exchange32((atomic32_t*)&m_match_refs[static_cast<uint>(fill_lookahead_pos - m_fill_lookahead_pos)], match_ref_ofs);
322
+ }
323
+ else
324
+ {
325
+ atomic_exchange32((atomic32_t*)&m_match_refs[static_cast<uint>(fill_lookahead_pos - m_fill_lookahead_pos)], -2);
326
+ }
327
+
328
+ fill_lookahead_pos++;
329
+ fill_lookahead_size--;
330
+ fill_dict_size++;
331
+ }
332
+
333
+ while (fill_lookahead_size)
334
+ {
335
+ uint insert_pos = fill_lookahead_pos & m_max_dict_size_mask;
336
+ m_nodes[insert_pos].m_left = 0;
337
+ m_nodes[insert_pos].m_right = 0;
338
+
339
+ atomic_exchange32((atomic32_t*)&m_match_refs[static_cast<uint>(fill_lookahead_pos - m_fill_lookahead_pos)], -2);
340
+
341
+ fill_lookahead_pos++;
342
+ fill_lookahead_size--;
343
+ fill_dict_size++;
344
+ }
345
+
346
+ atomic_increment32(&m_num_completed_helper_threads);
347
+ }
348
+
349
+ bool search_accelerator::find_len2_matches()
350
+ {
351
+ if (!m_digram_hash.size())
352
+ {
353
+ if (!m_digram_hash.try_resize(cDigramHashSize))
354
+ return false;
355
+ }
356
+
357
+ if (m_digram_next.size() < m_lookahead_size)
358
+ {
359
+ if (!m_digram_next.try_resize(m_lookahead_size))
360
+ return false;
361
+ }
362
+
363
+ uint lookahead_dict_pos = m_lookahead_pos & m_max_dict_size_mask;
364
+
365
+ for (int lookahead_ofs = 0; lookahead_ofs < ((int)m_lookahead_size - 1); ++lookahead_ofs, ++lookahead_dict_pos)
366
+ {
367
+ uint c0 = m_dict[lookahead_dict_pos];
368
+ uint c1 = m_dict[lookahead_dict_pos + 1];
369
+
370
+ uint h = hash2_to_12(c0, c1) & (cDigramHashSize - 1);
371
+
372
+ m_digram_next[lookahead_ofs] = m_digram_hash[h];
373
+ m_digram_hash[h] = m_lookahead_pos + lookahead_ofs;
374
+ }
375
+
376
+ m_digram_next[m_lookahead_size - 1] = 0;
377
+
378
+ return true;
379
+ }
380
+
381
+ uint search_accelerator::get_len2_match(uint lookahead_ofs)
382
+ {
383
+ if ((m_fill_lookahead_size - lookahead_ofs) < 2)
384
+ return 0;
385
+
386
+ uint cur_pos = m_lookahead_pos + lookahead_ofs;
387
+
388
+ uint next_match_pos = m_digram_next[cur_pos - m_fill_lookahead_pos];
389
+
390
+ uint match_dist = cur_pos - next_match_pos;
391
+
392
+ if ((!match_dist) || (match_dist > CLZBase::cMaxLen2MatchDist) || (match_dist > (m_cur_dict_size + lookahead_ofs)))
393
+ return 0;
394
+
395
+ const uint8* pCur = &m_dict[cur_pos & m_max_dict_size_mask];
396
+ const uint8* pMatch = &m_dict[next_match_pos & m_max_dict_size_mask];
397
+
398
+ if ((pCur[0] == pMatch[0]) && (pCur[1] == pMatch[1]))
399
+ return match_dist;
400
+
401
+ return 0;
402
+ }
403
+
404
+ bool search_accelerator::find_all_matches(uint num_bytes)
405
+ {
406
+ if (!m_matches.try_resize_no_construct(m_max_probes * num_bytes))
407
+ return false;
408
+
409
+ if (!m_match_refs.try_resize_no_construct(num_bytes))
410
+ return false;
411
+
412
+ memset(m_match_refs.get_ptr(), 0xFF, m_match_refs.size_in_bytes());
413
+
414
+ m_fill_lookahead_pos = m_lookahead_pos;
415
+ m_fill_lookahead_size = num_bytes;
416
+ m_fill_dict_size = m_cur_dict_size;
417
+
418
+ m_next_match_ref = 0;
419
+
420
+ if (!m_pTask_pool)
421
+ {
422
+ find_all_matches_callback(0, NULL);
423
+
424
+ m_num_completed_helper_threads = 0;
425
+ }
426
+ else
427
+ {
428
+ if (!m_hash_thread_index.try_resize_no_construct(0x10000))
429
+ return false;
430
+
431
+ memset(m_hash_thread_index.get_ptr(), 0xFF, m_hash_thread_index.size_in_bytes());
432
+
433
+ uint next_thread_index = 0;
434
+ const uint8* pDict = &m_dict[m_lookahead_pos & m_max_dict_size_mask];
435
+ uint num_unique_trigrams = 0;
436
+
437
+ if (num_bytes >= 3)
438
+ {
439
+ uint c0 = pDict[0];
440
+ uint c1 = pDict[1];
441
+
442
+ const int limit = ((int)num_bytes - 2);
443
+ for (int i = 0; i < limit; i++)
444
+ {
445
+ uint c2 = pDict[2];
446
+ uint t = hash3_to_16(c0, c1, c2);
447
+ c0 = c1;
448
+ c1 = c2;
449
+
450
+ pDict++;
451
+
452
+ if (m_hash_thread_index[t] == UINT8_MAX)
453
+ {
454
+ num_unique_trigrams++;
455
+
456
+ m_hash_thread_index[t] = static_cast<uint8>(next_thread_index);
457
+ if (++next_thread_index == m_max_helper_threads)
458
+ next_thread_index = 0;
459
+ }
460
+ }
461
+ }
462
+
463
+ m_num_completed_helper_threads = 0;
464
+
465
+ if (!m_pTask_pool->queue_multiple_object_tasks(this, &search_accelerator::find_all_matches_callback, 0, m_max_helper_threads))
466
+ return false;
467
+ }
468
+
469
+ return find_len2_matches();
470
+ }
471
+
472
+ bool search_accelerator::add_bytes_begin(uint num_bytes, const uint8* pBytes)
473
+ {
474
+ LZHAM_ASSERT(num_bytes <= m_max_dict_size);
475
+ LZHAM_ASSERT(!m_lookahead_size);
476
+
477
+ uint add_pos = m_lookahead_pos & m_max_dict_size_mask;
478
+ LZHAM_ASSERT((add_pos + num_bytes) <= m_max_dict_size);
479
+
480
+ memcpy(&m_dict[add_pos], pBytes, num_bytes);
481
+
482
+ uint dict_bytes_to_mirror = LZHAM_MIN(static_cast<uint>(CLZBase::cMaxHugeMatchLen), m_max_dict_size);
483
+ if (add_pos < dict_bytes_to_mirror)
484
+ memcpy(&m_dict[m_max_dict_size], &m_dict[0], dict_bytes_to_mirror);
485
+
486
+ m_lookahead_size = num_bytes;
487
+
488
+ uint max_possible_dict_size = m_max_dict_size - num_bytes;
489
+ m_cur_dict_size = LZHAM_MIN(m_cur_dict_size, max_possible_dict_size);
490
+
491
+ m_next_match_ref = 0;
492
+
493
+ return find_all_matches(num_bytes);
494
+ }
495
+
496
+ void search_accelerator::add_bytes_end()
497
+ {
498
+ if (m_pTask_pool)
499
+ {
500
+ m_pTask_pool->join();
501
+ }
502
+
503
+ LZHAM_ASSERT((uint)m_next_match_ref <= m_matches.size());
504
+ }
505
+
506
+ dict_match* search_accelerator::find_matches(uint lookahead_ofs, bool spin)
507
+ {
508
+ LZHAM_ASSERT(lookahead_ofs < m_lookahead_size);
509
+
510
+ const uint match_ref_ofs = static_cast<uint>(m_lookahead_pos - m_fill_lookahead_pos + lookahead_ofs);
511
+
512
+ int match_ref;
513
+ uint spin_count = 0;
514
+
515
+ // This may spin until the match finder job(s) catch up to the caller's lookahead position.
516
+ for ( ; ; )
517
+ {
518
+ match_ref = static_cast<int>(m_match_refs[match_ref_ofs]);
519
+ if (match_ref == -2)
520
+ return NULL;
521
+ else if (match_ref != -1)
522
+ break;
523
+
524
+ spin_count++;
525
+ const uint cMaxSpinCount = 1000;
526
+ if ((spin) && (spin_count < cMaxSpinCount))
527
+ {
528
+ lzham_yield_processor();
529
+ lzham_yield_processor();
530
+ lzham_yield_processor();
531
+ lzham_yield_processor();
532
+ lzham_yield_processor();
533
+ lzham_yield_processor();
534
+ lzham_yield_processor();
535
+ lzham_yield_processor();
536
+
537
+ LZHAM_MEMORY_IMPORT_BARRIER
538
+ }
539
+ else
540
+ {
541
+ spin_count = cMaxSpinCount;
542
+
543
+ lzham_sleep(1);
544
+ }
545
+ }
546
+
547
+ LZHAM_MEMORY_IMPORT_BARRIER
548
+
549
+ return &m_matches[match_ref];
550
+ }
551
+
552
+ void search_accelerator::advance_bytes(uint num_bytes)
553
+ {
554
+ LZHAM_ASSERT(num_bytes <= m_lookahead_size);
555
+
556
+ m_lookahead_pos += num_bytes;
557
+ m_lookahead_size -= num_bytes;
558
+
559
+ m_cur_dict_size += num_bytes;
560
+ LZHAM_ASSERT(m_cur_dict_size <= m_max_dict_size);
561
+ }
562
+ }
@@ -0,0 +1,146 @@
1
+ // File: lzham_match_accel.h
2
+ // See Copyright Notice and license at the end of include/lzham.h
3
+ #pragma once
4
+ #include "lzham_lzbase.h"
5
+ #include "lzham_threading.h"
6
+
7
+ namespace lzham
8
+ {
9
+ const uint cMatchAccelMaxSupportedProbes = 128;
10
+
11
+ struct node
12
+ {
13
+ uint m_left;
14
+ uint m_right;
15
+ };
16
+
17
+ LZHAM_DEFINE_BITWISE_MOVABLE(node);
18
+
19
+ #pragma pack(push, 1)
20
+ struct dict_match
21
+ {
22
+ uint m_dist;
23
+ uint16 m_len;
24
+
25
+ inline uint get_dist() const { return m_dist & 0x7FFFFFFF; }
26
+ inline uint get_len() const { return m_len + 2; }
27
+ inline bool is_last() const { return (int)m_dist < 0; }
28
+ };
29
+ #pragma pack(pop)
30
+
31
+ LZHAM_DEFINE_BITWISE_MOVABLE(dict_match);
32
+
33
+ class search_accelerator
34
+ {
35
+ public:
36
+ search_accelerator();
37
+
38
+ // If all_matches is true, the match finder returns all found matches with no filtering.
39
+ // Otherwise, the finder will tend to return lists of matches with mostly unique lengths.
40
+ // For each length, it will discard matches with worse distances (in the coding sense).
41
+ bool init(CLZBase* pLZBase, task_pool* pPool, uint max_helper_threads, uint max_dict_size, uint max_matches, bool all_matches, uint max_probes);
42
+
43
+ void reset();
44
+ void flush();
45
+
46
+ inline uint get_max_dict_size() const { return m_max_dict_size; }
47
+ inline uint get_max_dict_size_mask() const { return m_max_dict_size_mask; }
48
+ inline uint get_cur_dict_size() const { return m_cur_dict_size; }
49
+
50
+ inline uint get_lookahead_pos() const { return m_lookahead_pos; }
51
+ inline uint get_lookahead_size() const { return m_lookahead_size; }
52
+
53
+ inline uint get_char(int delta_pos) const { return m_dict[(m_lookahead_pos + delta_pos) & m_max_dict_size_mask]; }
54
+ inline uint get_char(uint cur_dict_pos, int delta_pos) const { return m_dict[(cur_dict_pos + delta_pos) & m_max_dict_size_mask]; }
55
+ inline const uint8* get_ptr(uint pos) const { return &m_dict[pos]; }
56
+
57
+ uint get_max_helper_threads() const { return m_max_helper_threads; }
58
+
59
+ inline uint operator[](uint pos) const { return m_dict[pos]; }
60
+
61
+ uint get_max_add_bytes() const;
62
+ bool add_bytes_begin(uint num_bytes, const uint8* pBytes);
63
+ inline atomic32_t get_num_completed_helper_threads() const { return m_num_completed_helper_threads; }
64
+ void add_bytes_end();
65
+
66
+ // Returns the lookahead's raw position/size/dict_size at the time add_bytes_begin() is called.
67
+ inline uint get_fill_lookahead_pos() const { return m_fill_lookahead_pos; }
68
+ inline uint get_fill_lookahead_size() const { return m_fill_lookahead_size; }
69
+ inline uint get_fill_dict_size() const { return m_fill_dict_size; }
70
+
71
+ uint get_len2_match(uint lookahead_ofs);
72
+ dict_match* find_matches(uint lookahead_ofs, bool spin = true);
73
+
74
+ void advance_bytes(uint num_bytes);
75
+
76
+ LZHAM_FORCE_INLINE uint get_match_len(uint lookahead_ofs, int dist, uint max_match_len, uint start_match_len = 0) const
77
+ {
78
+ LZHAM_ASSERT(lookahead_ofs < m_lookahead_size);
79
+ LZHAM_ASSERT(start_match_len <= max_match_len);
80
+ LZHAM_ASSERT(max_match_len <= (get_lookahead_size() - lookahead_ofs));
81
+
82
+ const int find_dict_size = m_cur_dict_size + lookahead_ofs;
83
+ if (dist > find_dict_size)
84
+ return 0;
85
+
86
+ const uint comp_pos = static_cast<uint>((m_lookahead_pos + lookahead_ofs - dist) & m_max_dict_size_mask);
87
+ const uint lookahead_pos = (m_lookahead_pos + lookahead_ofs) & m_max_dict_size_mask;
88
+
89
+ const uint8* pComp = &m_dict[comp_pos];
90
+ const uint8* pLookahead = &m_dict[lookahead_pos];
91
+
92
+ uint match_len;
93
+ for (match_len = start_match_len; match_len < max_match_len; match_len++)
94
+ if (pComp[match_len] != pLookahead[match_len])
95
+ break;
96
+
97
+ return match_len;
98
+ }
99
+
100
+ public:
101
+ CLZBase* m_pLZBase;
102
+ task_pool* m_pTask_pool;
103
+ uint m_max_helper_threads;
104
+
105
+ uint m_max_dict_size;
106
+ uint m_max_dict_size_mask;
107
+
108
+ uint m_lookahead_pos;
109
+ uint m_lookahead_size;
110
+
111
+ uint m_cur_dict_size;
112
+
113
+ lzham::vector<uint8> m_dict;
114
+
115
+ enum { cHashSize = 65536 };
116
+ lzham::vector<uint> m_hash;
117
+ lzham::vector<node> m_nodes;
118
+
119
+ lzham::vector<dict_match> m_matches;
120
+ lzham::vector<atomic32_t> m_match_refs;
121
+
122
+ lzham::vector<uint8> m_hash_thread_index;
123
+
124
+ enum { cDigramHashSize = 4096 };
125
+ lzham::vector<uint> m_digram_hash;
126
+ lzham::vector<uint> m_digram_next;
127
+
128
+ uint m_fill_lookahead_pos;
129
+ uint m_fill_lookahead_size;
130
+ uint m_fill_dict_size;
131
+
132
+ uint m_max_probes;
133
+ uint m_max_matches;
134
+
135
+ bool m_all_matches;
136
+
137
+ volatile atomic32_t m_next_match_ref;
138
+
139
+ volatile atomic32_t m_num_completed_helper_threads;
140
+
141
+ void find_all_matches_callback(uint64 data, void* pData_ptr);
142
+ bool find_all_matches(uint num_bytes);
143
+ bool find_len2_matches();
144
+ };
145
+
146
+ } // namespace lzham