extlzham 0.0.1.PROTOTYPE3-x86-mingw32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.md +27 -0
  3. data/README.md +74 -0
  4. data/Rakefile +152 -0
  5. data/contrib/lzham/LICENSE +22 -0
  6. data/contrib/lzham/README.md +209 -0
  7. data/contrib/lzham/include/lzham.h +781 -0
  8. data/contrib/lzham/lzhamcomp/lzham_comp.h +38 -0
  9. data/contrib/lzham/lzhamcomp/lzham_lzbase.cpp +244 -0
  10. data/contrib/lzham/lzhamcomp/lzham_lzbase.h +45 -0
  11. data/contrib/lzham/lzhamcomp/lzham_lzcomp.cpp +608 -0
  12. data/contrib/lzham/lzhamcomp/lzham_lzcomp_internal.cpp +1966 -0
  13. data/contrib/lzham/lzhamcomp/lzham_lzcomp_internal.h +472 -0
  14. data/contrib/lzham/lzhamcomp/lzham_lzcomp_state.cpp +1413 -0
  15. data/contrib/lzham/lzhamcomp/lzham_match_accel.cpp +562 -0
  16. data/contrib/lzham/lzhamcomp/lzham_match_accel.h +146 -0
  17. data/contrib/lzham/lzhamcomp/lzham_null_threading.h +97 -0
  18. data/contrib/lzham/lzhamcomp/lzham_pthreads_threading.cpp +229 -0
  19. data/contrib/lzham/lzhamcomp/lzham_pthreads_threading.h +520 -0
  20. data/contrib/lzham/lzhamcomp/lzham_threading.h +12 -0
  21. data/contrib/lzham/lzhamcomp/lzham_win32_threading.cpp +220 -0
  22. data/contrib/lzham/lzhamcomp/lzham_win32_threading.h +368 -0
  23. data/contrib/lzham/lzhamdecomp/lzham_assert.cpp +66 -0
  24. data/contrib/lzham/lzhamdecomp/lzham_assert.h +40 -0
  25. data/contrib/lzham/lzhamdecomp/lzham_checksum.cpp +73 -0
  26. data/contrib/lzham/lzhamdecomp/lzham_checksum.h +13 -0
  27. data/contrib/lzham/lzhamdecomp/lzham_config.h +23 -0
  28. data/contrib/lzham/lzhamdecomp/lzham_core.h +264 -0
  29. data/contrib/lzham/lzhamdecomp/lzham_decomp.h +37 -0
  30. data/contrib/lzham/lzhamdecomp/lzham_helpers.h +54 -0
  31. data/contrib/lzham/lzhamdecomp/lzham_huffman_codes.cpp +262 -0
  32. data/contrib/lzham/lzhamdecomp/lzham_huffman_codes.h +14 -0
  33. data/contrib/lzham/lzhamdecomp/lzham_lzdecomp.cpp +1527 -0
  34. data/contrib/lzham/lzhamdecomp/lzham_lzdecompbase.cpp +131 -0
  35. data/contrib/lzham/lzhamdecomp/lzham_lzdecompbase.h +89 -0
  36. data/contrib/lzham/lzhamdecomp/lzham_math.h +142 -0
  37. data/contrib/lzham/lzhamdecomp/lzham_mem.cpp +284 -0
  38. data/contrib/lzham/lzhamdecomp/lzham_mem.h +112 -0
  39. data/contrib/lzham/lzhamdecomp/lzham_platform.cpp +157 -0
  40. data/contrib/lzham/lzhamdecomp/lzham_platform.h +284 -0
  41. data/contrib/lzham/lzhamdecomp/lzham_prefix_coding.cpp +351 -0
  42. data/contrib/lzham/lzhamdecomp/lzham_prefix_coding.h +146 -0
  43. data/contrib/lzham/lzhamdecomp/lzham_symbol_codec.cpp +1484 -0
  44. data/contrib/lzham/lzhamdecomp/lzham_symbol_codec.h +556 -0
  45. data/contrib/lzham/lzhamdecomp/lzham_timer.cpp +147 -0
  46. data/contrib/lzham/lzhamdecomp/lzham_timer.h +99 -0
  47. data/contrib/lzham/lzhamdecomp/lzham_traits.h +141 -0
  48. data/contrib/lzham/lzhamdecomp/lzham_types.h +97 -0
  49. data/contrib/lzham/lzhamdecomp/lzham_utils.h +58 -0
  50. data/contrib/lzham/lzhamdecomp/lzham_vector.cpp +75 -0
  51. data/contrib/lzham/lzhamdecomp/lzham_vector.h +588 -0
  52. data/contrib/lzham/lzhamlib/lzham_lib.cpp +179 -0
  53. data/examples/basic.rb +48 -0
  54. data/ext/constants.c +64 -0
  55. data/ext/decoder.c +313 -0
  56. data/ext/depend +5 -0
  57. data/ext/encoder.c +372 -0
  58. data/ext/error.c +80 -0
  59. data/ext/extconf.rb +29 -0
  60. data/ext/extlzham.c +34 -0
  61. data/ext/extlzham.h +62 -0
  62. data/gemstub.rb +22 -0
  63. data/lib/2.0/extlzham.so +0 -0
  64. data/lib/2.1/extlzham.so +0 -0
  65. data/lib/2.2/extlzham.so +0 -0
  66. data/lib/extlzham.rb +158 -0
  67. data/lib/extlzham/version.rb +5 -0
  68. data/test/test_extlzham.rb +35 -0
  69. metadata +156 -0
@@ -0,0 +1,562 @@
1
+ // File: lzham_match_accel.cpp
2
+ // See Copyright Notice and license at the end of include/lzham.h
3
+ #include "lzham_core.h"
4
+ #include "lzham_match_accel.h"
5
+ #include "lzham_timer.h"
6
+
7
+ namespace lzham
8
+ {
9
+ static inline uint32 hash2_to_12(uint c0, uint c1)
10
+ {
11
+ return c0 ^ (c1 << 4);
12
+ }
13
+
14
+ static inline uint32 hash3_to_16(uint c0, uint c1, uint c2)
15
+ {
16
+ return (c0 | (c1 << 8)) ^ (c2 << 4);
17
+ }
18
+
19
+ search_accelerator::search_accelerator() :
20
+ m_pLZBase(NULL),
21
+ m_pTask_pool(NULL),
22
+ m_max_helper_threads(0),
23
+ m_max_dict_size(0),
24
+ m_max_dict_size_mask(0),
25
+ m_lookahead_pos(0),
26
+ m_lookahead_size(0),
27
+ m_cur_dict_size(0),
28
+ m_fill_lookahead_pos(0),
29
+ m_fill_lookahead_size(0),
30
+ m_fill_dict_size(0),
31
+ m_max_probes(0),
32
+ m_max_matches(0),
33
+ m_all_matches(false),
34
+ m_next_match_ref(0),
35
+ m_num_completed_helper_threads(0)
36
+ {
37
+ }
38
+
39
+ bool search_accelerator::init(CLZBase* pLZBase, task_pool* pPool, uint max_helper_threads, uint max_dict_size, uint max_matches, bool all_matches, uint max_probes)
40
+ {
41
+ LZHAM_ASSERT(pLZBase);
42
+ LZHAM_ASSERT(max_dict_size && math::is_power_of_2(max_dict_size));
43
+ LZHAM_ASSERT(max_probes);
44
+
45
+ m_max_probes = LZHAM_MIN(cMatchAccelMaxSupportedProbes, max_probes);
46
+
47
+ m_pLZBase = pLZBase;
48
+ m_pTask_pool = max_helper_threads ? pPool : NULL;
49
+ m_max_helper_threads = m_pTask_pool ? max_helper_threads : 0;
50
+ m_max_matches = LZHAM_MIN(m_max_probes, max_matches);
51
+ m_all_matches = all_matches;
52
+
53
+ m_max_dict_size = max_dict_size;
54
+ m_max_dict_size_mask = m_max_dict_size - 1;
55
+ m_cur_dict_size = 0;
56
+ m_lookahead_size = 0;
57
+ m_lookahead_pos = 0;
58
+ m_fill_lookahead_pos = 0;
59
+ m_fill_lookahead_size = 0;
60
+ m_fill_dict_size = 0;
61
+ m_num_completed_helper_threads = 0;
62
+
63
+ if (!m_dict.try_resize_no_construct(max_dict_size + LZHAM_MIN(m_max_dict_size, static_cast<uint>(CLZBase::cMaxHugeMatchLen))))
64
+ return false;
65
+
66
+ if (!m_hash.try_resize_no_construct(cHashSize))
67
+ return false;
68
+
69
+ if (!m_nodes.try_resize_no_construct(max_dict_size))
70
+ return false;
71
+
72
+ memset(m_hash.get_ptr(), 0, m_hash.size_in_bytes());
73
+
74
+ return true;
75
+ }
76
+
77
+ void search_accelerator::reset()
78
+ {
79
+ m_cur_dict_size = 0;
80
+ m_lookahead_size = 0;
81
+ m_lookahead_pos = 0;
82
+ m_fill_lookahead_pos = 0;
83
+ m_fill_lookahead_size = 0;
84
+ m_fill_dict_size = 0;
85
+ m_num_completed_helper_threads = 0;
86
+
87
+ // Clearing the hash tables is only necessary for determinism (otherwise, it's possible the matches returned after a reset will depend on the data processes before the reset).
88
+ if (m_hash.size())
89
+ memset(m_hash.get_ptr(), 0, m_hash.size_in_bytes());
90
+ if (m_digram_hash.size())
91
+ memset(m_digram_hash.get_ptr(), 0, m_digram_hash.size_in_bytes());
92
+ }
93
+
94
+ void search_accelerator::flush()
95
+ {
96
+ m_cur_dict_size = 0;
97
+ }
98
+
99
+ uint search_accelerator::get_max_add_bytes() const
100
+ {
101
+ uint add_pos = static_cast<uint>(m_lookahead_pos & (m_max_dict_size - 1));
102
+ return m_max_dict_size - add_pos;
103
+ }
104
+
105
+ static uint8 g_hamming_dist[256] =
106
+ {
107
+ 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
108
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
109
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
110
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
111
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
112
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
113
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
114
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
115
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
116
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
117
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
118
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
119
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
120
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
121
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
122
+ 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
123
+ };
124
+
125
+ void search_accelerator::find_all_matches_callback(uint64 data, void* pData_ptr)
126
+ {
127
+ scoped_perf_section find_all_matches_timer("find_all_matches_callback");
128
+
129
+ LZHAM_NOTE_UNUSED(pData_ptr);
130
+ const uint thread_index = (uint)data;
131
+
132
+ dict_match temp_matches[cMatchAccelMaxSupportedProbes * 2];
133
+
134
+ uint fill_lookahead_pos = m_fill_lookahead_pos;
135
+ uint fill_dict_size = m_fill_dict_size;
136
+ uint fill_lookahead_size = m_fill_lookahead_size;
137
+
138
+ uint c0 = 0, c1 = 0;
139
+ if (fill_lookahead_size >= 2)
140
+ {
141
+ c0 = m_dict[fill_lookahead_pos & m_max_dict_size_mask];
142
+ c1 = m_dict[(fill_lookahead_pos & m_max_dict_size_mask) + 1];
143
+ }
144
+
145
+ const uint8* pDict = m_dict.get_ptr();
146
+
147
+ while (fill_lookahead_size >= 3)
148
+ {
149
+ uint insert_pos = fill_lookahead_pos & m_max_dict_size_mask;
150
+
151
+ uint c2 = pDict[insert_pos + 2];
152
+ uint h = hash3_to_16(c0, c1, c2);
153
+ c0 = c1;
154
+ c1 = c2;
155
+
156
+ LZHAM_ASSERT(!m_hash_thread_index.size() || (m_hash_thread_index[h] != UINT8_MAX));
157
+
158
+ // Only process those strings that this worker thread was assigned to - this allows us to manipulate multiple trees in parallel with no worries about synchronization.
159
+ if (m_hash_thread_index.size() && (m_hash_thread_index[h] != thread_index))
160
+ {
161
+ fill_lookahead_pos++;
162
+ fill_lookahead_size--;
163
+ fill_dict_size++;
164
+ continue;
165
+ }
166
+
167
+ dict_match* pDstMatch = temp_matches;
168
+
169
+ uint cur_pos = m_hash[h];
170
+ m_hash[h] = static_cast<uint>(fill_lookahead_pos);
171
+
172
+ uint *pLeft = &m_nodes[insert_pos].m_left;
173
+ uint *pRight = &m_nodes[insert_pos].m_right;
174
+
175
+ const uint max_match_len = LZHAM_MIN(static_cast<uint>(CLZBase::cMaxMatchLen), fill_lookahead_size);
176
+ uint best_match_len = 2;
177
+
178
+ const uint8* pIns = &pDict[insert_pos];
179
+
180
+ uint n = m_max_probes;
181
+ for ( ; ; )
182
+ {
183
+ uint delta_pos = fill_lookahead_pos - cur_pos;
184
+ if ((n-- == 0) || (!delta_pos) || (delta_pos >= fill_dict_size))
185
+ {
186
+ *pLeft = 0;
187
+ *pRight = 0;
188
+ break;
189
+ }
190
+
191
+ uint pos = cur_pos & m_max_dict_size_mask;
192
+ node *pNode = &m_nodes[pos];
193
+
194
+ // Unfortunately, the initial compare match_len must be 0 because of the way we hash and truncate matches at the end of each block.
195
+ uint match_len = 0;
196
+ const uint8* pComp = &pDict[pos];
197
+
198
+ #if LZHAM_PLATFORM_X360 || (LZHAM_USE_UNALIGNED_INT_LOADS == 0)
199
+ for ( ; match_len < max_match_len; match_len++)
200
+ if (pComp[match_len] != pIns[match_len])
201
+ break;
202
+ #else
203
+ // Compare a qword at a time for a bit more efficiency.
204
+ const uint64* pComp_end = reinterpret_cast<const uint64*>(pComp + max_match_len - 7);
205
+ const uint64* pComp_cur = reinterpret_cast<const uint64*>(pComp);
206
+ const uint64* pIns_cur = reinterpret_cast<const uint64*>(pIns);
207
+ while (pComp_cur < pComp_end)
208
+ {
209
+ if (*pComp_cur != *pIns_cur)
210
+ break;
211
+ pComp_cur++;
212
+ pIns_cur++;
213
+ }
214
+ uint alt_match_len = static_cast<uint>(reinterpret_cast<const uint8*>(pComp_cur) - reinterpret_cast<const uint8*>(pComp));
215
+ for ( ; alt_match_len < max_match_len; alt_match_len++)
216
+ if (pComp[alt_match_len] != pIns[alt_match_len])
217
+ break;
218
+ #ifdef LZVERIFY
219
+ for ( ; match_len < max_match_len; match_len++)
220
+ if (pComp[match_len] != pIns[match_len])
221
+ break;
222
+ LZHAM_VERIFY(alt_match_len == match_len);
223
+ #endif
224
+ match_len = alt_match_len;
225
+ #endif
226
+
227
+ if (match_len > best_match_len)
228
+ {
229
+ pDstMatch->m_len = static_cast<uint16>(match_len - CLZBase::cMinMatchLen);
230
+ pDstMatch->m_dist = delta_pos;
231
+ pDstMatch++;
232
+
233
+ best_match_len = match_len;
234
+
235
+ if (match_len == max_match_len)
236
+ {
237
+ *pLeft = pNode->m_left;
238
+ *pRight = pNode->m_right;
239
+ break;
240
+ }
241
+ }
242
+ else if (m_all_matches)
243
+ {
244
+ pDstMatch->m_len = static_cast<uint16>(match_len - CLZBase::cMinMatchLen);
245
+ pDstMatch->m_dist = delta_pos;
246
+ pDstMatch++;
247
+ }
248
+ else if ((best_match_len > 2) && (best_match_len == match_len))
249
+ {
250
+ uint bestMatchDist = pDstMatch[-1].m_dist;
251
+ uint compMatchDist = delta_pos;
252
+
253
+ uint bestMatchSlot, bestMatchSlotOfs;
254
+ m_pLZBase->compute_lzx_position_slot(bestMatchDist, bestMatchSlot, bestMatchSlotOfs);
255
+
256
+ uint compMatchSlot, compMatchOfs;
257
+ m_pLZBase->compute_lzx_position_slot(compMatchDist, compMatchSlot, compMatchOfs);
258
+
259
+ // If both matches uses the same match slot, choose the one with the offset containing the lowest nibble as these bits separately entropy coded.
260
+ // This could choose a match which is further away in the absolute sense, but closer in a coding sense.
261
+ if ( (compMatchSlot < bestMatchSlot) ||
262
+ ((compMatchSlot >= 8) && (compMatchSlot == bestMatchSlot) && ((compMatchOfs & 15) < (bestMatchSlotOfs & 15))) )
263
+ {
264
+ LZHAM_ASSERT((pDstMatch[-1].m_len + (uint)CLZBase::cMinMatchLen) == best_match_len);
265
+ pDstMatch[-1].m_dist = delta_pos;
266
+ }
267
+ else if ((match_len < max_match_len) && (compMatchSlot <= bestMatchSlot))
268
+ {
269
+ // Choose the match which has lowest hamming distance in the mismatch byte for a tiny win on binary files.
270
+ // TODO: This competes against the prev. optimization.
271
+ uint desired_mismatch_byte = pIns[match_len];
272
+
273
+ uint cur_mismatch_byte = pDict[(insert_pos - bestMatchDist + match_len) & m_max_dict_size_mask];
274
+ uint cur_mismatch_dist = g_hamming_dist[cur_mismatch_byte ^ desired_mismatch_byte];
275
+
276
+ uint new_mismatch_byte = pComp[match_len];
277
+ uint new_mismatch_dist = g_hamming_dist[new_mismatch_byte ^ desired_mismatch_byte];
278
+ if (new_mismatch_dist < cur_mismatch_dist)
279
+ {
280
+ LZHAM_ASSERT((pDstMatch[-1].m_len + (uint)CLZBase::cMinMatchLen) == best_match_len);
281
+ pDstMatch[-1].m_dist = delta_pos;
282
+ }
283
+ }
284
+ }
285
+
286
+ uint new_pos;
287
+ if (pComp[match_len] < pIns[match_len])
288
+ {
289
+ *pLeft = cur_pos;
290
+ pLeft = &pNode->m_right;
291
+ new_pos = pNode->m_right;
292
+ }
293
+ else
294
+ {
295
+ *pRight = cur_pos;
296
+ pRight = &pNode->m_left;
297
+ new_pos = pNode->m_left;
298
+ }
299
+ if (new_pos == cur_pos)
300
+ break;
301
+ cur_pos = new_pos;
302
+ }
303
+
304
+ const uint num_matches = (uint)(pDstMatch - temp_matches);
305
+
306
+ if (num_matches)
307
+ {
308
+ pDstMatch[-1].m_dist |= 0x80000000;
309
+
310
+ const uint num_matches_to_write = LZHAM_MIN(num_matches, m_max_matches);
311
+
312
+ const uint match_ref_ofs = static_cast<uint>(atomic_exchange_add(&m_next_match_ref, num_matches_to_write));
313
+
314
+ memcpy(&m_matches[match_ref_ofs],
315
+ temp_matches + (num_matches - num_matches_to_write),
316
+ sizeof(temp_matches[0]) * num_matches_to_write);
317
+
318
+ // FIXME: This is going to really hurt on platforms requiring export barriers.
319
+ LZHAM_MEMORY_EXPORT_BARRIER
320
+
321
+ atomic_exchange32((atomic32_t*)&m_match_refs[static_cast<uint>(fill_lookahead_pos - m_fill_lookahead_pos)], match_ref_ofs);
322
+ }
323
+ else
324
+ {
325
+ atomic_exchange32((atomic32_t*)&m_match_refs[static_cast<uint>(fill_lookahead_pos - m_fill_lookahead_pos)], -2);
326
+ }
327
+
328
+ fill_lookahead_pos++;
329
+ fill_lookahead_size--;
330
+ fill_dict_size++;
331
+ }
332
+
333
+ while (fill_lookahead_size)
334
+ {
335
+ uint insert_pos = fill_lookahead_pos & m_max_dict_size_mask;
336
+ m_nodes[insert_pos].m_left = 0;
337
+ m_nodes[insert_pos].m_right = 0;
338
+
339
+ atomic_exchange32((atomic32_t*)&m_match_refs[static_cast<uint>(fill_lookahead_pos - m_fill_lookahead_pos)], -2);
340
+
341
+ fill_lookahead_pos++;
342
+ fill_lookahead_size--;
343
+ fill_dict_size++;
344
+ }
345
+
346
+ atomic_increment32(&m_num_completed_helper_threads);
347
+ }
348
+
349
+ bool search_accelerator::find_len2_matches()
350
+ {
351
+ if (!m_digram_hash.size())
352
+ {
353
+ if (!m_digram_hash.try_resize(cDigramHashSize))
354
+ return false;
355
+ }
356
+
357
+ if (m_digram_next.size() < m_lookahead_size)
358
+ {
359
+ if (!m_digram_next.try_resize(m_lookahead_size))
360
+ return false;
361
+ }
362
+
363
+ uint lookahead_dict_pos = m_lookahead_pos & m_max_dict_size_mask;
364
+
365
+ for (int lookahead_ofs = 0; lookahead_ofs < ((int)m_lookahead_size - 1); ++lookahead_ofs, ++lookahead_dict_pos)
366
+ {
367
+ uint c0 = m_dict[lookahead_dict_pos];
368
+ uint c1 = m_dict[lookahead_dict_pos + 1];
369
+
370
+ uint h = hash2_to_12(c0, c1) & (cDigramHashSize - 1);
371
+
372
+ m_digram_next[lookahead_ofs] = m_digram_hash[h];
373
+ m_digram_hash[h] = m_lookahead_pos + lookahead_ofs;
374
+ }
375
+
376
+ m_digram_next[m_lookahead_size - 1] = 0;
377
+
378
+ return true;
379
+ }
380
+
381
+ uint search_accelerator::get_len2_match(uint lookahead_ofs)
382
+ {
383
+ if ((m_fill_lookahead_size - lookahead_ofs) < 2)
384
+ return 0;
385
+
386
+ uint cur_pos = m_lookahead_pos + lookahead_ofs;
387
+
388
+ uint next_match_pos = m_digram_next[cur_pos - m_fill_lookahead_pos];
389
+
390
+ uint match_dist = cur_pos - next_match_pos;
391
+
392
+ if ((!match_dist) || (match_dist > CLZBase::cMaxLen2MatchDist) || (match_dist > (m_cur_dict_size + lookahead_ofs)))
393
+ return 0;
394
+
395
+ const uint8* pCur = &m_dict[cur_pos & m_max_dict_size_mask];
396
+ const uint8* pMatch = &m_dict[next_match_pos & m_max_dict_size_mask];
397
+
398
+ if ((pCur[0] == pMatch[0]) && (pCur[1] == pMatch[1]))
399
+ return match_dist;
400
+
401
+ return 0;
402
+ }
403
+
404
+ bool search_accelerator::find_all_matches(uint num_bytes)
405
+ {
406
+ if (!m_matches.try_resize_no_construct(m_max_probes * num_bytes))
407
+ return false;
408
+
409
+ if (!m_match_refs.try_resize_no_construct(num_bytes))
410
+ return false;
411
+
412
+ memset(m_match_refs.get_ptr(), 0xFF, m_match_refs.size_in_bytes());
413
+
414
+ m_fill_lookahead_pos = m_lookahead_pos;
415
+ m_fill_lookahead_size = num_bytes;
416
+ m_fill_dict_size = m_cur_dict_size;
417
+
418
+ m_next_match_ref = 0;
419
+
420
+ if (!m_pTask_pool)
421
+ {
422
+ find_all_matches_callback(0, NULL);
423
+
424
+ m_num_completed_helper_threads = 0;
425
+ }
426
+ else
427
+ {
428
+ if (!m_hash_thread_index.try_resize_no_construct(0x10000))
429
+ return false;
430
+
431
+ memset(m_hash_thread_index.get_ptr(), 0xFF, m_hash_thread_index.size_in_bytes());
432
+
433
+ uint next_thread_index = 0;
434
+ const uint8* pDict = &m_dict[m_lookahead_pos & m_max_dict_size_mask];
435
+ uint num_unique_trigrams = 0;
436
+
437
+ if (num_bytes >= 3)
438
+ {
439
+ uint c0 = pDict[0];
440
+ uint c1 = pDict[1];
441
+
442
+ const int limit = ((int)num_bytes - 2);
443
+ for (int i = 0; i < limit; i++)
444
+ {
445
+ uint c2 = pDict[2];
446
+ uint t = hash3_to_16(c0, c1, c2);
447
+ c0 = c1;
448
+ c1 = c2;
449
+
450
+ pDict++;
451
+
452
+ if (m_hash_thread_index[t] == UINT8_MAX)
453
+ {
454
+ num_unique_trigrams++;
455
+
456
+ m_hash_thread_index[t] = static_cast<uint8>(next_thread_index);
457
+ if (++next_thread_index == m_max_helper_threads)
458
+ next_thread_index = 0;
459
+ }
460
+ }
461
+ }
462
+
463
+ m_num_completed_helper_threads = 0;
464
+
465
+ if (!m_pTask_pool->queue_multiple_object_tasks(this, &search_accelerator::find_all_matches_callback, 0, m_max_helper_threads))
466
+ return false;
467
+ }
468
+
469
+ return find_len2_matches();
470
+ }
471
+
472
+ bool search_accelerator::add_bytes_begin(uint num_bytes, const uint8* pBytes)
473
+ {
474
+ LZHAM_ASSERT(num_bytes <= m_max_dict_size);
475
+ LZHAM_ASSERT(!m_lookahead_size);
476
+
477
+ uint add_pos = m_lookahead_pos & m_max_dict_size_mask;
478
+ LZHAM_ASSERT((add_pos + num_bytes) <= m_max_dict_size);
479
+
480
+ memcpy(&m_dict[add_pos], pBytes, num_bytes);
481
+
482
+ uint dict_bytes_to_mirror = LZHAM_MIN(static_cast<uint>(CLZBase::cMaxHugeMatchLen), m_max_dict_size);
483
+ if (add_pos < dict_bytes_to_mirror)
484
+ memcpy(&m_dict[m_max_dict_size], &m_dict[0], dict_bytes_to_mirror);
485
+
486
+ m_lookahead_size = num_bytes;
487
+
488
+ uint max_possible_dict_size = m_max_dict_size - num_bytes;
489
+ m_cur_dict_size = LZHAM_MIN(m_cur_dict_size, max_possible_dict_size);
490
+
491
+ m_next_match_ref = 0;
492
+
493
+ return find_all_matches(num_bytes);
494
+ }
495
+
496
+ void search_accelerator::add_bytes_end()
497
+ {
498
+ if (m_pTask_pool)
499
+ {
500
+ m_pTask_pool->join();
501
+ }
502
+
503
+ LZHAM_ASSERT((uint)m_next_match_ref <= m_matches.size());
504
+ }
505
+
506
+ dict_match* search_accelerator::find_matches(uint lookahead_ofs, bool spin)
507
+ {
508
+ LZHAM_ASSERT(lookahead_ofs < m_lookahead_size);
509
+
510
+ const uint match_ref_ofs = static_cast<uint>(m_lookahead_pos - m_fill_lookahead_pos + lookahead_ofs);
511
+
512
+ int match_ref;
513
+ uint spin_count = 0;
514
+
515
+ // This may spin until the match finder job(s) catch up to the caller's lookahead position.
516
+ for ( ; ; )
517
+ {
518
+ match_ref = static_cast<int>(m_match_refs[match_ref_ofs]);
519
+ if (match_ref == -2)
520
+ return NULL;
521
+ else if (match_ref != -1)
522
+ break;
523
+
524
+ spin_count++;
525
+ const uint cMaxSpinCount = 1000;
526
+ if ((spin) && (spin_count < cMaxSpinCount))
527
+ {
528
+ lzham_yield_processor();
529
+ lzham_yield_processor();
530
+ lzham_yield_processor();
531
+ lzham_yield_processor();
532
+ lzham_yield_processor();
533
+ lzham_yield_processor();
534
+ lzham_yield_processor();
535
+ lzham_yield_processor();
536
+
537
+ LZHAM_MEMORY_IMPORT_BARRIER
538
+ }
539
+ else
540
+ {
541
+ spin_count = cMaxSpinCount;
542
+
543
+ lzham_sleep(1);
544
+ }
545
+ }
546
+
547
+ LZHAM_MEMORY_IMPORT_BARRIER
548
+
549
+ return &m_matches[match_ref];
550
+ }
551
+
552
+ void search_accelerator::advance_bytes(uint num_bytes)
553
+ {
554
+ LZHAM_ASSERT(num_bytes <= m_lookahead_size);
555
+
556
+ m_lookahead_pos += num_bytes;
557
+ m_lookahead_size -= num_bytes;
558
+
559
+ m_cur_dict_size += num_bytes;
560
+ LZHAM_ASSERT(m_cur_dict_size <= m_max_dict_size);
561
+ }
562
+ }