extlzham 0.0.1.PROTOTYPE3-x86-mingw32

Sign up to get free protection for your applications and to get access to all the features.
Files changed (69) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.md +27 -0
  3. data/README.md +74 -0
  4. data/Rakefile +152 -0
  5. data/contrib/lzham/LICENSE +22 -0
  6. data/contrib/lzham/README.md +209 -0
  7. data/contrib/lzham/include/lzham.h +781 -0
  8. data/contrib/lzham/lzhamcomp/lzham_comp.h +38 -0
  9. data/contrib/lzham/lzhamcomp/lzham_lzbase.cpp +244 -0
  10. data/contrib/lzham/lzhamcomp/lzham_lzbase.h +45 -0
  11. data/contrib/lzham/lzhamcomp/lzham_lzcomp.cpp +608 -0
  12. data/contrib/lzham/lzhamcomp/lzham_lzcomp_internal.cpp +1966 -0
  13. data/contrib/lzham/lzhamcomp/lzham_lzcomp_internal.h +472 -0
  14. data/contrib/lzham/lzhamcomp/lzham_lzcomp_state.cpp +1413 -0
  15. data/contrib/lzham/lzhamcomp/lzham_match_accel.cpp +562 -0
  16. data/contrib/lzham/lzhamcomp/lzham_match_accel.h +146 -0
  17. data/contrib/lzham/lzhamcomp/lzham_null_threading.h +97 -0
  18. data/contrib/lzham/lzhamcomp/lzham_pthreads_threading.cpp +229 -0
  19. data/contrib/lzham/lzhamcomp/lzham_pthreads_threading.h +520 -0
  20. data/contrib/lzham/lzhamcomp/lzham_threading.h +12 -0
  21. data/contrib/lzham/lzhamcomp/lzham_win32_threading.cpp +220 -0
  22. data/contrib/lzham/lzhamcomp/lzham_win32_threading.h +368 -0
  23. data/contrib/lzham/lzhamdecomp/lzham_assert.cpp +66 -0
  24. data/contrib/lzham/lzhamdecomp/lzham_assert.h +40 -0
  25. data/contrib/lzham/lzhamdecomp/lzham_checksum.cpp +73 -0
  26. data/contrib/lzham/lzhamdecomp/lzham_checksum.h +13 -0
  27. data/contrib/lzham/lzhamdecomp/lzham_config.h +23 -0
  28. data/contrib/lzham/lzhamdecomp/lzham_core.h +264 -0
  29. data/contrib/lzham/lzhamdecomp/lzham_decomp.h +37 -0
  30. data/contrib/lzham/lzhamdecomp/lzham_helpers.h +54 -0
  31. data/contrib/lzham/lzhamdecomp/lzham_huffman_codes.cpp +262 -0
  32. data/contrib/lzham/lzhamdecomp/lzham_huffman_codes.h +14 -0
  33. data/contrib/lzham/lzhamdecomp/lzham_lzdecomp.cpp +1527 -0
  34. data/contrib/lzham/lzhamdecomp/lzham_lzdecompbase.cpp +131 -0
  35. data/contrib/lzham/lzhamdecomp/lzham_lzdecompbase.h +89 -0
  36. data/contrib/lzham/lzhamdecomp/lzham_math.h +142 -0
  37. data/contrib/lzham/lzhamdecomp/lzham_mem.cpp +284 -0
  38. data/contrib/lzham/lzhamdecomp/lzham_mem.h +112 -0
  39. data/contrib/lzham/lzhamdecomp/lzham_platform.cpp +157 -0
  40. data/contrib/lzham/lzhamdecomp/lzham_platform.h +284 -0
  41. data/contrib/lzham/lzhamdecomp/lzham_prefix_coding.cpp +351 -0
  42. data/contrib/lzham/lzhamdecomp/lzham_prefix_coding.h +146 -0
  43. data/contrib/lzham/lzhamdecomp/lzham_symbol_codec.cpp +1484 -0
  44. data/contrib/lzham/lzhamdecomp/lzham_symbol_codec.h +556 -0
  45. data/contrib/lzham/lzhamdecomp/lzham_timer.cpp +147 -0
  46. data/contrib/lzham/lzhamdecomp/lzham_timer.h +99 -0
  47. data/contrib/lzham/lzhamdecomp/lzham_traits.h +141 -0
  48. data/contrib/lzham/lzhamdecomp/lzham_types.h +97 -0
  49. data/contrib/lzham/lzhamdecomp/lzham_utils.h +58 -0
  50. data/contrib/lzham/lzhamdecomp/lzham_vector.cpp +75 -0
  51. data/contrib/lzham/lzhamdecomp/lzham_vector.h +588 -0
  52. data/contrib/lzham/lzhamlib/lzham_lib.cpp +179 -0
  53. data/examples/basic.rb +48 -0
  54. data/ext/constants.c +64 -0
  55. data/ext/decoder.c +313 -0
  56. data/ext/depend +5 -0
  57. data/ext/encoder.c +372 -0
  58. data/ext/error.c +80 -0
  59. data/ext/extconf.rb +29 -0
  60. data/ext/extlzham.c +34 -0
  61. data/ext/extlzham.h +62 -0
  62. data/gemstub.rb +22 -0
  63. data/lib/2.0/extlzham.so +0 -0
  64. data/lib/2.1/extlzham.so +0 -0
  65. data/lib/2.2/extlzham.so +0 -0
  66. data/lib/extlzham.rb +158 -0
  67. data/lib/extlzham/version.rb +5 -0
  68. data/test/test_extlzham.rb +35 -0
  69. metadata +156 -0
@@ -0,0 +1,562 @@
1
+ // File: lzham_match_accel.cpp
2
+ // See Copyright Notice and license at the end of include/lzham.h
3
+ #include "lzham_core.h"
4
+ #include "lzham_match_accel.h"
5
+ #include "lzham_timer.h"
6
+
7
+ namespace lzham
8
+ {
9
+ static inline uint32 hash2_to_12(uint c0, uint c1)
10
+ {
11
+ return c0 ^ (c1 << 4);
12
+ }
13
+
14
+ static inline uint32 hash3_to_16(uint c0, uint c1, uint c2)
15
+ {
16
+ return (c0 | (c1 << 8)) ^ (c2 << 4);
17
+ }
18
+
19
+ search_accelerator::search_accelerator() :
20
+ m_pLZBase(NULL),
21
+ m_pTask_pool(NULL),
22
+ m_max_helper_threads(0),
23
+ m_max_dict_size(0),
24
+ m_max_dict_size_mask(0),
25
+ m_lookahead_pos(0),
26
+ m_lookahead_size(0),
27
+ m_cur_dict_size(0),
28
+ m_fill_lookahead_pos(0),
29
+ m_fill_lookahead_size(0),
30
+ m_fill_dict_size(0),
31
+ m_max_probes(0),
32
+ m_max_matches(0),
33
+ m_all_matches(false),
34
+ m_next_match_ref(0),
35
+ m_num_completed_helper_threads(0)
36
+ {
37
+ }
38
+
39
+ bool search_accelerator::init(CLZBase* pLZBase, task_pool* pPool, uint max_helper_threads, uint max_dict_size, uint max_matches, bool all_matches, uint max_probes)
40
+ {
41
+ LZHAM_ASSERT(pLZBase);
42
+ LZHAM_ASSERT(max_dict_size && math::is_power_of_2(max_dict_size));
43
+ LZHAM_ASSERT(max_probes);
44
+
45
+ m_max_probes = LZHAM_MIN(cMatchAccelMaxSupportedProbes, max_probes);
46
+
47
+ m_pLZBase = pLZBase;
48
+ m_pTask_pool = max_helper_threads ? pPool : NULL;
49
+ m_max_helper_threads = m_pTask_pool ? max_helper_threads : 0;
50
+ m_max_matches = LZHAM_MIN(m_max_probes, max_matches);
51
+ m_all_matches = all_matches;
52
+
53
+ m_max_dict_size = max_dict_size;
54
+ m_max_dict_size_mask = m_max_dict_size - 1;
55
+ m_cur_dict_size = 0;
56
+ m_lookahead_size = 0;
57
+ m_lookahead_pos = 0;
58
+ m_fill_lookahead_pos = 0;
59
+ m_fill_lookahead_size = 0;
60
+ m_fill_dict_size = 0;
61
+ m_num_completed_helper_threads = 0;
62
+
63
+ if (!m_dict.try_resize_no_construct(max_dict_size + LZHAM_MIN(m_max_dict_size, static_cast<uint>(CLZBase::cMaxHugeMatchLen))))
64
+ return false;
65
+
66
+ if (!m_hash.try_resize_no_construct(cHashSize))
67
+ return false;
68
+
69
+ if (!m_nodes.try_resize_no_construct(max_dict_size))
70
+ return false;
71
+
72
+ memset(m_hash.get_ptr(), 0, m_hash.size_in_bytes());
73
+
74
+ return true;
75
+ }
76
+
77
+ void search_accelerator::reset()
78
+ {
79
+ m_cur_dict_size = 0;
80
+ m_lookahead_size = 0;
81
+ m_lookahead_pos = 0;
82
+ m_fill_lookahead_pos = 0;
83
+ m_fill_lookahead_size = 0;
84
+ m_fill_dict_size = 0;
85
+ m_num_completed_helper_threads = 0;
86
+
87
+ // Clearing the hash tables is only necessary for determinism (otherwise, it's possible the matches returned after a reset will depend on the data processes before the reset).
88
+ if (m_hash.size())
89
+ memset(m_hash.get_ptr(), 0, m_hash.size_in_bytes());
90
+ if (m_digram_hash.size())
91
+ memset(m_digram_hash.get_ptr(), 0, m_digram_hash.size_in_bytes());
92
+ }
93
+
94
+ void search_accelerator::flush()
95
+ {
96
+ m_cur_dict_size = 0;
97
+ }
98
+
99
+ uint search_accelerator::get_max_add_bytes() const
100
+ {
101
+ uint add_pos = static_cast<uint>(m_lookahead_pos & (m_max_dict_size - 1));
102
+ return m_max_dict_size - add_pos;
103
+ }
104
+
105
+ static uint8 g_hamming_dist[256] =
106
+ {
107
+ 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
108
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
109
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
110
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
111
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
112
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
113
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
114
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
115
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
116
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
117
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
118
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
119
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
120
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
121
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
122
+ 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
123
+ };
124
+
125
// Match-finder worker. `data` carries this worker's thread index; `pData_ptr` is unused.
// Walks every position of the fill window [m_fill_lookahead_pos, + m_fill_lookahead_size).
// For each position with at least 3 bytes left whose trigram hash is assigned to this worker
// (or for every such position when m_hash_thread_index is empty) it:
//   - inserts the position into the binary tree rooted at m_hash[h], using m_nodes for the links,
//   - collects the matches found on the way down into temp_matches,
//   - copies at most m_max_matches of them into m_matches and publishes their start offset in
//     m_match_refs, or publishes -2 when no match was found.
// The last (fewer than 3) positions get empty tree links and a -2 ref.
// Finally m_num_completed_helper_threads is incremented.
+ void search_accelerator::find_all_matches_callback(uint64 data, void* pData_ptr)
126
+ {
127
+ scoped_perf_section find_all_matches_timer("find_all_matches_callback");
128
+
129
+ LZHAM_NOTE_UNUSED(pData_ptr);
130
+ const uint thread_index = (uint)data;
131
+
132
// Per-position scratch list; each probe appends at most one entry.
+ dict_match temp_matches[cMatchAccelMaxSupportedProbes * 2];
133
+
134
// Work on local copies of the fill window state.
+ uint fill_lookahead_pos = m_fill_lookahead_pos;
135
+ uint fill_dict_size = m_fill_dict_size;
136
+ uint fill_lookahead_size = m_fill_lookahead_size;
137
+
138
// Prime the rolling trigram hash with the first two bytes of the window.
+ uint c0 = 0, c1 = 0;
139
+ if (fill_lookahead_size >= 2)
140
+ {
141
+ c0 = m_dict[fill_lookahead_pos & m_max_dict_size_mask];
142
+ c1 = m_dict[(fill_lookahead_pos & m_max_dict_size_mask) + 1];
143
+ }
144
+
145
+ const uint8* pDict = m_dict.get_ptr();
146
+
147
+ while (fill_lookahead_size >= 3)
148
+ {
149
+ uint insert_pos = fill_lookahead_pos & m_max_dict_size_mask;
150
+
151
+ uint c2 = pDict[insert_pos + 2];
152
+ uint h = hash3_to_16(c0, c1, c2);
153
+ c0 = c1;
154
+ c1 = c2;
155
+
156
+ LZHAM_ASSERT(!m_hash_thread_index.size() || (m_hash_thread_index[h] != UINT8_MAX));
157
+
158
+ // Only process those strings that this worker thread was assigned to - this allows us to manipulate multiple trees in parallel with no worries about synchronization.
159
+ if (m_hash_thread_index.size() && (m_hash_thread_index[h] != thread_index))
160
+ {
161
+ fill_lookahead_pos++;
162
+ fill_lookahead_size--;
163
+ fill_dict_size++;
164
+ continue;
165
+ }
166
+
167
+ dict_match* pDstMatch = temp_matches;
168
+
169
// The previous tree root for this hash becomes the first candidate; the new position becomes the root.
+ uint cur_pos = m_hash[h];
170
+ m_hash[h] = static_cast<uint>(fill_lookahead_pos);
171
+
172
// pLeft/pRight point at the link slots that still have to be filled in while descending.
+ uint *pLeft = &m_nodes[insert_pos].m_left;
173
+ uint *pRight = &m_nodes[insert_pos].m_right;
174
+
175
+ const uint max_match_len = LZHAM_MIN(static_cast<uint>(CLZBase::cMaxMatchLen), fill_lookahead_size);
176
// Starting at 2 means only matches of length 3 or more can become the "best" match.
+ uint best_match_len = 2;
177
+
178
+ const uint8* pIns = &pDict[insert_pos];
179
+
180
+ uint n = m_max_probes;
181
+ for ( ; ; )
182
+ {
183
+ uint delta_pos = fill_lookahead_pos - cur_pos;
184
// Stop when the probe budget is used up, the candidate is the position itself, or it lies outside the dictionary.
+ if ((n-- == 0) || (!delta_pos) || (delta_pos >= fill_dict_size))
185
+ {
186
+ *pLeft = 0;
187
+ *pRight = 0;
188
+ break;
189
+ }
190
+
191
+ uint pos = cur_pos & m_max_dict_size_mask;
192
+ node *pNode = &m_nodes[pos];
193
+
194
+ // Unfortunately, the initial compare match_len must be 0 because of the way we hash and truncate matches at the end of each block.
195
+ uint match_len = 0;
196
+ const uint8* pComp = &pDict[pos];
197
+
198
+ #if LZHAM_PLATFORM_X360 || (LZHAM_USE_UNALIGNED_INT_LOADS == 0)
199
+ for ( ; match_len < max_match_len; match_len++)
200
+ if (pComp[match_len] != pIns[match_len])
201
+ break;
202
+ #else
203
+ // Compare a qword at a time for a bit more efficiency.
204
+ const uint64* pComp_end = reinterpret_cast<const uint64*>(pComp + max_match_len - 7);
205
+ const uint64* pComp_cur = reinterpret_cast<const uint64*>(pComp);
206
+ const uint64* pIns_cur = reinterpret_cast<const uint64*>(pIns);
207
+ while (pComp_cur < pComp_end)
208
+ {
209
+ if (*pComp_cur != *pIns_cur)
210
+ break;
211
+ pComp_cur++;
212
+ pIns_cur++;
213
+ }
214
// Finish byte by byte from wherever the qword loop stopped.
+ uint alt_match_len = static_cast<uint>(reinterpret_cast<const uint8*>(pComp_cur) - reinterpret_cast<const uint8*>(pComp));
215
+ for ( ; alt_match_len < max_match_len; alt_match_len++)
216
+ if (pComp[alt_match_len] != pIns[alt_match_len])
217
+ break;
218
+ #ifdef LZVERIFY
219
+ for ( ; match_len < max_match_len; match_len++)
220
+ if (pComp[match_len] != pIns[match_len])
221
+ break;
222
+ LZHAM_VERIFY(alt_match_len == match_len);
223
+ #endif
224
+ match_len = alt_match_len;
225
+ #endif
226
+
227
+ if (match_len > best_match_len)
228
+ {
229
+ pDstMatch->m_len = static_cast<uint16>(match_len - CLZBase::cMinMatchLen);
230
+ pDstMatch->m_dist = delta_pos;
231
+ pDstMatch++;
232
+
233
+ best_match_len = match_len;
234
+
235
// A maximum-length match cannot be ordered against the new string, so the new node takes over the candidate's links.
+ if (match_len == max_match_len)
236
+ {
237
+ *pLeft = pNode->m_left;
238
+ *pRight = pNode->m_right;
239
+ break;
240
+ }
241
+ }
242
// In all-matches mode every probed candidate is recorded, whatever its length.
+ else if (m_all_matches)
243
+ {
244
+ pDstMatch->m_len = static_cast<uint16>(match_len - CLZBase::cMinMatchLen);
245
+ pDstMatch->m_dist = delta_pos;
246
+ pDstMatch++;
247
+ }
248
// Same length as the current best: possibly swap in this candidate's distance.
+ else if ((best_match_len > 2) && (best_match_len == match_len))
249
+ {
250
+ uint bestMatchDist = pDstMatch[-1].m_dist;
251
+ uint compMatchDist = delta_pos;
252
+
253
+ uint bestMatchSlot, bestMatchSlotOfs;
254
+ m_pLZBase->compute_lzx_position_slot(bestMatchDist, bestMatchSlot, bestMatchSlotOfs);
255
+
256
+ uint compMatchSlot, compMatchOfs;
257
+ m_pLZBase->compute_lzx_position_slot(compMatchDist, compMatchSlot, compMatchOfs);
258
+
259
+ // If both matches use the same match slot, choose the one whose offset has the lowest low nibble, as these bits are separately entropy coded.
260
+ // This could choose a match which is further away in the absolute sense, but closer in a coding sense.
261
+ if ( (compMatchSlot < bestMatchSlot) ||
262
+ ((compMatchSlot >= 8) && (compMatchSlot == bestMatchSlot) && ((compMatchOfs & 15) < (bestMatchSlotOfs & 15))) )
263
+ {
264
+ LZHAM_ASSERT((pDstMatch[-1].m_len + (uint)CLZBase::cMinMatchLen) == best_match_len);
265
+ pDstMatch[-1].m_dist = delta_pos;
266
+ }
267
+ else if ((match_len < max_match_len) && (compMatchSlot <= bestMatchSlot))
268
+ {
269
+ // Choose the match which has lowest hamming distance in the mismatch byte for a tiny win on binary files.
270
+ // TODO: This competes against the prev. optimization.
271
+ uint desired_mismatch_byte = pIns[match_len];
272
+
273
+ uint cur_mismatch_byte = pDict[(insert_pos - bestMatchDist + match_len) & m_max_dict_size_mask];
274
+ uint cur_mismatch_dist = g_hamming_dist[cur_mismatch_byte ^ desired_mismatch_byte];
275
+
276
+ uint new_mismatch_byte = pComp[match_len];
277
+ uint new_mismatch_dist = g_hamming_dist[new_mismatch_byte ^ desired_mismatch_byte];
278
+ if (new_mismatch_dist < cur_mismatch_dist)
279
+ {
280
+ LZHAM_ASSERT((pDstMatch[-1].m_len + (uint)CLZBase::cMinMatchLen) == best_match_len);
281
+ pDstMatch[-1].m_dist = delta_pos;
282
+ }
283
+ }
284
+ }
285
+
286
// Descend: the first mismatching byte decides on which side of the new node the candidate is linked.
+ uint new_pos;
287
+ if (pComp[match_len] < pIns[match_len])
288
+ {
289
+ *pLeft = cur_pos;
290
+ pLeft = &pNode->m_right;
291
+ new_pos = pNode->m_right;
292
+ }
293
+ else
294
+ {
295
+ *pRight = cur_pos;
296
+ pRight = &pNode->m_left;
297
+ new_pos = pNode->m_left;
298
+ }
299
// A node whose child link refers back to itself ends the walk.
+ if (new_pos == cur_pos)
300
+ break;
301
+ cur_pos = new_pos;
302
+ }
303
+
304
+ const uint num_matches = (uint)(pDstMatch - temp_matches);
305
+
306
+ if (num_matches)
307
+ {
308
// Set the top bit of the distance in the last collected match (presumably an end-of-list marker for the consumer - confirm).
+ pDstMatch[-1].m_dist |= 0x80000000;
309
+
310
+ const uint num_matches_to_write = LZHAM_MIN(num_matches, m_max_matches);
311
+
312
// Reserve a run of entries in the shared m_matches array.
+ const uint match_ref_ofs = static_cast<uint>(atomic_exchange_add(&m_next_match_ref, num_matches_to_write));
313
+
314
// When truncating, the entries at the end of temp_matches are the ones kept.
+ memcpy(&m_matches[match_ref_ofs],
315
+ temp_matches + (num_matches - num_matches_to_write),
316
+ sizeof(temp_matches[0]) * num_matches_to_write);
317
+
318
+ // FIXME: This is going to really hurt on platforms requiring export barriers.
319
+ LZHAM_MEMORY_EXPORT_BARRIER
320
+
321
// Publish the offset only after the match data has been written.
+ atomic_exchange32((atomic32_t*)&m_match_refs[static_cast<uint>(fill_lookahead_pos - m_fill_lookahead_pos)], match_ref_ofs);
322
+ }
323
+ else
324
+ {
325
// -2 tells find_matches() that this position has no matches.
+ atomic_exchange32((atomic32_t*)&m_match_refs[static_cast<uint>(fill_lookahead_pos - m_fill_lookahead_pos)], -2);
326
+ }
327
+
328
+ fill_lookahead_pos++;
329
+ fill_lookahead_size--;
330
+ fill_dict_size++;
331
+ }
332
+
333
// Fewer than 3 bytes remain: no trigram can be formed, so these positions get empty links and no matches.
+ while (fill_lookahead_size)
334
+ {
335
+ uint insert_pos = fill_lookahead_pos & m_max_dict_size_mask;
336
+ m_nodes[insert_pos].m_left = 0;
337
+ m_nodes[insert_pos].m_right = 0;
338
+
339
+ atomic_exchange32((atomic32_t*)&m_match_refs[static_cast<uint>(fill_lookahead_pos - m_fill_lookahead_pos)], -2);
340
+
341
+ fill_lookahead_pos++;
342
+ fill_lookahead_size--;
343
+ fill_dict_size++;
344
+ }
345
+
346
+ atomic_increment32(&m_num_completed_helper_threads);
347
+ }
348
+
349
+ bool search_accelerator::find_len2_matches()
350
+ {
351
+ if (!m_digram_hash.size())
352
+ {
353
+ if (!m_digram_hash.try_resize(cDigramHashSize))
354
+ return false;
355
+ }
356
+
357
+ if (m_digram_next.size() < m_lookahead_size)
358
+ {
359
+ if (!m_digram_next.try_resize(m_lookahead_size))
360
+ return false;
361
+ }
362
+
363
+ uint lookahead_dict_pos = m_lookahead_pos & m_max_dict_size_mask;
364
+
365
+ for (int lookahead_ofs = 0; lookahead_ofs < ((int)m_lookahead_size - 1); ++lookahead_ofs, ++lookahead_dict_pos)
366
+ {
367
+ uint c0 = m_dict[lookahead_dict_pos];
368
+ uint c1 = m_dict[lookahead_dict_pos + 1];
369
+
370
+ uint h = hash2_to_12(c0, c1) & (cDigramHashSize - 1);
371
+
372
+ m_digram_next[lookahead_ofs] = m_digram_hash[h];
373
+ m_digram_hash[h] = m_lookahead_pos + lookahead_ofs;
374
+ }
375
+
376
+ m_digram_next[m_lookahead_size - 1] = 0;
377
+
378
+ return true;
379
+ }
380
+
381
+ uint search_accelerator::get_len2_match(uint lookahead_ofs)
382
+ {
383
+ if ((m_fill_lookahead_size - lookahead_ofs) < 2)
384
+ return 0;
385
+
386
+ uint cur_pos = m_lookahead_pos + lookahead_ofs;
387
+
388
+ uint next_match_pos = m_digram_next[cur_pos - m_fill_lookahead_pos];
389
+
390
+ uint match_dist = cur_pos - next_match_pos;
391
+
392
+ if ((!match_dist) || (match_dist > CLZBase::cMaxLen2MatchDist) || (match_dist > (m_cur_dict_size + lookahead_ofs)))
393
+ return 0;
394
+
395
+ const uint8* pCur = &m_dict[cur_pos & m_max_dict_size_mask];
396
+ const uint8* pMatch = &m_dict[next_match_pos & m_max_dict_size_mask];
397
+
398
+ if ((pCur[0] == pMatch[0]) && (pCur[1] == pMatch[1]))
399
+ return match_dist;
400
+
401
+ return 0;
402
+ }
403
+
404
+ bool search_accelerator::find_all_matches(uint num_bytes)
405
+ {
406
+ if (!m_matches.try_resize_no_construct(m_max_probes * num_bytes))
407
+ return false;
408
+
409
+ if (!m_match_refs.try_resize_no_construct(num_bytes))
410
+ return false;
411
+
412
+ memset(m_match_refs.get_ptr(), 0xFF, m_match_refs.size_in_bytes());
413
+
414
+ m_fill_lookahead_pos = m_lookahead_pos;
415
+ m_fill_lookahead_size = num_bytes;
416
+ m_fill_dict_size = m_cur_dict_size;
417
+
418
+ m_next_match_ref = 0;
419
+
420
+ if (!m_pTask_pool)
421
+ {
422
+ find_all_matches_callback(0, NULL);
423
+
424
+ m_num_completed_helper_threads = 0;
425
+ }
426
+ else
427
+ {
428
+ if (!m_hash_thread_index.try_resize_no_construct(0x10000))
429
+ return false;
430
+
431
+ memset(m_hash_thread_index.get_ptr(), 0xFF, m_hash_thread_index.size_in_bytes());
432
+
433
+ uint next_thread_index = 0;
434
+ const uint8* pDict = &m_dict[m_lookahead_pos & m_max_dict_size_mask];
435
+ uint num_unique_trigrams = 0;
436
+
437
+ if (num_bytes >= 3)
438
+ {
439
+ uint c0 = pDict[0];
440
+ uint c1 = pDict[1];
441
+
442
+ const int limit = ((int)num_bytes - 2);
443
+ for (int i = 0; i < limit; i++)
444
+ {
445
+ uint c2 = pDict[2];
446
+ uint t = hash3_to_16(c0, c1, c2);
447
+ c0 = c1;
448
+ c1 = c2;
449
+
450
+ pDict++;
451
+
452
+ if (m_hash_thread_index[t] == UINT8_MAX)
453
+ {
454
+ num_unique_trigrams++;
455
+
456
+ m_hash_thread_index[t] = static_cast<uint8>(next_thread_index);
457
+ if (++next_thread_index == m_max_helper_threads)
458
+ next_thread_index = 0;
459
+ }
460
+ }
461
+ }
462
+
463
+ m_num_completed_helper_threads = 0;
464
+
465
+ if (!m_pTask_pool->queue_multiple_object_tasks(this, &search_accelerator::find_all_matches_callback, 0, m_max_helper_threads))
466
+ return false;
467
+ }
468
+
469
+ return find_len2_matches();
470
+ }
471
+
472
+ bool search_accelerator::add_bytes_begin(uint num_bytes, const uint8* pBytes)
473
+ {
474
+ LZHAM_ASSERT(num_bytes <= m_max_dict_size);
475
+ LZHAM_ASSERT(!m_lookahead_size);
476
+
477
+ uint add_pos = m_lookahead_pos & m_max_dict_size_mask;
478
+ LZHAM_ASSERT((add_pos + num_bytes) <= m_max_dict_size);
479
+
480
+ memcpy(&m_dict[add_pos], pBytes, num_bytes);
481
+
482
+ uint dict_bytes_to_mirror = LZHAM_MIN(static_cast<uint>(CLZBase::cMaxHugeMatchLen), m_max_dict_size);
483
+ if (add_pos < dict_bytes_to_mirror)
484
+ memcpy(&m_dict[m_max_dict_size], &m_dict[0], dict_bytes_to_mirror);
485
+
486
+ m_lookahead_size = num_bytes;
487
+
488
+ uint max_possible_dict_size = m_max_dict_size - num_bytes;
489
+ m_cur_dict_size = LZHAM_MIN(m_cur_dict_size, max_possible_dict_size);
490
+
491
+ m_next_match_ref = 0;
492
+
493
+ return find_all_matches(num_bytes);
494
+ }
495
+
496
+ void search_accelerator::add_bytes_end()
497
+ {
498
+ if (m_pTask_pool)
499
+ {
500
+ m_pTask_pool->join();
501
+ }
502
+
503
+ LZHAM_ASSERT((uint)m_next_match_ref <= m_matches.size());
504
+ }
505
+
506
+ dict_match* search_accelerator::find_matches(uint lookahead_ofs, bool spin)
507
+ {
508
+ LZHAM_ASSERT(lookahead_ofs < m_lookahead_size);
509
+
510
+ const uint match_ref_ofs = static_cast<uint>(m_lookahead_pos - m_fill_lookahead_pos + lookahead_ofs);
511
+
512
+ int match_ref;
513
+ uint spin_count = 0;
514
+
515
+ // This may spin until the match finder job(s) catch up to the caller's lookahead position.
516
+ for ( ; ; )
517
+ {
518
+ match_ref = static_cast<int>(m_match_refs[match_ref_ofs]);
519
+ if (match_ref == -2)
520
+ return NULL;
521
+ else if (match_ref != -1)
522
+ break;
523
+
524
+ spin_count++;
525
+ const uint cMaxSpinCount = 1000;
526
+ if ((spin) && (spin_count < cMaxSpinCount))
527
+ {
528
+ lzham_yield_processor();
529
+ lzham_yield_processor();
530
+ lzham_yield_processor();
531
+ lzham_yield_processor();
532
+ lzham_yield_processor();
533
+ lzham_yield_processor();
534
+ lzham_yield_processor();
535
+ lzham_yield_processor();
536
+
537
+ LZHAM_MEMORY_IMPORT_BARRIER
538
+ }
539
+ else
540
+ {
541
+ spin_count = cMaxSpinCount;
542
+
543
+ lzham_sleep(1);
544
+ }
545
+ }
546
+
547
+ LZHAM_MEMORY_IMPORT_BARRIER
548
+
549
+ return &m_matches[match_ref];
550
+ }
551
+
552
+ void search_accelerator::advance_bytes(uint num_bytes)
553
+ {
554
+ LZHAM_ASSERT(num_bytes <= m_lookahead_size);
555
+
556
+ m_lookahead_pos += num_bytes;
557
+ m_lookahead_size -= num_bytes;
558
+
559
+ m_cur_dict_size += num_bytes;
560
+ LZHAM_ASSERT(m_cur_dict_size <= m_max_dict_size);
561
+ }
562
+ }