zstdlib 0.6.0-x64-mingw32 → 0.9.0-x64-mingw32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140)
  1. checksums.yaml +4 -4
  2. data/CHANGES.md +20 -0
  3. data/README.md +7 -1
  4. data/Rakefile +38 -8
  5. data/ext/{zstdlib → zstdlib_c}/extconf.rb +10 -5
  6. data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.2/zstdlib.c +2 -2
  7. data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.3/zstdlib.c +2 -2
  8. data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.4/zstdlib.c +2 -2
  9. data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.5/zstdlib.c +2 -2
  10. data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.6/zstdlib.c +2 -2
  11. data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.7/zstdlib.c +2 -2
  12. data/ext/zstdlib_c/ruby/zlib-3.0/zstdlib.c +4994 -0
  13. data/ext/zstdlib_c/ruby/zlib-3.1/zstdlib.c +5076 -0
  14. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/adler32.c +0 -0
  15. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/compress.c +0 -0
  16. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/crc32.c +0 -0
  17. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/crc32.h +0 -0
  18. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/deflate.c +0 -0
  19. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/deflate.h +0 -0
  20. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzclose.c +0 -0
  21. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzguts.h +0 -0
  22. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzlib.c +0 -0
  23. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzread.c +0 -0
  24. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzwrite.c +0 -0
  25. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/infback.c +0 -0
  26. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inffast.c +0 -0
  27. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inffast.h +0 -0
  28. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inffixed.h +0 -0
  29. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inflate.c +0 -0
  30. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inflate.h +0 -0
  31. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inftrees.c +0 -0
  32. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inftrees.h +0 -0
  33. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/trees.c +0 -0
  34. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/trees.h +0 -0
  35. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/uncompr.c +0 -0
  36. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/zconf.h +0 -0
  37. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/zlib.h +0 -0
  38. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/zutil.c +0 -0
  39. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/zutil.h +0 -0
  40. data/ext/{zstdlib → zstdlib_c}/zlib.mk +0 -0
  41. data/ext/{zstdlib → zstdlib_c}/zlibwrapper/zlibwrapper.c +1 -5
  42. data/ext/{zstdlib → zstdlib_c}/zlibwrapper.mk +0 -0
  43. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/bitstream.h +75 -57
  44. data/ext/zstdlib_c/zstd-1.5.2/lib/common/compiler.h +335 -0
  45. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/cpu.h +1 -3
  46. data/ext/zstdlib_c/zstd-1.5.2/lib/common/debug.c +24 -0
  47. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/debug.h +22 -49
  48. data/ext/zstdlib_c/zstd-1.5.2/lib/common/entropy_common.c +368 -0
  49. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/error_private.c +3 -1
  50. data/ext/zstdlib_c/zstd-1.5.2/lib/common/error_private.h +159 -0
  51. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/fse.h +51 -42
  52. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/fse_decompress.c +149 -57
  53. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/huf.h +60 -54
  54. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/mem.h +87 -98
  55. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/pool.c +34 -23
  56. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/pool.h +5 -5
  57. data/ext/zstdlib_c/zstd-1.5.2/lib/common/portability_macros.h +137 -0
  58. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/threading.c +10 -8
  59. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/threading.h +4 -3
  60. data/ext/zstdlib_c/zstd-1.5.2/lib/common/xxhash.c +24 -0
  61. data/ext/zstdlib_c/zstd-1.5.2/lib/common/xxhash.h +5686 -0
  62. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/zstd_common.c +10 -10
  63. data/ext/zstdlib_c/zstd-1.5.2/lib/common/zstd_deps.h +111 -0
  64. data/ext/zstdlib_c/zstd-1.5.2/lib/common/zstd_internal.h +493 -0
  65. data/ext/zstdlib_c/zstd-1.5.2/lib/common/zstd_trace.h +163 -0
  66. data/ext/zstdlib_c/zstd-1.5.2/lib/compress/clevels.h +134 -0
  67. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/fse_compress.c +105 -85
  68. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/hist.c +41 -63
  69. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/hist.h +13 -33
  70. data/ext/zstdlib_c/zstd-1.5.2/lib/compress/huf_compress.c +1370 -0
  71. data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_compress.c +6327 -0
  72. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_internal.h +537 -82
  73. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_literals.c +21 -16
  74. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_literals.h +4 -2
  75. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_sequences.c +61 -34
  76. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_sequences.h +10 -3
  77. data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_compress_superblock.c +573 -0
  78. data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_compress_superblock.h +32 -0
  79. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_cwksp.h +236 -95
  80. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_double_fast.c +321 -143
  81. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_double_fast.h +2 -2
  82. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_fast.c +328 -137
  83. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_fast.h +2 -2
  84. data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_lazy.c +2104 -0
  85. data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_lazy.h +125 -0
  86. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_ldm.c +336 -209
  87. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_ldm.h +15 -3
  88. data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_ldm_geartab.h +106 -0
  89. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_opt.c +439 -239
  90. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_opt.h +1 -1
  91. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstdmt_compress.c +205 -462
  92. data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstdmt_compress.h +113 -0
  93. data/ext/zstdlib_c/zstd-1.5.2/lib/decompress/huf_decompress.c +1889 -0
  94. data/ext/zstdlib_c/zstd-1.5.2/lib/decompress/huf_decompress_amd64.S +585 -0
  95. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_ddict.c +20 -16
  96. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_ddict.h +3 -3
  97. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress.c +691 -230
  98. data/ext/zstdlib_c/zstd-1.5.2/lib/decompress/zstd_decompress_block.c +2072 -0
  99. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress_block.h +16 -7
  100. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress_internal.h +71 -10
  101. data/ext/zstdlib_c/zstd-1.5.2/lib/zdict.h +452 -0
  102. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/zstd.h +760 -234
  103. data/ext/{zstdlib/zstd-1.4.4/lib/common → zstdlib_c/zstd-1.5.2/lib}/zstd_errors.h +3 -1
  104. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzclose.c +0 -0
  105. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzcompatibility.h +1 -1
  106. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzguts.h +0 -0
  107. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzlib.c +0 -0
  108. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzread.c +0 -0
  109. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzwrite.c +0 -0
  110. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/zstd_zlibwrapper.c +133 -44
  111. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/zstd_zlibwrapper.h +1 -1
  112. data/ext/zstdlib_c/zstd.mk +15 -0
  113. data/lib/2.4/zstdlib_c.so +0 -0
  114. data/lib/2.5/zstdlib_c.so +0 -0
  115. data/lib/2.6/zstdlib_c.so +0 -0
  116. data/lib/2.7/zstdlib_c.so +0 -0
  117. data/lib/3.0/zstdlib_c.so +0 -0
  118. data/lib/zstdlib.rb +2 -2
  119. metadata +124 -114
  120. data/ext/zstdlib/zstd-1.4.4/lib/common/compiler.h +0 -159
  121. data/ext/zstdlib/zstd-1.4.4/lib/common/debug.c +0 -44
  122. data/ext/zstdlib/zstd-1.4.4/lib/common/entropy_common.c +0 -236
  123. data/ext/zstdlib/zstd-1.4.4/lib/common/error_private.h +0 -76
  124. data/ext/zstdlib/zstd-1.4.4/lib/common/xxhash.c +0 -882
  125. data/ext/zstdlib/zstd-1.4.4/lib/common/xxhash.h +0 -305
  126. data/ext/zstdlib/zstd-1.4.4/lib/common/zstd_internal.h +0 -350
  127. data/ext/zstdlib/zstd-1.4.4/lib/compress/huf_compress.c +0 -798
  128. data/ext/zstdlib/zstd-1.4.4/lib/compress/zstd_compress.c +0 -4103
  129. data/ext/zstdlib/zstd-1.4.4/lib/compress/zstd_lazy.c +0 -1115
  130. data/ext/zstdlib/zstd-1.4.4/lib/compress/zstd_lazy.h +0 -67
  131. data/ext/zstdlib/zstd-1.4.4/lib/compress/zstdmt_compress.h +0 -192
  132. data/ext/zstdlib/zstd-1.4.4/lib/decompress/huf_decompress.c +0 -1234
  133. data/ext/zstdlib/zstd-1.4.4/lib/decompress/zstd_decompress_block.c +0 -1323
  134. data/ext/zstdlib/zstd.mk +0 -14
  135. data/lib/2.2/zstdlib.so +0 -0
  136. data/lib/2.3/zstdlib.so +0 -0
  137. data/lib/2.4/zstdlib.so +0 -0
  138. data/lib/2.5/zstdlib.so +0 -0
  139. data/lib/2.6/zstdlib.so +0 -0
  140. data/lib/2.7/zstdlib.so +0 -0
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -29,148 +29,308 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
29
29
  * Insert the other positions if their hash entry is empty.
30
30
  */
31
31
  for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
32
- U32 const current = (U32)(ip - base);
32
+ U32 const curr = (U32)(ip - base);
33
33
  size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls);
34
- hashTable[hash0] = current;
34
+ hashTable[hash0] = curr;
35
35
  if (dtlm == ZSTD_dtlm_fast) continue;
36
36
  /* Only load extra positions for ZSTD_dtlm_full */
37
37
  { U32 p;
38
38
  for (p = 1; p < fastHashFillStep; ++p) {
39
39
  size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls);
40
40
  if (hashTable[hash] == 0) { /* not yet filled */
41
- hashTable[hash] = current + p;
41
+ hashTable[hash] = curr + p;
42
42
  } } } }
43
43
  }
44
44
 
45
45
 
46
+ /**
47
+ * If you squint hard enough (and ignore repcodes), the search operation at any
48
+ * given position is broken into 4 stages:
49
+ *
50
+ * 1. Hash (map position to hash value via input read)
51
+ * 2. Lookup (map hash val to index via hashtable read)
52
+ * 3. Load (map index to value at that position via input read)
53
+ * 4. Compare
54
+ *
55
+ * Each of these steps involves a memory read at an address which is computed
56
+ * from the previous step. This means these steps must be sequenced and their
57
+ * latencies are cumulative.
58
+ *
59
+ * Rather than do 1->2->3->4 sequentially for a single position before moving
60
+ * onto the next, this implementation interleaves these operations across the
61
+ * next few positions:
62
+ *
63
+ * R = Repcode Read & Compare
64
+ * H = Hash
65
+ * T = Table Lookup
66
+ * M = Match Read & Compare
67
+ *
68
+ * Pos | Time -->
69
+ * ----+-------------------
70
+ * N   | ... M
71
+ * N+1 | ...   TM
72
+ * N+2 |    R H   T M
73
+ * N+3 |         H    TM
74
+ * N+4 |           R H   T M
75
+ * N+5 |                H   ...
76
+ * N+6 |                  R ...
77
+ *
78
+ * This is very much analogous to the pipelining of execution in a CPU. And just
79
+ * like a CPU, we have to dump the pipeline when we find a match (i.e., take a
80
+ * branch).
81
+ *
82
+ * When this happens, we throw away our current state, and do the following prep
83
+ * to re-enter the loop:
84
+ *
85
+ * Pos | Time -->
86
+ * ----+-------------------
87
+ * N   | H T
88
+ * N+1 |  H
89
+ *
90
+ * This is also the work we do at the beginning to enter the loop initially.
91
+ */
46
92
  FORCE_INLINE_TEMPLATE size_t
47
- ZSTD_compressBlock_fast_generic(
93
+ ZSTD_compressBlock_fast_noDict_generic(
48
94
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
49
95
  void const* src, size_t srcSize,
50
- U32 const mls)
96
+ U32 const mls, U32 const hasStep)
51
97
  {
52
98
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
53
99
  U32* const hashTable = ms->hashTable;
54
100
  U32 const hlog = cParams->hashLog;
55
101
  /* support stepSize of 0 */
56
- size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1;
102
+ size_t const stepSize = hasStep ? (cParams->targetLength + !(cParams->targetLength) + 1) : 2;
57
103
  const BYTE* const base = ms->window.base;
58
104
  const BYTE* const istart = (const BYTE*)src;
59
- /* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */
60
- const BYTE* ip0 = istart;
61
- const BYTE* ip1;
62
- const BYTE* anchor = istart;
63
105
  const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
64
- const U32 maxDistance = 1U << cParams->windowLog;
65
- const U32 validStartIndex = ms->window.dictLimit;
66
- const U32 prefixStartIndex = (endIndex - validStartIndex > maxDistance) ? endIndex - maxDistance : validStartIndex;
106
+ const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
67
107
  const BYTE* const prefixStart = base + prefixStartIndex;
68
108
  const BYTE* const iend = istart + srcSize;
69
109
  const BYTE* const ilimit = iend - HASH_READ_SIZE;
70
- U32 offset_1=rep[0], offset_2=rep[1];
110
+
111
+ const BYTE* anchor = istart;
112
+ const BYTE* ip0 = istart;
113
+ const BYTE* ip1;
114
+ const BYTE* ip2;
115
+ const BYTE* ip3;
116
+ U32 current0;
117
+
118
+ U32 rep_offset1 = rep[0];
119
+ U32 rep_offset2 = rep[1];
71
120
  U32 offsetSaved = 0;
72
121
 
73
- /* init */
122
+ size_t hash0; /* hash for ip0 */
123
+ size_t hash1; /* hash for ip1 */
124
+ U32 idx; /* match idx for ip0 */
125
+ U32 mval; /* src value at match idx */
126
+
127
+ U32 offcode;
128
+ const BYTE* match0;
129
+ size_t mLength;
130
+
131
+ /* ip0 and ip1 are always adjacent. The targetLength skipping and
132
+ * uncompressibility acceleration is applied to every other position,
133
+ * matching the behavior of #1562. step therefore represents the gap
134
+ * between pairs of positions, from ip0 to ip2 or ip1 to ip3. */
135
+ size_t step;
136
+ const BYTE* nextStep;
137
+ const size_t kStepIncr = (1 << (kSearchStrength - 1));
138
+
74
139
  DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
75
140
  ip0 += (ip0 == prefixStart);
141
+ { U32 const curr = (U32)(ip0 - base);
142
+ U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
143
+ U32 const maxRep = curr - windowLow;
144
+ if (rep_offset2 > maxRep) offsetSaved = rep_offset2, rep_offset2 = 0;
145
+ if (rep_offset1 > maxRep) offsetSaved = rep_offset1, rep_offset1 = 0;
146
+ }
147
+
148
+ /* start each op */
149
+ _start: /* Requires: ip0 */
150
+
151
+ step = stepSize;
152
+ nextStep = ip0 + kStepIncr;
153
+
154
+ /* calculate positions, ip0 - anchor == 0, so we skip step calc */
76
155
  ip1 = ip0 + 1;
77
- { U32 const maxRep = (U32)(ip0 - prefixStart);
78
- if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
79
- if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
156
+ ip2 = ip0 + step;
157
+ ip3 = ip2 + 1;
158
+
159
+ if (ip3 >= ilimit) {
160
+ goto _cleanup;
80
161
  }
81
162
 
82
- /* Main Search Loop */
83
- while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */
84
- size_t mLength;
85
- BYTE const* ip2 = ip0 + 2;
86
- size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls);
87
- U32 const val0 = MEM_read32(ip0);
88
- size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls);
89
- U32 const val1 = MEM_read32(ip1);
90
- U32 const current0 = (U32)(ip0-base);
91
- U32 const current1 = (U32)(ip1-base);
92
- U32 const matchIndex0 = hashTable[h0];
93
- U32 const matchIndex1 = hashTable[h1];
94
- BYTE const* repMatch = ip2-offset_1;
95
- const BYTE* match0 = base + matchIndex0;
96
- const BYTE* match1 = base + matchIndex1;
97
- U32 offcode;
98
- hashTable[h0] = current0; /* update hash table */
99
- hashTable[h1] = current1; /* update hash table */
100
-
101
- assert(ip0 + 1 == ip1);
102
-
103
- if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) {
104
- mLength = ip2[-1] == repMatch[-1] ? 1 : 0;
105
- ip0 = ip2 - mLength;
106
- match0 = repMatch - mLength;
107
- offcode = 0;
163
+ hash0 = ZSTD_hashPtr(ip0, hlog, mls);
164
+ hash1 = ZSTD_hashPtr(ip1, hlog, mls);
165
+
166
+ idx = hashTable[hash0];
167
+
168
+ do {
169
+ /* load repcode match for ip[2]*/
170
+ const U32 rval = MEM_read32(ip2 - rep_offset1);
171
+
172
+ /* write back hash table entry */
173
+ current0 = (U32)(ip0 - base);
174
+ hashTable[hash0] = current0;
175
+
176
+ /* check repcode at ip[2] */
177
+ if ((MEM_read32(ip2) == rval) & (rep_offset1 > 0)) {
178
+ ip0 = ip2;
179
+ match0 = ip0 - rep_offset1;
180
+ mLength = ip0[-1] == match0[-1];
181
+ ip0 -= mLength;
182
+ match0 -= mLength;
183
+ offcode = STORE_REPCODE_1;
184
+ mLength += 4;
108
185
  goto _match;
109
186
  }
110
- if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) {
111
- /* found a regular match */
187
+
188
+ /* load match for ip[0] */
189
+ if (idx >= prefixStartIndex) {
190
+ mval = MEM_read32(base + idx);
191
+ } else {
192
+ mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */
193
+ }
194
+
195
+ /* check match at ip[0] */
196
+ if (MEM_read32(ip0) == mval) {
197
+ /* found a match! */
112
198
  goto _offset;
113
199
  }
114
- if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) {
115
- /* found a regular match after one literal */
116
- ip0 = ip1;
117
- match0 = match1;
200
+
201
+ /* lookup ip[1] */
202
+ idx = hashTable[hash1];
203
+
204
+ /* hash ip[2] */
205
+ hash0 = hash1;
206
+ hash1 = ZSTD_hashPtr(ip2, hlog, mls);
207
+
208
+ /* advance to next positions */
209
+ ip0 = ip1;
210
+ ip1 = ip2;
211
+ ip2 = ip3;
212
+
213
+ /* write back hash table entry */
214
+ current0 = (U32)(ip0 - base);
215
+ hashTable[hash0] = current0;
216
+
217
+ /* load match for ip[0] */
218
+ if (idx >= prefixStartIndex) {
219
+ mval = MEM_read32(base + idx);
220
+ } else {
221
+ mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */
222
+ }
223
+
224
+ /* check match at ip[0] */
225
+ if (MEM_read32(ip0) == mval) {
226
+ /* found a match! */
118
227
  goto _offset;
119
228
  }
120
- { size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
121
- assert(step >= 2);
122
- ip0 += step;
123
- ip1 += step;
124
- continue;
229
+
230
+ /* lookup ip[1] */
231
+ idx = hashTable[hash1];
232
+
233
+ /* hash ip[2] */
234
+ hash0 = hash1;
235
+ hash1 = ZSTD_hashPtr(ip2, hlog, mls);
236
+
237
+ /* advance to next positions */
238
+ ip0 = ip1;
239
+ ip1 = ip2;
240
+ ip2 = ip0 + step;
241
+ ip3 = ip1 + step;
242
+
243
+ /* calculate step */
244
+ if (ip2 >= nextStep) {
245
+ step++;
246
+ PREFETCH_L1(ip1 + 64);
247
+ PREFETCH_L1(ip1 + 128);
248
+ nextStep += kStepIncr;
125
249
  }
126
- _offset: /* Requires: ip0, match0 */
127
- /* Compute the offset code */
128
- offset_2 = offset_1;
129
- offset_1 = (U32)(ip0-match0);
130
- offcode = offset_1 + ZSTD_REP_MOVE;
131
- mLength = 0;
132
- /* Count the backwards match length */
133
- while (((ip0>anchor) & (match0>prefixStart))
134
- && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */
250
+ } while (ip3 < ilimit);
251
+
252
+ _cleanup:
253
+ /* Note that there are probably still a couple positions we could search.
254
+ * However, it seems to be a meaningful performance hit to try to search
255
+ * them. So let's not. */
256
+
257
+ /* save reps for next block */
258
+ rep[0] = rep_offset1 ? rep_offset1 : offsetSaved;
259
+ rep[1] = rep_offset2 ? rep_offset2 : offsetSaved;
260
+
261
+ /* Return the last literals size */
262
+ return (size_t)(iend - anchor);
263
+
264
+ _offset: /* Requires: ip0, idx */
265
+
266
+ /* Compute the offset code. */
267
+ match0 = base + idx;
268
+ rep_offset2 = rep_offset1;
269
+ rep_offset1 = (U32)(ip0-match0);
270
+ offcode = STORE_OFFSET(rep_offset1);
271
+ mLength = 4;
272
+
273
+ /* Count the backwards match length. */
274
+ while (((ip0>anchor) & (match0>prefixStart)) && (ip0[-1] == match0[-1])) {
275
+ ip0--;
276
+ match0--;
277
+ mLength++;
278
+ }
135
279
 
136
280
  _match: /* Requires: ip0, match0, offcode */
137
- /* Count the forward length */
138
- mLength += ZSTD_count(ip0+mLength+4, match0+mLength+4, iend) + 4;
139
- ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH);
140
- /* match found */
141
- ip0 += mLength;
142
- anchor = ip0;
143
- ip1 = ip0 + 1;
144
281
 
145
- if (ip0 <= ilimit) {
146
- /* Fill Table */
147
- assert(base+current0+2 > istart); /* check base overflow */
148
- hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */
149
- hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
282
+ /* Count the forward length. */
283
+ mLength += ZSTD_count(ip0 + mLength, match0 + mLength, iend);
284
+
285
+ ZSTD_storeSeq(seqStore, (size_t)(ip0 - anchor), anchor, iend, offcode, mLength);
286
+
287
+ ip0 += mLength;
288
+ anchor = ip0;
289
+
290
+ /* write next hash table entry */
291
+ if (ip1 < ip0) {
292
+ hashTable[hash1] = (U32)(ip1 - base);
293
+ }
150
294
 
151
- while ( ((ip0 <= ilimit) & (offset_2>0)) /* offset_2==0 means offset_2 is invalidated */
152
- && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) {
295
+ /* Fill table and check for immediate repcode. */
296
+ if (ip0 <= ilimit) {
297
+ /* Fill Table */
298
+ assert(base+current0+2 > istart); /* check base overflow */
299
+ hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */
300
+ hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
301
+
302
+ if (rep_offset2 > 0) { /* rep_offset2==0 means rep_offset2 is invalidated */
303
+ while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - rep_offset2)) ) {
153
304
  /* store sequence */
154
- size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
155
- { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
305
+ size_t const rLength = ZSTD_count(ip0+4, ip0+4-rep_offset2, iend) + 4;
306
+ { U32 const tmpOff = rep_offset2; rep_offset2 = rep_offset1; rep_offset1 = tmpOff; } /* swap rep_offset2 <=> rep_offset1 */
156
307
  hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
157
308
  ip0 += rLength;
158
- ip1 = ip0 + 1;
159
- ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
309
+ ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, STORE_REPCODE_1, rLength);
160
310
  anchor = ip0;
161
311
  continue; /* faster when present (confirmed on gcc-8) ... (?) */
162
- }
163
- }
164
- }
165
-
166
- /* save reps for next block */
167
- rep[0] = offset_1 ? offset_1 : offsetSaved;
168
- rep[1] = offset_2 ? offset_2 : offsetSaved;
312
+ } } }
169
313
 
170
- /* Return the last literals size */
171
- return (size_t)(iend - anchor);
314
+ goto _start;
172
315
  }
173
316
 
317
+ #define ZSTD_GEN_FAST_FN(dictMode, mls, step) \
318
+ static size_t ZSTD_compressBlock_fast_##dictMode##_##mls##_##step( \
319
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
320
+ void const* src, size_t srcSize) \
321
+ { \
322
+ return ZSTD_compressBlock_fast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls, step); \
323
+ }
324
+
325
+ ZSTD_GEN_FAST_FN(noDict, 4, 1)
326
+ ZSTD_GEN_FAST_FN(noDict, 5, 1)
327
+ ZSTD_GEN_FAST_FN(noDict, 6, 1)
328
+ ZSTD_GEN_FAST_FN(noDict, 7, 1)
329
+
330
+ ZSTD_GEN_FAST_FN(noDict, 4, 0)
331
+ ZSTD_GEN_FAST_FN(noDict, 5, 0)
332
+ ZSTD_GEN_FAST_FN(noDict, 6, 0)
333
+ ZSTD_GEN_FAST_FN(noDict, 7, 0)
174
334
 
175
335
  size_t ZSTD_compressBlock_fast(
176
336
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -178,24 +338,40 @@ size_t ZSTD_compressBlock_fast(
178
338
  {
179
339
  U32 const mls = ms->cParams.minMatch;
180
340
  assert(ms->dictMatchState == NULL);
181
- switch(mls)
182
- {
183
- default: /* includes case 3 */
184
- case 4 :
185
- return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4);
186
- case 5 :
187
- return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5);
188
- case 6 :
189
- return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6);
190
- case 7 :
191
- return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7);
341
+ if (ms->cParams.targetLength > 1) {
342
+ switch(mls)
343
+ {
344
+ default: /* includes case 3 */
345
+ case 4 :
346
+ return ZSTD_compressBlock_fast_noDict_4_1(ms, seqStore, rep, src, srcSize);
347
+ case 5 :
348
+ return ZSTD_compressBlock_fast_noDict_5_1(ms, seqStore, rep, src, srcSize);
349
+ case 6 :
350
+ return ZSTD_compressBlock_fast_noDict_6_1(ms, seqStore, rep, src, srcSize);
351
+ case 7 :
352
+ return ZSTD_compressBlock_fast_noDict_7_1(ms, seqStore, rep, src, srcSize);
353
+ }
354
+ } else {
355
+ switch(mls)
356
+ {
357
+ default: /* includes case 3 */
358
+ case 4 :
359
+ return ZSTD_compressBlock_fast_noDict_4_0(ms, seqStore, rep, src, srcSize);
360
+ case 5 :
361
+ return ZSTD_compressBlock_fast_noDict_5_0(ms, seqStore, rep, src, srcSize);
362
+ case 6 :
363
+ return ZSTD_compressBlock_fast_noDict_6_0(ms, seqStore, rep, src, srcSize);
364
+ case 7 :
365
+ return ZSTD_compressBlock_fast_noDict_7_0(ms, seqStore, rep, src, srcSize);
366
+ }
367
+
192
368
  }
193
369
  }
194
370
 
195
371
  FORCE_INLINE_TEMPLATE
196
372
  size_t ZSTD_compressBlock_fast_dictMatchState_generic(
197
373
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
198
- void const* src, size_t srcSize, U32 const mls)
374
+ void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
199
375
  {
200
376
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
201
377
  U32* const hashTable = ms->hashTable;
@@ -231,7 +407,9 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
231
407
  assert(endIndex - prefixStartIndex <= maxDistance);
232
408
  (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */
233
409
 
234
- /* ensure there will be no no underflow
410
+ (void)hasStep; /* not currently specialized on whether it's accelerated */
411
+
412
+ /* ensure there will be no underflow
235
413
  * when translating a dict index into a local index */
236
414
  assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
237
415
 
@@ -247,21 +425,21 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
247
425
  while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
248
426
  size_t mLength;
249
427
  size_t const h = ZSTD_hashPtr(ip, hlog, mls);
250
- U32 const current = (U32)(ip-base);
428
+ U32 const curr = (U32)(ip-base);
251
429
  U32 const matchIndex = hashTable[h];
252
430
  const BYTE* match = base + matchIndex;
253
- const U32 repIndex = current + 1 - offset_1;
431
+ const U32 repIndex = curr + 1 - offset_1;
254
432
  const BYTE* repMatch = (repIndex < prefixStartIndex) ?
255
433
  dictBase + (repIndex - dictIndexDelta) :
256
434
  base + repIndex;
257
- hashTable[h] = current; /* update hash table */
435
+ hashTable[h] = curr; /* update hash table */
258
436
 
259
437
  if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
260
438
  && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
261
439
  const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
262
440
  mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
263
441
  ip++;
264
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
442
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
265
443
  } else if ( (matchIndex <= prefixStartIndex) ) {
266
444
  size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
267
445
  U32 const dictMatchIndex = dictHashTable[dictHash];
@@ -273,7 +451,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
273
451
  continue;
274
452
  } else {
275
453
  /* found a dict match */
276
- U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta);
454
+ U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta);
277
455
  mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
278
456
  while (((ip>anchor) & (dictMatch>dictStart))
279
457
  && (ip[-1] == dictMatch[-1])) {
@@ -281,7 +459,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
281
459
  } /* catch up */
282
460
  offset_2 = offset_1;
283
461
  offset_1 = offset;
284
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
462
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
285
463
  }
286
464
  } else if (MEM_read32(match) != MEM_read32(ip)) {
287
465
  /* it's not a match, and we're not going to check the dictionary */
@@ -296,7 +474,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
296
474
  && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
297
475
  offset_2 = offset_1;
298
476
  offset_1 = offset;
299
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
477
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
300
478
  }
301
479
 
302
480
  /* match found */
@@ -305,8 +483,8 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
305
483
 
306
484
  if (ip <= ilimit) {
307
485
  /* Fill Table */
308
- assert(base+current+2 > istart); /* check base overflow */
309
- hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; /* here because current+2 could be > iend-8 */
486
+ assert(base+curr+2 > istart); /* check base overflow */
487
+ hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; /* here because curr+2 could be > iend-8 */
310
488
  hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
311
489
 
312
490
  /* check immediate repcode */
@@ -321,7 +499,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
321
499
  const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
322
500
  size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
323
501
  U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
324
- ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
502
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
325
503
  hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
326
504
  ip += repLength2;
327
505
  anchor = ip;
@@ -340,6 +518,12 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
340
518
  return (size_t)(iend - anchor);
341
519
  }
342
520
 
521
+
522
+ ZSTD_GEN_FAST_FN(dictMatchState, 4, 0)
523
+ ZSTD_GEN_FAST_FN(dictMatchState, 5, 0)
524
+ ZSTD_GEN_FAST_FN(dictMatchState, 6, 0)
525
+ ZSTD_GEN_FAST_FN(dictMatchState, 7, 0)
526
+
343
527
  size_t ZSTD_compressBlock_fast_dictMatchState(
344
528
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
345
529
  void const* src, size_t srcSize)
@@ -350,20 +534,20 @@ size_t ZSTD_compressBlock_fast_dictMatchState(
350
534
  {
351
535
  default: /* includes case 3 */
352
536
  case 4 :
353
- return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4);
537
+ return ZSTD_compressBlock_fast_dictMatchState_4_0(ms, seqStore, rep, src, srcSize);
354
538
  case 5 :
355
- return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5);
539
+ return ZSTD_compressBlock_fast_dictMatchState_5_0(ms, seqStore, rep, src, srcSize);
356
540
  case 6 :
357
- return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6);
541
+ return ZSTD_compressBlock_fast_dictMatchState_6_0(ms, seqStore, rep, src, srcSize);
358
542
  case 7 :
359
- return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7);
543
+ return ZSTD_compressBlock_fast_dictMatchState_7_0(ms, seqStore, rep, src, srcSize);
360
544
  }
361
545
  }
362
546
 
363
547
 
364
548
  static size_t ZSTD_compressBlock_fast_extDict_generic(
365
549
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
366
- void const* src, size_t srcSize, U32 const mls)
550
+ void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
367
551
  {
368
552
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
369
553
  U32* const hashTable = ms->hashTable;
@@ -387,11 +571,13 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
387
571
  const BYTE* const ilimit = iend - 8;
388
572
  U32 offset_1=rep[0], offset_2=rep[1];
389
573
 
390
- DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic");
574
+ (void)hasStep; /* not currently specialized on whether it's accelerated */
575
+
576
+ DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1);
391
577
 
392
578
  /* switch to "regular" variant if extDict is invalidated due to maxDistance */
393
579
  if (prefixStartIndex == dictStartIndex)
394
- return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls);
580
+ return ZSTD_compressBlock_fast(ms, seqStore, rep, src, srcSize);
395
581
 
396
582
  /* Search Loop */
397
583
  while (ip < ilimit) { /* < instead of <=, because (ip+1) */
@@ -399,19 +585,20 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
399
585
  const U32 matchIndex = hashTable[h];
400
586
  const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
401
587
  const BYTE* match = matchBase + matchIndex;
402
- const U32 current = (U32)(ip-base);
403
- const U32 repIndex = current + 1 - offset_1;
588
+ const U32 curr = (U32)(ip-base);
589
+ const U32 repIndex = curr + 1 - offset_1;
404
590
  const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
405
591
  const BYTE* const repMatch = repBase + repIndex;
406
- hashTable[h] = current; /* update hash table */
407
- assert(offset_1 <= current +1); /* check repIndex */
592
+ hashTable[h] = curr; /* update hash table */
593
+ DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);
408
594
 
409
- if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
595
+ if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */
596
+ & (offset_1 <= curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */
410
597
  && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
411
598
  const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
412
599
  size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
413
600
  ip++;
414
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH);
601
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, rLength);
415
602
  ip += rLength;
416
603
  anchor = ip;
417
604
  } else {
@@ -423,30 +610,30 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
423
610
  }
424
611
  { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
425
612
  const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
426
- U32 const offset = current - matchIndex;
613
+ U32 const offset = curr - matchIndex;
427
614
  size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
428
615
  while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
429
616
  offset_2 = offset_1; offset_1 = offset; /* update offset history */
430
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
617
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
431
618
  ip += mLength;
432
619
  anchor = ip;
433
620
  } }
434
621
 
435
622
  if (ip <= ilimit) {
436
623
  /* Fill Table */
437
- hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2;
624
+ hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;
438
625
  hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
439
626
  /* check immediate repcode */
440
627
  while (ip <= ilimit) {
441
628
  U32 const current2 = (U32)(ip-base);
442
629
  U32 const repIndex2 = current2 - offset_2;
443
630
  const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
444
- if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */
631
+ if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 <= curr - dictStartIndex)) /* intentional overflow */
445
632
  && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
446
633
  const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
447
634
  size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
448
635
  { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */
449
- ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH);
636
+ ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, STORE_REPCODE_1, repLength2);
450
637
  hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
451
638
  ip += repLength2;
452
639
  anchor = ip;
@@ -463,6 +650,10 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
463
650
  return (size_t)(iend - anchor);
464
651
  }
465
652
 
653
+ ZSTD_GEN_FAST_FN(extDict, 4, 0)
654
+ ZSTD_GEN_FAST_FN(extDict, 5, 0)
655
+ ZSTD_GEN_FAST_FN(extDict, 6, 0)
656
+ ZSTD_GEN_FAST_FN(extDict, 7, 0)
466
657
 
467
658
  size_t ZSTD_compressBlock_fast_extDict(
468
659
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -473,12 +664,12 @@ size_t ZSTD_compressBlock_fast_extDict(
473
664
  {
474
665
  default: /* includes case 3 */
475
666
  case 4 :
476
- return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
667
+ return ZSTD_compressBlock_fast_extDict_4_0(ms, seqStore, rep, src, srcSize);
477
668
  case 5 :
478
- return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
669
+ return ZSTD_compressBlock_fast_extDict_5_0(ms, seqStore, rep, src, srcSize);
479
670
  case 6 :
480
- return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
671
+ return ZSTD_compressBlock_fast_extDict_6_0(ms, seqStore, rep, src, srcSize);
481
672
  case 7 :
482
- return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
673
+ return ZSTD_compressBlock_fast_extDict_7_0(ms, seqStore, rep, src, srcSize);
483
674
  }
484
675
  }