extzstd 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. checksums.yaml +4 -4
  2. data/README.md +28 -14
  3. data/contrib/zstd/CHANGELOG +301 -56
  4. data/contrib/zstd/CONTRIBUTING.md +169 -72
  5. data/contrib/zstd/LICENSE +4 -4
  6. data/contrib/zstd/Makefile +116 -87
  7. data/contrib/zstd/Package.swift +36 -0
  8. data/contrib/zstd/README.md +62 -32
  9. data/contrib/zstd/TESTING.md +2 -3
  10. data/contrib/zstd/appveyor.yml +52 -136
  11. data/contrib/zstd/lib/BUCK +5 -7
  12. data/contrib/zstd/lib/Makefile +225 -222
  13. data/contrib/zstd/lib/README.md +51 -6
  14. data/contrib/zstd/lib/common/allocations.h +55 -0
  15. data/contrib/zstd/lib/common/bits.h +200 -0
  16. data/contrib/zstd/lib/common/bitstream.h +45 -62
  17. data/contrib/zstd/lib/common/compiler.h +205 -22
  18. data/contrib/zstd/lib/common/cpu.h +1 -3
  19. data/contrib/zstd/lib/common/debug.c +1 -1
  20. data/contrib/zstd/lib/common/debug.h +12 -19
  21. data/contrib/zstd/lib/common/entropy_common.c +172 -48
  22. data/contrib/zstd/lib/common/error_private.c +10 -2
  23. data/contrib/zstd/lib/common/error_private.h +82 -3
  24. data/contrib/zstd/lib/common/fse.h +37 -86
  25. data/contrib/zstd/lib/common/fse_decompress.c +117 -92
  26. data/contrib/zstd/lib/common/huf.h +99 -166
  27. data/contrib/zstd/lib/common/mem.h +124 -142
  28. data/contrib/zstd/lib/common/pool.c +54 -27
  29. data/contrib/zstd/lib/common/pool.h +10 -4
  30. data/contrib/zstd/lib/common/portability_macros.h +156 -0
  31. data/contrib/zstd/lib/common/threading.c +74 -19
  32. data/contrib/zstd/lib/common/threading.h +5 -10
  33. data/contrib/zstd/lib/common/xxhash.c +7 -847
  34. data/contrib/zstd/lib/common/xxhash.h +5568 -167
  35. data/contrib/zstd/lib/common/zstd_common.c +2 -37
  36. data/contrib/zstd/lib/common/zstd_deps.h +111 -0
  37. data/contrib/zstd/lib/common/zstd_internal.h +132 -187
  38. data/contrib/zstd/lib/common/zstd_trace.h +163 -0
  39. data/contrib/zstd/lib/compress/clevels.h +134 -0
  40. data/contrib/zstd/lib/compress/fse_compress.c +83 -157
  41. data/contrib/zstd/lib/compress/hist.c +27 -29
  42. data/contrib/zstd/lib/compress/hist.h +2 -2
  43. data/contrib/zstd/lib/compress/huf_compress.c +916 -279
  44. data/contrib/zstd/lib/compress/zstd_compress.c +3773 -1019
  45. data/contrib/zstd/lib/compress/zstd_compress_internal.h +610 -203
  46. data/contrib/zstd/lib/compress/zstd_compress_literals.c +119 -42
  47. data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
  48. data/contrib/zstd/lib/compress/zstd_compress_sequences.c +42 -19
  49. data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
  50. data/contrib/zstd/lib/compress/zstd_compress_superblock.c +49 -317
  51. data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
  52. data/contrib/zstd/lib/compress/zstd_cwksp.h +320 -103
  53. data/contrib/zstd/lib/compress/zstd_double_fast.c +388 -151
  54. data/contrib/zstd/lib/compress/zstd_double_fast.h +3 -2
  55. data/contrib/zstd/lib/compress/zstd_fast.c +729 -265
  56. data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
  57. data/contrib/zstd/lib/compress/zstd_lazy.c +1270 -251
  58. data/contrib/zstd/lib/compress/zstd_lazy.h +61 -1
  59. data/contrib/zstd/lib/compress/zstd_ldm.c +324 -219
  60. data/contrib/zstd/lib/compress/zstd_ldm.h +9 -2
  61. data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
  62. data/contrib/zstd/lib/compress/zstd_opt.c +481 -209
  63. data/contrib/zstd/lib/compress/zstd_opt.h +1 -1
  64. data/contrib/zstd/lib/compress/zstdmt_compress.c +181 -457
  65. data/contrib/zstd/lib/compress/zstdmt_compress.h +34 -113
  66. data/contrib/zstd/lib/decompress/huf_decompress.c +1199 -565
  67. data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +576 -0
  68. data/contrib/zstd/lib/decompress/zstd_ddict.c +12 -12
  69. data/contrib/zstd/lib/decompress/zstd_ddict.h +2 -2
  70. data/contrib/zstd/lib/decompress/zstd_decompress.c +627 -157
  71. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1086 -326
  72. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +19 -5
  73. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +62 -13
  74. data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
  75. data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
  76. data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
  77. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
  78. data/contrib/zstd/lib/dictBuilder/cover.c +73 -52
  79. data/contrib/zstd/lib/dictBuilder/cover.h +7 -6
  80. data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
  81. data/contrib/zstd/lib/dictBuilder/fastcover.c +44 -35
  82. data/contrib/zstd/lib/dictBuilder/zdict.c +103 -111
  83. data/contrib/zstd/lib/legacy/zstd_legacy.h +8 -1
  84. data/contrib/zstd/lib/legacy/zstd_v01.c +21 -54
  85. data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
  86. data/contrib/zstd/lib/legacy/zstd_v02.c +29 -70
  87. data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
  88. data/contrib/zstd/lib/legacy/zstd_v03.c +30 -73
  89. data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
  90. data/contrib/zstd/lib/legacy/zstd_v04.c +29 -71
  91. data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
  92. data/contrib/zstd/lib/legacy/zstd_v05.c +40 -86
  93. data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
  94. data/contrib/zstd/lib/legacy/zstd_v06.c +47 -88
  95. data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
  96. data/contrib/zstd/lib/legacy/zstd_v07.c +40 -83
  97. data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
  98. data/contrib/zstd/lib/libzstd.mk +214 -0
  99. data/contrib/zstd/lib/libzstd.pc.in +7 -6
  100. data/contrib/zstd/lib/module.modulemap +35 -0
  101. data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +203 -34
  102. data/contrib/zstd/lib/zstd.h +1217 -287
  103. data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +28 -8
  104. data/ext/extconf.rb +7 -6
  105. data/ext/extzstd.c +19 -10
  106. data/ext/extzstd.h +6 -0
  107. data/ext/libzstd_conf.h +0 -1
  108. data/ext/zstd_decompress_asm.S +1 -0
  109. data/gemstub.rb +3 -21
  110. data/lib/extzstd/version.rb +6 -1
  111. data/lib/extzstd.rb +0 -2
  112. data/test/test_basic.rb +0 -5
  113. metadata +18 -6
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2017-2020, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -8,6 +8,10 @@
8
8
  * You may select, at your option, one of the above-listed licenses.
9
9
  */
10
10
 
11
+ #ifndef ZDICT_STATIC_LINKING_ONLY
12
+ # define ZDICT_STATIC_LINKING_ONLY
13
+ #endif
14
+
11
15
  #include <stdio.h> /* fprintf */
12
16
  #include <stdlib.h> /* malloc, free, qsort */
13
17
  #include <string.h> /* memset */
@@ -16,10 +20,7 @@
16
20
  #include "../common/pool.h"
17
21
  #include "../common/threading.h"
18
22
  #include "../common/zstd_internal.h" /* includes zstd.h */
19
- #ifndef ZDICT_STATIC_LINKING_ONLY
20
- #define ZDICT_STATIC_LINKING_ONLY
21
- #endif
22
- #include "zdict.h"
23
+ #include "../zdict.h"
23
24
 
24
25
  /**
25
26
  * COVER_best_t is used for two purposes:
@@ -152,6 +153,6 @@ void COVER_dictSelectionFree(COVER_dictSelection_t selection);
152
153
  * smallest dictionary within a specified regression of the compressed size
153
154
  * from the largest dictionary.
154
155
  */
155
- COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
156
+ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity,
156
157
  size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
157
158
  size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize);
@@ -1576,7 +1576,7 @@ note:
1576
1576
  /* Construct the inverse suffix array of type B* suffixes using trsort. */
1577
1577
  trsort(ISAb, SA, m, 1);
1578
1578
 
1579
- /* Set the sorted order of tyoe B* suffixes. */
1579
+ /* Set the sorted order of type B* suffixes. */
1580
1580
  for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
1581
1581
  for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
1582
1582
  if(0 <= i) {
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2018-2020, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -16,24 +16,33 @@
16
16
  #include <string.h> /* memset */
17
17
  #include <time.h> /* clock */
18
18
 
19
+ #ifndef ZDICT_STATIC_LINKING_ONLY
20
+ # define ZDICT_STATIC_LINKING_ONLY
21
+ #endif
22
+
19
23
  #include "../common/mem.h" /* read */
20
24
  #include "../common/pool.h"
21
25
  #include "../common/threading.h"
22
- #include "cover.h"
23
26
  #include "../common/zstd_internal.h" /* includes zstd.h */
24
- #ifndef ZDICT_STATIC_LINKING_ONLY
25
- #define ZDICT_STATIC_LINKING_ONLY
26
- #endif
27
- #include "zdict.h"
27
+ #include "../compress/zstd_compress_internal.h" /* ZSTD_hash*() */
28
+ #include "../zdict.h"
29
+ #include "cover.h"
28
30
 
29
31
 
30
32
  /*-*************************************
31
33
  * Constants
32
34
  ***************************************/
35
+ /**
36
+ * There are 32bit indexes used to ref samples, so limit samples size to 4GB
37
+ * on 64bit builds.
38
+ * For 32bit builds we choose 1 GB.
39
+ * Most 32bit platforms have 2GB user-mode addressable space and we allocate a large
40
+ * contiguous buffer, so 1GB is already a high limit.
41
+ */
33
42
  #define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
34
43
  #define FASTCOVER_MAX_F 31
35
44
  #define FASTCOVER_MAX_ACCEL 10
36
- #define DEFAULT_SPLITPOINT 0.75
45
+ #define FASTCOVER_DEFAULT_SPLITPOINT 0.75
37
46
  #define DEFAULT_F 20
38
47
  #define DEFAULT_ACCEL 1
39
48
 
@@ -41,50 +50,50 @@
41
50
  /*-*************************************
42
51
  * Console display
43
52
  ***************************************/
44
- static int g_displayLevel = 2;
53
+ #ifndef LOCALDISPLAYLEVEL
54
+ static int g_displayLevel = 0;
55
+ #endif
56
+ #undef DISPLAY
45
57
  #define DISPLAY(...) \
46
58
  { \
47
59
  fprintf(stderr, __VA_ARGS__); \
48
60
  fflush(stderr); \
49
61
  }
62
+ #undef LOCALDISPLAYLEVEL
50
63
  #define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
51
64
  if (displayLevel >= l) { \
52
65
  DISPLAY(__VA_ARGS__); \
53
66
  } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
67
+ #undef DISPLAYLEVEL
54
68
  #define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
55
69
 
70
+ #ifndef LOCALDISPLAYUPDATE
71
+ static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100;
72
+ static clock_t g_time = 0;
73
+ #endif
74
+ #undef LOCALDISPLAYUPDATE
56
75
  #define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
57
76
  if (displayLevel >= l) { \
58
- if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
77
+ if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
59
78
  g_time = clock(); \
60
79
  DISPLAY(__VA_ARGS__); \
61
80
  } \
62
81
  }
82
+ #undef DISPLAYUPDATE
63
83
  #define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
64
- static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
65
- static clock_t g_time = 0;
66
84
 
67
85
 
68
86
  /*-*************************************
69
87
  * Hash Functions
70
88
  ***************************************/
71
- static const U64 prime6bytes = 227718039650203ULL;
72
- static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
73
- static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
74
-
75
- static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
76
- static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
77
- static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
78
-
79
-
80
89
  /**
81
- * Hash the d-byte value pointed to by p and mod 2^f
90
+ * Hash the d-byte value pointed to by p and mod 2^f into the frequency vector
82
91
  */
83
- static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 h, unsigned d) {
92
+ static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 f, unsigned d) {
84
93
  if (d == 6) {
85
- return ZSTD_hash6Ptr(p, h) & ((1 << h) - 1);
94
+ return ZSTD_hash6Ptr(p, f);
86
95
  }
87
- return ZSTD_hash8Ptr(p, h) & ((1 << h) - 1);
96
+ return ZSTD_hash8Ptr(p, f);
88
97
  }
89
98
 
90
99
 
@@ -295,7 +304,7 @@ FASTCOVER_computeFrequency(U32* freqs, const FASTCOVER_ctx_t* ctx)
295
304
 
296
305
  /**
297
306
  * Prepare a context for dictionary building.
298
- * The context is only dependent on the parameter `d` and can used multiple
307
+ * The context is only dependent on the parameter `d` and can be used multiple
299
308
  * times.
300
309
  * Returns 0 on success or error code on error.
301
310
  * The context must be destroyed with `FASTCOVER_ctx_destroy()`.
@@ -461,20 +470,20 @@ typedef struct FASTCOVER_tryParameters_data_s {
461
470
  * This function is thread safe if zstd is compiled with multithreaded support.
462
471
  * It takes its parameters as an *OWNING* opaque pointer to support threading.
463
472
  */
464
- static void FASTCOVER_tryParameters(void *opaque)
473
+ static void FASTCOVER_tryParameters(void* opaque)
465
474
  {
466
475
  /* Save parameters as local variables */
467
- FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t *)opaque;
476
+ FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t*)opaque;
468
477
  const FASTCOVER_ctx_t *const ctx = data->ctx;
469
478
  const ZDICT_cover_params_t parameters = data->parameters;
470
479
  size_t dictBufferCapacity = data->dictBufferCapacity;
471
480
  size_t totalCompressedSize = ERROR(GENERIC);
472
481
  /* Initialize array to keep track of frequency of dmer within activeSegment */
473
- U16* segmentFreqs = (U16 *)calloc(((U64)1 << ctx->f), sizeof(U16));
482
+ U16* segmentFreqs = (U16*)calloc(((U64)1 << ctx->f), sizeof(U16));
474
483
  /* Allocate space for hash table, dict, and freqs */
475
- BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
484
+ BYTE *const dict = (BYTE*)malloc(dictBufferCapacity);
476
485
  COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
477
- U32 *freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
486
+ U32* freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
478
487
  if (!segmentFreqs || !dict || !freqs) {
479
488
  DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
480
489
  goto _cleanup;
@@ -486,7 +495,7 @@ static void FASTCOVER_tryParameters(void *opaque)
486
495
  parameters, segmentFreqs);
487
496
 
488
497
  const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100);
489
- selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
498
+ selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail,
490
499
  ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
491
500
  totalCompressedSize);
492
501
 
@@ -547,7 +556,7 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
547
556
  ZDICT_cover_params_t coverParams;
548
557
  FASTCOVER_accel_t accelParams;
549
558
  /* Initialize global data */
550
- g_displayLevel = parameters.zParams.notificationLevel;
559
+ g_displayLevel = (int)parameters.zParams.notificationLevel;
551
560
  /* Assign splitPoint and f if not provided */
552
561
  parameters.splitPoint = 1.0;
553
562
  parameters.f = parameters.f == 0 ? DEFAULT_F : parameters.f;
@@ -617,7 +626,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
617
626
  /* constants */
618
627
  const unsigned nbThreads = parameters->nbThreads;
619
628
  const double splitPoint =
620
- parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint;
629
+ parameters->splitPoint <= 0.0 ? FASTCOVER_DEFAULT_SPLITPOINT : parameters->splitPoint;
621
630
  const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
622
631
  const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
623
632
  const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
@@ -630,7 +639,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
630
639
  const unsigned accel = parameters->accel == 0 ? DEFAULT_ACCEL : parameters->accel;
631
640
  const unsigned shrinkDict = 0;
632
641
  /* Local variables */
633
- const int displayLevel = parameters->zParams.notificationLevel;
642
+ const int displayLevel = (int)parameters->zParams.notificationLevel;
634
643
  unsigned iteration = 1;
635
644
  unsigned d;
636
645
  unsigned k;
@@ -714,7 +723,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
714
723
  data->parameters.splitPoint = splitPoint;
715
724
  data->parameters.steps = kSteps;
716
725
  data->parameters.shrinkDict = shrinkDict;
717
- data->parameters.zParams.notificationLevel = g_displayLevel;
726
+ data->parameters.zParams.notificationLevel = (unsigned)g_displayLevel;
718
727
  /* Check the parameters */
719
728
  if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity,
720
729
  data->ctx->f, accel)) {