zstd-ruby 1.5.4.0 → 1.5.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +10 -2
- data/ext/zstdruby/extconf.rb +1 -1
- data/ext/zstdruby/libzstd/common/allocations.h +55 -0
- data/ext/zstdruby/libzstd/common/bits.h +30 -5
- data/ext/zstdruby/libzstd/common/bitstream.h +1 -1
- data/ext/zstdruby/libzstd/common/compiler.h +4 -0
- data/ext/zstdruby/libzstd/common/pool.c +1 -1
- data/ext/zstdruby/libzstd/common/threading.c +1 -1
- data/ext/zstdruby/libzstd/common/zstd_common.c +0 -35
- data/ext/zstdruby/libzstd/common/zstd_internal.h +0 -5
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +209 -104
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +67 -13
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +123 -59
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +92 -62
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +2 -0
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +5 -4
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +4 -4
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +1 -0
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +41 -38
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +16 -17
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +5 -0
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +2 -2
- data/ext/zstdruby/libzstd/zstd.h +258 -212
- data/ext/zstdruby/main.c +2 -0
- data/ext/zstdruby/skippable_frame.c +63 -0
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +4 -2
data/ext/zstdruby/libzstd/zstd.h
CHANGED
@@ -106,7 +106,7 @@ extern "C" {
|
|
106
106
|
/*------ Version ------*/
|
107
107
|
#define ZSTD_VERSION_MAJOR 1
|
108
108
|
#define ZSTD_VERSION_MINOR 5
|
109
|
-
#define ZSTD_VERSION_RELEASE 4
|
109
|
+
#define ZSTD_VERSION_RELEASE 5
|
110
110
|
#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
|
111
111
|
|
112
112
|
/*! ZSTD_versionNumber() :
|
@@ -148,7 +148,8 @@ ZSTDLIB_API const char* ZSTD_versionString(void);
|
|
148
148
|
***************************************/
|
149
149
|
/*! ZSTD_compress() :
|
150
150
|
* Compresses `src` content as a single zstd compressed frame into already allocated `dst`.
|
151
|
-
*
|
151
|
+
* NOTE: Providing `dstCapacity >= ZSTD_compressBound(srcSize)` guarantees that zstd will have
|
152
|
+
* enough space to successfully compress the data.
|
152
153
|
* @return : compressed size written into `dst` (<= `dstCapacity`),
|
153
154
|
* or an error code if it fails (which can be tested using ZSTD_isError()). */
|
154
155
|
ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity,
|
@@ -578,7 +579,8 @@ ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset);
|
|
578
579
|
* Should cctx hold data from a previously unfinished frame, everything about it is forgotten.
|
579
580
|
* - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
|
580
581
|
* - The function is always blocking, returns when compression is completed.
|
581
|
-
*
|
582
|
+
* NOTE: Providing `dstCapacity >= ZSTD_compressBound(srcSize)` guarantees that zstd will have
|
583
|
+
* enough space to successfully compress the data, though it is possible it fails for other reasons.
|
582
584
|
* @return : compressed size written into `dst` (<= `dstCapacity`),
|
583
585
|
* or an error code if it fails (which can be tested using ZSTD_isError()).
|
584
586
|
*/
|
@@ -1018,9 +1020,11 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
|
|
1018
1020
|
* Advanced dictionary and prefix API (Requires v1.4.0+)
|
1019
1021
|
*
|
1020
1022
|
* This API allows dictionaries to be used with ZSTD_compress2(),
|
1021
|
-
* ZSTD_compressStream2(), and ZSTD_decompressDCtx().
|
1022
|
-
*
|
1023
|
-
*
|
1023
|
+
* ZSTD_compressStream2(), and ZSTD_decompressDCtx().
|
1024
|
+
* Dictionaries are sticky, they remain valid when same context is re-used,
|
1025
|
+
* they only reset when the context is reset
|
1026
|
+
* with ZSTD_reset_parameters or ZSTD_reset_session_and_parameters.
|
1027
|
+
* In contrast, Prefixes are single-use.
|
1024
1028
|
******************************************************************************/
|
1025
1029
|
|
1026
1030
|
|
@@ -1041,7 +1045,11 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
|
|
1041
1045
|
* Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead.
|
1042
1046
|
* In such a case, dictionary buffer must outlive its users.
|
1043
1047
|
* Note 4 : Use ZSTD_CCtx_loadDictionary_advanced()
|
1044
|
-
* to precisely select how dictionary content must be interpreted.
|
1048
|
+
* to precisely select how dictionary content must be interpreted.
|
1049
|
+
* Note 5 : This method does not benefit from LDM (long distance mode).
|
1050
|
+
* If you want to employ LDM on some large dictionary content,
|
1051
|
+
* prefer employing ZSTD_CCtx_refPrefix() described below.
|
1052
|
+
*/
|
1045
1053
|
ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
|
1046
1054
|
|
1047
1055
|
/*! ZSTD_CCtx_refCDict() : Requires v1.4.0+
|
@@ -1064,6 +1072,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
|
|
1064
1072
|
* Decompression will need same prefix to properly regenerate data.
|
1065
1073
|
* Compressing with a prefix is similar in outcome as performing a diff and compressing it,
|
1066
1074
|
* but performs much faster, especially during decompression (compression speed is tunable with compression level).
|
1075
|
+
* This method is compatible with LDM (long distance mode).
|
1067
1076
|
* @result : 0, or an error code (which can be tested with ZSTD_isError()).
|
1068
1077
|
* Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary
|
1069
1078
|
* Note 1 : Prefix buffer is referenced. It **must** outlive compression.
|
@@ -1387,7 +1396,7 @@ typedef enum {
|
|
1387
1396
|
} ZSTD_paramSwitch_e;
|
1388
1397
|
|
1389
1398
|
/***************************************
|
1390
|
-
* Frame size functions
|
1399
|
+
* Frame header and size functions
|
1391
1400
|
***************************************/
|
1392
1401
|
|
1393
1402
|
/*! ZSTD_findDecompressedSize() :
|
@@ -1434,6 +1443,30 @@ ZSTDLIB_STATIC_API unsigned long long ZSTD_decompressBound(const void* src, size
|
|
1434
1443
|
* or an error code (if srcSize is too small) */
|
1435
1444
|
ZSTDLIB_STATIC_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
|
1436
1445
|
|
1446
|
+
typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e;
|
1447
|
+
typedef struct {
|
1448
|
+
unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */
|
1449
|
+
unsigned long long windowSize; /* can be very large, up to <= frameContentSize */
|
1450
|
+
unsigned blockSizeMax;
|
1451
|
+
ZSTD_frameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */
|
1452
|
+
unsigned headerSize;
|
1453
|
+
unsigned dictID;
|
1454
|
+
unsigned checksumFlag;
|
1455
|
+
unsigned _reserved1;
|
1456
|
+
unsigned _reserved2;
|
1457
|
+
} ZSTD_frameHeader;
|
1458
|
+
|
1459
|
+
/*! ZSTD_getFrameHeader() :
|
1460
|
+
* decode Frame Header, or requires larger `srcSize`.
|
1461
|
+
* @return : 0, `zfhPtr` is correctly filled,
|
1462
|
+
* >0, `srcSize` is too small, value is wanted `srcSize` amount,
|
1463
|
+
* or an error code, which can be tested using ZSTD_isError() */
|
1464
|
+
ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /**< doesn't consume input */
|
1465
|
+
/*! ZSTD_getFrameHeader_advanced() :
|
1466
|
+
* same as ZSTD_getFrameHeader(),
|
1467
|
+
* with added capability to select a format (like ZSTD_f_zstd1_magicless) */
|
1468
|
+
ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format);
|
1469
|
+
|
1437
1470
|
/*! ZSTD_decompressionMargin() :
|
1438
1471
|
* Zstd supports in-place decompression, where the input and output buffers overlap.
|
1439
1472
|
* In this case, the output buffer must be at least (Margin + Output_Size) bytes large,
|
@@ -1803,12 +1836,26 @@ ZSTDLIB_STATIC_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params);
|
|
1803
1836
|
ZSTDLIB_STATIC_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize);
|
1804
1837
|
|
1805
1838
|
/*! ZSTD_CCtx_setCParams() :
|
1806
|
-
* Set all parameters provided within @cparams into the working @cctx.
|
1839
|
+
* Set all parameters provided within @p cparams into the working @p cctx.
|
1807
1840
|
* Note : if modifying parameters during compression (MT mode only),
|
1808
1841
|
* note that changes to the .windowLog parameter will be ignored.
|
1809
|
-
* @return 0 on success, or an error code (can be checked with ZSTD_isError())
|
1842
|
+
* @return 0 on success, or an error code (can be checked with ZSTD_isError()).
|
1843
|
+
* On failure, no parameters are updated.
|
1844
|
+
*/
|
1810
1845
|
ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setCParams(ZSTD_CCtx* cctx, ZSTD_compressionParameters cparams);
|
1811
1846
|
|
1847
|
+
/*! ZSTD_CCtx_setFParams() :
|
1848
|
+
* Set all parameters provided within @p fparams into the working @p cctx.
|
1849
|
+
* @return 0 on success, or an error code (can be checked with ZSTD_isError()).
|
1850
|
+
*/
|
1851
|
+
ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setFParams(ZSTD_CCtx* cctx, ZSTD_frameParameters fparams);
|
1852
|
+
|
1853
|
+
/*! ZSTD_CCtx_setParams() :
|
1854
|
+
* Set all parameters provided within @p params into the working @p cctx.
|
1855
|
+
* @return 0 on success, or an error code (can be checked with ZSTD_isError()).
|
1856
|
+
*/
|
1857
|
+
ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setParams(ZSTD_CCtx* cctx, ZSTD_parameters params);
|
1858
|
+
|
1812
1859
|
/*! ZSTD_compress_advanced() :
|
1813
1860
|
* Note : this function is now DEPRECATED.
|
1814
1861
|
* It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters.
|
@@ -2134,7 +2181,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
|
|
2134
2181
|
* This parameter can be used to set an upper bound on the blocksize
|
2135
2182
|
* that overrides the default ZSTD_BLOCKSIZE_MAX. It cannot be used to set upper
|
2136
2183
|
* bounds greater than ZSTD_BLOCKSIZE_MAX or bounds lower than 1KB (will make
|
2137
|
-
* compressBound()
|
2184
|
+
* compressBound() inaccurate). Only currently meant to be used for testing.
|
2138
2185
|
*
|
2139
2186
|
*/
|
2140
2187
|
#define ZSTD_c_maxBlockSize ZSTD_c_experimentalParam18
|
@@ -2452,12 +2499,9 @@ size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
|
|
2452
2499
|
int compressionLevel);
|
2453
2500
|
|
2454
2501
|
/*! ZSTD_initCStream_advanced() :
|
2455
|
-
* This function is DEPRECATED, and is approximately equivalent to:
|
2502
|
+
* This function is DEPRECATED, and is equivalent to:
|
2456
2503
|
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
|
2457
|
-
*
|
2458
|
-
* for ((param, value) : params) {
|
2459
|
-
* ZSTD_CCtx_setParameter(zcs, param, value);
|
2460
|
-
* }
|
2504
|
+
* ZSTD_CCtx_setParams(zcs, params);
|
2461
2505
|
* ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
|
2462
2506
|
* ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
|
2463
2507
|
*
|
@@ -2486,12 +2530,9 @@ ZSTDLIB_STATIC_API
|
|
2486
2530
|
size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
|
2487
2531
|
|
2488
2532
|
/*! ZSTD_initCStream_usingCDict_advanced() :
|
2489
|
-
* This function is DEPRECATED, and is approximately equivalent to:
|
2533
|
+
* This function is DEPRECATED, and is equivalent to:
|
2490
2534
|
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
|
2491
|
-
*
|
2492
|
-
* for ((fParam, value) : fParams) {
|
2493
|
-
* ZSTD_CCtx_setParameter(zcs, fParam, value);
|
2494
|
-
* }
|
2535
|
+
* ZSTD_CCtx_setFParams(zcs, fParams);
|
2495
2536
|
* ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
|
2496
2537
|
* ZSTD_CCtx_refCDict(zcs, cdict);
|
2497
2538
|
*
|
@@ -2598,12 +2639,180 @@ ZSTD_DEPRECATED("use ZSTD_DCtx_reset, see zstd.h for detailed instructions")
|
|
2598
2639
|
ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
|
2599
2640
|
|
2600
2641
|
|
2642
|
+
/* ********************* BLOCK-LEVEL SEQUENCE PRODUCER API *********************
|
2643
|
+
*
|
2644
|
+
* *** OVERVIEW ***
|
2645
|
+
* The Block-Level Sequence Producer API allows users to provide their own custom
|
2646
|
+
* sequence producer which libzstd invokes to process each block. The produced list
|
2647
|
+
* of sequences (literals and matches) is then post-processed by libzstd to produce
|
2648
|
+
* valid compressed blocks.
|
2649
|
+
*
|
2650
|
+
* This block-level offload API is a more granular complement of the existing
|
2651
|
+
* frame-level offload API compressSequences() (introduced in v1.5.1). It offers
|
2652
|
+
* an easier migration story for applications already integrated with libzstd: the
|
2653
|
+
* user application continues to invoke the same compression functions
|
2654
|
+
* ZSTD_compress2() or ZSTD_compressStream2() as usual, and transparently benefits
|
2655
|
+
* from the specific advantages of the external sequence producer. For example,
|
2656
|
+
* the sequence producer could be tuned to take advantage of known characteristics
|
2657
|
+
* of the input, to offer better speed / ratio, or could leverage hardware
|
2658
|
+
* acceleration not available within libzstd itself.
|
2659
|
+
*
|
2660
|
+
* See contrib/externalSequenceProducer for an example program employing the
|
2661
|
+
* Block-Level Sequence Producer API.
|
2662
|
+
*
|
2663
|
+
* *** USAGE ***
|
2664
|
+
* The user is responsible for implementing a function of type
|
2665
|
+
* ZSTD_sequenceProducer_F. For each block, zstd will pass the following
|
2666
|
+
* arguments to the user-provided function:
|
2667
|
+
*
|
2668
|
+
* - sequenceProducerState: a pointer to a user-managed state for the sequence
|
2669
|
+
* producer.
|
2670
|
+
*
|
2671
|
+
* - outSeqs, outSeqsCapacity: an output buffer for the sequence producer.
|
2672
|
+
* outSeqsCapacity is guaranteed >= ZSTD_sequenceBound(srcSize). The memory
|
2673
|
+
* backing outSeqs is managed by the CCtx.
|
2674
|
+
*
|
2675
|
+
* - src, srcSize: an input buffer for the sequence producer to parse.
|
2676
|
+
* srcSize is guaranteed to be <= ZSTD_BLOCKSIZE_MAX.
|
2677
|
+
*
|
2678
|
+
* - dict, dictSize: a history buffer, which may be empty, which the sequence
|
2679
|
+
* producer may reference as it parses the src buffer. Currently, zstd will
|
2680
|
+
* always pass dictSize == 0 into external sequence producers, but this will
|
2681
|
+
* change in the future.
|
2682
|
+
*
|
2683
|
+
* - compressionLevel: a signed integer representing the zstd compression level
|
2684
|
+
* set by the user for the current operation. The sequence producer may choose
|
2685
|
+
* to use this information to change its compression strategy and speed/ratio
|
2686
|
+
* tradeoff. Note: the compression level does not reflect zstd parameters set
|
2687
|
+
* through the advanced API.
|
2688
|
+
*
|
2689
|
+
* - windowSize: a size_t representing the maximum allowed offset for external
|
2690
|
+
* sequences. Note that sequence offsets are sometimes allowed to exceed the
|
2691
|
+
* windowSize if a dictionary is present, see doc/zstd_compression_format.md
|
2692
|
+
* for details.
|
2693
|
+
*
|
2694
|
+
* The user-provided function shall return a size_t representing the number of
|
2695
|
+
* sequences written to outSeqs. This return value will be treated as an error
|
2696
|
+
* code if it is greater than outSeqsCapacity. The return value must be non-zero
|
2697
|
+
* if srcSize is non-zero. The ZSTD_SEQUENCE_PRODUCER_ERROR macro is provided
|
2698
|
+
* for convenience, but any value greater than outSeqsCapacity will be treated as
|
2699
|
+
* an error code.
|
2700
|
+
*
|
2701
|
+
* If the user-provided function does not return an error code, the sequences
|
2702
|
+
* written to outSeqs must be a valid parse of the src buffer. Data corruption may
|
2703
|
+
* occur if the parse is not valid. A parse is defined to be valid if the
|
2704
|
+
* following conditions hold:
|
2705
|
+
* - The sum of matchLengths and literalLengths must equal srcSize.
|
2706
|
+
* - All sequences in the parse, except for the final sequence, must have
|
2707
|
+
* matchLength >= ZSTD_MINMATCH_MIN. The final sequence must have
|
2708
|
+
* matchLength >= ZSTD_MINMATCH_MIN or matchLength == 0.
|
2709
|
+
* - All offsets must respect the windowSize parameter as specified in
|
2710
|
+
* doc/zstd_compression_format.md.
|
2711
|
+
* - If the final sequence has matchLength == 0, it must also have offset == 0.
|
2712
|
+
*
|
2713
|
+
* zstd will only validate these conditions (and fail compression if they do not
|
2714
|
+
* hold) if the ZSTD_c_validateSequences cParam is enabled. Note that sequence
|
2715
|
+
* validation has a performance cost.
|
2716
|
+
*
|
2717
|
+
* If the user-provided function returns an error, zstd will either fall back
|
2718
|
+
* to an internal sequence producer or fail the compression operation. The user can
|
2719
|
+
* choose between the two behaviors by setting the ZSTD_c_enableSeqProducerFallback
|
2720
|
+
* cParam. Fallback compression will follow any other cParam settings, such as
|
2721
|
+
* compression level, the same as in a normal compression operation.
|
2722
|
+
*
|
2723
|
+
* The user shall instruct zstd to use a particular ZSTD_sequenceProducer_F
|
2724
|
+
* function by calling
|
2725
|
+
* ZSTD_registerSequenceProducer(cctx,
|
2726
|
+
* sequenceProducerState,
|
2727
|
+
* sequenceProducer)
|
2728
|
+
* This setting will persist until the next parameter reset of the CCtx.
|
2729
|
+
*
|
2730
|
+
* The sequenceProducerState must be initialized by the user before calling
|
2731
|
+
* ZSTD_registerSequenceProducer(). The user is responsible for destroying the
|
2732
|
+
* sequenceProducerState.
|
2733
|
+
*
|
2734
|
+
* *** LIMITATIONS ***
|
2735
|
+
* This API is compatible with all zstd compression APIs which respect advanced parameters.
|
2736
|
+
* However, there are three limitations:
|
2737
|
+
*
|
2738
|
+
* First, the ZSTD_c_enableLongDistanceMatching cParam is not currently supported.
|
2739
|
+
* COMPRESSION WILL FAIL if it is enabled and the user tries to compress with a block-level
|
2740
|
+
* external sequence producer.
|
2741
|
+
* - Note that ZSTD_c_enableLongDistanceMatching is auto-enabled by default in some
|
2742
|
+
* cases (see its documentation for details). Users must explicitly set
|
2743
|
+
* ZSTD_c_enableLongDistanceMatching to ZSTD_ps_disable in such cases if an external
|
2744
|
+
* sequence producer is registered.
|
2745
|
+
* - As of this writing, ZSTD_c_enableLongDistanceMatching is disabled by default
|
2746
|
+
* whenever ZSTD_c_windowLog < 128MB, but that's subject to change. Users should
|
2747
|
+
* check the docs on ZSTD_c_enableLongDistanceMatching whenever the Block-Level Sequence
|
2748
|
+
* Producer API is used in conjunction with advanced settings (like ZSTD_c_windowLog).
|
2749
|
+
*
|
2750
|
+
* Second, history buffers are not currently supported. Concretely, zstd will always pass
|
2751
|
+
* dictSize == 0 to the external sequence producer (for now). This has two implications:
|
2752
|
+
* - Dictionaries are not currently supported. Compression will *not* fail if the user
|
2753
|
+
* references a dictionary, but the dictionary won't have any effect.
|
2754
|
+
* - Stream history is not currently supported. All advanced compression APIs, including
|
2755
|
+
* streaming APIs, work with external sequence producers, but each block is treated as
|
2756
|
+
* an independent chunk without history from previous blocks.
|
2757
|
+
*
|
2758
|
+
* Third, multi-threading within a single compression is not currently supported. In other words,
|
2759
|
+
* COMPRESSION WILL FAIL if ZSTD_c_nbWorkers > 0 and an external sequence producer is registered.
|
2760
|
+
* Multi-threading across compressions is fine: simply create one CCtx per thread.
|
2761
|
+
*
|
2762
|
+
* Long-term, we plan to overcome all three limitations. There is no technical blocker to
|
2763
|
+
* overcoming them. It is purely a question of engineering effort.
|
2764
|
+
*/
|
2765
|
+
|
2766
|
+
#define ZSTD_SEQUENCE_PRODUCER_ERROR ((size_t)(-1))
|
2767
|
+
|
2768
|
+
typedef size_t ZSTD_sequenceProducer_F (
|
2769
|
+
void* sequenceProducerState,
|
2770
|
+
ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
|
2771
|
+
const void* src, size_t srcSize,
|
2772
|
+
const void* dict, size_t dictSize,
|
2773
|
+
int compressionLevel,
|
2774
|
+
size_t windowSize
|
2775
|
+
);
|
2776
|
+
|
2777
|
+
/*! ZSTD_registerSequenceProducer() :
|
2778
|
+
* Instruct zstd to use a block-level external sequence producer function.
|
2779
|
+
*
|
2780
|
+
* The sequenceProducerState must be initialized by the caller, and the caller is
|
2781
|
+
* responsible for managing its lifetime. This parameter is sticky across
|
2782
|
+
* compressions. It will remain set until the user explicitly resets compression
|
2783
|
+
* parameters.
|
2784
|
+
*
|
2785
|
+
* Sequence producer registration is considered to be an "advanced parameter",
|
2786
|
+
* part of the "advanced API". This means it will only have an effect on compression
|
2787
|
+
* APIs which respect advanced parameters, such as compress2() and compressStream2().
|
2788
|
+
* Older compression APIs such as compressCCtx(), which predate the introduction of
|
2789
|
+
* "advanced parameters", will ignore any external sequence producer setting.
|
2790
|
+
*
|
2791
|
+
* The sequence producer can be "cleared" by registering a NULL function pointer. This
|
2792
|
+
* removes all limitations described above in the "LIMITATIONS" section of the API docs.
|
2793
|
+
*
|
2794
|
+
* The user is strongly encouraged to read the full API documentation (above) before
|
2795
|
+
* calling this function. */
|
2796
|
+
ZSTDLIB_STATIC_API void
|
2797
|
+
ZSTD_registerSequenceProducer(
|
2798
|
+
ZSTD_CCtx* cctx,
|
2799
|
+
void* sequenceProducerState,
|
2800
|
+
ZSTD_sequenceProducer_F* sequenceProducer
|
2801
|
+
);
|
2802
|
+
|
2803
|
+
|
2601
2804
|
/*********************************************************************
|
2602
|
-
* Buffer-less and synchronous inner streaming functions
|
2805
|
+
* Buffer-less and synchronous inner streaming functions (DEPRECATED)
|
2603
2806
|
*
|
2604
|
-
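* This is an advanced API, giving full control over buffer management, for users which need direct control over memory.
|
2605
|
-
* But it's also a complex one, with several restrictions, documented below.
|
2606
|
-
* Prefer normal streaming API for an easier experience.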
|
2807
|
+
* This API is deprecated, and will be removed in a future version.
|
2808
|
+
* It allows streaming (de)compression with user allocated buffers.
|
2809
|
+
* However, it is hard to use, and not as well tested as the rest of
|
2810
|
+
* our API.
|
2811
|
+
*
|
2812
|
+
* Please use the normal streaming API instead: ZSTD_compressStream2,
|
2813
|
+
* and ZSTD_decompressStream.
|
2814
|
+
* If there is functionality that you need, but it doesn't provide,
|
2815
|
+
* please open an issue on our GitHub.
|
2607
2816
|
********************************************************************* */
|
2608
2817
|
|
2609
2818
|
/**
|
@@ -2636,15 +2845,20 @@ ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
|
|
2636
2845
|
*/
|
2637
2846
|
|
2638
2847
|
/*===== Buffer-less streaming compression functions =====*/
|
2848
|
+
ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.")
|
2639
2849
|
ZSTDLIB_STATIC_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
|
2850
|
+
ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.")
|
2640
2851
|
ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
|
2852
|
+
ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.")
|
2641
2853
|
ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */
|
2642
2854
|
|
2643
2855
|
ZSTD_DEPRECATED("This function will likely be removed in a future release. It is misleading and has very limited utility.")
|
2644
2856
|
ZSTDLIB_STATIC_API
|
2645
2857
|
size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */
|
2646
2858
|
|
2859
|
+
ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.")
|
2647
2860
|
ZSTDLIB_STATIC_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
|
2861
|
+
ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.")
|
2648
2862
|
ZSTDLIB_STATIC_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
|
2649
2863
|
|
2650
2864
|
/* The ZSTD_compressBegin_advanced() and ZSTD_compressBegin_usingCDict_advanced() are now DEPRECATED and will generate a compiler warning */
|
@@ -2728,29 +2942,7 @@ size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_
|
|
2728
2942
|
*/
|
2729
2943
|
|
2730
2944
|
/*===== Buffer-less streaming decompression functions =====*/
|
2731
|
-
typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e;
|
2732
|
-
typedef struct {
|
2733
|
-
unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */
|
2734
|
-
unsigned long long windowSize; /* can be very large, up to <= frameContentSize */
|
2735
|
-
unsigned blockSizeMax;
|
2736
|
-
ZSTD_frameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */
|
2737
|
-
unsigned headerSize;
|
2738
|
-
unsigned dictID;
|
2739
|
-
unsigned checksumFlag;
|
2740
|
-
unsigned _reserved1;
|
2741
|
-
unsigned _reserved2;
|
2742
|
-
} ZSTD_frameHeader;
|
2743
2945
|
|
2744
|
-
/*! ZSTD_getFrameHeader() :
|
2745
|
-
* decode Frame Header, or requires larger `srcSize`.
|
2746
|
-
* @return : 0, `zfhPtr` is correctly filled,
|
2747
|
-
* >0, `srcSize` is too small, value is wanted `srcSize` amount,
|
2748
|
-
* or an error code, which can be tested using ZSTD_isError() */
|
2749
|
-
ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /**< doesn't consume input */
|
2750
|
-
/*! ZSTD_getFrameHeader_advanced() :
|
2751
|
-
* same as ZSTD_getFrameHeader(),
|
2752
|
-
* with added capability to select a format (like ZSTD_f_zstd1_magicless) */
|
2753
|
-
ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format);
|
2754
2946
|
ZSTDLIB_STATIC_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */
|
2755
2947
|
|
2756
2948
|
ZSTDLIB_STATIC_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
|
@@ -2769,11 +2961,23 @@ ZSTDLIB_STATIC_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
|
|
2769
2961
|
|
2770
2962
|
|
2771
2963
|
|
2772
|
-
/* ============================ */
|
2773
|
-
/** Block level API */
|
2774
|
-
/* ============================ */
|
2964
|
+
/* ========================================= */
|
2965
|
+
/** Block level API (DEPRECATED) */
|
2966
|
+
/* ========================================= */
|
2775
2967
|
|
2776
2968
|
/*!
|
2969
|
+
|
2970
|
+
This API is deprecated in favor of the regular compression API.
|
2971
|
+
You can get the frame header down to 2 bytes by setting:
|
2972
|
+
- ZSTD_c_format = ZSTD_f_zstd1_magicless
|
2973
|
+
- ZSTD_c_contentSizeFlag = 0
|
2974
|
+
- ZSTD_c_checksumFlag = 0
|
2975
|
+
- ZSTD_c_dictIDFlag = 0
|
2976
|
+
|
2977
|
+
This API is not as well tested as our normal API, so we recommend not using it.
|
2978
|
+
We will be removing it in a future version. If the normal API doesn't provide
|
2979
|
+
the functionality you need, please open a GitHub issue.
|
2980
|
+
|
2777
2981
|
Block functions produce and decode raw zstd blocks, without frame metadata.
|
2778
2982
|
Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes).
|
2779
2983
|
But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes.
|
@@ -2800,173 +3004,15 @@ ZSTDLIB_STATIC_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
|
|
2800
3004
|
*/
|
2801
3005
|
|
2802
3006
|
/*===== Raw zstd block functions =====*/
|
3007
|
+
ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.")
|
2803
3008
|
ZSTDLIB_STATIC_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx);
|
3009
|
+
ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.")
|
2804
3010
|
ZSTDLIB_STATIC_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
|
3011
|
+
ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.")
|
2805
3012
|
ZSTDLIB_STATIC_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
|
3013
|
+
ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.")
|
2806
3014
|
ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */
|
2807
3015
|
|
2808
|
-
|
2809
|
-
/* ********************* BLOCK-LEVEL SEQUENCE PRODUCER API *********************
|
2810
|
-
*
|
2811
|
-
* *** OVERVIEW ***
|
2812
|
-
* The Block-Level Sequence Producer API allows users to provide their own custom
|
2813
|
-
* sequence producer which libzstd invokes to process each block. The produced list
|
2814
|
-
* of sequences (literals and matches) is then post-processed by libzstd to produce
|
2815
|
-
* valid compressed blocks.
|
2816
|
-
*
|
2817
|
-
* This block-level offload API is a more granular complement of the existing
|
2818
|
-
* frame-level offload API ZSTD_compressSequences() (introduced in v1.5.1). It offers
|
2819
|
-
* an easier migration story for applications already integrated with libzstd: the
|
2820
|
-
* user application continues to invoke the same compression functions
|
2821
|
-
* ZSTD_compress2() or ZSTD_compressStream2() as usual, and transparently benefits
|
2822
|
-
* from the specific advantages of the external sequence producer. For example,
|
2823
|
-
* the sequence producer could be tuned to take advantage of known characteristics
|
2824
|
-
* of the input, to offer better speed / ratio, or could leverage hardware
|
2825
|
-
* acceleration not available within libzstd itself.
|
2826
|
-
*
|
2827
|
-
* See contrib/externalSequenceProducer for an example program employing the
|
2828
|
-
* Block-Level Sequence Producer API.
|
2829
|
-
*
|
2830
|
-
* *** USAGE ***
|
2831
|
-
* The user is responsible for implementing a function of type
|
2832
|
-
* ZSTD_sequenceProducer_F. For each block, zstd will pass the following
|
2833
|
-
* arguments to the user-provided function:
|
2834
|
-
*
|
2835
|
-
* - sequenceProducerState: a pointer to a user-managed state for the sequence
|
2836
|
-
* producer.
|
2837
|
-
*
|
2838
|
-
* - outSeqs, outSeqsCapacity: an output buffer for the sequence producer.
|
2839
|
-
* outSeqsCapacity is guaranteed >= ZSTD_sequenceBound(srcSize). The memory
|
2840
|
-
* backing outSeqs is managed by the CCtx.
|
2841
|
-
*
|
2842
|
-
* - src, srcSize: an input buffer for the sequence producer to parse.
|
2843
|
-
* srcSize is guaranteed to be <= ZSTD_BLOCKSIZE_MAX.
|
2844
|
-
*
|
2845
|
-
* - dict, dictSize: a history buffer, which may be empty, which the sequence
|
2846
|
-
* producer may reference as it parses the src buffer. Currently, zstd will
|
2847
|
-
* always pass dictSize == 0 into external sequence producers, but this will
|
2848
|
-
* change in the future.
|
2849
|
-
*
|
2850
|
-
* - compressionLevel: a signed integer representing the zstd compression level
|
2851
|
-
* set by the user for the current operation. The sequence producer may choose
|
2852
|
-
* to use this information to change its compression strategy and speed/ratio
|
2853
|
-
* tradeoff. Note: the compression level does not reflect zstd parameters set
|
2854
|
-
* through the advanced API.
|
2855
|
-
*
|
2856
|
-
* - windowSize: a size_t representing the maximum allowed offset for external
|
2857
|
-
* sequences. Note that sequence offsets are sometimes allowed to exceed the
|
2858
|
-
* windowSize if a dictionary is present, see doc/zstd_compression_format.md
|
2859
|
-
* for details.
|
2860
|
-
*
|
2861
|
-
* The user-provided function shall return a size_t representing the number of
|
2862
|
-
* sequences written to outSeqs. This return value will be treated as an error
|
2863
|
-
* code if it is greater than outSeqsCapacity. The return value must be non-zero
|
2864
|
-
* if srcSize is non-zero. The ZSTD_SEQUENCE_PRODUCER_ERROR macro is provided
|
2865
|
-
* for convenience, but any value greater than outSeqsCapacity will be treated as
|
2866
|
-
* an error code.
|
2867
|
-
*
|
2868
|
-
* If the user-provided function does not return an error code, the sequences
|
2869
|
-
* written to outSeqs must be a valid parse of the src buffer. Data corruption may
|
2870
|
-
* occur if the parse is not valid. A parse is defined to be valid if the
|
2871
|
-
* following conditions hold:
|
2872
|
-
* - The sum of matchLengths and literalLengths must equal srcSize.
|
2873
|
-
* - All sequences in the parse, except for the final sequence, must have
|
2874
|
-
* matchLength >= ZSTD_MINMATCH_MIN. The final sequence must have
|
2875
|
-
* matchLength >= ZSTD_MINMATCH_MIN or matchLength == 0.
|
2876
|
-
* - All offsets must respect the windowSize parameter as specified in
|
2877
|
-
* doc/zstd_compression_format.md.
|
2878
|
-
* - If the final sequence has matchLength == 0, it must also have offset == 0.
|
2879
|
-
*
|
2880
|
-
* zstd will only validate these conditions (and fail compression if they do not
|
2881
|
-
* hold) if the ZSTD_c_validateSequences cParam is enabled. Note that sequence
|
2882
|
-
* validation has a performance cost.
|
2883
|
-
*
|
2884
|
-
* If the user-provided function returns an error, zstd will either fall back
|
2885
|
-
* to an internal sequence producer or fail the compression operation. The user can
|
2886
|
-
* choose between the two behaviors by setting the ZSTD_c_enableSeqProducerFallback
|
2887
|
-
* cParam. Fallback compression will follow any other cParam settings, such as
|
2888
|
-
* compression level, the same as in a normal compression operation.
|
2889
|
-
*
|
2890
|
-
* The user shall instruct zstd to use a particular ZSTD_sequenceProducer_F
|
2891
|
-
* function by calling
|
2892
|
-
* ZSTD_registerSequenceProducer(cctx,
|
2893
|
-
* sequenceProducerState,
|
2894
|
-
* sequenceProducer)
|
2895
|
-
* This setting will persist until the next parameter reset of the CCtx.
|
2896
|
-
*
|
2897
|
-
* The sequenceProducerState must be initialized by the user before calling
|
2898
|
-
* ZSTD_registerSequenceProducer(). The user is responsible for destroying the
|
2899
|
-
* sequenceProducerState.
|
2900
|
-
*
|
2901
|
-
* *** LIMITATIONS ***
|
2902
|
-
* This API is compatible with all zstd compression APIs which respect advanced parameters.
|
2903
|
-
* However, there are three limitations:
|
2904
|
-
*
|
2905
|
-
* First, the ZSTD_c_enableLongDistanceMatching cParam is not currently supported.
|
2906
|
-
* COMPRESSION WILL FAIL if it is enabled and the user tries to compress with a block-level
|
2907
|
-
* external sequence producer.
|
2908
|
-
* - Note that ZSTD_c_enableLongDistanceMatching is auto-enabled by default in some
|
2909
|
-
* cases (see its documentation for details). Users must explicitly set
|
2910
|
-
* ZSTD_c_enableLongDistanceMatching to ZSTD_ps_disable in such cases if an external
|
2911
|
-
* sequence producer is registered.
|
2912
|
-
* - As of this writing, ZSTD_c_enableLongDistanceMatching is disabled by default
|
2913
|
-
* whenever the window size (1 << ZSTD_c_windowLog) is < 128MB, but that's subject to change. Users should
|
2914
|
-
* check the docs on ZSTD_c_enableLongDistanceMatching whenever the Block-Level Sequence
|
2915
|
-
* Producer API is used in conjunction with advanced settings (like ZSTD_c_windowLog).
|
2916
|
-
*
|
2917
|
-
* Second, history buffers are not currently supported. Concretely, zstd will always pass
|
2918
|
-
* dictSize == 0 to the external sequence producer (for now). This has two implications:
|
2919
|
-
* - Dictionaries are not currently supported. Compression will *not* fail if the user
|
2920
|
-
* references a dictionary, but the dictionary won't have any effect.
|
2921
|
-
* - Stream history is not currently supported. All advanced compression APIs, including
|
2922
|
-
* streaming APIs, work with external sequence producers, but each block is treated as
|
2923
|
-
* an independent chunk without history from previous blocks.
|
2924
|
-
*
|
2925
|
-
* Third, multi-threading within a single compression is not currently supported. In other words,
|
2926
|
-
* COMPRESSION WILL FAIL if ZSTD_c_nbWorkers > 0 and an external sequence producer is registered.
|
2927
|
-
* Multi-threading across compressions is fine: simply create one CCtx per thread.
|
2928
|
-
*
|
2929
|
-
* Long-term, we plan to overcome all three limitations. There is no technical blocker to
|
2930
|
-
* overcoming them. It is purely a question of engineering effort.
|
2931
|
-
*/
|
2932
|
-
|
2933
|
-
#define ZSTD_SEQUENCE_PRODUCER_ERROR ((size_t)(-1))
|
2934
|
-
|
2935
|
-
typedef size_t ZSTD_sequenceProducer_F (
|
2936
|
-
void* sequenceProducerState,
|
2937
|
-
ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
|
2938
|
-
const void* src, size_t srcSize,
|
2939
|
-
const void* dict, size_t dictSize,
|
2940
|
-
int compressionLevel,
|
2941
|
-
size_t windowSize
|
2942
|
-
);
|
2943
|
-
|
2944
|
-
/*! ZSTD_registerSequenceProducer() :
|
2945
|
-
* Instruct zstd to use a block-level external sequence producer function.
|
2946
|
-
*
|
2947
|
-
* The sequenceProducerState must be initialized by the caller, and the caller is
|
2948
|
-
* responsible for managing its lifetime. This parameter is sticky across
|
2949
|
-
* compressions. It will remain set until the user explicitly resets compression
|
2950
|
-
* parameters.
|
2951
|
-
*
|
2952
|
-
* Sequence producer registration is considered to be an "advanced parameter",
|
2953
|
-
* part of the "advanced API". This means it will only have an effect on compression
|
2954
|
-
* APIs which respect advanced parameters, such as compress2() and compressStream2().
|
2955
|
-
* Older compression APIs such as compressCCtx(), which predate the introduction of
|
2956
|
-
* "advanced parameters", will ignore any external sequence producer setting.
|
2957
|
-
*
|
2958
|
-
* The sequence producer can be "cleared" by registering a NULL function pointer. This
|
2959
|
-
* removes all limitations described above in the "LIMITATIONS" section of the API docs.
|
2960
|
-
*
|
2961
|
-
* The user is strongly encouraged to read the full API documentation (above) before
|
2962
|
-
* calling this function. */
|
2963
|
-
ZSTDLIB_STATIC_API void
|
2964
|
-
ZSTD_registerSequenceProducer(
|
2965
|
-
ZSTD_CCtx* cctx,
|
2966
|
-
void* sequenceProducerState,
|
2967
|
-
ZSTD_sequenceProducer_F* sequenceProducer
|
2968
|
-
);
|
2969
|
-
|
2970
3016
|
#endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */
|
2971
3017
|
|
2972
3018
|
#if defined (__cplusplus)
|
data/ext/zstdruby/main.c
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
#include <common.h>
|
2
2
|
VALUE rb_mZstd;
|
3
3
|
void zstd_ruby_init(void);
|
4
|
+
void zstd_ruby_skippable_frame_init(void);
|
4
5
|
void zstd_ruby_streaming_compress_init(void);
|
5
6
|
void zstd_ruby_streaming_decompress_init(void);
|
6
7
|
|
@@ -13,6 +14,7 @@ Init_zstdruby(void)
|
|
13
14
|
|
14
15
|
rb_mZstd = rb_define_module("Zstd");
|
15
16
|
zstd_ruby_init();
|
17
|
+
zstd_ruby_skippable_frame_init();
|
16
18
|
zstd_ruby_streaming_compress_init();
|
17
19
|
zstd_ruby_streaming_decompress_init();
|
18
20
|
}
|