RubyGems - deflate-ruby - Versions diffs - 1.0.1 → 1.0.2 - Mend

deflate-ruby 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (138) hide show

data/ext/deflate_ruby/libdeflate/ht_matchfinder.h ADDED Viewed

@@ -0,0 +1,234 @@
+/*
+ * ht_matchfinder.h - Lempel-Ziv matchfinding with a hash table
+ *
+ * Copyright 2022 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ---------------------------------------------------------------------------
+ *
+ * This is a Hash Table (ht) matchfinder.
+ *
+ * This is a variant of the Hash Chains (hc) matchfinder that is optimized for
+ * very fast compression.  The ht_matchfinder stores the hash chains inline in
+ * the hash table, whereas the hc_matchfinder stores them in a separate array.
+ * Storing the hash chains inline is the faster method when max_search_depth
+ * (the maximum chain length) is very small.  It is not appropriate when
+ * max_search_depth is larger, as then it uses too much memory.
+ *
+ * Due to its focus on speed, the ht_matchfinder doesn't support length 3
+ * matches.  It also doesn't allow max_search_depth to vary at runtime; it is
+ * fixed at build time as HT_MATCHFINDER_BUCKET_SIZE.
+ *
+ * See hc_matchfinder.h for more information.
+ */
+#ifndef LIB_HT_MATCHFINDER_H
+#define LIB_HT_MATCHFINDER_H
+#include "matchfinder_common.h"
+#define HT_MATCHFINDER_HASH_ORDER	15	/* log2 of the number of hash buckets */
+#define HT_MATCHFINDER_BUCKET_SIZE	2	/* positions kept per bucket; the fixed search depth */
+#define HT_MATCHFINDER_MIN_MATCH_LEN	4	/* shortest match reported; length 3 is unsupported */
+/*
+ * Minimum value of max_len for ht_matchfinder_longest_match().  It is one more
+ * than the minimum match length because that function also hashes the 4 bytes
+ * starting at in_next + 1 on behalf of the following position.
+ */
+#define HT_MATCHFINDER_REQUIRED_NBYTES	5
+struct MATCHFINDER_ALIGNED ht_matchfinder {	/*
+	 * One bucket per hash value.  Slot 0 of a bucket receives the newest
+	 * position; older positions are pushed toward the higher slots.
+	 */
+	mf_pos_t hash_tab[1UL << HT_MATCHFINDER_HASH_ORDER]
+			 [HT_MATCHFINDER_BUCKET_SIZE];
+};
+/*
+ * Prepare @mf for use on a new input buffer.
+ *
+ * The whole structure is handed to matchfinder_init() as one flat array of
+ * mf_pos_t; presumably that helper fills it with the "no position" initial
+ * value (see matchfinder_common.h).
+ */
+static forceinline void
+ht_matchfinder_init(struct ht_matchfinder *mf)
+{
+	mf_pos_t * const entries = (mf_pos_t *)mf;
+	/* Checked at build time: the size is a whole number of alignment units. */
+	STATIC_ASSERT(sizeof(struct ht_matchfinder) %
+		      MATCHFINDER_SIZE_ALIGNMENT == 0);
+	matchfinder_init(entries, sizeof(struct ht_matchfinder));
+}
+/*
+ * Rebase every stored position in @mf after the window has filled up.
+ *
+ * Callers pair this with advancing their base pointer by
+ * MATCHFINDER_WINDOW_SIZE, so matchfinder_rebase() presumably shifts the
+ * stored positions by the same amount (see matchfinder_common.h).
+ */
+static forceinline void
+ht_matchfinder_slide_window(struct ht_matchfinder *mf)
+{
+	mf_pos_t * const entries = (mf_pos_t *)mf;
+	matchfinder_rebase(entries, sizeof(struct ht_matchfinder));
+}
+/*
+ * Search the hash bucket of the sequence at @in_next for the longest match,
+ * and insert the current position at the front of that bucket.
+ *
+ * @mf: the matchfinder whose hash table is searched and updated
+ * @in_base_p: pointer to the base of the current window.  When the current
+ *	position equals MATCHFINDER_WINDOW_SIZE, the table is rebased and
+ *	*in_base_p is advanced by MATCHFINDER_WINDOW_SIZE.
+ * @in_next: the position to find a match for
+ * @max_len: the maximum match length to return
+ * @nice_len: once a match of at least this length is found, no further
+ *	candidates are examined
+ * @next_hash: on entry, the bucket index for @in_next; on return, the bucket
+ *	index for @in_next + 1
+ * @offset_ret: receives the distance from the best match back to @in_next, or
+ *	0 if no match was found
+ *
+ * Returns the length of the best match found, or 0 if no candidate matched the
+ * first 4 bytes.
+ *
+ * NOTE(review): the BUCKET_SIZE == 2 path loads the byte at offset best_len
+ * from both pointers after checking only best_len < nice_len, so it appears to
+ * rely on callers keeping nice_len <= max_len -- confirm at the call sites.
+ *
+ * Note: max_len must be >= HT_MATCHFINDER_REQUIRED_NBYTES
+ */
+static forceinline u32
+ht_matchfinder_longest_match(struct ht_matchfinder * const mf,
+			     const u8 ** const in_base_p,
+			     const u8 * const in_next,
+			     const u32 max_len,
+			     const u32 nice_len,
+			     u32 * const next_hash,
+			     u32 * const offset_ret)
+{
+	u32 best_len = 0;
+	const u8 *best_matchptr = in_next;	/* makes *offset_ret 0 when nothing matches */
+	u32 cur_pos = in_next - *in_base_p;
+	const u8 *in_base;
+	mf_pos_t cutoff;
+	u32 hash;
+	u32 seq;
+	mf_pos_t cur_node;
+	const u8 *matchptr;
+#if HT_MATCHFINDER_BUCKET_SIZE > 1
+	mf_pos_t to_insert;
+	u32 len;
+#endif
+#if HT_MATCHFINDER_BUCKET_SIZE > 2
+	int i;
+#endif
+	/* This is assumed throughout this function. */
+	STATIC_ASSERT(HT_MATCHFINDER_MIN_MATCH_LEN == 4);
+	if (cur_pos == MATCHFINDER_WINDOW_SIZE) {	/* window is full: rebase before inserting */
+		ht_matchfinder_slide_window(mf);
+		*in_base_p += MATCHFINDER_WINDOW_SIZE;
+		cur_pos = 0;
+	}
+	in_base = *in_base_p;
+	cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE;	/* entries <= cutoff end the search */
+	hash = *next_hash;	/* bucket index for in_next, supplied by the caller */
+	STATIC_ASSERT(HT_MATCHFINDER_REQUIRED_NBYTES == 5);
+	*next_hash = lz_hash(get_unaligned_le32(in_next + 1),	/* reads in_next[1..4] */
+			     HT_MATCHFINDER_HASH_ORDER);
+	seq = load_u32_unaligned(in_next);	/* candidates must match these 4 bytes */
+	prefetchw(&mf->hash_tab[*next_hash]);	/* bucket the next position will use */
+#if HT_MATCHFINDER_BUCKET_SIZE == 1
+	/* Hand-unrolled version for BUCKET_SIZE == 1 */
+	cur_node = mf->hash_tab[hash][0];
+	mf->hash_tab[hash][0] = cur_pos;
+	if (cur_node <= cutoff)
+		goto out;
+	matchptr = &in_base[cur_node];
+	if (load_u32_unaligned(matchptr) == seq) {
+		best_len = lz_extend(in_next, matchptr, 4, max_len);
+		best_matchptr = matchptr;
+	}
+#elif HT_MATCHFINDER_BUCKET_SIZE == 2
+	/*
+	 * Hand-unrolled version for BUCKET_SIZE == 2.  The logic here also
+	 * differs slightly in that it copies the first entry to the second even
+	 * if nice_len is reached on the first, as this can be slightly faster.
+	 */
+	cur_node = mf->hash_tab[hash][0];
+	mf->hash_tab[hash][0] = cur_pos;	/* newest position goes in slot 0 */
+	if (cur_node <= cutoff)
+		goto out;
+	matchptr = &in_base[cur_node];
+	to_insert = cur_node;
+	cur_node = mf->hash_tab[hash][1];
+	mf->hash_tab[hash][1] = to_insert;	/* previous slot 0 entry moves to slot 1 */
+	if (load_u32_unaligned(matchptr) == seq) {
+		best_len = lz_extend(in_next, matchptr, 4, max_len);
+		best_matchptr = matchptr;
+		if (cur_node <= cutoff || best_len >= nice_len)
+			goto out;
+		matchptr = &in_base[cur_node];
+		if (load_u32_unaligned(matchptr) == seq &&
+		    load_u32_unaligned(matchptr + best_len - 3) ==	/* quick reject: must also */
+		    load_u32_unaligned(in_next + best_len - 3)) {	/* match at offset best_len */
+			len = lz_extend(in_next, matchptr, 4, max_len);
+			if (len > best_len) {
+				best_len = len;
+				best_matchptr = matchptr;
+			}
+		}
+	} else {
+		if (cur_node <= cutoff)
+			goto out;
+		matchptr = &in_base[cur_node];
+		if (load_u32_unaligned(matchptr) == seq) {
+			best_len = lz_extend(in_next, matchptr, 4, max_len);
+			best_matchptr = matchptr;
+		}
+	}
+#else
+	/* Generic version for HT_MATCHFINDER_BUCKET_SIZE > 2 */
+	to_insert = cur_pos;	/* value written into the next slot visited */
+	for (i = 0; i < HT_MATCHFINDER_BUCKET_SIZE; i++) {
+		cur_node = mf->hash_tab[hash][i];
+		mf->hash_tab[hash][i] = to_insert;
+		if (cur_node <= cutoff)
+			goto out;
+		matchptr = &in_base[cur_node];
+		if (load_u32_unaligned(matchptr) == seq) {
+			len = lz_extend(in_next, matchptr, 4, max_len);
+			if (len > best_len) {
+				best_len = len;
+				best_matchptr = matchptr;
+				if (best_len >= nice_len)
+					goto out;
+			}
+		}
+		to_insert = cur_node;	/* displaced entry shifts down one slot */
+	}
+#endif
+out:
+	*offset_ret = in_next - best_matchptr;
+	return best_len;
+}
+static forceinline void
+ht_matchfinder_skip_bytes(struct ht_matchfinder * const mf,
+			  const u8 ** const in_base_p,
+			  const u8 *in_next,
+			  const u8 * const in_end,
+			  const u32 count,
+			  u32 * const next_hash)
+{	/*
+	 * Insert @count consecutive positions, starting at @in_next, into the
+	 * hash table without searching for matches.
+	 *
+	 * @in_base_p is advanced by MATCHFINDER_WINDOW_SIZE if the table has to
+	 * be rebased.  @next_hash holds the bucket index for @in_next on entry
+	 * and for @in_next + @count on return.
+	 *
+	 * If fewer than @count + HT_MATCHFINDER_REQUIRED_NBYTES bytes remain
+	 * before @in_end, nothing is inserted and *next_hash is left unchanged.
+	 *
+	 * NOTE(review): the do-while below runs its body before testing the
+	 * counter, so @count is assumed to be nonzero -- confirm at call sites.
+	 */
+	s32 cur_pos = in_next - *in_base_p;	/* signed: goes negative after an early rebase */
+	u32 hash;
+	u32 remaining = count;
+	int i;
+	if (unlikely(count + HT_MATCHFINDER_REQUIRED_NBYTES > in_end - in_next))
+		return;
+	if (cur_pos + count - 1 >= MATCHFINDER_WINDOW_SIZE) {	/* run would reach the window end */
+		ht_matchfinder_slide_window(mf);
+		*in_base_p += MATCHFINDER_WINDOW_SIZE;
+		cur_pos -= MATCHFINDER_WINDOW_SIZE;
+	}
+	hash = *next_hash;
+	do {
+		for (i = HT_MATCHFINDER_BUCKET_SIZE - 1; i > 0; i--)	/* shift older entries down */
+			mf->hash_tab[hash][i] = mf->hash_tab[hash][i - 1];
+		mf->hash_tab[hash][0] = cur_pos;	/* newest position goes in slot 0 */
+		hash = lz_hash(get_unaligned_le32(++in_next),	/* bucket index for the next position */
+			       HT_MATCHFINDER_HASH_ORDER);
+		cur_pos++;
+	} while (--remaining);
+	prefetchw(&mf->hash_tab[hash]);
+	*next_hash = hash;
+}
+#endif /* LIB_HT_MATCHFINDER_H */

data/ext/deflate_ruby/libdeflate/lib_common.h ADDED Viewed

@@ -0,0 +1,106 @@
+/*
+ * lib_common.h - internal header included by all library code
+ */
+#ifndef LIB_LIB_COMMON_H
+#define LIB_LIB_COMMON_H
+#ifdef LIBDEFLATE_H
+ /*
+  * When building the library, LIBDEFLATEAPI needs to be defined properly before
+  * including libdeflate.h.
+  */
+#  error "lib_common.h must always be included before libdeflate.h"
+#endif
+#if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))	/* DLL build on Windows/Cygwin */
+#  define LIBDEFLATE_EXPORT_SYM  __declspec(dllexport)
+#elif defined(__GNUC__)
+#  define LIBDEFLATE_EXPORT_SYM  __attribute__((visibility("default")))	/* give API symbols default visibility */
+#else
+#  define LIBDEFLATE_EXPORT_SYM	/* no export annotation for other compilers */
+#endif
+/*
+ * On i386, gcc assumes that the stack is 16-byte aligned at function entry.
+ * However, some compilers (e.g. MSVC) and programming languages (e.g. Delphi)
+ * only guarantee 4-byte alignment when calling functions.  This is mainly an
+ * issue on Windows, but it has been seen on Linux too.  Work around this ABI
+ * incompatibility by realigning the stack pointer when entering libdeflate.
+ * This prevents crashes in SSE/AVX code.
+ */
+#if defined(__GNUC__) && defined(__i386__)
+#  define LIBDEFLATE_ALIGN_STACK  __attribute__((force_align_arg_pointer))
+#else
+#  define LIBDEFLATE_ALIGN_STACK	/* not needed outside gcc-compatible i386 builds */
+#endif
+#define LIBDEFLATEAPI	LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK	/* must be defined before libdeflate.h is included */
+#include "../common_defs.h"
+typedef void *(*malloc_func_t)(size_t);	/* allocation callback: takes a size, returns a pointer */
+typedef void (*free_func_t)(void *);	/* deallocation callback: takes a pointer */
+extern malloc_func_t libdeflate_default_malloc_func;	/* defined elsewhere in the library */
+extern free_func_t libdeflate_default_free_func;	/* defined elsewhere in the library */
+void *libdeflate_aligned_malloc(malloc_func_t malloc_func,
+				size_t alignment, size_t size);	/* NOTE(review): presumably an aligned allocation made through malloc_func -- confirm */
+void libdeflate_aligned_free(free_func_t free_func, void *ptr);	/* NOTE(review): presumably the matching release for the above -- confirm */
+#ifdef FREESTANDING
+/*
+ * With -ffreestanding, <string.h> may be missing, and we must provide
+ * implementations of memset(), memcpy(), memmove(), and memcmp().
+ * See https://gcc.gnu.org/onlinedocs/gcc/Standards.html
+ *
+ * Also, -ffreestanding disables interpreting calls to these functions as
+ * built-ins.  E.g., calling memcpy(&v, p, WORDBYTES) will make a function call,
+ * not be optimized to a single load instruction.  For performance reasons we
+ * don't want that.  So, declare these functions as macros that expand to the
+ * corresponding built-ins.  This approach is recommended in the gcc man page.
+ * We still need the actual function definitions in case gcc calls them.
+ *
+ * Each prototype below is declared before the macro of the same name, so the
+ * prototype itself is not rewritten by the macro.
+ */
+void *memset(void *s, int c, size_t n);
+#define memset(s, c, n)		__builtin_memset((s), (c), (n))
+void *memcpy(void *dest, const void *src, size_t n);
+#define memcpy(dest, src, n)	__builtin_memcpy((dest), (src), (n))
+void *memmove(void *dest, const void *src, size_t n);
+#define memmove(dest, src, n)	__builtin_memmove((dest), (src), (n))
+int memcmp(const void *s1, const void *s2, size_t n);
+#define memcmp(s1, s2, n)	__builtin_memcmp((s1), (s2), (n))
+#undef LIBDEFLATE_ENABLE_ASSERTIONS	/* runtime assertions are always off in freestanding builds */
+#else
+#  include <string.h>
+   /*
+    * To prevent false positive static analyzer warnings, ensure that assertions
+    * are visible to the static analyzer.
+    */
+#  ifdef __clang_analyzer__
+#    define LIBDEFLATE_ENABLE_ASSERTIONS
+#  endif
+#endif
+/*
+ * Runtime assertion support.  Don't enable this in production builds; it may
+ * hurt performance significantly.
+ */
+#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
+NORETURN void
+libdeflate_assertion_failed(const char *expr, const char *file, int line);
+#define ASSERT(expr) { if (unlikely(!(expr))) \
+	libdeflate_assertion_failed(#expr, __FILE__, __LINE__); }
+#else
+#define ASSERT(expr) (void)(expr)
+#endif
+#define CONCAT_IMPL(a, b)	a##b	/* pastes its arguments exactly as written */
+#define CONCAT(a, b)		CONCAT_IMPL(a, b)	/* extra level so macro arguments are expanded before pasting */
+#define ADD_SUFFIX(name)	CONCAT(name, SUFFIX)	/* name + current value of SUFFIX, which the includer must define */
+#endif /* LIB_LIB_COMMON_H */

data/ext/deflate_ruby/libdeflate/libdeflate.h CHANGED Viewed

@@ -13,8 +13,8 @@ extern "C" {
 #endif
 #define LIBDEFLATE_VERSION_MAJOR	1
-#define LIBDEFLATE_VERSION_MINOR	21
-#define LIBDEFLATE_VERSION_STRING	"1.21"
+#define LIBDEFLATE_VERSION_MINOR	25
+#define LIBDEFLATE_VERSION_STRING	"1.25"
 /*
  * Users of libdeflate.dll on Windows can define LIBDEFLATE_DLL to cause

data/ext/deflate_ruby/libdeflate/{lib/matchfinder_common.h → matchfinder_common.h} RENAMED Viewed

@@ -175,11 +175,11 @@ lz_hash(u32 seq, unsigned num_bits)
  * Return the number of bytes at @matchptr that match the bytes at @strptr, up
  * to a maximum of @max_len.  Initially, @start_len bytes are matched.
  */
-static forceinline unsigned
+static forceinline u32
 lz_extend(const u8 * const strptr, const u8 * const matchptr,
-	  const unsigned start_len, const unsigned max_len)
+	  const u32 start_len, const u32 max_len)
 {
-	unsigned len = start_len;
+	u32 len = start_len;
 	machine_word_t v_word;
 	if (UNALIGNED_ACCESS_IS_FAST) {

data/ext/deflate_ruby/libdeflate/x86/adler32_impl.h ADDED Viewed

@@ -0,0 +1,135 @@
+/*
+ * x86/adler32_impl.h - x86 implementations of Adler-32 checksum algorithm
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef LIB_X86_ADLER32_IMPL_H
+#define LIB_X86_ADLER32_IMPL_H
+#include "cpu_features.h"
+/*
+ * SSE2 and AVX2 implementations.  Used on older CPUs.
+ *
+ * Each implementation is produced by defining the parameter macros below and
+ * then including adler32_template.h.  The self-referential
+ * "#define name name" lets arch_select_adler32_func() check with #ifdef
+ * whether that implementation was compiled in.
+ *
+ * NOTE(review): the parameters are redefined for the second instantiation
+ * without any #undef in this file, so the template presumably #undefs them
+ * itself -- confirm in adler32_template.h.
+ */
+#if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)
+#  define adler32_x86_sse2	adler32_x86_sse2
+#  define SUFFIX			   _sse2
+#  define ATTRIBUTES		_target_attribute("sse2")
+#  define VL			16	/* vector length in bytes (128-bit) */
+#  define USE_VNNI		0
+#  define USE_AVX512		0
+#  include "adler32_template.h"
+#  define adler32_x86_avx2	adler32_x86_avx2
+#  define SUFFIX			   _avx2
+#  define ATTRIBUTES		_target_attribute("avx2")
+#  define VL			32	/* vector length in bytes (256-bit) */
+#  define USE_VNNI		0
+#  define USE_AVX512		0
+#  include "adler32_template.h"
+#endif
+/*
+ * AVX-VNNI implementation.  This is used on CPUs that have AVX2 and AVX-VNNI
+ * but don't have AVX-512, for example Intel Alder Lake.
+ *
+ * Unusually for a new CPU feature, gcc added support for the AVX-VNNI
+ * intrinsics (in gcc 11.1) slightly before binutils added support for
+ * assembling AVX-VNNI instructions (in binutils 2.36).  Distros can reasonably
+ * have gcc 11 with binutils 2.35.  Because of this issue, we check for gcc 12
+ * instead of gcc 11.  (libdeflate supports direct compilation without a
+ * configure step, so checking the binutils version is not always an option.)
+ *
+ * A build can also opt out explicitly by defining
+ * LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX_VNNI.
+ */
+#if (GCC_PREREQ(12, 1) || CLANG_PREREQ(12, 0, 13000000) || MSVC_PREREQ(1930)) && \
+	!defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX_VNNI)
+#  define adler32_x86_avx2_vnni	adler32_x86_avx2_vnni
+#  define SUFFIX			   _avx2_vnni
+#  define ATTRIBUTES		_target_attribute("avx2,avxvnni")
+#  define VL			32	/* vector length in bytes (256-bit) */
+#  define USE_VNNI		1
+#  define USE_AVX512		0
+#  include "adler32_template.h"
+#endif
+#if (GCC_PREREQ(8, 1) || CLANG_PREREQ(6, 0, 10000000) || MSVC_PREREQ(1920)) && \
+	!defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX512VNNI)
+/*
+ * AVX512VNNI implementation using 256-bit vectors.  This is very similar to the
+ * AVX-VNNI implementation but takes advantage of masking and more registers.
+ * This is used on certain older Intel CPUs, specifically Ice Lake and Tiger
+ * Lake, which support AVX512VNNI but downclock a bit too eagerly when ZMM
+ * registers are used.
+ *
+ * Note that this variant additionally needs avx512vl, as its target attribute
+ * and the runtime check in arch_select_adler32_func() both show.
+ */
+#  define adler32_x86_avx512_vl256_vnni	adler32_x86_avx512_vl256_vnni
+#  define SUFFIX				   _avx512_vl256_vnni
+#  define ATTRIBUTES		_target_attribute("avx512bw,avx512vl,avx512vnni")
+#  define VL			32	/* vector length in bytes (256-bit) */
+#  define USE_VNNI		1
+#  define USE_AVX512		1
+#  include "adler32_template.h"
+/*
+ * AVX512VNNI implementation using 512-bit vectors.  This is used on CPUs that
+ * have a good AVX-512 implementation including AVX512VNNI.
+ */
+#  define adler32_x86_avx512_vl512_vnni	adler32_x86_avx512_vl512_vnni
+#  define SUFFIX				   _avx512_vl512_vnni
+#  define ATTRIBUTES		_target_attribute("avx512bw,avx512vnni")
+#  define VL			64	/* vector length in bytes (512-bit) */
+#  define USE_VNNI		1
+#  define USE_AVX512		1
+#  include "adler32_template.h"
+#endif
+/*
+ * Pick the Adler-32 implementation to use on the running CPU.
+ *
+ * Only implementations that were compiled in (detected through their
+ * same-named macros) are considered.  They are tried from the most capable
+ * instruction set down to SSE2, and the first one whose CPU feature checks
+ * pass is returned.  Returns NULL when none qualifies.
+ */
+static inline adler32_func_t
+arch_select_adler32_func(void)
+{
+	const u32 cpu MAYBE_UNUSED = get_x86_cpu_features();
+#ifdef adler32_x86_avx512_vl512_vnni
+	/* 512-bit AVX512VNNI; also gated on the ZMM feature flag */
+	if ((cpu & X86_CPU_FEATURE_ZMM) && HAVE_AVX512BW(cpu) &&
+	    HAVE_AVX512VNNI(cpu)) {
+		return adler32_x86_avx512_vl512_vnni;
+	}
+#endif
+#ifdef adler32_x86_avx512_vl256_vnni
+	/* 256-bit AVX512VNNI; needs AVX512VL in addition */
+	if (HAVE_AVX512BW(cpu) && HAVE_AVX512VL(cpu) &&
+	    HAVE_AVX512VNNI(cpu)) {
+		return adler32_x86_avx512_vl256_vnni;
+	}
+#endif
+#ifdef adler32_x86_avx2_vnni
+	/* AVX2 with AVX-VNNI */
+	if (HAVE_AVX2(cpu) && HAVE_AVXVNNI(cpu)) {
+		return adler32_x86_avx2_vnni;
+	}
+#endif
+#ifdef adler32_x86_avx2
+	/* Plain AVX2 */
+	if (HAVE_AVX2(cpu)) {
+		return adler32_x86_avx2;
+	}
+#endif
+#ifdef adler32_x86_sse2
+	/* SSE2 */
+	if (HAVE_SSE2(cpu)) {
+		return adler32_x86_sse2;
+	}
+#endif
+	/* Nothing suitable on this CPU */
+	return NULL;
+}
+#define arch_select_adler32_func	arch_select_adler32_func
+#endif /* LIB_X86_ADLER32_IMPL_H */