libdeflate 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.gitmodules +3 -0
- data/.rspec +2 -0
- data/.rubocop.yml +1 -0
- data/.rubocop_todo.yml +9 -0
- data/.travis.yml +5 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +52 -0
- data/Rakefile +15 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/ext/libdeflate/extconf.rb +14 -0
- data/ext/libdeflate/libdeflate/.gitignore +19 -0
- data/ext/libdeflate/libdeflate/COPYING +21 -0
- data/ext/libdeflate/libdeflate/Makefile +231 -0
- data/ext/libdeflate/libdeflate/Makefile.msc +64 -0
- data/ext/libdeflate/libdeflate/NEWS +57 -0
- data/ext/libdeflate/libdeflate/README.md +170 -0
- data/ext/libdeflate/libdeflate/common/common_defs.h +351 -0
- data/ext/libdeflate/libdeflate/common/compiler_gcc.h +134 -0
- data/ext/libdeflate/libdeflate/common/compiler_msc.h +95 -0
- data/ext/libdeflate/libdeflate/lib/adler32.c +213 -0
- data/ext/libdeflate/libdeflate/lib/adler32_impl.h +281 -0
- data/ext/libdeflate/libdeflate/lib/aligned_malloc.c +57 -0
- data/ext/libdeflate/libdeflate/lib/aligned_malloc.h +13 -0
- data/ext/libdeflate/libdeflate/lib/bt_matchfinder.h +357 -0
- data/ext/libdeflate/libdeflate/lib/crc32.c +368 -0
- data/ext/libdeflate/libdeflate/lib/crc32_impl.h +286 -0
- data/ext/libdeflate/libdeflate/lib/crc32_table.h +526 -0
- data/ext/libdeflate/libdeflate/lib/decompress_impl.h +404 -0
- data/ext/libdeflate/libdeflate/lib/deflate_compress.c +2817 -0
- data/ext/libdeflate/libdeflate/lib/deflate_compress.h +14 -0
- data/ext/libdeflate/libdeflate/lib/deflate_constants.h +66 -0
- data/ext/libdeflate/libdeflate/lib/deflate_decompress.c +889 -0
- data/ext/libdeflate/libdeflate/lib/gzip_compress.c +95 -0
- data/ext/libdeflate/libdeflate/lib/gzip_constants.h +45 -0
- data/ext/libdeflate/libdeflate/lib/gzip_decompress.c +130 -0
- data/ext/libdeflate/libdeflate/lib/hc_matchfinder.h +405 -0
- data/ext/libdeflate/libdeflate/lib/lib_common.h +35 -0
- data/ext/libdeflate/libdeflate/lib/matchfinder_avx2.h +53 -0
- data/ext/libdeflate/libdeflate/lib/matchfinder_common.h +205 -0
- data/ext/libdeflate/libdeflate/lib/matchfinder_neon.h +61 -0
- data/ext/libdeflate/libdeflate/lib/matchfinder_sse2.h +53 -0
- data/ext/libdeflate/libdeflate/lib/unaligned.h +202 -0
- data/ext/libdeflate/libdeflate/lib/x86_cpu_features.c +169 -0
- data/ext/libdeflate/libdeflate/lib/x86_cpu_features.h +48 -0
- data/ext/libdeflate/libdeflate/lib/zlib_compress.c +87 -0
- data/ext/libdeflate/libdeflate/lib/zlib_constants.h +21 -0
- data/ext/libdeflate/libdeflate/lib/zlib_decompress.c +91 -0
- data/ext/libdeflate/libdeflate/libdeflate.h +274 -0
- data/ext/libdeflate/libdeflate/programs/benchmark.c +558 -0
- data/ext/libdeflate/libdeflate/programs/checksum.c +197 -0
- data/ext/libdeflate/libdeflate/programs/detect.sh +62 -0
- data/ext/libdeflate/libdeflate/programs/gzip.c +603 -0
- data/ext/libdeflate/libdeflate/programs/prog_util.c +530 -0
- data/ext/libdeflate/libdeflate/programs/prog_util.h +162 -0
- data/ext/libdeflate/libdeflate/programs/test_checksums.c +135 -0
- data/ext/libdeflate/libdeflate/programs/tgetopt.c +118 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/Makefile +12 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_compress/fuzz.c +40 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_compress/inputs/0 +0 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_decompress/fuzz.c +28 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/deflate_decompress/inputs/0 +3 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/gzip_decompress/fuzz.c +28 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/gzip_decompress/inputs/0 +0 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/prepare_for_fuzz.sh +14 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/zlib_decompress/fuzz.c +28 -0
- data/ext/libdeflate/libdeflate/tools/afl-fuzz/zlib_decompress/inputs/0 +3 -0
- data/ext/libdeflate/libdeflate/tools/android_build.sh +104 -0
- data/ext/libdeflate/libdeflate/tools/checksum_benchmarks.sh +76 -0
- data/ext/libdeflate/libdeflate/tools/exec_tests.sh +30 -0
- data/ext/libdeflate/libdeflate/tools/gen_crc32_multipliers.c +108 -0
- data/ext/libdeflate/libdeflate/tools/gen_crc32_table.c +100 -0
- data/ext/libdeflate/libdeflate/tools/gzip_tests.sh +412 -0
- data/ext/libdeflate/libdeflate/tools/make-windows-releases +21 -0
- data/ext/libdeflate/libdeflate/tools/mips_build.sh +9 -0
- data/ext/libdeflate/libdeflate/tools/msc_test.bat +3 -0
- data/ext/libdeflate/libdeflate/tools/pgo_build.sh +23 -0
- data/ext/libdeflate/libdeflate/tools/produce_gzip_benchmark_table.sh +37 -0
- data/ext/libdeflate/libdeflate/tools/run_tests.sh +305 -0
- data/ext/libdeflate/libdeflate/tools/windows_build.sh +10 -0
- data/ext/libdeflate/libdeflate_ext.c +389 -0
- data/ext/libdeflate/libdeflate_ext.h +8 -0
- data/lib/libdeflate.rb +2 -0
- data/lib/libdeflate/version.rb +3 -0
- data/libdeflate.gemspec +33 -0
- metadata +230 -0
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/*
 * lib_common.h - internal header included by all library code
 */

#ifndef LIB_LIB_COMMON_H
#define LIB_LIB_COMMON_H

#ifdef LIBDEFLATE_H
# error "lib_common.h must always be included before libdeflate.h"
  /* because BUILDING_LIBDEFLATE must be set first */
#endif

/* Tell libdeflate.h (and common_defs.h) that the library itself is being
 * compiled, as opposed to a program that merely uses the library. */
#define BUILDING_LIBDEFLATE

#include "common_defs.h"

/*
 * Prefix with "_libdeflate_" all global symbols which are not part of the API.
 * This avoids exposing overly generic names when libdeflate is built as a
 * static library.
 *
 * Note that the chosen prefix is not really important and can be changed
 * without breaking library users.  It was just chosen so that the resulting
 * symbol names are unlikely to conflict with those from any other software.
 * Also note that this fixup has no useful effect when libdeflate is built as a
 * shared library, since these symbols are not exported.
 */
#define SYM_FIXUP(sym)			_libdeflate_##sym
#define aligned_malloc			SYM_FIXUP(aligned_malloc)
#define aligned_free			SYM_FIXUP(aligned_free)
#define deflate_get_compression_level	SYM_FIXUP(deflate_get_compression_level)
#define _x86_cpu_features		SYM_FIXUP(_x86_cpu_features)
#define x86_setup_cpu_features		SYM_FIXUP(x86_setup_cpu_features)

#endif /* LIB_LIB_COMMON_H */
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* matchfinder_avx2.h - matchfinding routines optimized for Intel AVX2 (Advanced
|
|
3
|
+
* Vector Extensions)
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
#include <immintrin.h>
|
|
7
|
+
|
|
8
|
+
static forceinline bool
|
|
9
|
+
matchfinder_init_avx2(mf_pos_t *data, size_t size)
|
|
10
|
+
{
|
|
11
|
+
__m256i v, *p;
|
|
12
|
+
size_t n;
|
|
13
|
+
|
|
14
|
+
if (size % sizeof(__m256i) * 4)
|
|
15
|
+
return false;
|
|
16
|
+
|
|
17
|
+
STATIC_ASSERT(sizeof(mf_pos_t) == 2);
|
|
18
|
+
v = _mm256_set1_epi16(MATCHFINDER_INITVAL);
|
|
19
|
+
p = (__m256i *)data;
|
|
20
|
+
n = size / (sizeof(__m256i) * 4);
|
|
21
|
+
do {
|
|
22
|
+
p[0] = v;
|
|
23
|
+
p[1] = v;
|
|
24
|
+
p[2] = v;
|
|
25
|
+
p[3] = v;
|
|
26
|
+
p += 4;
|
|
27
|
+
} while (--n);
|
|
28
|
+
return true;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
static forceinline bool
|
|
32
|
+
matchfinder_rebase_avx2(mf_pos_t *data, size_t size)
|
|
33
|
+
{
|
|
34
|
+
__m256i v, *p;
|
|
35
|
+
size_t n;
|
|
36
|
+
|
|
37
|
+
if ((size % sizeof(__m256i) * 4 != 0))
|
|
38
|
+
return false;
|
|
39
|
+
|
|
40
|
+
STATIC_ASSERT(sizeof(mf_pos_t) == 2);
|
|
41
|
+
v = _mm256_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE);
|
|
42
|
+
p = (__m256i *)data;
|
|
43
|
+
n = size / (sizeof(__m256i) * 4);
|
|
44
|
+
do {
|
|
45
|
+
/* PADDSW: Add Packed Signed Integers With Signed Saturation */
|
|
46
|
+
p[0] = _mm256_adds_epi16(p[0], v);
|
|
47
|
+
p[1] = _mm256_adds_epi16(p[1], v);
|
|
48
|
+
p[2] = _mm256_adds_epi16(p[2], v);
|
|
49
|
+
p[3] = _mm256_adds_epi16(p[3], v);
|
|
50
|
+
p += 4;
|
|
51
|
+
} while (--n);
|
|
52
|
+
return true;
|
|
53
|
+
}
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
/*
 * matchfinder_common.h - common code for Lempel-Ziv matchfinding
 */

#ifndef LIB_MATCHFINDER_COMMON_H
#define LIB_MATCHFINDER_COMMON_H

#include "lib_common.h"
#include "unaligned.h"

/* Each translation unit that includes this header must choose its window
 * size (as a power-of-two exponent) first. */
#ifndef MATCHFINDER_WINDOW_ORDER
# error "MATCHFINDER_WINDOW_ORDER must be defined!"
#endif

/* Sliding-window size in bytes (a power of two). */
#define MATCHFINDER_WINDOW_SIZE (1UL << MATCHFINDER_WINDOW_ORDER)

/* Type of a matchfinder position entry: a signed 16-bit value. */
typedef s16 mf_pos_t;

/* Initial value for position entries: far enough in the "past" that every
 * fresh entry reads as out of the window. */
#define MATCHFINDER_INITVAL ((mf_pos_t)-MATCHFINDER_WINDOW_SIZE)

/* Minimum required alignment of matchfinder buffers; raised below whenever
 * a SIMD implementation with stricter requirements is compiled in. */
#define MATCHFINDER_ALIGNMENT 8

#ifdef __AVX2__
# include "matchfinder_avx2.h"
# if MATCHFINDER_ALIGNMENT < 32
# undef MATCHFINDER_ALIGNMENT
# define MATCHFINDER_ALIGNMENT 32
# endif
#endif

#ifdef __SSE2__
# include "matchfinder_sse2.h"
# if MATCHFINDER_ALIGNMENT < 16
# undef MATCHFINDER_ALIGNMENT
# define MATCHFINDER_ALIGNMENT 16
# endif
#endif

#ifdef __ARM_NEON
# include "matchfinder_neon.h"
# if MATCHFINDER_ALIGNMENT < 16
# undef MATCHFINDER_ALIGNMENT
# define MATCHFINDER_ALIGNMENT 16
# endif
#endif
|
|
46
|
+
|
|
47
|
+
/*
|
|
48
|
+
* Initialize the hash table portion of the matchfinder.
|
|
49
|
+
*
|
|
50
|
+
* Essentially, this is an optimized memset().
|
|
51
|
+
*
|
|
52
|
+
* 'data' must be aligned to a MATCHFINDER_ALIGNMENT boundary.
|
|
53
|
+
*/
|
|
54
|
+
static forceinline void
|
|
55
|
+
matchfinder_init(mf_pos_t *data, size_t num_entries)
|
|
56
|
+
{
|
|
57
|
+
size_t i;
|
|
58
|
+
|
|
59
|
+
#if defined(__AVX2__) && defined(_aligned_attribute)
|
|
60
|
+
if (matchfinder_init_avx2(data, num_entries * sizeof(data[0])))
|
|
61
|
+
return;
|
|
62
|
+
#endif
|
|
63
|
+
|
|
64
|
+
#if defined(__SSE2__) && defined(_aligned_attribute)
|
|
65
|
+
if (matchfinder_init_sse2(data, num_entries * sizeof(data[0])))
|
|
66
|
+
return;
|
|
67
|
+
#endif
|
|
68
|
+
|
|
69
|
+
#if defined(__ARM_NEON) && defined(_aligned_attribute)
|
|
70
|
+
if (matchfinder_init_neon(data, num_entries * sizeof(data[0])))
|
|
71
|
+
return;
|
|
72
|
+
#endif
|
|
73
|
+
|
|
74
|
+
for (i = 0; i < num_entries; i++)
|
|
75
|
+
data[i] = MATCHFINDER_INITVAL;
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
/*
|
|
79
|
+
* Slide the matchfinder by WINDOW_SIZE bytes.
|
|
80
|
+
*
|
|
81
|
+
* This must be called just after each WINDOW_SIZE bytes have been run through
|
|
82
|
+
* the matchfinder.
|
|
83
|
+
*
|
|
84
|
+
* This will subtract WINDOW_SIZE bytes from each entry in the array specified.
|
|
85
|
+
* The effect is that all entries are updated to be relative to the current
|
|
86
|
+
* position, rather than the position WINDOW_SIZE bytes prior.
|
|
87
|
+
*
|
|
88
|
+
* Underflow is detected and replaced with signed saturation. This ensures that
|
|
89
|
+
* once the sliding window has passed over a position, that position forever
|
|
90
|
+
* remains out of bounds.
|
|
91
|
+
*
|
|
92
|
+
* The array passed in must contain all matchfinder data that is
|
|
93
|
+
* position-relative. Concretely, this will include the hash table as well as
|
|
94
|
+
* the table of positions that is used to link together the sequences in each
|
|
95
|
+
* hash bucket. Note that in the latter table, the links are 1-ary in the case
|
|
96
|
+
* of "hash chains", and 2-ary in the case of "binary trees". In either case,
|
|
97
|
+
* the links need to be rebased in the same way.
|
|
98
|
+
*/
|
|
99
|
+
static forceinline void
matchfinder_rebase(mf_pos_t *data, size_t num_entries)
{
	size_t i;

	/* Prefer a SIMD implementation when available; each one validates the
	 * buffer size itself and returns false to request the generic path. */
#if defined(__AVX2__) && defined(_aligned_attribute)
	if (matchfinder_rebase_avx2(data, num_entries * sizeof(data[0])))
		return;
#endif

#if defined(__SSE2__) && defined(_aligned_attribute)
	if (matchfinder_rebase_sse2(data, num_entries * sizeof(data[0])))
		return;
#endif

#if defined(__ARM_NEON) && defined(_aligned_attribute)
	if (matchfinder_rebase_neon(data, num_entries * sizeof(data[0])))
		return;
#endif

	if (MATCHFINDER_WINDOW_SIZE == 32768) {
		/* Branchless version for 32768 byte windows.  If the value
		 * was already negative, clear all bits except the sign bit;
		 * this changes the value to -32768.  Otherwise, set the sign
		 * bit; this is equivalent to subtracting 32768. */
		for (i = 0; i < num_entries; i++) {
			u16 v = data[i];
			u16 sign_bit = v & 0x8000;
			/* Mask is 0x8000 when the sign bit was set (keep only
			 * the sign bit), else 0xFFFF (keep everything). */
			v &= sign_bit - ((sign_bit >> 15) ^ 1);
			v |= 0x8000;
			data[i] = v;
		}
		return;
	}

	/* Generic fallback: saturate out-of-window entries to the minimum,
	 * otherwise rebase the entry by the window size.
	 * NOTE(review): '-= (mf_pos_t)-MATCHFINDER_WINDOW_SIZE' adds
	 * WINDOW_SIZE; modulo 2^16 that equals subtracting WINDOW_SIZE only
	 * when WINDOW_SIZE == 32768, and that case is handled above —
	 * confirm intent for other window orders. */
	for (i = 0; i < num_entries; i++) {
		if (data[i] >= 0)
			data[i] -= (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
		else
			data[i] = (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
	}
}
|
|
141
|
+
|
|
142
|
+
/*
|
|
143
|
+
* The hash function: given a sequence prefix held in the low-order bits of a
|
|
144
|
+
* 32-bit value, multiply by a carefully-chosen large constant. Discard any
|
|
145
|
+
* bits of the product that don't fit in a 32-bit value, but take the
|
|
146
|
+
* next-highest @num_bits bits of the product as the hash value, as those have
|
|
147
|
+
* the most randomness.
|
|
148
|
+
*/
|
|
149
|
+
static forceinline u32
|
|
150
|
+
lz_hash(u32 seq, unsigned num_bits)
|
|
151
|
+
{
|
|
152
|
+
return (u32)(seq * 0x1E35A7BD) >> (32 - num_bits);
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
/*
|
|
156
|
+
* Return the number of bytes at @matchptr that match the bytes at @strptr, up
|
|
157
|
+
* to a maximum of @max_len. Initially, @start_len bytes are matched.
|
|
158
|
+
*/
|
|
159
|
+
static forceinline unsigned
lz_extend(const u8 * const strptr, const u8 * const matchptr,
	  const unsigned start_len, const unsigned max_len)
{
	unsigned len = start_len;
	machine_word_t v_word;	/* XOR of the two words being compared; 0 iff equal */

	if (UNALIGNED_ACCESS_IS_FAST) {

		/* Unrolled fast path: compare 4 machine words at a time while
		 * at least 4 * WORDBYTES bytes remain to be matched. */
		if (likely(max_len - len >= 4 * WORDBYTES)) {

#define COMPARE_WORD_STEP				\
	v_word = load_word_unaligned(&matchptr[len]) ^	\
		 load_word_unaligned(&strptr[len]);	\
	if (v_word != 0)				\
		goto word_differs;			\
	len += WORDBYTES;				\

			COMPARE_WORD_STEP
			COMPARE_WORD_STEP
			COMPARE_WORD_STEP
			COMPARE_WORD_STEP
#undef COMPARE_WORD_STEP
		}

		/* Continue word-at-a-time while a full word fits. */
		while (len + WORDBYTES <= max_len) {
			v_word = load_word_unaligned(&matchptr[len]) ^
				 load_word_unaligned(&strptr[len]);
			if (v_word != 0)
				goto word_differs;
			len += WORDBYTES;
		}
	}

	/* Byte-at-a-time tail (also the whole loop when unaligned access is
	 * slow on this platform). */
	while (len < max_len && matchptr[len] == strptr[len])
		len++;
	return len;

word_differs:
	/* The XOR's lowest-addressed nonzero byte marks the first mismatch:
	 * least-significant byte on little-endian, most-significant on
	 * big-endian.  Convert its bit index to a byte count. */
	if (CPU_IS_LITTLE_ENDIAN())
		len += (bsfw(v_word) >> 3);
	else
		len += (WORDBITS - 1 - bsrw(v_word)) >> 3;
	return len;
}
|
|
204
|
+
|
|
205
|
+
#endif /* LIB_MATCHFINDER_COMMON_H */
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* matchfinder_neon.h - matchfinding routines optimized for ARM NEON (Advanced
|
|
3
|
+
* SIMD) instructions
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
#include <arm_neon.h>
|
|
7
|
+
|
|
8
|
+
/*
 * NEON fast path for matchfinder_init(): fill the buffer at @data
 * (@size bytes, 16-byte aligned) with MATCHFINDER_INITVAL.
 *
 * Returns true if the buffer was handled here, or false if the caller must
 * fall back to the generic scalar code.
 */
static forceinline bool
matchfinder_init_neon(mf_pos_t *data, size_t size)
{
	int16x8_t v, *p;
	size_t n;

	/*
	 * Bug fix: '%' and '*' have equal precedence, so the previous test
	 * 'size % sizeof(int16x8_t) * 4' computed '(size % 16) * 4' and
	 * accepted sizes that are multiples of 16 but not of the 64-byte
	 * unrolled step, leaving the buffer's tail uninitialized.  It also
	 * accepted size == 0, which made the do/while underflow 'n'.
	 */
	if (size == 0 || size % (sizeof(int16x8_t) * 4) != 0)
		return false;

	STATIC_ASSERT(sizeof(mf_pos_t) == 2);
	v = (int16x8_t) {
		MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL,
		MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL,
		MATCHFINDER_INITVAL, MATCHFINDER_INITVAL,
	};
	p = (int16x8_t *)data;
	n = size / (sizeof(int16x8_t) * 4);
	do {
		/* Four 16-byte stores (64 bytes) per iteration. */
		p[0] = v;
		p[1] = v;
		p[2] = v;
		p[3] = v;
		p += 4;
	} while (--n);
	return true;
}
|
|
34
|
+
|
|
35
|
+
/*
 * NEON fast path for matchfinder_rebase(): subtract MATCHFINDER_WINDOW_SIZE,
 * with signed saturation, from every 16-bit entry in the buffer at @data
 * (@size bytes, 16-byte aligned).
 *
 * Returns true if the buffer was handled here, or false if the caller must
 * fall back to the generic scalar code.
 */
static forceinline bool
matchfinder_rebase_neon(mf_pos_t *data, size_t size)
{
	int16x8_t v, *p;
	size_t n;

	/*
	 * Bug fix: '(size % sizeof(int16x8_t) * 4 != 0)' parsed as
	 * '((size % 16) * 4) != 0' because '%' and '*' have equal precedence,
	 * so sizes that are multiples of 16 but not of 64 slipped through and
	 * left the buffer's tail unrebased.  Also reject size == 0, which
	 * would underflow 'n' in the do/while below.
	 */
	if (size == 0 || size % (sizeof(int16x8_t) * 4) != 0)
		return false;

	STATIC_ASSERT(sizeof(mf_pos_t) == 2);
	v = (int16x8_t) {
		(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
		(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
		(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
		(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
	};
	p = (int16x8_t *)data;
	n = size / (sizeof(int16x8_t) * 4);
	do {
		/* vqaddq_s16: saturating signed add, so entries already at
		 * the minimum stay clamped instead of wrapping around. */
		p[0] = vqaddq_s16(p[0], v);
		p[1] = vqaddq_s16(p[1], v);
		p[2] = vqaddq_s16(p[2], v);
		p[3] = vqaddq_s16(p[3], v);
		p += 4;
	} while (--n);
	return true;
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* matchfinder_sse2.h - matchfinding routines optimized for Intel SSE2
|
|
3
|
+
* (Streaming SIMD Extensions).
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
#include <emmintrin.h>
|
|
7
|
+
|
|
8
|
+
static forceinline bool
|
|
9
|
+
matchfinder_init_sse2(mf_pos_t *data, size_t size)
|
|
10
|
+
{
|
|
11
|
+
__m128i v, *p;
|
|
12
|
+
size_t n;
|
|
13
|
+
|
|
14
|
+
if (size % sizeof(__m128i) * 4)
|
|
15
|
+
return false;
|
|
16
|
+
|
|
17
|
+
STATIC_ASSERT(sizeof(mf_pos_t) == 2);
|
|
18
|
+
v = _mm_set1_epi16(MATCHFINDER_INITVAL);
|
|
19
|
+
p = (__m128i *)data;
|
|
20
|
+
n = size / (sizeof(__m128i) * 4);
|
|
21
|
+
do {
|
|
22
|
+
p[0] = v;
|
|
23
|
+
p[1] = v;
|
|
24
|
+
p[2] = v;
|
|
25
|
+
p[3] = v;
|
|
26
|
+
p += 4;
|
|
27
|
+
} while (--n);
|
|
28
|
+
return true;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
static forceinline bool
|
|
32
|
+
matchfinder_rebase_sse2(mf_pos_t *data, size_t size)
|
|
33
|
+
{
|
|
34
|
+
__m128i v, *p;
|
|
35
|
+
size_t n;
|
|
36
|
+
|
|
37
|
+
if ((size % sizeof(__m128i) * 4 != 0))
|
|
38
|
+
return false;
|
|
39
|
+
|
|
40
|
+
STATIC_ASSERT(sizeof(mf_pos_t) == 2);
|
|
41
|
+
v = _mm_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE);
|
|
42
|
+
p = (__m128i *)data;
|
|
43
|
+
n = size / (sizeof(__m128i) * 4);
|
|
44
|
+
do {
|
|
45
|
+
/* PADDSW: Add Packed Signed Integers With Signed Saturation */
|
|
46
|
+
p[0] = _mm_adds_epi16(p[0], v);
|
|
47
|
+
p[1] = _mm_adds_epi16(p[1], v);
|
|
48
|
+
p[2] = _mm_adds_epi16(p[2], v);
|
|
49
|
+
p[3] = _mm_adds_epi16(p[3], v);
|
|
50
|
+
p += 4;
|
|
51
|
+
} while (--n);
|
|
52
|
+
return true;
|
|
53
|
+
}
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
/*
 * unaligned.h - inline functions for unaligned memory accesses
 */

#ifndef LIB_UNALIGNED_H
#define LIB_UNALIGNED_H

#include "lib_common.h"

/*
 * Naming note:
 *
 * {load,store}_*_unaligned() deal with raw bytes without endianness conversion.
 * {get,put}_unaligned_*() deal with a specific endianness.
 */

/* Instantiate load_<type>_unaligned()/store_<type>_unaligned() for each
 * integer width.  (DEFINE_UNALIGNED_TYPE presumably comes in via
 * lib_common.h -> common_defs.h — confirm against that header.) */
DEFINE_UNALIGNED_TYPE(u16)
DEFINE_UNALIGNED_TYPE(u32)
DEFINE_UNALIGNED_TYPE(u64)
DEFINE_UNALIGNED_TYPE(machine_word_t)

/* Convenience aliases for native machine-word-sized accesses. */
#define load_word_unaligned	load_machine_word_t_unaligned
#define store_word_unaligned	store_machine_word_t_unaligned
|
|
24
|
+
|
|
25
|
+
/***** Unaligned loads *****/
|
|
26
|
+
|
|
27
|
+
static forceinline u16
|
|
28
|
+
get_unaligned_le16(const u8 *p)
|
|
29
|
+
{
|
|
30
|
+
if (UNALIGNED_ACCESS_IS_FAST)
|
|
31
|
+
return le16_bswap(load_u16_unaligned(p));
|
|
32
|
+
else
|
|
33
|
+
return ((u16)p[1] << 8) | p[0];
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
static forceinline u16
|
|
37
|
+
get_unaligned_be16(const u8 *p)
|
|
38
|
+
{
|
|
39
|
+
if (UNALIGNED_ACCESS_IS_FAST)
|
|
40
|
+
return be16_bswap(load_u16_unaligned(p));
|
|
41
|
+
else
|
|
42
|
+
return ((u16)p[0] << 8) | p[1];
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
static forceinline u32
|
|
46
|
+
get_unaligned_le32(const u8 *p)
|
|
47
|
+
{
|
|
48
|
+
if (UNALIGNED_ACCESS_IS_FAST)
|
|
49
|
+
return le32_bswap(load_u32_unaligned(p));
|
|
50
|
+
else
|
|
51
|
+
return ((u32)p[3] << 24) | ((u32)p[2] << 16) |
|
|
52
|
+
((u32)p[1] << 8) | p[0];
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
static forceinline u32
|
|
56
|
+
get_unaligned_be32(const u8 *p)
|
|
57
|
+
{
|
|
58
|
+
if (UNALIGNED_ACCESS_IS_FAST)
|
|
59
|
+
return be32_bswap(load_u32_unaligned(p));
|
|
60
|
+
else
|
|
61
|
+
return ((u32)p[0] << 24) | ((u32)p[1] << 16) |
|
|
62
|
+
((u32)p[2] << 8) | p[3];
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
static forceinline u64
|
|
66
|
+
get_unaligned_le64(const u8 *p)
|
|
67
|
+
{
|
|
68
|
+
if (UNALIGNED_ACCESS_IS_FAST)
|
|
69
|
+
return le64_bswap(load_u64_unaligned(p));
|
|
70
|
+
else
|
|
71
|
+
return ((u64)p[7] << 56) | ((u64)p[6] << 48) |
|
|
72
|
+
((u64)p[5] << 40) | ((u64)p[4] << 32) |
|
|
73
|
+
((u64)p[3] << 24) | ((u64)p[2] << 16) |
|
|
74
|
+
((u64)p[1] << 8) | p[0];
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
static forceinline machine_word_t
|
|
78
|
+
get_unaligned_leword(const u8 *p)
|
|
79
|
+
{
|
|
80
|
+
STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
|
|
81
|
+
if (WORDBITS == 32)
|
|
82
|
+
return get_unaligned_le32(p);
|
|
83
|
+
else
|
|
84
|
+
return get_unaligned_le64(p);
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
/***** Unaligned stores *****/
|
|
88
|
+
|
|
89
|
+
static forceinline void
|
|
90
|
+
put_unaligned_le16(u16 v, u8 *p)
|
|
91
|
+
{
|
|
92
|
+
if (UNALIGNED_ACCESS_IS_FAST) {
|
|
93
|
+
store_u16_unaligned(le16_bswap(v), p);
|
|
94
|
+
} else {
|
|
95
|
+
p[0] = (u8)(v >> 0);
|
|
96
|
+
p[1] = (u8)(v >> 8);
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
static forceinline void
|
|
101
|
+
put_unaligned_be16(u16 v, u8 *p)
|
|
102
|
+
{
|
|
103
|
+
if (UNALIGNED_ACCESS_IS_FAST) {
|
|
104
|
+
store_u16_unaligned(be16_bswap(v), p);
|
|
105
|
+
} else {
|
|
106
|
+
p[0] = (u8)(v >> 8);
|
|
107
|
+
p[1] = (u8)(v >> 0);
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
static forceinline void
|
|
112
|
+
put_unaligned_le32(u32 v, u8 *p)
|
|
113
|
+
{
|
|
114
|
+
if (UNALIGNED_ACCESS_IS_FAST) {
|
|
115
|
+
store_u32_unaligned(le32_bswap(v), p);
|
|
116
|
+
} else {
|
|
117
|
+
p[0] = (u8)(v >> 0);
|
|
118
|
+
p[1] = (u8)(v >> 8);
|
|
119
|
+
p[2] = (u8)(v >> 16);
|
|
120
|
+
p[3] = (u8)(v >> 24);
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
static forceinline void
|
|
125
|
+
put_unaligned_be32(u32 v, u8 *p)
|
|
126
|
+
{
|
|
127
|
+
if (UNALIGNED_ACCESS_IS_FAST) {
|
|
128
|
+
store_u32_unaligned(be32_bswap(v), p);
|
|
129
|
+
} else {
|
|
130
|
+
p[0] = (u8)(v >> 24);
|
|
131
|
+
p[1] = (u8)(v >> 16);
|
|
132
|
+
p[2] = (u8)(v >> 8);
|
|
133
|
+
p[3] = (u8)(v >> 0);
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
static forceinline void
|
|
138
|
+
put_unaligned_le64(u64 v, u8 *p)
|
|
139
|
+
{
|
|
140
|
+
if (UNALIGNED_ACCESS_IS_FAST) {
|
|
141
|
+
store_u64_unaligned(le64_bswap(v), p);
|
|
142
|
+
} else {
|
|
143
|
+
p[0] = (u8)(v >> 0);
|
|
144
|
+
p[1] = (u8)(v >> 8);
|
|
145
|
+
p[2] = (u8)(v >> 16);
|
|
146
|
+
p[3] = (u8)(v >> 24);
|
|
147
|
+
p[4] = (u8)(v >> 32);
|
|
148
|
+
p[5] = (u8)(v >> 40);
|
|
149
|
+
p[6] = (u8)(v >> 48);
|
|
150
|
+
p[7] = (u8)(v >> 56);
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
static forceinline void
|
|
155
|
+
put_unaligned_leword(machine_word_t v, u8 *p)
|
|
156
|
+
{
|
|
157
|
+
STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
|
|
158
|
+
if (WORDBITS == 32)
|
|
159
|
+
put_unaligned_le32(v, p);
|
|
160
|
+
else
|
|
161
|
+
put_unaligned_le64(v, p);
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
/***** 24-bit loads *****/
|
|
165
|
+
|
|
166
|
+
/*
|
|
167
|
+
* Given a 32-bit value that was loaded with the platform's native endianness,
|
|
168
|
+
* return a 32-bit value whose high-order 8 bits are 0 and whose low-order 24
|
|
169
|
+
* bits contain the first 3 bytes, arranged in octets in a platform-dependent
|
|
170
|
+
* order, at the memory location from which the input 32-bit value was loaded.
|
|
171
|
+
*/
|
|
172
|
+
static forceinline u32
|
|
173
|
+
loaded_u32_to_u24(u32 v)
|
|
174
|
+
{
|
|
175
|
+
if (CPU_IS_LITTLE_ENDIAN())
|
|
176
|
+
return v & 0xFFFFFF;
|
|
177
|
+
else
|
|
178
|
+
return v >> 8;
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
/*
|
|
182
|
+
* Load the next 3 bytes from the memory location @p into the 24 low-order bits
|
|
183
|
+
* of a 32-bit value. The order in which the 3 bytes will be arranged as octets
|
|
184
|
+
* in the 24 bits is platform-dependent. At least LOAD_U24_REQUIRED_NBYTES
|
|
185
|
+
* bytes must be available at @p; note that this may be more than 3.
|
|
186
|
+
*/
|
|
187
|
+
static forceinline u32
load_u24_unaligned(const u8 *p)
{
#if UNALIGNED_ACCESS_IS_FAST
# define LOAD_U24_REQUIRED_NBYTES 4
	/* Over-read one byte (hence the 4-byte requirement) and discard it;
	 * a single unaligned 32-bit load is cheaper than three byte loads. */
	return loaded_u32_to_u24(load_u32_unaligned(p));
#else
# define LOAD_U24_REQUIRED_NBYTES 3
	/* Byte-at-a-time assembly, ordered to match what the fast path
	 * produces on each endianness. */
	if (CPU_IS_LITTLE_ENDIAN())
		return ((u32)p[0] << 0) | ((u32)p[1] << 8) | ((u32)p[2] << 16);
	else
		return ((u32)p[2] << 0) | ((u32)p[1] << 8) | ((u32)p[0] << 16);
#endif
}
|
|
201
|
+
|
|
202
|
+
#endif /* LIB_UNALIGNED_H */
|