multi_compress 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,6 +6,7 @@
6
6
  #include <brotli/encode.h>
7
7
  #include <lz4.h>
8
8
  #include <lz4hc.h>
9
+ #include <lz4frame.h>
9
10
  #include <pthread.h>
10
11
  #include <stdio.h>
11
12
  #include <stdint.h>
@@ -20,6 +21,20 @@
20
21
  #define RATIO_MIN_INPUT_BYTES 1024ULL
21
22
  #define DICT_FILE_MAX_SIZE (32ULL * 1024 * 1024)
22
23
 
24
/*
 * Compiler-hint helpers.
 *
 * MC_ALWAYS_INLINE  storage/inline specifier for small helpers; adds the
 *                   always_inline attribute on GCC/Clang and degrades to a
 *                   plain `static inline` elsewhere.
 * MC_LIKELY(x)      branch hint "x is usually true".
 * MC_UNLIKELY(x)    branch hint "x is usually false".
 *
 * Both hint macros evaluate x exactly once and yield 1 when x is non-zero and
 * 0 otherwise. The fallback normalises with `!!` as well, so the value of the
 * macro does not depend on which compiler built the extension.
 */
#if defined(__GNUC__) || defined(__clang__)
#define MC_ALWAYS_INLINE static inline __attribute__((always_inline))
#define MC_LIKELY(x) __builtin_expect(!!(x), 1)
#define MC_UNLIKELY(x) __builtin_expect(!!(x), 0)
#else
#define MC_ALWAYS_INLINE static inline
#define MC_LIKELY(x) (!!(x))
#define MC_UNLIKELY(x) (!!(x))
#endif
37
+
23
38
  typedef struct {
24
39
  size_t gvl_unlock_threshold;
25
40
  size_t fiber_yield_chunk;
@@ -62,7 +77,7 @@ static VALUE mBrotli;
62
77
  static rb_encoding *binary_encoding;
63
78
  static struct {
64
79
  ID zstd, lz4, brotli;
65
- ID algo, algorithm, level, dictionary, size;
80
+ ID algo, algorithm, level, dictionary, size, format, block, frame;
66
81
  ID max_output_size, max_ratio;
67
82
  ID fastest, default_, best;
68
83
  ID yield_, join;
@@ -71,11 +86,17 @@ static struct {
71
86
 
72
87
  static struct {
73
88
  VALUE zstd, lz4, brotli;
74
- VALUE algo, algorithm, level, dictionary, size;
89
+ VALUE algo, algorithm, level, dictionary, size, format, block, frame;
75
90
  VALUE max_output_size, max_ratio;
76
91
  } sym_cache;
77
92
 
78
93
  typedef enum { ALGO_ZSTD = 0, ALGO_LZ4 = 1, ALGO_BROTLI = 2 } compress_algo_t;
94
+ typedef enum { LZ4_FORMAT_BLOCK = 0, LZ4_FORMAT_FRAME = 1 } lz4_format_t;
95
+
96
+ #define MC_NUM_ALGOS 3
97
+
98
+ _Static_assert(ALGO_BROTLI == MC_NUM_ALGOS - 1,
99
+ "compress_algo_t must be contiguous [0..MC_NUM_ALGOS-1]");
79
100
 
80
101
  typedef struct dictionary_s dictionary_t;
81
102
  static const rb_data_type_t dictionary_type;
@@ -89,6 +110,9 @@ static void init_id_cache(void) {
89
110
  id_cache.level = rb_intern("level");
90
111
  id_cache.dictionary = rb_intern("dictionary");
91
112
  id_cache.size = rb_intern("size");
113
+ id_cache.format = rb_intern("format");
114
+ id_cache.block = rb_intern("block");
115
+ id_cache.frame = rb_intern("frame");
92
116
  id_cache.max_output_size = rb_intern("max_output_size");
93
117
  id_cache.max_ratio = rb_intern("max_ratio");
94
118
  id_cache.fastest = rb_intern("fastest");
@@ -106,6 +130,9 @@ static void init_id_cache(void) {
106
130
  sym_cache.level = ID2SYM(id_cache.level);
107
131
  sym_cache.dictionary = ID2SYM(id_cache.dictionary);
108
132
  sym_cache.size = ID2SYM(id_cache.size);
133
+ sym_cache.format = ID2SYM(id_cache.format);
134
+ sym_cache.block = ID2SYM(id_cache.block);
135
+ sym_cache.frame = ID2SYM(id_cache.frame);
109
136
  sym_cache.max_output_size = ID2SYM(id_cache.max_output_size);
110
137
  sym_cache.max_ratio = ID2SYM(id_cache.max_ratio);
111
138
  }
@@ -118,6 +145,30 @@ static inline VALUE opt_lookup2(VALUE opts, VALUE sym, VALUE default_value) {
118
145
  return NIL_P(opts) ? default_value : rb_hash_lookup2(opts, sym, default_value);
119
146
  }
120
147
 
148
/* Length and byte sequence compared against the start of a buffer to recognise an LZ4 frame. */
enum { LZ4_FRAME_MAGIC_LEN = 4 };
static const uint8_t LZ4_FRAME_MAGIC[LZ4_FRAME_MAGIC_LEN] = {0x04, 0x22, 0x4D, 0x18};

/*
 * Returns 1 when `data` holds at least LZ4_FRAME_MAGIC_LEN bytes and its first
 * bytes equal LZ4_FRAME_MAGIC, 0 otherwise. `data` is not read when `len` is
 * shorter than the magic.
 */
static inline int is_lz4_frame_magic(const uint8_t *data, size_t len) {
    if (len < LZ4_FRAME_MAGIC_LEN) {
        return 0;
    }
    return memcmp(data, LZ4_FRAME_MAGIC, LZ4_FRAME_MAGIC_LEN) == 0;
}
154
+
155
+ static lz4_format_t parse_lz4_format(VALUE opts, compress_algo_t algo, int explicit_algo) {
156
+ VALUE format_val = opt_lookup2(opts, sym_cache.format, Qundef);
157
+ if (format_val == Qundef || NIL_P(format_val))
158
+ return LZ4_FORMAT_BLOCK;
159
+ if (explicit_algo && algo != ALGO_LZ4)
160
+ rb_raise(eUnsupportedError, "format is only supported for algo: :lz4");
161
+ if (!SYMBOL_P(format_val))
162
+ rb_raise(rb_eTypeError, "format must be a Symbol (:block or :frame)");
163
+ ID id = SYM2ID(format_val);
164
+ if (id == id_cache.block)
165
+ return LZ4_FORMAT_BLOCK;
166
+ if (id == id_cache.frame)
167
+ return LZ4_FORMAT_FRAME;
168
+ rb_raise(rb_eArgError, "Unknown LZ4 format: %s", rb_id2name(id));
169
+ return LZ4_FORMAT_BLOCK;
170
+ }
171
+
121
172
  static inline void reject_algorithm_keyword(VALUE opts) {
122
173
  if (NIL_P(opts))
123
174
  return;
@@ -161,11 +212,11 @@ static inline void dictionary_ivar_set(VALUE self, VALUE dictionary) {
161
212
  rb_ivar_set(self, id_cache.ivar_dictionary, dictionary);
162
213
  }
163
214
 
164
- static inline uint32_t read_le_u32(const uint8_t *p) {
215
+ MC_ALWAYS_INLINE uint32_t read_le_u32(const uint8_t *restrict p) {
165
216
  return (uint32_t)p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
166
217
  }
167
218
 
168
- static inline void write_le_u32(uint8_t *p, uint32_t v) {
219
+ MC_ALWAYS_INLINE void write_le_u32(uint8_t *restrict p, uint32_t v) {
169
220
  p[0] = (uint8_t)(v & 0xFF);
170
221
  p[1] = (uint8_t)((v >> 8) & 0xFF);
171
222
  p[2] = (uint8_t)((v >> 16) & 0xFF);
@@ -223,6 +274,8 @@ static const level_spec_t level_spec[] = {
223
274
  [ALGO_BROTLI] =
224
275
  {.min = 0, .max = 11, .fastest = 0, .default_ = 6, .best = 11, .name = "brotli"},
225
276
  };
277
+ _Static_assert(sizeof(level_spec) / sizeof(level_spec[0]) == MC_NUM_ALGOS,
278
+ "level_spec must cover every compress_algo_t value");
226
279
 
227
280
  static int resolve_level(compress_algo_t algo, VALUE level_val) {
228
281
  const level_spec_t *spec = &level_spec[algo];
@@ -241,7 +294,7 @@ static int resolve_level(compress_algo_t algo, VALUE level_val) {
241
294
  rb_raise(eLevelError, "Unknown named level: %s", rb_id2name(id));
242
295
  }
243
296
 
244
- int level = NUM2INT(level_val);
297
+ const int level = NUM2INT(level_val);
245
298
  if (level < spec->min || level > spec->max)
246
299
  rb_raise(eLevelError, "%s level must be %d..%d, got %d", spec->name, spec->min, spec->max,
247
300
  level);
@@ -249,23 +302,39 @@ static int resolve_level(compress_algo_t algo, VALUE level_val) {
249
302
  }
250
303
 
251
304
  static compress_algo_t detect_algo(const uint8_t *data, size_t len) {
252
- if (len >= 4) {
253
- if (data[0] == 0x28 && data[1] == 0xB5 && data[2] == 0x2F && data[3] == 0xFD) {
254
- return ALGO_ZSTD;
255
- }
305
+ enum { ZSTD_MAGIC_LEN = 4 };
306
+ static const uint8_t ZSTD_MAGIC[ZSTD_MAGIC_LEN] = {0x28, 0xB5, 0x2F, 0xFD};
307
+ enum { LZ4_BLOCK_SANITY_MAX = 256U * 1024 * 1024 };
308
+
309
+ if (is_lz4_frame_magic(data, len)) {
310
+ return ALGO_LZ4;
311
+ }
312
+
313
+ if (len >= ZSTD_MAGIC_LEN && memcmp(data, ZSTD_MAGIC, ZSTD_MAGIC_LEN) == 0) {
314
+ return ALGO_ZSTD;
256
315
  }
257
316
 
258
317
  if (len >= 12) {
259
- uint32_t orig = read_le_u32(data);
260
- uint32_t comp = read_le_u32(data + 4);
261
- if (orig > 0 && orig <= 256U * 1024 * 1024 && comp > 0 && comp <= 256U * 1024 * 1024 &&
262
- orig <= (uint32_t)INT_MAX && comp <= (uint32_t)LZ4_compressBound((int)orig) &&
263
- (size_t)8 + (size_t)comp + 4 == len) {
264
- size_t tail = 8 + (size_t)comp;
265
- if (data[tail] == 0 && data[tail + 1] == 0 && data[tail + 2] == 0 &&
266
- data[tail + 3] == 0) {
267
- return ALGO_LZ4;
318
+ size_t pos = 0;
319
+ int saw_block = 0;
320
+ while (pos + 4 <= len) {
321
+ uint32_t orig = read_le_u32(data + pos);
322
+ if (orig == 0) {
323
+ if (saw_block && pos + 4 == len)
324
+ return ALGO_LZ4;
325
+ break;
268
326
  }
327
+ if (pos + 8 > len)
328
+ break;
329
+ uint32_t comp = read_le_u32(data + pos + 4);
330
+ if (comp == 0 || orig > LZ4_BLOCK_SANITY_MAX || orig > (uint32_t)INT_MAX)
331
+ break;
332
+ if (comp > LZ4_BLOCK_SANITY_MAX || comp > (uint32_t)LZ4_compressBound((int)orig))
333
+ break;
334
+ if (pos + 8 + (size_t)comp > len)
335
+ break;
336
+ saw_block = 1;
337
+ pos += 8 + (size_t)comp;
269
338
  }
270
339
  }
271
340
 
@@ -347,8 +416,8 @@ static void parse_limits_from_opts(VALUE opts, limits_config_t *limits) {
347
416
  limits_config_apply_opts(opts, limits);
348
417
  }
349
418
 
350
- static size_t checked_add_size(size_t left, size_t right, const char *message) {
351
- if (SIZE_MAX - left < right)
419
+ static inline size_t checked_add_size(size_t left, size_t right, const char *message) {
420
+ if (MC_UNLIKELY(SIZE_MAX - left < right))
352
421
  rb_raise(eDataError, "%s", message);
353
422
  return left + right;
354
423
  }
@@ -361,10 +430,10 @@ static size_t ratio_limit_bytes(size_t total_input, unsigned long long max_ratio
361
430
  return total_input * (size_t)max_ratio;
362
431
  }
363
432
 
364
- static void enforce_output_and_ratio_limits(size_t total_output, size_t total_input,
365
- size_t max_output_size, int max_ratio_enabled,
366
- unsigned long long max_ratio) {
367
- if (total_output > max_output_size) {
433
+ static inline void enforce_output_and_ratio_limits(size_t total_output, size_t total_input,
434
+ size_t max_output_size, int max_ratio_enabled,
435
+ unsigned long long max_ratio) {
436
+ if (MC_UNLIKELY(total_output > max_output_size)) {
368
437
  rb_raise(eDataError, "decompressed output exceeds limit (%zu bytes)", max_output_size);
369
438
  }
370
439
 
@@ -372,7 +441,7 @@ static void enforce_output_and_ratio_limits(size_t total_output, size_t total_in
372
441
  return;
373
442
 
374
443
  size_t ratio_limit = ratio_limit_bytes(total_input, max_ratio);
375
- if (total_output > ratio_limit) {
444
+ if (MC_UNLIKELY(total_output > ratio_limit)) {
376
445
  size_t ratio = total_input == 0 ? 0 : (total_output / total_input);
377
446
  rb_raise(eDataError, "decompression ratio exceeds limit (ratio=%zu, max=%llu)", ratio,
378
447
  max_ratio);
@@ -401,12 +470,75 @@ static inline void run_without_gvl(void *(*func)(void *), void *arg) {
401
470
  typedef struct {
402
471
  void *(*func)(void *);
403
472
  void *arg;
404
-
405
- VALUE scheduler;
406
- VALUE blocker;
407
- VALUE fiber;
473
+ size_t arg_size;
474
+ VALUE thread;
408
475
  } fiber_worker_ctx_t;
409
476
 
477
+ typedef enum {
478
+ WORK_EXEC_DIRECT = 0,
479
+ WORK_EXEC_NOGVL = 1,
480
+ WORK_EXEC_FIBER = 2,
481
+ } work_exec_mode_t;
482
+
483
+ static void fiber_worker_mark(void *ptr) {
484
+ fiber_worker_ctx_t *c = (fiber_worker_ctx_t *)ptr;
485
+ if (!c)
486
+ return;
487
+ rb_gc_mark(c->thread);
488
+ }
489
+
490
+ static void fiber_worker_free(void *ptr) {
491
+ fiber_worker_ctx_t *c = (fiber_worker_ctx_t *)ptr;
492
+ if (!c)
493
+ return;
494
+ if (c->arg)
495
+ xfree(c->arg);
496
+ xfree(c);
497
+ }
498
+
499
+ static size_t fiber_worker_memsize(const void *ptr) {
500
+ const fiber_worker_ctx_t *c = (const fiber_worker_ctx_t *)ptr;
501
+ return sizeof(fiber_worker_ctx_t) + (c ? c->arg_size : 0);
502
+ }
503
+
504
+ static const rb_data_type_t fiber_worker_type = {
505
+ "MultiCompress/FiberWorker",
506
+ {fiber_worker_mark, fiber_worker_free, fiber_worker_memsize},
507
+ 0,
508
+ 0,
509
+ RUBY_TYPED_FREE_IMMEDIATELY};
510
+
511
+ static inline work_exec_mode_t select_fiber_or_direct_mode(VALUE scheduler, size_t work_size,
512
+ size_t fiber_threshold) {
513
+ if (scheduler != Qnil && work_size >= fiber_threshold)
514
+ return WORK_EXEC_FIBER;
515
+ return WORK_EXEC_DIRECT;
516
+ }
517
+
518
+ static inline work_exec_mode_t select_fiber_nogvl_or_direct_mode(VALUE scheduler, size_t work_size,
519
+ size_t fiber_threshold,
520
+ size_t nogvl_threshold) {
521
+ if (scheduler != Qnil && work_size >= fiber_threshold)
522
+ return WORK_EXEC_FIBER;
523
+ if (scheduler == Qnil && work_size >= nogvl_threshold)
524
+ return WORK_EXEC_NOGVL;
525
+ return WORK_EXEC_DIRECT;
526
+ }
527
+
528
+ static VALUE fiber_worker_new(void *(*func)(void *), const void *arg, size_t arg_size) {
529
+ fiber_worker_ctx_t *c;
530
+ VALUE worker = TypedData_Make_Struct(rb_cObject, fiber_worker_ctx_t, &fiber_worker_type, c);
531
+ memset(c, 0, sizeof(*c));
532
+ c->func = func;
533
+ c->arg_size = arg_size;
534
+ if (arg_size > 0) {
535
+ c->arg = xmalloc(arg_size);
536
+ memcpy(c->arg, arg, arg_size);
537
+ }
538
+ c->thread = Qnil;
539
+ return worker;
540
+ }
541
+
410
542
  static void *fiber_worker_nogvl(void *arg) {
411
543
  fiber_worker_ctx_t *c = (fiber_worker_ctx_t *)arg;
412
544
  c->func(c->arg);
@@ -415,24 +547,55 @@ static void *fiber_worker_nogvl(void *arg) {
415
547
 
416
548
  static VALUE fiber_worker_thread(void *arg) {
417
549
  fiber_worker_ctx_t *c = (fiber_worker_ctx_t *)arg;
418
- rb_thread_call_without_gvl(fiber_worker_nogvl, c, RUBY_UBF_PROCESS, NULL);
419
- rb_fiber_scheduler_unblock(c->scheduler, c->blocker, c->fiber);
550
+ rb_thread_call_without_gvl(fiber_worker_nogvl, c, unblock_noop, NULL);
551
+ return Qnil;
552
+ }
553
+
554
+ static VALUE fiber_worker_wait(VALUE worker) {
555
+ fiber_worker_ctx_t *c;
556
+ TypedData_Get_Struct(worker, fiber_worker_ctx_t, &fiber_worker_type, c);
557
+ c->thread = rb_thread_create(fiber_worker_thread, c);
558
+ join_thread(c->thread);
420
559
  return Qnil;
421
560
  }
422
561
 
423
- static void run_via_fiber_worker(VALUE scheduler, void *(*func)(void *), void *arg) {
424
- fiber_worker_ctx_t ctx = {
425
- .func = func,
426
- .arg = arg,
427
- .scheduler = scheduler,
428
- .blocker = rb_obj_alloc(rb_cObject),
429
- .fiber = rb_fiber_current(),
430
- };
431
- VALUE th = rb_thread_create(fiber_worker_thread, &ctx);
432
- rb_fiber_scheduler_block(scheduler, ctx.blocker, Qnil);
433
- join_thread(th);
562
+ static VALUE fiber_worker_join_ensure(VALUE worker) {
563
+ fiber_worker_ctx_t *c;
564
+ TypedData_Get_Struct(worker, fiber_worker_ctx_t, &fiber_worker_type, c);
565
+ if (!NIL_P(c->thread))
566
+ join_thread(c->thread);
567
+ return Qnil;
434
568
  }
435
569
 
570
+ static void run_via_fiber_worker(void *(*func)(void *), void *arg, size_t arg_size) {
571
+ VALUE worker = fiber_worker_new(func, arg, arg_size);
572
+ rb_ensure(fiber_worker_wait, worker, fiber_worker_join_ensure, worker);
573
+ fiber_worker_ctx_t *c;
574
+ TypedData_Get_Struct(worker, fiber_worker_ctx_t, &fiber_worker_type, c);
575
+ if (arg_size > 0)
576
+ memcpy(arg, c->arg, arg_size);
577
+ RB_GC_GUARD(worker);
578
+ }
579
+
580
+ static inline void run_with_exec_mode(work_exec_mode_t mode, void *(*func)(void *), void *arg,
581
+ size_t arg_size) {
582
+ switch (mode) {
583
+ case WORK_EXEC_FIBER:
584
+ run_via_fiber_worker(func, arg, arg_size);
585
+ break;
586
+ case WORK_EXEC_NOGVL:
587
+ run_without_gvl(func, arg);
588
+ break;
589
+ case WORK_EXEC_DIRECT:
590
+ default:
591
+ func(arg);
592
+ break;
593
+ }
594
+ }
595
+
596
+ #define RUN_VIA_FIBER_WORKER(func, arg) run_via_fiber_worker((func), &(arg), sizeof(arg))
597
+ #define RUN_WITH_EXEC_MODE(mode, func, arg) run_with_exec_mode((mode), (func), &(arg), sizeof(arg))
598
+
436
599
  static inline size_t fiber_maybe_yield(size_t bytes_since_yield, size_t just_processed,
437
600
  size_t yield_chunk, int *did_yield) {
438
601
  *did_yield = 0;
@@ -449,6 +612,7 @@ static inline size_t fiber_maybe_yield(size_t bytes_since_yield, size_t just_pro
449
612
  }
450
613
 
451
614
  #define DICT_CDICT_CACHE_SIZE 4
615
+ _Static_assert(DICT_CDICT_CACHE_SIZE > 0, "CDict cache needs at least one slot");
452
616
 
453
617
  typedef struct {
454
618
  int level;
@@ -481,14 +645,18 @@ static void dict_free(void *ptr) {
481
645
 
482
646
  static size_t dict_memsize(const void *ptr) {
483
647
  const dictionary_t *d = (const dictionary_t *)ptr;
484
- size_t total = sizeof(dictionary_t) + d->size;
648
+ if (!d)
649
+ return 0;
485
650
 
486
- for (int i = 0; i < d->cdict_cache_count; i++) {
487
- if (d->cdict_cache[i].cdict)
488
- total += d->size + 4096;
651
+ size_t total = sizeof(dictionary_t) + d->size;
652
+ if (d->algo == ALGO_ZSTD) {
653
+ for (int i = 0; i < d->cdict_cache_count; i++) {
654
+ if (d->cdict_cache[i].cdict)
655
+ total += ZSTD_sizeof_CDict(d->cdict_cache[i].cdict);
656
+ }
657
+ if (d->ddict)
658
+ total += ZSTD_sizeof_DDict(d->ddict);
489
659
  }
490
- if (d->ddict)
491
- total += d->size + 4096;
492
660
  return total;
493
661
  }
494
662
 
@@ -609,9 +777,9 @@ typedef struct {
609
777
 
610
778
  static void *lz4_decompress_all_nogvl(void *arg) {
611
779
  lz4_decompress_all_args_t *a = (lz4_decompress_all_args_t *)arg;
612
- const uint8_t *src = a->src;
780
+ const uint8_t *restrict src = a->src;
613
781
  size_t slen = a->src_len;
614
- char *out_ptr = a->dst;
782
+ char *restrict out_ptr = a->dst;
615
783
  size_t out_offset = 0;
616
784
  size_t pos = 0;
617
785
 
@@ -623,13 +791,14 @@ static void *lz4_decompress_all_nogvl(void *arg) {
623
791
 
624
792
  int dsize = LZ4_decompress_safe((const char *)(src + pos + 8), out_ptr + out_offset,
625
793
  (int)comp_size, (int)orig_size);
626
- if (dsize < 0) {
794
+ if (MC_UNLIKELY(dsize < 0)) {
627
795
  a->error = 1;
628
- snprintf(a->err_msg, sizeof(a->err_msg), "lz4 decompress failed");
796
+ static const char kLz4FailMsg[] = "lz4 decompress failed";
797
+ memcpy(a->err_msg, kLz4FailMsg, sizeof(kLz4FailMsg));
629
798
  return NULL;
630
799
  }
631
800
 
632
- out_offset += dsize;
801
+ out_offset += (size_t)dsize;
633
802
  pos += 8 + comp_size;
634
803
  }
635
804
 
@@ -647,6 +816,24 @@ typedef struct {
647
816
  int result;
648
817
  } lz4_compress_args_t;
649
818
 
819
+ typedef struct {
820
+ const void *src;
821
+ size_t src_len;
822
+ void *dst;
823
+ size_t dst_cap;
824
+ size_t result;
825
+ size_t error_code;
826
+ } lz4frame_compress_args_t;
827
+
828
+ typedef struct {
829
+ const void *src;
830
+ size_t src_len;
831
+ void *dst;
832
+ size_t dst_cap;
833
+ size_t result;
834
+ size_t error_code;
835
+ } lz4frame_decompress_args_t;
836
+
650
837
  static void *lz4_compress_nogvl(void *arg) {
651
838
  lz4_compress_args_t *a = (lz4_compress_args_t *)arg;
652
839
  if (a->level > 1) {
@@ -666,6 +853,72 @@ typedef struct {
666
853
  BROTLI_BOOL result;
667
854
  } brotli_compress_args_t;
668
855
 
856
+ static void *lz4frame_compress_nogvl(void *arg) {
857
+ lz4frame_compress_args_t *a = (lz4frame_compress_args_t *)arg;
858
+ LZ4F_preferences_t prefs;
859
+ memset(&prefs, 0, sizeof(prefs));
860
+ prefs.frameInfo.blockChecksumFlag = LZ4F_blockChecksumEnabled;
861
+ prefs.frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled;
862
+ a->result = LZ4F_compressFrame(a->dst, a->dst_cap, a->src, a->src_len, &prefs);
863
+ a->error_code = LZ4F_isError(a->result) ? a->result : 0;
864
+ return NULL;
865
+ }
866
+
867
+ static void *lz4frame_decompress_nogvl(void *arg) {
868
+ lz4frame_decompress_args_t *a = (lz4frame_decompress_args_t *)arg;
869
+ LZ4F_dctx *dctx = NULL;
870
+ size_t rc = LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION);
871
+ if (LZ4F_isError(rc)) {
872
+ a->result = 0;
873
+ a->error_code = rc;
874
+ return NULL;
875
+ }
876
+
877
+ const uint8_t *src = (const uint8_t *)a->src;
878
+ uint8_t *dst = (uint8_t *)a->dst;
879
+ size_t src_pos = 0;
880
+ size_t dst_pos = 0;
881
+ size_t hint = 1;
882
+
883
+ while (src_pos < a->src_len && hint != 0) {
884
+ size_t src_size = a->src_len - src_pos;
885
+ size_t dst_size = a->dst_cap - dst_pos;
886
+ rc = LZ4F_decompress(dctx, dst + dst_pos, &dst_size, src + src_pos, &src_size, NULL);
887
+ if (LZ4F_isError(rc)) {
888
+ a->result = 0;
889
+ a->error_code = rc;
890
+ LZ4F_freeDecompressionContext(dctx);
891
+ return NULL;
892
+ }
893
+ src_pos += src_size;
894
+ dst_pos += dst_size;
895
+ if (dst_pos > a->dst_cap) {
896
+ a->result = 0;
897
+ a->error_code = (size_t)-1;
898
+ LZ4F_freeDecompressionContext(dctx);
899
+ return NULL;
900
+ }
901
+ hint = rc;
902
+ if (src_size == 0 && dst_size == 0 && hint != 0)
903
+ break;
904
+ }
905
+
906
+ LZ4F_freeDecompressionContext(dctx);
907
+ if (hint != 0) {
908
+ if (dst_pos == a->dst_cap) {
909
+ a->result = 0;
910
+ a->error_code = (size_t)-1;
911
+ return NULL;
912
+ }
913
+ a->result = 0;
914
+ a->error_code = (size_t)-2;
915
+ return NULL;
916
+ }
917
+ a->result = dst_pos;
918
+ a->error_code = 0;
919
+ return NULL;
920
+ }
921
+
669
922
  static void *brotli_compress_nogvl(void *arg) {
670
923
  brotli_compress_args_t *a = (brotli_compress_args_t *)arg;
671
924
  a->result = BrotliEncoderCompress(a->level, BROTLI_DEFAULT_WINDOW, BROTLI_DEFAULT_MODE,
@@ -695,64 +948,38 @@ typedef struct {
695
948
  size_t dst_cap;
696
949
  size_t result;
697
950
  int error;
698
-
699
- VALUE scheduler;
700
- VALUE blocker;
701
- VALUE fiber;
702
951
  } zstd_fiber_compress_t;
703
952
 
704
953
  typedef struct {
705
954
  ZSTD_CStream *cstream;
706
- ZSTD_inBuffer *input;
707
- ZSTD_outBuffer *output;
955
+ ZSTD_inBuffer input;
956
+ ZSTD_outBuffer output;
708
957
  size_t result;
709
-
710
- VALUE scheduler;
711
- VALUE blocker;
712
- VALUE fiber;
713
958
  } zstd_stream_chunk_fiber_t;
714
959
 
715
960
  static void *zstd_stream_chunk_fiber_nogvl(void *arg) {
716
961
  zstd_stream_chunk_fiber_t *a = (zstd_stream_chunk_fiber_t *)arg;
717
- a->result = ZSTD_compressStream(a->cstream, a->output, a->input);
962
+ a->result = ZSTD_compressStream(a->cstream, &a->output, &a->input);
718
963
  return NULL;
719
964
  }
720
965
 
721
- static VALUE zstd_stream_chunk_fiber_thread(void *arg) {
722
- zstd_stream_chunk_fiber_t *a = (zstd_stream_chunk_fiber_t *)arg;
723
- rb_thread_call_without_gvl(zstd_stream_chunk_fiber_nogvl, a, RUBY_UBF_PROCESS, NULL);
724
- rb_fiber_scheduler_unblock(a->scheduler, a->blocker, a->fiber);
725
- return Qnil;
726
- }
727
-
728
966
  typedef struct {
729
967
  BrotliEncoderState *enc;
730
968
  BrotliEncoderOperation op;
731
- size_t *available_in;
732
- const uint8_t **next_in;
733
- size_t *available_out;
734
- uint8_t **next_out;
969
+ size_t available_in;
970
+ const uint8_t *next_in;
971
+ size_t available_out;
972
+ uint8_t *next_out;
735
973
  BROTLI_BOOL result;
736
-
737
- VALUE scheduler;
738
- VALUE blocker;
739
- VALUE fiber;
740
974
  } brotli_stream_chunk_fiber_t;
741
975
 
742
976
  static void *brotli_stream_chunk_fiber_nogvl(void *arg) {
743
977
  brotli_stream_chunk_fiber_t *a = (brotli_stream_chunk_fiber_t *)arg;
744
- a->result = BrotliEncoderCompressStream(a->enc, a->op, a->available_in, a->next_in,
745
- a->available_out, a->next_out, NULL);
978
+ a->result = BrotliEncoderCompressStream(a->enc, a->op, &a->available_in, &a->next_in,
979
+ &a->available_out, &a->next_out, NULL);
746
980
  return NULL;
747
981
  }
748
982
 
749
- static VALUE brotli_stream_chunk_fiber_thread(void *arg) {
750
- brotli_stream_chunk_fiber_t *a = (brotli_stream_chunk_fiber_t *)arg;
751
- rb_thread_call_without_gvl(brotli_stream_chunk_fiber_nogvl, a, RUBY_UBF_PROCESS, NULL);
752
- rb_fiber_scheduler_unblock(a->scheduler, a->blocker, a->fiber);
753
- return Qnil;
754
- }
755
-
756
983
  typedef struct {
757
984
  size_t encoded_size;
758
985
  const uint8_t *encoded_buffer;
@@ -781,6 +1008,19 @@ static void *zstd_decompress_stream_chunk_nogvl(void *arg) {
781
1008
  return NULL;
782
1009
  }
783
1010
 
1011
+ typedef struct {
1012
+ ZSTD_DStream *dstream;
1013
+ ZSTD_outBuffer output;
1014
+ ZSTD_inBuffer input;
1015
+ size_t result;
1016
+ } zstd_decompress_stream_chunk_fiber_t;
1017
+
1018
+ static void *zstd_decompress_stream_chunk_fiber_nogvl(void *arg) {
1019
+ zstd_decompress_stream_chunk_fiber_t *a = (zstd_decompress_stream_chunk_fiber_t *)arg;
1020
+ a->result = ZSTD_decompressStream(a->dstream, &a->output, &a->input);
1021
+ return NULL;
1022
+ }
1023
+
784
1024
  typedef struct {
785
1025
  BrotliDecoderState *dec;
786
1026
  size_t *available_in;
@@ -797,6 +1037,22 @@ static void *brotli_decompress_stream_nogvl(void *arg) {
797
1037
  return NULL;
798
1038
  }
799
1039
 
1040
+ typedef struct {
1041
+ BrotliDecoderState *dec;
1042
+ size_t available_in;
1043
+ const uint8_t *next_in;
1044
+ size_t available_out;
1045
+ uint8_t *next_out;
1046
+ BrotliDecoderResult result;
1047
+ } brotli_decompress_stream_fiber_t;
1048
+
1049
+ static void *brotli_decompress_stream_fiber_nogvl(void *arg) {
1050
+ brotli_decompress_stream_fiber_t *a = (brotli_decompress_stream_fiber_t *)arg;
1051
+ a->result = BrotliDecoderDecompressStream(a->dec, &a->available_in, &a->next_in,
1052
+ &a->available_out, &a->next_out, NULL);
1053
+ return NULL;
1054
+ }
1055
+
800
1056
  static void *zstd_fiber_compress_nogvl(void *arg) {
801
1057
  zstd_fiber_compress_t *a = (zstd_fiber_compress_t *)arg;
802
1058
  if (a->cdict) {
@@ -814,13 +1070,6 @@ static void *zstd_fiber_compress_nogvl(void *arg) {
814
1070
  return NULL;
815
1071
  }
816
1072
 
817
- static VALUE zstd_fiber_compress_thread(void *arg) {
818
- zstd_fiber_compress_t *a = (zstd_fiber_compress_t *)arg;
819
- rb_thread_call_without_gvl(zstd_fiber_compress_nogvl, a, RUBY_UBF_PROCESS, NULL);
820
- rb_fiber_scheduler_unblock(a->scheduler, a->blocker, a->fiber);
821
- return Qnil;
822
- }
823
-
824
1073
  static VALUE compress_compress(int argc, VALUE *argv, VALUE self) {
825
1074
  VALUE data, opts;
826
1075
  rb_scan_args(argc, argv, "1:", &data, &opts);
@@ -834,7 +1083,9 @@ static VALUE compress_compress(int argc, VALUE *argv, VALUE self) {
834
1083
  dict_val = opt_get(opts, sym_cache.dictionary);
835
1084
  }
836
1085
 
837
- compress_algo_t algo = NIL_P(algo_sym) ? ALGO_ZSTD : sym_to_algo(algo_sym);
1086
+ int explicit_algo = !NIL_P(algo_sym);
1087
+ compress_algo_t algo = explicit_algo ? sym_to_algo(algo_sym) : ALGO_ZSTD;
1088
+ lz4_format_t lz4_format = parse_lz4_format(opts, algo, explicit_algo);
838
1089
  int level = resolve_level(algo, level_val);
839
1090
 
840
1091
  dictionary_t *dict = NULL;
@@ -876,18 +1127,19 @@ static VALUE compress_compress(int argc, VALUE *argv, VALUE self) {
876
1127
  rb_raise(eError, "zstd compress: %s", ZSTD_getErrorName(csize));
877
1128
  rb_str_set_len(dst, (long)csize);
878
1129
  RB_GC_GUARD(data);
1130
+ RB_GC_GUARD(dict_val);
879
1131
  return dst;
880
1132
  }
881
1133
 
882
1134
  {
883
1135
  VALUE scheduler = current_fiber_scheduler();
884
- if (scheduler != Qnil) {
1136
+ work_exec_mode_t mode = select_fiber_nogvl_or_direct_mode(
1137
+ scheduler, slen, policy->gvl_unlock_threshold, policy->gvl_unlock_threshold);
1138
+
1139
+ if (mode == WORK_EXEC_FIBER) {
885
1140
  char *out_buf = (char *)malloc(bound);
886
1141
  if (!out_buf)
887
1142
  rb_raise(eMemError, "zstd: malloc failed");
888
-
889
- VALUE blocker = rb_obj_alloc(rb_cObject);
890
-
891
1143
  zstd_fiber_compress_t fargs = {
892
1144
  .src = src,
893
1145
  .src_len = slen,
@@ -897,14 +1149,9 @@ static VALUE compress_compress(int argc, VALUE *argv, VALUE self) {
897
1149
  .dst_cap = bound,
898
1150
  .result = 0,
899
1151
  .error = 0,
900
- .scheduler = scheduler,
901
- .blocker = blocker,
902
- .fiber = rb_fiber_current(),
903
1152
  };
904
1153
 
905
- VALUE rb_thread = rb_thread_create(zstd_fiber_compress_thread, &fargs);
906
- rb_fiber_scheduler_block(scheduler, blocker, Qnil);
907
- join_thread(rb_thread);
1154
+ RUN_WITH_EXEC_MODE(mode, zstd_fiber_compress_nogvl, fargs);
908
1155
 
909
1156
  if (fargs.error) {
910
1157
  free(out_buf);
@@ -918,6 +1165,7 @@ static VALUE compress_compress(int argc, VALUE *argv, VALUE self) {
918
1165
  VALUE result = rb_binary_str_new(out_buf, (long)fargs.result);
919
1166
  free(out_buf);
920
1167
  RB_GC_GUARD(data);
1168
+ RB_GC_GUARD(dict_val);
921
1169
  return result;
922
1170
  }
923
1171
  }
@@ -934,7 +1182,7 @@ static VALUE compress_compress(int argc, VALUE *argv, VALUE self) {
934
1182
  .result = 0,
935
1183
  .error = 0,
936
1184
  };
937
- run_without_gvl(zstd_compress_nogvl, &args);
1185
+ RUN_WITH_EXEC_MODE(WORK_EXEC_NOGVL, zstd_compress_nogvl, args);
938
1186
 
939
1187
  if (args.error)
940
1188
  rb_raise(eMemError, "zstd: failed to create context");
@@ -943,10 +1191,39 @@ static VALUE compress_compress(int argc, VALUE *argv, VALUE self) {
943
1191
 
944
1192
  rb_str_set_len(dst, (long)args.result);
945
1193
  RB_GC_GUARD(data);
1194
+ RB_GC_GUARD(dict_val);
946
1195
  return dst;
947
1196
  }
948
1197
  }
949
1198
  case ALGO_LZ4: {
1199
+ if (lz4_format == LZ4_FORMAT_FRAME) {
1200
+ LZ4F_preferences_t prefs;
1201
+ memset(&prefs, 0, sizeof(prefs));
1202
+ prefs.frameInfo.blockChecksumFlag = LZ4F_blockChecksumEnabled;
1203
+ prefs.frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled;
1204
+ size_t bound = LZ4F_compressFrameBound(slen, &prefs);
1205
+ VALUE dst = rb_binary_str_buf_reserve((long)bound);
1206
+ lz4frame_compress_args_t args = {
1207
+ .src = src,
1208
+ .src_len = slen,
1209
+ .dst = RSTRING_PTR(dst),
1210
+ .dst_cap = bound,
1211
+ .result = 0,
1212
+ .error_code = 0,
1213
+ };
1214
+ {
1215
+ VALUE scheduler = current_fiber_scheduler();
1216
+ work_exec_mode_t mode = select_fiber_nogvl_or_direct_mode(
1217
+ scheduler, slen, policy->gvl_unlock_threshold, policy->gvl_unlock_threshold);
1218
+ RUN_WITH_EXEC_MODE(mode, lz4frame_compress_nogvl, args);
1219
+ }
1220
+ if (args.error_code)
1221
+ rb_raise(eError, "lz4 frame compress failed: %s",
1222
+ LZ4F_getErrorName(args.error_code));
1223
+ rb_str_set_len(dst, (long)args.result);
1224
+ RB_GC_GUARD(data);
1225
+ return dst;
1226
+ }
950
1227
  if (slen > (size_t)INT_MAX)
951
1228
  rb_raise(eError, "lz4: input too large (max 2GB)");
952
1229
  int bound = LZ4_compressBound((int)slen);
@@ -967,11 +1244,9 @@ static VALUE compress_compress(int argc, VALUE *argv, VALUE self) {
967
1244
  };
968
1245
 
969
1246
  VALUE scheduler = current_fiber_scheduler();
970
- if (scheduler != Qnil) {
971
- run_via_fiber_worker(scheduler, lz4_compress_nogvl, &args);
972
- } else {
973
- run_without_gvl(lz4_compress_nogvl, &args);
974
- }
1247
+ work_exec_mode_t mode = select_fiber_nogvl_or_direct_mode(
1248
+ scheduler, slen, policy->gvl_unlock_threshold, policy->gvl_unlock_threshold);
1249
+ RUN_WITH_EXEC_MODE(mode, lz4_compress_nogvl, args);
975
1250
  csize = args.result;
976
1251
 
977
1252
  if (csize <= 0)
@@ -1052,6 +1327,7 @@ static VALUE compress_compress(int argc, VALUE *argv, VALUE self) {
1052
1327
 
1053
1328
  rb_str_set_len(dst, initial_out - available_out);
1054
1329
  RB_GC_GUARD(data);
1330
+ RB_GC_GUARD(dict_val);
1055
1331
  return dst;
1056
1332
  } else if (slen >= policy->gvl_unlock_threshold) {
1057
1333
  VALUE dst = rb_binary_str_buf_reserve(out_len);
@@ -1066,11 +1342,9 @@ static VALUE compress_compress(int argc, VALUE *argv, VALUE self) {
1066
1342
  };
1067
1343
 
1068
1344
  VALUE scheduler = current_fiber_scheduler();
1069
- if (scheduler != Qnil) {
1070
- run_via_fiber_worker(scheduler, brotli_compress_nogvl, &args);
1071
- } else {
1072
- run_without_gvl(brotli_compress_nogvl, &args);
1073
- }
1345
+ work_exec_mode_t mode = select_fiber_nogvl_or_direct_mode(
1346
+ scheduler, slen, policy->gvl_unlock_threshold, policy->gvl_unlock_threshold);
1347
+ RUN_WITH_EXEC_MODE(mode, brotli_compress_nogvl, args);
1074
1348
 
1075
1349
  if (!args.result)
1076
1350
  rb_raise(eError, "brotli compress failed");
@@ -1112,12 +1386,14 @@ static VALUE compress_decompress(int argc, VALUE *argv, VALUE self) {
1112
1386
  const uint8_t *src = (const uint8_t *)RSTRING_PTR(data);
1113
1387
  size_t slen = RSTRING_LEN(data);
1114
1388
 
1389
+ int explicit_algo = !NIL_P(algo_sym);
1115
1390
  compress_algo_t algo;
1116
- if (NIL_P(algo_sym)) {
1391
+ if (!explicit_algo) {
1117
1392
  algo = detect_algo(src, slen);
1118
1393
  } else {
1119
1394
  algo = sym_to_algo(algo_sym);
1120
1395
  }
1396
+ lz4_format_t lz4_format = parse_lz4_format(opts, algo, explicit_algo);
1121
1397
 
1122
1398
  const algo_policy_t *policy = algo_policy(algo);
1123
1399
 
@@ -1167,11 +1443,10 @@ static VALUE compress_decompress(int argc, VALUE *argv, VALUE self) {
1167
1443
  };
1168
1444
 
1169
1445
  VALUE scheduler = current_fiber_scheduler();
1170
- if (scheduler != Qnil) {
1171
- run_via_fiber_worker(scheduler, zstd_decompress_nogvl, &args);
1172
- } else {
1173
- run_without_gvl(zstd_decompress_nogvl, &args);
1174
- }
1446
+ work_exec_mode_t mode = select_fiber_nogvl_or_direct_mode(
1447
+ scheduler, frame_size, policy->gvl_unlock_threshold,
1448
+ policy->gvl_unlock_threshold);
1449
+ RUN_WITH_EXEC_MODE(mode, zstd_decompress_nogvl, args);
1175
1450
 
1176
1451
  if (args.error)
1177
1452
  rb_raise(eMemError, "zstd: failed to create dctx");
@@ -1183,6 +1458,7 @@ static VALUE compress_decompress(int argc, VALUE *argv, VALUE self) {
1183
1458
  limits.max_ratio_enabled, limits.max_ratio);
1184
1459
  rb_str_set_len(dst, (long)dsize);
1185
1460
  RB_GC_GUARD(data);
1461
+ RB_GC_GUARD(dict_val);
1186
1462
  return dst;
1187
1463
  } else {
1188
1464
  VALUE dst = rb_binary_str_buf_reserve((size_t)frame_size);
@@ -1207,6 +1483,7 @@ static VALUE compress_decompress(int argc, VALUE *argv, VALUE self) {
1207
1483
  limits.max_ratio_enabled, limits.max_ratio);
1208
1484
  rb_str_set_len(dst, dsize);
1209
1485
  RB_GC_GUARD(data);
1486
+ RB_GC_GUARD(dict_val);
1210
1487
  return dst;
1211
1488
  }
1212
1489
  }
@@ -1278,9 +1555,57 @@ static VALUE compress_decompress(int argc, VALUE *argv, VALUE self) {
1278
1555
  ZSTD_freeDCtx(dctx);
1279
1556
  rb_str_set_len(dst, total_out);
1280
1557
  RB_GC_GUARD(data);
1558
+ RB_GC_GUARD(dict_val);
1281
1559
  return dst;
1282
1560
  }
1283
1561
  case ALGO_LZ4: {
1562
+ if (lz4_format == LZ4_FORMAT_FRAME || is_lz4_frame_magic(src, slen)) {
1563
+ size_t alloc_size =
1564
+ (slen > limits.max_output_size / 4) ? limits.max_output_size : slen * 4;
1565
+ if (alloc_size < 4096)
1566
+ alloc_size = limits.max_output_size < 4096 ? limits.max_output_size : 4096;
1567
+ if (alloc_size == 0)
1568
+ alloc_size = limits.max_output_size;
1569
+ VALUE dst = rb_binary_str_buf_reserve((long)alloc_size);
1570
+ while (1) {
1571
+ lz4frame_decompress_args_t args = {
1572
+ .src = src,
1573
+ .src_len = slen,
1574
+ .dst = RSTRING_PTR(dst),
1575
+ .dst_cap = alloc_size,
1576
+ .result = 0,
1577
+ .error_code = 0,
1578
+ };
1579
+ {
1580
+ VALUE scheduler = current_fiber_scheduler();
1581
+ work_exec_mode_t mode = select_fiber_nogvl_or_direct_mode(
1582
+ scheduler, slen, policy->gvl_unlock_threshold,
1583
+ policy->gvl_unlock_threshold);
1584
+ RUN_WITH_EXEC_MODE(mode, lz4frame_decompress_nogvl, args);
1585
+ }
1586
+ if (args.error_code == (size_t)-1) {
1587
+ if (alloc_size >= limits.max_output_size)
1588
+ rb_raise(eDataError, "decompressed output exceeds limit (%zu bytes)",
1589
+ limits.max_output_size);
1590
+ size_t next_cap = alloc_size * 2;
1591
+ if (next_cap > limits.max_output_size)
1592
+ next_cap = limits.max_output_size;
1593
+ alloc_size = next_cap;
1594
+ grow_binary_str(dst, 0, alloc_size);
1595
+ continue;
1596
+ }
1597
+ if (args.error_code == (size_t)-2)
1598
+ rb_raise(eDataError, "lz4 frame decompress failed: truncated frame");
1599
+ if (args.error_code)
1600
+ rb_raise(eDataError, "lz4 frame decompress failed: %s",
1601
+ LZ4F_getErrorName(args.error_code));
1602
+ enforce_output_and_ratio_limits(args.result, slen, limits.max_output_size,
1603
+ limits.max_ratio_enabled, limits.max_ratio);
1604
+ rb_str_set_len(dst, (long)args.result);
1605
+ RB_GC_GUARD(data);
1606
+ return dst;
1607
+ }
1608
+ }
1284
1609
  if (slen < 4)
1285
1610
  rb_raise(eDataError, "lz4: data too short");
1286
1611
 
@@ -1318,15 +1643,12 @@ static VALUE compress_decompress(int argc, VALUE *argv, VALUE self) {
1318
1643
  .error = 0,
1319
1644
  };
1320
1645
 
1321
- if (total_orig >= algo_policy(ALGO_LZ4)->gvl_unlock_threshold) {
1646
+ {
1322
1647
  VALUE scheduler = current_fiber_scheduler();
1323
- if (scheduler != Qnil) {
1324
- run_via_fiber_worker(scheduler, lz4_decompress_all_nogvl, &args);
1325
- } else {
1326
- run_without_gvl(lz4_decompress_all_nogvl, &args);
1327
- }
1328
- } else {
1329
- lz4_decompress_all_nogvl(&args);
1648
+ work_exec_mode_t mode = select_fiber_nogvl_or_direct_mode(
1649
+ scheduler, total_orig, algo_policy(ALGO_LZ4)->gvl_unlock_threshold,
1650
+ algo_policy(ALGO_LZ4)->gvl_unlock_threshold);
1651
+ RUN_WITH_EXEC_MODE(mode, lz4_decompress_all_nogvl, args);
1330
1652
  }
1331
1653
 
1332
1654
  if (args.error)
@@ -1376,16 +1698,21 @@ static VALUE compress_decompress(int argc, VALUE *argv, VALUE self) {
1376
1698
  available_out = remaining_budget;
1377
1699
  uint8_t *next_out = (uint8_t *)RSTRING_PTR(dst) + total_out;
1378
1700
 
1379
- if (scheduler != Qnil && available_in >= policy->fiber_stream_threshold) {
1380
- brotli_decompress_stream_args_t sargs = {
1701
+ if (select_fiber_or_direct_mode(scheduler, available_in,
1702
+ policy->fiber_stream_threshold) == WORK_EXEC_FIBER) {
1703
+ brotli_decompress_stream_fiber_t sargs = {
1381
1704
  .dec = dec,
1382
- .available_in = &available_in,
1383
- .next_in = &next_in,
1384
- .available_out = &available_out,
1385
- .next_out = &next_out,
1705
+ .available_in = available_in,
1706
+ .next_in = next_in,
1707
+ .available_out = available_out,
1708
+ .next_out = next_out,
1386
1709
  .result = BROTLI_DECODER_RESULT_ERROR,
1387
1710
  };
1388
- run_via_fiber_worker(scheduler, brotli_decompress_stream_nogvl, &sargs);
1711
+ RUN_VIA_FIBER_WORKER(brotli_decompress_stream_fiber_nogvl, sargs);
1712
+ available_in = sargs.available_in;
1713
+ next_in = sargs.next_in;
1714
+ available_out = sargs.available_out;
1715
+ next_out = sargs.next_out;
1389
1716
  res = sargs.result;
1390
1717
  } else {
1391
1718
  res = BrotliDecoderDecompressStream(dec, &available_in, &next_in, &available_out,
@@ -1417,6 +1744,7 @@ static VALUE compress_decompress(int argc, VALUE *argv, VALUE self) {
1417
1744
  }
1418
1745
  rb_str_set_len(dst, total_out);
1419
1746
  RB_GC_GUARD(data);
1747
+ RB_GC_GUARD(dict_val);
1420
1748
  return dst;
1421
1749
  }
1422
1750
  }
@@ -1450,14 +1778,14 @@ static void crc32_init_tables(void) {
1450
1778
  crc32_tables_initialized = 1;
1451
1779
  }
1452
1780
 
1453
- static uint32_t crc32_compute(const uint8_t *data, size_t len, uint32_t crc) {
1781
+ static uint32_t crc32_compute(const uint8_t *restrict data, size_t len, uint32_t crc) {
1454
1782
  crc = ~crc;
1455
1783
 
1456
1784
  while (len >= 8) {
1457
- uint32_t val0 = crc ^ ((uint32_t)data[0] | ((uint32_t)data[1] << 8) |
1458
- ((uint32_t)data[2] << 16) | ((uint32_t)data[3] << 24));
1459
- uint32_t val1 = (uint32_t)data[4] | ((uint32_t)data[5] << 8) | ((uint32_t)data[6] << 16) |
1460
- ((uint32_t)data[7] << 24);
1785
+ const uint32_t val0 = crc ^ ((uint32_t)data[0] | ((uint32_t)data[1] << 8) |
1786
+ ((uint32_t)data[2] << 16) | ((uint32_t)data[3] << 24));
1787
+ const uint32_t val1 = (uint32_t)data[4] | ((uint32_t)data[5] << 8) |
1788
+ ((uint32_t)data[6] << 16) | ((uint32_t)data[7] << 24);
1461
1789
 
1462
1790
  crc = crc32_tables[7][(val0) & 0xFF] ^ crc32_tables[6][(val0 >> 8) & 0xFF] ^
1463
1791
  crc32_tables[5][(val0 >> 16) & 0xFF] ^ crc32_tables[4][(val0 >> 24) & 0xFF] ^
@@ -1488,28 +1816,49 @@ static VALUE compress_crc32(int argc, VALUE *argv, VALUE self) {
1488
1816
  }
1489
1817
 
1490
1818
  static VALUE compress_adler32(int argc, VALUE *argv, VALUE self) {
1819
+ (void)self;
1491
1820
  VALUE data, prev;
1492
1821
  rb_scan_args(argc, argv, "11", &data, &prev);
1493
1822
  StringValue(data);
1494
1823
 
1495
- const uint8_t *src = (const uint8_t *)RSTRING_PTR(data);
1496
- size_t len = RSTRING_LEN(data);
1497
- uint32_t adler = NIL_P(prev) ? 1 : NUM2UINT(prev);
1824
+ const uint8_t *restrict src = (const uint8_t *)RSTRING_PTR(data);
1825
+ size_t len = (size_t)RSTRING_LEN(data);
1826
+ const uint32_t adler = NIL_P(prev) ? 1u : NUM2UINT(prev);
1498
1827
 
1499
- uint32_t s1 = adler & 0xFFFF;
1500
- uint32_t s2 = (adler >> 16) & 0xFFFF;
1501
- const uint32_t BASE = 65521;
1828
+ uint32_t s1 = adler & 0xFFFFu;
1829
+ uint32_t s2 = (adler >> 16) & 0xFFFFu;
1830
+ enum { ADLER_BASE = 65521, ADLER_NMAX = 5552 };
1502
1831
 
1503
1832
  while (len > 0) {
1504
- size_t chunk = len > 5552 ? 5552 : len;
1833
+ size_t chunk = len > ADLER_NMAX ? (size_t)ADLER_NMAX : len;
1505
1834
  len -= chunk;
1506
- for (size_t i = 0; i < chunk; i++) {
1507
- s1 += src[i];
1835
+
1836
+ while (chunk >= 8) {
1837
+ s1 += src[0];
1838
+ s2 += s1;
1839
+ s1 += src[1];
1840
+ s2 += s1;
1841
+ s1 += src[2];
1842
+ s2 += s1;
1843
+ s1 += src[3];
1508
1844
  s2 += s1;
1845
+ s1 += src[4];
1846
+ s2 += s1;
1847
+ s1 += src[5];
1848
+ s2 += s1;
1849
+ s1 += src[6];
1850
+ s2 += s1;
1851
+ s1 += src[7];
1852
+ s2 += s1;
1853
+ src += 8;
1854
+ chunk -= 8;
1509
1855
  }
1510
- s1 %= BASE;
1511
- s2 %= BASE;
1512
- src += chunk;
1856
+ while (chunk--) {
1857
+ s1 += *src++;
1858
+ s2 += s1;
1859
+ }
1860
+ s1 %= ADLER_BASE;
1861
+ s2 %= ADLER_BASE;
1513
1862
  }
1514
1863
 
1515
1864
  return UINT2NUM((s2 << 16) | s1);
@@ -1545,6 +1894,9 @@ static VALUE compress_version(VALUE self, VALUE algo_sym) {
1545
1894
  #define LZ4_RING_BUFFER_SIZE (64 * 1024)
1546
1895
  #define LZ4_RING_BUFFER_TOTAL (LZ4_RING_BUFFER_SIZE * 2)
1547
1896
 
1897
+ _Static_assert(LZ4_RING_BUFFER_TOTAL == 2 * LZ4_RING_BUFFER_SIZE,
1898
+ "ring buffer total must be exactly twice the window size");
1899
+
1548
1900
  typedef struct {
1549
1901
  compress_algo_t algo;
1550
1902
  int level;
@@ -1589,9 +1941,22 @@ static void deflater_free(void *ptr) {
1589
1941
 
1590
1942
  static size_t deflater_memsize(const void *ptr) {
1591
1943
  const deflater_t *d = (const deflater_t *)ptr;
1944
+ if (!d)
1945
+ return 0;
1946
+
1592
1947
  size_t s = sizeof(deflater_t);
1593
- if (d->lz4_ring.buf)
1594
- s += LZ4_RING_BUFFER_TOTAL;
1948
+ switch (d->algo) {
1949
+ case ALGO_ZSTD:
1950
+ if (d->ctx.zstd)
1951
+ s += ZSTD_sizeof_CStream(d->ctx.zstd);
1952
+ break;
1953
+ case ALGO_BROTLI:
1954
+ break;
1955
+ case ALGO_LZ4:
1956
+ if (d->lz4_ring.buf)
1957
+ s += LZ4_RING_BUFFER_TOTAL;
1958
+ break;
1959
+ }
1595
1960
  return s;
1596
1961
  }
1597
1962
 
@@ -1711,7 +2076,12 @@ static VALUE lz4_compress_ring_block(deflater_t *d) {
1711
2076
 
1712
2077
  write_le_u32((uint8_t *)out, (uint32_t)src_size);
1713
2078
 
1714
- int csize = LZ4_compress_fast_continue(d->ctx.lz4, block_start, out + 8, src_size, bound, 1);
2079
+ int csize;
2080
+ if (d->level > 1) {
2081
+ csize = LZ4_compress_HC(block_start, out + 8, src_size, bound, d->level);
2082
+ } else {
2083
+ csize = LZ4_compress_default(block_start, out + 8, src_size, bound);
2084
+ }
1715
2085
  if (csize <= 0)
1716
2086
  rb_raise(eError, "lz4 stream compress block failed");
1717
2087
 
@@ -1759,37 +2129,41 @@ static VALUE deflater_write(VALUE self, VALUE chunk) {
1759
2129
 
1760
2130
  ZSTD_outBuffer output = {RSTRING_PTR(result) + result_len, out_cap, 0};
1761
2131
 
1762
- if (scheduler != Qnil && (input.size - input.pos) >= policy->fiber_stream_threshold) {
1763
- zstd_stream_chunk_fiber_t fargs = {
1764
- .cstream = d->ctx.zstd,
1765
- .input = &input,
1766
- .output = &output,
1767
- .result = 0,
1768
- .scheduler = scheduler,
1769
- .blocker = rb_obj_alloc(rb_cObject),
1770
- .fiber = rb_fiber_current(),
1771
- };
1772
- VALUE th = rb_thread_create(zstd_stream_chunk_fiber_thread, &fargs);
1773
- rb_fiber_scheduler_block(scheduler, fargs.blocker, Qnil);
1774
- join_thread(th);
1775
-
1776
- if (ZSTD_isError(fargs.result))
1777
- rb_raise(eError, "zstd compress stream: %s", ZSTD_getErrorName(fargs.result));
1778
- } else if (scheduler == Qnil &&
1779
- (input.size - input.pos) >= policy->gvl_unlock_threshold) {
1780
- zstd_stream_chunk_args_t args = {
1781
- .cstream = d->ctx.zstd,
1782
- .output = &output,
1783
- .input = &input,
1784
- .result = 0,
1785
- };
1786
- run_without_gvl(zstd_compress_stream_chunk_nogvl, &args);
1787
- if (ZSTD_isError(args.result))
1788
- rb_raise(eError, "zstd compress stream: %s", ZSTD_getErrorName(args.result));
1789
- } else {
1790
- size_t ret = ZSTD_compressStream(d->ctx.zstd, &output, &input);
1791
- if (ZSTD_isError(ret))
1792
- rb_raise(eError, "zstd compress stream: %s", ZSTD_getErrorName(ret));
2132
+ {
2133
+ work_exec_mode_t mode = select_fiber_nogvl_or_direct_mode(
2134
+ scheduler, input.size - input.pos, policy->fiber_stream_threshold,
2135
+ policy->gvl_unlock_threshold);
2136
+
2137
+ if (mode == WORK_EXEC_FIBER) {
2138
+ zstd_stream_chunk_fiber_t fargs = {
2139
+ .cstream = d->ctx.zstd,
2140
+ .input = input,
2141
+ .output = output,
2142
+ .result = 0,
2143
+ };
2144
+ RUN_WITH_EXEC_MODE(mode, zstd_stream_chunk_fiber_nogvl, fargs);
2145
+ input.pos = fargs.input.pos;
2146
+ output.pos = fargs.output.pos;
2147
+
2148
+ if (ZSTD_isError(fargs.result))
2149
+ rb_raise(eError, "zstd compress stream: %s",
2150
+ ZSTD_getErrorName(fargs.result));
2151
+ } else if (mode == WORK_EXEC_NOGVL) {
2152
+ zstd_stream_chunk_args_t args = {
2153
+ .cstream = d->ctx.zstd,
2154
+ .output = &output,
2155
+ .input = &input,
2156
+ .result = 0,
2157
+ };
2158
+ RUN_WITH_EXEC_MODE(mode, zstd_compress_stream_chunk_nogvl, args);
2159
+ if (ZSTD_isError(args.result))
2160
+ rb_raise(eError, "zstd compress stream: %s",
2161
+ ZSTD_getErrorName(args.result));
2162
+ } else {
2163
+ size_t ret = ZSTD_compressStream(d->ctx.zstd, &output, &input);
2164
+ if (ZSTD_isError(ret))
2165
+ rb_raise(eError, "zstd compress stream: %s", ZSTD_getErrorName(ret));
2166
+ }
1793
2167
  }
1794
2168
  result_len += output.pos;
1795
2169
  }
@@ -1814,22 +2188,23 @@ static VALUE deflater_write(VALUE self, VALUE chunk) {
1814
2188
  uint8_t *next_out = NULL;
1815
2189
  BROTLI_BOOL ok;
1816
2190
 
1817
- if (use_fiber && available_in >= policy->fiber_stream_threshold) {
2191
+ if (use_fiber &&
2192
+ select_fiber_or_direct_mode(scheduler, available_in,
2193
+ policy->fiber_stream_threshold) == WORK_EXEC_FIBER) {
1818
2194
  brotli_stream_chunk_fiber_t fargs = {
1819
2195
  .enc = d->ctx.brotli,
1820
2196
  .op = BROTLI_OPERATION_PROCESS,
1821
- .available_in = &available_in,
1822
- .next_in = &next_in,
1823
- .available_out = &available_out,
1824
- .next_out = &next_out,
2197
+ .available_in = available_in,
2198
+ .next_in = next_in,
2199
+ .available_out = available_out,
2200
+ .next_out = next_out,
1825
2201
  .result = BROTLI_FALSE,
1826
- .scheduler = scheduler,
1827
- .blocker = rb_obj_alloc(rb_cObject),
1828
- .fiber = rb_fiber_current(),
1829
2202
  };
1830
- VALUE th = rb_thread_create(brotli_stream_chunk_fiber_thread, &fargs);
1831
- rb_fiber_scheduler_block(scheduler, fargs.blocker, Qnil);
1832
- join_thread(th);
2203
+ RUN_VIA_FIBER_WORKER(brotli_stream_chunk_fiber_nogvl, fargs);
2204
+ available_in = fargs.available_in;
2205
+ next_in = fargs.next_in;
2206
+ available_out = fargs.available_out;
2207
+ next_out = fargs.next_out;
1833
2208
  ok = fargs.result;
1834
2209
  } else {
1835
2210
  ok = BrotliEncoderCompressStream(d->ctx.brotli, BROTLI_OPERATION_PROCESS,
@@ -2221,7 +2596,22 @@ static void inflater_free(void *ptr) {
2221
2596
 
2222
2597
  static size_t inflater_memsize(const void *ptr) {
2223
2598
  const inflater_t *inf = (const inflater_t *)ptr;
2224
- return sizeof(inflater_t) + inf->lz4_buf.cap;
2599
+ if (!inf)
2600
+ return 0;
2601
+
2602
+ size_t s = sizeof(inflater_t);
2603
+ switch (inf->algo) {
2604
+ case ALGO_ZSTD:
2605
+ if (inf->ctx.zstd)
2606
+ s += ZSTD_sizeof_DStream(inf->ctx.zstd);
2607
+ break;
2608
+ case ALGO_BROTLI:
2609
+ break;
2610
+ case ALGO_LZ4:
2611
+ s += inf->lz4_buf.cap;
2612
+ break;
2613
+ }
2614
+ return s;
2225
2615
  }
2226
2616
 
2227
2617
  static const rb_data_type_t inflater_type = {"Compress::Inflater",
@@ -2317,8 +2707,7 @@ static VALUE inflater_write(VALUE self, VALUE chunk) {
2317
2707
  if (slen == 0)
2318
2708
  return rb_binary_str_new("", 0);
2319
2709
 
2320
- inf->total_input =
2321
- checked_add_size(inf->total_input, slen, "compressed input exceeds representable size");
2710
+ size_t input_accounted_before = inf->total_input;
2322
2711
 
2323
2712
  switch (inf->algo) {
2324
2713
  case ALGO_ZSTD: {
@@ -2357,14 +2746,17 @@ static VALUE inflater_write(VALUE self, VALUE chunk) {
2357
2746
  ZSTD_outBuffer output = {RSTRING_PTR(result) + result_len, current_out_cap, 0};
2358
2747
  size_t ret;
2359
2748
 
2360
- if (scheduler != Qnil && (input.size - input.pos) >= policy->fiber_stream_threshold) {
2361
- zstd_decompress_stream_chunk_args_t args = {
2749
+ if (select_fiber_or_direct_mode(scheduler, input.size - input.pos,
2750
+ policy->fiber_stream_threshold) == WORK_EXEC_FIBER) {
2751
+ zstd_decompress_stream_chunk_fiber_t args = {
2362
2752
  .dstream = inf->ctx.zstd,
2363
- .output = &output,
2364
- .input = &input,
2753
+ .output = output,
2754
+ .input = input,
2365
2755
  .result = 0,
2366
2756
  };
2367
- run_via_fiber_worker(scheduler, zstd_decompress_stream_chunk_nogvl, &args);
2757
+ RUN_VIA_FIBER_WORKER(zstd_decompress_stream_chunk_fiber_nogvl, args);
2758
+ output.pos = args.output.pos;
2759
+ input.pos = args.input.pos;
2368
2760
  ret = args.result;
2369
2761
  } else {
2370
2762
  ret = ZSTD_decompressStream(inf->ctx.zstd, &output, &input);
@@ -2376,11 +2768,15 @@ static VALUE inflater_write(VALUE self, VALUE chunk) {
2376
2768
  "decompressed output exceeds representable size");
2377
2769
  size_t total_output = checked_add_size(
2378
2770
  inf->total_output, result_len, "decompressed output exceeds representable size");
2379
- enforce_output_and_ratio_limits(total_output, inf->total_input, inf->max_output_size,
2771
+ size_t total_input = checked_add_size(input_accounted_before, input.pos,
2772
+ "compressed input exceeds representable size");
2773
+ enforce_output_and_ratio_limits(total_output, total_input, inf->max_output_size,
2380
2774
  inf->max_ratio_enabled, inf->max_ratio);
2381
2775
  if (ret == 0)
2382
2776
  break;
2383
2777
  }
2778
+ inf->total_input = checked_add_size(input_accounted_before, input.pos,
2779
+ "compressed input exceeds representable size");
2384
2780
  inf->total_output = checked_add_size(inf->total_output, result_len,
2385
2781
  "decompressed output exceeds representable size");
2386
2782
  rb_str_set_len(result, result_len);
@@ -2409,16 +2805,21 @@ static VALUE inflater_write(VALUE self, VALUE chunk) {
2409
2805
  uint8_t *next_out = NULL;
2410
2806
  BrotliDecoderResult res;
2411
2807
 
2412
- if (scheduler != Qnil && available_in >= policy->fiber_stream_threshold) {
2413
- brotli_decompress_stream_args_t sargs = {
2808
+ if (select_fiber_or_direct_mode(scheduler, available_in,
2809
+ policy->fiber_stream_threshold) == WORK_EXEC_FIBER) {
2810
+ brotli_decompress_stream_fiber_t sargs = {
2414
2811
  .dec = inf->ctx.brotli,
2415
- .available_in = &available_in,
2416
- .next_in = &next_in,
2417
- .available_out = &available_out,
2418
- .next_out = &next_out,
2812
+ .available_in = available_in,
2813
+ .next_in = next_in,
2814
+ .available_out = available_out,
2815
+ .next_out = next_out,
2419
2816
  .result = BROTLI_DECODER_RESULT_ERROR,
2420
2817
  };
2421
- run_via_fiber_worker(scheduler, brotli_decompress_stream_nogvl, &sargs);
2818
+ RUN_VIA_FIBER_WORKER(brotli_decompress_stream_fiber_nogvl, sargs);
2819
+ available_in = sargs.available_in;
2820
+ next_in = sargs.next_in;
2821
+ available_out = sargs.available_out;
2822
+ next_out = sargs.next_out;
2422
2823
  res = sargs.result;
2423
2824
  } else {
2424
2825
  res = BrotliDecoderDecompressStream(inf->ctx.brotli, &available_in, &next_in,
@@ -2436,9 +2837,11 @@ static VALUE inflater_write(VALUE self, VALUE chunk) {
2436
2837
  checked_add_size(result_len, out_size,
2437
2838
  "decompressed output exceeds representable size"),
2438
2839
  "decompressed output exceeds representable size");
2439
- enforce_output_and_ratio_limits(total_output, inf->total_input,
2440
- inf->max_output_size, inf->max_ratio_enabled,
2441
- inf->max_ratio);
2840
+ size_t total_input =
2841
+ checked_add_size(input_accounted_before, slen - available_in,
2842
+ "compressed input exceeds representable size");
2843
+ enforce_output_and_ratio_limits(total_output, total_input, inf->max_output_size,
2844
+ inf->max_ratio_enabled, inf->max_ratio);
2442
2845
 
2443
2846
  if (result_len + out_size > result_cap) {
2444
2847
  result_cap = result_len + out_size;
@@ -2453,6 +2856,8 @@ static VALUE inflater_write(VALUE self, VALUE chunk) {
2453
2856
  if (res == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT && available_in == 0)
2454
2857
  break;
2455
2858
  }
2859
+ inf->total_input = checked_add_size(input_accounted_before, slen - available_in,
2860
+ "compressed input exceeds representable size");
2456
2861
  inf->total_output = checked_add_size(inf->total_output, result_len,
2457
2862
  "decompressed output exceeds representable size");
2458
2863
  rb_str_set_len(result, result_len);
@@ -2521,7 +2926,10 @@ static VALUE inflater_write(VALUE self, VALUE chunk) {
2521
2926
  checked_add_size(result_len, orig_size,
2522
2927
  "decompressed output exceeds representable size"),
2523
2928
  "decompressed output exceeds representable size");
2524
- enforce_output_and_ratio_limits(total_output, inf->total_input, inf->max_output_size,
2929
+ size_t total_input = checked_add_size(
2930
+ input_accounted_before, (pos + 8 + (size_t)comp_size) - inf->lz4_buf.offset,
2931
+ "compressed input exceeds representable size");
2932
+ enforce_output_and_ratio_limits(total_output, total_input, inf->max_output_size,
2525
2933
  inf->max_ratio_enabled, inf->max_ratio);
2526
2934
 
2527
2935
  if (result_len + orig_size > result_cap) {
@@ -2545,6 +2953,8 @@ static VALUE inflater_write(VALUE self, VALUE chunk) {
2545
2953
  }
2546
2954
  }
2547
2955
 
2956
+ inf->total_input = checked_add_size(input_accounted_before, pos - inf->lz4_buf.offset,
2957
+ "compressed input exceeds representable size");
2548
2958
  inf->lz4_buf.offset = pos;
2549
2959
  inf->total_output = checked_add_size(inf->total_output, result_len,
2550
2960
  "decompressed output exceeds representable size");