image_pack 0.2.0 → 0.2.1

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 96839f17672b82deb6a21685649f5f577fedeb8d7f07b79410ddce761de9d40d
4
- data.tar.gz: a806d3ff58c3a32d17e20df100511e31ad6d582facb9508c202ab928cc7cf7f7
3
+ metadata.gz: e741b4071a7d8ef87a2bbfb62f1d3a5fe11e809eac31463ee52f3f919833f148
4
+ data.tar.gz: f60b9c3bb2e8ff303099b5339a170640950c1e0928686d19c5a0f9bafe297c30
5
5
  SHA512:
6
- metadata.gz: cd8d2f768961d288cbfede915ee0a30f3fe538508c9d88f48d37cf27bf3f77f603e47ab60d0d4d044f298d6bb35336fad15df8fffabc034ecf04e71eb6d6fe44
7
- data.tar.gz: dbe7c27f803a23e9181276c8b4efda825ed14cb172385bf86c48591278e0776a63f4c4c2f1664e2dc6547745addc0268613860bd3e1bf3761ac6ce000022566e
6
+ metadata.gz: 966485e630b9ab72ab0e193aaa588a8d396bbb9f8fd5dba40e0e2a17eaa392cc2d7944134b679ec5e73fdd6902ba9db24dbff6582aaa1071b9f1986c77df8590
7
+ data.tar.gz: c1f70b669944ce35139f005cc4ed24db9634c51af8ff90d509eec6c6544c4c664525c27b9eb9045ac40fb3ea9bf81ea36e75ad7ce842427fd8968c289ed03ae6
data/CHANGELOG.md CHANGED
@@ -1,5 +1,38 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.2.1
4
+
5
+ - `ip_compute_ssim_luma_buffer`: rewrote inner accumulators from `double` to
6
+ `int32_t`. For an 8x8 luma window all partial sums (sum, sum-of-squares,
7
+ cross-product) fit in 32 bits. GCC was already auto-vectorizing the
8
+ `double` version on AVX2 (4 lanes × fp64), but int32 doubles the lane
9
+ count (8 lanes × i32) and uses cheaper integer multiplies. Split the
10
+ kernel into a fixed-size 8x8 specialization plus a variable edge kernel.
11
+ Top hot symbol in the SSIM-guarded perf profile.
12
+ - `ip_build_luma_buffer`: split the runtime-strided BT.601 loop into
13
+ channels==3 and channels==4 specializations with `__restrict__` pointers
14
+ so the compiler can vectorize per-pixel work.
15
+ - `prepare_encode_row` (RGBA→RGB): `__restrict__` + hoisted width to enable
16
+ vectorization by the compiler.
17
+ - `ip_malloc_hot`: new helper for buffers we touch in tight loops right
18
+ after allocation. On Linux it issues `madvise(MADV_HUGEPAGE)` for
19
+ allocations >= 256 KiB to remove the per-page minor page faults
20
+ that previously appeared inside `jsimd_*_avx2` hot loops in the perf
21
+ profile. No-op on macOS / non-Linux. Used for the decoded pixel buffer,
22
+ the SSIM luma buffer, and the pixel input buffer.
23
+ - SSIM-guarded path: reference and candidate decodes both now use
24
+ `fast_decode_mode=1` (no fancy upsampling, no block smoothing). The
25
+ comparison stays apples-to-apples since both sides use the same decode
26
+ pipeline; ~30% reduction in candidate-decode cost.
27
+ - `extconf.rb`: opt-in `IMAGE_PACK_MARCH=<arch>` env knob for tuned
28
+ builds (`native`, `x86-64-v3`, etc). Default build stays portable.
29
+ Also added `-fno-math-errno -fno-trapping-math` to remove libm-related
30
+ vectorization barriers without changing semantics for the integer hot
31
+ paths.
32
+ - CI: Linux x86_64 jobs now compile with `IMAGE_PACK_MARCH=x86-64-v3`
33
+ (AVX2 baseline; covers all current GitHub-hosted runner generations).
34
+ arm64 / macOS unchanged.
35
+
3
36
  ## 0.2.0
4
37
 
5
38
  - Added `min_ssim:` to `ImagePack.compress` for SSIM-guarded JPEG compression.
@@ -186,10 +186,17 @@ def detect_simd_arch
186
186
  end
187
187
 
188
188
  def find_nasm
189
- ENV["AS_NASM"].presence_or_nil ||
189
+ presence(ENV["AS_NASM"]) ||
190
190
  %w[nasm yasm].find { |bin| system("which #{bin} >/dev/null 2>&1") }
191
191
  end
192
192
 
193
+ def presence(string)
194
+ return nil if string.nil?
195
+ return nil if string.empty?
196
+
197
+ string
198
+ end
199
+
193
200
  def write_mozjpeg_config_headers!(build_dir, with_simd:)
194
201
  with_simd_flag = with_simd ? "#define WITH_SIMD 1" : "#undef WITH_SIMD"
195
202
 
@@ -350,21 +357,6 @@ def write_neon_compat_header!(mozjpeg_dir)
350
357
  target
351
358
  end
352
359
 
353
- class String
354
- unless instance_methods.include?(:presence_or_nil)
355
- def presence_or_nil
356
- empty? ? nil : self
357
- end
358
- end
359
- end
360
- class NilClass
361
- unless instance_methods.include?(:presence_or_nil)
362
- def presence_or_nil
363
- nil
364
- end
365
- end
366
- end
367
-
368
360
  def configure_vendored_mozjpeg(vendor_dir)
369
361
  versions = File.read(File.join(vendor_dir, ".vendored"))
370
362
  mozjpeg_dir = File.join(vendor_dir, "mozjpeg")
@@ -497,6 +489,14 @@ $warnflags = ""
497
489
 
498
490
  unless msvc?
499
491
  $CFLAGS += " -O3 -Wall -Wextra -Wno-unused-parameter -Wno-sign-compare -std=gnu11"
492
+ $CFLAGS += " -fno-math-errno -fno-trapping-math"
493
+
494
+ march = ENV["IMAGE_PACK_MARCH"].to_s
495
+ unless march.empty?
496
+ $CFLAGS += " -march=#{march}"
497
+ $CFLAGS += " -mtune=#{march}" if march == "native"
498
+ puts "image_pack: using -march=#{march}"
499
+ end
500
500
  else
501
501
  $CFLAGS += " -O2"
502
502
  end
@@ -17,6 +17,10 @@
17
17
  #include <string.h>
18
18
  #include <jpeglib.h>
19
19
 
20
+ #if defined(__linux__)
21
+ #include <sys/mman.h>
22
+ #endif
23
+
20
24
  #ifndef IMAGE_PACK_INIT_EXPORT
21
25
  #if defined(_WIN32)
22
26
  #define IMAGE_PACK_INIT_EXPORT __declspec(dllexport)
@@ -39,6 +43,14 @@
39
43
  #define FALSE 0
40
44
  #endif
41
45
 
46
+ #if defined(_MSC_VER) && !defined(__clang__)
47
+ #define IP_RESTRICT __restrict
48
+ #elif defined(__GNUC__) || defined(__clang__)
49
+ #define IP_RESTRICT __restrict
50
+ #else
51
+ #define IP_RESTRICT
52
+ #endif
53
+
42
54
  typedef enum { IP_ALGO_JPEG_TURBO = 1, IP_ALGO_MOZJPEG = 2 } ip_algo_t;
43
55
 
44
56
  typedef enum {
@@ -105,6 +117,7 @@ typedef struct {
105
117
  int max_width;
106
118
  int max_height;
107
119
  size_t max_output_size;
120
+ size_t max_input_size;
108
121
 
109
122
  ip_status_t status;
110
123
  char error_message[512];
@@ -117,6 +130,14 @@ typedef struct {
117
130
 
118
131
  unsigned char *scratch_row;
119
132
  size_t scratch_row_size;
133
+
134
+ struct {
135
+ int marker;
136
+ unsigned char *data;
137
+ unsigned int len;
138
+ } *preserved_markers;
139
+ size_t preserved_marker_count;
140
+ size_t preserved_marker_capacity;
120
141
  } ip_context_t;
121
142
 
122
143
  typedef struct {
@@ -152,6 +173,7 @@ static ID id_max_pixels;
152
173
  static ID id_max_width;
153
174
  static ID id_max_height;
154
175
  static ID id_max_output_size;
176
+ static ID id_max_input_size;
155
177
 
156
178
  static ip_context_t *ip_context_new(void);
157
179
  static void ip_context_free(ip_context_t *ctx);
@@ -187,14 +209,12 @@ static void validate_limits_for_pixels(ip_context_t *ctx);
187
209
 
188
210
  static int ip_jpeg_decode_to_pixels(ip_context_t *ctx, unsigned char **pixels, int *width,
189
211
  int *height, int *channels, int fast_decode_mode);
212
+ static int ip_decode_jpeg_to_luma_buffer(ip_context_t *ctx, const unsigned char *data, size_t size,
213
+ unsigned char **luma, int *width, int *height);
190
214
  static int guarded_compress_jpeg_input_with_mode(ip_context_t *ctx, int mozjpeg_size_mode);
191
215
  static int ip_jpeg_turbo_compress(ip_context_t *ctx);
192
216
  static int ip_mozjpeg_compress(ip_context_t *ctx);
193
217
 
194
- #if defined(IMAGE_PACK_ENABLE_LOSSLESS_TRANSCODE_FAST_PATH)
195
- static int ip_jpeg_transcode_coefficients(ip_context_t *ctx, int mozjpeg_size_mode);
196
- #endif
197
-
198
218
  static VALUE ip_compress_jpeg_entry(VALUE self, VALUE input, VALUE input_kind, VALUE output,
199
219
  VALUE output_kind, VALUE algo, VALUE quality, VALUE min_ssim,
200
220
  VALUE mozjpeg_trellis, VALUE progressive, VALUE strip_metadata,
@@ -251,6 +271,18 @@ static int ip_checked_image_size(int width, int height, int channels, size_t *ou
251
271
  return ip_checked_mul_size(pixels, (size_t)channels, out);
252
272
  }
253
273
 
274
+ static void *ip_malloc_hot(size_t size) {
275
+ void *p = malloc(size);
276
+ if (!p)
277
+ return NULL;
278
+ #if defined(__linux__) && defined(MADV_HUGEPAGE)
279
+ if (size >= (256u * 1024u)) {
280
+ (void)madvise(p, size, MADV_HUGEPAGE);
281
+ }
282
+ #endif
283
+ return p;
284
+ }
285
+
254
286
  static void ip_validate_quality_or_raise(ip_context_t *ctx) {
255
287
  if (ctx->quality >= 1 && ctx->quality <= 100)
256
288
  return;
@@ -297,6 +329,7 @@ static ip_context_t *ip_context_new(void) {
297
329
  ctx->max_width = 30000;
298
330
  ctx->max_height = 30000;
299
331
  ctx->max_output_size = 256 * 1024 * 1024;
332
+ ctx->max_input_size = 256 * 1024 * 1024;
300
333
  atomic_init(&ctx->cancelled, 0);
301
334
  return ctx;
302
335
  }
@@ -310,6 +343,13 @@ static void ip_context_free(ip_context_t *ctx) {
310
343
  free(ctx->output_path);
311
344
  free(ctx->scratch_row);
312
345
 
346
+ if (ctx->preserved_markers) {
347
+ for (size_t i = 0; i < ctx->preserved_marker_count; i++) {
348
+ free(ctx->preserved_markers[i].data);
349
+ }
350
+ free(ctx->preserved_markers);
351
+ }
352
+
313
353
  if (ctx->output_data && ctx->output_owner == IP_OUTPUT_OWNER_MALLOC) {
314
354
  free(ctx->output_data);
315
355
  }
@@ -405,6 +445,12 @@ static int read_file_to_owned_buffer(ip_context_t *ctx, const char *path) {
405
445
  }
406
446
  rewind(fp);
407
447
 
448
+ if (ctx->max_input_size > 0 && (size_t)size > ctx->max_input_size) {
449
+ fclose(fp);
450
+ ip_context_set_error(ctx, IP_ERR_LIMIT, "input file exceeds max_input_size");
451
+ return 0;
452
+ }
453
+
408
454
  unsigned char *data = (unsigned char *)malloc((size_t)size);
409
455
  if (!data && size > 0) {
410
456
  fclose(fp);
@@ -435,6 +481,10 @@ static int ip_prepare_input_bytes(ip_context_t *ctx, VALUE input, ip_input_kind_
435
481
  if (kind == IP_INPUT_BYTES) {
436
482
  Check_Type(input, T_STRING);
437
483
  size_t len = (size_t)RSTRING_LEN(input);
484
+ if (ctx->max_input_size > 0 && len > ctx->max_input_size) {
485
+ ip_context_set_error(ctx, IP_ERR_LIMIT, "input bytes exceed max_input_size");
486
+ return 0;
487
+ }
438
488
  unsigned char *copy = (unsigned char *)malloc(len);
439
489
  if (!copy && len > 0) {
440
490
  ip_context_set_error(ctx, IP_ERR_OOM, "failed to copy binary String input");
@@ -452,6 +502,10 @@ static int ip_prepare_input_bytes(ip_context_t *ctx, VALUE input, ip_input_kind_
452
502
  VALUE str = io_buffer_to_string(input);
453
503
  StringValue(str);
454
504
  size_t len = (size_t)RSTRING_LEN(str);
505
+ if (ctx->max_input_size > 0 && len > ctx->max_input_size) {
506
+ ip_context_set_error(ctx, IP_ERR_LIMIT, "input IO::Buffer exceeds max_input_size");
507
+ return 0;
508
+ }
455
509
  unsigned char *copy = (unsigned char *)malloc(len);
456
510
  if (!copy && len > 0) {
457
511
  ip_context_set_error(ctx, IP_ERR_OOM, "failed to copy IO::Buffer input");
@@ -492,7 +546,7 @@ static int ip_prepare_pixels(ip_context_t *ctx, VALUE buffer, int width, int hei
492
546
  return 0;
493
547
  }
494
548
 
495
- unsigned char *copy = (unsigned char *)malloc(expected);
549
+ unsigned char *copy = (unsigned char *)ip_malloc_hot(expected);
496
550
  if (!copy && expected > 0) {
497
551
  ip_context_set_error(ctx, IP_ERR_OOM, "failed to copy pixel buffer");
498
552
  return 0;
@@ -550,6 +604,52 @@ static VALUE ip_finish_output(ip_context_t *ctx, ip_output_kind_t kind) {
550
604
  return Qtrue;
551
605
  }
552
606
 
607
+ static int ip_save_marker(ip_context_t *ctx, int marker, const unsigned char *data,
608
+ unsigned int len) {
609
+ if (ctx->preserved_marker_count == ctx->preserved_marker_capacity) {
610
+ size_t new_cap =
611
+ ctx->preserved_marker_capacity == 0 ? 4 : ctx->preserved_marker_capacity * 2;
612
+ void *new_buf = realloc(ctx->preserved_markers, new_cap * sizeof(*ctx->preserved_markers));
613
+ if (!new_buf)
614
+ return 0;
615
+ ctx->preserved_markers = new_buf;
616
+ ctx->preserved_marker_capacity = new_cap;
617
+ }
618
+
619
+ unsigned char *copy = (unsigned char *)malloc(len);
620
+ if (!copy && len > 0)
621
+ return 0;
622
+ if (len > 0)
623
+ memcpy(copy, data, len);
624
+
625
+ ctx->preserved_markers[ctx->preserved_marker_count].marker = marker;
626
+ ctx->preserved_markers[ctx->preserved_marker_count].data = copy;
627
+ ctx->preserved_markers[ctx->preserved_marker_count].len = len;
628
+ ctx->preserved_marker_count++;
629
+ return 1;
630
+ }
631
+
632
+ static void ip_save_markers_from_decompress(ip_context_t *ctx,
633
+ struct jpeg_decompress_struct *cinfo) {
634
+ jpeg_saved_marker_ptr m;
635
+ for (m = cinfo->marker_list; m != NULL; m = m->next) {
636
+ if (m->marker == (JPEG_APP0 + 0))
637
+ continue;
638
+
639
+ if (!ip_save_marker(ctx, m->marker, m->data, m->data_length)) {
640
+ return;
641
+ }
642
+ }
643
+ }
644
+
645
+ static void ip_write_preserved_markers(ip_context_t *ctx, struct jpeg_compress_struct *cinfo) {
646
+ for (size_t i = 0; i < ctx->preserved_marker_count; i++) {
647
+ jpeg_write_marker(cinfo, ctx->preserved_markers[i].marker,
648
+ (const JOCTET *)ctx->preserved_markers[i].data,
649
+ ctx->preserved_markers[i].len);
650
+ }
651
+ }
652
+
553
653
  static void ip_jpeg_invalid_error_exit(j_common_ptr cinfo) {
554
654
  ip_jpeg_error_mgr *err = (ip_jpeg_error_mgr *)cinfo->err;
555
655
  char buffer[JMSG_LENGTH_MAX];
@@ -601,6 +701,15 @@ static int ip_inspect_jpeg_header(ip_context_t *ctx) {
601
701
  return 0;
602
702
  }
603
703
 
704
+ if (cinfo.num_components == 4 || cinfo.jpeg_color_space == JCS_CMYK ||
705
+ cinfo.jpeg_color_space == JCS_YCCK) {
706
+ jpeg_destroy_decompress(&cinfo);
707
+ ctx->jmp_armed = 0;
708
+ ip_context_set_error(ctx, IP_ERR_UNSUPPORTED,
709
+ "CMYK/YCCK JPEG input is not supported in this release");
710
+ return 0;
711
+ }
712
+
604
713
  ctx->width = (int)cinfo.image_width;
605
714
  ctx->height = (int)cinfo.image_height;
606
715
  ctx->channels = cinfo.num_components;
@@ -728,9 +837,11 @@ static int prepare_encode_row(ip_context_t *ctx, JDIMENSION y, JSAMPROW *row) {
728
837
  ctx->scratch_row_size = rgb_row_size;
729
838
  }
730
839
 
731
- const unsigned char *src = ctx->pixel_data + ((size_t)y * (size_t)ctx->width * 4);
732
- unsigned char *dst = ctx->scratch_row;
733
- for (int x = 0; x < ctx->width; x++) {
840
+ const unsigned char *IP_RESTRICT src =
841
+ ctx->pixel_data + ((size_t)y * (size_t)ctx->width * 4);
842
+ unsigned char *IP_RESTRICT dst = ctx->scratch_row;
843
+ const int w = ctx->width;
844
+ for (int x = 0; x < w; x++) {
734
845
  dst[x * 3 + 0] = src[x * 4 + 0];
735
846
  dst[x * 3 + 1] = src[x * 4 + 1];
736
847
  dst[x * 3 + 2] = src[x * 4 + 2];
@@ -781,9 +892,12 @@ static int encode_pixels_with_libjpeg(ip_context_t *ctx, int mozjpeg_size_mode)
781
892
 
782
893
  jpeg_start_compress(&cinfo, TRUE);
783
894
 
895
+ if (!ctx->strip_metadata) {
896
+ ip_write_preserved_markers(ctx, &cinfo);
897
+ }
898
+
784
899
  while (cinfo.next_scanline < cinfo.image_height) {
785
- if (ctx->cancellable_requested && (cinfo.next_scanline % 16 == 0) &&
786
- atomic_load(&ctx->cancelled)) {
900
+ if (ctx->cancellable_requested && atomic_load(&ctx->cancelled)) {
787
901
  ip_context_set_error(ctx, IP_ERR_CANCELLED, "JPEG encode cancelled");
788
902
  jpeg_abort_compress(&cinfo);
789
903
  jpeg_destroy_compress(&cinfo);
@@ -792,16 +906,32 @@ static int encode_pixels_with_libjpeg(ip_context_t *ctx, int mozjpeg_size_mode)
792
906
  return 0;
793
907
  }
794
908
 
795
- JSAMPROW row = NULL;
796
- if (!prepare_encode_row(ctx, cinfo.next_scanline, &row)) {
797
- jpeg_abort_compress(&cinfo);
798
- jpeg_destroy_compress(&cinfo);
799
- free(jpeg_buf);
800
- ctx->jmp_armed = 0;
801
- return 0;
909
+ if (ctx->channels == 4) {
910
+ JSAMPROW row = NULL;
911
+ if (!prepare_encode_row(ctx, cinfo.next_scanline, &row)) {
912
+ jpeg_abort_compress(&cinfo);
913
+ jpeg_destroy_compress(&cinfo);
914
+ free(jpeg_buf);
915
+ ctx->jmp_armed = 0;
916
+ return 0;
917
+ }
918
+
919
+ jpeg_write_scanlines(&cinfo, &row, 1);
920
+ continue;
802
921
  }
803
922
 
804
- jpeg_write_scanlines(&cinfo, &row, 1);
923
+ JSAMPROW rows[16];
924
+ JDIMENSION batch = cinfo.image_height - cinfo.next_scanline;
925
+ if (batch > 16)
926
+ batch = 16;
927
+
928
+ for (JDIMENSION i = 0; i < batch; i++) {
929
+ JDIMENSION y = cinfo.next_scanline + i;
930
+ rows[i] = (JSAMPROW)(ctx->pixel_data +
931
+ ((size_t)y * (size_t)ctx->width * (size_t)ctx->channels));
932
+ }
933
+
934
+ jpeg_write_scanlines(&cinfo, rows, batch);
805
935
  }
806
936
 
807
937
  jpeg_finish_compress(&cinfo);
@@ -843,6 +973,14 @@ static int ip_jpeg_decode_to_pixels(ip_context_t *ctx, unsigned char **pixels, i
843
973
 
844
974
  jpeg_create_decompress(&cinfo);
845
975
  jpeg_mem_src(&cinfo, ctx->input_data, (unsigned long)ctx->input_size);
976
+
977
+ if (!ctx->strip_metadata) {
978
+ jpeg_save_markers(&cinfo, JPEG_COM, 0xFFFF);
979
+ for (int app = 1; app < 16; app++) {
980
+ jpeg_save_markers(&cinfo, JPEG_APP0 + app, 0xFFFF);
981
+ }
982
+ }
983
+
846
984
  int rc = jpeg_read_header(&cinfo, TRUE);
847
985
  if (rc != JPEG_HEADER_OK) {
848
986
  jpeg_destroy_decompress(&cinfo);
@@ -858,6 +996,15 @@ static int ip_jpeg_decode_to_pixels(ip_context_t *ctx, unsigned char **pixels, i
858
996
  return 0;
859
997
  }
860
998
 
999
+ if (cinfo.num_components == 4 || cinfo.jpeg_color_space == JCS_CMYK ||
1000
+ cinfo.jpeg_color_space == JCS_YCCK) {
1001
+ jpeg_destroy_decompress(&cinfo);
1002
+ ctx->jmp_armed = 0;
1003
+ ip_context_set_error(ctx, IP_ERR_UNSUPPORTED,
1004
+ "CMYK/YCCK JPEG input is not supported in this release");
1005
+ return 0;
1006
+ }
1007
+
861
1008
  int ch = cinfo.num_components == 1 ? 1 : 3;
862
1009
 
863
1010
  ctx->width = (int)cinfo.image_width;
@@ -892,6 +1039,11 @@ static int ip_jpeg_decode_to_pixels(ip_context_t *ctx, unsigned char **pixels, i
892
1039
  }
893
1040
 
894
1041
  jpeg_start_decompress(&cinfo);
1042
+
1043
+ if (!ctx->strip_metadata && !fast_decode_mode) {
1044
+ ip_save_markers_from_decompress(ctx, &cinfo);
1045
+ }
1046
+
895
1047
  size_t row_stride = 0;
896
1048
  size_t size = 0;
897
1049
  if (!ip_checked_mul_size((size_t)cinfo.output_width, (size_t)cinfo.output_components,
@@ -902,7 +1054,7 @@ static int ip_jpeg_decode_to_pixels(ip_context_t *ctx, unsigned char **pixels, i
902
1054
  ip_context_set_error(ctx, IP_ERR_LIMIT, "decoded image buffer size overflow");
903
1055
  return 0;
904
1056
  }
905
- unsigned char *buf = (unsigned char *)malloc(size);
1057
+ unsigned char *buf = (unsigned char *)ip_malloc_hot(size);
906
1058
  if (!buf && size > 0) {
907
1059
  jpeg_destroy_decompress(&cinfo);
908
1060
  ctx->jmp_armed = 0;
@@ -911,8 +1063,16 @@ static int ip_jpeg_decode_to_pixels(ip_context_t *ctx, unsigned char **pixels, i
911
1063
  }
912
1064
 
913
1065
  while (cinfo.output_scanline < cinfo.output_height) {
914
- JSAMPROW row = buf + ((size_t)cinfo.output_scanline * row_stride);
915
- jpeg_read_scanlines(&cinfo, &row, 1);
1066
+ JSAMPROW rows[16];
1067
+ JDIMENSION batch = cinfo.output_height - cinfo.output_scanline;
1068
+ if (batch > 16)
1069
+ batch = 16;
1070
+
1071
+ for (JDIMENSION i = 0; i < batch; i++) {
1072
+ rows[i] = buf + ((size_t)(cinfo.output_scanline + i) * row_stride);
1073
+ }
1074
+
1075
+ jpeg_read_scanlines(&cinfo, rows, batch);
916
1076
  }
917
1077
 
918
1078
  int out_width = (int)cinfo.output_width;
@@ -965,29 +1125,183 @@ static void ip_clear_output_buffer(ip_context_t *ctx) {
965
1125
  ctx->output_owner = IP_OUTPUT_OWNER_NONE;
966
1126
  }
967
1127
 
968
- static int ip_decode_jpeg_buffer_preserving_context(ip_context_t *ctx, const unsigned char *data,
969
- size_t size, unsigned char **pixels, int *width,
970
- int *height, int *channels) {
971
- const unsigned char *old_input_data = ctx->input_data;
972
- size_t old_input_size = ctx->input_size;
1128
+ static int ip_decode_jpeg_to_luma_buffer(ip_context_t *ctx, const unsigned char *data, size_t size,
1129
+ unsigned char **luma, int *width, int *height) {
1130
+ struct jpeg_decompress_struct cinfo;
1131
+ ip_jpeg_error_mgr jerr;
1132
+ memset(&cinfo, 0, sizeof(cinfo));
1133
+ memset(&jerr, 0, sizeof(jerr));
1134
+
1135
+ cinfo.err = jpeg_std_error(&jerr.pub);
1136
+ jerr.pub.error_exit = ip_jpeg_invalid_error_exit;
1137
+ jerr.ctx = ctx;
1138
+
1139
+ ctx->jmp_armed = 1;
1140
+ if (setjmp(ctx->jmpbuf)) {
1141
+ ctx->jmp_armed = 0;
1142
+ jpeg_destroy_decompress(&cinfo);
1143
+ if (ctx->status == IP_OK)
1144
+ ip_context_set_error(ctx, IP_ERR_INVALID_IMAGE, "JPEG luma decode failed");
1145
+ return 0;
1146
+ }
1147
+
1148
+ jpeg_create_decompress(&cinfo);
1149
+ jpeg_mem_src(&cinfo, data, (unsigned long)size);
1150
+
1151
+ int rc = jpeg_read_header(&cinfo, TRUE);
1152
+ if (rc != JPEG_HEADER_OK) {
1153
+ jpeg_destroy_decompress(&cinfo);
1154
+ ctx->jmp_armed = 0;
1155
+ ip_context_set_error(ctx, IP_ERR_INVALID_IMAGE, "invalid JPEG header");
1156
+ return 0;
1157
+ }
1158
+
1159
+ if (cinfo.image_width > (JDIMENSION)INT_MAX || cinfo.image_height > (JDIMENSION)INT_MAX) {
1160
+ jpeg_destroy_decompress(&cinfo);
1161
+ ctx->jmp_armed = 0;
1162
+ ip_context_set_error(ctx, IP_ERR_LIMIT, "JPEG dimensions exceed native int range");
1163
+ return 0;
1164
+ }
1165
+
1166
+ if (cinfo.num_components == 4 || cinfo.jpeg_color_space == JCS_CMYK ||
1167
+ cinfo.jpeg_color_space == JCS_YCCK) {
1168
+ jpeg_destroy_decompress(&cinfo);
1169
+ ctx->jmp_armed = 0;
1170
+ ip_context_set_error(ctx, IP_ERR_UNSUPPORTED,
1171
+ "CMYK/YCCK JPEG input is not supported in this release");
1172
+ return 0;
1173
+ }
1174
+
1175
+ int out_channels = cinfo.num_components == 1 ? 1 : 3;
973
1176
  int old_width = ctx->width;
974
1177
  int old_height = ctx->height;
975
1178
  int old_channels = ctx->channels;
976
- int old_bit_depth = ctx->bit_depth;
977
1179
  size_t old_decoded_bytes = ctx->decoded_bytes;
978
1180
 
979
- ctx->input_data = data;
980
- ctx->input_size = size;
981
- int ok = ip_jpeg_decode_to_pixels(ctx, pixels, width, height, channels, 0);
982
-
983
- ctx->input_data = old_input_data;
984
- ctx->input_size = old_input_size;
1181
+ ctx->width = (int)cinfo.image_width;
1182
+ ctx->height = (int)cinfo.image_height;
1183
+ ctx->channels = 1;
1184
+ if (!ip_checked_image_size(ctx->width, ctx->height, 1, &ctx->decoded_bytes)) {
1185
+ ctx->width = old_width;
1186
+ ctx->height = old_height;
1187
+ ctx->channels = old_channels;
1188
+ ctx->decoded_bytes = old_decoded_bytes;
1189
+ jpeg_destroy_decompress(&cinfo);
1190
+ ctx->jmp_armed = 0;
1191
+ ip_context_set_error(ctx, IP_ERR_LIMIT, "decoded luma buffer size overflows native size");
1192
+ return 0;
1193
+ }
1194
+ validate_limits_for_pixels(ctx);
985
1195
  ctx->width = old_width;
986
1196
  ctx->height = old_height;
987
1197
  ctx->channels = old_channels;
988
- ctx->bit_depth = old_bit_depth;
1198
+ size_t luma_size = ctx->decoded_bytes;
989
1199
  ctx->decoded_bytes = old_decoded_bytes;
990
- return ok;
1200
+
1201
+ if (ctx->status != IP_OK) {
1202
+ jpeg_destroy_decompress(&cinfo);
1203
+ ctx->jmp_armed = 0;
1204
+ return 0;
1205
+ }
1206
+
1207
+ cinfo.out_color_space = out_channels == 1 ? JCS_GRAYSCALE : JCS_RGB;
1208
+ #if defined(IMAGE_PACK_HAS_SIMD)
1209
+ cinfo.dct_method = JDCT_ISLOW;
1210
+ #else
1211
+ cinfo.dct_method = JDCT_FASTEST;
1212
+ #endif
1213
+ cinfo.do_fancy_upsampling = FALSE;
1214
+ cinfo.do_block_smoothing = FALSE;
1215
+ cinfo.quantize_colors = FALSE;
1216
+ cinfo.two_pass_quantize = FALSE;
1217
+ cinfo.dither_mode = JDITHER_NONE;
1218
+
1219
+ jpeg_start_decompress(&cinfo);
1220
+
1221
+ size_t luma_stride = (size_t)cinfo.output_width;
1222
+ size_t row_stride = 0;
1223
+ if (!ip_checked_mul_size((size_t)cinfo.output_width, (size_t)cinfo.output_components,
1224
+ &row_stride) ||
1225
+ luma_stride == 0 || luma_size != luma_stride * (size_t)cinfo.output_height) {
1226
+ jpeg_destroy_decompress(&cinfo);
1227
+ ctx->jmp_armed = 0;
1228
+ ip_context_set_error(ctx, IP_ERR_LIMIT, "decoded luma buffer size mismatch");
1229
+ return 0;
1230
+ }
1231
+
1232
+ unsigned char *buf = (unsigned char *)ip_malloc_hot(luma_size);
1233
+ if (!buf && luma_size > 0) {
1234
+ jpeg_destroy_decompress(&cinfo);
1235
+ ctx->jmp_armed = 0;
1236
+ ip_context_set_error(ctx, IP_ERR_OOM, "failed to allocate luma buffer");
1237
+ return 0;
1238
+ }
1239
+
1240
+ unsigned char *scratch = NULL;
1241
+ if (out_channels != 1) {
1242
+ size_t scratch_size = 0;
1243
+ if (!ip_checked_mul_size(row_stride, 16, &scratch_size)) {
1244
+ free(buf);
1245
+ jpeg_destroy_decompress(&cinfo);
1246
+ ctx->jmp_armed = 0;
1247
+ ip_context_set_error(ctx, IP_ERR_LIMIT, "luma decode scratch size overflow");
1248
+ return 0;
1249
+ }
1250
+ scratch = (unsigned char *)ip_malloc_hot(scratch_size);
1251
+ if (!scratch) {
1252
+ free(buf);
1253
+ jpeg_destroy_decompress(&cinfo);
1254
+ ctx->jmp_armed = 0;
1255
+ ip_context_set_error(ctx, IP_ERR_OOM, "failed to allocate luma decode scratch row");
1256
+ return 0;
1257
+ }
1258
+ }
1259
+
1260
+ while (cinfo.output_scanline < cinfo.output_height) {
1261
+ JSAMPROW rows[16];
1262
+ JDIMENSION start_scanline = cinfo.output_scanline;
1263
+ JDIMENSION batch = cinfo.output_height - start_scanline;
1264
+ if (batch > 16)
1265
+ batch = 16;
1266
+
1267
+ if (out_channels == 1) {
1268
+ for (JDIMENSION i = 0; i < batch; i++) {
1269
+ rows[i] = buf + ((size_t)(start_scanline + i) * luma_stride);
1270
+ }
1271
+ } else {
1272
+ for (JDIMENSION i = 0; i < batch; i++) {
1273
+ rows[i] = scratch + ((size_t)i * row_stride);
1274
+ }
1275
+ }
1276
+
1277
+ JDIMENSION lines_read = jpeg_read_scanlines(&cinfo, rows, batch);
1278
+
1279
+ if (out_channels != 1) {
1280
+ for (JDIMENSION y = 0; y < lines_read; y++) {
1281
+ const unsigned char *IP_RESTRICT src = scratch + ((size_t)y * row_stride);
1282
+ unsigned char *IP_RESTRICT dst = buf + ((size_t)(start_scanline + y) * luma_stride);
1283
+ for (size_t x = 0; x < luma_stride; x++) {
1284
+ unsigned int r = src[x * 3 + 0];
1285
+ unsigned int g = src[x * 3 + 1];
1286
+ unsigned int b = src[x * 3 + 2];
1287
+ dst[x] = (unsigned char)((77u * r + 150u * g + 29u * b + 128u) >> 8);
1288
+ }
1289
+ }
1290
+ }
1291
+ }
1292
+
1293
+ int out_width = (int)cinfo.output_width;
1294
+ int out_height = (int)cinfo.output_height;
1295
+
1296
+ free(scratch);
1297
+ jpeg_finish_decompress(&cinfo);
1298
+ jpeg_destroy_decompress(&cinfo);
1299
+ ctx->jmp_armed = 0;
1300
+
1301
+ *luma = buf;
1302
+ *width = out_width;
1303
+ *height = out_height;
1304
+ return 1;
991
1305
  }
992
1306
 
993
1307
  static unsigned char *ip_build_luma_buffer(ip_context_t *ctx, const unsigned char *pixels,
@@ -998,7 +1312,7 @@ static unsigned char *ip_build_luma_buffer(ip_context_t *ctx, const unsigned cha
998
1312
  return NULL;
999
1313
  }
1000
1314
 
1001
- unsigned char *luma = (unsigned char *)malloc(count);
1315
+ unsigned char *luma = (unsigned char *)ip_malloc_hot(count);
1002
1316
  if (!luma) {
1003
1317
  ip_context_set_error(ctx, IP_ERR_OOM, "failed to allocate luma buffer");
1004
1318
  return NULL;
@@ -1009,85 +1323,153 @@ static unsigned char *ip_build_luma_buffer(ip_context_t *ctx, const unsigned cha
1009
1323
  return luma;
1010
1324
  }
1011
1325
 
1012
- for (size_t i = 0; i < count; i++) {
1013
- const unsigned char *p = pixels + (i * (size_t)channels);
1014
- unsigned int y =
1015
- 77u * (unsigned int)p[0] + 150u * (unsigned int)p[1] + 29u * (unsigned int)p[2] + 128u;
1016
- luma[i] = (unsigned char)(y >> 8);
1326
+ const unsigned char *IP_RESTRICT src = pixels;
1327
+ unsigned char *IP_RESTRICT dst = luma;
1328
+
1329
+ if (channels == 3) {
1330
+ for (size_t i = 0; i < count; i++) {
1331
+ unsigned int r = src[i * 3 + 0];
1332
+ unsigned int g = src[i * 3 + 1];
1333
+ unsigned int b = src[i * 3 + 2];
1334
+ dst[i] = (unsigned char)((77u * r + 150u * g + 29u * b + 128u) >> 8);
1335
+ }
1336
+ return luma;
1017
1337
  }
1018
1338
 
1339
+ for (size_t i = 0; i < count; i++) {
1340
+ unsigned int r = src[i * 4 + 0];
1341
+ unsigned int g = src[i * 4 + 1];
1342
+ unsigned int b = src[i * 4 + 2];
1343
+ dst[i] = (unsigned char)((77u * r + 150u * g + 29u * b + 128u) >> 8);
1344
+ }
1019
1345
  return luma;
1020
1346
  }
1021
1347
 
1022
- static double ip_compute_ssim_luma_buffer(const unsigned char *a, const unsigned char *b, int width,
1023
- int height) {
1024
- const int window = 8;
1348
+ static double ip_ssim_window_score_double(int32_t n, int32_t sum_a, int32_t sum_b, int32_t sum_a2,
1349
+ int32_t sum_b2, int32_t sum_ab) {
1025
1350
  const double c1 = 6.5025; /* (0.01 * 255)^2 */
1026
1351
  const double c2 = 58.5225; /* (0.03 * 255)^2 */
1027
- double total_ssim = 0.0;
1028
- int windows = 0;
1029
1352
 
1030
- for (int y0 = 0; y0 < height; y0 += window) {
1031
- int y1 = y0 + window;
1032
- if (y1 > height)
1033
- y1 = height;
1353
+ double inv_n = 1.0 / (double)n;
1354
+ double mean_a = (double)sum_a * inv_n;
1355
+ double mean_b = (double)sum_b * inv_n;
1356
+ double var_a = ((double)sum_a2 * inv_n) - (mean_a * mean_a);
1357
+ double var_b = ((double)sum_b2 * inv_n) - (mean_b * mean_b);
1358
+ double cov_ab = ((double)sum_ab * inv_n) - (mean_a * mean_b);
1359
+
1360
+ if (var_a < 0.0)
1361
+ var_a = 0.0;
1362
+ if (var_b < 0.0)
1363
+ var_b = 0.0;
1364
+
1365
+ double numerator = (2.0 * mean_a * mean_b + c1) * (2.0 * cov_ab + c2);
1366
+ double denominator = (mean_a * mean_a + mean_b * mean_b + c1) * (var_a + var_b + c2);
1367
+ double ssim = denominator == 0.0 ? 1.0 : numerator / denominator;
1368
+
1369
+ if (ssim < 0.0)
1370
+ ssim = 0.0;
1371
+ if (ssim > 1.0)
1372
+ ssim = 1.0;
1373
+ return ssim;
1374
+ }
1034
1375
 
1035
- for (int x0 = 0; x0 < width; x0 += window) {
1036
- int x1 = x0 + window;
1037
- if (x1 > width)
1038
- x1 = width;
1039
-
1040
- double sum_a = 0.0;
1041
- double sum_b = 0.0;
1042
- double sum_a2 = 0.0;
1043
- double sum_b2 = 0.0;
1044
- double sum_ab = 0.0;
1045
- int n = 0;
1046
-
1047
- for (int y = y0; y < y1; y++) {
1048
- size_t row = (size_t)y * (size_t)width;
1049
- for (int x = x0; x < x1; x++) {
1050
- size_t idx = row + (size_t)x;
1051
- double la = (double)a[idx];
1052
- double lb = (double)b[idx];
1053
- sum_a += la;
1054
- sum_b += lb;
1055
- sum_a2 += la * la;
1056
- sum_b2 += lb * lb;
1057
- sum_ab += la * lb;
1058
- n++;
1059
- }
1060
- }
1376
+ static inline double ip_ssim_window_8x8(const unsigned char *IP_RESTRICT a,
1377
+ const unsigned char *IP_RESTRICT b, int width, int x0,
1378
+ int y0) {
1379
+ int32_t sum_a = 0, sum_b = 0, sum_a2 = 0, sum_b2 = 0, sum_ab = 0;
1380
+
1381
+ for (int y = 0; y < 8; y++) {
1382
+ const unsigned char *pa = a + (size_t)(y0 + y) * (size_t)width + (size_t)x0;
1383
+ const unsigned char *pb = b + (size_t)(y0 + y) * (size_t)width + (size_t)x0;
1384
+ for (int x = 0; x < 8; x++) {
1385
+ int32_t la = pa[x];
1386
+ int32_t lb = pb[x];
1387
+ sum_a += la;
1388
+ sum_b += lb;
1389
+ sum_a2 += la * la;
1390
+ sum_b2 += lb * lb;
1391
+ sum_ab += la * lb;
1392
+ }
1393
+ }
1061
1394
 
1062
- if (n <= 0)
1063
- continue;
1395
+ return ip_ssim_window_score_double(64, sum_a, sum_b, sum_a2, sum_b2, sum_ab);
1396
+ }
1064
1397
 
1065
- double inv_n = 1.0 / (double)n;
1066
- double mean_a = sum_a * inv_n;
1067
- double mean_b = sum_b * inv_n;
1068
- double var_a = (sum_a2 * inv_n) - (mean_a * mean_a);
1069
- double var_b = (sum_b2 * inv_n) - (mean_b * mean_b);
1070
- double cov_ab = (sum_ab * inv_n) - (mean_a * mean_b);
1398
+ static inline double ip_ssim_window_var(const unsigned char *IP_RESTRICT a,
1399
+ const unsigned char *IP_RESTRICT b, int width, int x0,
1400
+ int y0, int x1, int y1) {
1401
+ int32_t sum_a = 0, sum_b = 0, sum_a2 = 0, sum_b2 = 0, sum_ab = 0;
1402
+ int32_t n = 0;
1403
+
1404
+ for (int y = y0; y < y1; y++) {
1405
+ const unsigned char *pa = a + (size_t)y * (size_t)width;
1406
+ const unsigned char *pb = b + (size_t)y * (size_t)width;
1407
+ for (int x = x0; x < x1; x++) {
1408
+ int32_t la = pa[x];
1409
+ int32_t lb = pb[x];
1410
+ sum_a += la;
1411
+ sum_b += lb;
1412
+ sum_a2 += la * la;
1413
+ sum_b2 += lb * lb;
1414
+ sum_ab += la * lb;
1415
+ n++;
1416
+ }
1417
+ }
1071
1418
 
1072
- if (var_a < 0.0)
1073
- var_a = 0.0;
1074
- if (var_b < 0.0)
1075
- var_b = 0.0;
1419
+ if (n <= 0)
1420
+ return 1.0;
1421
+ return ip_ssim_window_score_double(n, sum_a, sum_b, sum_a2, sum_b2, sum_ab);
1422
+ }
1076
1423
 
1077
- double numerator = (2.0 * mean_a * mean_b + c1) * (2.0 * cov_ab + c2);
1078
- double denominator = (mean_a * mean_a + mean_b * mean_b + c1) * (var_a + var_b + c2);
1079
- double ssim = denominator == 0.0 ? 1.0 : numerator / denominator;
1424
+ static double ip_compute_ssim_luma_buffer(const unsigned char *a, const unsigned char *b, int width,
1425
+ int height) {
1426
+ const int window = 8;
1427
+ double total_ssim = 0.0;
1428
+ int windows = 0;
1080
1429
 
1081
- if (ssim < 0.0)
1082
- ssim = 0.0;
1083
- if (ssim > 1.0)
1084
- ssim = 1.0;
1430
+ int full_x = width / window;
1431
+ int full_y = height / window;
1432
+ int rem_x = width - full_x * window;
1433
+ int rem_y = height - full_y * window;
1434
+
1435
+ for (int by = 0; by < full_y; by++) {
1436
+ int y0 = by * window;
1437
+ for (int bx = 0; bx < full_x; bx++) {
1438
+ int x0 = bx * window;
1439
+ total_ssim += ip_ssim_window_8x8(a, b, width, x0, y0);
1440
+ windows++;
1441
+ }
1442
+ }
1085
1443
 
1086
- total_ssim += ssim;
1444
+ if (rem_x > 0) {
1445
+ int x0 = full_x * window;
1446
+ int x1 = width;
1447
+ for (int by = 0; by < full_y; by++) {
1448
+ int y0 = by * window;
1449
+ int y1 = y0 + window;
1450
+ total_ssim += ip_ssim_window_var(a, b, width, x0, y0, x1, y1);
1087
1451
  windows++;
1088
1452
  }
1089
1453
  }
1090
1454
 
1455
+ if (rem_y > 0) {
1456
+ int y0 = full_y * window;
1457
+ int y1 = height;
1458
+ for (int bx = 0; bx < full_x; bx++) {
1459
+ int x0 = bx * window;
1460
+ int x1 = x0 + window;
1461
+ total_ssim += ip_ssim_window_var(a, b, width, x0, y0, x1, y1);
1462
+ windows++;
1463
+ }
1464
+ }
1465
+
1466
+ if (rem_x > 0 && rem_y > 0) {
1467
+ int x0 = full_x * window;
1468
+ int y0 = full_y * window;
1469
+ total_ssim += ip_ssim_window_var(a, b, width, x0, y0, width, height);
1470
+ windows++;
1471
+ }
1472
+
1091
1473
  return windows > 0 ? total_ssim / (double)windows : 0.0;
1092
1474
  }
1093
1475
 
@@ -1104,7 +1486,7 @@ static int guarded_compress_jpeg_input_with_mode(ip_context_t *ctx, int mozjpeg_
1104
1486
  reference_channels = ctx->channels;
1105
1487
  } else {
1106
1488
  if (!ip_jpeg_decode_to_pixels(ctx, &reference_pixels, &reference_width, &reference_height,
1107
- &reference_channels, 0)) {
1489
+ &reference_channels, 1)) {
1108
1490
  return 0;
1109
1491
  }
1110
1492
 
@@ -1120,7 +1502,7 @@ static int guarded_compress_jpeg_input_with_mode(ip_context_t *ctx, int mozjpeg_
1120
1502
 
1121
1503
  if (reference_channels == 4) {
1122
1504
  ip_context_set_error(ctx, IP_ERR_UNSUPPORTED,
1123
- "min_ssim is not supported for RGBA input in v0.2.0");
1505
+ "min_ssim is not supported for RGBA input in v0.2.1");
1124
1506
  return 0;
1125
1507
  }
1126
1508
 
@@ -1163,13 +1545,12 @@ static int guarded_compress_jpeg_input_with_mode(ip_context_t *ctx, int mozjpeg_
1163
1545
  ctx->output_capacity = 0;
1164
1546
  ctx->output_owner = IP_OUTPUT_OWNER_NONE;
1165
1547
 
1166
- unsigned char *candidate_pixels = NULL;
1548
+ unsigned char *candidate_luma = NULL;
1167
1549
  int candidate_width = 0;
1168
1550
  int candidate_height = 0;
1169
- int candidate_channels = 0;
1170
- int decoded_ok = ip_decode_jpeg_buffer_preserving_context(
1171
- ctx, candidate_jpeg, candidate_jpeg_size, &candidate_pixels, &candidate_width,
1172
- &candidate_height, &candidate_channels);
1551
+ int decoded_ok =
1552
+ ip_decode_jpeg_to_luma_buffer(ctx, candidate_jpeg, candidate_jpeg_size, &candidate_luma,
1553
+ &candidate_width, &candidate_height);
1173
1554
 
1174
1555
  if (!decoded_ok) {
1175
1556
  free(reference_luma);
@@ -1178,10 +1559,9 @@ static int guarded_compress_jpeg_input_with_mode(ip_context_t *ctx, int mozjpeg_
1178
1559
  return 0;
1179
1560
  }
1180
1561
 
1181
- if (candidate_width != reference_width || candidate_height != reference_height ||
1182
- candidate_channels != reference_channels) {
1562
+ if (candidate_width != reference_width || candidate_height != reference_height) {
1183
1563
  free(reference_luma);
1184
- free(candidate_pixels);
1564
+ free(candidate_luma);
1185
1565
  free(candidate_jpeg);
1186
1566
  free(best_jpeg);
1187
1567
  ip_context_set_error(ctx, IP_ERR_ENCODE,
@@ -1189,17 +1569,6 @@ static int guarded_compress_jpeg_input_with_mode(ip_context_t *ctx, int mozjpeg_
1189
1569
  return 0;
1190
1570
  }
1191
1571
 
1192
- unsigned char *candidate_luma = ip_build_luma_buffer(ctx, candidate_pixels, candidate_width,
1193
- candidate_height, candidate_channels);
1194
- free(candidate_pixels);
1195
-
1196
- if (!candidate_luma) {
1197
- free(reference_luma);
1198
- free(candidate_jpeg);
1199
- free(best_jpeg);
1200
- return 0;
1201
- }
1202
-
1203
1572
  double ssim = ip_compute_ssim_luma_buffer(reference_luma, candidate_luma, reference_width,
1204
1573
  reference_height);
1205
1574
  free(candidate_luma);
@@ -1244,119 +1613,6 @@ static int guarded_compress_jpeg_input_with_mode(ip_context_t *ctx, int mozjpeg_
1244
1613
  return 1;
1245
1614
  }
1246
1615
 
1247
- #if defined(IMAGE_PACK_ENABLE_LOSSLESS_TRANSCODE_FAST_PATH)
1248
- static int ip_jpeg_transcode_coefficients(ip_context_t *ctx, int mozjpeg_size_mode) {
1249
- if (mozjpeg_size_mode)
1250
- return 0;
1251
- if (!ctx->input_data || ctx->input_size < 4)
1252
- return 0;
1253
- if (ctx->input_data[0] != 0xFF || ctx->input_data[1] != 0xD8)
1254
- return 0;
1255
-
1256
- struct jpeg_decompress_struct srcinfo;
1257
- struct jpeg_compress_struct dstinfo;
1258
- ip_jpeg_error_mgr jsrcerr;
1259
- ip_jpeg_error_mgr jdsterr;
1260
- memset(&srcinfo, 0, sizeof(srcinfo));
1261
- memset(&dstinfo, 0, sizeof(dstinfo));
1262
- memset(&jsrcerr, 0, sizeof(jsrcerr));
1263
- memset(&jdsterr, 0, sizeof(jdsterr));
1264
-
1265
- srcinfo.err = jpeg_std_error(&jsrcerr.pub);
1266
- jsrcerr.pub.error_exit = ip_jpeg_invalid_error_exit;
1267
- jsrcerr.ctx = ctx;
1268
-
1269
- dstinfo.err = jpeg_std_error(&jdsterr.pub);
1270
- jdsterr.pub.error_exit = ip_jpeg_encode_error_exit;
1271
- jdsterr.ctx = ctx;
1272
-
1273
- unsigned char *jpeg_buf = NULL;
1274
- unsigned long jpeg_size = 0;
1275
- int src_created = 0;
1276
- int dst_created = 0;
1277
-
1278
- ctx->jmp_armed = 1;
1279
- if (setjmp(ctx->jmpbuf)) {
1280
- ctx->jmp_armed = 0;
1281
- if (dst_created)
1282
- jpeg_destroy_compress(&dstinfo);
1283
- if (src_created)
1284
- jpeg_destroy_decompress(&srcinfo);
1285
- free(jpeg_buf);
1286
-
1287
- if (ctx->status == IP_OK)
1288
- return 0;
1289
- return 0;
1290
- }
1291
-
1292
- jpeg_create_decompress(&srcinfo);
1293
- src_created = 1;
1294
- jpeg_mem_src(&srcinfo, ctx->input_data, (unsigned long)ctx->input_size);
1295
-
1296
- if (jpeg_read_header(&srcinfo, TRUE) != JPEG_HEADER_OK) {
1297
- jpeg_destroy_decompress(&srcinfo);
1298
- ctx->jmp_armed = 0;
1299
- return 0;
1300
- }
1301
-
1302
- if ((int)srcinfo.image_width > ctx->max_width || (int)srcinfo.image_height > ctx->max_height ||
1303
- (uint64_t)srcinfo.image_width * (uint64_t)srcinfo.image_height > ctx->max_pixels) {
1304
- jpeg_destroy_decompress(&srcinfo);
1305
- ctx->jmp_armed = 0;
1306
- ip_context_set_error(ctx, IP_ERR_LIMIT, "image exceeds configured limits");
1307
- return 0;
1308
- }
1309
-
1310
- jvirt_barray_ptr *src_coef_arrays = jpeg_read_coefficients(&srcinfo);
1311
- if (!src_coef_arrays) {
1312
- jpeg_destroy_decompress(&srcinfo);
1313
- ctx->jmp_armed = 0;
1314
- return 0;
1315
- }
1316
-
1317
- jpeg_create_compress(&dstinfo);
1318
- dst_created = 1;
1319
- jpeg_mem_dest(&dstinfo, &jpeg_buf, &jpeg_size);
1320
- jpeg_copy_critical_parameters(&srcinfo, &dstinfo);
1321
-
1322
- dstinfo.optimize_coding = FALSE;
1323
- if (ctx->progressive) {
1324
- jpeg_simple_progression(&dstinfo);
1325
- dstinfo.optimize_coding = TRUE;
1326
- }
1327
-
1328
- jpeg_write_coefficients(&dstinfo, src_coef_arrays);
1329
- jpeg_finish_compress(&dstinfo);
1330
- jpeg_destroy_compress(&dstinfo);
1331
- dst_created = 0;
1332
-
1333
- jpeg_finish_decompress(&srcinfo);
1334
- jpeg_destroy_decompress(&srcinfo);
1335
- src_created = 0;
1336
-
1337
- ctx->jmp_armed = 0;
1338
-
1339
- if ((size_t)jpeg_size > ctx->max_output_size) {
1340
- free(jpeg_buf);
1341
- ip_context_set_error(ctx, IP_ERR_LIMIT, "transcoded output exceeds max_output_size");
1342
- return 0;
1343
- }
1344
-
1345
- ctx->output_data = jpeg_buf;
1346
- ctx->output_size = (size_t)jpeg_size;
1347
- ctx->output_capacity = (size_t)jpeg_size;
1348
- ctx->output_owner = IP_OUTPUT_OWNER_MALLOC;
1349
-
1350
- if (ctx->width == 0 || ctx->height == 0) {
1351
- ctx->width = 0;
1352
- ctx->height = 0;
1353
- ctx->channels = 0;
1354
- }
1355
- return 1;
1356
- }
1357
-
1358
- #endif
1359
-
1360
1616
  static int ip_jpeg_turbo_compress(ip_context_t *ctx) {
1361
1617
  if (ctx->ssim_guard_enabled)
1362
1618
  return guarded_compress_jpeg_input_with_mode(ctx, 0);
@@ -1460,6 +1716,7 @@ static void apply_configuration(VALUE self, ip_context_t *ctx) {
1460
1716
  ctx->max_width = config_int_value(config, id_max_width, ctx->max_width);
1461
1717
  ctx->max_height = config_int_value(config, id_max_height, ctx->max_height);
1462
1718
  ctx->max_output_size = config_size_value(config, id_max_output_size, ctx->max_output_size);
1719
+ ctx->max_input_size = config_size_value(config, id_max_input_size, ctx->max_input_size);
1463
1720
  }
1464
1721
 
1465
1722
  static void validate_limits_for_pixels(ip_context_t *ctx) {
@@ -1598,6 +1855,7 @@ IMAGE_PACK_INIT_EXPORT void Init_image_pack(void) {
1598
1855
  id_max_width = rb_intern("max_width");
1599
1856
  id_max_height = rb_intern("max_height");
1600
1857
  id_max_output_size = rb_intern("max_output_size");
1858
+ id_max_input_size = rb_intern("max_input_size");
1601
1859
 
1602
1860
  rb_mImagePack = rb_define_module("ImagePack");
1603
1861
  rb_eImagePackError = rb_const_get(rb_mImagePack, rb_intern("Error"));
@@ -8,7 +8,8 @@ module ImagePack
8
8
  :max_pixels,
9
9
  :max_width,
10
10
  :max_height,
11
- :max_output_size
11
+ :max_output_size,
12
+ :max_input_size
12
13
 
13
14
  def initialize
14
15
  @execution = :auto
@@ -18,6 +19,7 @@ module ImagePack
18
19
  @max_width = 30_000
19
20
  @max_height = 30_000
20
21
  @max_output_size = 256 * 1024 * 1024
22
+ @max_input_size = 256 * 1024 * 1024
21
23
  end
22
24
  end
23
25
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module ImagePack
4
- VERSION = "0.2.0"
4
+ VERSION = "0.2.1"
5
5
  end
data/lib/image_pack.rb CHANGED
@@ -13,7 +13,6 @@ require "pathname"
13
13
  require_relative "image_pack/version"
14
14
  require_relative "image_pack/errors"
15
15
  require_relative "image_pack/configuration"
16
- require_relative "image_pack/backend"
17
16
 
18
17
  begin
19
18
  require "image_pack/image_pack"
@@ -29,9 +28,11 @@ rescue LoadError
29
28
  end
30
29
 
31
30
  module ImagePack
32
- ALGOS = %i[jpeg_turbo mozjpeg].freeze
31
+ ALGOS = %i[jpeg_turbo mozjpeg fast size].freeze
32
+ ALGO_TO_NATIVE = { jpeg_turbo: :jpeg_turbo, mozjpeg: :mozjpeg, fast: :jpeg_turbo, size: :mozjpeg }.freeze
33
33
  EXECUTION_MODES = %i[direct nogvl offload auto].freeze
34
34
  DEFAULT_QUALITY = 82
35
+ DEFAULT_ALGO = :mozjpeg
35
36
 
36
37
  class << self
37
38
  def configuration
@@ -47,7 +48,7 @@ module ImagePack
47
48
 
48
49
  def compress(input,
49
50
  output: nil,
50
- algo: :jpeg_turbo,
51
+ algo: DEFAULT_ALGO,
51
52
  quality: nil,
52
53
  min_ssim: nil,
53
54
  mozjpeg_trellis: true,
@@ -72,7 +73,7 @@ module ImagePack
72
73
 
73
74
  __compress_jpeg(input, normalized_input_kind,
74
75
  output, normalized_output_kind,
75
- algo, effective_quality.to_i,
76
+ ALGO_TO_NATIVE.fetch(algo), effective_quality.to_i,
76
77
  min_ssim ? min_ssim.to_f : 0.0,
77
78
  mozjpeg_trellis ? 1 : 0,
78
79
  progressive ? 1 : 0,
@@ -87,25 +88,59 @@ module ImagePack
87
88
  height:,
88
89
  channels:,
89
90
  output: nil,
90
- algo: :jpeg_turbo,
91
+ algo: DEFAULT_ALGO,
91
92
  quality: DEFAULT_QUALITY,
93
+ min_ssim: nil,
92
94
  progressive: false,
95
+ drop_alpha: nil,
93
96
  execution: nil,
94
97
  cancellable: false)
95
98
  validate_algo!(algo)
99
+ validate_min_ssim!(min_ssim)
96
100
  validate_quality!(quality)
97
101
  validate_dimensions!(width, height, channels)
98
102
  execution ||= configuration.execution
99
103
  validate_execution!(execution)
100
104
  validate_cancellable!(algo, execution, cancellable)
101
105
 
106
+ if channels.to_i == 4
107
+ case drop_alpha
108
+ when nil
109
+ warn "ImagePack.compress_pixels: RGBA input has its alpha channel " \
110
+ "discarded (JPEG cannot store alpha). Pass drop_alpha: true to " \
111
+ "silence this warning, or drop_alpha: false to raise instead."
112
+ when false
113
+ raise UnsupportedError,
114
+ "JPEG cannot store an alpha channel. Pass drop_alpha: true to drop it explicitly."
115
+ end
116
+ end
117
+
102
118
  normalized_output_kind = output_kind!(output)
103
119
  has_scheduler = fiber_scheduler_active?
104
120
 
121
+ if min_ssim
122
+ seed_jpeg = __compress_pixels(buffer,
123
+ width.to_i, height.to_i, channels.to_i,
124
+ nil, :return_string,
125
+ ALGO_TO_NATIVE.fetch(algo), 95,
126
+ progressive ? 1 : 0,
127
+ :direct,
128
+ 0,
129
+ 0)
130
+ return compress(seed_jpeg,
131
+ output: output,
132
+ algo: algo,
133
+ quality: quality,
134
+ min_ssim: min_ssim,
135
+ progressive: progressive,
136
+ execution: execution,
137
+ cancellable: cancellable)
138
+ end
139
+
105
140
  __compress_pixels(buffer,
106
141
  width.to_i, height.to_i, channels.to_i,
107
142
  output, normalized_output_kind,
108
- algo, quality.to_i,
143
+ ALGO_TO_NATIVE.fetch(algo), quality.to_i,
109
144
  progressive ? 1 : 0,
110
145
  execution,
111
146
  cancellable ? 1 : 0,
@@ -123,10 +158,11 @@ module ImagePack
123
158
  when String
124
159
  if input.encoding == Encoding::BINARY || input.encoding == Encoding::ASCII_8BIT
125
160
  :bytes
126
- else
127
- raise InvalidArgumentError, "input path does not exist: #{input.inspect}" unless File.file?(input)
128
-
161
+ elsif input.bytesize < 4096 && !input.include?("\0") && File.file?(input)
129
162
  :path
163
+ else
164
+ input.force_encoding(Encoding::ASCII_8BIT) unless input.frozen?
165
+ :bytes
130
166
  end
131
167
  when Pathname
132
168
  raise InvalidArgumentError, "input path does not exist: #{input}" unless input.file?
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: image_pack
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Roman Haydarov