oj_windows 3.16.15 → 3.17.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -360,11 +360,59 @@ static inline const char *scan_string_SSE2(const char *str, const char *end) {
360
360
  static const char *(*scan_func)(const char *str, const char *end) = scan_string_noSIMD;
361
361
 
362
362
  void oj_scanner_init(void) {
363
- #ifdef HAVE_SIMD_SSE4_2
363
+ #if defined(HAVE_SIMD_SSE2) || defined(HAVE_SIMD_SSE4_2)
364
+ bool have_sse42 = false;
365
+ #if defined(OJ_SIMD_SSE4_2_RUNTIME)
366
+ // MSVC x64: SSE2 is always available; SSE4.2 must be detected at runtime
367
+ // (CPUID leaf 1, ECX bit 20) since not every CPU supports it.
368
+ {
369
+ int regs[4];
370
+ __cpuid(regs, 1);
371
+ have_sse42 = (0 != (regs[2] & (1 << 20)));
372
+ }
373
+ #elif defined(HAVE_SIMD_SSE4_2)
374
+ have_sse42 = true; // GCC/Clang build compiled with -msse4.2
375
+ #endif
376
+
377
+ // Optional diagnostic override: OJ_SCAN=scalar|sse2|sse42. PCMPESTRI (SSE4.2)
378
+ // is slow on some microarchitectures, so this allows measuring/forcing a
379
+ // specific scanner without rebuilding.
380
+ {
381
+ const char *force = getenv("OJ_SCAN");
382
+ if (NULL != force) {
383
+ if (0 == strcmp(force, "scalar")) {
384
+ scan_func = scan_string_noSIMD;
385
+ return;
386
+ }
387
+ #if defined(HAVE_SIMD_SSE2)
388
+ if (0 == strcmp(force, "sse2")) {
389
+ scan_func = scan_string_SSE2;
390
+ return;
391
+ }
392
+ #endif
393
+ #if defined(HAVE_SIMD_SSE4_2)
394
+ if (0 == strcmp(force, "sse42") && have_sse42) {
395
+ scan_func = scan_string_SSE42;
396
+ return;
397
+ }
398
+ #endif
399
+ }
400
+ }
401
+
402
+ #if defined(OJ_SIMD_SSE4_2_RUNTIME)
403
+ // Both scanners are compiled. Benchmarks (string-heavy payloads) show the
404
+ // SSE2 scanner (PCMPEQB + PMOVMSKB) is as fast or faster than the SSE4.2
405
+ // scanner (PCMPESTRI, which is high-latency on many microarchitectures), and
406
+ // SSE2 is guaranteed on all x86_64 CPUs. Default to SSE2; use OJ_SCAN=sse42
407
+ // to opt into PCMPESTRI where it happens to win.
408
+ scan_func = scan_string_SSE2;
409
+ (void)have_sse42;
410
+ #elif defined(HAVE_SIMD_SSE4_2)
364
411
  scan_func = scan_string_SSE42;
365
412
  #elif defined(HAVE_SIMD_SSE2)
366
413
  scan_func = scan_string_SSE2;
367
414
  #endif
415
+ #endif // HAVE_SIMD_SSE2 || HAVE_SIMD_SSE4_2
368
416
  // Note: ARM NEON string scanning would be added here if needed
369
417
  }
370
418
 
@@ -1176,6 +1176,8 @@ extern void oj_set_parser_validator(ojParser p);
1176
1176
  extern void oj_set_parser_saj(ojParser p);
1177
1177
  extern void oj_set_parser_usual(ojParser p);
1178
1178
  extern void oj_set_parser_debug(ojParser p);
1179
+ extern void oj_set_parser_safe(ojParser p, VALUE options); // safe parser (upstream Oj 3.17.0)
1180
+ extern void oj_safe_init(VALUE parser_class);
1179
1181
 
1180
1182
  static int opt_cb(VALUE rkey, VALUE value, VALUE ptr) {
1181
1183
  ojParser p = (ojParser)ptr;
@@ -1354,6 +1356,37 @@ static VALUE parser_missing(int argc, VALUE *argv, VALUE self) {
1354
1356
  return p->option(p, key, rv);
1355
1357
  }
1356
1358
 
1359
+ // Ported from upstream Oj (3.16.16/3.16.17): when the input ends, make sure we
1360
+ // were not left mid-literal or inside an unclosed array/object. Without this an
1361
+ // incomplete document such as "tru", "[1,2" or "{\"a\":1" was accepted silently.
1362
+ static void validate_primitives_are_complete(ojParser p) {
1363
+ if (0 >= p->ri) {
1364
+ return;
1365
+ }
1366
+
1367
+ switch (p->map[256]) {
1368
+ case 'N': parse_error(p, "expected null"); break;
1369
+ case 'F': parse_error(p, "expected false"); break;
1370
+ case 'T': parse_error(p, "expected true"); break;
1371
+ }
1372
+ }
1373
+
1374
+ static void validate_non_primitives_are_complete(ojParser p) {
1375
+ if (0 >= p->depth) {
1376
+ return;
1377
+ }
1378
+ if (OBJECT_FUN == p->stack[p->depth]) {
1379
+ parse_error(p, "Object is not closed");
1380
+ } else {
1381
+ parse_error(p, "Array is not closed");
1382
+ }
1383
+ }
1384
+
1385
+ static void validate_document_end(ojParser p) {
1386
+ validate_primitives_are_complete(p);
1387
+ validate_non_primitives_are_complete(p);
1388
+ }
1389
+
1357
1390
  /* Document-method: parse(json)
1358
1391
  * call-seq: parse(json)
1359
1392
  *
@@ -1370,6 +1403,7 @@ static VALUE parser_parse(VALUE self, VALUE json) {
1370
1403
  parser_reset(p);
1371
1404
  p->start(p);
1372
1405
  parse(p, ptr);
1406
+ validate_document_end(p);
1373
1407
 
1374
1408
  return p->result(p);
1375
1409
  }
@@ -1452,22 +1486,26 @@ static VALUE parser_file(VALUE self, VALUE filename) {
1452
1486
  return p->result(p);
1453
1487
  }
1454
1488
  #endif
1455
- byte buf[16385];
1456
- size_t size = sizeof(buf) - 1;
1457
- size_t rsize;
1489
+ byte buf[16385];
1490
+ int size = (int)(sizeof(buf) - 1);
1491
+ int rsize;
1458
1492
 
1493
+ // read() returns a signed count: -1 on error, 0 at EOF. Capturing it in a
1494
+ // signed int (not size_t) avoids -1 wrapping to SIZE_MAX, which would make
1495
+ // `buf[rsize] = '\0'` an out-of-bounds write and the error check unreachable.
1459
1496
  while (true) {
1460
- if (0 < (rsize = read(fd, buf, size))) {
1461
- buf[rsize] = '\0';
1462
- parse(p, buf);
1497
+ rsize = read(fd, buf, (unsigned int)size);
1498
+ if (rsize < 0) {
1499
+ close(fd);
1500
+ rb_raise(rb_eIOError, "error reading from %s", path);
1463
1501
  }
1464
- if (rsize <= 0) {
1465
- if (0 != rsize) {
1466
- rb_raise(rb_eIOError, "error reading from %s", path);
1467
- }
1502
+ if (0 == rsize) { // EOF
1468
1503
  break;
1469
1504
  }
1505
+ buf[rsize] = '\0';
1506
+ parse(p, buf);
1470
1507
  }
1508
+ close(fd);
1471
1509
  return p->result(p);
1472
1510
  }
1473
1511
 
@@ -1570,6 +1608,34 @@ static VALUE parser_validate(VALUE self) {
1570
1608
  return validate_parser;
1571
1609
  }
1572
1610
 
1611
+ /* Document-method: safe(options = {})
1612
+ * call-seq: safe(options = {})
1613
+ *
1614
+ * Returns a parser that builds Ruby objects like the :usual parser but enforces
1615
+ * configurable limits on untrusted input: :max_hash_size, :max_array_size,
1616
+ * :max_depth, and :max_total_elements. Ported from upstream Oj 3.17.0.
1617
+ */
1618
+ static VALUE parser_safe(int argc, VALUE *argv, VALUE self) {
1619
+ VALUE options;
1620
+
1621
+ if (1 == argc) {
1622
+ options = argv[0];
1623
+ Check_Type(options, T_HASH);
1624
+ } else {
1625
+ options = rb_hash_new();
1626
+ }
1627
+
1628
+ ojParser p = OJ_R_ALLOC(struct _ojParser);
1629
+
1630
+ memset(p, 0, sizeof(struct _ojParser));
1631
+ buf_init(&p->key);
1632
+ buf_init(&p->buf);
1633
+ p->map = value_map;
1634
+ oj_set_parser_safe(p, options);
1635
+
1636
+ return TypedData_Wrap_Struct(parser_class, &oj_parser_type, p);
1637
+ }
1638
+
1573
1639
  /* Document-class: Oj::Parser
1574
1640
  *
1575
1641
  * A reusable parser that makes use of named delegates to determine the
@@ -1597,4 +1663,7 @@ void oj_parser_init(void) {
1597
1663
  rb_define_module_function(parser_class, "usual", parser_usual, 0);
1598
1664
  rb_define_module_function(parser_class, "saj", parser_saj, 0);
1599
1665
  rb_define_module_function(parser_class, "validate", parser_validate, 0);
1666
+ rb_define_module_function(parser_class, "safe", parser_safe, -1);
1667
+
1668
+ oj_safe_init(parser_class);
1600
1669
  }
@@ -5,7 +5,9 @@
5
5
  #include <stdio.h>
6
6
  #include <stdlib.h>
7
7
  #include <string.h>
8
+ #if !IS_WINDOWS
8
9
  #include <sys/types.h>
10
+ #endif
9
11
  #ifdef NEEDS_UIO
10
12
  #if NEEDS_UIO
11
13
  #include <sys/uio.h>
@@ -5,8 +5,8 @@
5
5
  #include <stdio.h>
6
6
  #include <stdlib.h>
7
7
  #include <string.h>
8
- #include <sys/types.h>
9
8
  #if !IS_WINDOWS
9
+ #include <sys/types.h>
10
10
  #include <regex.h>
11
11
  #endif
12
12
 
@@ -0,0 +1,230 @@
1
+
2
+ #include "safe.h"
3
+
4
+ static VALUE max_hash_size_sym, max_array_size_sym, max_depth_sym, max_total_elements_sym, max_hash_size_error_class,
5
+ max_array_size_error_class, max_depth_error_class, max_total_elements_error_class;
6
+
7
+ static void check_object_size(safe_T safe) {
8
+ if (NIL_P(safe->max_hash_size)) {
9
+ return;
10
+ }
11
+
12
+ struct _usual usual = safe->usual;
13
+ Col current_object_location = usual.ctail - 1;
14
+
15
+ long int number_of_items_in_stack = usual.vtail - usual.vhead;
16
+ long int number_of_items_in_hash = (number_of_items_in_stack - current_object_location->vi - 1) / 2;
17
+
18
+ if (safe->max_hash_size > number_of_items_in_hash) {
19
+ return;
20
+ }
21
+
22
+ rb_raise(max_hash_size_error_class, "Too many object items!");
23
+ }
24
+
25
+ static void check_array_size(safe_T safe) {
26
+ if (NIL_P(safe->max_array_size)) {
27
+ return;
28
+ }
29
+
30
+ struct _usual usual = safe->usual;
31
+ Col current_object_location = usual.ctail - 1;
32
+
33
+ long int number_of_items_in_stack = usual.vtail - usual.vhead;
34
+ long int number_of_items_in_array = number_of_items_in_stack - current_object_location->vi - 1;
35
+
36
+ if (safe->max_array_size > number_of_items_in_array) {
37
+ return;
38
+ }
39
+
40
+ rb_raise(max_array_size_error_class, "Too many array items!");
41
+ }
42
+
43
+ static void check_max_depth(safe_T safe, ojParser p) {
44
+ if (NIL_P(safe->max_depth) || safe->max_depth >= (p->depth + 1)) {
45
+ return;
46
+ }
47
+
48
+ rb_raise(max_depth_error_class, "JSON is too deep!");
49
+ }
50
+
51
+ static void check_max_total_elements(safe_T safe) {
52
+ /*
53
+ * We check if `max_total_elements` is greater than `current_elements_count`
54
+ * (instead of greater than or equal) because top-level elements (e.g., [],
55
+ * null, true) are not counted. As a result, `current_elements_count`
56
+ * always holds one less than the actual total.
57
+ */
58
+ if (NIL_P(safe->max_total_elements) || safe->max_total_elements > safe->current_elements_count) {
59
+ return;
60
+ }
61
+
62
+ rb_raise(max_total_elements_error_class, "Too many elements!");
63
+ }
64
+
65
+ static void safe_start(ojParser p) {
66
+ safe_T safe = (safe_T)p->ctx;
67
+
68
+ safe->current_hash_size = 0;
69
+ safe->current_array_size = 0;
70
+ safe->current_elements_count = 0;
71
+
72
+ safe->delegated_start_func(p);
73
+ }
74
+
75
+ static void safe_open_object(ojParser p) {
76
+ safe_T safe = (safe_T)p->ctx;
77
+
78
+ safe->current_hash_size++;
79
+ safe->current_elements_count++;
80
+
81
+ check_array_size(safe);
82
+ check_max_depth(safe, p);
83
+ check_max_total_elements(safe);
84
+
85
+ safe->delegated_open_object_func(p);
86
+ }
87
+
88
+ static void safe_open_array(ojParser p) {
89
+ safe_T safe = (safe_T)p->ctx;
90
+
91
+ safe->current_array_size++;
92
+ safe->current_elements_count++;
93
+
94
+ check_array_size(safe);
95
+ check_max_depth(safe, p);
96
+ check_max_total_elements(safe);
97
+
98
+ safe->delegated_open_array_func(p);
99
+ }
100
+
101
+ DEFINE_DELEGATED_FUNCTION(add_null);
102
+ DEFINE_DELEGATED_FUNCTION(add_true);
103
+ DEFINE_DELEGATED_FUNCTION(add_false);
104
+ DEFINE_DELEGATED_FUNCTION(add_int);
105
+ DEFINE_DELEGATED_FUNCTION(add_float);
106
+ DEFINE_DELEGATED_FUNCTION(add_big);
107
+ DEFINE_DELEGATED_FUNCTION(add_str);
108
+
109
+ static void safe_open_object_key(ojParser p) {
110
+ safe_T safe = (safe_T)p->ctx;
111
+
112
+ safe->current_hash_size++;
113
+ safe->current_elements_count += 2;
114
+
115
+ check_object_size(safe);
116
+ check_max_depth(safe, p);
117
+ check_max_total_elements(safe);
118
+
119
+ safe->delegated_open_object_key_func(p);
120
+ }
121
+
122
+ static void safe_open_array_key(ojParser p) {
123
+ safe_T safe = (safe_T)p->ctx;
124
+
125
+ safe->current_array_size++;
126
+ safe->current_elements_count += 2;
127
+
128
+ check_object_size(safe);
129
+ check_max_depth(safe, p);
130
+ check_max_total_elements(safe);
131
+
132
+ safe->delegated_open_array_key_func(p);
133
+ }
134
+
135
+ DEFINE_DELEGATED_OBJECT_FUNCTION(add_null);
136
+ DEFINE_DELEGATED_OBJECT_FUNCTION(add_true);
137
+ DEFINE_DELEGATED_OBJECT_FUNCTION(add_false);
138
+ DEFINE_DELEGATED_OBJECT_FUNCTION(add_int);
139
+ DEFINE_DELEGATED_OBJECT_FUNCTION(add_float);
140
+ DEFINE_DELEGATED_OBJECT_FUNCTION(add_big);
141
+ DEFINE_DELEGATED_OBJECT_FUNCTION(add_str);
142
+
143
+ void oj_init_safe_parser(ojParser p, safe_T safe, VALUE options) {
144
+ // Safe parser inherits all members of usual parser
145
+ oj_init_usual(p, &safe->usual);
146
+
147
+ safe->delegated_start_func = p->start;
148
+ p->start = safe_start;
149
+
150
+ Funcs f;
151
+
152
+ // Array parser functions
153
+ f = &p->funcs[ARRAY_FUN];
154
+ safe->delegated_open_object_func = f->open_object;
155
+ f->open_object = safe_open_object;
156
+ safe->delegated_open_array_func = f->open_array;
157
+ f->open_array = safe_open_array;
158
+ // The following overrides are done for counting objects
159
+ safe->delegated_add_null_func = f->add_null;
160
+ f->add_null = safe_add_null;
161
+ safe->delegated_add_true_func = f->add_true;
162
+ f->add_true = safe_add_true;
163
+ safe->delegated_add_false_func = f->add_false;
164
+ f->add_false = safe_add_false;
165
+ safe->delegated_add_int_func = f->add_int;
166
+ f->add_int = safe_add_int;
167
+ safe->delegated_add_float_func = f->add_float;
168
+ f->add_float = safe_add_float;
169
+ safe->delegated_add_big_func = f->add_big;
170
+ f->add_big = safe_add_big;
171
+ safe->delegated_add_str_func = f->add_str;
172
+ f->add_str = safe_add_str;
173
+
174
+ // Object parser functions
175
+ f = &p->funcs[OBJECT_FUN];
176
+ safe->delegated_open_object_key_func = f->open_object;
177
+ f->open_object = safe_open_object_key;
178
+ safe->delegated_open_array_key_func = f->open_array;
179
+ f->open_array = safe_open_array_key;
180
+ // The following overrides are done for counting objects
181
+ safe->delegated_add_null_key_func = f->add_null;
182
+ f->add_null = safe_add_null_key;
183
+ safe->delegated_add_true_key_func = f->add_true;
184
+ f->add_true = safe_add_true_key;
185
+ safe->delegated_add_false_key_func = f->add_false;
186
+ f->add_false = safe_add_false_key;
187
+ safe->delegated_add_int_key_func = f->add_int;
188
+ f->add_int = safe_add_int_key;
189
+ safe->delegated_add_float_key_func = f->add_float;
190
+ f->add_float = safe_add_float_key;
191
+ safe->delegated_add_big_key_func = f->add_big;
192
+ f->add_big = safe_add_big_key;
193
+ safe->delegated_add_str_key_func = f->add_str;
194
+ f->add_str = safe_add_str_key;
195
+
196
+ SET_CONFIG(max_hash_size);
197
+ SET_CONFIG(max_array_size);
198
+ SET_CONFIG(max_depth);
199
+ SET_CONFIG(max_total_elements);
200
+ }
201
+
202
+ void oj_set_parser_safe(ojParser p, VALUE options) {
203
+ safe_T s = OJ_R_ALLOC(struct _safe_S);
204
+
205
+ oj_init_safe_parser(p, s, options);
206
+ }
207
+
208
+ void oj_safe_init(VALUE parser_class) {
209
+ VALUE validation_error_class = rb_define_class_under(parser_class, "ValidationError", rb_eRuntimeError);
210
+
211
+ max_hash_size_error_class = rb_define_class_under(parser_class, "HashSizeError", validation_error_class);
212
+ max_array_size_error_class = rb_define_class_under(parser_class, "ArraySizeError", validation_error_class);
213
+ max_depth_error_class = rb_define_class_under(parser_class, "DepthError", validation_error_class);
214
+ max_total_elements_error_class = rb_define_class_under(parser_class, "TotalElementsError", validation_error_class);
215
+
216
+ rb_gc_register_address(&max_hash_size_error_class);
217
+ rb_gc_register_address(&max_array_size_error_class);
218
+ rb_gc_register_address(&max_depth_error_class);
219
+ rb_gc_register_address(&max_total_elements_error_class);
220
+
221
+ max_hash_size_sym = ID2SYM(rb_intern("max_hash_size"));
222
+ max_array_size_sym = ID2SYM(rb_intern("max_array_size"));
223
+ max_depth_sym = ID2SYM(rb_intern("max_depth"));
224
+ max_total_elements_sym = ID2SYM(rb_intern("max_total_elements"));
225
+
226
+ rb_gc_register_address(&max_hash_size_sym);
227
+ rb_gc_register_address(&max_array_size_sym);
228
+ rb_gc_register_address(&max_depth_sym);
229
+ rb_gc_register_address(&max_total_elements_sym);
230
+ }
@@ -0,0 +1,79 @@
1
+ #include <ruby.h>
2
+
3
+ #include "parser.h"
4
+ #include "usual.h"
5
+
6
// Reads options[:<config_name>] into safe-><config_name>.
//
// Expects `options` (a Hash VALUE), `safe` (a safe_T) and `<config_name>_sym`
// to be in scope at the expansion site. An Integer is converted with NUM2LONG,
// nil (including a missing key) stores Qnil to mean "no limit", and any other
// value raises ArgumentError.
//
// The expansion deliberately has no trailing semicolon so that
// `SET_CONFIG(x);` is exactly one statement and is safe in unbraced if/else.
//
// NOTE(review): the "no limit" marker is Qnil stored in a long and later tested
// with NIL_P, so an Integer option whose value equals (long)Qnil looks
// indistinguishable from an unset limit - confirm and consider a separate flag.
#define SET_CONFIG(config_name)                                                        \
    do {                                                                               \
        VALUE rb_##config_name = rb_hash_aref(options, config_name##_sym);             \
                                                                                       \
        if (RB_INTEGER_TYPE_P(rb_##config_name)) {                                     \
            safe->config_name = NUM2LONG(rb_##config_name);                            \
        } else if (!NIL_P(rb_##config_name)) {                                         \
            rb_raise(rb_eArgError, "Incorrect value provided for `" #config_name "`"); \
        } else {                                                                       \
            safe->config_name = Qnil;                                                  \
        }                                                                              \
    } while (0)
18
+
19
// Generates `static void safe_<function_name>(ojParser p)`: counts one element,
// runs the array-size and total-element checks, then calls the callback saved
// in delegated_<function_name>_func. The expansion is a complete function
// definition and relies on check_array_size() and check_max_total_elements()
// being declared before the macro is used.
#define DEFINE_DELEGATED_FUNCTION(function_name)    \
    static void safe_##function_name(ojParser p) {  \
        safe_T s = (safe_T)p->ctx;                  \
                                                    \
        s->current_elements_count += 1;             \
                                                    \
        check_array_size(s);                        \
        check_max_total_elements(s);                \
                                                    \
        s->delegated_##function_name##_func(p);     \
    }
30
+
31
// Generates `static void safe_<function_name>_key(ojParser p)`: counts two
// elements (key and value), runs the hash-size and total-element checks, then
// calls the callback saved in delegated_<function_name>_key_func. The expansion
// is a complete function definition and relies on check_object_size() and
// check_max_total_elements() being declared before the macro is used.
#define DEFINE_DELEGATED_OBJECT_FUNCTION(function_name)  \
    static void safe_##function_name##_key(ojParser p) { \
        safe_T s = (safe_T)p->ctx;                       \
                                                         \
        s->current_elements_count += 2;                  \
                                                         \
        check_object_size(s);                            \
        check_max_total_elements(s);                     \
                                                         \
        s->delegated_##function_name##_key_func(p);      \
    }
42
+
43
+ typedef struct _safe_S {
44
+ struct _usual usual;
45
+
46
+ long int max_hash_size;
47
+ long int max_array_size;
48
+ long int max_depth;
49
+ long int max_total_elements;
50
+ long int max_json_size_bytes;
51
+
52
+ long int current_hash_size;
53
+ long int current_array_size;
54
+ long int current_elements_count;
55
+
56
+ void (*delegated_start_func)(struct _ojParser *p);
57
+
58
+ // Array functions
59
+ void (*delegated_open_object_func)(struct _ojParser *p);
60
+ void (*delegated_open_array_func)(struct _ojParser *p);
61
+ void (*delegated_add_null_func)(struct _ojParser *p);
62
+ void (*delegated_add_true_func)(struct _ojParser *p);
63
+ void (*delegated_add_false_func)(struct _ojParser *p);
64
+ void (*delegated_add_int_func)(struct _ojParser *p);
65
+ void (*delegated_add_float_func)(struct _ojParser *p);
66
+ void (*delegated_add_big_func)(struct _ojParser *p);
67
+ void (*delegated_add_str_func)(struct _ojParser *p);
68
+
69
+ // Object functions
70
+ void (*delegated_open_object_key_func)(struct _ojParser *p);
71
+ void (*delegated_open_array_key_func)(struct _ojParser *p);
72
+ void (*delegated_add_null_key_func)(struct _ojParser *p);
73
+ void (*delegated_add_true_key_func)(struct _ojParser *p);
74
+ void (*delegated_add_false_key_func)(struct _ojParser *p);
75
+ void (*delegated_add_int_key_func)(struct _ojParser *p);
76
+ void (*delegated_add_float_key_func)(struct _ojParser *p);
77
+ void (*delegated_add_big_key_func)(struct _ojParser *p);
78
+ void (*delegated_add_str_key_func)(struct _ojParser *p);
79
+ } *safe_T;
@@ -4,10 +4,31 @@
4
4
  // SIMD architecture detection and configuration
5
5
  // This header provides unified SIMD support across different CPU architectures
6
6
 
7
+ // Escape hatch: build with -DOJ_DISABLE_SIMD to force the scalar scanner on any
8
+ // platform (useful for debugging or comparison benchmarks).
9
+ #ifndef OJ_DISABLE_SIMD
10
+
7
11
  // x86/x86_64 SIMD detection
8
12
  #if defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64)
9
13
  #define HAVE_SIMD_X86 1
10
14
 
15
+ #if defined(_MSC_VER)
16
+ // MSVC (cl.exe) does not define the GCC/Clang feature macros __SSE2__/__SSE4_2__,
17
+ // and it requires no -msse flag to emit SSE intrinsics. On x64, SSE2 is guaranteed
18
+ // by the architecture, so we enable the SSE2 scanner unconditionally. SSE4.2 is not
19
+ // guaranteed by every CPU, so we also compile the SSE4.2 scanner but only use it
20
+ // when CPUID reports support at runtime and OJ_SCAN=sse42 requests it (see oj_scanner_init() in parse.c).
21
+ #if defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
22
+ #define HAVE_SIMD_SSE2 1
23
+ #include <emmintrin.h>
24
+ #define HAVE_SIMD_SSE4_2 1
25
+ #define OJ_SIMD_SSE4_2_RUNTIME 1
26
+ #include <nmmintrin.h>
27
+ #include <intrin.h> // __cpuid, _BitScanForward
28
+ #endif
29
+
30
+ #else // GCC / Clang: gate on the -msseN feature macros set by extconf.rb.
31
+
11
32
  // SSE4.2 support (Intel Core i7+, AMD Bulldozer+)
12
33
  // Enabled automatically when compiler has -msse4.2 flag
13
34
  #if defined(__SSE4_2__)
@@ -21,6 +42,8 @@
21
42
  #include <emmintrin.h>
22
43
  #endif
23
44
 
45
+ #endif // _MSC_VER
46
+
24
47
  #endif // x86/x86_64
25
48
 
26
49
  // ARM NEON detection
@@ -30,6 +53,8 @@
30
53
  #include <arm_neon.h>
31
54
  #endif
32
55
 
56
+ #endif // OJ_DISABLE_SIMD
57
+
33
58
  // Define which SIMD implementation to use (priority order: SSE4.2 > NEON > SSE2)
34
59
  #if defined(HAVE_SIMD_SSE4_2)
35
60
  #define HAVE_SIMD_STRING_SCAN 1
@@ -44,4 +69,25 @@
44
69
  #define SIMD_TYPE "none"
45
70
  #endif
46
71
 
72
+ // Portability shims: the SSE scanners in parse.c are written with GCC/Clang
73
+ // builtins. Provide MSVC-native equivalents so the same code compiles under
74
+ // cl.exe. Only defined when an x86 SSE path is actually enabled, and guarded by
75
+ // #ifndef so a real GCC/Clang build is never affected.
76
+ #if defined(_MSC_VER) && (defined(HAVE_SIMD_SSE2) || defined(HAVE_SIMD_SSE4_2))
77
+ #ifndef __builtin_prefetch
78
+ #define __builtin_prefetch(addr, ...) _mm_prefetch((const char *)(addr), _MM_HINT_T0)
79
+ #endif
80
+ #ifndef __builtin_expect
81
+ #define __builtin_expect(expr, expected) (expr)
82
+ #endif
83
+ static __forceinline int oj_msvc_ctz(unsigned int x) {
84
+ unsigned long index;
85
+ _BitScanForward(&index, x); // callers guarantee x != 0
86
+ return (int)index;
87
+ }
88
+ #ifndef __builtin_ctz
89
+ #define __builtin_ctz(x) oj_msvc_ctz((unsigned int)(x))
90
+ #endif
91
+ #endif
92
+
47
93
  #endif /* OJ_SIMD_H */
@@ -405,6 +405,7 @@ static void read_num(ParseInfo pi) {
405
405
  char c;
406
406
 
407
407
  reader_protect(&pi->rd);
408
+ ni.pi = pi; // ni->pi->err_class is dereferenced in oj_num_as_value
408
409
  ni.i = 0;
409
410
  ni.num = 0;
410
411
  ni.div = 1;
@@ -556,6 +557,7 @@ static void read_nan(ParseInfo pi) {
556
557
  char c;
557
558
 
558
559
  ni.str = pi->rd.str;
560
+ ni.pi = pi; // ni->pi->err_class is dereferenced in oj_num_as_value
559
561
  ni.i = 0;
560
562
  ni.num = 0;
561
563
  ni.div = 1;
@@ -752,6 +754,7 @@ void oj_sparse2(ParseInfo pi) {
752
754
  return;
753
755
  }
754
756
  ni.str = pi->rd.str;
757
+ ni.pi = pi; // ni->pi->err_class is dereferenced in oj_num_as_value
755
758
  ni.i = 0;
756
759
  ni.num = 0;
757
760
  ni.div = 1;
@@ -200,7 +200,10 @@ static void push_key(ojParser p) {
200
200
  d->ktail = d->khead + pos;
201
201
  d->kend = d->khead + cap;
202
202
  }
203
- d->ktail->len = klen;
203
+ if (32000 < klen) { // extreme-size guard (upstream Oj 3.17.2); ktail->len is int16_t
204
+ rb_raise(oj_json_parser_error_class, "Key too long. Keys are limited to 32,000 bytes.");
205
+ }
206
+ d->ktail->len = (int16_t)klen;
204
207
  if (klen < sizeof(d->ktail->buf)) {
205
208
  memcpy(d->ktail->buf, key, klen);
206
209
  d->ktail->buf[klen] = '\0';
@@ -1,4 +1,4 @@
1
- module Oj
2
- # Current version of the module.
3
- VERSION = '3.16.15'
4
- end
1
module Oj
  # Version string of this release of the gem.
  VERSION = '3.17.2.1'
end
@@ -10,11 +10,21 @@ To enable Oj trace feature on Windows, use the `--enable-trace-log` option when
10
10
  Then, the trace logs will be displayed when `:trace` option is set to `true`.
11
11
 
12
12
 
13
- ### Enable SIMD instructions
13
+ ### SIMD string scanning
14
+
15
+ SIMD acceleration of string scanning is **enabled by default** on x86_64 (SSE2,
16
+ which every x64 CPU supports) — no flag is required. The parser selects its
17
+ scanner at load time; by default it uses the SSE2 scanner, which benchmarks
18
+ fastest on common CPUs.
19
+
20
+ To force the portable scalar scanner (e.g. for debugging, or to benchmark it
21
+ against the SIMD scanners), build with `--disable-simd`:
14
22
 
15
23
  ```powershell
16
- gem install oj_windows -- --with-sse42
24
+ gem install oj_windows -- --disable-simd
17
25
  ```
18
26
 
19
- To enable the use of SIMD instructions in Oj, use the `--with-sse42` option when installing the gem.
20
- This will enable the use of the SSE4.2 instructions in the internal parser for improved performance.
27
+ You can also override the scanner at runtime, without rebuilding, by setting the
28
+ `OJ_SCAN` environment variable to `scalar`, `sse2`, or `sse42`. For example,
29
+ `set OJ_SCAN=sse42` (cmd.exe) or `$env:OJ_SCAN = 'sse42'` (PowerShell) opts into the SSE4.2 (PCMPESTRI) scanner, which can be
30
+ faster on some microarchitectures; the request is ignored if the CPU does not report SSE4.2 support.