oj_windows 3.16.15 → 3.17.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +73 -0
- data/README.md +168 -164
- data/ext/oj_windows/extconf.rb +11 -5
- data/ext/oj_windows/mem.c +31 -17
- data/ext/oj_windows/oj.h +6 -0
- data/ext/oj_windows/parse.c +49 -1
- data/ext/oj_windows/parser.c +79 -10
- data/ext/oj_windows/reader.c +2 -0
- data/ext/oj_windows/rxclass.c +1 -1
- data/ext/oj_windows/safe.c +230 -0
- data/ext/oj_windows/safe.h +79 -0
- data/ext/oj_windows/simd.h +46 -0
- data/ext/oj_windows/sparse.c +3 -0
- data/ext/oj_windows/usual.c +4 -1
- data/lib/oj_windows/version.rb +4 -4
- data/pages/InstallOptions.md +14 -4
- metadata +12 -15
- data/lib/oj/active_support_helper.rb +0 -39
- data/lib/oj/bag.rb +0 -95
- data/lib/oj/easy_hash.rb +0 -52
- data/lib/oj/error.rb +0 -21
- data/lib/oj/json.rb +0 -188
- data/lib/oj/mimic.rb +0 -301
- data/lib/oj/saj.rb +0 -80
- data/lib/oj/schandler.rb +0 -143
- data/lib/oj/state.rb +0 -135
- data/lib/oj/version.rb +0 -4
data/ext/oj_windows/parse.c
CHANGED
|
@@ -360,11 +360,59 @@ static inline const char *scan_string_SSE2(const char *str, const char *end) {
|
|
|
360
360
|
static const char *(*scan_func)(const char *str, const char *end) = scan_string_noSIMD;
|
|
361
361
|
|
|
362
362
|
// Picks the string scanner implementation and stores it in scan_func.
//
// Selection order:
//   1. The OJ_SCAN environment variable ("scalar", "sse2" or "sse42") when it
//      names a scanner that is compiled in (and, for "sse42", usable).
//      Unrecognised or unusable values fall through to the default.
//   2. The build default: SSE2 for runtime-detect builds
//      (OJ_SIMD_SSE4_2_RUNTIME), otherwise SSE4.2 if compiled in, otherwise
//      SSE2.
// When neither SSE macro is defined the function leaves scan_func untouched.
void oj_scanner_init(void) {
#if defined(HAVE_SIMD_SSE2) || defined(HAVE_SIMD_SSE4_2)
    bool        sse42_usable = false;
    const char *requested;
#if defined(OJ_SIMD_SSE4_2_RUNTIME)
    int cpu_info[4];

    // Runtime-detect build: SSE4.2 is not present on every CPU, so ask CPUID
    // (leaf 1, ECX bit 20) instead of assuming it.
    __cpuid(cpu_info, 1);
    sse42_usable = (0 != (cpu_info[2] & (1 << 20)));
#elif defined(HAVE_SIMD_SSE4_2)
    // Compile-time SSE4.2 build (GCC/Clang with -msse4.2).
    sse42_usable = true;
#endif

    // Optional diagnostic override so a specific scanner can be measured or
    // forced without rebuilding. PCMPESTRI (SSE4.2) is slow on some
    // microarchitectures.
    requested = getenv("OJ_SCAN");
    if (NULL != requested) {
        if (0 == strcmp(requested, "scalar")) {
            scan_func = scan_string_noSIMD;
            return;
        }
#if defined(HAVE_SIMD_SSE2)
        if (0 == strcmp(requested, "sse2")) {
            scan_func = scan_string_SSE2;
            return;
        }
#endif
#if defined(HAVE_SIMD_SSE4_2)
        if (0 == strcmp(requested, "sse42") && sse42_usable) {
            scan_func = scan_string_SSE42;
            return;
        }
#endif
    }

#if defined(OJ_SIMD_SSE4_2_RUNTIME)
    // Both scanners are compiled in. Per the project's benchmarks on
    // string-heavy payloads, the SSE2 scanner (PCMPEQB + PMOVMSKB) is as fast
    // as or faster than the SSE4.2 one, and SSE2 exists on every x86_64 CPU,
    // so SSE2 is the default; OJ_SCAN=sse42 opts into PCMPESTRI.
    scan_func = scan_string_SSE2;
    (void)sse42_usable;
#elif defined(HAVE_SIMD_SSE4_2)
    scan_func = scan_string_SSE42;
#elif defined(HAVE_SIMD_SSE2)
    scan_func = scan_string_SSE2;
#endif
#endif  // HAVE_SIMD_SSE2 || HAVE_SIMD_SSE4_2
    // Note: ARM NEON string scanning would be added here if needed
}
|
|
370
418
|
|
data/ext/oj_windows/parser.c
CHANGED
|
@@ -1176,6 +1176,8 @@ extern void oj_set_parser_validator(ojParser p);
|
|
|
1176
1176
|
extern void oj_set_parser_saj(ojParser p);
|
|
1177
1177
|
extern void oj_set_parser_usual(ojParser p);
|
|
1178
1178
|
extern void oj_set_parser_debug(ojParser p);
|
|
1179
|
+
extern void oj_set_parser_safe(ojParser p, VALUE options); // safe parser (upstream Oj 3.17.0)
|
|
1180
|
+
extern void oj_safe_init(VALUE parser_class);
|
|
1179
1181
|
|
|
1180
1182
|
static int opt_cb(VALUE rkey, VALUE value, VALUE ptr) {
|
|
1181
1183
|
ojParser p = (ojParser)ptr;
|
|
@@ -1354,6 +1356,37 @@ static VALUE parser_missing(int argc, VALUE *argv, VALUE self) {
|
|
|
1354
1356
|
return p->option(p, key, rv);
|
|
1355
1357
|
}
|
|
1356
1358
|
|
|
1359
|
+
// Ported from upstream Oj (3.16.16/3.16.17): when the input ends, make sure we
|
|
1360
|
+
// were not left mid-literal or inside an unclosed array/object. Without this an
|
|
1361
|
+
// incomplete document such as "tru", "[1,2" or "{\"a\":1" was accepted silently.
|
|
1362
|
+
// Reports a parse error when the input ended in the middle of a `null`,
// `false` or `true` literal.
//
// Nothing is reported unless p->ri is positive. The literal is identified by
// the byte stored at index 256 of the active map ('N', 'F' or 'T'); any other
// value is ignored.
// NOTE(review): presumably ri counts characters already matched of a literal
// and map[256] is a tag naming the active map -- confirm against parser.h.
static void validate_primitives_are_complete(ojParser p) {
    if (0 < p->ri) {
        switch (p->map[256]) {
        case 'T': parse_error(p, "expected true"); break;
        case 'F': parse_error(p, "expected false"); break;
        case 'N': parse_error(p, "expected null"); break;
        }
    }
}
|
|
1373
|
+
|
|
1374
|
+
// Reports a parse error when the input ended while an array or object was
// still open (p->depth greater than zero). The entry at the top of p->stack
// decides which of the two messages is used.
static void validate_non_primitives_are_complete(ojParser p) {
    if (0 < p->depth) {
        if (OBJECT_FUN != p->stack[p->depth]) {
            parse_error(p, "Array is not closed");
        } else {
            parse_error(p, "Object is not closed");
        }
    }
}
|
|
1384
|
+
|
|
1385
|
+
// End-of-input check run after parsing: first rejects a truncated literal,
// then rejects an array or object that was never closed. The literal check
// runs first, so it decides the message when both conditions hold.
static void validate_document_end(ojParser p) {
    validate_primitives_are_complete(p);      // "tru", "nul", "fals" ...
    validate_non_primitives_are_complete(p);  // "[1,2", "{\"a\":1" ...
}
|
|
1389
|
+
|
|
1357
1390
|
/* Document-method: parse(json)
|
|
1358
1391
|
* call-seq: parse(json)
|
|
1359
1392
|
*
|
|
@@ -1370,6 +1403,7 @@ static VALUE parser_parse(VALUE self, VALUE json) {
|
|
|
1370
1403
|
parser_reset(p);
|
|
1371
1404
|
p->start(p);
|
|
1372
1405
|
parse(p, ptr);
|
|
1406
|
+
validate_document_end(p);
|
|
1373
1407
|
|
|
1374
1408
|
return p->result(p);
|
|
1375
1409
|
}
|
|
@@ -1452,22 +1486,26 @@ static VALUE parser_file(VALUE self, VALUE filename) {
|
|
|
1452
1486
|
return p->result(p);
|
|
1453
1487
|
}
|
|
1454
1488
|
#endif
|
|
1455
|
-
byte
|
|
1456
|
-
|
|
1457
|
-
|
|
1489
|
+
byte buf[16385];
|
|
1490
|
+
int size = (int)(sizeof(buf) - 1);
|
|
1491
|
+
int rsize;
|
|
1458
1492
|
|
|
1493
|
+
// read() returns a signed count: -1 on error, 0 at EOF. Capturing it in a
|
|
1494
|
+
// signed int (not size_t) avoids -1 wrapping to SIZE_MAX, which would make
|
|
1495
|
+
// `buf[rsize] = '\0'` an out-of-bounds write and the error check unreachable.
|
|
1459
1496
|
while (true) {
|
|
1460
|
-
|
|
1461
|
-
|
|
1462
|
-
|
|
1497
|
+
rsize = read(fd, buf, (unsigned int)size);
|
|
1498
|
+
if (rsize < 0) {
|
|
1499
|
+
close(fd);
|
|
1500
|
+
rb_raise(rb_eIOError, "error reading from %s", path);
|
|
1463
1501
|
}
|
|
1464
|
-
if (
|
|
1465
|
-
if (0 != rsize) {
|
|
1466
|
-
rb_raise(rb_eIOError, "error reading from %s", path);
|
|
1467
|
-
}
|
|
1502
|
+
if (0 == rsize) { // EOF
|
|
1468
1503
|
break;
|
|
1469
1504
|
}
|
|
1505
|
+
buf[rsize] = '\0';
|
|
1506
|
+
parse(p, buf);
|
|
1470
1507
|
}
|
|
1508
|
+
close(fd);
|
|
1471
1509
|
return p->result(p);
|
|
1472
1510
|
}
|
|
1473
1511
|
|
|
@@ -1570,6 +1608,34 @@ static VALUE parser_validate(VALUE self) {
|
|
|
1570
1608
|
return validate_parser;
|
|
1571
1609
|
}
|
|
1572
1610
|
|
|
1611
|
+
/* Document-method: safe(options = {})
|
|
1612
|
+
* call-seq: safe(options = {})
|
|
1613
|
+
*
|
|
1614
|
+
* Returns a parser that builds Ruby objects like the :usual parser but enforces
|
|
1615
|
+
* configurable limits on untrusted input: :max_hash_size, :max_array_size,
|
|
1616
|
+
* :max_depth, and :max_total_elements. Ported from upstream Oj 3.17.0.
|
|
1617
|
+
*/
|
|
1618
|
+
static VALUE parser_safe(int argc, VALUE *argv, VALUE self) {
|
|
1619
|
+
VALUE options;
|
|
1620
|
+
|
|
1621
|
+
if (1 == argc) {
|
|
1622
|
+
options = argv[0];
|
|
1623
|
+
Check_Type(options, T_HASH);
|
|
1624
|
+
} else {
|
|
1625
|
+
options = rb_hash_new();
|
|
1626
|
+
}
|
|
1627
|
+
|
|
1628
|
+
ojParser p = OJ_R_ALLOC(struct _ojParser);
|
|
1629
|
+
|
|
1630
|
+
memset(p, 0, sizeof(struct _ojParser));
|
|
1631
|
+
buf_init(&p->key);
|
|
1632
|
+
buf_init(&p->buf);
|
|
1633
|
+
p->map = value_map;
|
|
1634
|
+
oj_set_parser_safe(p, options);
|
|
1635
|
+
|
|
1636
|
+
return TypedData_Wrap_Struct(parser_class, &oj_parser_type, p);
|
|
1637
|
+
}
|
|
1638
|
+
|
|
1573
1639
|
/* Document-class: Oj::Parser
|
|
1574
1640
|
*
|
|
1575
1641
|
* A reusable parser that makes use of named delegates to determine the
|
|
@@ -1597,4 +1663,7 @@ void oj_parser_init(void) {
|
|
|
1597
1663
|
rb_define_module_function(parser_class, "usual", parser_usual, 0);
|
|
1598
1664
|
rb_define_module_function(parser_class, "saj", parser_saj, 0);
|
|
1599
1665
|
rb_define_module_function(parser_class, "validate", parser_validate, 0);
|
|
1666
|
+
rb_define_module_function(parser_class, "safe", parser_safe, -1);
|
|
1667
|
+
|
|
1668
|
+
oj_safe_init(parser_class);
|
|
1600
1669
|
}
|
data/ext/oj_windows/reader.c
CHANGED
data/ext/oj_windows/rxclass.c
CHANGED
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
|
|
2
|
+
#include "safe.h"
|
|
3
|
+
|
|
4
|
+
// Option-key symbols looked up in the options hash (set in oj_safe_init).
static VALUE max_hash_size_sym;
static VALUE max_array_size_sym;
static VALUE max_depth_sym;
static VALUE max_total_elements_sym;

// Exception classes raised when the matching limit is exceeded (set in
// oj_safe_init).
static VALUE max_hash_size_error_class;
static VALUE max_array_size_error_class;
static VALUE max_depth_error_class;
static VALUE max_total_elements_error_class;
|
|
6
|
+
|
|
7
|
+
// Raises HashSizeError ("Too many object items!") when the object currently
// being filled already holds max_hash_size or more members. Does nothing when
// no hash-size limit is configured.
//
// The member count is derived from the embedded usual parser's state: the
// number of entries on its value stack past the current collection's start
// index (vi), halved.
// NOTE(review): the division by two presumably reflects one key slot plus one
// value slot per member -- confirm against usual.c.
// NOTE(review): the limit is a long but "unset" is stored as Qnil and tested
// with NIL_P, so an Integer limit numerically equal to Qnil is treated as
// "no limit".
static void check_object_size(safe_T safe) {
    // Read the usual-parser state in place; copying the whole struct by value
    // on every member added is unnecessary work.
    const struct _usual *usual;
    Col                  current_object_location;
    long int             number_of_items_in_stack;
    long int             number_of_items_in_hash;

    if (NIL_P(safe->max_hash_size)) {
        return;
    }

    usual                   = &safe->usual;
    current_object_location = usual->ctail - 1;

    number_of_items_in_stack = usual->vtail - usual->vhead;
    number_of_items_in_hash  = (number_of_items_in_stack - current_object_location->vi - 1) / 2;

    if (safe->max_hash_size > number_of_items_in_hash) {
        return;
    }

    rb_raise(max_hash_size_error_class, "Too many object items!");
}
|
|
24
|
+
|
|
25
|
+
// Raises ArraySizeError ("Too many array items!") when the array currently
// being filled already holds max_array_size or more elements. Does nothing
// when no array-size limit is configured.
//
// The element count is derived from the embedded usual parser's state: the
// number of entries on its value stack past the current collection's start
// index (vi).
// NOTE(review): the limit is a long but "unset" is stored as Qnil and tested
// with NIL_P, so an Integer limit numerically equal to Qnil is treated as
// "no limit".
static void check_array_size(safe_T safe) {
    // Read the usual-parser state in place; copying the whole struct by value
    // on every element added is unnecessary work.
    const struct _usual *usual;
    Col                  current_object_location;
    long int             number_of_items_in_stack;
    long int             number_of_items_in_array;

    if (NIL_P(safe->max_array_size)) {
        return;
    }

    usual                   = &safe->usual;
    current_object_location = usual->ctail - 1;

    number_of_items_in_stack = usual->vtail - usual->vhead;
    number_of_items_in_array = number_of_items_in_stack - current_object_location->vi - 1;

    if (safe->max_array_size > number_of_items_in_array) {
        return;
    }

    rb_raise(max_array_size_error_class, "Too many array items!");
}
|
|
42
|
+
|
|
43
|
+
// Raises DepthError ("JSON is too deep!") when opening one more nesting level
// (p->depth + 1) would exceed max_depth. Does nothing when no depth limit is
// configured.
// NOTE(review): "unset" is stored as Qnil in a long and tested with NIL_P, so
// an Integer limit numerically equal to Qnil is treated as "no limit".
static void check_max_depth(safe_T safe, ojParser p) {
    if (!NIL_P(safe->max_depth) && safe->max_depth < (p->depth + 1)) {
        rb_raise(max_depth_error_class, "JSON is too deep!");
    }
}
|
|
50
|
+
|
|
51
|
+
// Raises TotalElementsError ("Too many elements!") once the running element
// count reaches max_total_elements. Does nothing when no limit is configured.
//
// The comparison is "count >= limit" rather than "count > limit" because
// top-level values (e.g. [], null, true) are not counted, so
// current_elements_count is always one less than the real total.
// NOTE(review): "unset" is stored as Qnil in a long and tested with NIL_P, so
// an Integer limit numerically equal to Qnil is treated as "no limit".
static void check_max_total_elements(safe_T safe) {
    if (!NIL_P(safe->max_total_elements) && safe->max_total_elements <= safe->current_elements_count) {
        rb_raise(max_total_elements_error_class, "Too many elements!");
    }
}
|
|
64
|
+
|
|
65
|
+
// Start callback for the safe parser: zeroes the per-document counters and
// then runs the start callback that was installed before the safe wrapper.
static void safe_start(ojParser p) {
    safe_T state = (safe_T)p->ctx;

    state->current_elements_count = 0;
    state->current_array_size     = 0;
    state->current_hash_size      = 0;

    state->delegated_start_func(p);
}
|
|
74
|
+
|
|
75
|
+
// Array-context callback for '{': the new object is about to become an
// element of the enclosing array. Counts it as one element, enforces the
// array-size, depth and total-element limits (in that order), then hands over
// to the wrapped callback.
static void safe_open_object(ojParser p) {
    safe_T state = (safe_T)p->ctx;

    state->current_hash_size += 1;
    state->current_elements_count += 1;

    check_array_size(state);
    check_max_depth(state, p);
    check_max_total_elements(state);

    state->delegated_open_object_func(p);
}
|
|
87
|
+
|
|
88
|
+
// Array-context callback for '[': the new array is about to become an element
// of the enclosing array. Counts it as one element, enforces the array-size,
// depth and total-element limits (in that order), then hands over to the
// wrapped callback.
static void safe_open_array(ojParser p) {
    safe_T state = (safe_T)p->ctx;

    state->current_array_size += 1;
    state->current_elements_count += 1;

    check_array_size(state);
    check_max_depth(state, p);
    check_max_total_elements(state);

    state->delegated_open_array_func(p);
}
|
|
100
|
+
|
|
101
|
+
DEFINE_DELEGATED_FUNCTION(add_null);
|
|
102
|
+
DEFINE_DELEGATED_FUNCTION(add_true);
|
|
103
|
+
DEFINE_DELEGATED_FUNCTION(add_false);
|
|
104
|
+
DEFINE_DELEGATED_FUNCTION(add_int);
|
|
105
|
+
DEFINE_DELEGATED_FUNCTION(add_float);
|
|
106
|
+
DEFINE_DELEGATED_FUNCTION(add_big);
|
|
107
|
+
DEFINE_DELEGATED_FUNCTION(add_str);
|
|
108
|
+
|
|
109
|
+
// Object-context callback for '{': the new object is about to become the
// value of a member of the enclosing object. Counts two elements (key and
// value), enforces the hash-size, depth and total-element limits (in that
// order), then hands over to the wrapped callback.
static void safe_open_object_key(ojParser p) {
    safe_T state = (safe_T)p->ctx;

    state->current_hash_size += 1;
    state->current_elements_count += 2;

    check_object_size(state);
    check_max_depth(state, p);
    check_max_total_elements(state);

    state->delegated_open_object_key_func(p);
}
|
|
121
|
+
|
|
122
|
+
// Object-context callback for '[': the new array is about to become the value
// of a member of the enclosing object. Counts two elements (key and value),
// enforces the hash-size, depth and total-element limits (in that order),
// then hands over to the wrapped callback.
static void safe_open_array_key(ojParser p) {
    safe_T state = (safe_T)p->ctx;

    state->current_array_size += 1;
    state->current_elements_count += 2;

    check_object_size(state);
    check_max_depth(state, p);
    check_max_total_elements(state);

    state->delegated_open_array_key_func(p);
}
|
|
134
|
+
|
|
135
|
+
DEFINE_DELEGATED_OBJECT_FUNCTION(add_null);
|
|
136
|
+
DEFINE_DELEGATED_OBJECT_FUNCTION(add_true);
|
|
137
|
+
DEFINE_DELEGATED_OBJECT_FUNCTION(add_false);
|
|
138
|
+
DEFINE_DELEGATED_OBJECT_FUNCTION(add_int);
|
|
139
|
+
DEFINE_DELEGATED_OBJECT_FUNCTION(add_float);
|
|
140
|
+
DEFINE_DELEGATED_OBJECT_FUNCTION(add_big);
|
|
141
|
+
DEFINE_DELEGATED_OBJECT_FUNCTION(add_str);
|
|
142
|
+
|
|
143
|
+
// Configures `p` as a safe parser backed by `safe`.
//
// The parser is first initialised as a usual parser using the `usual` member
// embedded in `safe`. The start callback and the array-context and
// object-context callbacks for opening collections and adding scalars are
// then replaced by the safe_* wrappers, with the original callbacks saved in
// `safe` so the wrappers can forward to them. Top-level callbacks are not
// wrapped here. Finally the four limits are read from `options`; SET_CONFIG
// raises ArgumentError for a value that is neither an Integer nor nil.
void oj_init_safe_parser(ojParser p, safe_T safe, VALUE options) {
    Funcs funcs;

// Save the currently installed callback in `safe`, then install the wrapper.
#define SAFE_INTERCEPT(slot, saved, wrapper) \
    do {                                     \
        safe->saved = funcs->slot;           \
        funcs->slot = (wrapper);             \
    } while (0)

    // Safe parser inherits all members of usual parser
    oj_init_usual(p, &safe->usual);

    safe->delegated_start_func = p->start;
    p->start                   = safe_start;

    // Callbacks used while inside an array.
    funcs = &p->funcs[ARRAY_FUN];
    SAFE_INTERCEPT(open_object, delegated_open_object_func, safe_open_object);
    SAFE_INTERCEPT(open_array, delegated_open_array_func, safe_open_array);
    // Scalars are wrapped only so that they are counted.
    SAFE_INTERCEPT(add_null, delegated_add_null_func, safe_add_null);
    SAFE_INTERCEPT(add_true, delegated_add_true_func, safe_add_true);
    SAFE_INTERCEPT(add_false, delegated_add_false_func, safe_add_false);
    SAFE_INTERCEPT(add_int, delegated_add_int_func, safe_add_int);
    SAFE_INTERCEPT(add_float, delegated_add_float_func, safe_add_float);
    SAFE_INTERCEPT(add_big, delegated_add_big_func, safe_add_big);
    SAFE_INTERCEPT(add_str, delegated_add_str_func, safe_add_str);

    // Callbacks used while inside an object.
    funcs = &p->funcs[OBJECT_FUN];
    SAFE_INTERCEPT(open_object, delegated_open_object_key_func, safe_open_object_key);
    SAFE_INTERCEPT(open_array, delegated_open_array_key_func, safe_open_array_key);
    // Scalars are wrapped only so that they are counted.
    SAFE_INTERCEPT(add_null, delegated_add_null_key_func, safe_add_null_key);
    SAFE_INTERCEPT(add_true, delegated_add_true_key_func, safe_add_true_key);
    SAFE_INTERCEPT(add_false, delegated_add_false_key_func, safe_add_false_key);
    SAFE_INTERCEPT(add_int, delegated_add_int_key_func, safe_add_int_key);
    SAFE_INTERCEPT(add_float, delegated_add_float_key_func, safe_add_float_key);
    SAFE_INTERCEPT(add_big, delegated_add_big_key_func, safe_add_big_key);
    SAFE_INTERCEPT(add_str, delegated_add_str_key_func, safe_add_str_key);

#undef SAFE_INTERCEPT

    SET_CONFIG(max_hash_size);
    SET_CONFIG(max_array_size);
    SET_CONFIG(max_depth);
    SET_CONFIG(max_total_elements);
}
|
|
201
|
+
|
|
202
|
+
// Allocates the safe-parser state and installs it on `p`, reading the limits
// from the `options` hash. Raises ArgumentError (from oj_init_safe_parser)
// when a limit is neither an Integer nor nil.
void oj_set_parser_safe(ojParser p, VALUE options) {
    safe_T s = OJ_R_ALLOC(struct _safe_S);

    // Start from a fully zeroed state: oj_init_safe_parser() never assigns
    // max_json_size_bytes, and the counters are otherwise only set by the
    // start callback.
    MEMZERO(s, struct _safe_S, 1);

    oj_init_safe_parser(p, s, options);
}
|
|
207
|
+
|
|
208
|
+
// One-time setup for the safe parser: defines the exception hierarchy under
// `parser_class` (ValidationError < RuntimeError, with HashSizeError,
// ArraySizeError, DepthError and TotalElementsError below it), creates the
// option-key symbols, and registers every file-scope VALUE with the GC.
void oj_safe_init(VALUE parser_class) {
    VALUE base_error = rb_define_class_under(parser_class, "ValidationError", rb_eRuntimeError);

    max_hash_size_error_class = rb_define_class_under(parser_class, "HashSizeError", base_error);
    rb_gc_register_address(&max_hash_size_error_class);

    max_array_size_error_class = rb_define_class_under(parser_class, "ArraySizeError", base_error);
    rb_gc_register_address(&max_array_size_error_class);

    max_depth_error_class = rb_define_class_under(parser_class, "DepthError", base_error);
    rb_gc_register_address(&max_depth_error_class);

    max_total_elements_error_class = rb_define_class_under(parser_class, "TotalElementsError", base_error);
    rb_gc_register_address(&max_total_elements_error_class);

    max_hash_size_sym = ID2SYM(rb_intern("max_hash_size"));
    rb_gc_register_address(&max_hash_size_sym);

    max_array_size_sym = ID2SYM(rb_intern("max_array_size"));
    rb_gc_register_address(&max_array_size_sym);

    max_depth_sym = ID2SYM(rb_intern("max_depth"));
    rb_gc_register_address(&max_depth_sym);

    max_total_elements_sym = ID2SYM(rb_intern("max_total_elements"));
    rb_gc_register_address(&max_total_elements_sym);
}
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
#include <ruby.h>
|
|
2
|
+
|
|
3
|
+
#include "parser.h"
|
|
4
|
+
#include "usual.h"
|
|
5
|
+
|
|
6
|
+
// Reads the limit named `config_name` from the options hash into
// safe-><config_name>.
//
// Expands in a scope that must provide `options` (the Ruby Hash), `safe` (the
// safe_T being configured) and a `<config_name>_sym` VALUE holding the lookup
// key. An Integer is converted with NUM2LONG; nil or a missing key stores
// Qnil, which the checks read as "no limit"; anything else raises
// ArgumentError.
//
// The expansion deliberately has no trailing semicolon: the caller's own ';'
// completes the do/while(0) statement, which keeps the macro safe in
// unbraced if/else bodies.
#define SET_CONFIG(config_name)                                                        \
    do {                                                                               \
        VALUE rb_##config_name = rb_hash_aref(options, config_name##_sym);             \
                                                                                       \
        if (RB_INTEGER_TYPE_P(rb_##config_name)) {                                     \
            safe->config_name = NUM2LONG(rb_##config_name);                            \
        } else if (!NIL_P(rb_##config_name)) {                                         \
            rb_raise(rb_eArgError, "Incorrect value provided for `" #config_name "`"); \
        } else {                                                                       \
            safe->config_name = Qnil;                                                  \
        }                                                                              \
    } while (0)
|
|
18
|
+
|
|
19
|
+
// Defines `static void safe_<function_name>(ojParser p)`, the array-context
// wrapper for a scalar callback: counts one element, enforces the array-size
// and total-element limits (in that order), then forwards to the callback
// saved in delegated_<function_name>_func.
// check_array_size() and check_max_total_elements() must be declared before
// the macro is used.
#define DEFINE_DELEGATED_FUNCTION(function_name)    \
    static void safe_##function_name(ojParser p) {  \
        safe_T state = (safe_T)p->ctx;              \
                                                    \
        state->current_elements_count += 1;         \
        check_array_size(state);                    \
        check_max_total_elements(state);            \
        state->delegated_##function_name##_func(p); \
    }
|
|
30
|
+
|
|
31
|
+
// Defines `static void safe_<function_name>_key(ojParser p)`, the
// object-context wrapper for a scalar callback: counts two elements (key and
// value), enforces the hash-size and total-element limits (in that order),
// then forwards to the callback saved in delegated_<function_name>_key_func.
// check_object_size() and check_max_total_elements() must be declared before
// the macro is used.
#define DEFINE_DELEGATED_OBJECT_FUNCTION(function_name)  \
    static void safe_##function_name##_key(ojParser p) { \
        safe_T state = (safe_T)p->ctx;                   \
                                                         \
        state->current_elements_count += 2;              \
        check_object_size(state);                        \
        check_max_total_elements(state);                 \
        state->delegated_##function_name##_key_func(p);  \
    }
|
|
42
|
+
|
|
43
|
+
typedef struct _safe_S {
|
|
44
|
+
struct _usual usual;
|
|
45
|
+
|
|
46
|
+
long int max_hash_size;
|
|
47
|
+
long int max_array_size;
|
|
48
|
+
long int max_depth;
|
|
49
|
+
long int max_total_elements;
|
|
50
|
+
long int max_json_size_bytes;
|
|
51
|
+
|
|
52
|
+
long int current_hash_size;
|
|
53
|
+
long int current_array_size;
|
|
54
|
+
long int current_elements_count;
|
|
55
|
+
|
|
56
|
+
void (*delegated_start_func)(struct _ojParser *p);
|
|
57
|
+
|
|
58
|
+
// Array functions
|
|
59
|
+
void (*delegated_open_object_func)(struct _ojParser *p);
|
|
60
|
+
void (*delegated_open_array_func)(struct _ojParser *p);
|
|
61
|
+
void (*delegated_add_null_func)(struct _ojParser *p);
|
|
62
|
+
void (*delegated_add_true_func)(struct _ojParser *p);
|
|
63
|
+
void (*delegated_add_false_func)(struct _ojParser *p);
|
|
64
|
+
void (*delegated_add_int_func)(struct _ojParser *p);
|
|
65
|
+
void (*delegated_add_float_func)(struct _ojParser *p);
|
|
66
|
+
void (*delegated_add_big_func)(struct _ojParser *p);
|
|
67
|
+
void (*delegated_add_str_func)(struct _ojParser *p);
|
|
68
|
+
|
|
69
|
+
// Object functions
|
|
70
|
+
void (*delegated_open_object_key_func)(struct _ojParser *p);
|
|
71
|
+
void (*delegated_open_array_key_func)(struct _ojParser *p);
|
|
72
|
+
void (*delegated_add_null_key_func)(struct _ojParser *p);
|
|
73
|
+
void (*delegated_add_true_key_func)(struct _ojParser *p);
|
|
74
|
+
void (*delegated_add_false_key_func)(struct _ojParser *p);
|
|
75
|
+
void (*delegated_add_int_key_func)(struct _ojParser *p);
|
|
76
|
+
void (*delegated_add_float_key_func)(struct _ojParser *p);
|
|
77
|
+
void (*delegated_add_big_key_func)(struct _ojParser *p);
|
|
78
|
+
void (*delegated_add_str_key_func)(struct _ojParser *p);
|
|
79
|
+
} *safe_T;
|
data/ext/oj_windows/simd.h
CHANGED
|
@@ -4,10 +4,31 @@
|
|
|
4
4
|
// SIMD architecture detection and configuration
|
|
5
5
|
// This header provides unified SIMD support across different CPU architectures
|
|
6
6
|
|
|
7
|
+
// Escape hatch: build with -DOJ_DISABLE_SIMD to force the scalar scanner on any
|
|
8
|
+
// platform (useful for debugging or comparison benchmarks).
|
|
9
|
+
#ifndef OJ_DISABLE_SIMD
|
|
10
|
+
|
|
7
11
|
// x86/x86_64 SIMD detection
|
|
8
12
|
#if defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64)
|
|
9
13
|
#define HAVE_SIMD_X86 1
|
|
10
14
|
|
|
15
|
+
#if defined(_MSC_VER)
|
|
16
|
+
// MSVC (cl.exe) does not define the GCC/Clang feature macros __SSE2__/__SSE4_2__,
|
|
17
|
+
// and it requires no -msse flag to emit SSE intrinsics. On x64, SSE2 is guaranteed
|
|
18
|
+
// by the architecture, so we enable the SSE2 scanner unconditionally. SSE4.2 is not
|
|
19
|
+
// guaranteed by every CPU, so we also compile the SSE4.2 scanner but select it only
|
|
20
|
+
// at runtime via CPUID (see oj_scanner_init() in parse.c).
|
|
21
|
+
#if defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
|
|
22
|
+
#define HAVE_SIMD_SSE2 1
|
|
23
|
+
#include <emmintrin.h>
|
|
24
|
+
#define HAVE_SIMD_SSE4_2 1
|
|
25
|
+
#define OJ_SIMD_SSE4_2_RUNTIME 1
|
|
26
|
+
#include <nmmintrin.h>
|
|
27
|
+
#include <intrin.h> // __cpuid, _BitScanForward
|
|
28
|
+
#endif
|
|
29
|
+
|
|
30
|
+
#else // GCC / Clang: gate on the -msseN feature macros set by extconf.rb.
|
|
31
|
+
|
|
11
32
|
// SSE4.2 support (Intel Core i7+, AMD Bulldozer+)
|
|
12
33
|
// Enabled automatically when compiler has -msse4.2 flag
|
|
13
34
|
#if defined(__SSE4_2__)
|
|
@@ -21,6 +42,8 @@
|
|
|
21
42
|
#include <emmintrin.h>
|
|
22
43
|
#endif
|
|
23
44
|
|
|
45
|
+
#endif // _MSC_VER
|
|
46
|
+
|
|
24
47
|
#endif // x86/x86_64
|
|
25
48
|
|
|
26
49
|
// ARM NEON detection
|
|
@@ -30,6 +53,8 @@
|
|
|
30
53
|
#include <arm_neon.h>
|
|
31
54
|
#endif
|
|
32
55
|
|
|
56
|
+
#endif // OJ_DISABLE_SIMD
|
|
57
|
+
|
|
33
58
|
// Define which SIMD implementation to use (priority order: SSE4.2 > NEON > SSE2)
|
|
34
59
|
#if defined(HAVE_SIMD_SSE4_2)
|
|
35
60
|
#define HAVE_SIMD_STRING_SCAN 1
|
|
@@ -44,4 +69,25 @@
|
|
|
44
69
|
#define SIMD_TYPE "none"
|
|
45
70
|
#endif
|
|
46
71
|
|
|
72
|
+
// Portability shims: the SSE scanners in parse.c are written with GCC/Clang
|
|
73
|
+
// builtins. Provide MSVC-native equivalents so the same code compiles under
|
|
74
|
+
// cl.exe. Only defined when an x86 SSE path is actually enabled, and guarded by
|
|
75
|
+
// #ifndef so a real GCC/Clang build is never affected.
|
|
76
|
+
#if defined(_MSC_VER) && (defined(HAVE_SIMD_SSE2) || defined(HAVE_SIMD_SSE4_2))
|
|
77
|
+
#ifndef __builtin_prefetch
|
|
78
|
+
#define __builtin_prefetch(addr, ...) _mm_prefetch((const char *)(addr), _MM_HINT_T0)
|
|
79
|
+
#endif
|
|
80
|
+
#ifndef __builtin_expect
|
|
81
|
+
#define __builtin_expect(expr, expected) (expr)
|
|
82
|
+
#endif
|
|
83
|
+
// MSVC stand-in for __builtin_ctz: returns the index of the lowest set bit of
// `x`. Callers are expected to pass a non-zero value; `index` is
// pre-initialised so a zero argument returns 0 rather than reading an
// indeterminate value, because _BitScanForward does not set it in that case.
static __forceinline int oj_msvc_ctz(unsigned int x) {
    unsigned long index = 0;

    (void)_BitScanForward(&index, x);
    return (int)index;
}
|
|
88
|
+
#ifndef __builtin_ctz
|
|
89
|
+
#define __builtin_ctz(x) oj_msvc_ctz((unsigned int)(x))
|
|
90
|
+
#endif
|
|
91
|
+
#endif
|
|
92
|
+
|
|
47
93
|
#endif /* OJ_SIMD_H */
|
data/ext/oj_windows/sparse.c
CHANGED
|
@@ -405,6 +405,7 @@ static void read_num(ParseInfo pi) {
|
|
|
405
405
|
char c;
|
|
406
406
|
|
|
407
407
|
reader_protect(&pi->rd);
|
|
408
|
+
ni.pi = pi; // ni->pi->err_class is dereferenced in oj_num_as_value
|
|
408
409
|
ni.i = 0;
|
|
409
410
|
ni.num = 0;
|
|
410
411
|
ni.div = 1;
|
|
@@ -556,6 +557,7 @@ static void read_nan(ParseInfo pi) {
|
|
|
556
557
|
char c;
|
|
557
558
|
|
|
558
559
|
ni.str = pi->rd.str;
|
|
560
|
+
ni.pi = pi; // ni->pi->err_class is dereferenced in oj_num_as_value
|
|
559
561
|
ni.i = 0;
|
|
560
562
|
ni.num = 0;
|
|
561
563
|
ni.div = 1;
|
|
@@ -752,6 +754,7 @@ void oj_sparse2(ParseInfo pi) {
|
|
|
752
754
|
return;
|
|
753
755
|
}
|
|
754
756
|
ni.str = pi->rd.str;
|
|
757
|
+
ni.pi = pi; // ni->pi->err_class is dereferenced in oj_num_as_value
|
|
755
758
|
ni.i = 0;
|
|
756
759
|
ni.num = 0;
|
|
757
760
|
ni.div = 1;
|
data/ext/oj_windows/usual.c
CHANGED
|
@@ -200,7 +200,10 @@ static void push_key(ojParser p) {
|
|
|
200
200
|
d->ktail = d->khead + pos;
|
|
201
201
|
d->kend = d->khead + cap;
|
|
202
202
|
}
|
|
203
|
-
|
|
203
|
+
if (32000 < klen) { // extreme-size guard (upstream Oj 3.17.2); ktail->len is int16_t
|
|
204
|
+
rb_raise(oj_json_parser_error_class, "Key too long. Keys are limited to 32,000 bytes.");
|
|
205
|
+
}
|
|
206
|
+
d->ktail->len = (int16_t)klen;
|
|
204
207
|
if (klen < sizeof(d->ktail->buf)) {
|
|
205
208
|
memcpy(d->ktail->buf, key, klen);
|
|
206
209
|
d->ktail->buf[klen] = '\0';
|
data/lib/oj_windows/version.rb
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
module Oj
|
|
2
|
-
# Current version of the module.
|
|
3
|
-
VERSION = '3.
|
|
4
|
-
end
|
|
1
|
+
module Oj
|
|
2
|
+
# Current version of the module.
|
|
3
|
+
VERSION = '3.17.2.1'
|
|
4
|
+
end
|
data/pages/InstallOptions.md
CHANGED
|
@@ -10,11 +10,21 @@ To enable Oj trace feature on Windows, use the `--enable-trace-log` option when
|
|
|
10
10
|
Then, the trace logs will be displayed when `:trace` option is set to `true`.
|
|
11
11
|
|
|
12
12
|
|
|
13
|
-
###
|
|
13
|
+
### SIMD string scanning
|
|
14
|
+
|
|
15
|
+
SIMD acceleration of string scanning is **enabled by default** on x86_64 (SSE2,
|
|
16
|
+
which every x64 CPU supports) — no flag is required. The parser selects its
|
|
17
|
+
scanner at load time; by default it uses the SSE2 scanner, which benchmarks
|
|
18
|
+
fastest on common CPUs.
|
|
19
|
+
|
|
20
|
+
To force the portable scalar scanner (e.g. for debugging or on a CPU where you
|
|
21
|
+
want to compare), build with `--disable-simd`:
|
|
14
22
|
|
|
15
23
|
```powershell
|
|
16
|
-
gem install oj_windows -- --
|
|
24
|
+
gem install oj_windows -- --disable-simd
|
|
17
25
|
```
|
|
18
26
|
|
|
19
|
-
|
|
20
|
-
|
|
27
|
+
You can also override the scanner at runtime, without rebuilding, via the
|
|
28
|
+
`OJ_SCAN` environment variable: `scalar`, `sse2`, or `sse42`. For example
|
|
29
|
+
`set OJ_SCAN=sse42` opts into the SSE4.2 (PCMPESTRI) scanner, which can be
|
|
30
|
+
faster on some microarchitectures.
|