smarter_json 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ca54f0d032730a5606b5a5d0fe4091c62c2d1ef70ebce6dd9283c91bb288a6b2
4
- data.tar.gz: b4d859ef69ca0fa1935271ba99027f9d132ad8e558579da2b37d6bb542fc5076
3
+ metadata.gz: 480227c64ed99ba271fe95c6e7c79be6871f6bbf5c7f8f03d9bf118bb6bd7051
4
+ data.tar.gz: 966190c8f2e316e3664e381bf738831259b0e469a783f2fa3a25edf5f198d41b
5
5
  SHA512:
6
- metadata.gz: c4c474e948779c8541fe7d6bc7a572cf1a131aea53dff2436ac0068228c777d020dd3c6e946b014e80bedbca03aec93fa35f8189294fb070935fb12db06fee24
7
- data.tar.gz: 77082d230f4cccd2f913bb4a5508f8b15551230fde2c78b341ad71b08700b8a67bd1030a242ab3bd6f70252c66300d2fce7099964b5cf883229bbc73303305a7
6
+ metadata.gz: d53d1a84aa5ce83a2243a9adcdcddc284933dc8c357e08c8aaaec8e5e005164dfdec37d70c41bce0b2a3abf22dc7902e062adbbe3ffaa947b2232836bd01a86c
7
+ data.tar.gz: ae44f83437fe6c75227822a234658abb2157caf0378c8b998bab25aa40b4f1286da9a85a482be04b1be4ab17b142779140d0892ada81ae7d4342a0b7f706e9b7
data/CHANGELOG.md CHANGED
@@ -3,8 +3,8 @@
3
3
 
4
4
  > ⚠️ SmarterJSON **always returns an `Array`** of documents.
5
5
  >
6
- > `SmarterJSON.process` / `SmarterJSON.process_file` return:
7
- >
6
+ > `SmarterJSON.process` / `SmarterJSON.process_file`
7
+ > both return:
8
8
  > - `[]` for no doc
9
9
  > - `[doc]` for one doc
10
10
  > - `[d1, d2, …]` for several docs (NDJSON / JSONL / concatenated docs)
@@ -12,6 +12,12 @@
12
12
  > ⚠️ We discourage the use of `process(input).first` / `process(input)[0]` because it silently drops any additional documents.
13
13
  > Please use `process_one` if you are expecting only one JSON doc, e.g. in API payloads.
14
14
 
15
+ ## 1.1.1 (2026-06-11)
16
+
17
+ RSpec tests: 1,070 → 1,097
18
+
19
+ - The C extension now emits the same `on_warning` warnings as the pure-Ruby parser. `empty_value` and `duplicate_key` warnings name the offending key (and `duplicate_key` names the resolution strategy), and the warning text, line, and column are now identical whether or not the C extension is loaded.
20
+
15
21
  ## 1.1.0 (2026-06-09)
16
22
 
17
23
  RSpec tests: 1,038 → 1,070
data/README.md CHANGED
@@ -19,6 +19,8 @@ A lenient, fast JSON processor for Ruby. It extracts strict JSON, NDJSON, JSONL,
19
19
 
20
20
  ## Why SmarterJSON?
21
21
 
22
+ > 📖 **The thinking behind it:** [*Strict by Accident: Your JSON parser isn't broken, it's answering the wrong question*](https://dev.to/tilo_sloboda/strict-by-accident-your-json-parser-isnt-broken-its-answering-the-wrong-question-54f0) — why a data pipeline wants a lenient, recovery-first parser rather than a spec-policing one.
23
+
22
24
  **Are you tired of seeing errors like these?**
23
25
 
24
26
  ```
@@ -81,9 +81,9 @@ typedef struct {
81
81
  * an error) by scanning from the start of the buffer. CR, LF, and CRLF each
82
82
  * count as one newline; col is bytes since the last line start (1-based).
83
83
  * Keeping this off the hot path is the point — fj_advance never touches it. */
84
- static void fj_line_col(fj_state *st, long *line, long *col) {
84
+ static void fj_line_col_to(fj_state *st, long stop, long *line, long *col) {
85
85
  long l = 1, c = 1, i;
86
- long limit = (st->pos < st->len) ? st->pos : st->len;
86
+ long limit = (stop < st->len) ? stop : st->len;
87
87
  for (i = 0; i < limit; i++) {
88
88
  unsigned char b = (unsigned char)st->buf[i];
89
89
  if (b == 0x0A) { l++; c = 1; }
@@ -93,6 +93,9 @@ static void fj_line_col(fj_state *st, long *line, long *col) {
93
93
  *line = l;
94
94
  *col = c;
95
95
  }
96
+ static void fj_line_col(fj_state *st, long *line, long *col) {
97
+ fj_line_col_to(st, st->pos, line, col);
98
+ }
96
99
 
97
100
  /* Report a non-fatal lenient fix to the on_warning callable — a no-op (and builds no
98
101
  * Warning) when no handler was given. The internal Qnil guard is the safety net; the
@@ -106,6 +109,25 @@ static void fj_warn(fj_state *st, VALUE type_sym, const char *msg) {
106
109
  rb_utf8_str_new_cstr(msg), LONG2NUM(line), LONG2NUM(col)));
107
110
  }
108
111
 
112
+ /* Like fj_warn but the message is a prebuilt Ruby String (rb_enc_sprintf, for messages
113
+ * that interpolate the offending key). Same Qnil guard and st->pos line/col. */
114
+ static void fj_warn_str(fj_state *st, VALUE type_sym, VALUE msg) {
115
+ long line, col;
116
+ if (st->on_warning == Qnil) return;
117
+ fj_line_col(st, &line, &col);
118
+ rb_funcall(st->on_warning, fj_call_id, 1,
119
+ rb_funcall(cWarning, fj_new_id, 4, type_sym, msg, LONG2NUM(line), LONG2NUM(col)));
120
+ }
121
+
122
+ /* Like fj_warn_str but at an explicit (line,col) — for a warning detected away from the
123
+ * cursor. Duplicate keys are found while building the closed object, but attributed to
124
+ * the offset just past the duplicate member's value, to match the pure-Ruby parser. */
125
+ static void fj_warn_str_at(fj_state *st, VALUE type_sym, VALUE msg, long line, long col) {
126
+ if (st->on_warning == Qnil) return;
127
+ rb_funcall(st->on_warning, fj_call_id, 1,
128
+ rb_funcall(cWarning, fj_new_id, 4, type_sym, msg, LONG2NUM(line), LONG2NUM(col)));
129
+ }
130
+
109
131
  /* 1-based column of the current byte position (bytes since the last line start).
110
132
  * Used for triple-quoted indentation stripping (smarter_json.md §2.3). */
111
133
  static long fj_column(fj_state *st) {
@@ -1290,7 +1312,7 @@ void rb_hash_bulk_insert(long, const VALUE *, VALUE);
1290
1312
  /* Build a Hash from `count` interleaved key,value slots. Fast path (String keys,
1291
1313
  * default :last_wins): pre-size + bulk insert. symbolize_keys / :first_wins use a
1292
1314
  * per-member loop into the same pre-sized hash. */
1293
- static VALUE fj_build_object(fj_state *st, const VALUE *pairs, long count) {
1315
+ static VALUE fj_build_object(fj_state *st, const VALUE *pairs, const long *positions, long count) {
1294
1316
  long entries = count / 2, i;
1295
1317
  VALUE hash = rb_hash_new_capa(entries);
1296
1318
 
@@ -1305,7 +1327,16 @@ static VALUE fj_build_object(fj_state *st, const VALUE *pairs, long count) {
1305
1327
  VALUE k = st->symbolize_keys ? rb_funcall(pairs[i], fj_to_sym_id, 0) : pairs[i];
1306
1328
  if (st->dup_first_wins || st->on_warning != Qnil) {
1307
1329
  if (RTEST(rb_funcall(hash, fj_key_p_id, 1, k))) {
1308
- fj_warn(st, fj_sym_duplicate_key, "duplicate key");
1330
+ if (st->on_warning != Qnil) {
1331
+ long wl, wc;
1332
+ fj_line_col_to(st, positions[i + 1], &wl, &wc); /* the duplicate member's value position — matches the Ruby parser */
1333
+ fj_warn_str_at(st, fj_sym_duplicate_key,
1334
+ rb_enc_sprintf(rb_utf8_encoding(),
1335
+ "duplicate key %"PRIsVALUE" \xe2\x80\x94 %s",
1336
+ rb_inspect(k),
1337
+ st->dup_first_wins ? "first_wins" : "last_wins"),
1338
+ wl, wc);
1339
+ }
1309
1340
  if (st->dup_first_wins) continue;
1310
1341
  }
1311
1342
  }
@@ -1323,6 +1354,7 @@ typedef struct { long mark; int is_obj; } fj_frame;
1323
1354
 
1324
1355
  typedef struct {
1325
1356
  VALUE *vptr; long vhead; long vcapa; /* pending values (GC-marked) */
1357
+ long *pptr; /* byte offset just past each pushed value (mirrors vptr/vcapa); used only to attribute a duplicate-key warning to the duplicate member's position */
1326
1358
  fj_frame *fptr; long fhead; long fcapa; /* open-container frames (no VALUEs) */
1327
1359
  } fj_pstack;
1328
1360
 
@@ -1334,12 +1366,15 @@ static void fj_pstack_mark(void *p) {
1334
1366
  static void fj_pstack_free(void *p) {
1335
1367
  fj_pstack *ps = (fj_pstack *)p;
1336
1368
  if (ps->vptr != NULL) xfree(ps->vptr);
1369
+ if (ps->pptr != NULL) xfree(ps->pptr);
1337
1370
  if (ps->fptr != NULL) xfree(ps->fptr);
1338
1371
  xfree(ps);
1339
1372
  }
1340
1373
  static size_t fj_pstack_memsize(const void *p) {
1341
1374
  const fj_pstack *ps = (const fj_pstack *)p;
1342
- return sizeof(fj_pstack) + (size_t)ps->vcapa * sizeof(VALUE) + (size_t)ps->fcapa * sizeof(fj_frame);
1375
+ return sizeof(fj_pstack) + (size_t)ps->vcapa * sizeof(VALUE)
1376
+ + (ps->pptr ? (size_t)ps->vcapa * sizeof(long) : 0)
1377
+ + (size_t)ps->fcapa * sizeof(fj_frame);
1343
1378
  }
1344
1379
  static const rb_data_type_t fj_pstack_type = {
1345
1380
  "smarter_json/pstack",
@@ -1347,8 +1382,15 @@ static const rb_data_type_t fj_pstack_type = {
1347
1382
  0, 0, RUBY_TYPED_FREE_IMMEDIATELY,
1348
1383
  };
1349
1384
 
1350
- static inline void fj_vpush(fj_pstack *ps, VALUE v) {
1351
- if (ps->vhead >= ps->vcapa) { ps->vcapa *= 2; REALLOC_N(ps->vptr, VALUE, ps->vcapa); }
1385
+ static inline void fj_vpush(fj_state *st, fj_pstack *ps, VALUE v) {
1386
+ if (ps->vhead >= ps->vcapa) {
1387
+ ps->vcapa *= 2;
1388
+ REALLOC_N(ps->vptr, VALUE, ps->vcapa);
1389
+ if (ps->pptr) REALLOC_N(ps->pptr, long, ps->vcapa);
1390
+ }
1391
+ /* pptr is allocated only when on_warning is set; the fast path (no handler) does no
1392
+ * extra store. The offset is just past this value — the duplicate-key warning position. */
1393
+ if (ps->pptr) ps->pptr[ps->vhead] = st->pos;
1352
1394
  ps->vptr[ps->vhead++] = v;
1353
1395
  }
1354
1396
  static inline void fj_fpush(fj_pstack *ps, long mark, int is_obj) {
@@ -1368,6 +1410,7 @@ static VALUE fj_parse_iter(fj_state *st, int implicit_root) {
1368
1410
  int vss = 0; /* warnings: has a value landed in the current container since the last separator? */
1369
1411
 
1370
1412
  ps->vptr = ALLOC_N(VALUE, 64); ps->vhead = 0; ps->vcapa = 64;
1413
+ ps->pptr = (st->on_warning != Qnil) ? ALLOC_N(long, 64) : NULL; /* only the warning path needs per-value positions */
1371
1414
  ps->fptr = ALLOC_N(fj_frame, 16); ps->fhead = 0; ps->fcapa = 16;
1372
1415
 
1373
1416
  if (implicit_root) fj_fpush(ps, 0, 1);
@@ -1398,7 +1441,7 @@ static VALUE fj_parse_iter(fj_state *st, int implicit_root) {
1398
1441
  b = fj_byte(st);
1399
1442
  if (FJ_UNLIKELY(fj_needs_ws_skip(b))) { fj_skip_ws_comments(st); b = fj_byte(st); }
1400
1443
  if (b == ',') { /* collapsing separator: skip empty member */
1401
- if (st->on_warning != Qnil && !vss) fj_warn(st, fj_sym_empty_slot, "extra comma, collapsed an empty slot");
1444
+ if (st->on_warning != Qnil && !vss) fj_warn(st, fj_sym_empty_slot, "extra comma \xe2\x80\x94 collapsed an empty slot");
1402
1445
  vss = 0;
1403
1446
  fj_advance(st, 1);
1404
1447
  continue;
@@ -1406,17 +1449,17 @@ static VALUE fj_parse_iter(fj_state *st, int implicit_root) {
1406
1449
  if (b == '}') {
1407
1450
  VALUE hash;
1408
1451
  fj_advance(st, 1);
1409
- hash = fj_build_object(st, &ps->vptr[mark], ps->vhead - mark);
1452
+ hash = fj_build_object(st, &ps->vptr[mark], ps->pptr ? &ps->pptr[mark] : NULL, ps->vhead - mark);
1410
1453
  ps->vhead = mark;
1411
1454
  ps->fhead--;
1412
1455
  if (ps->fhead == 0) { result = hash; break; }
1413
- fj_vpush(ps, hash);
1456
+ fj_vpush(st, ps, hash);
1414
1457
  vss = 1;
1415
1458
  continue;
1416
1459
  }
1417
1460
  if (b == -1) {
1418
1461
  if (implicit_root && ps->fhead == 1) {
1419
- result = fj_build_object(st, &ps->vptr[mark], ps->vhead - mark);
1462
+ result = fj_build_object(st, &ps->vptr[mark], ps->pptr ? &ps->pptr[mark] : NULL, ps->vhead - mark);
1420
1463
  break;
1421
1464
  }
1422
1465
  fj_error(st, "unterminated object");
@@ -1430,28 +1473,32 @@ static VALUE fj_parse_iter(fj_state *st, int implicit_root) {
1430
1473
  b = fj_byte(st);
1431
1474
  if (FJ_UNLIKELY(fj_needs_ws_skip(b))) { fj_skip_ws_comments(st); b = fj_byte(st); }
1432
1475
  if (b == '{' || b == '[') {
1433
- fj_vpush(ps, key);
1476
+ fj_vpush(st, ps, key);
1434
1477
  fj_advance(st, 1);
1435
1478
  fj_fpush(ps, ps->vhead, (b == '{'));
1436
1479
  vss = 0;
1437
1480
  continue;
1438
1481
  }
1439
1482
  if (b == '}' || b == ',') { /* key with a colon but no value -> null */
1440
- fj_vpush(ps, key);
1441
- fj_vpush(ps, Qnil);
1442
- fj_warn(st, fj_sym_empty_value, "empty value, used null");
1483
+ fj_vpush(st, ps, key);
1484
+ fj_vpush(st, ps, Qnil);
1485
+ if (st->on_warning != Qnil)
1486
+ fj_warn_str(st, fj_sym_empty_value,
1487
+ rb_enc_sprintf(rb_utf8_encoding(),
1488
+ "key %"PRIsVALUE" had no value \xe2\x80\x94 used null",
1489
+ rb_inspect(key)));
1443
1490
  vss = 1;
1444
1491
  continue;
1445
1492
  }
1446
1493
  if (b == -1) fj_error(st, "unexpected end of input");
1447
- fj_vpush(ps, key);
1448
- fj_vpush(ps, fj_parse_member_value(st));
1494
+ fj_vpush(st, ps, key);
1495
+ fj_vpush(st, ps, fj_parse_member_value(st));
1449
1496
  vss = 1;
1450
1497
  } else { /* array */
1451
1498
  b = fj_byte(st);
1452
1499
  if (FJ_UNLIKELY(fj_needs_ws_skip(b))) { fj_skip_ws_comments(st); b = fj_byte(st); }
1453
1500
  if (b == ',') { /* collapsing separator: skip empty slot */
1454
- if (st->on_warning != Qnil && !vss) fj_warn(st, fj_sym_empty_slot, "extra comma, collapsed an empty slot");
1501
+ if (st->on_warning != Qnil && !vss) fj_warn(st, fj_sym_empty_slot, "extra comma \xe2\x80\x94 collapsed an empty slot");
1455
1502
  vss = 0;
1456
1503
  fj_advance(st, 1);
1457
1504
  continue;
@@ -1463,7 +1510,7 @@ static VALUE fj_parse_iter(fj_state *st, int implicit_root) {
1463
1510
  ps->vhead = mark;
1464
1511
  ps->fhead--;
1465
1512
  if (ps->fhead == 0) { result = ary; break; }
1466
- fj_vpush(ps, ary);
1513
+ fj_vpush(st, ps, ary);
1467
1514
  vss = 1;
1468
1515
  continue;
1469
1516
  }
@@ -1481,10 +1528,10 @@ static VALUE fj_parse_iter(fj_state *st, int implicit_root) {
1481
1528
  smart-quote, literals) falls through to the full dispatch below. */
1482
1529
  if (b == '-' || b == '+' || b == '.' || (b >= '0' && b <= '9')) {
1483
1530
  VALUE num;
1484
- if (fj_try_member_number(st, &num)) { fj_vpush(ps, num); vss = 1; continue; }
1531
+ if (fj_try_member_number(st, &num)) { fj_vpush(st, ps, num); vss = 1; continue; }
1485
1532
  }
1486
- if (b == '"') { fj_vpush(ps, fj_parse_string(st, '"')); vss = 1; continue; }
1487
- fj_vpush(ps, fj_parse_member_value(st));
1533
+ if (b == '"') { fj_vpush(st, ps, fj_parse_string(st, '"')); vss = 1; continue; }
1534
+ fj_vpush(st, ps, fj_parse_member_value(st));
1488
1535
  vss = 1;
1489
1536
  }
1490
1537
  }
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SmarterJSON
4
- VERSION = "1.1.0"
4
+ VERSION = "1.1.1"
5
5
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: smarter_json
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tilo Sloboda
8
8
  bindir: exe
9
9
  cert_chain: []
10
- date: 2026-06-09 00:00:00.000000000 Z
10
+ date: 2026-06-11 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: bigdecimal