smarter_json 1.2.0 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bf6191c05bd9049082a1362f4bfa5ab3240c4690bc59c1675c716c0901d5c6cb
4
- data.tar.gz: 56f1d5418b20d8bad23694f7831dfb335ab4737dd34e9184b9c20113c41fd3aa
3
+ metadata.gz: a31a7485e53042c06cb28c92f23e24cb55067fd312704c0c9ac0776868f20293
4
+ data.tar.gz: 531187f8f7ea38f573785f09fa6e217f63a4393fd537d80c92f00973aeb2b375
5
5
  SHA512:
6
- metadata.gz: 64c3511d1f21662b703ee1a02876ba8f05401ebcd48b1025e7290a50c49a5fc1d74623c94477b6fc7006ea782028698e298010b3e827f5ed0315fa1f7e88f595
7
- data.tar.gz: c2801263204013c23954d7f4489f7f4f38f74802c42da5cf2ba2e5965decfc6c00dd304a7ff16f9f96741e4103fd499f36efa8a8ba5ec7abfdb40dded1c996a9
6
+ metadata.gz: f88266d06416277355f770645d694be6e81a8faef22a86806cd1e4620e773dee50aeeeeb09cfbc0de47354972250c2654e599c5c03ba0926ca129d9d6a638b0c
7
+ data.tar.gz: c8bdd4a34d4617c897c815f2526c0f5555459b35bad47106a1db2b71dedac5d7a2605d0a5f92676735ae4895208fcdf1272f05dce09026aa696b84f143693ee5
data/CHANGELOG.md CHANGED
@@ -13,6 +13,18 @@
13
13
  > ⚠️ We discourage the use of `process(input).first` / `process(input)[0]` because it silently drops potential additional documents
14
14
  > Please use `process_one` if you are expecting only one JSON doc, e.g. in API payloads, because it emits on_warning if it finds multiple docs.
15
15
 
16
+ ## 1.2.2 (2026-06-19)
17
+
18
+ RSpec tests: 1,167
19
+
20
+ - The Eisel-Lemire fast path for `decimal_precision: :float` now covers decimals with **up to 19 significant digits** (was 18). 19 is the largest digit count for which every mantissa fits exactly in a `uint64` (the largest 19-digit value ≈ 1.0e19 < `UINT64_MAX` ≈ 1.8e19), so these inputs no longer fall back to the slower `strtod`. Results are still correctly rounded and bit-for-bit identical to the stdlib — verified across 18/19-digit round-to-even tie shapes.
21
+
22
+ ## 1.2.1 (2026-06-17)
23
+
24
+ RSpec tests: 1,165
25
+
26
+ - Performance improvements
27
+
16
28
  ## 1.2.0 (2026-06-16)
17
29
 
18
30
  RSpec tests: 1,097 → 1,165
data/README.md CHANGED
@@ -130,10 +130,10 @@ SmarterJSON.process_file("config.json5") # read a file, then process
130
130
 
131
131
  ## Usage in APIs
132
132
 
133
- At an API boundary the JSON comes from someone you don't control — a client POSTing a request body to *your* service, or an upstream service answering a call *you* made — and it isn't always clean: a stray trailing comma, a `NaN`, a payload wrapped in prose, or a quiet change to the format. A strict parser turns any of those into an exception (a request you reject, or a failed call chain). SmarterJSON extracts the data that's there instead, so one formatting quirk doesn't sink the whole request:
133
+ At an API boundary the JSON comes from someone you don't control — a client sending a request body to *your* service, or an upstream service answering a call *you* made — and it isn't always clean: a stray trailing comma, a `NaN`, a payload wrapped in prose, or a quiet change to the format. A strict parser turns any of those into an exception (a request you reject, or a failed call chain). SmarterJSON extracts the data that's there instead, so one formatting quirk doesn't sink the whole request:
134
134
 
135
135
  ```ruby
136
- # Inbound — JSON a caller sent to your endpoint:
136
+ # Inbound — JSON a caller sent to your service:
137
137
  data = SmarterJSON.process(request.body)
138
138
 
139
139
  # Outbound — JSON from a service you called:
@@ -629,14 +629,15 @@ static VALUE fj_float_strtod(const char *p, long n) {
629
629
  /* e10 is the final base-10 exponent (already adjusted by the fraction length). */
630
630
  static FJ_ALWAYS_INLINE VALUE fj_float_from_parts(fj_state *st, uint64_t m10, int m10digits, int64_t e10, int neg, int overflow, const char *p, long n) {
631
631
  double d;
632
- /* Fast path by mantissa width (our scanner accumulates m10 exactly up to 18
632
+ /* Fast path by mantissa width (our scanner accumulates m10 exactly up to 19
633
633
  digits, flagging overflow beyond):
634
- 1..18 digits -> Eisel-Lemire, correctly-rounded for any exact uint64 mantissa
635
- (Mushtak-Lemire). This pulls full-double-precision data (e.g.
636
- citylots coordinates, 18 sig digits) off the slow strtod
637
- fallback the stdlib json gem still strtods it.
638
- >18 digits / overflow / extreme exponent -> strtod (round-to-odd). */
639
- if (!overflow && m10digits >= 1 && m10digits <= 18 && (long)m10digits + e10 >= -307) {
634
+ 1..19 digits -> Eisel-Lemire, correctly-rounded for any exact uint64 mantissa
635
+ (Mushtak-Lemire). 19 digits is the most that fits exactly in a
636
+ uint64 (max 19-digit ~1.0e19 < UINT64_MAX ~1.8e19); this pulls
637
+ full-double-precision data (e.g. citylots coordinates) off the
638
+ slow strtod fallback (the stdlib json gem still uses strtod for it).
639
+ >19 digits / overflow / extreme exponent -> strtod (round-to-odd). */
640
+ if (!overflow && m10digits >= 1 && m10digits <= 19 && (long)m10digits + e10 >= -307) {
640
641
  if (m10 == 0) return rb_float_new(neg ? -0.0 : 0.0);
641
642
  d = fj_eisel_lemire_s2d(e10, m10, neg);
642
643
  } else {
@@ -683,7 +684,7 @@ static int fj_try_decimal(fj_state *st, const char *p, long n, VALUE *out) {
683
684
  for (;;) {
684
685
  while (i < n && p[i] >= '0' && p[i] <= '9') {
685
686
  had_leading_zero = 1;
686
- if (m10digits < 18) { m10 = m10 * 10 + (uint64_t)(p[i] - '0'); m10digits++; }
687
+ if (m10digits < 19) { m10 = m10 * 10 + (uint64_t)(p[i] - '0'); m10digits++; }
687
688
  else overflow = 1;
688
689
  i++;
689
690
  }
@@ -695,7 +696,7 @@ static int fj_try_decimal(fj_state *st, const char *p, long n, VALUE *out) {
695
696
  has_digit = 1;
696
697
  for (;;) {
697
698
  while (i < n && p[i] >= '0' && p[i] <= '9') {
698
- if (m10digits < 18) { m10 = m10 * 10 + (uint64_t)(p[i] - '0'); m10digits++; }
699
+ if (m10digits < 19) { m10 = m10 * 10 + (uint64_t)(p[i] - '0'); m10digits++; }
699
700
  else overflow = 1;
700
701
  i++;
701
702
  }
@@ -710,7 +711,7 @@ static int fj_try_decimal(fj_state *st, const char *p, long n, VALUE *out) {
710
711
  for (;;) {
711
712
  while (i < n && p[i] >= '0' && p[i] <= '9') {
712
713
  has_digit = 1;
713
- if (m10digits < 18) { m10 = m10 * 10 + (uint64_t)(p[i] - '0'); m10digits++; frac++; }
714
+ if (m10digits < 19) { m10 = m10 * 10 + (uint64_t)(p[i] - '0'); m10digits++; frac++; }
714
715
  else overflow = 1;
715
716
  i++;
716
717
  }
@@ -774,7 +775,7 @@ static VALUE fj_parse_number(fj_state *st) {
774
775
  long nlen;
775
776
  int is_float = 0, neg = 0, overflow = 0, has_sign = 0, had_leading_zero = 0;
776
777
  uint64_t m10 = 0; /* mantissa: integer + fraction digits */
777
- int m10digits = 0; /* mantissa digit chars (caps the Eisel-Lemire fast path at 18) */
778
+ int m10digits = 0; /* mantissa digit chars (caps the Eisel-Lemire fast path at 19) */
778
779
  int frac = 0; /* fraction digit chars: e10 -= frac */
779
780
  int64_t e10 = 0;
780
781
 
@@ -810,7 +811,7 @@ static VALUE fj_parse_number(fj_state *st) {
810
811
  for (;;) {
811
812
  while (*p >= '0' && *p <= '9') {
812
813
  had_leading_zero = 1;
813
- if (m10digits < 18) { m10 = m10 * 10 + (uint64_t)(*p - '0'); m10digits++; }
814
+ if (m10digits < 19) { m10 = m10 * 10 + (uint64_t)(*p - '0'); m10digits++; }
814
815
  else overflow = 1;
815
816
  p++;
816
817
  }
@@ -821,7 +822,7 @@ static VALUE fj_parse_number(fj_state *st) {
821
822
  } else if (*p >= '1' && *p <= '9') {
822
823
  for (;;) {
823
824
  while (*p >= '0' && *p <= '9') {
824
- if (m10digits < 18) { m10 = m10 * 10 + (uint64_t)(*p - '0'); m10digits++; }
825
+ if (m10digits < 19) { m10 = m10 * 10 + (uint64_t)(*p - '0'); m10digits++; }
825
826
  else overflow = 1;
826
827
  p++;
827
828
  }
@@ -841,7 +842,7 @@ static VALUE fj_parse_number(fj_state *st) {
841
842
  p++;
842
843
  for (;;) {
843
844
  while (*p >= '0' && *p <= '9') {
844
- if (m10digits < 18) { m10 = m10 * 10 + (uint64_t)(*p - '0'); m10digits++; frac++; }
845
+ if (m10digits < 19) { m10 = m10 * 10 + (uint64_t)(*p - '0'); m10digits++; frac++; }
845
846
  else overflow = 1;
846
847
  p++;
847
848
  }
@@ -1245,7 +1246,7 @@ static int fj_try_member_number(fj_state *st, VALUE *out) {
1245
1246
  } else if (*p >= '1' && *p <= '9') {
1246
1247
  for (;;) {
1247
1248
  while (*p >= '0' && *p <= '9') {
1248
- if (FJ_LIKELY(m10digits < 18)) { m10 = m10 * 10 + (uint64_t)(*p - '0'); m10digits++; }
1249
+ if (FJ_LIKELY(m10digits < 19)) { m10 = m10 * 10 + (uint64_t)(*p - '0'); m10digits++; }
1249
1250
  else overflow = 1;
1250
1251
  p++;
1251
1252
  }
@@ -1259,7 +1260,7 @@ static int fj_try_member_number(fj_state *st, VALUE *out) {
1259
1260
  is_float = 1; p++;
1260
1261
  for (;;) {
1261
1262
  while (*p >= '0' && *p <= '9') {
1262
- if (FJ_LIKELY(m10digits < 18)) { m10 = m10 * 10 + (uint64_t)(*p - '0'); m10digits++; frac++; }
1263
+ if (FJ_LIKELY(m10digits < 19)) { m10 = m10 * 10 + (uint64_t)(*p - '0'); m10digits++; frac++; }
1263
1264
  else overflow = 1;
1264
1265
  p++;
1265
1266
  }
@@ -5,7 +5,7 @@
5
5
 
6
6
  ## What smarter_json uses it for
7
7
 
8
- `fj_float_from_parts` (in `smarter_json.c`) routes `m10digits ≤ 18 → fj_eisel_lemire_s2d`, and `> 18 / overflow / extreme exponent → strtod` (round-to-odd). Eisel-Lemire is correctly-rounded across the whole ≤18-digit range — no round-to-even tie loss — **and** fast on the common short-mantissa case.
8
+ `fj_float_from_parts` (in `smarter_json.c`) routes `m10digits ≤ 19 → fj_eisel_lemire_s2d`, and `> 19 / overflow / extreme exponent → strtod` (round-to-odd). Eisel-Lemire is correctly-rounded across the whole ≤19-digit range — every such mantissa fits exactly in a uint64, so there is no round-to-even tie loss — **and** fast on the common short-mantissa case.
9
9
 
10
10
  ## These two files are DERIVED, not verbatim copies
11
11
 
@@ -757,8 +757,10 @@ module SmarterJSON
757
757
  # incl. LF/CR which also terminate). Stopping at a terminator/EOF means the run had no
758
758
  # interior whitespace, so there's nothing to trim and no comment marker can apply.
759
759
  #
760
- # U+FEFF is JSON5/ES5 whitespace but not in [[:space:]], so we need to add it:
761
- QL_BREAK = /[,{}\[\]]|[[:space:]]|#{[0xFEFF].pack("U")}/.freeze
760
+ # U+FEFF is JSON5/ES5 whitespace but NOT in [[:space:]]. It is deliberately kept OUT of
761
+ # this regex: a multibyte alternative defeats byteindex's fast byte-search (~3.3x slower
762
+ # on number-dense input). A trailing U+FEFF is trimmed cheaply in the fast path below.
763
+ QL_BREAK = /[,{}\[\]]|[[:space:]]/.freeze
762
764
 
763
765
  # The defaults live centrally in SmarterJSON::Options (lib/smarter_json/options.rb).
764
766
  DEFAULT_OPTIONS = Options::DEFAULT_OPTIONS
@@ -1350,7 +1352,14 @@ module SmarterJSON
1350
1352
  b = hit < @bytesize ? input.getbyte(hit) : nil
1351
1353
  if b.nil? || b == COMMA || b == RBRACE || b == RBRACKET || b == LBRACE || b == LBRACKET || b == LF || b == CR
1352
1354
  @pos = hit
1353
- return hit
1355
+ # A trailing U+FEFF (EF BB BF) is JSON5/ES5 whitespace but not in QL_BREAK, so
1356
+ # byteindex scanned past it into the run — trim it (or several in a row). On the
1357
+ # common path the last byte is a digit/letter, so the first compare fails at once.
1358
+ fin = hit
1359
+ while fin - 3 >= pos && input.getbyte(fin - 1) == 0xBF && input.getbyte(fin - 2) == 0xBB && input.getbyte(fin - 3) == 0xEF
1360
+ fin -= 3
1361
+ end
1362
+ return fin
1354
1363
  end
1355
1364
  end
1356
1365
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SmarterJSON
4
- VERSION = "1.2.0"
4
+ VERSION = "1.2.2"
5
5
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: smarter_json
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 1.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tilo Sloboda
8
8
  bindir: exe
9
9
  cert_chain: []
10
- date: 2026-06-16 00:00:00.000000000 Z
10
+ date: 2026-06-19 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: bigdecimal