RubyGems - secp256k1-native - Versions diffs - 0.17.0 → 0.18.0 - Mend

secp256k1-native 0.17.0 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

checksums.yaml +4 -4
data/CHANGELOG.md +14 -0
data/README.md +1 -1
data/ext/secp256k1_native/field.c +69 -9
data/ext/secp256k1_native/jacobian.c +2 -2
data/ext/secp256k1_native/scalar.c +91 -40
data/ext/secp256k1_native/secp256k1_native.h +31 -1
data/lib/secp256k1/version.rb +1 -1
data/lib/secp256k1.rb +104 -10
metadata +1 -2
data/lib/secp256k1_native.bundle +0 -0

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: b6a8b3524bbf944accbd5ee819c3623ce7495829d30b280fb29a905e43dcf52b
-  data.tar.gz: a0204b6cf5b4c37447d63e29425086320642cb881b9c107847a00111b4d07373
+  metadata.gz: f869c9c197727fdab65f1517decd680fa332a79a2409cbf08a3cc975dd679da7
+  data.tar.gz: bdcbb3a7fa8964600a6baf0e99249d2ce9dd76b5bbdf17ff0b70547f4c016b50
 SHA512:
-  metadata.gz: 7f0a0fd90e016a83ef50ce79f4d70c058524ae47d008992edb6250b12363ebf6c12397376da0ae6978592d9bf9a5ea05038765384f5379aba537503af9fc2b54
-  data.tar.gz: e22b4b04332c215353bfaf38f45eb03825fc9d1012a926e0c4da2369fbdc9c39bb305e3faecd69a9c476755f6c67872570d492c8531b93e0e9edaa6e7bd61872
+  metadata.gz: 2aff9d5c272393b74a4df7abed789bd40be5b06d8c90ed13c9458326129ceac2051de76c01e5ac7ba42d6c9c3c7d6fe0150a76b8b7e68dd35481d21ea48a467e
+  data.tar.gz: c45c61f5380c35774d8026b9cab72b21d7183b2fafb7b513b63caf520996c9d664e87759158b6cb4af359ff9e26064b60d70730a07066d4a480b2c976313db3f

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,19 @@
 # Changelog
+## [0.18.0] - 2026-06-30
+### Security
+- **Compiler-reconstructed timing side-channel in `Point#mul` (the secret-scalar Montgomery ladder).** Bare-metal dudect verification (issue #25; AMD Ryzen 9 9950X, GCC 15.2, `-O2`) found that GCC 15.2 reconstructs the branchless `(a & ~mask) | (b & mask)` select idiom in `uint256_select` into a secret-dependent conditional jump, leaking the scalar at dudect |t| ≈ 21. This silently undid the 0.17.0 |t|=875 fix, which relied on that select being branchless. Fixed with a value barrier (`ct_value_barrier_u64`) applied via a single `ct_mask_u64` helper to **every** constant-time select mask in the extension (`uint256_select`, `fred`/`fadd`/`fsub`/`fneg`, `scalar_reduce`/`scalar_add`, the `jp_double` infinity select, and the ladder `cswap`). Re-verified: disassembly shows no branch/`cmov` at any select line, ctgrind clean, dudect `scalar_multiply_ct` |t| → 0.68 mean (0/20 runs over 4.5). See [advisory 0001](docs/advisories/0001-compiler-reconstructed-ct-branch.md). Only `uint256_select` actively branchified under this compiler; the other sites are hardened as defence-in-depth.
+### Changed
+- Bare-metal dudect timing verification is now a **required pre-tag release gate** (not a one-off): a constant-time *source* is not a constant-time *binary*, and a compiler upgrade can silently reintroduce a branch that only a statistical run on the shipping compiler observes. Documented in [`docs/security.md`](docs/security.md#empirical-timing-verification) and [`docs/timing-verification-runbook.md`](docs/timing-verification-runbook.md).
+### Build
+- Timing harness (`timing/`) now builds on modern GCC/glibc toolchains: define `_POSIX_C_SOURCE` for `clock_gettime` under `-std=c99`, and add `-fcommon` for the `rb_mSecp256k1Native` tentative definition under GCC 10+ `-fno-common`.
 ## [0.17.0] - 2026-05-01
 ### Added

data/README.md CHANGED Viewed

@@ -2,7 +2,7 @@
 > **Before using a custom cryptographic implementation, read [Evaluating the risks](https://sgbett.github.io/secp256k1-native/risks/) — it examines what the empirical evidence says about rolling your own crypto and where this gem sits in that landscape.**
-Pure native C secp256k1 implementation for Ruby (no libsecp256k1 dependency).
+Pure native secp256k1 implementation for Ruby (no libsecp256k1 dependency).
 Provides secp256k1 elliptic curve cryptography for Ruby — field arithmetic, scalar operations, Jacobian point arithmetic, and constant-time scalar multiplication — via an optional native C extension. The gem ships a pure-Ruby base layer that works out of the box on any Ruby 2.7+ platform, with the C extension providing constant-time guarantees and ~22x acceleration when available.

data/ext/secp256k1_native/field.c CHANGED Viewed

@@ -24,8 +24,24 @@
  * so that only one copy of each function exists in the linked extension.
  * ----------------------------------------------------------------------- */
+/*
+ * Marshal a Ruby Integer into a uint256_t.
+ *
+ * @raise [TypeError] if rb_int is not an Integer (L-1: rejects Float,
+ *   Rational, BigDecimal, nil, anything responding to #to_int).
+ * @raise [ArgumentError] if rb_int is negative or exceeds 256 bits.
+ */
 uint256_t rb_to_uint256(VALUE rb_int)
 {
+    /* L-1: reject non-Integer before reaching rb_integer_pack, which itself
+     * calls rb_to_int and would silently coerce Float / Rational / objects
+     * responding to #to_int.  This check is the single load-bearing guard
+     * for ALL 16 wrappers' Integer contract — it must come FIRST, before
+     * rb_integer_pack mutates the input. */
+    if (!RB_INTEGER_TYPE_P(rb_int)) {
+        rb_raise(rb_eTypeError, "expected Integer");
+    }
     uint256_t n;
     memset(&n, 0, sizeof(n));
     int result = rb_integer_pack(rb_int, n.d, 4, sizeof(uint64_t), 0, U256_PACK_FLAGS);
@@ -183,7 +199,6 @@ void fred_internal(uint256_t *r, const uint256_t *hi, const uint256_t *lo)
     /* Compute c × hi with carry.  c fits in 33 bits, hi fits in 64 bits
      * each, so each product fits in 97 bits — safe in uint128_t. */
-    acc   = 0;
     carry = 0;
     for (i = 0; i < 4; i++) {
         acc       = (uint128_t)hi->d[i] * FRED_C + lo->d[i] + carry;
@@ -239,7 +254,7 @@ void fred_internal(uint256_t *r, const uint256_t *hi, const uint256_t *lo)
     uint64_t borrow = uint256_sub(&reduced, r, &FIELD_P);
     /* mask = all 1s if borrow == 1 (keep r), all 0s if borrow == 0 (keep reduced). */
-    uint64_t mask = -(uint64_t)(borrow != 0);
+    uint64_t mask = ct_mask_u64(borrow);
     for (i = 0; i < 4; i++) {
         r->d[i] = (r->d[i] & mask) | (reduced.d[i] & ~mask);
     }
@@ -307,6 +322,10 @@ void fsqr_internal(uint256_t *r, const uint256_t *a)
  * fadd_internal — modular addition.
  *
  * Computes a + b, then branchlessly subtracts P if the result >= P.
+ *
+ * Precondition: a, b < P (canonical).  Pre-reduction is the wrapper's
+ * responsibility — see rb_fadd.  The Jacobian path (jacobian.c) only feeds
+ * canonical intermediates produced by other internals.
  */
 void fadd_internal(uint256_t *r, const uint256_t *a, const uint256_t *b)
 {
@@ -323,7 +342,7 @@ void fadd_internal(uint256_t *r, const uint256_t *a, const uint256_t *b)
      * If overflow == 0 and borrow == 1 : sum < P, want sum.
      * Combined: keep sum iff (overflow == 0 && borrow == 1). */
     uint64_t keep_original = (~overflow) & borrow;
-    uint64_t mask = -(uint64_t)(keep_original != 0); /* all 1s iff sum < P */
+    uint64_t mask = ct_mask_u64(keep_original); /* all 1s iff sum < P */
     int i;
     for (i = 0; i < 4; i++) {
         r->d[i] = (sum.d[i] & mask) | (reduced.d[i] & ~mask);
@@ -334,6 +353,8 @@ void fadd_internal(uint256_t *r, const uint256_t *a, const uint256_t *b)
  * fsub_internal — modular subtraction.
  *
  * Computes a - b; if the result underflows, adds P back — branchlessly.
+ *
+ * Precondition: a, b < P (canonical) — see fadd_internal.
  */
 void fsub_internal(uint256_t *r, const uint256_t *a, const uint256_t *b)
 {
@@ -346,7 +367,7 @@ void fsub_internal(uint256_t *r, const uint256_t *a, const uint256_t *b)
     (void)carry; /* carry is 0 here since diff + P < 2^256 when borrow == 1 */
     /* mask: all 1s if borrow == 1 (use corrected), all 0s otherwise (use diff). */
-    uint64_t mask = -(uint64_t)(borrow != 0);
+    uint64_t mask = ct_mask_u64(borrow);
     int i;
     for (i = 0; i < 4; i++) {
         r->d[i] = (corrected.d[i] & mask) | (diff.d[i] & ~mask);
@@ -357,6 +378,8 @@ void fsub_internal(uint256_t *r, const uint256_t *a, const uint256_t *b)
  * fneg_internal — modular negation.
  *
  * Returns P - a for non-zero a, and 0 for a == 0 — branchlessly.
+ *
+ * Precondition: a < P (canonical) — see fadd_internal.
  */
 void fneg_internal(uint256_t *r, const uint256_t *a)
 {
@@ -365,7 +388,7 @@ void fneg_internal(uint256_t *r, const uint256_t *a)
     /* If a == 0 the result should be 0, not P. */
     uint64_t is_zero = uint256_is_zero(a);
-    uint64_t mask = -(uint64_t)(is_zero != 0); /* all 1s if a is zero */
+    uint64_t mask = ct_mask_u64(is_zero); /* all 1s if a is zero */
     int i;
     for (i = 0; i < 4; i++) {
         /* zero mask: 0 where is_zero, negated.d[i] where not */
@@ -435,7 +458,13 @@ int fsqrt_internal(uint256_t *r, const uint256_t *a)
         diff |= (check.d[i] ^ a_reduced.d[i]);
     }
-    if (diff != 0) return 0; /* not a quadratic residue */
+    if (diff != 0) {
+        /* Not a quadratic residue.  Honour the docstring contract by
+         * writing a defined value to *r so callers cannot inadvertently
+         * read uninitialised memory if they ignore the return code. */
+        uint256_copy(r, &zero);
+        return 0;
+    }
     uint256_copy(r, &result);
     return 1;
@@ -455,6 +484,14 @@ int fsqrt_internal(uint256_t *r, const uint256_t *a)
 static VALUE rb_fred(VALUE self, VALUE x)
 {
     (void)self;
+    /* L-1: reject non-Integer before rb_integer_pack, which would silently
+     * coerce Float / Rational / objects responding to #to_int.  rb_fred packs
+     * 8 limbs (vs 4 in rb_to_uint256), so it does not flow through that
+     * helper — guard locally to honour the same boundary contract. */
+    if (!RB_INTEGER_TYPE_P(x)) {
+        rb_raise(rb_eTypeError, "expected Integer");
+    }
     /* fred is used for reducing wide intermediates.  Pack into 8 limbs. */
     uint64_t limbs[8];
     memset(limbs, 0, sizeof(limbs));
@@ -519,8 +556,17 @@ static VALUE rb_fadd(VALUE self, VALUE a, VALUE b)
     (void)self;
     uint256_t ua = rb_to_uint256(a);
     uint256_t ub = rb_to_uint256(b);
+    /* L-3: pre-reduce operands so fadd_internal's `a, b < P` precondition is
+     * always satisfied (mirrors rb_finv / rb_fsqrt).  fred handles 512-bit
+     * inputs; here we use hi=0 so it's a single fast pass on each operand. */
+    uint256_t zero_limbs = {{ 0ULL, 0ULL, 0ULL, 0ULL }};
+    uint256_t ua_reduced, ub_reduced;
+    fred_internal(&ua_reduced, &zero_limbs, &ua);
+    fred_internal(&ub_reduced, &zero_limbs, &ub);
     uint256_t r;
-    fadd_internal(&r, &ua, &ub);
+    fadd_internal(&r, &ua_reduced, &ub_reduced);
     return uint256_to_rb(&r);
 }
@@ -535,8 +581,15 @@ static VALUE rb_fsub(VALUE self, VALUE a, VALUE b)
     (void)self;
     uint256_t ua = rb_to_uint256(a);
     uint256_t ub = rb_to_uint256(b);
+    /* L-3: pre-reduce operands (see rb_fadd). */
+    uint256_t zero_limbs = {{ 0ULL, 0ULL, 0ULL, 0ULL }};
+    uint256_t ua_reduced, ub_reduced;
+    fred_internal(&ua_reduced, &zero_limbs, &ua);
+    fred_internal(&ub_reduced, &zero_limbs, &ub);
     uint256_t r;
-    fsub_internal(&r, &ua, &ub);
+    fsub_internal(&r, &ua_reduced, &ub_reduced);
     return uint256_to_rb(&r);
 }
@@ -550,8 +603,15 @@ static VALUE rb_fneg(VALUE self, VALUE a)
 {
     (void)self;
     uint256_t ua = rb_to_uint256(a);
+    /* L-3 / I-3: pre-reduce the operand so fneg_internal's `a < P`
+     * precondition is always satisfied (mirrors rb_finv / rb_fsqrt). */
+    uint256_t zero_limbs = {{ 0ULL, 0ULL, 0ULL, 0ULL }};
+    uint256_t ua_reduced;
+    fred_internal(&ua_reduced, &zero_limbs, &ua);
     uint256_t r;
-    fneg_internal(&r, &ua);
+    fneg_internal(&r, &ua_reduced);
     return uint256_to_rb(&r);
 }

data/ext/secp256k1_native/jacobian.c CHANGED Viewed

@@ -131,7 +131,7 @@ void jp_double_internal(uint256_t r[3], const uint256_t p[3])
      * Compute mask = all 1s if Y1 is zero, all 0s otherwise.
      * Use the mask to select between [x3, y3, z3] and JP_INFINITY. */
     uint64_t is_zero = uint256_is_zero(&p[1]);
-    uint64_t mask = -(uint64_t)(is_zero != 0); /* all 1s if Y1 == 0 */
+    uint64_t mask = ct_mask_u64(is_zero); /* all 1s if Y1 == 0 */
     int i;
     for (i = 0; i < 4; i++) {
         r[0].d[i] = (x3.d[i] & ~mask) | (JP_INF_X.d[i] & mask);
@@ -383,7 +383,7 @@ static VALUE rb_jp_neg(VALUE self, VALUE rb_point)
  */
 static void cswap(uint64_t bit, uint256_t a[3], uint256_t b[3])
 {
-    uint64_t mask = -(uint64_t)bit; /* all-ones if bit==1, all-zeros if bit==0 */
+    uint64_t mask = ct_mask_u64(bit); /* all-ones if bit==1, all-zeros if bit==0 */
     int j, k;
     for (j = 0; j < 3; j++) {
         for (k = 0; k < 4; k++) {

data/ext/secp256k1_native/scalar.c CHANGED Viewed

@@ -28,9 +28,9 @@
  *
  * Constant-time discipline
  * ------------------------
- * scalar_reduce, scalar_add_internal use branchless conditional selection.
- * scalar_inv_internal iterates over bits of the public constant N-2, which
- * is safe.
+ * scalar_reduce_limbs and scalar_add_internal use branchless conditional
+ * selection — no operand-dependent branches in either.  scalar_inv_internal
+ * iterates over bits of the public constant N-2, which is safe.
  */
 /* -----------------------------------------------------------------------
@@ -90,11 +90,16 @@ static const uint256_t SCALAR_ONE = {{ 1ULL, 0ULL, 0ULL, 0ULL }};
  * After the first fold the 512-bit value has been reduced to at most
  * ~385 bits.  The overflow above bit 255 (stored in the temporary carry
  * words) requires a second fold.  After two folds the result fits in
- * 256 bits + at most 1 bit, handled by a conditional subtraction.
+ * 256 bits + at most 1 bit; the residual fold then propagates that
+ * remaining bit (the "topcarry") and a branchless conditional-subtract of N
+ * selects whichever of {r, r-N} is the canonical residue.  When topcarry is
+ * set, the subtract also folds the dropped 2^256 back as c_N (= 2^256 - N).
  *
  * We accumulate into an 8-limb array and reuse the upper limbs as
  * temporaries for the folded-in contributions, so no extra allocation is
  * needed.
+ *
+ * Branchless throughout — no operand-dependent control flow.
  */
 static void scalar_reduce_limbs(uint256_t *r, uint64_t product[8])
 {
@@ -161,9 +166,12 @@ static void scalar_reduce_limbs(uint256_t *r, uint64_t product[8])
     uint64_t hi2[4];
     for (i = 0; i < 4; i++) { hi2[i] = t[4 + i]; t[4 + i] = 0; }
+    /* Second fold — unconditional loop body (no branch on h being zero).
+     * The body is a faithful no-op when h == 0 (each `h * CONST` term is 0
+     * and the carries propagate identically), so removing the guard changes
+     * no result, only the timing.  (Closes I-11 secret-dependent branch.) */
     for (i = 0; i < 4; i++) {
         uint64_t h = hi2[i];
-        if (h == 0) continue;
         acc       = (uint128_t)h * CN_LO + t[i];
         t[i]      = (uint64_t)acc;
@@ -183,30 +191,46 @@ static void scalar_reduce_limbs(uint256_t *r, uint64_t product[8])
         /* After two folds, any carry here is negligible (< 2). */
     }
-    /* The result is now in t[0..3] with at most a tiny overflow in t[4].
-     * Copy t[0..3] into r and apply a conditional subtraction. */
+    /* Result is now in t[0..3] with a small residual in t[4].
+     *
+     * Bound: after the first fold the value is < 2^386 (the original 512-bit
+     * product reduced by c_N ≈ 2^129).  The second fold reduces that overflow
+     * by another factor of c_N, so the post-second-fold residual is < 2^259
+     * — i.e. t[4] is a few bits wide (at most a small single-digit value),
+     * and the residual fold below produces V < 2N.  V < 2N means a single
+     * conditional subtract of N is sufficient to canonicalise. */
     r->d[0] = t[0]; r->d[1] = t[1]; r->d[2] = t[2]; r->d[3] = t[3];
-    /* Handle residual overflow from t[4] (at most 1 after two folds of
-     * a 512-bit input): add t[4] × c_N into r. */
+    /* Residual fold — unconditional (I-11: no branch on the carry) and
+     * capturing the carry OUT of the top limb (H-1: previously dropped at
+     * bit 255).  After two folds the value here is < 2^257, so topcarry
+     * is 0 or 1. */
     uint64_t carry3 = t[4];
-    if (carry3) {
-        /* carry3 is at most a few bits wide — use simple arithmetic. */
-        uint128_t a0 = (uint128_t)carry3 * CN_LO + r->d[0];
-        r->d[0] = (uint64_t)a0;
-        uint128_t a1 = (uint128_t)carry3 * CN_MID + r->d[1] + (a0 >> 64);
-        r->d[1] = (uint64_t)a1;
-        uint128_t a2 = (uint128_t)carry3 + r->d[2] + (a1 >> 64);
-        r->d[2] = (uint64_t)a2;
-        r->d[3] += (uint64_t)(a2 >> 64);
-    }
-    /* Branchless final conditional subtraction: keep r - N if r >= N. */
+    uint128_t a0 = (uint128_t)carry3 * CN_LO + r->d[0];
+    r->d[0] = (uint64_t)a0;
+    uint128_t a1 = (uint128_t)carry3 * CN_MID + r->d[1] + (a0 >> 64);
+    r->d[1] = (uint64_t)a1;
+    uint128_t a2 = (uint128_t)carry3 + r->d[2] + (a1 >> 64);
+    r->d[2] = (uint64_t)a2;
+    uint128_t a3 = (uint128_t)r->d[3] + (a2 >> 64);
+    r->d[3] = (uint64_t)a3;
+    uint64_t topcarry = (uint64_t)(a3 >> 64);    /* 0 or 1 — was H-1 dropped bit */
+    /* Branchless final reduction: keep (r - N) when topcarry is set OR r >= N.
+     *
+     * c_N == 2^256 - N, so subtracting N once when topcarry is set converts
+     * the dropped 2^256 into the correct +c_N residue.  Total value V < 2N
+     * (from V < 2^257 and N ≈ 2^256), so a single conditional subtract of N
+     * is sufficient.
+     *
+     * Using (1 ^ borrow) instead of (borrow == 0) avoids any compiler
+     * latitude to emit a compare-and-branch for the predicate. */
     uint256_t reduced;
-    uint64_t borrow = uint256_sub(&reduced, r, &CURVE_N);
-    uint64_t mask = -(uint64_t)(borrow != 0); /* all 1s if r < N */
+    uint64_t borrow = uint256_sub(&reduced, r, &CURVE_N);   /* borrow==0 <=> r >= N */
+    uint64_t keep_reduced = topcarry | (1 ^ borrow);
+    uint64_t mask = ct_mask_u64(keep_reduced);
     for (i = 0; i < 4; i++) {
-        r->d[i] = (r->d[i] & mask) | (reduced.d[i] & ~mask);
+        r->d[i] = (reduced.d[i] & mask) | (r->d[i] & ~mask);
     }
 }
@@ -269,6 +293,9 @@ static void scalar_sqr_internal(uint256_t *r, const uint256_t *a)
  * scalar_add_internal — modular addition mod N.
  *
  * Computes a + b, then branchlessly subtracts N if the result >= N.
+ *
+ * Precondition: a, b < N (canonical).  Pre-reduction is the wrapper's
+ * responsibility — see rb_scalar_add.
  */
 void scalar_add_internal(uint256_t *r, const uint256_t *a, const uint256_t *b)
 {
@@ -283,7 +310,7 @@ void scalar_add_internal(uint256_t *r, const uint256_t *a, const uint256_t *b)
      * If overflow == 0 and borrow == 0: sum >= N, want reduced.
      * If overflow == 0 and borrow == 1: sum < N, want sum. */
     uint64_t keep_original = (~overflow) & borrow;
-    uint64_t mask = -(uint64_t)(keep_original != 0);
+    uint64_t mask = ct_mask_u64(keep_original);
     int i;
     for (i = 0; i < 4; i++) {
         r->d[i] = (sum.d[i] & mask) | (reduced.d[i] & ~mask);
@@ -322,26 +349,30 @@ void scalar_inv_internal(uint256_t *r, const uint256_t *a)
  * call-seq:
  *   Secp256k1Native.scalar_mod(a) -> Integer
  *
- * Reduce +a+ modulo the curve order N.  Handles negative Ruby Integers:
- * if +a+ is negative, the result is +a mod N+ in the range [0, N).
+ * Reduce +a+ modulo the curve order N.  Accepts any Ruby Integer — negative,
+ * positive, and arbitrary width (including values >= 2^256).
  */
 static VALUE rb_scalar_mod(VALUE self, VALUE a)
 {
     (void)self;
-    /* Handle negative values by delegating to Ruby's own % operator which
-     * always returns a non-negative result for a positive modulus. */
-    VALUE n_rb  = uint256_to_rb(&CURVE_N);
-    int negative = RTEST(rb_funcall(a, rb_intern("<"), 1, INT2FIX(0)));
-    VALUE a_norm;
-    if (negative) {
-        /* Ruby % is always non-negative when the modulus is positive */
-        a_norm = rb_funcall(a, rb_intern("%"), 1, n_rb);
-    } else {
-        a_norm = a;
+    /* L-1: reject non-Integer BEFORE Ruby `%` is dispatched on the receiver.
+     * Without this, a String would raise NoMethodError (no `%` of Integer),
+     * and any object whose `%` happens to return an Integer would silently
+     * succeed — both bypass the wrapper's documented TypeError contract. */
+    if (!RB_INTEGER_TYPE_P(a)) {
+        rb_raise(rb_eTypeError, "expected Integer");
     }
+    /* L-4: pre-reduce via Ruby `%` unconditionally.  This is intentionally
+     * different from the other scalar wrappers (which use the C-level
+     * scalar_reduce): Ruby `%` handles both negative inputs (returns the
+     * non-negative residue) and arbitrary width (rb_to_uint256 would raise
+     * "exceeds 256 bits" on values >= 2^256 otherwise), so it is the right
+     * canonicalisation primitive at this boundary. */
+    VALUE n_rb  = uint256_to_rb(&CURVE_N);
+    VALUE a_norm = rb_funcall(a, rb_intern("%"), 1, n_rb);
     uint256_t ua = rb_to_uint256(a_norm);
     uint256_t zero_limbs = {{ 0ULL, 0ULL, 0ULL, 0ULL }};
     uint256_t r;
@@ -360,8 +391,18 @@ static VALUE rb_scalar_mul(VALUE self, VALUE a, VALUE b)
     (void)self;
     uint256_t ua = rb_to_uint256(a);
     uint256_t ub = rb_to_uint256(b);
+    /* Defence in depth: pre-reduce both operands mod N before multiplying.
+     * scalar_mul_internal is correct on any 256-bit operand pair after the
+     * H-1 fix, so this is belt-and-braces — but it makes the Ruby boundary's
+     * input contract explicit and consistent with rb_scalar_inv. */
+    uint256_t zero_limbs = {{ 0ULL, 0ULL, 0ULL, 0ULL }};
+    uint256_t ua_reduced, ub_reduced;
+    scalar_reduce(&ua_reduced, &zero_limbs, &ua);
+    scalar_reduce(&ub_reduced, &zero_limbs, &ub);
     uint256_t r;
-    scalar_mul_internal(&r, &ua, &ub);
+    scalar_mul_internal(&r, &ua_reduced, &ub_reduced);
     return uint256_to_rb(&r);
 }
@@ -403,8 +444,18 @@ static VALUE rb_scalar_add(VALUE self, VALUE a, VALUE b)
     (void)self;
     uint256_t ua = rb_to_uint256(a);
     uint256_t ub = rb_to_uint256(b);
+    /* M-1 correctness fix: scalar_add_internal subtracts N at most once and is
+     * therefore correct only when both operands are already < N.  Pre-reduce
+     * both operands mod N so the wrapper's documented `(a + b) mod N` contract
+     * holds for any 256-bit input (mirrors rb_scalar_inv / rb_scalar_mul). */
+    uint256_t zero_limbs = {{ 0ULL, 0ULL, 0ULL, 0ULL }};
+    uint256_t ua_reduced, ub_reduced;
+    scalar_reduce(&ua_reduced, &zero_limbs, &ua);
+    scalar_reduce(&ub_reduced, &zero_limbs, &ub);
     uint256_t r;
-    scalar_add_internal(&r, &ua, &ub);
+    scalar_add_internal(&r, &ua_reduced, &ub_reduced);
     return uint256_to_rb(&r);
 }

data/ext/secp256k1_native/secp256k1_native.h CHANGED Viewed

@@ -107,11 +107,41 @@ void register_scalar_methods(VALUE mod);
  * Branchless selection helper
  * ----------------------------------------------------------------------- */
+/* Opaque value barrier: returns x unchanged, but the empty volatile asm forces
+ * the compiler to treat the result as an unknown register value. Without it,
+ * GCC (observed on 15.2, -O2) recognises the all-0s/all-1s select masks used
+ * throughout this extension, reconstructs the original boolean, and emits a
+ * secret-dependent conditional jump — defeating the branchless intent. This is
+ * the same technique libsecp256k1/BoringSSL use to keep constant-time selects
+ * flat. On compilers without GNU asm (e.g. MSVC, where this extension is a
+ * no-op anyway) it degrades to an identity function. */
+static inline uint64_t ct_value_barrier_u64(uint64_t x) {
+#if defined(__GNUC__) || defined(__clang__)
+    __asm__ volatile("" : "+r"(x));
+#endif
+    return x;
+}
+/* Build a constant-time select mask: all-ones (0xFFFF...FF) when flag != 0,
+ * all-zeros otherwise. The value barrier is applied here so that EVERY mask in
+ * this extension is opaque to the optimiser before it feeds a branchless
+ * mask-select — both polarities are used in this codebase:
+ *   (a & mask) | (b & ~mask)   — selects `a` when mask is all-ones
+ *   (a & ~mask) | (b & mask)   — selects `b` when mask is all-ones (e.g. uint256_select)
+ * Either form is equivalent for an all-0/all-1 mask; the comment lists both so
+ * an auditor reading a call site knows the polarity is intentional, not a bug.
+ * All constant-time masks MUST be constructed through this helper — a raw
+ * `-(uint64_t)(cond)` is a latent branch waiting for the compiler to
+ * reconstruct it. */
+static inline uint64_t ct_mask_u64(uint64_t flag) {
+    return ct_value_barrier_u64(-(uint64_t)(flag != 0));
+}
 /* Branchless conditional select: if flag is non-zero, *r = *b; else *r = *a.
  * Constant-time: no branch on flag. */
 static inline void uint256_select(uint256_t *r, const uint256_t *a,
                                    const uint256_t *b, uint64_t flag) {
-    uint64_t mask = -(uint64_t)(flag != 0);
+    uint64_t mask = ct_mask_u64(flag);
     r->d[0] = (a->d[0] & ~mask) | (b->d[0] & mask);
     r->d[1] = (a->d[1] & ~mask) | (b->d[1] & mask);
     r->d[2] = (a->d[2] & ~mask) | (b->d[2] & mask);

data/lib/secp256k1/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Secp256k1
-  VERSION = '0.17.0'
+  VERSION = '0.18.0'
 end

data/lib/secp256k1.rb CHANGED Viewed

@@ -138,12 +138,28 @@ module Secp256k1
   end
   # Modular subtraction in the field.
+  #
+  # Canonicalises both operands so the result matches the C wrapper for any
+  # *non-negative* 256-bit input — load-bearing for the dfuzz differential,
+  # where pure-Ruby serves as the oracle. The dfuzz harness only feeds
+  # non-negative inputs (xorshift output, plus structured P-band vectors),
+  # so the differential never observes the negative case.
+  #
+  # Note: pure-Ruby accepts negative inputs (Ruby `%` canonicalises them);
+  # the C wrapper rejects negatives via `rb_to_uint256`. Backend parity
+  # holds for all >= 0 inputs; intentional divergence on negatives.
   def fsub(a, b)
+    a %= P
+    b %= P
     a >= b ? a - b : P - (b - a)
   end
   # Modular negation in the field.
+  #
+  # Canonicalises the operand so the result matches the C wrapper for any
+  # non-negative 256-bit input — see {#fsub} for the negative-input note.
   def fneg(a)
+    a %= P
     a.zero? ? 0 : P - a
   end
@@ -291,7 +307,10 @@ module Secp256k1
   # @!visibility private
   # Cache for precomputed wNAF tables, keyed by "window:x:y".
-  # Evicts oldest entry when the LRU limit is reached.
+  # FIFO eviction: the oldest *inserted* entry is dropped when the cap
+  # is reached (Hash preserves insertion order; we delete the first key).
+  # Bounded at WNAF_CACHE_MAX entries; keyed only on the public base
+  # point — no secret-scalar exposure.
   WNAF_TABLE_CACHE = {} # rubocop:disable Style/MutableConstant
   # @!visibility private
@@ -317,7 +336,7 @@ module Secp256k1
     tbl = WNAF_TABLE_CACHE[cache_key]
     if tbl.nil?
-      # Evict the oldest entry when the cache is full (simple LRU).
+      # FIFO eviction: drop the oldest *inserted* entry when the cache is full.
       WNAF_TABLE_CACHE.delete(WNAF_TABLE_CACHE.keys.first) if WNAF_TABLE_CACHE.size >= WNAF_CACHE_MAX
       tbl_size = 1 << (window - 1) # e.g. w=5 -> 16 entries
@@ -437,7 +456,24 @@ module Secp256k1
     # @param x [Integer, nil] x-coordinate (nil for infinity)
     # @param y [Integer, nil] y-coordinate (nil for infinity)
+    # @raise [ArgumentError] if x and y are not both nil and not both
+    #   Integers in [0, P)
     def initialize(x, y)
+      # I-3 mitigation, hardened: only two valid shapes are accepted —
+      # the point at infinity (nil, nil), or a finite point with both
+      # coordinates canonical in [0, P). Catches Point.new(1, P-of-range),
+      # Point.new(-1, 5), Point.new(nil, 5), and similar half-states at
+      # construction so no downstream path (negate, to_octet_string,
+      # on_curve?) has to second-guess the invariant.
+      if x.nil? && y.nil?
+        # point at infinity — both coordinates absent
+      elsif x.is_a?(Integer) && y.is_a?(Integer) && x >= 0 && x < P && y >= 0 && y < P
+        # finite point with canonical coordinates
+      else
+        raise ArgumentError,
+              'Point requires (nil, nil) for infinity or two Integers in [0, P)'
+      end
       @x = x
       @y = y
     end
@@ -449,6 +485,37 @@ module Secp256k1
       new(nil, nil)
     end
+    # Construct a Point from raw (x, y) coordinates with curve-membership
+    # validation. This is the **required** entry point for caller-supplied
+    # coordinates (e.g. from an external protocol or user input).
+    #
+    # `Point.new` is intended for always-on-curve intermediates produced by
+    # `mul` / `mul_vt` / `add` / `negate`; it validates only the range of
+    # the coordinates, not that they satisfy y² = x³ + 7 (mod P). Calling
+    # `mul` on a Point constructed via `Point.new` with off-curve
+    # coordinates is an invalid-curve precondition that this method
+    # exists to close (L-5).
+    #
+    # @param x [Integer] x-coordinate in [0, P)
+    # @param y [Integer] y-coordinate in [0, P)
+    # @return [Point]
+    # @raise [ArgumentError] if x or y is nil (use `Point.infinity` for
+    #   infinity); if x or y is not an Integer in [0, P) (raised by `new`);
+    #   or if (x, y) is not on the curve
+    def self.from_coordinates(x, y)
+      # Reject the (nil, nil) infinity shape that Point.new accepts. This
+      # method's contract is "raw (x, y) Integers"; callers wanting infinity
+      # should use Point.infinity (or Point.new(nil, nil) on the internal path).
+      # Without this check, on_curve? returns true for infinity and we would
+      # silently return it.
+      raise ArgumentError, 'x and y must be Integers' if x.nil? || y.nil?
+      pt = new(x, y)
+      raise ArgumentError, 'point is not on the secp256k1 curve' unless pt.on_curve?
+      pt
+    end
     # The generator point G.
     #
     # @return [Point]
@@ -464,6 +531,15 @@ module Secp256k1
     # @raise [ArgumentError] if the encoding is invalid or the point
     #   is not on the curve
     def self.from_bytes(bytes)
+      # I-4: reject non-String / empty input up front with a clean
+      # ArgumentError. Without this, nil / Float / Integer raise
+      # NoMethodError (on `.encoding`), and an empty String raises
+      # NoMethodError (on `nil.to_s` in the else-branch error formatting).
+      # All fail closed either way, but the error type is wrong.
+      unless bytes.is_a?(String) && !bytes.empty?
+        raise ArgumentError, 'bytes must be a non-empty String'
+      end
       bytes = bytes.b if bytes.encoding != Encoding::BINARY
       prefix = bytes.getbyte(0)
@@ -558,10 +634,8 @@ module Secp256k1
               'Set SECP256K1_ALLOW_PURE_RUBY_CT=1 or call Secp256k1.allow_pure_ruby_ct! to override.'
       end
-      return self.class.infinity if scalar.zero? || infinity?
-      scalar %= N
-      return self.class.infinity if scalar.zero?
+      scalar = normalise_scalar(scalar)
+      return self.class.infinity if scalar.nil?
       jp = Secp256k1.scalar_multiply_ct(scalar, @x, @y)
       affine = Secp256k1.jp_to_affine(jp)
@@ -582,10 +656,8 @@ module Secp256k1
     # @param scalar [Integer] the public scalar multiplier
     # @return [Point] the resulting point
     def mul_vt(scalar)
-      return self.class.infinity if scalar.zero? || infinity?
-      scalar %= N
-      return self.class.infinity if scalar.zero?
+      scalar = normalise_scalar(scalar)
+      return self.class.infinity if scalar.nil?
       jp = Secp256k1.scalar_multiply_wnaf(scalar, @x, @y)
       affine = Secp256k1.jp_to_affine(jp)
@@ -594,6 +666,28 @@ module Secp256k1
       self.class.new(affine[0], affine[1])
     end
+    private
+    # Validate and canonicalise a scalar for multiplication (L-2).
+    #
+    # @param scalar [Integer] the scalar multiplier
+    # @return [Integer, nil] the scalar reduced mod N, or nil if the
+    #   product would be infinity (scalar is zero mod N, or self is the
+    #   point at infinity)
+    # @raise [ArgumentError] if scalar is not an Integer
+    def normalise_scalar(scalar)
+      raise ArgumentError, 'scalar must be an Integer' unless scalar.is_a?(Integer)
+      return nil if infinity?
+      scalar %= N
+      return nil if scalar.zero?
+      scalar
+    end
+    public
     # Point addition: self + other.
     #
     # @param other [Point]

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: secp256k1-native
 version: !ruby/object:Gem::Version
-  version: 0.17.0
+  version: 0.18.0
 platform: ruby
 authors:
 - Simon Bettison
@@ -32,7 +32,6 @@ files:
 - ext/secp256k1_native/secp256k1_native.h
 - lib/secp256k1.rb
 - lib/secp256k1/version.rb
-- lib/secp256k1_native.bundle
 - secp256k1-native.gemspec
 homepage: https://github.com/sgbett/secp256k1-native
 licenses:

data/lib/secp256k1_native.bundle DELETED Viewed

Binary file