@bcts/uniform-resources 1.0.0-alpha.9 → 1.0.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/utils.ts CHANGED
@@ -1,8 +1,16 @@
1
- import { InvalidTypeError } from "./error.js";
1
+ /**
2
+ * Copyright © 2023-2026 Blockchain Commons, LLC
3
+ * Copyright © 2025-2026 Parity Technologies
4
+ *
5
+ */
6
+
7
+ import { BytewordsError, InvalidTypeError } from "./error.js";
2
8
 
3
9
  /**
4
10
  * Checks if a character is a valid UR type character.
5
- * Valid characters are lowercase letters, digits, and hyphens.
11
+ *
12
+ * Mirrors Rust's `URTypeChar::is_ur_type` (`bc-ur-rust/src/utils.rs:6-19`):
13
+ * lowercase a-z, digits 0-9, and the hyphen `-`.
6
14
  */
7
15
  export function isURTypeChar(char: string): boolean {
8
16
  const code = char.charCodeAt(0);
@@ -17,10 +25,14 @@ export function isURTypeChar(char: string): boolean {
17
25
 
18
26
  /**
19
27
  * Checks if a string is a valid UR type.
20
- * Valid UR types contain only lowercase letters, digits, and hyphens.
28
+ *
29
+ * Mirrors Rust's `URTypeString::is_ur_type` (`bc-ur-rust/src/utils.rs:26-32`)
30
+ * which is `self.chars().all(...)` — meaning **the empty string is accepted**
31
+ * (a vacuously-true `all` over no chars). We mirror that here so that
32
+ * `URType::new("")` succeeds in both ports; the round-trip then fails at
33
+ * decode-time with `TypeUnspecified`.
21
34
  */
22
35
  export function isValidURType(urType: string): boolean {
23
- if (urType.length === 0) return false;
24
36
  return Array.from(urType).every((char) => isURTypeChar(char));
25
37
  }
26
38
 
@@ -574,39 +586,115 @@ export const BYTEMOJIS: string[] = [
574
586
  ];
575
587
 
576
588
  /**
577
- * Encodes a 4-byte slice as a string of bytewords for identification.
589
+ * Encodes an arbitrary byte slice as a string of space-separated bytewords.
590
+ *
591
+ * Mirrors `bytewords::encode_to_words` in `bc-ur-rust` (≥ v0.19.1). Does not
592
+ * add a CRC32 checksum — use {@link encodeBytewords} for UR-style encoding.
578
593
  */
579
- export function encodeBytewordsIdentifier(data: Uint8Array): string {
580
- if (data.length !== 4) {
581
- throw new Error("Identifier data must be exactly 4 bytes");
582
- }
594
+ export function encodeToWords(data: Uint8Array): string {
583
595
  const words: string[] = [];
584
- for (let i = 0; i < 4; i++) {
585
- const byte = data[i];
586
- if (byte === undefined) throw new Error("Invalid byte");
596
+ for (const byte of data) {
587
597
  const word = BYTEWORDS[byte];
588
- if (word === "" || word === undefined) throw new Error("Invalid byteword mapping");
598
+ if (word === undefined) throw new Error(`Invalid byte value: ${byte}`);
589
599
  words.push(word);
590
600
  }
591
601
  return words.join(" ");
592
602
  }
593
603
 
604
+ /**
605
+ * Encodes an arbitrary byte slice as a string of space-separated bytemojis.
606
+ *
607
+ * Mirrors `bytewords::encode_to_bytemojis` in `bc-ur-rust` (≥ v0.19.1).
608
+ */
609
+ export function encodeToBytemojis(data: Uint8Array): string {
610
+ const emojis: string[] = [];
611
+ for (const byte of data) {
612
+ const emoji = BYTEMOJIS[byte];
613
+ if (emoji === undefined) throw new Error(`Invalid byte value: ${byte}`);
614
+ emojis.push(emoji);
615
+ }
616
+ return emojis.join(" ");
617
+ }
618
+
619
+ /**
620
+ * Encodes an arbitrary byte slice as minimal bytewords (first + last letter of
621
+ * each word, concatenated with no separator).
622
+ *
623
+ * Mirrors `bytewords::encode_to_minimal_bytewords` in `bc-ur-rust`
624
+ * (≥ v0.19.1). Does not add a CRC32 checksum.
625
+ */
626
+ export function encodeToMinimalBytewords(data: Uint8Array): string {
627
+ let out = "";
628
+ for (const byte of data) {
629
+ const word = BYTEWORDS[byte];
630
+ if (word === undefined) throw new Error(`Invalid byte value: ${byte}`);
631
+ out += word[0] + word[word.length - 1];
632
+ }
633
+ return out;
634
+ }
635
+
636
+ /**
637
+ * Encodes a 4-byte slice as a string of bytewords for identification.
638
+ *
639
+ * Thin wrapper over {@link encodeToWords} that enforces the 4-byte length
640
+ * contract historically used by `bc-ur-rust`'s `bytewords::identifier`.
641
+ */
642
+ export function encodeBytewordsIdentifier(data: Uint8Array): string {
643
+ if (data.length !== 4) {
644
+ throw new Error("Identifier data must be exactly 4 bytes");
645
+ }
646
+ return encodeToWords(data);
647
+ }
648
+
594
649
  /**
595
650
  * Encodes a 4-byte slice as a string of bytemojis for identification.
651
+ *
652
+ * Thin wrapper over {@link encodeToBytemojis} that enforces the 4-byte length
653
+ * contract historically used by `bc-ur-rust`'s `bytewords::bytemoji_identifier`.
596
654
  */
597
655
  export function encodeBytemojisIdentifier(data: Uint8Array): string {
598
656
  if (data.length !== 4) {
599
657
  throw new Error("Identifier data must be exactly 4 bytes");
600
658
  }
601
- const emojis: string[] = [];
602
- for (let i = 0; i < 4; i++) {
603
- const byte = data[i];
604
- if (byte === undefined) throw new Error("Invalid byte");
605
- const emoji = BYTEMOJIS[byte];
606
- if (emoji === "" || emoji === undefined) throw new Error("Invalid bytemoji mapping");
607
- emojis.push(emoji);
659
+ return encodeToBytemojis(data);
660
+ }
661
+
662
+ /**
663
+ * Returns `true` if `emoji` is one of the 256 bytemojis.
664
+ *
665
+ * Mirrors `bytewords::is_valid_bytemoji` in `bc-ur-rust` (≥ v0.19.1).
666
+ */
667
+ export function isValidBytemoji(emoji: string): boolean {
668
+ return BYTEMOJI_SET.has(emoji);
669
+ }
670
+
671
+ /**
672
+ * Canonicalises a byteword token (2–4 ASCII letters, case-insensitive) to its
673
+ * full 4-letter lowercase form. Returns `undefined` if the token is not a
674
+ * valid byteword or any of its short forms.
675
+ *
676
+ * Mirrors `bytewords::canonicalize_byteword` in `bc-ur-rust` (≥ v0.19.1).
677
+ *
678
+ * - 2-letter tokens are matched against the first + last letter of each
679
+ * byteword (identical to the minimal bytewords encoding).
680
+ * - 3-letter tokens are matched against the first 3 and the last 3 letters of
681
+ * each byteword; if both match different entries, the first-3 match wins
682
+ * (matching rust's `or_else` priority).
683
+ * - 4-letter tokens must exactly match a full byteword (after lower-casing).
684
+ */
685
+ export function canonicalizeByteword(token: string): string | undefined {
686
+ const lower = token.toLowerCase();
687
+ switch (lower.length) {
688
+ case 4:
689
+ return BYTEWORDS_MAP.has(lower) ? lower : undefined;
690
+ case 2:
691
+ return BYTEWORD_FIRST_LAST_MAP.get(lower);
692
+ case 3: {
693
+ return BYTEWORD_FIRST_THREE_MAP.get(lower) ?? BYTEWORD_LAST_THREE_MAP.get(lower);
694
+ }
695
+ default:
696
+ return undefined;
608
697
  }
609
- return emojis.join(" ");
610
698
  }
611
699
 
612
700
  /**
@@ -615,7 +703,7 @@ export function encodeBytemojisIdentifier(data: Uint8Array): string {
615
703
  export enum BytewordsStyle {
616
704
  /** Full 4-letter words separated by spaces */
617
705
  Standard = "standard",
618
- /** Full 4-letter words without separators */
706
+ /** Full 4-letter words separated by hyphens (URI-safe) */
619
707
  Uri = "uri",
620
708
  /** First and last character only (minimal) - used by UR encoding */
621
709
  Minimal = "minimal",
@@ -636,6 +724,48 @@ function createMinimalBytewordsMap(): Map<string, number> {
636
724
 
637
725
  export const MINIMAL_BYTEWORDS_MAP = createMinimalBytewordsMap();
638
726
 
727
+ /**
728
+ * Set of all 256 bytemojis for fast membership testing. Backs
729
+ * {@link isValidBytemoji}.
730
+ */
731
+ const BYTEMOJI_SET: ReadonlySet<string> = new Set(BYTEMOJIS);
732
+
733
+ /**
734
+ * Lookup from a 2-letter (first+last) byteword short-form to its full
735
+ * lowercase 4-letter form. Backs {@link canonicalizeByteword}.
736
+ */
737
+ const BYTEWORD_FIRST_LAST_MAP: ReadonlyMap<string, string> = (() => {
738
+ const map = new Map<string, string>();
739
+ for (const word of BYTEWORDS) {
740
+ map.set(word[0] + word[word.length - 1], word);
741
+ }
742
+ return map;
743
+ })();
744
+
745
+ /**
746
+ * Lookup from the first 3 letters of a byteword to its full lowercase 4-letter
747
+ * form. Backs {@link canonicalizeByteword}.
748
+ */
749
+ const BYTEWORD_FIRST_THREE_MAP: ReadonlyMap<string, string> = (() => {
750
+ const map = new Map<string, string>();
751
+ for (const word of BYTEWORDS) {
752
+ map.set(word.slice(0, 3), word);
753
+ }
754
+ return map;
755
+ })();
756
+
757
+ /**
758
+ * Lookup from the last 3 letters of a byteword to its full lowercase 4-letter
759
+ * form. Backs {@link canonicalizeByteword}.
760
+ */
761
+ const BYTEWORD_LAST_THREE_MAP: ReadonlyMap<string, string> = (() => {
762
+ const map = new Map<string, string>();
763
+ for (const word of BYTEWORDS) {
764
+ map.set(word.slice(1), word);
765
+ }
766
+ return map;
767
+ })();
768
+
639
769
  /**
640
770
  * CRC32 lookup table (IEEE polynomial).
641
771
  */
@@ -712,19 +842,51 @@ export function encodeBytewords(
712
842
  case BytewordsStyle.Standard:
713
843
  return words.join(" ");
714
844
  case BytewordsStyle.Uri:
845
+ return words.join("-");
715
846
  case BytewordsStyle.Minimal:
716
847
  return words.join("");
717
848
  }
718
849
  }
719
850
 
851
+ /**
852
+ * Returns true if every code unit of `s` is in the ASCII range (0..=127).
853
+ *
854
+ * Mirrors Rust's `str::is_ascii` used at `ur::bytewords::decode` line 105.
855
+ * We test the raw code units (rather than Array.from + codepoint) because
856
+ * any non-BMP character is a surrogate pair whose two code units are both
857
+ * ≥ 0xD800, which already exceed 0x7F.
858
+ */
859
+ function isAsciiString(s: string): boolean {
860
+ for (let i = 0; i < s.length; i++) {
861
+ if (s.charCodeAt(i) > 0x7f) return false;
862
+ }
863
+ return true;
864
+ }
865
+
720
866
  /**
721
867
  * Decode bytewords string back to data.
722
868
  * Validates and removes CRC32 checksum.
869
+ *
870
+ * Errors mirror the upstream Rust `ur::bytewords::Error` enum
871
+ * (`ur-0.4.1/src/bytewords.rs`):
872
+ * - `NonAscii` — input contains non-ASCII characters (checked first).
873
+ * - `InvalidLength` — minimal-style input has odd length.
874
+ * - `InvalidWord` — a token does not map to a byteword index.
875
+ * - `InvalidChecksum` — the trailing 4-byte CRC32 does not match.
876
+ *
877
+ * All variants are surfaced as {@link BytewordsError} with the same default
878
+ * `Display` strings as Rust (e.g. "invalid checksum", "non-ASCII"), so
879
+ * callers can branch on the error class rather than the bare `Error`
880
+ * thrown by earlier revisions of this port.
723
881
  */
724
882
  export function decodeBytewords(
725
883
  encoded: string,
726
884
  style: BytewordsStyle = BytewordsStyle.Minimal,
727
885
  ): Uint8Array {
886
+ // Rust rejects non-ASCII input up-front (`bytewords.rs:105-107`).
887
+ if (!isAsciiString(encoded)) {
888
+ throw new BytewordsError("bytewords string contains non-ASCII characters");
889
+ }
728
890
  const lowercased = encoded.toLowerCase();
729
891
  let bytes: number[];
730
892
 
@@ -734,39 +896,35 @@ export function decodeBytewords(
734
896
  bytes = words.map((word) => {
735
897
  const index = BYTEWORDS_MAP.get(word);
736
898
  if (index === undefined) {
737
- throw new Error(`Invalid byteword: ${word}`);
899
+ throw new BytewordsError("invalid word");
738
900
  }
739
901
  return index;
740
902
  });
741
903
  break;
742
904
  }
743
905
  case BytewordsStyle.Uri: {
744
- // 4-character words with no separator
745
- if (lowercased.length % 4 !== 0) {
746
- throw new Error("Invalid URI bytewords length");
747
- }
748
- bytes = [];
749
- for (let i = 0; i < lowercased.length; i += 4) {
750
- const word = lowercased.slice(i, i + 4);
906
+ // 4-character words separated by hyphens
907
+ const words = lowercased.split("-");
908
+ bytes = words.map((word) => {
751
909
  const index = BYTEWORDS_MAP.get(word);
752
910
  if (index === undefined) {
753
- throw new Error(`Invalid byteword: ${word}`);
911
+ throw new BytewordsError("invalid word");
754
912
  }
755
- bytes.push(index);
756
- }
913
+ return index;
914
+ });
757
915
  break;
758
916
  }
759
917
  case BytewordsStyle.Minimal: {
760
918
  // 2-character minimal words with no separator
761
919
  if (lowercased.length % 2 !== 0) {
762
- throw new Error("Invalid minimal bytewords length");
920
+ throw new BytewordsError("invalid length");
763
921
  }
764
922
  bytes = [];
765
923
  for (let i = 0; i < lowercased.length; i += 2) {
766
924
  const minimal = lowercased.slice(i, i + 2);
767
925
  const index = MINIMAL_BYTEWORDS_MAP.get(minimal);
768
926
  if (index === undefined) {
769
- throw new Error(`Invalid minimal byteword: ${minimal}`);
927
+ throw new BytewordsError("invalid word");
770
928
  }
771
929
  bytes.push(index);
772
930
  }
@@ -775,7 +933,7 @@ export function decodeBytewords(
775
933
  }
776
934
 
777
935
  if (bytes.length < 4) {
778
- throw new Error("Bytewords data too short (missing checksum)");
936
+ throw new BytewordsError("invalid checksum");
779
937
  }
780
938
 
781
939
  // Extract data and checksum
@@ -793,9 +951,7 @@ export function decodeBytewords(
793
951
  0;
794
952
 
795
953
  if (expectedChecksum !== actualChecksum) {
796
- throw new Error(
797
- `Bytewords checksum mismatch: expected ${expectedChecksum.toString(16)}, got ${actualChecksum.toString(16)}`,
798
- );
954
+ throw new BytewordsError("invalid checksum");
799
955
  }
800
956
 
801
957
  return data;
package/src/xoshiro.ts CHANGED
@@ -1,12 +1,19 @@
1
1
  /**
2
+ * Copyright © 2023-2026 Blockchain Commons, LLC
3
+ * Copyright © 2025-2026 Parity Technologies
4
+ *
5
+ *
2
6
  * Xoshiro256** PRNG implementation.
3
7
  *
4
8
  * This is a high-quality, fast pseudo-random number generator used
5
9
  * for deterministic fragment selection in fountain codes.
6
10
  *
7
11
  * Reference: https://prng.di.unimi.it/
12
+ * BC-UR Reference: https://github.com/nicklockwood/fountain-codes
8
13
  */
9
14
 
15
+ import { sha256 } from "@bcts/crypto";
16
+
10
17
  const MAX_UINT64 = BigInt("0xffffffffffffffff");
11
18
 
12
19
  /**
@@ -28,25 +35,33 @@ export class Xoshiro256 {
28
35
  private s: [bigint, bigint, bigint, bigint];
29
36
 
30
37
  /**
31
- * Creates a new Xoshiro256** instance from a seed.
38
+ * Creates a new Xoshiro256** instance from a 32-byte seed.
32
39
  *
33
- * The seed is hashed using SHA-256 to initialize the state.
34
- * For consistent results across encoder/decoder, use the same seed.
40
+ * The seed must be exactly 32 bytes (256 bits). The bytes are interpreted
41
+ * using the BC-UR reference algorithm: each 8-byte chunk is read as a
42
+ * big-endian 64-bit integer and becomes one word of the state.
35
43
  *
36
- * @param seed - The seed bytes (any length)
44
+ * @param seed - The seed bytes (must be exactly 32 bytes)
37
45
  */
38
46
  constructor(seed: Uint8Array) {
39
- // Hash the seed using a simple hash function
40
- // In production, you'd use SHA-256 here
41
- const hash = this.hashSeed(seed);
42
-
43
- // Initialize the 4x64-bit state from the hash
44
- this.s = [
45
- this.bytesToBigInt(hash.slice(0, 8)),
46
- this.bytesToBigInt(hash.slice(8, 16)),
47
- this.bytesToBigInt(hash.slice(16, 24)),
48
- this.bytesToBigInt(hash.slice(24, 32)),
49
- ];
47
+ if (seed.length !== 32) {
48
+ throw new Error(`Seed must be 32 bytes, got ${seed.length}`);
49
+ }
50
+
51
+ // BC-UR reference implementation:
52
+ // Each 8-byte chunk is read as a big-endian u64 and used directly as one
53
+ // 64-bit state word; no further byte swapping is performed here.
54
+ const s: [bigint, bigint, bigint, bigint] = [0n, 0n, 0n, 0n];
55
+ for (let i = 0; i < 4; i++) {
56
+ // Read 8 bytes as big-endian u64
57
+ let v = 0n;
58
+ for (let n = 0; n < 8; n++) {
59
+ v = (v << 8n) | BigInt(seed[8 * i + n] ?? 0);
60
+ }
61
+ s[i] = v;
62
+ }
63
+
64
+ this.s = s;
50
65
  }
51
66
 
52
67
  /**
@@ -59,47 +74,6 @@ export class Xoshiro256 {
59
74
  return instance;
60
75
  }
61
76
 
62
- /**
63
- * Simple hash function for seeding.
64
- * This is a basic implementation - in production use SHA-256.
65
- */
66
- private hashSeed(seed: Uint8Array): Uint8Array {
67
- // Simple hash expansion using CRC32-like operations
68
- const result = new Uint8Array(32);
69
-
70
- if (seed.length === 0) {
71
- return result;
72
- }
73
-
74
- // Expand seed to 32 bytes using a simple mixing function
75
- for (let i = 0; i < 32; i++) {
76
- let hash = 0;
77
- for (const byte of seed) {
78
- hash = (hash * 31 + byte + i) >>> 0;
79
- }
80
- // Mix the hash further
81
- hash ^= hash >>> 16;
82
- hash = (hash * 0x85ebca6b) >>> 0;
83
- hash ^= hash >>> 13;
84
- hash = (hash * 0xc2b2ae35) >>> 0;
85
- hash ^= hash >>> 16;
86
- result[i] = hash & 0xff;
87
- }
88
-
89
- return result;
90
- }
91
-
92
- /**
93
- * Converts 8 bytes to a 64-bit BigInt (little-endian).
94
- */
95
- private bytesToBigInt(bytes: Uint8Array): bigint {
96
- let result = 0n;
97
- for (let i = 7; i >= 0; i--) {
98
- result = (result << 8n) | BigInt(bytes[i] ?? 0);
99
- }
100
- return result;
101
- }
102
-
103
77
  /**
104
78
  * Generates the next 64-bit random value.
105
79
  */
@@ -121,30 +95,43 @@ export class Xoshiro256 {
121
95
 
122
96
  /**
123
97
  * Generates a random double in [0, 1).
98
+ * Matches BC-UR reference: self.next() as f64 / (u64::MAX as f64 + 1.0)
124
99
  */
125
100
  nextDouble(): number {
126
- // Use the upper 53 bits for double precision
127
101
  const value = this.next();
128
- return Number(value >> 11n) / Number(1n << 53n);
102
+ // u64::MAX as f64 + 1.0 = 18446744073709551616.0
103
+ return Number(value) / 18446744073709551616.0;
129
104
  }
130
105
 
131
106
  /**
132
- * Generates a random integer in [low, high).
107
+ * Generates a random integer in [low, high] (inclusive).
108
+ * Matches BC-UR reference: (self.next_double() * ((high - low + 1) as f64)) as u64 + low
133
109
  */
134
110
  nextInt(low: number, high: number): number {
135
- const range = high - low;
136
- return low + Math.floor(this.nextDouble() * range);
111
+ const range = high - low + 1;
112
+ return Math.floor(this.nextDouble() * range) + low;
137
113
  }
138
114
 
139
115
  /**
140
116
  * Generates a random byte [0, 255].
117
+ *
118
+ * Mirrors Rust `Xoshiro256::next_byte` (`ur-0.4.1/src/xoshiro.rs:91`):
119
+ * `self.next_int(0, 255) as u8`
120
+ * This goes through `next_double() * 256.0`, which effectively uses
121
+ * the top 8 bits of the f64-converted u64 — NOT the low 8 bits
122
+ * of the raw `next()` output. Earlier the TS port used `next() & 0xff`,
123
+ * which produced a completely different byte sequence than Rust for
124
+ * the same seeded RNG.
141
125
  */
142
126
  nextByte(): number {
143
- return Number(this.next() & 0xffn);
127
+ return this.nextInt(0, 255);
144
128
  }
145
129
 
146
130
  /**
147
131
  * Generates an array of random bytes.
132
+ *
133
+ * Mirrors Rust `Xoshiro256::next_bytes` (`ur-0.4.1/src/xoshiro.rs:95-97`):
134
+ * `(0..n).map(|_| self.next_byte()).collect()`
148
135
  */
149
136
  nextData(count: number): Uint8Array {
150
137
  const result = new Uint8Array(count);
@@ -153,28 +140,153 @@ export class Xoshiro256 {
153
140
  }
154
141
  return result;
155
142
  }
143
+
144
+ /**
145
+ * Shuffles items by repeatedly picking random indices.
146
+ * Matches BC-UR reference implementation.
147
+ */
148
+ shuffled<T>(items: T[]): T[] {
149
+ const source = [...items];
150
+ const shuffled: T[] = [];
151
+ while (source.length > 0) {
152
+ const index = this.nextInt(0, source.length - 1);
153
+ const item = source.splice(index, 1)[0];
154
+ if (item !== undefined) {
155
+ shuffled.push(item);
156
+ }
157
+ }
158
+ return shuffled;
159
+ }
160
+
161
+ /**
162
+ * Chooses the degree (number of fragments to mix) using a weighted sampler.
163
+ * Samples from a discrete distribution with weights [1/1, 1/2, 1/3, ..., 1/n].
164
+ * Matches BC-UR reference implementation.
165
+ */
166
+ chooseDegree(seqLen: number): number {
167
+ // Create weights: [1/1, 1/2, 1/3, ..., 1/seqLen]
168
+ const weights: number[] = [];
169
+ for (let i = 1; i <= seqLen; i++) {
170
+ weights.push(1.0 / i);
171
+ }
172
+
173
+ // Use Vose's alias method for weighted sampling
174
+ const sampler = new WeightedSampler(weights);
175
+ return sampler.next(this) + 1; // 1-indexed degree
176
+ }
156
177
  }
157
178
 
158
179
  /**
159
- * Creates a seed for the Xoshiro PRNG from message checksum and sequence number.
180
+ * Weighted sampler using Vose's alias method.
181
+ * Allows O(1) sampling from a discrete probability distribution.
182
+ */
183
+ class WeightedSampler {
184
+ private readonly aliases: number[];
185
+ private readonly probs: number[];
186
+
187
+ constructor(weights: number[]) {
188
+ const n = weights.length;
189
+
190
+ // Mirrors Rust `Weighted::new` (`ur-0.4.1/src/sampler.rs:13-19`):
191
+ // assert!(!weights.iter().any(|&p| p < 0.0), "negative probability encountered");
192
+ // let summed = weights.iter().sum::<f64>();
193
+ // assert!(summed > 0.0, "probabilities don't sum to a positive value");
194
+ if (weights.some((w) => w < 0.0)) {
195
+ throw new Error("negative probability encountered");
196
+ }
197
+ const sum = weights.reduce((a, b) => a + b, 0);
198
+ if (!(sum > 0.0)) {
199
+ throw new Error("probabilities don't sum to a positive value");
200
+ }
201
+
202
+ const normalized = weights.map((w) => (w * n) / sum);
203
+
204
+ // Initialize alias table
205
+ this.aliases = Array.from<number>({ length: n }).fill(0);
206
+ this.probs = Array.from<number>({ length: n }).fill(0);
207
+
208
+ // Partition into small and large
209
+ const small: number[] = [];
210
+ const large: number[] = [];
211
+
212
+ for (let i = n - 1; i >= 0; i--) {
213
+ if (normalized[i] < 1.0) {
214
+ small.push(i);
215
+ } else {
216
+ large.push(i);
217
+ }
218
+ }
219
+
220
+ // Build the alias table
221
+ while (small.length > 0 && large.length > 0) {
222
+ const a = small.pop();
223
+ const g = large.pop();
224
+ if (a === undefined || g === undefined) break;
225
+ this.probs[a] = normalized[a] ?? 0;
226
+ this.aliases[a] = g;
227
+ const normalizedG = normalized[g] ?? 0;
228
+ const normalizedA = normalized[a] ?? 0;
229
+ normalized[g] = normalizedG + normalizedA - 1.0;
230
+ if (normalized[g] !== undefined && normalized[g] < 1.0) {
231
+ small.push(g);
232
+ } else {
233
+ large.push(g);
234
+ }
235
+ }
236
+
237
+ while (large.length > 0) {
238
+ const g = large.pop();
239
+ if (g === undefined) break;
240
+ this.probs[g] = 1.0;
241
+ }
242
+
243
+ while (small.length > 0) {
244
+ const a = small.pop();
245
+ if (a === undefined) break;
246
+ this.probs[a] = 1.0;
247
+ }
248
+ }
249
+
250
+ /**
251
+ * Sample from the distribution.
252
+ */
253
+ next(rng: Xoshiro256): number {
254
+ const r1 = rng.nextDouble();
255
+ const r2 = rng.nextDouble();
256
+ const n = this.probs.length;
257
+ const i = Math.floor(n * r1);
258
+ if (r2 < this.probs[i]) {
259
+ return i;
260
+ } else {
261
+ return this.aliases[i];
262
+ }
263
+ }
264
+ }
265
+
266
+ /**
267
+ * Creates the 32-byte Xoshiro256 seed from message checksum and sequence number.
268
+ *
269
+ * This creates an 8-byte seed by concatenating seqNum and checksum (both in
270
+ * big-endian), then hashes it with SHA-256 to get the 32-byte seed for Xoshiro.
160
271
  *
161
- * This ensures that both encoder and decoder produce the same random sequence
162
- * for a given message and part number.
272
+ * This matches the BC-UR reference implementation.
163
273
  */
164
274
  export function createSeed(checksum: number, seqNum: number): Uint8Array {
165
- const seed = new Uint8Array(8);
275
+ // Create 8-byte seed: seqNum (big-endian) || checksum (big-endian)
276
+ const seed8 = new Uint8Array(8);
166
277
 
167
- // Pack checksum (4 bytes, big-endian)
168
- seed[0] = (checksum >>> 24) & 0xff;
169
- seed[1] = (checksum >>> 16) & 0xff;
170
- seed[2] = (checksum >>> 8) & 0xff;
171
- seed[3] = checksum & 0xff;
278
+ // seqNum in big-endian (bytes 0-3)
279
+ seed8[0] = (seqNum >>> 24) & 0xff;
280
+ seed8[1] = (seqNum >>> 16) & 0xff;
281
+ seed8[2] = (seqNum >>> 8) & 0xff;
282
+ seed8[3] = seqNum & 0xff;
172
283
 
173
- // Pack seqNum (4 bytes, big-endian)
174
- seed[4] = (seqNum >>> 24) & 0xff;
175
- seed[5] = (seqNum >>> 16) & 0xff;
176
- seed[6] = (seqNum >>> 8) & 0xff;
177
- seed[7] = seqNum & 0xff;
284
+ // checksum in big-endian (bytes 4-7)
285
+ seed8[4] = (checksum >>> 24) & 0xff;
286
+ seed8[5] = (checksum >>> 16) & 0xff;
287
+ seed8[6] = (checksum >>> 8) & 0xff;
288
+ seed8[7] = checksum & 0xff;
178
289
 
179
- return seed;
290
+ // Hash with SHA-256 to get 32 bytes
291
+ return sha256(seed8);
180
292
  }