@bcts/uniform-resources 1.0.0-alpha.21 → 1.0.0-alpha.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/utils.ts CHANGED
@@ -580,39 +580,115 @@ export const BYTEMOJIS: string[] = [
580
580
  ];
581
581
 
582
582
  /**
583
- * Encodes a 4-byte slice as a string of bytewords for identification.
583
+ * Encodes an arbitrary byte slice as a string of space-separated bytewords.
584
+ *
585
+ * Mirrors `bytewords::encode_to_words` in `bc-ur-rust` (≥ v0.19.1). Does not
586
+ * add a CRC32 checksum — use {@link encodeBytewords} for UR-style encoding.
584
587
  */
585
- export function encodeBytewordsIdentifier(data: Uint8Array): string {
586
- if (data.length !== 4) {
587
- throw new Error("Identifier data must be exactly 4 bytes");
588
- }
588
+ export function encodeToWords(data: Uint8Array): string {
589
589
  const words: string[] = [];
590
- for (let i = 0; i < 4; i++) {
591
- const byte = data[i];
592
- if (byte === undefined) throw new Error("Invalid byte");
590
+ for (const byte of data) {
593
591
  const word = BYTEWORDS[byte];
594
- if (word === "" || word === undefined) throw new Error("Invalid byteword mapping");
592
+ if (word === undefined) throw new Error(`Invalid byte value: ${byte}`);
595
593
  words.push(word);
596
594
  }
597
595
  return words.join(" ");
598
596
  }
599
597
 
598
+ /**
599
+ * Encodes an arbitrary byte slice as a string of space-separated bytemojis.
600
+ *
601
+ * Mirrors `bytewords::encode_to_bytemojis` in `bc-ur-rust` (≥ v0.19.1).
602
+ */
603
+ export function encodeToBytemojis(data: Uint8Array): string {
604
+ const emojis: string[] = [];
605
+ for (const byte of data) {
606
+ const emoji = BYTEMOJIS[byte];
607
+ if (emoji === undefined) throw new Error(`Invalid byte value: ${byte}`);
608
+ emojis.push(emoji);
609
+ }
610
+ return emojis.join(" ");
611
+ }
612
+
613
+ /**
614
+ * Encodes an arbitrary byte slice as minimal bytewords (first + last letter of
615
+ * each word, concatenated with no separator).
616
+ *
617
+ * Mirrors `bytewords::encode_to_minimal_bytewords` in `bc-ur-rust`
618
+ * (≥ v0.19.1). Does not add a CRC32 checksum.
619
+ */
620
+ export function encodeToMinimalBytewords(data: Uint8Array): string {
621
+ let out = "";
622
+ for (const byte of data) {
623
+ const word = BYTEWORDS[byte];
624
+ if (word === undefined) throw new Error(`Invalid byte value: ${byte}`);
625
+ out += word[0] + word[word.length - 1];
626
+ }
627
+ return out;
628
+ }
629
+
630
+ /**
631
+ * Encodes a 4-byte slice as a string of bytewords for identification.
632
+ *
633
+ * Thin wrapper over {@link encodeToWords} that enforces the 4-byte length
634
+ * contract historically used by `bc-ur-rust`'s `bytewords::identifier`.
635
+ */
636
+ export function encodeBytewordsIdentifier(data: Uint8Array): string {
637
+ if (data.length !== 4) {
638
+ throw new Error("Identifier data must be exactly 4 bytes");
639
+ }
640
+ return encodeToWords(data);
641
+ }
642
+
600
643
  /**
601
644
  * Encodes a 4-byte slice as a string of bytemojis for identification.
645
+ *
646
+ * Thin wrapper over {@link encodeToBytemojis} that enforces the 4-byte length
647
+ * contract historically used by `bc-ur-rust`'s `bytewords::bytemoji_identifier`.
602
648
  */
603
649
  export function encodeBytemojisIdentifier(data: Uint8Array): string {
604
650
  if (data.length !== 4) {
605
651
  throw new Error("Identifier data must be exactly 4 bytes");
606
652
  }
607
- const emojis: string[] = [];
608
- for (let i = 0; i < 4; i++) {
609
- const byte = data[i];
610
- if (byte === undefined) throw new Error("Invalid byte");
611
- const emoji = BYTEMOJIS[byte];
612
- if (emoji === "" || emoji === undefined) throw new Error("Invalid bytemoji mapping");
613
- emojis.push(emoji);
653
+ return encodeToBytemojis(data);
654
+ }
655
+
656
+ /**
657
+ * Returns `true` if `emoji` is one of the 256 bytemojis.
658
+ *
659
+ * Mirrors `bytewords::is_valid_bytemoji` in `bc-ur-rust` (≥ v0.19.1).
660
+ */
661
+ export function isValidBytemoji(emoji: string): boolean {
662
+ return BYTEMOJI_SET.has(emoji);
663
+ }
664
+
665
+ /**
666
+ * Canonicalises a byteword token (2–4 ASCII letters, case-insensitive) to its
667
+ * full 4-letter lowercase form. Returns `undefined` if the token is not a
668
+ * valid byteword or any of its short forms.
669
+ *
670
+ * Mirrors `bytewords::canonicalize_byteword` in `bc-ur-rust` (≥ v0.19.1).
671
+ *
672
+ * - 2-letter tokens are matched against the first + last letter of each
673
+ * byteword (identical to the minimal bytewords encoding).
674
+ * - 3-letter tokens are matched against the first 3 and the last 3 letters of
675
+ * each byteword; if both match different entries, the first-3 match wins
676
+ * (matching Rust's `or_else` priority).
677
+ * - 4-letter tokens must exactly match a full byteword (after lower-casing).
678
+ */
679
+ export function canonicalizeByteword(token: string): string | undefined {
680
+ const lower = token.toLowerCase();
681
+ switch (lower.length) {
682
+ case 4:
683
+ return BYTEWORDS_MAP.has(lower) ? lower : undefined;
684
+ case 2:
685
+ return BYTEWORD_FIRST_LAST_MAP.get(lower);
686
+ case 3: {
687
+ return BYTEWORD_FIRST_THREE_MAP.get(lower) ?? BYTEWORD_LAST_THREE_MAP.get(lower);
688
+ }
689
+ default:
690
+ return undefined;
614
691
  }
615
- return emojis.join(" ");
616
692
  }
617
693
 
618
694
  /**
@@ -642,6 +718,48 @@ function createMinimalBytewordsMap(): Map<string, number> {
642
718
 
643
719
  export const MINIMAL_BYTEWORDS_MAP = createMinimalBytewordsMap();
644
720
 
721
+ /**
722
+ * Set of all 256 bytemojis for fast membership testing. Backs
723
+ * {@link isValidBytemoji}.
724
+ */
725
+ const BYTEMOJI_SET: ReadonlySet<string> = new Set(BYTEMOJIS);
726
+
727
+ /**
728
+ * Lookup from a 2-letter (first+last) byteword short-form to its full
729
+ * lowercase 4-letter form. Backs {@link canonicalizeByteword}.
730
+ */
731
+ const BYTEWORD_FIRST_LAST_MAP: ReadonlyMap<string, string> = (() => {
732
+ const map = new Map<string, string>();
733
+ for (const word of BYTEWORDS) {
734
+ map.set(word[0] + word[word.length - 1], word);
735
+ }
736
+ return map;
737
+ })();
738
+
739
+ /**
740
+ * Lookup from the first 3 letters of a byteword to its full lowercase 4-letter
741
+ * form. Backs {@link canonicalizeByteword}.
742
+ */
743
+ const BYTEWORD_FIRST_THREE_MAP: ReadonlyMap<string, string> = (() => {
744
+ const map = new Map<string, string>();
745
+ for (const word of BYTEWORDS) {
746
+ map.set(word.slice(0, 3), word);
747
+ }
748
+ return map;
749
+ })();
750
+
751
+ /**
752
+ * Lookup from the last 3 letters of a byteword to its full lowercase 4-letter
753
+ * form. Backs {@link canonicalizeByteword}.
754
+ */
755
+ const BYTEWORD_LAST_THREE_MAP: ReadonlyMap<string, string> = (() => {
756
+ const map = new Map<string, string>();
757
+ for (const word of BYTEWORDS) {
758
+ map.set(word.slice(1), word);
759
+ }
760
+ return map;
761
+ })();
762
+
645
763
  /**
646
764
  * CRC32 lookup table (IEEE polynomial).
647
765
  */