@bcts/provenance-mark 1.0.0-alpha.21 → 1.0.0-alpha.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@bcts/provenance-mark",
3
- "version": "1.0.0-alpha.21",
3
+ "version": "1.0.0-alpha.23",
4
4
  "type": "module",
5
5
  "description": "Blockchain Commons Provenance Mark for TypeScript - A cryptographically-secured system for establishing and verifying the authenticity of works",
6
6
  "license": "BSD-2-Clause-Patent",
@@ -57,22 +57,22 @@
57
57
  "@bcts/eslint": "^0.1.0",
58
58
  "@bcts/tsconfig": "^0.1.0",
59
59
  "@eslint/js": "^10.0.1",
60
- "@typescript-eslint/eslint-plugin": "^8.56.1",
61
- "@typescript-eslint/parser": "^8.56.1",
62
- "eslint": "^10.0.2",
60
+ "@typescript-eslint/eslint-plugin": "^8.59.0",
61
+ "@typescript-eslint/parser": "^8.59.0",
62
+ "eslint": "^10.2.1",
63
63
  "ts-node": "^10.9.2",
64
- "tsdown": "^0.20.3",
65
- "typedoc": "^0.28.17",
66
- "typescript": "^5.9.3",
67
- "vitest": "^4.0.18"
64
+ "tsdown": "^0.21.0",
65
+ "typedoc": "^0.28.19",
66
+ "typescript": "^6.0.3",
67
+ "vitest": "^4.1.5"
68
68
  },
69
69
  "dependencies": {
70
- "@bcts/dcbor": "^1.0.0-alpha.21",
71
- "@bcts/envelope": "^1.0.0-alpha.21",
72
- "@bcts/rand": "^1.0.0-alpha.21",
73
- "@bcts/tags": "^1.0.0-alpha.21",
74
- "@bcts/uniform-resources": "^1.0.0-alpha.21",
75
- "@noble/ciphers": "^2.1.1",
76
- "@noble/hashes": "^2.0.1"
70
+ "@bcts/dcbor": "^1.0.0-alpha.23",
71
+ "@bcts/envelope": "^1.0.0-alpha.23",
72
+ "@bcts/rand": "^1.0.0-alpha.23",
73
+ "@bcts/tags": "^1.0.0-alpha.23",
74
+ "@bcts/uniform-resources": "^1.0.0-alpha.23",
75
+ "@noble/ciphers": "^2.2.0",
76
+ "@noble/hashes": "^2.2.0"
77
77
  }
78
78
  }
package/src/mark-info.ts CHANGED
@@ -46,8 +46,8 @@ export class ProvenanceMarkInfo {
46
46
  }
47
47
  const cborValue = decodeCbor(mark.toCborData());
48
48
  const ur = UR.new(tagName, cborValue);
49
- const bytewords = mark.bytewordsIdentifier(true);
50
- const bytemoji = mark.bytemojiIdentifier(true);
49
+ const bytewords = mark.idBytewords(4, true);
50
+ const bytemoji = mark.idBytemoji(4, true);
51
51
  return new ProvenanceMarkInfo(mark, ur, bytewords, bytemoji, comment);
52
52
  }
53
53
 
package/src/mark.ts CHANGED
@@ -13,8 +13,9 @@ import {
13
13
  BytewordsStyle,
14
14
  encodeBytewords,
15
15
  decodeBytewords,
16
- encodeBytewordsIdentifier,
17
- encodeBytemojisIdentifier,
16
+ encodeToWords,
17
+ encodeToBytemojis,
18
+ encodeToMinimalBytewords,
18
19
  UR,
19
20
  } from "@bcts/uniform-resources";
20
21
  import { Envelope } from "@bcts/envelope";
@@ -277,66 +278,213 @@ export class ProvenanceMark {
277
278
  }
278
279
 
279
280
  /**
280
- * Get the first four bytes of the hash as a hex string identifier.
281
+ * The 32-byte Mark ID.
282
+ *
283
+ * The first `linkLength` bytes are the mark's stored hash. The remaining
284
+ * bytes come from the mark's fingerprint (SHA-256 of CBOR encoding),
285
+ * ensuring a full 32-byte value is always available regardless of
286
+ * resolution.
287
+ */
288
+ id(): Uint8Array {
289
+ const result = new Uint8Array(32);
290
+ const n = this._hash.length;
291
+ result.set(this._hash, 0);
292
+ if (n < 32) {
293
+ const fp = this.fingerprint();
294
+ result.set(fp.subarray(0, 32 - n), n);
295
+ }
296
+ return result;
297
+ }
298
+
299
+ /**
300
+ * The full 32-byte Mark ID as a 64-character hex string.
301
+ */
302
+ idHex(): string {
303
+ return bytesToHex(this.id());
304
+ }
305
+
306
+ /**
307
+ * The first `wordCount` bytes of the Mark ID as upper-case ByteWords.
308
+ *
309
+ * @param wordCount Number of bytes to encode, must be in `4..=32`.
310
+ * @param prefix If `true`, prepends the provenance-mark prefix character.
311
+ * @throws if `wordCount` is not in the range `4..=32`.
312
+ */
313
+ idBytewords(wordCount: number, prefix: boolean): string {
314
+ if (!Number.isInteger(wordCount) || wordCount < 4 || wordCount > 32) {
315
+ throw new Error(`word_count must be 4..=32, got ${wordCount}`);
316
+ }
317
+ const s = encodeToWords(this.id().subarray(0, wordCount)).toUpperCase();
318
+ return prefix ? `\u{1F151} ${s}` : s;
319
+ }
320
+
321
+ /**
322
+ * The first `wordCount` bytes of the Mark ID as Bytemoji.
323
+ *
324
+ * @param wordCount Number of bytes to encode, must be in `4..=32`.
325
+ * @param prefix If `true`, prepends the provenance-mark prefix character.
326
+ * @throws if `wordCount` is not in the range `4..=32`.
327
+ */
328
+ idBytemoji(wordCount: number, prefix: boolean): string {
329
+ if (!Number.isInteger(wordCount) || wordCount < 4 || wordCount > 32) {
330
+ throw new Error(`word_count must be 4..=32, got ${wordCount}`);
331
+ }
332
+ const s = encodeToBytemojis(this.id().subarray(0, wordCount)).toUpperCase();
333
+ return prefix ? `\u{1F151} ${s}` : s;
334
+ }
335
+
336
+ /**
337
+ * The first `wordCount` bytes of the Mark ID as upper-case minimal
338
+ * ByteWords (2 letters per byte, concatenated without separator).
339
+ *
340
+ * @param wordCount Number of bytes to encode, must be in `4..=32`.
341
+ * @param prefix If `true`, prepends the provenance-mark prefix character.
342
+ * @throws if `wordCount` is not in the range `4..=32`.
343
+ */
344
+ idBytewordsMinimal(wordCount: number, prefix: boolean): string {
345
+ if (!Number.isInteger(wordCount) || wordCount < 4 || wordCount > 32) {
346
+ throw new Error(`word_count must be 4..=32, got ${wordCount}`);
347
+ }
348
+ const s = encodeToMinimalBytewords(this.id().subarray(0, wordCount)).toUpperCase();
349
+ return prefix ? `\u{1F151} ${s}` : s;
350
+ }
351
+
352
+ /**
353
+ * Legacy 8-character hex identifier — the first 4 bytes of the Mark ID.
354
+ *
355
+ * @deprecated Use {@link idHex} for the full 64-char hex, or
356
+ * `idHex().slice(0, 8)` for this legacy short form. Retained for
357
+ * backwards compatibility; will be removed in a future alpha.
281
358
  */
282
359
  identifier(): string {
283
- return Array.from(this._hash.slice(0, 4))
284
- .map((b) => b.toString(16).padStart(2, "0"))
285
- .join("");
360
+ return this.idHex().slice(0, 8);
286
361
  }
287
362
 
288
363
  /**
289
- * Get the first four bytes of the hash as upper-case ByteWords.
364
+ * Legacy 4-byte upper-case ByteWords identifier.
365
+ *
366
+ * @deprecated Equivalent to `idBytewords(4, prefix)`. Retained for
367
+ * backwards compatibility; will be removed in a future alpha.
290
368
  */
291
369
  bytewordsIdentifier(prefix: boolean): string {
292
- const bytes = this._hash.slice(0, 4);
293
- const s = encodeBytewordsIdentifier(bytes).toUpperCase();
294
- return prefix ? `\u{1F151} ${s}` : s;
370
+ return this.idBytewords(4, prefix);
295
371
  }
296
372
 
297
373
  /**
298
- * A compact 8-letter identifier derived from the upper-case ByteWords
299
- * identifier by taking the first and last letter of each ByteWords word
300
- * (4 words x 2 letters = 8 letters).
374
+ * Legacy 8-letter minimal ByteWords identifier (first+last letter of each
375
+ * of the 4 ByteWords). Example: "ABLE ACID ALSO APEX" -> "AEADAOAX".
301
376
  *
302
- * Example: "ABLE ACID ALSO APEX" -> "AEADAOAX"
303
- * If prefix is true, prepends the provenance mark prefix character.
377
+ * @deprecated Equivalent to `idBytewordsMinimal(4, prefix)`. Retained
378
+ * for backwards compatibility; will be removed in a future alpha.
304
379
  */
305
380
  bytewordsMinimalIdentifier(prefix: boolean): string {
306
- const full = encodeBytewordsIdentifier(this._hash.slice(0, 4));
307
-
308
- const words = full.split(/\s+/);
309
- let out = "";
310
- if (words.length === 4) {
311
- for (const w of words) {
312
- if (w.length === 0) continue;
313
- out += w[0].toUpperCase();
314
- out += w[w.length - 1].toUpperCase();
315
- }
381
+ return this.idBytewordsMinimal(4, prefix);
382
+ }
383
+
384
+ /**
385
+ * Legacy 4-byte upper-case Bytemoji identifier.
386
+ *
387
+ * @deprecated Equivalent to `idBytemoji(4, prefix)`. Retained for
388
+ * backwards compatibility; will be removed in a future alpha.
389
+ */
390
+ bytemojiIdentifier(prefix: boolean): string {
391
+ return this.idBytemoji(4, prefix);
392
+ }
393
+
394
+ /**
395
+ * Computes the minimum prefix length (in bytes, `4..=32`) each mark needs
396
+ * so that every mark in the set has a unique Mark ID prefix.
397
+ *
398
+ * Non-colliding marks get the minimum of 4. Only marks whose 4-byte
399
+ * prefixes collide are extended.
400
+ */
401
+ private static minimalNoncollidingPrefixLengths(ids: Uint8Array[]): number[] {
402
+ const n = ids.length;
403
+ const lengths: number[] = new Array<number>(n).fill(4);
404
+
405
+ // Group by 4-byte prefix (fast path)
406
+ const groups = new Map<string, number[]>();
407
+ for (let i = 0; i < n; i++) {
408
+ const key = bytesToHex(ids[i].subarray(0, 4));
409
+ const g = groups.get(key);
410
+ if (g !== undefined) g.push(i);
411
+ else groups.set(key, [i]);
316
412
  }
317
413
 
318
- // Conservative fallback: if the input wasn't in the expected
319
- // space-separated 4-word format, remove whitespace and chunk the
320
- // remaining letters.
321
- if (out.length !== 8) {
322
- out = "";
323
- const compact = full.replace(/[^a-zA-Z]/g, "").toUpperCase();
324
- for (let i = 0; i + 3 < compact.length; i += 4) {
325
- out += compact[i];
326
- out += compact[i + 3];
414
+ // Resolve each collision group
415
+ for (const indices of groups.values()) {
416
+ if (indices.length <= 1) continue;
417
+ ProvenanceMark.resolveCollisionGroup(ids, indices, lengths);
418
+ }
419
+
420
+ return lengths;
421
+ }
422
+
423
+ private static resolveCollisionGroup(
424
+ ids: Uint8Array[],
425
+ initialIndices: number[],
426
+ lengths: number[],
427
+ ): void {
428
+ let unresolved: number[] = [...initialIndices];
429
+
430
+ for (let prefixLen = 5; prefixLen <= 32; prefixLen++) {
431
+ const subGroups = new Map<string, number[]>();
432
+ for (const i of unresolved) {
433
+ const key = bytesToHex(ids[i].subarray(0, prefixLen));
434
+ const g = subGroups.get(key);
435
+ if (g !== undefined) g.push(i);
436
+ else subGroups.set(key, [i]);
437
+ }
438
+
439
+ const nextUnresolved: number[] = [];
440
+ for (const subIndices of subGroups.values()) {
441
+ if (subIndices.length === 1) {
442
+ lengths[subIndices[0]] = prefixLen;
443
+ } else {
444
+ nextUnresolved.push(...subIndices);
445
+ }
327
446
  }
447
+
448
+ if (nextUnresolved.length === 0) return;
449
+ unresolved = nextUnresolved;
328
450
  }
329
451
 
330
- return prefix ? `\u{1F151} ${out}` : out;
452
+ // At 32 bytes, truly identical IDs remain — assign 32
453
+ for (const i of unresolved) {
454
+ lengths[i] = 32;
455
+ }
331
456
  }
332
457
 
333
458
  /**
334
- * Get the first four bytes of the hash as Bytemoji.
335
- */
336
- bytemojiIdentifier(prefix: boolean): string {
337
- const bytes = this._hash.slice(0, 4);
338
- const s = encodeBytemojisIdentifier(bytes).toUpperCase();
339
- return prefix ? `\u{1F151} ${s}` : s;
459
+ * Returns disambiguated upper-case ByteWords Mark IDs for a set of marks.
460
+ *
461
+ * Non-colliding marks get 4-word identifiers. Only marks whose 4-byte
462
+ * prefixes collide are extended with additional words (up to 32 bytes
463
+ * per identifier).
464
+ */
465
+ static disambiguatedIdBytewords(marks: ProvenanceMark[], prefix: boolean): string[] {
466
+ const ids = marks.map((m) => m.id());
467
+ const lengths = ProvenanceMark.minimalNoncollidingPrefixLengths(ids);
468
+ return ids.map((id, i) => {
469
+ const s = encodeToWords(id.subarray(0, lengths[i])).toUpperCase();
470
+ return prefix ? `\u{1F151} ${s}` : s;
471
+ });
472
+ }
473
+
474
+ /**
475
+ * Returns disambiguated Bytemoji Mark IDs for a set of marks.
476
+ *
477
+ * Non-colliding marks get 4-emoji identifiers. Only marks whose 4-byte
478
+ * prefixes collide are extended with additional emojis (up to 32 bytes
479
+ * per identifier).
480
+ */
481
+ static disambiguatedIdBytemoji(marks: ProvenanceMark[], prefix: boolean): string[] {
482
+ const ids = marks.map((m) => m.id());
483
+ const lengths = ProvenanceMark.minimalNoncollidingPrefixLengths(ids);
484
+ return ids.map((id, i) => {
485
+ const s = encodeToBytemojis(id.subarray(0, lengths[i])).toUpperCase();
486
+ return prefix ? `\u{1F151} ${s}` : s;
487
+ });
340
488
  }
341
489
 
342
490
  /**
@@ -603,9 +751,13 @@ export class ProvenanceMark {
603
751
 
604
752
  /**
605
753
  * Debug string representation.
754
+ *
755
+ * As of provenance-mark v0.24, this includes the full 64-character Mark ID
756
+ * hex (matching Rust's `Display` impl). Pre-v0.24 callers that depended on
757
+ * the 8-character prefix should use `idHex().slice(0, 8)` directly.
606
758
  */
607
759
  toString(): string {
608
- return `ProvenanceMark(${this.identifier()})`;
760
+ return `ProvenanceMark(${this.idHex()})`;
609
761
  }
610
762
 
611
763
  /**
package/src/resolution.ts CHANGED
@@ -56,7 +56,7 @@ export enum ProvenanceMarkResolution {
56
56
  * Convert a resolution to its numeric value.
57
57
  */
58
58
  export function resolutionToNumber(res: ProvenanceMarkResolution): number {
59
- return res as number;
59
+ return res;
60
60
  }
61
61
 
62
62
  /**
@@ -288,7 +288,7 @@ export function resolutionToString(res: ProvenanceMarkResolution): string {
288
288
  * Convert a resolution to CBOR.
289
289
  */
290
290
  export function resolutionToCbor(res: ProvenanceMarkResolution): Cbor {
291
- return cbor(res as number);
291
+ return cbor(res);
292
292
  }
293
293
 
294
294
  /**
package/src/validate.ts CHANGED
@@ -193,7 +193,7 @@ function formatText(report: ValidationReport): string {
193
193
  // Report each mark in the sequence
194
194
  for (const flaggedMark of seq.marks) {
195
195
  const mark = flaggedMark.mark;
196
- const shortId = mark.identifier();
196
+ const shortId = mark.idHex().slice(0, 8);
197
197
  const seqNum = mark.seq();
198
198
 
199
199
  // Build the mark line with annotations