capdag 0.126.278 → 0.137.303

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/capdag.js +343 -139
  2. package/capdag.test.js +395 -306
  3. package/package.json +2 -2
package/capdag.js CHANGED
@@ -27,47 +27,102 @@ const ErrorCodes = {
27
27
  UNTERMINATED_QUOTE: 8,
28
28
  INVALID_ESCAPE_SEQUENCE: 9,
29
29
  MISSING_IN_SPEC: 10,
30
- MISSING_OUT_SPEC: 11
30
+ MISSING_OUT_SPEC: 11,
31
+ EMPTY_VALUE: 12,
32
+ INVALID_IN_SPEC: 13,
33
+ INVALID_OUT_SPEC: 14
31
34
  };
32
35
 
33
36
  // Note: All parsing is delegated to TaggedUrn from tagged-urn-js
34
37
  // No duplicate state machine or parsing helpers needed here
35
38
 
36
39
  /**
37
- * Cap URN implementation with required direction (in/out) and optional tags
40
+ * Cap URN implementation with direction specs and optional tags.
38
41
  *
39
- * Direction is now a REQUIRED first-class field:
40
- * - inSpec: The input media URN (required, must start with "media:")
41
- * - outSpec: The output media URN (required, must start with "media:")
42
+ * Direction rules match the Rust reference:
43
+ * - missing `in` or `out` defaults to `media:`
44
+ * - `in=*` and `out=*` normalize to `media:`
45
+ * - empty `in=` / `out=` are rejected
42
46
  * - tags: Other optional tags (no longer contains in/out)
43
47
  */
44
48
  /**
45
- * Check if a value is a valid media URN or wildcard
46
- * @param {string} value - The value to check
47
- * @returns {boolean} True if valid media URN or wildcard
49
+ * Normalize a parsed direction tag to canonical wildcard semantics.
50
+ * Missing tags and `*` both become `media:`.
51
+ *
52
+ * @param {TaggedUrn} taggedUrn - Parsed tagged URN
53
+ * @param {string} tagName - `in` or `out`
54
+ * @returns {string} Normalized direction spec
55
+ */
56
+ function processDirectionTag(taggedUrn, tagName) {
57
+ const rawValue = taggedUrn.getTag(tagName);
58
+ if (rawValue === undefined || rawValue === '*') {
59
+ return 'media:';
60
+ }
61
+ if (rawValue === '') {
62
+ throw new CapUrnError(
63
+ tagName === 'in' ? ErrorCodes.INVALID_IN_SPEC : ErrorCodes.INVALID_OUT_SPEC,
64
+ `Empty value for '${tagName}' tag is not allowed`
65
+ );
66
+ }
67
+ return rawValue;
68
+ }
69
+
70
+ /**
71
+ * Canonicalize a direction spec via MediaUrn parsing.
72
+ *
73
+ * @param {string} spec - Media URN string
74
+ * @param {string} tagName - `in` or `out`
75
+ * @returns {string} Canonical media URN string
48
76
  */
49
- function isValidMediaUrnOrWildcard(value) {
50
- return value === '*' || (value && value.startsWith('media:'));
77
+ function canonicalizeDirectionSpec(spec, tagName) {
78
+ if (spec === 'media:' || spec === '*') {
79
+ return spec;
80
+ }
81
+
82
+ try {
83
+ return MediaUrn.fromString(spec).toString();
84
+ } catch (error) {
85
+ throw new CapUrnError(
86
+ tagName === 'in' ? ErrorCodes.INVALID_IN_SPEC : ErrorCodes.INVALID_OUT_SPEC,
87
+ `Invalid media URN for ${tagName} spec '${spec}': ${error.message}`
88
+ );
89
+ }
90
+ }
91
+
92
+ /**
93
+ * Validate a direction spec while preserving the caller-provided string.
94
+ * This matches the reference `with_in_spec` / `with_out_spec` behavior.
95
+ *
96
+ * @param {string} spec - Media URN string
97
+ * @param {string} tagName - `in` or `out`
98
+ * @returns {string} The original spec when valid
99
+ */
100
+ function validatePreservedDirectionSpec(spec, tagName) {
101
+ if (spec === 'media:' || spec === '*') {
102
+ return spec;
103
+ }
104
+
105
+ try {
106
+ MediaUrn.fromString(spec);
107
+ return spec;
108
+ } catch (error) {
109
+ throw new CapUrnError(
110
+ tagName === 'in' ? ErrorCodes.INVALID_IN_SPEC : ErrorCodes.INVALID_OUT_SPEC,
111
+ `Invalid media URN for ${tagName} spec '${spec}': ${error.message}`
112
+ );
113
+ }
51
114
  }
52
115
 
53
116
  class CapUrn {
54
117
  /**
55
- * Create a new CapUrn with required direction specs
56
- * @param {string} inSpec - Required input media URN (e.g., "media:void") or wildcard "*"
57
- * @param {string} outSpec - Required output media URN (e.g., "media:object") or wildcard "*"
118
+ * Create a new CapUrn with direction specs.
119
+ * @param {string} inSpec - Input media URN (e.g., "media:void")
120
+ * @param {string} outSpec - Output media URN (e.g., "media:object")
58
121
  * @param {Object} tags - Other tags (must NOT contain 'in' or 'out')
59
122
  */
60
123
  constructor(inSpec, outSpec, tags = {}) {
61
- // Validate in/out are media URNs or wildcards
62
- if (!isValidMediaUrnOrWildcard(inSpec)) {
63
- throw new CapUrnError(ErrorCodes.INVALID_FORMAT, `Invalid 'in' media URN: ${inSpec}. Must start with 'media:' or be '*'`);
64
- }
65
- if (!isValidMediaUrnOrWildcard(outSpec)) {
66
- throw new CapUrnError(ErrorCodes.INVALID_FORMAT, `Invalid 'out' media URN: ${outSpec}. Must start with 'media:' or be '*'`);
67
- }
68
-
69
- this.inSpec = inSpec;
70
- this.outSpec = outSpec;
124
+ this.inSpec = canonicalizeDirectionSpec(inSpec, 'in');
125
+ this.outSpec = canonicalizeDirectionSpec(outSpec, 'out');
71
126
  this.tags = {};
72
127
  // Copy tags, filtering out any 'in' or 'out' that might have slipped through
73
128
  for (const [key, value] of Object.entries(tags)) {
@@ -116,13 +171,14 @@ class CapUrn {
116
171
  * Create a Cap URN from string representation
117
172
  * Format: cap:in="<media-urn>";out="<media-urn>";key1=value1;key2=value2;...
118
173
  *
119
- * IMPORTANT: 'in' and 'out' tags are REQUIRED and must be valid media URNs.
174
+ * Missing `in` / `out` default to `media:`. `in=*` / `out=*` are also
175
+ * normalized to `media:`.
120
176
  *
121
177
  * Uses TaggedUrn for parsing to ensure consistent behavior across implementations.
122
178
  *
123
179
  * @param {string} s - The Cap URN string
124
180
  * @returns {CapUrn} The parsed Cap URN
125
- * @throws {CapUrnError} If parsing fails or in/out are missing/invalid
181
+ * @throws {CapUrnError} If parsing fails or direction specs are invalid
126
182
  */
127
183
  static fromString(s) {
128
184
  if (!s || typeof s !== 'string') {
@@ -162,24 +218,8 @@ class CapUrn {
162
218
  throw new CapUrnError(ErrorCodes.MISSING_CAP_PREFIX, `Expected 'cap:' prefix, got '${taggedUrn.getPrefix()}:'`);
163
219
  }
164
220
 
165
- // Extract required 'in' and 'out' tags
166
- const inSpec = taggedUrn.getTag('in');
167
- const outSpec = taggedUrn.getTag('out');
168
-
169
- if (!inSpec) {
170
- throw new CapUrnError(ErrorCodes.MISSING_IN_SPEC, "Cap URN requires 'in' tag for input media URN");
171
- }
172
- if (!outSpec) {
173
- throw new CapUrnError(ErrorCodes.MISSING_OUT_SPEC, "Cap URN requires 'out' tag for output media URN");
174
- }
175
-
176
- // Validate in/out are media URNs or wildcards
177
- if (!isValidMediaUrnOrWildcard(inSpec)) {
178
- throw new CapUrnError(ErrorCodes.INVALID_FORMAT, `Invalid 'in' media URN: ${inSpec}. Must start with 'media:' or be '*'`);
179
- }
180
- if (!isValidMediaUrnOrWildcard(outSpec)) {
181
- throw new CapUrnError(ErrorCodes.INVALID_FORMAT, `Invalid 'out' media URN: ${outSpec}. Must start with 'media:' or be '*'`);
182
- }
221
+ const inSpec = processDirectionTag(taggedUrn, 'in');
222
+ const outSpec = processDirectionTag(taggedUrn, 'out');
183
223
 
184
224
  // Build remaining tags (excluding in/out)
185
225
  const remainingTags = {};
@@ -193,12 +233,12 @@ class CapUrn {
193
233
  }
194
234
 
195
235
  /**
196
- * Create a Cap URN from a tags object
197
- * Extracts 'in' and 'out' from tags (required), stores rest as regular tags
236
+ * Create a Cap URN from a tags object.
237
+ * Unlike string parsing, this path requires explicit `in` and `out` tags.
198
238
  *
199
239
  * @param {Object} tags - Object containing all tags including 'in' and 'out'
200
240
  * @returns {CapUrn} The parsed Cap URN
201
- * @throws {CapUrnError} If 'in' or 'out' tags are missing or invalid
241
+ * @throws {CapUrnError} If `in` or `out` tags are missing or invalid
202
242
  */
203
243
  static fromTags(tags) {
204
244
  const inSpec = tags['in'] || tags['IN'];
@@ -211,12 +251,11 @@ class CapUrn {
211
251
  throw new CapUrnError(ErrorCodes.MISSING_OUT_SPEC, "Cap URN requires 'out' tag for output media URN");
212
252
  }
213
253
 
214
- // Validate in/out are media URNs or wildcards
215
- if (!isValidMediaUrnOrWildcard(inSpec)) {
216
- throw new CapUrnError(ErrorCodes.INVALID_FORMAT, `Invalid 'in' media URN: ${inSpec}. Must start with 'media:' or be '*'`);
254
+ if (inSpec === '') {
255
+ throw new CapUrnError(ErrorCodes.INVALID_IN_SPEC, "Empty value for 'in' tag is not allowed");
217
256
  }
218
- if (!isValidMediaUrnOrWildcard(outSpec)) {
219
- throw new CapUrnError(ErrorCodes.INVALID_FORMAT, `Invalid 'out' media URN: ${outSpec}. Must start with 'media:' or be '*'`);
257
+ if (outSpec === '') {
258
+ throw new CapUrnError(ErrorCodes.INVALID_OUT_SPEC, "Empty value for 'out' tag is not allowed");
220
259
  }
221
260
 
222
261
  // Build remaining tags (excluding in/out)
@@ -289,15 +328,18 @@ class CapUrn {
289
328
  }
290
329
 
291
330
  /**
292
- * Create a new cap URN with an added or updated tag
293
- * Key is normalized to lowercase; value is preserved as-is
294
- * SILENTLY IGNORES attempts to set "in" or "out" - use withInSpec/withOutSpec instead
331
+ * Create a new cap URN with an added or updated tag.
332
+ * Attempts to set `in` / `out` through `withTag` are ignored; use
333
+ * `withInSpec` / `withOutSpec` instead.
295
334
  *
296
335
  * @param {string} key - The tag key
297
336
  * @param {string} value - The tag value
298
337
  * @returns {CapUrn} A new CapUrn instance with the tag added/updated
299
338
  */
300
339
  withTag(key, value) {
340
+ if (value === '') {
341
+ throw new CapUrnError(ErrorCodes.EMPTY_VALUE, `Empty value for key '${key}' (use '*' for wildcard)`);
342
+ }
301
343
  const keyLower = key.toLowerCase();
302
344
  // Silently ignore attempts to set in/out via withTag
303
345
  if (keyLower === 'in' || keyLower === 'out') {
@@ -315,7 +357,9 @@ class CapUrn {
315
357
  * @returns {CapUrn} A new CapUrn instance with the updated inSpec
316
358
  */
317
359
  withInSpec(inSpec) {
318
- return new CapUrn(inSpec, this.outSpec, this.tags);
360
+ const updated = new CapUrn(this.inSpec, this.outSpec, this.tags);
361
+ updated.inSpec = validatePreservedDirectionSpec(inSpec, 'in');
362
+ return updated;
319
363
  }
320
364
 
321
365
  /**
@@ -325,7 +369,9 @@ class CapUrn {
325
369
  * @returns {CapUrn} A new CapUrn instance with the updated outSpec
326
370
  */
327
371
  withOutSpec(outSpec) {
328
- return new CapUrn(this.inSpec, outSpec, this.tags);
372
+ const updated = new CapUrn(this.inSpec, this.outSpec, this.tags);
373
+ updated.outSpec = validatePreservedDirectionSpec(outSpec, 'out');
374
+ return updated;
329
375
  }
330
376
 
331
377
  /**
@@ -366,11 +412,9 @@ class CapUrn {
366
412
  return true;
367
413
  }
368
414
 
369
- // Direction specs: TaggedUrn semantic matching via MediaUrn
370
- // Check in_urn: cap's input spec (pattern) accepts request's input (instance).
371
- // "media:" on the PATTERN side (this.inSpec) means "I accept any input" skip check.
372
- // "*" is also treated as wildcard. "media:" on the instance side still participates.
373
- if (this.inSpec !== '*' && this.inSpec !== 'media:' && request.inSpec !== '*') {
415
+ // Input direction: pattern accepts instance. `media:` on the pattern side is
416
+ // the wildcard top and skips the check.
417
+ if (this.inSpec !== 'media:' && this.inSpec !== '*') {
374
418
  const capIn = TaggedUrn.fromString(this.inSpec);
375
419
  const requestIn = TaggedUrn.fromString(request.inSpec);
376
420
  if (!capIn.accepts(requestIn)) {
@@ -378,10 +422,9 @@ class CapUrn {
378
422
  }
379
423
  }
380
424
 
381
- // Check out_urn: cap's output (instance) conforms to request's output (pattern).
382
- // "media:" on the PATTERN side (this.outSpec) means "I accept any output" — skip check.
383
- // "*" is also treated as wildcard. "media:" on the instance side still participates.
384
- if (this.outSpec !== '*' && this.outSpec !== 'media:' && request.outSpec !== '*') {
425
+ // Output direction: provider output must conform to requested output.
426
+ // `media:` on the pattern side is wildcard top and skips the check.
427
+ if (this.outSpec !== 'media:' && this.outSpec !== '*') {
385
428
  const capOut = TaggedUrn.fromString(this.outSpec);
386
429
  const requestOut = TaggedUrn.fromString(request.outSpec);
387
430
  if (!capOut.conformsTo(requestOut)) {
@@ -389,33 +432,23 @@ class CapUrn {
389
432
  }
390
433
  }
391
434
 
392
- // Check all other tags that the request specifies
393
- for (const [requestKey, requestValue] of Object.entries(request.tags)) {
394
- const capValue = this.tags[requestKey];
395
-
396
- if (capValue === undefined) {
397
- // Missing tag in cap is treated as wildcard - can handle any value
398
- continue;
399
- }
435
+ // Check all tags required by the pattern. Missing tags in the instance reject.
436
+ for (const [patternKey, patternValue] of Object.entries(this.tags)) {
437
+ const requestValue = request.tags[patternKey];
400
438
 
401
- if (capValue === '*') {
402
- // Cap has wildcard - can handle any value
403
- continue;
439
+ if (requestValue === undefined) {
440
+ return false;
404
441
  }
405
442
 
406
- if (requestValue === '*') {
407
- // Request accepts any value - cap's specific value matches
443
+ if (patternValue === '*' || requestValue === '*') {
408
444
  continue;
409
445
  }
410
446
 
411
- if (capValue !== requestValue) {
412
- // Cap has specific value that doesn't match request's specific value
447
+ if (patternValue !== requestValue) {
413
448
  return false;
414
449
  }
415
450
  }
416
451
 
417
- // If cap has additional specific tags that request doesn't specify, that's fine
418
- // The cap is just more specific than needed
419
452
  return true;
420
453
  }
421
454
 
@@ -441,12 +474,13 @@ class CapUrn {
441
474
  */
442
475
  specificity() {
443
476
  let count = 0;
444
- // Direction specs contribute their MediaUrn tag count
445
- if (this.inSpec !== '*') {
477
+ // Direction specs contribute their MediaUrn tag count. `media:` is the
478
+ // wildcard top and contributes zero.
479
+ if (this.inSpec !== 'media:' && this.inSpec !== '*') {
446
480
  const inMedia = TaggedUrn.fromString(this.inSpec);
447
481
  count += Object.keys(inMedia.tags).length;
448
482
  }
449
- if (this.outSpec !== '*') {
483
+ if (this.outSpec !== 'media:' && this.outSpec !== '*') {
450
484
  const outMedia = TaggedUrn.fromString(this.outSpec);
451
485
  count += Object.keys(outMedia.tags).length;
452
486
  }
@@ -686,7 +720,7 @@ class CapMatcher {
686
720
  let bestSpecificity = -1;
687
721
 
688
722
  for (const cap of caps) {
689
- if (cap.accepts(request)) {
723
+ if (request.accepts(cap)) {
690
724
  const specificity = cap.specificity();
691
725
  if (specificity > bestSpecificity) {
692
726
  best = cap;
@@ -706,7 +740,7 @@ class CapMatcher {
706
740
  * @returns {CapUrn[]} Array of matching caps sorted by specificity (most specific first)
707
741
  */
708
742
  static findAllMatches(caps, request) {
709
- const matches = caps.filter(cap => cap.accepts(request));
743
+ const matches = caps.filter(cap => request.accepts(cap));
710
744
 
711
745
  // Sort by specificity (most specific first)
712
746
  matches.sort((a, b) => b.specificity() - a.specificity());
@@ -789,18 +823,22 @@ const MEDIA_OBJECT = 'media:record';
789
823
  // Media URN for binary data - the most general media type (no constraints)
790
824
  const MEDIA_IDENTITY = 'media:';
791
825
 
792
- // Array types - URNs must match base.toml definitions
793
- // Media URN for string array type - textable with list marker
794
- const MEDIA_STRING_ARRAY = 'media:list;textable';
795
- // Media URN for integer array type - textable, numeric with list marker
796
- const MEDIA_INTEGER_ARRAY = 'media:integer;list;textable;numeric';
797
- // Media URN for number array type - textable, numeric with list marker
798
- const MEDIA_NUMBER_ARRAY = 'media:list;textable;numeric';
799
- // Media URN for boolean array type - uses "bool" with list marker
800
- const MEDIA_BOOLEAN_ARRAY = 'media:bool;list;textable';
801
- // Media URN for object array type - list of records (NOT textable)
802
- // Use a specific format like JSON array for textable object arrays.
803
- const MEDIA_OBJECT_ARRAY = 'media:list;record';
826
+ // List types - URNs must match base.toml definitions
827
+ // Media URN for generic list type
828
+ const MEDIA_LIST = 'media:list';
829
+ // Media URN for textable list type
830
+ const MEDIA_TEXTABLE_LIST = 'media:list;textable';
831
+ // Media URN for string list type - textable with list marker
832
+ const MEDIA_STRING_LIST = 'media:list;textable';
833
+ // Media URN for integer list type - textable, numeric with list marker
834
+ const MEDIA_INTEGER_LIST = 'media:integer;list;textable;numeric';
835
+ // Media URN for number list type - textable, numeric with list marker
836
+ const MEDIA_NUMBER_LIST = 'media:list;numeric;textable';
837
+ // Media URN for boolean list type - uses "bool" with list marker
838
+ const MEDIA_BOOLEAN_LIST = 'media:bool;list;textable';
839
+ // Media URN for object list type - list of records (NOT textable)
840
+ // Use a specific format like JSON array for textable object lists.
841
+ const MEDIA_OBJECT_LIST = 'media:list;record';
804
842
 
805
843
  // Semantic media types for specialized content
806
844
  // Media URN for PNG image data
@@ -813,8 +851,6 @@ const MEDIA_VIDEO = 'media:video';
813
851
  // Semantic AI input types - distinguished by their purpose/context
814
852
  // Media URN for audio input containing speech for transcription (Whisper)
815
853
  const MEDIA_AUDIO_SPEECH = 'media:audio;wav;speech';
816
- // Media URN for thumbnail image output
817
- const MEDIA_IMAGE_THUMBNAIL = 'media:image;png;thumbnail';
818
854
 
819
855
  // Document types (PRIMARY naming - type IS the format)
820
856
  // Media URN for PDF documents
@@ -842,6 +878,18 @@ const MEDIA_JSON_SCHEMA = 'media:json;json-schema;record;textable';
842
878
  // Media URN for YAML data - has record marker (structured key-value)
843
879
  const MEDIA_YAML = 'media:record;textable;yaml';
844
880
 
881
+ // Format-specific variants for JSON, YAML, CSV
882
+ const MEDIA_JSON_VALUE = 'media:json;textable';
883
+ const MEDIA_JSON_RECORD = 'media:json;record;textable';
884
+ const MEDIA_JSON_LIST = 'media:json;list;textable';
885
+ const MEDIA_JSON_LIST_RECORD = 'media:json;list;record;textable';
886
+ const MEDIA_YAML_VALUE = 'media:textable;yaml';
887
+ const MEDIA_YAML_RECORD = 'media:record;textable;yaml';
888
+ const MEDIA_YAML_LIST = 'media:list;textable;yaml';
889
+ const MEDIA_YAML_LIST_RECORD = 'media:list;record;textable;yaml';
890
+ const MEDIA_CSV = 'media:csv;list;record;textable';
891
+ const MEDIA_CSV_LIST = 'media:csv;list;textable';
892
+
845
893
  // File path types - for arguments that represent filesystem paths
846
894
  // Media URN for a single file path - textable, scalar by default (no list marker)
847
895
  const MEDIA_FILE_PATH = 'media:file-path;textable';
@@ -849,8 +897,6 @@ const MEDIA_FILE_PATH = 'media:file-path;textable';
849
897
  const MEDIA_FILE_PATH_ARRAY = 'media:file-path;list;textable';
850
898
 
851
899
  // Semantic text input types - distinguished by their purpose/context
852
- // Media URN for frontmatter text (book metadata) - scalar by default
853
- const MEDIA_FRONTMATTER_TEXT = 'media:frontmatter;textable';
854
900
  // Media URN for model spec (provider:model format, HuggingFace name, etc.) - scalar by default
855
901
  const MEDIA_MODEL_SPEC = 'media:model-spec;textable';
856
902
  // Media URN for MLX model path - scalar by default
@@ -877,20 +923,19 @@ const MEDIA_PATH_OUTPUT = 'media:model-path;record;textable';
877
923
  const MEDIA_EMBEDDING_VECTOR = 'media:embedding-vector;record;textable';
878
924
  // Media URN for LLM inference output - has record marker
879
925
  const MEDIA_LLM_INFERENCE_OUTPUT = 'media:generated-text;record;textable';
880
- // Media URN for extracted metadata - has record marker
881
- const MEDIA_FILE_METADATA = 'media:file-metadata;record;textable';
882
- // Media URN for extracted outline - has record marker
883
- const MEDIA_DOCUMENT_OUTLINE = 'media:document-outline;record;textable';
884
- // Media URN for disbound page - has list marker (array of page objects)
885
- const MEDIA_DISBOUND_PAGE = 'media:disbound-page;list;textable';
886
926
  // Media URN for vision inference output - textable, scalar by default
887
927
  const MEDIA_IMAGE_DESCRIPTION = 'media:image-description;textable';
888
928
  // Media URN for transcription output - has record marker
889
929
  const MEDIA_TRANSCRIPTION_OUTPUT = 'media:record;textable;transcription';
890
- // Media URN for decision output (bit choice) - scalar by default
891
- const MEDIA_DECISION = 'media:bool;decision;textable';
892
- // Media URN for decision array output (bit choices) - has list marker
893
- const MEDIA_DECISION_ARRAY = 'media:bool;decision;list;textable';
930
+ // Media URN for decision output - JSON record with textable
931
+ const MEDIA_DECISION = 'media:decision;json;record;textable';
932
+ // Media URN for textable page output
933
+ const MEDIA_TEXTABLE_PAGE = 'media:textable;page';
934
+ // Collection types
935
+ const MEDIA_COLLECTION = 'media:collection;record';
936
+ const MEDIA_COLLECTION_LIST = 'media:collection;list;record';
937
+ // Media URN for adapter selection output - JSON record
938
+ const MEDIA_ADAPTER_SELECTION = 'media:adapter-selection;json;record';
894
939
 
895
940
  // =============================================================================
896
941
  // STANDARD CAP URN CONSTANTS
@@ -900,6 +945,10 @@ const MEDIA_DECISION_ARRAY = 'media:bool;decision;list;textable';
900
945
  // Accepts any media type as input and outputs any media type
901
946
  const CAP_IDENTITY = 'cap:in=media:;out=media:';
902
947
 
948
+ // Adapter-selection capability. Default implementation returns empty END (no match).
949
+ // Cartridges that inspect file content override this with a handler that returns {"media_urns": [...]}.
950
+ const CAP_ADAPTER_SELECTION = 'cap:in="media:";out="media:adapter-selection;json;record"';
951
+
903
952
  // =============================================================================
904
953
  // MEDIA URN CLASS
905
954
  // =============================================================================
@@ -956,8 +1005,10 @@ class MediaUrn {
956
1005
  // =========================================================================
957
1006
 
958
1007
  /**
959
- * Returns true if this media is a list (has `list` marker tag).
960
- * Returns false if scalar (no `list` marker = default).
1008
+ * Returns true if this media URN describes list-type data (has `list` marker tag).
1009
+ * This is a semantic type check it means "the data format IS a list/array".
1010
+ * This does NOT indicate input cardinality (single vs multiple items).
1011
+ * Cardinality is tracked by is_sequence on the wire protocol, not by URN tags.
961
1012
  * @returns {boolean}
962
1013
  */
963
1014
  isList() { return this._hasMarkerTag('list'); }
@@ -1027,6 +1078,22 @@ class MediaUrn {
1027
1078
  /** @returns {boolean} True if the "bool" marker tag is present */
1028
1079
  isBool() { return this._urn.getTag('bool') !== undefined; }
1029
1080
 
1081
+ /**
1082
+ * Returns true if this media URN describes YAML representation.
1083
+ * @returns {boolean}
1084
+ */
1085
+ isYaml() {
1086
+ return this._hasMarkerTag('yaml');
1087
+ }
1088
+
1089
+ /**
1090
+ * Returns true if this media URN describes CSV representation.
1091
+ * @returns {boolean}
1092
+ */
1093
+ isCsv() {
1094
+ return this._hasMarkerTag('csv');
1095
+ }
1096
+
1030
1097
  /**
1031
1098
  * Check if this represents a single file path type (not array).
1032
1099
  * Returns true if the "file-path" marker tag is present AND no list marker.
@@ -1048,6 +1115,13 @@ class MediaUrn {
1048
1115
  */
1049
1116
  isAnyFilePath() { return this._hasMarkerTag('file-path'); }
1050
1117
 
1118
+ /**
1119
+ * Check if this represents a collection type.
1120
+ * Returns true if the "collection" marker tag is present.
1121
+ * @returns {boolean}
1122
+ */
1123
+ isCollection() { return this._hasMarkerTag('collection'); }
1124
+
1051
1125
  /**
1052
1126
  * Check if this media URN conforms to another (pattern).
1053
1127
  * @param {MediaUrn} pattern
@@ -1123,20 +1197,67 @@ class MediaUrn {
1123
1197
  // =============================================================================
1124
1198
 
1125
1199
  /**
1126
- * Build URN for LLM conversation capability
1127
- * @param {string} langCode - Language code (e.g., "en", "fr")
1200
+ * Build URN for LLM generate-text capability
1128
1201
  * @returns {CapUrn}
1129
1202
  */
1130
- function llmConversationUrn(langCode) {
1203
+ function llmGenerateTextUrn() {
1131
1204
  return new CapUrnBuilder()
1132
- .tag('op', 'conversation')
1133
- .tag('unconstrained', '*')
1134
- .tag('language', langCode)
1205
+ .tag('op', 'generate_text')
1206
+ .tag('llm', '*')
1207
+ .tag('ml-model', '*')
1135
1208
  .inSpec(MEDIA_STRING)
1136
- .outSpec(MEDIA_LLM_INFERENCE_OUTPUT)
1209
+ .outSpec(MEDIA_STRING)
1210
+ .build();
1211
+ }
1212
+
1213
+ /**
1214
+ * Build URN for render-page-image capability
1215
+ * @param {string} inputMedia - The input media URN string
1216
+ * @returns {CapUrn}
1217
+ */
1218
+ function renderPageImageUrn(inputMedia) {
1219
+ return new CapUrnBuilder()
1220
+ .tag('op', 'render_page_image')
1221
+ .inSpec(inputMedia)
1222
+ .outSpec(MEDIA_PNG)
1137
1223
  .build();
1138
1224
  }
1139
1225
 
1226
+ /**
1227
+ * Build URN for format conversion capability
1228
+ * @param {string} inMedia - The input media URN string
1229
+ * @param {string} outMedia - The output media URN string
1230
+ * @returns {CapUrn}
1231
+ */
1232
+ function formatConversionUrn(inMedia, outMedia) {
1233
+ return new CapUrnBuilder()
1234
+ .tag('op', 'convert_format')
1235
+ .inSpec(inMedia)
1236
+ .outSpec(outMedia)
1237
+ .build();
1238
+ }
1239
+
1240
+ /**
1241
+ * Map a primitive type name to the corresponding media URN string.
1242
+ * @param {string} typeName - The type name (e.g., 'string', 'integer', 'string-list')
1243
+ * @returns {string|null} The media URN string, or null if not recognized
1244
+ */
1245
+ function mediaUrnForType(typeName) {
1246
+ switch (typeName) {
1247
+ case 'string': return MEDIA_STRING;
1248
+ case 'integer': return MEDIA_INTEGER;
1249
+ case 'number': return MEDIA_NUMBER;
1250
+ case 'boolean': return MEDIA_BOOLEAN;
1251
+ case 'object': return MEDIA_OBJECT;
1252
+ case 'string-list': return MEDIA_STRING_LIST;
1253
+ case 'integer-list': return MEDIA_INTEGER_LIST;
1254
+ case 'number-list': return MEDIA_NUMBER_LIST;
1255
+ case 'boolean-list': return MEDIA_BOOLEAN_LIST;
1256
+ case 'object-list': return MEDIA_OBJECT_LIST;
1257
+ default: return null;
1258
+ }
1259
+ }
1260
+
1140
1261
  /**
1141
1262
  * Build URN for model-availability capability
1142
1263
  * @returns {CapUrn}
@@ -2392,6 +2513,23 @@ function validateCapArgs(cap) {
2392
2513
  }
2393
2514
  }
2394
2515
 
2516
+ // RULE11: Stdin source consistency with in= spec
2517
+ // If in= is media:void, no args may have stdin sources.
2518
+ // If in= is anything other than media:void, at least one arg must have a stdin source.
2519
+ const inMediaUrn = cap.urn.inMediaUrn();
2520
+ const voidUrn = MediaUrn.fromString(MEDIA_VOID);
2521
+ const inIsVoid = inMediaUrn.isEquivalent(voidUrn);
2522
+ if (inIsVoid && stdinUrns.length > 0) {
2523
+ throw new ValidationError('InvalidCapSchema', capUrn, {
2524
+ issue: `RULE11: Cap has in="${MEDIA_VOID}" but argument(s) declare stdin source`
2525
+ });
2526
+ }
2527
+ if (!inIsVoid && stdinUrns.length === 0 && args.length > 0) {
2528
+ throw new ValidationError('InvalidCapSchema', capUrn, {
2529
+ issue: `RULE11: Cap has non-void in= spec but no argument declares a stdin source`
2530
+ });
2531
+ }
2532
+
2395
2533
  // RULE5: No two args may have same position
2396
2534
  const positionSet = new Set();
2397
2535
  for (const { position, mediaUrn } of positions) {
@@ -2426,9 +2564,6 @@ function validateCapArgs(cap) {
2426
2564
  flagSet.add(flag);
2427
2565
  }
2428
2566
 
2429
- // RULE8: No unknown keys in source objects - this is handled in ArgSource.fromJSON()
2430
- // RULE11: cli_flag used verbatim as specified - enforced by design
2431
- // RULE12: media_urn is the key, no name field - enforced by CapArg structure
2432
2567
  }
2433
2568
 
2434
2569
  /**
@@ -2933,6 +3068,56 @@ class CapValidator {
2933
3068
  // CAP ARGUMENT VALUE - Unified argument type
2934
3069
  // ============================================================================
2935
3070
 
3071
+ /**
3072
+ * Result from a cap execution.
3073
+ *
3074
+ * Scalar outputs carry raw materialized bytes (e.g. UTF-8 text, raw binary).
3075
+ * List outputs carry a CBOR sequence of values, one per list item.
3076
+ * Empty represents a void cap with no output.
3077
+ */
3078
+ class CapResult {
3079
+ static KIND_SCALAR = 'scalar';
3080
+ static KIND_LIST = 'list';
3081
+ static KIND_EMPTY = 'empty';
3082
+
3083
+ /**
3084
+ * @param {'scalar'|'list'|'empty'} kind
3085
+ * @param {Uint8Array|null} data - Bytes for scalar or CBOR sequence for list, null for empty
3086
+ */
3087
+ constructor(kind, data = null) {
3088
+ this.kind = kind;
3089
+ this.data = data;
3090
+ }
3091
+
3092
+ /** Create a CapResult carrying raw bytes (scalar output). */
3093
+ static scalar(data) {
3094
+ const bytes = data instanceof Uint8Array ? data : new Uint8Array(data || []);
3095
+ return new CapResult(CapResult.KIND_SCALAR, bytes);
3096
+ }
3097
+
3098
+ /** Create a CapResult carrying a CBOR sequence (list output). */
3099
+ static list(cborSequence) {
3100
+ const bytes = cborSequence instanceof Uint8Array ? cborSequence : new Uint8Array(cborSequence || []);
3101
+ return new CapResult(CapResult.KIND_LIST, bytes);
3102
+ }
3103
+
3104
+ /** Create a CapResult for void caps. */
3105
+ static empty() {
3106
+ return new CapResult(CapResult.KIND_EMPTY, null);
3107
+ }
3108
+
3109
+ /** Returns true if this is a scalar result. */
3110
+ isScalar() { return this.kind === CapResult.KIND_SCALAR; }
3111
+
3112
+ /** Returns true if this is a list result. */
3113
+ isList() { return this.kind === CapResult.KIND_LIST; }
3114
+
3115
+ /** Returns true if this is an empty result. */
3116
+ isEmpty() { return this.kind === CapResult.KIND_EMPTY; }
3117
+ }
3118
+
3119
+ // ============================================================================
3120
+
2936
3121
  /**
2937
3122
  * Unified argument type - arguments are identified by media_urn.
2938
3123
  * The cap definition's sources specify how to extract values (stdin, position, cli_flag).
@@ -3189,7 +3374,7 @@ class CompositeCapSet {
3189
3374
  * Execute a capability by finding the best match and delegating
3190
3375
  * @param {string} capUrn - The capability URN to execute
3191
3376
  * @param {CapArgumentValue[]} args - Arguments identified by media_urn
3192
- * @returns {Promise<{binaryOutput: Uint8Array|null, textOutput: string|null}>}
3377
+ * @returns {Promise<CapResult>}
3193
3378
  */
3194
3379
  async executeCap(capUrn, args) {
3195
3380
  let request;
@@ -5435,18 +5620,20 @@ module.exports = {
5435
5620
  MEDIA_NUMBER,
5436
5621
  MEDIA_BOOLEAN,
5437
5622
  MEDIA_OBJECT,
5438
- MEDIA_STRING_ARRAY,
5439
- MEDIA_INTEGER_ARRAY,
5440
- MEDIA_NUMBER_ARRAY,
5441
- MEDIA_BOOLEAN_ARRAY,
5442
- MEDIA_OBJECT_ARRAY,
5623
+ // List types
5624
+ MEDIA_LIST,
5625
+ MEDIA_TEXTABLE_LIST,
5626
+ MEDIA_STRING_LIST,
5627
+ MEDIA_INTEGER_LIST,
5628
+ MEDIA_NUMBER_LIST,
5629
+ MEDIA_BOOLEAN_LIST,
5630
+ MEDIA_OBJECT_LIST,
5443
5631
  MEDIA_IDENTITY,
5444
5632
  MEDIA_VOID,
5445
5633
  MEDIA_PNG,
5446
5634
  MEDIA_AUDIO,
5447
5635
  MEDIA_VIDEO,
5448
5636
  MEDIA_AUDIO_SPEECH,
5449
- MEDIA_IMAGE_THUMBNAIL,
5450
5637
  // Document types (PRIMARY naming)
5451
5638
  MEDIA_PDF,
5452
5639
  MEDIA_EPUB,
@@ -5460,11 +5647,22 @@ module.exports = {
5460
5647
  MEDIA_JSON,
5461
5648
  MEDIA_JSON_SCHEMA,
5462
5649
  MEDIA_YAML,
5650
+ // Format-specific variants
5651
+ MEDIA_JSON_VALUE,
5652
+ MEDIA_JSON_RECORD,
5653
+ MEDIA_JSON_LIST,
5654
+ MEDIA_JSON_LIST_RECORD,
5655
+ MEDIA_YAML_VALUE,
5656
+ MEDIA_YAML_RECORD,
5657
+ MEDIA_YAML_LIST,
5658
+ MEDIA_YAML_LIST_RECORD,
5659
+ MEDIA_CSV,
5660
+ MEDIA_CSV_LIST,
5463
5661
  MEDIA_MODEL_SPEC,
5464
5662
  MEDIA_MODEL_REPO,
5465
5663
  MEDIA_MODEL_DIM,
5466
5664
  MEDIA_DECISION,
5467
- MEDIA_DECISION_ARRAY,
5665
+ MEDIA_TEXTABLE_PAGE,
5468
5666
  // Semantic output types - model management
5469
5667
  MEDIA_DOWNLOAD_OUTPUT,
5470
5668
  MEDIA_LIST_OUTPUT,
@@ -5475,21 +5673,27 @@ module.exports = {
5475
5673
  // Semantic output types - inference
5476
5674
  MEDIA_EMBEDDING_VECTOR,
5477
5675
  MEDIA_LLM_INFERENCE_OUTPUT,
5478
- MEDIA_FILE_METADATA,
5479
- MEDIA_DOCUMENT_OUTLINE,
5480
- MEDIA_DISBOUND_PAGE,
5481
5676
  MEDIA_IMAGE_DESCRIPTION,
5482
5677
  MEDIA_TRANSCRIPTION_OUTPUT,
5483
5678
  // File path types
5484
5679
  MEDIA_FILE_PATH,
5485
5680
  MEDIA_FILE_PATH_ARRAY,
5486
- // Semantic text input types
5487
- MEDIA_FRONTMATTER_TEXT,
5488
5681
  MEDIA_MLX_MODEL_PATH,
5682
+ // Collection types
5683
+ MEDIA_COLLECTION,
5684
+ MEDIA_COLLECTION_LIST,
5685
+ MEDIA_ADAPTER_SELECTION,
5686
+ // Standard cap URN constants
5687
+ CAP_ADAPTER_SELECTION,
5688
+ // Cap execution result
5689
+ CapResult,
5489
5690
  // Unified argument type
5490
5691
  CapArgumentValue,
5491
5692
  // Standard cap URN builders
5492
- llmConversationUrn,
5693
+ llmGenerateTextUrn,
5694
+ renderPageImageUrn,
5695
+ formatConversionUrn,
5696
+ mediaUrnForType,
5493
5697
  modelAvailabilityUrn,
5494
5698
  modelPathUrn,
5495
5699
  CapMatrixError,