capdag 0.126.278 → 0.137.303
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/capdag.js +343 -139
- package/capdag.test.js +395 -306
- package/package.json +2 -2
package/capdag.js
CHANGED
|
@@ -27,47 +27,102 @@ const ErrorCodes = {
|
|
|
27
27
|
UNTERMINATED_QUOTE: 8,
|
|
28
28
|
INVALID_ESCAPE_SEQUENCE: 9,
|
|
29
29
|
MISSING_IN_SPEC: 10,
|
|
30
|
-
MISSING_OUT_SPEC: 11
|
|
30
|
+
MISSING_OUT_SPEC: 11,
|
|
31
|
+
EMPTY_VALUE: 12,
|
|
32
|
+
INVALID_IN_SPEC: 13,
|
|
33
|
+
INVALID_OUT_SPEC: 14
|
|
31
34
|
};
|
|
32
35
|
|
|
33
36
|
// Note: All parsing is delegated to TaggedUrn from tagged-urn-js
|
|
34
37
|
// No duplicate state machine or parsing helpers needed here
|
|
35
38
|
|
|
36
39
|
/**
|
|
37
|
-
* Cap URN implementation with
|
|
40
|
+
* Cap URN implementation with direction specs and optional tags.
|
|
38
41
|
*
|
|
39
|
-
* Direction
|
|
40
|
-
* -
|
|
41
|
-
* -
|
|
42
|
+
* Direction rules match the Rust reference:
|
|
43
|
+
* - missing `in` or `out` defaults to `media:`
|
|
44
|
+
* - `in=*` and `out=*` normalize to `media:`
|
|
45
|
+
* - empty `in=` / `out=` are rejected
|
|
42
46
|
* - tags: Other optional tags (no longer contains in/out)
|
|
43
47
|
*/
|
|
44
48
|
/**
|
|
45
|
-
*
|
|
46
|
-
*
|
|
47
|
-
*
|
|
49
|
+
* Normalize a parsed direction tag to canonical wildcard semantics.
|
|
50
|
+
* Missing tags and `*` both become `media:`.
|
|
51
|
+
*
|
|
52
|
+
* @param {TaggedUrn} taggedUrn - Parsed tagged URN
|
|
53
|
+
* @param {string} tagName - `in` or `out`
|
|
54
|
+
* @returns {string} Normalized direction spec
|
|
55
|
+
*/
|
|
56
|
+
function processDirectionTag(taggedUrn, tagName) {
|
|
57
|
+
const rawValue = taggedUrn.getTag(tagName);
|
|
58
|
+
if (rawValue === undefined || rawValue === '*') {
|
|
59
|
+
return 'media:';
|
|
60
|
+
}
|
|
61
|
+
if (rawValue === '') {
|
|
62
|
+
throw new CapUrnError(
|
|
63
|
+
tagName === 'in' ? ErrorCodes.INVALID_IN_SPEC : ErrorCodes.INVALID_OUT_SPEC,
|
|
64
|
+
`Empty value for '${tagName}' tag is not allowed`
|
|
65
|
+
);
|
|
66
|
+
}
|
|
67
|
+
return rawValue;
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
/**
|
|
71
|
+
* Canonicalize a direction spec via MediaUrn parsing.
|
|
72
|
+
*
|
|
73
|
+
* @param {string} spec - Media URN string
|
|
74
|
+
* @param {string} tagName - `in` or `out`
|
|
75
|
+
* @returns {string} Canonical media URN string
|
|
48
76
|
*/
|
|
49
|
-
function
|
|
50
|
-
|
|
77
|
+
function canonicalizeDirectionSpec(spec, tagName) {
|
|
78
|
+
if (spec === 'media:' || spec === '*') {
|
|
79
|
+
return spec;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
try {
|
|
83
|
+
return MediaUrn.fromString(spec).toString();
|
|
84
|
+
} catch (error) {
|
|
85
|
+
throw new CapUrnError(
|
|
86
|
+
tagName === 'in' ? ErrorCodes.INVALID_IN_SPEC : ErrorCodes.INVALID_OUT_SPEC,
|
|
87
|
+
`Invalid media URN for ${tagName} spec '${spec}': ${error.message}`
|
|
88
|
+
);
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
/**
|
|
93
|
+
* Validate a direction spec while preserving the caller-provided string.
|
|
94
|
+
* This matches the reference `with_in_spec` / `with_out_spec` behavior.
|
|
95
|
+
*
|
|
96
|
+
* @param {string} spec - Media URN string
|
|
97
|
+
* @param {string} tagName - `in` or `out`
|
|
98
|
+
* @returns {string} The original spec when valid
|
|
99
|
+
*/
|
|
100
|
+
function validatePreservedDirectionSpec(spec, tagName) {
|
|
101
|
+
if (spec === 'media:' || spec === '*') {
|
|
102
|
+
return spec;
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
try {
|
|
106
|
+
MediaUrn.fromString(spec);
|
|
107
|
+
return spec;
|
|
108
|
+
} catch (error) {
|
|
109
|
+
throw new CapUrnError(
|
|
110
|
+
tagName === 'in' ? ErrorCodes.INVALID_IN_SPEC : ErrorCodes.INVALID_OUT_SPEC,
|
|
111
|
+
`Invalid media URN for ${tagName} spec '${spec}': ${error.message}`
|
|
112
|
+
);
|
|
113
|
+
}
|
|
51
114
|
}
|
|
52
115
|
|
|
53
116
|
class CapUrn {
|
|
54
117
|
/**
|
|
55
|
-
* Create a new CapUrn with
|
|
56
|
-
* @param {string} inSpec -
|
|
57
|
-
* @param {string} outSpec -
|
|
118
|
+
* Create a new CapUrn with direction specs.
|
|
119
|
+
* @param {string} inSpec - Input media URN (e.g., "media:void")
|
|
120
|
+
* @param {string} outSpec - Output media URN (e.g., "media:object")
|
|
58
121
|
* @param {Object} tags - Other tags (must NOT contain 'in' or 'out')
|
|
59
122
|
*/
|
|
60
123
|
constructor(inSpec, outSpec, tags = {}) {
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
throw new CapUrnError(ErrorCodes.INVALID_FORMAT, `Invalid 'in' media URN: ${inSpec}. Must start with 'media:' or be '*'`);
|
|
64
|
-
}
|
|
65
|
-
if (!isValidMediaUrnOrWildcard(outSpec)) {
|
|
66
|
-
throw new CapUrnError(ErrorCodes.INVALID_FORMAT, `Invalid 'out' media URN: ${outSpec}. Must start with 'media:' or be '*'`);
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
this.inSpec = inSpec;
|
|
70
|
-
this.outSpec = outSpec;
|
|
124
|
+
this.inSpec = canonicalizeDirectionSpec(inSpec, 'in');
|
|
125
|
+
this.outSpec = canonicalizeDirectionSpec(outSpec, 'out');
|
|
71
126
|
this.tags = {};
|
|
72
127
|
// Copy tags, filtering out any 'in' or 'out' that might have slipped through
|
|
73
128
|
for (const [key, value] of Object.entries(tags)) {
|
|
@@ -116,13 +171,14 @@ class CapUrn {
|
|
|
116
171
|
* Create a Cap URN from string representation
|
|
117
172
|
* Format: cap:in="<media-urn>";out="<media-urn>";key1=value1;key2=value2;...
|
|
118
173
|
*
|
|
119
|
-
*
|
|
174
|
+
* Missing `in` / `out` default to `media:`. `in=*` / `out=*` are also
|
|
175
|
+
* normalized to `media:`.
|
|
120
176
|
*
|
|
121
177
|
* Uses TaggedUrn for parsing to ensure consistent behavior across implementations.
|
|
122
178
|
*
|
|
123
179
|
* @param {string} s - The Cap URN string
|
|
124
180
|
* @returns {CapUrn} The parsed Cap URN
|
|
125
|
-
* @throws {CapUrnError} If parsing fails or
|
|
181
|
+
* @throws {CapUrnError} If parsing fails or direction specs are invalid
|
|
126
182
|
*/
|
|
127
183
|
static fromString(s) {
|
|
128
184
|
if (!s || typeof s !== 'string') {
|
|
@@ -162,24 +218,8 @@ class CapUrn {
|
|
|
162
218
|
throw new CapUrnError(ErrorCodes.MISSING_CAP_PREFIX, `Expected 'cap:' prefix, got '${taggedUrn.getPrefix()}:'`);
|
|
163
219
|
}
|
|
164
220
|
|
|
165
|
-
|
|
166
|
-
const
|
|
167
|
-
const outSpec = taggedUrn.getTag('out');
|
|
168
|
-
|
|
169
|
-
if (!inSpec) {
|
|
170
|
-
throw new CapUrnError(ErrorCodes.MISSING_IN_SPEC, "Cap URN requires 'in' tag for input media URN");
|
|
171
|
-
}
|
|
172
|
-
if (!outSpec) {
|
|
173
|
-
throw new CapUrnError(ErrorCodes.MISSING_OUT_SPEC, "Cap URN requires 'out' tag for output media URN");
|
|
174
|
-
}
|
|
175
|
-
|
|
176
|
-
// Validate in/out are media URNs or wildcards
|
|
177
|
-
if (!isValidMediaUrnOrWildcard(inSpec)) {
|
|
178
|
-
throw new CapUrnError(ErrorCodes.INVALID_FORMAT, `Invalid 'in' media URN: ${inSpec}. Must start with 'media:' or be '*'`);
|
|
179
|
-
}
|
|
180
|
-
if (!isValidMediaUrnOrWildcard(outSpec)) {
|
|
181
|
-
throw new CapUrnError(ErrorCodes.INVALID_FORMAT, `Invalid 'out' media URN: ${outSpec}. Must start with 'media:' or be '*'`);
|
|
182
|
-
}
|
|
221
|
+
const inSpec = processDirectionTag(taggedUrn, 'in');
|
|
222
|
+
const outSpec = processDirectionTag(taggedUrn, 'out');
|
|
183
223
|
|
|
184
224
|
// Build remaining tags (excluding in/out)
|
|
185
225
|
const remainingTags = {};
|
|
@@ -193,12 +233,12 @@ class CapUrn {
|
|
|
193
233
|
}
|
|
194
234
|
|
|
195
235
|
/**
|
|
196
|
-
* Create a Cap URN from a tags object
|
|
197
|
-
*
|
|
236
|
+
* Create a Cap URN from a tags object.
|
|
237
|
+
* Unlike string parsing, this path requires explicit `in` and `out` tags.
|
|
198
238
|
*
|
|
199
239
|
* @param {Object} tags - Object containing all tags including 'in' and 'out'
|
|
200
240
|
* @returns {CapUrn} The parsed Cap URN
|
|
201
|
-
* @throws {CapUrnError} If
|
|
241
|
+
* @throws {CapUrnError} If `in` or `out` tags are missing or invalid
|
|
202
242
|
*/
|
|
203
243
|
static fromTags(tags) {
|
|
204
244
|
const inSpec = tags['in'] || tags['IN'];
|
|
@@ -211,12 +251,11 @@ class CapUrn {
|
|
|
211
251
|
throw new CapUrnError(ErrorCodes.MISSING_OUT_SPEC, "Cap URN requires 'out' tag for output media URN");
|
|
212
252
|
}
|
|
213
253
|
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
throw new CapUrnError(ErrorCodes.INVALID_FORMAT, `Invalid 'in' media URN: ${inSpec}. Must start with 'media:' or be '*'`);
|
|
254
|
+
if (inSpec === '') {
|
|
255
|
+
throw new CapUrnError(ErrorCodes.INVALID_IN_SPEC, "Empty value for 'in' tag is not allowed");
|
|
217
256
|
}
|
|
218
|
-
if (
|
|
219
|
-
throw new CapUrnError(ErrorCodes.
|
|
257
|
+
if (outSpec === '') {
|
|
258
|
+
throw new CapUrnError(ErrorCodes.INVALID_OUT_SPEC, "Empty value for 'out' tag is not allowed");
|
|
220
259
|
}
|
|
221
260
|
|
|
222
261
|
// Build remaining tags (excluding in/out)
|
|
@@ -289,15 +328,18 @@ class CapUrn {
|
|
|
289
328
|
}
|
|
290
329
|
|
|
291
330
|
/**
|
|
292
|
-
* Create a new cap URN with an added or updated tag
|
|
293
|
-
*
|
|
294
|
-
*
|
|
331
|
+
* Create a new cap URN with an added or updated tag.
|
|
332
|
+
* Attempts to set `in` / `out` through `withTag` are ignored; use
|
|
333
|
+
* `withInSpec` / `withOutSpec` instead.
|
|
295
334
|
*
|
|
296
335
|
* @param {string} key - The tag key
|
|
297
336
|
* @param {string} value - The tag value
|
|
298
337
|
* @returns {CapUrn} A new CapUrn instance with the tag added/updated
|
|
299
338
|
*/
|
|
300
339
|
withTag(key, value) {
|
|
340
|
+
if (value === '') {
|
|
341
|
+
throw new CapUrnError(ErrorCodes.EMPTY_VALUE, `Empty value for key '${key}' (use '*' for wildcard)`);
|
|
342
|
+
}
|
|
301
343
|
const keyLower = key.toLowerCase();
|
|
302
344
|
// Silently ignore attempts to set in/out via withTag
|
|
303
345
|
if (keyLower === 'in' || keyLower === 'out') {
|
|
@@ -315,7 +357,9 @@ class CapUrn {
|
|
|
315
357
|
* @returns {CapUrn} A new CapUrn instance with the updated inSpec
|
|
316
358
|
*/
|
|
317
359
|
withInSpec(inSpec) {
|
|
318
|
-
|
|
360
|
+
const updated = new CapUrn(this.inSpec, this.outSpec, this.tags);
|
|
361
|
+
updated.inSpec = validatePreservedDirectionSpec(inSpec, 'in');
|
|
362
|
+
return updated;
|
|
319
363
|
}
|
|
320
364
|
|
|
321
365
|
/**
|
|
@@ -325,7 +369,9 @@ class CapUrn {
|
|
|
325
369
|
* @returns {CapUrn} A new CapUrn instance with the updated outSpec
|
|
326
370
|
*/
|
|
327
371
|
withOutSpec(outSpec) {
|
|
328
|
-
|
|
372
|
+
const updated = new CapUrn(this.inSpec, this.outSpec, this.tags);
|
|
373
|
+
updated.outSpec = validatePreservedDirectionSpec(outSpec, 'out');
|
|
374
|
+
return updated;
|
|
329
375
|
}
|
|
330
376
|
|
|
331
377
|
/**
|
|
@@ -366,11 +412,9 @@ class CapUrn {
|
|
|
366
412
|
return true;
|
|
367
413
|
}
|
|
368
414
|
|
|
369
|
-
//
|
|
370
|
-
//
|
|
371
|
-
|
|
372
|
-
// "*" is also treated as wildcard. "media:" on the instance side still participates.
|
|
373
|
-
if (this.inSpec !== '*' && this.inSpec !== 'media:' && request.inSpec !== '*') {
|
|
415
|
+
// Input direction: pattern accepts instance. `media:` on the pattern side is
|
|
416
|
+
// the wildcard top and skips the check; a literal `*` pattern is skipped the same way.
|
|
417
|
+
if (this.inSpec !== 'media:' && this.inSpec !== '*') {
|
|
374
418
|
const capIn = TaggedUrn.fromString(this.inSpec);
|
|
375
419
|
const requestIn = TaggedUrn.fromString(request.inSpec);
|
|
376
420
|
if (!capIn.accepts(requestIn)) {
|
|
@@ -378,10 +422,9 @@ class CapUrn {
|
|
|
378
422
|
}
|
|
379
423
|
}
|
|
380
424
|
|
|
381
|
-
//
|
|
382
|
-
//
|
|
383
|
-
|
|
384
|
-
if (this.outSpec !== '*' && this.outSpec !== 'media:' && request.outSpec !== '*') {
|
|
425
|
+
// Output direction: provider output must conform to requested output.
|
|
426
|
+
// `media:` on the pattern side is the wildcard top and skips the check, as does a literal `*`.
|
|
427
|
+
if (this.outSpec !== 'media:' && this.outSpec !== '*') {
|
|
385
428
|
const capOut = TaggedUrn.fromString(this.outSpec);
|
|
386
429
|
const requestOut = TaggedUrn.fromString(request.outSpec);
|
|
387
430
|
if (!capOut.conformsTo(requestOut)) {
|
|
@@ -389,33 +432,23 @@ class CapUrn {
|
|
|
389
432
|
}
|
|
390
433
|
}
|
|
391
434
|
|
|
392
|
-
// Check all
|
|
393
|
-
for (const [
|
|
394
|
-
const
|
|
395
|
-
|
|
396
|
-
if (capValue === undefined) {
|
|
397
|
-
// Missing tag in cap is treated as wildcard - can handle any value
|
|
398
|
-
continue;
|
|
399
|
-
}
|
|
435
|
+
// Check all tags required by the pattern. Missing tags in the instance reject.
|
|
436
|
+
for (const [patternKey, patternValue] of Object.entries(this.tags)) {
|
|
437
|
+
const requestValue = request.tags[patternKey];
|
|
400
438
|
|
|
401
|
-
if (
|
|
402
|
-
|
|
403
|
-
continue;
|
|
439
|
+
if (requestValue === undefined) {
|
|
440
|
+
return false;
|
|
404
441
|
}
|
|
405
442
|
|
|
406
|
-
if (requestValue === '*') {
|
|
407
|
-
// Request accepts any value - cap's specific value matches
|
|
443
|
+
if (patternValue === '*' || requestValue === '*') {
|
|
408
444
|
continue;
|
|
409
445
|
}
|
|
410
446
|
|
|
411
|
-
if (
|
|
412
|
-
// Cap has specific value that doesn't match request's specific value
|
|
447
|
+
if (patternValue !== requestValue) {
|
|
413
448
|
return false;
|
|
414
449
|
}
|
|
415
450
|
}
|
|
416
451
|
|
|
417
|
-
// If cap has additional specific tags that request doesn't specify, that's fine
|
|
418
|
-
// The cap is just more specific than needed
|
|
419
452
|
return true;
|
|
420
453
|
}
|
|
421
454
|
|
|
@@ -441,12 +474,13 @@ class CapUrn {
|
|
|
441
474
|
*/
|
|
442
475
|
specificity() {
|
|
443
476
|
let count = 0;
|
|
444
|
-
// Direction specs contribute their MediaUrn tag count
|
|
445
|
-
|
|
477
|
+
// Direction specs contribute their MediaUrn tag count. `media:` is the
|
|
478
|
+
// wildcard top and contributes zero, as does a literal `*`.
|
|
479
|
+
if (this.inSpec !== 'media:' && this.inSpec !== '*') {
|
|
446
480
|
const inMedia = TaggedUrn.fromString(this.inSpec);
|
|
447
481
|
count += Object.keys(inMedia.tags).length;
|
|
448
482
|
}
|
|
449
|
-
if (this.outSpec !== '*') {
|
|
483
|
+
if (this.outSpec !== 'media:' && this.outSpec !== '*') {
|
|
450
484
|
const outMedia = TaggedUrn.fromString(this.outSpec);
|
|
451
485
|
count += Object.keys(outMedia.tags).length;
|
|
452
486
|
}
|
|
@@ -686,7 +720,7 @@ class CapMatcher {
|
|
|
686
720
|
let bestSpecificity = -1;
|
|
687
721
|
|
|
688
722
|
for (const cap of caps) {
|
|
689
|
-
if (
|
|
723
|
+
if (request.accepts(cap)) {
|
|
690
724
|
const specificity = cap.specificity();
|
|
691
725
|
if (specificity > bestSpecificity) {
|
|
692
726
|
best = cap;
|
|
@@ -706,7 +740,7 @@ class CapMatcher {
|
|
|
706
740
|
* @returns {CapUrn[]} Array of matching caps sorted by specificity (most specific first)
|
|
707
741
|
*/
|
|
708
742
|
static findAllMatches(caps, request) {
|
|
709
|
-
const matches = caps.filter(cap =>
|
|
743
|
+
const matches = caps.filter(cap => request.accepts(cap));
|
|
710
744
|
|
|
711
745
|
// Sort by specificity (most specific first)
|
|
712
746
|
matches.sort((a, b) => b.specificity() - a.specificity());
|
|
@@ -789,18 +823,22 @@ const MEDIA_OBJECT = 'media:record';
|
|
|
789
823
|
// Media URN for binary data - the most general media type (no constraints)
|
|
790
824
|
const MEDIA_IDENTITY = 'media:';
|
|
791
825
|
|
|
792
|
-
//
|
|
793
|
-
// Media URN for
|
|
794
|
-
const
|
|
795
|
-
// Media URN for
|
|
796
|
-
const
|
|
797
|
-
// Media URN for
|
|
798
|
-
const
|
|
799
|
-
// Media URN for
|
|
800
|
-
const
|
|
801
|
-
// Media URN for
|
|
802
|
-
|
|
803
|
-
|
|
826
|
+
// List types - URNs must match base.toml definitions
|
|
827
|
+
// Media URN for generic list type
|
|
828
|
+
const MEDIA_LIST = 'media:list';
|
|
829
|
+
// Media URN for textable list type
|
|
830
|
+
const MEDIA_TEXTABLE_LIST = 'media:list;textable';
|
|
831
|
+
// Media URN for string list type - textable with list marker (same URN string as MEDIA_TEXTABLE_LIST)
|
|
832
|
+
const MEDIA_STRING_LIST = 'media:list;textable';
|
|
833
|
+
// Media URN for integer list type - textable, numeric with list marker
|
|
834
|
+
const MEDIA_INTEGER_LIST = 'media:integer;list;textable;numeric';
|
|
835
|
+
// Media URN for number list type - textable, numeric with list marker
|
|
836
|
+
const MEDIA_NUMBER_LIST = 'media:list;numeric;textable';
|
|
837
|
+
// Media URN for boolean list type - uses "bool" with list marker
|
|
838
|
+
const MEDIA_BOOLEAN_LIST = 'media:bool;list;textable';
|
|
839
|
+
// Media URN for object list type - list of records (NOT textable)
|
|
840
|
+
// Use a specific format like JSON array for textable object lists.
|
|
841
|
+
const MEDIA_OBJECT_LIST = 'media:list;record';
|
|
804
842
|
|
|
805
843
|
// Semantic media types for specialized content
|
|
806
844
|
// Media URN for PNG image data
|
|
@@ -813,8 +851,6 @@ const MEDIA_VIDEO = 'media:video';
|
|
|
813
851
|
// Semantic AI input types - distinguished by their purpose/context
|
|
814
852
|
// Media URN for audio input containing speech for transcription (Whisper)
|
|
815
853
|
const MEDIA_AUDIO_SPEECH = 'media:audio;wav;speech';
|
|
816
|
-
// Media URN for thumbnail image output
|
|
817
|
-
const MEDIA_IMAGE_THUMBNAIL = 'media:image;png;thumbnail';
|
|
818
854
|
|
|
819
855
|
// Document types (PRIMARY naming - type IS the format)
|
|
820
856
|
// Media URN for PDF documents
|
|
@@ -842,6 +878,18 @@ const MEDIA_JSON_SCHEMA = 'media:json;json-schema;record;textable';
|
|
|
842
878
|
// Media URN for YAML data - has record marker (structured key-value)
|
|
843
879
|
const MEDIA_YAML = 'media:record;textable;yaml';
|
|
844
880
|
|
|
881
|
+
// Format-specific variants for JSON, YAML, CSV
|
|
882
|
+
const MEDIA_JSON_VALUE = 'media:json;textable';
|
|
883
|
+
const MEDIA_JSON_RECORD = 'media:json;record;textable';
|
|
884
|
+
const MEDIA_JSON_LIST = 'media:json;list;textable';
|
|
885
|
+
const MEDIA_JSON_LIST_RECORD = 'media:json;list;record;textable';
|
|
886
|
+
const MEDIA_YAML_VALUE = 'media:textable;yaml';
|
|
887
|
+
const MEDIA_YAML_RECORD = 'media:record;textable;yaml';
|
|
888
|
+
const MEDIA_YAML_LIST = 'media:list;textable;yaml';
|
|
889
|
+
const MEDIA_YAML_LIST_RECORD = 'media:list;record;textable;yaml';
|
|
890
|
+
const MEDIA_CSV = 'media:csv;list;record;textable';
|
|
891
|
+
const MEDIA_CSV_LIST = 'media:csv;list;textable';
|
|
892
|
+
|
|
845
893
|
// File path types - for arguments that represent filesystem paths
|
|
846
894
|
// Media URN for a single file path - textable, scalar by default (no list marker)
|
|
847
895
|
const MEDIA_FILE_PATH = 'media:file-path;textable';
|
|
@@ -849,8 +897,6 @@ const MEDIA_FILE_PATH = 'media:file-path;textable';
|
|
|
849
897
|
const MEDIA_FILE_PATH_ARRAY = 'media:file-path;list;textable';
|
|
850
898
|
|
|
851
899
|
// Semantic text input types - distinguished by their purpose/context
|
|
852
|
-
// Media URN for frontmatter text (book metadata) - scalar by default
|
|
853
|
-
const MEDIA_FRONTMATTER_TEXT = 'media:frontmatter;textable';
|
|
854
900
|
// Media URN for model spec (provider:model format, HuggingFace name, etc.) - scalar by default
|
|
855
901
|
const MEDIA_MODEL_SPEC = 'media:model-spec;textable';
|
|
856
902
|
// Media URN for MLX model path - scalar by default
|
|
@@ -877,20 +923,19 @@ const MEDIA_PATH_OUTPUT = 'media:model-path;record;textable';
|
|
|
877
923
|
const MEDIA_EMBEDDING_VECTOR = 'media:embedding-vector;record;textable';
|
|
878
924
|
// Media URN for LLM inference output - has record marker
|
|
879
925
|
const MEDIA_LLM_INFERENCE_OUTPUT = 'media:generated-text;record;textable';
|
|
880
|
-
// Media URN for extracted metadata - has record marker
|
|
881
|
-
const MEDIA_FILE_METADATA = 'media:file-metadata;record;textable';
|
|
882
|
-
// Media URN for extracted outline - has record marker
|
|
883
|
-
const MEDIA_DOCUMENT_OUTLINE = 'media:document-outline;record;textable';
|
|
884
|
-
// Media URN for disbound page - has list marker (array of page objects)
|
|
885
|
-
const MEDIA_DISBOUND_PAGE = 'media:disbound-page;list;textable';
|
|
886
926
|
// Media URN for vision inference output - textable, scalar by default
|
|
887
927
|
const MEDIA_IMAGE_DESCRIPTION = 'media:image-description;textable';
|
|
888
928
|
// Media URN for transcription output - has record marker
|
|
889
929
|
const MEDIA_TRANSCRIPTION_OUTPUT = 'media:record;textable;transcription';
|
|
890
|
-
// Media URN for decision output
|
|
891
|
-
const MEDIA_DECISION = 'media:
|
|
892
|
-
// Media URN for
|
|
893
|
-
const
|
|
930
|
+
// Media URN for decision output - JSON record with textable
|
|
931
|
+
const MEDIA_DECISION = 'media:decision;json;record;textable';
|
|
932
|
+
// Media URN for textable page output
|
|
933
|
+
const MEDIA_TEXTABLE_PAGE = 'media:textable;page';
|
|
934
|
+
// Collection types
|
|
935
|
+
const MEDIA_COLLECTION = 'media:collection;record';
|
|
936
|
+
const MEDIA_COLLECTION_LIST = 'media:collection;list;record';
|
|
937
|
+
// Media URN for adapter selection output - JSON record
|
|
938
|
+
const MEDIA_ADAPTER_SELECTION = 'media:adapter-selection;json;record';
|
|
894
939
|
|
|
895
940
|
// =============================================================================
|
|
896
941
|
// STANDARD CAP URN CONSTANTS
|
|
@@ -900,6 +945,10 @@ const MEDIA_DECISION_ARRAY = 'media:bool;decision;list;textable';
|
|
|
900
945
|
// Accepts any media type as input and outputs any media type
|
|
901
946
|
const CAP_IDENTITY = 'cap:in=media:;out=media:';
|
|
902
947
|
|
|
948
|
+
// Adapter-selection capability. Default implementation returns empty END (no match).
|
|
949
|
+
// Cartridges that inspect file content override this with a handler that returns {"media_urns": [...]}.
|
|
950
|
+
const CAP_ADAPTER_SELECTION = 'cap:in="media:";out="media:adapter-selection;json;record"';
|
|
951
|
+
|
|
903
952
|
// =============================================================================
|
|
904
953
|
// MEDIA URN CLASS
|
|
905
954
|
// =============================================================================
|
|
@@ -956,8 +1005,10 @@ class MediaUrn {
|
|
|
956
1005
|
// =========================================================================
|
|
957
1006
|
|
|
958
1007
|
/**
|
|
959
|
-
* Returns true if this media
|
|
960
|
-
*
|
|
1008
|
+
* Returns true if this media URN describes list-type data (has `list` marker tag).
|
|
1009
|
+
* This is a semantic type check — it means "the data format IS a list/array".
|
|
1010
|
+
* This does NOT indicate input cardinality (single vs multiple items).
|
|
1011
|
+
* Cardinality is tracked by is_sequence on the wire protocol, not by URN tags.
|
|
961
1012
|
* @returns {boolean}
|
|
962
1013
|
*/
|
|
963
1014
|
isList() { return this._hasMarkerTag('list'); }
|
|
@@ -1027,6 +1078,22 @@ class MediaUrn {
|
|
|
1027
1078
|
/** @returns {boolean} True if the "bool" marker tag is present */
|
|
1028
1079
|
isBool() { return this._urn.getTag('bool') !== undefined; }
|
|
1029
1080
|
|
|
1081
|
+
/**
|
|
1082
|
+
* Returns true if this media URN describes YAML representation.
|
|
1083
|
+
* @returns {boolean}
|
|
1084
|
+
*/
|
|
1085
|
+
isYaml() {
|
|
1086
|
+
return this._hasMarkerTag('yaml');
|
|
1087
|
+
}
|
|
1088
|
+
|
|
1089
|
+
/**
|
|
1090
|
+
* Returns true if this media URN describes CSV representation.
|
|
1091
|
+
* @returns {boolean}
|
|
1092
|
+
*/
|
|
1093
|
+
isCsv() {
|
|
1094
|
+
return this._hasMarkerTag('csv');
|
|
1095
|
+
}
|
|
1096
|
+
|
|
1030
1097
|
/**
|
|
1031
1098
|
* Check if this represents a single file path type (not array).
|
|
1032
1099
|
* Returns true if the "file-path" marker tag is present AND no list marker.
|
|
@@ -1048,6 +1115,13 @@ class MediaUrn {
|
|
|
1048
1115
|
*/
|
|
1049
1116
|
isAnyFilePath() { return this._hasMarkerTag('file-path'); }
|
|
1050
1117
|
|
|
1118
|
+
/**
|
|
1119
|
+
* Check if this represents a collection type.
|
|
1120
|
+
* Returns true if the "collection" marker tag is present.
|
|
1121
|
+
* @returns {boolean}
|
|
1122
|
+
*/
|
|
1123
|
+
isCollection() { return this._hasMarkerTag('collection'); }
|
|
1124
|
+
|
|
1051
1125
|
/**
|
|
1052
1126
|
* Check if this media URN conforms to another (pattern).
|
|
1053
1127
|
* @param {MediaUrn} pattern
|
|
@@ -1123,20 +1197,67 @@ class MediaUrn {
|
|
|
1123
1197
|
// =============================================================================
|
|
1124
1198
|
|
|
1125
1199
|
/**
|
|
1126
|
-
* Build URN for LLM
|
|
1127
|
-
* @param {string} langCode - Language code (e.g., "en", "fr")
|
|
1200
|
+
* Build URN for LLM generate-text capability
|
|
1128
1201
|
* @returns {CapUrn}
|
|
1129
1202
|
*/
|
|
1130
|
-
function
|
|
1203
|
+
function llmGenerateTextUrn() {
|
|
1131
1204
|
return new CapUrnBuilder()
|
|
1132
|
-
.tag('op', '
|
|
1133
|
-
.tag('
|
|
1134
|
-
.tag('
|
|
1205
|
+
.tag('op', 'generate_text')
|
|
1206
|
+
.tag('llm', '*')
|
|
1207
|
+
.tag('ml-model', '*')
|
|
1135
1208
|
.inSpec(MEDIA_STRING)
|
|
1136
|
-
.outSpec(
|
|
1209
|
+
.outSpec(MEDIA_STRING)
|
|
1210
|
+
.build();
|
|
1211
|
+
}
|
|
1212
|
+
|
|
1213
|
+
/**
|
|
1214
|
+
* Build URN for render-page-image capability
|
|
1215
|
+
* @param {string} inputMedia - The input media URN string
|
|
1216
|
+
* @returns {CapUrn}
|
|
1217
|
+
*/
|
|
1218
|
+
function renderPageImageUrn(inputMedia) {
|
|
1219
|
+
return new CapUrnBuilder()
|
|
1220
|
+
.tag('op', 'render_page_image')
|
|
1221
|
+
.inSpec(inputMedia)
|
|
1222
|
+
.outSpec(MEDIA_PNG)
|
|
1137
1223
|
.build();
|
|
1138
1224
|
}
|
|
1139
1225
|
|
|
1226
|
+
/**
|
|
1227
|
+
* Build URN for format conversion capability
|
|
1228
|
+
* @param {string} inMedia - The input media URN string
|
|
1229
|
+
* @param {string} outMedia - The output media URN string
|
|
1230
|
+
* @returns {CapUrn}
|
|
1231
|
+
*/
|
|
1232
|
+
function formatConversionUrn(inMedia, outMedia) {
|
|
1233
|
+
return new CapUrnBuilder()
|
|
1234
|
+
.tag('op', 'convert_format')
|
|
1235
|
+
.inSpec(inMedia)
|
|
1236
|
+
.outSpec(outMedia)
|
|
1237
|
+
.build();
|
|
1238
|
+
}
|
|
1239
|
+
|
|
1240
|
+
/**
|
|
1241
|
+
* Map a primitive type name to the corresponding media URN string.
|
|
1242
|
+
* @param {string} typeName - The type name (e.g., 'string', 'integer', 'string-list')
|
|
1243
|
+
* @returns {string|null} The media URN string, or null if not recognized
|
|
1244
|
+
*/
|
|
1245
|
+
function mediaUrnForType(typeName) {
|
|
1246
|
+
switch (typeName) {
|
|
1247
|
+
case 'string': return MEDIA_STRING;
|
|
1248
|
+
case 'integer': return MEDIA_INTEGER;
|
|
1249
|
+
case 'number': return MEDIA_NUMBER;
|
|
1250
|
+
case 'boolean': return MEDIA_BOOLEAN;
|
|
1251
|
+
case 'object': return MEDIA_OBJECT;
|
|
1252
|
+
case 'string-list': return MEDIA_STRING_LIST;
|
|
1253
|
+
case 'integer-list': return MEDIA_INTEGER_LIST;
|
|
1254
|
+
case 'number-list': return MEDIA_NUMBER_LIST;
|
|
1255
|
+
case 'boolean-list': return MEDIA_BOOLEAN_LIST;
|
|
1256
|
+
case 'object-list': return MEDIA_OBJECT_LIST;
|
|
1257
|
+
default: return null;
|
|
1258
|
+
}
|
|
1259
|
+
}
|
|
1260
|
+
|
|
1140
1261
|
/**
|
|
1141
1262
|
* Build URN for model-availability capability
|
|
1142
1263
|
* @returns {CapUrn}
|
|
@@ -2392,6 +2513,23 @@ function validateCapArgs(cap) {
|
|
|
2392
2513
|
}
|
|
2393
2514
|
}
|
|
2394
2515
|
|
|
2516
|
+
// RULE11: Stdin source consistency with in= spec
|
|
2517
|
+
// If in= is media:void, no args may have stdin sources.
|
|
2518
|
+
// If in= is anything other than media:void and there is at least one arg, at least one arg must have a stdin source.
|
|
2519
|
+
const inMediaUrn = cap.urn.inMediaUrn();
|
|
2520
|
+
const voidUrn = MediaUrn.fromString(MEDIA_VOID);
|
|
2521
|
+
const inIsVoid = inMediaUrn.isEquivalent(voidUrn);
|
|
2522
|
+
if (inIsVoid && stdinUrns.length > 0) {
|
|
2523
|
+
throw new ValidationError('InvalidCapSchema', capUrn, {
|
|
2524
|
+
issue: `RULE11: Cap has in="${MEDIA_VOID}" but argument(s) declare stdin source`
|
|
2525
|
+
});
|
|
2526
|
+
}
|
|
2527
|
+
if (!inIsVoid && stdinUrns.length === 0 && args.length > 0) {
|
|
2528
|
+
throw new ValidationError('InvalidCapSchema', capUrn, {
|
|
2529
|
+
issue: `RULE11: Cap has non-void in= spec but no argument declares a stdin source`
|
|
2530
|
+
});
|
|
2531
|
+
}
|
|
2532
|
+
|
|
2395
2533
|
// RULE5: No two args may have same position
|
|
2396
2534
|
const positionSet = new Set();
|
|
2397
2535
|
for (const { position, mediaUrn } of positions) {
|
|
@@ -2426,9 +2564,6 @@ function validateCapArgs(cap) {
|
|
|
2426
2564
|
flagSet.add(flag);
|
|
2427
2565
|
}
|
|
2428
2566
|
|
|
2429
|
-
// RULE8: No unknown keys in source objects - this is handled in ArgSource.fromJSON()
|
|
2430
|
-
// RULE11: cli_flag used verbatim as specified - enforced by design
|
|
2431
|
-
// RULE12: media_urn is the key, no name field - enforced by CapArg structure
|
|
2432
2567
|
}
|
|
2433
2568
|
|
|
2434
2569
|
/**
|
|
@@ -2933,6 +3068,56 @@ class CapValidator {
|
|
|
2933
3068
|
// CAP ARGUMENT VALUE - Unified argument type
|
|
2934
3069
|
// ============================================================================
|
|
2935
3070
|
|
|
3071
|
+
/**
|
|
3072
|
+
* Result from a cap execution.
|
|
3073
|
+
*
|
|
3074
|
+
* Scalar outputs carry raw materialized bytes (e.g. UTF-8 text, raw binary).
|
|
3075
|
+
* List outputs carry a CBOR sequence of values, one per list item.
|
|
3076
|
+
* Empty represents a void cap with no output.
|
|
3077
|
+
*/
|
|
3078
|
+
class CapResult {
|
|
3079
|
+
static KIND_SCALAR = 'scalar';
|
|
3080
|
+
static KIND_LIST = 'list';
|
|
3081
|
+
static KIND_EMPTY = 'empty';
|
|
3082
|
+
|
|
3083
|
+
/**
|
|
3084
|
+
* @param {'scalar'|'list'|'empty'} kind
|
|
3085
|
+
* @param {Uint8Array|null} data - Bytes for scalar or CBOR sequence for list, null for empty
|
|
3086
|
+
*/
|
|
3087
|
+
constructor(kind, data = null) {
|
|
3088
|
+
this.kind = kind;
|
|
3089
|
+
this.data = data;
|
|
3090
|
+
}
|
|
3091
|
+
|
|
3092
|
+
/** Create a CapResult carrying raw bytes (scalar output). */
|
|
3093
|
+
static scalar(data) {
|
|
3094
|
+
const bytes = data instanceof Uint8Array ? data : new Uint8Array(data || []);
|
|
3095
|
+
return new CapResult(CapResult.KIND_SCALAR, bytes);
|
|
3096
|
+
}
|
|
3097
|
+
|
|
3098
|
+
/** Create a CapResult carrying a CBOR sequence (list output). */
|
|
3099
|
+
static list(cborSequence) {
|
|
3100
|
+
const bytes = cborSequence instanceof Uint8Array ? cborSequence : new Uint8Array(cborSequence || []);
|
|
3101
|
+
return new CapResult(CapResult.KIND_LIST, bytes);
|
|
3102
|
+
}
|
|
3103
|
+
|
|
3104
|
+
/** Create a CapResult for void caps. */
|
|
3105
|
+
static empty() {
|
|
3106
|
+
return new CapResult(CapResult.KIND_EMPTY, null);
|
|
3107
|
+
}
|
|
3108
|
+
|
|
3109
|
+
/** Returns true if this is a scalar result. */
|
|
3110
|
+
isScalar() { return this.kind === CapResult.KIND_SCALAR; }
|
|
3111
|
+
|
|
3112
|
+
/** Returns true if this is a list result. */
|
|
3113
|
+
isList() { return this.kind === CapResult.KIND_LIST; }
|
|
3114
|
+
|
|
3115
|
+
/** Returns true if this is an empty result. */
|
|
3116
|
+
isEmpty() { return this.kind === CapResult.KIND_EMPTY; }
|
|
3117
|
+
}
|
|
3118
|
+
|
|
3119
|
+
// ============================================================================
|
|
3120
|
+
|
|
2936
3121
|
/**
|
|
2937
3122
|
* Unified argument type - arguments are identified by media_urn.
|
|
2938
3123
|
* The cap definition's sources specify how to extract values (stdin, position, cli_flag).
|
|
@@ -3189,7 +3374,7 @@ class CompositeCapSet {
|
|
|
3189
3374
|
* Execute a capability by finding the best match and delegating
|
|
3190
3375
|
* @param {string} capUrn - The capability URN to execute
|
|
3191
3376
|
* @param {CapArgumentValue[]} args - Arguments identified by media_urn
|
|
3192
|
-
* @returns {Promise<
|
|
3377
|
+
* @returns {Promise<CapResult>}
|
|
3193
3378
|
*/
|
|
3194
3379
|
async executeCap(capUrn, args) {
|
|
3195
3380
|
let request;
|
|
@@ -5435,18 +5620,20 @@ module.exports = {
|
|
|
5435
5620
|
MEDIA_NUMBER,
|
|
5436
5621
|
MEDIA_BOOLEAN,
|
|
5437
5622
|
MEDIA_OBJECT,
|
|
5438
|
-
|
|
5439
|
-
|
|
5440
|
-
|
|
5441
|
-
|
|
5442
|
-
|
|
5623
|
+
// List types
|
|
5624
|
+
MEDIA_LIST,
|
|
5625
|
+
MEDIA_TEXTABLE_LIST,
|
|
5626
|
+
MEDIA_STRING_LIST,
|
|
5627
|
+
MEDIA_INTEGER_LIST,
|
|
5628
|
+
MEDIA_NUMBER_LIST,
|
|
5629
|
+
MEDIA_BOOLEAN_LIST,
|
|
5630
|
+
MEDIA_OBJECT_LIST,
|
|
5443
5631
|
MEDIA_IDENTITY,
|
|
5444
5632
|
MEDIA_VOID,
|
|
5445
5633
|
MEDIA_PNG,
|
|
5446
5634
|
MEDIA_AUDIO,
|
|
5447
5635
|
MEDIA_VIDEO,
|
|
5448
5636
|
MEDIA_AUDIO_SPEECH,
|
|
5449
|
-
MEDIA_IMAGE_THUMBNAIL,
|
|
5450
5637
|
// Document types (PRIMARY naming)
|
|
5451
5638
|
MEDIA_PDF,
|
|
5452
5639
|
MEDIA_EPUB,
|
|
@@ -5460,11 +5647,22 @@ module.exports = {
|
|
|
5460
5647
|
MEDIA_JSON,
|
|
5461
5648
|
MEDIA_JSON_SCHEMA,
|
|
5462
5649
|
MEDIA_YAML,
|
|
5650
|
+
// Format-specific variants
|
|
5651
|
+
MEDIA_JSON_VALUE,
|
|
5652
|
+
MEDIA_JSON_RECORD,
|
|
5653
|
+
MEDIA_JSON_LIST,
|
|
5654
|
+
MEDIA_JSON_LIST_RECORD,
|
|
5655
|
+
MEDIA_YAML_VALUE,
|
|
5656
|
+
MEDIA_YAML_RECORD,
|
|
5657
|
+
MEDIA_YAML_LIST,
|
|
5658
|
+
MEDIA_YAML_LIST_RECORD,
|
|
5659
|
+
MEDIA_CSV,
|
|
5660
|
+
MEDIA_CSV_LIST,
|
|
5463
5661
|
MEDIA_MODEL_SPEC,
|
|
5464
5662
|
MEDIA_MODEL_REPO,
|
|
5465
5663
|
MEDIA_MODEL_DIM,
|
|
5466
5664
|
MEDIA_DECISION,
|
|
5467
|
-
|
|
5665
|
+
MEDIA_TEXTABLE_PAGE,
|
|
5468
5666
|
// Semantic output types - model management
|
|
5469
5667
|
MEDIA_DOWNLOAD_OUTPUT,
|
|
5470
5668
|
MEDIA_LIST_OUTPUT,
|
|
@@ -5475,21 +5673,27 @@ module.exports = {
|
|
|
5475
5673
|
// Semantic output types - inference
|
|
5476
5674
|
MEDIA_EMBEDDING_VECTOR,
|
|
5477
5675
|
MEDIA_LLM_INFERENCE_OUTPUT,
|
|
5478
|
-
MEDIA_FILE_METADATA,
|
|
5479
|
-
MEDIA_DOCUMENT_OUTLINE,
|
|
5480
|
-
MEDIA_DISBOUND_PAGE,
|
|
5481
5676
|
MEDIA_IMAGE_DESCRIPTION,
|
|
5482
5677
|
MEDIA_TRANSCRIPTION_OUTPUT,
|
|
5483
5678
|
// File path types
|
|
5484
5679
|
MEDIA_FILE_PATH,
|
|
5485
5680
|
MEDIA_FILE_PATH_ARRAY,
|
|
5486
|
-
// Semantic text input types
|
|
5487
|
-
MEDIA_FRONTMATTER_TEXT,
|
|
5488
5681
|
MEDIA_MLX_MODEL_PATH,
|
|
5682
|
+
// Collection types
|
|
5683
|
+
MEDIA_COLLECTION,
|
|
5684
|
+
MEDIA_COLLECTION_LIST,
|
|
5685
|
+
MEDIA_ADAPTER_SELECTION,
|
|
5686
|
+
// Standard cap URN constants
|
|
5687
|
+
CAP_ADAPTER_SELECTION,
|
|
5688
|
+
// Cap execution result
|
|
5689
|
+
CapResult,
|
|
5489
5690
|
// Unified argument type
|
|
5490
5691
|
CapArgumentValue,
|
|
5491
5692
|
// Standard cap URN builders
|
|
5492
|
-
|
|
5693
|
+
llmGenerateTextUrn,
|
|
5694
|
+
renderPageImageUrn,
|
|
5695
|
+
formatConversionUrn,
|
|
5696
|
+
mediaUrnForType,
|
|
5493
5697
|
modelAvailabilityUrn,
|
|
5494
5698
|
modelPathUrn,
|
|
5495
5699
|
CapMatrixError,
|