@pickled-dev/cli 0.24.0 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +12 -12
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -163,7 +163,7 @@ ${Z.comment}`:Z.comment}this.doc.range[2]=Z.offset;break}default:this.errors.pus
|
|
|
163
163
|
`)+1;while(Z!==0)this.onNewLine(this.offset+Z),Z=this.source.indexOf(`
|
|
164
164
|
`,Z)+1}yield*this.pop();break;default:yield*this.pop(),yield*this.step()}}*blockMap($){let Z=$.items[$.items.length-1];switch(this.type){case"newline":if(this.onKeyLine=!1,Z.value){let X="end"in Z.value?Z.value.end:void 0;if((Array.isArray(X)?X[X.length-1]:void 0)?.type==="comment")X?.push(this.sourceToken);else $.items.push({start:[this.sourceToken]})}else if(Z.sep)Z.sep.push(this.sourceToken);else Z.start.push(this.sourceToken);return;case"space":case"comment":if(Z.value)$.items.push({start:[this.sourceToken]});else if(Z.sep)Z.sep.push(this.sourceToken);else{if(this.atIndentedComment(Z.start,$.indent)){let Y=$.items[$.items.length-2]?.value?.end;if(Array.isArray(Y)){Array.prototype.push.apply(Y,Z.start),Y.push(this.sourceToken),$.items.pop();return}}Z.start.push(this.sourceToken)}return}if(this.indent>=$.indent){let X=!this.onKeyLine&&this.indent===$.indent,Y=X&&(Z.sep||Z.explicitKey)&&this.type!=="seq-item-ind",G=[];if(Y&&Z.sep&&!Z.value){let z=[];for(let J=0;J<Z.sep.length;++J){let Q=Z.sep[J];switch(Q.type){case"newline":z.push(J);break;case"space":break;case"comment":if(Q.indent>$.indent)z.length=0;break;default:z.length=0}}if(z.length>=2)G=Z.sep.splice(z[1])}switch(this.type){case"anchor":case"tag":if(Y||Z.value)G.push(this.sourceToken),$.items.push({start:G}),this.onKeyLine=!0;else if(Z.sep)Z.sep.push(this.sourceToken);else Z.start.push(this.sourceToken);return;case"explicit-key-ind":if(!Z.sep&&!Z.explicitKey)Z.start.push(this.sourceToken),Z.explicitKey=!0;else if(Y||Z.value)G.push(this.sourceToken),$.items.push({start:G,explicitKey:!0});else this.stack.push({type:"block-map",offset:this.offset,indent:this.indent,items:[{start:[this.sourceToken],explicitKey:!0}]});this.onKeyLine=!0;return;case"map-value-ind":if(Z.explicitKey)if(!Z.sep)if(w$(Z.start,"newline"))Object.assign(Z,{key:null,sep:[this.sourceToken]});else{let z=I1(Z.start);this.stack.push({type:"block-map",offset:this.offset,indent:this.indent,items:[{start:z,key:null,sep:[this.sourceToken]}]})}else 
if(Z.value)$.items.push({start:[],key:null,sep:[this.sourceToken]});else if(w$(Z.sep,"map-value-ind"))this.stack.push({type:"block-map",offset:this.offset,indent:this.indent,items:[{start:G,key:null,sep:[this.sourceToken]}]});else if(EY(Z.key)&&!w$(Z.sep,"newline")){let z=I1(Z.start),J=Z.key,Q=Z.sep;Q.push(this.sourceToken),delete Z.key,delete Z.sep,this.stack.push({type:"block-map",offset:this.offset,indent:this.indent,items:[{start:z,key:J,sep:Q}]})}else if(G.length>0)Z.sep=Z.sep.concat(G,this.sourceToken);else Z.sep.push(this.sourceToken);else if(!Z.sep)Object.assign(Z,{key:null,sep:[this.sourceToken]});else if(Z.value||Y)$.items.push({start:G,key:null,sep:[this.sourceToken]});else if(w$(Z.sep,"map-value-ind"))this.stack.push({type:"block-map",offset:this.offset,indent:this.indent,items:[{start:[],key:null,sep:[this.sourceToken]}]});else Z.sep.push(this.sourceToken);this.onKeyLine=!0;return;case"alias":case"scalar":case"single-quoted-scalar":case"double-quoted-scalar":{let z=this.flowScalar(this.type);if(Y||Z.value)$.items.push({start:G,key:z,sep:[]}),this.onKeyLine=!0;else if(Z.sep)this.stack.push(z);else Object.assign(Z,{key:z,sep:[]}),this.onKeyLine=!0;return}default:{let z=this.startBlockValue($);if(z){if(z.type==="block-seq"){if(!Z.explicitKey&&Z.sep&&!w$(Z.sep,"newline")){yield*this.pop({type:"error",offset:this.offset,message:"Unexpected block-seq-ind on same line with key",source:this.source});return}}else if(X)$.items.push({start:G});this.stack.push(z);return}}}}yield*this.pop(),yield*this.step()}*blockSequence($){let Z=$.items[$.items.length-1];switch(this.type){case"newline":if(Z.value){let X="end"in Z.value?Z.value.end:void 0;if((Array.isArray(X)?X[X.length-1]:void 0)?.type==="comment")X?.push(this.sourceToken);else $.items.push({start:[this.sourceToken]})}else Z.start.push(this.sourceToken);return;case"space":case"comment":if(Z.value)$.items.push({start:[this.sourceToken]});else{if(this.atIndentedComment(Z.start,$.indent)){let 
Y=$.items[$.items.length-2]?.value?.end;if(Array.isArray(Y)){Array.prototype.push.apply(Y,Z.start),Y.push(this.sourceToken),$.items.pop();return}}Z.start.push(this.sourceToken)}return;case"anchor":case"tag":if(Z.value||this.indent<=$.indent)break;Z.start.push(this.sourceToken);return;case"seq-item-ind":if(this.indent!==$.indent)break;if(Z.value||w$(Z.start,"seq-item-ind"))$.items.push({start:[this.sourceToken]});else Z.start.push(this.sourceToken);return}if(this.indent>$.indent){let X=this.startBlockValue($);if(X){this.stack.push(X);return}}yield*this.pop(),yield*this.step()}*flowCollection($){let Z=$.items[$.items.length-1];if(this.type==="flow-error-end"){let X;do yield*this.pop(),X=this.peek(1);while(X?.type==="flow-collection")}else if($.end.length===0){switch(this.type){case"comma":case"explicit-key-ind":if(!Z||Z.sep)$.items.push({start:[this.sourceToken]});else Z.start.push(this.sourceToken);return;case"map-value-ind":if(!Z||Z.value)$.items.push({start:[],key:null,sep:[this.sourceToken]});else if(Z.sep)Z.sep.push(this.sourceToken);else Object.assign(Z,{key:null,sep:[this.sourceToken]});return;case"space":case"comment":case"newline":case"anchor":case"tag":if(!Z||Z.value)$.items.push({start:[this.sourceToken]});else if(Z.sep)Z.sep.push(this.sourceToken);else Z.start.push(this.sourceToken);return;case"alias":case"scalar":case"single-quoted-scalar":case"double-quoted-scalar":{let Y=this.flowScalar(this.type);if(!Z||Z.value)$.items.push({start:[],key:Y,sep:[]});else if(Z.sep)this.stack.push(Y);else Object.assign(Z,{key:Y,sep:[]});return}case"flow-map-end":case"flow-seq-end":$.end.push(this.sourceToken);return}let X=this.startBlockValue($);if(X)this.stack.push(X);else yield*this.pop(),yield*this.step()}else{let X=this.peek(2);if(X.type==="block-map"&&(this.type==="map-value-ind"&&X.indent===$.indent||this.type==="newline"&&!X.items[X.items.length-1].sep))yield*this.pop(),yield*this.step();else if(this.type==="map-value-ind"&&X.type!=="flow-collection"){let 
Y=v6(X),G=I1(Y);DY($);let z=$.end.splice(1,$.end.length);z.push(this.sourceToken);let J={type:"block-map",offset:$.offset,indent:$.indent,items:[{start:G,key:$,sep:z}]};this.onKeyLine=!0,this.stack[this.stack.length-1]=J}else yield*this.lineEnd($)}}flowScalar($){if(this.onNewLine){let Z=this.source.indexOf(`
|
|
165
165
|
`)+1;while(Z!==0)this.onNewLine(this.offset+Z),Z=this.source.indexOf(`
|
|
166
|
-
`,Z)+1}return{type:$,offset:this.offset,indent:this.indent,source:this.source}}startBlockValue($){switch(this.type){case"alias":case"scalar":case"single-quoted-scalar":case"double-quoted-scalar":return this.flowScalar(this.type);case"block-scalar-header":return{type:"block-scalar",offset:this.offset,indent:this.indent,props:[this.sourceToken],source:""};case"flow-map-start":case"flow-seq-start":return{type:"flow-collection",offset:this.offset,indent:this.indent,start:this.sourceToken,items:[],end:[]};case"seq-item-ind":return{type:"block-seq",offset:this.offset,indent:this.indent,items:[{start:[this.sourceToken]}]};case"explicit-key-ind":{this.onKeyLine=!0;let Z=v6($),X=I1(Z);return X.push(this.sourceToken),{type:"block-map",offset:this.offset,indent:this.indent,items:[{start:X,explicitKey:!0}]}}case"map-value-ind":{this.onKeyLine=!0;let Z=v6($),X=I1(Z);return{type:"block-map",offset:this.offset,indent:this.indent,items:[{start:X,key:null,sep:[this.sourceToken]}]}}}return null}atIndentedComment($,Z){if(this.type!=="comment")return!1;if(this.indent<=Z)return!1;return $.every((X)=>X.type==="newline"||X.type==="space")}*documentEnd($){if(this.type!=="doc-mode"){if($.end)$.end.push(this.sourceToken);else $.end=[this.sourceToken];if(this.type==="newline")yield*this.pop()}}*lineEnd($){switch(this.type){case"comma":case"doc-start":case"doc-end":case"flow-seq-end":case"flow-map-end":case"map-value-ind":yield*this.pop(),yield*this.step();break;case"newline":this.onKeyLine=!1;case"space":case"comment":default:if($.end)$.end.push(this.sourceToken);else $.end=[this.sourceToken];if(this.type==="newline")yield*this.pop()}}}sq.Parser=RY});var jY=C((zT)=>{var AY=M9(),tq=n2(),$4=r2(),eq=CX(),$T=m(),ZT=C9(),SY=I9();function CY($){let Z=$.prettyErrors!==!1;return{lineCounter:$.lineCounter||Z&&new ZT.LineCounter||null,prettyErrors:Z}}function XT($,Z={}){let{lineCounter:X,prettyErrors:Y}=CY(Z),G=new SY.Parser(X?.addNewLine),z=new 
AY.Composer(Z),J=Array.from(z.compose(G.parse($)));if(Y&&X)for(let Q of J)Q.errors.forEach($4.prettifyError($,X)),Q.warnings.forEach($4.prettifyError($,X));if(J.length>0)return J;return Object.assign([],{empty:!0},z.streamInfo())}function IY($,Z={}){let{lineCounter:X,prettyErrors:Y}=CY(Z),G=new SY.Parser(X?.addNewLine),z=new AY.Composer(Z),J=null;for(let Q of z.compose(G.parse($),!0,$.length))if(!J)J=Q;else if(J.options.logLevel!=="silent"){J.errors.push(new $4.YAMLParseError(Q.range.slice(0,2),"MULTIPLE_DOCS","Source contains multiple documents; please use YAML.parseAllDocuments()"));break}if(Y&&X)J.errors.forEach($4.prettifyError($,X)),J.warnings.forEach($4.prettifyError($,X));return J}function YT($,Z,X){let Y=void 0;if(typeof Z==="function")Y=Z;else if(X===void 0&&Z&&typeof Z==="object")X=Z;let G=IY($,X);if(!G)return null;if(G.warnings.forEach((z)=>eq.warn(G.options.logLevel,z)),G.errors.length>0)if(G.options.logLevel!=="silent")throw G.errors[0];else G.errors=[];return G.toJS(Object.assign({reviver:Y},X))}function GT($,Z,X){let Y=null;if(typeof Z==="function"||Array.isArray(Z))Y=Z;else if(X===void 0&&Z)X=Z;if(typeof X==="string")X=X.length;if(typeof X==="number"){let G=Math.round(X);X=G<1?void 0:G>8?{indent:8}:{indent:G}}if($===void 0){let{keepUndefined:G}=X??Z??{};if(!G)return}if($T.isDocument($)&&!Y)return $.toString(X);return new tq.Document($,Y,X).toString(X)}zT.parse=YT;zT.parseAllDocuments=XT;zT.parseDocument=IY;zT.stringify=GT});var kY=C((_T)=>{var 
HT=M9(),KT=n2(),BT=Y9(),j9=r2(),UT=k2(),M$=m(),qT=q$(),TT=o(),FT=F$(),LT=L$(),wT=f6(),MT=S9(),OT=C9(),NT=I9(),g6=jY(),xY=j2();_T.Composer=HT.Composer;_T.Document=KT.Document;_T.Schema=BT.Schema;_T.YAMLError=j9.YAMLError;_T.YAMLParseError=j9.YAMLParseError;_T.YAMLWarning=j9.YAMLWarning;_T.Alias=UT.Alias;_T.isAlias=M$.isAlias;_T.isCollection=M$.isCollection;_T.isDocument=M$.isDocument;_T.isMap=M$.isMap;_T.isNode=M$.isNode;_T.isPair=M$.isPair;_T.isScalar=M$.isScalar;_T.isSeq=M$.isSeq;_T.Pair=qT.Pair;_T.Scalar=TT.Scalar;_T.YAMLMap=FT.YAMLMap;_T.YAMLSeq=LT.YAMLSeq;_T.CST=wT;_T.Lexer=MT.Lexer;_T.LineCounter=OT.LineCounter;_T.Parser=NT.Parser;_T.parse=g6.parse;_T.parseAllDocuments=g6.parseAllDocuments;_T.parseDocument=g6.parseDocument;_T.stringify=g6.stringify;_T.visit=xY.visit;_T.visitAsync=xY.visitAsync});var P3=J3(_3(),1),{program:M1,createCommand:hO,createArgument:uO,createOption:mO,CommanderError:dO,InvalidArgumentError:cO,InvalidOptionArgumentError:lO,Command:pO,Argument:iO,Option:wX,Help:aO}=P3.default;var D3={name:"@pickled-dev/cli",version:"0.23.0",description:"Test what agents actually understand about your product",module:"dist/index.js",type:"module",bin:{pickled:"./dist/index.js"},files:["dist"],scripts:{dev:"bun run ./src/index.ts",build:"bun build ./src/index.ts --outdir ./dist --target bun --minify --external @anthropic-ai/claude-agent-sdk",release:"semantic-release"},dependencies:{commander:"^14.0.2",chalk:"^5.6.2","@anthropic-ai/claude-agent-sdk":"^0.3.0"},devDependencies:{"@pickled-dev/config":"workspace:*","@pickled-dev/core":"workspace:*","semantic-release":"^25.0.2","@semantic-release/commit-analyzer":"^13.0.1","@semantic-release/exec":"^7.1.0","@semantic-release/git":"^10.0.1","@semantic-release/github":"^12.0.2","@semantic-release/release-notes-generator":"^14.0.3"}};import _O from"path";var h0={category:"cli",provider:"claude-code",model:"sonnet"},MX=["Read","Glob","Grep","Bash"],OX=["Edit","MultiEdit","Write","NotebookEdit"];var 
bY=J3(kY(),1);async function j1($){let Z=`${$}/pickled.yml`,X=Bun.file(Z);if(!await X.exists())throw Error(`pickled.yml not found in ${$}`);let Y;try{let G=await X.text();Y=bY.default.parse(G)}catch(G){throw Error(`Failed to parse pickled.yml: ${G}`)}return Y=x9(Y),eT(Y),Y}var tT=/\$\{([A-Z_][A-Z0-9_]*)\}/g;function x9($){if(typeof $==="string")return $.replace(tT,(Z,X)=>process.env[X]??"");if(Array.isArray($))return $.map(x9);if($!==null&&typeof $==="object"){let Z={};for(let[X,Y]of Object.entries($))Z[X]=x9(Y);return Z}return $}function eT($){if(!$.tool?.name)throw Error("pickled.yml: 'tool.name' is required");if(!Array.isArray($.scenarios)||$.scenarios.length===0)throw Error("pickled.yml: 'scenarios' must be a non-empty array");if($.docs?.sources)for(let[X,Y]of Object.entries($.docs.sources)){if(X==="none")throw Error('pickled.yml: docs.sources cannot use the reserved id "none". That name represents the no-context matrix cell (model prior with toolset:none, or open discovery with toolset:web). Rename this source.');JF(X,Y)}if($.toolsets){if(typeof $.toolsets!=="object"||Array.isArray($.toolsets))throw Error("pickled.yml: 'toolsets' must be an object mapping name to configuration");for(let[X,Y]of Object.entries($.toolsets))if(typeof Y!=="object"||Y===null||Array.isArray(Y))throw Error(`pickled.yml: toolsets["${X}"] must be an object`)}if($.targets)for(let[X,Y]of Object.entries($.targets)){if(Y.systemPrompt!==void 0)throw Error(`pickled.yml: target "${X}" sets 'systemPrompt', which bypasses the citation contract. Remove it; custom prompts are not supported in citation mode.`);if(Y.provider==="codex-cli"){if(!Y.model)throw Error(`pickled.yml: target "${X}" (codex-cli) requires an explicit 'model' field. Codex's default model can change without notice; pin it for reproducible evals.`);if(Y.maxTurns!==void 0)throw Error(`pickled.yml: target "${X}" (codex-cli) sets 'maxTurns', but the codex CLI does not support a turn cap. 
Remove the field.`)}if(Y.category==="api"){if(!Y.model)throw Error(`pickled.yml: target "${X}" (api/${Y.provider}) requires an explicit 'model' field. Pickled does not substitute a default; reproducible evals depend on pinning the model.`);let G=["allowedTools","disallowedTools","mcpServers","permissionMode","maxTurns","maxThinkingTokens","maxBudgetUsd"];for(let z of G)if(Y[z]!==void 0)throw Error(`pickled.yml: target "${X}" (api/${Y.provider}) sets '${z}', which only applies to CLI/Agent SDK targets. Remove the field; API targets accept only model/temperature/maxTokens/threshold.`);if(Y.workspaceContext!==void 0)throw Error(`pickled.yml: target "${X}" (api/${Y.provider}) sets 'workspaceContext', which only applies to IDE targets. Remove the field.`)}}let Z=new Set(Object.keys($.docs?.sources??{}));for(let X of $.scenarios){if(!X.name||!X.prompt)throw Error("pickled.yml: every scenario needs 'name' and 'prompt'");if(X.requiredSources!==void 0){if(!Array.isArray(X.requiredSources))throw Error(`pickled.yml: scenario "${X.name}" has non-array 'requiredSources'. Omit the field to skip citation scoring, or set [] for "any cited source counts".`);for(let Y of X.requiredSources)if(!Z.has(Y))throw Error(`pickled.yml: scenario "${X.name}" references unknown source "${Y}". Declared sources: ${[...Z].join(", ")||"(none)"}`)}VF(X.name,X.traps),zF(X.name,X.compareSurfaces,Z),ZF(X.name,X.matrix,Z,new Set(Object.keys($.targets??{})),new Set(Object.keys($.toolsets??{none:{}}))),XF(X.name,X.expected),YF(X.name,X.verifiers,Z),GF(X)}$F($)}function $F($){if(!$.docs?.sources)return;let Z=[];for(let[Y,G]of Object.entries($.docs.sources)){if(typeof G==="string")continue;let z=G.audit?.traps;if(Array.isArray(z))Z.push({id:Y,list:z})}if(Z.length===0)return;let X=new Map;for(let Y of $.scenarios)for(let G of Y.traps??[]){let z=X.get(G.id);if(z!==void 0)throw Error(`pickled.yml: trap id "${G.id}" is declared in both scenario "${z}" and scenario "${Y.name}". 
Globally unique trap ids are required when any source uses list-form audit.traps suppression. Rename one of the traps.`);X.set(G.id,Y.name)}for(let{id:Y,list:G}of Z)for(let z of G)if(!X.has(z)){let J=[...X.keys()].join(", ")||"(none)";throw Error(`pickled.yml: docs.sources["${Y}"].audit.traps lists unknown trap id "${z}". Declared trap ids: ${J}`)}}function ZF($,Z,X,Y,G){if(Z===void 0)return;if(typeof Z!=="object"||Array.isArray(Z))throw Error(`pickled.yml: scenario "${$}" matrix must be an object with optional interfaces/sources/toolsets arrays`);let z=(J,Q,W)=>{let V=Z[J];if(V===void 0)return;if(!Array.isArray(V))throw Error(`pickled.yml: scenario "${$}" matrix.${J} must be an array of ${W} names`);if(V.length===0)throw Error(`pickled.yml: scenario "${$}" matrix.${J} cannot be empty (omit the field to use defaults)`);for(let K of V){if(typeof K!=="string")throw Error(`pickled.yml: scenario "${$}" matrix.${J} entries must be strings`);if(!Q.has(K))throw Error(`pickled.yml: scenario "${$}" matrix.${J} references unknown ${W} "${K}". 
Declared: ${[...Q].join(", ")||"(none)"}`)}};z("interfaces",Y,"target"),z("sources",new Set([...X,"none"]),"source"),z("toolsets",G,"toolset")}function XF($,Z){if(Z===void 0)return;if(typeof Z!=="object"||Array.isArray(Z))throw Error(`pickled.yml: scenario "${$}" expected must be an object with optional includes/excludes arrays`);let X=(Y)=>{let G=Z[Y];if(G===void 0)return;if(!Array.isArray(G))throw Error(`pickled.yml: scenario "${$}" expected.${Y} must be an array of strings`);if(G.length===0)throw Error(`pickled.yml: scenario "${$}" expected.${Y} cannot be empty (omit the field instead)`);for(let z=0;z<G.length;z++)if(typeof G[z]!=="string"||G[z].length===0)throw Error(`pickled.yml: scenario "${$}" expected.${Y}[${z}] must be a non-empty string`)};X("includes"),X("excludes")}function YF($,Z,X){if(Z===void 0)return;if(typeof Z!=="object"||Array.isArray(Z))throw Error(`pickled.yml: scenario "${$}" verifiers must be an object`);let Y=Z.sources;if(Y===void 0)return;if(!Array.isArray(Y))throw Error(`pickled.yml: scenario "${$}" verifiers.sources must be an array of source IDs`);for(let G of Y){if(typeof G!=="string")throw Error(`pickled.yml: scenario "${$}" verifiers.sources entries must be strings`);if(!X.has(G))throw Error(`pickled.yml: scenario "${$}" verifiers.sources references unknown source "${G}"`)}}function GF($){let Z=$.requiredSources!==void 0,X=$.expected!==void 0&&($.expected.includes!==void 0&&$.expected.includes.length>0||$.expected.excludes!==void 0&&$.expected.excludes.length>0),Y=$.traps!==void 0&&$.traps.length>0;if(!Z&&!X&&!Y)throw Error(`pickled.yml: scenario "${$.name}" must declare at least one of requiredSources, expected.includes/excludes, or traps. A scenario with nothing to check has no verdict.`);let G=($.matrix?.toolsets??[]).filter((z)=>z!=="none");if(G.length>0&&!X&&!Y)throw Error(`pickled.yml: scenario "${$.name}" declares non-none toolsets [${G.join(", ")}] but has neither expected.includes/excludes nor traps. 
Non-none cells skip the citation contract because the source is not injected, so requiredSources alone leaves them with no actionable answer contract. Add expected or traps, or restrict matrix.toolsets to ["none"].`)}function zF($,Z,X){if(Z===void 0)return;if(!Array.isArray(Z))throw Error(`pickled.yml: scenario "${$}" compareSurfaces must be an array of source-id lists`);if(Z.length===0)throw Error(`pickled.yml: scenario "${$}" compareSurfaces cannot be empty (use a non-empty list of surfaces, or remove the field)`);for(let Y=0;Y<Z.length;Y++){let G=Z[Y];if(!Array.isArray(G))throw Error(`pickled.yml: scenario "${$}" compareSurfaces[${Y}] must be an array of source ids`);if(G.length===0)throw Error(`pickled.yml: scenario "${$}" compareSurfaces[${Y}] must be a non-empty list of source ids`);for(let z of G){if(typeof z!=="string")throw Error(`pickled.yml: scenario "${$}" compareSurfaces[${Y}] entries must be string source ids`);if(!X.has(z))throw Error(`pickled.yml: scenario "${$}" compareSurfaces[${Y}] references unknown source "${z}". 
Declared sources: ${[...X].join(", ")||"(none)"}`)}}}function JF($,Z){if(typeof Z==="string"){if(Z.length===0)throw Error(`pickled.yml: docs.sources["${$}"] string form must be a non-empty file path or URL`);return}if(!Z||typeof Z!=="object")throw Error(`pickled.yml: docs.sources["${$}"] must be a string (path/URL) or an object with a 'path' field`);let X=Z;if(typeof X.path!=="string"||X.path.length===0)throw Error(`pickled.yml: docs.sources["${$}"] object form requires a non-empty 'path' field`);if(X.audit!==void 0){if(typeof X.audit!=="object"||X.audit===null)throw Error(`pickled.yml: docs.sources["${$}"].audit must be an object`);let Y=X.audit;if(Y.traps!==void 0){let G=Y.traps;if(typeof G==="boolean");else if(Array.isArray(G)){if(G.length===0)throw Error(`pickled.yml: docs.sources["${$}"].audit.traps cannot be an empty array; use true (scan all) or false (skip all) instead`);for(let z=0;z<G.length;z++)if(typeof G[z]!=="string")throw Error(`pickled.yml: docs.sources["${$}"].audit.traps[${z}] must be a string trap id`)}else throw Error(`pickled.yml: docs.sources["${$}"].audit.traps must be a boolean or an array of trap ids`)}for(let G of Object.keys(Y))if(G!=="traps")throw Error(`pickled.yml: docs.sources["${$}"].audit has unknown field "${G}"`)}if(X.type!==void 0){if(X.type!=="file"&&X.type!=="url"&&X.type!=="codebase")throw Error(`pickled.yml: docs.sources["${$}"].type must be "file", "url", or "codebase"`)}if(X.type==="codebase"){if(typeof X.path==="string"&&X.path.split("/").includes(".."))throw Error(`pickled.yml: docs.sources["${$}"].path must not contain ".." segments. 
Codebase loader stays within the project root.`);if(X.exclude!==void 0){if(!Array.isArray(X.exclude))throw Error(`pickled.yml: docs.sources["${$}"].exclude must be an array of glob patterns`);for(let Y=0;Y<X.exclude.length;Y++)if(typeof X.exclude[Y]!=="string")throw Error(`pickled.yml: docs.sources["${$}"].exclude[${Y}] must be a string glob pattern`)}if(X.maxBytes!==void 0){if(typeof X.maxBytes!=="number"||!Number.isFinite(X.maxBytes)||X.maxBytes<=0)throw Error(`pickled.yml: docs.sources["${$}"].maxBytes must be a positive number of bytes`)}}else{if(X.exclude!==void 0)throw Error(`pickled.yml: docs.sources["${$}"].exclude only applies to type: codebase sources`);if(X.maxBytes!==void 0)throw Error(`pickled.yml: docs.sources["${$}"].maxBytes only applies to type: codebase sources`)}for(let Y of Object.keys(X))if(Y!=="path"&&Y!=="audit"&&Y!=="type"&&Y!=="exclude"&&Y!=="maxBytes")throw Error(`pickled.yml: docs.sources["${$}"] has unknown field "${Y}"`)}var QF=new Set(["g","y"]),WF=new Set(["i","m","s","u","v"]);function VF($,Z){if(Z===void 0)return;if(!Array.isArray(Z))throw Error(`pickled.yml: scenario "${$}" has non-array 'traps'`);let X=new Set;for(let Y of Z){if(!Y.id||typeof Y.id!=="string")throw Error(`pickled.yml: scenario "${$}" has a trap missing 'id'`);if(X.has(Y.id))throw Error(`pickled.yml: scenario "${$}" has duplicate trap id "${Y.id}"`);if(X.add(Y.id),!Y.reason||typeof Y.reason!=="string")throw Error(`pickled.yml: scenario "${$}" trap "${Y.id}" requires non-empty 'reason'`);if(Y.auditSeverity!==void 0&&Y.auditSeverity!=="warning"&&Y.auditSeverity!=="error")throw Error(`pickled.yml: scenario "${$}" trap "${Y.id}" auditSeverity must be "warning" or "error"`);let G=typeof Y.match==="string",z=typeof Y.pattern==="string";if(G===z)throw Error(`pickled.yml: scenario "${$}" trap "${Y.id}" must set exactly one of 'match' or 'pattern'`);if(G){if(Y.match==="")throw Error(`pickled.yml: scenario "${$}" trap "${Y.id}" has empty 'match'`);if(Y.flags!==void 0)throw 
Error(`pickled.yml: scenario "${$}" trap "${Y.id}" sets 'flags' without 'pattern'`)}else{let J=Y.pattern;if(typeof J!=="string")throw Error(`pickled.yml: scenario "${$}" trap "${Y.id}" must set 'pattern'`);if(J==="")throw Error(`pickled.yml: scenario "${$}" trap "${Y.id}" has empty 'pattern'`);if(Y.flags!==void 0){if(typeof Y.flags!=="string")throw Error(`pickled.yml: scenario "${$}" trap "${Y.id}" has non-string 'flags'`);for(let Q of Y.flags){if(QF.has(Q))throw Error(`pickled.yml: scenario "${$}" trap "${Y.id}" uses forbidden regex flag "${Q}" (g and y are not allowed)`);if(!WF.has(Q))throw Error(`pickled.yml: scenario "${$}" trap "${Y.id}" uses unsupported regex flag "${Q}"`)}}try{new RegExp(J,Y.flags??"")}catch(Q){throw Error(`pickled.yml: scenario "${$}" trap "${Y.id}" has invalid regex pattern: ${Q instanceof Error?Q.message:Q}`)}}}}function k9($,Z){let X=new Set([...Object.keys($.targets??{}),"default"]);if(!X.has(Z)){let Y=[...X].sort().join(", ");throw Error(`Unknown target: "${Z}". Available targets: ${Y}`)}return{...$,matrix:{...$.matrix,target:[Z]},scenarios:$.scenarios.filter((Y)=>!Y.target||Y.target===Z)}}function b9($){if(typeof $==="string")return{path:$,auditTraps:!0};return{path:$.path,auditTraps:$.audit?.traps??!0}}function h6($){let{config:Z,files:X,pairs:Y,findings:G}=$,z=[],J=G.filter((H)=>H.severity==="error").length,Q=G.filter((H)=>H.severity==="warning").length;if(z.push(`# Agent-context audit
|
|
166
|
+
`,Z)+1}return{type:$,offset:this.offset,indent:this.indent,source:this.source}}startBlockValue($){switch(this.type){case"alias":case"scalar":case"single-quoted-scalar":case"double-quoted-scalar":return this.flowScalar(this.type);case"block-scalar-header":return{type:"block-scalar",offset:this.offset,indent:this.indent,props:[this.sourceToken],source:""};case"flow-map-start":case"flow-seq-start":return{type:"flow-collection",offset:this.offset,indent:this.indent,start:this.sourceToken,items:[],end:[]};case"seq-item-ind":return{type:"block-seq",offset:this.offset,indent:this.indent,items:[{start:[this.sourceToken]}]};case"explicit-key-ind":{this.onKeyLine=!0;let Z=v6($),X=I1(Z);return X.push(this.sourceToken),{type:"block-map",offset:this.offset,indent:this.indent,items:[{start:X,explicitKey:!0}]}}case"map-value-ind":{this.onKeyLine=!0;let Z=v6($),X=I1(Z);return{type:"block-map",offset:this.offset,indent:this.indent,items:[{start:X,key:null,sep:[this.sourceToken]}]}}}return null}atIndentedComment($,Z){if(this.type!=="comment")return!1;if(this.indent<=Z)return!1;return $.every((X)=>X.type==="newline"||X.type==="space")}*documentEnd($){if(this.type!=="doc-mode"){if($.end)$.end.push(this.sourceToken);else $.end=[this.sourceToken];if(this.type==="newline")yield*this.pop()}}*lineEnd($){switch(this.type){case"comma":case"doc-start":case"doc-end":case"flow-seq-end":case"flow-map-end":case"map-value-ind":yield*this.pop(),yield*this.step();break;case"newline":this.onKeyLine=!1;case"space":case"comment":default:if($.end)$.end.push(this.sourceToken);else $.end=[this.sourceToken];if(this.type==="newline")yield*this.pop()}}}sq.Parser=RY});var jY=C((zT)=>{var AY=M9(),tq=n2(),$4=r2(),eq=CX(),$T=m(),ZT=C9(),SY=I9();function CY($){let Z=$.prettyErrors!==!1;return{lineCounter:$.lineCounter||Z&&new ZT.LineCounter||null,prettyErrors:Z}}function XT($,Z={}){let{lineCounter:X,prettyErrors:Y}=CY(Z),G=new SY.Parser(X?.addNewLine),z=new 
AY.Composer(Z),J=Array.from(z.compose(G.parse($)));if(Y&&X)for(let Q of J)Q.errors.forEach($4.prettifyError($,X)),Q.warnings.forEach($4.prettifyError($,X));if(J.length>0)return J;return Object.assign([],{empty:!0},z.streamInfo())}function IY($,Z={}){let{lineCounter:X,prettyErrors:Y}=CY(Z),G=new SY.Parser(X?.addNewLine),z=new AY.Composer(Z),J=null;for(let Q of z.compose(G.parse($),!0,$.length))if(!J)J=Q;else if(J.options.logLevel!=="silent"){J.errors.push(new $4.YAMLParseError(Q.range.slice(0,2),"MULTIPLE_DOCS","Source contains multiple documents; please use YAML.parseAllDocuments()"));break}if(Y&&X)J.errors.forEach($4.prettifyError($,X)),J.warnings.forEach($4.prettifyError($,X));return J}function YT($,Z,X){let Y=void 0;if(typeof Z==="function")Y=Z;else if(X===void 0&&Z&&typeof Z==="object")X=Z;let G=IY($,X);if(!G)return null;if(G.warnings.forEach((z)=>eq.warn(G.options.logLevel,z)),G.errors.length>0)if(G.options.logLevel!=="silent")throw G.errors[0];else G.errors=[];return G.toJS(Object.assign({reviver:Y},X))}function GT($,Z,X){let Y=null;if(typeof Z==="function"||Array.isArray(Z))Y=Z;else if(X===void 0&&Z)X=Z;if(typeof X==="string")X=X.length;if(typeof X==="number"){let G=Math.round(X);X=G<1?void 0:G>8?{indent:8}:{indent:G}}if($===void 0){let{keepUndefined:G}=X??Z??{};if(!G)return}if($T.isDocument($)&&!Y)return $.toString(X);return new tq.Document($,Y,X).toString(X)}zT.parse=YT;zT.parseAllDocuments=XT;zT.parseDocument=IY;zT.stringify=GT});var kY=C((_T)=>{var 
HT=M9(),KT=n2(),BT=Y9(),j9=r2(),UT=k2(),M$=m(),qT=q$(),TT=o(),FT=F$(),LT=L$(),wT=f6(),MT=S9(),OT=C9(),NT=I9(),g6=jY(),xY=j2();_T.Composer=HT.Composer;_T.Document=KT.Document;_T.Schema=BT.Schema;_T.YAMLError=j9.YAMLError;_T.YAMLParseError=j9.YAMLParseError;_T.YAMLWarning=j9.YAMLWarning;_T.Alias=UT.Alias;_T.isAlias=M$.isAlias;_T.isCollection=M$.isCollection;_T.isDocument=M$.isDocument;_T.isMap=M$.isMap;_T.isNode=M$.isNode;_T.isPair=M$.isPair;_T.isScalar=M$.isScalar;_T.isSeq=M$.isSeq;_T.Pair=qT.Pair;_T.Scalar=TT.Scalar;_T.YAMLMap=FT.YAMLMap;_T.YAMLSeq=LT.YAMLSeq;_T.CST=wT;_T.Lexer=MT.Lexer;_T.LineCounter=OT.LineCounter;_T.Parser=NT.Parser;_T.parse=g6.parse;_T.parseAllDocuments=g6.parseAllDocuments;_T.parseDocument=g6.parseDocument;_T.stringify=g6.stringify;_T.visit=xY.visit;_T.visitAsync=xY.visitAsync});var P3=J3(_3(),1),{program:M1,createCommand:uO,createArgument:mO,createOption:dO,CommanderError:cO,InvalidArgumentError:lO,InvalidOptionArgumentError:pO,Command:iO,Argument:aO,Option:wX,Help:nO}=P3.default;var D3={name:"@pickled-dev/cli",version:"0.24.0",description:"Test what agents actually understand about your product",module:"dist/index.js",type:"module",bin:{pickled:"./dist/index.js"},files:["dist"],scripts:{dev:"bun run ./src/index.ts",build:"bun build ./src/index.ts --outdir ./dist --target bun --minify --external @anthropic-ai/claude-agent-sdk",release:"semantic-release"},dependencies:{commander:"^14.0.2",chalk:"^5.6.2","@anthropic-ai/claude-agent-sdk":"^0.3.0"},devDependencies:{"@pickled-dev/config":"workspace:*","@pickled-dev/core":"workspace:*","semantic-release":"^25.0.2","@semantic-release/commit-analyzer":"^13.0.1","@semantic-release/exec":"^7.1.0","@semantic-release/git":"^10.0.1","@semantic-release/github":"^12.0.2","@semantic-release/release-notes-generator":"^14.0.3"}};import PO from"path";var h0={category:"cli",provider:"claude-code",model:"sonnet"},MX=["Read","Glob","Grep","Bash"],OX=["Edit","MultiEdit","Write","NotebookEdit"];var 
bY=J3(kY(),1);async function j1($){let Z=`${$}/pickled.yml`,X=Bun.file(Z);if(!await X.exists())throw Error(`pickled.yml not found in ${$}`);let Y;try{let G=await X.text();Y=bY.default.parse(G)}catch(G){throw Error(`Failed to parse pickled.yml: ${G}`)}return Y=x9(Y),eT(Y),Y}var tT=/\$\{([A-Z_][A-Z0-9_]*)\}/g;function x9($){if(typeof $==="string")return $.replace(tT,(Z,X)=>process.env[X]??"");if(Array.isArray($))return $.map(x9);if($!==null&&typeof $==="object"){let Z={};for(let[X,Y]of Object.entries($))Z[X]=x9(Y);return Z}return $}function eT($){if(!$.tool?.name)throw Error("pickled.yml: 'tool.name' is required");if(!Array.isArray($.scenarios)||$.scenarios.length===0)throw Error("pickled.yml: 'scenarios' must be a non-empty array");if($.docs?.sources)for(let[X,Y]of Object.entries($.docs.sources)){if(X==="none")throw Error('pickled.yml: docs.sources cannot use the reserved id "none". That name represents the no-context matrix cell (model prior with toolset:none, or open discovery with toolset:web). Rename this source.');JF(X,Y)}if($.toolsets){if(typeof $.toolsets!=="object"||Array.isArray($.toolsets))throw Error("pickled.yml: 'toolsets' must be an object mapping name to configuration");for(let[X,Y]of Object.entries($.toolsets))if(typeof Y!=="object"||Y===null||Array.isArray(Y))throw Error(`pickled.yml: toolsets["${X}"] must be an object`)}if($.targets)for(let[X,Y]of Object.entries($.targets)){if(Y.systemPrompt!==void 0)throw Error(`pickled.yml: target "${X}" sets 'systemPrompt', which bypasses the citation contract. Remove it; custom prompts are not supported in citation mode.`);if(Y.provider==="codex-cli"){if(!Y.model)throw Error(`pickled.yml: target "${X}" (codex-cli) requires an explicit 'model' field. Codex's default model can change without notice; pin it for reproducible evals.`);if(Y.maxTurns!==void 0)throw Error(`pickled.yml: target "${X}" (codex-cli) sets 'maxTurns', but the codex CLI does not support a turn cap. 
Remove the field.`)}if(Y.category==="api"){if(!Y.model)throw Error(`pickled.yml: target "${X}" (api/${Y.provider}) requires an explicit 'model' field. Pickled does not substitute a default; reproducible evals depend on pinning the model.`);let G=["allowedTools","disallowedTools","mcpServers","permissionMode","maxTurns","maxThinkingTokens","maxBudgetUsd"];for(let z of G)if(Y[z]!==void 0)throw Error(`pickled.yml: target "${X}" (api/${Y.provider}) sets '${z}', which only applies to CLI/Agent SDK targets. Remove the field; API targets accept only model/temperature/maxTokens/threshold.`);if(Y.workspaceContext!==void 0)throw Error(`pickled.yml: target "${X}" (api/${Y.provider}) sets 'workspaceContext', which only applies to IDE targets. Remove the field.`)}}let Z=new Set(Object.keys($.docs?.sources??{}));for(let X of $.scenarios){if(!X.name||!X.prompt)throw Error("pickled.yml: every scenario needs 'name' and 'prompt'");if(X.requiredSources!==void 0){if(!Array.isArray(X.requiredSources))throw Error(`pickled.yml: scenario "${X.name}" has non-array 'requiredSources'. Omit the field to skip citation scoring, or set [] for "any cited source counts".`);for(let Y of X.requiredSources)if(!Z.has(Y))throw Error(`pickled.yml: scenario "${X.name}" references unknown source "${Y}". Declared sources: ${[...Z].join(", ")||"(none)"}`)}VF(X.name,X.traps),zF(X.name,X.compareSurfaces,Z),ZF(X.name,X.matrix,Z,new Set(Object.keys($.targets??{})),new Set(Object.keys($.toolsets??{none:{}}))),XF(X.name,X.expected),YF(X.name,X.verifiers,Z),GF(X)}$F($)}function $F($){if(!$.docs?.sources)return;let Z=[];for(let[Y,G]of Object.entries($.docs.sources)){if(typeof G==="string")continue;let z=G.audit?.traps;if(Array.isArray(z))Z.push({id:Y,list:z})}if(Z.length===0)return;let X=new Map;for(let Y of $.scenarios)for(let G of Y.traps??[]){let z=X.get(G.id);if(z!==void 0)throw Error(`pickled.yml: trap id "${G.id}" is declared in both scenario "${z}" and scenario "${Y.name}". 
Globally unique trap ids are required when any source uses list-form audit.traps suppression. Rename one of the traps.`);X.set(G.id,Y.name)}for(let{id:Y,list:G}of Z)for(let z of G)if(!X.has(z)){let J=[...X.keys()].join(", ")||"(none)";throw Error(`pickled.yml: docs.sources["${Y}"].audit.traps lists unknown trap id "${z}". Declared trap ids: ${J}`)}}function ZF($,Z,X,Y,G){if(Z===void 0)return;if(typeof Z!=="object"||Array.isArray(Z))throw Error(`pickled.yml: scenario "${$}" matrix must be an object with optional interfaces/sources/toolsets arrays`);let z=(J,Q,W)=>{let V=Z[J];if(V===void 0)return;if(!Array.isArray(V))throw Error(`pickled.yml: scenario "${$}" matrix.${J} must be an array of ${W} names`);if(V.length===0)throw Error(`pickled.yml: scenario "${$}" matrix.${J} cannot be empty (omit the field to use defaults)`);for(let K of V){if(typeof K!=="string")throw Error(`pickled.yml: scenario "${$}" matrix.${J} entries must be strings`);if(!Q.has(K))throw Error(`pickled.yml: scenario "${$}" matrix.${J} references unknown ${W} "${K}". 
Declared: ${[...Q].join(", ")||"(none)"}`)}};z("interfaces",Y,"target"),z("sources",new Set([...X,"none"]),"source"),z("toolsets",G,"toolset")}function XF($,Z){if(Z===void 0)return;if(typeof Z!=="object"||Array.isArray(Z))throw Error(`pickled.yml: scenario "${$}" expected must be an object with optional includes/excludes arrays`);let X=(Y)=>{let G=Z[Y];if(G===void 0)return;if(!Array.isArray(G))throw Error(`pickled.yml: scenario "${$}" expected.${Y} must be an array of strings`);if(G.length===0)throw Error(`pickled.yml: scenario "${$}" expected.${Y} cannot be empty (omit the field instead)`);for(let z=0;z<G.length;z++)if(typeof G[z]!=="string"||G[z].length===0)throw Error(`pickled.yml: scenario "${$}" expected.${Y}[${z}] must be a non-empty string`)};X("includes"),X("excludes")}function YF($,Z,X){if(Z===void 0)return;if(typeof Z!=="object"||Array.isArray(Z))throw Error(`pickled.yml: scenario "${$}" verifiers must be an object`);let Y=Z.sources;if(Y===void 0)return;if(!Array.isArray(Y))throw Error(`pickled.yml: scenario "${$}" verifiers.sources must be an array of source IDs`);for(let G of Y){if(typeof G!=="string")throw Error(`pickled.yml: scenario "${$}" verifiers.sources entries must be strings`);if(!X.has(G))throw Error(`pickled.yml: scenario "${$}" verifiers.sources references unknown source "${G}"`)}}function GF($){let Z=$.requiredSources!==void 0,X=$.expected!==void 0&&($.expected.includes!==void 0&&$.expected.includes.length>0||$.expected.excludes!==void 0&&$.expected.excludes.length>0),Y=$.traps!==void 0&&$.traps.length>0;if(!Z&&!X&&!Y)throw Error(`pickled.yml: scenario "${$.name}" must declare at least one of requiredSources, expected.includes/excludes, or traps. A scenario with nothing to check has no verdict.`);let G=($.matrix?.toolsets??[]).filter((z)=>z!=="none");if(G.length>0&&!X&&!Y)throw Error(`pickled.yml: scenario "${$.name}" declares non-none toolsets [${G.join(", ")}] but has neither expected.includes/excludes nor traps. 
Non-none cells skip the citation contract because the source is not injected, so requiredSources alone leaves them with no actionable answer contract. Add expected or traps, or restrict matrix.toolsets to ["none"].`)}function zF($,Z,X){if(Z===void 0)return;if(!Array.isArray(Z))throw Error(`pickled.yml: scenario "${$}" compareSurfaces must be an array of source-id lists`);if(Z.length===0)throw Error(`pickled.yml: scenario "${$}" compareSurfaces cannot be empty (use a non-empty list of surfaces, or remove the field)`);for(let Y=0;Y<Z.length;Y++){let G=Z[Y];if(!Array.isArray(G))throw Error(`pickled.yml: scenario "${$}" compareSurfaces[${Y}] must be an array of source ids`);if(G.length===0)throw Error(`pickled.yml: scenario "${$}" compareSurfaces[${Y}] must be a non-empty list of source ids`);for(let z of G){if(typeof z!=="string")throw Error(`pickled.yml: scenario "${$}" compareSurfaces[${Y}] entries must be string source ids`);if(!X.has(z))throw Error(`pickled.yml: scenario "${$}" compareSurfaces[${Y}] references unknown source "${z}". 
Declared sources: ${[...X].join(", ")||"(none)"}`)}}}function JF($,Z){if(typeof Z==="string"){if(Z.length===0)throw Error(`pickled.yml: docs.sources["${$}"] string form must be a non-empty file path or URL`);return}if(!Z||typeof Z!=="object")throw Error(`pickled.yml: docs.sources["${$}"] must be a string (path/URL) or an object with a 'path' field`);let X=Z;if(typeof X.path!=="string"||X.path.length===0)throw Error(`pickled.yml: docs.sources["${$}"] object form requires a non-empty 'path' field`);if(X.audit!==void 0){if(typeof X.audit!=="object"||X.audit===null)throw Error(`pickled.yml: docs.sources["${$}"].audit must be an object`);let Y=X.audit;if(Y.traps!==void 0){let G=Y.traps;if(typeof G==="boolean");else if(Array.isArray(G)){if(G.length===0)throw Error(`pickled.yml: docs.sources["${$}"].audit.traps cannot be an empty array; use true (scan all) or false (skip all) instead`);for(let z=0;z<G.length;z++)if(typeof G[z]!=="string")throw Error(`pickled.yml: docs.sources["${$}"].audit.traps[${z}] must be a string trap id`)}else throw Error(`pickled.yml: docs.sources["${$}"].audit.traps must be a boolean or an array of trap ids`)}for(let G of Object.keys(Y))if(G!=="traps")throw Error(`pickled.yml: docs.sources["${$}"].audit has unknown field "${G}"`)}if(X.type!==void 0){if(X.type!=="file"&&X.type!=="url"&&X.type!=="codebase")throw Error(`pickled.yml: docs.sources["${$}"].type must be "file", "url", or "codebase"`)}if(X.type==="codebase"){if(typeof X.path==="string"&&X.path.split("/").includes(".."))throw Error(`pickled.yml: docs.sources["${$}"].path must not contain ".." segments. 
Codebase loader stays within the project root.`);if(X.exclude!==void 0){if(!Array.isArray(X.exclude))throw Error(`pickled.yml: docs.sources["${$}"].exclude must be an array of glob patterns`);for(let Y=0;Y<X.exclude.length;Y++)if(typeof X.exclude[Y]!=="string")throw Error(`pickled.yml: docs.sources["${$}"].exclude[${Y}] must be a string glob pattern`)}if(X.maxBytes!==void 0){if(typeof X.maxBytes!=="number"||!Number.isFinite(X.maxBytes)||X.maxBytes<=0)throw Error(`pickled.yml: docs.sources["${$}"].maxBytes must be a positive number of bytes`)}}else{if(X.exclude!==void 0)throw Error(`pickled.yml: docs.sources["${$}"].exclude only applies to type: codebase sources`);if(X.maxBytes!==void 0)throw Error(`pickled.yml: docs.sources["${$}"].maxBytes only applies to type: codebase sources`)}for(let Y of Object.keys(X))if(Y!=="path"&&Y!=="audit"&&Y!=="type"&&Y!=="exclude"&&Y!=="maxBytes")throw Error(`pickled.yml: docs.sources["${$}"] has unknown field "${Y}"`)}var QF=new Set(["g","y"]),WF=new Set(["i","m","s","u","v"]);function VF($,Z){if(Z===void 0)return;if(!Array.isArray(Z))throw Error(`pickled.yml: scenario "${$}" has non-array 'traps'`);let X=new Set;for(let Y of Z){if(!Y.id||typeof Y.id!=="string")throw Error(`pickled.yml: scenario "${$}" has a trap missing 'id'`);if(X.has(Y.id))throw Error(`pickled.yml: scenario "${$}" has duplicate trap id "${Y.id}"`);if(X.add(Y.id),!Y.reason||typeof Y.reason!=="string")throw Error(`pickled.yml: scenario "${$}" trap "${Y.id}" requires non-empty 'reason'`);if(Y.auditSeverity!==void 0&&Y.auditSeverity!=="warning"&&Y.auditSeverity!=="error")throw Error(`pickled.yml: scenario "${$}" trap "${Y.id}" auditSeverity must be "warning" or "error"`);let G=typeof Y.match==="string",z=typeof Y.pattern==="string";if(G===z)throw Error(`pickled.yml: scenario "${$}" trap "${Y.id}" must set exactly one of 'match' or 'pattern'`);if(G){if(Y.match==="")throw Error(`pickled.yml: scenario "${$}" trap "${Y.id}" has empty 'match'`);if(Y.flags!==void 0)throw 
Error(`pickled.yml: scenario "${$}" trap "${Y.id}" sets 'flags' without 'pattern'`)}else{let J=Y.pattern;if(typeof J!=="string")throw Error(`pickled.yml: scenario "${$}" trap "${Y.id}" must set 'pattern'`);if(J==="")throw Error(`pickled.yml: scenario "${$}" trap "${Y.id}" has empty 'pattern'`);if(Y.flags!==void 0){if(typeof Y.flags!=="string")throw Error(`pickled.yml: scenario "${$}" trap "${Y.id}" has non-string 'flags'`);for(let Q of Y.flags){if(QF.has(Q))throw Error(`pickled.yml: scenario "${$}" trap "${Y.id}" uses forbidden regex flag "${Q}" (g and y are not allowed)`);if(!WF.has(Q))throw Error(`pickled.yml: scenario "${$}" trap "${Y.id}" uses unsupported regex flag "${Q}"`)}}try{new RegExp(J,Y.flags??"")}catch(Q){throw Error(`pickled.yml: scenario "${$}" trap "${Y.id}" has invalid regex pattern: ${Q instanceof Error?Q.message:Q}`)}}}}function k9($,Z){let X=new Set([...Object.keys($.targets??{}),"default"]);if(!X.has(Z)){let Y=[...X].sort().join(", ");throw Error(`Unknown target: "${Z}". Available targets: ${Y}`)}return{...$,matrix:{...$.matrix,target:[Z]},scenarios:$.scenarios.filter((Y)=>!Y.target||Y.target===Z)}}function b9($){if(typeof $==="string")return{path:$,auditTraps:!0};return{path:$.path,auditTraps:$.audit?.traps??!0}}function h6($){let{config:Z,files:X,pairs:Y,findings:G}=$,z=[],J=G.filter((H)=>H.severity==="error").length,Q=G.filter((H)=>H.severity==="warning").length;if(z.push(`# Agent-context audit
|
|
167
167
|
`),z.push(`Target: \`${Z.targetRepo}\``),z.push(`Budgets: root \u2264 ${Z.budgets.rootLines} lines, nested warn at ${Z.budgets.nestedWarnLines} lines.`),z.push(`Found ${X.length} agent-doc files. ${J} error(s), ${Q} warning(s).
|
|
168
168
|
`),z.push(`## Inventory
|
|
169
169
|
`),X.length===0)z.push(`No agent-doc files found.
|
|
@@ -261,22 +261,22 @@ new OpenAI({ apiKey, dangerouslyAllowBrowser: true });
|
|
|
261
261
|
|
|
262
262
|
https://help.openai.com/en/articles/5112595-best-practices-for-api-key-safety
|
|
263
263
|
`);this.baseURL=W.baseURL,this.timeout=W.timeout??d5.DEFAULT_TIMEOUT,this.logger=W.logger??console;let V="warn";this.logLevel=V,this.logLevel=M5(W.logLevel,"ClientOptions.logLevel",this)??M5(l0("OPENAI_LOG"),"process.env['OPENAI_LOG']",this)??V,this.fetchOptions=W.fetchOptions,this.maxRetries=W.maxRetries??2,this.fetch=W.fetch??R8(),x(this,$X,jG,"f");let K=l0("OPENAI_CUSTOM_HEADERS");if(K){let H={};for(let B of K.split(`
|
|
264
|
-
`)){let U=B.indexOf(":");if(U>=0)H[B.substring(0,U).trim()]=B.substring(U+1).trim()}W.defaultHeaders=E([H,W.defaultHeaders])}if(this._options=W,J)this._workloadIdentityAuth=new N5(J,this.fetch);this.apiKey=typeof Z==="string"?Z:null,this.adminAPIKey=X,this.organization=Y,this.project=G,this.webhookSecret=z}withOptions($){return new this.constructor({...this._options,baseURL:this.baseURL,maxRetries:this.maxRetries,timeout:this.timeout,logger:this.logger,logLevel:this.logLevel,fetch:this.fetch,fetchOptions:this.fetchOptions,apiKey:this._options.apiKey,adminAPIKey:this.adminAPIKey,workloadIdentity:this._options.workloadIdentity,organization:this.organization,project:this.project,webhookSecret:this.webhookSecret,...$})}defaultQuery(){return this._options.defaultQuery}validateHeaders({values:$,nulls:Z},X={bearerAuth:!0,adminAPIKeyAuth:!0}){if($.get("authorization")||$.get("api-key"))return;if(Z.has("authorization")||Z.has("api-key"))return;if(this._workloadIdentityAuth&&X.bearerAuth)return;throw Error('Could not resolve authentication method. Expected either apiKey or adminAPIKey to be set. 
Or for one of the "Authorization" or "api-key" headers to be explicitly omitted')}async authHeaders($,Z={bearerAuth:!0,adminAPIKeyAuth:!0}){return E([Z.bearerAuth?await this.bearerAuth($):null,Z.adminAPIKeyAuth?await this.adminAPIKeyAuth($):null])}async bearerAuth($){if(this._workloadIdentityAuth)return E([{Authorization:`Bearer ${await this._workloadIdentityAuth.getToken()}`}]);if(this.apiKey==null)return;return E([{Authorization:`Bearer ${this.apiKey}`}])}async adminAPIKeyAuth($){if(this.adminAPIKey==null)return;return E([{Authorization:`Bearer ${this.adminAPIKey}`}])}stringifyQuery($){return uG($)}getUserAgent(){return`${this.constructor.name}/JS ${R$}`}defaultIdempotencyKey(){return`stainless-node-retry-${J5()}`}makeStatusError($,Z,X,Y){return r.generate($,Z,X,Y)}async _callApiKey(){let $=this._options.apiKey;if(typeof $!=="function")return!1;let Z;try{Z=await $()}catch(X){if(X instanceof I)throw X;throw new I(`Failed to get token from 'apiKey' function: ${X.message}`,{cause:X})}if(typeof Z!=="string"||!Z)throw new I(`Expected 'apiKey' function argument to return a string but it returned ${Z}`);return this.apiKey=Z,!0}buildURL($,Z,X){let Y=!M(this,m5,"m",Sz).call(this)&&X||this.baseURL,G=NG($)?new URL($):new URL(Y+(Y.endsWith("/")&&$.startsWith("/")?$.slice(1):$)),z=this.defaultQuery(),J=Object.fromEntries(G.searchParams);if(!W5(z)||!W5(J))Z={...J,...z,...Z};if(typeof Z==="object"&&Z&&!Array.isArray(Z))G.search=this.stringifyQuery(Z);return G.toString()}async prepareOptions($){if(($.__security??{bearerAuth:!0}).bearerAuth)await this._callApiKey()}async prepareRequest($,{url:Z,options:X}){}get($,Z){return this.methodRequest("get",$,Z)}post($,Z){return this.methodRequest("post",$,Z)}patch($,Z){return this.methodRequest("patch",$,Z)}put($,Z){return this.methodRequest("put",$,Z)}delete($,Z){return this.methodRequest("delete",$,Z)}methodRequest($,Z,X){return this.request(Promise.resolve(X).then((Y)=>{return{method:$,path:Z,...Y}}))}request($,Z=null){return new 
X1(this,this.makeRequest($,Z,void 0))}async makeRequest($,Z,X){let Y=await $,G=Y.maxRetries??this.maxRetries;if(Z==null)Z=G;await this.prepareOptions(Y);let{req:z,url:J,timeout:Q}=await this.buildRequest(Y,{retryCount:G-Z});await this.prepareRequest(z,{url:J,options:Y});let W="log_"+(Math.random()*16777216|0).toString(16).padStart(6,"0"),V=X===void 0?"":`, retryOf: ${X}`,K=Date.now();if(s(this).debug(`[${W}] sending request`,z$({retryOfRequestLogID:X,method:Y.method,url:J,options:Y,headers:z.headers})),Y.signal?.aborted)throw new $0;let H=Y.__security??{bearerAuth:!0},B=new AbortController,U=await this.fetchWithAuth(J,z,Q,B,H).catch(N4),F=Date.now();if(U instanceof globalThis.Error){let O=`retrying, ${Z} attempts remaining`;if(Y.signal?.aborted)throw new $0;let N=O4(U)||/timed? ?out/i.test(String(U)+("cause"in U?String(U.cause):""));if(Z)return s(this).info(`[${W}] connection ${N?"timed out":"failed"} - ${O}`),s(this).debug(`[${W}] connection ${N?"timed out":"failed"} (${O})`,z$({retryOfRequestLogID:X,url:J,durationMs:F-K,message:U.message})),this.retryRequest(Y,Z,X??W);if(s(this).info(`[${W}] connection ${N?"timed out":"failed"} - error; no more retries left`),s(this).debug(`[${W}] connection ${N?"timed out":"failed"} (error; no more retries left)`,z$({retryOfRequestLogID:X,url:J,durationMs:F-K,message:U.message})),U instanceof i1||U instanceof D8)throw U;if(N)throw new Z1;throw new $1({cause:U})}let T=[...U.headers.entries()].filter(([O])=>O==="x-request-id").map(([O,N])=>", "+O+": "+JSON.stringify(N)).join(""),w=`[${W}${V}${T}] ${z.method} ${J} ${U.ok?"succeeded":"failed"} with status ${U.status} in ${F-K}ms`;if(!U.ok){if(U.status===401&&this._workloadIdentityAuth&&H.bearerAuth&&!Y.__metadata?.hasStreamingBody&&!Y.__metadata?.workloadIdentityTokenRefreshed)return await K5(U.body),this._workloadIdentityAuth.invalidateToken(),this.makeRequest({...Y,__metadata:{...Y.__metadata,workloadIdentityTokenRefreshed:!0}},Z,X??W);let O=await this.shouldRetry(U);if(Z&&O){let 
S=`retrying, ${Z} attempts remaining`;return await K5(U.body),s(this).info(`${w} - ${S}`),s(this).debug(`[${W}] response error (${S})`,z$({retryOfRequestLogID:X,url:U.url,status:U.status,headers:U.headers,durationMs:F-K})),this.retryRequest(Y,Z,X??W,U.headers)}let N=O?"error; no more retries left":"error; not retryable";s(this).info(`${w} - ${N}`);let _=await U.text().catch((S)=>N4(S).message),P=DG(_),R=P?void 0:_;throw s(this).debug(`[${W}] response error (${N})`,z$({retryOfRequestLogID:X,url:U.url,status:U.status,headers:U.headers,message:R,durationMs:Date.now()-K})),this.makeStatusError(U.status,P,R,U.headers)}return s(this).info(w),s(this).debug(`[${W}] response start`,z$({retryOfRequestLogID:X,url:U.url,status:U.status,headers:U.headers,durationMs:F-K})),{response:U,options:Y,controller:B,requestLogID:W,retryOfRequestLogID:X,startTime:K}}getAPIList($,Z,X){return this.requestAPIList(Z,X&&"then"in X?X.then((Y)=>({method:"get",path:$,...Y})):{method:"get",path:$,...X})}requestAPIList($,Z){let X=this.makeRequest(Z,null,void 0);return new k8(this,X,$)}async fetchWithAuth($,Z,X,Y,G={bearerAuth:!0,adminAPIKeyAuth:!0}){if(this._workloadIdentityAuth&&G.bearerAuth){let J=Z.headers,Q=J.get("Authorization");if(!Q||Q===`Bearer ${IM}`){let W=await this._workloadIdentityAuth.getToken();J.set("Authorization",`Bearer ${W}`)}}return await this.fetchWithTimeout($,Z,X,Y)}async fetchWithTimeout($,Z,X,Y){let{signal:G,method:z,...J}=Z||{},Q=this._makeAbort(Y);if(G)G.addEventListener("abort",Q,{once:!0});let W=setTimeout(Q,X),V=globalThis.ReadableStream&&J.body instanceof globalThis.ReadableStream||typeof J.body==="object"&&J.body!==null&&Symbol.asyncIterator in J.body,K={signal:Y.signal,...V?{duplex:"half"}:{},method:"GET",...J};if(z)K.method=z.toUpperCase();try{return await this.fetch.call(void 0,$,K)}finally{clearTimeout(W)}}async shouldRetry($){let 
Z=$.headers.get("x-should-retry");if(Z==="true")return!0;if(Z==="false")return!1;if($.status===408)return!0;if($.status===409)return!0;if($.status===429)return!0;if($.status>=500)return!0;return!1}async retryRequest($,Z,X,Y){let G,z=Y?.get("retry-after-ms");if(z){let Q=parseFloat(z);if(!Number.isNaN(Q))G=Q}let J=Y?.get("retry-after");if(J&&!G){let Q=parseFloat(J);if(!Number.isNaN(Q))G=Q*1000;else G=Date.parse(J)-Date.now()}if(G===void 0){let Q=$.maxRetries??this.maxRetries;G=this.calculateDefaultRetryTimeoutMillis(Z,Q)}return await d0(G),this.makeRequest($,Z-1,X)}calculateDefaultRetryTimeoutMillis($,Z){let G=Z-$,z=Math.min(0.5*Math.pow(2,G),8),J=1-Math.random()*0.25;return z*J*1000}async buildRequest($,{retryCount:Z=0}={}){let X={...$},{method:Y,path:G,query:z,defaultBaseURL:J}=X,Q=this.buildURL(G,z,J);if("timeout"in X)PG("timeout",X.timeout);X.timeout=X.timeout??this.timeout;let{bodyHeaders:W,body:V,isStreamingBody:K}=this.buildBody({options:X});if(K)$.__metadata={...$.__metadata,hasStreamingBody:!0};let H=await this.buildHeaders({options:$,method:Y,bodyHeaders:W,retryCount:Z});return{req:{method:Y,headers:H,...X.signal&&{signal:X.signal},...globalThis.ReadableStream&&V instanceof globalThis.ReadableStream&&{duplex:"half"},...V&&{body:V},...this.fetchOptions??{},...X.fetchOptions??{}},url:Q,timeout:X.timeout}}async buildHeaders({options:$,method:Z,bodyHeaders:X,retryCount:Y}){let G={};if(this.idempotencyHeader&&Z!=="get"){if(!$.idempotencyKey)$.idempotencyKey=this.defaultIdempotencyKey();G[this.idempotencyHeader]=$.idempotencyKey}let z=E([G,{Accept:"application/json","User-Agent":this.getUserAgent(),"X-Stainless-Retry-Count":String(Y),...$.timeout?{"X-Stainless-Timeout":String(Math.trunc($.timeout/1000))}:{},...CG(),"OpenAI-Organization":this.organization,"OpenAI-Project":this.project},await this.authHeaders($,$.__security??{bearerAuth:!0}),this._options.defaultHeaders,X,$.headers]);return 
this.validateHeaders(z,$.__security??{bearerAuth:!0}),z.values}_makeAbort($){return()=>$.abort()}buildBody({options:{body:$,headers:Z}}){if(!$)return{bodyHeaders:void 0,body:void 0,isStreamingBody:!1};let X=E([Z]),Y=typeof globalThis.ReadableStream<"u"&&$ instanceof globalThis.ReadableStream,G=!Y&&(typeof $==="string"||$ instanceof ArrayBuffer||ArrayBuffer.isView($)||typeof globalThis.Blob<"u"&&$ instanceof globalThis.Blob||$ instanceof URLSearchParams||$ instanceof FormData);if(ArrayBuffer.isView($)||$ instanceof ArrayBuffer||$ instanceof DataView||typeof $==="string"&&X.values.has("content-type")||globalThis.Blob&&$ instanceof globalThis.Blob||$ instanceof FormData||$ instanceof URLSearchParams||Y)return{bodyHeaders:void 0,body:$,isStreamingBody:!G};else if(typeof $==="object"&&((Symbol.asyncIterator in $)||(Symbol.iterator in $)&&("next"in $)&&typeof $.next==="function"))return{bodyHeaders:void 0,body:A8($),isStreamingBody:!0};else if(typeof $==="object"&&X.values.get("content-type")==="application/x-www-form-urlencoded")return{bodyHeaders:{"content-type":"application/x-www-form-urlencoded"},body:this.stringifyQuery($),isStreamingBody:!1};else return{...M(this,$X,"f").call(this,{body:$,headers:X}),isStreamingBody:!1}}}d5=f,$X=new WeakMap,m5=new WeakSet,Sz=function(){return 
this.baseURL!=="https://api.openai.com/v1"};f.OpenAI=d5;f.DEFAULT_TIMEOUT=600000;f.OpenAIError=I;f.APIError=r;f.APIConnectionError=$1;f.APIConnectionTimeoutError=Z1;f.APIUserAbortError=$0;f.NotFoundError=E4;f.ConflictError=R4;f.RateLimitError=S4;f.BadRequestError=_4;f.AuthenticationError=P4;f.InternalServerError=C4;f.PermissionDeniedError=D4;f.UnprocessableEntityError=A4;f.InvalidWebhookSignatureError=G$;f.toFile=f8;f.Completions=Q2;f.Chat=z1;f.Embeddings=V2;f.Files=K2;f.Images=T2;f.Audio=W$;f.Moderations=L2;f.Models=F2;f.FineTuning=a0;f.Graders=L1;f.VectorStores=y$;f.Webhooks=_2;f.Beta=i0;f.Batches=G2;f.Uploads=w1;f.Admin=Q1;f.Responses=b$;f.Realtime=x$;f.Conversations=T1;f.Evals=F1;f.Containers=q1;f.Skills=f$;f.Videos=O2;class c5{category="api";provider="openai";name;config;clientFactory;constructor($,Z,X){this.name=$,this.config=Z,this.clientFactory=X??(()=>new f)}async run($,Z){let{tool:X,docs:Y,requiredSources:G,discovery:z,webTools:J}=Z;if(!this.config.model)throw Error(`API target "${this.name}" missing 'model'. 
API targets must declare an explicit model.`);let Q=z?p1(X,z.sourceHint):E$(X,Y,G),W=this.clientFactory(),V=J?.search?[{type:"web_search"}]:void 0,K=await W.responses.create({model:this.config.model,instructions:Q,input:$,temperature:this.config.temperature??0,max_output_tokens:this.config.maxTokens??4096,...V?{tools:V}:{}}),H=typeof K.output_text==="string"?K.output_text:"",B=jM(K.output);return{response:H,allResponses:H?[{type:"final",text:H}]:[],toolsUsed:B,sources:[],metadata:{model:this.config.model,category:this.category,provider:this.provider,target:this.name}}}}function jM($){if(!Array.isArray($))return[];let Z=!1;for(let X of $)if(X?.type==="web_search_call"){Z=!0;break}return Z?["web_search"]:[]}import{query as xM}from"@anthropic-ai/claude-agent-sdk";class l5{category="cli";provider="claude-code";name;config;constructor($,Z){this.name=$,this.config=Z}async run($,Z){let{tool:X,cwd:Y,context:G,docs:z,requiredSources:J,discovery:Q,restrictBuiltinTools:W}=Z,V=[],K=[],H=Q?p1(X,Q.sourceHint):E$(X,z,J),B={cwd:Y,model:this.config.model??"sonnet",systemPrompt:H,allowedTools:G?.allowedTools??this.config.allowedTools??MX,disallowedTools:G?.disallowedTools??this.config.disallowedTools??OX,permissionMode:this.config.permissionMode??"acceptEdits",maxTurns:this.config.maxTurns??10,maxThinkingTokens:this.config.maxThinkingTokens,maxBudgetUsd:this.config.maxBudgetUsd,mcpServers:G?.mcpServers??this.config.mcpServers,settingSources:[]};if(W!==void 0)B.tools=W;let U=[],F="",T="";for await(let w of xM({prompt:$,options:B})){if(w.type==="assistant"){let O=w.message?.content;if(Array.isArray(O)){let N="";for(let _ of O){if(_.type==="text")N+=_.text;if(_.type==="tool_use"){let P=_;if(!V.includes(P.name))V.push(P.name)}}if(N){let _=U.length===0?"initial":"intermediate";U.push({type:_,text:N}),F=N}}}if(w.type==="result"){let 
O=w;if(O.result)T=O.result}}if(U.length>0)U[U.length-1].type="final";return{response:T||F,allResponses:U,toolsUsed:V,sources:K,metadata:{model:this.config.model??"sonnet",category:this.category,provider:this.provider,target:this.name}}}}import{randomUUID as kM}from"crypto";import{rm as bM}from"fs/promises";import{tmpdir as fM}from"os";import{join as yM}from"path";var vM="codex";class p5{category="cli";provider="codex-cli";name;config;spawnFn;readFileFn;binary;constructor($,Z,X={}){this.name=$,this.config=Z,this.spawnFn=X.spawn??mM,this.readFileFn=X.readFile??dM,this.binary=X.binary??vM}async run($,Z){let{tool:X,cwd:Y,docs:G,requiredSources:z}=Z,J=this.config.model;if(!J)throw Error(`CodexCliTarget "${this.name}" requires an explicit model. Validation should have caught this at config load.`);let W=`${E$(X,G,z)}
|
|
264
|
+
`)){let U=B.indexOf(":");if(U>=0)H[B.substring(0,U).trim()]=B.substring(U+1).trim()}W.defaultHeaders=E([H,W.defaultHeaders])}if(this._options=W,J)this._workloadIdentityAuth=new N5(J,this.fetch);this.apiKey=typeof Z==="string"?Z:null,this.adminAPIKey=X,this.organization=Y,this.project=G,this.webhookSecret=z}withOptions($){return new this.constructor({...this._options,baseURL:this.baseURL,maxRetries:this.maxRetries,timeout:this.timeout,logger:this.logger,logLevel:this.logLevel,fetch:this.fetch,fetchOptions:this.fetchOptions,apiKey:this._options.apiKey,adminAPIKey:this.adminAPIKey,workloadIdentity:this._options.workloadIdentity,organization:this.organization,project:this.project,webhookSecret:this.webhookSecret,...$})}defaultQuery(){return this._options.defaultQuery}validateHeaders({values:$,nulls:Z},X={bearerAuth:!0,adminAPIKeyAuth:!0}){if($.get("authorization")||$.get("api-key"))return;if(Z.has("authorization")||Z.has("api-key"))return;if(this._workloadIdentityAuth&&X.bearerAuth)return;throw Error('Could not resolve authentication method. Expected either apiKey or adminAPIKey to be set. 
Or for one of the "Authorization" or "api-key" headers to be explicitly omitted')}async authHeaders($,Z={bearerAuth:!0,adminAPIKeyAuth:!0}){return E([Z.bearerAuth?await this.bearerAuth($):null,Z.adminAPIKeyAuth?await this.adminAPIKeyAuth($):null])}async bearerAuth($){if(this._workloadIdentityAuth)return E([{Authorization:`Bearer ${await this._workloadIdentityAuth.getToken()}`}]);if(this.apiKey==null)return;return E([{Authorization:`Bearer ${this.apiKey}`}])}async adminAPIKeyAuth($){if(this.adminAPIKey==null)return;return E([{Authorization:`Bearer ${this.adminAPIKey}`}])}stringifyQuery($){return uG($)}getUserAgent(){return`${this.constructor.name}/JS ${R$}`}defaultIdempotencyKey(){return`stainless-node-retry-${J5()}`}makeStatusError($,Z,X,Y){return r.generate($,Z,X,Y)}async _callApiKey(){let $=this._options.apiKey;if(typeof $!=="function")return!1;let Z;try{Z=await $()}catch(X){if(X instanceof I)throw X;throw new I(`Failed to get token from 'apiKey' function: ${X.message}`,{cause:X})}if(typeof Z!=="string"||!Z)throw new I(`Expected 'apiKey' function argument to return a string but it returned ${Z}`);return this.apiKey=Z,!0}buildURL($,Z,X){let Y=!M(this,m5,"m",Sz).call(this)&&X||this.baseURL,G=NG($)?new URL($):new URL(Y+(Y.endsWith("/")&&$.startsWith("/")?$.slice(1):$)),z=this.defaultQuery(),J=Object.fromEntries(G.searchParams);if(!W5(z)||!W5(J))Z={...J,...z,...Z};if(typeof Z==="object"&&Z&&!Array.isArray(Z))G.search=this.stringifyQuery(Z);return G.toString()}async prepareOptions($){if(($.__security??{bearerAuth:!0}).bearerAuth)await this._callApiKey()}async prepareRequest($,{url:Z,options:X}){}get($,Z){return this.methodRequest("get",$,Z)}post($,Z){return this.methodRequest("post",$,Z)}patch($,Z){return this.methodRequest("patch",$,Z)}put($,Z){return this.methodRequest("put",$,Z)}delete($,Z){return this.methodRequest("delete",$,Z)}methodRequest($,Z,X){return this.request(Promise.resolve(X).then((Y)=>{return{method:$,path:Z,...Y}}))}request($,Z=null){return new 
X1(this,this.makeRequest($,Z,void 0))}async makeRequest($,Z,X){let Y=await $,G=Y.maxRetries??this.maxRetries;if(Z==null)Z=G;await this.prepareOptions(Y);let{req:z,url:J,timeout:Q}=await this.buildRequest(Y,{retryCount:G-Z});await this.prepareRequest(z,{url:J,options:Y});let W="log_"+(Math.random()*16777216|0).toString(16).padStart(6,"0"),V=X===void 0?"":`, retryOf: ${X}`,K=Date.now();if(s(this).debug(`[${W}] sending request`,z$({retryOfRequestLogID:X,method:Y.method,url:J,options:Y,headers:z.headers})),Y.signal?.aborted)throw new $0;let H=Y.__security??{bearerAuth:!0},B=new AbortController,U=await this.fetchWithAuth(J,z,Q,B,H).catch(N4),F=Date.now();if(U instanceof globalThis.Error){let O=`retrying, ${Z} attempts remaining`;if(Y.signal?.aborted)throw new $0;let N=O4(U)||/timed? ?out/i.test(String(U)+("cause"in U?String(U.cause):""));if(Z)return s(this).info(`[${W}] connection ${N?"timed out":"failed"} - ${O}`),s(this).debug(`[${W}] connection ${N?"timed out":"failed"} (${O})`,z$({retryOfRequestLogID:X,url:J,durationMs:F-K,message:U.message})),this.retryRequest(Y,Z,X??W);if(s(this).info(`[${W}] connection ${N?"timed out":"failed"} - error; no more retries left`),s(this).debug(`[${W}] connection ${N?"timed out":"failed"} (error; no more retries left)`,z$({retryOfRequestLogID:X,url:J,durationMs:F-K,message:U.message})),U instanceof i1||U instanceof D8)throw U;if(N)throw new Z1;throw new $1({cause:U})}let T=[...U.headers.entries()].filter(([O])=>O==="x-request-id").map(([O,N])=>", "+O+": "+JSON.stringify(N)).join(""),w=`[${W}${V}${T}] ${z.method} ${J} ${U.ok?"succeeded":"failed"} with status ${U.status} in ${F-K}ms`;if(!U.ok){if(U.status===401&&this._workloadIdentityAuth&&H.bearerAuth&&!Y.__metadata?.hasStreamingBody&&!Y.__metadata?.workloadIdentityTokenRefreshed)return await K5(U.body),this._workloadIdentityAuth.invalidateToken(),this.makeRequest({...Y,__metadata:{...Y.__metadata,workloadIdentityTokenRefreshed:!0}},Z,X??W);let O=await this.shouldRetry(U);if(Z&&O){let 
S=`retrying, ${Z} attempts remaining`;return await K5(U.body),s(this).info(`${w} - ${S}`),s(this).debug(`[${W}] response error (${S})`,z$({retryOfRequestLogID:X,url:U.url,status:U.status,headers:U.headers,durationMs:F-K})),this.retryRequest(Y,Z,X??W,U.headers)}let N=O?"error; no more retries left":"error; not retryable";s(this).info(`${w} - ${N}`);let _=await U.text().catch((S)=>N4(S).message),P=DG(_),R=P?void 0:_;throw s(this).debug(`[${W}] response error (${N})`,z$({retryOfRequestLogID:X,url:U.url,status:U.status,headers:U.headers,message:R,durationMs:Date.now()-K})),this.makeStatusError(U.status,P,R,U.headers)}return s(this).info(w),s(this).debug(`[${W}] response start`,z$({retryOfRequestLogID:X,url:U.url,status:U.status,headers:U.headers,durationMs:F-K})),{response:U,options:Y,controller:B,requestLogID:W,retryOfRequestLogID:X,startTime:K}}getAPIList($,Z,X){return this.requestAPIList(Z,X&&"then"in X?X.then((Y)=>({method:"get",path:$,...Y})):{method:"get",path:$,...X})}requestAPIList($,Z){let X=this.makeRequest(Z,null,void 0);return new k8(this,X,$)}async fetchWithAuth($,Z,X,Y,G={bearerAuth:!0,adminAPIKeyAuth:!0}){if(this._workloadIdentityAuth&&G.bearerAuth){let J=Z.headers,Q=J.get("Authorization");if(!Q||Q===`Bearer ${IM}`){let W=await this._workloadIdentityAuth.getToken();J.set("Authorization",`Bearer ${W}`)}}return await this.fetchWithTimeout($,Z,X,Y)}async fetchWithTimeout($,Z,X,Y){let{signal:G,method:z,...J}=Z||{},Q=this._makeAbort(Y);if(G)G.addEventListener("abort",Q,{once:!0});let W=setTimeout(Q,X),V=globalThis.ReadableStream&&J.body instanceof globalThis.ReadableStream||typeof J.body==="object"&&J.body!==null&&Symbol.asyncIterator in J.body,K={signal:Y.signal,...V?{duplex:"half"}:{},method:"GET",...J};if(z)K.method=z.toUpperCase();try{return await this.fetch.call(void 0,$,K)}finally{clearTimeout(W)}}async shouldRetry($){let 
Z=$.headers.get("x-should-retry");if(Z==="true")return!0;if(Z==="false")return!1;if($.status===408)return!0;if($.status===409)return!0;if($.status===429)return!0;if($.status>=500)return!0;return!1}async retryRequest($,Z,X,Y){let G,z=Y?.get("retry-after-ms");if(z){let Q=parseFloat(z);if(!Number.isNaN(Q))G=Q}let J=Y?.get("retry-after");if(J&&!G){let Q=parseFloat(J);if(!Number.isNaN(Q))G=Q*1000;else G=Date.parse(J)-Date.now()}if(G===void 0){let Q=$.maxRetries??this.maxRetries;G=this.calculateDefaultRetryTimeoutMillis(Z,Q)}return await d0(G),this.makeRequest($,Z-1,X)}calculateDefaultRetryTimeoutMillis($,Z){let G=Z-$,z=Math.min(0.5*Math.pow(2,G),8),J=1-Math.random()*0.25;return z*J*1000}async buildRequest($,{retryCount:Z=0}={}){let X={...$},{method:Y,path:G,query:z,defaultBaseURL:J}=X,Q=this.buildURL(G,z,J);if("timeout"in X)PG("timeout",X.timeout);X.timeout=X.timeout??this.timeout;let{bodyHeaders:W,body:V,isStreamingBody:K}=this.buildBody({options:X});if(K)$.__metadata={...$.__metadata,hasStreamingBody:!0};let H=await this.buildHeaders({options:$,method:Y,bodyHeaders:W,retryCount:Z});return{req:{method:Y,headers:H,...X.signal&&{signal:X.signal},...globalThis.ReadableStream&&V instanceof globalThis.ReadableStream&&{duplex:"half"},...V&&{body:V},...this.fetchOptions??{},...X.fetchOptions??{}},url:Q,timeout:X.timeout}}async buildHeaders({options:$,method:Z,bodyHeaders:X,retryCount:Y}){let G={};if(this.idempotencyHeader&&Z!=="get"){if(!$.idempotencyKey)$.idempotencyKey=this.defaultIdempotencyKey();G[this.idempotencyHeader]=$.idempotencyKey}let z=E([G,{Accept:"application/json","User-Agent":this.getUserAgent(),"X-Stainless-Retry-Count":String(Y),...$.timeout?{"X-Stainless-Timeout":String(Math.trunc($.timeout/1000))}:{},...CG(),"OpenAI-Organization":this.organization,"OpenAI-Project":this.project},await this.authHeaders($,$.__security??{bearerAuth:!0}),this._options.defaultHeaders,X,$.headers]);return 
this.validateHeaders(z,$.__security??{bearerAuth:!0}),z.values}/**
 * Returns a zero-argument callback that aborts the given AbortController `$`.
 */_makeAbort($){return()=>$.abort()}/**
 * Chooses how the request body is sent. Returns `{ bodyHeaders, body, isStreamingBody }`.
 *
 * Branches, in the order they are tested:
 *  1. no body: everything undefined, not streaming;
 *  2. binary views, ArrayBuffer, Blob, FormData, URLSearchParams, ReadableStream, or a string when a
 *     content-type header is already present: passed through unchanged; `isStreamingBody` is false
 *     for the buffered kinds (string, ArrayBuffer, view, Blob, URLSearchParams, FormData) and true
 *     for a ReadableStream;
 *  3. async iterables, or iterators exposing `next()`: converted with `A8` and marked as streaming;
 *  4. plain objects with content-type `application/x-www-form-urlencoded`: encoded via `stringifyQuery`;
 *  5. anything else: handed to the encoder stored in the `$X` private slot (presumably JSON — confirm).
 */buildBody({options:{body:$,headers:Z}}){if(!$)return{bodyHeaders:void 0,body:void 0,isStreamingBody:!1};let X=E([Z]),Y=typeof globalThis.ReadableStream<"u"&&$ instanceof globalThis.ReadableStream,G=!Y&&(typeof $==="string"||$ instanceof ArrayBuffer||ArrayBuffer.isView($)||typeof globalThis.Blob<"u"&&$ instanceof globalThis.Blob||$ instanceof URLSearchParams||$ instanceof FormData);if(ArrayBuffer.isView($)||$ instanceof ArrayBuffer||$ instanceof DataView||typeof $==="string"&&X.values.has("content-type")||globalThis.Blob&&$ instanceof globalThis.Blob||$ instanceof FormData||$ instanceof URLSearchParams||Y)return{bodyHeaders:void 0,body:$,isStreamingBody:!G};else if(typeof $==="object"&&((Symbol.asyncIterator in $)||(Symbol.iterator in $)&&("next"in $)&&typeof $.next==="function"))return{bodyHeaders:void 0,body:A8($),isStreamingBody:!0};else if(typeof $==="object"&&X.values.get("content-type")==="application/x-www-form-urlencoded")return{bodyHeaders:{"content-type":"application/x-www-form-urlencoded"},body:this.stringifyQuery($),isStreamingBody:!1};else return{...M(this,$X,"f").call(this,{body:$,headers:X}),isStreamingBody:!1}}}/* Class tail: alias plus the WeakMap/WeakSet backing the private members; `Sz` reports whether a non-default base URL is configured. */d5=f,$X=new WeakMap,m5=new WeakSet,Sz=function(){return 
this.baseURL!=="https://api.openai.com/v1"};f.OpenAI=d5;f.DEFAULT_TIMEOUT=600000;f.OpenAIError=I;f.APIError=r;f.APIConnectionError=$1;f.APIConnectionTimeoutError=Z1;f.APIUserAbortError=$0;f.NotFoundError=E4;f.ConflictError=R4;f.RateLimitError=S4;f.BadRequestError=_4;f.AuthenticationError=P4;f.InternalServerError=C4;f.PermissionDeniedError=D4;f.UnprocessableEntityError=A4;f.InvalidWebhookSignatureError=G$;f.toFile=f8;f.Completions=Q2;f.Chat=z1;f.Embeddings=V2;f.Files=K2;f.Images=T2;f.Audio=W$;f.Moderations=L2;f.Models=F2;f.FineTuning=a0;f.Graders=L1;f.VectorStores=y$;f.Webhooks=_2;f.Beta=i0;f.Batches=G2;f.Uploads=w1;f.Admin=Q1;f.Responses=b$;f.Realtime=x$;f.Conversations=T1;f.Evals=F1;f.Containers=q1;f.Skills=f$;f.Videos=O2;class c5{category="api";provider="openai";name;config;clientFactory;constructor($,Z,X){this.name=$,this.config=Z,this.clientFactory=X??(()=>new f)}async run($,Z){let{tool:X,docs:Y,requiredSources:G,discovery:z,webTools:J,mcpTools:Q}=Z;if(!this.config.model)throw Error(`API target "${this.name}" missing 'model'. API targets must declare an explicit model.`);let W=z?p1(X,z.sourceHint):E$(X,Y,G),V=this.clientFactory(),K=[];if(J?.search)K.push({type:"web_search"});if(Q?.servers)for(let[w,O]of Object.entries(Q.servers))K.push(jM(w,O));let H=K.length>0?K:void 0,B=await V.responses.create({model:this.config.model,instructions:W,input:$,temperature:this.config.temperature??0,max_output_tokens:this.config.maxTokens??4096,...H?{tools:H}:{}}),U=typeof B.output_text==="string"?B.output_text:"",F=xM(B.output);return{response:U,allResponses:U?[{type:"final",text:U}]:[],toolsUsed:F,sources:[],metadata:{model:this.config.model,category:this.category,provider:this.provider,target:this.name}}}}function jM($,Z){if(!Z.url)throw Error(`MCP server "${$}" has no url; the OpenAI hosted-MCP tool requires server_url. 
Stdio MCP transports are not reachable from the OpenAI API.`);let X={type:"mcp",server_label:$,server_url:Z.url,require_approval:"never"};if(Z.headers&&Object.keys(Z.headers).length>0)X.headers=Z.headers;return X}function xM($){if(!Array.isArray($))return[];let Z=new Set;for(let X of $)if(X?.type==="web_search_call")Z.add("web_search");else if(X?.type==="mcp_call"&&typeof X.server_label==="string"&&typeof X.name==="string")Z.add(`mcp__${X.server_label}__${X.name}`);return Array.from(Z)}import{query as kM}from"@anthropic-ai/claude-agent-sdk";class l5{category="cli";provider="claude-code";name;config;constructor($,Z){this.name=$,this.config=Z}async run($,Z){let{tool:X,cwd:Y,context:G,docs:z,requiredSources:J,discovery:Q,restrictBuiltinTools:W}=Z,V=[],K=[],H=Q?p1(X,Q.sourceHint):E$(X,z,J),B={cwd:Y,model:this.config.model??"sonnet",systemPrompt:H,allowedTools:G?.allowedTools??this.config.allowedTools??MX,disallowedTools:G?.disallowedTools??this.config.disallowedTools??OX,permissionMode:this.config.permissionMode??"acceptEdits",maxTurns:this.config.maxTurns??10,maxThinkingTokens:this.config.maxThinkingTokens,maxBudgetUsd:this.config.maxBudgetUsd,mcpServers:G?.mcpServers??this.config.mcpServers,settingSources:[]};if(W!==void 0)B.tools=W;let U=[],F="",T="";for await(let w of kM({prompt:$,options:B})){if(w.type==="assistant"){let O=w.message?.content;if(Array.isArray(O)){let N="";for(let _ of O){if(_.type==="text")N+=_.text;if(_.type==="tool_use"){let P=_;if(!V.includes(P.name))V.push(P.name)}}if(N){let _=U.length===0?"initial":"intermediate";U.push({type:_,text:N}),F=N}}}if(w.type==="result"){let O=w;if(O.result)T=O.result}}if(U.length>0)U[U.length-1].type="final";return{response:T||F,allResponses:U,toolsUsed:V,sources:K,metadata:{model:this.config.model??"sonnet",category:this.category,provider:this.provider,target:this.name}}}}import{randomUUID as bM}from"crypto";import{rm as fM}from"fs/promises";import{tmpdir as yM}from"os";import{join as vM}from"path";var 
gM="codex";/**
 * CLI target that runs a prompt through the Codex command-line binary.
 * The third constructor argument allows injecting `spawn`, `readFile` and `binary`;
 * they default to `dM`, `cM` and `gM` ("codex") respectively.
 */class p5{category="cli";provider="codex-cli";name;config;spawnFn;readFileFn;binary;constructor($,Z,X={}){this.name=$,this.config=Z,this.spawnFn=X.spawn??dM,this.readFileFn=X.readFile??cM,this.binary=X.binary??gM}/**
 * Runs the prompt `$` with the run context `Z` (`tool`, `cwd`, `docs`, `requiredSources`).
 * Throws when the target config has no `model`. The text sent to the CLI starts with the
 * instructions produced by `E$`; the template literal opened on this line continues below.
 */async run($,Z){let{tool:X,cwd:Y,docs:G,requiredSources:z}=Z,J=this.config.model;if(!J)throw Error(`CodexCliTarget "${this.name}" requires an explicit model. Validation should have caught this at config load.`);let W=`${E$(X,G,z)}
|
|
265
265
|
|
|
266
266
|
---
|
|
267
267
|
|
|
268
|
-
${$}`,V=yM(
|
|
269
|
-
`)){let G=Y.trim();if(!G)continue;let z;try{z=JSON.parse(G)}catch{continue}let J=hM(z);if(J){let W=Z.length===0?"initial":"intermediate";Z.push({type:W,text:J})}let Q=uM(z);if(Q&&!X.includes(Q))X.push(Q)}return{allResponses:Z,toolsUsed:X}}function hM($){if(!$||typeof $!=="object")return null;let Z=$;if(typeof Z.message==="string")return Z.message;if(typeof Z.content==="string")return Z.content;if(typeof Z.type==="string"&&(Z.type==="assistant_message"||Z.type==="agent_message")&&typeof Z.text==="string")return Z.text;if(Z.type==="message"&&typeof Z.text==="string")return Z.text;return null}function uM($){if(!$||typeof $!=="object")return null;let Z=$;if(typeof Z.tool==="string")return Z.tool;if(typeof Z.type==="string"&&(Z.type==="tool_call"||Z.type==="function_call")&&typeof Z.name==="string")return Z.name;return null}var mM=async($,Z,X)=>{let Y=Bun.spawn([$,...Z],{cwd:X.cwd,stdin:"pipe",stdout:"pipe",stderr:"pipe"});Y.stdin.write(X.stdin),await Y.stdin.end();let[G,z,J]=await Promise.all([new Response(Y.stdout).text(),new Response(Y.stderr).text(),Y.exited]);return{exitCode:J,stdout:G,stderr:z}},dM=async($)=>{let Z=Bun.file($);if(!await Z.exists())throw Error(`Last-message file not found: ${$}`);return Z.text()};function ZX($,Z){let X=Z??h0;switch(X.category){case"cli":return cM($,X);case"api":return lM($,X);case"ide":throw Error("IDE targets not yet implemented. Coming soon!");default:throw Error(`Unknown target category: ${X.category}`)}}function cM($,Z){switch(Z.provider){case"claude-code":return new l5($,Z);case"codex-cli":return new p5($,Z);case"gemini-cli":throw Error("Gemini CLI target not yet implemented. Coming soon!");case"amazon-q":throw Error("Amazon Q target not yet implemented. Coming soon!");default:throw Error(`Unknown CLI provider: ${Z.provider}`)}}function lM($,Z){switch(Z.provider){case"anthropic":return new z5($,Z);case"openai":return new c5($,Z);case"google":throw Error("Google API target not yet implemented. 
Coming soon!");default:throw Error(`Unknown API provider: ${Z.provider}`)}}var P2="default";function XX($,Z){if(!$||$===P2)return{name:P2,config:h0};if(Z?.[$])return{name:$,config:Z[$]};return console.warn(`Target "${$}" not found, using default`),{name:P2,config:h0}}function i5($,Z){if(!$||$===P2)return{name:P2,config:{}};if(Z?.[$])return{name:$,config:Z[$]};return console.warn(`Context "${$}" not found, using default`),{name:P2,config:{}}}var YX="__matrix__";function pM($){let Z=[],X=$.matrix?.target??["default"],Y=$.matrix?.context??["default"];for(let G of $.scenarios){let z=G.context?[G.context]:Y;if(G.matrix){for(let Q of z)Z.push({scenario:G,targetName:YX,contextName:Q});continue}let J=G.target?[G.target]:X;for(let Q of J)for(let W of z)Z.push({scenario:G,targetName:Q,contextName:W})}return Z}function iM($,Z,X){let Y=[];for(let{scenario:G,targetName:z,contextName:J}of $)if(G.matrix&&z===YX){let Q=G.matrix,W=G.target??"default",V=Q.interfaces??[W],K=Q.sources??[null],H=Q.toolsets??["none"];for(let B of V){if(X.interface&&X.interface!==B)continue;for(let U of K){if(X.source!==void 0&&X.source!==(U??""))continue;for(let F of H){if(X.toolset&&X.toolset!==F)continue;Y.push({scenario:G.name,kind:"matrix",interface:B,source:U,toolset:F})}}}}else Y.push({scenario:G.name,kind:"single",target:z,context:J});return Y}function a5($){if($.kind==="matrix")return`m:${$.scenario}\x01${$.interface}\x01${$.source??""}\x01${$.toolset}`;return`s:${$.scenario}\x01${$.target}\x01${$.context}`}function aM($){let{tool:Z,docs:X,expandedCells:Y,selectedCells:G,seed:z}=$;return{tool:{name:Z.name,description:Z.description,path:Z.path},docs:X,scenarios:[],summary:{total:0,answered:0,unanswered:0,score:0},plan:{expandedCells:Y,selectedCells:G.length,seed:z,cells:G.map((J)=>J.kind==="matrix"?{scenario:J.scenario,interface:J.interface,source:J.source,toolset:J.toolset}:{scenario:J.scenario,target:J.target,context:J.context})}}}async function 
n5($,Z,X={}){let{onProgress:Y}=X,G=[];if(Z.scenarios.length===0)throw Error("No scenarios defined in config");let z=Z.docs?.sources??{},J=[];if(Object.keys(z).length>0){Y?.("Loading sources..."),J=await Z4(z,$.path);for(let w of J)Y?.(` [${w.id}] ${w.name}`);Y?.("")}let Q=J.map((w)=>w.id),W=pM(Z);if(X.scenarioFilter&&X.scenarioFilter.length>0){let w=new Set(X.scenarioFilter);if(W=W.filter((O)=>w.has(O.scenario.name)),W.length===0)throw Error(`No scenarios matched filter: ${[...w].join(", ")}. Declared scenarios: ${Z.scenarios.map((O)=>O.name).join(", ")}`)}let V=X.cellFilter??{},K=iM(W,Z,V),H=K,B;if(X.sample!==void 0){let w=X.seed??"default",O=K.filter((P)=>P.kind==="matrix"),N=dY(O,X.sample,w),_=new Set(N.map(a5));H=K.filter((P)=>P.kind!=="matrix"||_.has(a5(P))),B=w}if(X.maxCells!==void 0&&H.length>X.maxCells)throw Error(`Matrix expands to ${H.length} cells, exceeding --max-cells ${X.maxCells}. Add --interface/--source/--toolset/--scenario filters, or pass --sample N to sample per scenario.`);if(X.plan)return aM({tool:$,docs:J,expandedCells:K.length,selectedCells:H,seed:B});let U=X.sample!==void 0?new Set(H.filter((w)=>w.kind==="matrix").map(a5)):void 0,F="";for(let{scenario:w,targetName:O,contextName:N}of W){let _=nM(O,N);if(w.name!==F){if(F)Y?.("");Y?.(`"${w.name}"`),F=w.name}try{let P=await rM(w,O,N,$,Z,J,Q,X,U);G.push(P);let R=_?_.padEnd(18):"";if(P.cells){Y?.(` ${R} (matrix mode)`);for(let A of P.cells){let S=e0(A),k=` [${[A.cell.interface,A.cell.source??"-",A.cell.toolset].join(" \xB7 ")}]`.padEnd(40);Y?.(`${k} ${S.icon} ${S.label} (${S.confidence}%)`)}}else if(P.surfaces){Y?.(` ${R} (compare-surfaces mode)`);for(let A of P.surfaces){let S=e0(A),D=` [${A.active.join(",")}]`.padEnd(22);Y?.(`${D} ${S.icon} ${S.label} (${S.confidence}%)`)}}else{let A=e0({answerable:P.answerable??"NO",confidence:P.confidence??0,traps:P.traps??{fired:[],avoided:[]},error:P.error});Y?.(` ${R} ${A.icon} ${A.label} (${A.confidence}%)`)}}catch(P){let 
R=O==="default"?h0:Z.targets?.[O]??h0,A=w.requiredSources??[],S={scenario:w,answerable:"NO",confidence:0,response:"",reason:"Error during run",citations:{cited:[],required:A,missing:A,unknown:[]},traps:{fired:[],avoided:(w.traps??[]).map((k)=>k.id)},error:P instanceof Error?P.message:String(P),target:{target:O===YX?"matrix":O,category:R.category,provider:R.provider,model:R.model??"unknown"},context:{name:N}};G.push(S);let D=_?_.padEnd(18):"";Y?.(` ${D} \u2717 Error`)}}Y?.("");let T=sM($,J,G);return T.plan={expandedCells:K.length,selectedCells:H.length,seed:B},T}function nM($,Z){if($==="default"&&Z==="default")return"";if(Z==="default")return`[${$}]`;return`[${$}/${Z}]`}async function rM($,Z,X,Y,G,z,J,Q,W){if($.matrix&&Z===YX)return oM($,X,Y,G,z,Q,W);let{config:V}=XX(Z,G.targets),{config:K}=i5(X,G.contexts),H=Q.targetFactory?Q.targetFactory(Z,V):ZX(Z,V);if($.compareSurfaces&&$.compareSurfaces.length>0){let R=[],A;for(let S of $.compareSurfaces){let D=new Set(S),k=z.filter((V$)=>D.has(V$.id)),i=($.requiredSources??[]).filter((V$)=>D.has(V$)),c=await H.run($.prompt,{tool:Y,cwd:Y.path,context:K,docs:k,requiredSources:i,onProgress:Q.onProgress}),M0=b1({response:c.response,requiredSources:i,registeredIds:S}),v$=O$({response:c.response,traps:$.traps??[]}),N0=v$.fired.length>0,aZ=N0?"NO":M0.answerable,WX=N0?0:M0.confidence,nZ=N0?`Trap fired: ${v$.fired.map((V$)=>`"${V$.id}" (${V$.reason})`).join("; ")}`:M0.reason;R.push({active:S,answerable:aZ,confidence:WX,response:c.response,reason:nZ,citations:M0.citations,traps:v$,allResponses:c.allResponses}),A=c.metadata??A}return{scenario:$,answerable:null,confidence:null,response:null,reason:null,citations:null,traps:null,surfaces:R,target:A,context:{name:X}}}let B=$.requiredSources??[],U=await H.run($.prompt,{tool:Y,cwd:Y.path,context:K,docs:z,requiredSources:B,onProgress:Q.onProgress}),F=O$({response:U.response,traps:$.traps??[]}),T=F.fired.length>0,w=$.requiredSources!==void 
0?b1({response:U.response,requiredSources:B,registeredIds:J}):null,O=$.expected!==void 0?n6({response:U.response,expected:$.expected}):null,N,_,P;if(T)N="NO",_=0,P=`Trap fired: ${F.fired.map((R)=>`"${R.id}" (${R.reason})`).join("; ")}`;else{let R=[],A=[];if(w)R.push({answerable:w.answerable,confidence:w.confidence}),A.push(w.reason);if(O){let S=O.total===0?100:Math.round(O.satisfied/O.total*100),D=S===100?"YES":S===0?"NO":"PARTIAL";R.push({answerable:D,confidence:S})}if(R.length===0)N="YES",_=100,P="No traps fired; no other contract declared";else{let S={YES:0,PARTIAL:1,NO:2};N=R.reduce((k,b)=>S[b.answerable]>S[k.answerable]?b:k).answerable,_=Math.round(R.reduce((k,b)=>k+b.confidence,0)/R.length),P=A.filter((k)=>k.length>0).join(" | ")}}return{scenario:$,answerable:N,confidence:_,response:U.response,reason:P,citations:w?w.citations:{cited:[],required:B,missing:[],unknown:[]},traps:F,target:U.metadata,context:{name:X},toolsUsed:U.toolsUsed,sources:U.sources,allResponses:U.allResponses}}function sM($,Z,X){let Y=[];for(let Q of X){if(Q.surfaces){for(let W of Q.surfaces)Y.push({answerable:W.answerable,confidence:W.confidence});continue}if(Q.cells){for(let W of Q.cells)Y.push({answerable:W.answerable,confidence:W.confidence});continue}if(Q.answerable!==null&&Q.confidence!==null)Y.push({answerable:Q.answerable,confidence:Q.confidence})}let G=Y.length,z=Y.filter((Q)=>Q.answerable==="YES"||Q.answerable==="PARTIAL").length,J=G>0?Math.round(Y.reduce((Q,W)=>{if(W.answerable==="YES")return Q+W.confidence;if(W.answerable==="PARTIAL")return Q+W.confidence*0.5;return Q},0)/G):0;return{tool:{name:$.name,description:$.description,path:$.path},docs:Z,scenarios:X,summary:{total:G,answered:z,unanswered:G-z,score:J}}}async function oM($,Z,X,Y,G,z,J){let{config:Q}=i5(Z,Y.contexts),W=$.matrix??{},V=$.target??"default",K=W.interfaces??[V],H=W.sources??[null],B=W.toolsets??["none"],U=z.cellFilter??{},F=[],T;for(let O of K){if(U.interface&&U.interface!==O)continue;for(let N of 
H){if(U.source!==void 0&&U.source!==(N??""))continue;for(let _ of B){if(U.toolset&&U.toolset!==_)continue;if(J!==void 0){let g=`m:${$.name}\x01${O}\x01${N??""}\x01${_}`;if(!J.has(g))continue}let P=_==="none"?null:Y.toolsets?.[_]??null,R=_!=="none"&&(P?.webSearch===!0||P?.webFetch===!0),A=_!=="none"&&P?.mcpServers?Object.keys(P.mcpServers):[],S=A.length>0,{config:D}=XX(O,Y.targets);if(_!=="none"){if(R&&S)throw Error(`Toolset "${_}" mixes webSearch/webFetch with mcpServers; declare separate toolsets per shape so provenance can be attributed to one tool path.`);if(!R&&!S)throw Error(`Toolset "${_}" is declared but defines no runtime shape. Supported today: "none", web (webSearch/webFetch flags), MCP (mcpServers map). Other adapters (Firecrawl, native API search) land per release.`);if(S&&D.provider!=="claude-code")throw Error(`Toolset "${_}" (MCP) is implemented only on the claude-code interface today. Interface "${O}" uses provider "${D.provider}"; rerun with a Claude Code interface or use toolset "none".`);if(R&&D.provider!=="claude-code"&&D.provider!=="anthropic"&&D.provider!=="openai")throw Error(`Toolset "${_}" (web) is implemented on claude-code, anthropic, and openai interfaces today. Interface "${O}" uses provider "${D.provider}"; rerun with a supported interface or use toolset "none".`);if(R&&(D.provider==="anthropic"||D.provider==="openai")&&!P?.webSearch)throw Error(`Toolset "${_}" on ${D.provider} provider requires webSearch: true. 
The ${D.provider} API exposes a single server-side web tool; declare webSearch to enable it, or split web/fetch behaviour across separate toolsets.`)}let k=[],b=[],i=[],c=D.provider==="anthropic"||D.provider==="openai";if(R)if(c)i.push((g)=>g==="web_search");else{if(P?.webSearch)k.push("WebSearch"),b.push("WebSearch"),i.push((g)=>g==="WebSearch");if(P?.webFetch)k.push("WebFetch"),b.push("WebFetch"),i.push((g)=>g==="WebFetch")}if(S)for(let g of A)k.push(`mcp__${g}__*`),i.push((z0)=>z0.startsWith(`mcp__${g}__`));let M0=_==="none"||c?D:{...D,allowedTools:k,disallowedTools:[],mcpServers:S?P?.mcpServers:void 0,maxTurns:Math.max(D.maxTurns??0,15)},v$=z.targetFactory?z.targetFactory(O,M0):ZX(O,M0),N0=_==="none",aZ=N==="none",WX=aZ||!N0?[]:N===null?G:G.filter((g)=>g.id===N),nZ=aZ||!N0?[]:N===null?G.map((g)=>g.id):[N],V$=$.requiredSources??[],G3=N0?V$.filter((g)=>nZ.includes(g)):[],sz=eM($.prompt,N,G,N0),oz=N0?void 0:{sourceHint:$O(N,G)},n0;try{let g=_==="none"?Q:void 0;n0=await v$.run(sz,{tool:X,cwd:X.path,context:g,docs:WX,requiredSources:G3,discovery:oz,restrictBuiltinTools:_==="none"?void 0:b,webTools:R&&c?{search:P?.webSearch===!0}:void 0,onProgress:z.onProgress})}catch(g){F.push({cell:{interface:O,source:N,toolset:_},answerable:"NO",confidence:0,response:"",reason:`Error in cell: ${g instanceof Error?g.message:String(g)}`,citations:null,traps:{fired:[],avoided:($.traps??[]).map((z0)=>z0.id)},error:g instanceof Error?g.message:String(g)});continue}let VX=O$({response:n0.response,traps:$.traps??[]}),tz=VX.fired.length>0,H$=$.requiredSources!==void 0&&N0?b1({response:n0.response,requiredSources:G3,registeredIds:nZ}):null,G0=$.expected!==void 0?n6({response:n0.response,expected:$.expected}):null,R2=_!=="none"&&i.length>0?{expected:k,used:(n0.toolsUsed??[]).filter((g)=>i.some((z0)=>z0(g)))}:null,ez=R2!==null&&R2.used.length===0,$J=()=>{let g=[];if(H$)g.push(H$.reason);if(G0){let 
z0=G0.includes.filter((Q0)=>!Q0.satisfied).map((Q0)=>`"${Q0.value}"`),S0=G0.excludes.filter((Q0)=>!Q0.satisfied).map((Q0)=>`"${Q0.value}"`),g$=[];if(z0.length>0)g$.push(`missing includes: ${z0.join(", ")}`);if(S0.length>0)g$.push(`hit excludes: ${S0.join(", ")}`);g.push(g$.length>0?g$.join("; "):`expected checks satisfied (${G0.satisfied}/${G0.total})`)}return g.filter((z0)=>z0.length>0)},A2,S2,C2;if(tz)A2="NO",S2=0,C2=`Trap fired: ${VX.fired.map((g)=>`"${g.id}" (${g.reason})`).join("; ")}`;else if(ez){let g=R2;A2="NO",S2=0;let z0=`Provenance failed: toolset "${_}" configured but none of [${g.expected.join(", ")}] were used (answer rests on model prior knowledge)`,S0=$J();C2=S0.length>0?`${z0} | ${S0.join(" | ")}`:z0}else{let g=[],z0=[];if(H$)g.push({answerable:H$.answerable,confidence:H$.confidence}),z0.push(H$.reason);if(G0){let S0=G0.total===0?100:Math.round(G0.satisfied/G0.total*100),g$=S0===100?"YES":S0===0?"NO":"PARTIAL";g.push({answerable:g$,confidence:S0});let Q0=G0.includes.filter((u$)=>!u$.satisfied).map((u$)=>`"${u$.value}"`),h$=G0.excludes.filter((u$)=>!u$.satisfied).map((u$)=>`"${u$.value}"`),rZ=[];if(Q0.length>0)rZ.push(`missing includes: ${Q0.join(", ")}`);if(h$.length>0)rZ.push(`hit excludes: ${h$.join(", ")}`);z0.push(rZ.length>0?rZ.join("; "):`expected checks satisfied (${G0.satisfied}/${G0.total})`)}if(R2)z0.push(`tool use verified (${R2.used.join(", ")})`);if(g.length===0)A2="YES",S2=100,C2="No traps fired; no other contract declared";else{let S0={YES:0,PARTIAL:1,NO:2};A2=g.reduce((Q0,h$)=>S0[h$.answerable]>S0[Q0.answerable]?h$:Q0).answerable,S2=Math.round(g.reduce((Q0,h$)=>Q0+h$.confidence,0)/g.length),C2=z0.filter((Q0)=>Q0.length>0).join(" | ")}}F.push({cell:{interface:O,source:N,toolset:_},answerable:A2,confidence:S2,response:n0.response,reason:C2,citations:H$?H$.citations:null,traps:VX,expected:G0?{includes:G0.includes,excludes:G0.excludes,satisfied:G0.satisfied,total:G0.total}:void 
0,toolsUsed:n0.toolsUsed,allResponses:n0.allResponses}),T=n0.metadata??T}}}let w=tM($,G);return{scenario:$,answerable:null,confidence:null,response:null,reason:null,citations:null,traps:null,cells:F,verifierSamples:w,target:T,context:{name:Z}}}function tM($,Z){let X=$.verifiers?.sources;if(!X||X.length===0)return;let Y=new Map(Z.map((z)=>[z.id,z])),G=[];for(let z of X){let J=Y.get(z);if(!J)continue;G.push({id:J.id,name:J.name,content:J.content})}return G.length>0?G:void 0}function eM($,Z,X,Y){if(Y)return $;if(Z===null||Z==="none")return $;let G=X.find((J)=>J.id===Z);if(!G)return $;let z=G.type==="url"?`The canonical source for this question is the documentation at ${G.source}. Use your available tools to research it.`:`The canonical source for this question is "${G.name}" (registered locally as ${G.id}). Use your available tools to research from authoritative sources.`;return`${$}
|
|
268
|
+
${$}`,V=vM(yM(),`pickled-codex-${bM()}.txt`),K=["--ask-for-approval","never","exec","--json","--sandbox","read-only","--ignore-user-config","--ignore-rules","--ephemeral","--skip-git-repo-check","--cd",Y,"--model",J,"--output-last-message",V,"-"];try{let H=await this.spawnFn(this.binary,K,{cwd:Y,stdin:W}),{allResponses:B,toolsUsed:U}=hM(H.stdout);if(H.exitCode!==0)throw Error(`codex exec failed (exit ${H.exitCode}): ${H.stderr.trim()||"no stderr output"}`);let F="";try{F=(await this.readFileFn(V)).trim()}catch{F=B.length>0?B[B.length-1].text:""}if(B.length>0)B[B.length-1].type="final";return{response:F,allResponses:B,toolsUsed:U,sources:[],metadata:{model:J,category:this.category,provider:this.provider,target:this.name}}}finally{await fM(V,{force:!0})}}}function hM($){let Z=[],X=[];for(let Y of $.split(`
|
|
269
|
+
`)){let G=Y.trim();if(!G)continue;let z;try{z=JSON.parse(G)}catch{continue}let J=uM(z);if(J){let W=Z.length===0?"initial":"intermediate";Z.push({type:W,text:J})}let Q=mM(z);if(Q&&!X.includes(Q))X.push(Q)}return{allResponses:Z,toolsUsed:X}}function uM($){if(!$||typeof $!=="object")return null;let Z=$;if(typeof Z.message==="string")return Z.message;if(typeof Z.content==="string")return Z.content;if(typeof Z.type==="string"&&(Z.type==="assistant_message"||Z.type==="agent_message")&&typeof Z.text==="string")return Z.text;if(Z.type==="message"&&typeof Z.text==="string")return Z.text;return null}function mM($){if(!$||typeof $!=="object")return null;let Z=$;if(typeof Z.tool==="string")return Z.tool;if(typeof Z.type==="string"&&(Z.type==="tool_call"||Z.type==="function_call")&&typeof Z.name==="string")return Z.name;return null}var dM=async($,Z,X)=>{let Y=Bun.spawn([$,...Z],{cwd:X.cwd,stdin:"pipe",stdout:"pipe",stderr:"pipe"});Y.stdin.write(X.stdin),await Y.stdin.end();let[G,z,J]=await Promise.all([new Response(Y.stdout).text(),new Response(Y.stderr).text(),Y.exited]);return{exitCode:J,stdout:G,stderr:z}},cM=async($)=>{let Z=Bun.file($);if(!await Z.exists())throw Error(`Last-message file not found: ${$}`);return Z.text()};function ZX($,Z){let X=Z??h0;switch(X.category){case"cli":return lM($,X);case"api":return pM($,X);case"ide":throw Error("IDE targets not yet implemented. Coming soon!");default:throw Error(`Unknown target category: ${X.category}`)}}function lM($,Z){switch(Z.provider){case"claude-code":return new l5($,Z);case"codex-cli":return new p5($,Z);case"gemini-cli":throw Error("Gemini CLI target not yet implemented. Coming soon!");case"amazon-q":throw Error("Amazon Q target not yet implemented. Coming soon!");default:throw Error(`Unknown CLI provider: ${Z.provider}`)}}function pM($,Z){switch(Z.provider){case"anthropic":return new z5($,Z);case"openai":return new c5($,Z);case"google":throw Error("Google API target not yet implemented. 
Coming soon!");default:throw Error(`Unknown API provider: ${Z.provider}`)}}var P2="default";function XX($,Z){if(!$||$===P2)return{name:P2,config:h0};if(Z?.[$])return{name:$,config:Z[$]};return console.warn(`Target "${$}" not found, using default`),{name:P2,config:h0}}function i5($,Z){if(!$||$===P2)return{name:P2,config:{}};if(Z?.[$])return{name:$,config:Z[$]};return console.warn(`Context "${$}" not found, using default`),{name:P2,config:{}}}var YX="__matrix__";function iM($){let Z=[],X=$.matrix?.target??["default"],Y=$.matrix?.context??["default"];for(let G of $.scenarios){let z=G.context?[G.context]:Y;if(G.matrix){for(let Q of z)Z.push({scenario:G,targetName:YX,contextName:Q});continue}let J=G.target?[G.target]:X;for(let Q of J)for(let W of z)Z.push({scenario:G,targetName:Q,contextName:W})}return Z}function aM($,Z,X){let Y=[];for(let{scenario:G,targetName:z,contextName:J}of $)if(G.matrix&&z===YX){let Q=G.matrix,W=G.target??"default",V=Q.interfaces??[W],K=Q.sources??[null],H=Q.toolsets??["none"];for(let B of V){if(X.interface&&X.interface!==B)continue;for(let U of K){if(X.source!==void 0&&X.source!==(U??""))continue;for(let F of H){if(X.toolset&&X.toolset!==F)continue;Y.push({scenario:G.name,kind:"matrix",interface:B,source:U,toolset:F})}}}}else Y.push({scenario:G.name,kind:"single",target:z,context:J});return Y}function a5($){if($.kind==="matrix")return`m:${$.scenario}\x01${$.interface}\x01${$.source??""}\x01${$.toolset}`;return`s:${$.scenario}\x01${$.target}\x01${$.context}`}function nM($){let{tool:Z,docs:X,expandedCells:Y,selectedCells:G,seed:z}=$;return{tool:{name:Z.name,description:Z.description,path:Z.path},docs:X,scenarios:[],summary:{total:0,answered:0,unanswered:0,score:0},plan:{expandedCells:Y,selectedCells:G.length,seed:z,cells:G.map((J)=>J.kind==="matrix"?{scenario:J.scenario,interface:J.interface,source:J.source,toolset:J.toolset}:{scenario:J.scenario,target:J.target,context:J.context})}}}async function 
n5($,Z,X={}){/**
 * Runs every scenario of config `Z` for tool `$` and returns the aggregated report.
 *
 * Options read from `X`: `onProgress` (line callback), `scenarioFilter` (scenario names),
 * `cellFilter`, `sample` + `seed` (sampling applies to matrix cells only; single cells are
 * always kept), `maxCells` (hard limit on selected cells), `plan` (return the plan without
 * running anything) and `targetFactory` (read further down the call chain).
 *
 * Throws when no scenarios are declared, when the scenario filter matches nothing, or when
 * the selected cell count exceeds `maxCells`. A scenario run that throws does not abort the
 * loop: it is recorded as a "NO" result carrying the error message.
 */let{onProgress:Y}=X,G=[];if(Z.scenarios.length===0)throw Error("No scenarios defined in config");let z=Z.docs?.sources??{},J=[];if(Object.keys(z).length>0){Y?.("Loading sources..."),J=await Z4(z,$.path);for(let w of J)Y?.(` [${w.id}] ${w.name}`);Y?.("")}let Q=J.map((w)=>w.id),W=iM(Z);if(X.scenarioFilter&&X.scenarioFilter.length>0){let w=new Set(X.scenarioFilter);if(W=W.filter((O)=>w.has(O.scenario.name)),W.length===0)throw Error(`No scenarios matched filter: ${[...w].join(", ")}. Declared scenarios: ${Z.scenarios.map((O)=>O.name).join(", ")}`)}let V=X.cellFilter??{},K=aM(W,Z,V),H=K,B;if(X.sample!==void 0){let w=X.seed??"default",O=K.filter((P)=>P.kind==="matrix"),N=dY(O,X.sample,w),_=new Set(N.map(a5));H=K.filter((P)=>P.kind!=="matrix"||_.has(a5(P))),B=w}if(X.maxCells!==void 0&&H.length>X.maxCells)throw Error(`Matrix expands to ${H.length} cells, exceeding --max-cells ${X.maxCells}. Add --interface/--source/--toolset/--scenario filters, or pass --sample N to sample per scenario.`);if(X.plan)return nM({tool:$,docs:J,expandedCells:K.length,selectedCells:H,seed:B});/* U: keys of the sampled matrix cells, or undefined when no sampling was requested (run every cell). */let U=X.sample!==void 0?new Set(H.filter((w)=>w.kind==="matrix").map(a5)):void 0,F="";for(let{scenario:w,targetName:O,contextName:N}of W){let _=rM(O,N);if(w.name!==F){if(F)Y?.("");Y?.(`"${w.name}"`),F=w.name}try{let P=await sM(w,O,N,$,Z,J,Q,X,U);G.push(P);let R=_?_.padEnd(18):"";if(P.cells){Y?.(` ${R} (matrix mode)`);for(let A of P.cells){let S=e0(A),k=` [${[A.cell.interface,A.cell.source??"-",A.cell.toolset].join(" \xB7 ")}]`.padEnd(40);Y?.(`${k} ${S.icon} ${S.label} (${S.confidence}%)`)}}else if(P.surfaces){Y?.(` ${R} (compare-surfaces mode)`);for(let A of P.surfaces){let S=e0(A),D=` [${A.active.join(",")}]`.padEnd(22);Y?.(`${D} ${S.icon} ${S.label} (${S.confidence}%)`)}}else{let A=e0({answerable:P.answerable??"NO",confidence:P.confidence??0,traps:P.traps??{fired:[],avoided:[]},error:P.error});Y?.(` ${R} ${A.icon} ${A.label} (${A.confidence}%)`)}}catch(P){/* Failed run: synthesize a "NO" result so the report still lists the scenario. */let 
R=O==="default"?h0:Z.targets?.[O]??h0,A=w.requiredSources??[],S={scenario:w,answerable:"NO",confidence:0,response:"",reason:"Error during run",citations:{cited:[],required:A,missing:A,unknown:[]},traps:{fired:[],avoided:(w.traps??[]).map((k)=>k.id)},error:P instanceof Error?P.message:String(P),target:{target:O===YX?"matrix":O,category:R.category,provider:R.provider,model:R.model??"unknown"},context:{name:N}};G.push(S);let D=_?_.padEnd(18):"";Y?.(` ${D} \u2717 Error`)}}Y?.("");let T=oM($,J,G);return T.plan={expandedCells:K.length,selectedCells:H.length,seed:B},T}/**
 * Progress-line label for a target/context pair: "" when both are "default",
 * "[target]" when only the context is default, otherwise "[target/context]".
 */function rM($,Z){if($==="default"&&Z==="default")return"";if(Z==="default")return`[${$}]`;return`[${$}/${Z}]`}/**
 * Runs one scenario `$` on target `Z` in context `X` and grades the response.
 *
 * Remaining parameters: `Y` tool, `G` config, `z` loaded docs, `J` registered source ids,
 * `Q` run options, `W` optional set of sampled cell keys.
 *
 * Three modes: a scenario with a `matrix` under the placeholder target is delegated to `tM`;
 * a scenario with `compareSurfaces` is run once per surface (docs and required sources
 * restricted to that surface) and returns per-surface results with null top-level grades;
 * otherwise a single run is graded by the trap check plus the optional citation and
 * `expected` checks.
 */async function sM($,Z,X,Y,G,z,J,Q,W){if($.matrix&&Z===YX)return tM($,X,Y,G,z,Q,W);let{config:V}=XX(Z,G.targets),{config:K}=i5(X,G.contexts),H=Q.targetFactory?Q.targetFactory(Z,V):ZX(Z,V);if($.compareSurfaces&&$.compareSurfaces.length>0){let R=[],A;for(let S of $.compareSurfaces){let D=new Set(S),k=z.filter((V$)=>D.has(V$.id)),i=($.requiredSources??[]).filter((V$)=>D.has(V$)),c=await H.run($.prompt,{tool:Y,cwd:Y.path,context:K,docs:k,requiredSources:i,onProgress:Q.onProgress}),M0=b1({response:c.response,requiredSources:i,registeredIds:S}),v$=O$({response:c.response,traps:$.traps??[]}),N0=v$.fired.length>0,aZ=N0?"NO":M0.answerable,WX=N0?0:M0.confidence,nZ=N0?`Trap fired: ${v$.fired.map((V$)=>`"${V$.id}" (${V$.reason})`).join("; ")}`:M0.reason;R.push({active:S,answerable:aZ,confidence:WX,response:c.response,reason:nZ,citations:M0.citations,traps:v$,allResponses:c.allResponses}),A=c.metadata??A}return{scenario:$,answerable:null,confidence:null,response:null,reason:null,citations:null,traps:null,surfaces:R,target:A,context:{name:X}}}let B=$.requiredSources??[],U=await H.run($.prompt,{tool:Y,cwd:Y.path,context:K,docs:z,requiredSources:B,onProgress:Q.onProgress}),F=O$({response:U.response,traps:$.traps??[]}),T=F.fired.length>0,w=$.requiredSources!==void 
0?b1({response:U.response,requiredSources:B,registeredIds:J}):null,O=$.expected!==void 0?n6({response:U.response,expected:$.expected}):null,N,_,P;if(T)N="NO",_=0,P=`Trap fired: ${F.fired.map((R)=>`"${R.id}" (${R.reason})`).join("; ")}`;else{let R=[],A=[];if(w)R.push({answerable:w.answerable,confidence:w.confidence}),A.push(w.reason);if(O){let S=O.total===0?100:Math.round(O.satisfied/O.total*100),D=S===100?"YES":S===0?"NO":"PARTIAL";R.push({answerable:D,confidence:S})}if(R.length===0)N="YES",_=100,P="No traps fired; no other contract declared";else{let S={YES:0,PARTIAL:1,NO:2};N=R.reduce((k,b)=>S[b.answerable]>S[k.answerable]?b:k).answerable,_=Math.round(R.reduce((k,b)=>k+b.confidence,0)/R.length),P=A.filter((k)=>k.length>0).join(" | ")}}return{scenario:$,answerable:N,confidence:_,response:U.response,reason:P,citations:w?w.citations:{cited:[],required:B,missing:[],unknown:[]},traps:F,target:U.metadata,context:{name:X},toolsUsed:U.toolsUsed,sources:U.sources,allResponses:U.allResponses}}function oM($,Z,X){let Y=[];for(let Q of X){if(Q.surfaces){for(let W of Q.surfaces)Y.push({answerable:W.answerable,confidence:W.confidence});continue}if(Q.cells){for(let W of Q.cells)Y.push({answerable:W.answerable,confidence:W.confidence});continue}if(Q.answerable!==null&&Q.confidence!==null)Y.push({answerable:Q.answerable,confidence:Q.confidence})}let G=Y.length,z=Y.filter((Q)=>Q.answerable==="YES"||Q.answerable==="PARTIAL").length,J=G>0?Math.round(Y.reduce((Q,W)=>{if(W.answerable==="YES")return Q+W.confidence;if(W.answerable==="PARTIAL")return Q+W.confidence*0.5;return Q},0)/G):0;return{tool:{name:$.name,description:$.description,path:$.path},docs:Z,scenarios:X,summary:{total:G,answered:z,unanswered:G-z,score:J}}}async function tM($,Z,X,Y,G,z,J){let{config:Q}=i5(Z,Y.contexts),W=$.matrix??{},V=$.target??"default",K=W.interfaces??[V],H=W.sources??[null],B=W.toolsets??["none"],U=z.cellFilter??{},F=[],T;for(let O of K){if(U.interface&&U.interface!==O)continue;for(let N of 
H){if(U.source!==void 0&&U.source!==(N??""))continue;for(let _ of B){if(U.toolset&&U.toolset!==_)continue;if(J!==void 0){let g=`m:${$.name}\x01${O}\x01${N??""}\x01${_}`;if(!J.has(g))continue}let P=_==="none"?null:Y.toolsets?.[_]??null,R=_!=="none"&&(P?.webSearch===!0||P?.webFetch===!0),A=_!=="none"&&P?.mcpServers?Object.keys(P.mcpServers):[],S=A.length>0,{config:D}=XX(O,Y.targets);if(_!=="none"){if(R&&S)throw Error(`Toolset "${_}" mixes webSearch/webFetch with mcpServers; declare separate toolsets per shape so provenance can be attributed to one tool path.`);if(!R&&!S)throw Error(`Toolset "${_}" is declared but defines no runtime shape. Supported today: "none", web (webSearch/webFetch flags), MCP (mcpServers map). Other adapters (Firecrawl, native API search) land per release.`);if(S&&D.provider!=="claude-code"&&D.provider!=="openai")throw Error(`Toolset "${_}" (MCP) is implemented on claude-code and openai interfaces today. Interface "${O}" uses provider "${D.provider}"; rerun with a supported interface or use toolset "none".`);if(R&&D.provider!=="claude-code"&&D.provider!=="anthropic"&&D.provider!=="openai")throw Error(`Toolset "${_}" (web) is implemented on claude-code, anthropic, and openai interfaces today. Interface "${O}" uses provider "${D.provider}"; rerun with a supported interface or use toolset "none".`);if(R&&(D.provider==="anthropic"||D.provider==="openai")&&!P?.webSearch)throw Error(`Toolset "${_}" on ${D.provider} provider requires webSearch: true. 
The ${D.provider} API exposes a single server-side web tool; declare webSearch to enable it, or split web/fetch behaviour across separate toolsets.`)}let k=[],b=[],i=[],c=D.provider==="anthropic"||D.provider==="openai";if(R)if(c)i.push((g)=>g==="web_search");else{if(P?.webSearch)k.push("WebSearch"),b.push("WebSearch"),i.push((g)=>g==="WebSearch");if(P?.webFetch)k.push("WebFetch"),b.push("WebFetch"),i.push((g)=>g==="WebFetch")}if(S)for(let g of A)k.push(`mcp__${g}__*`),i.push((z0)=>z0.startsWith(`mcp__${g}__`));let M0=_==="none"||c?D:{...D,allowedTools:k,disallowedTools:[],mcpServers:S?P?.mcpServers:void 0,maxTurns:Math.max(D.maxTurns??0,15)},v$=z.targetFactory?z.targetFactory(O,M0):ZX(O,M0),N0=_==="none",aZ=N==="none",WX=aZ||!N0?[]:N===null?G:G.filter((g)=>g.id===N),nZ=aZ||!N0?[]:N===null?G.map((g)=>g.id):[N],V$=$.requiredSources??[],G3=N0?V$.filter((g)=>nZ.includes(g)):[],sz=$O($.prompt,N,G,N0),oz=N0?void 0:{sourceHint:ZO(N,G)},n0;try{let g=_==="none"?Q:void 0;n0=await v$.run(sz,{tool:X,cwd:X.path,context:g,docs:WX,requiredSources:G3,discovery:oz,restrictBuiltinTools:_==="none"?void 0:b,webTools:R&&c?{search:P?.webSearch===!0}:void 0,mcpTools:S&&D.provider==="openai"&&P?.mcpServers?{servers:P.mcpServers}:void 0,onProgress:z.onProgress})}catch(g){F.push({cell:{interface:O,source:N,toolset:_},answerable:"NO",confidence:0,response:"",reason:`Error in cell: ${g instanceof Error?g.message:String(g)}`,citations:null,traps:{fired:[],avoided:($.traps??[]).map((z0)=>z0.id)},error:g instanceof Error?g.message:String(g)});continue}let VX=O$({response:n0.response,traps:$.traps??[]}),tz=VX.fired.length>0,H$=$.requiredSources!==void 0&&N0?b1({response:n0.response,requiredSources:G3,registeredIds:nZ}):null,G0=$.expected!==void 0?n6({response:n0.response,expected:$.expected}):null,R2=_!=="none"&&i.length>0?{expected:k,used:(n0.toolsUsed??[]).filter((g)=>i.some((z0)=>z0(g)))}:null,ez=R2!==null&&R2.used.length===0,$J=()=>{let g=[];if(H$)g.push(H$.reason);if(G0){let 
z0=G0.includes.filter((Q0)=>!Q0.satisfied).map((Q0)=>`"${Q0.value}"`),S0=G0.excludes.filter((Q0)=>!Q0.satisfied).map((Q0)=>`"${Q0.value}"`),g$=[];if(z0.length>0)g$.push(`missing includes: ${z0.join(", ")}`);if(S0.length>0)g$.push(`hit excludes: ${S0.join(", ")}`);g.push(g$.length>0?g$.join("; "):`expected checks satisfied (${G0.satisfied}/${G0.total})`)}return g.filter((z0)=>z0.length>0)},A2,S2,C2;if(tz)A2="NO",S2=0,C2=`Trap fired: ${VX.fired.map((g)=>`"${g.id}" (${g.reason})`).join("; ")}`;else if(ez){let g=R2;A2="NO",S2=0;let z0=`Provenance failed: toolset "${_}" configured but none of [${g.expected.join(", ")}] were used (answer rests on model prior knowledge)`,S0=$J();C2=S0.length>0?`${z0} | ${S0.join(" | ")}`:z0}else{let g=[],z0=[];if(H$)g.push({answerable:H$.answerable,confidence:H$.confidence}),z0.push(H$.reason);if(G0){let S0=G0.total===0?100:Math.round(G0.satisfied/G0.total*100),g$=S0===100?"YES":S0===0?"NO":"PARTIAL";g.push({answerable:g$,confidence:S0});let Q0=G0.includes.filter((u$)=>!u$.satisfied).map((u$)=>`"${u$.value}"`),h$=G0.excludes.filter((u$)=>!u$.satisfied).map((u$)=>`"${u$.value}"`),rZ=[];if(Q0.length>0)rZ.push(`missing includes: ${Q0.join(", ")}`);if(h$.length>0)rZ.push(`hit excludes: ${h$.join(", ")}`);z0.push(rZ.length>0?rZ.join("; "):`expected checks satisfied (${G0.satisfied}/${G0.total})`)}if(R2)z0.push(`tool use verified (${R2.used.join(", ")})`);if(g.length===0)A2="YES",S2=100,C2="No traps fired; no other contract declared";else{let S0={YES:0,PARTIAL:1,NO:2};A2=g.reduce((Q0,h$)=>S0[h$.answerable]>S0[Q0.answerable]?h$:Q0).answerable,S2=Math.round(g.reduce((Q0,h$)=>Q0+h$.confidence,0)/g.length),C2=z0.filter((Q0)=>Q0.length>0).join(" | ")}}F.push({cell:{interface:O,source:N,toolset:_},answerable:A2,confidence:S2,response:n0.response,reason:C2,citations:H$?H$.citations:null,traps:VX,expected:G0?{includes:G0.includes,excludes:G0.excludes,satisfied:G0.satisfied,total:G0.total}:void 
0,toolsUsed:n0.toolsUsed,allResponses:n0.allResponses}),T=n0.metadata??T}}}let w=eM($,G);return{scenario:$,answerable:null,confidence:null,response:null,reason:null,citations:null,traps:null,cells:F,verifierSamples:w,target:T,context:{name:Z}}}function eM($,Z){let X=$.verifiers?.sources;if(!X||X.length===0)return;let Y=new Map(Z.map((z)=>[z.id,z])),G=[];for(let z of X){let J=Y.get(z);if(!J)continue;G.push({id:J.id,name:J.name,content:J.content})}return G.length>0?G:void 0}function $O($,Z,X,Y){if(Y)return $;if(Z===null||Z==="none")return $;let G=X.find((J)=>J.id===Z);if(!G)return $;let z=G.type==="url"?`The canonical source for this question is the documentation at ${G.source}. Use your available tools to research it.`:`The canonical source for this question is "${G.name}" (registered locally as ${G.id}). Use your available tools to research from authoritative sources.`;return`${$}
|
|
270
270
|
|
|
271
|
-
${z}`}function
|
|
271
|
+
${z}`}function ZO($,Z){if($===null||$==="none")return null;let X=Z.find((Y)=>Y.id===$);if(!X)return null;if(X.type==="url")return X.source;return X.name}var Cz=($=0)=>(Z)=>`\x1B[${Z+$}m`,Iz=($=0)=>(Z)=>`\x1B[${38+$};5;${Z}m`,jz=($=0)=>(Z,X,Y)=>`\x1B[${38+$};2;${Z};${X};${Y}m`,a={modifier:{reset:[0,0],bold:[1,22],dim:[2,22],italic:[3,23],underline:[4,24],overline:[53,55],inverse:[7,27],hidden:[8,28],strikethrough:[9,29]},color:{black:[30,39],red:[31,39],green:[32,39],yellow:[33,39],blue:[34,39],magenta:[35,39],cyan:[36,39],white:[37,39],blackBright:[90,39],gray:[90,39],grey:[90,39],redBright:[91,39],greenBright:[92,39],yellowBright:[93,39],blueBright:[94,39],magentaBright:[95,39],cyanBright:[96,39],whiteBright:[97,39]},bgColor:{bgBlack:[40,49],bgRed:[41,49],bgGreen:[42,49],bgYellow:[43,49],bgBlue:[44,49],bgMagenta:[45,49],bgCyan:[46,49],bgWhite:[47,49],bgBlackBright:[100,49],bgGray:[100,49],bgGrey:[100,49],bgRedBright:[101,49],bgGreenBright:[102,49],bgYellowBright:[103,49],bgBlueBright:[104,49],bgMagentaBright:[105,49],bgCyanBright:[106,49],bgWhiteBright:[107,49]}},Ff=Object.keys(a.modifier),XO=Object.keys(a.color),YO=Object.keys(a.bgColor),Lf=[...XO,...YO];function GO(){let $=new Map;for(let[Z,X]of Object.entries(a)){for(let[Y,G]of Object.entries(X))a[Y]={open:`\x1B[${G[0]}m`,close:`\x1B[${G[1]}m`},X[Y]=a[Y],$.set(G[0],G[1]);Object.defineProperty(a,Z,{value:X,enumerable:!1})}return Object.defineProperty(a,"codes",{value:$,enumerable:!1}),a.color.close="\x1B[39m",a.bgColor.close="\x1B[49m",a.color.ansi=Cz(),a.color.ansi256=Iz(),a.color.ansi16m=jz(),a.bgColor.ansi=Cz(10),a.bgColor.ansi256=Iz(10),a.bgColor.ansi16m=jz(10),Object.defineProperties(a,{rgbToAnsi256:{value(Z,X,Y){if(Z===X&&X===Y){if(Z<8)return 16;if(Z>248)return 231;return Math.round((Z-8)/247*24)+232}return 16+36*Math.round(Z/255*5)+6*Math.round(X/255*5)+Math.round(Y/255*5)},enumerable:!1},hexToRgb:{value(Z){let 
X=/[a-f\d]{6}|[a-f\d]{3}/i.exec(Z.toString(16));if(!X)return[0,0,0];let[Y]=X;if(Y.length===3)Y=[...Y].map((z)=>z+z).join("");let G=Number.parseInt(Y,16);return[G>>16&255,G>>8&255,G&255]},enumerable:!1},hexToAnsi256:{value:(Z)=>a.rgbToAnsi256(...a.hexToRgb(Z)),enumerable:!1},ansi256ToAnsi:{value(Z){if(Z<8)return 30+Z;if(Z<16)return 90+(Z-8);let X,Y,G;if(Z>=232)X=((Z-232)*10+8)/255,Y=X,G=X;else{Z-=16;let Q=Z%36;X=Math.floor(Z/36)/5,Y=Math.floor(Q/6)/5,G=Q%6/5}let z=Math.max(X,Y,G)*2;if(z===0)return 30;let J=30+(Math.round(G)<<2|Math.round(Y)<<1|Math.round(X));if(z===2)J+=60;return J},enumerable:!1},rgbToAnsi:{value:(Z,X,Y)=>a.ansi256ToAnsi(a.rgbToAnsi256(Z,X,Y)),enumerable:!1},hexToAnsi:{value:(Z)=>a.ansi256ToAnsi(a.hexToAnsi256(Z)),enumerable:!1}}),a}var zO=GO(),g0=zO;import r5 from"process";import JO from"os";import xz from"tty";function b0($,Z=globalThis.Deno?globalThis.Deno.args:r5.argv){let X=$.startsWith("-")?"":$.length===1?"-":"--",Y=Z.indexOf(X+$),G=Z.indexOf("--");return Y!==-1&&(G===-1||Y<G)}var{env:n}=r5,GX;if(b0("no-color")||b0("no-colors")||b0("color=false")||b0("color=never"))GX=0;else if(b0("color")||b0("colors")||b0("color=true")||b0("color=always"))GX=1;function QO(){if("FORCE_COLOR"in n){if(n.FORCE_COLOR==="true")return 1;if(n.FORCE_COLOR==="false")return 0;return n.FORCE_COLOR.length===0?1:Math.min(Number.parseInt(n.FORCE_COLOR,10),3)}}function WO($){if($===0)return!1;return{level:$,hasBasic:!0,has256:$>=2,has16m:$>=3}}function VO($,{streamIsTTY:Z,sniffFlags:X=!0}={}){let Y=QO();if(Y!==void 0)GX=Y;let G=X?GX:Y;if(G===0)return 0;if(X){if(b0("color=16m")||b0("color=full")||b0("color=truecolor"))return 3;if(b0("color=256"))return 2}if("TF_BUILD"in n&&"AGENT_NAME"in n)return 1;if($&&!Z&&G===void 0)return 0;let z=G||0;if(n.TERM==="dumb")return z;if(r5.platform==="win32"){let J=JO.release().split(".");if(Number(J[0])>=10&&Number(J[2])>=10586)return Number(J[2])>=14931?3:2;return 1}if("CI"in 
n){if(["GITHUB_ACTIONS","GITEA_ACTIONS","CIRCLECI"].some((J)=>(J in n)))return 3;if(["TRAVIS","APPVEYOR","GITLAB_CI","BUILDKITE","DRONE"].some((J)=>(J in n))||n.CI_NAME==="codeship")return 1;return z}if("TEAMCITY_VERSION"in n)return/^(9\.(0*[1-9]\d*)\.|\d{2,}\.)/.test(n.TEAMCITY_VERSION)?1:0;if(n.COLORTERM==="truecolor")return 3;if(n.TERM==="xterm-kitty")return 3;if(n.TERM==="xterm-ghostty")return 3;if(n.TERM==="wezterm")return 3;if("TERM_PROGRAM"in n){let J=Number.parseInt((n.TERM_PROGRAM_VERSION||"").split(".")[0],10);switch(n.TERM_PROGRAM){case"iTerm.app":return J>=3?3:2;case"Apple_Terminal":return 2}}if(/-256(color)?$/i.test(n.TERM))return 2;if(/^screen|^xterm|^vt100|^vt220|^rxvt|color|ansi|cygwin|linux/i.test(n.TERM))return 1;if("COLORTERM"in n)return 1;return z}function kz($,Z={}){let X=VO($,{streamIsTTY:$&&$.isTTY,...Z});return WO(X)}var HO={stdout:kz({isTTY:xz.isatty(1)}),stderr:kz({isTTY:xz.isatty(2)})},bz=HO;function fz($,Z,X){let Y=$.indexOf(Z);if(Y===-1)return $;let G=Z.length,z=0,J="";do J+=$.slice(z,Y)+Z+X,z=Y+G,Y=$.indexOf(Z,z);while(Y!==-1);return J+=$.slice(z),J}function yz($,Z,X,Y){let G=0,z="";do{let J=$[Y-1]==="\r";z+=$.slice(G,J?Y-1:Y)+Z+(J?`\r
|
|
272
272
|
`:`
|
|
273
273
|
`)+X,G=Y+1,Y=$.indexOf(`
|
|
274
|
-
`,G)}while(Y!==-1);return z+=$.slice(G),z}var{stdout:vz,stderr:gz}=bz,s5=Symbol("GENERATOR"),D2=Symbol("STYLER"),pZ=Symbol("IS_EMPTY"),hz=["ansi","ansi","ansi256","ansi16m"],E2=Object.create(null),
|
|
275
|
-
`);if(z!==-1)Z=yz(Z,G,Y,z);return Y+Z+G};Object.defineProperties(iZ.prototype,E2);var
|
|
276
|
-
`)}if(
|
|
277
|
-
`)}function Y3($,Z={}){console.log(lz($,Z)),console.log()}function QX($,Z={}){if(Z.verbose)return JSON.stringify($,null,2);let X={...$,docs:$.docs.map((Y)=>({...Y,content:""})),scenarios:$.scenarios.map((Y)=>{let{allResponses:G,verifierSamples:z,cells:J,surfaces:Q,...W}=Y;return{...W,verifierSamples:z?.map((V)=>({...V,content:""})),cells:J?.map((V)=>{let{allResponses:K,...H}=V;return H}),surfaces:Q?.map((V)=>{let{allResponses:K,...H}=V;return H})}})};return JSON.stringify(X,null,2)}function
|
|
278
|
-
`);let Q=z.findings.filter((H)=>H.severity==="error").length,W=z.findings.filter((H)=>H.severity==="warning").length;if(Y!=="json"&&!X)if(console.log(),Q===0&&W===0)console.log(j.green("Audit clean. No issues found."));else{let H=Q>0?j.red:j.yellow;console.log(H(`Audit found ${Q} error(s), ${W} warning(s).`))}if((Z.failOn??"error")==="warning"?Q+W>0:Q>0)process.exit(1)}function
|
|
279
|
-
`);else Y3(T,{threshold:V});if(w){if(X||Y)console.error(j.red(`Overall: ${T.summary.score} / 100 \xB7 threshold ${V} \xB7 run fails`)),console.error(j.dim("Review failed scenarios before trusting this surface."));process.exit(1)}}function
|
|
274
|
+
`,G)}while(Y!==-1);return z+=$.slice(G),z}var{stdout:vz,stderr:gz}=bz,s5=Symbol("GENERATOR"),D2=Symbol("STYLER"),pZ=Symbol("IS_EMPTY"),hz=["ansi","ansi","ansi256","ansi16m"],E2=Object.create(null),KO=($,Z={})=>{if(Z.level&&!(Number.isInteger(Z.level)&&Z.level>=0&&Z.level<=3))throw Error("The `level` option should be an integer from 0 to 3");let X=vz?vz.level:0;$.level=Z.level===void 0?X:Z.level};var BO=($)=>{let Z=(...X)=>X.join(" ");return KO(Z,$),Object.setPrototypeOf(Z,iZ.prototype),Z};function iZ($){return BO($)}Object.setPrototypeOf(iZ.prototype,Function.prototype);for(let[$,Z]of Object.entries(g0))E2[$]={get(){let X=zX(this,t5(Z.open,Z.close,this[D2]),this[pZ]);return Object.defineProperty(this,$,{value:X}),X}};E2.visible={get(){let $=zX(this,this[D2],!0);return Object.defineProperty(this,"visible",{value:$}),$}};var o5=($,Z,X,...Y)=>{if($==="rgb"){if(Z==="ansi16m")return g0[X].ansi16m(...Y);if(Z==="ansi256")return g0[X].ansi256(g0.rgbToAnsi256(...Y));return g0[X].ansi(g0.rgbToAnsi(...Y))}if($==="hex")return o5("rgb",Z,X,...g0.hexToRgb(...Y));return g0[X][$](...Y)},UO=["rgb","hex","ansi256"];for(let $ of UO){E2[$]={get(){let{level:X}=this;return function(...Y){let G=t5(o5($,hz[X],"color",...Y),g0.color.close,this[D2]);return zX(this,G,this[pZ])}}};let Z="bg"+$[0].toUpperCase()+$.slice(1);E2[Z]={get(){let{level:X}=this;return function(...Y){let G=t5(o5($,hz[X],"bgColor",...Y),g0.bgColor.close,this[D2]);return zX(this,G,this[pZ])}}}}var qO=Object.defineProperties(()=>{},{...E2,level:{enumerable:!0,get(){return this[s5].level},set($){this[s5].level=$}}}),t5=($,Z,X)=>{let Y,G;if(X===void 0)Y=$,G=Z;else Y=X.openAll+$,G=Z+X.closeAll;return{open:$,close:Z,openAll:Y,closeAll:G,parent:X}},zX=($,Z,X)=>{let Y=(...G)=>TO(Y,G.length===1?""+G[0]:G.join(" "));return Object.setPrototypeOf(Y,qO),Y[s5]=$,Y[D2]=Z,Y[pZ]=X,Y},TO=($,Z)=>{if($.level<=0||!Z)return $[pZ]?"":Z;let X=$[D2];if(X===void 0)return Z;let{openAll:Y,closeAll:G}=X;if(Z.includes("\x1B"))while(X!==void 
0)Z=fz(Z,X.close,X.open),X=X.parent;let z=Z.indexOf(`
|
|
275
|
+
`);if(z!==-1)Z=yz(Z,G,Y,z);return Y+Z+G};Object.defineProperties(iZ.prototype,E2);var FO=iZ(),Af=iZ({level:gz?gz.level:0});var j=FO;var e5="\u2500".repeat(55);function $3($){if($==="success")return j.green;if($==="warning")return j.yellow;return j.red}function LO($){if($>=70)return j.green;if($>=50)return j.yellow;return j.red}function Z3($){if($.label==="Error")return $.label;return`${$.label} (${$.confidence}%)`}function wO($){let Z=e0({answerable:$.answerable??"NO",confidence:$.confidence??0,traps:$.traps??{fired:[],avoided:[]},error:$.error}),X=$3(Z.tone);return{icon:X(Z.icon),status:Z3(Z),color:X}}function MO($){let Z=$.target?.target??"default",X=$.context?.name??"default";if(Z==="default"&&X==="default")return"";if(X==="default")return j.dim(`[${Z}]`);return j.dim(`[${Z}/${X}]`)}function OO($){let Z=$.map((X)=>X.scenario.name);return new Set(Z).size!==Z.length}function JX($){return $.map((Z)=>`[${Z}]`).join(", ")}function X3($,Z){let X=[];if($.error)return X.push(j.dim(`${Z}error: ${$.error}`)),X;if($.traps.fired.length>0)for(let Y of $.traps.fired)X.push(j.red(`${Z}trap: ${Y.id}`)),X.push(j.dim(`${Z}reason: ${Y.reason}`)),X.push(j.dim(`${Z}match: "${Y.matched}"`));else if($.reason&&$.answerable!=="YES")X.push(j.dim(`${Z}reason: ${$.reason}`));if($.citations.cited.length>0)X.push(j.dim(`${Z}cited: ${JX($.citations.cited)}`));if($.citations.missing.length>0)X.push(j.dim(`${Z}missing: ${JX($.citations.missing)}`));if($.citations.unknown.length>0)X.push(j.dim(`${Z}unknown: ${JX($.citations.unknown)}`));return X}function uz($,Z){if(!$.traps||!$.citations)return[];return X3({error:$.error,traps:$.traps,reason:$.reason??"",answerable:$.answerable??"NO",citations:$.citations},Z)}function mz($){let{icon:Z,status:X,color:Y}=wO($),G=MO($),z=`${Z} ${X}`;return G?`${G} ${Y(z)}`:Y(z)}function dz($,Z){if(!$.cells)return[];let X=[];X.push(`${Z}${j.dim("Matrix cells (interface \xB7 source \xB7 toolset)")}`);for(let Y of $.cells){let 
G=e0(Y),z=$3(G.tone),J=`[${Y.cell.interface} \xB7 ${Y.cell.source??"-"} \xB7 ${Y.cell.toolset}]`,Q=j.dim(J),W=`${z(G.icon)} ${z(Z3(G))}`;if(X.push(`${Z}${Q} ${W}`),X.push(...X3({traps:Y.traps,reason:Y.reason,answerable:Y.answerable,citations:Y.citations??{cited:[],required:[],missing:[],unknown:[]}},`${Z} `)),Y.expected){let V=Y.expected.includes.filter((H)=>!H.satisfied).map((H)=>`"${H.value}"`),K=Y.expected.excludes.filter((H)=>!H.satisfied).map((H)=>`"${H.value}"`);if(V.length>0)X.push(j.dim(`${Z} expected.includes missing: ${V.join(", ")}`));if(K.length>0)X.push(j.dim(`${Z} expected.excludes hit: ${K.join(", ")}`))}if(Y.toolsUsed&&Y.toolsUsed.length>0)X.push(j.dim(`${Z} tools: ${Y.toolsUsed.join(", ")}`))}if($.verifierSamples&&$.verifierSamples.length>0){X.push(`${Z}${j.dim("Verifier samples (human review; never LLM-judged)")}`);for(let Y of $.verifierSamples){let G=Y.content.slice(0,200).replace(/\s+/g," ").trim(),z=Y.content.length>200?"...":"";X.push(j.dim(`${Z} [${Y.id}] ${Y.name}`)),X.push(j.dim(`${Z} ${G}${z}`))}}return X}function cz($,Z){if(!$.surfaces)return[];let X=[];X.push(`${Z}${j.dim("Citations scoped to active surface (compare mode)")}`);for(let Y of $.surfaces){let G=e0(Y),z=$3(G.tone),J=j.dim(`[${Y.active.join(",")}]`),Q=`${z(G.icon)} ${z(Z3(G))}`;X.push(`${Z}${J} ${Q}`),X.push(...X3({traps:Y.traps,reason:Y.reason,answerable:Y.answerable,citations:Y.citations},`${Z} `))}return X}function NO($){let Z=0,X=0,Y=0;for(let G of $){if(G.surfaces){for(let z of G.surfaces)Z+=z.traps.fired.length,X+=z.citations.missing.length,Y+=z.citations.unknown.length;continue}if(G.traps)Z+=G.traps.fired.length;if(G.citations)X+=G.citations.missing.length,Y+=G.citations.unknown.length}if(Z>0&&X+Y>0)return"Review fired traps and citation gaps.";if(Z>0)return"Review fired traps before trusting this surface.";if(X+Y>0)return"Review missing and unknown citations.";return"Citations hold. 
No declared traps fired."}function _O($,Z){let X=$.summary.score,G=`Overall: ${LO(X)(`${X}`)} / 100`;if(Z===void 0||Z<=0)return G;let J=X>=Z?j.green("run passes"):j.red("run fails");return`${G} \xB7 threshold ${Z} \xB7 ${J}`}function lz($,Z={}){let{tool:X,scenarios:Y,summary:G}=$,z=Y,J=[];if(J.push(j.bold("pickled check")),J.push(e5),J.push(`Tool: ${j.cyan(X.name)}`),$.docs.length>0)J.push(`Sources: ${j.dim(JX($.docs.map((Q)=>Q.id)))}`);else J.push(`Sources: ${j.dim("none registered")}`);if(J.push(`Scenarios: ${j.dim(String(G.total))}`),$.plan){let{expandedCells:Q,selectedCells:W,seed:V}=$.plan,H=W<Q?`Cells: ${j.dim(`${W} of ${Q} (sampled${V?`, seed=${V}`:""})`)}`:`Cells: ${j.dim(String(Q))}`;J.push(H)}if(J.push(""),$.plan?.cells&&z.length===0){if(J.push(j.bold(`Planned cells (${$.plan.selectedCells} of ${$.plan.expandedCells})`)),$.plan.cells.length===0)J.push(j.dim(" (no cells after filters)"));let Q="";for(let W of $.plan.cells){if(W.scenario!==Q)Q=W.scenario,J.push(` ${W.scenario}`);let V=W.interface!==void 0?`[${W.interface} \xB7 ${W.source??"-"} \xB7 ${W.toolset}]`:`[${W.target}${W.context&&W.context!=="default"?`/${W.context}`:""}]`;J.push(` ${j.dim(V)}`)}return J.push(""),J.push(e5),J.push(j.dim("Dry-run: no model calls. Re-run without --plan to execute these cells.")),J.join(`
|
|
276
|
+
`)}if(OO(z)){let Q=new Map;for(let W of z){let V=W.scenario.name;if(!Q.has(V))Q.set(V,[]);Q.get(V)?.push(W)}for(let[W,V]of Q){J.push(`Scenario: ${W}`);for(let K of V)if(K.cells)J.push(...dz(K," "));else if(K.surfaces)J.push(...cz(K," "));else J.push(` ${mz(K)}`),J.push(...uz(K," "));J.push("")}}else for(let Q of z){if(J.push(`Scenario: ${Q.scenario.name}`),Q.cells)J.push(...dz(Q," "));else if(Q.surfaces)J.push(...cz(Q," "));else J.push(` ${mz(Q)}`),J.push(...uz(Q," "));J.push("")}return J.push(e5),J.push(_O($,Z.threshold)),J.push(j.dim(NO(Y))),J.join(`
|
|
277
|
+
`)}function Y3($,Z={}){console.log(lz($,Z)),console.log()}function QX($,Z={}){if(Z.verbose)return JSON.stringify($,null,2);let X={...$,docs:$.docs.map((Y)=>({...Y,content:""})),scenarios:$.scenarios.map((Y)=>{let{allResponses:G,verifierSamples:z,cells:J,surfaces:Q,...W}=Y;return{...W,verifierSamples:z?.map((V)=>({...V,content:""})),cells:J?.map((V)=>{let{allResponses:K,...H}=V;return H}),surfaces:Q?.map((V)=>{let{allResponses:K,...H}=V;return H})}})};return JSON.stringify(X,null,2)}function DO($){if($.format&&$.format!=="terminal")return $.format;if($.json)return"json";return $.format??"terminal"}async function pz($,Z){let{output:X}=Z,Y=DO(Z),G=PO.resolve($),z;try{z=await a6({targetRepo:G})}catch(H){console.error(j.red(H instanceof Error?H.message:H)),process.exit(1)}let J;if(Y==="json")J=u6(z);else if(Y==="markdown")J=h6(z);else J=m6(z);if(X)await Bun.write(X,J);else await EO(`${J}
|
|
278
|
+
`);let Q=z.findings.filter((H)=>H.severity==="error").length,W=z.findings.filter((H)=>H.severity==="warning").length;if(Y!=="json"&&!X)if(console.log(),Q===0&&W===0)console.log(j.green("Audit clean. No issues found."));else{let H=Q>0?j.red:j.yellow;console.log(H(`Audit found ${Q} error(s), ${W} warning(s).`))}if((Z.failOn??"error")==="warning"?Q+W>0:Q>0)process.exit(1)}function EO($){return new Promise((Z,X)=>{process.stdout.write($,(Y)=>{if(Y)X(Y);else Z()})})}import RO from"path";async function nz($,Z){let{json:X,output:Y,verbose:G}=Z,z=(O)=>!X&&console.log(O),J=RO.resolve($),Q;try{Q=await j1(J)}catch(O){console.error(j.red(O instanceof Error?O.message:O)),console.error(),console.error(j.dim("Run `pickled init` to create a config file")),process.exit(1)}if(Z.target){let O=Q.scenarios.length;try{Q=k9(Q,Z.target)}catch(_){console.error(j.red(_ instanceof Error?_.message:String(_))),process.exit(1)}let N=O-Q.scenarios.length;if(N>0&&!X)z(j.dim(`Skipping ${N} scenario(s) with explicit target != "${Z.target}"`))}let W={name:Q.tool.name,description:Q.tool.description,path:J};if(G){z(j.bold("pickled check")),z(""),z(j.dim(` Tool: ${W.name}`)),z(j.dim(` Scenarios: ${Q.scenarios.length}`));for(let O of Q.scenarios)z(j.dim(` - ${O.name}`))}let V;try{V=SO(Z.threshold,Q.threshold)}catch(O){console.error(j.red(O instanceof Error?O.message:O)),process.exit(1)}let K=Z.interface??Z.target,H=K||Z.source||Z.toolset?{interface:K,source:Z.source,toolset:Z.toolset}:void 0,B=Z.scenario?[Z.scenario]:void 0,U;if(Z.sample!==void 0){if(U=iz(Z.sample,"--sample"),U===null)process.exit(1)}let F;if(Z.maxCells!==void 0){if(F=iz(Z.maxCells,"--max-cells"),F===null)process.exit(1)}let T;try{T=await n5(W,Q,{onProgress:G?(O)=>{if(!X)z(j.dim(` ${O}`))}:void 0,cellFilter:H,scenarioFilter:B,plan:Z.plan,maxCells:F,sample:U,seed:Z.seed})}catch(O){console.error(j.red(O instanceof Error?O.message:O)),process.exit(1)}let w=AO({plan:Z.plan===!0,threshold:V,score:T.summary.score});if(Y)await 
Bun.write(Y,QX(T,{verbose:G}));else if(X)await CO(`${QX(T,{verbose:G})}
|
|
279
|
+
`);else Y3(T,{threshold:V});if(w){if(X||Y)console.error(j.red(`Overall: ${T.summary.score} / 100 \xB7 threshold ${V} \xB7 run fails`)),console.error(j.dim("Review failed scenarios before trusting this surface."));process.exit(1)}}function AO($){if($.plan)return!1;return $.threshold>0&&$.score<$.threshold}function SO($,Z){if($===void 0){if(Z===void 0)return 0;return az(Z,"pickled.yml threshold")}return az($,"--threshold")}function iz($,Z){if(!/^\d+$/.test($))return console.error(j.red(`Invalid ${Z} "${$}". Expected a non-negative integer.`)),null;return Number($)}function az($,Z){if(typeof $==="number"){if(Number.isInteger($)&&$>=0&&$<=100)return $;throw Error(`Invalid ${Z} "${$}". Expected an integer from 0 to 100.`)}if(typeof $==="string"&&/^\d+$/.test($)){let X=Number($);if(X<=100)return X}if(typeof $==="string")throw Error(`Invalid ${Z} "${$}". Expected an integer from 0 to 100.`);throw Error(`Invalid ${Z}. Expected an integer from 0 to 100.`)}function CO($){return new Promise((Z,X)=>{process.stdout.write($,(Y)=>{if(Y)X(Y);else Z()})})}import IO from"path";var jO=`# pickled.yml - Agent legibility check for your product
|
|
280
280
|
|
|
281
281
|
tool:
|
|
282
282
|
name: "your-product"
|
|
@@ -298,4 +298,4 @@ scenarios:
|
|
|
298
298
|
|
|
299
299
|
# Optional: fail CI if score falls below threshold
|
|
300
300
|
# threshold: 80
|
|
301
|
-
`;async function rz($){let X=`${
|
|
301
|
+
`;async function rz($){let X=`${IO.resolve($)}/pickled.yml`;if(await Bun.file(X).exists())console.error(j.red("pickled.yml already exists")),process.exit(1);await Bun.write(X,jO),console.log(j.green("Created pickled.yml")),console.log(),console.log(j.dim("Next steps:")),console.log(j.dim(" 1. Edit pickled.yml: list your sources and scenarios")),console.log(j.dim(" 2. Run: pickled check")),console.log()}M1.name("pickled").description("Test what agents actually understand about your product").version(D3.version);M1.command("init").description("Create a pickled.yml config file").argument("[path]","Path to your project (default: current directory)",".").action(rz);M1.command("audit").description("Static scan of agent-context files (CLAUDE.md, AGENTS.md, llms.txt). No LLM calls.").argument("[path]","Path to your project (default: current directory)",".").addOption(new wX("--format <name>","Output format").choices(["terminal","markdown","json"]).default("terminal")).option("--json","Shorthand for --format json").option("-o, --output <file>","Save report to file").addOption(new wX("--fail-on <level>","Exit non-zero on this severity or higher").choices(["error","warning"]).default("error")).action(pz);M1.command("check").description("Run agent scenarios against registered sources").argument("[path]","Path to your project (default: current directory)",".").option("--json","Output as JSON").option("-o, --output <file>","Save report to file").option("-v, --verbose","Show detailed progress").option("-t, --threshold <percent>","Minimum score % to pass (overrides config)").option("--target <name>","Restrict to the named target. Overrides matrix.target for non-matrix scenarios; for matrix scenarios, also filters cells by interface unless --interface is explicitly set.").option("--scenario <name>","Run only the named scenario (for CI matrix one-job-per-cell usage)").option("--interface <name>","Matrix cell filter: run only cells with this interface. 
Takes precedence over --target for matrix cells.").option("--source <name>","Matrix cell filter: run only cells with this source id").option("--toolset <name>","Matrix cell filter: run only cells with this toolset name").option("--plan","Dry-run: expand the matrix, apply filters and sampling, print the cell list, exit. Makes zero model calls.").option("--max-cells <n>","Hard cap on the number of cells (after filters and sampling). Exits non-zero before any model call if exceeded.").option("--sample <n>","Deterministic per-scenario sample. Picks N cells per matrix scenario; single-cell scenarios always run.").option("--seed <value>",'Seed for --sample. Defaults to "default" so reruns without --seed are reproducible.').action(nz);await M1.parseAsync();
|