npm - @vinkius-core/mcp-fusion - Versions diffs - 2.7.0 → 2.8.0 - Mend

@vinkius-core/mcp-fusion 2.7.0 → 2.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (68) hide show

package/dist/cli/fusion.d.ts +101 -0
package/dist/cli/fusion.d.ts.map +1 -0
package/dist/cli/fusion.js +333 -0
package/dist/cli/fusion.js.map +1 -0
package/dist/index.d.ts +41 -1
package/dist/index.d.ts.map +1 -1
package/dist/index.js +22 -0
package/dist/index.js.map +1 -1
package/dist/introspection/BehaviorDigest.d.ts +112 -0
package/dist/introspection/BehaviorDigest.d.ts.map +1 -0
package/dist/introspection/BehaviorDigest.js +146 -0
package/dist/introspection/BehaviorDigest.js.map +1 -0
package/dist/introspection/CapabilityLockfile.d.ts +259 -0
package/dist/introspection/CapabilityLockfile.d.ts.map +1 -0
package/dist/introspection/CapabilityLockfile.js +391 -0
package/dist/introspection/CapabilityLockfile.js.map +1 -0
package/dist/introspection/ContractAwareSelfHealing.d.ts +90 -0
package/dist/introspection/ContractAwareSelfHealing.d.ts.map +1 -0
package/dist/introspection/ContractAwareSelfHealing.js +132 -0
package/dist/introspection/ContractAwareSelfHealing.js.map +1 -0
package/dist/introspection/ContractDiff.d.ts +91 -0
package/dist/introspection/ContractDiff.d.ts.map +1 -0
package/dist/introspection/ContractDiff.js +466 -0
package/dist/introspection/ContractDiff.js.map +1 -0
package/dist/introspection/CryptoAttestation.d.ts +143 -0
package/dist/introspection/CryptoAttestation.d.ts.map +1 -0
package/dist/introspection/CryptoAttestation.js +194 -0
package/dist/introspection/CryptoAttestation.js.map +1 -0
package/dist/introspection/EntitlementScanner.d.ts +124 -0
package/dist/introspection/EntitlementScanner.d.ts.map +1 -0
package/dist/introspection/EntitlementScanner.js +244 -0
package/dist/introspection/EntitlementScanner.js.map +1 -0
package/dist/introspection/GovernanceObserver.d.ts +88 -0
package/dist/introspection/GovernanceObserver.d.ts.map +1 -0
package/dist/introspection/GovernanceObserver.js +132 -0
package/dist/introspection/GovernanceObserver.js.map +1 -0
package/dist/introspection/SemanticProbe.d.ts +207 -0
package/dist/introspection/SemanticProbe.d.ts.map +1 -0
package/dist/introspection/SemanticProbe.js +255 -0
package/dist/introspection/SemanticProbe.js.map +1 -0
package/dist/introspection/TokenEconomics.d.ts +210 -0
package/dist/introspection/TokenEconomics.d.ts.map +1 -0
package/dist/introspection/TokenEconomics.js +286 -0
package/dist/introspection/TokenEconomics.js.map +1 -0
package/dist/introspection/ToolContract.d.ts +159 -0
package/dist/introspection/ToolContract.d.ts.map +1 -0
package/dist/introspection/ToolContract.js +191 -0
package/dist/introspection/ToolContract.js.map +1 -0
package/dist/introspection/canonicalize.d.ts +20 -0
package/dist/introspection/canonicalize.d.ts.map +1 -0
package/dist/introspection/canonicalize.js +51 -0
package/dist/introspection/canonicalize.js.map +1 -0
package/dist/introspection/index.d.ts +20 -0
package/dist/introspection/index.d.ts.map +1 -1
package/dist/introspection/index.js +20 -0
package/dist/introspection/index.js.map +1 -1
package/dist/observability/DebugObserver.d.ts +26 -1
package/dist/observability/DebugObserver.d.ts.map +1 -1
package/dist/observability/DebugObserver.js +8 -1
package/dist/observability/DebugObserver.js.map +1 -1
package/dist/observability/index.d.ts +1 -1
package/dist/observability/index.d.ts.map +1 -1
package/dist/observability/index.js.map +1 -1
package/dist/server/ServerAttachment.d.ts +41 -0
package/dist/server/ServerAttachment.d.ts.map +1 -1
package/dist/server/ServerAttachment.js +25 -1
package/dist/server/ServerAttachment.js.map +1 -1
package/package.json +8 -1

package/dist/introspection/SemanticProbe.d.ts ADDED Viewed

@@ -0,0 +1,207 @@
+/**
+ * SemanticProbe — LLM-as-a-Judge for Opaque Behavior Detection
+ *
+ * **Evolution 2: Semantic Probing**
+ *
+ * Provides a framework for using an LLM to evaluate whether
+ * a tool handler's actual runtime behavior matches its declared
+ * behavioral contract. This detects "semantic drift" — situations
+ * where the handler's output changes meaning even when the
+ * egress schema and system rules remain structurally identical.
+ *
+ * **Architecture**: This module defines the probe protocol,
+ * types, and evaluation pipeline. The actual LLM invocation
+ * is delegated to user-provided adapters — the module never
+ * makes LLM calls directly, maintaining the "no hidden
+ * network dependencies" principle.
+ *
+ * **Testing integration**: Designed to be integrated with
+ * `FusionTester.callAction()` for automated regression
+ * testing: "given these inputs, does the output semantically
+ * match the previous known-good output?"
+ *
+ * Pure-function module for probe construction and evaluation;
+ * LLM interaction is async via pluggable adapters.
+ *
+ * @module
+ */
+/**
+ * Configuration for semantic probing.
+ *
+ * Only `adapter` is required; every other field has a default that is
+ * applied by the evaluation functions in this module.
+ */
+export interface SemanticProbeConfig {
+    /** The LLM adapter to use for evaluation */
+    readonly adapter: SemanticProbeAdapter;
+    /**
+     * Risk thresholds for classification. Each threshold that is left
+     * unset falls back to its default (high: 0.5, medium: 0.75).
+     */
+    readonly thresholds?: Partial<SemanticThresholds>;
+    /**
+     * Maximum number of probes to run in parallel (default: 3).
+     * Probes are evaluated in sequential batches of this size.
+     */
+    readonly concurrency?: number;
+    /**
+     * Whether to include raw LLM responses in results. When not enabled,
+     * `SemanticProbeResult.rawResponse` is `null`.
+     */
+    readonly includeRawResponses?: boolean;
+}
+/**
+ * Pluggable LLM adapter for semantic evaluation.
+ *
+ * Implementations should call an LLM with the provided prompt
+ * and return the structured evaluation result.
+ *
+ * The returned text does not have to be bare JSON: the parser takes
+ * everything from the first `{` to the last `}` of the response, so a
+ * markdown code fence or surrounding prose is tolerated. A rejected
+ * promise is not caught by `evaluateProbe` and reaches the caller.
+ */
+export interface SemanticProbeAdapter {
+    /** Human-readable name (e.g., 'claude-3.5', 'gpt-4o') */
+    readonly name: string;
+    /**
+     * Send a semantic evaluation prompt to the LLM.
+     *
+     * @param prompt - Complete evaluation prompt
+     * @returns Raw LLM response text
+     */
+    evaluate(prompt: string): Promise<string>;
+}
+/**
+ * Thresholds for semantic drift classification.
+ *
+ * Classification is by similarity score, highest band first:
+ * a score of 0.95 or more is always 'none' (this cut-off is fixed and
+ * not configurable here), a score at or above `mediumDriftThreshold`
+ * is 'low', a score at or above `highDriftThreshold` is 'medium',
+ * and anything lower is 'high'.
+ */
+export interface SemanticThresholds {
+    /** Score below which drift is considered 'high' (default: 0.5) */
+    readonly highDriftThreshold: number;
+    /** Score below which drift is considered 'medium' (default: 0.75) */
+    readonly mediumDriftThreshold: number;
+}
+/**
+ * A semantic probe definition — a structured test case
+ * for LLM-based behavioral evaluation.
+ *
+ * `input`, `expectedOutput` and `actualOutput` are embedded in the judge
+ * prompt via `JSON.stringify`, so they should be JSON-serializable.
+ */
+export interface SemanticProbe {
+    /** Unique identifier for this probe */
+    readonly id: string;
+    /** Tool name being probed */
+    readonly toolName: string;
+    /** Action key being probed */
+    readonly actionKey: string;
+    /**
+     * Description of what this probe tests. `createProbe` sets this to
+     * `Semantic probe for <toolName>.<actionKey>`.
+     */
+    readonly description: string;
+    /** Input arguments to the tool */
+    readonly input: Record<string, unknown>;
+    /** Expected output (known-good baseline) */
+    readonly expectedOutput: unknown;
+    /** Actual output from the current handler */
+    readonly actualOutput: unknown;
+    /** Behavioral contract context for the judge */
+    readonly contractContext: ProbeContractContext;
+}
+/**
+ * Contract context injected into the LLM judge prompt.
+ *
+ * Provides the judge with enough information to evaluate
+ * whether the behavioral contract was violated.
+ */
+export interface ProbeContractContext {
+    /** Tool description; the prompt shows 'No description' when undefined */
+    readonly description: string | undefined;
+    /** Whether the action is declared readOnly */
+    readonly readOnly: boolean;
+    /** Whether the action is declared destructive */
+    readonly destructive: boolean;
+    /**
+     * System rules that should be respected. Rendered in the prompt as a
+     * numbered list; an empty array renders 'No system rules declared.'
+     */
+    readonly systemRules: readonly string[];
+    /**
+     * Schema field names (expected output shape). Rendered in the prompt
+     * as a comma-separated list; an empty array renders 'No schema declared'.
+     */
+    readonly schemaKeys: readonly string[];
+}
+/**
+ * Result of a single semantic probe evaluation.
+ *
+ * When the judge's response cannot be parsed, a fallback result is
+ * produced: similarity 0.5, drift 'medium', no contract violation, and a
+ * single entry in `violations` stating that the response was unparseable.
+ */
+export interface SemanticProbeResult {
+    /** The probe that was evaluated */
+    readonly probe: SemanticProbe;
+    /**
+     * Semantic similarity score (0.0 = completely different, 1.0 = identical).
+     * The judge's value is clamped to this range; 0.5 is used when the
+     * judge did not supply a number.
+     */
+    readonly similarityScore: number;
+    /** Drift classification */
+    readonly driftLevel: DriftLevel;
+    /** Whether the behavioral contract was violated */
+    readonly contractViolated: boolean;
+    /** Specific violations detected by the judge */
+    readonly violations: readonly string[];
+    /** LLM judge's reasoning ('No reasoning provided' when the judge gave none) */
+    readonly reasoning: string;
+    /** Raw LLM response (if configured via `includeRawResponses`), otherwise `null` */
+    readonly rawResponse: string | null;
+    /** ISO-8601 timestamp of evaluation */
+    readonly evaluatedAt: string;
+}
+/**
+ * Drift level classification, ordered from highest similarity ('none')
+ * to lowest ('high'). See `SemanticThresholds` for the configurable bands.
+ */
+export type DriftLevel = 'none' | 'low' | 'medium' | 'high';
+/**
+ * Aggregated result of multiple semantic probes.
+ */
+export interface SemanticProbeReport {
+    /**
+     * Tool name. `evaluateProbes` takes it from the first probe and
+     * uses 'unknown' when the probe list is empty.
+     */
+    readonly toolName: string;
+    /** All individual probe results */
+    readonly results: readonly SemanticProbeResult[];
+    /**
+     * Overall drift assessment: the classification of the mean
+     * `similarityScore` using the default thresholds (0.5 / 0.75);
+     * 'none' when there are no results.
+     */
+    readonly overallDrift: DriftLevel;
+    /** Number of contract violations (results whose `contractViolated` is set) */
+    readonly violationCount: number;
+    /**
+     * Whether the tool is considered semantically stable: true when
+     * `overallDrift` is 'none' or 'low'. `violationCount` does not
+     * influence this flag.
+     */
+    readonly stable: boolean;
+    /** Human-readable summary */
+    readonly summary: string;
+    /** ISO-8601 timestamp */
+    readonly completedAt: string;
+}
+/**
+ * Create a semantic probe from input/output pairs.
+ *
+ * The probe's `id` is generated internally and its `description` is set
+ * to `Semantic probe for <toolName>.<actionKey>`. All other arguments are
+ * stored on the probe as given (no cloning).
+ *
+ * @param toolName - Tool name
+ * @param actionKey - Action key
+ * @param input - Input arguments
+ * @param expectedOutput - Known-good baseline output
+ * @param actualOutput - Current handler output
+ * @param contractContext - Behavioral contract context
+ * @returns A structured semantic probe
+ */
+export declare function createProbe(toolName: string, actionKey: string, input: Record<string, unknown>, expectedOutput: unknown, actualOutput: unknown, contractContext: ProbeContractContext): SemanticProbe;
+/**
+ * Build the evaluation prompt for the LLM judge.
+ *
+ * The prompt is structured to elicit a JSON-formatted response
+ * with specific fields for programmatic parsing: `similarityScore`,
+ * `contractViolated`, `violations` and `reasoning`.
+ *
+ * @param probe - The semantic probe to evaluate
+ * @returns Complete evaluation prompt
+ */
+export declare function buildJudgePrompt(probe: SemanticProbe): string;
+/**
+ * Parse the LLM judge's response into a structured result.
+ *
+ * Handles malformed responses gracefully by falling back
+ * to conservative defaults: a response without a JSON object, or with
+ * invalid JSON, produces a result with similarity 0.5 and drift 'medium'
+ * instead of throwing.
+ *
+ * @param probe - The probe that was evaluated
+ * @param rawResponse - Raw LLM response text
+ * @param config - Probe configuration (thresholds and `includeRawResponses` are read)
+ * @returns Structured probe result
+ */
+export declare function parseJudgeResponse(probe: SemanticProbe, rawResponse: string, config: SemanticProbeConfig): SemanticProbeResult;
+/**
+ * Run a complete semantic probe evaluation: build the judge prompt,
+ * send it through `config.adapter`, and parse the reply.
+ *
+ * Errors thrown or rejected by the adapter are not caught here.
+ *
+ * @param probe - The probe to evaluate
+ * @param config - Probe configuration (includes LLM adapter)
+ * @returns Evaluation result
+ */
+export declare function evaluateProbe(probe: SemanticProbe, config: SemanticProbeConfig): Promise<SemanticProbeResult>;
+/**
+ * Run multiple probes and aggregate results.
+ *
+ * Probes are evaluated in sequential batches of `config.concurrency`
+ * (default 3); results keep the order of `probes`. The report's tool name
+ * is taken from the first probe ('unknown' for an empty list). If any
+ * adapter call rejects, the returned promise rejects.
+ *
+ * @param probes - Array of probes to evaluate
+ * @param config - Probe configuration
+ * @returns Aggregated report
+ */
+export declare function evaluateProbes(probes: readonly SemanticProbe[], config: SemanticProbeConfig): Promise<SemanticProbeReport>;
+/**
+ * Aggregate individual probe results into a report.
+ *
+ * The overall drift is classified from the mean similarity score using
+ * the default thresholds (0.5 / 0.75), independent of any per-config
+ * thresholds used for the individual results.
+ *
+ * @param toolName - Tool name
+ * @param results - Individual probe results
+ * @returns Aggregated report
+ */
+export declare function aggregateResults(toolName: string, results: readonly SemanticProbeResult[]): SemanticProbeReport;
+//# sourceMappingURL=SemanticProbe.d.ts.map

package/dist/introspection/SemanticProbe.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"SemanticProbe.d.ts","sourceRoot":"","sources":["../../src/introspection/SemanticProbe.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAMH;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAChC,4CAA4C;IAC5C,QAAQ,CAAC,OAAO,EAAE,oBAAoB,CAAC;IACvC,yCAAyC;IACzC,QAAQ,CAAC,UAAU,CAAC,EAAE,OAAO,CAAC,kBAAkB,CAAC,CAAC;IAClD,kDAAkD;IAClD,QAAQ,CAAC,WAAW,CAAC,EAAE,MAAM,CAAC;IAC9B,sDAAsD;IACtD,QAAQ,CAAC,mBAAmB,CAAC,EAAE,OAAO,CAAC;CAC1C;AAED;;;;;GAKG;AACH,MAAM,WAAW,oBAAoB;IACjC,yDAAyD;IACzD,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB;;;;;OAKG;IACH,QAAQ,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;CAC7C;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IAC/B,kEAAkE;IAClE,QAAQ,CAAC,kBAAkB,EAAE,MAAM,CAAC;IACpC,qEAAqE;IACrE,QAAQ,CAAC,oBAAoB,EAAE,MAAM,CAAC;CACzC;AAED;;;GAGG;AACH,MAAM,WAAW,aAAa;IAC1B,uCAAuC;IACvC,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,6BAA6B;IAC7B,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,8BAA8B;IAC9B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,2CAA2C;IAC3C,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,kCAAkC;IAClC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACxC,4CAA4C;IAC5C,QAAQ,CAAC,cAAc,EAAE,OAAO,CAAC;IACjC,6CAA6C;IAC7C,QAAQ,CAAC,YAAY,EAAE,OAAO,CAAC;IAC/B,gDAAgD;IAChD,QAAQ,CAAC,eAAe,EAAE,oBAAoB,CAAC;CAClD;AAED;;;;;GAKG;AACH,MAAM,WAAW,oBAAoB;IACjC,uBAAuB;IACvB,QAAQ,CAAC,WAAW,EAAE,MAAM,GAAG,SAAS,CAAC;IACzC,8CAA8C;IAC9C,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC;IAC3B,iDAAiD;IACjD,QAAQ,CAAC,WAAW,EAAE,OAAO,CAAC;IAC9B,4CAA4C;IAC5C,QAAQ,CAAC,WAAW,EAAE,SAAS,MAAM,EAAE,CAAC;IACxC,iDAAiD;IACjD,QAAQ,CAAC,UAAU,EAAE,SAAS,MAAM,EAAE,CAAC;CAC1C;AAED;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAChC,mCAAmC;IACnC,QAAQ,CAAC,KAAK,EAAE,aAAa,CAAC;IAC9B,8EAA8E;IAC9E,QAAQ,CAAC,eAAe,EAAE,MAAM,CAAC;IACjC,2BAA2B;IAC3B,QAAQ,CAAC,UAAU,EAAE,UAAU,CAAC;IAChC,mDAAmD;IACnD,QAAQ,CAAC,gBAAgB,EAAE,OAAO,CAAC;IACnC,gDAAgD;IAChD,QAAQ,CAAC,UAAU,EAAE,SAAS,MAAM,EAAE,CAAC;IACvC,4BAA4B;IAC5B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,uCAAuC;IACvC,QAAQ,CAAC,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IACpC,uCAAuC;IACvC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;CAChC;AAED,iCAAiC;AACjC,MAAM
,MAAM,UAAU,GAAG,MAAM,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;AAE5D;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAChC,gBAAgB;IAChB,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,mCAAmC;IACnC,QAAQ,CAAC,OAAO,EAAE,SAAS,mBAAmB,EAAE,CAAC;IACjD,+BAA+B;IAC/B,QAAQ,CAAC,YAAY,EAAE,UAAU,CAAC;IAClC,oCAAoC;IACpC,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;IAChC,yDAAyD;IACzD,QAAQ,CAAC,MAAM,EAAE,OAAO,CAAC;IACzB,6BAA6B;IAC7B,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,yBAAyB;IACzB,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;CAChC;AAMD;;;;;;;;;;GAUG;AACH,wBAAgB,WAAW,CACvB,QAAQ,EAAE,MAAM,EAChB,SAAS,EAAE,MAAM,EACjB,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAC9B,cAAc,EAAE,OAAO,EACvB,YAAY,EAAE,OAAO,EACrB,eAAe,EAAE,oBAAoB,GACtC,aAAa,CAaf;AAED;;;;;;;;GAQG;AACH,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,aAAa,GAAG,MAAM,CAqD7D;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,kBAAkB,CAC9B,KAAK,EAAE,aAAa,EACpB,WAAW,EAAE,MAAM,EACnB,MAAM,EAAE,mBAAmB,GAC5B,mBAAmB,CAoCrB;AAED;;;;;;GAMG;AACH,wBAAsB,aAAa,CAC/B,KAAK,EAAE,aAAa,EACpB,MAAM,EAAE,mBAAmB,GAC5B,OAAO,CAAC,mBAAmB,CAAC,CAI9B;AAED;;;;;;GAMG;AACH,wBAAsB,cAAc,CAChC,MAAM,EAAE,SAAS,aAAa,EAAE,EAChC,MAAM,EAAE,mBAAmB,GAC5B,OAAO,CAAC,mBAAmB,CAAC,CAc9B;AAED;;;;;;GAMG;AACH,wBAAgB,gBAAgB,CAC5B,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,SAAS,mBAAmB,EAAE,GACxC,mBAAmB,CA8BrB"}

package/dist/introspection/SemanticProbe.js ADDED Viewed

@@ -0,0 +1,255 @@
+/**
+ * SemanticProbe — LLM-as-a-Judge for Opaque Behavior Detection
+ *
+ * **Evolution 2: Semantic Probing**
+ *
+ * Provides a framework for using an LLM to evaluate whether
+ * a tool handler's actual runtime behavior matches its declared
+ * behavioral contract. This detects "semantic drift" — situations
+ * where the handler's output changes meaning even when the
+ * egress schema and system rules remain structurally identical.
+ *
+ * **Architecture**: This module defines the probe protocol,
+ * types, and evaluation pipeline. The actual LLM invocation
+ * is delegated to user-provided adapters — the module never
+ * makes LLM calls directly, maintaining the "no hidden
+ * network dependencies" principle.
+ *
+ * **Testing integration**: Designed to be integrated with
+ * `FusionTester.callAction()` for automated regression
+ * testing: "given these inputs, does the output semantically
+ * match the previous known-good output?"
+ *
+ * Pure-function module for probe construction and evaluation;
+ * LLM interaction is async via pluggable adapters.
+ *
+ * @module
+ */
+// ============================================================================
+// Probe Construction
+// ============================================================================
+/**
+ * Create a semantic probe from input/output pairs.
+ *
+ * @param toolName - Tool name
+ * @param actionKey - Action key
+ * @param input - Input arguments
+ * @param expectedOutput - Known-good baseline output
+ * @param actualOutput - Current handler output
+ * @param contractContext - Behavioral contract context
+ * @returns A structured semantic probe
+ */
+export function createProbe(toolName, actionKey, input, expectedOutput, actualOutput, contractContext) {
+    const id = `${toolName}::${actionKey}::${Date.now()}`;
+    return {
+        id,
+        toolName,
+        actionKey,
+        description: `Semantic probe for ${toolName}.${actionKey}`,
+        input,
+        expectedOutput,
+        actualOutput,
+        contractContext,
+    };
+}
+/**
+ * Build the evaluation prompt for the LLM judge.
+ *
+ * The prompt is structured to elicit a JSON-formatted response
+ * with specific fields for programmatic parsing
+ * (`similarityScore`, `contractViolated`, `violations`, `reasoning`).
+ *
+ * Rendering details:
+ * - a missing contract description is shown as 'No description'
+ * - system rules are rendered as a numbered list, or
+ *   'No system rules declared.' when the list is empty
+ * - schema keys are comma-separated, or 'No schema declared' when empty
+ * - input, expected output and actual output are embedded with
+ *   `JSON.stringify(value, null, 2)`, so they should be JSON-serializable
+ *
+ * @param probe - The semantic probe to evaluate
+ * @returns Complete evaluation prompt
+ */
+export function buildJudgePrompt(probe) {
+    return `You are a semantic evaluation judge for an MCP (Model Context Protocol) tool.
+Your task is to compare two outputs from the same tool handler and determine:
+1. Whether they are semantically equivalent
+2. Whether the current output violates the tool's behavioral contract
+## Tool Information
+- **Tool**: ${probe.toolName}
+- **Action**: ${probe.actionKey}
+- **Description**: ${probe.contractContext.description ?? 'No description'}
+- **Read-Only**: ${probe.contractContext.readOnly}
+- **Destructive**: ${probe.contractContext.destructive}
+## Behavioral Contract
+${probe.contractContext.systemRules.length > 0
+        ? `### System Rules\n${probe.contractContext.systemRules.map((r, i) => `${i + 1}. ${r}`).join('\n')}`
+        : 'No system rules declared.'}
+### Expected Output Schema Fields
+${probe.contractContext.schemaKeys.join(', ') || 'No schema declared'}
+## Input Arguments
+\`\`\`json
+${JSON.stringify(probe.input, null, 2)}
+\`\`\`
+## Expected Output (Baseline)
+\`\`\`json
+${JSON.stringify(probe.expectedOutput, null, 2)}
+\`\`\`
+## Actual Output (Current)
+\`\`\`json
+${JSON.stringify(probe.actualOutput, null, 2)}
+\`\`\`
+## Evaluation Instructions
+Compare the Expected Output with the Actual Output. Consider:
+- Are the outputs semantically equivalent (same meaning, even if format differs)?
+- Does the Actual Output violate any system rules?
+- Does the Actual Output return fields not in the expected schema?
+- Has the behavior meaningfully changed from the baseline?
+Respond with ONLY a JSON object in this exact format:
+\`\`\`json
+{
+  "similarityScore": <number 0.0-1.0>,
+  "contractViolated": <boolean>,
+  "violations": [<string descriptions of violations>],
+  "reasoning": "<brief explanation of your assessment>"
+}
+\`\`\``;
+}
+/**
+ * Parse the LLM judge's response into a structured result.
+ *
+ * Handles malformed responses gracefully by falling back
+ * to conservative defaults.
+ *
+ * @param probe - The probe that was evaluated
+ * @param rawResponse - Raw LLM response text
+ * @param config - Probe configuration
+ * @returns Structured probe result
+ */
+export function parseJudgeResponse(probe, rawResponse, config) {
+    const thresholds = resolveThresholds(config);
+    try {
+        // Extract JSON from response (handle markdown code blocks)
+        const jsonMatch = rawResponse.match(/\{[\s\S]*\}/);
+        if (!jsonMatch) {
+            return fallbackResult(probe, rawResponse, config);
+        }
+        const parsed = JSON.parse(jsonMatch[0]);
+        const similarityScore = typeof parsed.similarityScore === 'number'
+            ? Math.max(0, Math.min(1, parsed.similarityScore))
+            : 0.5;
+        const driftLevel = classifyDrift(similarityScore, thresholds);
+        return {
+            probe,
+            similarityScore,
+            driftLevel,
+            contractViolated: parsed.contractViolated ?? false,
+            violations: parsed.violations ?? [],
+            reasoning: parsed.reasoning ?? 'No reasoning provided',
+            rawResponse: config.includeRawResponses ? rawResponse : null,
+            evaluatedAt: new Date().toISOString(),
+        };
+    }
+    catch {
+        return fallbackResult(probe, rawResponse, config);
+    }
+}
+/**
+ * Run a complete semantic probe evaluation.
+ *
+ * @param probe - The probe to evaluate
+ * @param config - Probe configuration (includes LLM adapter)
+ * @returns Evaluation result
+ */
+export async function evaluateProbe(probe, config) {
+    const prompt = buildJudgePrompt(probe);
+    const rawResponse = await config.adapter.evaluate(prompt);
+    return parseJudgeResponse(probe, rawResponse, config);
+}
+/**
+ * Run multiple probes and aggregate results.
+ *
+ * @param probes - Array of probes to evaluate
+ * @param config - Probe configuration
+ * @returns Aggregated report
+ */
+export async function evaluateProbes(probes, config) {
+    const concurrency = config.concurrency ?? 3;
+    // Run probes with concurrency control
+    const results = [];
+    for (let i = 0; i < probes.length; i += concurrency) {
+        const batch = probes.slice(i, i + concurrency);
+        const batchResults = await Promise.all(batch.map(probe => evaluateProbe(probe, config)));
+        results.push(...batchResults);
+    }
+    return aggregateResults(probes[0]?.toolName ?? 'unknown', results);
+}
+/**
+ * Aggregate individual probe results into a report.
+ *
+ * @param toolName - Tool name
+ * @param results - Individual probe results
+ * @returns Aggregated report
+ */
+export function aggregateResults(toolName, results) {
+    const violationCount = results.filter(r => r.contractViolated).length;
+    const avgSimilarity = results.length > 0
+        ? results.reduce((sum, r) => sum + r.similarityScore, 0) / results.length
+        : 1.0;
+    const overallDrift = results.length > 0
+        ? classifyDrift(avgSimilarity, {
+            highDriftThreshold: 0.5,
+            mediumDriftThreshold: 0.75,
+        })
+        : 'none';
+    const stable = overallDrift === 'none' || overallDrift === 'low';
+    const summary = results.length === 0
+        ? 'No probes evaluated.'
+        : `${results.length} probes evaluated. Avg similarity: ${(avgSimilarity * 100).toFixed(1)}%. ` +
+            `Drift: ${overallDrift}. Violations: ${violationCount}. ` +
+            `Status: ${stable ? 'STABLE' : 'UNSTABLE'}`;
+    return {
+        toolName,
+        results,
+        overallDrift,
+        violationCount,
+        stable,
+        summary,
+        completedAt: new Date().toISOString(),
+    };
+}
+// ============================================================================
+// Internals
+// ============================================================================
+const DEFAULT_THRESHOLDS = { // fallbacks used by resolveThresholds when the config omits a value
+    highDriftThreshold: 0.5, // similarity below this is classified 'high'
+    mediumDriftThreshold: 0.75, // similarity below this (and >= high) is classified 'medium'
+};
+function resolveThresholds(config) {
+    return {
+        highDriftThreshold: config.thresholds?.highDriftThreshold ?? DEFAULT_THRESHOLDS.highDriftThreshold,
+        mediumDriftThreshold: config.thresholds?.mediumDriftThreshold ?? DEFAULT_THRESHOLDS.mediumDriftThreshold,
+    };
+}
+function classifyDrift(similarity, thresholds) {
+    if (similarity >= 0.95)
+        return 'none';
+    if (similarity >= thresholds.mediumDriftThreshold)
+        return 'low';
+    if (similarity >= thresholds.highDriftThreshold)
+        return 'medium';
+    return 'high';
+}
+function fallbackResult(probe, rawResponse, config) {
+    return {
+        probe,
+        similarityScore: 0.5,
+        driftLevel: 'medium',
+        contractViolated: false,
+        violations: ['Unable to parse LLM judge response'],
+        reasoning: 'Fallback: LLM response could not be parsed as JSON',
+        rawResponse: config.includeRawResponses ? rawResponse : null,
+        evaluatedAt: new Date().toISOString(),
+    };
+}
+//# sourceMappingURL=SemanticProbe.js.map

package/dist/introspection/SemanticProbe.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"SemanticProbe.js","sourceRoot":"","sources":["../../src/introspection/SemanticProbe.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAuIH,+EAA+E;AAC/E,qBAAqB;AACrB,+EAA+E;AAE/E;;;;;;;;;;GAUG;AACH,MAAM,UAAU,WAAW,CACvB,QAAgB,EAChB,SAAiB,EACjB,KAA8B,EAC9B,cAAuB,EACvB,YAAqB,EACrB,eAAqC;IAErC,MAAM,EAAE,GAAG,GAAG,QAAQ,KAAK,SAAS,KAAK,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC;IAEtD,OAAO;QACH,EAAE;QACF,QAAQ;QACR,SAAS;QACT,WAAW,EAAE,sBAAsB,QAAQ,IAAI,SAAS,EAAE;QAC1D,KAAK;QACL,cAAc;QACd,YAAY;QACZ,eAAe;KAClB,CAAC;AACN,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,gBAAgB,CAAC,KAAoB;IACjD,OAAO;;;;;;;cAOG,KAAK,CAAC,QAAQ;gBACZ,KAAK,CAAC,SAAS;qBACV,KAAK,CAAC,eAAe,CAAC,WAAW,IAAI,gBAAgB;mBACvD,KAAK,CAAC,eAAe,CAAC,QAAQ;qBAC5B,KAAK,CAAC,eAAe,CAAC,WAAW;;;EAGpD,KAAK,CAAC,eAAe,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC;QACtC,CAAC,CAAC,qBAAqB,KAAK,CAAC,eAAe,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE;QACrG,CAAC,CAAC,2BAA2B;;;EAGnC,KAAK,CAAC,eAAe,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,oBAAoB;;;;EAInE,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;;;;;EAKpC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,cAAc,EAAE,IAAI,EAAE,CAAC,CAAC;;;;;EAK7C,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,YAAY,EAAE,IAAI,EAAE,CAAC,CAAC;;;;;;;;;;;;;;;;;;OAkBtC,CAAC;AACR,CAAC;AAED;;;;;;;;;;GAUG;AACH,MAAM,UAAU,kBAAkB,CAC9B,KAAoB,EACpB,WAAmB,EACnB,MAA2B;IAE3B,MAAM,UAAU,GAAG,iBAAiB,CAAC,MAAM,CAAC,CAAC;IAE7C,IAAI,CAAC;QACD,2DAA2D;QAC3D,MAAM,SAAS,GAAG,WAAW,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;QACnD,IAAI,CAAC,SAAS,EAAE,CAAC;YACb,OAAO,cAAc,CAAC,KAAK,EAAE,WAAW,EAAE,MAAM,CAAC,CAAC;QACtD,CAAC;QAED,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,CAKrC,CAAC;QAEF,MAAM,eAAe,GAAG,OAAO,MAAM,CAAC,eAAe,KAAK,QAAQ;YAC9D,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,CAAC,eAAe,CAAC,CAAC;YAClD,CAAC,CAAC,GAAG,CAAC;QAEV,MAAM,UAAU,GAAG,aAAa,CAAC,eAAe,EAAE,UAAU,CAAC,CAAC;QAE9D,OAAO;YACH,KAAK;YACL,eAAe;YACf,UAAU;YACV,gBAAgB,EAAE,MAAM,CAAC,gBA
AgB,IAAI,KAAK;YAClD,UAAU,EAAE,MAAM,CAAC,UAAU,IAAI,EAAE;YACnC,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,uBAAuB;YACtD,WAAW,EAAE,MAAM,CAAC,mBAAmB,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI;YAC5D,WAAW,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;SACxC,CAAC;IACN,CAAC;IAAC,MAAM,CAAC;QACL,OAAO,cAAc,CAAC,KAAK,EAAE,WAAW,EAAE,MAAM,CAAC,CAAC;IACtD,CAAC;AACL,CAAC;AAED;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CAC/B,KAAoB,EACpB,MAA2B;IAE3B,MAAM,MAAM,GAAG,gBAAgB,CAAC,KAAK,CAAC,CAAC;IACvC,MAAM,WAAW,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;IAC1D,OAAO,kBAAkB,CAAC,KAAK,EAAE,WAAW,EAAE,MAAM,CAAC,CAAC;AAC1D,CAAC;AAED;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAChC,MAAgC,EAChC,MAA2B;IAE3B,MAAM,WAAW,GAAG,MAAM,CAAC,WAAW,IAAI,CAAC,CAAC;IAE5C,sCAAsC;IACtC,MAAM,OAAO,GAA0B,EAAE,CAAC;IAC1C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,IAAI,WAAW,EAAE,CAAC;QAClD,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC,CAAC;QAC/C,MAAM,YAAY,GAAG,MAAM,OAAO,CAAC,GAAG,CAClC,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,aAAa,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC,CACnD,CAAC;QACF,OAAO,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAAC;IAClC,CAAC;IAED,OAAO,gBAAgB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,QAAQ,IAAI,SAAS,EAAE,OAAO,CAAC,CAAC;AACvE,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,gBAAgB,CAC5B,QAAgB,EAChB,OAAuC;IAEvC,MAAM,cAAc,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,gBAAgB,CAAC,CAAC,MAAM,CAAC;IACtE,MAAM,aAAa,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC;QACpC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM;QACzE,CAAC,CAAC,GAAG,CAAC;IAEV,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC;QACnC,CAAC,CAAC,aAAa,CAAC,aAAa,EAAE;YAC3B,kBAAkB,EAAE,GAAG;YACvB,oBAAoB,EAAE,IAAI;SAC7B,CAAC;QACF,CAAC,CAAC,MAAoB,CAAC;IAE3B,MAAM,MAAM,GAAG,YAAY,KAAK,MAAM,IAAI,YAAY,KAAK,KAAK,CAAC;IAEjE,MAAM,OAAO,GAAG,OAAO,CAAC,MAAM,KAAK,CAAC;QAChC,CAAC,CAAC,sBAAsB;QACxB,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,sCAAsC,CAAC,aAAa,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK;YAC9F,UAAU,YAAY,iBAAiB,cAAc,IAAI;YACzD,WAAW,MA
AM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,UAAU,EAAE,CAAC;IAEhD,OAAO;QACH,QAAQ;QACR,OAAO;QACP,YAAY;QACZ,cAAc;QACd,MAAM;QACN,OAAO;QACP,WAAW,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;KACxC,CAAC;AACN,CAAC;AAED,+EAA+E;AAC/E,YAAY;AACZ,+EAA+E;AAE/E,MAAM,kBAAkB,GAAuB;IAC3C,kBAAkB,EAAE,GAAG;IACvB,oBAAoB,EAAE,IAAI;CAC7B,CAAC;AAEF,SAAS,iBAAiB,CAAC,MAA2B;IAClD,OAAO;QACH,kBAAkB,EAAE,MAAM,CAAC,UAAU,EAAE,kBAAkB,IAAI,kBAAkB,CAAC,kBAAkB;QAClG,oBAAoB,EAAE,MAAM,CAAC,UAAU,EAAE,oBAAoB,IAAI,kBAAkB,CAAC,oBAAoB;KAC3G,CAAC;AACN,CAAC;AAED,SAAS,aAAa,CAAC,UAAkB,EAAE,UAA8B;IACrE,IAAI,UAAU,IAAI,IAAI;QAAE,OAAO,MAAM,CAAC;IACtC,IAAI,UAAU,IAAI,UAAU,CAAC,oBAAoB;QAAE,OAAO,KAAK,CAAC;IAChE,IAAI,UAAU,IAAI,UAAU,CAAC,kBAAkB;QAAE,OAAO,QAAQ,CAAC;IACjE,OAAO,MAAM,CAAC;AAClB,CAAC;AAED,SAAS,cAAc,CACnB,KAAoB,EACpB,WAAmB,EACnB,MAA2B;IAE3B,OAAO;QACH,KAAK;QACL,eAAe,EAAE,GAAG;QACpB,UAAU,EAAE,QAAQ;QACpB,gBAAgB,EAAE,KAAK;QACvB,UAAU,EAAE,CAAC,oCAAoC,CAAC;QAClD,SAAS,EAAE,oDAAoD;QAC/D,WAAW,EAAE,MAAM,CAAC,mBAAmB,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI;QAC5D,WAAW,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;KACxC,CAAC;AACN,CAAC"}

package/dist/introspection/TokenEconomics.d.ts ADDED Viewed

@@ -0,0 +1,210 @@
+/**
+ * TokenEconomics — Cognitive Overload Detection
+ *
+ * **Evolution 3: Token Economics**
+ *
+ * Profiles the token density and context spread of MCP tool
+ * responses to detect cognitive overload scenarios where the
+ * LLM's working memory is flooded by verbose tool output,
+ * evicting system rules and degrading reasoning quality.
+ *
+ * **Key insight**: An MCP tool that returns 50KB of JSON per
+ * call will rapidly exhaust the context window. If that output
+ * isn't constrained by `agentLimit` or `egressMaxBytes`, the
+ * system rules injected by the Presenter's `addRules()` will
+ * be pushed out of the LLM's attention window — silently
+ * degrading behavioral correctness.
+ *
+ * This module provides:
+ *
+ * 1. **Static analysis**: Estimate token density from Presenter
+ *    schema and guardrail configuration (zero-cost at runtime).
+ *
+ * 2. **Runtime profiling**: Measure actual token counts of
+ *    response blocks after Presenter rendering (opt-in).
+ *
+ * 3. **Overload classification**: Classify responses into
+ *    risk levels based on configurable thresholds.
+ *
+ * 4. **Integration with BehaviorDigest**: Token economics risk
+ *    level is part of the behavioral contract — changes to
+ *    the token profile are tracked as contract deltas.
+ *
+ * Pure-function module for analysis; runtime profiling hooks
+ * are designed for zero-overhead when not configured.
+ *
+ * @module
+ */
+/**
+ * Token economics analysis for a single tool response.
+ *
+ * This is the return type of `profileResponse()`.
+ */
+export interface TokenAnalysis {
+    /** Name of the tool that produced the response */
+    readonly toolName: string;
+    /** Action key, or `null` when no action key applies */
+    readonly actionKey: string | null;
+    /** Estimated total tokens in the response */
+    readonly estimatedTokens: number;
+    /** Number of content blocks in the response */
+    readonly blockCount: number;
+    /** Per-block token breakdown (see `BlockTokenProfile`) */
+    readonly blocks: readonly BlockTokenProfile[];
+    /** Overhead tokens (rules, affordances, UI decorators) */
+    readonly overheadTokens: number;
+    /** Data payload tokens (actual tool output) */
+    readonly dataTokens: number;
+    /** Overhead-to-data ratio (higher = more overhead) */
+    readonly overheadRatio: number;
+    /** Risk classification (see `TokenRisk`) */
+    readonly risk: TokenRisk;
+    /** Human-readable advisory; may be `null` (presumably when there is nothing to advise — confirm against the implementation) */
+    readonly advisory: string | null;
+}
+/**
+ * Token profile for a single content block.
+ *
+ * Returned by `profileBlock()` and used as the element type of
+ * `TokenAnalysis.blocks`.
+ */
+export interface BlockTokenProfile {
+    /** Block type (e.g., 'text', 'resource', 'image') */
+    readonly type: string;
+    /** Estimated tokens in this block */
+    readonly estimatedTokens: number;
+    /** Raw byte size of this block */
+    readonly bytes: number;
+}
+/**
+ * Token risk classification.
+ *
+ * `TokenThresholds` declares upper bounds for `'low'`, `'medium'`, and
+ * `'high'` only; `'critical'` has no threshold of its own and is
+ * presumably assigned above the `high` bound — confirm against the
+ * implementation.
+ */
+export type TokenRisk = 'low' | 'medium' | 'high' | 'critical';
+/**
+ * Thresholds for token risk classification.
+ *
+ * All three bounds are required here; `TokenEconomicsConfig.thresholds`
+ * accepts a `Partial` of this shape to override individual values.
+ */
+export interface TokenThresholds {
+    /** Maximum tokens for 'low' risk (default: 1000) */
+    readonly low: number;
+    /** Maximum tokens for 'medium' risk (default: 4000) */
+    readonly medium: number;
+    /** Maximum tokens for 'high' risk (default: 8000) */
+    readonly high: number;
+}
+/**
+ * Configuration for the token economics profiler.
+ *
+ * Every field is optional. Accepted as the optional `config`
+ * argument of `profileResponse()`.
+ */
+export interface TokenEconomicsConfig {
+    /** Custom thresholds; any subset of `low`/`medium`/`high` may be given (defaults provided) */
+    readonly thresholds?: Partial<TokenThresholds>;
+    /** Whether to emit warnings to the debug observer */
+    readonly emitWarnings?: boolean;
+    /** Maximum acceptable overhead ratio (default: 0.3) */
+    readonly maxOverheadRatio?: number;
+}
+/**
+ * Static token profile derived from Presenter configuration.
+ * Computed once at build time — zero runtime cost.
+ *
+ * Returned by `computeStaticProfile()` and accepted (as an array)
+ * by `aggregateProfiles()`.
+ */
+export interface StaticTokenProfile {
+    /** Tool name */
+    readonly toolName: string;
+    /** Estimated minimum tokens per response */
+    readonly minTokens: number;
+    /** Estimated maximum tokens per response (with agentLimit) */
+    readonly maxTokens: number;
+    /** Whether the maximum is bounded (agentLimit/egressMaxBytes set) */
+    readonly bounded: boolean;
+    /** Per-field estimated token cost (see `FieldTokenEstimate`) */
+    readonly fieldBreakdown: readonly FieldTokenEstimate[];
+    /** Risk classification based on max estimate (see `TokenRisk`) */
+    readonly risk: TokenRisk;
+    /** Recommendations for reducing token cost */
+    readonly recommendations: readonly string[];
+}
+/**
+ * Per-field token estimate.
+ *
+ * Element type of `StaticTokenProfile.fieldBreakdown`.
+ */
+export interface FieldTokenEstimate {
+    /** Field name */
+    readonly name: string;
+    /** Estimated tokens per occurrence */
+    readonly estimatedTokens: number;
+    /** Whether this field is a collection/array type */
+    readonly isCollection: boolean;
+}
+/**
+ * Estimate token count from a string using the ~4 chars/token heuristic.
+ *
+ * This is a fast approximation suitable for profiling. For precise
+ * token counting, use a tokenizer library (tiktoken, etc.).
+ *
+ * NOTE(review): rounding and the result for an empty string are not
+ * visible from this declaration — confirm against the implementation.
+ *
+ * @param text - The text to estimate tokens for
+ * @returns Estimated (approximate) token count
+ */
+export declare function estimateTokens(text: string): number;
+/**
+ * Estimate tokens for a content block structure (MCP response block).
+ *
+ * NOTE(review): how a block without `text` is profiled is not visible
+ * from this declaration — confirm against the implementation.
+ *
+ * @param block - The content block; `type` is required and `text` is optional
+ * @returns Block-level token profile
+ */
+export declare function profileBlock(block: {
+    type: string;
+    text?: string;
+}): BlockTokenProfile;
+/**
+ * Profile a complete tool response for token economics.
+ *
+ * Analyzes all content blocks in a tool response to compute
+ * total token usage, overhead ratio, and risk classification.
+ *
+ * @param toolName - Name of the tool that produced the response
+ * @param actionKey - Action key, or `null` if not applicable
+ * @param blocks - Content blocks from the tool response (`type` required, `text` optional)
+ * @param overheadBlocks - Optional. Number of blocks that are overhead (rules, UI);
+ *   the value used when omitted is not visible from this declaration
+ * @param config - Optional. Token economics configuration
+ * @returns Complete token analysis
+ */
+export declare function profileResponse(toolName: string, actionKey: string | null, blocks: readonly {
+    type: string;
+    text?: string;
+}[], overheadBlocks?: number, config?: TokenEconomicsConfig): TokenAnalysis;
+/**
+ * Compute a static token profile from Presenter metadata.
+ *
+ * This runs once at manifest compilation time and produces
+ * a zero-cost profile that estimates the worst-case token
+ * usage for a tool based on its schema and guardrail config.
+ *
+ * @param toolName - Tool name
+ * @param schemaKeys - Presenter schema field names
+ * @param agentLimitMax - Maximum items from agentLimit() config, or `null`
+ *   (presumably when no agentLimit is configured — confirm)
+ * @param egressMaxBytes - Maximum bytes from egressMaxBytes() config, or `null`
+ *   (presumably when no egressMaxBytes is configured — confirm)
+ * @returns Static token profile with recommendations
+ */
+export declare function computeStaticProfile(toolName: string, schemaKeys: readonly string[], agentLimitMax: number | null, egressMaxBytes: number | null): StaticTokenProfile;
+/**
+ * Aggregate static profiles into a server-level summary.
+ *
+ * @param profiles - All static profiles for the server (accepted as a read-only array)
+ * @returns Server-level token economics summary (`ServerTokenSummary`)
+ */
+export declare function aggregateProfiles(profiles: readonly StaticTokenProfile[]): ServerTokenSummary;
+/**
+ * Server-level token economics summary.
+ *
+ * This is the return type of `aggregateProfiles()`.
+ */
+export interface ServerTokenSummary {
+    /** Total number of tools */
+    readonly toolCount: number;
+    /** Sum of all tools' minimum token estimates */
+    readonly totalMinTokens: number;
+    /** Sum of all tools' maximum token estimates */
+    readonly totalMaxTokens: number;
+    /** Number of tools without bounded output (see `StaticTokenProfile.bounded`) */
+    readonly unboundedToolCount: number;
+    /** Names of unbounded tools */
+    readonly unboundedToolNames: readonly string[];
+    /** Overall risk classification (see `TokenRisk`) */
+    readonly overallRisk: TokenRisk;
+    /** Names of critical-risk tools */
+    readonly criticalToolNames: readonly string[];
+    /** Aggregated recommendations */
+    readonly recommendations: readonly string[];
+}
+//# sourceMappingURL=TokenEconomics.d.ts.map