@vinkius-core/mcp-fusion 2.7.0 → 2.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/fusion.d.ts +101 -0
- package/dist/cli/fusion.d.ts.map +1 -0
- package/dist/cli/fusion.js +333 -0
- package/dist/cli/fusion.js.map +1 -0
- package/dist/index.d.ts +41 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +22 -0
- package/dist/index.js.map +1 -1
- package/dist/introspection/BehaviorDigest.d.ts +112 -0
- package/dist/introspection/BehaviorDigest.d.ts.map +1 -0
- package/dist/introspection/BehaviorDigest.js +147 -0
- package/dist/introspection/BehaviorDigest.js.map +1 -0
- package/dist/introspection/CapabilityLockfile.d.ts +261 -0
- package/dist/introspection/CapabilityLockfile.d.ts.map +1 -0
- package/dist/introspection/CapabilityLockfile.js +392 -0
- package/dist/introspection/CapabilityLockfile.js.map +1 -0
- package/dist/introspection/ContractAwareSelfHealing.d.ts +90 -0
- package/dist/introspection/ContractAwareSelfHealing.d.ts.map +1 -0
- package/dist/introspection/ContractAwareSelfHealing.js +132 -0
- package/dist/introspection/ContractAwareSelfHealing.js.map +1 -0
- package/dist/introspection/ContractDiff.d.ts +91 -0
- package/dist/introspection/ContractDiff.d.ts.map +1 -0
- package/dist/introspection/ContractDiff.js +466 -0
- package/dist/introspection/ContractDiff.js.map +1 -0
- package/dist/introspection/CryptoAttestation.d.ts +143 -0
- package/dist/introspection/CryptoAttestation.d.ts.map +1 -0
- package/dist/introspection/CryptoAttestation.js +194 -0
- package/dist/introspection/CryptoAttestation.js.map +1 -0
- package/dist/introspection/EntitlementScanner.d.ts +177 -0
- package/dist/introspection/EntitlementScanner.d.ts.map +1 -0
- package/dist/introspection/EntitlementScanner.js +459 -0
- package/dist/introspection/EntitlementScanner.js.map +1 -0
- package/dist/introspection/GovernanceObserver.d.ts +88 -0
- package/dist/introspection/GovernanceObserver.d.ts.map +1 -0
- package/dist/introspection/GovernanceObserver.js +132 -0
- package/dist/introspection/GovernanceObserver.js.map +1 -0
- package/dist/introspection/SemanticProbe.d.ts +207 -0
- package/dist/introspection/SemanticProbe.d.ts.map +1 -0
- package/dist/introspection/SemanticProbe.js +255 -0
- package/dist/introspection/SemanticProbe.js.map +1 -0
- package/dist/introspection/TokenEconomics.d.ts +210 -0
- package/dist/introspection/TokenEconomics.d.ts.map +1 -0
- package/dist/introspection/TokenEconomics.js +286 -0
- package/dist/introspection/TokenEconomics.js.map +1 -0
- package/dist/introspection/ToolContract.d.ts +161 -0
- package/dist/introspection/ToolContract.d.ts.map +1 -0
- package/dist/introspection/ToolContract.js +192 -0
- package/dist/introspection/ToolContract.js.map +1 -0
- package/dist/introspection/canonicalize.d.ts +20 -0
- package/dist/introspection/canonicalize.d.ts.map +1 -0
- package/dist/introspection/canonicalize.js +51 -0
- package/dist/introspection/canonicalize.js.map +1 -0
- package/dist/introspection/index.d.ts +20 -0
- package/dist/introspection/index.d.ts.map +1 -1
- package/dist/introspection/index.js +20 -0
- package/dist/introspection/index.js.map +1 -1
- package/dist/observability/DebugObserver.d.ts +26 -1
- package/dist/observability/DebugObserver.d.ts.map +1 -1
- package/dist/observability/DebugObserver.js +8 -1
- package/dist/observability/DebugObserver.js.map +1 -1
- package/dist/observability/index.d.ts +1 -1
- package/dist/observability/index.d.ts.map +1 -1
- package/dist/observability/index.js.map +1 -1
- package/dist/server/ServerAttachment.d.ts +41 -0
- package/dist/server/ServerAttachment.d.ts.map +1 -1
- package/dist/server/ServerAttachment.js +25 -1
- package/dist/server/ServerAttachment.js.map +1 -1
- package/package.json +8 -1
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
import { SpanStatusCode } from '../observability/Tracing.js';
|
|
2
|
+
/**
 * Create a governance observer that emits debug events and/or tracing
 * spans for governance operations.
 *
 * Each observed operation opens a span named `mcp.governance.<operation>`
 * (when a tracer is configured), runs the callback, and reports the outcome
 * to the span and to the debug handler. A failing callback is reported and
 * its error is rethrown unchanged; the span is always ended.
 *
 * @param config - Observer configuration (debug handler and/or tracer)
 * @returns A `GovernanceObserver` instance exposing `observe` and `observeAsync`
 *
 * @example
 * ```typescript
 * import { createGovernanceObserver } from '@vinkius-core/mcp-fusion/introspection';
 * import { createDebugObserver } from '@vinkius-core/mcp-fusion';
 *
 * const observer = createGovernanceObserver({
 *     debug: createDebugObserver(),
 * });
 *
 * const contracts = observer.observe(
 *     'contract.compile',
 *     'Compiling 5 tool contracts',
 *     () => compileContracts(builders),
 * );
 * ```
 */
export function createGovernanceObserver(config) {
    const { debug, tracer } = config;

    /** Open the tracing span for an operation; yields `undefined` without a tracer. */
    function openSpan(operation, label) {
        return tracer?.startSpan(`mcp.governance.${operation}`, {
            attributes: {
                'mcp.governance.operation': operation,
                'mcp.governance.label': label,
            },
        });
    }

    /** Record a successful outcome on the span and emit the matching debug event. */
    function reportSuccess(span, operation, label, start) {
        const durationMs = Date.now() - start;
        span?.setAttribute('mcp.governance.outcome', 'success');
        span?.setAttribute('mcp.durationMs', durationMs);
        span?.setStatus({ code: SpanStatusCode.OK });
        debug?.({
            type: 'governance',
            operation,
            label,
            outcome: 'success',
            durationMs,
            timestamp: Date.now(),
        });
    }

    /** Record a failed outcome on the span and emit the matching debug event. */
    function reportFailure(span, operation, label, start, err) {
        const durationMs = Date.now() - start;
        // Anything can be thrown in JavaScript; fall back to the string form.
        const message = err instanceof Error ? err.message : String(err);
        span?.setAttribute('mcp.governance.outcome', 'failure');
        span?.setAttribute('mcp.durationMs', durationMs);
        span?.setStatus({ code: SpanStatusCode.ERROR, message });
        span?.recordException(err instanceof Error ? err : new Error(message));
        debug?.({
            type: 'governance',
            operation,
            label,
            outcome: 'failure',
            detail: message,
            durationMs,
            timestamp: Date.now(),
        });
    }

    /**
     * Run a synchronous governance operation under observation.
     *
     * @param operation - Operation identifier, used in the span name and events
     * @param label - Human-readable label attached to the span and events
     * @param fn - The work to run
     * @returns Whatever `fn` returns
     */
    function observe(operation, label, fn) {
        const start = Date.now();
        const span = openSpan(operation, label);
        try {
            const result = fn();
            // Reporting stays inside the try block, so an exception raised by
            // the span or the debug handler is itself reported as a failure.
            reportSuccess(span, operation, label, start);
            return result;
        }
        catch (err) {
            reportFailure(span, operation, label, start, err);
            throw err;
        }
        finally {
            span?.end();
        }
    }

    /**
     * Run an asynchronous governance operation under observation.
     *
     * The duration covers the time until the value returned by `fn` settles.
     *
     * @param operation - Operation identifier, used in the span name and events
     * @param label - Human-readable label attached to the span and events
     * @param fn - The work to run; its result is awaited
     * @returns A promise for the awaited result of `fn`
     */
    async function observeAsync(operation, label, fn) {
        const start = Date.now();
        const span = openSpan(operation, label);
        try {
            const result = await fn();
            reportSuccess(span, operation, label, start);
            return result;
        }
        catch (err) {
            reportFailure(span, operation, label, start, err);
            throw err;
        }
        finally {
            span?.end();
        }
    }

    return { observe, observeAsync };
}
|
|
121
|
+
/**
 * Create a no-op governance observer.
 *
 * Used when observability is not configured: the callback is invoked
 * directly, with no spans, timing, or debug events.
 *
 * `observeAsync` is declared `async` so that it always returns a promise and
 * turns a synchronous throw from `fn` into a rejection, matching the observer
 * returned by `createGovernanceObserver`.
 *
 * @returns A `GovernanceObserver` whose methods only run the callback
 */
export function createNoopObserver() {
    return {
        observe: (_op, _label, fn) => fn(),
        observeAsync: async (_op, _label, fn) => fn(),
    };
}
|
|
132
|
+
//# sourceMappingURL=GovernanceObserver.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"GovernanceObserver.js","sourceRoot":"","sources":["../../src/introspection/GovernanceObserver.ts"],"names":[],"mappings":"AAmBA,OAAO,EAAE,cAAc,EAAE,MAAM,6BAA6B,CAAC;AAkE7D;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,MAAM,UAAU,wBAAwB,CAAC,MAAgC;IACrE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,MAAM,CAAC;IAEjC,SAAS,OAAO,CACZ,SAA8B,EAC9B,KAAa,EACb,EAAW;QAEX,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACzB,MAAM,IAAI,GAAG,MAAM,EAAE,SAAS,CAAC,kBAAkB,SAAS,EAAE,EAAE;YAC1D,UAAU,EAAE;gBACR,0BAA0B,EAAE,SAAS;gBACrC,sBAAsB,EAAE,KAAK;aAChC;SACJ,CAAC,CAAC;QAEH,IAAI,CAAC;YACD,MAAM,MAAM,GAAG,EAAE,EAAE,CAAC;YACpB,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC;YAEtC,IAAI,EAAE,YAAY,CAAC,wBAAwB,EAAE,SAAS,CAAC,CAAC;YACxD,IAAI,EAAE,YAAY,CAAC,gBAAgB,EAAE,UAAU,CAAC,CAAC;YACjD,IAAI,EAAE,SAAS,CAAC,EAAE,IAAI,EAAE,cAAc,CAAC,EAAE,EAAE,CAAC,CAAC;YAE7C,KAAK,EAAE,CAAC;gBACJ,IAAI,EAAE,YAAY;gBAClB,SAAS;gBACT,KAAK;gBACL,OAAO,EAAE,SAAS;gBAClB,UAAU;gBACV,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;aACxB,CAAC,CAAC;YAEH,OAAO,MAAM,CAAC;QAClB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACX,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC;YACtC,MAAM,OAAO,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAEjE,IAAI,EAAE,YAAY,CAAC,wBAAwB,EAAE,SAAS,CAAC,CAAC;YACxD,IAAI,EAAE,YAAY,CAAC,gBAAgB,EAAE,UAAU,CAAC,CAAC;YACjD,IAAI,EAAE,SAAS,CAAC,EAAE,IAAI,EAAE,cAAc,CAAC,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC;YACzD,IAAI,EAAE,eAAe,CAAC,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC;YAEvE,KAAK,EAAE,CAAC;gBACJ,IAAI,EAAE,YAAY;gBAClB,SAAS;gBACT,KAAK;gBACL,OAAO,EAAE,SAAS;gBAClB,MAAM,EAAE,OAAO;gBACf,UAAU;gBACV,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;aACxB,CAAC,CAAC;YAEH,MAAM,GAAG,CAAC;QACd,CAAC;gBAAS,CAAC;YACP,IAAI,EAAE,GAAG,EAAE,CAAC;QAChB,CAAC;IACL,CAAC;IAED,KAAK,UAAU,YAAY,CACvB,SAA8B,EAC9B,KAAa,EACb,EAAoB;QAEpB,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACzB,MAAM,IAAI,GAAG,MAAM,EAAE,SAAS,CAAC,kBAAkB,SAAS,EAAE,EAAE;YAC1D,UAAU,EAAE;gBACR,0BAA0B,EAAE,SAAS;gBACrC,sBAAsB,EAAE,KAAK;aAChC;SACJ,CAAC,CAA
C;QAEH,IAAI,CAAC;YACD,MAAM,MAAM,GAAG,MAAM,EAAE,EAAE,CAAC;YAC1B,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC;YAEtC,IAAI,EAAE,YAAY,CAAC,wBAAwB,EAAE,SAAS,CAAC,CAAC;YACxD,IAAI,EAAE,YAAY,CAAC,gBAAgB,EAAE,UAAU,CAAC,CAAC;YACjD,IAAI,EAAE,SAAS,CAAC,EAAE,IAAI,EAAE,cAAc,CAAC,EAAE,EAAE,CAAC,CAAC;YAE7C,KAAK,EAAE,CAAC;gBACJ,IAAI,EAAE,YAAY;gBAClB,SAAS;gBACT,KAAK;gBACL,OAAO,EAAE,SAAS;gBAClB,UAAU;gBACV,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;aACxB,CAAC,CAAC;YAEH,OAAO,MAAM,CAAC;QAClB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACX,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC;YACtC,MAAM,OAAO,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAEjE,IAAI,EAAE,YAAY,CAAC,wBAAwB,EAAE,SAAS,CAAC,CAAC;YACxD,IAAI,EAAE,YAAY,CAAC,gBAAgB,EAAE,UAAU,CAAC,CAAC;YACjD,IAAI,EAAE,SAAS,CAAC,EAAE,IAAI,EAAE,cAAc,CAAC,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC;YACzD,IAAI,EAAE,eAAe,CAAC,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC;YAEvE,KAAK,EAAE,CAAC;gBACJ,IAAI,EAAE,YAAY;gBAClB,SAAS;gBACT,KAAK;gBACL,OAAO,EAAE,SAAS;gBAClB,MAAM,EAAE,OAAO;gBACf,UAAU;gBACV,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;aACxB,CAAC,CAAC;YAEH,MAAM,GAAG,CAAC;QACd,CAAC;gBAAS,CAAC;YACP,IAAI,EAAE,GAAG,EAAE,CAAC;QAChB,CAAC;IACL,CAAC;IAED,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,CAAC;AACrC,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,kBAAkB;IAC9B,OAAO;QACH,OAAO,EAAE,CAAI,GAAwB,EAAE,MAAc,EAAE,EAAW,EAAK,EAAE,CAAC,EAAE,EAAE;QAC9E,YAAY,EAAE,CAAI,GAAwB,EAAE,MAAc,EAAE,EAAoB,EAAc,EAAE,CAAC,EAAE,EAAE;KACxG,CAAC;AACN,CAAC"}
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SemanticProbe — LLM-as-a-Judge for Opaque Behavior Detection
|
|
3
|
+
*
|
|
4
|
+
* **Evolution 2: Semantic Probing**
|
|
5
|
+
*
|
|
6
|
+
* Provides a framework for using an LLM to evaluate whether
|
|
7
|
+
* a tool handler's actual runtime behavior matches its declared
|
|
8
|
+
* behavioral contract. This detects "semantic drift" — situations
|
|
9
|
+
* where the handler's output changes meaning even when the
|
|
10
|
+
* egress schema and system rules remain structurally identical.
|
|
11
|
+
*
|
|
12
|
+
* **Architecture**: This module defines the probe protocol,
|
|
13
|
+
* types, and evaluation pipeline. The actual LLM invocation
|
|
14
|
+
* is delegated to user-provided adapters — the module never
|
|
15
|
+
* makes LLM calls directly, maintaining the "no hidden
|
|
16
|
+
* network dependencies" principle.
|
|
17
|
+
*
|
|
18
|
+
* **Testing integration**: Designed to be integrated with
|
|
19
|
+
* `FusionTester.callAction()` for automated regression
|
|
20
|
+
* testing: "given these inputs, does the output semantically
|
|
21
|
+
* match the previous known-good output?"
|
|
22
|
+
*
|
|
23
|
+
* Pure-function module for probe construction and evaluation;
|
|
24
|
+
* LLM interaction is async via pluggable adapters.
|
|
25
|
+
*
|
|
26
|
+
* @module
|
|
27
|
+
*/
|
|
28
|
+
/**
|
|
29
|
+
* Configuration for semantic probing.
|
|
30
|
+
*/
|
|
31
|
+
export interface SemanticProbeConfig {
|
|
32
|
+
/** The LLM adapter to use for evaluation; each judge prompt is passed to its `evaluate()` */
|
|
33
|
+
readonly adapter: SemanticProbeAdapter;
|
|
34
|
+
/** Risk thresholds for classification (any subset of `SemanticThresholds` may be given) */
|
|
35
|
+
readonly thresholds?: Partial<SemanticThresholds>;
|
|
36
|
+
/** Maximum number of probes to run in parallel; `evaluateProbes` uses 3 when omitted */
|
|
37
|
+
readonly concurrency?: number;
|
|
38
|
+
/** Whether to include raw LLM responses in results; parsed results carry `null` otherwise */
|
|
39
|
+
readonly includeRawResponses?: boolean;
|
|
40
|
+
}
|
|
41
|
+
/**
|
|
42
|
+
* Pluggable LLM adapter for semantic evaluation.
|
|
43
|
+
*
|
|
44
|
+
* Implementations should call an LLM with the provided prompt
|
|
45
|
+
* and return the structured evaluation result.
|
|
46
|
+
*/
|
|
47
|
+
export interface SemanticProbeAdapter {
|
|
48
|
+
/** Human-readable name (e.g., 'claude-3.5', 'gpt-4o') */
|
|
49
|
+
readonly name: string;
|
|
50
|
+
/**
|
|
51
|
+
* Send a semantic evaluation prompt to the LLM.
|
|
52
|
+
*
|
|
53
|
+
* @param prompt - Complete evaluation prompt (as produced by `buildJudgePrompt`)
|
|
54
|
+
* @returns Raw LLM response text; the span from its first `{` to its last `}` is parsed as JSON
|
|
55
|
+
*/
|
|
56
|
+
evaluate(prompt: string): Promise<string>;
|
|
57
|
+
}
|
|
58
|
+
/**
|
|
59
|
+
* Thresholds for semantic drift classification.
|
|
60
|
+
*/
|
|
61
|
+
export interface SemanticThresholds {
|
|
62
|
+
/** Similarity score (0.0-1.0) below which drift is considered 'high' (default: 0.5) */
|
|
63
|
+
readonly highDriftThreshold: number;
|
|
64
|
+
/** Similarity score (0.0-1.0) below which drift is considered 'medium' (default: 0.75) */
|
|
65
|
+
readonly mediumDriftThreshold: number;
|
|
66
|
+
}
|
|
67
|
+
/**
|
|
68
|
+
* A semantic probe definition — a structured test case
|
|
69
|
+
* for LLM-based behavioral evaluation.
|
|
70
|
+
*/
|
|
71
|
+
export interface SemanticProbe {
|
|
72
|
+
/** Unique identifier for this probe */
|
|
73
|
+
readonly id: string;
|
|
74
|
+
/** Tool name being probed; `evaluateProbes` labels its report with the first probe's tool name */
|
|
75
|
+
readonly toolName: string;
|
|
76
|
+
/** Action key being probed */
|
|
77
|
+
readonly actionKey: string;
|
|
78
|
+
/** Description of what this probe tests (`createProbe` generates "Semantic probe for <toolName>.<actionKey>") */
|
|
79
|
+
readonly description: string;
|
|
80
|
+
/** Input arguments to the tool (embedded in the judge prompt via JSON.stringify) */
|
|
81
|
+
readonly input: Record<string, unknown>;
|
|
82
|
+
/** Expected output (known-good baseline; embedded in the judge prompt via JSON.stringify) */
|
|
83
|
+
readonly expectedOutput: unknown;
|
|
84
|
+
/** Actual output from the current handler (embedded in the judge prompt via JSON.stringify) */
|
|
85
|
+
readonly actualOutput: unknown;
|
|
86
|
+
/** Behavioral contract context for the judge */
|
|
87
|
+
readonly contractContext: ProbeContractContext;
|
|
88
|
+
}
|
|
89
|
+
/**
|
|
90
|
+
* Contract context injected into the LLM judge prompt.
|
|
91
|
+
*
|
|
92
|
+
* Provides the judge with enough information to evaluate
|
|
93
|
+
* whether the behavioral contract was violated.
|
|
94
|
+
*/
|
|
95
|
+
export interface ProbeContractContext {
|
|
96
|
+
/** Tool description; the judge prompt shows 'No description' when it is absent */
|
|
97
|
+
readonly description: string | undefined;
|
|
98
|
+
/** Whether the action is declared readOnly */
|
|
99
|
+
readonly readOnly: boolean;
|
|
100
|
+
/** Whether the action is declared destructive */
|
|
101
|
+
readonly destructive: boolean;
|
|
102
|
+
/** System rules that should be respected; listed as numbered items in the judge prompt */
|
|
103
|
+
readonly systemRules: readonly string[];
|
|
104
|
+
/** Schema field names (expected output shape); the judge prompt shows 'No schema declared' when empty */
|
|
105
|
+
readonly schemaKeys: readonly string[];
|
|
106
|
+
}
|
|
107
|
+
/**
|
|
108
|
+
* Result of a single semantic probe evaluation.
|
|
109
|
+
*/
|
|
110
|
+
export interface SemanticProbeResult {
|
|
111
|
+
/** The probe that was evaluated */
|
|
112
|
+
readonly probe: SemanticProbe;
|
|
113
|
+
/** Semantic similarity score (0.0 = completely different, 1.0 = identical); parsed scores are clamped to this range */
|
|
114
|
+
readonly similarityScore: number;
|
|
115
|
+
/** Drift classification derived from `similarityScore` and the configured thresholds */
|
|
116
|
+
readonly driftLevel: DriftLevel;
|
|
117
|
+
/** Whether the behavioral contract was violated (`false` when the judge omits the field) */
|
|
118
|
+
readonly contractViolated: boolean;
|
|
119
|
+
/** Specific violations detected by the judge (empty when the judge omits the field) */
|
|
120
|
+
readonly violations: readonly string[];
|
|
121
|
+
/** LLM judge's reasoning ('No reasoning provided' when the judge omits the field) */
|
|
122
|
+
readonly reasoning: string;
|
|
123
|
+
/** Raw LLM response (if configured via `includeRawResponses`) */
|
|
124
|
+
readonly rawResponse: string | null;
|
|
125
|
+
/** ISO-8601 timestamp of evaluation */
|
|
126
|
+
readonly evaluatedAt: string;
|
|
127
|
+
}
|
|
128
|
+
/** Drift level classification, used for single probe results and for aggregated reports */
|
|
129
|
+
export type DriftLevel = 'none' | 'low' | 'medium' | 'high';
|
|
130
|
+
/**
|
|
131
|
+
* Aggregated result of multiple semantic probes.
|
|
132
|
+
*/
|
|
133
|
+
export interface SemanticProbeReport {
|
|
134
|
+
/** Tool name */
|
|
135
|
+
readonly toolName: string;
|
|
136
|
+
/** All individual probe results */
|
|
137
|
+
readonly results: readonly SemanticProbeResult[];
|
|
138
|
+
/** Overall drift assessment, classified from the mean similarity score ('none' when there are no results) */
|
|
139
|
+
readonly overallDrift: DriftLevel;
|
|
140
|
+
/** Number of probe results flagged with a contract violation */
|
|
141
|
+
readonly violationCount: number;
|
|
142
|
+
/** Whether the tool is considered semantically stable (overall drift is 'none' or 'low') */
|
|
143
|
+
readonly stable: boolean;
|
|
144
|
+
/** Human-readable summary */
|
|
145
|
+
readonly summary: string;
|
|
146
|
+
/** ISO-8601 timestamp */
|
|
147
|
+
readonly completedAt: string;
|
|
148
|
+
}
|
|
149
|
+
/**
|
|
150
|
+
* Create a semantic probe from input/output pairs.
|
|
151
|
+
*
|
|
152
|
+
* @param toolName - Tool name
|
|
153
|
+
* @param actionKey - Action key
|
|
154
|
+
* @param input - Input arguments
|
|
155
|
+
* @param expectedOutput - Known-good baseline output
|
|
156
|
+
* @param actualOutput - Current handler output
|
|
157
|
+
* @param contractContext - Behavioral contract context
|
|
158
|
+
* @returns A structured semantic probe with a generated `id` and `description`
|
|
159
|
+
*/
|
|
160
|
+
export declare function createProbe(toolName: string, actionKey: string, input: Record<string, unknown>, expectedOutput: unknown, actualOutput: unknown, contractContext: ProbeContractContext): SemanticProbe;
|
|
161
|
+
/**
|
|
162
|
+
* Build the evaluation prompt for the LLM judge.
|
|
163
|
+
*
|
|
164
|
+
* The prompt is structured to elicit a JSON-formatted response
|
|
165
|
+
* with specific fields for programmatic parsing.
|
|
166
|
+
*
|
|
167
|
+
* @param probe - The semantic probe to evaluate (its input and outputs are embedded via JSON.stringify)
|
|
168
|
+
* @returns Complete evaluation prompt (markdown text ending with the requested JSON reply format)
|
|
169
|
+
*/
|
|
170
|
+
export declare function buildJudgePrompt(probe: SemanticProbe): string;
|
|
171
|
+
/**
|
|
172
|
+
* Parse the LLM judge's response into a structured result.
|
|
173
|
+
*
|
|
174
|
+
* Handles malformed responses gracefully by falling back
|
|
175
|
+
* to conservative defaults.
|
|
176
|
+
*
|
|
177
|
+
* @param probe - The probe that was evaluated
|
|
178
|
+
* @param rawResponse - Raw LLM response text (the JSON object may be wrapped in a markdown code block)
|
|
179
|
+
* @param config - Probe configuration (its thresholds and `includeRawResponses` are consulted)
|
|
180
|
+
* @returns Structured probe result
|
|
181
|
+
*/
|
|
182
|
+
export declare function parseJudgeResponse(probe: SemanticProbe, rawResponse: string, config: SemanticProbeConfig): SemanticProbeResult;
|
|
183
|
+
/**
|
|
184
|
+
* Run a complete semantic probe evaluation.
|
|
185
|
+
*
|
|
186
|
+
* @param probe - The probe to evaluate
|
|
187
|
+
* @param config - Probe configuration (includes LLM adapter)
|
|
188
|
+
* @returns Evaluation result parsed from the adapter's reply
|
|
189
|
+
*/
|
|
190
|
+
export declare function evaluateProbe(probe: SemanticProbe, config: SemanticProbeConfig): Promise<SemanticProbeResult>;
|
|
191
|
+
/**
|
|
192
|
+
* Run multiple probes and aggregate results.
|
|
193
|
+
*
|
|
194
|
+
* @param probes - Array of probes to evaluate (the first probe's `toolName` labels the report; 'unknown' when empty)
|
|
195
|
+
* @param config - Probe configuration (`concurrency` sets the batch size, 3 when omitted)
|
|
196
|
+
* @returns Aggregated report
|
|
197
|
+
*/
|
|
198
|
+
export declare function evaluateProbes(probes: readonly SemanticProbe[], config: SemanticProbeConfig): Promise<SemanticProbeReport>;
|
|
199
|
+
/**
|
|
200
|
+
* Aggregate individual probe results into a report.
|
|
201
|
+
*
|
|
202
|
+
* @param toolName - Tool name
|
|
203
|
+
* @param results - Individual probe results (an empty array yields an overall drift of 'none')
|
|
204
|
+
* @returns Aggregated report
|
|
205
|
+
*/
|
|
206
|
+
export declare function aggregateResults(toolName: string, results: readonly SemanticProbeResult[]): SemanticProbeReport;
|
|
207
|
+
//# sourceMappingURL=SemanticProbe.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"SemanticProbe.d.ts","sourceRoot":"","sources":["../../src/introspection/SemanticProbe.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAMH;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAChC,4CAA4C;IAC5C,QAAQ,CAAC,OAAO,EAAE,oBAAoB,CAAC;IACvC,yCAAyC;IACzC,QAAQ,CAAC,UAAU,CAAC,EAAE,OAAO,CAAC,kBAAkB,CAAC,CAAC;IAClD,kDAAkD;IAClD,QAAQ,CAAC,WAAW,CAAC,EAAE,MAAM,CAAC;IAC9B,sDAAsD;IACtD,QAAQ,CAAC,mBAAmB,CAAC,EAAE,OAAO,CAAC;CAC1C;AAED;;;;;GAKG;AACH,MAAM,WAAW,oBAAoB;IACjC,yDAAyD;IACzD,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB;;;;;OAKG;IACH,QAAQ,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;CAC7C;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IAC/B,kEAAkE;IAClE,QAAQ,CAAC,kBAAkB,EAAE,MAAM,CAAC;IACpC,qEAAqE;IACrE,QAAQ,CAAC,oBAAoB,EAAE,MAAM,CAAC;CACzC;AAED;;;GAGG;AACH,MAAM,WAAW,aAAa;IAC1B,uCAAuC;IACvC,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,6BAA6B;IAC7B,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,8BAA8B;IAC9B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,2CAA2C;IAC3C,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,kCAAkC;IAClC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACxC,4CAA4C;IAC5C,QAAQ,CAAC,cAAc,EAAE,OAAO,CAAC;IACjC,6CAA6C;IAC7C,QAAQ,CAAC,YAAY,EAAE,OAAO,CAAC;IAC/B,gDAAgD;IAChD,QAAQ,CAAC,eAAe,EAAE,oBAAoB,CAAC;CAClD;AAED;;;;;GAKG;AACH,MAAM,WAAW,oBAAoB;IACjC,uBAAuB;IACvB,QAAQ,CAAC,WAAW,EAAE,MAAM,GAAG,SAAS,CAAC;IACzC,8CAA8C;IAC9C,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC;IAC3B,iDAAiD;IACjD,QAAQ,CAAC,WAAW,EAAE,OAAO,CAAC;IAC9B,4CAA4C;IAC5C,QAAQ,CAAC,WAAW,EAAE,SAAS,MAAM,EAAE,CAAC;IACxC,iDAAiD;IACjD,QAAQ,CAAC,UAAU,EAAE,SAAS,MAAM,EAAE,CAAC;CAC1C;AAED;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAChC,mCAAmC;IACnC,QAAQ,CAAC,KAAK,EAAE,aAAa,CAAC;IAC9B,8EAA8E;IAC9E,QAAQ,CAAC,eAAe,EAAE,MAAM,CAAC;IACjC,2BAA2B;IAC3B,QAAQ,CAAC,UAAU,EAAE,UAAU,CAAC;IAChC,mDAAmD;IACnD,QAAQ,CAAC,gBAAgB,EAAE,OAAO,CAAC;IACnC,gDAAgD;IAChD,QAAQ,CAAC,UAAU,EAAE,SAAS,MAAM,EAAE,CAAC;IACvC,4BAA4B;IAC5B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,uCAAuC;IACvC,QAAQ,CAAC,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IACpC,uCAAuC;IACvC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;CAChC;AAED,iCAAiC;AACjC,MAAM,M
AAM,UAAU,GAAG,MAAM,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;AAE5D;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAChC,gBAAgB;IAChB,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,mCAAmC;IACnC,QAAQ,CAAC,OAAO,EAAE,SAAS,mBAAmB,EAAE,CAAC;IACjD,+BAA+B;IAC/B,QAAQ,CAAC,YAAY,EAAE,UAAU,CAAC;IAClC,oCAAoC;IACpC,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;IAChC,yDAAyD;IACzD,QAAQ,CAAC,MAAM,EAAE,OAAO,CAAC;IACzB,6BAA6B;IAC7B,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,yBAAyB;IACzB,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;CAChC;AAMD;;;;;;;;;;GAUG;AACH,wBAAgB,WAAW,CACvB,QAAQ,EAAE,MAAM,EAChB,SAAS,EAAE,MAAM,EACjB,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAC9B,cAAc,EAAE,OAAO,EACvB,YAAY,EAAE,OAAO,EACrB,eAAe,EAAE,oBAAoB,GACtC,aAAa,CAaf;AAED;;;;;;;;GAQG;AACH,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,aAAa,GAAG,MAAM,CAqD7D;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,kBAAkB,CAC9B,KAAK,EAAE,aAAa,EACpB,WAAW,EAAE,MAAM,EACnB,MAAM,EAAE,mBAAmB,GAC5B,mBAAmB,CAoCrB;AAED;;;;;;GAMG;AACH,wBAAsB,aAAa,CAC/B,KAAK,EAAE,aAAa,EACpB,MAAM,EAAE,mBAAmB,GAC5B,OAAO,CAAC,mBAAmB,CAAC,CAI9B;AAED;;;;;;GAMG;AACH,wBAAsB,cAAc,CAChC,MAAM,EAAE,SAAS,aAAa,EAAE,EAChC,MAAM,EAAE,mBAAmB,GAC5B,OAAO,CAAC,mBAAmB,CAAC,CAc9B;AAED;;;;;;GAMG;AACH,wBAAgB,gBAAgB,CAC5B,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,SAAS,mBAAmB,EAAE,GACxC,mBAAmB,CA8BrB"}
|
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SemanticProbe — LLM-as-a-Judge for Opaque Behavior Detection
|
|
3
|
+
*
|
|
4
|
+
* **Evolution 2: Semantic Probing**
|
|
5
|
+
*
|
|
6
|
+
* Provides a framework for using an LLM to evaluate whether
|
|
7
|
+
* a tool handler's actual runtime behavior matches its declared
|
|
8
|
+
* behavioral contract. This detects "semantic drift" — situations
|
|
9
|
+
* where the handler's output changes meaning even when the
|
|
10
|
+
* egress schema and system rules remain structurally identical.
|
|
11
|
+
*
|
|
12
|
+
* **Architecture**: This module defines the probe protocol,
|
|
13
|
+
* types, and evaluation pipeline. The actual LLM invocation
|
|
14
|
+
* is delegated to user-provided adapters — the module never
|
|
15
|
+
* makes LLM calls directly, maintaining the "no hidden
|
|
16
|
+
* network dependencies" principle.
|
|
17
|
+
*
|
|
18
|
+
* **Testing integration**: Designed to be integrated with
|
|
19
|
+
* `FusionTester.callAction()` for automated regression
|
|
20
|
+
* testing: "given these inputs, does the output semantically
|
|
21
|
+
* match the previous known-good output?"
|
|
22
|
+
*
|
|
23
|
+
* Pure-function module for probe construction and evaluation;
|
|
24
|
+
* LLM interaction is async via pluggable adapters.
|
|
25
|
+
*
|
|
26
|
+
* @module
|
|
27
|
+
*/
|
|
28
|
+
// ============================================================================
|
|
29
|
+
// Probe Construction
|
|
30
|
+
// ============================================================================
|
|
31
|
+
/** Millisecond stamp used by the most recently created probe (0 before the first one). */
let lastProbeStamp = 0;
/**
 * Create a semantic probe from input/output pairs.
 *
 * The probe id has the form `<toolName>::<actionKey>::<stamp>`. The stamp is
 * the current epoch time in milliseconds, bumped past the previous probe's
 * stamp when necessary so that probes created within the same millisecond
 * still receive distinct ids.
 *
 * @param toolName - Tool name
 * @param actionKey - Action key
 * @param input - Input arguments
 * @param expectedOutput - Known-good baseline output
 * @param actualOutput - Current handler output
 * @param contractContext - Behavioral contract context
 * @returns A structured semantic probe
 */
export function createProbe(toolName, actionKey, input, expectedOutput, actualOutput, contractContext) {
    // Date.now() only has millisecond resolution, so a tight loop would
    // otherwise hand the same "unique" id to several probes.
    lastProbeStamp = Math.max(Date.now(), lastProbeStamp + 1);
    const id = `${toolName}::${actionKey}::${lastProbeStamp}`;
    return {
        id,
        toolName,
        actionKey,
        description: `Semantic probe for ${toolName}.${actionKey}`,
        input,
        expectedOutput,
        actualOutput,
        contractContext,
    };
}
|
|
55
|
+
/**
|
|
56
|
+
* Build the evaluation prompt for the LLM judge.
|
|
57
|
+
*
|
|
58
|
+
* The prompt is structured to elicit a JSON-formatted response
|
|
59
|
+
* with specific fields for programmatic parsing; input and outputs are embedded via JSON.stringify.
|
|
60
|
+
*
|
|
61
|
+
* @param probe - The semantic probe to evaluate (input and outputs must be serializable by JSON.stringify, which throws on circular references and BigInt values)
|
|
62
|
+
* @returns Complete evaluation prompt (markdown text ending with the requested JSON reply format)
|
|
63
|
+
*/
|
|
64
|
+
export function buildJudgePrompt(probe) {
|
|
65
|
+
return `You are a semantic evaluation judge for an MCP (Model Context Protocol) tool.
|
|
66
|
+
|
|
67
|
+
Your task is to compare two outputs from the same tool handler and determine:
|
|
68
|
+
1. Whether they are semantically equivalent
|
|
69
|
+
2. Whether the current output violates the tool's behavioral contract
|
|
70
|
+
|
|
71
|
+
## Tool Information
|
|
72
|
+
- **Tool**: ${probe.toolName}
|
|
73
|
+
- **Action**: ${probe.actionKey}
|
|
74
|
+
- **Description**: ${probe.contractContext.description ?? 'No description'}
|
|
75
|
+
- **Read-Only**: ${probe.contractContext.readOnly}
|
|
76
|
+
- **Destructive**: ${probe.contractContext.destructive}
|
|
77
|
+
|
|
78
|
+
## Behavioral Contract
|
|
79
|
+
${probe.contractContext.systemRules.length > 0
|
|
80
|
+
? `### System Rules\n${probe.contractContext.systemRules.map((r, i) => `${i + 1}. ${r}`).join('\n')}`
|
|
81
|
+
: 'No system rules declared.'}
|
|
82
|
+
|
|
83
|
+
### Expected Output Schema Fields
|
|
84
|
+
${probe.contractContext.schemaKeys.join(', ') || 'No schema declared'}
|
|
85
|
+
|
|
86
|
+
## Input Arguments
|
|
87
|
+
\`\`\`json
|
|
88
|
+
${JSON.stringify(probe.input, null, 2)}
|
|
89
|
+
\`\`\`
|
|
90
|
+
|
|
91
|
+
## Expected Output (Baseline)
|
|
92
|
+
\`\`\`json
|
|
93
|
+
${JSON.stringify(probe.expectedOutput, null, 2)}
|
|
94
|
+
\`\`\`
|
|
95
|
+
|
|
96
|
+
## Actual Output (Current)
|
|
97
|
+
\`\`\`json
|
|
98
|
+
${JSON.stringify(probe.actualOutput, null, 2)}
|
|
99
|
+
\`\`\`
|
|
100
|
+
|
|
101
|
+
## Evaluation Instructions
|
|
102
|
+
Compare the Expected Output with the Actual Output. Consider:
|
|
103
|
+
- Are the outputs semantically equivalent (same meaning, even if format differs)?
|
|
104
|
+
- Does the Actual Output violate any system rules?
|
|
105
|
+
- Does the Actual Output return fields not in the expected schema?
|
|
106
|
+
- Has the behavior meaningfully changed from the baseline?
|
|
107
|
+
|
|
108
|
+
Respond with ONLY a JSON object in this exact format:
|
|
109
|
+
\`\`\`json
|
|
110
|
+
{
|
|
111
|
+
"similarityScore": <number 0.0-1.0>,
|
|
112
|
+
"contractViolated": <boolean>,
|
|
113
|
+
"violations": [<string descriptions of violations>],
|
|
114
|
+
"reasoning": "<brief explanation of your assessment>"
|
|
115
|
+
}
|
|
116
|
+
\`\`\``;
|
|
117
|
+
}
|
|
118
|
+
/**
 * Parse the LLM judge's response into a structured result.
 *
 * Handles malformed responses gracefully by falling back
 * to conservative defaults. Fields the judge returns with the wrong JSON
 * type are normalized so the result always matches its declared shape:
 * `contractViolated` is a boolean, `violations` an array of strings, and
 * `reasoning` a string.
 *
 * @param probe - The probe that was evaluated
 * @param rawResponse - Raw LLM response text
 * @param config - Probe configuration
 * @returns Structured probe result
 */
export function parseJudgeResponse(probe, rawResponse, config) {
    const thresholds = resolveThresholds(config);
    try {
        // Greedy match from the first `{` to the last `}` so that a markdown
        // code block or prose surrounding the JSON object is ignored.
        const jsonMatch = rawResponse.match(/\{[\s\S]*\}/);
        if (!jsonMatch) {
            return fallbackResult(probe, rawResponse, config);
        }
        const parsed = JSON.parse(jsonMatch[0]);
        const similarityScore = typeof parsed.similarityScore === 'number'
            ? Math.max(0, Math.min(1, parsed.similarityScore))
            : 0.5;
        const driftLevel = classifyDrift(similarityScore, thresholds);

        // A judge may answer "false" as a string; passing that through would
        // be truthy and counted as a violation by anything that filters on it.
        const flag = parsed.contractViolated;
        const contractViolated = flag === true
            || (typeof flag === 'string' && flag.trim().toLowerCase() === 'true');

        // Values come from JSON.parse, so JSON.stringify cannot throw on them.
        const asText = (value) => (typeof value === 'string' ? value : JSON.stringify(value));
        const rawViolations = parsed.violations;
        let violations = [];
        if (Array.isArray(rawViolations)) {
            violations = rawViolations.filter((item) => item != null).map(asText);
        }
        else if (typeof rawViolations === 'string' && rawViolations !== '') {
            violations = [rawViolations];
        }

        const reasoning = parsed.reasoning == null
            ? 'No reasoning provided'
            : asText(parsed.reasoning);

        return {
            probe,
            similarityScore,
            driftLevel,
            contractViolated,
            violations,
            reasoning,
            rawResponse: config.includeRawResponses ? rawResponse : null,
            evaluatedAt: new Date().toISOString(),
        };
    }
    catch {
        // Invalid JSON (or a non-string response) is treated like a missing
        // JSON object.
        return fallbackResult(probe, rawResponse, config);
    }
}
|
|
157
|
+
/**
 * Evaluate a single probe end to end: build the judge prompt, send it
 * through the configured adapter, and parse the adapter's reply.
 *
 * @param probe - The probe to evaluate
 * @param config - Probe configuration (includes LLM adapter)
 * @returns Evaluation result
 */
export async function evaluateProbe(probe, config) {
    const judgePrompt = buildJudgePrompt(probe);
    const { adapter } = config;
    const judgeReply = await adapter.evaluate(judgePrompt);
    return parseJudgeResponse(probe, judgeReply, config);
}
|
|
169
|
+
/**
 * Run multiple probes and aggregate results.
 *
 * Probes are processed in consecutive batches of `config.concurrency`
 * (default 3). The probes inside a batch run in parallel; the next batch
 * starts only after the current one has finished. Because each batch is
 * awaited with `Promise.all`, a rejection from any single probe rejects
 * the whole call.
 *
 * The batch size is floored and clamped to at least 1: a value of 0, a
 * negative number, or NaN would otherwise stall the loop cursor (infinite
 * loop) or silently skip probes.
 *
 * The report's tool name is taken from the first probe, or 'unknown' when
 * the probe list is empty.
 *
 * @param probes - Array of probes to evaluate
 * @param config - Probe configuration
 * @returns Aggregated report
 */
export async function evaluateProbes(probes, config) {
    const requested = Math.floor(Number(config.concurrency ?? 3));
    // NaN fails the comparison and lands on 1; Infinity passes (single batch).
    const concurrency = requested >= 1 ? requested : 1;
    // Run probes with concurrency control
    const results = [];
    for (let i = 0; i < probes.length; i += concurrency) {
        const batch = probes.slice(i, i + concurrency);
        const batchResults = await Promise.all(batch.map(probe => evaluateProbe(probe, config)));
        results.push(...batchResults);
    }
    return aggregateResults(probes[0]?.toolName ?? 'unknown', results);
}
|
|
187
|
+
/**
 * Aggregate individual probe results into a report.
 *
 * The overall drift level is obtained by classifying the mean
 * `similarityScore` of all results; with no results it is 'none' and the
 * report is considered stable. A report is stable when the overall drift
 * is 'none' or 'low'. `violationCount` is the number of results whose
 * `contractViolated` is truthy; it is reported but does not influence
 * `stable`.
 *
 * @param toolName - Tool name
 * @param results - Individual probe results
 * @param thresholds - Optional drift thresholds (`highDriftThreshold`,
 *   `mediumDriftThreshold`) used for the overall classification. Any
 *   field left out falls back to the module's default thresholds, so
 *   omitting the argument keeps the previous behavior.
 * @returns Aggregated report
 */
export function aggregateResults(toolName, results, thresholds) {
    const violationCount = results.filter(r => r.contractViolated).length;
    const avgSimilarity = results.length > 0
        ? results.reduce((sum, r) => sum + r.similarityScore, 0) / results.length
        : 1.0;
    const overallDrift = results.length > 0
        ? classifyDrift(avgSimilarity, {
            // Per-field fallback keeps a partially specified override usable.
            highDriftThreshold: thresholds?.highDriftThreshold ?? DEFAULT_THRESHOLDS.highDriftThreshold,
            mediumDriftThreshold: thresholds?.mediumDriftThreshold ?? DEFAULT_THRESHOLDS.mediumDriftThreshold,
        })
        : 'none';
    const stable = overallDrift === 'none' || overallDrift === 'low';
    const summary = results.length === 0
        ? 'No probes evaluated.'
        : `${results.length} probes evaluated. Avg similarity: ${(avgSimilarity * 100).toFixed(1)}%. Drift: ${overallDrift}. Violations: ${violationCount}. Status: ${stable ? 'STABLE' : 'UNSTABLE'}`;
    return {
        toolName,
        results,
        overallDrift,
        violationCount,
        stable,
        summary,
        completedAt: new Date().toISOString(),
    };
}
|
|
221
|
+
// ============================================================================
|
|
222
|
+
// Internals
|
|
223
|
+
// ============================================================================
|
|
224
|
+
/**
 * Default similarity cut-offs used when the probe configuration does not
 * supply its own. Frozen because the object is shared by every call that
 * falls back to it; an accidental write would silently change all later
 * drift classifications.
 */
const DEFAULT_THRESHOLDS = Object.freeze({
    highDriftThreshold: 0.5,
    mediumDriftThreshold: 0.75,
});
|
|
228
|
+
/**
 * Produce a complete threshold pair for a probe configuration.
 *
 * Each field is read from `config.thresholds` when that value is neither
 * `null` nor `undefined`; otherwise the matching DEFAULT_THRESHOLDS entry
 * is used. A missing `config.thresholds` object is tolerated.
 *
 * @param config - Probe configuration
 * @returns Object with `highDriftThreshold` and `mediumDriftThreshold`
 */
function resolveThresholds(config) {
    const overrides = config.thresholds;
    const pick = (field) => overrides?.[field] ?? DEFAULT_THRESHOLDS[field];
    const highDriftThreshold = pick('highDriftThreshold');
    const mediumDriftThreshold = pick('mediumDriftThreshold');
    return { highDriftThreshold, mediumDriftThreshold };
}
|
|
234
|
+
/**
 * Map a similarity score onto a drift level.
 *
 * Checks run from the strictest bound downward and the first one met wins:
 * at least 0.95 is 'none', at least `thresholds.mediumDriftThreshold` is
 * 'low', at least `thresholds.highDriftThreshold` is 'medium'. Anything
 * else, including NaN, is 'high'.
 *
 * @param similarity - Similarity score to classify
 * @param thresholds - Object with `mediumDriftThreshold` and `highDriftThreshold`
 * @returns One of 'none', 'low', 'medium', 'high'
 */
function classifyDrift(similarity, thresholds) {
    let level = 'high';
    // `thresholds` is only read once the fixed 0.95 check has failed.
    if (similarity >= 0.95) {
        level = 'none';
    }
    else if (similarity >= thresholds.mediumDriftThreshold) {
        level = 'low';
    }
    else if (similarity >= thresholds.highDriftThreshold) {
        level = 'medium';
    }
    return level;
}
|
|
243
|
+
/**
 * Build the placeholder result used when the judge's reply cannot be
 * turned into a structured verdict.
 *
 * The placeholder carries a fixed mid-range score (0.5) with 'medium'
 * drift, reports no contract violation, and records the parse failure in
 * `violations` and `reasoning`. The raw reply is attached only when
 * `config.includeRawResponses` is truthy; otherwise `rawResponse` is null.
 *
 * @param probe - The probe that was evaluated
 * @param rawResponse - Raw LLM response text
 * @param config - Probe configuration
 * @returns Structured probe result with fallback values
 */
function fallbackResult(probe, rawResponse, config) {
    const retainedRaw = config.includeRawResponses ? rawResponse : null;
    const evaluatedAt = new Date().toISOString();
    return {
        probe,
        similarityScore: 0.5,
        driftLevel: 'medium',
        contractViolated: false,
        violations: ['Unable to parse LLM judge response'],
        reasoning: 'Fallback: LLM response could not be parsed as JSON',
        rawResponse: retainedRaw,
        evaluatedAt,
    };
}
|
|
255
|
+
//# sourceMappingURL=SemanticProbe.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"SemanticProbe.js","sourceRoot":"","sources":["../../src/introspection/SemanticProbe.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAuIH,+EAA+E;AAC/E,qBAAqB;AACrB,+EAA+E;AAE/E;;;;;;;;;;GAUG;AACH,MAAM,UAAU,WAAW,CACvB,QAAgB,EAChB,SAAiB,EACjB,KAA8B,EAC9B,cAAuB,EACvB,YAAqB,EACrB,eAAqC;IAErC,MAAM,EAAE,GAAG,GAAG,QAAQ,KAAK,SAAS,KAAK,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC;IAEtD,OAAO;QACH,EAAE;QACF,QAAQ;QACR,SAAS;QACT,WAAW,EAAE,sBAAsB,QAAQ,IAAI,SAAS,EAAE;QAC1D,KAAK;QACL,cAAc;QACd,YAAY;QACZ,eAAe;KAClB,CAAC;AACN,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,gBAAgB,CAAC,KAAoB;IACjD,OAAO;;;;;;;cAOG,KAAK,CAAC,QAAQ;gBACZ,KAAK,CAAC,SAAS;qBACV,KAAK,CAAC,eAAe,CAAC,WAAW,IAAI,gBAAgB;mBACvD,KAAK,CAAC,eAAe,CAAC,QAAQ;qBAC5B,KAAK,CAAC,eAAe,CAAC,WAAW;;;EAGpD,KAAK,CAAC,eAAe,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC;QACtC,CAAC,CAAC,qBAAqB,KAAK,CAAC,eAAe,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE;QACrG,CAAC,CAAC,2BAA2B;;;EAGnC,KAAK,CAAC,eAAe,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,oBAAoB;;;;EAInE,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;;;;;EAKpC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,cAAc,EAAE,IAAI,EAAE,CAAC,CAAC;;;;;EAK7C,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,YAAY,EAAE,IAAI,EAAE,CAAC,CAAC;;;;;;;;;;;;;;;;;;OAkBtC,CAAC;AACR,CAAC;AAED;;;;;;;;;;GAUG;AACH,MAAM,UAAU,kBAAkB,CAC9B,KAAoB,EACpB,WAAmB,EACnB,MAA2B;IAE3B,MAAM,UAAU,GAAG,iBAAiB,CAAC,MAAM,CAAC,CAAC;IAE7C,IAAI,CAAC;QACD,2DAA2D;QAC3D,MAAM,SAAS,GAAG,WAAW,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;QACnD,IAAI,CAAC,SAAS,EAAE,CAAC;YACb,OAAO,cAAc,CAAC,KAAK,EAAE,WAAW,EAAE,MAAM,CAAC,CAAC;QACtD,CAAC;QAED,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,CAKrC,CAAC;QAEF,MAAM,eAAe,GAAG,OAAO,MAAM,CAAC,eAAe,KAAK,QAAQ;YAC9D,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,CAAC,eAAe,CAAC,CAAC;YAClD,CAAC,CAAC,GAAG,CAAC;QAEV,MAAM,UAAU,GAAG,aAAa,CAAC,eAAe,EAAE,UAAU,CAAC,CAAC;QAE9D,OAAO;YACH,KAAK;YACL,eAAe;YACf,UAAU;YACV,gBAAgB,EAAE,MAAM,CAAC,gBAAg
B,IAAI,KAAK;YAClD,UAAU,EAAE,MAAM,CAAC,UAAU,IAAI,EAAE;YACnC,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,uBAAuB;YACtD,WAAW,EAAE,MAAM,CAAC,mBAAmB,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI;YAC5D,WAAW,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;SACxC,CAAC;IACN,CAAC;IAAC,MAAM,CAAC;QACL,OAAO,cAAc,CAAC,KAAK,EAAE,WAAW,EAAE,MAAM,CAAC,CAAC;IACtD,CAAC;AACL,CAAC;AAED;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CAC/B,KAAoB,EACpB,MAA2B;IAE3B,MAAM,MAAM,GAAG,gBAAgB,CAAC,KAAK,CAAC,CAAC;IACvC,MAAM,WAAW,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;IAC1D,OAAO,kBAAkB,CAAC,KAAK,EAAE,WAAW,EAAE,MAAM,CAAC,CAAC;AAC1D,CAAC;AAED;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAChC,MAAgC,EAChC,MAA2B;IAE3B,MAAM,WAAW,GAAG,MAAM,CAAC,WAAW,IAAI,CAAC,CAAC;IAE5C,sCAAsC;IACtC,MAAM,OAAO,GAA0B,EAAE,CAAC;IAC1C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,IAAI,WAAW,EAAE,CAAC;QAClD,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC,CAAC;QAC/C,MAAM,YAAY,GAAG,MAAM,OAAO,CAAC,GAAG,CAClC,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,aAAa,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC,CACnD,CAAC;QACF,OAAO,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAAC;IAClC,CAAC;IAED,OAAO,gBAAgB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,QAAQ,IAAI,SAAS,EAAE,OAAO,CAAC,CAAC;AACvE,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,gBAAgB,CAC5B,QAAgB,EAChB,OAAuC;IAEvC,MAAM,cAAc,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,gBAAgB,CAAC,CAAC,MAAM,CAAC;IACtE,MAAM,aAAa,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC;QACpC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM;QACzE,CAAC,CAAC,GAAG,CAAC;IAEV,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC;QACnC,CAAC,CAAC,aAAa,CAAC,aAAa,EAAE;YAC3B,kBAAkB,EAAE,GAAG;YACvB,oBAAoB,EAAE,IAAI;SAC7B,CAAC;QACF,CAAC,CAAC,MAAoB,CAAC;IAE3B,MAAM,MAAM,GAAG,YAAY,KAAK,MAAM,IAAI,YAAY,KAAK,KAAK,CAAC;IAEjE,MAAM,OAAO,GAAG,OAAO,CAAC,MAAM,KAAK,CAAC;QAChC,CAAC,CAAC,sBAAsB;QACxB,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,sCAAsC,CAAC,aAAa,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK;YAC9F,UAAU,YAAY,iBAAiB,cAAc,IAAI;YACzD,WAAW,MAAM
,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,UAAU,EAAE,CAAC;IAEhD,OAAO;QACH,QAAQ;QACR,OAAO;QACP,YAAY;QACZ,cAAc;QACd,MAAM;QACN,OAAO;QACP,WAAW,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;KACxC,CAAC;AACN,CAAC;AAED,+EAA+E;AAC/E,YAAY;AACZ,+EAA+E;AAE/E,MAAM,kBAAkB,GAAuB;IAC3C,kBAAkB,EAAE,GAAG;IACvB,oBAAoB,EAAE,IAAI;CAC7B,CAAC;AAEF,SAAS,iBAAiB,CAAC,MAA2B;IAClD,OAAO;QACH,kBAAkB,EAAE,MAAM,CAAC,UAAU,EAAE,kBAAkB,IAAI,kBAAkB,CAAC,kBAAkB;QAClG,oBAAoB,EAAE,MAAM,CAAC,UAAU,EAAE,oBAAoB,IAAI,kBAAkB,CAAC,oBAAoB;KAC3G,CAAC;AACN,CAAC;AAED,SAAS,aAAa,CAAC,UAAkB,EAAE,UAA8B;IACrE,IAAI,UAAU,IAAI,IAAI;QAAE,OAAO,MAAM,CAAC;IACtC,IAAI,UAAU,IAAI,UAAU,CAAC,oBAAoB;QAAE,OAAO,KAAK,CAAC;IAChE,IAAI,UAAU,IAAI,UAAU,CAAC,kBAAkB;QAAE,OAAO,QAAQ,CAAC;IACjE,OAAO,MAAM,CAAC;AAClB,CAAC;AAED,SAAS,cAAc,CACnB,KAAoB,EACpB,WAAmB,EACnB,MAA2B;IAE3B,OAAO;QACH,KAAK;QACL,eAAe,EAAE,GAAG;QACpB,UAAU,EAAE,QAAQ;QACpB,gBAAgB,EAAE,KAAK;QACvB,UAAU,EAAE,CAAC,oCAAoC,CAAC;QAClD,SAAS,EAAE,oDAAoD;QAC/D,WAAW,EAAE,MAAM,CAAC,mBAAmB,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI;QAC5D,WAAW,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;KACxC,CAAC;AACN,CAAC"}
|