erosolar-cli 1.7.23 → 1.7.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/responseVerifier.d.ts +50 -182
- package/dist/core/responseVerifier.d.ts.map +1 -1
- package/dist/core/responseVerifier.js +369 -1671
- package/dist/core/responseVerifier.js.map +1 -1
- package/dist/shell/interactiveShell.d.ts.map +1 -1
- package/dist/shell/interactiveShell.js +12 -13
- package/dist/shell/interactiveShell.js.map +1 -1
- package/package.json +1 -1
|
@@ -1,221 +1,89 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* AI Response Verification System
|
|
2
|
+
* AI Response Verification System - Isolated Runtime Only
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
* 2. Generating runtime verification tests
|
|
7
|
-
* 3. Executing tests to verify claims
|
|
8
|
-
* 4. Reporting verification results
|
|
4
|
+
* Verifies assistant claims by spawning fresh CLI instances and running
|
|
5
|
+
* actual runtime tests. All verification happens in isolation.
|
|
9
6
|
*
|
|
10
7
|
* @license MIT
|
|
11
8
|
*/
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
*/
|
|
15
|
-
export type ClaimType = 'file_created' | 'file_modified' | 'file_deleted' | 'command_executed' | 'code_compiles' | 'tests_pass' | 'content_contains' | 'dependency_installed' | 'service_running' | 'url_accessible' | 'git_committed' | 'package_published' | 'api_response' | 'database_updated' | 'config_changed' | 'env_var_set' | 'permission_granted' | 'data_transformed' | 'error_fixed' | 'feature_implemented' | 'refactor_complete' | 'generic';
|
|
16
|
-
/**
|
|
17
|
-
* A verifiable claim extracted from assistant response
|
|
18
|
-
*/
|
|
19
|
-
export interface VerifiableClaim {
|
|
20
|
-
type: ClaimType;
|
|
9
|
+
export interface IsolatedRuntimeTest {
|
|
10
|
+
id: string;
|
|
21
11
|
description: string;
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
count?: number;
|
|
29
|
-
package?: string;
|
|
30
|
-
port?: number;
|
|
31
|
-
name?: string;
|
|
32
|
-
url?: string;
|
|
33
|
-
content?: string;
|
|
34
|
-
key?: string;
|
|
35
|
-
value?: unknown;
|
|
36
|
-
status?: number;
|
|
37
|
-
body?: string;
|
|
38
|
-
input?: string;
|
|
39
|
-
output?: string;
|
|
40
|
-
checkCommand?: string;
|
|
41
|
-
mode?: string;
|
|
42
|
-
[key: string]: unknown;
|
|
43
|
-
};
|
|
12
|
+
commands: string[];
|
|
13
|
+
shellCommands?: string[];
|
|
14
|
+
expectedOutputs?: string[];
|
|
15
|
+
expectedBehavior?: string;
|
|
16
|
+
timeout?: number;
|
|
17
|
+
requiresBuild?: boolean;
|
|
44
18
|
}
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
19
|
+
export interface IsolatedRuntimeResult {
|
|
20
|
+
test: IsolatedRuntimeTest;
|
|
21
|
+
success: boolean;
|
|
22
|
+
output: string;
|
|
23
|
+
errors: string;
|
|
24
|
+
exitCode: number | null;
|
|
25
|
+
duration: number;
|
|
26
|
+
matchedPatterns: string[];
|
|
27
|
+
unmatchedPatterns: string[];
|
|
28
|
+
llmAssessment?: string;
|
|
55
29
|
}
|
|
56
|
-
/**
|
|
57
|
-
* Overall verification report
|
|
58
|
-
*/
|
|
59
|
-
export interface VerificationReport {
|
|
60
|
-
responseId: string;
|
|
61
|
-
timestamp: string;
|
|
62
|
-
claims: VerifiableClaim[];
|
|
63
|
-
results: ClaimVerificationResult[];
|
|
64
|
-
summary: {
|
|
65
|
-
total: number;
|
|
66
|
-
verified: number;
|
|
67
|
-
failed: number;
|
|
68
|
-
inconclusive: number;
|
|
69
|
-
};
|
|
70
|
-
overallVerdict: 'verified' | 'partially_verified' | 'unverified' | 'contradicted';
|
|
71
|
-
}
|
|
72
|
-
/**
|
|
73
|
-
* Extract claims using LLM (for complex responses).
|
|
74
|
-
* Falls back to pattern matching if LLM extraction fails.
|
|
75
|
-
*/
|
|
76
|
-
export declare function extractClaimsWithLLM(response: string, llmCall?: (prompt: string) => Promise<string>): Promise<VerifiableClaim[]>;
|
|
77
|
-
/**
|
|
78
|
-
* Extract verifiable claims from an assistant response.
|
|
79
|
-
* Covers common patterns for file operations, builds, tests, git, and npm.
|
|
80
|
-
*/
|
|
81
|
-
export declare function extractClaims(response: string): VerifiableClaim[];
|
|
82
|
-
/**
|
|
83
|
-
* Generate a verification test for a claim
|
|
84
|
-
*/
|
|
85
|
-
export declare function generateVerificationTest(claim: VerifiableClaim): () => Promise<ClaimVerificationResult>;
|
|
86
|
-
/**
|
|
87
|
-
* Verify all claims in an assistant response using LLM-based semantic analysis.
|
|
88
|
-
* Requires a VerificationContext with an llmVerifier function.
|
|
89
|
-
* All claim extraction and verification is done via LLM.
|
|
90
|
-
*/
|
|
91
|
-
export declare function verifyResponse(response: string, context: VerificationContext, responseId?: string): Promise<VerificationReport>;
|
|
92
|
-
/**
|
|
93
|
-
* Format a verification report for display
|
|
94
|
-
*/
|
|
95
|
-
export declare function formatVerificationReport(report: VerificationReport): string;
|
|
96
|
-
/**
|
|
97
|
-
* Quick verification - returns true if response claims are valid.
|
|
98
|
-
* Requires a VerificationContext with llmVerifier for LLM-based semantic analysis.
|
|
99
|
-
*/
|
|
100
|
-
export declare function quickVerify(response: string, context: VerificationContext): Promise<boolean>;
|
|
101
|
-
/**
|
|
102
|
-
* Verification strategy types
|
|
103
|
-
*/
|
|
104
|
-
export type VerificationStrategy = 'runtime' | 'filesystem' | 'network' | 'llm' | 'semantic' | 'comparison' | 'manual';
|
|
105
|
-
/**
|
|
106
|
-
* Extended verification context for complex claims
|
|
107
|
-
*/
|
|
108
30
|
export interface VerificationContext {
|
|
109
31
|
workingDirectory: string;
|
|
110
|
-
previousState?: Record<string, unknown>;
|
|
111
|
-
currentState?: Record<string, unknown>;
|
|
112
32
|
conversationHistory?: string[];
|
|
113
33
|
llmVerifier?: (prompt: string) => Promise<string>;
|
|
114
34
|
}
|
|
115
|
-
|
|
116
|
-
* Verify a claim using LLM when runtime verification isn't possible
|
|
117
|
-
*/
|
|
118
|
-
export declare function verifyClaimWithLLM(claim: VerifiableClaim, context: VerificationContext): Promise<ClaimVerificationResult>;
|
|
119
|
-
/**
|
|
120
|
-
* Generate verification test for extended claim types
|
|
121
|
-
*/
|
|
122
|
-
export declare function generateExtendedVerificationTest(claim: VerifiableClaim, context: VerificationContext): () => Promise<ClaimVerificationResult>;
|
|
123
|
-
/**
|
|
124
|
-
* Comprehensive verification using LLM-based semantic analysis.
|
|
125
|
-
* Requires an LLM verifier - all claims are verified through LLM semantic analysis.
|
|
126
|
-
*/
|
|
127
|
-
export declare function verifyResponseComprehensive(response: string, context: VerificationContext, responseId?: string): Promise<VerificationReport>;
|
|
128
|
-
/**
|
|
129
|
-
* Determine the best verification strategy for a claim
|
|
130
|
-
*/
|
|
131
|
-
export declare function getVerificationStrategy(claim: VerifiableClaim): VerificationStrategy;
|
|
132
|
-
/**
|
|
133
|
-
* LLM-generated verification test interface
|
|
134
|
-
*/
|
|
135
|
-
export interface GeneratedVerificationTest {
|
|
136
|
-
claim: VerifiableClaim;
|
|
137
|
-
testType: 'shell' | 'javascript' | 'api';
|
|
138
|
-
code: string;
|
|
139
|
-
description: string;
|
|
140
|
-
expectedOutcome: string;
|
|
141
|
-
safetyCheck: boolean;
|
|
142
|
-
}
|
|
143
|
-
/**
|
|
144
|
-
* Generate verification code using LLM
|
|
145
|
-
*/
|
|
146
|
-
export declare function generateVerificationCode(claim: VerifiableClaim, context: VerificationContext): Promise<GeneratedVerificationTest | null>;
|
|
147
|
-
/**
|
|
148
|
-
* Validate that generated code is safe to execute
|
|
149
|
-
*/
|
|
150
|
-
export declare function validateGeneratedCode(test: GeneratedVerificationTest): {
|
|
151
|
-
safe: boolean;
|
|
152
|
-
reason: string;
|
|
153
|
-
};
|
|
154
|
-
/**
|
|
155
|
-
* Execute a generated verification test
|
|
156
|
-
*/
|
|
157
|
-
export declare function executeGeneratedTest(test: GeneratedVerificationTest, context: VerificationContext): Promise<ClaimVerificationResult>;
|
|
158
|
-
/**
|
|
159
|
-
* Verify a claim using LLM-generated runtime test
|
|
160
|
-
*/
|
|
161
|
-
export declare function verifyWithGeneratedTest(claim: VerifiableClaim, context: VerificationContext): Promise<ClaimVerificationResult>;
|
|
162
|
-
/**
|
|
163
|
-
* Full verification using LLM-generated tests
|
|
164
|
-
* This is the most powerful verification method - LLM decides HOW to verify each claim
|
|
165
|
-
*/
|
|
166
|
-
export declare function verifyResponseWithGeneratedTests(response: string, context: VerificationContext, responseId?: string): Promise<VerificationReport>;
|
|
167
|
-
/**
|
|
168
|
-
* Hybrid verification - uses generated tests when available, falls back to predefined tests
|
|
169
|
-
*/
|
|
170
|
-
export declare function verifyResponseHybrid(response: string, context: VerificationContext, responseId?: string): Promise<VerificationReport>;
|
|
171
|
-
export interface UniversalClaim {
|
|
35
|
+
export interface Claim {
|
|
172
36
|
id: string;
|
|
173
37
|
statement: string;
|
|
174
38
|
category: string;
|
|
175
39
|
verifiable: boolean;
|
|
176
|
-
verificationApproach: string;
|
|
177
40
|
priority: 'critical' | 'high' | 'medium' | 'low';
|
|
178
41
|
context: Record<string, unknown>;
|
|
179
42
|
}
|
|
180
|
-
export interface
|
|
181
|
-
claim: UniversalClaim;
|
|
43
|
+
export interface ClaimVerificationResult {
|
|
44
|
+
claim: Claim;
|
|
182
45
|
verified: boolean;
|
|
183
|
-
confidence:
|
|
184
|
-
method: string;
|
|
46
|
+
confidence: 'high' | 'medium' | 'low';
|
|
185
47
|
evidence: string;
|
|
186
|
-
|
|
187
|
-
|
|
48
|
+
method: string;
|
|
49
|
+
reasoning?: string;
|
|
188
50
|
executedCode?: string;
|
|
189
51
|
rawOutput?: string;
|
|
52
|
+
error?: string;
|
|
190
53
|
timestamp: string;
|
|
191
54
|
}
|
|
192
|
-
export interface UniversalVerificationReport {
|
|
55
|
+
export interface VerificationReport {
|
|
193
56
|
responseId: string;
|
|
194
|
-
originalResponse: string;
|
|
195
57
|
timestamp: string;
|
|
196
|
-
claims: UniversalClaim[];
|
|
197
|
-
results:
|
|
58
|
+
claims: Claim[];
|
|
59
|
+
results: ClaimVerificationResult[];
|
|
198
60
|
summary: {
|
|
199
|
-
|
|
200
|
-
verifiableClaims: number;
|
|
61
|
+
total: number;
|
|
201
62
|
verified: number;
|
|
202
63
|
failed: number;
|
|
203
64
|
inconclusive: number;
|
|
204
|
-
averageConfidence: number;
|
|
205
65
|
};
|
|
206
|
-
|
|
66
|
+
overallVerdict: 'verified' | 'partially_verified' | 'contradicted' | 'unverified';
|
|
207
67
|
trustScore: number;
|
|
208
68
|
}
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
69
|
+
/**
|
|
70
|
+
* Runs an isolated runtime test
|
|
71
|
+
*/
|
|
72
|
+
export declare function runIsolatedTest(test: IsolatedRuntimeTest, cwd: string, llmVerifier?: (prompt: string) => Promise<string>): Promise<IsolatedRuntimeResult>;
|
|
73
|
+
/**
|
|
74
|
+
* Verify an assistant response using isolated runtime tests.
|
|
75
|
+
* This is the main entry point for verification.
|
|
76
|
+
*/
|
|
77
|
+
export declare function verifyResponse(response: string, ctx: VerificationContext, responseId?: string): Promise<VerificationReport>;
|
|
78
|
+
/**
|
|
79
|
+
* Format verification report for display
|
|
80
|
+
*/
|
|
81
|
+
export declare function formatVerificationReport(report: VerificationReport): string;
|
|
82
|
+
/**
|
|
83
|
+
* Quick verification - verify only critical/high priority claims
|
|
84
|
+
*/
|
|
85
|
+
export declare function quickVerify(response: string, ctx: VerificationContext): Promise<{
|
|
217
86
|
trustScore: number;
|
|
218
87
|
summary: string;
|
|
219
88
|
}>;
|
|
220
|
-
export declare function formatUniversalReport(r: UniversalVerificationReport): string;
|
|
221
89
|
//# sourceMappingURL=responseVerifier.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"responseVerifier.d.ts","sourceRoot":"","sources":["../../src/core/responseVerifier.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"responseVerifier.d.ts","sourceRoot":"","sources":["../../src/core/responseVerifier.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAaH,MAAM,WAAW,mBAAmB;IAClC,EAAE,EAAE,MAAM,CAAC;IACX,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IACzB,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB;AAED,MAAM,WAAW,qBAAqB;IACpC,IAAI,EAAE,mBAAmB,CAAC;IAC1B,OAAO,EAAE,OAAO,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,QAAQ,EAAE,MAAM,CAAC;IACjB,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,iBAAiB,EAAE,MAAM,EAAE,CAAC;IAC5B,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,WAAW,mBAAmB;IAClC,gBAAgB,EAAE,MAAM,CAAC;IACzB,mBAAmB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC/B,WAAW,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;CACnD;AAED,MAAM,WAAW,KAAK;IACpB,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,OAAO,CAAC;IACpB,QAAQ,EAAE,UAAU,GAAG,MAAM,GAAG,QAAQ,GAAG,KAAK,CAAC;IACjD,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAClC;AAED,MAAM,WAAW,uBAAuB;IACtC,KAAK,EAAE,KAAK,CAAC;IACb,QAAQ,EAAE,OAAO,CAAC;IAClB,UAAU,EAAE,MAAM,GAAG,QAAQ,GAAG,KAAK,CAAC;IACtC,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,kBAAkB;IACjC,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,OAAO,EAAE,uBAAuB,EAAE,CAAC;IACnC,OAAO,EAAE;QACP,KAAK,EAAE,MAAM,CAAC;QACd,QAAQ,EAAE,MAAM,CAAC;QACjB,MAAM,EAAE,MAAM,CAAC;QACf,YAAY,EAAE,MAAM,CAAC;KACtB,CAAC;IACF,cAAc,EAAE,UAAU,GAAG,oBAAoB,GAAG,cAAc,GAAG,YAAY,CAAC;IAClF,UAAU,EAAE,MAAM,CAAC;CACpB;AAgHD;;GAEG;AACH,wBAAsB,eAAe,CACnC,IAAI,EAAE,mBAAmB,EACzB,GAAG,EAAE,MAAM,EACX,WAAW,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAChD,OAAO,CAAC,qBAAqB,CAAC,CAwGhC;AA4ID;;;GAGG;AACH,wBAAsB,cAAc,CA
ClC,QAAQ,EAAE,MAAM,EAChB,GAAG,EAAE,mBAAmB,EACxB,UAAU,CAAC,EAAE,MAAM,GAClB,OAAO,CAAC,kBAAkB,CAAC,CA0F7B;AAED;;GAEG;AACH,wBAAgB,wBAAwB,CAAC,MAAM,EAAE,kBAAkB,GAAG,MAAM,CA+B3E;AAED;;GAEG;AACH,wBAAsB,WAAW,CAC/B,QAAQ,EAAE,MAAM,EAChB,GAAG,EAAE,mBAAmB,GACvB,OAAO,CAAC;IAAE,UAAU,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC,CAoBlD"}
|