erosolar-cli 1.7.24 → 1.7.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/responseVerifier.d.ts +29 -210
- package/dist/core/responseVerifier.d.ts.map +1 -1
- package/dist/core/responseVerifier.js +241 -1834
- package/dist/core/responseVerifier.js.map +1 -1
- package/dist/shell/interactiveShell.d.ts.map +1 -1
- package/dist/shell/interactiveShell.js +12 -13
- package/dist/shell/interactiveShell.js.map +1 -1
- package/package.json +1 -1
|
@@ -1,11 +1,8 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* AI Response Verification System
|
|
2
|
+
* AI Response Verification System - Isolated Runtime Only
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
* 2. Generating runtime verification tests
|
|
7
|
-
* 3. Executing tests to verify claims
|
|
8
|
-
* 4. Reporting verification results
|
|
4
|
+
* Verifies assistant claims by spawning fresh CLI instances and running
|
|
5
|
+
* actual runtime tests. All verification happens in isolation.
|
|
9
6
|
*
|
|
10
7
|
* @license MIT
|
|
11
8
|
*/
|
|
@@ -13,6 +10,7 @@ export interface IsolatedRuntimeTest {
|
|
|
13
10
|
id: string;
|
|
14
11
|
description: string;
|
|
15
12
|
commands: string[];
|
|
13
|
+
shellCommands?: string[];
|
|
16
14
|
expectedOutputs?: string[];
|
|
17
15
|
expectedBehavior?: string;
|
|
18
16
|
timeout?: number;
|
|
@@ -29,86 +27,35 @@ export interface IsolatedRuntimeResult {
|
|
|
29
27
|
unmatchedPatterns: string[];
|
|
30
28
|
llmAssessment?: string;
|
|
31
29
|
}
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
*/
|
|
40
|
-
export declare function generateIsolatedTests(claims: Array<{
|
|
41
|
-
statement: string;
|
|
42
|
-
category: string;
|
|
43
|
-
context: Record<string, unknown>;
|
|
44
|
-
}>, llmVerifier: (prompt: string) => Promise<string>): Promise<IsolatedRuntimeTest[]>;
|
|
45
|
-
/**
|
|
46
|
-
* Runs all isolated tests and returns aggregated results
|
|
47
|
-
*/
|
|
48
|
-
export declare function runIsolatedVerification(claims: Array<{
|
|
30
|
+
export interface VerificationContext {
|
|
31
|
+
workingDirectory: string;
|
|
32
|
+
conversationHistory?: string[];
|
|
33
|
+
llmVerifier?: (prompt: string) => Promise<string>;
|
|
34
|
+
}
|
|
35
|
+
export interface Claim {
|
|
36
|
+
id: string;
|
|
49
37
|
statement: string;
|
|
50
38
|
category: string;
|
|
39
|
+
verifiable: boolean;
|
|
40
|
+
priority: 'critical' | 'high' | 'medium' | 'low';
|
|
51
41
|
context: Record<string, unknown>;
|
|
52
|
-
}>, cwd: string, llmVerifier?: (prompt: string) => Promise<string>): Promise<{
|
|
53
|
-
tests: IsolatedRuntimeResult[];
|
|
54
|
-
summary: {
|
|
55
|
-
total: number;
|
|
56
|
-
passed: number;
|
|
57
|
-
failed: number;
|
|
58
|
-
};
|
|
59
|
-
allPassed: boolean;
|
|
60
|
-
}>;
|
|
61
|
-
/**
|
|
62
|
-
* Types of claims that can be verified
|
|
63
|
-
*/
|
|
64
|
-
export type ClaimType = 'file_created' | 'file_modified' | 'file_deleted' | 'command_executed' | 'code_compiles' | 'tests_pass' | 'content_contains' | 'dependency_installed' | 'service_running' | 'url_accessible' | 'git_committed' | 'package_published' | 'api_response' | 'database_updated' | 'config_changed' | 'env_var_set' | 'permission_granted' | 'data_transformed' | 'error_fixed' | 'feature_implemented' | 'refactor_complete' | 'generic';
|
|
65
|
-
/**
|
|
66
|
-
* A verifiable claim extracted from assistant response
|
|
67
|
-
*/
|
|
68
|
-
export interface VerifiableClaim {
|
|
69
|
-
type: ClaimType;
|
|
70
|
-
description: string;
|
|
71
|
-
evidence: string;
|
|
72
|
-
params: {
|
|
73
|
-
path?: string;
|
|
74
|
-
command?: string;
|
|
75
|
-
hash?: string;
|
|
76
|
-
version?: string;
|
|
77
|
-
count?: number;
|
|
78
|
-
package?: string;
|
|
79
|
-
port?: number;
|
|
80
|
-
name?: string;
|
|
81
|
-
url?: string;
|
|
82
|
-
content?: string;
|
|
83
|
-
key?: string;
|
|
84
|
-
value?: unknown;
|
|
85
|
-
status?: number;
|
|
86
|
-
body?: string;
|
|
87
|
-
input?: string;
|
|
88
|
-
output?: string;
|
|
89
|
-
checkCommand?: string;
|
|
90
|
-
mode?: string;
|
|
91
|
-
[key: string]: unknown;
|
|
92
|
-
};
|
|
93
42
|
}
|
|
94
|
-
/**
|
|
95
|
-
* Result of verifying a claim
|
|
96
|
-
*/
|
|
97
43
|
export interface ClaimVerificationResult {
|
|
98
|
-
claim: VerifiableClaim;
|
|
44
|
+
claim: Claim;
|
|
99
45
|
verified: boolean;
|
|
100
46
|
confidence: 'high' | 'medium' | 'low';
|
|
101
47
|
evidence: string;
|
|
48
|
+
method: string;
|
|
49
|
+
reasoning?: string;
|
|
50
|
+
executedCode?: string;
|
|
51
|
+
rawOutput?: string;
|
|
102
52
|
error?: string;
|
|
103
53
|
timestamp: string;
|
|
104
54
|
}
|
|
105
|
-
/**
|
|
106
|
-
* Overall verification report
|
|
107
|
-
*/
|
|
108
55
|
export interface VerificationReport {
|
|
109
56
|
responseId: string;
|
|
110
57
|
timestamp: string;
|
|
111
|
-
claims: VerifiableClaim[];
|
|
58
|
+
claims: Claim[];
|
|
112
59
|
results: ClaimVerificationResult[];
|
|
113
60
|
summary: {
|
|
114
61
|
total: number;
|
|
@@ -116,155 +63,27 @@ export interface VerificationReport {
|
|
|
116
63
|
failed: number;
|
|
117
64
|
inconclusive: number;
|
|
118
65
|
};
|
|
119
|
-
overallVerdict: 'verified' | 'partially_verified' | '
|
|
66
|
+
overallVerdict: 'verified' | 'partially_verified' | 'contradicted' | 'unverified';
|
|
67
|
+
trustScore: number;
|
|
120
68
|
}
|
|
121
69
|
/**
|
|
122
|
-
*
|
|
123
|
-
* Falls back to pattern matching if LLM extraction fails.
|
|
70
|
+
* Runs an isolated runtime test
|
|
124
71
|
*/
|
|
125
|
-
export declare function
|
|
126
|
-
/**
|
|
127
|
-
* Extract verifiable claims from an assistant response.
|
|
128
|
-
* Covers common patterns for file operations, builds, tests, git, and npm.
|
|
129
|
-
*/
|
|
130
|
-
export declare function extractClaims(response: string): VerifiableClaim[];
|
|
131
|
-
/**
|
|
132
|
-
* Generate a verification test for a claim
|
|
133
|
-
*/
|
|
134
|
-
export declare function generateVerificationTest(claim: VerifiableClaim): () => Promise<ClaimVerificationResult>;
|
|
72
|
+
export declare function runIsolatedTest(test: IsolatedRuntimeTest, cwd: string, llmVerifier?: (prompt: string) => Promise<string>): Promise<IsolatedRuntimeResult>;
|
|
135
73
|
/**
|
|
136
|
-
* Verify
|
|
137
|
-
*
|
|
138
|
-
* All claim extraction and verification is done via LLM.
|
|
74
|
+
* Verify an assistant response using isolated runtime tests.
|
|
75
|
+
* This is the main entry point for verification.
|
|
139
76
|
*/
|
|
140
|
-
export declare function verifyResponse(response: string,
|
|
77
|
+
export declare function verifyResponse(response: string, ctx: VerificationContext, responseId?: string): Promise<VerificationReport>;
|
|
141
78
|
/**
|
|
142
|
-
* Format
|
|
79
|
+
* Format verification report for display
|
|
143
80
|
*/
|
|
144
81
|
export declare function formatVerificationReport(report: VerificationReport): string;
|
|
145
82
|
/**
|
|
146
|
-
* Quick verification -
|
|
147
|
-
* Requires a VerificationContext with llmVerifier for LLM-based semantic analysis.
|
|
148
|
-
*/
|
|
149
|
-
export declare function quickVerify(response: string, context: VerificationContext): Promise<boolean>;
|
|
150
|
-
/**
|
|
151
|
-
* Verification strategy types
|
|
152
|
-
*/
|
|
153
|
-
export type VerificationStrategy = 'runtime' | 'filesystem' | 'network' | 'llm' | 'semantic' | 'comparison' | 'manual';
|
|
154
|
-
/**
|
|
155
|
-
* Extended verification context for complex claims
|
|
83
|
+
* Quick verification - verify only critical/high priority claims
|
|
156
84
|
*/
|
|
157
|
-
export interface VerificationContext {
|
|
158
|
-
workingDirectory: string;
|
|
159
|
-
previousState?: Record<string, unknown>;
|
|
160
|
-
currentState?: Record<string, unknown>;
|
|
161
|
-
conversationHistory?: string[];
|
|
162
|
-
llmVerifier?: (prompt: string) => Promise<string>;
|
|
163
|
-
}
|
|
164
|
-
/**
|
|
165
|
-
* Verify a claim using LLM when runtime verification isn't possible
|
|
166
|
-
*/
|
|
167
|
-
export declare function verifyClaimWithLLM(claim: VerifiableClaim, context: VerificationContext): Promise<ClaimVerificationResult>;
|
|
168
|
-
/**
|
|
169
|
-
* Generate verification test for extended claim types
|
|
170
|
-
*/
|
|
171
|
-
export declare function generateExtendedVerificationTest(claim: VerifiableClaim, context: VerificationContext): () => Promise<ClaimVerificationResult>;
|
|
172
|
-
/**
|
|
173
|
-
* Comprehensive verification using LLM-based semantic analysis.
|
|
174
|
-
* Requires an LLM verifier - all claims are verified through LLM semantic analysis.
|
|
175
|
-
*/
|
|
176
|
-
export declare function verifyResponseComprehensive(response: string, context: VerificationContext, responseId?: string): Promise<VerificationReport>;
|
|
177
|
-
/**
|
|
178
|
-
* Determine the best verification strategy for a claim
|
|
179
|
-
*/
|
|
180
|
-
export declare function getVerificationStrategy(claim: VerifiableClaim): VerificationStrategy;
|
|
181
|
-
/**
|
|
182
|
-
* LLM-generated verification test interface
|
|
183
|
-
*/
|
|
184
|
-
export interface GeneratedVerificationTest {
|
|
185
|
-
claim: VerifiableClaim;
|
|
186
|
-
testType: 'shell' | 'javascript' | 'api';
|
|
187
|
-
code: string;
|
|
188
|
-
description: string;
|
|
189
|
-
expectedOutcome: string;
|
|
190
|
-
safetyCheck: boolean;
|
|
191
|
-
}
|
|
192
|
-
/**
|
|
193
|
-
* Generate verification code using LLM
|
|
194
|
-
*/
|
|
195
|
-
export declare function generateVerificationCode(claim: VerifiableClaim, context: VerificationContext): Promise<GeneratedVerificationTest | null>;
|
|
196
|
-
/**
|
|
197
|
-
* Validate that generated code is safe to execute
|
|
198
|
-
*/
|
|
199
|
-
export declare function validateGeneratedCode(test: GeneratedVerificationTest): {
|
|
200
|
-
safe: boolean;
|
|
201
|
-
reason: string;
|
|
202
|
-
};
|
|
203
|
-
/**
|
|
204
|
-
* Execute a generated verification test
|
|
205
|
-
*/
|
|
206
|
-
export declare function executeGeneratedTest(test: GeneratedVerificationTest, context: VerificationContext): Promise<ClaimVerificationResult>;
|
|
207
|
-
/**
|
|
208
|
-
* Verify a claim using LLM-generated runtime test
|
|
209
|
-
*/
|
|
210
|
-
export declare function verifyWithGeneratedTest(claim: VerifiableClaim, context: VerificationContext): Promise<ClaimVerificationResult>;
|
|
211
|
-
/**
|
|
212
|
-
* Full verification using LLM-generated tests
|
|
213
|
-
* This is the most powerful verification method - LLM decides HOW to verify each claim
|
|
214
|
-
*/
|
|
215
|
-
export declare function verifyResponseWithGeneratedTests(response: string, context: VerificationContext, responseId?: string): Promise<VerificationReport>;
|
|
216
|
-
/**
|
|
217
|
-
* Hybrid verification - uses generated tests when available, falls back to predefined tests
|
|
218
|
-
*/
|
|
219
|
-
export declare function verifyResponseHybrid(response: string, context: VerificationContext, responseId?: string): Promise<VerificationReport>;
|
|
220
|
-
export interface UniversalClaim {
|
|
221
|
-
id: string;
|
|
222
|
-
statement: string;
|
|
223
|
-
category: string;
|
|
224
|
-
verifiable: boolean;
|
|
225
|
-
verificationApproach: string;
|
|
226
|
-
priority: 'critical' | 'high' | 'medium' | 'low';
|
|
227
|
-
context: Record<string, unknown>;
|
|
228
|
-
}
|
|
229
|
-
export interface UniversalVerificationResult {
|
|
230
|
-
claim: UniversalClaim;
|
|
231
|
-
verified: boolean;
|
|
232
|
-
confidence: number;
|
|
233
|
-
method: string;
|
|
234
|
-
evidence: string;
|
|
235
|
-
reasoning: string;
|
|
236
|
-
suggestedFollowUp?: string;
|
|
237
|
-
executedCode?: string;
|
|
238
|
-
rawOutput?: string;
|
|
239
|
-
timestamp: string;
|
|
240
|
-
}
|
|
241
|
-
export interface UniversalVerificationReport {
|
|
242
|
-
responseId: string;
|
|
243
|
-
originalResponse: string;
|
|
244
|
-
timestamp: string;
|
|
245
|
-
claims: UniversalClaim[];
|
|
246
|
-
results: UniversalVerificationResult[];
|
|
247
|
-
summary: {
|
|
248
|
-
totalClaims: number;
|
|
249
|
-
verifiableClaims: number;
|
|
250
|
-
verified: number;
|
|
251
|
-
failed: number;
|
|
252
|
-
inconclusive: number;
|
|
253
|
-
averageConfidence: number;
|
|
254
|
-
};
|
|
255
|
-
overallAssessment: string;
|
|
256
|
-
trustScore: number;
|
|
257
|
-
}
|
|
258
|
-
export declare function validateUniversalCode(c: string): {
|
|
259
|
-
safe: boolean;
|
|
260
|
-
reason: string;
|
|
261
|
-
};
|
|
262
|
-
export declare function extractUniversalClaims(r: string, ctx: VerificationContext): Promise<UniversalClaim[]>;
|
|
263
|
-
export declare function verifyUniversalClaim(claim: UniversalClaim, ctx: VerificationContext): Promise<UniversalVerificationResult>;
|
|
264
|
-
export declare function verifyResponseUniversal(response: string, ctx: VerificationContext, id?: string): Promise<UniversalVerificationReport>;
|
|
265
|
-
export declare function quickUniversalVerify(r: string, ctx: VerificationContext): Promise<{
|
|
85
|
+
export declare function quickVerify(response: string, ctx: VerificationContext): Promise<{
|
|
266
86
|
trustScore: number;
|
|
267
87
|
summary: string;
|
|
268
88
|
}>;
|
|
269
|
-
export declare function formatUniversalReport(r: UniversalVerificationReport): string;
|
|
270
89
|
//# sourceMappingURL=responseVerifier.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"responseVerifier.d.ts","sourceRoot":"","sources":["../../src/core/responseVerifier.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"responseVerifier.d.ts","sourceRoot":"","sources":["../../src/core/responseVerifier.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAaH,MAAM,WAAW,mBAAmB;IAClC,EAAE,EAAE,MAAM,CAAC;IACX,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IACzB,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB;AAED,MAAM,WAAW,qBAAqB;IACpC,IAAI,EAAE,mBAAmB,CAAC;IAC1B,OAAO,EAAE,OAAO,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,QAAQ,EAAE,MAAM,CAAC;IACjB,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,iBAAiB,EAAE,MAAM,EAAE,CAAC;IAC5B,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,WAAW,mBAAmB;IAClC,gBAAgB,EAAE,MAAM,CAAC;IACzB,mBAAmB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC/B,WAAW,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;CACnD;AAED,MAAM,WAAW,KAAK;IACpB,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,OAAO,CAAC;IACpB,QAAQ,EAAE,UAAU,GAAG,MAAM,GAAG,QAAQ,GAAG,KAAK,CAAC;IACjD,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAClC;AAED,MAAM,WAAW,uBAAuB;IACtC,KAAK,EAAE,KAAK,CAAC;IACb,QAAQ,EAAE,OAAO,CAAC;IAClB,UAAU,EAAE,MAAM,GAAG,QAAQ,GAAG,KAAK,CAAC;IACtC,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,kBAAkB;IACjC,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,OAAO,EAAE,uBAAuB,EAAE,CAAC;IACnC,OAAO,EAAE;QACP,KAAK,EAAE,MAAM,CAAC;QACd,QAAQ,EAAE,MAAM,CAAC;QACjB,MAAM,EAAE,MAAM,CAAC;QACf,YAAY,EAAE,MAAM,CAAC;KACtB,CAAC;IACF,cAAc,EAAE,UAAU,GAAG,oBAAoB,GAAG,cAAc,GAAG,YAAY,CAAC;IAClF,UAAU,EAAE,MAAM,CAAC;CACpB;AAgHD;;GAEG;AACH,wBAAsB,eAAe,CACnC,IAAI,EAAE,mBAAmB,EACzB,GAAG,EAAE,MAAM,EACX,WAAW,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAChD,OAAO,CAAC,qBAAqB,CAAC,CAwGhC;AA4ID;;;GAGG;AACH,wBAAsB,cAAc,CA
ClC,QAAQ,EAAE,MAAM,EAChB,GAAG,EAAE,mBAAmB,EACxB,UAAU,CAAC,EAAE,MAAM,GAClB,OAAO,CAAC,kBAAkB,CAAC,CA0F7B;AAED;;GAEG;AACH,wBAAgB,wBAAwB,CAAC,MAAM,EAAE,kBAAkB,GAAG,MAAM,CA+B3E;AAED;;GAEG;AACH,wBAAsB,WAAW,CAC/B,QAAQ,EAAE,MAAM,EAChB,GAAG,EAAE,mBAAmB,GACvB,OAAO,CAAC;IAAE,UAAU,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC,CAoBlD"}
|