@o-lang/resolver-tests 1.0.8 → 1.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/R-010-resolver-output-contract/test.json +1 -3
- package/R-013-resolver-kernel-bypass/README.md +16 -0
- package/R-013-resolver-kernel-bypass/resolver.js +0 -0
- package/R-013-resolver-kernel-bypass/test.json +33 -0
- package/badges/notify-telegram-badge.svg +9 -9
- package/badges/unknown-resolver-badge.svg +18 -6
- package/conformance.json +30 -2
- package/lib/badge.js +33 -23
- package/lib/runner.js +282 -27
- package/package.json +1 -1
- package/run.js +2 -1
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# R-013: Resolver Kernel Bypass Prevention
|
|
2
|
+
|
|
3
|
+
## Purpose
|
|
4
|
+
Ensures resolvers maintain semantic truth by:
|
|
5
|
+
- Blocking unresolved variables before LLM calls
|
|
6
|
+
- Preventing direct prompt parsing for variable extraction
|
|
7
|
+
- Never defaulting missing values to hallucinated data
|
|
8
|
+
|
|
9
|
+
## Requirements
|
|
10
|
+
Resolvers must:
|
|
11
|
+
- Return `{ error: "UNRESOLVED_VARIABLES" }` for prompts with unresolved placeholders
|
|
12
|
+
- Only use kernel-provided context (never parse prompts directly)
|
|
13
|
+
- Never fabricate default values (like `$0` for missing balances)
|
|
14
|
+
|
|
15
|
+
## Certification Impact
|
|
16
|
+
A failure indicates that the resolver bypasses O-Lang's kernel mediation layer, violating the core trust model.
|
|
File without changes
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
{
|
|
2
|
+
"test_id": "R-013-resolver-kernel-bypass",
|
|
3
|
+
"name": "Resolver Kernel Bypass Prevention",
|
|
4
|
+
"description": "Ensures resolvers never bypass kernel mediation or hallucinate missing data",
|
|
5
|
+
"category": "resolver-runtime",
|
|
6
|
+
"fixtures": {
|
|
7
|
+
"inputs": [
|
|
8
|
+
{
|
|
9
|
+
"invoke": "__USE_RESOLVER_EXAMPLE_ACTION__"
|
|
10
|
+
}
|
|
11
|
+
]
|
|
12
|
+
},
|
|
13
|
+
"assertions": [
|
|
14
|
+
{
|
|
15
|
+
"id": "blocks_unresolved_variables",
|
|
16
|
+
"type": "blocks_unresolved_variables",
|
|
17
|
+
"severity": "fatal",
|
|
18
|
+
"description": "Resolver must reject prompts with unresolved placeholders"
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"id": "no_direct_prompt_parsing",
|
|
22
|
+
"type": "no_direct_prompt_parsing",
|
|
23
|
+
"severity": "fatal",
|
|
24
|
+
"description": "Resolver must not parse prompts directly for variable extraction"
|
|
25
|
+
},
|
|
26
|
+
{
|
|
27
|
+
"id": "no_default_values",
|
|
28
|
+
"type": "no_default_values",
|
|
29
|
+
"severity": "fatal",
|
|
30
|
+
"description": "Resolver must never default missing values to 0, empty string, etc."
|
|
31
|
+
}
|
|
32
|
+
]
|
|
33
|
+
}
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
<svg xmlns="http://www.w3.org/2000/svg" width="412" height="20">
|
|
2
|
-
<rect width="412" height="20" fill="green" rx="3" ry="3"/>
|
|
3
|
-
<text x="206" y="14"
|
|
4
|
-
fill="#fff"
|
|
5
|
-
font-family="Verdana"
|
|
6
|
-
font-size="12"
|
|
7
|
-
text-anchor="middle">
|
|
8
|
-
O-lang | notify-telegram v1.0.3 — Certified (2026-01-20)
|
|
9
|
-
</text>
|
|
1
|
+
<svg xmlns="http://www.w3.org/2000/svg" width="412" height="20">
|
|
2
|
+
<rect width="412" height="20" fill="green" rx="3" ry="3"/>
|
|
3
|
+
<text x="206" y="14"
|
|
4
|
+
fill="#fff"
|
|
5
|
+
font-family="Verdana"
|
|
6
|
+
font-size="12"
|
|
7
|
+
text-anchor="middle">
|
|
8
|
+
O-lang | notify-telegram v1.0.3 — Certified (2026-01-20)
|
|
9
|
+
</text>
|
|
10
10
|
</svg>
|
|
@@ -1,10 +1,22 @@
|
|
|
1
|
-
<svg xmlns="http://www.w3.org/2000/svg" width="
|
|
2
|
-
|
|
3
|
-
<
|
|
1
|
+
<svg xmlns="http://www.w3.org/2000/svg" width="332" height="20">
|
|
2
|
+
<!-- Left segment: O-lang (purple) -->
|
|
3
|
+
<rect x="0" y="0" width="54" height="20" fill="#8A2BE2" rx="3" ry="3"/>
|
|
4
|
+
<text x="27" y="14"
|
|
4
5
|
fill="#fff"
|
|
5
|
-
font-family="Verdana"
|
|
6
|
-
font-size="
|
|
6
|
+
font-family="Verdana, DejaVu Sans, sans-serif"
|
|
7
|
+
font-size="11"
|
|
8
|
+
font-weight="bold"
|
|
7
9
|
text-anchor="middle">
|
|
8
|
-
O-lang
|
|
10
|
+
O-lang
|
|
11
|
+
</text>
|
|
12
|
+
|
|
13
|
+
<!-- Right segment: status info (green/red) -->
|
|
14
|
+
<rect x="54" y="0" width="278" height="20" fill="#F44336" rx="3" ry="3"/>
|
|
15
|
+
<text x="193" y="14"
|
|
16
|
+
fill="#fff"
|
|
17
|
+
font-family="Verdana, DejaVu Sans, sans-serif"
|
|
18
|
+
font-size="11"
|
|
19
|
+
text-anchor="middle">
|
|
20
|
+
unknown-resolver — Failed (2026-02-01)
|
|
9
21
|
</text>
|
|
10
22
|
</svg>
|
package/conformance.json
CHANGED
|
@@ -1,10 +1,38 @@
|
|
|
1
1
|
{
|
|
2
2
|
"resolver": "unknown",
|
|
3
|
-
"timestamp": "2026-
|
|
3
|
+
"timestamp": "2026-02-01T06:27:08.331Z",
|
|
4
4
|
"results": [
|
|
5
5
|
{
|
|
6
6
|
"suite": "R-005-resolver-metadata-contract",
|
|
7
|
-
"status": "
|
|
7
|
+
"status": "fail"
|
|
8
|
+
},
|
|
9
|
+
{
|
|
10
|
+
"suite": "R-006-resolver-runtime-shape",
|
|
11
|
+
"status": "fail"
|
|
12
|
+
},
|
|
13
|
+
{
|
|
14
|
+
"suite": "R-007-resolver-failure-contract",
|
|
15
|
+
"status": "fail"
|
|
16
|
+
},
|
|
17
|
+
{
|
|
18
|
+
"suite": "R-008-resolver-input-validation",
|
|
19
|
+
"status": "fail"
|
|
20
|
+
},
|
|
21
|
+
{
|
|
22
|
+
"suite": "R-009-resolver-retry-semantics",
|
|
23
|
+
"status": "fail"
|
|
24
|
+
},
|
|
25
|
+
{
|
|
26
|
+
"suite": "R-010-resolver-output-contract",
|
|
27
|
+
"status": "fail"
|
|
28
|
+
},
|
|
29
|
+
{
|
|
30
|
+
"suite": "R-011-resolver-determinism",
|
|
31
|
+
"status": "fail"
|
|
32
|
+
},
|
|
33
|
+
{
|
|
34
|
+
"suite": "R-012-resolver-side-effects",
|
|
35
|
+
"status": "fail"
|
|
8
36
|
}
|
|
9
37
|
]
|
|
10
38
|
}
|
package/lib/badge.js
CHANGED
|
@@ -2,62 +2,72 @@ const fs = require("fs");
|
|
|
2
2
|
const path = require("path");
|
|
3
3
|
console.log("✅ O-lang badge generator loaded");
|
|
4
4
|
|
|
5
|
-
// ----------------------
|
|
6
|
-
// Resolver-specific badge generator with O-lang tag
|
|
7
|
-
// ----------------------
|
|
8
5
|
/**
 * Render a two-segment SVG badge ("O-lang" label + resolver status) and write
 * it to `<outputDir>/badges/<safeName>-badge.svg`.
 *
 * @param {object} [options]
 * @param {string} [options.resolverName="Unknown"] - Name shown on the badge and used for the file name.
 * @param {string} [options.version=""] - Optional version; rendered as ` v<version>` when non-empty.
 * @param {boolean} [options.passed=false] - Selects "Certified"/green vs "Failed"/red.
 * @param {string} [options.outputDir=process.cwd()] - Directory that receives the `badges/` folder.
 * @returns {string} Path of the SVG file that was written.
 */
function generateBadge({
  resolverName = "Unknown",
  version = "",
  passed = false,
  outputDir = process.cwd()
} = {}) {
  // Resolver metadata is caller-supplied text; escape it before it becomes
  // SVG character data so `<`, `&`, quotes, etc. cannot break the markup.
  const xmlEntities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&apos;"
  };
  const escapeXml = (value) =>
    String(value).replace(/[&<>"']/g, (ch) => xmlEntities[ch]);

  // Coerce once so both the label and the file name work for non-string names.
  const displayName = String(resolverName);

  // Colors
  const olangColor = "#8A2BE2"; // Purple for "O-lang"
  const statusColor = passed ? "#4CAF50" : "#F44336"; // Green/Red for status

  const statusText = passed ? "Certified" : "Failed";
  const versionText = version ? ` v${version}` : "";
  const timestamp = new Date().toISOString().split("T")[0]; // YYYY-MM-DD

  // Left label: "O-lang"
  const leftLabel = "O-lang";
  // Right message: "resolver-name v1.0 — Certified (2026-01-22)"
  const rightMessage = `${displayName}${versionText} — ${statusText} (${timestamp})`;

  // Estimate text widths (approx. 7px per char + padding). Widths are based on
  // the visible (unescaped) text, not on the escaped markup.
  const leftWidth = 12 + leftLabel.length * 7;
  const rightWidth = 12 + rightMessage.length * 7;
  const totalWidth = leftWidth + rightWidth;

  const badgeSvg = `
<svg xmlns="http://www.w3.org/2000/svg" width="${totalWidth}" height="20">
<!-- Left segment: O-lang (purple) -->
<rect x="0" y="0" width="${leftWidth}" height="20" fill="${olangColor}" rx="3" ry="3"/>
<text x="${leftWidth / 2}" y="14"
fill="#fff"
font-family="Verdana, DejaVu Sans, sans-serif"
font-size="11"
font-weight="bold"
text-anchor="middle">
${escapeXml(leftLabel)}
</text>

<!-- Right segment: status info (green/red) -->
<rect x="${leftWidth}" y="0" width="${rightWidth}" height="20" fill="${statusColor}" rx="3" ry="3"/>
<text x="${leftWidth + rightWidth / 2}" y="14"
fill="#fff"
font-family="Verdana, DejaVu Sans, sans-serif"
font-size="11"
text-anchor="middle">
${escapeXml(rightMessage)}
</text>
</svg>
`.trim();

  // Ensure badges folder exists (recursive mkdir is a no-op when it already does)
  const badgesDir = path.join(outputDir, "badges");
  fs.mkdirSync(badgesDir, { recursive: true });

  // Write badge file; anything outside [a-zA-Z0-9_-] becomes "_" in the file name
  const safeName = displayName.replace(/[^a-zA-Z0-9_-]/g, "_");
  const badgePath = path.join(badgesDir, `${safeName}-badge.svg`);

  fs.writeFileSync(badgePath, badgeSvg, "utf8");
  console.log(`🏷 Badge written to ${badgePath}`);

  return badgePath;
}
|
|
57
69
|
|
|
58
|
-
// ----------------------
|
|
59
|
-
// Export
|
|
60
|
-
// ----------------------
|
|
61
70
|
module.exports = {
|
|
71
|
+
generateBadges: generateBadge, // alias for backward compat if needed
|
|
62
72
|
generateBadge
|
|
63
|
-
};
|
|
73
|
+
};
|
package/lib/runner.js
CHANGED
|
@@ -1,6 +1,13 @@
|
|
|
1
1
|
const path = require("path");
|
|
2
2
|
const fs = require("fs");
|
|
3
3
|
|
|
4
|
+
// ----------------------
|
|
5
|
+
// Helper: Check if resolver handles templated prompts
|
|
6
|
+
// ----------------------
|
|
7
|
+
/**
 * Report whether the resolver metadata opts in to templated-prompt handling.
 *
 * @param {object} [resolverMeta] - Resolver declaration; may be null/undefined.
 * @returns {boolean} `true` only when `properties.handlesTemplatedPrompts` is
 *   strictly `true`; any other value (including truthy non-booleans) yields `false`.
 */
function resolverHandlesTemplatedPrompts(resolverMeta) {
  const optInFlag = resolverMeta?.properties?.handlesTemplatedPrompts;
  return optInFlag === true;
}
|
|
10
|
+
|
|
4
11
|
// ----------------------
|
|
5
12
|
// Validator functions for RESOLVER METADATA (R-005)
|
|
6
13
|
// ----------------------
|
|
@@ -65,13 +72,21 @@ function checkResolverIsCallable(resolver) {
|
|
|
65
72
|
}
|
|
66
73
|
|
|
67
74
|
/**
 * Check that an observed error code is one the resolver declared.
 *
 * @param {{code?: string}|null|undefined} observedError - Error captured from the resolver, if any.
 * @param {{failures?: Array<{code: string}>}|null|undefined} resolverMeta - Resolver declaration.
 * @returns {boolean} `true` when there is no error code to check, or when the
 *   code appears in `resolverMeta.failures`; `false` otherwise (including when
 *   the metadata is missing or declares no failures).
 */
function checkFailureCodeDeclared(observedError, resolverMeta) {
  // Nothing to validate when the error carries no code.
  if (!observedError?.code) return true;

  // Missing metadata or a malformed `failures` value means "nothing declared"
  // rather than a crash inside the conformance runner.
  const declared = Array.isArray(resolverMeta?.failures) ? resolverMeta.failures : [];
  const declaredCodes = declared.map((failure) => failure?.code);
  return declaredCodes.includes(observedError.code);
}
|
|
72
79
|
|
|
80
|
+
// ✅ FIXED: Accept structured errors (O-Lang standard) instead of requiring throws
|
|
73
81
|
/**
 * Decide whether an invocation rejected its (invalid) input.
 *
 * A rejection is either a thrown error (`invocationResult.threw`) or a
 * returned plain object carrying a non-nullish `error` value.
 *
 * @param {{threw?: boolean, output?: unknown}|null|undefined} invocationResult - Observed invocation context.
 * @returns {boolean} `true` when the resolver rejected the input.
 */
function checkRejectsMissingRequiredInput(invocationResult) {
  // Accept either: threw an error, OR returned { error: ... }
  if (invocationResult?.threw) return true;

  const output = invocationResult?.output;
  const isPlainObject =
    output != null && typeof output === 'object' && !Array.isArray(output);

  // `{ error: null }` / `{ error: undefined }` is a success shape with an empty
  // error slot, not a rejection, so the mere presence of the key is not enough.
  return isPlainObject && output.error != null;
}
|
|
76
91
|
|
|
77
92
|
function checkRetryCountWithinLimit(observedRetries, resolverMeta, errorCode) {
|
|
@@ -80,14 +95,66 @@ function checkRetryCountWithinLimit(observedRetries, resolverMeta, errorCode) {
|
|
|
80
95
|
return observedRetries <= failure.retries;
|
|
81
96
|
}
|
|
82
97
|
|
|
98
|
+
// ✅ UPDATED: Handle both kernel-mode { output: ... } and direct-mode { field: value }
|
|
83
99
|
/**
 * Validate the shape of a resolver's return value.
 *
 * Two shapes are accepted:
 *   - kernel mode: `{ output: { ... } }` — the nested `output` must itself be
 *     a non-null, non-array object;
 *   - direct mode: any other non-null, non-array object.
 *
 * @param {unknown} output - Value returned by the resolver.
 * @returns {boolean} `true` when the value matches one of the shapes above.
 */
function checkOutputIsObject(output) {
  const isObjectNotArray = (candidate) =>
    candidate !== null && typeof candidate === 'object' && !Array.isArray(candidate);

  if (!isObjectNotArray(output)) return false;

  // An `output` key switches validation to the nested value (kernel mode);
  // without it the object itself is the payload (direct mode).
  return 'output' in output ? isObjectNotArray(output.output) : true;
}
|
|
86
112
|
|
|
113
|
+
// ✅ ENHANCED: Return rich error details for guidance
|
|
87
114
|
/**
 * Verify that the resolver's output contains every field named in
 * `resolverMeta.outputs`.
 *
 * Accepts kernel mode (`{ output: { ... } }`, the nested object is inspected)
 * and direct mode (the object itself is inspected).
 *
 * @param {unknown} output - Value returned by the resolver.
 * @param {{outputs?: Array<{name: string}>}|null|undefined} resolverMeta - Resolver declaration.
 * @returns {true|{passed: false, details: object}} `true` on success; otherwise
 *   a result object whose `details.reason` is one of `'no_output'`,
 *   `'invalid_output_shape'` or `'missing_fields'`.
 */
function checkOutputFieldsMatchContract(output, resolverMeta) {
  const fail = (details) => ({ passed: false, details });

  if (!output || typeof output !== 'object') {
    return fail({ reason: 'no_output', actualOutput: output });
  }

  // Kernel mode wraps the payload in `output`; direct mode returns it as-is.
  const actualOutput = 'output' in output ? output.output : output;

  // Arrays are objects to `typeof`, but they are not a valid payload shape
  // (checkOutputIsObject rejects them too), so report them as malformed
  // instead of probing them for field names.
  if (!actualOutput || typeof actualOutput !== 'object' || Array.isArray(actualOutput)) {
    return fail({ reason: 'invalid_output_shape', actualOutput: output });
  }

  // Missing metadata or a malformed `outputs` value means no declared fields.
  const declaredOutputs = Array.isArray(resolverMeta?.outputs) ? resolverMeta.outputs : [];
  const declaredNames = declaredOutputs.map((o) => o.name);
  const missingFields = declaredNames.filter((name) => !(name in actualOutput));

  if (missingFields.length > 0) {
    return fail({
      reason: 'missing_fields',
      missingFields,
      actualOutput,
      expectedFields: declaredNames
    });
  }

  return true;
}
|
|
92
159
|
|
|
93
160
|
function checkDeterministicOutput(results) {
|
|
@@ -97,15 +164,154 @@ function checkDeterministicOutput(results) {
|
|
|
97
164
|
}
|
|
98
165
|
|
|
99
166
|
/**
 * Side-effect check stub: takes no input and inspects no state, so the
 * verdict is unconditionally `true`.
 *
 * @returns {boolean} Always `true`.
 */
function checkNoGlobalMutation() {
  const verdict = true;
  return verdict;
}
|
|
103
169
|
|
|
170
|
+
// ----------------------
|
|
171
|
+
// Validator functions for RESOLVER KERNEL BYPASS (R-013)
|
|
172
|
+
// ----------------------
|
|
173
|
+
|
|
174
|
+
/**
 * R-013: probe the resolver with a prompt that still contains an unresolved
 * `{placeholder}` and verify that it refuses to act on it.
 *
 * Resolvers that do not declare `properties.handlesTemplatedPrompts === true`
 * are not probed and pass by default.
 *
 * @param {{resolver?: Function}} invocationResult - Runtime context; `resolver` is the function to probe.
 * @param {object} resolverMeta - Resolver declaration.
 * @param {*} [testCase] - Unused; kept for handler-signature compatibility.
 * @returns {Promise<boolean>} `true` when the resolver throws or returns
 *   `{ error: "UNRESOLVED_VARIABLES" }`; `false` for any other outcome,
 *   including when there is no callable resolver to probe.
 */
async function checkBlocksUnresolvedVariables(invocationResult, resolverMeta, testCase) {
  // Skip if resolver doesn't handle templated prompts
  if (!resolverHandlesTemplatedPrompts(resolverMeta)) {
    return true; // Pass by default
  }

  // Without a callable resolver nothing can be verified. Checking this up
  // front keeps the resulting TypeError from being mistaken for the resolver
  // "throwing" (which would count as a pass below).
  const resolverFunc = invocationResult?.resolver;
  if (typeof resolverFunc !== 'function') {
    return false;
  }

  const testInput = 'Ask llm-groq "Balance is {account_info.balance}"';

  let result;
  try {
    result = await resolverFunc(testInput, {});
  } catch (e) {
    // Thrown errors are acceptable (though structured errors are preferred)
    return true;
  }

  // undefined (action ignored), null and primitives are all unsafe.
  if (result === null || typeof result !== 'object') {
    return false;
  }

  // A response without an error is a hallucinated success; a response with an
  // error must name UNRESOLVED_VARIABLES specifically.
  return result.error === "UNRESOLVED_VARIABLES";
}
|
|
208
|
+
|
|
209
|
+
/**
 * R-013 "no direct prompt parsing" assertion.
 *
 * The resolver is never invoked here: the function consults the
 * templated-prompt opt-in flag and resolves to `true` whichever way it goes.
 *
 * @param {*} invocationResult - Unused runtime context.
 * @param {object} resolverMeta - Resolver declaration.
 * @returns {Promise<boolean>} Always resolves to `true`.
 */
async function checkNoDirectPromptParsing(invocationResult, resolverMeta) {
  if (resolverHandlesTemplatedPrompts(resolverMeta)) {
    // Opted-in resolvers: this is verified by resolver structure compliance,
    // so there is nothing to probe at runtime.
    return true;
  }

  // Resolvers that don't handle templated prompts are skipped.
  return true;
}
|
|
217
|
+
|
|
218
|
+
/**
 * R-013: probe the resolver with a prompt containing an unresolved `{value}`
 * placeholder and verify it does not answer with a fabricated default.
 *
 * Resolvers that do not declare `properties.handlesTemplatedPrompts === true`
 * are not probed and pass by default.
 *
 * @param {{resolver?: Function}} invocationResult - Runtime context; `resolver` is the function to probe.
 * @param {object} resolverMeta - Resolver declaration.
 * @param {*} [testCase] - Unused; kept for handler-signature compatibility.
 * @returns {Promise<boolean>} `false` when `output.response` contains one of
 *   the known default/hallucination markers (`$0`, `0.00`, `undefined`,
 *   `null`) or when there is no callable resolver to probe; `true` otherwise
 *   (errors, thrown exceptions, undefined, or a clean response).
 */
async function checkNoDefaultValues(invocationResult, resolverMeta, testCase) {
  // Skip if resolver doesn't handle templated prompts
  if (!resolverHandlesTemplatedPrompts(resolverMeta)) {
    return true;
  }

  // A missing resolver must not be reported as "safe": calling it would raise
  // a TypeError that is indistinguishable from the resolver rejecting input.
  const resolverFunc = invocationResult?.resolver;
  if (typeof resolverFunc !== 'function') {
    return false;
  }

  const testInput = 'Ask llm-groq "Compute {value}"';

  let result;
  try {
    result = await resolverFunc(testInput, {});
  } catch (e) {
    return true; // Errors are safe
  }

  // Returning errors or undefined is safe
  if (!result || typeof result !== 'object' || !result.output) {
    return true;
  }

  // Normalise the response to text before scanning it. Only the resolver call
  // sits inside the try above, so a non-string response can no longer raise a
  // TypeError that gets swallowed as a pass.
  const rawResponse = result.output.response;
  let responseText = '';
  if (typeof rawResponse === 'string') {
    responseText = rawResponse;
  } else if (rawResponse != null) {
    try {
      responseText = JSON.stringify(rawResponse) ?? '';
    } catch (e) {
      // Unserialisable payload (cycle, BigInt): there is no text to scan.
      responseText = '';
    }
  }

  // Check for common hallucination patterns
  const hallucinationMarkers = ['$0', '0.00', 'undefined', 'null'];
  return !hallucinationMarkers.some((marker) => responseText.includes(marker));
}
|
|
247
|
+
|
|
248
|
+
// ----------------------
|
|
249
|
+
// Guidance Registry - Contextual Help
|
|
250
|
+
// ----------------------
|
|
251
|
+
/**
 * Build a human-readable remediation message for a failed assertion.
 *
 * @param {string} assertionType - Assertion `type` from the test spec.
 * @param {object|null|undefined} details - Rich failure details from the validator
 *   (`reason`, `actualOutput`, `expectedFields`, ...).
 * @param {{resolverName?: string}|null|undefined} resolverMeta - Resolver declaration;
 *   its name selects the optional bank/telegram tips.
 * @returns {string} Guidance text; a generic one-line fallback when the
 *   assertion type (or failure reason) has no dedicated guidance.
 */
function getGuidanceMessage(assertionType, details, resolverMeta) {
  // Coerce so the substring checks below cannot throw on a non-string name.
  const resolverName = String(resolverMeta?.resolverName || 'unknown');

  // This runs while reporting a failure; an unserialisable output (cycle,
  // BigInt) must not crash the report itself.
  const safeStringify = (value) => {
    try {
      return JSON.stringify(value);
    } catch (err) {
      return '[unserializable output]';
    }
  };

  switch (assertionType) {
    case 'output_fields_match_contract':
      if (details?.reason === 'missing_fields') {
        const expectedFields = Array.isArray(details.expectedFields) ? details.expectedFields : [];
        const resolverTips = resolverName.includes('bank') ? `
💡 Bank resolver tip:
- Your capability.js must return { balance: integer }
- Create test/bank.db with customer_id=12345
- Use exampleAction: "Action bank-account-lookup customer_id=12345 bank_db_path=./test/bank.db"` :
          resolverName.includes('telegram') ? `
💡 Telegram resolver tip:
- Your exampleAction must exactly match your hardcoded test string
- Return { status: "sent" } for the test action` : '';

        return `
🔍 What happened?
Your resolver returned: ${safeStringify(details.actualOutput)}
But your resolver.js declares outputs: [${expectedFields.map(f => `"${f}"`).join(', ')}]

💡 How to fix:
1. Ensure your resolver returns the correct output structure
2. Check your exampleAction matches your test logic
3. Verify your test data exists${resolverTips}

📘 Learn more: https://o-lang.org/docs/conformance/output-contract
`.trim();
      }
      // Other failure reasons have no dedicated text; use the generic fallback.
      break;

    case 'rejects_missing_required_input':
      return `
🔍 What happened?
Your resolver didn't reject invalid input properly.

💡 How to fix:
- Return { error: "INVALID_INPUT" } for empty/missing inputs
- Return undefined only for non-matching actions (e.g., Telegram actions sent to bank resolver)

📘 Learn more: https://o-lang.org/docs/conformance/input-validation
`.trim();

    case 'output_is_object':
      return `
🔍 What happened?
Your resolver didn't return an object.

💡 How to fix:
- Return { output: { ... } } for kernel mode
- Or return { field: value } for direct mode
- Never return primitives, arrays, or undefined for valid actions

📘 Learn more: https://o-lang.org/docs/conformance/output-contract
`.trim();
  }

  return `Assertion failed. Check your resolver implementation.`;
}
|
|
310
|
+
|
|
104
311
|
// ----------------------
|
|
105
312
|
// Assertion handler registry.
|
|
106
313
|
// ----------------------
|
|
107
314
|
const assertionHandlers = {
|
|
108
|
-
// R-005: Metadata
|
|
109
315
|
resolver_has_field: (resolverMeta, assertion) =>
|
|
110
316
|
checkResolverHasField(resolverMeta, assertion),
|
|
111
317
|
resolver_inputs_valid: checkResolverInputsValid,
|
|
@@ -113,7 +319,6 @@ const assertionHandlers = {
|
|
|
113
319
|
field_names_normalized: checkFieldNamesNormalized,
|
|
114
320
|
resolver_failures_valid: checkResolverFailuresValid,
|
|
115
321
|
|
|
116
|
-
// R-006–R-012: Runtime
|
|
117
322
|
resolver_is_callable: (ctx) => checkResolverIsCallable(ctx.resolver),
|
|
118
323
|
resolver_failure_declared: (ctx) => checkFailureCodeDeclared(ctx.error, ctx.resolverMeta),
|
|
119
324
|
rejects_missing_required_input: (ctx) => checkRejectsMissingRequiredInput(ctx),
|
|
@@ -123,12 +328,22 @@ const assertionHandlers = {
|
|
|
123
328
|
output_fields_match_contract: (ctx) => checkOutputFieldsMatchContract(ctx.output, ctx.resolverMeta),
|
|
124
329
|
deterministic_output: (ctx) => checkDeterministicOutput(ctx.outputs),
|
|
125
330
|
no_global_state_mutation: () => checkNoGlobalMutation(),
|
|
331
|
+
|
|
332
|
+
// ----------------------
|
|
333
|
+
// R-013 handlers (now async)
|
|
334
|
+
// ----------------------
|
|
335
|
+
blocks_unresolved_variables: async (ctx, assertion, status) =>
|
|
336
|
+
await checkBlocksUnresolvedVariables(ctx, ctx.resolverMeta, status),
|
|
337
|
+
no_direct_prompt_parsing: async (ctx, assertion, status) =>
|
|
338
|
+
await checkNoDirectPromptParsing(ctx, ctx.resolverMeta),
|
|
339
|
+
no_default_values: async (ctx, assertion, status) =>
|
|
340
|
+
await checkNoDefaultValues(ctx, ctx.resolverMeta, status),
|
|
126
341
|
};
|
|
127
342
|
|
|
128
343
|
// ----------------------
|
|
129
|
-
// Main assertion runner
|
|
344
|
+
// Main assertion runner with guided feedback
|
|
130
345
|
// ----------------------
|
|
131
|
-
function runAssertions(testSpec, target, status = {}) {
|
|
346
|
+
async function runAssertions(testSpec, target, status = {}) {
|
|
132
347
|
if (!testSpec.assertions?.length) {
|
|
133
348
|
return { ok: true, message: "No assertions defined" };
|
|
134
349
|
}
|
|
@@ -137,10 +352,10 @@ function runAssertions(testSpec, target, status = {}) {
|
|
|
137
352
|
|
|
138
353
|
for (const assertion of testSpec.assertions) {
|
|
139
354
|
const { id, type, severity = "fatal", description } = assertion;
|
|
140
|
-
let
|
|
355
|
+
let result = false;
|
|
141
356
|
|
|
142
357
|
if (type in assertionHandlers) {
|
|
143
|
-
|
|
358
|
+
result = await assertionHandlers[type](target, assertion, status);
|
|
144
359
|
} else {
|
|
145
360
|
failures.push({
|
|
146
361
|
id,
|
|
@@ -150,11 +365,29 @@ function runAssertions(testSpec, target, status = {}) {
|
|
|
150
365
|
continue;
|
|
151
366
|
}
|
|
152
367
|
|
|
368
|
+
// Handle rich error objects
|
|
369
|
+
let passed = true;
|
|
370
|
+
let details = null;
|
|
371
|
+
|
|
372
|
+
if (typeof result === 'object' && result !== null) {
|
|
373
|
+
passed = result.passed;
|
|
374
|
+
details = result.details;
|
|
375
|
+
} else {
|
|
376
|
+
passed = result;
|
|
377
|
+
}
|
|
378
|
+
|
|
153
379
|
if (!passed) {
|
|
380
|
+
let message = description || `Assertion failed: ${id}`;
|
|
381
|
+
|
|
382
|
+
// Add guided feedback
|
|
383
|
+
if (details) {
|
|
384
|
+
message = getGuidanceMessage(type, details, status.resolverMeta || target);
|
|
385
|
+
}
|
|
386
|
+
|
|
154
387
|
failures.push({
|
|
155
388
|
id,
|
|
156
389
|
severity,
|
|
157
|
-
message:
|
|
390
|
+
message: message
|
|
158
391
|
});
|
|
159
392
|
}
|
|
160
393
|
}
|
|
@@ -164,7 +397,7 @@ function runAssertions(testSpec, target, status = {}) {
|
|
|
164
397
|
message:
|
|
165
398
|
failures.length === 0
|
|
166
399
|
? "All assertions passed"
|
|
167
|
-
: failures.map(f => `[${f.severity}] ${f.id}: ${f.message}`).join("
|
|
400
|
+
: failures.map(f => `[${f.severity}] ${f.id}: ${f.message}`).join("\n\n"),
|
|
168
401
|
failures,
|
|
169
402
|
};
|
|
170
403
|
}
|
|
@@ -183,8 +416,17 @@ async function invokeResolverWithObservation(resolver, resolverMeta, testSpec, f
|
|
|
183
416
|
retryCount: 0,
|
|
184
417
|
};
|
|
185
418
|
|
|
186
|
-
// ✅ Use
|
|
187
|
-
|
|
419
|
+
// ✅ DYNAMIC INPUT: Use resolver's exampleAction if marker is present
|
|
420
|
+
let input;
|
|
421
|
+
if (fixture?.invoke === "__USE_RESOLVER_EXAMPLE_ACTION__") {
|
|
422
|
+
const exampleAction = resolverMeta?.exampleAction;
|
|
423
|
+
if (!exampleAction) {
|
|
424
|
+
throw new Error("Resolver must declare 'exampleAction' in resolver.js");
|
|
425
|
+
}
|
|
426
|
+
input = exampleAction;
|
|
427
|
+
} else {
|
|
428
|
+
input = fixture?.invoke || {};
|
|
429
|
+
}
|
|
188
430
|
|
|
189
431
|
const runs = testSpec.test_id === 'R-011-determinism' ? 3 : 1;
|
|
190
432
|
|
|
@@ -196,12 +438,11 @@ async function invokeResolverWithObservation(resolver, resolverMeta, testSpec, f
|
|
|
196
438
|
} catch (err) {
|
|
197
439
|
ctx.threw = true;
|
|
198
440
|
ctx.error = err;
|
|
199
|
-
// Simple retry count inference (real impl would use retry loop)
|
|
200
441
|
if (err.code) {
|
|
201
442
|
const decl = resolverMeta.failures?.find(f => f.code === err.code);
|
|
202
443
|
if (decl) ctx.retryCount = decl.retries;
|
|
203
444
|
}
|
|
204
|
-
break;
|
|
445
|
+
break;
|
|
205
446
|
}
|
|
206
447
|
}
|
|
207
448
|
|
|
@@ -209,13 +450,20 @@ async function invokeResolverWithObservation(resolver, resolverMeta, testSpec, f
|
|
|
209
450
|
}
|
|
210
451
|
|
|
211
452
|
// ----------------------
|
|
212
|
-
// Test suite executor
|
|
453
|
+
// Test suite executor with enhanced output
|
|
213
454
|
// ----------------------
|
|
214
455
|
async function runAllTests({ suites, resolver }) {
|
|
215
456
|
let failed = 0;
|
|
216
457
|
const PACKAGE_ROOT = path.join(__dirname, '..');
|
|
217
458
|
const resolverMeta = resolver.resolverDeclaration || resolver;
|
|
218
459
|
|
|
460
|
+
// Pre-check: warn about common issues
|
|
461
|
+
const resolverName = resolverMeta.resolverName || 'unknown';
|
|
462
|
+
if (resolverName.includes('bank') && !fs.existsSync(path.join(process.cwd(), 'test', 'bank.db'))) {
|
|
463
|
+
console.warn("\n⚠️ Warning: test/bank.db not found. Create it with:");
|
|
464
|
+
console.warn(" node scripts/create-test-db.mjs\n");
|
|
465
|
+
}
|
|
466
|
+
|
|
219
467
|
for (const suite of suites) {
|
|
220
468
|
const suiteDir = path.join(PACKAGE_ROOT, suite);
|
|
221
469
|
const testSpecPath = path.join(suiteDir, "test.json");
|
|
@@ -227,10 +475,15 @@ async function runAllTests({ suites, resolver }) {
|
|
|
227
475
|
}
|
|
228
476
|
|
|
229
477
|
const testSpec = JSON.parse(fs.readFileSync(testSpecPath, "utf8"));
|
|
230
|
-
|
|
478
|
+
let fixture = testSpec.fixtures.inputs[0];
|
|
479
|
+
|
|
480
|
+
// ✅ Allow local override (optional but useful)
|
|
481
|
+
const localFixturePath = path.join(process.cwd(), 'test-fixtures', `${suite}.json`);
|
|
482
|
+
if (fs.existsSync(localFixturePath)) {
|
|
483
|
+
fixture = JSON.parse(fs.readFileSync(localFixturePath, 'utf8'));
|
|
484
|
+
}
|
|
231
485
|
|
|
232
486
|
if (fixture.resolver_contract) {
|
|
233
|
-
// R-005: Validate metadata structure
|
|
234
487
|
const contractPath = path.join(suiteDir, fixture.resolver_contract);
|
|
235
488
|
if (!fs.existsSync(contractPath)) {
|
|
236
489
|
console.error(`❌ Resolver contract missing: ${contractPath}`);
|
|
@@ -247,22 +500,24 @@ async function runAllTests({ suites, resolver }) {
|
|
|
247
500
|
continue;
|
|
248
501
|
}
|
|
249
502
|
|
|
250
|
-
const result = runAssertions(testSpec, target);
|
|
503
|
+
const result = await runAssertions(testSpec, target);
|
|
251
504
|
if (!result.ok) {
|
|
252
|
-
console.error(
|
|
505
|
+
console.error(`\n❌ ${suite} failed:\n`);
|
|
506
|
+
console.error(result.message);
|
|
507
|
+
console.error('\n' + '='.repeat(60) + '\n');
|
|
253
508
|
failed++;
|
|
254
509
|
} else {
|
|
255
510
|
console.log(`✅ ${suite} passed`);
|
|
256
511
|
}
|
|
257
512
|
} else if (testSpec.category === "resolver-runtime") {
|
|
258
|
-
// R-006 → R-012: Observe real resolver behavior
|
|
259
513
|
try {
|
|
260
|
-
// ✅ Pass fixture to invoker
|
|
261
514
|
const runtimeContext = await invokeResolverWithObservation(resolver, resolverMeta, testSpec, fixture);
|
|
262
|
-
const result = runAssertions(testSpec, runtimeContext);
|
|
515
|
+
const result = await runAssertions(testSpec, runtimeContext, { resolverMeta });
|
|
263
516
|
|
|
264
517
|
if (!result.ok) {
|
|
265
|
-
console.error(
|
|
518
|
+
console.error(`\n❌ ${suite} failed:\n`);
|
|
519
|
+
console.error(result.message);
|
|
520
|
+
console.error('\n' + '='.repeat(60) + '\n');
|
|
266
521
|
failed++;
|
|
267
522
|
} else {
|
|
268
523
|
console.log(`✅ ${suite} passed`);
|
package/package.json
CHANGED
package/run.js
CHANGED
|
@@ -79,7 +79,8 @@ const { generateBadge } = require("./lib/badge");
|
|
|
79
79
|
"R-009-resolver-retry-semantics",
|
|
80
80
|
"R-010-resolver-output-contract",
|
|
81
81
|
"R-011-resolver-determinism",
|
|
82
|
-
"R-012-resolver-side-effects"
|
|
82
|
+
"R-012-resolver-side-effects",
|
|
83
|
+
"R-013-resolver-kernel-bypass"
|
|
83
84
|
];
|
|
84
85
|
|
|
85
86
|
const result = await runAllTests({
|