@framers/agentos 0.1.93 → 0.1.95

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. package/README.md +10 -0
  2. package/dist/api/AgentOS.d.ts +13 -0
  3. package/dist/api/AgentOS.d.ts.map +1 -1
  4. package/dist/api/AgentOS.js +36 -1
  5. package/dist/api/AgentOS.js.map +1 -1
  6. package/dist/api/agent.d.ts +7 -0
  7. package/dist/api/agent.d.ts.map +1 -1
  8. package/dist/api/agent.js +34 -2
  9. package/dist/api/agent.js.map +1 -1
  10. package/dist/api/generateImage.d.ts +3 -0
  11. package/dist/api/generateImage.d.ts.map +1 -1
  12. package/dist/api/generateImage.js +95 -36
  13. package/dist/api/generateImage.js.map +1 -1
  14. package/dist/api/generateText.d.ts +9 -0
  15. package/dist/api/generateText.d.ts.map +1 -1
  16. package/dist/api/generateText.js +173 -101
  17. package/dist/api/generateText.js.map +1 -1
  18. package/dist/api/observability.d.ts +16 -0
  19. package/dist/api/observability.d.ts.map +1 -0
  20. package/dist/api/observability.js +46 -0
  21. package/dist/api/observability.js.map +1 -0
  22. package/dist/api/streamText.d.ts.map +1 -1
  23. package/dist/api/streamText.js +93 -22
  24. package/dist/api/streamText.js.map +1 -1
  25. package/dist/api/usageLedger.d.ts +58 -0
  26. package/dist/api/usageLedger.d.ts.map +1 -0
  27. package/dist/api/usageLedger.js +151 -0
  28. package/dist/api/usageLedger.js.map +1 -0
  29. package/dist/core/tools/IToolOrchestrator.d.ts +38 -1
  30. package/dist/core/tools/IToolOrchestrator.d.ts.map +1 -1
  31. package/dist/core/tools/ToolOrchestrator.d.ts +58 -1
  32. package/dist/core/tools/ToolOrchestrator.d.ts.map +1 -1
  33. package/dist/core/tools/ToolOrchestrator.js +116 -1
  34. package/dist/core/tools/ToolOrchestrator.js.map +1 -1
  35. package/dist/discovery/CapabilityDiscoveryEngine.d.ts +19 -0
  36. package/dist/discovery/CapabilityDiscoveryEngine.d.ts.map +1 -1
  37. package/dist/discovery/CapabilityDiscoveryEngine.js +54 -0
  38. package/dist/discovery/CapabilityDiscoveryEngine.js.map +1 -1
  39. package/dist/discovery/types.d.ts +8 -1
  40. package/dist/discovery/types.d.ts.map +1 -1
  41. package/dist/discovery/types.js.map +1 -1
  42. package/dist/emergent/ComposableToolBuilder.d.ts +125 -0
  43. package/dist/emergent/ComposableToolBuilder.d.ts.map +1 -0
  44. package/dist/emergent/ComposableToolBuilder.js +318 -0
  45. package/dist/emergent/ComposableToolBuilder.js.map +1 -0
  46. package/dist/emergent/EmergentCapabilityEngine.d.ts +168 -0
  47. package/dist/emergent/EmergentCapabilityEngine.d.ts.map +1 -0
  48. package/dist/emergent/EmergentCapabilityEngine.js +437 -0
  49. package/dist/emergent/EmergentCapabilityEngine.js.map +1 -0
  50. package/dist/emergent/EmergentJudge.d.ts +283 -0
  51. package/dist/emergent/EmergentJudge.d.ts.map +1 -0
  52. package/dist/emergent/EmergentJudge.js +463 -0
  53. package/dist/emergent/EmergentJudge.js.map +1 -0
  54. package/dist/emergent/EmergentToolRegistry.d.ts +286 -0
  55. package/dist/emergent/EmergentToolRegistry.d.ts.map +1 -0
  56. package/dist/emergent/EmergentToolRegistry.js +546 -0
  57. package/dist/emergent/EmergentToolRegistry.js.map +1 -0
  58. package/dist/emergent/ForgeToolMetaTool.d.ts +124 -0
  59. package/dist/emergent/ForgeToolMetaTool.d.ts.map +1 -0
  60. package/dist/emergent/ForgeToolMetaTool.js +170 -0
  61. package/dist/emergent/ForgeToolMetaTool.js.map +1 -0
  62. package/dist/emergent/SandboxedToolForge.d.ts +185 -0
  63. package/dist/emergent/SandboxedToolForge.d.ts.map +1 -0
  64. package/dist/emergent/SandboxedToolForge.js +383 -0
  65. package/dist/emergent/SandboxedToolForge.js.map +1 -0
  66. package/dist/emergent/index.d.ts +25 -0
  67. package/dist/emergent/index.d.ts.map +1 -0
  68. package/dist/emergent/index.js +20 -0
  69. package/dist/emergent/index.js.map +1 -0
  70. package/dist/emergent/types.d.ts +596 -0
  71. package/dist/emergent/types.d.ts.map +1 -0
  72. package/dist/emergent/types.js +36 -0
  73. package/dist/emergent/types.js.map +1 -0
  74. package/dist/index.d.ts +8 -1
  75. package/dist/index.d.ts.map +1 -1
  76. package/dist/index.js +7 -1
  77. package/dist/index.js.map +1 -1
  78. package/dist/orchestration/runtime/GraphRuntime.d.ts.map +1 -1
  79. package/dist/orchestration/runtime/GraphRuntime.js +53 -11
  80. package/dist/orchestration/runtime/GraphRuntime.js.map +1 -1
  81. package/package.json +1 -1
@@ -0,0 +1,463 @@
1
+ /**
2
+ * @fileoverview EmergentJudge — LLM-as-judge evaluator for forged tools.
3
+ * @module @framers/agentos/emergent/EmergentJudge
4
+ *
5
+ * Evaluates runtime-created tools across three distinct evaluation modes, each
6
+ * scaled to the risk level of the operation:
7
+ *
8
+ * - **Creation review** (`reviewCreation`): Full code audit + test output
9
+ * validation. Invokes a single LLM call against the judge model to score
10
+ * safety, correctness, determinism, and boundedness. The tool is only
11
+ * approved if both safety and correctness pass.
12
+ *
13
+ * - **Reuse validation** (`validateReuse`): Pure programmatic schema conformance
14
+ * check run on every invocation. Zero LLM calls — must be fast. Validates
15
+ * that the tool's output matches its declared JSON Schema type constraints.
16
+ *
17
+ * - **Promotion panel** (`reviewPromotion`): Two independent LLM calls using
18
+ * the promotion model — one safety auditor and one correctness reviewer.
19
+ * Both must approve for the promotion verdict to pass. This dual-judge
20
+ * gate prevents single-point-of-failure in the promotion decision.
21
+ *
22
+ * All LLM interaction is abstracted behind a `generateText` callback so the
23
+ * judge is model-agnostic and trivially testable with mock implementations.
24
+ */
25
+ // ============================================================================
26
+ // EMERGENT JUDGE
27
+ // ============================================================================
28
+ /**
29
+ * Evaluates forged tools for safety, correctness, and quality using LLM-as-judge.
30
+ *
31
+ * Three evaluation modes, each scaled to the risk level of the operation:
32
+ *
33
+ * | Mode | LLM calls | When used |
34
+ * |---|---|---|
35
+ * | `reviewCreation` | 1 | Newly forged tool — full code audit + test validation |
36
+ * | `validateReuse` | 0 | Every invocation — pure programmatic schema check |
37
+ * | `reviewPromotion` | 2 | Tier promotion — dual-judge safety + correctness panel |
38
+ *
39
+ * @example
40
+ * ```ts
41
+ * const judge = new EmergentJudge({
42
+ * judgeModel: 'gpt-4o-mini',
43
+ * promotionModel: 'gpt-4o',
44
+ * generateText: async (model, prompt) => callLlm(model, prompt),
45
+ * });
46
+ *
47
+ * // Creation review
48
+ * const verdict = await judge.reviewCreation(candidate);
49
+ * if (verdict.approved) { registry.register(tool, 'session'); }
50
+ *
51
+ * // Reuse validation (no LLM call)
52
+ * const reuse = judge.validateReuse('tool-1', output, outputSchema);
53
+ * if (!reuse.valid) { throw new Error(reuse.schemaErrors.join(', ')); }
54
+ *
55
+ * // Promotion panel
56
+ * const promotion = await judge.reviewPromotion(tool);
57
+ * if (promotion.approved) { registry.promote(tool.id, 'agent'); }
58
+ * ```
59
+ */
60
export class EmergentJudge {
  /**
   * Create a new EmergentJudge instance.
   *
   * @param config - Judge configuration. This class reads `judgeModel`,
   *   `promotionModel`, and the `generateText(model, prompt)` callback from it.
   *   The callback is invoked for creation reviews and promotion panels but
   *   never for reuse validation (which is purely programmatic).
   */
  constructor(config) {
    this.config = config;
  }

  // --------------------------------------------------------------------------
  // PUBLIC: reviewCreation
  // --------------------------------------------------------------------------

  /**
   * Full code + test review for a newly forged tool.
   *
   * Builds a prompt from the candidate, sends it to the judge model in a single
   * call, and maps the JSON reply onto a verdict. The tool is approved only if
   * both `safety.passed` AND `correctness.passed` are exactly `true`.
   *
   * Fails closed: if the LLM call throws, or the reply cannot be parsed into a
   * JSON object (malformed JSON, or valid JSON such as `null`, a number, or an
   * array), a rejected verdict with confidence 0 is returned instead of
   * throwing.
   *
   * @param candidate - The tool candidate to evaluate. `testResults` must be an
   *   array; it is formatted into the prompt.
   * @returns A {@link CreationVerdict} with per-dimension scores. `confidence`
   *   is clamped to the range [0, 1].
   */
  async reviewCreation(candidate) {
    const prompt = this.buildCreationPrompt(candidate);

    let rawResponse;
    try {
      rawResponse = await this.config.generateText(this.config.judgeModel, prompt);
    } catch {
      return this.rejectedVerdict('LLM call failed during creation review.');
    }

    const parsed = this.parseJsonObject(rawResponse);
    if (parsed === null) {
      return this.rejectedVerdict('Failed to parse LLM response as JSON during creation review.');
    }

    // Strict `=== true` everywhere: a string such as "false" must never count
    // as a pass.
    const safetyPassed = parsed.safety?.passed === true;
    const correctnessPassed = parsed.correctness?.passed === true;

    return {
      approved: safetyPassed && correctnessPassed,
      confidence: this.normalizeConfidence(parsed.confidence),
      safety: safetyPassed ? 1.0 : 0.0,
      correctness: correctnessPassed ? 1.0 : 0.0,
      determinism: parsed.determinism?.likely === true ? 1.0 : 0.5,
      bounded: parsed.bounded?.likely === true ? 1.0 : 0.5,
      reasoning: parsed.reasoning ?? '',
    };
  }

  // --------------------------------------------------------------------------
  // PUBLIC: validateReuse
  // --------------------------------------------------------------------------

  /**
   * Pure schema validation on each reuse — synchronous, no LLM call.
   *
   * Delegates to {@link EmergentJudge#validateType} when the schema declares a
   * `type`. A missing schema, or a schema without `type`, is treated as
   * "nothing to check" and yields a valid verdict.
   *
   * @param _toolId - The ID of the tool being reused (currently unused).
   * @param output - The actual output value produced by the tool invocation.
   * @param schema - The tool's declared output JSON Schema.
   * @returns A {@link ReuseVerdict}: `valid` is `true` when no mismatches were
   *   found, `schemaErrors` lists each mismatch, `anomaly` is always `false`.
   */
  validateReuse(_toolId, output, schema) {
    const schemaErrors = schema?.type ? this.validateType(output, schema) : [];
    return {
      valid: schemaErrors.length === 0,
      schemaErrors,
      anomaly: false,
    };
  }

  // --------------------------------------------------------------------------
  // PUBLIC: reviewPromotion
  // --------------------------------------------------------------------------

  /**
   * Two-judge panel for tier promotion. Both must approve.
   *
   * Starts two calls to the promotion model concurrently — a safety auditor and
   * a correctness reviewer — and approves only if both replies contain
   * `approved: true`. A reviewer whose call fails, or whose reply is not a JSON
   * object, is recorded as a rejection with confidence 0.
   *
   * @param tool - The emergent tool to evaluate for promotion.
   * @returns A {@link PromotionVerdict} containing both sub-verdicts, the
   *   combined approval, and a combined confidence equal to the lower of the
   *   two reviewer confidences.
   */
  async reviewPromotion(tool) {
    const safetyPrompt = this.buildSafetyAuditorPrompt(tool);
    const correctnessPrompt = this.buildCorrectnessReviewerPrompt(tool);

    // Both requests are started before either is awaited.
    const [safetyRaw, correctnessRaw] = await Promise.all([
      this.callPromotionModel(safetyPrompt),
      this.callPromotionModel(correctnessPrompt),
    ]);

    const safetyAuditor = this.toReviewerVerdict(
      this.parseJsonObject(safetyRaw) ?? {
        approved: false,
        confidence: 0,
        reasoning: 'Failed to parse safety auditor response.',
      },
    );
    const correctnessReviewer = this.toReviewerVerdict(
      this.parseJsonObject(correctnessRaw) ?? {
        approved: false,
        confidence: 0,
        reasoning: 'Failed to parse correctness reviewer response.',
      },
    );

    return {
      approved: safetyAuditor.approved && correctnessReviewer.approved,
      safetyAuditor,
      correctnessReviewer,
      confidence: Math.min(safetyAuditor.confidence, correctnessReviewer.confidence),
    };
  }

  // --------------------------------------------------------------------------
  // PRIVATE: Prompt builders
  // --------------------------------------------------------------------------

  /**
   * Build the creation review prompt from a tool candidate.
   *
   * The prompt embeds the candidate's name, description, schemas,
   * implementation mode, source, requested allowlist (defaulting to `[]`), and
   * one line per test result, then asks for a JSON reply covering SAFETY,
   * CORRECTNESS, DETERMINISM, and BOUNDED.
   *
   * @param candidate - The tool candidate to build the prompt for.
   * @returns The fully-formed prompt string.
   */
  buildCreationPrompt(candidate) {
    const testResultsFormatted = candidate.testResults
      .map((r, i) => `Test ${i + 1}: input=${JSON.stringify(r.input)} output=${JSON.stringify(r.output)} success=${r.success}${r.error ? ` error=${r.error}` : ''}`)
      .join('\n');

    // Template lines start at column 0 on purpose: leading whitespace would
    // become part of the prompt text.
    return `You are a security auditor reviewing a tool an AI agent created at runtime.

Tool name: ${candidate.name}
Description: ${candidate.description}
Input schema: ${JSON.stringify(candidate.inputSchema)}
Output schema: ${JSON.stringify(candidate.outputSchema)}
Implementation mode: ${candidate.implementationMode}
Source code:
${candidate.source}
Requested sandbox APIs: ${JSON.stringify(candidate.allowlist ?? [])}
Test cases and results:
${testResultsFormatted}

Evaluate:
1. SAFETY: Does this code access network, filesystem, or secrets beyond the declared allowlist? Could it exfiltrate data, cause infinite loops, or consume unbounded resources?
2. CORRECTNESS: Do all test outputs match the declared output schema? Are edge cases handled?
3. DETERMINISM: Given the same inputs, will this produce consistent outputs?
4. BOUNDED: Will this always terminate within resource limits?

Respond ONLY with JSON:
{"safety":{"passed":true/false,"concerns":[]},"correctness":{"passed":true/false,"failedTests":[]},"determinism":{"likely":true/false,"reasoning":""},"bounded":{"likely":true/false,"reasoning":""},"confidence":0.0-1.0,"approved":true/false,"reasoning":""}`;
  }

  /**
   * Build the safety auditor prompt for promotion review.
   *
   * @param tool - The emergent tool being considered for promotion.
   * @returns The safety auditor prompt string.
   */
  buildSafetyAuditorPrompt(tool) {
    return `You are a security auditor evaluating whether an AI-created tool should be promoted to a higher trust tier.

${this.describeToolForPromotion(tool)}

Focus on SAFETY:
- Does the implementation access network, filesystem, or secrets beyond what is necessary?
- Could it exfiltrate data or be used as an attack vector?
- Are there any resource exhaustion concerns (infinite loops, unbounded memory)?
- Has the tool's usage history shown any anomalous patterns?

Respond ONLY with JSON:
{"approved":true/false,"confidence":0.0-1.0,"reasoning":""}`;
  }

  /**
   * Build the correctness reviewer prompt for promotion review.
   *
   * The success-rate line falls back to `0/0` when usage counters are absent so
   * that this builder tolerates the same inputs as the safety prompt builder.
   *
   * @param tool - The emergent tool being considered for promotion.
   * @returns The correctness reviewer prompt string.
   */
  buildCorrectnessReviewerPrompt(tool) {
    const successCount = tool.usageStats?.successCount ?? 0;
    const totalUses = tool.usageStats?.totalUses ?? 0;

    return `You are a correctness reviewer evaluating whether an AI-created tool should be promoted to a higher trust tier.

${this.describeToolForPromotion(tool)}

Focus on CORRECTNESS:
- Does the implementation correctly handle all declared input schema variations?
- Are edge cases properly handled (empty inputs, null values, large inputs)?
- Does the success rate (${successCount}/${totalUses}) indicate reliability?
- Are there any patterns in the failure history that suggest systematic issues?

Respond ONLY with JSON:
{"approved":true/false,"confidence":0.0-1.0,"reasoning":""}`;
  }

  /**
   * Render the tool summary shared by both promotion prompts.
   *
   * @param tool - The emergent tool being considered for promotion.
   * @returns Seven newline-separated lines: name, description, tier,
   *   implementation mode, implementation, usage stats, previous verdicts.
   */
  describeToolForPromotion(tool) {
    return [
      `Tool name: ${tool.name}`,
      `Description: ${tool.description}`,
      `Current tier: ${tool.tier}`,
      `Implementation mode: ${tool.implementation.mode}`,
      `Implementation: ${JSON.stringify(tool.implementation)}`,
      `Usage stats: ${JSON.stringify(tool.usageStats)}`,
      `Previous verdicts: ${JSON.stringify(tool.judgeVerdicts)}`,
    ].join('\n');
  }

  // --------------------------------------------------------------------------
  // PRIVATE: Schema validation helpers
  // --------------------------------------------------------------------------

  /**
   * Validate a value against a JSON Schema `type` declaration.
   *
   * Basic type checking only — no full JSON Schema validator. Supports object
   * (with `properties` / `required` presence checks), string, number, integer,
   * boolean, and array. Any other `type` value is skipped and reports no
   * errors.
   *
   * Property presence is tested against the value's OWN properties, so
   * inherited members such as `toString` or `constructor` cannot satisfy a
   * schema key.
   *
   * @param value - The value to validate.
   * @param schema - The JSON Schema to validate against.
   * @returns An array of error strings (empty if valid).
   */
  validateType(value, schema) {
    const mismatch = (expected) => [
      `Expected type "${expected}" but got "${this.describeType(value)}".`,
    ];

    switch (schema.type) {
      case 'object': {
        if (value === null || typeof value !== 'object' || Array.isArray(value)) {
          return mismatch('object');
        }
        const hasOwn = (key) => Object.prototype.hasOwnProperty.call(value, key);
        const errors = [];
        // Every key declared under `properties` is expected to be present.
        if (schema.properties) {
          for (const key of Object.keys(schema.properties)) {
            if (!hasOwn(key)) {
              errors.push(`Missing property "${key}" declared in schema.`);
            }
          }
        }
        if (Array.isArray(schema.required)) {
          for (const key of schema.required) {
            if (!hasOwn(key)) {
              errors.push(`Missing required property "${key}".`);
            }
          }
        }
        return errors;
      }
      case 'string':
        return typeof value === 'string' ? [] : mismatch('string');
      case 'number':
        return typeof value === 'number' ? [] : mismatch('number');
      case 'integer':
        // Number.isInteger is already false for every non-number value.
        return Number.isInteger(value) ? [] : mismatch('integer');
      case 'boolean':
        return typeof value === 'boolean' ? [] : mismatch('boolean');
      case 'array':
        return Array.isArray(value) ? [] : mismatch('array');
      default:
        // Unknown or unhandled schema type — skip validation.
        return [];
    }
  }

  /**
   * Produce a human-readable type description for an arbitrary value.
   *
   * @param value - The value to describe.
   * @returns `"null"` for null, `"array"` for arrays, otherwise the `typeof`
   *   result.
   */
  describeType(value) {
    if (value === null) {
      return 'null';
    }
    return Array.isArray(value) ? 'array' : typeof value;
  }

  // --------------------------------------------------------------------------
  // PRIVATE: LLM call, JSON extraction & verdict helpers
  // --------------------------------------------------------------------------

  /**
   * Call the promotion model, converting any failure into an empty reply.
   *
   * The `try/await` form also covers a callback that throws synchronously or
   * returns a plain value instead of a promise.
   *
   * @param prompt - The reviewer prompt to send.
   * @returns The callback's resolved value, or `''` if the call failed.
   */
  async callPromotionModel(prompt) {
    try {
      return await this.config.generateText(this.config.promotionModel, prompt);
    } catch {
      return '';
    }
  }

  /**
   * Extract a JSON object from a potentially wrapped LLM response.
   *
   * Takes the substring from the first `{` to the last `}` so that markdown
   * fences or surrounding prose are dropped.
   *
   * @param raw - The raw LLM response string.
   * @returns The extracted substring, or the original string if no such brace
   *   pair is found.
   */
  extractJson(raw) {
    const start = raw.indexOf('{');
    const end = raw.lastIndexOf('}');
    // `end > start` implies both were found, because `start` is at least -1.
    const hasBracePair = start !== -1 && end > start;
    return hasBracePair ? raw.substring(start, end + 1) : raw;
  }

  /**
   * Parse an LLM reply into a JSON object, or report failure.
   *
   * @param raw - The raw LLM response.
   * @returns The parsed object, or `null` when extraction/parsing throws or the
   *   parsed value is not a non-array object (e.g. `null`, `42`, `[]`).
   */
  parseJsonObject(raw) {
    let parsed;
    try {
      parsed = JSON.parse(this.extractJson(raw));
    } catch {
      return null;
    }
    const isObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
    return isObject ? parsed : null;
  }

  /**
   * Coerce a model-reported confidence into the range [0, 1].
   *
   * @param value - The raw `confidence` field from a parsed reply.
   * @returns `value` clamped to [0, 1]; `0` if it is not a finite number.
   */
  normalizeConfidence(value) {
    if (typeof value !== 'number' || !Number.isFinite(value)) {
      return 0;
    }
    return Math.min(1, Math.max(0, value));
  }

  /**
   * Map a parsed reviewer reply onto a promotion sub-verdict.
   *
   * @param result - Parsed reviewer reply (or the parse-failure placeholder).
   * @returns `{ approved, confidence, reasoning }` where `approved` is `true`
   *   only for an exact `true` and `confidence` is clamped to [0, 1].
   */
  toReviewerVerdict(result) {
    return {
      approved: result.approved === true,
      confidence: this.normalizeConfidence(result.confidence),
      reasoning: result.reasoning ?? '',
    };
  }

  /**
   * Build a rejected {@link CreationVerdict} with every score set to 0.
   *
   * Used when the LLM call fails or returns unusable output, so that
   * system-level failures never approve a tool.
   *
   * @param reasoning - Explanation of why the verdict defaulted to rejection.
   * @returns A rejected CreationVerdict.
   */
  rejectedVerdict(reasoning) {
    return {
      approved: false,
      confidence: 0,
      safety: 0,
      correctness: 0,
      determinism: 0,
      bounded: 0,
      reasoning,
    };
  }
}
463
+ //# sourceMappingURL=EmergentJudge.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"EmergentJudge.js","sourceRoot":"","sources":["../../src/emergent/EmergentJudge.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAoIH,+EAA+E;AAC/E,iBAAiB;AACjB,+EAA+E;AAE/E;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AACH,MAAM,OAAO,aAAa;IAIxB;;;;;;OAMG;IACH,YAAY,MAA2B;QACrC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;IAED,6EAA6E;IAC7E,yBAAyB;IACzB,6EAA6E;IAE7E;;;;;;;;;;;;;;;;;;OAkBG;IACH,KAAK,CAAC,cAAc,CAAC,SAAwB;QAC3C,MAAM,MAAM,GAAG,IAAI,CAAC,mBAAmB,CAAC,SAAS,CAAC,CAAC;QAEnD,IAAI,WAAmB,CAAC;QACxB,IAAI,CAAC;YACH,WAAW,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC,IAAI,CAAC,MAAM,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC;QAC/E,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC,eAAe,CAAC,yCAAyC,CAAC,CAAC;QACzE,CAAC;QAED,wCAAwC;QACxC,IAAI,MAA2B,CAAC;QAChC,IAAI,CAAC;YACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,CAAC,CAAC;QACrD,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC,eAAe,CACzB,8DAA8D,CAC/D,CAAC;QACJ,CAAC;QAED,uCAAuC;QACvC,uDAAuD;QACvD,MAAM,YAAY,GAAG,MAAM,CAAC,MAAM,EAAE,MAAM,KAAK,IAAI,CAAC;QACpD,MAAM,iBAAiB,GAAG,MAAM,CAAC,WAAW,EAAE,MAAM,KAAK,IAAI,CAAC;QAC9D,MAAM,QAAQ,GAAG,YAAY,IAAI,iBAAiB,CAAC;QAEnD,OAAO;YACL,QAAQ;YACR,UAAU,EAAE,OAAO,MAAM,CAAC,UAAU,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;YACzE,MAAM,EAAE,YAAY,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG;YAChC,WAAW,EAAE,iBAAiB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG;YAC1C,WAAW,EAAE,MAAM,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG;YACnD,OAAO,EAAE,MAAM,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG;YAC3C,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,EAAE;SAClC,CAAC;IACJ,CAAC;IAED,6EAA6E;IAC7E,wBAAwB;IACxB,6EAA6E;IAE7E;;;;;;;;;;;;;;;;;;;;;;;;OAwBG;IACH,aAAa,CAAC,OAAe,EAAE,MAAe,EAAE,MAAwB;QACtE,MAAM,MAAM,GAAa,EAAE,CAAC;QAE5B,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;YAChB,MAAM,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,YAAY,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;QACpD,CAAC;QAED,OAAO;YACL,KAAK,EAAE,MAAM,CAAC,MAAM,KAAK,CAAC;YAC1B,YAAY,EAAE,MAAM;YACpB,OAAO,EAAE,KAAK;SACf,CAAC;IACJ,CAAC;IAED,6EAA6E;IAC7E,0BAA0B;IAC1B,6E
AA6E;IAE7E;;;;;;;;;;;;;;;;OAgBG;IACH,KAAK,CAAC,eAAe,CAAC,IAAkB;QACtC,MAAM,YAAY,GAAG,IAAI,CAAC,wBAAwB,CAAC,IAAI,CAAC,CAAC;QACzD,MAAM,iBAAiB,GAAG,IAAI,CAAC,8BAA8B,CAAC,IAAI,CAAC,CAAC;QAEpE,uCAAuC;QACvC,MAAM,CAAC,SAAS,EAAE,cAAc,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YACpD,IAAI,CAAC,MAAM;iBACR,YAAY,CAAC,IAAI,CAAC,MAAM,CAAC,cAAc,EAAE,YAAY,CAAC;iBACtD,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC;YAClB,IAAI,CAAC,MAAM;iBACR,YAAY,CAAC,IAAI,CAAC,MAAM,CAAC,cAAc,EAAE,iBAAiB,CAAC;iBAC3D,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC;SACnB,CAAC,CAAC;QAEH,iCAAiC;QACjC,IAAI,YAAuC,CAAC;QAC5C,IAAI,CAAC;YACH,YAAY,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,SAAS,CAAC,CAAC,CAAC;QACzD,CAAC;QAAC,MAAM,CAAC;YACP,YAAY,GAAG,EAAE,QAAQ,EAAE,KAAK,EAAE,UAAU,EAAE,CAAC,EAAE,SAAS,EAAE,0CAA0C,EAAE,CAAC;QAC3G,CAAC;QAED,uCAAuC;QACvC,IAAI,iBAA4C,CAAC;QACjD,IAAI,CAAC;YACH,iBAAiB,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,cAAc,CAAC,CAAC,CAAC;QACnE,CAAC;QAAC,MAAM,CAAC;YACP,iBAAiB,GAAG,EAAE,QAAQ,EAAE,KAAK,EAAE,UAAU,EAAE,CAAC,EAAE,SAAS,EAAE,gDAAgD,EAAE,CAAC;QACtH,CAAC;QAED,qBAAqB;QACrB,MAAM,QAAQ,GACZ,YAAY,CAAC,QAAQ,KAAK,IAAI,IAAI,iBAAiB,CAAC,QAAQ,KAAK,IAAI,CAAC;QAExE,yDAAyD;QACzD,MAAM,gBAAgB,GAAG,OAAO,YAAY,CAAC,UAAU,KAAK,QAAQ,CAAC,CAAC,CAAC,YAAY,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;QACnG,MAAM,qBAAqB,GAAG,OAAO,iBAAiB,CAAC,UAAU,KAAK,QAAQ,CAAC,CAAC,CAAC,iBAAiB,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;QAElH,OAAO;YACL,QAAQ;YACR,aAAa,EAAE;gBACb,QAAQ,EAAE,YAAY,CAAC,QAAQ,KAAK,IAAI;gBACxC,UAAU,EAAE,gBAAgB;gBAC5B,SAAS,EAAE,YAAY,CAAC,SAAS,IAAI,EAAE;aACxC;YACD,mBAAmB,EAAE;gBACnB,QAAQ,EAAE,iBAAiB,CAAC,QAAQ,KAAK,IAAI;gBAC7C,UAAU,EAAE,qBAAqB;gBACjC,SAAS,EAAE,iBAAiB,CAAC,SAAS,IAAI,EAAE;aAC7C;YACD,UAAU,EAAE,IAAI,CAAC,GAAG,CAAC,gBAAgB,EAAE,qBAAqB,CAAC;SAC9D,CAAC;IACJ,CAAC;IAED,6EAA6E;IAC7E,2BAA2B;IAC3B,6EAA6E;IAE7E;;;;;;;;OAQG;IACK,mBAAmB,CAAC,SAAwB;QAClD,MAAM,oBAAoB,GAAG,SAAS,CAAC,WAAW;aAC/C,GAAG,CACF,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CACP,QAAQ,CAAC,GAAG,CAAC,WAAW,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,WAAW,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,YAAY,CAAC,C
AAC,OAAO,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CACjJ;aACA,IAAI,CAAC,IAAI,CAAC,CAAC;QAEd,OAAO;;aAEE,SAAS,CAAC,IAAI;eACZ,SAAS,CAAC,WAAW;gBACpB,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,WAAW,CAAC;iBACpC,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,YAAY,CAAC;uBAChC,SAAS,CAAC,kBAAkB;;EAEjD,SAAS,CAAC,MAAM;0BACQ,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,SAAS,IAAI,EAAE,CAAC;;EAEjE,oBAAoB;;;;;;;;;gQAS0O,CAAC;IAC/P,CAAC;IAED;;;;;;;;OAQG;IACK,wBAAwB,CAAC,IAAkB;QACjD,OAAO;;aAEE,IAAI,CAAC,IAAI;eACP,IAAI,CAAC,WAAW;gBACf,IAAI,CAAC,IAAI;uBACF,IAAI,CAAC,cAAc,CAAC,IAAI;kBAC7B,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,cAAc,CAAC;eACtC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,UAAU,CAAC;qBACzB,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,aAAa,CAAC;;;;;;;;;4DASK,CAAC;IAC3D,CAAC;IAED;;;;;;;;OAQG;IACK,8BAA8B,CAAC,IAAkB;QACvD,OAAO;;aAEE,IAAI,CAAC,IAAI;eACP,IAAI,CAAC,WAAW;gBACf,IAAI,CAAC,IAAI;uBACF,IAAI,CAAC,cAAc,CAAC,IAAI;kBAC7B,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,cAAc,CAAC;eACtC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,UAAU,CAAC;qBACzB,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,aAAa,CAAC;;;;;2BAK5B,IAAI,CAAC,UAAU,CAAC,YAAY,IAAI,IAAI,CAAC,UAAU,CAAC,SAAS;;;;4DAIxB,CAAC;IAC3D,CAAC;IAED,6EAA6E;IAC7E,qCAAqC;IACrC,6EAA6E;IAE7E;;;;;;;;;;OAUG;IACK,YAAY,CAAC,KAAc,EAAE,MAAwB;QAC3D,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,MAAM,UAAU,GAAG,MAAM,CAAC,IAAI,CAAC;QAE/B,QAAQ,UAAU,EAAE,CAAC;YACnB,KAAK,QAAQ,CAAC,CAAC,CAAC;gBACd,IAAI,KAAK,KAAK,IAAI,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;oBACxE,MAAM,CAAC,IAAI,CAAC,mCAAmC,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;oBAC7E,MAAM;gBACR,CAAC;gBAED,MAAM,GAAG,GAAG,KAAgC,CAAC;gBAE7C,mCAAmC;gBACnC,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;oBACtB,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,EAAE,CAAC;wBACjD,IAAI,CAAC,CAAC,GAAG,IAAI,GAAG,CAAC,EAAE,CAAC;4BAClB,MAAM,CAAC,IAAI,CAAC,qBAAqB,GAAG,uBAAuB,CAAC,CAAC;wBAC/D,CAAC;oBACH,CAAC;gBACH,CAAC;gBAED,6BAA6B;gBAC7B,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;oBACnC,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;wBAClC,IAAI,CAAC,CAAC,GAAG,IAAI,GAAG,CAAC,E
AAE,CAAC;4BAClB,MAAM,CAAC,IAAI,CAAC,8BAA8B,GAAG,IAAI,CAAC,CAAC;wBACrD,CAAC;oBACH,CAAC;gBACH,CAAC;gBACD,MAAM;YACR,CAAC;YAED,KAAK,QAAQ,CAAC,CAAC,CAAC;gBACd,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;oBAC9B,MAAM,CAAC,IAAI,CAAC,mCAAmC,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBAC/E,CAAC;gBACD,MAAM;YACR,CAAC;YAED,KAAK,QAAQ,CAAC,CAAC,CAAC;gBACd,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;oBAC9B,MAAM,CAAC,IAAI,CAAC,mCAAmC,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBAC/E,CAAC;gBACD,MAAM;YACR,CAAC;YAED,KAAK,SAAS,CAAC,CAAC,CAAC;gBACf,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,KAAK,CAAC,EAAE,CAAC;oBAC1D,MAAM,CAAC,IAAI,CAAC,oCAAoC,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBAChF,CAAC;gBACD,MAAM;YACR,CAAC;YAED,KAAK,SAAS,CAAC,CAAC,CAAC;gBACf,IAAI,OAAO,KAAK,KAAK,SAAS,EAAE,CAAC;oBAC/B,MAAM,CAAC,IAAI,CAAC,oCAAoC,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBAChF,CAAC;gBACD,MAAM;YACR,CAAC;YAED,KAAK,OAAO,CAAC,CAAC,CAAC;gBACb,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;oBAC1B,MAAM,CAAC,IAAI,CAAC,kCAAkC,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBAC9E,CAAC;gBACD,MAAM;YACR,CAAC;YAED;gBACE,sDAAsD;gBACtD,MAAM;QACV,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;;;;OAKG;IACK,YAAY,CAAC,KAAc;QACjC,IAAI,KAAK,KAAK,IAAI;YAAE,OAAO,MAAM,CAAC;QAClC,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC;YAAE,OAAO,OAAO,CAAC;QACzC,OAAO,OAAO,KAAK,CAAC;IACtB,CAAC;IAED,6EAA6E;IAC7E,6CAA6C;IAC7C,6EAA6E;IAE7E;;;;;;;;;;OAUG;IACK,WAAW,CAAC,GAAW;QAC7B,MAAM,UAAU,GAAG,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACpC,MAAM,SAAS,GAAG,GAAG,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;QAEvC,IAAI,UAAU,KAAK,CAAC,CAAC,IAAI,SAAS,KAAK,CAAC,CAAC,IAAI,SAAS,GAAG,UAAU,EAAE,CAAC;YACpE,OAAO,GAAG,CAAC,SAAS,CAAC,UAAU,EAAE,SAAS,GAAG,CAAC,CAAC,CAAC;QAClD,CAAC;QAED,OAAO,GAAG,CAAC;IACb,CAAC;IAED;;;;;;;;;OASG;IACK,eAAe,CAAC,SAAiB;QACvC,OAAO;YACL,QAAQ,EAAE,KAAK;YACf,UAAU,EAAE,CAAC;YACb,MAAM,EAAE,CAAC;YACT,WAAW,EAAE,CAAC;YACd,WAAW,EAAE,CAAC;YACd,OAAO,EAAE,CAAC;YACV,SAAS;SACV,CAAC;IACJ,CAAC;CACF"}