@elizaos/cli 1.4.3 → 1.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. package/dist/BrowserWebSocketTransport-5YQPVDV7.js +7 -0
  2. package/dist/EnhancedEvaluationEngine-APOQ6INN.js +473 -0
  3. package/dist/EvaluationEngine-Y7ZQJBRC.js +9 -0
  4. package/dist/LocalEnvironmentProvider-JWFGG4IN.js +15 -0
  5. package/dist/NodeWebSocketTransport-PUO724EY.js +8 -0
  6. package/dist/ScreenRecorder-YK246DNJ.js +10 -0
  7. package/dist/agent-start-6QJQAMKA.js +13 -0
  8. package/dist/bidi-2SVNH6F7.js +15309 -0
  9. package/dist/{bun-exec-ULMPAIQC.js → bun-exec-NH4UCUY4.js} +1 -1
  10. package/dist/chunk-2ESYSVXG.js +48 -0
  11. package/dist/chunk-3AEYIKBZ.js +432 -0
  12. package/dist/chunk-5IWKEMEF.js +239 -0
  13. package/dist/chunk-5WZO2HMM.js +2644 -0
  14. package/dist/chunk-ABGBVB74.js +3501 -0
  15. package/dist/{chunk-NSNXXD3I.js → chunk-BCO32GR6.js} +2 -2
  16. package/dist/chunk-CGXTFHQP.js +25 -0
  17. package/dist/chunk-EXUFDTUD.js +3948 -0
  18. package/dist/chunk-FGGNHEXZ.js +211860 -0
  19. package/dist/chunk-FWYHSCLF.js +243 -0
  20. package/dist/chunk-I57T3WPO.js +165 -0
  21. package/dist/chunk-LBZLMFFF.js +221 -0
  22. package/dist/chunk-LG7YDBMV.js +401 -0
  23. package/dist/chunk-NHKLUXNE.js +166 -0
  24. package/dist/chunk-PUZHCSGF.js +828 -0
  25. package/dist/chunk-PWDR7CPA.js +7828 -0
  26. package/dist/{chunk-N5G5XSGP.js → chunk-Q6M2K53X.js} +3 -3
  27. package/dist/chunk-SVHCNBHM.js +289 -0
  28. package/dist/{chunk-HOC6B3QV.js → chunk-VFFOOPYS.js} +4 -238
  29. package/dist/chunk-WX37MM4G.js +292 -0
  30. package/dist/chunk-XFJIHUT3.js +6 -0
  31. package/dist/chunk-XPPESCCM.js +787 -0
  32. package/dist/chunk-YBDC5OZO.js +40 -0
  33. package/dist/commands/agent/actions/index.js +2 -2
  34. package/dist/commands/agent/index.js +2 -2
  35. package/dist/commands/create/actions/index.js +4 -3
  36. package/dist/commands/create/index.js +5 -4
  37. package/dist/commands/shared/index.js +1 -1
  38. package/dist/index.js +66820 -5009
  39. package/dist/js-yaml-KADNMPWR.js +35 -0
  40. package/dist/matrix-orchestrator-3WLRK7GG.js +1070 -0
  41. package/dist/matrix-runner-KDPETCKQ.js +160 -0
  42. package/dist/matrix-schema-PCO2KGJY.js +102 -0
  43. package/dist/parameter-override-ALOPPXCE.js +487 -0
  44. package/dist/{plugin-creator-TCUFII32.js → plugin-creator-J7GNPMPG.js} +1 -1
  45. package/dist/process-manager-IU2A3BTQ.js +9 -0
  46. package/dist/{registry-ELONUC44.js → registry-65KMEA7N.js} +2 -2
  47. package/dist/resource-monitor-EHZSH2P6.js +15 -0
  48. package/dist/run-isolation-PGLZ37Y7.js +29 -0
  49. package/dist/runtime-factory-Q4U5YBNV.js +22 -0
  50. package/dist/schema-C25LVPEK.js +17 -0
  51. package/dist/src/commands/report/src/assets/report_template.html +1704 -0
  52. package/dist/src-EJG4ILDC.js +5 -0
  53. package/dist/templates/plugin-quick-starter/package.json +2 -2
  54. package/dist/templates/plugin-quick-starter/src/__tests__/test-utils.ts +1 -0
  55. package/dist/templates/plugin-starter/package.json +2 -2
  56. package/dist/templates/plugin-starter/src/__tests__/test-utils.ts +1 -0
  57. package/dist/templates/project-starter/package.json +4 -4
  58. package/dist/templates/project-tee-starter/package.json +4 -4
  59. package/dist/templates/project-tee-starter/src/index.ts +1 -2
  60. package/dist/typescript-ZF3IK2DJ.js +5 -0
  61. package/dist/{utils-X6UXPLKD.js → utils-QFD2PW4X.js} +2 -2
  62. package/package.json +14 -8
  63. package/templates/plugin-quick-starter/package.json +2 -2
  64. package/templates/plugin-quick-starter/src/__tests__/test-utils.ts +1 -0
  65. package/templates/plugin-starter/package.json +2 -2
  66. package/templates/plugin-starter/src/__tests__/test-utils.ts +1 -0
  67. package/templates/project-starter/package.json +4 -4
  68. package/templates/project-tee-starter/package.json +4 -4
  69. package/templates/project-tee-starter/src/index.ts +1 -2
  70. package/dist/chunk-3RG5ZIWI.js +0 -10
@@ -0,0 +1,7 @@
1
+ import {
2
+ BrowserWebSocketTransport
3
+ } from "./chunk-YBDC5OZO.js";
4
+ import "./chunk-2ESYSVXG.js";
5
+ export {
6
+ BrowserWebSocketTransport
7
+ };
@@ -0,0 +1,473 @@
1
+ import "./chunk-2ESYSVXG.js";
2
+
3
+ // src/commands/scenario/src/EnhancedEvaluationEngine.ts
4
+ import { ModelType } from "@elizaos/core";
5
+ import { z } from "zod";
6
+ var EnhancedEvaluationEngine = class {
7
+ constructor(runtime) {
8
+ this.runtime = runtime;
9
+ this.register("string_contains", new EnhancedStringContainsEvaluator());
10
+ this.register("regex_match", new EnhancedRegexMatchEvaluator());
11
+ this.register("file_exists", new EnhancedFileExistsEvaluator());
12
+ this.register("trajectory_contains_action", new EnhancedTrajectoryContainsActionEvaluator());
13
+ this.register("llm_judge", new EnhancedLLMJudgeEvaluator());
14
+ this.register("execution_time", new EnhancedExecutionTimeEvaluator());
15
+ }
16
+ enhancedEvaluators = /* @__PURE__ */ new Map();
17
+ register(type, evaluator) {
18
+ this.enhancedEvaluators.set(type, evaluator);
19
+ }
20
+ /**
21
+ * NEW: Run evaluations with structured JSON output
22
+ */
23
+ async runEnhancedEvaluations(evaluations, runResult) {
24
+ const results = [];
25
+ for (const evaluation of evaluations) {
26
+ const evaluator = this.enhancedEvaluators.get(evaluation.type);
27
+ if (!evaluator) {
28
+ results.push({
29
+ evaluator_type: evaluation.type,
30
+ success: false,
31
+ summary: `Unknown evaluator type: '${evaluation.type}'`,
32
+ details: {
33
+ error: "evaluator_not_found",
34
+ requested_type: evaluation.type,
35
+ available_types: Array.from(this.enhancedEvaluators.keys())
36
+ }
37
+ });
38
+ continue;
39
+ }
40
+ try {
41
+ const result = await evaluator.evaluateEnhanced(evaluation, runResult, this.runtime);
42
+ results.push(result);
43
+ } catch (error) {
44
+ results.push({
45
+ evaluator_type: evaluation.type,
46
+ success: false,
47
+ summary: `Evaluator '${evaluation.type}' failed with error: ${error instanceof Error ? error.message : String(error)}`,
48
+ details: {
49
+ error: "evaluator_execution_failed",
50
+ error_message: error instanceof Error ? error.message : String(error),
51
+ evaluation_config: evaluation
52
+ }
53
+ });
54
+ }
55
+ }
56
+ return results;
57
+ }
58
+ };
59
+ var EnhancedStringContainsEvaluator = class {
60
+ async evaluateEnhanced(params, runResult) {
61
+ if (params.type !== "string_contains") throw new Error("Mismatched evaluator");
62
+ const expectedValue = params.value;
63
+ const actualOutput = runResult.stdout;
64
+ const caseSensitive = params.case_sensitive ?? false;
65
+ const searchIn = caseSensitive ? actualOutput : actualOutput.toLowerCase();
66
+ const searchFor = caseSensitive ? expectedValue : expectedValue.toLowerCase();
67
+ const success = searchIn.includes(searchFor);
68
+ return {
69
+ evaluator_type: "string_contains",
70
+ success,
71
+ summary: success ? `Assertion PASSED: Agent response contained the expected substring "${expectedValue}".` : `Assertion FAILED: Agent response did not contain the expected substring "${expectedValue}".`,
72
+ details: {
73
+ expected_value: expectedValue,
74
+ actual_output: actualOutput,
75
+ case_sensitive: caseSensitive,
76
+ search_performed: `Looking for "${searchFor}" in "${searchIn.substring(0, 100)}${searchIn.length > 100 ? "..." : ""}"`
77
+ }
78
+ };
79
+ }
80
+ };
81
+ var EnhancedRegexMatchEvaluator = class {
82
+ async evaluateEnhanced(params, runResult) {
83
+ if (params.type !== "regex_match") throw new Error("Mismatched evaluator");
84
+ const pattern = params.pattern;
85
+ const actualOutput = runResult.stdout;
86
+ const regex = new RegExp(pattern, "i");
87
+ const match = regex.exec(actualOutput);
88
+ const success = match !== null;
89
+ return {
90
+ evaluator_type: "regex_match",
91
+ success,
92
+ summary: success ? `Regex PASSED: Pattern "${pattern}" matched in agent output.` : `Regex FAILED: Pattern "${pattern}" did not match in agent output.`,
93
+ details: {
94
+ pattern,
95
+ regex_flags: "i",
96
+ actual_output: actualOutput,
97
+ match_found: match,
98
+ match_index: match?.index,
99
+ matched_text: match?.[0]
100
+ }
101
+ };
102
+ }
103
+ };
104
+ var EnhancedFileExistsEvaluator = class {
105
+ async evaluateEnhanced(params, runResult) {
106
+ if (params.type !== "file_exists") throw new Error("Mismatched evaluator");
107
+ const expectedPath = params.path;
108
+ const createdFiles = Object.keys(runResult.files);
109
+ const success = createdFiles.includes(expectedPath) || createdFiles.includes(`./${expectedPath}`) || createdFiles.includes(expectedPath.replace(/^\.\//, ""));
110
+ const matchingPath = createdFiles.find(
111
+ (path) => path === expectedPath || path === `./${expectedPath}` || path === expectedPath.replace(/^\.\//, "")
112
+ );
113
+ return {
114
+ evaluator_type: "file_exists",
115
+ success,
116
+ summary: success ? `File check PASSED: File "${expectedPath}" was created by the agent.` : `File check FAILED: File "${expectedPath}" was not created by the agent.`,
117
+ details: {
118
+ expected_path: expectedPath,
119
+ created_files: createdFiles,
120
+ matching_path: matchingPath,
121
+ total_files_created: createdFiles.length
122
+ }
123
+ };
124
+ }
125
+ };
126
+ var EnhancedExecutionTimeEvaluator = class {
127
+ async evaluateEnhanced(params, runResult) {
128
+ if (params.type !== "execution_time") throw new Error("Mismatched evaluator");
129
+ const duration = runResult.durationMs ?? (runResult.endedAtMs ?? 0) - (runResult.startedAtMs ?? 0);
130
+ if (duration == null || Number.isNaN(duration) || runResult.durationMs === void 0 && (runResult.startedAtMs === void 0 || runResult.endedAtMs === void 0)) {
131
+ return {
132
+ evaluator_type: "execution_time",
133
+ success: false,
134
+ summary: "Timing check FAILED: No timing information available for this step.",
135
+ details: {
136
+ error: "no_timing_data",
137
+ runResult_timing: {
138
+ durationMs: runResult.durationMs,
139
+ startedAtMs: runResult.startedAtMs,
140
+ endedAtMs: runResult.endedAtMs
141
+ },
142
+ constraints: params
143
+ }
144
+ };
145
+ }
146
+ const tooSlow = duration > params.max_duration_ms;
147
+ const tooFast = params.min_duration_ms != null && duration < params.min_duration_ms;
148
+ const success = !tooSlow && !tooFast;
149
+ let summary;
150
+ if (success) {
151
+ summary = `Timing check PASSED: Execution took ${duration}ms (within expected range).`;
152
+ } else if (tooSlow) {
153
+ summary = `Timing check FAILED: Execution took ${duration}ms (exceeded maximum of ${params.max_duration_ms}ms).`;
154
+ } else {
155
+ summary = `Timing check FAILED: Execution took ${duration}ms (below minimum of ${params.min_duration_ms}ms).`;
156
+ }
157
+ return {
158
+ evaluator_type: "execution_time",
159
+ success,
160
+ summary,
161
+ details: {
162
+ actual_duration_ms: duration,
163
+ max_duration_ms: params.max_duration_ms,
164
+ min_duration_ms: params.min_duration_ms,
165
+ target_duration_ms: params.target_duration_ms,
166
+ performance_rating: params.target_duration_ms ? Math.abs(duration - params.target_duration_ms) / params.target_duration_ms : null,
167
+ timing_breakdown: {
168
+ started_at: runResult.startedAtMs,
169
+ ended_at: runResult.endedAtMs,
170
+ calculated_duration: runResult.durationMs
171
+ }
172
+ }
173
+ };
174
+ }
175
+ };
176
+ var EnhancedTrajectoryContainsActionEvaluator = class {
177
+ async evaluateEnhanced(params, runResult, runtime) {
178
+ if (params.type !== "trajectory_contains_action") throw new Error("Mismatched evaluator");
179
+ const actionName = params.action;
180
+ try {
181
+ const actionMemories = await runtime.getMemories({
182
+ tableName: "messages",
183
+ agentId: runtime.agentId,
184
+ count: 50,
185
+ unique: false
186
+ });
187
+ const actionResults = actionMemories.filter(
188
+ (mem) => mem?.type === "messages" && mem.content?.type === "action_result"
189
+ );
190
+ const normalize = (name) => (typeof name === "string" ? name : "").toLowerCase().replace(/_/g, "");
191
+ const target = normalize(actionName);
192
+ const matchingAction = actionResults.find(
193
+ (mem) => normalize(mem.content?.actionName ?? "") === target
194
+ );
195
+ const allActionNames = actionResults.map((mem) => mem.content?.actionName || "unknown");
196
+ if (!matchingAction) {
197
+ return {
198
+ evaluator_type: "trajectory_contains_action",
199
+ success: false,
200
+ summary: `Action check FAILED: Action '${actionName}' was not found in the execution trajectory.`,
201
+ details: {
202
+ expected_action: actionName,
203
+ normalized_expected: target,
204
+ actions_found: allActionNames,
205
+ total_actions_in_trajectory: actionResults.length,
206
+ search_method: "case_insensitive_with_underscore_normalization"
207
+ }
208
+ };
209
+ }
210
+ const actionStatus = matchingAction.content?.actionStatus || "unknown";
211
+ const actionSucceeded = actionStatus === "completed";
212
+ return {
213
+ evaluator_type: "trajectory_contains_action",
214
+ success: true,
215
+ // Success means the action was found (regardless of its outcome)
216
+ summary: actionSucceeded ? `Action check PASSED: Action '${actionName}' was executed successfully.` : `Action check PASSED: Action '${actionName}' was found but failed execution.`,
217
+ details: {
218
+ expected_action: actionName,
219
+ found_action: matchingAction.content?.actionName,
220
+ action_status: actionStatus,
221
+ action_succeeded: actionSucceeded,
222
+ action_error: matchingAction.content?.error,
223
+ action_result: matchingAction.content?.result,
224
+ memory_id: matchingAction.id,
225
+ all_actions_in_trajectory: allActionNames
226
+ }
227
+ };
228
+ } catch (error) {
229
+ return {
230
+ evaluator_type: "trajectory_contains_action",
231
+ success: false,
232
+ summary: `Action check FAILED: Error while checking trajectory: ${error instanceof Error ? error.message : String(error)}`,
233
+ details: {
234
+ expected_action: actionName,
235
+ error: "trajectory_access_failed",
236
+ error_message: error instanceof Error ? error.message : String(error),
237
+ runtime_available: !!runtime,
238
+ agent_id: runtime?.agentId
239
+ }
240
+ };
241
+ }
242
+ }
243
+ };
244
+ var EnhancedLLMJudgeEvaluator = class {
245
+ async evaluateEnhanced(params, runResult, runtime) {
246
+ if (params.type !== "llm_judge") throw new Error("Mismatched evaluator");
247
+ const prompt = params.prompt;
248
+ const expected = params.expected;
249
+ const candidateModels = [ModelType.OBJECT_SMALL, ModelType.TEXT_LARGE, ModelType.TEXT_SMALL];
250
+ const temperature = params.temperature || 0.1;
251
+ const timeoutMs = Number(process.env.LLM_JUDGE_TIMEOUT_MS || 15e3);
252
+ let modelType = candidateModels.find((m) => runtime.getModel?.(m)) ?? ModelType.TEXT_LARGE;
253
+ const capabilities = params.capabilities;
254
+ if (capabilities !== void 0) {
255
+ try {
256
+ const capabilitiesSchema = z.array(z.string()).min(1, "Capabilities array must not be empty");
257
+ capabilitiesSchema.parse(capabilities);
258
+ } catch (error) {
259
+ throw new Error(`Invalid capabilities: ${error.message}`);
260
+ }
261
+ }
262
+ const structuredPrompt = this.createStructuredPrompt(runResult, prompt, expected, capabilities);
263
+ const jsonSchema = this.getStructuredJudgmentSchema();
264
+ try {
265
+ const modelHandler = runtime.getModel(modelType);
266
+ if (!modelHandler) {
267
+ return {
268
+ evaluator_type: "llm_judge",
269
+ success: false,
270
+ summary: `LLM Judge FAILED: No available model handler found.`,
271
+ details: {
272
+ error: "no_model_available",
273
+ attempted_models: candidateModels,
274
+ models_available: Object.keys(runtime.models || {}),
275
+ prompt,
276
+ expected
277
+ }
278
+ };
279
+ }
280
+ const objectParams = {
281
+ prompt: structuredPrompt,
282
+ schema: jsonSchema,
283
+ temperature,
284
+ output: "object"
285
+ };
286
+ const response = await Promise.race([
287
+ runtime.useModel(modelType, objectParams),
288
+ new Promise(
289
+ (_, reject) => setTimeout(() => reject(new Error(`LLM judge timeout after ${timeoutMs}ms`)), timeoutMs)
290
+ )
291
+ ]);
292
+ let parsedResponse;
293
+ try {
294
+ parsedResponse = this.validateStructuredResponse(response, jsonSchema);
295
+ } catch (parseError) {
296
+ return {
297
+ evaluator_type: "llm_judge",
298
+ success: false,
299
+ summary: `LLM Judge FAILED: Invalid LLM response - ${parseError.message}`,
300
+ details: {
301
+ error: "llm_parse_error",
302
+ error_type: "llm_parse_error",
303
+ error_message: parseError.message,
304
+ model_used: modelType,
305
+ prompt,
306
+ expected,
307
+ raw_llm_response: response,
308
+ custom_capabilities_provided: !!(capabilities && capabilities.length > 0),
309
+ capabilities_count: capabilities ? capabilities.length : 0
310
+ }
311
+ };
312
+ }
313
+ const success = this.compareWithExpected(parsedResponse, expected);
314
+ return {
315
+ evaluator_type: "llm_judge",
316
+ success,
317
+ summary: `LLM Judge ${success ? "PASSED" : "FAILED"}: ${parsedResponse.qualitative_summary.substring(0, 150)}${parsedResponse.qualitative_summary.length > 150 ? "..." : ""}`,
318
+ details: {
319
+ llm_judge_result: {
320
+ qualitative_summary: parsedResponse.qualitative_summary,
321
+ capability_checklist: parsedResponse.capability_checklist
322
+ },
323
+ custom_capabilities_provided: !!(capabilities && capabilities.length > 0),
324
+ capabilities_count: capabilities ? capabilities.length : 5,
325
+ // Default capabilities count
326
+ judgment_confidence: parsedResponse.confidence,
327
+ expected_outcome: expected,
328
+ model_used: modelType,
329
+ prompt_used: prompt,
330
+ raw_llm_response: response
331
+ }
332
+ };
333
+ } catch (error) {
334
+ const msg = error?.message || String(error);
335
+ const isTimeout = msg.toLowerCase().includes("timeout");
336
+ return {
337
+ evaluator_type: "llm_judge",
338
+ success: false,
339
+ summary: isTimeout ? `LLM Judge FAILED: Timed out after ${timeoutMs}ms.` : `LLM Judge FAILED: ${msg}`,
340
+ details: {
341
+ error: isTimeout ? "llm_timeout" : "llm_error",
342
+ error_type: isTimeout ? "llm_timeout" : "llm_error",
343
+ error_message: msg,
344
+ timeout_ms: timeoutMs,
345
+ model_attempted: modelType,
346
+ prompt,
347
+ expected,
348
+ custom_capabilities_provided: !!(capabilities && capabilities.length > 0),
349
+ capabilities_count: capabilities ? capabilities.length : 0
350
+ }
351
+ };
352
+ }
353
+ }
354
+ createStructuredPrompt(runResult, userPrompt, expected, capabilities) {
355
+ const defaultCapabilities = [
356
+ "Task Completion",
357
+ "Response Quality",
358
+ "User Intent Understanding",
359
+ "Error Handling",
360
+ "Appropriate Response Format"
361
+ ];
362
+ const capabilitiesToUse = capabilities && capabilities.length > 0 ? capabilities : defaultCapabilities;
363
+ const capabilitiesSection = capabilitiesToUse.map((capability, index) => {
364
+ return `${index + 1}. ${capability}`;
365
+ }).join("\n");
366
+ return `You are an expert evaluator analyzing an AI agent's performance. Provide a comprehensive, structured assessment.
367
+
368
+ ## Execution Context
369
+ - Exit Code: ${runResult.exitCode}
370
+ - Standard Output: ${runResult.stdout}
371
+ - Standard Error: ${runResult.stderr}
372
+ - Files Created: ${Object.keys(runResult.files).join(", ") || "None"}
373
+
374
+ ## Evaluation Question
375
+ ${userPrompt}
376
+
377
+ ## Expected Outcome
378
+ ${expected}
379
+
380
+ ## Instructions
381
+ Analyze the agent's performance and provide a detailed assessment. You must evaluate the agent against the following specific capabilities:
382
+
383
+ ${capabilitiesSection}
384
+
385
+ For each capability listed above, you must assess whether the agent achieved it and provide detailed reasoning. Your response should include:
386
+
387
+ 1. **Qualitative Summary**: A comprehensive paragraph summarizing overall performance
388
+ 2. **Capability Checklist**: For each capability above, provide:
389
+ - capability: The exact capability name from the list
390
+ - achieved: Boolean indicating if the capability was demonstrated
391
+ - reasoning: Detailed explanation of your assessment
392
+
393
+ Provide your assessment as a structured JSON response with detailed reasoning for each capability.`;
394
+ }
395
+ getStructuredJudgmentSchema() {
396
+ return {
397
+ type: "object",
398
+ properties: {
399
+ qualitative_summary: {
400
+ type: "string",
401
+ description: "A comprehensive paragraph summarizing the agent's performance, reasoning, and notable successes or failures"
402
+ },
403
+ capability_checklist: {
404
+ type: "array",
405
+ items: {
406
+ type: "object",
407
+ properties: {
408
+ capability: { type: "string" },
409
+ achieved: { type: "boolean" },
410
+ reasoning: { type: "string" }
411
+ },
412
+ required: ["capability", "achieved", "reasoning"]
413
+ }
414
+ },
415
+ confidence: {
416
+ type: "number",
417
+ minimum: 0,
418
+ maximum: 1,
419
+ description: "Confidence level in the assessment (0-1)"
420
+ },
421
+ overall_success: {
422
+ type: "boolean",
423
+ description: "Whether the agent successfully met the expected outcome"
424
+ }
425
+ },
426
+ required: ["qualitative_summary", "capability_checklist", "confidence", "overall_success"]
427
+ };
428
+ }
429
+ validateStructuredResponse(response, schema) {
430
+ if (typeof response === "string") {
431
+ response = JSON.parse(response);
432
+ }
433
+ if (!response.qualitative_summary || !response.capability_checklist) {
434
+ throw new Error("Invalid LLM response: missing required fields");
435
+ }
436
+ if (!Array.isArray(response.capability_checklist)) {
437
+ throw new Error("Invalid LLM response: capability_checklist must be an array");
438
+ }
439
+ if (response.capability_checklist.length === 0) {
440
+ response.capability_checklist = [
441
+ {
442
+ capability: "Task Completion",
443
+ achieved: response.overall_success || false,
444
+ reasoning: "Default capability assessment based on overall success"
445
+ }
446
+ ];
447
+ }
448
+ if (response.confidence === void 0) {
449
+ response.confidence = 0.8;
450
+ }
451
+ if (response.overall_success === void 0) {
452
+ const allAchieved = response.capability_checklist.every((cap) => cap.achieved === true);
453
+ response.overall_success = allAchieved;
454
+ }
455
+ return response;
456
+ }
457
+ compareWithExpected(parsedResponse, expected) {
458
+ const overallSuccess = parsedResponse.overall_success;
459
+ const confidence = parsedResponse.confidence || 0;
460
+ const expectedLower = expected.toLowerCase();
461
+ if (expectedLower === "yes" || expectedLower === "no") {
462
+ return expectedLower === "yes" === overallSuccess;
463
+ }
464
+ if (expectedLower.includes("+")) {
465
+ const threshold = parseFloat(expectedLower.replace("+", ""));
466
+ return confidence >= threshold;
467
+ }
468
+ return overallSuccess;
469
+ }
470
+ };
471
+ export {
472
+ EnhancedEvaluationEngine
473
+ };
@@ -0,0 +1,9 @@
1
+ import {
2
+ EvaluationEngine,
3
+ TrajectoryContainsActionEvaluator
4
+ } from "./chunk-SVHCNBHM.js";
5
+ import "./chunk-2ESYSVXG.js";
6
+ export {
7
+ EvaluationEngine,
8
+ TrajectoryContainsActionEvaluator
9
+ };
@@ -0,0 +1,15 @@
1
+ import {
2
+ LocalEnvironmentProvider
3
+ } from "./chunk-LG7YDBMV.js";
4
+ import "./chunk-PUZHCSGF.js";
5
+ import "./chunk-WX37MM4G.js";
6
+ import "./chunk-I57T3WPO.js";
7
+ import "./chunk-5IWKEMEF.js";
8
+ import "./chunk-Q6M2K53X.js";
9
+ import "./chunk-FQYWRHLX.js";
10
+ import "./chunk-GXWWPFBO.js";
11
+ import "./chunk-I4L4T7QX.js";
12
+ import "./chunk-2ESYSVXG.js";
13
+ export {
14
+ LocalEnvironmentProvider
15
+ };
@@ -0,0 +1,8 @@
1
+ import {
2
+ NodeWebSocketTransport
3
+ } from "./chunk-EXUFDTUD.js";
4
+ import "./chunk-XFJIHUT3.js";
5
+ import "./chunk-2ESYSVXG.js";
6
+ export {
7
+ NodeWebSocketTransport
8
+ };
@@ -0,0 +1,10 @@
1
+ import {
2
+ ScreenRecorder
3
+ } from "./chunk-FWYHSCLF.js";
4
+ import "./chunk-XPPESCCM.js";
5
+ import "./chunk-ABGBVB74.js";
6
+ import "./chunk-XFJIHUT3.js";
7
+ import "./chunk-2ESYSVXG.js";
8
+ export {
9
+ ScreenRecorder
10
+ };
@@ -0,0 +1,13 @@
1
+ import {
2
+ startAgent,
3
+ stopAgent
4
+ } from "./chunk-WX37MM4G.js";
5
+ import "./chunk-Q6M2K53X.js";
6
+ import "./chunk-FQYWRHLX.js";
7
+ import "./chunk-GXWWPFBO.js";
8
+ import "./chunk-I4L4T7QX.js";
9
+ import "./chunk-2ESYSVXG.js";
10
+ export {
11
+ startAgent,
12
+ stopAgent
13
+ };