@operor/cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +76 -0
- package/dist/config-Bn2pbORi.js +34 -0
- package/dist/config-Bn2pbORi.js.map +1 -0
- package/dist/converse-C_PB7-JH.js +142 -0
- package/dist/converse-C_PB7-JH.js.map +1 -0
- package/dist/doctor-98gPl743.js +122 -0
- package/dist/doctor-98gPl743.js.map +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +2268 -0
- package/dist/index.js.map +1 -0
- package/dist/llm-override-BIQl0V6H.js +445 -0
- package/dist/llm-override-BIQl0V6H.js.map +1 -0
- package/dist/reset-DT8SBgFS.js +87 -0
- package/dist/reset-DT8SBgFS.js.map +1 -0
- package/dist/simulate-BKv62GJc.js +144 -0
- package/dist/simulate-BKv62GJc.js.map +1 -0
- package/dist/status-D6LIZvQa.js +82 -0
- package/dist/status-D6LIZvQa.js.map +1 -0
- package/dist/test-DYjkxbtK.js +177 -0
- package/dist/test-DYjkxbtK.js.map +1 -0
- package/dist/test-suite-D8H_5uKs.js +209 -0
- package/dist/test-suite-D8H_5uKs.js.map +1 -0
- package/dist/utils-BuV4q7f6.js +11 -0
- package/dist/utils-BuV4q7f6.js.map +1 -0
- package/dist/vibe-Bl_js3Jo.js +395 -0
- package/dist/vibe-Bl_js3Jo.js.map +1 -0
- package/package.json +43 -0
- package/src/commands/analytics.ts +408 -0
- package/src/commands/chat.ts +310 -0
- package/src/commands/config.ts +34 -0
- package/src/commands/converse.ts +182 -0
- package/src/commands/doctor.ts +154 -0
- package/src/commands/history.ts +60 -0
- package/src/commands/init.ts +163 -0
- package/src/commands/kb.ts +429 -0
- package/src/commands/llm-override.ts +480 -0
- package/src/commands/reset.ts +72 -0
- package/src/commands/simulate.ts +187 -0
- package/src/commands/status.ts +112 -0
- package/src/commands/test-suite.ts +247 -0
- package/src/commands/test.ts +177 -0
- package/src/commands/vibe.ts +478 -0
- package/src/config.ts +127 -0
- package/src/index.ts +190 -0
- package/src/log-timestamps.ts +26 -0
- package/src/setup.ts +712 -0
- package/src/start.ts +573 -0
- package/src/utils.ts +6 -0
- package/templates/agents/_defaults/SOUL.md +20 -0
- package/templates/agents/_defaults/USER.md +16 -0
- package/templates/agents/customer-support/IDENTITY.md +6 -0
- package/templates/agents/customer-support/INSTRUCTIONS.md +79 -0
- package/templates/agents/customer-support/SOUL.md +26 -0
- package/templates/agents/faq-bot/IDENTITY.md +6 -0
- package/templates/agents/faq-bot/INSTRUCTIONS.md +53 -0
- package/templates/agents/faq-bot/SOUL.md +19 -0
- package/templates/agents/sales/IDENTITY.md +6 -0
- package/templates/agents/sales/INSTRUCTIONS.md +67 -0
- package/templates/agents/sales/SOUL.md +20 -0
- package/tsconfig.json +9 -0
- package/tsdown.config.ts +13 -0
- package/vitest.config.ts +8 -0
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
import { n as readConfig } from "./index.js";
|
|
2
|
+
import { t as formatTimestamp } from "./utils-BuV4q7f6.js";
|
|
3
|
+
import { writeFileSync } from "fs";
|
|
4
|
+
|
|
5
|
+
//#region src/commands/simulate.ts
|
|
6
|
+
/**
 * Implements the `operor simulate` command.
 *
 * Builds an in-process Operor instance backed by a mock provider and a mock
 * Shopify skill, optionally wires in an LLM and a knowledge base (both driven
 * by the values returned from `readConfig()`), runs a `SimulationRunner`
 * against it and prints or saves the resulting report.
 *
 * @param {object} options
 * @param {string[]} [options.tests] Forwarded to the runner as `testSuiteFiles`.
 * @param {string[]} [options.scenarios] Scenario name/id fragments (case-insensitive
 *   substring match against `ECOMMERCE_SCENARIOS`); the literal `"all"` or an
 *   omitted value selects the `"builtin"` set.
 * @param {number} [options.conversations] Total conversations to run (default 10).
 * @param {'exact'|'similarity'|'llm_judge'} [options.strategy] Forwarded as `evaluationStrategy`.
 * @param {boolean} [options.real] Wrap the skill in `SkillTestHarness`; reported as integration mode `"real"`.
 * @param {boolean} [options.allowWrites] Forwarded to the harness and to the runner config.
 * @param {boolean} [options.dryRun] Wrap the skill in `SkillTestHarness` in dry-run mode.
 * @param {number} [options.timeout] Forwarded to the runner config.
 * @param {boolean} [options.parallel] Forwarded to the runner config.
 * @param {boolean} [options.json] Emit the raw JSON report instead of the interactive UI.
 * @param {string} [options.output] File path the JSON report is written to.
 * @returns {Promise<void>} Normally never resolves to the caller: the process is
 *   terminated with exit code 0 when `report.overallPassed` is truthy, 1 otherwise
 *   (and 1 when no scenario matches the requested names).
 * @throws Re-throws whatever `runner.run()` rejects with, after the spinner and
 *   the Operor instance have been stopped.
 */
async function runSimulate(options) {
	const config = readConfig();
	const clack = await import("@clack/prompts");
	const { Operor } = await import("@operor/core");
	const { MockProvider } = await import("@operor/provider-mock");
	const { MockShopifySkill, SimulationRunner, ECOMMERCE_SCENARIOS, SkillTestHarness } = await import("@operor/testing");

	// Optionally set up an LLM; without both provider and key the simulation runs without one.
	let llm;
	if (config.LLM_PROVIDER && config.LLM_API_KEY) {
		const { AIProvider } = await import("@operor/llm");
		llm = new AIProvider({
			provider: config.LLM_PROVIDER,
			apiKey: config.LLM_API_KEY,
			model: config.LLM_MODEL
		});
	}

	// Determine which conversation scenarios to run.
	let conversationScenarios;
	if (!options.scenarios || options.scenarios.includes("all")) {
		// Default (or explicit "all"): use all built-in scenarios.
		conversationScenarios = "builtin";
	} else {
		conversationScenarios = ECOMMERCE_SCENARIOS.filter((s) => options.scenarios.some((name) => s.name.toLowerCase().includes(name.toLowerCase()) || s.id.toLowerCase().includes(name.toLowerCase())));
		if (conversationScenarios.length === 0) {
			console.error(`No scenarios matching: ${options.scenarios.join(", ")}`);
			console.error("Available scenarios:");
			ECOMMERCE_SCENARIOS.forEach((s) => console.error(` - ${s.id}: ${s.name}`));
			process.exit(1);
		}
	}

	// Set up Operor with the mock provider and skill.
	const os = new Operor({
		debug: false,
		batchWindowMs: 0
	});
	const provider = new MockProvider();
	const shopify = new MockShopifySkill();

	// Wrap the skill with the safety harness if --real or --dry-run was requested.
	let skill = shopify;
	if (options.real || options.dryRun) {
		skill = new SkillTestHarness(shopify, {
			allowWrites: options.allowWrites ?? false,
			dryRun: options.dryRun ?? false
		});
		await skill.authenticate();
	}
	await os.addProvider(provider);
	await os.addSkill(skill);

	// NOTE(review): the tool list and the rule action below reference the unwrapped
	// `shopify` instance, not the harness-wrapped `skill` — confirm this is intended.
	const allTools = [
		shopify.tools.get_order,
		shopify.tools.create_discount,
		shopify.tools.search_products
	];
	const agent = os.createAgent({
		name: "Test Agent",
		purpose: "Handle customer support conversations",
		personality: "empathetic and solution-focused",
		triggers: ["order_tracking", "general"],
		tools: allTools,
		rules: [{
			name: "Auto-compensation",
			// Fires when a successful get_order result reports a delay of two or more days.
			condition: async (_ctx, toolResults) => {
				const order = toolResults.find((t) => t.name === "get_order");
				return order?.success && order.result?.isDelayed && order.result?.delayDays >= 2;
			},
			action: async () => {
				const discount = await shopify.tools.create_discount.execute({
					percent: 10,
					validDays: 30
				});
				return {
					type: "discount_created",
					code: discount.code,
					percent: 10,
					validDays: 30
				};
			}
		}]
	});

	// Set up the Knowledge Base if enabled. Failure is non-fatal: the simulation
	// continues without a KB and only a warning is printed.
	let kbRuntime;
	if (config.KB_ENABLED === "true") {
		try {
			const { SQLiteKnowledgeStore, EmbeddingService, RetrievalPipeline } = await import("@operor/knowledge");
			const embedder = new EmbeddingService({
				provider: config.KB_EMBEDDING_PROVIDER || config.LLM_PROVIDER,
				apiKey: config.KB_EMBEDDING_API_KEY || config.LLM_API_KEY || "",
				model: config.KB_EMBEDDING_MODEL
			});
			const kbStore = new SQLiteKnowledgeStore(config.KB_DB_PATH || "./knowledge.db", embedder.dimensions);
			await kbStore.initialize();
			const retrieval = new RetrievalPipeline(kbStore, embedder);
			kbRuntime = { retrieve: (q) => retrieval.retrieve(q) };
			console.log("[Operor] 📚 Knowledge Base enabled for simulation");
		} catch (kbError) {
			// Guard the property access: a thrown value is not guaranteed to be an Error.
			console.warn("[Operor] ⚠️ Failed to initialize Knowledge Base:", kbError?.message ?? kbError);
		}
	}

	// Override agent.process with the LLM-based implementation if an LLM is configured.
	if (llm) {
		const { applyLLMOverride } = await import("./llm-override-BIQl0V6H.js").then((n) => n.n);
		applyLLMOverride(agent, llm, allTools, { kbRuntime });
	}
	await os.start();

	const totalConversations = options.conversations ?? 10;
	const runner = new SimulationRunner({
		agentOS: os,
		config: {
			testSuiteFiles: options.tests,
			conversationScenarios,
			totalConversations,
			evaluationStrategy: options.strategy,
			integrationMode: options.real ? "real" : options.dryRun ? "dry-run" : "mock",
			allowWrites: options.allowWrites,
			timeout: options.timeout,
			parallel: options.parallel
		},
		llm
	});

	// The interactive UI (intro, spinner, outro) is suppressed in --json mode.
	if (!options.json) clack.intro("operor simulate");
	const spinner = !options.json ? clack.spinner() : null;
	spinner?.start(`Running simulation (0/${totalConversations} conversations)...`);

	let report;
	try {
		report = await runner.run((completed, total, result) => {
			const status = result.passed ? "PASS" : "FAIL";
			spinner?.message(`[${formatTimestamp()}] ${completed}/${total} — ${result.scenario.name} [${status}]`);
		});
	} catch (runError) {
		// Do not leave the spinner animating or the started Operor instance running
		// when the run itself fails; clean up, then surface the original error.
		spinner?.stop("Simulation aborted");
		try {
			await os.stop();
		} catch (stopError) {
			// Cleanup failure must not mask the error that aborted the run.
			console.warn("[Operor] ⚠️ Failed to stop Operor after aborted simulation:", stopError?.message ?? stopError);
		}
		throw runError;
	}
	spinner?.stop(`Simulation complete: ${report.passed}/${report.totalConversations} passed`);
	await os.stop();

	// Output results.
	if (options.json) {
		const jsonOutput = JSON.stringify(report, null, 2);
		if (options.output) {
			writeFileSync(options.output, jsonOutput, "utf-8");
			console.log(`Report saved to ${options.output}`);
		} else {
			console.log(jsonOutput);
		}
	} else {
		const formatted = SimulationRunner.formatReport(report);
		console.log("\n" + formatted);
		if (options.output) {
			writeFileSync(options.output, JSON.stringify(report, null, 2), "utf-8");
			clack.note(`Detailed report saved to ${options.output}`);
		}
		clack.outro(report.overallPassed ? "Simulation PASSED" : "Simulation FAILED");
	}

	// The exit code mirrors the overall verdict so CI can gate on it.
	process.exit(report.overallPassed ? 0 : 1);
}
|
|
141
|
+
|
|
142
|
+
//#endregion
|
|
143
|
+
export { runSimulate };
|
|
144
|
+
//# sourceMappingURL=simulate-BKv62GJc.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"simulate-BKv62GJc.js","names":[],"sources":["../src/commands/simulate.ts"],"sourcesContent":["import { readConfig } from '../config.js';\nimport { writeFileSync } from 'fs';\nimport { formatTimestamp } from '../utils.js';\n\nexport async function runSimulate(options: {\n tests?: string[];\n scenarios?: string[];\n conversations?: number;\n strategy?: 'exact' | 'similarity' | 'llm_judge';\n real?: boolean;\n allowWrites?: boolean;\n dryRun?: boolean;\n timeout?: number;\n parallel?: boolean;\n json?: boolean;\n output?: string;\n}): Promise<void> {\n const config = readConfig();\n const clack = await import('@clack/prompts');\n\n const { Operor } = await import('@operor/core');\n const { MockProvider } = await import('@operor/provider-mock');\n const {\n MockShopifySkill,\n SimulationRunner,\n ECOMMERCE_SCENARIOS,\n SkillTestHarness,\n } = await import('@operor/testing');\n\n // Optionally set up LLM if configured\n let llm: any;\n if (config.LLM_PROVIDER && config.LLM_API_KEY) {\n const { AIProvider } = await import('@operor/llm');\n llm = new AIProvider({\n provider: config.LLM_PROVIDER as any,\n apiKey: config.LLM_API_KEY,\n model: config.LLM_MODEL,\n });\n }\n\n // Determine conversation scenarios\n let conversationScenarios: any = undefined;\n if (options.scenarios) {\n if (options.scenarios.includes('all')) {\n conversationScenarios = 'builtin';\n } else {\n conversationScenarios = ECOMMERCE_SCENARIOS.filter((s) =>\n options.scenarios!.some((name) =>\n s.name.toLowerCase().includes(name.toLowerCase()) ||\n s.id.toLowerCase().includes(name.toLowerCase())\n )\n );\n if (conversationScenarios.length === 0) {\n console.error(`No scenarios matching: ${options.scenarios.join(', ')}`);\n console.error('Available scenarios:');\n ECOMMERCE_SCENARIOS.forEach((s) => console.error(` - ${s.id}: ${s.name}`));\n process.exit(1);\n }\n }\n } else {\n // Default: use all built-in scenarios\n conversationScenarios = 'builtin';\n }\n\n // Set up Operor with 
appropriate skills\n const os = new Operor({ debug: false, batchWindowMs: 0 });\n const provider = new MockProvider();\n const shopify = new MockShopifySkill();\n\n // Wrap skill with safety harness if --real or --dry-run\n let skill: any = shopify;\n if (options.real || options.dryRun) {\n skill = new SkillTestHarness(shopify, {\n allowWrites: options.allowWrites ?? false,\n dryRun: options.dryRun ?? false,\n });\n await skill.authenticate();\n }\n\n await os.addProvider(provider);\n await os.addSkill(skill);\n\n const allTools = [shopify.tools.get_order, shopify.tools.create_discount, shopify.tools.search_products];\n\n const agent = os.createAgent({\n name: 'Test Agent',\n purpose: 'Handle customer support conversations',\n personality: 'empathetic and solution-focused',\n triggers: ['order_tracking', 'general'],\n tools: allTools,\n rules: [{\n name: 'Auto-compensation',\n condition: async (_ctx: any, toolResults: any[]) => {\n const order = toolResults.find((t) => t.name === 'get_order');\n return order?.success && order.result?.isDelayed && order.result?.delayDays >= 2;\n },\n action: async () => {\n const discount = await shopify.tools.create_discount.execute({ percent: 10, validDays: 30 });\n return { type: 'discount_created', code: discount.code, percent: 10, validDays: 30 };\n },\n }],\n });\n\n // Set up Knowledge Base if enabled\n let kbRuntime: any;\n if (config.KB_ENABLED === 'true') {\n try {\n const { SQLiteKnowledgeStore, EmbeddingService, RetrievalPipeline } = await import('@operor/knowledge');\n const embedder = new EmbeddingService({\n provider: (config.KB_EMBEDDING_PROVIDER || config.LLM_PROVIDER) as any,\n apiKey: config.KB_EMBEDDING_API_KEY || config.LLM_API_KEY || '',\n model: config.KB_EMBEDDING_MODEL,\n });\n const kbStore = new SQLiteKnowledgeStore(config.KB_DB_PATH || './knowledge.db', embedder.dimensions);\n await kbStore.initialize();\n const retrieval = new RetrievalPipeline(kbStore, embedder);\n kbRuntime = { retrieve: (q: string) => 
retrieval.retrieve(q) };\n console.log('[Operor] 📚 Knowledge Base enabled for simulation');\n } catch (kbError: any) {\n console.warn('[Operor] ⚠️ Failed to initialize Knowledge Base:', kbError.message);\n }\n }\n\n // Override agent.process with LLM-based implementation if LLM is configured\n if (llm) {\n const { applyLLMOverride } = await import('./llm-override.js');\n applyLLMOverride(agent, llm, allTools, { kbRuntime });\n }\n\n await os.start();\n\n const totalConversations = options.conversations ?? 10;\n\n // Create simulation runner\n const runner = new SimulationRunner({\n agentOS: os,\n config: {\n testSuiteFiles: options.tests,\n conversationScenarios,\n totalConversations,\n evaluationStrategy: options.strategy,\n integrationMode: options.real ? 'real' : options.dryRun ? 'dry-run' : 'mock',\n allowWrites: options.allowWrites,\n timeout: options.timeout,\n parallel: options.parallel,\n },\n llm,\n });\n\n if (!options.json) {\n clack.intro('operor simulate');\n }\n\n const spinner = !options.json ? clack.spinner() : null;\n spinner?.start(`Running simulation (0/${totalConversations} conversations)...`);\n\n const report = await runner.run((completed, total, result) => {\n const status = result.passed ? 
'PASS' : 'FAIL';\n spinner?.message(`[${formatTimestamp()}] ${completed}/${total} — ${result.scenario.name} [${status}]`);\n });\n\n spinner?.stop(`Simulation complete: ${report.passed}/${report.totalConversations} passed`);\n await os.stop();\n\n // Output results\n if (options.json) {\n const jsonOutput = JSON.stringify(report, null, 2);\n if (options.output) {\n writeFileSync(options.output, jsonOutput, 'utf-8');\n console.log(`Report saved to ${options.output}`);\n } else {\n console.log(jsonOutput);\n }\n } else {\n const formatted = SimulationRunner.formatReport(report);\n console.log('\\n' + formatted);\n\n if (options.output) {\n writeFileSync(options.output, JSON.stringify(report, null, 2), 'utf-8');\n clack.note(`Detailed report saved to ${options.output}`);\n }\n\n clack.outro(report.overallPassed ? 'Simulation PASSED' : 'Simulation FAILED');\n }\n\n process.exit(report.overallPassed ? 0 : 1);\n}\n"],"mappings":";;;;;AAIA,eAAsB,YAAY,SAYhB;CAChB,MAAM,SAAS,YAAY;CAC3B,MAAM,QAAQ,MAAM,OAAO;CAE3B,MAAM,EAAE,WAAW,MAAM,OAAO;CAChC,MAAM,EAAE,iBAAiB,MAAM,OAAO;CACtC,MAAM,EACJ,kBACA,kBACA,qBACA,qBACE,MAAM,OAAO;CAGjB,IAAI;AACJ,KAAI,OAAO,gBAAgB,OAAO,aAAa;EAC7C,MAAM,EAAE,eAAe,MAAM,OAAO;AACpC,QAAM,IAAI,WAAW;GACnB,UAAU,OAAO;GACjB,QAAQ,OAAO;GACf,OAAO,OAAO;GACf,CAAC;;CAIJ,IAAI,wBAA6B;AACjC,KAAI,QAAQ,UACV,KAAI,QAAQ,UAAU,SAAS,MAAM,CACnC,yBAAwB;MACnB;AACL,0BAAwB,oBAAoB,QAAQ,MAClD,QAAQ,UAAW,MAAM,SACvB,EAAE,KAAK,aAAa,CAAC,SAAS,KAAK,aAAa,CAAC,IACjD,EAAE,GAAG,aAAa,CAAC,SAAS,KAAK,aAAa,CAAC,CAChD,CACF;AACD,MAAI,sBAAsB,WAAW,GAAG;AACtC,WAAQ,MAAM,0BAA0B,QAAQ,UAAU,KAAK,KAAK,GAAG;AACvE,WAAQ,MAAM,uBAAuB;AACrC,uBAAoB,SAAS,MAAM,QAAQ,MAAM,OAAO,EAAE,GAAG,IAAI,EAAE,OAAO,CAAC;AAC3E,WAAQ,KAAK,EAAE;;;KAKnB,yBAAwB;CAI1B,MAAM,KAAK,IAAI,OAAO;EAAE,OAAO;EAAO,eAAe;EAAG,CAAC;CACzD,MAAM,WAAW,IAAI,cAAc;CACnC,MAAM,UAAU,IAAI,kBAAkB;CAGtC,IAAI,QAAa;AACjB,KAAI,QAAQ,QAAQ,QAAQ,QAAQ;AAClC,UAAQ,IAAI,iBAAiB,SAAS;GACpC,aAAa,QAAQ,eAAe;GACpC,QAAQ,QAAQ,UAAU;GAC3B,CAAC;AACF,QAAM,MAAM,cAAc;;AAG5B,OAAM,GAAG,YAAY,SAAS;AAC9B
,OAAM,GAAG,SAAS,MAAM;CAExB,MAAM,WAAW;EAAC,QAAQ,MAAM;EAAW,QAAQ,MAAM;EAAiB,QAAQ,MAAM;EAAgB;CAExG,MAAM,QAAQ,GAAG,YAAY;EAC3B,MAAM;EACN,SAAS;EACT,aAAa;EACb,UAAU,CAAC,kBAAkB,UAAU;EACvC,OAAO;EACP,OAAO,CAAC;GACN,MAAM;GACN,WAAW,OAAO,MAAW,gBAAuB;IAClD,MAAM,QAAQ,YAAY,MAAM,MAAM,EAAE,SAAS,YAAY;AAC7D,WAAO,OAAO,WAAW,MAAM,QAAQ,aAAa,MAAM,QAAQ,aAAa;;GAEjF,QAAQ,YAAY;AAElB,WAAO;KAAE,MAAM;KAAoB,OADlB,MAAM,QAAQ,MAAM,gBAAgB,QAAQ;MAAE,SAAS;MAAI,WAAW;MAAI,CAAC,EAC1C;KAAM,SAAS;KAAI,WAAW;KAAI;;GAEvF,CAAC;EACH,CAAC;CAGF,IAAI;AACJ,KAAI,OAAO,eAAe,OACxB,KAAI;EACF,MAAM,EAAE,sBAAsB,kBAAkB,sBAAsB,MAAM,OAAO;EACnF,MAAM,WAAW,IAAI,iBAAiB;GACpC,UAAW,OAAO,yBAAyB,OAAO;GAClD,QAAQ,OAAO,wBAAwB,OAAO,eAAe;GAC7D,OAAO,OAAO;GACf,CAAC;EACF,MAAM,UAAU,IAAI,qBAAqB,OAAO,cAAc,kBAAkB,SAAS,WAAW;AACpG,QAAM,QAAQ,YAAY;EAC1B,MAAM,YAAY,IAAI,kBAAkB,SAAS,SAAS;AAC1D,cAAY,EAAE,WAAW,MAAc,UAAU,SAAS,EAAE,EAAE;AAC9D,UAAQ,IAAI,oDAAoD;UACzD,SAAc;AACrB,UAAQ,KAAK,qDAAqD,QAAQ,QAAQ;;AAKtF,KAAI,KAAK;EACP,MAAM,EAAE,qBAAqB,MAAM,OAAO;AAC1C,mBAAiB,OAAO,KAAK,UAAU,EAAE,WAAW,CAAC;;AAGvD,OAAM,GAAG,OAAO;CAEhB,MAAM,qBAAqB,QAAQ,iBAAiB;CAGpD,MAAM,SAAS,IAAI,iBAAiB;EAClC,SAAS;EACT,QAAQ;GACN,gBAAgB,QAAQ;GACxB;GACA;GACA,oBAAoB,QAAQ;GAC5B,iBAAiB,QAAQ,OAAO,SAAS,QAAQ,SAAS,YAAY;GACtE,aAAa,QAAQ;GACrB,SAAS,QAAQ;GACjB,UAAU,QAAQ;GACnB;EACD;EACD,CAAC;AAEF,KAAI,CAAC,QAAQ,KACX,OAAM,MAAM,kBAAkB;CAGhC,MAAM,UAAU,CAAC,QAAQ,OAAO,MAAM,SAAS,GAAG;AAClD,UAAS,MAAM,yBAAyB,mBAAmB,oBAAoB;CAE/E,MAAM,SAAS,MAAM,OAAO,KAAK,WAAW,OAAO,WAAW;EAC5D,MAAM,SAAS,OAAO,SAAS,SAAS;AACxC,WAAS,QAAQ,IAAI,iBAAiB,CAAC,IAAI,UAAU,GAAG,MAAM,KAAK,OAAO,SAAS,KAAK,IAAI,OAAO,GAAG;GACtG;AAEF,UAAS,KAAK,wBAAwB,OAAO,OAAO,GAAG,OAAO,mBAAmB,SAAS;AAC1F,OAAM,GAAG,MAAM;AAGf,KAAI,QAAQ,MAAM;EAChB,MAAM,aAAa,KAAK,UAAU,QAAQ,MAAM,EAAE;AAClD,MAAI,QAAQ,QAAQ;AAClB,iBAAc,QAAQ,QAAQ,YAAY,QAAQ;AAClD,WAAQ,IAAI,mBAAmB,QAAQ,SAAS;QAEhD,SAAQ,IAAI,WAAW;QAEpB;EACL,MAAM,YAAY,iBAAiB,aAAa,OAAO;AACvD,UAAQ,IAAI,OAAO,UAAU;AAE7B,MAAI,QAAQ,QAAQ;AAClB,iBAAc,QAAQ,QAAQ,KAAK,UAAU,QAAQ,MAAM,EAAE,EAAE,QAAQ;AACvE,SAAM,KAAK,4BAA4B,QAAQ,SAAS;;AAG1D,QAAM,MAA
M,OAAO,gBAAgB,sBAAsB,oBAAoB;;AAG/E,SAAQ,KAAK,OAAO,gBAAgB,IAAI,EAAE"}
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
import { n as readConfig } from "./index.js";
|
|
2
|
+
import fs from "fs";
|
|
3
|
+
|
|
4
|
+
//#region src/commands/status.ts
|
|
5
|
+
/**
 * Implements the `operor status` command: prints a human-readable overview of
 * the current configuration to stdout.
 *
 * Sections, in order: LLM, channel, MCP skills, analytics settings, agents
 * discovered under `<cwd>/agents`, and — when the analytics database file
 * exists and has messages — a usage summary for the last 24 hours.
 *
 * Every optional section is best-effort: a failure to load skills, agents or
 * analytics is reported (or skipped) without aborting the command.
 *
 * @returns {Promise<void>}
 */
async function runStatus() {
	const config = readConfig();
	console.log("\n Operor Status\n");

	// LLM
	const llmStatus = config.LLM_PROVIDER ? `${config.LLM_PROVIDER} (${config.LLM_MODEL || "default"})` : "Not configured";
	console.log(` LLM: ${config.LLM_PROVIDER ? "✓" : "✗"} ${llmStatus}`);

	// Channel
	const channelStatus = config.CHANNEL || "Not configured";
	console.log(` Channel: ${config.CHANNEL ? "✓" : "✗"} ${channelStatus}`);

	// Skills (MCP): enabled unless SKILLS_ENABLED is explicitly the string "false".
	if (config.SKILLS_ENABLED !== "false") {
		try {
			const { loadSkillsConfig } = await import("@operor/skills");
			const enabledSkills = loadSkillsConfig().skills.filter((s) => s.enabled !== false);
			if (enabledSkills.length > 0) {
				console.log(` Skills: ✓ ${enabledSkills.length} MCP skill(s)`);
				for (const skill of enabledSkills) {
					const transport = skill.transport || "stdio";
					const target = skill.command || skill.url || "?";
					console.log(` └─ ${skill.name} (${transport}: ${target})`);
				}
			} else {
				console.log(` Skills: ─ No skills in mcp.json`);
			}
		} catch {
			// Any failure while importing or loading the skills config is reported this way.
			console.log(` Skills: ─ No mcp.json found`);
		}
	} else {
		console.log(` Skills: ✗ Disabled`);
	}

	// Analytics settings: enabled unless ANALYTICS_ENABLED is explicitly "false".
	if (config.ANALYTICS_ENABLED !== "false") {
		const dbPath = config.ANALYTICS_DB_PATH || "./analytics.db";
		const digestStatus = config.ANALYTICS_DIGEST_ENABLED === "true" ? `${config.ANALYTICS_DIGEST_SCHEDULE || "daily"} at ${config.ANALYTICS_DIGEST_TIME || "09:00"}` : "disabled";
		console.log(` Analytics: ✓ Enabled (${dbPath})`);
		console.log(` └─ Digest: ${digestStatus}`);
	} else {
		console.log(` Analytics: ✗ Disabled`);
	}

	// Agents from the agents/ directory of the current working directory.
	const agentsDir = `${process.cwd()}/agents`;
	if (fs.existsSync(agentsDir)) {
		try {
			const { AgentLoader } = await import("@operor/core");
			const definitions = await new AgentLoader(process.cwd()).loadAll();
			if (definitions.length > 0) {
				console.log(`\n Agents: ✓ ${definitions.length} loaded from agents/`);
				for (const def of definitions) {
					const cfg = def.config;
					const details = [];
					if (cfg.channels?.length) details.push(`channels: ${cfg.channels.join(", ")}`);
					if (cfg.skills?.length) details.push(`skills: ${cfg.skills.join(", ")}`);
					if (cfg.knowledgeBase) details.push("KB");
					if (cfg.priority) details.push(`priority: ${cfg.priority}`);
					const suffix = details.length > 0 ? ` (${details.join(" | ")})` : "";
					console.log(` └─ ${cfg.name}${suffix}`);
				}
			} else {
				console.log(`\n Agents: ✗ agents/ directory found but no valid definitions`);
			}
		} catch {
			console.log(`\n Agents: ✗ Failed to load agents/`);
		}
	} else {
		console.log(`\n Agents: ─ No agents/ directory (using default single-agent mode)`);
	}

	// Usage summary (last 24h).
	if (config.ANALYTICS_ENABLED !== "false") {
		try {
			const { SQLiteAnalyticsStore } = await import("@operor/analytics");
			const dbPath = config.ANALYTICS_DB_PATH || "./analytics.db";
			if (fs.existsSync(dbPath)) {
				const store = new SQLiteAnalyticsStore(dbPath);
				await store.initialize();
				let summary;
				try {
					const now = Date.now();
					summary = await store.getSummary({
						from: now - 24 * 60 * 60 * 1000,
						to: now
					});
				} finally {
					// Always release the store, even when the summary query fails;
					// otherwise the error is swallowed below and the handle stays open.
					await store.close();
				}
				if (summary.totalMessages > 0) {
					// Sub-second averages are shown in whole milliseconds, others in seconds.
					const avgResponse = summary.avgResponseTime < 1e3 ? Math.round(summary.avgResponseTime) + "ms" : (summary.avgResponseTime / 1e3).toFixed(1) + "s";
					console.log(` Usage (last 24h)`);
					console.log(` Messages: ${summary.totalMessages}`);
					console.log(` Customers: ${summary.uniqueCustomers}`);
					console.log(` KB answered: ${summary.kbAnsweredPct.toFixed(1)}%`);
					console.log(` Avg response: ${avgResponse}`);
					console.log();
				}
			}
		} catch {
			// Analytics DB not available — deliberately skipped so `status` never fails on it.
		}
	}
	console.log();
}
|
|
79
|
+
|
|
80
|
+
//#endregion
|
|
81
|
+
export { runStatus };
|
|
82
|
+
//# sourceMappingURL=status-D6LIZvQa.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"status-D6LIZvQa.js","names":[],"sources":["../src/commands/status.ts"],"sourcesContent":["import fs from 'fs';\nimport { readConfig } from '../config.js';\n\nexport async function runStatus(): Promise<void> {\n const config = readConfig();\n\n console.log('\\n Operor Status\\n');\n\n // LLM\n const llmStatus = config.LLM_PROVIDER\n ? `${config.LLM_PROVIDER} (${config.LLM_MODEL || 'default'})`\n : 'Not configured';\n console.log(` LLM: ${config.LLM_PROVIDER ? '✓' : '✗'} ${llmStatus}`);\n\n // Channel\n const channelStatus = config.CHANNEL || 'Not configured';\n console.log(` Channel: ${config.CHANNEL ? '✓' : '✗'} ${channelStatus}`);\n\n // Skills (MCP)\n if (config.SKILLS_ENABLED !== 'false') {\n try {\n const { loadSkillsConfig } = await import('@operor/skills');\n const skillsConfig = loadSkillsConfig();\n const enabledSkills = skillsConfig.skills.filter((s: any) => s.enabled !== false);\n if (enabledSkills.length > 0) {\n console.log(` Skills: ✓ ${enabledSkills.length} MCP skill(s)`);\n for (const skill of enabledSkills) {\n const transport = skill.transport || 'stdio';\n const target = skill.command || skill.url || '?';\n console.log(` └─ ${skill.name} (${transport}: ${target})`);\n }\n } else {\n console.log(` Skills: ─ No skills in mcp.json`);\n }\n } catch {\n console.log(` Skills: ─ No mcp.json found`);\n }\n } else {\n console.log(` Skills: ✗ Disabled`);\n }\n\n // Analytics\n if (config.ANALYTICS_ENABLED !== 'false') {\n const dbPath = config.ANALYTICS_DB_PATH || './analytics.db';\n const digestStatus = config.ANALYTICS_DIGEST_ENABLED === 'true'\n ? 
`${config.ANALYTICS_DIGEST_SCHEDULE || 'daily'} at ${config.ANALYTICS_DIGEST_TIME || '09:00'}`\n : 'disabled';\n console.log(` Analytics: ✓ Enabled (${dbPath})`);\n console.log(` └─ Digest: ${digestStatus}`);\n } else {\n console.log(` Analytics: ✗ Disabled`);\n }\n\n // Agents from agents/ directory\n const agentsDir = `${process.cwd()}/agents`;\n if (fs.existsSync(agentsDir)) {\n try {\n const { AgentLoader } = await import('@operor/core');\n const loader = new AgentLoader(process.cwd());\n const definitions = await loader.loadAll();\n\n if (definitions.length > 0) {\n console.log(`\\n Agents: ✓ ${definitions.length} loaded from agents/`);\n for (const def of definitions) {\n const cfg = def.config;\n const details: string[] = [];\n if (cfg.channels?.length) details.push(`channels: ${cfg.channels.join(', ')}`);\n if (cfg.skills?.length) details.push(`skills: ${cfg.skills.join(', ')}`);\n if (cfg.knowledgeBase) details.push('KB');\n if (cfg.priority) details.push(`priority: ${cfg.priority}`);\n const suffix = details.length > 0 ? 
` (${details.join(' | ')})` : '';\n console.log(` └─ ${cfg.name}${suffix}`);\n }\n } else {\n console.log(`\\n Agents: ✗ agents/ directory found but no valid definitions`);\n }\n } catch {\n console.log(`\\n Agents: ✗ Failed to load agents/`);\n }\n } else {\n console.log(`\\n Agents: ─ No agents/ directory (using default single-agent mode)`);\n }\n\n // Usage summary (last 24h)\n if (config.ANALYTICS_ENABLED !== 'false') {\n try {\n const { SQLiteAnalyticsStore } = await import('@operor/analytics');\n const dbPath = config.ANALYTICS_DB_PATH || './analytics.db';\n if (fs.existsSync(dbPath)) {\n const store = new SQLiteAnalyticsStore(dbPath);\n await store.initialize();\n const now = Date.now();\n const range = { from: now - 24 * 60 * 60 * 1000, to: now };\n const summary = await store.getSummary(range);\n await store.close();\n\n if (summary.totalMessages > 0) {\n console.log(` Usage (last 24h)`);\n console.log(` Messages: ${summary.totalMessages}`);\n console.log(` Customers: ${summary.uniqueCustomers}`);\n console.log(` KB answered: ${summary.kbAnsweredPct.toFixed(1)}%`);\n console.log(` Avg response: ${summary.avgResponseTime < 1000 ? 
Math.round(summary.avgResponseTime) + 'ms' : (summary.avgResponseTime / 1000).toFixed(1) + 's'}`);\n console.log();\n }\n }\n } catch {\n // Analytics DB not available — skip silently\n }\n }\n\n console.log();\n}\n"],"mappings":";;;;AAGA,eAAsB,YAA2B;CAC/C,MAAM,SAAS,YAAY;AAE3B,SAAQ,IAAI,sBAAsB;CAGlC,MAAM,YAAY,OAAO,eACrB,GAAG,OAAO,aAAa,IAAI,OAAO,aAAa,UAAU,KACzD;AACJ,SAAQ,IAAI,oBAAoB,OAAO,eAAe,MAAM,IAAI,GAAG,YAAY;CAG/E,MAAM,gBAAgB,OAAO,WAAW;AACxC,SAAQ,IAAI,oBAAoB,OAAO,UAAU,MAAM,IAAI,GAAG,gBAAgB;AAG9E,KAAI,OAAO,mBAAmB,QAC5B,KAAI;EACF,MAAM,EAAE,qBAAqB,MAAM,OAAO;EAE1C,MAAM,gBADe,kBAAkB,CACJ,OAAO,QAAQ,MAAW,EAAE,YAAY,MAAM;AACjF,MAAI,cAAc,SAAS,GAAG;AAC5B,WAAQ,IAAI,sBAAsB,cAAc,OAAO,eAAe;AACtE,QAAK,MAAM,SAAS,eAAe;IACjC,MAAM,YAAY,MAAM,aAAa;IACrC,MAAM,SAAS,MAAM,WAAW,MAAM,OAAO;AAC7C,YAAQ,IAAI,UAAU,MAAM,KAAK,IAAI,UAAU,IAAI,OAAO,GAAG;;QAG/D,SAAQ,IAAI,2CAA2C;SAEnD;AACN,UAAQ,IAAI,uCAAuC;;KAGrD,SAAQ,IAAI,8BAA8B;AAI5C,KAAI,OAAO,sBAAsB,SAAS;EACxC,MAAM,SAAS,OAAO,qBAAqB;EAC3C,MAAM,eAAe,OAAO,6BAA6B,SACrD,GAAG,OAAO,6BAA6B,QAAQ,MAAM,OAAO,yBAAyB,YACrF;AACJ,UAAQ,IAAI,+BAA+B,OAAO,GAAG;AACrD,UAAQ,IAAI,kBAAkB,eAAe;OAE7C,SAAQ,IAAI,8BAA8B;CAI5C,MAAM,YAAY,GAAG,QAAQ,KAAK,CAAC;AACnC,KAAI,GAAG,WAAW,UAAU,CAC1B,KAAI;EACF,MAAM,EAAE,gBAAgB,MAAM,OAAO;EAErC,MAAM,cAAc,MADL,IAAI,YAAY,QAAQ,KAAK,CAAC,CACZ,SAAS;AAE1C,MAAI,YAAY,SAAS,GAAG;AAC1B,WAAQ,IAAI,wBAAwB,YAAY,OAAO,sBAAsB;AAC7E,QAAK,MAAM,OAAO,aAAa;IAC7B,MAAM,MAAM,IAAI;IAChB,MAAM,UAAoB,EAAE;AAC5B,QAAI,IAAI,UAAU,OAAQ,SAAQ,KAAK,aAAa,IAAI,SAAS,KAAK,KAAK,GAAG;AAC9E,QAAI,IAAI,QAAQ,OAAQ,SAAQ,KAAK,WAAW,IAAI,OAAO,KAAK,KAAK,GAAG;AACxE,QAAI,IAAI,cAAe,SAAQ,KAAK,KAAK;AACzC,QAAI,IAAI,SAAU,SAAQ,KAAK,aAAa,IAAI,WAAW;IAC3D,MAAM,SAAS,QAAQ,SAAS,IAAI,KAAK,QAAQ,KAAK,MAAM,CAAC,KAAK;AAClE,YAAQ,IAAI,UAAU,IAAI,OAAO,SAAS;;QAG5C,SAAQ,IAAI,wEAAwE;SAEhF;AACN,UAAQ,IAAI,8CAA8C;;KAG5D,SAAQ,IAAI,8EAA8E;AAI5F,KAAI,OAAO,sBAAsB,QAC/B,KAAI;EACF,MAAM,EAAE,yBAAyB,MAAM,OAAO;EAC9C,MAAM,SAAS,OAAO,qBAAqB;AAC3C,MAAI,GAAG,WAAW,OAAO,EAAE;GACzB,MAAM,QAAQ,IAAI,qBAAqB,OAAO;AAC9C,SAAM,MAAM,YAAY;GACxB,MAAM,MA
AM,KAAK,KAAK;GACtB,MAAM,QAAQ;IAAE,MAAM,MAAM,OAAU,KAAK;IAAM,IAAI;IAAK;GAC1D,MAAM,UAAU,MAAM,MAAM,WAAW,MAAM;AAC7C,SAAM,MAAM,OAAO;AAEnB,OAAI,QAAQ,gBAAgB,GAAG;AAC7B,YAAQ,IAAI,qBAAqB;AACjC,YAAQ,IAAI,oBAAoB,QAAQ,gBAAgB;AACxD,YAAQ,IAAI,oBAAoB,QAAQ,kBAAkB;AAC1D,YAAQ,IAAI,oBAAoB,QAAQ,cAAc,QAAQ,EAAE,CAAC,GAAG;AACpE,YAAQ,IAAI,qBAAqB,QAAQ,kBAAkB,MAAO,KAAK,MAAM,QAAQ,gBAAgB,GAAG,QAAQ,QAAQ,kBAAkB,KAAM,QAAQ,EAAE,GAAG,MAAM;AACnK,YAAQ,KAAK;;;SAGX;AAKV,SAAQ,KAAK"}
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
import { n as readConfig } from "./index.js";
|
|
2
|
+
import { t as formatTimestamp } from "./utils-BuV4q7f6.js";
|
|
3
|
+
import { writeFileSync } from "fs";
|
|
4
|
+
|
|
5
|
+
//#region src/commands/test.ts
|
|
6
|
+
async function runTest(options = {}) {
|
|
7
|
+
const config = readConfig();
|
|
8
|
+
const { Operor } = await import("@operor/core");
|
|
9
|
+
const { MockProvider } = await import("@operor/provider-mock");
|
|
10
|
+
const { MockShopifySkill } = await import("@operor/testing");
|
|
11
|
+
if (options.csv) {
|
|
12
|
+
const { CSVLoader, TestSuiteRunner, SkillTestHarness } = await import("@operor/testing");
|
|
13
|
+
const os = new Operor({
|
|
14
|
+
debug: false,
|
|
15
|
+
batchWindowMs: 0
|
|
16
|
+
});
|
|
17
|
+
const provider = new MockProvider();
|
|
18
|
+
const shopify = new MockShopifySkill();
|
|
19
|
+
let skill = shopify;
|
|
20
|
+
if (options.real || options.dryRun) {
|
|
21
|
+
skill = new SkillTestHarness(shopify, {
|
|
22
|
+
allowWrites: options.allowWrites ?? false,
|
|
23
|
+
dryRun: options.dryRun ?? false
|
|
24
|
+
});
|
|
25
|
+
await skill.authenticate();
|
|
26
|
+
}
|
|
27
|
+
await os.addProvider(provider);
|
|
28
|
+
await os.addSkill(skill);
|
|
29
|
+
os.createAgent({
|
|
30
|
+
name: "Test Agent",
|
|
31
|
+
purpose: "Handle customer support conversations",
|
|
32
|
+
personality: "empathetic and solution-focused",
|
|
33
|
+
triggers: ["order_tracking", "general"],
|
|
34
|
+
tools: [
|
|
35
|
+
shopify.tools.get_order,
|
|
36
|
+
shopify.tools.create_discount,
|
|
37
|
+
shopify.tools.search_products
|
|
38
|
+
],
|
|
39
|
+
rules: [{
|
|
40
|
+
name: "Auto-compensation",
|
|
41
|
+
condition: async (_ctx, toolResults) => {
|
|
42
|
+
const order = toolResults.find((t) => t.name === "get_order");
|
|
43
|
+
return order?.success && order.result?.isDelayed && order.result?.delayDays >= 2;
|
|
44
|
+
},
|
|
45
|
+
action: async () => {
|
|
46
|
+
return {
|
|
47
|
+
type: "discount_created",
|
|
48
|
+
code: (await shopify.tools.create_discount.execute({
|
|
49
|
+
percent: 10,
|
|
50
|
+
validDays: 30
|
|
51
|
+
})).code,
|
|
52
|
+
percent: 10,
|
|
53
|
+
validDays: 30
|
|
54
|
+
};
|
|
55
|
+
}
|
|
56
|
+
}]
|
|
57
|
+
});
|
|
58
|
+
await os.start();
|
|
59
|
+
let testCases = await CSVLoader.fromFile(options.csv);
|
|
60
|
+
if (options.tag) {
|
|
61
|
+
testCases = testCases.filter((tc) => tc.tags?.some((t) => t.toLowerCase() === options.tag.toLowerCase()));
|
|
62
|
+
if (testCases.length === 0) {
|
|
63
|
+
console.error(`No test cases found with tag "${options.tag}"`);
|
|
64
|
+
process.exit(1);
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
console.log(`\n Running ${testCases.length} test case(s) from ${options.csv}...\n`);
|
|
68
|
+
let llm;
|
|
69
|
+
if (config.LLM_PROVIDER && config.LLM_API_KEY) {
|
|
70
|
+
const { AIProvider } = await import("@operor/llm");
|
|
71
|
+
llm = new AIProvider({
|
|
72
|
+
provider: config.LLM_PROVIDER,
|
|
73
|
+
apiKey: config.LLM_API_KEY,
|
|
74
|
+
model: config.LLM_MODEL
|
|
75
|
+
});
|
|
76
|
+
}
|
|
77
|
+
const result = await new TestSuiteRunner({
|
|
78
|
+
agentOS: os,
|
|
79
|
+
llm
|
|
80
|
+
}).runSuite(testCases);
|
|
81
|
+
await os.stop();
|
|
82
|
+
for (const r of result.results) {
|
|
83
|
+
const icon = r.evaluation.passed ? "✓" : "✗";
|
|
84
|
+
const status = r.evaluation.passed ? "PASS" : "FAIL";
|
|
85
|
+
console.log(` ${icon} [${formatTimestamp()}] [${status}] ${r.testCase.id}: ${r.testCase.question}`);
|
|
86
|
+
if (!r.evaluation.passed) console.log(` Score: ${r.evaluation.score.toFixed(2)} — ${r.evaluation.reasoning}`);
|
|
87
|
+
}
|
|
88
|
+
console.log(`\n Results: ${result.passed}/${result.total} passed (avg score: ${result.averageScore.toFixed(2)})`);
|
|
89
|
+
console.log(` Duration: ${(result.totalDuration / 1e3).toFixed(1)}s`);
|
|
90
|
+
console.log(` Status: ${result.failed === 0 ? "✓ ALL PASSED" : "✗ SOME FAILED"}\n`);
|
|
91
|
+
if (options.report) {
|
|
92
|
+
writeFileSync(options.report, JSON.stringify(result, null, 2), "utf-8");
|
|
93
|
+
console.log(` Report saved to ${options.report}\n`);
|
|
94
|
+
}
|
|
95
|
+
process.exit(result.failed === 0 ? 0 : 1);
|
|
96
|
+
return;
|
|
97
|
+
}
|
|
98
|
+
console.log("\n Running Operor test scenarios...\n");
|
|
99
|
+
const os = new Operor({
|
|
100
|
+
debug: true,
|
|
101
|
+
batchWindowMs: 0
|
|
102
|
+
});
|
|
103
|
+
const provider = new MockProvider();
|
|
104
|
+
const shopify = new MockShopifySkill();
|
|
105
|
+
await os.addProvider(provider);
|
|
106
|
+
await os.addSkill(shopify);
|
|
107
|
+
os.createAgent({
|
|
108
|
+
name: "Test Agent",
|
|
109
|
+
purpose: "Validate agent processing pipeline",
|
|
110
|
+
personality: "helpful",
|
|
111
|
+
triggers: ["order_tracking", "general"],
|
|
112
|
+
tools: [shopify.tools.get_order, shopify.tools.create_discount],
|
|
113
|
+
rules: [{
|
|
114
|
+
name: "Auto-compensation",
|
|
115
|
+
condition: async (_ctx, toolResults) => {
|
|
116
|
+
const order = toolResults.find((t) => t.name === "get_order");
|
|
117
|
+
return order?.success && order.result?.isDelayed && order.result?.delayDays >= 2;
|
|
118
|
+
},
|
|
119
|
+
action: async () => {
|
|
120
|
+
return {
|
|
121
|
+
type: "discount_created",
|
|
122
|
+
code: (await shopify.tools.create_discount.execute({
|
|
123
|
+
percent: 10,
|
|
124
|
+
validDays: 30
|
|
125
|
+
})).code,
|
|
126
|
+
percent: 10,
|
|
127
|
+
validDays: 30
|
|
128
|
+
};
|
|
129
|
+
}
|
|
130
|
+
}]
|
|
131
|
+
});
|
|
132
|
+
let processed = 0;
|
|
133
|
+
const total = 4;
|
|
134
|
+
const done = new Promise((resolve) => {
|
|
135
|
+
os.on("message:processed", () => {
|
|
136
|
+
if (++processed >= total) resolve();
|
|
137
|
+
});
|
|
138
|
+
});
|
|
139
|
+
await os.start();
|
|
140
|
+
for (const s of [
|
|
141
|
+
{
|
|
142
|
+
phone: "+1",
|
|
143
|
+
msg: "Where is my order #12345?",
|
|
144
|
+
label: "Delayed order (auto-compensation)"
|
|
145
|
+
},
|
|
146
|
+
{
|
|
147
|
+
phone: "+2",
|
|
148
|
+
msg: "Check order #67890",
|
|
149
|
+
label: "On-time order"
|
|
150
|
+
},
|
|
151
|
+
{
|
|
152
|
+
phone: "+3",
|
|
153
|
+
msg: "Where is order #99999?",
|
|
154
|
+
label: "Order not found"
|
|
155
|
+
},
|
|
156
|
+
{
|
|
157
|
+
phone: "+4",
|
|
158
|
+
msg: "Hello!",
|
|
159
|
+
label: "General greeting"
|
|
160
|
+
}
|
|
161
|
+
]) {
|
|
162
|
+
console.log(` [${formatTimestamp()}] Test: ${s.label}`);
|
|
163
|
+
provider.simulateIncomingMessage(s.phone, s.msg);
|
|
164
|
+
await new Promise((r) => setTimeout(r, 100));
|
|
165
|
+
}
|
|
166
|
+
await done;
|
|
167
|
+
await os.stop();
|
|
168
|
+
const discounts = shopify.getDiscounts();
|
|
169
|
+
console.log(`\n Results: ${processed}/${total} messages processed`);
|
|
170
|
+
console.log(` Discounts created: ${discounts.length}`);
|
|
171
|
+
console.log(` Status: ${processed === total ? "✓ PASS" : "✗ FAIL"}\n`);
|
|
172
|
+
process.exit(processed === total ? 0 : 1);
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
//#endregion
|
|
176
|
+
export { runTest };
|
|
177
|
+
//# sourceMappingURL=test-DYjkxbtK.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"test-DYjkxbtK.js","names":[],"sources":["../src/commands/test.ts"],"sourcesContent":["import { readConfig } from '../config.js';\nimport { writeFileSync } from 'fs';\nimport { formatTimestamp } from '../utils.js';\n\nexport async function runTest(options: {\n csv?: string;\n tag?: string;\n report?: string;\n real?: boolean;\n allowWrites?: boolean;\n dryRun?: boolean;\n} = {}): Promise<void> {\n const config = readConfig();\n\n const { Operor } = await import('@operor/core');\n const { MockProvider } = await import('@operor/provider-mock');\n const { MockShopifySkill } = await import('@operor/testing');\n\n // If --csv is provided, run CSV test suite mode\n if (options.csv) {\n const { CSVLoader, TestSuiteRunner, SkillTestHarness } = await import('@operor/testing');\n\n const os = new Operor({ debug: false, batchWindowMs: 0 });\n const provider = new MockProvider();\n const shopify = new MockShopifySkill();\n\n // Wrap skill with safety harness if --real or --dry-run\n let skill: any = shopify;\n if (options.real || options.dryRun) {\n skill = new SkillTestHarness(shopify, {\n allowWrites: options.allowWrites ?? false,\n dryRun: options.dryRun ?? 
false,\n });\n await skill.authenticate();\n }\n\n await os.addProvider(provider);\n await os.addSkill(skill);\n\n os.createAgent({\n name: 'Test Agent',\n purpose: 'Handle customer support conversations',\n personality: 'empathetic and solution-focused',\n triggers: ['order_tracking', 'general'],\n tools: [shopify.tools.get_order, shopify.tools.create_discount, shopify.tools.search_products],\n rules: [{\n name: 'Auto-compensation',\n condition: async (_ctx: any, toolResults: any[]) => {\n const order = toolResults.find((t) => t.name === 'get_order');\n return order?.success && order.result?.isDelayed && order.result?.delayDays >= 2;\n },\n action: async () => {\n const discount = await shopify.tools.create_discount.execute({ percent: 10, validDays: 30 });\n return { type: 'discount_created', code: discount.code, percent: 10, validDays: 30 };\n },\n }],\n });\n\n await os.start();\n\n let testCases = await CSVLoader.fromFile(options.csv);\n\n // Filter by tag if specified\n if (options.tag) {\n testCases = testCases.filter((tc) =>\n tc.tags?.some((t) => t.toLowerCase() === options.tag!.toLowerCase())\n );\n if (testCases.length === 0) {\n console.error(`No test cases found with tag \"${options.tag}\"`);\n process.exit(1);\n }\n }\n\n console.log(`\\n Running ${testCases.length} test case(s) from ${options.csv}...\\n`);\n\n // Set up LLM if available for evaluation\n let llm: any;\n if (config.LLM_PROVIDER && config.LLM_API_KEY) {\n const { AIProvider } = await import('@operor/llm');\n llm = new AIProvider({\n provider: config.LLM_PROVIDER as any,\n apiKey: config.LLM_API_KEY,\n model: config.LLM_MODEL,\n });\n }\n\n const suiteRunner = new TestSuiteRunner({\n agentOS: os,\n llm,\n });\n\n const result = await suiteRunner.runSuite(testCases);\n await os.stop();\n\n // Display results\n for (const r of result.results) {\n const icon = r.evaluation.passed ? '✓' : '✗';\n const status = r.evaluation.passed ? 
'PASS' : 'FAIL';\n console.log(` ${icon} [${formatTimestamp()}] [${status}] ${r.testCase.id}: ${r.testCase.question}`);\n if (!r.evaluation.passed) {\n console.log(` Score: ${r.evaluation.score.toFixed(2)} — ${r.evaluation.reasoning}`);\n }\n }\n\n console.log(`\\n Results: ${result.passed}/${result.total} passed (avg score: ${result.averageScore.toFixed(2)})`);\n console.log(` Duration: ${(result.totalDuration / 1000).toFixed(1)}s`);\n console.log(` Status: ${result.failed === 0 ? '✓ ALL PASSED' : '✗ SOME FAILED'}\\n`);\n\n if (options.report) {\n writeFileSync(options.report, JSON.stringify(result, null, 2), 'utf-8');\n console.log(` Report saved to ${options.report}\\n`);\n }\n\n process.exit(result.failed === 0 ? 0 : 1);\n return;\n }\n\n // Default: run built-in demo scenarios\n console.log('\\n Running Operor test scenarios...\\n');\n\n const os = new Operor({ debug: true, batchWindowMs: 0 });\n const provider = new MockProvider();\n const shopify = new MockShopifySkill();\n\n await os.addProvider(provider);\n await os.addSkill(shopify);\n\n os.createAgent({\n name: 'Test Agent',\n purpose: 'Validate agent processing pipeline',\n personality: 'helpful',\n triggers: ['order_tracking', 'general'],\n tools: [shopify.tools.get_order, shopify.tools.create_discount],\n rules: [{\n name: 'Auto-compensation',\n condition: async (_ctx: any, toolResults: any[]) => {\n const order = toolResults.find(t => t.name === 'get_order');\n return order?.success && order.result?.isDelayed && order.result?.delayDays >= 2;\n },\n action: async () => {\n const discount = await shopify.tools.create_discount.execute({ percent: 10, validDays: 30 });\n return { type: 'discount_created', code: discount.code, percent: 10, validDays: 30 };\n },\n }],\n });\n\n let processed = 0;\n const total = 4;\n const done = new Promise<void>(resolve => {\n os.on('message:processed', () => { if (++processed >= total) resolve(); });\n });\n\n await os.start();\n\n const scenarios = [\n { phone: '+1', 
msg: 'Where is my order #12345?', label: 'Delayed order (auto-compensation)' },\n { phone: '+2', msg: 'Check order #67890', label: 'On-time order' },\n { phone: '+3', msg: 'Where is order #99999?', label: 'Order not found' },\n { phone: '+4', msg: 'Hello!', label: 'General greeting' },\n ];\n\n for (const s of scenarios) {\n console.log(` [${formatTimestamp()}] Test: ${s.label}`);\n provider.simulateIncomingMessage(s.phone, s.msg);\n await new Promise(r => setTimeout(r, 100));\n }\n\n await done;\n await os.stop();\n\n const discounts = shopify.getDiscounts();\n console.log(`\\n Results: ${processed}/${total} messages processed`);\n console.log(` Discounts created: ${discounts.length}`);\n console.log(` Status: ${processed === total ? '✓ PASS' : '✗ FAIL'}\\n`);\n\n process.exit(processed === total ? 0 : 1);\n}\n"],"mappings":";;;;;AAIA,eAAsB,QAAQ,UAO1B,EAAE,EAAiB;CACrB,MAAM,SAAS,YAAY;CAE3B,MAAM,EAAE,WAAW,MAAM,OAAO;CAChC,MAAM,EAAE,iBAAiB,MAAM,OAAO;CACtC,MAAM,EAAE,qBAAqB,MAAM,OAAO;AAG1C,KAAI,QAAQ,KAAK;EACf,MAAM,EAAE,WAAW,iBAAiB,qBAAqB,MAAM,OAAO;EAEtE,MAAM,KAAK,IAAI,OAAO;GAAE,OAAO;GAAO,eAAe;GAAG,CAAC;EACzD,MAAM,WAAW,IAAI,cAAc;EACnC,MAAM,UAAU,IAAI,kBAAkB;EAGtC,IAAI,QAAa;AACjB,MAAI,QAAQ,QAAQ,QAAQ,QAAQ;AAClC,WAAQ,IAAI,iBAAiB,SAAS;IACpC,aAAa,QAAQ,eAAe;IACpC,QAAQ,QAAQ,UAAU;IAC3B,CAAC;AACF,SAAM,MAAM,cAAc;;AAG5B,QAAM,GAAG,YAAY,SAAS;AAC9B,QAAM,GAAG,SAAS,MAAM;AAExB,KAAG,YAAY;GACb,MAAM;GACN,SAAS;GACT,aAAa;GACb,UAAU,CAAC,kBAAkB,UAAU;GACvC,OAAO;IAAC,QAAQ,MAAM;IAAW,QAAQ,MAAM;IAAiB,QAAQ,MAAM;IAAgB;GAC9F,OAAO,CAAC;IACN,MAAM;IACN,WAAW,OAAO,MAAW,gBAAuB;KAClD,MAAM,QAAQ,YAAY,MAAM,MAAM,EAAE,SAAS,YAAY;AAC7D,YAAO,OAAO,WAAW,MAAM,QAAQ,aAAa,MAAM,QAAQ,aAAa;;IAEjF,QAAQ,YAAY;AAElB,YAAO;MAAE,MAAM;MAAoB,OADlB,MAAM,QAAQ,MAAM,gBAAgB,QAAQ;OAAE,SAAS;OAAI,WAAW;OAAI,CAAC,EAC1C;MAAM,SAAS;MAAI,WAAW;MAAI;;IAEvF,CAAC;GACH,CAAC;AAEF,QAAM,GAAG,OAAO;EAEhB,IAAI,YAAY,MAAM,UAAU,SAAS,QAAQ,IAAI;AAGrD,MAAI,QAAQ,KAAK;AACf,eAAY,UAAU,QAAQ,OAC5B,GAAG,MAAM,MAAM,MAAM,EAAE,aAAa,KAAK,QAAQ,IAAK,aAAa,CAAC,CACrE;AACD,OAAI,UAAU,
WAAW,GAAG;AAC1B,YAAQ,MAAM,iCAAiC,QAAQ,IAAI,GAAG;AAC9D,YAAQ,KAAK,EAAE;;;AAInB,UAAQ,IAAI,eAAe,UAAU,OAAO,qBAAqB,QAAQ,IAAI,OAAO;EAGpF,IAAI;AACJ,MAAI,OAAO,gBAAgB,OAAO,aAAa;GAC7C,MAAM,EAAE,eAAe,MAAM,OAAO;AACpC,SAAM,IAAI,WAAW;IACnB,UAAU,OAAO;IACjB,QAAQ,OAAO;IACf,OAAO,OAAO;IACf,CAAC;;EAQJ,MAAM,SAAS,MALK,IAAI,gBAAgB;GACtC,SAAS;GACT;GACD,CAAC,CAE+B,SAAS,UAAU;AACpD,QAAM,GAAG,MAAM;AAGf,OAAK,MAAM,KAAK,OAAO,SAAS;GAC9B,MAAM,OAAO,EAAE,WAAW,SAAS,MAAM;GACzC,MAAM,SAAS,EAAE,WAAW,SAAS,SAAS;AAC9C,WAAQ,IAAI,KAAK,KAAK,IAAI,iBAAiB,CAAC,KAAK,OAAO,IAAI,EAAE,SAAS,GAAG,IAAI,EAAE,SAAS,WAAW;AACpG,OAAI,CAAC,EAAE,WAAW,OAChB,SAAQ,IAAI,cAAc,EAAE,WAAW,MAAM,QAAQ,EAAE,CAAC,KAAK,EAAE,WAAW,YAAY;;AAI1F,UAAQ,IAAI,gBAAgB,OAAO,OAAO,GAAG,OAAO,MAAM,sBAAsB,OAAO,aAAa,QAAQ,EAAE,CAAC,GAAG;AAClH,UAAQ,IAAI,gBAAgB,OAAO,gBAAgB,KAAM,QAAQ,EAAE,CAAC,GAAG;AACvE,UAAQ,IAAI,aAAa,OAAO,WAAW,IAAI,iBAAiB,gBAAgB,IAAI;AAEpF,MAAI,QAAQ,QAAQ;AAClB,iBAAc,QAAQ,QAAQ,KAAK,UAAU,QAAQ,MAAM,EAAE,EAAE,QAAQ;AACvE,WAAQ,IAAI,qBAAqB,QAAQ,OAAO,IAAI;;AAGtD,UAAQ,KAAK,OAAO,WAAW,IAAI,IAAI,EAAE;AACzC;;AAIF,SAAQ,IAAI,yCAAyC;CAErD,MAAM,KAAK,IAAI,OAAO;EAAE,OAAO;EAAM,eAAe;EAAG,CAAC;CACxD,MAAM,WAAW,IAAI,cAAc;CACnC,MAAM,UAAU,IAAI,kBAAkB;AAEtC,OAAM,GAAG,YAAY,SAAS;AAC9B,OAAM,GAAG,SAAS,QAAQ;AAE1B,IAAG,YAAY;EACb,MAAM;EACN,SAAS;EACT,aAAa;EACb,UAAU,CAAC,kBAAkB,UAAU;EACvC,OAAO,CAAC,QAAQ,MAAM,WAAW,QAAQ,MAAM,gBAAgB;EAC/D,OAAO,CAAC;GACN,MAAM;GACN,WAAW,OAAO,MAAW,gBAAuB;IAClD,MAAM,QAAQ,YAAY,MAAK,MAAK,EAAE,SAAS,YAAY;AAC3D,WAAO,OAAO,WAAW,MAAM,QAAQ,aAAa,MAAM,QAAQ,aAAa;;GAEjF,QAAQ,YAAY;AAElB,WAAO;KAAE,MAAM;KAAoB,OADlB,MAAM,QAAQ,MAAM,gBAAgB,QAAQ;MAAE,SAAS;MAAI,WAAW;MAAI,CAAC,EAC1C;KAAM,SAAS;KAAI,WAAW;KAAI;;GAEvF,CAAC;EACH,CAAC;CAEF,IAAI,YAAY;CAChB,MAAM,QAAQ;CACd,MAAM,OAAO,IAAI,SAAc,YAAW;AACxC,KAAG,GAAG,2BAA2B;AAAE,OAAI,EAAE,aAAa,MAAO,UAAS;IAAI;GAC1E;AAEF,OAAM,GAAG,OAAO;AAShB,MAAK,MAAM,KAPO;EAChB;GAAE,OAAO;GAAM,KAAK;GAA6B,OAAO;GAAqC;EAC7F;GAAE,OAAO;GAAM,KAAK;GAAsB,OAAO;GAAiB;EAClE;GAAE,OAAO;GAAM,KAAK;GAA0B,OAAO;GAAmB;EACxE;GAAE,OAAO;GAAM,KAAK;GAAU,OAAO;GAAoB;EAC1D,EAE0B;AACzB,
UAAQ,IAAI,MAAM,iBAAiB,CAAC,UAAU,EAAE,QAAQ;AACxD,WAAS,wBAAwB,EAAE,OAAO,EAAE,IAAI;AAChD,QAAM,IAAI,SAAQ,MAAK,WAAW,GAAG,IAAI,CAAC;;AAG5C,OAAM;AACN,OAAM,GAAG,MAAM;CAEf,MAAM,YAAY,QAAQ,cAAc;AACxC,SAAQ,IAAI,gBAAgB,UAAU,GAAG,MAAM,qBAAqB;AACpE,SAAQ,IAAI,wBAAwB,UAAU,SAAS;AACvD,SAAQ,IAAI,aAAa,cAAc,QAAQ,WAAW,SAAS,IAAI;AAEvE,SAAQ,KAAK,cAAc,QAAQ,IAAI,EAAE"}
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
import { n as readConfig } from "./index.js";
|
|
2
|
+
import { t as formatTimestamp } from "./utils-BuV4q7f6.js";
|
|
3
|
+
|
|
4
|
+
//#region src/commands/test-suite.ts
|
|
5
|
+
/**
 * Run a file of test cases against a mock-backed Operor agent and print the results.
 *
 * Loads the test cases with `CSVLoader.fromFile`, wires a `MockProvider` and a
 * `MockShopifySkill` into a fresh `Operor` instance, optionally replaces the
 * agent's `process` with an LLM-driven tool loop, runs the suite through
 * `TestSuiteRunner`, prints a report (or raw JSON), and terminates the process.
 *
 * @param {string} file - Path of the test-case file handed to `CSVLoader.fromFile`.
 * @param {object} [options] - Flags read by this function.
 * @param {boolean} [options.real] - Removed mode; prints an error and exits with 1.
 * @param {boolean} [options.dryRun] - Removed mode; prints an error and exits with 1.
 * @param {boolean} [options.llm] - Use the configured LLM to drive the agent and the runner.
 * @param {boolean} [options.json] - Print the result object as JSON instead of the text report.
 * @param {boolean} [options.verbose] - Also print reasoning/response for failed cases.
 * @param {number} [options.timeout] - Passed to the runner; defaults to 10000.
 * @param {*} [options.strategy] - Passed through to the runner unchanged.
 * @returns {Promise<void>} Never resolves in practice: the function ends in
 *   `process.exit` (0 when no case failed, 1 otherwise or on any setup error).
 */
async function runTestSuite(file, options = {}) {
	const { Operor } = await import("@operor/core");
	const { MockProvider } = await import("@operor/provider-mock");
	const { CSVLoader, TestSuiteRunner, SkillTestHarness } = await import("@operor/testing");

	// Load the test cases; a missing file gets a short "getting started" guide.
	let testCases;
	try {
		testCases = await CSVLoader.fromFile(file);
	} catch (err) {
		if (err instanceof Error && "code" in err && err.code === "ENOENT") {
			console.error(`\n Test file not found: ${file}\n`);
			console.error(` To get started, create a CSV file with this format:\n`);
			console.error(` id,question,expected_answer,expected_tools,persona,tags`);
			console.error(` greeting-1,Hello,Hi! How can I help you?,,friendly,greeting`);
			console.error(` order-1,Where is my order?,Let me check.,get_order,helpful,order_tracking\n`);
			console.error(` Required columns: id, question`);
			console.error(` Optional columns: expected_answer, expected_tools, persona, tags\n`);
			console.error(` A sample file is included at tests.csv in the project root.`);
			console.error(` You can also use JSON format — see docs for details.\n`);
		} else console.error(`Failed to load test cases from ${file}: ${err instanceof Error ? err.message : err}`);
		process.exit(1);
	}
	if (testCases.length === 0) {
		console.error("No test cases found in file.");
		process.exit(1);
	}

	const os = new Operor({
		debug: false,
		batchWindowMs: 0
	});
	const provider = new MockProvider();
	await os.addProvider(provider);

	// The real/dry-run modes no longer exist; fail fast with guidance.
	if (options.real || options.dryRun) {
		console.error("--real/--dry-run mode has been removed. Configure MCP skills in mcp.json instead.");
		console.error("Use the mock-based test suite (default) or Docker E2E tests for integration testing.");
		process.exit(1);
	}

	const { MockShopifySkill } = await import("@operor/testing");
	const shopify = new MockShopifySkill();
	await os.addSkill(shopify);
	const allTools = [
		shopify.tools.get_order,
		shopify.tools.create_discount,
		shopify.tools.search_products
	];
	const agent = os.createAgent({
		name: "Test Agent",
		purpose: "Handle customer support queries",
		personality: "helpful and solution-focused",
		triggers: ["order_tracking", "general"],
		tools: allTools,
		rules: [{
			name: "Auto-compensation",
			// Fires when get_order succeeded and reports a delay of two or more days.
			condition: async (_ctx, toolResults) => {
				const order = toolResults.find((t) => t.name === "get_order");
				return order?.success && order.result?.isDelayed && order.result?.delayDays >= 2;
			},
			action: async () => {
				const discount = await shopify.tools.create_discount.execute({
					percent: 10,
					validDays: 30
				});
				return {
					type: "discount_created",
					code: discount.code,
					percent: 10,
					validDays: 30
				};
			}
		}]
	});

	let llm;
	if (options.llm) {
		const config = readConfig();
		if (!config.LLM_PROVIDER || !config.LLM_API_KEY) {
			console.error("--llm requires LLM_PROVIDER and LLM_API_KEY in .env. Run \"operor setup\" or \"operor config set\" to configure.");
			process.exit(1);
		}
		const { AIProvider } = await import("@operor/llm");
		llm = new AIProvider({
			provider: config.LLM_PROVIDER,
			apiKey: config.LLM_API_KEY,
			model: config.LLM_MODEL
		});
		if (!options.json) console.log(`\n LLM mode: ${llm.getProviderName()} (${llm.getModelName()})`);

		// Replace the agent's processing with an LLM loop that may call tools
		// for up to `maxIterations` rounds before producing the final text.
		agent.process = async (context) => {
			const startTime = Date.now();
			const messages = [
				{
					role: "system",
					content: `You are a ${agent.config.personality} customer support agent. ${agent.config.purpose}.`
				},
				...context.history.map((m) => ({
					role: m.role,
					content: m.content
				})),
				{
					role: "user",
					content: context.currentMessage.text
				}
			];
			const toolSpecs = allTools.map((t) => ({
				name: t.name,
				description: t.description,
				parameters: t.parameters
			}));
			const toolCalls = [];
			let finalText = "";
			const maxIterations = 5;
			for (let iterations = 0; iterations < maxIterations; iterations++) {
				const response = await llm.complete(messages, { tools: toolSpecs });
				if (!response.toolCalls || response.toolCalls.length === 0) {
					finalText = response.text;
					break;
				}
				const executedTools = [];
				for (const tc of response.toolCalls) {
					const tool = allTools.find((t) => t.name === tc.name);
					if (!tool) {
						// Tell the model about the bad tool name instead of silently
						// dropping the call; the returned toolCalls list is unchanged.
						executedTools.push({
							name: tc.name,
							result: null,
							success: false,
							error: `Unknown tool: ${tc.name}`
						});
						continue;
					}
					try {
						const result = await tool.execute(tc.arguments);
						toolCalls.push({
							id: tc.id,
							name: tc.name,
							params: tc.arguments,
							result,
							success: true
						});
						executedTools.push({
							name: tc.name,
							result,
							success: true
						});
					} catch (err) {
						// A tool may throw a non-Error value; keep the message a string either way.
						const message = err instanceof Error ? err.message : String(err);
						toolCalls.push({
							id: tc.id,
							name: tc.name,
							params: tc.arguments,
							result: null,
							success: false,
							error: message
						});
						executedTools.push({
							name: tc.name,
							result: null,
							success: false,
							error: message
						});
					}
				}
				const toolResultSummary = executedTools.map((tc) => `[Tool ${tc.name}]: ${JSON.stringify(tc.success ? tc.result : { error: tc.error })}`).join("\n");
				// Tool output is fed back as plain assistant/user turns.
				messages.push({
					role: "assistant",
					content: `I'll call ${executedTools.map((tc) => tc.name).join(", ")} to help with that.`
				}, {
					role: "user",
					content: `Tool results:\n${toolResultSummary}\n\nPlease use these results to respond to the customer.`
				});
			}
			return {
				text: finalText,
				toolCalls,
				duration: Date.now() - startTime
			};
		};
	}

	await os.start();
	if (!options.json) console.log(`\n Running ${testCases.length} test case(s) from ${file}...\n`);
	const runner = new TestSuiteRunner({
		agentOS: os,
		llm,
		timeout: options.timeout ?? 1e4,
		strategy: options.strategy
	});
	let result;
	try {
		result = await runner.runSuite(testCases);
	} finally {
		// Stop the runtime even when the suite rejects, so it is not left running.
		await os.stop();
	}

	if (options.json) console.log(JSON.stringify(result, null, 2));
	else {
		for (const r of result.results) {
			const status = r.evaluation.passed ? "PASS" : "FAIL";
			const score = r.evaluation.score.toFixed(2);
			const line = ` [${formatTimestamp()}] [${status}] ${r.testCase.id}: ${r.testCase.question} (score: ${score}, ${r.duration}ms)`;
			console.log(line);
			if (options.verbose && !r.evaluation.passed) {
				console.log(` Reason: ${r.evaluation.reasoning}`);
				console.log(` Response: ${r.agentResponse.slice(0, 120)}`);
			}
		}
		const tags = Object.entries(result.byTag);
		if (tags.length > 0) {
			console.log("\n --- By Tag ---");
			for (const [tag, stats] of tags) console.log(` ${tag}: ${stats.passed}/${stats.total} passed (avg score: ${stats.avgScore.toFixed(2)})`);
		}
		console.log(`\n Summary: ${result.passed}/${result.total} passed, avg score: ${result.averageScore.toFixed(2)}`);
		console.log(` Duration: ${(result.totalDuration / 1e3).toFixed(1)}s, Cost: $${result.totalCost.toFixed(4)}`);
		console.log(` Result: ${result.failed === 0 ? "PASSED" : "FAILED"}\n`);
	}
	// Exit code mirrors the suite outcome.
	process.exit(result.failed === 0 ? 0 : 1);
}
|
|
206
|
+
|
|
207
|
+
//#endregion
|
|
208
|
+
export { runTestSuite };
|
|
209
|
+
//# sourceMappingURL=test-suite-D8H_5uKs.js.map
|