@operor/cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +76 -0
- package/dist/config-Bn2pbORi.js +34 -0
- package/dist/config-Bn2pbORi.js.map +1 -0
- package/dist/converse-C_PB7-JH.js +142 -0
- package/dist/converse-C_PB7-JH.js.map +1 -0
- package/dist/doctor-98gPl743.js +122 -0
- package/dist/doctor-98gPl743.js.map +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +2268 -0
- package/dist/index.js.map +1 -0
- package/dist/llm-override-BIQl0V6H.js +445 -0
- package/dist/llm-override-BIQl0V6H.js.map +1 -0
- package/dist/reset-DT8SBgFS.js +87 -0
- package/dist/reset-DT8SBgFS.js.map +1 -0
- package/dist/simulate-BKv62GJc.js +144 -0
- package/dist/simulate-BKv62GJc.js.map +1 -0
- package/dist/status-D6LIZvQa.js +82 -0
- package/dist/status-D6LIZvQa.js.map +1 -0
- package/dist/test-DYjkxbtK.js +177 -0
- package/dist/test-DYjkxbtK.js.map +1 -0
- package/dist/test-suite-D8H_5uKs.js +209 -0
- package/dist/test-suite-D8H_5uKs.js.map +1 -0
- package/dist/utils-BuV4q7f6.js +11 -0
- package/dist/utils-BuV4q7f6.js.map +1 -0
- package/dist/vibe-Bl_js3Jo.js +395 -0
- package/dist/vibe-Bl_js3Jo.js.map +1 -0
- package/package.json +43 -0
- package/src/commands/analytics.ts +408 -0
- package/src/commands/chat.ts +310 -0
- package/src/commands/config.ts +34 -0
- package/src/commands/converse.ts +182 -0
- package/src/commands/doctor.ts +154 -0
- package/src/commands/history.ts +60 -0
- package/src/commands/init.ts +163 -0
- package/src/commands/kb.ts +429 -0
- package/src/commands/llm-override.ts +480 -0
- package/src/commands/reset.ts +72 -0
- package/src/commands/simulate.ts +187 -0
- package/src/commands/status.ts +112 -0
- package/src/commands/test-suite.ts +247 -0
- package/src/commands/test.ts +177 -0
- package/src/commands/vibe.ts +478 -0
- package/src/config.ts +127 -0
- package/src/index.ts +190 -0
- package/src/log-timestamps.ts +26 -0
- package/src/setup.ts +712 -0
- package/src/start.ts +573 -0
- package/src/utils.ts +6 -0
- package/templates/agents/_defaults/SOUL.md +20 -0
- package/templates/agents/_defaults/USER.md +16 -0
- package/templates/agents/customer-support/IDENTITY.md +6 -0
- package/templates/agents/customer-support/INSTRUCTIONS.md +79 -0
- package/templates/agents/customer-support/SOUL.md +26 -0
- package/templates/agents/faq-bot/IDENTITY.md +6 -0
- package/templates/agents/faq-bot/INSTRUCTIONS.md +53 -0
- package/templates/agents/faq-bot/SOUL.md +19 -0
- package/templates/agents/sales/IDENTITY.md +6 -0
- package/templates/agents/sales/INSTRUCTIONS.md +67 -0
- package/templates/agents/sales/SOUL.md +20 -0
- package/tsconfig.json +9 -0
- package/tsdown.config.ts +13 -0
- package/vitest.config.ts +8 -0
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
import { readConfig } from '../config.js';
|
|
2
|
+
import { writeFileSync } from 'fs';
|
|
3
|
+
import { formatTimestamp } from '../utils.js';
|
|
4
|
+
|
|
5
|
+
/**
 * Runs a batch of simulated customer conversations against an Operor instance
 * backed by `MockProvider` and `MockShopifySkill`, prints (and optionally
 * saves) the resulting report, then terminates the process.
 *
 * The process always exits at the end: code 0 when `report.overallPassed` is
 * truthy, 1 otherwise. It also exits with code 1 early when `options.scenarios`
 * is given but matches none of the built-in scenarios.
 *
 * @param options.tests         Test-suite files forwarded to the runner as `testSuiteFiles`.
 * @param options.scenarios     Scenario name/id fragments (case-insensitive substring match);
 *                              `'all'` or omission selects the built-in set.
 * @param options.conversations Number of conversations to run (default 10).
 * @param options.strategy      Evaluation strategy forwarded to the runner.
 * @param options.real          Wrap the skill in `SkillTestHarness`; integration mode `'real'`.
 * @param options.allowWrites   Forwarded to the harness and the runner (harness default: false).
 * @param options.dryRun        Wrap the skill in `SkillTestHarness`; integration mode `'dry-run'`.
 * @param options.timeout       Forwarded to the runner.
 * @param options.parallel      Forwarded to the runner.
 * @param options.json          Emit the report as JSON and suppress the interactive UI.
 * @param options.output        Path the JSON report is written to.
 */
export async function runSimulate(options: {
  tests?: string[];
  scenarios?: string[];
  conversations?: number;
  strategy?: 'exact' | 'similarity' | 'llm_judge';
  real?: boolean;
  allowWrites?: boolean;
  dryRun?: boolean;
  timeout?: number;
  parallel?: boolean;
  json?: boolean;
  output?: string;
}): Promise<void> {
  const config = readConfig();
  const clack = await import('@clack/prompts');

  const { Operor } = await import('@operor/core');
  const { MockProvider } = await import('@operor/provider-mock');
  const {
    MockShopifySkill,
    SimulationRunner,
    ECOMMERCE_SCENARIOS,
    SkillTestHarness,
  } = await import('@operor/testing');

  // Optionally set up LLM if configured
  let llm: any;
  if (config.LLM_PROVIDER && config.LLM_API_KEY) {
    const { AIProvider } = await import('@operor/llm');
    llm = new AIProvider({
      provider: config.LLM_PROVIDER as any,
      apiKey: config.LLM_API_KEY,
      model: config.LLM_MODEL,
    });
  }

  // Determine conversation scenarios: either the literal 'builtin' marker or
  // an explicit subset of ECOMMERCE_SCENARIOS selected by name/id fragment.
  let conversationScenarios: any = 'builtin';
  if (options.scenarios && !options.scenarios.includes('all')) {
    const wanted = options.scenarios.map((name) => name.toLowerCase());
    conversationScenarios = ECOMMERCE_SCENARIOS.filter((s) =>
      wanted.some(
        (name) => s.name.toLowerCase().includes(name) || s.id.toLowerCase().includes(name)
      )
    );
    if (conversationScenarios.length === 0) {
      console.error(`No scenarios matching: ${options.scenarios.join(', ')}`);
      console.error('Available scenarios:');
      ECOMMERCE_SCENARIOS.forEach((s) => console.error(` - ${s.id}: ${s.name}`));
      process.exit(1);
    }
  }

  // Set up Operor with appropriate skills
  const os = new Operor({ debug: false, batchWindowMs: 0 });
  const provider = new MockProvider();
  const shopify = new MockShopifySkill();

  // Wrap skill with safety harness if --real or --dry-run
  let skill: any = shopify;
  if (options.real || options.dryRun) {
    skill = new SkillTestHarness(shopify, {
      allowWrites: options.allowWrites ?? false,
      dryRun: options.dryRun ?? false,
    });
    await skill.authenticate();
  }

  await os.addProvider(provider);
  await os.addSkill(skill);

  // NOTE(review): these are the raw mock tools, not tools obtained through the
  // harness-wrapped `skill`, so calls made via `allTools` (and the rule action
  // below) presumably bypass SkillTestHarness — confirm this is intended.
  const allTools = [shopify.tools.get_order, shopify.tools.create_discount, shopify.tools.search_products];

  const agent = os.createAgent({
    name: 'Test Agent',
    purpose: 'Handle customer support conversations',
    personality: 'empathetic and solution-focused',
    triggers: ['order_tracking', 'general'],
    tools: allTools,
    rules: [{
      name: 'Auto-compensation',
      // Fires when a successful get_order result reports a delay of 2+ days.
      condition: async (_ctx: any, toolResults: any[]) => {
        const order = toolResults.find((t) => t.name === 'get_order');
        return order?.success && order.result?.isDelayed && order.result?.delayDays >= 2;
      },
      action: async () => {
        const discount = await shopify.tools.create_discount.execute({ percent: 10, validDays: 30 });
        return { type: 'discount_created', code: discount.code, percent: 10, validDays: 30 };
      },
    }],
  });

  // Set up Knowledge Base if enabled. Failure here is non-fatal: the
  // simulation simply runs without retrieval.
  let kbRuntime: any;
  if (config.KB_ENABLED === 'true') {
    try {
      const { SQLiteKnowledgeStore, EmbeddingService, RetrievalPipeline } = await import('@operor/knowledge');
      const embedder = new EmbeddingService({
        provider: (config.KB_EMBEDDING_PROVIDER || config.LLM_PROVIDER) as any,
        apiKey: config.KB_EMBEDDING_API_KEY || config.LLM_API_KEY || '',
        model: config.KB_EMBEDDING_MODEL,
      });
      const kbStore = new SQLiteKnowledgeStore(config.KB_DB_PATH || './knowledge.db', embedder.dimensions);
      await kbStore.initialize();
      const retrieval = new RetrievalPipeline(kbStore, embedder);
      kbRuntime = { retrieve: (q: string) => retrieval.retrieve(q) };
      // In --json mode stdout must carry only the report, so the status line
      // is diverted to stderr instead of being mixed into the JSON output.
      const logInfo = options.json ? console.error : console.log;
      logInfo('[Operor] 📚 Knowledge Base enabled for simulation');
    } catch (kbError: unknown) {
      const reason = kbError instanceof Error ? kbError.message : String(kbError);
      console.warn('[Operor] ⚠️ Failed to initialize Knowledge Base:', reason);
    }
  }

  // Override agent.process with LLM-based implementation if LLM is configured
  if (llm) {
    const { applyLLMOverride } = await import('./llm-override.js');
    applyLLMOverride(agent, llm, allTools, { kbRuntime });
  }

  await os.start();

  const totalConversations = options.conversations ?? 10;

  // Create simulation runner
  const runner = new SimulationRunner({
    agentOS: os,
    config: {
      testSuiteFiles: options.tests,
      conversationScenarios,
      totalConversations,
      evaluationStrategy: options.strategy,
      integrationMode: options.real ? 'real' : options.dryRun ? 'dry-run' : 'mock',
      allowWrites: options.allowWrites,
      timeout: options.timeout,
      parallel: options.parallel,
    },
    llm,
  });

  if (!options.json) {
    clack.intro('operor simulate');
  }

  // The spinner is interactive-only; in --json mode it stays null and every
  // `spinner?.` call is a no-op.
  const spinner = !options.json ? clack.spinner() : null;
  spinner?.start(`Running simulation (0/${totalConversations} conversations)...`);

  let report: Awaited<ReturnType<typeof runner.run>>;
  try {
    report = await runner.run((completed, total, result) => {
      const status = result.passed ? 'PASS' : 'FAIL';
      spinner?.message(`[${formatTimestamp()}] ${completed}/${total} — ${result.scenario.name} [${status}]`);
    });
    spinner?.stop(`Simulation complete: ${report.passed}/${report.totalConversations} passed`);
  } catch (err) {
    // Do not leave the spinner animating while the error propagates.
    spinner?.stop('Simulation aborted');
    throw err;
  } finally {
    // Always shut Operor down, including when the run itself rejected.
    await os.stop();
  }

  // Output results
  if (options.json) {
    const jsonOutput = JSON.stringify(report, null, 2);
    if (options.output) {
      writeFileSync(options.output, jsonOutput, 'utf-8');
      console.log(`Report saved to ${options.output}`);
    } else {
      console.log(jsonOutput);
    }
  } else {
    const formatted = SimulationRunner.formatReport(report);
    console.log('\n' + formatted);

    if (options.output) {
      writeFileSync(options.output, JSON.stringify(report, null, 2), 'utf-8');
      clack.note(`Detailed report saved to ${options.output}`);
    }

    clack.outro(report.overallPassed ? 'Simulation PASSED' : 'Simulation FAILED');
  }

  // NOTE(review): process.exit() right after a large console.log may truncate
  // output when stdout is a pipe — confirm against the Node.js docs whether
  // setting process.exitCode would be safer for the --json path.
  process.exit(report.overallPassed ? 0 : 1);
}
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
import { readConfig } from '../config.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Prints a human-readable overview of the current Operor setup to stdout:
 * LLM and channel configuration, enabled MCP skills, analytics settings,
 * agent definitions found under `<cwd>/agents`, and — when analytics is
 * enabled and its database file exists — a usage summary for the last 24 hours.
 *
 * Every optional section is best-effort: a failure while loading skills,
 * agents, or analytics is reported (or skipped) and never aborts the command.
 */
export async function runStatus(): Promise<void> {
  const config = readConfig();

  // Analytics is treated as enabled unless explicitly set to the string 'false'.
  const analyticsEnabled = config.ANALYTICS_ENABLED !== 'false';
  const analyticsDbPath = config.ANALYTICS_DB_PATH || './analytics.db';

  console.log('\n Operor Status\n');

  // LLM
  const llmStatus = config.LLM_PROVIDER
    ? `${config.LLM_PROVIDER} (${config.LLM_MODEL || 'default'})`
    : 'Not configured';
  console.log(` LLM: ${config.LLM_PROVIDER ? '✓' : '✗'} ${llmStatus}`);

  // Channel
  const channelStatus = config.CHANNEL || 'Not configured';
  console.log(` Channel: ${config.CHANNEL ? '✓' : '✗'} ${channelStatus}`);

  // Skills (MCP)
  if (config.SKILLS_ENABLED !== 'false') {
    try {
      const { loadSkillsConfig } = await import('@operor/skills');
      const skillsConfig = loadSkillsConfig();
      const enabledSkills = skillsConfig.skills.filter((s: any) => s.enabled !== false);
      if (enabledSkills.length > 0) {
        console.log(` Skills: ✓ ${enabledSkills.length} MCP skill(s)`);
        for (const skill of enabledSkills) {
          const transport = skill.transport || 'stdio';
          const target = skill.command || skill.url || '?';
          console.log(` └─ ${skill.name} (${transport}: ${target})`);
        }
      } else {
        console.log(` Skills: ─ No skills in mcp.json`);
      }
    } catch {
      // NOTE(review): any failure (including a failed import or a malformed
      // file) is reported as a missing mcp.json — confirm that is acceptable.
      console.log(` Skills: ─ No mcp.json found`);
    }
  } else {
    console.log(` Skills: ✗ Disabled`);
  }

  // Analytics
  if (analyticsEnabled) {
    const digestStatus = config.ANALYTICS_DIGEST_ENABLED === 'true'
      ? `${config.ANALYTICS_DIGEST_SCHEDULE || 'daily'} at ${config.ANALYTICS_DIGEST_TIME || '09:00'}`
      : 'disabled';
    console.log(` Analytics: ✓ Enabled (${analyticsDbPath})`);
    console.log(` └─ Digest: ${digestStatus}`);
  } else {
    console.log(` Analytics: ✗ Disabled`);
  }

  // Agents from agents/ directory
  const agentsDir = `${process.cwd()}/agents`;
  if (fs.existsSync(agentsDir)) {
    try {
      const { AgentLoader } = await import('@operor/core');
      const loader = new AgentLoader(process.cwd());
      const definitions = await loader.loadAll();

      if (definitions.length > 0) {
        console.log(`\n Agents: ✓ ${definitions.length} loaded from agents/`);
        for (const def of definitions) {
          const cfg = def.config;
          const details: string[] = [];
          if (cfg.channels?.length) details.push(`channels: ${cfg.channels.join(', ')}`);
          if (cfg.skills?.length) details.push(`skills: ${cfg.skills.join(', ')}`);
          if (cfg.knowledgeBase) details.push('KB');
          if (cfg.priority) details.push(`priority: ${cfg.priority}`);
          const suffix = details.length > 0 ? ` (${details.join(' | ')})` : '';
          console.log(` └─ ${cfg.name}${suffix}`);
        }
      } else {
        console.log(`\n Agents: ✗ agents/ directory found but no valid definitions`);
      }
    } catch (err: unknown) {
      // Surface the cause so a broken definition can actually be diagnosed.
      const reason = err instanceof Error ? err.message : String(err);
      console.log(`\n Agents: ✗ Failed to load agents/: ${reason}`);
    }
  } else {
    console.log(`\n Agents: ─ No agents/ directory (using default single-agent mode)`);
  }

  // Usage summary (last 24h)
  if (analyticsEnabled) {
    try {
      const { SQLiteAnalyticsStore } = await import('@operor/analytics');
      if (fs.existsSync(analyticsDbPath)) {
        const store = new SQLiteAnalyticsStore(analyticsDbPath);
        try {
          await store.initialize();
          const now = Date.now();
          const range = { from: now - 24 * 60 * 60 * 1000, to: now };
          const summary = await store.getSummary(range);

          if (summary.totalMessages > 0) {
            // Sub-second averages are shown in whole milliseconds, longer
            // ones in seconds with one decimal.
            const avgResponse = summary.avgResponseTime < 1000
              ? Math.round(summary.avgResponseTime) + 'ms'
              : (summary.avgResponseTime / 1000).toFixed(1) + 's';
            console.log(` Usage (last 24h)`);
            console.log(` Messages: ${summary.totalMessages}`);
            console.log(` Customers: ${summary.uniqueCustomers}`);
            console.log(` KB answered: ${summary.kbAnsweredPct.toFixed(1)}%`);
            console.log(` Avg response: ${avgResponse}`);
            console.log();
          }
        } finally {
          // Release the store even when initialize()/getSummary() throws.
          await store.close();
        }
      }
    } catch {
      // Analytics DB not available — skip silently
    }
  }

  console.log();
}
|
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
import { formatTimestamp } from '../utils.js';
|
|
2
|
+
import { readConfig } from '../config.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Loads test cases from `file`, runs them through a mock-backed Operor
 * instance (`MockProvider` + `MockShopifySkill`), prints the results, and
 * terminates the process.
 *
 * The process always exits: code 0 when no test failed, 1 otherwise. It exits
 * with code 1 early when the file cannot be loaded, contains no test cases,
 * `--real`/`--dry-run` is requested (no longer supported), or `--llm` is set
 * without `LLM_PROVIDER` and `LLM_API_KEY` configured.
 *
 * @param file                 Path of the test-case file handed to `CSVLoader.fromFile`.
 * @param options.strategy     Evaluation strategy forwarded to `TestSuiteRunner`.
 * @param options.timeout      Runner timeout in the unit the runner expects (default 10000).
 * @param options.parallel     Accepted but not read by this function.
 * @param options.verbose      Also print reasoning and a response excerpt for failed tests.
 * @param options.json         Print the raw result as JSON instead of the formatted report.
 * @param options.real         Rejected with an explanatory error.
 * @param options.allowWrites  Accepted but not read by this function.
 * @param options.dryRun       Rejected with an explanatory error.
 * @param options.llm          Replace `agent.process` with an LLM-driven tool-calling loop.
 */
export async function runTestSuite(file: string, options: {
  strategy?: 'exact' | 'contains' | 'similarity' | 'semantic';
  timeout?: number;
  parallel?: boolean;
  verbose?: boolean;
  json?: boolean;
  real?: boolean;
  allowWrites?: boolean;
  dryRun?: boolean;
  llm?: boolean;
}): Promise<void> {
  const { Operor } = await import('@operor/core');
  const { MockProvider } = await import('@operor/provider-mock');
  const { CSVLoader, TestSuiteRunner, MockShopifySkill } = await import('@operor/testing');

  // Load test cases from file
  let testCases: Awaited<ReturnType<typeof CSVLoader.fromFile>>;
  try {
    testCases = await CSVLoader.fromFile(file);
  } catch (err) {
    const isNotFound = err instanceof Error && 'code' in err && (err as any).code === 'ENOENT';
    if (isNotFound) {
      console.error(`\n Test file not found: ${file}\n`);
      console.error(` To get started, create a CSV file with this format:\n`);
      console.error(` id,question,expected_answer,expected_tools,persona,tags`);
      console.error(` greeting-1,Hello,Hi! How can I help you?,,friendly,greeting`);
      console.error(` order-1,Where is my order?,Let me check.,get_order,helpful,order_tracking\n`);
      console.error(` Required columns: id, question`);
      console.error(` Optional columns: expected_answer, expected_tools, persona, tags\n`);
      console.error(` A sample file is included at tests.csv in the project root.`);
      console.error(` You can also use JSON format — see docs for details.\n`);
    } else {
      console.error(`Failed to load test cases from ${file}: ${err instanceof Error ? err.message : err}`);
    }
    process.exit(1);
  }

  if (testCases.length === 0) {
    console.error('No test cases found in file.');
    process.exit(1);
  }

  // Real integration testing now uses MCP skills via mcp.json. Reject the
  // legacy flags before any Operor instance is constructed.
  if (options.real || options.dryRun) {
    console.error('--real/--dry-run mode has been removed. Configure MCP skills in mcp.json instead.');
    console.error('Use the mock-based test suite (default) or Docker E2E tests for integration testing.');
    process.exit(1);
  }

  // Set up Operor with mocks
  const os = new Operor({ debug: false, batchWindowMs: 0 });
  const provider = new MockProvider();
  await os.addProvider(provider);

  const shopify = new MockShopifySkill();
  await os.addSkill(shopify);

  const allTools: any[] = [shopify.tools.get_order, shopify.tools.create_discount, shopify.tools.search_products];

  const agent: any = os.createAgent({
    name: 'Test Agent',
    purpose: 'Handle customer support queries',
    personality: 'helpful and solution-focused',
    triggers: ['order_tracking', 'general'],
    tools: allTools,
    rules: [{
      name: 'Auto-compensation',
      // Fires when a successful get_order result reports a delay of 2+ days.
      condition: async (_ctx: any, toolResults: any[]) => {
        const order = toolResults.find((t) => t.name === 'get_order');
        return order?.success && order.result?.isDelayed && order.result?.delayDays >= 2;
      },
      action: async () => {
        const discount = await shopify.tools.create_discount.execute({ percent: 10, validDays: 30 });
        return { type: 'discount_created', code: discount.code, percent: 10, validDays: 30 };
      },
    }],
  });

  // Set up LLM-based agent processing if --llm flag is set
  let llm: any;
  if (options.llm) {
    const config = readConfig();

    if (!config.LLM_PROVIDER || !config.LLM_API_KEY) {
      console.error('--llm requires LLM_PROVIDER and LLM_API_KEY in .env. Run "operor setup" or "operor config set" to configure.');
      process.exit(1);
    }

    const { AIProvider } = await import('@operor/llm');
    llm = new AIProvider({
      provider: config.LLM_PROVIDER as any,
      apiKey: config.LLM_API_KEY,
      model: config.LLM_MODEL,
    });

    if (!options.json) {
      console.log(`\n LLM mode: ${llm.getProviderName()} (${llm.getModelName()})`);
    }

    // Override agent.process with LLM-based implementation: ask the model,
    // execute any tools it requests, feed the results back, and repeat until
    // it answers in plain text or the iteration budget runs out.
    agent.process = async (context: any) => {
      const startTime = Date.now();
      const systemMessage = `You are a ${agent.config.personality} customer support agent. ${agent.config.purpose}.`;
      const messages = [
        { role: 'system' as const, content: systemMessage },
        ...context.history.map((m: any) => ({
          role: m.role as 'user' | 'assistant',
          content: m.content,
        })),
        { role: 'user' as const, content: context.currentMessage.text },
      ];

      const toolSpecs = allTools.map((t) => ({
        name: t.name,
        description: t.description,
        parameters: t.parameters,
      }));

      const toolCalls: any[] = [];
      let finalText = '';
      const maxIterations = 5;

      // If the budget is exhausted while the model is still requesting tools,
      // finalText remains the empty string.
      for (let iteration = 0; iteration < maxIterations; iteration++) {
        const response = await llm.complete(messages, { tools: toolSpecs });

        if (!response.toolCalls || response.toolCalls.length === 0) {
          finalText = response.text;
          break;
        }

        const executedTools: Array<{ name: string; result: any; success: boolean; error?: string }> = [];

        for (const tc of response.toolCalls) {
          const tool = allTools.find((t) => t.name === tc.name);

          if (!tool) {
            // Report the bad request back to the model instead of dropping it
            // silently; otherwise the follow-up turn would carry no results.
            const error = `Unknown tool: ${tc.name}`;
            toolCalls.push({ id: tc.id, name: tc.name, params: tc.arguments, result: null, success: false, error });
            executedTools.push({ name: tc.name, result: null, success: false, error });
            continue;
          }

          try {
            const result = await tool.execute(tc.arguments);
            toolCalls.push({ id: tc.id, name: tc.name, params: tc.arguments, result, success: true });
            executedTools.push({ name: tc.name, result, success: true });
          } catch (err: unknown) {
            const error = err instanceof Error ? err.message : String(err);
            toolCalls.push({ id: tc.id, name: tc.name, params: tc.arguments, result: null, success: false, error });
            executedTools.push({ name: tc.name, result: null, success: false, error });
          }
        }

        const toolResultSummary = executedTools.map(tc =>
          `[Tool ${tc.name}]: ${JSON.stringify(tc.success ? tc.result : { error: tc.error })}`
        ).join('\n');

        // Tool activity is replayed as plain assistant/user turns rather than
        // provider-specific tool messages.
        messages.push(
          { role: 'assistant', content: `I'll call ${executedTools.map(tc => tc.name).join(', ')} to help with that.` },
          { role: 'user', content: `Tool results:\n${toolResultSummary}\n\nPlease use these results to respond to the customer.` }
        );
      }

      return {
        text: finalText,
        toolCalls,
        duration: Date.now() - startTime,
      };
    };
  }

  await os.start();

  if (!options.json) {
    console.log(`\n Running ${testCases.length} test case(s) from ${file}...\n`);
  }

  const runner = new TestSuiteRunner({
    agentOS: os,
    llm,
    timeout: options.timeout ?? 10000,
    strategy: options.strategy,
  });

  const result = await runner.runSuite(testCases);
  await os.stop();

  // Output results
  if (options.json) {
    console.log(JSON.stringify(result, null, 2));
  } else {
    // Per-test results
    for (const r of result.results) {
      const status = r.evaluation.passed ? 'PASS' : 'FAIL';
      const score = r.evaluation.score.toFixed(2);
      const line = ` [${formatTimestamp()}] [${status}] ${r.testCase.id}: ${r.testCase.question} (score: ${score}, ${r.duration}ms)`;
      console.log(line);

      if (options.verbose && !r.evaluation.passed) {
        console.log(` Reason: ${r.evaluation.reasoning}`);
        console.log(` Response: ${r.agentResponse.slice(0, 120)}`);
      }
    }

    // Tag breakdown
    const tags = Object.entries(result.byTag);
    if (tags.length > 0) {
      console.log('\n --- By Tag ---');
      for (const [tag, stats] of tags) {
        console.log(` ${tag}: ${stats.passed}/${stats.total} passed (avg score: ${stats.avgScore.toFixed(2)})`);
      }
    }

    // Summary
    console.log(`\n Summary: ${result.passed}/${result.total} passed, avg score: ${result.averageScore.toFixed(2)}`);
    console.log(` Duration: ${(result.totalDuration / 1000).toFixed(1)}s, Cost: $${result.totalCost.toFixed(4)}`);
    console.log(` Result: ${result.failed === 0 ? 'PASSED' : 'FAILED'}\n`);
  }

  process.exit(result.failed === 0 ? 0 : 1);
}
|