@virtualkitchenco/multiverse-sdk 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/langchain.d.ts +65 -0
- package/dist/adapters/langchain.d.ts.map +1 -0
- package/dist/adapters/langchain.js +80 -0
- package/dist/adapters/langchain.js.map +1 -0
- package/dist/ci.d.ts +31 -0
- package/dist/ci.d.ts.map +1 -0
- package/dist/ci.js +105 -0
- package/dist/ci.js.map +1 -0
- package/dist/client.d.ts +136 -0
- package/dist/client.d.ts.map +1 -0
- package/dist/client.js +92 -0
- package/dist/client.js.map +1 -0
- package/dist/github.d.ts +35 -0
- package/dist/github.d.ts.map +1 -0
- package/dist/github.js +113 -0
- package/dist/github.js.map +1 -0
- package/dist/index.d.ts +19 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +19 -0
- package/dist/index.js.map +1 -0
- package/dist/llm/anthropic.d.ts +18 -0
- package/dist/llm/anthropic.d.ts.map +1 -0
- package/dist/llm/anthropic.js +91 -0
- package/dist/llm/anthropic.js.map +1 -0
- package/dist/llm/google.d.ts +18 -0
- package/dist/llm/google.d.ts.map +1 -0
- package/dist/llm/google.js +97 -0
- package/dist/llm/google.js.map +1 -0
- package/dist/llm/index.d.ts +14 -0
- package/dist/llm/index.d.ts.map +1 -0
- package/dist/llm/index.js +29 -0
- package/dist/llm/index.js.map +1 -0
- package/dist/llm/openai.d.ts +18 -0
- package/dist/llm/openai.d.ts.map +1 -0
- package/dist/llm/openai.js +78 -0
- package/dist/llm/openai.js.map +1 -0
- package/dist/llm/provider.d.ts +42 -0
- package/dist/llm/provider.d.ts.map +1 -0
- package/dist/llm/provider.js +7 -0
- package/dist/llm/provider.js.map +1 -0
- package/dist/multiverse.d.ts +105 -0
- package/dist/multiverse.d.ts.map +1 -0
- package/dist/multiverse.js +449 -0
- package/dist/multiverse.js.map +1 -0
- package/dist/report.d.ts +30 -0
- package/dist/report.d.ts.map +1 -0
- package/dist/report.js +128 -0
- package/dist/report.js.map +1 -0
- package/dist/simulate.d.ts +49 -0
- package/dist/simulate.d.ts.map +1 -0
- package/dist/simulate.js +476 -0
- package/dist/simulate.js.map +1 -0
- package/package.json +68 -0
package/dist/report.js
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Report Card Generation
|
|
3
|
+
*
|
|
4
|
+
* Generates markdown report cards from test results
|
|
5
|
+
* for posting to PRs and CI output.
|
|
6
|
+
*/
|
|
7
|
+
/**
 * Map a pass rate to a letter tier.
 *
 * Thresholds are inclusive lower bounds, checked from best to worst:
 * 95 -> 'S', 85 -> 'A', 70 -> 'B', 50 -> 'C'. Anything that does not reach
 * a threshold (including NaN) is tier 'D'.
 *
 * @param {number} passRate - Pass rate compared against the thresholds above.
 * @returns {string} One of 'S', 'A', 'B', 'C' or 'D'.
 */
export function calculateTier(passRate) {
    const thresholds = [
        [95, 'S'],
        [85, 'A'],
        [70, 'B'],
        [50, 'C'],
    ];
    for (const [minimum, tier] of thresholds) {
        if (passRate >= minimum) {
            return tier;
        }
    }
    return 'D';
}
|
|
21
|
+
/**
 * Get the emoji shown next to a tier letter.
 *
 * The glyphs are written as Unicode escapes so the file stays correct even
 * when it is read or re-saved with the wrong character encoding.
 *
 * NOTE(review): the previous literals were encoding-damaged. The 'A', 'B' and
 * 'C' glyphs could be recovered from the surviving bytes; the 'S', 'D' and
 * fallback glyphs are best guesses - confirm against the TypeScript source.
 *
 * @param {string} tier - Tier letter as produced by calculateTier.
 * @returns {string} Emoji for the tier, or a question mark emoji for an unknown tier.
 */
function tierEmoji(tier) {
    switch (tier) {
        case 'S': return '\u{1F3C6}'; // trophy (best guess)
        case 'A': return '\u2705'; // white heavy check mark
        case 'B': return '\u26A0\uFE0F'; // warning sign, emoji presentation
        case 'C': return '\u{1F536}'; // large orange diamond
        case 'D': return '\u274C'; // cross mark (best guess)
        default: return '\u2753'; // question mark ornament (best guess)
    }
}
|
|
34
|
+
/**
 * Generate a markdown report card from test results.
 *
 * Sections, in order: header, tier summary, metrics table, per-scenario
 * breakdown (optional), weak spots (optional), dashboard link (optional),
 * footer. Lines are joined with '\n'.
 *
 * Non-ASCII glyphs are written as Unicode escapes so the output does not
 * depend on the encoding this file is stored in.
 *
 * @param {object} results - Test results.
 * @param {number} results.passRate - Overall pass rate, printed followed by '%'.
 * @param {Array<{passed: boolean, scenario: {name: string}}>} results.runs - Individual runs.
 * @param {number} results.duration - Duration; divided by 1000 and printed with an 's' suffix.
 * @param {Array<{scenario: string, passRate: number}>} [results.weakSpots] - Optional weak spots to list.
 * @param {string} [results.url] - Report URL used when options.dashboardUrl is not given.
 * @param {object} [options]
 * @param {string} [options.dashboardUrl] - Link target for the "View full report" line.
 * @param {boolean} [options.detailed=true] - Whether to include the per-scenario table.
 * @returns {string} Markdown text.
 */
export function generateReportCard(results, options = {}) {
    const { dashboardUrl, detailed = true } = options;
    const tier = calculateTier(results.passRate);
    const emoji = tierEmoji(tier);
    // A literal '|' or a line break inside a table cell would split the row,
    // so escape pipes and flatten newlines before interpolating free text.
    const escapeCell = (text) => String(text).replace(/\|/g, '\\|').replace(/\r?\n/g, ' ');
    const lines = [];
    // Header (crystal ball emoji)
    lines.push('## \u{1F52E} Multiverse Report');
    lines.push('');
    // Summary (middle dot separator)
    lines.push(`**Tier ${tier}** ${emoji} \u00B7 ${results.passRate}% pass rate`);
    lines.push('');
    // Stats
    const passed = results.runs.filter((run) => run.passed).length;
    const failed = results.runs.length - passed;
    lines.push('| Metric | Value |');
    lines.push('|--------|-------|');
    lines.push(`| Total Runs | ${results.runs.length} |`);
    lines.push(`| Passed | ${passed} |`);
    lines.push(`| Failed | ${failed} |`);
    lines.push(`| Duration | ${(results.duration / 1000).toFixed(1)}s |`);
    lines.push('');
    // Scenario breakdown
    if (detailed && results.runs.length > 0) {
        const scenarioStats = aggregateByScenario(results.runs);
        lines.push('### Scenarios');
        lines.push('');
        lines.push('| Scenario | Passed | Rate |');
        lines.push('|----------|--------|------|');
        for (const [name, stats] of scenarioStats) {
            const rate = Math.round((stats.passed / stats.total) * 100);
            // check mark at >= 80%, warning sign at >= 50%, cross mark below
            const status = rate >= 80 ? '\u2705' : rate >= 50 ? '\u26A0\uFE0F' : '\u274C';
            lines.push(`| ${escapeCell(name)} | ${stats.passed}/${stats.total} | ${status} ${rate}% |`);
        }
        lines.push('');
    }
    // Weak spots
    if (results.weakSpots && results.weakSpots.length > 0) {
        lines.push('### Weak Spots');
        lines.push('');
        for (const spot of results.weakSpots) {
            lines.push(`- **${spot.scenario}**: ${spot.passRate}% pass rate`);
        }
        lines.push('');
    }
    // Dashboard link: an explicit option wins over the URL carried by the results
    const url = dashboardUrl || results.url;
    if (url) {
        lines.push(`[View full report \u2192](${url})`);
        lines.push('');
    }
    // Footer
    lines.push('---');
    lines.push('*Generated by [Multiverse](https://github.com/anthropics/multiverse)*');
    return lines.join('\n');
}
|
|
92
|
+
/**
 * Build the one-line summary used for CI output.
 *
 * @param {object} results - Test results; reads `passRate` and `runs` (each run's `passed` flag).
 * @returns {string} Text of the form "Multiverse: Tier X (N%) - P passed, F failed".
 */
export function generateSummary(results) {
    const tier = calculateTier(results.passRate);
    let passed = 0;
    for (const run of results.runs) {
        if (run.passed) {
            passed += 1;
        }
    }
    const failed = results.runs.length - passed;
    return `Multiverse: Tier ${tier} (${results.passRate}%) - ${passed} passed, ${failed} failed`;
}
|
|
101
|
+
/**
 * Tally runs per scenario name.
 *
 * @param {Array<{passed: boolean, scenario: {name: string}}>} runs - Runs to group.
 * @returns {Map<string, {passed: number, total: number}>} Counts keyed by
 *   scenario name, in order of first appearance.
 */
function aggregateByScenario(runs) {
    const tally = new Map();
    for (const run of runs) {
        const key = run.scenario.name;
        if (!tally.has(key)) {
            tally.set(key, { passed: 0, total: 0 });
        }
        const entry = tally.get(key);
        entry.total += 1;
        if (run.passed) {
            entry.passed += 1;
        }
    }
    return tally;
}
|
|
116
|
+
/**
 * Build a shields.io-style static badge URL for the results.
 *
 * Colour by tier: S and A are 'brightgreen', B is 'yellow', C is 'orange',
 * everything else is 'red'.
 *
 * @param {object} results - Test results; reads `passRate`.
 * @returns {string} URL of the form https://img.shields.io/badge/<label>-<message>-<color>.
 */
export function generateBadgeUrl(results) {
    const tier = calculateTier(results.passRate);
    let color;
    switch (tier) {
        case 'S':
        case 'A':
            color = 'brightgreen';
            break;
        case 'B':
            color = 'yellow';
            break;
        case 'C':
            color = 'orange';
            break;
        default:
            color = 'red';
    }
    const segments = [
        encodeURIComponent('multiverse'),
        encodeURIComponent(`Tier ${tier} (${results.passRate}%)`),
        color,
    ];
    return `https://img.shields.io/badge/${segments.join('-')}`;
}
|
|
128
|
+
//# sourceMappingURL=report.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"report.js","sourceRoot":"","sources":["../src/report.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAWH;;GAEG;AACH,MAAM,UAAU,aAAa,CAAC,QAAgB;IAC5C,IAAI,QAAQ,IAAI,EAAE;QAAE,OAAO,GAAG,CAAC;IAC/B,IAAI,QAAQ,IAAI,EAAE;QAAE,OAAO,GAAG,CAAC;IAC/B,IAAI,QAAQ,IAAI,EAAE;QAAE,OAAO,GAAG,CAAC;IAC/B,IAAI,QAAQ,IAAI,EAAE;QAAE,OAAO,GAAG,CAAC;IAC/B,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;GAEG;AACH,SAAS,SAAS,CAAC,IAAY;IAC7B,QAAQ,IAAI,EAAE,CAAC;QACb,KAAK,GAAG,CAAC,CAAC,OAAO,IAAI,CAAC;QACtB,KAAK,GAAG,CAAC,CAAC,OAAO,GAAG,CAAC;QACrB,KAAK,GAAG,CAAC,CAAC,OAAO,IAAI,CAAC;QACtB,KAAK,GAAG,CAAC,CAAC,OAAO,IAAI,CAAC;QACtB,KAAK,GAAG,CAAC,CAAC,OAAO,GAAG,CAAC;QACrB,OAAO,CAAC,CAAC,OAAO,GAAG,CAAC;IACtB,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,kBAAkB,CAChC,OAAoB,EACpB,UAAyB,EAAE;IAE3B,MAAM,EAAE,YAAY,EAAE,QAAQ,GAAG,IAAI,EAAE,GAAG,OAAO,CAAC;IAClD,MAAM,IAAI,GAAG,aAAa,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;IAC7C,MAAM,KAAK,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAE9B,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,SAAS;IACT,KAAK,CAAC,IAAI,CAAC,yBAAyB,CAAC,CAAC;IACtC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,UAAU;IACV,KAAK,CAAC,IAAI,CAAC,UAAU,IAAI,MAAM,KAAK,MAAM,OAAO,CAAC,QAAQ,aAAa,CAAC,CAAC;IACzE,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,QAAQ;IACR,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;IACzD,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IAC5C,KAAK,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;IACjC,KAAK,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;IACjC,KAAK,CAAC,IAAI,CAAC,kBAAkB,OAAO,CAAC,IAAI,CAAC,MAAM,IAAI,CAAC,CAAC;IACtD,KAAK,CAAC,IAAI,CAAC,cAAc,MAAM,IAAI,CAAC,CAAC;IACrC,KAAK,CAAC,IAAI,CAAC,cAAc,MAAM,IAAI,CAAC,CAAC;IACrC,KAAK,CAAC,IAAI,CAAC,gBAAgB,CAAC,OAAO,CAAC,QAAQ,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;IACtE,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,qBAAqB;IACrB,IAAI,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxC,MAAM,aAAa,GAAG,mBAAmB,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QAExD,KAAK,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;QAC5B,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACf,KAAK,CAAC,IAAI
,CAAC,8BAA8B,CAAC,CAAC;QAC3C,KAAK,CAAC,IAAI,CAAC,8BAA8B,CAAC,CAAC;QAE3C,KAAK,MAAM,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,aAAa,EAAE,CAAC;YAC1C,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,MAAM,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,GAAG,CAAC,CAAC;YAC5D,MAAM,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC;YAC1D,KAAK,CAAC,IAAI,CAAC,KAAK,IAAI,MAAM,KAAK,CAAC,MAAM,IAAI,KAAK,CAAC,KAAK,MAAM,MAAM,IAAI,IAAI,KAAK,CAAC,CAAC;QAClF,CAAC;QACD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACjB,CAAC;IAED,aAAa;IACb,IAAI,OAAO,CAAC,SAAS,IAAI,OAAO,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtD,KAAK,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;QAC7B,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACf,KAAK,MAAM,IAAI,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC;YACrC,KAAK,CAAC,IAAI,CAAC,OAAO,IAAI,CAAC,QAAQ,OAAO,IAAI,CAAC,QAAQ,aAAa,CAAC,CAAC;QACpE,CAAC;QACD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACjB,CAAC;IAED,iBAAiB;IACjB,IAAI,YAAY,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC;QAChC,MAAM,GAAG,GAAG,YAAY,IAAI,OAAO,CAAC,GAAG,CAAC;QACxC,KAAK,CAAC,IAAI,CAAC,wBAAwB,GAAG,GAAG,CAAC,CAAC;QAC3C,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACjB,CAAC;IAED,SAAS;IACT,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAClB,KAAK,CAAC,IAAI,CAAC,uEAAuE,CAAC,CAAC;IAEpF,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,eAAe,CAAC,OAAoB;IAClD,MAAM,IAAI,GAAG,aAAa,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;IAC7C,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;IACzD,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IAE5C,OAAO,oBAAoB,IAAI,KAAK,OAAO,CAAC,QAAQ,QAAQ,MAAM,YAAY,MAAM,SAAS,CAAC;AAChG,CAAC;AAED;;GAEG;AACH,SAAS,mBAAmB,CAAC,IAAiB;IAC5C,MAAM,KAAK,GAAG,IAAI,GAAG,EAA6C,CAAC;IAEnE,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,IAAI,GAAG,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC;QAC/B,MAAM,OAAO,GAAG,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;QAC3D,OAAO,CAAC,KAAK,EAAE,CAAC;QAChB,IAAI,GAAG,CAAC,MAAM;YAAE,OAAO,CAAC,MAAM,EAAE,CAAC;QACjC,KAAK,CAAC,GAAG,C
AAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC3B,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAAC,OAAoB;IACnD,MAAM,IAAI,GAAG,aAAa,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;IAC7C,MAAM,KAAK,GAAG,IAAI,KAAK,GAAG,IAAI,IAAI,KAAK,GAAG,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC;QAC9C,IAAI,KAAK,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;YACzB,IAAI,KAAK,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC;IAE9C,MAAM,KAAK,GAAG,kBAAkB,CAAC,YAAY,CAAC,CAAC;IAC/C,MAAM,OAAO,GAAG,kBAAkB,CAAC,QAAQ,IAAI,KAAK,OAAO,CAAC,QAAQ,IAAI,CAAC,CAAC;IAE1E,OAAO,gCAAgC,KAAK,IAAI,OAAO,IAAI,KAAK,EAAE,CAAC;AACrE,CAAC"}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM-based tool simulation with two-phase approach
|
|
3
|
+
*
|
|
4
|
+
* Phase 1 (Analysis): LLM analyzes world state vs query to decide action
|
|
5
|
+
* Phase 2 (Execution): Execute based on action (may skip LLM entirely)
|
|
6
|
+
*
|
|
7
|
+
* This ensures consistency - same tool + overlapping queries return consistent data.
|
|
8
|
+
*/
|
|
9
|
+
import type { World, Mutation, Scenario, Invariant, ZodLikeSchema, Effect, WorldStateAccessor } from '@multiverse/core';
|
|
10
|
+
import type { LLMProvider } from './llm/index.js';
|
|
11
|
+
/**
 * Result of simulating a single tool call.
 */
export interface SimulationResult {
    /** The simulated tool response; its shape is tool-specific, hence `unknown`. */
    response: unknown;
    /** World-state mutations associated with this call (empty when only existing data is returned). */
    mutations: Mutation[];
    /** The Phase 1 analysis that selected how the call was simulated. */
    analysis?: AnalysisResult;
}
|
|
16
|
+
/**
 * Conversation entry for context.
 *
 * Recent entries are rendered into the generation prompt as
 * "<ROLE>: <content>" lines.
 */
export interface ConversationEntry {
    /** Who produced the message. */
    role: 'user' | 'agent';
    /** The message text. */
    content: string;
}
|
|
23
|
+
/**
 * Tool simulation config.
 */
export interface ToolSimConfig {
    /** When provided, a description of this schema is put in the prompt as the structure the LLM must match. */
    outputSchema?: ZodLikeSchema;
    /** When provided, the LLM is told not to emit mutations; they are computed from the response instead. */
    effects?: (output: unknown, world: WorldStateAccessor) => Effect[];
    /** Invariants for this tool - presumably checked against world state after mutations; confirm against the caller. */
    invariants?: Invariant[];
}
|
|
31
|
+
/**
 * Two-phase simulation types.
 *
 * The action chosen by Phase 1 (analysis):
 * - 'return_existing': world state fully satisfies the query; return matches as-is.
 * - 'filter': world state has more data than needed; return the matching subset.
 * - 'augment': world state has some matching data; return it plus generated data for the gaps.
 * - 'generate': no relevant data, or a write operation; generate a fresh response.
 */
export type SimulationAction = 'return_existing' | 'filter' | 'augment' | 'generate';
|
|
35
|
+
/**
 * Outcome of Phase 1: how a tool call relates to the current world state.
 */
export interface AnalysisResult {
    /** What Phase 2 should do. */
    action: SimulationAction;
    /** IDs of entities that match or partially match the query. */
    matches: string[];
    /** Name of the collection holding the relevant data, or null when there is none. */
    collection: string | null;
    /** Descriptions of data the query needs that world state lacks. */
    gaps: string[];
    /** Short explanation of why this action was chosen. */
    reasoning: string;
}
|
|
42
|
+
/**
 * Simulate a tool call using two-phase approach
 *
 * Phase 1: Analyze world state vs query
 * Phase 2: Execute based on analysis (may skip LLM entirely)
 *
 * @param toolName - Name of the tool being simulated.
 * @param toolDescription - Description of the tool, included in the LLM prompts.
 * @param input - The tool call input, serialized into the LLM prompts.
 * @param world - World state that is read during analysis and may be mutated.
 * @param llm - LLM provider used for analysis and generation.
 * @param scenario - Optional scenario whose persona is added to generation prompts.
 * @param conversationHistory - Optional conversation; recent entries are added to the generation prompt.
 * @param config - Optional per-tool simulation config.
 * @returns The simulated response, its mutations, and the analysis that produced it.
 */
export declare function simulateToolCall(toolName: string, toolDescription: string, input: unknown, world: World, llm: LLMProvider, scenario?: Scenario, conversationHistory?: ConversationEntry[], config?: ToolSimConfig): Promise<SimulationResult>;
|
|
49
|
+
//# sourceMappingURL=simulate.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"simulate.d.ts","sourceRoot":"","sources":["../src/simulate.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,KAAK,EAAE,KAAK,EAAE,QAAQ,EAAE,QAAQ,EAAa,SAAS,EAAE,aAAa,EAAE,MAAM,EAAE,kBAAkB,EAAU,MAAM,kBAAkB,CAAC;AAC3I,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAElD,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,EAAE,OAAO,CAAC;IAClB,SAAS,EAAE,QAAQ,EAAE,CAAC;IACtB,QAAQ,CAAC,EAAE,cAAc,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC;IACvB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,YAAY,CAAC,EAAE,aAAa,CAAC;IAC7B,OAAO,CAAC,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,kBAAkB,KAAK,MAAM,EAAE,CAAC;IACnE,UAAU,CAAC,EAAE,SAAS,EAAE,CAAC;CAC1B;AAED;;GAEG;AACH,MAAM,MAAM,gBAAgB,GAAG,iBAAiB,GAAG,QAAQ,GAAG,SAAS,GAAG,UAAU,CAAC;AAErF,MAAM,WAAW,cAAc;IAC7B,MAAM,EAAE,gBAAgB,CAAC;IACzB,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;CACnB;AA6ZD;;;;;GAKG;AACH,wBAAsB,gBAAgB,CACpC,QAAQ,EAAE,MAAM,EAChB,eAAe,EAAE,MAAM,EACvB,KAAK,EAAE,OAAO,EACd,KAAK,EAAE,KAAK,EACZ,GAAG,EAAE,WAAW,EAChB,QAAQ,CAAC,EAAE,QAAQ,EACnB,mBAAmB,CAAC,EAAE,iBAAiB,EAAE,EACzC,MAAM,CAAC,EAAE,aAAa,GACrB,OAAO,CAAC,gBAAgB,CAAC,CAS3B"}
|
package/dist/simulate.js
ADDED
|
@@ -0,0 +1,476 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM-based tool simulation with two-phase approach
|
|
3
|
+
*
|
|
4
|
+
* Phase 1 (Analysis): LLM analyzes world state vs query to decide action
|
|
5
|
+
* Phase 2 (Execution): Execute based on action (may skip LLM entirely)
|
|
6
|
+
*
|
|
7
|
+
* This ensures consistency - same tool + overlapping queries return consistent data.
|
|
8
|
+
*/
|
|
9
|
+
/**
 * Describe a zod-like schema as text for the LLM prompt.
 *
 * - Object schemas (a `_def.shape` function) become pretty-printed JSON that
 *   maps each field name to a short type description.
 * - Array schemas (`_def.typeName === 'ZodArray'`) become `Array<...>` around
 *   the description of the element schema.
 * - Anything else, or an object schema whose shape cannot be described,
 *   yields a generic placeholder sentence.
 *
 * @param {object} schema - Zod-like schema; only its `_def` is inspected.
 * @returns {string} Description text.
 */
function schemaToDescription(schema) {
    const fallback = '(structured output - follow the shape implied by the tool description)';
    const def = schema._def;
    if (!def || typeof def !== 'object') {
        return fallback;
    }
    if (typeof def.shape === 'function') {
        try {
            const described = Object.entries(def.shape())
                .map(([fieldName, fieldType]) => [fieldName, describeZodType(fieldType)]);
            return JSON.stringify(Object.fromEntries(described), null, 2);
        }
        catch {
            // Best effort: an undescribable shape falls through to the checks below.
        }
    }
    const isArraySchema = def.typeName === 'ZodArray' && def.type;
    if (isArraySchema) {
        return `Array<${schemaToDescription(def.type)}>`;
    }
    return fallback;
}
|
|
35
|
+
/**
 * Describe a single zod type for the prompt.
 *
 * Produces a short, human-readable type name: primitives by name,
 * `array<...>` for arrays, compact JSON for objects, `enum(a|b)` for enums,
 * a `?` suffix for optionals and ` | null` for nullables. Other types are
 * named after their `typeName` with the "Zod" prefix removed, lower-cased.
 *
 * Never throws on a missing or unrecognised type: a null/undefined input, a
 * value without `_def`, or a `_def` without a string `typeName` is described
 * as 'unknown'.
 *
 * @param {object} [zodType] - Zod-like type; only its `_def` is inspected.
 * @returns {string} Description text.
 */
function describeZodType(zodType) {
    const def = zodType?._def;
    if (!def) {
        return 'unknown';
    }
    const { typeName } = def;
    switch (typeName) {
        case 'ZodString':
            return 'string';
        case 'ZodNumber':
            return 'number';
        case 'ZodBoolean':
            return 'boolean';
        case 'ZodArray':
            return `array<${describeZodType(def.type)}>`;
        case 'ZodObject': {
            if (typeof def.shape !== 'function') {
                return 'object';
            }
            const described = Object.entries(def.shape())
                .map(([fieldName, fieldType]) => [fieldName, describeZodType(fieldType)]);
            return JSON.stringify(Object.fromEntries(described));
        }
        case 'ZodEnum':
            return Array.isArray(def.values) ? `enum(${def.values.join('|')})` : 'enum';
        case 'ZodOptional':
            return `${describeZodType(def.innerType)}?`;
        case 'ZodNullable':
            // Keep the wrapped type visible instead of collapsing to "nullable".
            return `${describeZodType(def.innerType)} | null`;
        default:
            return typeof typeName === 'string'
                ? typeName.replace('Zod', '').toLowerCase()
                : 'unknown';
    }
}
|
|
67
|
+
/**
 * Validate invariants against world state.
 *
 * For each invariant, every entity in the named collection whose field is
 * neither undefined nor null is compared with the invariant's value using the
 * invariant's condition ('gt', 'gte', 'lt', 'lte', 'eq', 'neq'). Invariants
 * whose collection does not exist are skipped. An unrecognised condition
 * never passes, so it is reported for every entity that has the field.
 *
 * Values are compared as-is with the JavaScript operators; no numeric
 * conversion happens at runtime.
 *
 * @param {object} world - World state; `getCollection(name)` must return an
 *   iterable of `[id, entity]` pairs (or a falsy value when absent).
 * @param {Array<{collection: string, field: string, condition: string, value: *}>} invariants
 * @returns {{valid: boolean, errors: string[]}} `valid` is true when no violation was found.
 */
function validateInvariants(world, invariants) {
    const comparators = new Map([
        ['gt', (actual, expected) => actual > expected],
        ['gte', (actual, expected) => actual >= expected],
        ['lt', (actual, expected) => actual < expected],
        ['lte', (actual, expected) => actual <= expected],
        ['eq', (actual, expected) => actual === expected],
        ['neq', (actual, expected) => actual !== expected],
    ]);
    const errors = [];
    for (const inv of invariants) {
        const collection = world.getCollection(inv.collection);
        if (!collection) {
            continue;
        }
        for (const [id, entity] of collection) {
            const fieldValue = entity.data[inv.field];
            if (fieldValue === undefined || fieldValue === null) {
                continue;
            }
            const compare = comparators.get(inv.condition);
            const passes = compare ? compare(fieldValue, inv.value) : false;
            if (!passes) {
                errors.push(`Invariant violated: ${inv.collection}.${id}.${inv.field} (${fieldValue}) ${inv.condition} ${inv.value}`);
            }
        }
    }
    return { valid: errors.length === 0, errors };
}
|
|
111
|
+
/**
 * Convert an Effect into a Mutation by copying its four fields.
 *
 * @param {{operation: *, collection: *, id: *, data: *}} effect
 * @returns {{operation: *, collection: *, id: *, data: *}} New object holding
 *   only `operation`, `collection`, `id` and `data`.
 */
function effectToMutation(effect) {
    const { operation, collection, id, data } = effect;
    return { operation, collection, id, data };
}
|
|
122
|
+
// ============================================================================
|
|
123
|
+
// Two-Phase Simulation
|
|
124
|
+
// ============================================================================
|
|
125
|
+
/**
 * Build the analysis returned when Phase 1 cannot decide: always 'generate'.
 */
function buildFallbackAnalysis(gap, reasoning) {
    return {
        action: 'generate',
        matches: [],
        collection: null,
        gaps: [gap],
        reasoning,
    };
}
/**
 * Coerce the LLM's parsed JSON into a well-formed analysis so later phases
 * can rely on `matches` and `gaps` being arrays and `action` being known.
 */
function normalizeAnalysis(parsed) {
    const knownActions = ['return_existing', 'filter', 'augment', 'generate'];
    return {
        ...parsed,
        // An unrecognised action is treated as 'generate'.
        action: knownActions.includes(parsed.action) ? parsed.action : 'generate',
        matches: Array.isArray(parsed.matches) ? parsed.matches : [],
        collection: typeof parsed.collection === 'string' ? parsed.collection : null,
        gaps: Array.isArray(parsed.gaps) ? parsed.gaps : [],
        reasoning: typeof parsed.reasoning === 'string' ? parsed.reasoning : '',
    };
}
/**
 * Phase 1: Analyze the query against world state.
 * Determines what action to take (return existing, filter, augment, or generate).
 *
 * When world state has no entities the LLM is not called and the action is
 * 'generate'. Otherwise the LLM is asked for a JSON verdict; the first
 * `{ ... }` span of its reply is parsed and normalized. A reply without JSON,
 * or with unparseable JSON, also yields 'generate'.
 *
 * @param {string} toolName - Tool being simulated.
 * @param {string} toolDescription - Tool description, included in the prompt.
 * @param {*} input - Tool input, JSON-serialized into the prompt.
 * @param {object} world - World state; `toJSON()` must return collections keyed by name.
 * @param {object} llm - LLM provider; `generate(prompt)` must resolve to a string.
 * @returns {Promise<{action: string, matches: Array, collection: (string|null), gaps: Array, reasoning: string}>}
 */
async function analyzeQuery(toolName, toolDescription, input, world, llm) {
    const worldState = world.toJSON();
    const isEmpty = Object.keys(worldState).length === 0 ||
        Object.values(worldState).every((collection) => Object.keys(collection).length === 0);
    // Fast path: if world is empty, always generate
    if (isEmpty) {
        return buildFallbackAnalysis('World state is empty - generate all data', 'No existing data in world state');
    }
    const prompt = `Analyze this tool call against the current world state.

WORLD STATE:
${JSON.stringify(worldState, null, 2)}

TOOL: ${toolName}
DESCRIPTION: ${toolDescription}
INPUT: ${JSON.stringify(input)}

Determine:
1. Is this a READ (query/search/get/list/find) or WRITE (create/book/update/delete) operation?
2. For READ: Do entities in world state match this query? Which ones?
3. What gaps exist between existing data and query requirements?

Return JSON only:
{
"action": "return_existing" | "filter" | "augment" | "generate",
"collection": "<collection name that contains relevant data, or null>",
"matches": ["<entity IDs that match or partially match the query>"],
"gaps": ["<descriptions of what data is missing, if any>"],
"reasoning": "<brief 1-sentence explanation of your decision>"
}

ACTION MEANINGS:
- return_existing: World has entities that FULLY satisfy this query - return them as-is
- filter: World has MORE data than needed - filter down to matching subset
- augment: World has SOME matching data but gaps exist - return existing + generate missing
- generate: World has NO relevant data OR this is a WRITE operation - generate fresh

IMPORTANT:
- For WRITE operations (book, create, update), always use "generate"
- For READ operations, prefer using existing data when available
- Include ALL entity IDs that could be relevant in "matches"`;
    const result = await llm.generate(prompt);
    // The reply may wrap the JSON in prose; take the outermost brace span.
    const jsonMatch = result.match(/\{[\s\S]*\}/);
    if (!jsonMatch) {
        return buildFallbackAnalysis('Failed to parse analysis', 'Analysis parsing failed, falling back to generation');
    }
    try {
        return normalizeAnalysis(JSON.parse(jsonMatch[0]));
    }
    catch {
        return buildFallbackAnalysis('Failed to parse analysis JSON', 'Analysis JSON parsing failed, falling back to generation');
    }
}
|
|
202
|
+
/**
 * Infer the response key from tool name (e.g., searchUsers -> users, getProducts -> products).
 *
 * Looks for the first occurrence of a read verb (search, get, list, find;
 * case-insensitive) and uses the word that follows it, lower-cased. Any `_`
 * or `-` separators between the verb and the word are skipped, so
 * `search_users` gives "users" rather than "_users". When no verb followed
 * by a word is found the key is "results".
 *
 * @param {string} toolName - Tool name.
 * @returns {string} Lower-cased key for the tool response object.
 */
function inferResultKey(toolName) {
    const match = toolName.match(/(?:search|get|list|find)[_-]*([a-z0-9]\w*)/i);
    return match ? match[1].toLowerCase() : 'results';
}
|
|
209
|
+
/**
 * Format entities as a tool response.
 *
 * Undefined entries are dropped; the `data` of each remaining entity is
 * listed under a key inferred from the tool name. The mutation list is
 * always empty because nothing new is created for existing data.
 *
 * @param {string} toolName - Tool name used to infer the response key.
 * @param {Array<{data: *}|undefined>} entities - Entities to return; may contain undefined.
 * @param {object} [_config] - Unused.
 * @returns {{response: object, mutations: Array}}
 */
function formatAsToolResponse(toolName, entities, _config) {
    const present = [];
    for (const entity of entities) {
        if (entity !== undefined) {
            present.push(entity);
        }
    }
    const resultKey = inferResultKey(toolName);
    const payload = present.map((entity) => entity.data);
    return {
        response: { [resultKey]: payload },
        mutations: [],
    };
}
|
|
217
|
+
/**
 * Generate only the gap data (for augment action).
 *
 * Asks the LLM for new entities that fill `gaps`, given the entities that
 * already exist, then applies the mutations it proposes to world state.
 *
 * The returned `mutations` list contains exactly the mutations that were
 * applied to `world`: entities are converted before anything is applied, a
 * malformed entity entry is skipped instead of discarding the whole result,
 * and if `world.mutate` rejects a mutation, that one and all later ones are
 * left unapplied and unreported.
 *
 * @param {string} toolName - Tool being simulated.
 * @param {string} toolDescription - Tool description, included in the prompt.
 * @param {*} input - Tool input, JSON-serialized into the prompt.
 * @param {Array<{id: *, data: *}>} existing - Entities already in world state.
 * @param {string[]} gaps - Descriptions of the missing data.
 * @param {object} world - World state; `mutate(mutation)` is called for each proposed mutation.
 * @param {object} llm - LLM provider; `generate(prompt)` must resolve to a string.
 * @param {object} [scenario] - Optional scenario; its persona is added to the prompt.
 * @param {object} [config] - Unused.
 * @returns {Promise<{entities: Array<{id: *, data: object}>, mutations: Array}>}
 *   Both lists are empty when the reply contains no parseable JSON.
 */
async function generateGapData(toolName, toolDescription, input, existing, gaps, world, llm, scenario, config) {
    const currentDate = new Date().toISOString().split('T')[0];
    const scenarioContext = scenario ? `
SCENARIO CONTEXT:
- Persona: ${scenario.persona.backstory}
- Situation: ${scenario.persona.situation}
- Style: ${scenario.persona.style}
` : '';
    const gapList = Array.isArray(gaps) ? gaps : [];
    const prompt = `Generate ONLY the missing data to fill gaps in this query.

EXISTING DATA (do NOT duplicate these - they will be included automatically):
${JSON.stringify(existing.map((e) => e.data), null, 2)}

${scenarioContext}
TOOL: ${toolName}
DESCRIPTION: ${toolDescription}
INPUT: ${JSON.stringify(input)}
CURRENT DATE: ${currentDate}

GAPS TO FILL:
${gapList.map((g) => `- ${g}`).join('\n')}

Generate NEW entities to fill these gaps. Be consistent with existing data:
- Use similar price ranges
- Use similar ID formats
- Maintain realistic relationships

Return JSON only:
{
"newEntities": [<array of new entity data objects>],
"mutations": [
{ "operation": "create", "collection": "<collection>", "id": "<id>", "data": {<entity data>} }
]
}`;
    const result = await llm.generate(prompt);
    // The reply may wrap the JSON in prose; take the outermost brace span.
    const jsonMatch = result.match(/\{[\s\S]*\}/);
    if (!jsonMatch) {
        return { entities: [], mutations: [] };
    }
    let parsed;
    try {
        parsed = JSON.parse(jsonMatch[0]);
    }
    catch {
        return { entities: [], mutations: [] };
    }
    // Convert entities first so a malformed entry cannot hide mutations that
    // have already changed the world. The fallback id keeps the entry's
    // position in the LLM's list.
    const rawEntities = Array.isArray(parsed.newEntities) ? parsed.newEntities : [];
    const entities = rawEntities
        .map((data, idx) => (data !== null && typeof data === 'object'
            ? { id: data.id || `generated-${idx}`, data }
            : undefined))
        .filter((entity) => entity !== undefined);
    // Apply mutations to world state, recording only those that succeeded.
    const proposed = Array.isArray(parsed.mutations) ? parsed.mutations : [];
    const applied = [];
    for (const mutation of proposed) {
        try {
            world.mutate(mutation);
        }
        catch {
            // Best effort: stop at the first mutation the world rejects.
            break;
        }
        applied.push(mutation);
    }
    return { entities, mutations: applied };
}
|
|
277
|
+
/**
 * Phase 2: Execute based on analysis result.
 *
 * - 'return_existing' and 'filter': no LLM call; the entities named in
 *   `analysis.matches` are looked up in `analysis.collection` and returned.
 * - 'augment': every entity of the analysis collection (or, when that
 *   collection is unavailable, the matched entities) is kept, the LLM
 *   generates data for the gaps, and generated entities whose id already
 *   exists are dropped in favour of the existing ones.
 * - 'generate' and any other action: full LLM generation.
 *
 * The analysis is attached to every result.
 *
 * @returns {Promise<{response: *, mutations: Array, analysis: object}>}
 */
async function executeAction(analysis, toolName, toolDescription, input, world, llm, scenario, conversationHistory, config) {
    const isDefined = (entity) => entity !== undefined;
    const lookupMatches = () => analysis.matches.map((id) => world.getEntity(analysis.collection, id));
    const { action } = analysis;
    if (action === 'return_existing') {
        // No LLM needed - the formatter drops ids that no longer resolve.
        const found = lookupMatches();
        return { ...formatAsToolResponse(toolName, found, config), analysis };
    }
    if (action === 'filter') {
        // Phase 1 already chose the subset; only drop entities that are gone.
        const stillPresent = lookupMatches().filter(isDefined);
        return { ...formatAsToolResponse(toolName, stillPresent, config), analysis };
    }
    if (action === 'augment') {
        // Use the whole collection, not just the matches, so the LLM does not
        // regenerate entities the analysis overlooked.
        const source = analysis.collection ? world.getCollection(analysis.collection) : null;
        const known = source
            ? Array.from(source, ([, entity]) => entity)
            : lookupMatches().filter(isDefined);
        const generated = await generateGapData(toolName, toolDescription, input, known, analysis.gaps, world, llm, scenario, config);
        // Existing data wins when a generated entity reuses an id.
        const knownIds = new Set(known.map((entity) => entity.id));
        const fresh = generated.entities.filter((entity) => !knownIds.has(entity.id));
        const formatted = formatAsToolResponse(toolName, known.concat(fresh), config);
        return { ...formatted, mutations: generated.mutations, analysis };
    }
    // 'generate' and anything unrecognised: full LLM generation.
    const generatedResponse = await generateFullResponse(toolName, toolDescription, input, world, llm, scenario, conversationHistory, config);
    return { ...generatedResponse, analysis };
}
|
|
333
|
+
/**
 * Simulate a tool call using the two-phase approach.
 *
 * Phase 1 analyzes the call against world state; Phase 2 executes the action
 * the analysis chose, which may not need the LLM at all.
 *
 * @returns {Promise<object>} Whatever Phase 2 produces for the chosen action.
 */
export async function simulateToolCall(toolName, toolDescription, input, world, llm, scenario, conversationHistory, config) {
    // Arguments shared by both phases.
    const callContext = [toolName, toolDescription, input, world, llm];
    // Phase 1: Analyze
    const analysis = await analyzeQuery(...callContext);
    // Phase 2: Execute based on analysis
    return executeAction(analysis, ...callContext, scenario, conversationHistory, config);
}
|
|
345
|
+
/**
 * Extract the outermost `{ ... }` span from raw LLM text and parse it as JSON.
 *
 * Both failure modes (no brace span at all, or a span that is not valid JSON)
 * raise the same tool-specific error so callers see one consistent message;
 * the underlying SyntaxError, when there is one, is kept as `cause`.
 *
 * @param {*} raw - Text returned by the LLM (non-strings are treated as unparseable).
 * @param {string} toolName - Tool name, used only in the error message.
 * @returns {object} The parsed JSON object.
 * @throws {Error} If no parseable JSON object is found.
 */
function parseSimulatedJson(raw, toolName) {
    const jsonMatch = typeof raw === 'string' ? raw.match(/\{[\s\S]*\}/) : null;
    if (!jsonMatch) {
        throw new Error(`Failed to parse simulated response for ${toolName}`);
    }
    try {
        return JSON.parse(jsonMatch[0]);
    }
    catch (err) {
        // The greedy match can capture trailing prose or malformed JSON;
        // surface that with tool context instead of a bare SyntaxError.
        throw new Error(`Failed to parse simulated response for ${toolName}`, { cause: err });
    }
}
/**
 * Generate a full simulated tool response via the LLM, apply the resulting
 * mutations to the world, and return both.
 *
 * Steps: build a prompt from the world snapshot, scenario persona, the last
 * six conversation entries and the tool call; ask the LLM; parse the JSON
 * object out of its reply; optionally validate the response against
 * `config.outputSchema`; compute mutations (from `config.effects` when given,
 * otherwise from the LLM's own "mutations" array); apply each mutation with
 * `world.mutate`; and finally check `config.invariants`.
 *
 * Schema mismatches and invariant violations are logged with `console.warn`
 * and do not abort the call.
 *
 * @param {string} toolName - Name of the tool being simulated.
 * @param {string} toolDescription - Description inserted into the prompt.
 * @param {*} input - Tool input; serialized with JSON.stringify into the prompt.
 * @param {object} world - Must provide `toJSON()` and `mutate(mutation)`.
 * @param {object} llm - Must provide `generate(prompt)` resolving to text.
 * @param {object} [scenario] - Optional; when it carries a `persona`, its
 *   backstory/situation/style/constraints are added to the prompt.
 * @param {Array<{role: string, content: *}>} [conversationHistory] - Optional
 *   conversation entries; only the last six are included.
 * @param {object} [config] - Optional `outputSchema` (needs `safeParse`;
 *   presumably a Zod schema - confirm), `effects(response, world)` and
 *   `invariants`.
 * @returns {Promise<{response: *, mutations: Array}>} The simulated response
 *   and the mutations that were applied to the world.
 * @throws {Error} If the LLM reply contains no parseable JSON object.
 */
async function generateFullResponse(toolName, toolDescription, input, world, llm, scenario, conversationHistory, config) {
    const worldState = world.toJSON();
    const hasSeededData = Object.keys(worldState).length > 0;
    // Date portion (YYYY-MM-DD) of the current UTC timestamp.
    const currentDate = new Date().toISOString().split('T')[0];
    const hasEffectsFunction = !!config?.effects;
    // Extract scenario context for more accurate simulation.
    // A scenario without a persona contributes nothing instead of crashing.
    const persona = scenario?.persona;
    const scenarioContext = persona ? `
SCENARIO CONTEXT:
- Persona: ${persona.backstory}
- Situation: ${persona.situation}
- Style: ${persona.style}
${persona.constraints ? `- Constraints: ${JSON.stringify(persona.constraints)}` : ''}

IMPORTANT: Generate data that matches the SCENARIO CONTEXT, not generic data.
For example, if the persona wants items from category X, generate category X items.
` : '';
    // Include recent conversation for context (e.g., user said "February 14th")
    const recentConversation = conversationHistory && conversationHistory.length > 0 ? `
RECENT CONVERSATION (for context):
${conversationHistory.slice(-6).map(e => `${e.role.toUpperCase()}: ${e.content}`).join('\n')}

IMPORTANT: Use conversation context to understand what the user actually wants.
For example, if user mentioned specific criteria, generate data matching those criteria even if not in tool input.
` : '';
    // Output schema instruction
    const outputSchemaInstruction = config?.outputSchema ? `
OUTPUT SCHEMA (you MUST match this structure exactly):
${schemaToDescription(config.outputSchema)}
` : '';
    // When an effects function exists, mutations are computed locally, so the
    // LLM is told not to produce them.
    const mutationsInstruction = hasEffectsFunction
        ? `\nNOTE: Do NOT include "mutations" in your response - they will be computed automatically.`
        : `
If this tool creates or modifies entities, include mutations.`;
    const responseFormat = hasEffectsFunction
        ? `Return JSON only:
{
"response": <the tool response matching the output schema>
}`
        : `Return JSON only:
{
"response": <the tool response - match the expected format>,
"mutations": [
{ "operation": "create", "collection": "orders", "id": "ORD-123", "data": {...} },
{ "operation": "update", "collection": "inventory", "id": "ITEM-456", "data": { "quantity": 49 } }
]
}`;
    const prompt = `You are simulating a tool response for an AI agent test.

CURRENT DATE: ${currentDate}
${scenarioContext}${recentConversation}
${hasSeededData ? `EXISTING WORLD STATE:
${JSON.stringify(worldState, null, 2)}

NOTE: If existing data doesn't match the scenario context (e.g., wrong routes), generate NEW contextually appropriate data instead.` : `WORLD STATE: (empty - generate plausible mock data)`}

TOOL CALL:
Name: ${toolName}
Description: ${toolDescription}
Input: ${JSON.stringify(input)}
${outputSchemaInstruction}
Generate a realistic response that makes sense for the scenario and input.
${mutationsInstruction}

RULES:
1. PRIORITIZE scenario context over seeded data - generate data matching what the persona actually wants
2. Use readable IDs (UA123, BK-7890) not UUIDs
3. Be contextually realistic:
- Dates should be current or near-future (relative to ${currentDate})
- Prices should be realistic for the domain
- Routes/data should match what the persona actually wants
4. For search/query operations: return 2-4 entities matching the INPUT parameters
5. For create/write operations: create new entities and update related ones (e.g., decrement inventory)
${hasEffectsFunction ? '' : '6. Always include mutations to track generated data in world state'}

${responseFormat}`;
    const result = await llm.generate(prompt);
    // Parse JSON from response
    const parsed = parseSimulatedJson(result, toolName);
    let response = parsed.response;
    // Validate against output schema if provided
    if (config?.outputSchema) {
        const validation = config.outputSchema.safeParse(response);
        if (!validation.success) {
            console.warn(`Simulated response for ${toolName} doesn't match output schema:`, validation.error);
            // Try to use it anyway - LLM might have gotten close enough
        }
        else {
            response = validation.data;
        }
    }
    // Compute mutations
    let mutations = [];
    if (config?.effects) {
        // Use effects function to compute mutations from response.
        // Pass world so effects can access current state (e.g., for computing decrements).
        // A nullish return is treated as "no effects".
        const effects = config.effects(response, world) ?? [];
        mutations = effects.map(effectToMutation);
    }
    else if (Array.isArray(parsed.mutations)) {
        // Use LLM-generated mutations (legacy path)
        mutations = parsed.mutations;
    }
    else if (parsed.mutations) {
        // An object or string here would either crash the loop below or feed
        // garbage (e.g. single characters) into world.mutate.
        console.warn(`Ignoring non-array mutations in simulated response for ${toolName}`);
    }
    // Apply mutations to world
    for (const mutation of mutations) {
        world.mutate(mutation);
    }
    // Validate invariants if provided
    if (config?.invariants && config.invariants.length > 0) {
        const { valid, errors } = validateInvariants(world, config.invariants);
        if (!valid) {
            console.warn(`Invariant violations after ${toolName}:`, errors);
            // Could throw here to make it strict, but for now just warn
        }
    }
    return {
        response,
        mutations,
    };
}
|
|
476
|
+
//# sourceMappingURL=simulate.js.map
|