@virtualkitchenco/multiverse-sdk 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/dist/adapters/langchain.d.ts +65 -0
  2. package/dist/adapters/langchain.d.ts.map +1 -0
  3. package/dist/adapters/langchain.js +80 -0
  4. package/dist/adapters/langchain.js.map +1 -0
  5. package/dist/ci.d.ts +31 -0
  6. package/dist/ci.d.ts.map +1 -0
  7. package/dist/ci.js +105 -0
  8. package/dist/ci.js.map +1 -0
  9. package/dist/client.d.ts +136 -0
  10. package/dist/client.d.ts.map +1 -0
  11. package/dist/client.js +92 -0
  12. package/dist/client.js.map +1 -0
  13. package/dist/github.d.ts +35 -0
  14. package/dist/github.d.ts.map +1 -0
  15. package/dist/github.js +113 -0
  16. package/dist/github.js.map +1 -0
  17. package/dist/index.d.ts +19 -0
  18. package/dist/index.d.ts.map +1 -0
  19. package/dist/index.js +19 -0
  20. package/dist/index.js.map +1 -0
  21. package/dist/llm/anthropic.d.ts +18 -0
  22. package/dist/llm/anthropic.d.ts.map +1 -0
  23. package/dist/llm/anthropic.js +91 -0
  24. package/dist/llm/anthropic.js.map +1 -0
  25. package/dist/llm/google.d.ts +18 -0
  26. package/dist/llm/google.d.ts.map +1 -0
  27. package/dist/llm/google.js +97 -0
  28. package/dist/llm/google.js.map +1 -0
  29. package/dist/llm/index.d.ts +14 -0
  30. package/dist/llm/index.d.ts.map +1 -0
  31. package/dist/llm/index.js +29 -0
  32. package/dist/llm/index.js.map +1 -0
  33. package/dist/llm/openai.d.ts +18 -0
  34. package/dist/llm/openai.d.ts.map +1 -0
  35. package/dist/llm/openai.js +78 -0
  36. package/dist/llm/openai.js.map +1 -0
  37. package/dist/llm/provider.d.ts +42 -0
  38. package/dist/llm/provider.d.ts.map +1 -0
  39. package/dist/llm/provider.js +7 -0
  40. package/dist/llm/provider.js.map +1 -0
  41. package/dist/multiverse.d.ts +105 -0
  42. package/dist/multiverse.d.ts.map +1 -0
  43. package/dist/multiverse.js +449 -0
  44. package/dist/multiverse.js.map +1 -0
  45. package/dist/report.d.ts +30 -0
  46. package/dist/report.d.ts.map +1 -0
  47. package/dist/report.js +128 -0
  48. package/dist/report.js.map +1 -0
  49. package/dist/simulate.d.ts +49 -0
  50. package/dist/simulate.d.ts.map +1 -0
  51. package/dist/simulate.js +476 -0
  52. package/dist/simulate.js.map +1 -0
  53. package/package.json +68 -0
package/dist/report.js ADDED
@@ -0,0 +1,128 @@
1
+ /**
2
+ * Report Card Generation
3
+ *
4
+ * Generates markdown report cards from test results
5
+ * for posting to PRs and CI output.
6
+ */
7
/**
 * Map a pass rate to a letter tier.
 *
 * Thresholds (inclusive lower bounds): 95 -> 'S', 85 -> 'A', 70 -> 'B',
 * 50 -> 'C'. Anything below 50 (and any value that fails every comparison,
 * such as NaN) maps to 'D'.
 *
 * @param {number} passRate - Pass rate, compared against the thresholds above.
 * @returns {'S'|'A'|'B'|'C'|'D'} The tier letter.
 */
export function calculateTier(passRate) {
    // Ordered from the highest threshold down; the first one met wins.
    const thresholds = [
        [95, 'S'],
        [85, 'A'],
        [70, 'B'],
        [50, 'C'],
    ];
    for (const [minimum, tier] of thresholds) {
        if (passRate >= minimum) {
            return tier;
        }
    }
    return 'D';
}
21
/**
 * Get the emoji that represents a tier.
 *
 * The glyphs are written as Unicode escapes so the source stays pure ASCII
 * and cannot be corrupted by being read with the wrong character encoding.
 *
 * @param {string} tier - Tier letter ('S', 'A', 'B', 'C' or 'D').
 * @returns {string} The emoji for the tier, or a question mark emoji for
 *   any other value.
 */
function tierEmoji(tier) {
    switch (tier) {
        case 'S': return '\u{1F3C6}'; // trophy
        case 'A': return '\u2705'; // white heavy check mark
        case 'B': return '\u26A0\uFE0F'; // warning sign (emoji presentation)
        case 'C': return '\u{1F536}'; // large orange diamond
        case 'D': return '\u274C'; // cross mark
        default: return '\u2753'; // question mark ornament
    }
}
34
/**
 * Generate a markdown report card from test results.
 *
 * The report contains, in order: a header, the tier summary line, a metrics
 * table, an optional per-scenario table, an optional "Weak Spots" list, an
 * optional dashboard link and a footer.
 *
 * @param {object} results - Test results. Reads `passRate`, `runs` (array of
 *   runs with `passed` and `scenario.name`), `duration` (divided by 1000 and
 *   shown as seconds), and the optional `weakSpots` and `url`.
 * @param {object} [options]
 * @param {string} [options.dashboardUrl] - Link target; takes precedence
 *   over `results.url`.
 * @param {boolean} [options.detailed=true] - Include the per-scenario table.
 * @returns {string} The markdown document, lines joined with "\n".
 */
export function generateReportCard(results, options = {}) {
    const { dashboardUrl, detailed = true } = options;
    const tier = calculateTier(results.passRate);
    const emoji = tierEmoji(tier);
    // Non-ASCII glyphs are spelled as escapes so the file stays ASCII-only
    // and survives being read with the wrong encoding.
    const CHECK = '\u2705';
    const WARNING = '\u26A0\uFE0F';
    const CROSS = '\u274C';
    // A raw "|" terminates a table cell in GitHub-flavoured markdown.
    const escapeCell = (value) => String(value).replace(/\|/g, '\\|');

    const totalRuns = results.runs.length;
    const passed = results.runs.filter((run) => run.passed).length;
    const failed = totalRuns - passed;

    const lines = [
        // Header
        '## \u{1F52E} Multiverse Report',
        '',
        // Summary
        `**Tier ${tier}** ${emoji} \u00B7 ${results.passRate}% pass rate`,
        '',
        // Stats
        '| Metric | Value |',
        '|--------|-------|',
        `| Total Runs | ${totalRuns} |`,
        `| Passed | ${passed} |`,
        `| Failed | ${failed} |`,
        `| Duration | ${(results.duration / 1000).toFixed(1)}s |`,
        '',
    ];

    // Scenario breakdown
    if (detailed && totalRuns > 0) {
        lines.push('### Scenarios', '', '| Scenario | Passed | Rate |', '|----------|--------|------|');
        for (const [name, stats] of aggregateByScenario(results.runs)) {
            const rate = Math.round((stats.passed / stats.total) * 100);
            let status = CROSS;
            if (rate >= 80) {
                status = CHECK;
            }
            else if (rate >= 50) {
                status = WARNING;
            }
            lines.push(`| ${escapeCell(name)} | ${stats.passed}/${stats.total} | ${status} ${rate}% |`);
        }
        lines.push('');
    }

    // Weak spots
    if (results.weakSpots && results.weakSpots.length > 0) {
        lines.push('### Weak Spots', '');
        for (const spot of results.weakSpots) {
            lines.push(`- **${spot.scenario}**: ${spot.passRate}% pass rate`);
        }
        lines.push('');
    }

    // Dashboard link (explicit option wins over the URL stored on the results)
    const url = dashboardUrl || results.url;
    if (url) {
        lines.push(`[View full report \u2192](${url})`, '');
    }

    // Footer
    lines.push('---');
    lines.push('*Generated by [Multiverse](https://github.com/anthropics/multiverse)*');
    return lines.join('\n');
}
92
/**
 * Generate a one-line summary for CI output.
 *
 * @param {object} results - Test results; reads `passRate` and `runs`
 *   (each run's `passed` flag is counted).
 * @returns {string} Text of the form
 *   "Multiverse: Tier <tier> (<passRate>%) - <n> passed, <m> failed".
 */
export function generateSummary(results) {
    const { passRate, runs } = results;
    const tier = calculateTier(passRate);
    const passed = runs.filter((run) => run.passed).length;
    const failed = runs.length - passed;
    return `Multiverse: Tier ${tier} (${passRate}%) - ${passed} passed, ${failed} failed`;
}
101
/**
 * Aggregate run results by scenario name.
 *
 * @param {Array<object>} runs - Runs; each must have `scenario.name` and a
 *   `passed` flag.
 * @returns {Map<string, {passed: number, total: number}>} Pass and total
 *   counts per scenario name, in order of first appearance.
 */
function aggregateByScenario(runs) {
    const statsByName = new Map();
    for (const run of runs) {
        const { name } = run.scenario;
        let entry = statsByName.get(name);
        if (!entry) {
            entry = { passed: 0, total: 0 };
            statsByName.set(name, entry);
        }
        entry.total += 1;
        if (run.passed) {
            entry.passed += 1;
        }
    }
    return statsByName;
}
116
/**
 * Generate a shields.io static badge URL for the results.
 *
 * The badge path has the form `<label>-<message>-<color>`. Because "-"
 * separates the three parts, literal dashes inside a part must be doubled
 * ("--"), and literal underscores must be doubled ("__") because a single
 * underscore is rendered as a space.
 *
 * @param {object} results - Test results; reads `passRate`.
 * @returns {string} The badge image URL.
 */
export function generateBadgeUrl(results) {
    const tier = calculateTier(results.passRate);
    let color = 'red';
    if (tier === 'S' || tier === 'A') {
        color = 'brightgreen';
    }
    else if (tier === 'B') {
        color = 'yellow';
    }
    else if (tier === 'C') {
        color = 'orange';
    }
    // encodeURIComponent leaves "-" and "_" untouched, so apply the
    // shields.io escaping for them afterwards.
    const toBadgePart = (text) => encodeURIComponent(text)
        .replace(/-/g, '--')
        .replace(/_/g, '__');
    const label = toBadgePart('multiverse');
    const message = toBadgePart(`Tier ${tier} (${results.passRate}%)`);
    return `https://img.shields.io/badge/${label}-${message}-${color}`;
}
128
+ //# sourceMappingURL=report.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"report.js","sourceRoot":"","sources":["../src/report.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAWH;;GAEG;AACH,MAAM,UAAU,aAAa,CAAC,QAAgB;IAC5C,IAAI,QAAQ,IAAI,EAAE;QAAE,OAAO,GAAG,CAAC;IAC/B,IAAI,QAAQ,IAAI,EAAE;QAAE,OAAO,GAAG,CAAC;IAC/B,IAAI,QAAQ,IAAI,EAAE;QAAE,OAAO,GAAG,CAAC;IAC/B,IAAI,QAAQ,IAAI,EAAE;QAAE,OAAO,GAAG,CAAC;IAC/B,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;GAEG;AACH,SAAS,SAAS,CAAC,IAAY;IAC7B,QAAQ,IAAI,EAAE,CAAC;QACb,KAAK,GAAG,CAAC,CAAC,OAAO,IAAI,CAAC;QACtB,KAAK,GAAG,CAAC,CAAC,OAAO,GAAG,CAAC;QACrB,KAAK,GAAG,CAAC,CAAC,OAAO,IAAI,CAAC;QACtB,KAAK,GAAG,CAAC,CAAC,OAAO,IAAI,CAAC;QACtB,KAAK,GAAG,CAAC,CAAC,OAAO,GAAG,CAAC;QACrB,OAAO,CAAC,CAAC,OAAO,GAAG,CAAC;IACtB,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,kBAAkB,CAChC,OAAoB,EACpB,UAAyB,EAAE;IAE3B,MAAM,EAAE,YAAY,EAAE,QAAQ,GAAG,IAAI,EAAE,GAAG,OAAO,CAAC;IAClD,MAAM,IAAI,GAAG,aAAa,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;IAC7C,MAAM,KAAK,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAE9B,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,SAAS;IACT,KAAK,CAAC,IAAI,CAAC,yBAAyB,CAAC,CAAC;IACtC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,UAAU;IACV,KAAK,CAAC,IAAI,CAAC,UAAU,IAAI,MAAM,KAAK,MAAM,OAAO,CAAC,QAAQ,aAAa,CAAC,CAAC;IACzE,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,QAAQ;IACR,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;IACzD,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IAC5C,KAAK,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;IACjC,KAAK,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;IACjC,KAAK,CAAC,IAAI,CAAC,kBAAkB,OAAO,CAAC,IAAI,CAAC,MAAM,IAAI,CAAC,CAAC;IACtD,KAAK,CAAC,IAAI,CAAC,cAAc,MAAM,IAAI,CAAC,CAAC;IACrC,KAAK,CAAC,IAAI,CAAC,cAAc,MAAM,IAAI,CAAC,CAAC;IACrC,KAAK,CAAC,IAAI,CAAC,gBAAgB,CAAC,OAAO,CAAC,QAAQ,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;IACtE,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,qBAAqB;IACrB,IAAI,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxC,MAAM,aAAa,GAAG,mBAAmB,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QAExD,KAAK,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;QAC5B,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACf,KAAK,CAAC,IA
AI,CAAC,8BAA8B,CAAC,CAAC;QAC3C,KAAK,CAAC,IAAI,CAAC,8BAA8B,CAAC,CAAC;QAE3C,KAAK,MAAM,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,aAAa,EAAE,CAAC;YAC1C,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,MAAM,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,GAAG,CAAC,CAAC;YAC5D,MAAM,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC;YAC1D,KAAK,CAAC,IAAI,CAAC,KAAK,IAAI,MAAM,KAAK,CAAC,MAAM,IAAI,KAAK,CAAC,KAAK,MAAM,MAAM,IAAI,IAAI,KAAK,CAAC,CAAC;QAClF,CAAC;QACD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACjB,CAAC;IAED,aAAa;IACb,IAAI,OAAO,CAAC,SAAS,IAAI,OAAO,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtD,KAAK,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;QAC7B,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACf,KAAK,MAAM,IAAI,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC;YACrC,KAAK,CAAC,IAAI,CAAC,OAAO,IAAI,CAAC,QAAQ,OAAO,IAAI,CAAC,QAAQ,aAAa,CAAC,CAAC;QACpE,CAAC;QACD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACjB,CAAC;IAED,iBAAiB;IACjB,IAAI,YAAY,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC;QAChC,MAAM,GAAG,GAAG,YAAY,IAAI,OAAO,CAAC,GAAG,CAAC;QACxC,KAAK,CAAC,IAAI,CAAC,wBAAwB,GAAG,GAAG,CAAC,CAAC;QAC3C,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACjB,CAAC;IAED,SAAS;IACT,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAClB,KAAK,CAAC,IAAI,CAAC,uEAAuE,CAAC,CAAC;IAEpF,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,eAAe,CAAC,OAAoB;IAClD,MAAM,IAAI,GAAG,aAAa,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;IAC7C,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;IACzD,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IAE5C,OAAO,oBAAoB,IAAI,KAAK,OAAO,CAAC,QAAQ,QAAQ,MAAM,YAAY,MAAM,SAAS,CAAC;AAChG,CAAC;AAED;;GAEG;AACH,SAAS,mBAAmB,CAAC,IAAiB;IAC5C,MAAM,KAAK,GAAG,IAAI,GAAG,EAA6C,CAAC;IAEnE,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,IAAI,GAAG,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC;QAC/B,MAAM,OAAO,GAAG,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;QAC3D,OAAO,CAAC,KAAK,EAAE,CAAC;QAChB,IAAI,GAAG,CAAC,MAAM;YAAE,OAAO,CAAC,MAAM,EAAE,CAAC;QACjC,KAAK,CAAC,GAAG
,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC3B,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAAC,OAAoB;IACnD,MAAM,IAAI,GAAG,aAAa,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;IAC7C,MAAM,KAAK,GAAG,IAAI,KAAK,GAAG,IAAI,IAAI,KAAK,GAAG,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC;QAC9C,IAAI,KAAK,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;YACzB,IAAI,KAAK,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC;IAE9C,MAAM,KAAK,GAAG,kBAAkB,CAAC,YAAY,CAAC,CAAC;IAC/C,MAAM,OAAO,GAAG,kBAAkB,CAAC,QAAQ,IAAI,KAAK,OAAO,CAAC,QAAQ,IAAI,CAAC,CAAC;IAE1E,OAAO,gCAAgC,KAAK,IAAI,OAAO,IAAI,KAAK,EAAE,CAAC;AACrE,CAAC"}
@@ -0,0 +1,49 @@
1
+ /**
2
+ * LLM-based tool simulation with two-phase approach
3
+ *
4
+ * Phase 1 (Analysis): LLM analyzes world state vs query to decide action
5
+ * Phase 2 (Execution): Execute based on action (may skip LLM entirely)
6
+ *
7
+ * This ensures consistency - same tool + overlapping queries return consistent data.
8
+ */
9
+ import type { World, Mutation, Scenario, Invariant, ZodLikeSchema, Effect, WorldStateAccessor } from '@multiverse/core';
10
+ import type { LLMProvider } from './llm/index.js';
11
/**
 * Result of simulating a tool call (the resolved value of `simulateToolCall`).
 */
export interface SimulationResult {
    /** The simulated tool response. */
    response: unknown;
    /** Mutations associated with this call. */
    mutations: Mutation[];
    /** The phase 1 analysis that selected the action, when present. */
    analysis?: AnalysisResult;
}
16
/**
 * A single conversation turn, supplied to the simulator as context.
 */
export interface ConversationEntry {
    /** Who produced the turn. */
    role: 'user' | 'agent';
    /** The text of the turn. */
    content: string;
}
23
/**
 * Optional per-tool configuration for simulation.
 */
export interface ToolSimConfig {
    /** Schema describing the expected shape of the tool's output. */
    outputSchema?: ZodLikeSchema;
    /** Computes the effects of a call from its output and the current world state. */
    effects?: (output: unknown, world: WorldStateAccessor) => Effect[];
    /** Invariants to check against world state. */
    invariants?: Invariant[];
}
31
/**
 * Two-phase simulation types.
 *
 * `SimulationAction` is the action chosen by the analysis phase and carried
 * out by the execution phase.
 */
export type SimulationAction = 'return_existing' | 'filter' | 'augment' | 'generate';
35
/**
 * Outcome of the analysis phase (phase 1) of a simulated tool call.
 */
export interface AnalysisResult {
    /** The action the execution phase should take. */
    action: SimulationAction;
    /** IDs of entities that match (or partially match) the query. */
    matches: string[];
    /** Name of the collection holding the relevant data, or null if none. */
    collection: string | null;
    /** Descriptions of data that is missing for the query. */
    gaps: string[];
    /** Short explanation of why the action was chosen. */
    reasoning: string;
}
42
/**
 * Simulate a tool call using the two-phase approach.
 *
 * Phase 1: analyze world state vs. the query.
 * Phase 2: execute based on the analysis (may skip the LLM entirely).
 *
 * @param toolName - Name of the tool being simulated.
 * @param toolDescription - Description of the tool.
 * @param input - Input passed to the tool.
 * @param world - World state the call runs against.
 * @param llm - LLM provider used for analysis and generation.
 * @param scenario - Optional scenario supplying context.
 * @param conversationHistory - Optional conversation turns supplying context.
 * @param config - Optional per-tool simulation configuration.
 * @returns The simulated response with its mutations and analysis.
 */
export declare function simulateToolCall(toolName: string, toolDescription: string, input: unknown, world: World, llm: LLMProvider, scenario?: Scenario, conversationHistory?: ConversationEntry[], config?: ToolSimConfig): Promise<SimulationResult>;
49
+ //# sourceMappingURL=simulate.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"simulate.d.ts","sourceRoot":"","sources":["../src/simulate.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,KAAK,EAAE,KAAK,EAAE,QAAQ,EAAE,QAAQ,EAAa,SAAS,EAAE,aAAa,EAAE,MAAM,EAAE,kBAAkB,EAAU,MAAM,kBAAkB,CAAC;AAC3I,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAElD,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,EAAE,OAAO,CAAC;IAClB,SAAS,EAAE,QAAQ,EAAE,CAAC;IACtB,QAAQ,CAAC,EAAE,cAAc,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC;IACvB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,YAAY,CAAC,EAAE,aAAa,CAAC;IAC7B,OAAO,CAAC,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,kBAAkB,KAAK,MAAM,EAAE,CAAC;IACnE,UAAU,CAAC,EAAE,SAAS,EAAE,CAAC;CAC1B;AAED;;GAEG;AACH,MAAM,MAAM,gBAAgB,GAAG,iBAAiB,GAAG,QAAQ,GAAG,SAAS,GAAG,UAAU,CAAC;AAErF,MAAM,WAAW,cAAc;IAC7B,MAAM,EAAE,gBAAgB,CAAC;IACzB,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;CACnB;AA6ZD;;;;;GAKG;AACH,wBAAsB,gBAAgB,CACpC,QAAQ,EAAE,MAAM,EAChB,eAAe,EAAE,MAAM,EACvB,KAAK,EAAE,OAAO,EACd,KAAK,EAAE,KAAK,EACZ,GAAG,EAAE,WAAW,EAChB,QAAQ,CAAC,EAAE,QAAQ,EACnB,mBAAmB,CAAC,EAAE,iBAAiB,EAAE,EACzC,MAAM,CAAC,EAAE,aAAa,GACrB,OAAO,CAAC,gBAAgB,CAAC,CAS3B"}
@@ -0,0 +1,476 @@
1
+ /**
2
+ * LLM-based tool simulation with two-phase approach
3
+ *
4
+ * Phase 1 (Analysis): LLM analyzes world state vs query to decide action
5
+ * Phase 2 (Execution): Execute based on action (may skip LLM entirely)
6
+ *
7
+ * This ensures consistency - same tool + overlapping queries return consistent data.
8
+ */
9
/**
 * Convert a zod schema into a textual description for the LLM prompt.
 *
 * - Object schemas (`_def.shape` is a function) become pretty-printed JSON
 *   mapping each key to the result of `describeZodType`.
 * - Array schemas (`_def.typeName === 'ZodArray'`) become `Array<...>` of the
 *   element description.
 * - Anything else (including a missing schema) yields a generic fallback.
 *
 * @param {unknown} schema - A zod-like schema.
 * @returns {string} Description text to embed in a prompt.
 */
function schemaToDescription(schema) {
    const FALLBACK = '(structured output - follow the shape implied by the tool description)';
    // Optional chaining lets a null/undefined schema reach the fallback.
    const def = schema?._def;
    if (!def || typeof def !== 'object') {
        return FALLBACK;
    }
    if (def.shape && typeof def.shape === 'function') {
        try {
            const described = Object.entries(def.shape())
                .map(([key, fieldType]) => [key, describeZodType(fieldType)]);
            return JSON.stringify(Object.fromEntries(described), null, 2);
        }
        catch {
            // Best effort: an unreadable shape falls through to the checks below.
        }
    }
    if (def.typeName === 'ZodArray' && def.type) {
        return `Array<${schemaToDescription(def.type)}>`;
    }
    return FALLBACK;
}
35
/**
 * Describe a single zod type for the prompt.
 *
 * Reads the type's `_def.typeName` and returns a short description:
 * primitives by name, `array<...>`, a JSON object of field descriptions,
 * `enum(a|b)`, `<inner>?` for optionals, `<inner> | null` for nullables and
 * the inner type for defaults. Other type names have the "Zod" prefix
 * removed and are lower-cased. A value without `_def`, or without a string
 * `typeName`, is described as 'unknown'.
 *
 * @param {unknown} zodType - A zod-like type.
 * @returns {string} The description.
 */
function describeZodType(zodType) {
    const def = zodType?._def;
    if (!def) {
        return 'unknown';
    }
    const { typeName } = def;
    switch (typeName) {
        case 'ZodString':
            return 'string';
        case 'ZodNumber':
            return 'number';
        case 'ZodBoolean':
            return 'boolean';
        case 'ZodArray':
            return `array<${describeZodType(def.type)}>`;
        case 'ZodObject': {
            if (def.shape && typeof def.shape === 'function') {
                const described = Object.entries(def.shape())
                    .map(([key, fieldType]) => [key, describeZodType(fieldType)]);
                return JSON.stringify(Object.fromEntries(described));
            }
            return 'object';
        }
        case 'ZodEnum':
            // Guard against a zod-like enum that does not expose `values`.
            return `enum(${Array.isArray(def.values) ? def.values.join('|') : ''})`;
        case 'ZodOptional':
            return `${describeZodType(def.innerType)}?`;
        case 'ZodNullable':
            return `${describeZodType(def.innerType)} | null`;
        case 'ZodDefault':
            // A default does not change the value's shape; describe the inner type.
            return describeZodType(def.innerType);
        default:
            // typeName may be absent on zod-like schemas; never call .replace on it.
            return typeof typeName === 'string'
                ? typeName.replace('Zod', '').toLowerCase()
                : 'unknown';
    }
}
67
/**
 * Validate invariants against world state.
 *
 * For each invariant, every entity in `inv.collection` whose `inv.field` is
 * neither undefined nor null is compared to `inv.value` using
 * `inv.condition` ('gt', 'gte', 'lt', 'lte', 'eq', 'neq'). Values are
 * compared as-is with the JavaScript operators; no conversion is applied.
 *
 * Invariants whose collection does not exist are skipped. An unrecognised
 * condition is reported once as a configuration error rather than as a
 * violation on every entity.
 *
 * @param {object} world - Must provide `getCollection(name)` returning an
 *   iterable of `[id, entity]` pairs (or a falsy value when absent).
 * @param {Array<object>} invariants - Invariants with `collection`, `field`,
 *   `condition` and `value`.
 * @returns {{valid: boolean, errors: string[]}} `valid` is true when no
 *   errors were recorded.
 */
function validateInvariants(world, invariants) {
    const comparators = new Map([
        ['gt', (actual, expected) => actual > expected],
        ['gte', (actual, expected) => actual >= expected],
        ['lt', (actual, expected) => actual < expected],
        ['lte', (actual, expected) => actual <= expected],
        ['eq', (actual, expected) => actual === expected],
        ['neq', (actual, expected) => actual !== expected],
    ]);
    const errors = [];
    for (const inv of invariants) {
        const compare = comparators.get(inv.condition);
        if (!compare) {
            errors.push(`Unknown invariant condition "${inv.condition}" for ${inv.collection}.${inv.field}`);
            continue;
        }
        const collection = world.getCollection(inv.collection);
        if (!collection) {
            continue;
        }
        for (const [id, entity] of collection) {
            const fieldValue = entity?.data?.[inv.field];
            // Entities that do not carry the field are not subject to the invariant.
            if (fieldValue === undefined || fieldValue === null) {
                continue;
            }
            if (!compare(fieldValue, inv.value)) {
                errors.push(`Invariant violated: ${inv.collection}.${id}.${inv.field} (${fieldValue}) ${inv.condition} ${inv.value}`);
            }
        }
    }
    return { valid: errors.length === 0, errors };
}
111
/**
 * Convert an Effect to a Mutation.
 *
 * Copies only the four mutation fields; any other property on the effect is
 * dropped.
 *
 * @param {object} effect - Effect with `operation`, `collection`, `id`, `data`.
 * @returns {{operation: *, collection: *, id: *, data: *}} The mutation.
 */
function effectToMutation(effect) {
    const { operation, collection, id, data } = effect;
    return { operation, collection, id, data };
}
122
+ // ============================================================================
123
+ // Two-Phase Simulation
124
+ // ============================================================================
125
/**
 * Build the analysis returned when the LLM analysis cannot be used.
 * Always selects the 'generate' action.
 */
function fallbackAnalysis(gap, reasoning) {
    return {
        action: 'generate',
        matches: [],
        collection: null,
        gaps: [gap],
        reasoning,
    };
}
/**
 * Coerce the LLM's parsed JSON into a well-formed analysis so that later
 * code can rely on `matches`/`gaps` being arrays and `action` being known.
 */
function normalizeAnalysis(parsed) {
    const knownActions = ['return_existing', 'filter', 'augment', 'generate'];
    return {
        // An unrecognised action is treated as 'generate'.
        action: knownActions.includes(parsed.action) ? parsed.action : 'generate',
        matches: Array.isArray(parsed.matches) ? parsed.matches : [],
        collection: typeof parsed.collection === 'string' ? parsed.collection : null,
        gaps: Array.isArray(parsed.gaps) ? parsed.gaps : [],
        reasoning: typeof parsed.reasoning === 'string' ? parsed.reasoning : '',
    };
}
/**
 * Phase 1: Analyze the query against world state.
 * Determines what action to take (return existing, filter, augment, or generate).
 *
 * When the world state has no collections, or every collection is empty,
 * the LLM is not called and 'generate' is returned directly. Otherwise the
 * LLM is asked for a JSON analysis; if no JSON object can be extracted or
 * parsed from its reply, the function falls back to 'generate'.
 *
 * @param {string} toolName - Name of the tool being simulated.
 * @param {string} toolDescription - Description of the tool.
 * @param {unknown} input - Tool input; serialized with JSON.stringify.
 * @param {object} world - Must provide `toJSON()` returning collections by name.
 * @param {object} llm - Must provide `generate(prompt)` resolving to a string.
 * @returns {Promise<{action: string, matches: Array, collection: (string|null), gaps: Array, reasoning: string}>}
 */
async function analyzeQuery(toolName, toolDescription, input, world, llm) {
    const worldState = world.toJSON();
    const collections = Object.values(worldState);
    const isEmpty = collections.every((collection) => Object.keys(collection).length === 0);
    // Fast path: if world is empty, always generate
    if (isEmpty) {
        return fallbackAnalysis('World state is empty - generate all data', 'No existing data in world state');
    }
    const prompt = `Analyze this tool call against the current world state.

WORLD STATE:
${JSON.stringify(worldState, null, 2)}

TOOL: ${toolName}
DESCRIPTION: ${toolDescription}
INPUT: ${JSON.stringify(input)}

Determine:
1. Is this a READ (query/search/get/list/find) or WRITE (create/book/update/delete) operation?
2. For READ: Do entities in world state match this query? Which ones?
3. What gaps exist between existing data and query requirements?

Return JSON only:
{
"action": "return_existing" | "filter" | "augment" | "generate",
"collection": "<collection name that contains relevant data, or null>",
"matches": ["<entity IDs that match or partially match the query>"],
"gaps": ["<descriptions of what data is missing, if any>"],
"reasoning": "<brief 1-sentence explanation of your decision>"
}

ACTION MEANINGS:
- return_existing: World has entities that FULLY satisfy this query - return them as-is
- filter: World has MORE data than needed - filter down to matching subset
- augment: World has SOME matching data but gaps exist - return existing + generate missing
- generate: World has NO relevant data OR this is a WRITE operation - generate fresh

IMPORTANT:
- For WRITE operations (book, create, update), always use "generate"
- For READ operations, prefer using existing data when available
- Include ALL entity IDs that could be relevant in "matches"`;
    const result = await llm.generate(prompt);
    // The reply may wrap the JSON in prose; take the outermost {...} span.
    const jsonMatch = result.match(/\{[\s\S]*\}/);
    if (!jsonMatch) {
        return fallbackAnalysis('Failed to parse analysis', 'Analysis parsing failed, falling back to generation');
    }
    let parsed;
    try {
        parsed = JSON.parse(jsonMatch[0]);
    }
    catch {
        return fallbackAnalysis('Failed to parse analysis JSON', 'Analysis JSON parsing failed, falling back to generation');
    }
    return normalizeAnalysis(parsed);
}
202
/**
 * Infer the response key from a tool name
 * (e.g., searchUsers -> users, getProducts -> products).
 *
 * Takes everything that follows the first occurrence of "search", "get",
 * "list" or "find" (case-insensitive, anywhere in the name) and lower-cases
 * it. Returns 'results' when no such verb is followed by at least one word
 * character.
 *
 * @param {string} toolName - Name of the tool.
 * @returns {string} The key under which results are returned.
 */
function inferResultKey(toolName) {
    const verbMatch = toolName.match(/(?:search|get|list|find)(\w+)/i);
    if (!verbMatch) {
        return 'results';
    }
    const [, subject] = verbMatch;
    return subject.toLowerCase();
}
209
/**
 * Format existing entities as a tool response.
 *
 * The response is an object with a single key, inferred from the tool name,
 * holding the `data` of each entity. Missing entities (undefined or null,
 * e.g. IDs that no longer resolve) are skipped.
 *
 * @param {string} toolName - Tool name used to infer the response key.
 * @param {Array<object|undefined|null>} entities - Entities with a `data` property.
 * @param {object} [_config] - Unused; kept for signature compatibility.
 * @returns {{response: object, mutations: Array}} Response plus an empty
 *   mutation list, because returning existing data changes nothing.
 */
function formatAsToolResponse(toolName, entities, _config) {
    const payload = [];
    for (const entity of entities) {
        // `!= null` rejects both undefined and null lookups.
        if (entity != null) {
            payload.push(entity.data);
        }
    }
    return {
        response: { [inferResultKey(toolName)]: payload },
        mutations: [], // No new mutations for existing data
    };
}
217
/**
 * Generate only the gap data (for the 'augment' action).
 *
 * Asks the LLM for new entities that fill `gaps` without duplicating
 * `existing`, applies the returned mutations to `world`, and returns the new
 * entities together with the mutations that were actually applied.
 *
 * Best effort: if no JSON can be extracted or parsed from the reply, nothing
 * is applied and empty lists are returned. New entities are built before
 * any mutation is applied, and a mutation that `world.mutate` rejects is
 * skipped with a warning, so the returned mutations always reflect what
 * happened to the world.
 *
 * @param {string} toolName - Name of the tool being simulated.
 * @param {string} toolDescription - Description of the tool.
 * @param {unknown} input - Tool input; serialized with JSON.stringify.
 * @param {Array<object>} existing - Existing entities (their `data` is shown to the LLM).
 * @param {string[]} gaps - Descriptions of the missing data.
 * @param {object} world - Must provide `mutate(mutation)`.
 * @param {object} llm - Must provide `generate(prompt)` resolving to a string.
 * @param {object} [scenario] - Optional scenario; `persona` fields are added to the prompt.
 * @param {object} [config] - Unused; kept for signature compatibility.
 * @returns {Promise<{entities: Array<{id: *, data: object}>, mutations: Array}>}
 */
async function generateGapData(toolName, toolDescription, input, existing, gaps, world, llm, scenario, config) {
    const currentDate = new Date().toISOString().split('T')[0];
    const gapList = Array.isArray(gaps) ? gaps : [];
    const scenarioContext = scenario ? `
SCENARIO CONTEXT:
- Persona: ${scenario.persona.backstory}
- Situation: ${scenario.persona.situation}
- Style: ${scenario.persona.style}
` : '';
    const prompt = `Generate ONLY the missing data to fill gaps in this query.

EXISTING DATA (do NOT duplicate these - they will be included automatically):
${JSON.stringify(existing.map(e => e.data), null, 2)}

${scenarioContext}
TOOL: ${toolName}
DESCRIPTION: ${toolDescription}
INPUT: ${JSON.stringify(input)}
CURRENT DATE: ${currentDate}

GAPS TO FILL:
${gapList.map(g => `- ${g}`).join('\n')}

Generate NEW entities to fill these gaps. Be consistent with existing data:
- Use similar price ranges
- Use similar ID formats
- Maintain realistic relationships

Return JSON only:
{
"newEntities": [<array of new entity data objects>],
"mutations": [
{ "operation": "create", "collection": "<collection>", "id": "<id>", "data": {<entity data>} }
]
}`;
    const result = await llm.generate(prompt);
    // The reply may wrap the JSON in prose; take the outermost {...} span.
    const jsonMatch = result.match(/\{[\s\S]*\}/);
    if (!jsonMatch) {
        return { entities: [], mutations: [] };
    }
    let parsed;
    try {
        parsed = JSON.parse(jsonMatch[0]);
    }
    catch {
        return { entities: [], mutations: [] };
    }
    // Build the entity list first so a malformed entry cannot leave the
    // world mutated while the caller is told nothing happened.
    const rawEntities = Array.isArray(parsed.newEntities) ? parsed.newEntities : [];
    const entities = rawEntities.flatMap((data, idx) => (data !== null && typeof data === 'object'
        ? [{ id: data.id || `generated-${idx}`, data }]
        : []));
    const requested = Array.isArray(parsed.mutations) ? parsed.mutations : [];
    const mutations = [];
    for (const mutation of requested) {
        try {
            world.mutate(mutation);
            mutations.push(mutation);
        }
        catch (err) {
            console.warn(`Skipping gap mutation rejected by world state for ${toolName}:`, err);
        }
    }
    return { entities, mutations };
}
277
/**
 * Phase 2: Execute based on the analysis result.
 *
 * - 'return_existing' / 'filter': no LLM call; the entities named in
 *   `analysis.matches` are looked up and returned. The analysis phase already
 *   decided which entities fit, so both actions resolve the same way.
 * - 'augment': every entity of `analysis.collection` is combined with
 *   LLM-generated entities for the gaps; existing data wins on ID clashes.
 * - 'generate' or anything else: full LLM generation.
 *
 * @param {object} analysis - Phase 1 result (`action`, `matches`, `collection`, `gaps`).
 * @param {string} toolName - Name of the tool being simulated.
 * @param {string} toolDescription - Description of the tool.
 * @param {unknown} input - Tool input.
 * @param {object} world - Must provide `getEntity(collection, id)` and `getCollection(name)`.
 * @param {object} llm - LLM provider passed through to the generators.
 * @param {object} [scenario] - Optional scenario context.
 * @param {Array<object>} [conversationHistory] - Optional conversation context.
 * @param {object} [config] - Optional per-tool simulation config.
 * @returns {Promise<object>} `{ response, mutations, analysis }`.
 */
async function executeAction(analysis, toolName, toolDescription, input, world, llm, scenario, conversationHistory, config) {
    // The analysis comes from an LLM; tolerate a missing matches/gaps list.
    const matchIds = Array.isArray(analysis.matches) ? analysis.matches : [];
    const gaps = Array.isArray(analysis.gaps) ? analysis.gaps : [];
    // Resolve matched IDs, dropping any that no longer exist (e.g. deleted).
    const resolveMatches = () => matchIds
        .map((id) => world.getEntity(analysis.collection, id))
        .filter((entity) => entity != null);
    switch (analysis.action) {
        case 'return_existing':
        case 'filter': {
            const result = formatAsToolResponse(toolName, resolveMatches(), config);
            return { ...result, analysis };
        }
        case 'augment': {
            // IMPORTANT: use ALL entities from the collection, not just
            // analysis.matches. This prevents the LLM from regenerating
            // entities it missed in analysis.
            const collection = analysis.collection ? world.getCollection(analysis.collection) : null;
            const allExisting = collection
                ? Array.from(collection, ([, entity]) => entity)
                : resolveMatches(); // Fallback to analysis.matches if no collection
            const { entities: newEntities, mutations } = await generateGapData(toolName, toolDescription, input, allExisting, gaps, world, llm, scenario, config);
            // Merge existing + new, dedupe by ID (prefer existing data)
            const existingIds = new Set(allExisting.map((entity) => entity.id));
            const freshEntities = newEntities.filter((entity) => !existingIds.has(entity.id));
            const result = formatAsToolResponse(toolName, [...allExisting, ...freshEntities], config);
            return { ...result, mutations, analysis };
        }
        case 'generate':
        default: {
            // Full LLM generation
            const result = await generateFullResponse(toolName, toolDescription, input, world, llm, scenario, conversationHistory, config);
            return { ...result, analysis };
        }
    }
}
333
/**
 * Simulate a tool call using the two-phase approach.
 *
 * Phase 1: analyze world state vs. the query.
 * Phase 2: execute based on the analysis (may skip the LLM entirely).
 *
 * @param {string} toolName - Name of the tool being simulated.
 * @param {string} toolDescription - Description of the tool.
 * @param {unknown} input - Tool input.
 * @param {object} world - World state the call runs against.
 * @param {object} llm - LLM provider.
 * @param {object} [scenario] - Optional scenario context (phase 2 only).
 * @param {Array<object>} [conversationHistory] - Optional conversation context (phase 2 only).
 * @param {object} [config] - Optional per-tool simulation config (phase 2 only).
 * @returns {Promise<object>} The result produced by the execution phase.
 */
export async function simulateToolCall(toolName, toolDescription, input, world, llm, scenario, conversationHistory, config) {
    // Phase 1: Analyze
    const analysis = await analyzeQuery(toolName, toolDescription, input, world, llm);
    // Phase 2: Execute based on analysis
    const outcome = await executeAction(analysis, toolName, toolDescription, input, world, llm, scenario, conversationHistory, config);
    return outcome;
}
345
/**
 * Generate a full response via the LLM (used when the action is 'generate').
 * Called when world state is empty or this is a write operation.
 *
 * If `config.outputSchema` is provided, the LLM is instructed to match that
 * shape and the response is checked with `safeParse` (a mismatch is only
 * warned about; the raw response is still used).
 * If `config.effects` is provided, mutations are computed from the response;
 * otherwise the LLM's own `mutations` array is used. Mutations are applied
 * to `world`, then `config.invariants` are checked (violations are only
 * warned about).
 *
 * @param {string} toolName - Name of the tool being simulated.
 * @param {string} toolDescription - Description of the tool.
 * @param {unknown} input - Tool input; serialized with JSON.stringify.
 * @param {object} world - Must provide `toJSON()` and `mutate(mutation)`.
 * @param {object} llm - Must provide `generate(prompt)` resolving to a string.
 * @param {object} [scenario] - Optional scenario; `persona` fields are added to the prompt.
 * @param {Array<object>} [conversationHistory] - Optional turns; the last six are added to the prompt.
 * @param {object} [config] - Optional `outputSchema`, `effects`, `invariants`.
 * @returns {Promise<{response: *, mutations: Array}>}
 * @throws {Error} If no JSON object can be extracted or parsed from the LLM reply.
 */
async function generateFullResponse(toolName, toolDescription, input, world, llm, scenario, conversationHistory, config) {
    const worldState = world.toJSON();
    const hasSeededData = Object.keys(worldState).length > 0;
    const currentDate = new Date().toISOString().split('T')[0];
    const hasEffectsFunction = !!config?.effects;
    // Extract scenario context for more accurate simulation
    const scenarioContext = scenario ? `
SCENARIO CONTEXT:
- Persona: ${scenario.persona.backstory}
- Situation: ${scenario.persona.situation}
- Style: ${scenario.persona.style}
${scenario.persona.constraints ? `- Constraints: ${JSON.stringify(scenario.persona.constraints)}` : ''}

IMPORTANT: Generate data that matches the SCENARIO CONTEXT, not generic data.
For example, if the persona wants items from category X, generate category X items.
` : '';
    // Include recent conversation for context (e.g., user said "February 14th")
    const recentConversation = conversationHistory && conversationHistory.length > 0 ? `
RECENT CONVERSATION (for context):
${conversationHistory.slice(-6).map(e => `${e.role.toUpperCase()}: ${e.content}`).join('\n')}

IMPORTANT: Use conversation context to understand what the user actually wants.
For example, if user mentioned specific criteria, generate data matching those criteria even if not in tool input.
` : '';
    // Output schema instruction
    const outputSchemaInstruction = config?.outputSchema ? `
OUTPUT SCHEMA (you MUST match this structure exactly):
${schemaToDescription(config.outputSchema)}
` : '';
    // Determine if we need mutations in the prompt
    const mutationsInstruction = hasEffectsFunction
        ? `\nNOTE: Do NOT include "mutations" in your response - they will be computed automatically.`
        : `
If this tool creates or modifies entities, include mutations.`;
    const responseFormat = hasEffectsFunction
        ? `Return JSON only:
{
"response": <the tool response matching the output schema>
}`
        : `Return JSON only:
{
"response": <the tool response - match the expected format>,
"mutations": [
{ "operation": "create", "collection": "orders", "id": "ORD-123", "data": {...} },
{ "operation": "update", "collection": "inventory", "id": "ITEM-456", "data": { "quantity": 49 } }
]
}`;
    const prompt = `You are simulating a tool response for an AI agent test.

CURRENT DATE: ${currentDate}
${scenarioContext}${recentConversation}
${hasSeededData ? `EXISTING WORLD STATE:
${JSON.stringify(worldState, null, 2)}

NOTE: If existing data doesn't match the scenario context (e.g., wrong routes), generate NEW contextually appropriate data instead.` : `WORLD STATE: (empty - generate plausible mock data)`}

TOOL CALL:
Name: ${toolName}
Description: ${toolDescription}
Input: ${JSON.stringify(input)}
${outputSchemaInstruction}
Generate a realistic response that makes sense for the scenario and input.
${mutationsInstruction}

RULES:
1. PRIORITIZE scenario context over seeded data - generate data matching what the persona actually wants
2. Use readable IDs (UA123, BK-7890) not UUIDs
3. Be contextually realistic:
- Dates should be current or near-future (relative to ${currentDate})
- Prices should be realistic for the domain
- Routes/data should match what the persona actually wants
4. For search/query operations: return 2-4 entities matching the INPUT parameters
5. For create/write operations: create new entities and update related ones (e.g., decrement inventory)
${hasEffectsFunction ? '' : '6. Always include mutations to track generated data in world state'}

${responseFormat}`;
    const result = await llm.generate(prompt);
    // The reply may wrap the JSON in prose; take the outermost {...} span.
    const jsonMatch = result.match(/\{[\s\S]*\}/);
    if (!jsonMatch) {
        throw new Error(`Failed to parse simulated response for ${toolName}`);
    }
    let parsed;
    try {
        parsed = JSON.parse(jsonMatch[0]);
    }
    catch (err) {
        // Report malformed JSON the same way as a reply with no JSON at all,
        // keeping the underlying SyntaxError as the cause.
        throw new Error(`Failed to parse simulated response for ${toolName}`, { cause: err });
    }
    let response = parsed.response;
    // Validate against output schema if provided
    if (config?.outputSchema) {
        const validation = config.outputSchema.safeParse(response);
        if (validation.success) {
            response = validation.data;
        }
        else {
            // Use the response anyway - the LLM might have gotten close enough
            console.warn(`Simulated response for ${toolName} doesn't match output schema:`, validation.error);
        }
    }
    // Compute mutations
    let mutations = [];
    if (config?.effects) {
        // Use the effects function to compute mutations from the response.
        // Pass world so effects can read current state (e.g., for computing decrements)
        mutations = config.effects(response, world).map(effectToMutation);
    }
    else if (Array.isArray(parsed.mutations)) {
        // Use LLM-generated mutations (legacy path). Anything that is not an
        // array is ignored rather than iterated.
        mutations = parsed.mutations;
    }
    // Apply mutations to world
    for (const mutation of mutations) {
        world.mutate(mutation);
    }
    // Validate invariants if provided
    if (config?.invariants && config.invariants.length > 0) {
        const { valid, errors } = validateInvariants(world, config.invariants);
        if (!valid) {
            // Could throw here to make it strict, but for now just warn
            console.warn(`Invariant violations after ${toolName}:`, errors);
        }
    }
    return {
        response,
        mutations,
    };
}
476
+ //# sourceMappingURL=simulate.js.map