@ryanfw/prompt-orchestration-pipeline 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +290 -0
- package/package.json +51 -0
- package/src/api/index.js +220 -0
- package/src/cli/index.js +70 -0
- package/src/core/config.js +345 -0
- package/src/core/environment.js +56 -0
- package/src/core/orchestrator.js +335 -0
- package/src/core/pipeline-runner.js +182 -0
- package/src/core/retry.js +83 -0
- package/src/core/task-runner.js +305 -0
- package/src/core/validation.js +100 -0
- package/src/llm/README.md +345 -0
- package/src/llm/index.js +320 -0
- package/src/providers/anthropic.js +117 -0
- package/src/providers/base.js +71 -0
- package/src/providers/deepseek.js +122 -0
- package/src/providers/openai.js +314 -0
- package/src/ui/README.md +86 -0
- package/src/ui/public/app.js +260 -0
- package/src/ui/public/index.html +53 -0
- package/src/ui/public/style.css +341 -0
- package/src/ui/server.js +230 -0
- package/src/ui/state.js +67 -0
- package/src/ui/watcher.js +85 -0
package/src/llm/README.md
ADDED

@@ -0,0 +1,345 @@

# LLM Abstraction Layer

This directory contains the unified LLM (Large Language Model) abstraction layer for the prompt orchestration pipeline.

## Purpose

The LLM layer provides a consistent interface for interacting with multiple AI providers (OpenAI, DeepSeek, Anthropic) while handling:

- Provider routing and API key validation
- Request/response formatting
- Token usage tracking and cost calculation
- Event-based metrics collection
- Error handling and retries
- Multi-turn conversation chains

## Architecture

```
src/
├── llm/
│   ├── index.js      ← Main LLM abstraction layer
│   └── README.md     ← This documentation
└── providers/
    ├── base.js       ← Base provider class
    ├── openai.js     ← OpenAI implementation
    ├── deepseek.js   ← DeepSeek implementation
    └── anthropic.js  ← Anthropic implementation
```

## Core Functions

### `chat(options)`

Main function for LLM interactions. Supports multiple providers and models.

```javascript
import { chat } from "../llm/index.js";

const response = await chat({
  provider: "openai", // or "deepseek", "anthropic"
  model: "gpt-5-chat-latest",
  messages: [
    { role: "system", content: "You are a helpful assistant" },
    { role: "user", content: "Hello!" },
  ],
  temperature: 0.7,
  maxTokens: 1000,
  metadata: { taskId: "task-123" }, // Optional tracking data
});

console.log(response.content); // AI response text
console.log(response.usage); // Token usage stats
```

### `complete(prompt, options)`

Convenience function for simple single-turn completions.

```javascript
import { complete } from "../llm/index.js";

const response = await complete("What is 2+2?", {
  provider: "openai",
  model: "gpt-5-chat-latest",
});

console.log(response.content); // "4"
```

### `createLLM(options)`

Factory function to create a bound LLM interface with default settings.

```javascript
import { createLLM } from "../llm/index.js";

const llm = createLLM({
  defaultProvider: "openai",
  defaultModel: "gpt-5-chat-latest",
});

// Use the bound interface
const response = await llm.chat({
  messages: [{ role: "user", content: "Hello!" }],
});

// Or use convenience methods
const result = await llm.complete("What is AI?");
```

### `createChain()`

Create a multi-turn conversation chain.

```javascript
import { createChain } from "../llm/index.js";

const chain = createChain();

chain
  .addSystemMessage("You are a helpful math tutor")
  .addUserMessage("What is 2+2?");

const response1 = await chain.execute({ provider: "openai" });
console.log(response1.content); // "4"

chain.addUserMessage("What about 3+3?");
const response2 = await chain.execute({ provider: "openai" });
console.log(response2.content); // "6"

// Get full conversation history
const history = chain.getMessages();
```

### `withRetry(fn, args, options)`

Retry wrapper with exponential backoff. Pass `maxRetries` and `backoffMs` in the options object; both fall back to the pipeline configuration when omitted, and authentication errors (HTTP 401 or a missing API key) are not retried.

```javascript
import { withRetry, chat } from "../llm/index.js";

const response = await withRetry(
  chat,
  [
    {
      provider: "openai",
      messages: [{ role: "user", content: "Hello!" }],
    },
  ],
  {
    maxRetries: 3, // max retries
    backoffMs: 1000, // initial backoff in ms (doubles on each retry)
  }
);
```

### `parallel(fn, items, maxConcurrency)`

Execute multiple LLM requests in parallel with concurrency control.

```javascript
import { parallel, chat } from "../llm/index.js";

const prompts = ["What is AI?", "What is ML?", "What is DL?"];

const responses = await parallel(
  (prompt) =>
    chat({
      provider: "openai",
      messages: [{ role: "user", content: prompt }],
    }),
  prompts,
  5 // max concurrent requests
);
```

## Utility Functions

### `getAvailableProviders()`

Check which providers have API keys configured.

```javascript
import { getAvailableProviders } from "../llm/index.js";

const available = getAvailableProviders();
// { openai: true, deepseek: false, anthropic: true, mock: false }
```
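
One pattern this enables is a simple availability-based fallback before calling `chat()`. The sketch below prefers DeepSeek when its key is configured; the preference order is only an example:

```javascript
import { getAvailableProviders, chat } from "../llm/index.js";

// Fall back to OpenAI when no DeepSeek key is configured.
const available = getAvailableProviders();
const provider = available.deepseek ? "deepseek" : "openai";

const response = await chat({
  provider,
  messages: [{ role: "user", content: "Summarize this changelog." }],
});
```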

### `calculateCost(provider, model, usage)`

Calculate the cost of an LLM request based on token usage.

```javascript
import { calculateCost } from "../llm/index.js";

const cost = calculateCost("openai", "gpt-5-chat-latest", {
  promptTokens: 100,
  completionTokens: 50,
});

console.log(`Cost: $${cost.toFixed(4)}`);
```

### `estimateTokens(text)`

Rough estimation of token count for a text string (approximately one token per four characters).

```javascript
import { estimateTokens } from "../llm/index.js";

const tokens = estimateTokens("Hello, world!");
console.log(`Estimated tokens: ${tokens}`);
```
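
A possible use is a rough pre-flight check before sending a large prompt. In the sketch below, `largeDocument` stands in for text loaded elsewhere, and the 8,000-token budget is an arbitrary example rather than a limit enforced by the layer:

```javascript
import { estimateTokens, chat } from "../llm/index.js";

const prompt = largeDocument; // assumed to be loaded elsewhere

// Skip the request if the estimate blows past the (example) budget.
if (estimateTokens(prompt) > 8000) {
  throw new Error("Prompt too large; split it before sending");
}

const response = await chat({
  provider: "openai",
  messages: [{ role: "user", content: prompt }],
});
```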

## Event System

The LLM layer emits events for monitoring and metrics collection.

```javascript
import { getLLMEvents } from "../llm/index.js";

const events = getLLMEvents();

// Listen for request start
events.on("llm:request:start", (data) => {
  console.log(`Request ${data.id} started`);
  console.log(`Provider: ${data.provider}, Model: ${data.model}`);
});

// Listen for successful completion
events.on("llm:request:complete", (data) => {
  console.log(`Request ${data.id} completed in ${data.duration}ms`);
  console.log(`Tokens: ${data.totalTokens}, Cost: $${data.cost}`);
});

// Listen for errors
events.on("llm:request:error", (data) => {
  console.error(`Request ${data.id} failed: ${data.error}`);
});
```
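
The same events can drive simple aggregation. The sketch below tallies cost per `taskId` from the `metadata` passed to `chat()`; it is illustrative rather than a built-in feature:

```javascript
import { getLLMEvents } from "../llm/index.js";

// Roll up spend per task as completion events arrive.
const costByTask = new Map();

getLLMEvents().on("llm:request:complete", (data) => {
  const taskId = data.metadata?.taskId ?? "untracked";
  costByTask.set(taskId, (costByTask.get(taskId) ?? 0) + (data.cost ?? 0));
});
```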

## Provider Configuration

### Environment Variables

```bash
# OpenAI
export OPENAI_API_KEY="sk-..."

# DeepSeek
export DEEPSEEK_API_KEY="..."

# Anthropic
export ANTHROPIC_API_KEY="..."
```

### Supported Models

**OpenAI:**

- `gpt-5-chat-latest` (default)
- `gpt-5-chat-preview`
- `gpt-4-turbo-preview`
- `gpt-4`
- `gpt-3.5-turbo`

**DeepSeek:**

- `deepseek-reasoner` (default)
- `deepseek-chat`
- `deepseek-coder`

**Anthropic:**

- `claude-3-opus-20240229`
- `claude-3-sonnet-20240229`
- `claude-3-haiku-20240307`
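
To target a specific model, pass it alongside the provider; when `model` is omitted, the provider default marked above is used. For example:

```javascript
import { chat } from "../llm/index.js";

const response = await chat({
  provider: "deepseek",
  model: "deepseek-chat", // omit to use the default, deepseek-reasoner
  messages: [{ role: "user", content: "Explain tail recursion briefly." }],
});
```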

## Error Handling

Provider errors are re-thrown to the caller (after an `llm:request:error` event is emitted), so handle common scenarios wherever you call `chat()`:

```javascript
try {
  const response = await chat({
    provider: "openai",
    messages: [{ role: "user", content: "Hello!" }],
  });
} catch (error) {
  if (error.status === 401) {
    console.error("Invalid API key");
  } else if (error.message?.includes("rate limit")) {
    console.error("Rate limit exceeded");
  } else {
    console.error("Request failed:", error.message);
  }
}
```

## Best Practices

1. **Use `createLLM()` for consistent settings:**

   ```javascript
   const llm = createLLM({ defaultProvider: "openai" });
   ```

2. **Add metadata for tracking:**

   ```javascript
   await chat({
     messages: [...],
     metadata: { taskId: "task-123", userId: "user-456" },
   });
   ```

3. **Use chains for multi-turn conversations:**

   ```javascript
   const chain = createChain();
   // Maintains conversation history automatically
   ```

4. **Implement retries for reliability:**

   ```javascript
   await withRetry(chat, [options], { maxRetries: 3, backoffMs: 1000 });
   ```

5. **Control concurrency for batch operations:**

   ```javascript
   await parallel(fn, items, 5); // Max 5 concurrent requests
   ```

## Testing

See `tests/llm.test.js` for comprehensive test coverage including:

- Provider routing and validation
- Token usage tracking
- Cost calculation
- Event emission
- Error handling
- Retry logic
- Parallel execution
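
For quick local experiments without API keys, the `registerMockProvider()` hook exported by this module can stand in for a real provider. A minimal sketch, with an illustrative echo response and made-up token counts:

```javascript
import { registerMockProvider, chat } from "../llm/index.js";

// A stub shaped like the object chat() expects for provider: "mock".
registerMockProvider({
  chat: async ({ messages }) => ({
    content: `echo: ${messages.at(-1).content}`,
    raw: null,
    usage: { prompt_tokens: 5, completion_tokens: 5, total_tokens: 10 },
  }),
});

const response = await chat({
  provider: "mock",
  messages: [{ role: "user", content: "ping" }],
});
// response.content === "echo: ping"
```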

## Migration from Legacy Code

If you're migrating from the old `src/providers/index.js` (now removed):

```javascript
// OLD (removed):
import { chat } from "../providers/index.js";

// NEW (correct):
import { chat } from "../llm/index.js";
```

All exports are compatible - this is a drop-in replacement.

## Related Documentation

- **Architecture:** `docs/architecture.md`
- **Provider Implementations:** `src/providers/`
- **Test Coverage:** `tests/llm.test.js`
- **Provider Fix Documentation:** `docs/providers-fix.md`

package/src/llm/index.js
ADDED

@@ -0,0 +1,320 @@

import { openaiChat } from "../providers/openai.js";
import { deepseekChat } from "../providers/deepseek.js";
import { EventEmitter } from "node:events";
import { getConfig } from "../core/config.js";

// Global mock provider instance (for demo/testing)
let mockProviderInstance = null;

// Global event bus for LLM metrics
const llmEvents = new EventEmitter();
export const getLLMEvents = () => llmEvents;

// Register mock provider for demo/testing
export function registerMockProvider(provider) {
  mockProviderInstance = provider;
}

// Check available providers
export function getAvailableProviders() {
  return {
    openai: !!process.env.OPENAI_API_KEY,
    deepseek: !!process.env.DEEPSEEK_API_KEY,
    anthropic: !!process.env.ANTHROPIC_API_KEY,
    mock: !!mockProviderInstance,
  };
}

// Simple token estimation
export function estimateTokens(text) {
  return Math.ceil((text || "").length / 4);
}

// Calculate cost based on provider and model
export function calculateCost(provider, model, usage) {
  const pricing = {
    mock: {
      "gpt-3.5-turbo": { prompt: 0.0005, completion: 0.0015 },
      "gpt-4": { prompt: 0.03, completion: 0.06 },
      "gpt-4-turbo": { prompt: 0.01, completion: 0.03 },
    },
    openai: {
      "gpt-5-chat-latest": { prompt: 0.015, completion: 0.06 },
      "gpt-4": { prompt: 0.03, completion: 0.06 },
      "gpt-4-turbo": { prompt: 0.01, completion: 0.03 },
      "gpt-3.5-turbo": { prompt: 0.0005, completion: 0.0015 },
    },
    deepseek: {
      "deepseek-reasoner": { prompt: 0.001, completion: 0.002 },
      "deepseek-chat": { prompt: 0.0005, completion: 0.001 },
    },
    anthropic: {
      "claude-3-opus": { prompt: 0.015, completion: 0.075 },
      "claude-3-sonnet": { prompt: 0.003, completion: 0.015 },
    },
  };

  const modelPricing = pricing[provider]?.[model];
  if (!modelPricing || !usage) return 0;

  const promptCost = ((usage.promptTokens || 0) / 1000) * modelPricing.prompt;
  const completionCost =
    ((usage.completionTokens || 0) / 1000) * modelPricing.completion;

  return promptCost + completionCost;
}

// Main chat function - no metrics handling needed!
export async function chat(options) {
  const {
    provider = "openai",
    model,
    messages = [],
    temperature,
    maxTokens,
    metadata = {},
    ...rest
  } = options;

  const available = getAvailableProviders();

  if (!available[provider]) {
    throw new Error(`Provider ${provider} not available. Check API keys.`);
  }

  const startTime = Date.now();
  const requestId = `req_${Date.now()}_${Math.random().toString(36).substring(7)}`;

  // Extract system and user messages
  const systemMsg = messages.find((m) => m.role === "system")?.content || "";
  const userMessages = messages.filter((m) => m.role === "user");
  const userMsg = userMessages.map((m) => m.content).join("\n");

  // Emit request start event
  llmEvents.emit("llm:request:start", {
    id: requestId,
    provider,
    model,
    metadata,
    timestamp: new Date().toISOString(),
  });

  try {
    let response;
    let usage;

    if (provider === "mock") {
      if (!mockProviderInstance) {
        throw new Error(
          "Mock provider not registered. Call registerMockProvider() first."
        );
      }

      const result = await mockProviderInstance.chat({
        messages,
        model: model || "gpt-3.5-turbo",
        temperature,
        maxTokens,
        ...rest,
      });

      response = {
        content: result.content,
        raw: result.raw,
      };

      usage = {
        promptTokens: result.usage.prompt_tokens,
        completionTokens: result.usage.completion_tokens,
        totalTokens: result.usage.total_tokens,
      };
    } else if (provider === "openai") {
      const result = await openaiChat({
        messages,
        model: model || "gpt-5-chat-latest",
        maxTokens,
        temperature,
        ...rest,
      });

      response = {
        content: typeof result === "string" ? result : JSON.stringify(result),
        raw: result,
      };

      // Estimate tokens since GPT-5 responses API might not return usage
      const promptTokens = estimateTokens(systemMsg + userMsg);
      const completionTokens = estimateTokens(response.content);
      usage = {
        promptTokens,
        completionTokens,
        totalTokens: promptTokens + completionTokens,
      };
    } else if (provider === "deepseek") {
      const result = await deepseekChat(
        systemMsg,
        userMsg,
        model || "deepseek-reasoner"
      );

      response = {
        content: typeof result === "string" ? result : JSON.stringify(result),
        raw: result,
      };

      const promptTokens = estimateTokens(systemMsg + userMsg);
      const completionTokens = estimateTokens(response.content);
      usage = {
        promptTokens,
        completionTokens,
        totalTokens: promptTokens + completionTokens,
      };
    } else {
      throw new Error(`Provider ${provider} not yet implemented`);
    }

    const duration = Date.now() - startTime;
    const cost = calculateCost(provider, model, usage);

    // Emit success event with metrics
    llmEvents.emit("llm:request:complete", {
      id: requestId,
      provider,
      model,
      duration,
      ...usage,
      cost,
      metadata,
      timestamp: new Date().toISOString(),
    });

    // Return clean response - no metrics attached!
    return {
      ...response,
      usage,
    };
  } catch (error) {
    const duration = Date.now() - startTime;

    // Emit error event
    llmEvents.emit("llm:request:error", {
      id: requestId,
      provider,
      model,
      duration,
      error: error.message,
      metadata,
      timestamp: new Date().toISOString(),
    });

    throw error;
  }
}

// Convenience function for simple completions
export async function complete(prompt, options = {}) {
  return chat({
    ...options,
    messages: [{ role: "user", content: prompt }],
  });
}

// Create a chain for multi-turn conversations
export function createChain() {
  const messages = [];

  return {
    addSystemMessage: function (content) {
      messages.push({ role: "system", content });
      return this;
    },

    addUserMessage: function (content) {
      messages.push({ role: "user", content });
      return this;
    },

    addAssistantMessage: function (content) {
      messages.push({ role: "assistant", content });
      return this;
    },

    execute: async function (options = {}) {
      const response = await chat({ ...options, messages });
      messages.push({
        role: "assistant",
        content: response.content,
      });
      return response;
    },

    getMessages: () => [...messages],

    clear: function () {
      messages.length = 0;
      return this;
    },
  };
}

// Retry wrapper
export async function withRetry(fn, args = [], options = {}) {
  const config = getConfig();
  const maxRetries = options.maxRetries ?? config.llm.retryMaxAttempts;
  const backoffMs = options.backoffMs ?? config.llm.retryBackoffMs;

  let lastError;

  for (let i = 0; i <= maxRetries; i++) {
    try {
      if (i > 0) {
        await new Promise((r) => setTimeout(r, backoffMs * Math.pow(2, i - 1)));
      }
      return await fn(...args);
    } catch (error) {
      lastError = error;
      // Don't retry auth errors
      if (error.status === 401 || error.message?.includes("API key")) {
        throw error;
      }
    }
  }

  throw lastError;
}

// Parallel execution with concurrency control
export async function parallel(fn, items, maxConcurrency) {
  const config = getConfig();
  const concurrency = maxConcurrency ?? config.llm.maxConcurrency;

  const results = [];
  for (let i = 0; i < items.length; i += concurrency) {
    const batch = items.slice(i, i + concurrency);
    const batchResults = await Promise.all(batch.map((item) => fn(item)));
    results.push(...batchResults);
  }
  return results;
}

// Create a bound LLM interface (no metrics handling needed!)
export function createLLM(options = {}) {
  const config = getConfig();
  const defaultProvider = options.defaultProvider || config.llm.defaultProvider;
  // Honor the documented defaultModel option; per-call opts still override it.
  const defaultModel = options.defaultModel;

  return {
    chat: (opts) =>
      chat({ provider: defaultProvider, model: defaultModel, ...opts }),
    complete: (prompt, opts) =>
      complete(prompt, { provider: defaultProvider, model: defaultModel, ...opts }),
    createChain: () => createChain(),
    withRetry: (opts) =>
      withRetry(chat, [{ provider: defaultProvider, model: defaultModel, ...opts }]),
    parallel: (requests, maxConcurrency) =>
      parallel(
        (req) => chat({ provider: defaultProvider, model: defaultModel, ...req }),
        requests,
        maxConcurrency
      ),
    getAvailableProviders,
  };
}