@ryanfw/prompt-orchestration-pipeline 0.0.1

@@ -0,0 +1,345 @@
# LLM Abstraction Layer

This directory contains the unified LLM (Large Language Model) abstraction layer for the prompt orchestration pipeline.

## Purpose

The LLM layer provides a consistent interface for interacting with multiple AI providers (OpenAI, DeepSeek, Anthropic) while handling:

- Provider routing and API key validation
- Request/response formatting
- Token usage tracking and cost calculation
- Event-based metrics collection
- Error handling and retries
- Multi-turn conversation chains

## Architecture

```
src/
├── llm/
│   ├── index.js      ← Main LLM abstraction layer
│   └── README.md     ← This documentation
└── providers/
    ├── base.js       ← Base provider class
    ├── openai.js     ← OpenAI implementation
    ├── deepseek.js   ← DeepSeek implementation
    └── anthropic.js  ← Anthropic implementation
```

## Core Functions

### `chat(options)`

Main function for LLM interactions. Supports multiple providers and models.

```javascript
import { chat } from "../llm/index.js";

const response = await chat({
  provider: "openai", // or "deepseek", "anthropic"
  model: "gpt-5-chat-latest",
  messages: [
    { role: "system", content: "You are a helpful assistant" },
    { role: "user", content: "Hello!" },
  ],
  temperature: 0.7,
  maxTokens: 1000,
  metadata: { taskId: "task-123" }, // Optional tracking data
});

console.log(response.content); // AI response text
console.log(response.usage); // Token usage stats
```

### `complete(prompt, options)`

Convenience function for simple single-turn completions.

```javascript
import { complete } from "../llm/index.js";

const response = await complete("What is 2+2?", {
  provider: "openai",
  model: "gpt-5-chat-latest",
});

console.log(response.content); // "4"
```

### `createLLM(options)`

Factory function to create a bound LLM interface with default settings.

```javascript
import { createLLM } from "../llm/index.js";

const llm = createLLM({
  defaultProvider: "openai",
  defaultModel: "gpt-5-chat-latest",
});

// Use the bound interface
const response = await llm.chat({
  messages: [{ role: "user", content: "Hello!" }],
});

// Or use convenience methods
const result = await llm.complete("What is AI?");
```

Note: the current implementation binds only `defaultProvider`; pass `model` on individual calls to override the provider's default model.

### `createChain()`

Create a multi-turn conversation chain.

```javascript
import { createChain } from "../llm/index.js";

const chain = createChain();

chain
  .addSystemMessage("You are a helpful math tutor")
  .addUserMessage("What is 2+2?");

const response1 = await chain.execute({ provider: "openai" });
console.log(response1.content); // "4"

chain.addUserMessage("What about 3+3?");
const response2 = await chain.execute({ provider: "openai" });
console.log(response2.content); // "6"

// Get full conversation history
const history = chain.getMessages();
```

Each `execute()` call automatically appends the assistant reply to the history. The chain also exposes `addAssistantMessage(content)` and `clear()`.

### `withRetry(fn, args, options)`

Retry wrapper with exponential backoff. `options.maxRetries` and `options.backoffMs` fall back to the pipeline config (`llm.retryMaxAttempts`, `llm.retryBackoffMs`) when omitted.

```javascript
import { withRetry, chat } from "../llm/index.js";

const response = await withRetry(
  chat,
  [
    {
      provider: "openai",
      messages: [{ role: "user", content: "Hello!" }],
    },
  ],
  { maxRetries: 3, backoffMs: 1000 } // initial backoff doubles on each retry
);
```

### `parallel(fn, items, maxConcurrency)`

Execute multiple LLM requests in parallel with concurrency control. Items are processed in sequential batches of up to `maxConcurrency`; when omitted, the limit comes from the pipeline config (`llm.maxConcurrency`).

```javascript
import { parallel, chat } from "../llm/index.js";

const prompts = ["What is AI?", "What is ML?", "What is DL?"];

const responses = await parallel(
  (prompt) =>
    chat({
      provider: "openai",
      messages: [{ role: "user", content: prompt }],
    }),
  prompts,
  5 // max concurrent requests
);
```

## Utility Functions

### `getAvailableProviders()`

Check which providers have API keys configured (and whether a mock provider has been registered).

```javascript
import { getAvailableProviders } from "../llm/index.js";

const available = getAvailableProviders();
// { openai: true, deepseek: false, anthropic: true, mock: false }
```

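For demos and tests, a mock provider can be registered so that `chat({ provider: "mock", ... })` never reaches a real API. A minimal sketch (the mock object here is illustrative; it only needs a `chat()` method that returns `content`, `raw`, and OpenAI-style `usage` counts):

```javascript
import { registerMockProvider, chat } from "../llm/index.js";

// Illustrative mock: echoes the last user message with fake usage numbers.
registerMockProvider({
  chat: async ({ messages }) => ({
    content: `Echo: ${messages.at(-1).content}`,
    raw: { mocked: true },
    usage: { prompt_tokens: 8, completion_tokens: 4, total_tokens: 12 },
  }),
});

const response = await chat({
  provider: "mock",
  messages: [{ role: "user", content: "Hello!" }],
});
console.log(response.content); // "Echo: Hello!"
```
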
### `calculateCost(provider, model, usage)`

Calculate the cost of an LLM request based on token usage.

```javascript
import { calculateCost } from "../llm/index.js";

const cost = calculateCost("openai", "gpt-5-chat-latest", {
  promptTokens: 100,
  completionTokens: 50,
});

console.log(`Cost: $${cost.toFixed(4)}`);
```

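With the rates bundled in `index.js` for `gpt-5-chat-latest` ($0.015 prompt / $0.06 completion per 1,000 tokens), this example works out to (100 / 1000) × 0.015 + (50 / 1000) × 0.06 = $0.0045. Unknown provider/model combinations return a cost of 0.
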
### `estimateTokens(text)`

Rough estimation of token count for a text string.

```javascript
import { estimateTokens } from "../llm/index.js";

const tokens = estimateTokens("Hello, world!");
console.log(`Estimated tokens: ${tokens}`);
```

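The estimate is a simple length-based heuristic (roughly 4 characters per token), so `"Hello, world!"` (13 characters) yields 4 tokens.
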
## Event System

The LLM layer emits events for monitoring and metrics collection.

```javascript
import { getLLMEvents } from "../llm/index.js";

const events = getLLMEvents();

// Listen for request start
events.on("llm:request:start", (data) => {
  console.log(`Request ${data.id} started`);
  console.log(`Provider: ${data.provider}, Model: ${data.model}`);
});

// Listen for successful completion
events.on("llm:request:complete", (data) => {
  console.log(`Request ${data.id} completed in ${data.duration}ms`);
  console.log(`Tokens: ${data.totalTokens}, Cost: $${data.cost}`);
});

// Listen for errors
events.on("llm:request:error", (data) => {
  console.error(`Request ${data.id} failed: ${data.error}`);
});
```

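These events make it straightforward to build custom metrics. A minimal sketch (the `costByTask` map is illustrative) that accumulates spend per task using the `metadata` passed to `chat()`:

```javascript
import { getLLMEvents } from "../llm/index.js";

const costByTask = new Map();

// Each completion event carries the cost plus the caller-supplied metadata.
getLLMEvents().on("llm:request:complete", ({ cost, metadata }) => {
  const taskId = metadata?.taskId ?? "unknown";
  costByTask.set(taskId, (costByTask.get(taskId) ?? 0) + cost);
});
```
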
## Provider Configuration

### Environment Variables

```bash
# OpenAI
export OPENAI_API_KEY="sk-..."

# DeepSeek
export DEEPSEEK_API_KEY="..."

# Anthropic
export ANTHROPIC_API_KEY="..."
```

### Supported Models

**OpenAI:**

- `gpt-5-chat-latest` (default)
- `gpt-5-chat-preview`
- `gpt-4-turbo-preview`
- `gpt-4`
- `gpt-3.5-turbo`

**DeepSeek:**

- `deepseek-reasoner` (default)
- `deepseek-chat`
- `deepseek-coder`

**Anthropic** (API key detection only; chat routing is not yet implemented in this version):

- `claude-3-opus-20240229`
- `claude-3-sonnet-20240229`
- `claude-3-haiku-20240307`

## Error Handling

The LLM layer handles various error scenarios:

```javascript
try {
  const response = await chat({
    provider: "openai",
    messages: [{ role: "user", content: "Hello!" }],
  });
} catch (error) {
  if (error.status === 401) {
    console.error("Invalid API key");
  } else if (error.message?.includes("rate limit")) {
    console.error("Rate limit exceeded");
  } else {
    console.error("Request failed:", error.message);
  }
}
```

## Best Practices

1. **Use `createLLM()` for consistent settings:**

   ```javascript
   const llm = createLLM({ defaultProvider: "openai" });
   ```

2. **Add metadata for tracking:**

   ```javascript
   await chat({
     messages: [...],
     metadata: { taskId: "task-123", userId: "user-456" },
   });
   ```

3. **Use chains for multi-turn conversations:**

   ```javascript
   const chain = createChain();
   // Maintains conversation history automatically
   ```

4. **Implement retries for reliability:**

   ```javascript
   await withRetry(chat, [options], { maxRetries: 3, backoffMs: 1000 });
   ```

5. **Control concurrency for batch operations:**

   ```javascript
   await parallel(fn, items, 5); // Max 5 concurrent requests
   ```

## Testing

See `tests/llm.test.js` for comprehensive coverage of the areas below; a minimal example using the mock provider follows the list.

- Provider routing and validation
- Token usage tracking
- Cost calculation
- Event emission
- Error handling
- Retry logic
- Parallel execution

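A minimal sketch of such a test, using the built-in mock provider (framework shown is `node:test`; the import path and setup are assumptions, and the real suite may be organized differently):

```javascript
import test from "node:test";
import assert from "node:assert/strict";
import { chat, registerMockProvider, getLLMEvents } from "../src/llm/index.js";

test("routes chat() through the mock provider and emits metrics", async () => {
  // Hypothetical mock: fixed reply plus OpenAI-style usage counts.
  registerMockProvider({
    chat: async () => ({
      content: "4",
      raw: {},
      usage: { prompt_tokens: 10, completion_tokens: 1, total_tokens: 11 },
    }),
  });

  const completed = [];
  getLLMEvents().on("llm:request:complete", (data) => completed.push(data));

  const response = await chat({
    provider: "mock",
    messages: [{ role: "user", content: "What is 2+2?" }],
  });

  assert.equal(response.content, "4");
  assert.equal(response.usage.totalTokens, 11);
  assert.equal(completed.length, 1);
});
```
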
## Migration from Legacy Code

If you're migrating from the old `src/providers/index.js` (now removed):

```javascript
// OLD (removed):
import { chat } from "../providers/index.js";

// NEW (correct):
import { chat } from "../llm/index.js";
```

All exports are compatible; this is a drop-in replacement.

## Related Documentation

- **Architecture:** `docs/architecture.md`
- **Provider Implementations:** `src/providers/`
- **Test Coverage:** `tests/llm.test.js`
- **Provider Fix Documentation:** `docs/providers-fix.md`
@@ -0,0 +1,320 @@
import { openaiChat } from "../providers/openai.js";
import { deepseekChat } from "../providers/deepseek.js";
import { EventEmitter } from "node:events";
import { getConfig } from "../core/config.js";

// Global mock provider instance (for demo/testing)
let mockProviderInstance = null;

// Global event bus for LLM metrics
const llmEvents = new EventEmitter();
export const getLLMEvents = () => llmEvents;

// Register mock provider for demo/testing
export function registerMockProvider(provider) {
  mockProviderInstance = provider;
}

// Check available providers
export function getAvailableProviders() {
  return {
    openai: !!process.env.OPENAI_API_KEY,
    deepseek: !!process.env.DEEPSEEK_API_KEY,
    anthropic: !!process.env.ANTHROPIC_API_KEY,
    mock: !!mockProviderInstance,
  };
}

// Simple token estimation (~4 characters per token)
export function estimateTokens(text) {
  return Math.ceil((text || "").length / 4);
}

// Calculate cost based on provider and model (USD rates per 1,000 tokens)
export function calculateCost(provider, model, usage) {
  const pricing = {
    mock: {
      "gpt-3.5-turbo": { prompt: 0.0005, completion: 0.0015 },
      "gpt-4": { prompt: 0.03, completion: 0.06 },
      "gpt-4-turbo": { prompt: 0.01, completion: 0.03 },
    },
    openai: {
      "gpt-5-chat-latest": { prompt: 0.015, completion: 0.06 },
      "gpt-4": { prompt: 0.03, completion: 0.06 },
      "gpt-4-turbo": { prompt: 0.01, completion: 0.03 },
      "gpt-3.5-turbo": { prompt: 0.0005, completion: 0.0015 },
    },
    deepseek: {
      "deepseek-reasoner": { prompt: 0.001, completion: 0.002 },
      "deepseek-chat": { prompt: 0.0005, completion: 0.001 },
    },
    anthropic: {
      "claude-3-opus": { prompt: 0.015, completion: 0.075 },
      "claude-3-sonnet": { prompt: 0.003, completion: 0.015 },
    },
  };

  const modelPricing = pricing[provider]?.[model];
  if (!modelPricing || !usage) return 0;

  const promptCost = ((usage.promptTokens || 0) / 1000) * modelPricing.prompt;
  const completionCost =
    ((usage.completionTokens || 0) / 1000) * modelPricing.completion;

  return promptCost + completionCost;
}

// Main chat function - no metrics handling needed!
export async function chat(options) {
  const {
    provider = "openai",
    model,
    messages = [],
    temperature,
    maxTokens,
    metadata = {},
    ...rest
  } = options;

  const available = getAvailableProviders();

  if (!available[provider]) {
    throw new Error(`Provider ${provider} not available. Check API keys.`);
  }

  const startTime = Date.now();
  const requestId = `req_${Date.now()}_${Math.random().toString(36).substring(7)}`;

  // Resolve the model up front so events and cost calculation reflect the
  // per-provider default even when the caller omits `model`.
  const defaultModels = {
    openai: "gpt-5-chat-latest",
    deepseek: "deepseek-reasoner",
    mock: "gpt-3.5-turbo",
  };
  const resolvedModel = model || defaultModels[provider];

  // Extract system and user messages
  const systemMsg = messages.find((m) => m.role === "system")?.content || "";
  const userMessages = messages.filter((m) => m.role === "user");
  const userMsg = userMessages.map((m) => m.content).join("\n");

  // Emit request start event
  llmEvents.emit("llm:request:start", {
    id: requestId,
    provider,
    model: resolvedModel,
    metadata,
    timestamp: new Date().toISOString(),
  });

  try {
    let response;
    let usage;

    if (provider === "mock") {
      if (!mockProviderInstance) {
        throw new Error(
          "Mock provider not registered. Call registerMockProvider() first."
        );
      }

      const result = await mockProviderInstance.chat({
        messages,
        model: resolvedModel,
        temperature,
        maxTokens,
        ...rest,
      });

      response = {
        content: result.content,
        raw: result.raw,
      };

      usage = {
        promptTokens: result.usage.prompt_tokens,
        completionTokens: result.usage.completion_tokens,
        totalTokens: result.usage.total_tokens,
      };
    } else if (provider === "openai") {
      const result = await openaiChat({
        messages,
        model: resolvedModel,
        maxTokens,
        temperature,
        ...rest,
      });

      response = {
        content: typeof result === "string" ? result : JSON.stringify(result),
        raw: result,
      };

      // Estimate tokens since GPT-5 responses API might not return usage
      const promptTokens = estimateTokens(systemMsg + userMsg);
      const completionTokens = estimateTokens(response.content);
      usage = {
        promptTokens,
        completionTokens,
        totalTokens: promptTokens + completionTokens,
      };
    } else if (provider === "deepseek") {
      const result = await deepseekChat(systemMsg, userMsg, resolvedModel);

      response = {
        content: typeof result === "string" ? result : JSON.stringify(result),
        raw: result,
      };

      const promptTokens = estimateTokens(systemMsg + userMsg);
      const completionTokens = estimateTokens(response.content);
      usage = {
        promptTokens,
        completionTokens,
        totalTokens: promptTokens + completionTokens,
      };
    } else {
      throw new Error(`Provider ${provider} not yet implemented`);
    }

    const duration = Date.now() - startTime;
    const cost = calculateCost(provider, resolvedModel, usage);

    // Emit success event with metrics
    llmEvents.emit("llm:request:complete", {
      id: requestId,
      provider,
      model: resolvedModel,
      duration,
      ...usage,
      cost,
      metadata,
      timestamp: new Date().toISOString(),
    });

    // Return clean response - no metrics attached!
    return {
      ...response,
      usage,
    };
  } catch (error) {
    const duration = Date.now() - startTime;

    // Emit error event
    llmEvents.emit("llm:request:error", {
      id: requestId,
      provider,
      model: resolvedModel,
      duration,
      error: error.message,
      metadata,
      timestamp: new Date().toISOString(),
    });

    throw error;
  }
}

// Convenience function for simple completions
export async function complete(prompt, options = {}) {
  return chat({
    ...options,
    messages: [{ role: "user", content: prompt }],
  });
}

// Create a chain for multi-turn conversations
export function createChain() {
  const messages = [];

  return {
    addSystemMessage: function (content) {
      messages.push({ role: "system", content });
      return this;
    },

    addUserMessage: function (content) {
      messages.push({ role: "user", content });
      return this;
    },

    addAssistantMessage: function (content) {
      messages.push({ role: "assistant", content });
      return this;
    },

    execute: async function (options = {}) {
      const response = await chat({ ...options, messages });
      messages.push({
        role: "assistant",
        content: response.content,
      });
      return response;
    },

    getMessages: () => [...messages],

    clear: function () {
      messages.length = 0;
      return this;
    },
  };
}

// Retry wrapper
export async function withRetry(fn, args = [], options = {}) {
  const config = getConfig();
  const maxRetries = options.maxRetries ?? config.llm.retryMaxAttempts;
  const backoffMs = options.backoffMs ?? config.llm.retryBackoffMs;

  let lastError;

  for (let i = 0; i <= maxRetries; i++) {
    try {
      if (i > 0) {
        await new Promise((r) => setTimeout(r, backoffMs * Math.pow(2, i - 1)));
      }
      return await fn(...args);
    } catch (error) {
      lastError = error;
      // Don't retry auth errors
      if (error.status === 401 || error.message?.includes("API key")) {
        throw error;
      }
    }
  }

  throw lastError;
}

// Parallel execution with concurrency control
// (items are processed in sequential batches of up to `concurrency`)
export async function parallel(fn, items, maxConcurrency) {
  const config = getConfig();
  const concurrency = maxConcurrency ?? config.llm.maxConcurrency;

  const results = [];
  for (let i = 0; i < items.length; i += concurrency) {
    const batch = items.slice(i, i + concurrency);
    const batchResults = await Promise.all(batch.map((item) => fn(item)));
    results.push(...batchResults);
  }
  return results;
}

// Create a bound LLM interface (no metrics handling needed!)
export function createLLM(options = {}) {
  const config = getConfig();
  const defaultProvider = options.defaultProvider || config.llm.defaultProvider;

  return {
    chat: (opts) => chat({ provider: defaultProvider, ...opts }),
    complete: (prompt, opts) =>
      complete(prompt, { provider: defaultProvider, ...opts }),
    createChain: () => createChain(),
    withRetry: (opts) =>
      withRetry(chat, [{ provider: defaultProvider, ...opts }]),
    parallel: (requests, maxConcurrency) =>
      parallel(
        (req) => chat({ provider: defaultProvider, ...req }),
        requests,
        maxConcurrency
      ),
    getAvailableProviders,
  };
}