@almadar/llm 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +72 -0
- package/dist/chunk-KH4JNOLT.js +174 -0
- package/dist/chunk-KH4JNOLT.js.map +1 -0
- package/dist/chunk-MJS33AAS.js +234 -0
- package/dist/chunk-MJS33AAS.js.map +1 -0
- package/dist/chunk-PV3G5PJS.js +633 -0
- package/dist/chunk-PV3G5PJS.js.map +1 -0
- package/dist/chunk-WM7QVK2Z.js +192 -0
- package/dist/chunk-WM7QVK2Z.js.map +1 -0
- package/dist/client.d.ts +136 -0
- package/dist/client.js +39 -0
- package/dist/client.js.map +1 -0
- package/dist/index.d.ts +67 -0
- package/dist/index.js +477 -0
- package/dist/index.js.map +1 -0
- package/dist/json-parser.d.ts +43 -0
- package/dist/json-parser.js +15 -0
- package/dist/json-parser.js.map +1 -0
- package/dist/rate-limiter-9XAWfHwe.d.ts +98 -0
- package/dist/structured-output.d.ts +113 -0
- package/dist/structured-output.js +16 -0
- package/dist/structured-output.js.map +1 -0
- package/package.json +55 -0
- package/src/client.ts +967 -0
- package/src/continuation.ts +290 -0
- package/src/index.ts +87 -0
- package/src/json-parser.ts +273 -0
- package/src/rate-limiter.ts +237 -0
- package/src/structured-output.ts +330 -0
- package/src/token-tracker.ts +116 -0
- package/src/truncation-detector.ts +308 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
Business Source License 1.1
|
|
2
|
+
|
|
3
|
+
Parameters
|
|
4
|
+
|
|
5
|
+
Licensor: Almadar FZE
|
|
6
|
+
Licensed Work: KFlow Builder / Almadar
|
|
7
|
+
The Licensed Work is (c) 2025-2026 Almadar FZE.
|
|
8
|
+
Additional Use Grant: You may make production use of the Licensed Work for
|
|
9
|
+
non-commercial purposes and for internal evaluation.
|
|
10
|
+
Production use for commercial purposes requires a
|
|
11
|
+
commercial license from the Licensor.
|
|
12
|
+
Change Date: 2030-02-01
|
|
13
|
+
Change License: Apache License, Version 2.0
|
|
14
|
+
|
|
15
|
+
Terms
|
|
16
|
+
|
|
17
|
+
The Licensor hereby grants you the right to copy, modify, create derivative
|
|
18
|
+
works, redistribute, and make non-production use of the Licensed Work. The
|
|
19
|
+
Licensor may make an Additional Use Grant, above, permitting limited
|
|
20
|
+
production use.
|
|
21
|
+
|
|
22
|
+
Effective on the Change Date, or the fourth anniversary of the first publicly
|
|
23
|
+
available distribution of a specific version of the Licensed Work under this
|
|
24
|
+
License, whichever comes first, the Licensor hereby grants you rights under
|
|
25
|
+
the terms of the Change License, and the rights granted in the paragraph
|
|
26
|
+
above terminate.
|
|
27
|
+
|
|
28
|
+
If your use of the Licensed Work does not comply with the requirements
|
|
29
|
+
currently in effect as described in this License, you must purchase a
|
|
30
|
+
commercial license from the Licensor, its affiliated entities, or authorized
|
|
31
|
+
resellers, or you must refrain from using the Licensed Work.
|
|
32
|
+
|
|
33
|
+
All copies of the original and modified Licensed Work, and derivative works
|
|
34
|
+
of the Licensed Work, are subject to this License. This License applies
|
|
35
|
+
separately for each version of the Licensed Work and the Change Date may vary
|
|
36
|
+
for each version of the Licensed Work released by Licensor.
|
|
37
|
+
|
|
38
|
+
You must conspicuously display this License on each original or modified copy
|
|
39
|
+
of the Licensed Work. If you receive the Licensed Work in original or
|
|
40
|
+
modified form from a third party, the terms and conditions set forth in this
|
|
41
|
+
License apply to your use of that work.
|
|
42
|
+
|
|
43
|
+
Any use of the Licensed Work in violation of this License will automatically
|
|
44
|
+
terminate your rights under this License for the current and all other
|
|
45
|
+
versions of the Licensed Work.
|
|
46
|
+
|
|
47
|
+
This License does not grant you any right in any trademark or logo of
|
|
48
|
+
Licensor or its affiliates (provided that you may use a trademark or logo of
|
|
49
|
+
Licensor as expressly required by this License).
|
|
50
|
+
|
|
51
|
+
TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON
|
|
52
|
+
AN "AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS,
|
|
53
|
+
EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF
|
|
54
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND
|
|
55
|
+
TITLE.
|
|
56
|
+
|
|
57
|
+
---
|
|
58
|
+
|
|
59
|
+
License text copyright (c) 2017 MariaDB Corporation Ab, All Rights Reserved.
|
|
60
|
+
"Business Source License" is a trademark of MariaDB Corporation Ab.
|
|
61
|
+
|
|
62
|
+
ADDITIONAL TERMS:
|
|
63
|
+
|
|
64
|
+
Documentation (builder/packages/website/docs/) is licensed under CC BY 4.0.
|
|
65
|
+
|
|
66
|
+
TRADEMARKS:
|
|
67
|
+
|
|
68
|
+
"Orbital", "KFlow", "Almadar", and the Almadar logo are trademarks of
|
|
69
|
+
Almadar FZE. You may not use these trademarks without prior written
|
|
70
|
+
permission from Almadar FZE.
|
|
71
|
+
|
|
72
|
+
For licensing inquiries: licensing@almadar.io
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
import {
|
|
2
|
+
RateLimiter,
|
|
3
|
+
getGlobalRateLimiter,
|
|
4
|
+
getGlobalTokenTracker
|
|
5
|
+
} from "./chunk-MJS33AAS.js";
|
|
6
|
+
|
|
7
|
+
// src/structured-output.ts
|
|
8
|
+
import OpenAI from "openai";
|
|
9
|
+
var STRUCTURED_OUTPUT_MODELS = {
|
|
10
|
+
GPT5_MINI: "gpt-5-mini",
|
|
11
|
+
GPT4O_MINI: "gpt-4o-mini",
|
|
12
|
+
GPT4O: "gpt-4o",
|
|
13
|
+
GPT4O_2024_08_06: "gpt-4o-2024-08-06"
|
|
14
|
+
};
|
|
15
|
+
var DEFAULT_SYSTEM_PROMPT = `You are an expert application architect that generates structured schemas from natural language requirements.
|
|
16
|
+
|
|
17
|
+
Generate a complete, well-structured schema based on the user's requirements. Follow the JSON Schema structure exactly.`;
|
|
18
|
+
var StructuredOutputClient = class {
|
|
19
|
+
constructor(options = {}) {
|
|
20
|
+
const apiKey = process.env.OPENAI_API_KEY;
|
|
21
|
+
if (!apiKey) {
|
|
22
|
+
throw new Error(
|
|
23
|
+
"OPENAI_API_KEY environment variable is required for StructuredOutputClient"
|
|
24
|
+
);
|
|
25
|
+
}
|
|
26
|
+
this.openai = new OpenAI({ apiKey });
|
|
27
|
+
this.defaultModel = options.model || STRUCTURED_OUTPUT_MODELS.GPT5_MINI;
|
|
28
|
+
this.defaultTemperature = options.temperature ?? 0.3;
|
|
29
|
+
this.defaultMaxTokens = options.maxTokens ?? 16384;
|
|
30
|
+
this.rateLimiter = options.useGlobalRateLimiter !== false ? getGlobalRateLimiter(options.rateLimiter) : new RateLimiter(options.rateLimiter);
|
|
31
|
+
this.tokenTracker = options.trackTokens !== false ? getGlobalTokenTracker(this.defaultModel) : null;
|
|
32
|
+
console.log(
|
|
33
|
+
`[StructuredOutputClient] Initialized with model: ${this.defaultModel}`
|
|
34
|
+
);
|
|
35
|
+
}
|
|
36
|
+
usesMaxCompletionTokens(model) {
|
|
37
|
+
const m = model.toLowerCase();
|
|
38
|
+
return m.startsWith("o1") || m.startsWith("gpt-5") || m.includes("o1-") || m.includes("o3");
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* Generate structured output with guaranteed JSON Schema compliance.
|
|
42
|
+
*/
|
|
43
|
+
async generate(options) {
|
|
44
|
+
const model = options.model || this.defaultModel;
|
|
45
|
+
const temperature = options.temperature ?? this.defaultTemperature;
|
|
46
|
+
const maxTokens = options.maxTokens ?? this.defaultMaxTokens;
|
|
47
|
+
const startTime = Date.now();
|
|
48
|
+
const jsonSchema = options.jsonSchema || {
|
|
49
|
+
type: "object",
|
|
50
|
+
properties: {},
|
|
51
|
+
required: [],
|
|
52
|
+
additionalProperties: false
|
|
53
|
+
};
|
|
54
|
+
let systemPrompt;
|
|
55
|
+
if (options.systemPrompt) {
|
|
56
|
+
systemPrompt = options.systemPrompt;
|
|
57
|
+
} else if (options.buildSystemPrompt) {
|
|
58
|
+
systemPrompt = options.buildSystemPrompt();
|
|
59
|
+
} else {
|
|
60
|
+
systemPrompt = DEFAULT_SYSTEM_PROMPT;
|
|
61
|
+
}
|
|
62
|
+
if (options.additionalInstructions) {
|
|
63
|
+
systemPrompt += `
|
|
64
|
+
|
|
65
|
+
## Additional Instructions
|
|
66
|
+
${options.additionalInstructions}`;
|
|
67
|
+
}
|
|
68
|
+
let userPrompt = options.userRequest;
|
|
69
|
+
if (options.existingContext) {
|
|
70
|
+
userPrompt += `
|
|
71
|
+
|
|
72
|
+
## Existing Context
|
|
73
|
+
Update based on the above request:
|
|
74
|
+
\`\`\`json
|
|
75
|
+
${options.existingContext}
|
|
76
|
+
\`\`\``;
|
|
77
|
+
}
|
|
78
|
+
const schemaName = options.schemaName || "structured_output";
|
|
79
|
+
console.log(
|
|
80
|
+
`[StructuredOutputClient] Generating with ${model}...`
|
|
81
|
+
);
|
|
82
|
+
console.log(
|
|
83
|
+
`[StructuredOutputClient] Request: "${options.userRequest.slice(0, 80)}..."`
|
|
84
|
+
);
|
|
85
|
+
const response = await this.rateLimiter.execute(async () => {
|
|
86
|
+
const isReasoningModel = this.usesMaxCompletionTokens(model);
|
|
87
|
+
const tokenParam = isReasoningModel ? { max_completion_tokens: maxTokens } : { max_tokens: maxTokens };
|
|
88
|
+
const tempParam = isReasoningModel ? {} : { temperature };
|
|
89
|
+
const params = {
|
|
90
|
+
model,
|
|
91
|
+
messages: [
|
|
92
|
+
{ role: "system", content: systemPrompt },
|
|
93
|
+
{ role: "user", content: userPrompt }
|
|
94
|
+
],
|
|
95
|
+
response_format: {
|
|
96
|
+
type: "json_schema",
|
|
97
|
+
json_schema: {
|
|
98
|
+
name: schemaName,
|
|
99
|
+
strict: true,
|
|
100
|
+
schema: jsonSchema
|
|
101
|
+
}
|
|
102
|
+
},
|
|
103
|
+
...tempParam,
|
|
104
|
+
...tokenParam
|
|
105
|
+
};
|
|
106
|
+
return this.openai.chat.completions.create(params);
|
|
107
|
+
});
|
|
108
|
+
const latencyMs = Date.now() - startTime;
|
|
109
|
+
const content = response.choices[0]?.message?.content;
|
|
110
|
+
if (!content) {
|
|
111
|
+
throw new Error("No content in OpenAI response");
|
|
112
|
+
}
|
|
113
|
+
let data;
|
|
114
|
+
try {
|
|
115
|
+
data = JSON.parse(content);
|
|
116
|
+
} catch (error) {
|
|
117
|
+
throw new Error(`Failed to parse response JSON: ${error}`);
|
|
118
|
+
}
|
|
119
|
+
const usage = {
|
|
120
|
+
promptTokens: response.usage?.prompt_tokens || 0,
|
|
121
|
+
completionTokens: response.usage?.completion_tokens || 0,
|
|
122
|
+
totalTokens: response.usage?.total_tokens || 0
|
|
123
|
+
};
|
|
124
|
+
if (this.tokenTracker) {
|
|
125
|
+
this.tokenTracker.addUsage(usage.promptTokens, usage.completionTokens);
|
|
126
|
+
}
|
|
127
|
+
console.log(
|
|
128
|
+
`[StructuredOutputClient] Generated in ${latencyMs}ms, ${usage.totalTokens} tokens`
|
|
129
|
+
);
|
|
130
|
+
let zodValidation;
|
|
131
|
+
if (!options.skipValidation) {
|
|
132
|
+
zodValidation = { success: true };
|
|
133
|
+
}
|
|
134
|
+
return {
|
|
135
|
+
data,
|
|
136
|
+
raw: content,
|
|
137
|
+
usage,
|
|
138
|
+
latencyMs,
|
|
139
|
+
model,
|
|
140
|
+
zodValidation
|
|
141
|
+
};
|
|
142
|
+
}
|
|
143
|
+
getModel() {
|
|
144
|
+
return this.defaultModel;
|
|
145
|
+
}
|
|
146
|
+
getRateLimiterStatus() {
|
|
147
|
+
return this.rateLimiter.getStatus();
|
|
148
|
+
}
|
|
149
|
+
getTokenUsage() {
|
|
150
|
+
return this.tokenTracker?.getSummary() ?? null;
|
|
151
|
+
}
|
|
152
|
+
};
|
|
153
|
+
var sharedClient = null;
|
|
154
|
+
function getStructuredOutputClient(options) {
|
|
155
|
+
if (!sharedClient) {
|
|
156
|
+
sharedClient = new StructuredOutputClient(options);
|
|
157
|
+
}
|
|
158
|
+
return sharedClient;
|
|
159
|
+
}
|
|
160
|
+
function resetStructuredOutputClient() {
|
|
161
|
+
sharedClient = null;
|
|
162
|
+
}
|
|
163
|
+
function isStructuredOutputAvailable() {
|
|
164
|
+
return !!process.env.OPENAI_API_KEY;
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
export {
|
|
168
|
+
STRUCTURED_OUTPUT_MODELS,
|
|
169
|
+
StructuredOutputClient,
|
|
170
|
+
getStructuredOutputClient,
|
|
171
|
+
resetStructuredOutputClient,
|
|
172
|
+
isStructuredOutputAvailable
|
|
173
|
+
};
|
|
174
|
+
//# sourceMappingURL=chunk-KH4JNOLT.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/structured-output.ts"],"sourcesContent":["/**\n * Structured Output Client for OpenAI\n *\n * Uses OpenAI's structured outputs feature (json_schema response_format)\n * to guarantee schema compliance at generation time.\n *\n * The system prompt builder is injectable so consumers can provide\n * domain-specific prompts (e.g., orbital schema references).\n *\n * @packageDocumentation\n */\n\nimport OpenAI from 'openai';\nimport type { ChatCompletionCreateParamsNonStreaming } from 'openai/resources/chat/completions';\nimport { z } from 'zod';\nimport {\n RateLimiter,\n getGlobalRateLimiter,\n type RateLimiterOptions,\n} from './rate-limiter.js';\nimport { TokenTracker, getGlobalTokenTracker } from './token-tracker.js';\n\n// ============================================================================\n// Types\n// ============================================================================\n\n/**\n * JSON Schema type used for OpenAI structured outputs.\n */\nexport interface JsonSchema {\n type?: string | string[];\n properties?: Record<string, JsonSchema>;\n required?: string[];\n items?: JsonSchema;\n enum?: unknown[];\n const?: unknown;\n anyOf?: JsonSchema[];\n oneOf?: JsonSchema[];\n allOf?: JsonSchema[];\n $ref?: string;\n $defs?: Record<string, JsonSchema>;\n definitions?: Record<string, JsonSchema>;\n additionalProperties?: boolean | JsonSchema;\n description?: string;\n default?: unknown;\n minItems?: number;\n maxItems?: number;\n minLength?: number;\n}\n\nexport interface StructuredOutputOptions {\n model?: string;\n temperature?: number;\n maxTokens?: number;\n rateLimiter?: RateLimiterOptions;\n useGlobalRateLimiter?: boolean;\n trackTokens?: boolean;\n}\n\nexport interface StructuredGenerationOptions {\n /** User's natural language request */\n userRequest: string;\n /** Model to use (overrides client default) */\n model?: string;\n /** Temperature (overrides client default) */\n temperature?: number;\n /** Maximum tokens 
(overrides client default) */\n maxTokens?: number;\n /** JSON Schema for structured output */\n jsonSchema?: JsonSchema;\n /** Schema name for the json_schema response format */\n schemaName?: string;\n /** System prompt override */\n systemPrompt?: string;\n /** System prompt builder function (called dynamically) */\n buildSystemPrompt?: () => string;\n /** Additional system prompt instructions */\n additionalInstructions?: string;\n /** Existing context for updates (e.g., existing schema JSON) */\n existingContext?: string;\n /** Skip post-generation validation (default: false) */\n skipValidation?: boolean;\n}\n\nexport interface StructuredGenerationResult<T = unknown> {\n /** Generated data (guaranteed to match JSON Schema structure) */\n data: T;\n /** Raw JSON string from API */\n raw: string;\n /** Token usage statistics */\n usage: {\n promptTokens: number;\n completionTokens: number;\n totalTokens: number;\n };\n /** Generation latency in milliseconds */\n latencyMs: number;\n /** Model used for generation */\n model: string;\n /** Zod validation result (if not skipped) */\n zodValidation?: {\n success: boolean;\n errors?: z.ZodError['errors'];\n };\n}\n\nexport const STRUCTURED_OUTPUT_MODELS = {\n GPT5_MINI: 'gpt-5-mini',\n GPT4O_MINI: 'gpt-4o-mini',\n GPT4O: 'gpt-4o',\n GPT4O_2024_08_06: 'gpt-4o-2024-08-06',\n} as const;\n\n// ============================================================================\n// Default System Prompt\n// ============================================================================\n\nconst DEFAULT_SYSTEM_PROMPT = `You are an expert application architect that generates structured schemas from natural language requirements.\n\nGenerate a complete, well-structured schema based on the user's requirements. 
Follow the JSON Schema structure exactly.`;\n\n// ============================================================================\n// Structured Output Client\n// ============================================================================\n\nexport class StructuredOutputClient {\n private openai: OpenAI;\n private rateLimiter: RateLimiter;\n private tokenTracker: TokenTracker | null;\n private defaultModel: string;\n private defaultTemperature: number;\n private defaultMaxTokens: number;\n\n constructor(options: StructuredOutputOptions = {}) {\n const apiKey = process.env.OPENAI_API_KEY;\n if (!apiKey) {\n throw new Error(\n 'OPENAI_API_KEY environment variable is required for StructuredOutputClient',\n );\n }\n\n this.openai = new OpenAI({ apiKey });\n this.defaultModel = options.model || STRUCTURED_OUTPUT_MODELS.GPT5_MINI;\n this.defaultTemperature = options.temperature ?? 0.3;\n this.defaultMaxTokens = options.maxTokens ?? 16384;\n\n this.rateLimiter =\n options.useGlobalRateLimiter !== false\n ? getGlobalRateLimiter(options.rateLimiter)\n : new RateLimiter(options.rateLimiter);\n\n this.tokenTracker =\n options.trackTokens !== false\n ? getGlobalTokenTracker(this.defaultModel)\n : null;\n\n console.log(\n `[StructuredOutputClient] Initialized with model: ${this.defaultModel}`,\n );\n }\n\n private usesMaxCompletionTokens(model: string): boolean {\n const m = model.toLowerCase();\n return (\n m.startsWith('o1') ||\n m.startsWith('gpt-5') ||\n m.includes('o1-') ||\n m.includes('o3')\n );\n }\n\n /**\n * Generate structured output with guaranteed JSON Schema compliance.\n */\n async generate<T = unknown>(\n options: StructuredGenerationOptions,\n ): Promise<StructuredGenerationResult<T>> {\n const model = options.model || this.defaultModel;\n const temperature = options.temperature ?? this.defaultTemperature;\n const maxTokens = options.maxTokens ?? 
this.defaultMaxTokens;\n const startTime = Date.now();\n\n const jsonSchema: JsonSchema = options.jsonSchema || {\n type: 'object',\n properties: {},\n required: [],\n additionalProperties: false,\n };\n\n // Build system prompt\n let systemPrompt: string;\n if (options.systemPrompt) {\n systemPrompt = options.systemPrompt;\n } else if (options.buildSystemPrompt) {\n systemPrompt = options.buildSystemPrompt();\n } else {\n systemPrompt = DEFAULT_SYSTEM_PROMPT;\n }\n\n if (options.additionalInstructions) {\n systemPrompt += `\\n\\n## Additional Instructions\\n${options.additionalInstructions}`;\n }\n\n // Build user prompt\n let userPrompt = options.userRequest;\n if (options.existingContext) {\n userPrompt += `\\n\\n## Existing Context\\nUpdate based on the above request:\\n\\`\\`\\`json\\n${options.existingContext}\\n\\`\\`\\``;\n }\n\n const schemaName = options.schemaName || 'structured_output';\n\n console.log(\n `[StructuredOutputClient] Generating with ${model}...`,\n );\n console.log(\n `[StructuredOutputClient] Request: \"${options.userRequest.slice(0, 80)}...\"`,\n );\n\n const response = await this.rateLimiter.execute(async () => {\n const isReasoningModel = this.usesMaxCompletionTokens(model);\n\n const tokenParam = isReasoningModel\n ? { max_completion_tokens: maxTokens }\n : { max_tokens: maxTokens };\n\n const tempParam = isReasoningModel ? 
{} : { temperature };\n\n const params: ChatCompletionCreateParamsNonStreaming = {\n model,\n messages: [\n { role: 'system', content: systemPrompt },\n { role: 'user', content: userPrompt },\n ],\n response_format: {\n type: 'json_schema',\n json_schema: {\n name: schemaName,\n strict: true,\n schema: jsonSchema as Record<string, unknown>,\n },\n },\n ...tempParam,\n ...tokenParam,\n };\n\n return this.openai.chat.completions.create(params);\n });\n\n const latencyMs = Date.now() - startTime;\n\n const content = response.choices[0]?.message?.content;\n if (!content) {\n throw new Error('No content in OpenAI response');\n }\n\n let data: T;\n try {\n data = JSON.parse(content) as T;\n } catch (error) {\n throw new Error(`Failed to parse response JSON: ${error}`);\n }\n\n const usage = {\n promptTokens: response.usage?.prompt_tokens || 0,\n completionTokens: response.usage?.completion_tokens || 0,\n totalTokens: response.usage?.total_tokens || 0,\n };\n\n if (this.tokenTracker) {\n this.tokenTracker.addUsage(usage.promptTokens, usage.completionTokens);\n }\n\n console.log(\n `[StructuredOutputClient] Generated in ${latencyMs}ms, ${usage.totalTokens} tokens`,\n );\n\n let zodValidation: StructuredGenerationResult['zodValidation'];\n if (!options.skipValidation) {\n zodValidation = { success: true };\n }\n\n return {\n data,\n raw: content,\n usage,\n latencyMs,\n model,\n zodValidation,\n };\n }\n\n getModel(): string {\n return this.defaultModel;\n }\n\n getRateLimiterStatus() {\n return this.rateLimiter.getStatus();\n }\n\n getTokenUsage() {\n return this.tokenTracker?.getSummary() ?? 
null;\n }\n}\n\n// ============================================================================\n// Singleton Instance\n// ============================================================================\n\nlet sharedClient: StructuredOutputClient | null = null;\n\nexport function getStructuredOutputClient(\n options?: StructuredOutputOptions,\n): StructuredOutputClient {\n if (!sharedClient) {\n sharedClient = new StructuredOutputClient(options);\n }\n return sharedClient;\n}\n\nexport function resetStructuredOutputClient(): void {\n sharedClient = null;\n}\n\n// ============================================================================\n// Convenience Functions\n// ============================================================================\n\nexport function isStructuredOutputAvailable(): boolean {\n return !!process.env.OPENAI_API_KEY;\n}\n"],"mappings":";;;;;;;AAYA,OAAO,YAAY;AA8FZ,IAAM,2BAA2B;AAAA,EACtC,WAAW;AAAA,EACX,YAAY;AAAA,EACZ,OAAO;AAAA,EACP,kBAAkB;AACpB;AAMA,IAAM,wBAAwB;AAAA;AAAA;AAQvB,IAAM,yBAAN,MAA6B;AAAA,EAQlC,YAAY,UAAmC,CAAC,GAAG;AACjD,UAAM,SAAS,QAAQ,IAAI;AAC3B,QAAI,CAAC,QAAQ;AACX,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,SAAS,IAAI,OAAO,EAAE,OAAO,CAAC;AACnC,SAAK,eAAe,QAAQ,SAAS,yBAAyB;AAC9D,SAAK,qBAAqB,QAAQ,eAAe;AACjD,SAAK,mBAAmB,QAAQ,aAAa;AAE7C,SAAK,cACH,QAAQ,yBAAyB,QAC7B,qBAAqB,QAAQ,WAAW,IACxC,IAAI,YAAY,QAAQ,WAAW;AAEzC,SAAK,eACH,QAAQ,gBAAgB,QACpB,sBAAsB,KAAK,YAAY,IACvC;AAEN,YAAQ;AAAA,MACN,oDAAoD,KAAK,YAAY;AAAA,IACvE;AAAA,EACF;AAAA,EAEQ,wBAAwB,OAAwB;AACtD,UAAM,IAAI,MAAM,YAAY;AAC5B,WACE,EAAE,WAAW,IAAI,KACjB,EAAE,WAAW,OAAO,KACpB,EAAE,SAAS,KAAK,KAChB,EAAE,SAAS,IAAI;AAAA,EAEnB;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,SACJ,SACwC;AACxC,UAAM,QAAQ,QAAQ,SAAS,KAAK;AACpC,UAAM,cAAc,QAAQ,eAAe,KAAK;AAChD,UAAM,YAAY,QAAQ,aAAa,KAAK;AAC5C,UAAM,YAAY,KAAK,IAAI;AAE3B,UAAM,aAAyB,QAAQ,cAAc;AAAA,MACnD,MAAM;AAAA,MACN,YAAY,CAAC;AAAA,MACb,UAAU,CAAC;AAAA,MACX,sBAAsB;AAAA,IACxB;AAGA,QAAI;AACJ,QAAI,QAAQ,cAAc;AACxB,qBAAe,QAAQ;AAAA,IACzB,WAAW,QAAQ,mBAAmB;AACpC,qBAAe,QAAQ,kBAAkB;AAAA,IAC
3C,OAAO;AACL,qBAAe;AAAA,IACjB;AAEA,QAAI,QAAQ,wBAAwB;AAClC,sBAAgB;AAAA;AAAA;AAAA,EAAmC,QAAQ,sBAAsB;AAAA,IACnF;AAGA,QAAI,aAAa,QAAQ;AACzB,QAAI,QAAQ,iBAAiB;AAC3B,oBAAc;AAAA;AAAA;AAAA;AAAA;AAAA,EAA4E,QAAQ,eAAe;AAAA;AAAA,IACnH;AAEA,UAAM,aAAa,QAAQ,cAAc;AAEzC,YAAQ;AAAA,MACN,4CAA4C,KAAK;AAAA,IACnD;AACA,YAAQ;AAAA,MACN,sCAAsC,QAAQ,YAAY,MAAM,GAAG,EAAE,CAAC;AAAA,IACxE;AAEA,UAAM,WAAW,MAAM,KAAK,YAAY,QAAQ,YAAY;AAC1D,YAAM,mBAAmB,KAAK,wBAAwB,KAAK;AAE3D,YAAM,aAAa,mBACf,EAAE,uBAAuB,UAAU,IACnC,EAAE,YAAY,UAAU;AAE5B,YAAM,YAAY,mBAAmB,CAAC,IAAI,EAAE,YAAY;AAExD,YAAM,SAAiD;AAAA,QACrD;AAAA,QACA,UAAU;AAAA,UACR,EAAE,MAAM,UAAU,SAAS,aAAa;AAAA,UACxC,EAAE,MAAM,QAAQ,SAAS,WAAW;AAAA,QACtC;AAAA,QACA,iBAAiB;AAAA,UACf,MAAM;AAAA,UACN,aAAa;AAAA,YACX,MAAM;AAAA,YACN,QAAQ;AAAA,YACR,QAAQ;AAAA,UACV;AAAA,QACF;AAAA,QACA,GAAG;AAAA,QACH,GAAG;AAAA,MACL;AAEA,aAAO,KAAK,OAAO,KAAK,YAAY,OAAO,MAAM;AAAA,IACnD,CAAC;AAED,UAAM,YAAY,KAAK,IAAI,IAAI;AAE/B,UAAM,UAAU,SAAS,QAAQ,CAAC,GAAG,SAAS;AAC9C,QAAI,CAAC,SAAS;AACZ,YAAM,IAAI,MAAM,+BAA+B;AAAA,IACjD;AAEA,QAAI;AACJ,QAAI;AACF,aAAO,KAAK,MAAM,OAAO;AAAA,IAC3B,SAAS,OAAO;AACd,YAAM,IAAI,MAAM,kCAAkC,KAAK,EAAE;AAAA,IAC3D;AAEA,UAAM,QAAQ;AAAA,MACZ,cAAc,SAAS,OAAO,iBAAiB;AAAA,MAC/C,kBAAkB,SAAS,OAAO,qBAAqB;AAAA,MACvD,aAAa,SAAS,OAAO,gBAAgB;AAAA,IAC/C;AAEA,QAAI,KAAK,cAAc;AACrB,WAAK,aAAa,SAAS,MAAM,cAAc,MAAM,gBAAgB;AAAA,IACvE;AAEA,YAAQ;AAAA,MACN,yCAAyC,SAAS,OAAO,MAAM,WAAW;AAAA,IAC5E;AAEA,QAAI;AACJ,QAAI,CAAC,QAAQ,gBAAgB;AAC3B,sBAAgB,EAAE,SAAS,KAAK;AAAA,IAClC;AAEA,WAAO;AAAA,MACL;AAAA,MACA,KAAK;AAAA,MACL;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA,EAEA,WAAmB;AACjB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,uBAAuB;AACrB,WAAO,KAAK,YAAY,UAAU;AAAA,EACpC;AAAA,EAEA,gBAAgB;AACd,WAAO,KAAK,cAAc,WAAW,KAAK;AAAA,EAC5C;AACF;AAMA,IAAI,eAA8C;AAE3C,SAAS,0BACd,SACwB;AACxB,MAAI,CAAC,cAAc;AACjB,mBAAe,IAAI,uBAAuB,OAAO;AAAA,EACnD;AACA,SAAO;AACT;AAEO,SAAS,8BAAoC;AAClD,iBAAe;AACjB;AAMO,SAAS,8BAAuC;AACrD,SAAO,CAAC,CAAC,QAAQ,IAAI;AACvB;","names":[]}
|
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
// src/rate-limiter.ts
|
|
2
|
+
var RateLimiter = class {
|
|
3
|
+
constructor(options = {}) {
|
|
4
|
+
this.activeRequests = 0;
|
|
5
|
+
this.queue = [];
|
|
6
|
+
this.lastMinuteReset = Date.now();
|
|
7
|
+
this.lastSecondReset = Date.now();
|
|
8
|
+
this.processing = false;
|
|
9
|
+
this.currentBackoffMs = 0;
|
|
10
|
+
this.requestsPerMinute = options.requestsPerMinute ?? 60;
|
|
11
|
+
this.requestsPerSecond = options.requestsPerSecond ?? 3;
|
|
12
|
+
this.maxConcurrent = options.maxConcurrent ?? 5;
|
|
13
|
+
this.baseBackoffMs = options.baseBackoffMs ?? 1e3;
|
|
14
|
+
this.maxBackoffMs = options.maxBackoffMs ?? 6e4;
|
|
15
|
+
this.minuteTokens = this.requestsPerMinute;
|
|
16
|
+
this.secondTokens = this.requestsPerSecond;
|
|
17
|
+
}
|
|
18
|
+
async execute(fn, _maxRetries = 3) {
|
|
19
|
+
return new Promise((resolve, reject) => {
|
|
20
|
+
this.queue.push({
|
|
21
|
+
execute: fn,
|
|
22
|
+
resolve,
|
|
23
|
+
reject,
|
|
24
|
+
retryCount: 0
|
|
25
|
+
});
|
|
26
|
+
this.processQueue();
|
|
27
|
+
});
|
|
28
|
+
}
|
|
29
|
+
getStatus() {
|
|
30
|
+
return {
|
|
31
|
+
queueLength: this.queue.length,
|
|
32
|
+
activeRequests: this.activeRequests,
|
|
33
|
+
minuteTokens: this.minuteTokens,
|
|
34
|
+
secondTokens: this.secondTokens,
|
|
35
|
+
backoffMs: this.currentBackoffMs
|
|
36
|
+
};
|
|
37
|
+
}
|
|
38
|
+
reset() {
|
|
39
|
+
this.minuteTokens = this.requestsPerMinute;
|
|
40
|
+
this.secondTokens = this.requestsPerSecond;
|
|
41
|
+
this.activeRequests = 0;
|
|
42
|
+
this.queue = [];
|
|
43
|
+
this.currentBackoffMs = 0;
|
|
44
|
+
this.lastMinuteReset = Date.now();
|
|
45
|
+
this.lastSecondReset = Date.now();
|
|
46
|
+
}
|
|
47
|
+
async processQueue() {
|
|
48
|
+
if (this.processing) return;
|
|
49
|
+
this.processing = true;
|
|
50
|
+
while (this.queue.length > 0) {
|
|
51
|
+
this.refillTokens();
|
|
52
|
+
if (!this.canMakeRequest()) {
|
|
53
|
+
const waitTime = this.getWaitTime();
|
|
54
|
+
await this.sleep(waitTime);
|
|
55
|
+
continue;
|
|
56
|
+
}
|
|
57
|
+
if (this.currentBackoffMs > 0) {
|
|
58
|
+
await this.sleep(this.currentBackoffMs);
|
|
59
|
+
this.currentBackoffMs = 0;
|
|
60
|
+
}
|
|
61
|
+
const request = this.queue.shift();
|
|
62
|
+
if (!request) continue;
|
|
63
|
+
this.consumeTokens();
|
|
64
|
+
this.activeRequests++;
|
|
65
|
+
try {
|
|
66
|
+
const result = await request.execute();
|
|
67
|
+
request.resolve(result);
|
|
68
|
+
this.currentBackoffMs = 0;
|
|
69
|
+
} catch (error) {
|
|
70
|
+
const err = error instanceof Error ? error : new Error(String(error));
|
|
71
|
+
if (this.isRateLimitError(err)) {
|
|
72
|
+
this.currentBackoffMs = Math.min(
|
|
73
|
+
this.baseBackoffMs * Math.pow(2, request.retryCount),
|
|
74
|
+
this.maxBackoffMs
|
|
75
|
+
);
|
|
76
|
+
console.warn(
|
|
77
|
+
`[RateLimiter] Rate limited. Backing off for ${this.currentBackoffMs}ms (retry ${request.retryCount + 1})`
|
|
78
|
+
);
|
|
79
|
+
if (request.retryCount < 3) {
|
|
80
|
+
this.queue.unshift({
|
|
81
|
+
...request,
|
|
82
|
+
retryCount: request.retryCount + 1
|
|
83
|
+
});
|
|
84
|
+
} else {
|
|
85
|
+
request.reject(
|
|
86
|
+
new Error(
|
|
87
|
+
`Rate limit exceeded after ${request.retryCount + 1} retries: ${err.message}`
|
|
88
|
+
)
|
|
89
|
+
);
|
|
90
|
+
}
|
|
91
|
+
} else {
|
|
92
|
+
request.reject(err);
|
|
93
|
+
}
|
|
94
|
+
} finally {
|
|
95
|
+
this.activeRequests--;
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
this.processing = false;
|
|
99
|
+
}
|
|
100
|
+
refillTokens() {
|
|
101
|
+
const now = Date.now();
|
|
102
|
+
if (now - this.lastMinuteReset >= 6e4) {
|
|
103
|
+
this.minuteTokens = this.requestsPerMinute;
|
|
104
|
+
this.lastMinuteReset = now;
|
|
105
|
+
}
|
|
106
|
+
if (now - this.lastSecondReset >= 1e3) {
|
|
107
|
+
this.secondTokens = this.requestsPerSecond;
|
|
108
|
+
this.lastSecondReset = now;
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
canMakeRequest() {
|
|
112
|
+
return this.minuteTokens > 0 && this.secondTokens > 0 && this.activeRequests < this.maxConcurrent;
|
|
113
|
+
}
|
|
114
|
+
consumeTokens() {
|
|
115
|
+
this.minuteTokens--;
|
|
116
|
+
this.secondTokens--;
|
|
117
|
+
}
|
|
118
|
+
getWaitTime() {
|
|
119
|
+
const now = Date.now();
|
|
120
|
+
if (this.secondTokens <= 0) {
|
|
121
|
+
return Math.max(0, 1e3 - (now - this.lastSecondReset));
|
|
122
|
+
}
|
|
123
|
+
if (this.minuteTokens <= 0) {
|
|
124
|
+
return Math.max(0, 6e4 - (now - this.lastMinuteReset));
|
|
125
|
+
}
|
|
126
|
+
return 100;
|
|
127
|
+
}
|
|
128
|
+
isRateLimitError(error) {
|
|
129
|
+
const message = error.message.toLowerCase();
|
|
130
|
+
return message.includes("429") || message.includes("rate limit") || message.includes("too many requests") || message.includes("quota exceeded");
|
|
131
|
+
}
|
|
132
|
+
sleep(ms) {
|
|
133
|
+
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
134
|
+
}
|
|
135
|
+
};
|
|
136
|
+
var globalRateLimiter = null;
|
|
137
|
+
function getGlobalRateLimiter(options) {
|
|
138
|
+
if (!globalRateLimiter) {
|
|
139
|
+
globalRateLimiter = new RateLimiter(options);
|
|
140
|
+
}
|
|
141
|
+
return globalRateLimiter;
|
|
142
|
+
}
|
|
143
|
+
function resetGlobalRateLimiter() {
|
|
144
|
+
globalRateLimiter?.reset();
|
|
145
|
+
globalRateLimiter = null;
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
// src/token-tracker.ts
|
|
149
|
+
var MODEL_COSTS = {
|
|
150
|
+
"gpt-4o": { promptCostPer1K: 5e-3, completionCostPer1K: 0.015 },
|
|
151
|
+
"gpt-4o-mini": { promptCostPer1K: 15e-5, completionCostPer1K: 6e-4 },
|
|
152
|
+
"gpt-4-turbo": { promptCostPer1K: 0.01, completionCostPer1K: 0.03 },
|
|
153
|
+
"gpt-4": { promptCostPer1K: 0.03, completionCostPer1K: 0.06 },
|
|
154
|
+
"gpt-3.5-turbo": {
|
|
155
|
+
promptCostPer1K: 5e-4,
|
|
156
|
+
completionCostPer1K: 15e-4
|
|
157
|
+
}
|
|
158
|
+
};
|
|
159
|
+
var TokenTracker = class {
  /**
   * Accumulates prompt/completion token counts across LLM calls and
   * estimates spend in USD via the MODEL_COSTS pricing table.
   * @param {string} [model="gpt-4o"] - Model whose pricing is used for estimates.
   */
  constructor(model = "gpt-4o") {
    this.model = model;
    // Running totals; callCount records how many addUsage() calls were folded in.
    this.usage = {
      promptTokens: 0,
      completionTokens: 0,
      totalTokens: 0,
      callCount: 0
    };
  }
  /**
   * Fold one call's token counts into the running totals.
   * @param {number} promptTokens - Tokens consumed by the prompt.
   * @param {number} completionTokens - Tokens produced in the completion.
   */
  addUsage(promptTokens, completionTokens) {
    const u = this.usage;
    u.promptTokens += promptTokens;
    u.completionTokens += completionTokens;
    u.totalTokens += promptTokens + completionTokens;
    u.callCount += 1;
  }
  /** @returns {object} Shallow copy of the accumulated usage counters. */
  getSummary() {
    return Object.assign({}, this.usage);
  }
  /**
   * Estimate the cost of all recorded usage.
   * Unknown model names fall back to gpt-4o pricing.
   * @returns {number} Estimated cost in USD.
   */
  getEstimatedCost() {
    const pricing = MODEL_COSTS[this.model] || MODEL_COSTS["gpt-4o"];
    const promptCost = this.usage.promptTokens / 1e3 * pricing.promptCostPer1K;
    const completionCost = this.usage.completionTokens / 1e3 * pricing.completionCostPer1K;
    return promptCost + completionCost;
  }
  /** @returns {string} Cost rendered as a dollar amount with 4 decimal places. */
  getFormattedCost() {
    return `$${this.getEstimatedCost().toFixed(4)}`;
  }
  /** @returns {string} Multi-line, human-readable usage report. */
  getReport() {
    const summary = this.getSummary();
    const cost = this.getEstimatedCost();
    const lines = [];
    lines.push(`Token Usage Report (${this.model})`);
    lines.push(`\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500`);
    lines.push(`Calls:              ${summary.callCount}`);
    lines.push(`Prompt Tokens:      ${summary.promptTokens.toLocaleString()}`);
    lines.push(`Completion Tokens:  ${summary.completionTokens.toLocaleString()}`);
    lines.push(`Total Tokens:       ${summary.totalTokens.toLocaleString()}`);
    lines.push(`Estimated Cost:     $${cost.toFixed(4)}`);
    return lines.join("\n");
  }
  /** Zero all counters; the selected model is kept. */
  reset() {
    this.usage = {
      promptTokens: 0,
      completionTokens: 0,
      totalTokens: 0,
      callCount: 0
    };
  }
  /**
   * Switch the model used for subsequent cost estimates.
   * @param {string} model - New model name for pricing lookups.
   */
  setModel(model) {
    this.model = model;
  }
};
|
|
213
|
+
// Module-level singleton managed by getGlobalTokenTracker() / resetGlobalTokenTracker().
var globalTracker = null;
|
|
214
|
+
/**
 * Lazily create and return the shared TokenTracker singleton.
 * @param {string} [model] - If given on a later call, retargets the existing
 *   tracker's pricing model; on first call it seeds the new tracker.
 * @returns {TokenTracker} The shared tracker instance.
 */
function getGlobalTokenTracker(model) {
  if (globalTracker) {
    if (model) {
      globalTracker.setModel(model);
    }
    return globalTracker;
  }
  globalTracker = new TokenTracker(model);
  return globalTracker;
}
|
|
222
|
+
/**
 * Zero the shared tracker's counters. Note the singleton itself is kept
 * (unlike resetGlobalRateLimiter, which also discards its instance).
 */
function resetGlobalTokenTracker() {
  if (globalTracker) {
    globalTracker.reset();
  }
}
|
|
225
|
+
|
|
226
|
+
// Public surface of this chunk, re-exported by the package entry points.
export {
  RateLimiter,
  getGlobalRateLimiter,
  resetGlobalRateLimiter,
  TokenTracker,
  getGlobalTokenTracker,
  resetGlobalTokenTracker
};
|
|
234
|
+
//# sourceMappingURL=chunk-MJS33AAS.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/rate-limiter.ts","../src/token-tracker.ts"],"sourcesContent":["/**\n * Rate Limiter for LLM API Calls\n *\n * Implements token bucket algorithm with:\n * - Configurable requests per minute/second\n * - Automatic backoff on 429 errors\n * - Queue for pending requests\n *\n * @packageDocumentation\n */\n\nexport interface RateLimiterOptions {\n /** Maximum requests per minute (default: 60) */\n requestsPerMinute?: number;\n /** Maximum requests per second (default: 3) */\n requestsPerSecond?: number;\n /** Maximum concurrent requests (default: 5) */\n maxConcurrent?: number;\n /** Base delay for exponential backoff in ms (default: 1000) */\n baseBackoffMs?: number;\n /** Maximum backoff delay in ms (default: 60000) */\n maxBackoffMs?: number;\n}\n\ninterface QueuedRequest<T> {\n execute: () => Promise<T>;\n resolve: (value: T) => void;\n reject: (error: Error) => void;\n retryCount: number;\n}\n\n/**\n * Rate limiter for LLM API calls using token bucket algorithm.\n *\n * @example\n * ```typescript\n * const limiter = new RateLimiter({ requestsPerMinute: 30 });\n * const result = await limiter.execute(() => llm.invoke(messages));\n * ```\n */\nexport class RateLimiter {\n private requestsPerMinute: number;\n private requestsPerSecond: number;\n private maxConcurrent: number;\n private baseBackoffMs: number;\n private maxBackoffMs: number;\n\n private minuteTokens: number;\n private secondTokens: number;\n private activeRequests: number = 0;\n private queue: QueuedRequest<unknown>[] = [];\n private lastMinuteReset: number = Date.now();\n private lastSecondReset: number = Date.now();\n private processing: boolean = false;\n private currentBackoffMs: number = 0;\n\n constructor(options: RateLimiterOptions = {}) {\n this.requestsPerMinute = options.requestsPerMinute ?? 60;\n this.requestsPerSecond = options.requestsPerSecond ?? 3;\n this.maxConcurrent = options.maxConcurrent ?? 5;\n this.baseBackoffMs = options.baseBackoffMs ?? 
1000;\n this.maxBackoffMs = options.maxBackoffMs ?? 60000;\n\n this.minuteTokens = this.requestsPerMinute;\n this.secondTokens = this.requestsPerSecond;\n }\n\n async execute<T>(fn: () => Promise<T>, _maxRetries: number = 3): Promise<T> {\n return new Promise<T>((resolve, reject) => {\n this.queue.push({\n execute: fn as () => Promise<unknown>,\n resolve: resolve as (value: unknown) => void,\n reject,\n retryCount: 0,\n });\n this.processQueue();\n });\n }\n\n getStatus(): {\n queueLength: number;\n activeRequests: number;\n minuteTokens: number;\n secondTokens: number;\n backoffMs: number;\n } {\n return {\n queueLength: this.queue.length,\n activeRequests: this.activeRequests,\n minuteTokens: this.minuteTokens,\n secondTokens: this.secondTokens,\n backoffMs: this.currentBackoffMs,\n };\n }\n\n reset(): void {\n this.minuteTokens = this.requestsPerMinute;\n this.secondTokens = this.requestsPerSecond;\n this.activeRequests = 0;\n this.queue = [];\n this.currentBackoffMs = 0;\n this.lastMinuteReset = Date.now();\n this.lastSecondReset = Date.now();\n }\n\n private async processQueue(): Promise<void> {\n if (this.processing) return;\n this.processing = true;\n\n while (this.queue.length > 0) {\n this.refillTokens();\n\n if (!this.canMakeRequest()) {\n const waitTime = this.getWaitTime();\n await this.sleep(waitTime);\n continue;\n }\n\n if (this.currentBackoffMs > 0) {\n await this.sleep(this.currentBackoffMs);\n this.currentBackoffMs = 0;\n }\n\n const request = this.queue.shift();\n if (!request) continue;\n\n this.consumeTokens();\n this.activeRequests++;\n\n try {\n const result = await request.execute();\n request.resolve(result);\n this.currentBackoffMs = 0;\n } catch (error) {\n const err = error instanceof Error ? error : new Error(String(error));\n\n if (this.isRateLimitError(err)) {\n this.currentBackoffMs = Math.min(\n this.baseBackoffMs * Math.pow(2, request.retryCount),\n this.maxBackoffMs,\n );\n\n console.warn(\n `[RateLimiter] Rate limited. 
Backing off for ${this.currentBackoffMs}ms ` +\n `(retry ${request.retryCount + 1})`,\n );\n\n if (request.retryCount < 3) {\n this.queue.unshift({\n ...request,\n retryCount: request.retryCount + 1,\n });\n } else {\n request.reject(\n new Error(\n `Rate limit exceeded after ${request.retryCount + 1} retries: ${err.message}`,\n ),\n );\n }\n } else {\n request.reject(err);\n }\n } finally {\n this.activeRequests--;\n }\n }\n\n this.processing = false;\n }\n\n private refillTokens(): void {\n const now = Date.now();\n if (now - this.lastMinuteReset >= 60000) {\n this.minuteTokens = this.requestsPerMinute;\n this.lastMinuteReset = now;\n }\n if (now - this.lastSecondReset >= 1000) {\n this.secondTokens = this.requestsPerSecond;\n this.lastSecondReset = now;\n }\n }\n\n private canMakeRequest(): boolean {\n return (\n this.minuteTokens > 0 &&\n this.secondTokens > 0 &&\n this.activeRequests < this.maxConcurrent\n );\n }\n\n private consumeTokens(): void {\n this.minuteTokens--;\n this.secondTokens--;\n }\n\n private getWaitTime(): number {\n const now = Date.now();\n if (this.secondTokens <= 0) {\n return Math.max(0, 1000 - (now - this.lastSecondReset));\n }\n if (this.minuteTokens <= 0) {\n return Math.max(0, 60000 - (now - this.lastMinuteReset));\n }\n return 100;\n }\n\n private isRateLimitError(error: Error): boolean {\n const message = error.message.toLowerCase();\n return (\n message.includes('429') ||\n message.includes('rate limit') ||\n message.includes('too many requests') ||\n message.includes('quota exceeded')\n );\n }\n\n private sleep(ms: number): Promise<void> {\n return new Promise((resolve) => setTimeout(resolve, ms));\n }\n}\n\n// Singleton instance\nlet globalRateLimiter: RateLimiter | null = null;\n\nexport function getGlobalRateLimiter(\n options?: RateLimiterOptions,\n): RateLimiter {\n if (!globalRateLimiter) {\n globalRateLimiter = new RateLimiter(options);\n }\n return globalRateLimiter;\n}\n\nexport function resetGlobalRateLimiter(): void 
{\n globalRateLimiter?.reset();\n globalRateLimiter = null;\n}\n","/**\n * Token Tracker for LLM Usage\n *\n * Tracks token usage across multiple LLM calls for:\n * - Cost estimation\n * - Usage monitoring\n * - Quota management\n *\n * @packageDocumentation\n */\n\nexport interface TokenUsage {\n promptTokens: number;\n completionTokens: number;\n totalTokens: number;\n callCount: number;\n}\n\nexport interface TokenCost {\n promptCostPer1K: number;\n completionCostPer1K: number;\n}\n\n// Pricing as of 2024 (update as needed)\nconst MODEL_COSTS: Record<string, TokenCost> = {\n 'gpt-4o': { promptCostPer1K: 0.005, completionCostPer1K: 0.015 },\n 'gpt-4o-mini': { promptCostPer1K: 0.00015, completionCostPer1K: 0.0006 },\n 'gpt-4-turbo': { promptCostPer1K: 0.01, completionCostPer1K: 0.03 },\n 'gpt-4': { promptCostPer1K: 0.03, completionCostPer1K: 0.06 },\n 'gpt-3.5-turbo': {\n promptCostPer1K: 0.0005,\n completionCostPer1K: 0.0015,\n },\n};\n\nexport class TokenTracker {\n private model: string;\n private usage: TokenUsage = {\n promptTokens: 0,\n completionTokens: 0,\n totalTokens: 0,\n callCount: 0,\n };\n\n constructor(model: string = 'gpt-4o') {\n this.model = model;\n }\n\n addUsage(promptTokens: number, completionTokens: number): void {\n this.usage.promptTokens += promptTokens;\n this.usage.completionTokens += completionTokens;\n this.usage.totalTokens += promptTokens + completionTokens;\n this.usage.callCount++;\n }\n\n getSummary(): TokenUsage {\n return { ...this.usage };\n }\n\n getEstimatedCost(): number {\n const costs = MODEL_COSTS[this.model] || MODEL_COSTS['gpt-4o'];\n const promptCost =\n (this.usage.promptTokens / 1000) * costs.promptCostPer1K;\n const completionCost =\n (this.usage.completionTokens / 1000) * costs.completionCostPer1K;\n return promptCost + completionCost;\n }\n\n getFormattedCost(): string {\n const cost = this.getEstimatedCost();\n return `$${cost.toFixed(4)}`;\n }\n\n getReport(): string {\n const summary = this.getSummary();\n 
const cost = this.getEstimatedCost();\n return [\n `Token Usage Report (${this.model})`,\n `─────────────────────────────`,\n `Calls: ${summary.callCount}`,\n `Prompt Tokens: ${summary.promptTokens.toLocaleString()}`,\n `Completion Tokens: ${summary.completionTokens.toLocaleString()}`,\n `Total Tokens: ${summary.totalTokens.toLocaleString()}`,\n `Estimated Cost: $${cost.toFixed(4)}`,\n ].join('\\n');\n }\n\n reset(): void {\n this.usage = {\n promptTokens: 0,\n completionTokens: 0,\n totalTokens: 0,\n callCount: 0,\n };\n }\n\n setModel(model: string): void {\n this.model = model;\n }\n}\n\n// Global tracker instance\nlet globalTracker: TokenTracker | null = null;\n\nexport function getGlobalTokenTracker(model?: string): TokenTracker {\n if (!globalTracker) {\n globalTracker = new TokenTracker(model);\n } else if (model) {\n globalTracker.setModel(model);\n }\n return globalTracker;\n}\n\nexport function resetGlobalTokenTracker(): void {\n globalTracker?.reset();\n}\n"],"mappings":";AAwCO,IAAM,cAAN,MAAkB;AAAA,EAgBvB,YAAY,UAA8B,CAAC,GAAG;AAP9C,SAAQ,iBAAyB;AACjC,SAAQ,QAAkC,CAAC;AAC3C,SAAQ,kBAA0B,KAAK,IAAI;AAC3C,SAAQ,kBAA0B,KAAK,IAAI;AAC3C,SAAQ,aAAsB;AAC9B,SAAQ,mBAA2B;AAGjC,SAAK,oBAAoB,QAAQ,qBAAqB;AACtD,SAAK,oBAAoB,QAAQ,qBAAqB;AACtD,SAAK,gBAAgB,QAAQ,iBAAiB;AAC9C,SAAK,gBAAgB,QAAQ,iBAAiB;AAC9C,SAAK,eAAe,QAAQ,gBAAgB;AAE5C,SAAK,eAAe,KAAK;AACzB,SAAK,eAAe,KAAK;AAAA,EAC3B;AAAA,EAEA,MAAM,QAAW,IAAsB,cAAsB,GAAe;AAC1E,WAAO,IAAI,QAAW,CAAC,SAAS,WAAW;AACzC,WAAK,MAAM,KAAK;AAAA,QACd,SAAS;AAAA,QACT;AAAA,QACA;AAAA,QACA,YAAY;AAAA,MACd,CAAC;AACD,WAAK,aAAa;AAAA,IACpB,CAAC;AAAA,EACH;AAAA,EAEA,YAME;AACA,WAAO;AAAA,MACL,aAAa,KAAK,MAAM;AAAA,MACxB,gBAAgB,KAAK;AAAA,MACrB,cAAc,KAAK;AAAA,MACnB,cAAc,KAAK;AAAA,MACnB,WAAW,KAAK;AAAA,IAClB;AAAA,EACF;AAAA,EAEA,QAAc;AACZ,SAAK,eAAe,KAAK;AACzB,SAAK,eAAe,KAAK;AACzB,SAAK,iBAAiB;AACtB,SAAK,QAAQ,CAAC;AACd,SAAK,mBAAmB;AACxB,SAAK,kBAAkB,KAAK,IAAI;AAChC,SAAK,kBAAkB,KAAK,IAAI;AAAA,EAClC;AAAA,EAEA,MAAc,eAA8B;AAC1C,QAAI,KAAK,WAAY;AACrB,SAAK,aAAa;AAElB,WAAO,KAAK,MAAM,
SAAS,GAAG;AAC5B,WAAK,aAAa;AAElB,UAAI,CAAC,KAAK,eAAe,GAAG;AAC1B,cAAM,WAAW,KAAK,YAAY;AAClC,cAAM,KAAK,MAAM,QAAQ;AACzB;AAAA,MACF;AAEA,UAAI,KAAK,mBAAmB,GAAG;AAC7B,cAAM,KAAK,MAAM,KAAK,gBAAgB;AACtC,aAAK,mBAAmB;AAAA,MAC1B;AAEA,YAAM,UAAU,KAAK,MAAM,MAAM;AACjC,UAAI,CAAC,QAAS;AAEd,WAAK,cAAc;AACnB,WAAK;AAEL,UAAI;AACF,cAAM,SAAS,MAAM,QAAQ,QAAQ;AACrC,gBAAQ,QAAQ,MAAM;AACtB,aAAK,mBAAmB;AAAA,MAC1B,SAAS,OAAO;AACd,cAAM,MAAM,iBAAiB,QAAQ,QAAQ,IAAI,MAAM,OAAO,KAAK,CAAC;AAEpE,YAAI,KAAK,iBAAiB,GAAG,GAAG;AAC9B,eAAK,mBAAmB,KAAK;AAAA,YAC3B,KAAK,gBAAgB,KAAK,IAAI,GAAG,QAAQ,UAAU;AAAA,YACnD,KAAK;AAAA,UACP;AAEA,kBAAQ;AAAA,YACN,+CAA+C,KAAK,gBAAgB,aACxD,QAAQ,aAAa,CAAC;AAAA,UACpC;AAEA,cAAI,QAAQ,aAAa,GAAG;AAC1B,iBAAK,MAAM,QAAQ;AAAA,cACjB,GAAG;AAAA,cACH,YAAY,QAAQ,aAAa;AAAA,YACnC,CAAC;AAAA,UACH,OAAO;AACL,oBAAQ;AAAA,cACN,IAAI;AAAA,gBACF,6BAA6B,QAAQ,aAAa,CAAC,aAAa,IAAI,OAAO;AAAA,cAC7E;AAAA,YACF;AAAA,UACF;AAAA,QACF,OAAO;AACL,kBAAQ,OAAO,GAAG;AAAA,QACpB;AAAA,MACF,UAAE;AACA,aAAK;AAAA,MACP;AAAA,IACF;AAEA,SAAK,aAAa;AAAA,EACpB;AAAA,EAEQ,eAAqB;AAC3B,UAAM,MAAM,KAAK,IAAI;AACrB,QAAI,MAAM,KAAK,mBAAmB,KAAO;AACvC,WAAK,eAAe,KAAK;AACzB,WAAK,kBAAkB;AAAA,IACzB;AACA,QAAI,MAAM,KAAK,mBAAmB,KAAM;AACtC,WAAK,eAAe,KAAK;AACzB,WAAK,kBAAkB;AAAA,IACzB;AAAA,EACF;AAAA,EAEQ,iBAA0B;AAChC,WACE,KAAK,eAAe,KACpB,KAAK,eAAe,KACpB,KAAK,iBAAiB,KAAK;AAAA,EAE/B;AAAA,EAEQ,gBAAsB;AAC5B,SAAK;AACL,SAAK;AAAA,EACP;AAAA,EAEQ,cAAsB;AAC5B,UAAM,MAAM,KAAK,IAAI;AACrB,QAAI,KAAK,gBAAgB,GAAG;AAC1B,aAAO,KAAK,IAAI,GAAG,OAAQ,MAAM,KAAK,gBAAgB;AAAA,IACxD;AACA,QAAI,KAAK,gBAAgB,GAAG;AAC1B,aAAO,KAAK,IAAI,GAAG,OAAS,MAAM,KAAK,gBAAgB;AAAA,IACzD;AACA,WAAO;AAAA,EACT;AAAA,EAEQ,iBAAiB,OAAuB;AAC9C,UAAM,UAAU,MAAM,QAAQ,YAAY;AAC1C,WACE,QAAQ,SAAS,KAAK,KACtB,QAAQ,SAAS,YAAY,KAC7B,QAAQ,SAAS,mBAAmB,KACpC,QAAQ,SAAS,gBAAgB;AAAA,EAErC;AAAA,EAEQ,MAAM,IAA2B;AACvC,WAAO,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,EAAE,CAAC;AAAA,EACzD;AACF;AAGA,IAAI,oBAAwC;AAErC,SAAS,qBACd,SACa;AACb,MAAI,CAAC,mBAAmB;AACtB,wBAAoB,IAAI,YAAY,OAAO;AAAA,EAC7C;AACA,SAAO;AACT;AAEO,SAAS,yBAA+B;AAC7C,qBAAmB,MAAM;AACzB,sBAAoB;AACtB;;;AC
pNA,IAAM,cAAyC;AAAA,EAC7C,UAAU,EAAE,iBAAiB,MAAO,qBAAqB,MAAM;AAAA,EAC/D,eAAe,EAAE,iBAAiB,OAAS,qBAAqB,KAAO;AAAA,EACvE,eAAe,EAAE,iBAAiB,MAAM,qBAAqB,KAAK;AAAA,EAClE,SAAS,EAAE,iBAAiB,MAAM,qBAAqB,KAAK;AAAA,EAC5D,iBAAiB;AAAA,IACf,iBAAiB;AAAA,IACjB,qBAAqB;AAAA,EACvB;AACF;AAEO,IAAM,eAAN,MAAmB;AAAA,EASxB,YAAY,QAAgB,UAAU;AAPtC,SAAQ,QAAoB;AAAA,MAC1B,cAAc;AAAA,MACd,kBAAkB;AAAA,MAClB,aAAa;AAAA,MACb,WAAW;AAAA,IACb;AAGE,SAAK,QAAQ;AAAA,EACf;AAAA,EAEA,SAAS,cAAsB,kBAAgC;AAC7D,SAAK,MAAM,gBAAgB;AAC3B,SAAK,MAAM,oBAAoB;AAC/B,SAAK,MAAM,eAAe,eAAe;AACzC,SAAK,MAAM;AAAA,EACb;AAAA,EAEA,aAAyB;AACvB,WAAO,EAAE,GAAG,KAAK,MAAM;AAAA,EACzB;AAAA,EAEA,mBAA2B;AACzB,UAAM,QAAQ,YAAY,KAAK,KAAK,KAAK,YAAY,QAAQ;AAC7D,UAAM,aACH,KAAK,MAAM,eAAe,MAAQ,MAAM;AAC3C,UAAM,iBACH,KAAK,MAAM,mBAAmB,MAAQ,MAAM;AAC/C,WAAO,aAAa;AAAA,EACtB;AAAA,EAEA,mBAA2B;AACzB,UAAM,OAAO,KAAK,iBAAiB;AACnC,WAAO,IAAI,KAAK,QAAQ,CAAC,CAAC;AAAA,EAC5B;AAAA,EAEA,YAAoB;AAClB,UAAM,UAAU,KAAK,WAAW;AAChC,UAAM,OAAO,KAAK,iBAAiB;AACnC,WAAO;AAAA,MACL,uBAAuB,KAAK,KAAK;AAAA,MACjC;AAAA,MACA,uBAAuB,QAAQ,SAAS;AAAA,MACxC,uBAAuB,QAAQ,aAAa,eAAe,CAAC;AAAA,MAC5D,uBAAuB,QAAQ,iBAAiB,eAAe,CAAC;AAAA,MAChE,uBAAuB,QAAQ,YAAY,eAAe,CAAC;AAAA,MAC3D,wBAAwB,KAAK,QAAQ,CAAC,CAAC;AAAA,IACzC,EAAE,KAAK,IAAI;AAAA,EACb;AAAA,EAEA,QAAc;AACZ,SAAK,QAAQ;AAAA,MACX,cAAc;AAAA,MACd,kBAAkB;AAAA,MAClB,aAAa;AAAA,MACb,WAAW;AAAA,IACb;AAAA,EACF;AAAA,EAEA,SAAS,OAAqB;AAC5B,SAAK,QAAQ;AAAA,EACf;AACF;AAGA,IAAI,gBAAqC;AAElC,SAAS,sBAAsB,OAA8B;AAClE,MAAI,CAAC,eAAe;AAClB,oBAAgB,IAAI,aAAa,KAAK;AAAA,EACxC,WAAW,OAAO;AAChB,kBAAc,SAAS,KAAK;AAAA,EAC9B;AACA,SAAO;AACT;AAEO,SAAS,0BAAgC;AAC9C,iBAAe,MAAM;AACvB;","names":[]}
|