@almadar/llm 1.0.16 → 2.0.0
This diff shows the changes between two publicly released versions of the package, as published to their public registry, and is provided for informational purposes only.
- package/LICENSE +21 -72
- package/README.md +25 -0
- package/dist/{chunk-56H37PN5.js → chunk-YJVZ6ZWO.js} +80 -57
- package/dist/chunk-YJVZ6ZWO.js.map +1 -0
- package/dist/client.js +5 -1
- package/dist/index.js +5 -1
- package/dist/index.js.map +1 -1
- package/package.json +11 -2
- package/src/client.ts +99 -76
- package/src/index.ts +2 -0
- package/dist/chunk-56H37PN5.js.map +0 -1
- package/dist/client.d.ts +0 -136
- package/dist/index.d.ts +0 -67
- package/dist/json-parser.d.ts +0 -43
- package/dist/rate-limiter-9XAWfHwe.d.ts +0 -98
- package/dist/structured-output.d.ts +0 -113
--- package/dist/rate-limiter-9XAWfHwe.d.ts
+++ /dev/null
@@ -1,98 +0,0 @@
-/**
- * Token Tracker for LLM Usage
- *
- * Tracks token usage across multiple LLM calls for:
- * - Cost estimation
- * - Usage monitoring
- * - Quota management
- *
- * @packageDocumentation
- */
-interface TokenUsage {
-    promptTokens: number;
-    completionTokens: number;
-    totalTokens: number;
-    callCount: number;
-}
-declare class TokenTracker {
-    private model;
-    private usage;
-    constructor(model?: string);
-    addUsage(promptTokens: number, completionTokens: number): void;
-    getSummary(): TokenUsage;
-    getEstimatedCost(): number;
-    getFormattedCost(): string;
-    getReport(): string;
-    reset(): void;
-    setModel(model: string): void;
-}
-declare function getGlobalTokenTracker(model?: string): TokenTracker;
-declare function resetGlobalTokenTracker(): void;
-
-/**
- * Rate Limiter for LLM API Calls
- *
- * Implements token bucket algorithm with:
- * - Configurable requests per minute/second
- * - Automatic backoff on 429 errors
- * - Queue for pending requests
- *
- * @packageDocumentation
- */
-interface RateLimiterOptions {
-    /** Maximum requests per minute (default: 60) */
-    requestsPerMinute?: number;
-    /** Maximum requests per second (default: 3) */
-    requestsPerSecond?: number;
-    /** Maximum concurrent requests (default: 5) */
-    maxConcurrent?: number;
-    /** Base delay for exponential backoff in ms (default: 1000) */
-    baseBackoffMs?: number;
-    /** Maximum backoff delay in ms (default: 60000) */
-    maxBackoffMs?: number;
-}
-/**
- * Rate limiter for LLM API calls using token bucket algorithm.
- *
- * @example
- * ```typescript
- * const limiter = new RateLimiter({ requestsPerMinute: 30 });
- * const result = await limiter.execute(() => llm.invoke(messages));
- * ```
- */
-declare class RateLimiter {
-    private requestsPerMinute;
-    private requestsPerSecond;
-    private maxConcurrent;
-    private baseBackoffMs;
-    private maxBackoffMs;
-    private minuteTokens;
-    private secondTokens;
-    private activeRequests;
-    private queue;
-    private lastMinuteReset;
-    private lastSecondReset;
-    private processing;
-    private currentBackoffMs;
-    constructor(options?: RateLimiterOptions);
-    execute<T>(fn: () => Promise<T>, _maxRetries?: number): Promise<T>;
-    getStatus(): {
-        queueLength: number;
-        activeRequests: number;
-        minuteTokens: number;
-        secondTokens: number;
-        backoffMs: number;
-    };
-    reset(): void;
-    private processQueue;
-    private refillTokens;
-    private canMakeRequest;
-    private consumeTokens;
-    private getWaitTime;
-    private isRateLimitError;
-    private sleep;
-}
-declare function getGlobalRateLimiter(options?: RateLimiterOptions): RateLimiter;
-declare function resetGlobalRateLimiter(): void;
-
-export { RateLimiter as R, TokenTracker as T, type RateLimiterOptions as a, type TokenUsage as b, getGlobalTokenTracker as c, resetGlobalTokenTracker as d, getGlobalRateLimiter as g, resetGlobalRateLimiter as r };
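For reference, the deleted rate-limiter declarations above describe the 1.x call pattern below. This is a minimal sketch, assuming the symbols were re-exported from the package root; the `@almadar/llm` import path and the `callModel` helper are assumptions, only the signatures are confirmed by the diff.

```typescript
import { RateLimiter, TokenTracker } from '@almadar/llm';

// Token bucket limiter: 30 requests/minute, starting backoff of 2s on 429s.
const limiter = new RateLimiter({ requestsPerMinute: 30, baseBackoffMs: 2000 });
const tracker = new TokenTracker('gpt-4o-mini');

async function generateWithLimits(
  // Hypothetical stand-in for any LLM call that resolves with token counts.
  callModel: () => Promise<{ text: string; promptTokens: number; completionTokens: number }>
): Promise<string> {
  // execute() waits for a free slot in the per-second/per-minute buckets
  // and, per the JSDoc above, backs off automatically on rate-limit errors.
  const result = await limiter.execute(callModel);
  // Accumulate usage so getReport()/getEstimatedCost() cover all calls.
  tracker.addUsage(result.promptTokens, result.completionTokens);
  console.log(tracker.getReport());
  console.log(limiter.getStatus()); // { queueLength, activeRequests, minuteTokens, secondTokens, backoffMs }
  return result.text;
}
```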
--- package/dist/structured-output.d.ts
+++ /dev/null
@@ -1,113 +0,0 @@
-import { a as RateLimiterOptions, b as TokenUsage } from './rate-limiter-9XAWfHwe.js';
-import { z } from 'zod';
-
-/**
- * JSON Schema type used for OpenAI structured outputs.
- */
-interface JsonSchema {
-    type?: string | string[];
-    properties?: Record<string, JsonSchema>;
-    required?: string[];
-    items?: JsonSchema;
-    enum?: unknown[];
-    const?: unknown;
-    anyOf?: JsonSchema[];
-    oneOf?: JsonSchema[];
-    allOf?: JsonSchema[];
-    $ref?: string;
-    $defs?: Record<string, JsonSchema>;
-    definitions?: Record<string, JsonSchema>;
-    additionalProperties?: boolean | JsonSchema;
-    description?: string;
-    default?: unknown;
-    minItems?: number;
-    maxItems?: number;
-    minLength?: number;
-}
-interface StructuredOutputOptions {
-    model?: string;
-    temperature?: number;
-    maxTokens?: number;
-    rateLimiter?: RateLimiterOptions;
-    useGlobalRateLimiter?: boolean;
-    trackTokens?: boolean;
-}
-interface StructuredGenerationOptions {
-    /** User's natural language request */
-    userRequest: string;
-    /** Model to use (overrides client default) */
-    model?: string;
-    /** Temperature (overrides client default) */
-    temperature?: number;
-    /** Maximum tokens (overrides client default) */
-    maxTokens?: number;
-    /** JSON Schema for structured output */
-    jsonSchema?: JsonSchema;
-    /** Schema name for the json_schema response format */
-    schemaName?: string;
-    /** System prompt override */
-    systemPrompt?: string;
-    /** System prompt builder function (called dynamically) */
-    buildSystemPrompt?: () => string;
-    /** Additional system prompt instructions */
-    additionalInstructions?: string;
-    /** Existing context for updates (e.g., existing schema JSON) */
-    existingContext?: string;
-    /** Skip post-generation validation (default: false) */
-    skipValidation?: boolean;
-}
-interface StructuredGenerationResult<T = unknown> {
-    /** Generated data (guaranteed to match JSON Schema structure) */
-    data: T;
-    /** Raw JSON string from API */
-    raw: string;
-    /** Token usage statistics */
-    usage: {
-        promptTokens: number;
-        completionTokens: number;
-        totalTokens: number;
-    };
-    /** Generation latency in milliseconds */
-    latencyMs: number;
-    /** Model used for generation */
-    model: string;
-    /** Zod validation result (if not skipped) */
-    zodValidation?: {
-        success: boolean;
-        errors?: z.ZodError['errors'];
-    };
-}
-declare const STRUCTURED_OUTPUT_MODELS: {
-    readonly GPT5_MINI: "gpt-5-mini";
-    readonly GPT4O_MINI: "gpt-4o-mini";
-    readonly GPT4O: "gpt-4o";
-    readonly GPT4O_2024_08_06: "gpt-4o-2024-08-06";
-};
-declare class StructuredOutputClient {
-    private openai;
-    private rateLimiter;
-    private tokenTracker;
-    private defaultModel;
-    private defaultTemperature;
-    private defaultMaxTokens;
-    constructor(options?: StructuredOutputOptions);
-    private usesMaxCompletionTokens;
-    /**
-     * Generate structured output with guaranteed JSON Schema compliance.
-     */
-    generate<T = unknown>(options: StructuredGenerationOptions): Promise<StructuredGenerationResult<T>>;
-    getModel(): string;
-    getRateLimiterStatus(): {
-        queueLength: number;
-        activeRequests: number;
-        minuteTokens: number;
-        secondTokens: number;
-        backoffMs: number;
-    };
-    getTokenUsage(): TokenUsage | null;
-}
-declare function getStructuredOutputClient(options?: StructuredOutputOptions): StructuredOutputClient;
-declare function resetStructuredOutputClient(): void;
-declare function isStructuredOutputAvailable(): boolean;
-
-export { type JsonSchema, STRUCTURED_OUTPUT_MODELS, type StructuredGenerationOptions, type StructuredGenerationResult, StructuredOutputClient, type StructuredOutputOptions, getStructuredOutputClient, isStructuredOutputAvailable, resetStructuredOutputClient };
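The deleted structured-output declarations imply the 1.x usage sketched below. Again, the root import path is an assumption, and the meaning of isStructuredOutputAvailable() (presumably a credentials check) is inferred from its name; the signatures themselves come from the diff.

```typescript
import {
  getStructuredOutputClient,
  isStructuredOutputAvailable,
  STRUCTURED_OUTPUT_MODELS,
  type JsonSchema,
} from '@almadar/llm';

// JSON Schema the model's reply must conform to.
const personSchema: JsonSchema = {
  type: 'object',
  properties: {
    name: { type: 'string', description: 'Full name' },
    age: { type: 'number' },
  },
  required: ['name', 'age'],
  additionalProperties: false,
};

async function extractPerson(text: string) {
  if (!isStructuredOutputAvailable()) {
    // Likely means missing API credentials, but that is an inference.
    throw new Error('structured output unavailable');
  }
  const client = getStructuredOutputClient({
    model: STRUCTURED_OUTPUT_MODELS.GPT4O_MINI,
    trackTokens: true,
  });
  const result = await client.generate<{ name: string; age: number }>({
    userRequest: `Extract the person mentioned here: ${text}`,
    jsonSchema: personSchema,
    schemaName: 'person',
  });
  console.log(result.usage.totalTokens, `${result.latencyMs}ms`);
  return result.data; // typed as { name: string; age: number }
}
```

Note that 2.0.0 removes all of these bundled .d.ts files (see the file list at the top), so consumers relying on the 1.x types should check the new package.json and dist layout before upgrading.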