@almadar/llm 2.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,142 @@
+ import { a as RateLimiterOptions, b as TokenUsage } from './rate-limiter-9XAWfHwe.js';
+ import { ChatOpenAI } from '@langchain/openai';
+ import { ChatAnthropic } from '@langchain/anthropic';
+ import { z } from 'zod';
+
+ type ChatModel = ChatOpenAI | ChatAnthropic;
+ type LLMProvider = 'openai' | 'deepseek' | 'anthropic' | 'kimi' | 'openrouter';
+ interface ProviderConfig {
+     apiKey: string;
+     baseUrl?: string;
+     defaultModel: string;
+ }
+ interface LLMClientOptions {
+     provider?: LLMProvider;
+     model?: string;
+     temperature?: number;
+     streaming?: boolean;
+     rateLimiter?: RateLimiterOptions;
+     useGlobalRateLimiter?: boolean;
+     trackTokens?: boolean;
+ }
+ interface LLMCallOptions<T = unknown> {
+     systemPrompt: string;
+     userPrompt: string;
+     schema?: z.ZodSchema<T>;
+     maxRetries?: number;
+     retryWithContext?: boolean;
+     maxTokens?: number;
+     skipSchemaValidation?: boolean;
+     temperature?: number;
+ }
+ interface CacheableBlock {
+     type: 'text';
+     text: string;
+     cache_control?: {
+         type: 'ephemeral';
+     };
+ }
+ interface CacheAwareLLMCallOptions<T = unknown> extends LLMCallOptions<T> {
+     systemBlocks?: CacheableBlock[];
+     userBlocks?: CacheableBlock[];
+     rawText?: boolean;
+ }
+ interface LLMUsage {
+     promptTokens: number;
+     completionTokens: number;
+     totalTokens: number;
+ }
+ type LLMFinishReason = 'stop' | 'length' | 'content_filter' | 'tool_calls' | null;
+ interface LLMResponse<T> {
+     data: T;
+     raw: string;
+     finishReason: LLMFinishReason;
+     usage: LLMUsage | null;
+ }
+ declare const DEEPSEEK_MODELS: {
+     readonly CHAT: "deepseek-chat";
+     readonly CODER: "deepseek-coder";
+     readonly REASONER: "deepseek-reasoner";
+ };
+ declare const OPENAI_MODELS: {
+     readonly GPT4O: "gpt-4o";
+     readonly GPT4O_MINI: "gpt-4o-mini";
+     readonly GPT4_TURBO: "gpt-4-turbo";
+     readonly GPT35_TURBO: "gpt-3.5-turbo";
+     readonly GPT_5_1: "gpt-5.1";
+ };
+ declare const ANTHROPIC_MODELS: {
+     readonly CLAUDE_SONNET_4_5: "claude-sonnet-4-5-20250929";
+     readonly CLAUDE_SONNET_4: "claude-sonnet-4-20250514";
+     readonly CLAUDE_OPUS_4_5: "claude-opus-4-5-20250929";
+     readonly CLAUDE_3_5_HAIKU: "claude-3-5-haiku-20241022";
+ };
+ declare const KIMI_MODELS: {
+     readonly K2_5: "kimi-k2.5";
+ };
+ declare const OPENROUTER_MODELS: {
+     readonly QWEN_2_5_72B: "qwen/qwen-2.5-72b-instruct";
+     readonly QWEN_2_5_CODER_32B: "qwen/qwen-2.5-coder-32b-instruct";
+     readonly QWEN_3_235B: "qwen/qwen3-235b-a22b";
+     readonly LLAMA_3_3_70B: "meta-llama/llama-3.3-70b-instruct";
+     readonly LLAMA_3_1_405B: "meta-llama/llama-3.1-405b-instruct";
+     readonly LLAMA_4_MAVERICK: "meta-llama/llama-4-maverick";
+     readonly LLAMA_4_SCOUT: "meta-llama/llama-4-scout";
+ };
+ declare class LLMClient {
+     private model;
+     private rateLimiter;
+     private tokenTracker;
+     private modelName;
+     private provider;
+     private providerConfig;
+     private temperature;
+     private streaming;
+     constructor(options?: LLMClientOptions);
+     private usesMaxCompletionTokens;
+     private createModel;
+     private getModelWithOptions;
+     getProvider(): LLMProvider;
+     getModelName(): string;
+     getModel(): ChatModel;
+     getRateLimiterStatus(): {
+         queueLength: number;
+         activeRequests: number;
+         minuteTokens: number;
+         secondTokens: number;
+         backoffMs: number;
+     };
+     getTokenUsage(): TokenUsage | null;
+     call<T>(options: LLMCallOptions<T>): Promise<T>;
+     callWithMetadata<T>(options: LLMCallOptions<T>): Promise<LLMResponse<T>>;
+     private extractFinishReason;
+     callRaw(options: {
+         systemPrompt: string;
+         userPrompt: string;
+         maxTokens?: number;
+     }): Promise<string>;
+     callRawWithMetadata(options: {
+         systemPrompt: string;
+         userPrompt: string;
+         maxTokens?: number;
+     }): Promise<Omit<LLMResponse<string>, 'data'> & {
+         raw: string;
+     }>;
+     private isRateLimitError;
+     callWithCache<T>(options: CacheAwareLLMCallOptions<T>): Promise<LLMResponse<T>>;
+     static cacheableBlock(text: string, cache?: boolean): CacheableBlock;
+ }
+ declare function getSharedLLMClient(options?: LLMClientOptions): LLMClient;
+ declare function resetSharedLLMClient(provider?: LLMProvider): void;
+ declare function getAvailableProvider(): LLMProvider;
+ declare function isProviderAvailable(provider: LLMProvider): boolean;
+ declare function createRequirementsClient(options?: Partial<LLMClientOptions>): LLMClient;
+ declare function createCreativeClient(options?: Partial<LLMClientOptions>): LLMClient;
+ declare function createFixClient(options?: Partial<LLMClientOptions>): LLMClient;
+ declare function createDeepSeekClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
+ declare function createOpenAIClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
+ declare function createAnthropicClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
+ declare function createKimiClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
+ declare function createOpenRouterClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
+
+ export { ANTHROPIC_MODELS, type CacheAwareLLMCallOptions, type CacheableBlock, DEEPSEEK_MODELS, KIMI_MODELS, type LLMCallOptions, LLMClient, type LLMClientOptions, type LLMFinishReason, type LLMProvider, type LLMResponse, type LLMUsage, OPENAI_MODELS, OPENROUTER_MODELS, type ProviderConfig, createAnthropicClient, createCreativeClient, createDeepSeekClient, createFixClient, createKimiClient, createOpenAIClient, createOpenRouterClient, createRequirementsClient, getAvailableProvider, getSharedLLMClient, isProviderAvailable, resetSharedLLMClient };
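
The client declarations above suggest a call pattern along these lines. This is a minimal sketch, not code shipped with the package: the prompts, the Zod schema, and the model/temperature choices are illustrative assumptions (provider API keys are presumably picked up from the environment by the client itself).

```typescript
import { z } from 'zod';
import { createAnthropicClient, ANTHROPIC_MODELS } from '@almadar/llm';

// Hypothetical output schema for this example only.
const SummarySchema = z.object({
  title: z.string(),
  bulletPoints: z.array(z.string()),
});

const client = createAnthropicClient({
  model: ANTHROPIC_MODELS.CLAUDE_SONNET_4_5,
  temperature: 0.2,
  trackTokens: true,
});

// call<T>() resolves with data parsed and validated against the schema.
const summary = await client.call({
  systemPrompt: 'You summarize engineering documents as JSON.',
  userPrompt: 'Summarize the attached RFC in three bullet points.',
  schema: SummarySchema,
  maxTokens: 1024,
});

console.log(summary.title, client.getTokenUsage());
```

The CacheableBlock shape (cache_control of type 'ephemeral') matches Anthropic's prompt-caching format, so callWithCache and the static cacheableBlock helper appear intended for marking reusable system/user blocks; the exact caching behavior is not visible from the declarations alone.
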
@@ -0,0 +1,67 @@
+ import { LLMFinishReason, LLMClient } from './client.js';
+ export { ANTHROPIC_MODELS, CacheAwareLLMCallOptions, CacheableBlock, DEEPSEEK_MODELS, KIMI_MODELS, LLMCallOptions, LLMClientOptions, LLMProvider, LLMResponse, LLMUsage, OPENAI_MODELS, OPENROUTER_MODELS, ProviderConfig, createAnthropicClient, createCreativeClient, createDeepSeekClient, createFixClient, createKimiClient, createOpenAIClient, createOpenRouterClient, createRequirementsClient, getAvailableProvider, getSharedLLMClient, isProviderAvailable, resetSharedLLMClient } from './client.js';
+ export { R as RateLimiter, a as RateLimiterOptions, T as TokenTracker, b as TokenUsage, g as getGlobalRateLimiter, c as getGlobalTokenTracker, r as resetGlobalRateLimiter, d as resetGlobalTokenTracker } from './rate-limiter-9XAWfHwe.js';
+ export { autoCloseJson, extractJsonFromText, isValidJson, parseJsonResponse, safeParseJson } from './json-parser.js';
+ import { z } from 'zod';
+ export { JsonSchema, STRUCTURED_OUTPUT_MODELS, StructuredGenerationOptions, StructuredGenerationResult, StructuredOutputClient, StructuredOutputOptions, getStructuredOutputClient, isStructuredOutputAvailable, resetStructuredOutputClient } from './structured-output.js';
+ import '@langchain/openai';
+ import '@langchain/anthropic';
+
+ /**
+  * Truncation Detector
+  *
+  * Utilities for detecting when LLM output has been truncated and
+  * extracting usable content from partial responses.
+  *
+  * @packageDocumentation
+  */
+
+ type TruncationReason = 'finish_reason' | 'json_incomplete' | 'bracket_mismatch' | 'none';
+ interface TruncationResult {
+     isTruncated: boolean;
+     reason: TruncationReason;
+     partialContent?: string;
+     lastCompleteElement?: unknown;
+     missingCloseBrackets?: number;
+     missingCloseBraces?: number;
+ }
+ declare function detectTruncation(response: string, finishReason: LLMFinishReason): TruncationResult;
+ declare function findLastCompleteElement(json: string): unknown | null;
+ declare function isLikelyTruncated(content: string): boolean;
+
+ /**
+  * LLM Continuation Utility
+  *
+  * Handles truncated LLM responses with automatic continuation.
+  * - Detects truncation via finish_reason and JSON structure
+  * - Automatically continues with full context
+  * - Merges partial and continuation responses
+  * - Salvages partial data if max continuations reached
+  *
+  * @packageDocumentation
+  */
+
+ interface ContinuationOptions<T> {
+     client: LLMClient;
+     systemPrompt: string;
+     userPrompt: string;
+     schema?: z.ZodSchema<T>;
+     maxTokens?: number;
+     maxContinuations?: number;
+     maxRetries?: number;
+     buildContinuationPrompt: (partialResponse: string, attempt: number) => string;
+     continuationSystemPrompt?: string;
+ }
+ interface ContinuationResult<T> {
+     data: T;
+     raw: string;
+     continuationCount: number;
+     warnings: string[];
+     wasSalvaged: boolean;
+ }
+ declare function mergeResponses(previous: string, continuation: string): string;
+ declare function salvagePartialResponse<T>(rawResponse: string): T | null;
+ declare function callWithContinuation<T>(options: ContinuationOptions<T>): Promise<ContinuationResult<T>>;
+ declare function buildGenericContinuationPrompt(context: string, partialResponse: string, attempt: number, maxAttempts?: number): string;
+
+ export { type ContinuationOptions, type ContinuationResult, LLMClient, LLMFinishReason, type TruncationReason, type TruncationResult, buildGenericContinuationPrompt, callWithContinuation, detectTruncation, findLastCompleteElement, isLikelyTruncated, mergeResponses, salvagePartialResponse };
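
For long outputs that hit the token limit, the continuation helpers above can wrap a client call. A minimal sketch: the prompts and schema are made up, and the context string passed to buildGenericContinuationPrompt is an invented example.

```typescript
import { z } from 'zod';
import {
  createOpenAIClient,
  callWithContinuation,
  buildGenericContinuationPrompt,
} from '@almadar/llm';

// Hypothetical schema for a long, truncation-prone listing.
const ItemsSchema = z.object({ items: z.array(z.string()) });

const result = await callWithContinuation({
  client: createOpenAIClient(),
  systemPrompt: 'Return JSON only.',
  userPrompt: 'List every module exported by the package described below.',
  schema: ItemsSchema,
  maxTokens: 2048,
  maxContinuations: 3,
  // Reuses the generic prompt builder exported above.
  buildContinuationPrompt: (partial, attempt) =>
    buildGenericContinuationPrompt('module listing', partial, attempt),
});

if (result.wasSalvaged) {
  console.warn('Partial data salvaged after max continuations:', result.warnings);
}
console.log(result.data.items.length, 'items after', result.continuationCount, 'continuation(s)');
```
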
@@ -0,0 +1,43 @@
+ import { z } from 'zod';
+
+ /**
+  * JSON Parser Utilities
+  *
+  * Robust JSON parsing for LLM responses that may contain:
+  * - Markdown code blocks
+  * - Extra text before/after JSON
+  * - Minor formatting issues
+  *
+  * @packageDocumentation
+  */
+
+ /**
+  * Extract JSON from LLM response text.
+  *
+  * Handles markdown code blocks, raw JSON objects/arrays, and primitive values.
+  */
+ declare function extractJsonFromText(text: string): string | null;
+ /**
+  * Parse JSON from LLM response with optional Zod schema validation.
+  */
+ declare function parseJsonResponse<T>(response: string, schema?: z.ZodSchema<T>): T;
+ /**
+  * Safely parse JSON without throwing.
+  */
+ declare function safeParseJson<T>(response: string, schema?: z.ZodSchema<T>): {
+     success: true;
+     data: T;
+ } | {
+     success: false;
+     error: Error;
+ };
+ /**
+  * Check if a string is valid JSON.
+  */
+ declare function isValidJson(str: string): boolean;
+ /**
+  * Attempt to auto-close unclosed JSON brackets.
+  */
+ declare function autoCloseJson(json: string): string;
+
+ export { autoCloseJson, extractJsonFromText, isValidJson, parseJsonResponse, safeParseJson };
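
The JSON parser utilities are usable on their own. A small sketch with a made-up model reply that wraps the JSON payload in prose; parseJsonResponse is the throwing variant, safeParseJson the non-throwing one:

```typescript
import { z } from 'zod';
import { parseJsonResponse, safeParseJson, isValidJson } from '@almadar/llm';

// A typical LLM reply with extra text around the JSON (invented example).
const reply = 'Sure, here you go: {"ok": true, "tags": ["a", "b"]} Hope that helps!';

// Parse with Zod validation (throws on failure).
const data = parseJsonResponse(reply, z.object({ ok: z.boolean(), tags: z.array(z.string()) }));

// Non-throwing variant returning a discriminated result.
const result = safeParseJson<{ ok: boolean }>(reply);
if (result.success) {
  console.log(data.tags, result.data.ok, isValidJson('{"x": 1}'));
} else {
  console.error(result.error.message);
}
```
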
@@ -0,0 +1,98 @@
+ /**
+  * Token Tracker for LLM Usage
+  *
+  * Tracks token usage across multiple LLM calls for:
+  * - Cost estimation
+  * - Usage monitoring
+  * - Quota management
+  *
+  * @packageDocumentation
+  */
+ interface TokenUsage {
+     promptTokens: number;
+     completionTokens: number;
+     totalTokens: number;
+     callCount: number;
+ }
+ declare class TokenTracker {
+     private model;
+     private usage;
+     constructor(model?: string);
+     addUsage(promptTokens: number, completionTokens: number): void;
+     getSummary(): TokenUsage;
+     getEstimatedCost(): number;
+     getFormattedCost(): string;
+     getReport(): string;
+     reset(): void;
+     setModel(model: string): void;
+ }
+ declare function getGlobalTokenTracker(model?: string): TokenTracker;
+ declare function resetGlobalTokenTracker(): void;
+
+ /**
+  * Rate Limiter for LLM API Calls
+  *
+  * Implements token bucket algorithm with:
+  * - Configurable requests per minute/second
+  * - Automatic backoff on 429 errors
+  * - Queue for pending requests
+  *
+  * @packageDocumentation
+  */
+ interface RateLimiterOptions {
+     /** Maximum requests per minute (default: 60) */
+     requestsPerMinute?: number;
+     /** Maximum requests per second (default: 3) */
+     requestsPerSecond?: number;
+     /** Maximum concurrent requests (default: 5) */
+     maxConcurrent?: number;
+     /** Base delay for exponential backoff in ms (default: 1000) */
+     baseBackoffMs?: number;
+     /** Maximum backoff delay in ms (default: 60000) */
+     maxBackoffMs?: number;
+ }
+ /**
+  * Rate limiter for LLM API calls using token bucket algorithm.
+  *
+  * @example
+  * ```typescript
+  * const limiter = new RateLimiter({ requestsPerMinute: 30 });
+  * const result = await limiter.execute(() => llm.invoke(messages));
+  * ```
+  */
+ declare class RateLimiter {
+     private requestsPerMinute;
+     private requestsPerSecond;
+     private maxConcurrent;
+     private baseBackoffMs;
+     private maxBackoffMs;
+     private minuteTokens;
+     private secondTokens;
+     private activeRequests;
+     private queue;
+     private lastMinuteReset;
+     private lastSecondReset;
+     private processing;
+     private currentBackoffMs;
+     constructor(options?: RateLimiterOptions);
+     execute<T>(fn: () => Promise<T>, _maxRetries?: number): Promise<T>;
+     getStatus(): {
+         queueLength: number;
+         activeRequests: number;
+         minuteTokens: number;
+         secondTokens: number;
+         backoffMs: number;
+     };
+     reset(): void;
+     private processQueue;
+     private refillTokens;
+     private canMakeRequest;
+     private consumeTokens;
+     private getWaitTime;
+     private isRateLimitError;
+     private sleep;
+ }
+ declare function getGlobalRateLimiter(options?: RateLimiterOptions): RateLimiter;
+ declare function resetGlobalRateLimiter(): void;
+
+ export { RateLimiter as R, TokenTracker as T, type RateLimiterOptions as a, type TokenUsage as b, getGlobalTokenTracker as c, resetGlobalTokenTracker as d, getGlobalRateLimiter as g, resetGlobalRateLimiter as r };
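
Both TokenTracker and RateLimiter can also be used directly, for example to wrap calls made with a provider SDK outside LLMClient. A sketch using the global singletons; the limits, model name, simulated work, and token counts are illustrative assumptions:

```typescript
import { getGlobalRateLimiter, getGlobalTokenTracker } from '@almadar/llm';

// Process-wide limiter and tracker (illustrative limits and model).
const limiter = getGlobalRateLimiter({ requestsPerMinute: 30, maxConcurrent: 2 });
const tracker = getGlobalTokenTracker('gpt-4o-mini');

// execute() queues the call, respects the per-second/minute budgets, and backs off on 429s.
const answer = await limiter.execute(async () => {
  // A provider SDK call would go here; record its reported usage afterwards.
  tracker.addUsage(1200, 350);
  return 'done';
});

console.log(answer, tracker.getReport(), limiter.getStatus().queueLength);
```
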
@@ -0,0 +1,113 @@
+ import { a as RateLimiterOptions, b as TokenUsage } from './rate-limiter-9XAWfHwe.js';
+ import { z } from 'zod';
+
+ /**
+  * JSON Schema type used for OpenAI structured outputs.
+  */
+ interface JsonSchema {
+     type?: string | string[];
+     properties?: Record<string, JsonSchema>;
+     required?: string[];
+     items?: JsonSchema;
+     enum?: unknown[];
+     const?: unknown;
+     anyOf?: JsonSchema[];
+     oneOf?: JsonSchema[];
+     allOf?: JsonSchema[];
+     $ref?: string;
+     $defs?: Record<string, JsonSchema>;
+     definitions?: Record<string, JsonSchema>;
+     additionalProperties?: boolean | JsonSchema;
+     description?: string;
+     default?: unknown;
+     minItems?: number;
+     maxItems?: number;
+     minLength?: number;
+ }
+ interface StructuredOutputOptions {
+     model?: string;
+     temperature?: number;
+     maxTokens?: number;
+     rateLimiter?: RateLimiterOptions;
+     useGlobalRateLimiter?: boolean;
+     trackTokens?: boolean;
+ }
+ interface StructuredGenerationOptions {
+     /** User's natural language request */
+     userRequest: string;
+     /** Model to use (overrides client default) */
+     model?: string;
+     /** Temperature (overrides client default) */
+     temperature?: number;
+     /** Maximum tokens (overrides client default) */
+     maxTokens?: number;
+     /** JSON Schema for structured output */
+     jsonSchema?: JsonSchema;
+     /** Schema name for the json_schema response format */
+     schemaName?: string;
+     /** System prompt override */
+     systemPrompt?: string;
+     /** System prompt builder function (called dynamically) */
+     buildSystemPrompt?: () => string;
+     /** Additional system prompt instructions */
+     additionalInstructions?: string;
+     /** Existing context for updates (e.g., existing schema JSON) */
+     existingContext?: string;
+     /** Skip post-generation validation (default: false) */
+     skipValidation?: boolean;
+ }
+ interface StructuredGenerationResult<T = unknown> {
+     /** Generated data (guaranteed to match JSON Schema structure) */
+     data: T;
+     /** Raw JSON string from API */
+     raw: string;
+     /** Token usage statistics */
+     usage: {
+         promptTokens: number;
+         completionTokens: number;
+         totalTokens: number;
+     };
+     /** Generation latency in milliseconds */
+     latencyMs: number;
+     /** Model used for generation */
+     model: string;
+     /** Zod validation result (if not skipped) */
+     zodValidation?: {
+         success: boolean;
+         errors?: z.ZodError['errors'];
+     };
+ }
+ declare const STRUCTURED_OUTPUT_MODELS: {
+     readonly GPT5_MINI: "gpt-5-mini";
+     readonly GPT4O_MINI: "gpt-4o-mini";
+     readonly GPT4O: "gpt-4o";
+     readonly GPT4O_2024_08_06: "gpt-4o-2024-08-06";
+ };
+ declare class StructuredOutputClient {
+     private openai;
+     private rateLimiter;
+     private tokenTracker;
+     private defaultModel;
+     private defaultTemperature;
+     private defaultMaxTokens;
+     constructor(options?: StructuredOutputOptions);
+     private usesMaxCompletionTokens;
+     /**
+      * Generate structured output with guaranteed JSON Schema compliance.
+      */
+     generate<T = unknown>(options: StructuredGenerationOptions): Promise<StructuredGenerationResult<T>>;
+     getModel(): string;
+     getRateLimiterStatus(): {
+         queueLength: number;
+         activeRequests: number;
+         minuteTokens: number;
+         secondTokens: number;
+         backoffMs: number;
+     };
+     getTokenUsage(): TokenUsage | null;
+ }
+ declare function getStructuredOutputClient(options?: StructuredOutputOptions): StructuredOutputClient;
+ declare function resetStructuredOutputClient(): void;
+ declare function isStructuredOutputAvailable(): boolean;
+
+ export { type JsonSchema, STRUCTURED_OUTPUT_MODELS, type StructuredGenerationOptions, type StructuredGenerationResult, StructuredOutputClient, type StructuredOutputOptions, getStructuredOutputClient, isStructuredOutputAvailable, resetStructuredOutputClient };
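
The structured-output client is OpenAI-specific (its model constants and json_schema response format come from OpenAI), and isStructuredOutputAvailable() presumably reports whether the required OpenAI configuration is present. A sketch only; the schema, field names, and request text below are assumptions, not part of the package:

```typescript
import {
  getStructuredOutputClient,
  isStructuredOutputAvailable,
  STRUCTURED_OUTPUT_MODELS,
} from '@almadar/llm';

// Hypothetical JSON Schema describing the desired output shape.
const taskSchema = {
  type: 'object',
  properties: {
    title: { type: 'string' },
    priority: { type: 'string', enum: ['low', 'medium', 'high'] },
  },
  required: ['title', 'priority'],
  additionalProperties: false,
};

if (isStructuredOutputAvailable()) {
  const client = getStructuredOutputClient({ model: STRUCTURED_OUTPUT_MODELS.GPT4O_MINI });
  const result = await client.generate<{ title: string; priority: string }>({
    userRequest: 'Create a task for reviewing the 2.0.1 release notes.',
    jsonSchema: taskSchema,
    schemaName: 'task',
  });
  console.log(result.data.title, result.usage.totalTokens, `${result.latencyMs}ms`);
}
```
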
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@almadar/llm",
-   "version": "2.0.0",
+   "version": "2.0.1",
    "description": "Multi-provider LLM client with rate limiting, token tracking, structured outputs, and continuation handling",
    "type": "module",
    "main": "./dist/index.js",
@@ -32,7 +32,8 @@
      "@langchain/anthropic": "^1.3.16",
      "@langchain/openai": "^1.2.6",
      "openai": "^6.18.0",
-     "zod": "^3.22.0"
+     "zod": "^3.22.0",
+     "@langchain/core": "^0.3.0"
    },
    "peerDependencies": {
      "@almadar/core": ">=2.0.0"
@@ -44,7 +45,8 @@
    },
    "devDependencies": {
      "tsup": "^8.0.0",
-     "typescript": "^5.3.0"
+     "typescript": "^5.3.0",
+     "@types/node": "^22.0.0"
    },
    "repository": {
      "type": "git",