@almadar/llm 2.5.1 → 2.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-F2DMHMRH.js → chunk-E4NSQM6D.js} +57 -7
- package/dist/chunk-E4NSQM6D.js.map +1 -0
- package/dist/{chunk-3OVQNNPN.js → chunk-FEN4PB7O.js} +3 -3
- package/dist/chunk-FEN4PB7O.js.map +1 -0
- package/dist/{chunk-QHJ3T46X.js → chunk-MUTXGY6D.js} +1 -1
- package/dist/chunk-MUTXGY6D.js.map +1 -0
- package/dist/{chunk-MJS33AAS.js → chunk-ULT7T7O6.js} +101 -13
- package/dist/chunk-ULT7T7O6.js.map +1 -0
- package/dist/client.d.ts +28 -1
- package/dist/client.js +2 -2
- package/dist/index.d.ts +19 -3
- package/dist/index.js +4 -4
- package/dist/providers/index.d.ts +5 -1
- package/dist/providers/index.js +1 -1
- package/dist/{rate-limiter-DDH7JH5p.d.ts → rate-limiter-B9tDNSMl.d.ts} +6 -2
- package/dist/structured-output.d.ts +1 -1
- package/dist/structured-output.js +2 -2
- package/package.json +1 -1
- package/src/client.ts +121 -16
- package/src/contracts.ts +20 -2
- package/src/providers/masar.ts +4 -1
- package/src/structured-output.ts +3 -2
- package/src/token-tracker.ts +172 -14
- package/dist/chunk-3OVQNNPN.js.map +0 -1
- package/dist/chunk-F2DMHMRH.js.map +0 -1
- package/dist/chunk-MJS33AAS.js.map +0 -1
- package/dist/chunk-QHJ3T46X.js.map +0 -1
package/dist/index.d.ts
CHANGED
@@ -1,6 +1,6 @@
  import { LLMFinishReason, LLMClient } from './client.js';
  export { ANTHROPIC_MODELS, CacheAwareLLMCallOptions, CacheableBlock, DEEPSEEK_MODELS, KIMI_MODELS, LLMCallOptions, LLMClientOptions, LLMProvider, LLMResponse, LLMStreamChunk, LLMStreamOptions, LLMUsage, OPENAI_MODELS, OPENROUTER_MODELS, ProviderConfig, createAnthropicClient, createCreativeClient, createDeepSeekClient, createFixClient, createKimiClient, createOpenAIClient, createOpenRouterClient, createRequirementsClient, createZhipuClient, getAvailableProvider, getSharedLLMClient, isProviderAvailable, resetSharedLLMClient } from './client.js';
- export { a as RateLimiter, R as RateLimiterOptions, b as TokenTracker, T as TokenUsage, g as getGlobalRateLimiter, c as getGlobalTokenTracker, r as resetGlobalRateLimiter, d as resetGlobalTokenTracker } from './rate-limiter-
+ export { a as RateLimiter, R as RateLimiterOptions, b as TokenTracker, T as TokenUsage, g as getGlobalRateLimiter, c as getGlobalTokenTracker, r as resetGlobalRateLimiter, d as resetGlobalTokenTracker } from './rate-limiter-B9tDNSMl.js';
  export { autoCloseJson, extractJsonFromText, isValidJson, parseJsonResponse, safeParseJson } from './json-parser.js';
  import { z } from 'zod';
  export { JsonSchema, STRUCTURED_OUTPUT_MODELS, StructuredGenerationOptions, StructuredGenerationResult, StructuredOutputClient, StructuredOutputOptions, getStructuredOutputClient, isStructuredOutputAvailable, resetStructuredOutputClient } from './structured-output.js';
@@ -118,6 +118,22 @@ declare function buildGenericContinuationPrompt(context: string, partialResponse
   * @packageDocumentation
   */

+ /** JSON Schema definition for structured extraction. */
+ interface JsonSchemaDefinition {
+     type?: string | string[];
+     properties?: Record<string, JsonSchemaDefinition>;
+     required?: string[];
+     items?: JsonSchemaDefinition;
+     additionalProperties?: boolean | JsonSchemaDefinition;
+     description?: string;
+     enum?: (string | number | boolean | null)[];
+     [key: string]: string | string[] | boolean | number | null | undefined | JsonSchemaDefinition | Record<string, JsonSchemaDefinition> | (string | number | boolean | null)[];
+ }
+ /** Data extracted by the LLM from unstructured text. Values are JSON-safe primitives or nested structures. */
+ type ExtractedValue = string | number | boolean | null | ExtractedValue[] | {
+     [key: string]: ExtractedValue;
+ };
+ type ExtractedData = Record<string, ExtractedValue>;
  /**
   * All call-service actions exposed by the LLM service.
   */
@@ -157,11 +173,11 @@ type LLMServiceActions = {
      extract: {
          params: {
              text: string;
-             schema:
+             schema: JsonSchemaDefinition;
              model?: string;
          };
          result: {
-             data:
+             data: ExtractedData;
              confidence: number;
          };
      };
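
The `extract` action above is now concretely typed. A minimal sketch of a schema and result that satisfy the new `JsonSchemaDefinition` and `ExtractedData` shapes; the invoice field names are illustrative, not from the package:

    // Hypothetical schema value, shaped to satisfy JsonSchemaDefinition.
    const schema = {
      type: 'object',
      properties: {
        vendor: { type: 'string', description: 'Issuing company' },
        total: { type: 'number' },
        lineItems: { type: 'array', items: { type: 'string' } },
      },
      required: ['vendor', 'total'],
      additionalProperties: false,
    };

    // A matching `result` payload: every value is a JSON-safe primitive or nested structure.
    const result = {
      data: { vendor: 'Acme Corp', total: 1234.5, lineItems: ['widgets'] },
      confidence: 0.92,
    };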
package/dist/index.js
CHANGED
@@ -18,7 +18,7 @@ import {
    getSharedLLMClient,
    isProviderAvailable,
    resetSharedLLMClient
- } from "./chunk-
+ } from "./chunk-E4NSQM6D.js";
  import {
    autoCloseJson,
    extractJsonFromText,
@@ -32,7 +32,7 @@ import {
    getStructuredOutputClient,
    isStructuredOutputAvailable,
    resetStructuredOutputClient
- } from "./chunk-
+ } from "./chunk-FEN4PB7O.js";
  import {
    RateLimiter,
    TokenTracker,
@@ -40,13 +40,13 @@ import {
    getGlobalTokenTracker,
    resetGlobalRateLimiter,
    resetGlobalTokenTracker
- } from "./chunk-
+ } from "./chunk-ULT7T7O6.js";
  import {
    MasarError,
    MasarProvider,
    getMasarProvider,
    resetMasarProvider
- } from "./chunk-
+ } from "./chunk-MUTXGY6D.js";

  // src/truncation-detector.ts
  function detectTruncation(response, finishReason) {
package/dist/providers/index.d.ts
CHANGED
@@ -25,6 +25,10 @@ interface MasarGenerateResult {
        totalTokens: number;
    };
  }
+ /** GFlowNet sampling constraint value: primitives, arrays, or nested constraint maps. */
+ type ConstraintValue = string | number | boolean | null | ConstraintValue[] | {
+     [key: string]: ConstraintValue;
+ };
  interface GoalSpec {
      /** Natural-language description of the desired application. */
      description: string;
@@ -33,7 +37,7 @@ interface GoalSpec {
      /** Domain hint (e.g. "e-commerce", "healthcare"). */
      domain?: string;
      /** Additional constraints passed to the GFlowNet sampler. */
-     constraints?: Record<string,
+     constraints?: Record<string, ConstraintValue>;
  }
  interface GFlowNetResult {
      /** Generated .orb schema text. */
package/dist/providers/index.js
CHANGED
package/dist/{rate-limiter-DDH7JH5p.d.ts → rate-limiter-B9tDNSMl.d.ts}
CHANGED
@@ -2,9 +2,10 @@
   * Token Tracker for LLM Usage
   *
   * Tracks token usage across multiple LLM calls for:
-  * - Cost estimation
+  * - Cost estimation (pricing fetched from OpenRouter models API)
   * - Usage monitoring
   * - Quota management
+  * - Per-call JSONL logging
   *
   * @packageDocumentation
   */
@@ -18,7 +19,10 @@ declare class TokenTracker {
      private model;
      private usage;
      constructor(model?: string);
-     addUsage(promptTokens: number, completionTokens: number
+     addUsage(promptTokens: number, completionTokens: number, options?: {
+         provider?: string;
+         durationMs?: number;
+     }): void;
      getSummary(): TokenUsage;
      getEstimatedCost(): number;
      getFormattedCost(): string;
package/dist/structured-output.js
CHANGED
@@ -4,8 +4,8 @@ import {
    getStructuredOutputClient,
    isStructuredOutputAvailable,
    resetStructuredOutputClient
- } from "./chunk-
- import "./chunk-
+ } from "./chunk-FEN4PB7O.js";
+ import "./chunk-ULT7T7O6.js";
  export {
    STRUCTURED_OUTPUT_MODELS,
    StructuredOutputClient,
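
The widened `addUsage` declaration above keeps the options bag optional, so existing call sites compile unchanged. A sketch of both forms (token counts illustrative):

    const tracker = new TokenTracker('claude-sonnet-4-5-20250929');
    tracker.addUsage(1200, 350);                                              // pre-2.9 form
    tracker.addUsage(1200, 350, { provider: 'anthropic', durationMs: 2140 }); // tagged and timed
    console.log(tracker.getFormattedCost());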
package/package.json
CHANGED
package/src/client.ts
CHANGED
@@ -24,6 +24,47 @@ import {
  import { TokenTracker, getGlobalTokenTracker } from './token-tracker.js';
  import { parseJsonResponse } from './json-parser.js';

+ // ============================================================================
+ // Local type helpers (avoid Record<string, unknown> and unsafe casts)
+ // ============================================================================
+
+ /** Anthropic generation output with usage metadata (not in Langchain's base types). */
+ interface AnthropicGenerationWithUsage {
+   message?: {
+     usage_metadata?: {
+       cache_creation_input_tokens?: number;
+       cache_read_input_tokens?: number;
+       input_tokens?: number;
+       output_tokens?: number;
+     };
+   };
+ }
+
+ /** Response metadata from OpenAI-compatible providers. */
+ interface OpenAIResponseMetadata {
+   finish_reason?: string;
+ }
+
+ /** Model-specific kwargs passed to ChatOpenAI constructor. */
+ interface ModelKwargs {
+   max_completion_tokens?: number;
+   thinking?: { type: string };
+   tool_choice?: string;
+ }
+
+ /**
+  * Identity cast for generic return types.
+  * Used when a string value must satisfy a generic T parameter
+  * (e.g., rawText mode where caller declares T = string).
+  *
+  * Safety: callers only reach this path when rawText=true, which
+  * constrains T to string by convention. TypeScript cannot verify
+  * this constraint statically because T is caller-supplied.
+  */
+ function asGeneric<T>(value: string): T {
+   return value as T;
+ }
+
  // ============================================================================
  // Anthropic Cache Control Helper
  // ============================================================================
@@ -350,18 +391,10 @@ export class LLMClient {
        {
          handleLLMEnd: (output) => {
            const generation = output.generations?.[0]?.[0];
-           const
-
-
-
-             cache_creation_input_tokens?: number;
-             cache_read_input_tokens?: number;
-             input_tokens?: number;
-             output_tokens?: number;
-           };
-           };
-           }
-           )?.message?.usage_metadata;
+           const generationWithUsage = generation as
+             | (typeof generation & AnthropicGenerationWithUsage)
+             | undefined;
+           const usage = generationWithUsage?.message?.usage_metadata;

            if (usage) {
              const cacheCreated = usage.cache_creation_input_tokens ?? 0;
@@ -416,7 +449,7 @@ export class LLMClient {
      const effectiveTemp = isKimi ? 0.6 : temperature;

      // Build modelKwargs incrementally to avoid spread conflicts
-     const modelKwargs:
+     const modelKwargs: ModelKwargs = {};
      if (useCompletionTokens && maxTokens) {
        modelKwargs.max_completion_tokens = maxTokens;
      }
@@ -571,6 +604,7 @@ export class LLMClient {
        this.tokenTracker.addUsage(
          usage.promptTokens,
          usage.completionTokens,
+         { provider: this.provider },
        );
      }
    }
@@ -641,7 +675,7 @@ export class LLMClient {
    response: Awaited<ReturnType<ChatOpenAI['invoke']>>,
  ): LLMFinishReason {
    const metadata = response.response_metadata as
-
+     | OpenAIResponseMetadata
      | undefined;
    if (metadata?.finish_reason) {
      const reason = metadata.finish_reason as string;
@@ -661,6 +695,7 @@ export class LLMClient {
    systemPrompt: string;
    userPrompt: string;
    maxTokens?: number;
+   signal?: AbortSignal;
  }): Promise<string> {
    const response = await this.callRawWithMetadata(options);
    return response.raw;
@@ -670,8 +705,9 @@ export class LLMClient {
    systemPrompt: string;
    userPrompt: string;
    maxTokens?: number;
+   signal?: AbortSignal;
  }): Promise<Omit<LLMResponse<string>, 'data'> & { raw: string }> {
-   const { systemPrompt, userPrompt, maxTokens } = options;
+   const { systemPrompt, userPrompt, maxTokens, signal } = options;

    return this.rateLimiter.execute(async () => {
      const modelToUse = maxTokens
@@ -686,6 +722,74 @@ export class LLMClient {
        this.provider === 'anthropic'
          ? addCacheControlToSystemMessages(messages)
          : messages,
+       signal ? { signal } : undefined,
+     );
+
+     let usage: LLMUsage | null = null;
+     if (response.usage_metadata) {
+       const usageMeta = response.usage_metadata as {
+         input_tokens?: number;
+         output_tokens?: number;
+       };
+       usage = {
+         promptTokens: usageMeta.input_tokens || 0,
+         completionTokens: usageMeta.output_tokens || 0,
+         totalTokens:
+           (usageMeta.input_tokens || 0) + (usageMeta.output_tokens || 0),
+       };
+
+       if (this.tokenTracker) {
+         this.tokenTracker.addUsage(
+           usage.promptTokens,
+           usage.completionTokens,
+         );
+       }
+     }
+
+     const finishReason = this.extractFinishReason(response);
+     const content =
+       typeof response.content === 'string'
+         ? response.content
+         : JSON.stringify(response.content);
+
+     return { raw: content, finishReason, usage };
+   });
+ }
+
+ /**
+  * Call the LLM with a structured messages array.
+  *
+  * Unlike callRawWithMetadata (which takes systemPrompt + userPrompt strings),
+  * this accepts a full conversation history with proper role separation.
+  * This enables:
+  * - Anthropic prompt caching on message boundaries (not just system prompt)
+  * - Proper tool_use/tool_result role handling across providers
+  * - Reduced token waste from string concatenation
+  *
+  * All providers support the messages format:
+  * - Anthropic: native messages API with cache_control
+  * - DeepSeek: OpenAI-compatible messages via ChatOpenAI
+  * - OpenRouter: OpenAI-compatible messages via ChatOpenAI
+  */
+ async callWithMessages(options: {
+   messages: Array<{ role: string; content: string }>;
+   maxTokens?: number;
+   signal?: AbortSignal;
+ }): Promise<Omit<LLMResponse<string>, 'data'> & { raw: string }> {
+   const { messages, maxTokens, signal } = options;
+
+   return this.rateLimiter.execute(async () => {
+     const modelToUse = maxTokens
+       ? this.getModelWithOptions({ maxTokens })
+       : this.model;
+
+     const langchainMessages = this.provider === 'anthropic'
+       ? addCacheControlToSystemMessages(messages)
+       : (messages as BaseMessageLike[]);
+
+     const response = await modelToUse.invoke(
+       langchainMessages,
+       signal ? { signal } : undefined,
      );

      let usage: LLMUsage | null = null;
@@ -905,7 +1009,8 @@ export class LLMClient {

    let parsed: T;
    if (rawText) {
-
+     // rawText mode: caller expects T = string; content is already a string
+     parsed = asGeneric<T>(result.content);
    } else if (skipSchemaValidation) {
      parsed = parseJsonResponse(result.content, undefined) as T;
    } else {
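
A sketch of the new `callWithMessages` entry point together with the `AbortSignal` support added in this release; `client` stands for any constructed `LLMClient`, and the conversation content is illustrative:

    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), 30_000); // give up after 30s

    const { raw, finishReason, usage } = await client.callWithMessages({
      messages: [
        { role: 'system', content: 'You are a code reviewer.' },
        { role: 'user', content: 'Review the following function for bugs.' },
      ],
      maxTokens: 2048,
      signal: controller.signal,
    });
    clearTimeout(timer);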
package/src/contracts.ts
CHANGED
@@ -10,6 +10,24 @@

  import type { ServiceContract } from "@almadar/core";

+ /** JSON Schema definition for structured extraction. */
+ interface JsonSchemaDefinition {
+   type?: string | string[];
+   properties?: Record<string, JsonSchemaDefinition>;
+   required?: string[];
+   items?: JsonSchemaDefinition;
+   additionalProperties?: boolean | JsonSchemaDefinition;
+   description?: string;
+   enum?: (string | number | boolean | null)[];
+   [key: string]: string | string[] | boolean | number | null | undefined
+     | JsonSchemaDefinition | Record<string, JsonSchemaDefinition>
+     | (string | number | boolean | null)[];
+ }
+
+ /** Data extracted by the LLM from unstructured text. Values are JSON-safe primitives or nested structures. */
+ type ExtractedValue = string | number | boolean | null | ExtractedValue[] | { [key: string]: ExtractedValue };
+ type ExtractedData = Record<string, ExtractedValue>;
+
  /**
   * All call-service actions exposed by the LLM service.
   */
@@ -51,11 +69,11 @@ export type LLMServiceActions = {
    extract: {
      params: {
        text: string;
-       schema:
+       schema: JsonSchemaDefinition;
        model?: string;
      };
      result: {
-       data:
+       data: ExtractedData;
        confidence: number;
      };
    };
package/src/providers/masar.ts
CHANGED
@@ -32,6 +32,9 @@ export interface MasarGenerateResult {
    };
  }

+ /** GFlowNet sampling constraint value: primitives, arrays, or nested constraint maps. */
+ type ConstraintValue = string | number | boolean | null | ConstraintValue[] | { [key: string]: ConstraintValue };
+
  export interface GoalSpec {
    /** Natural-language description of the desired application. */
    description: string;
@@ -40,7 +43,7 @@ export interface GoalSpec {
    /** Domain hint (e.g. "e-commerce", "healthcare"). */
    domain?: string;
    /** Additional constraints passed to the GFlowNet sampler. */
-   constraints?: Record<string,
+   constraints?: Record<string, ConstraintValue>;
  }

  export interface GFlowNetResult {
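
With `ConstraintValue`, the `constraints` bag can carry nested structure instead of an untyped record. A conforming `GoalSpec` sketch; the constraint keys are illustrative, since the sampler's actual vocabulary is not documented in this diff:

    const goal: GoalSpec = {
      description: 'Inventory tracker for a small warehouse',
      domain: 'e-commerce',
      constraints: {
        maxEntities: 12,                               // primitive
        locales: ['en', 'ar'],                         // array
        auth: { provider: 'oauth', roles: ['admin'] }, // nested constraint map
      },
    };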
package/src/structured-output.ts
CHANGED
@@ -12,6 +12,7 @@

  import OpenAI from 'openai';
  import type { ChatCompletionCreateParamsNonStreaming } from 'openai/resources/chat/completions';
+ import type { ResponseFormatJSONSchema } from 'openai/resources/shared';
  import { z } from 'zod';
  import {
    RateLimiter,
@@ -236,7 +237,7 @@ export class StructuredOutputClient {
          json_schema: {
            name: schemaName,
            strict: true,
-           schema: jsonSchema as
+           schema: jsonSchema as ResponseFormatJSONSchema.JSONSchema['schema'],
          },
        },
        ...tempParam,
@@ -267,7 +268,7 @@ export class StructuredOutputClient {
      };

      if (this.tokenTracker) {
-       this.tokenTracker.addUsage(usage.promptTokens, usage.completionTokens);
+       this.tokenTracker.addUsage(usage.promptTokens, usage.completionTokens, { provider: 'structured-output' });
      }

      console.log(
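
The cast now targets the OpenAI SDK's own `ResponseFormatJSONSchema.JSONSchema['schema']` type instead of a loose one. For reference, the `response_format` the client builds has roughly this shape (schema body illustrative):

    import type { ResponseFormatJSONSchema } from 'openai/resources/shared';

    const responseFormat: ResponseFormatJSONSchema = {
      type: 'json_schema',
      json_schema: {
        name: 'extraction',
        strict: true,
        schema: {
          type: 'object',
          properties: { title: { type: 'string' } },
          required: ['title'],
          additionalProperties: false,
        },
      },
    };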
package/src/token-tracker.ts
CHANGED
@@ -2,13 +2,17 @@
   * Token Tracker for LLM Usage
   *
   * Tracks token usage across multiple LLM calls for:
-  * - Cost estimation
+  * - Cost estimation (pricing fetched from OpenRouter models API)
   * - Usage monitoring
   * - Quota management
+  * - Per-call JSONL logging
   *
   * @packageDocumentation
   */

+ import { appendFileSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
+ import { dirname, join } from 'node:path';
+
  export interface TokenUsage {
    promptTokens: number;
    completionTokens: number;
@@ -21,18 +25,136 @@ export interface TokenCost {
    completionCostPer1K: number;
  }

-
-
-
-
-
-
-
-
-
-
+ export interface CallLogEntry {
+   timestamp: string;
+   provider: string;
+   model: string;
+   promptTokens: number;
+   completionTokens: number;
+   totalTokens: number;
+   estimatedCost: number;
+   durationMs?: number;
+   source: 'local-log';
+ }
+
+ // ---------------------------------------------------------------------------
+ // Pricing: fetched from OpenRouter /api/v1/models, cached to disk for 24h
+ // ---------------------------------------------------------------------------
+
+ const ALMADAR_ROOT = process.env['ALMADAR_ROOT'] ?? process.cwd();
+ const PRICING_CACHE_PATH = join(ALMADAR_ROOT, '.llm-pricing-cache.json');
+ const CALL_LOG_PATH = join(ALMADAR_ROOT, '.llm-call-log.jsonl');
+ const CACHE_TTL_MS = 24 * 60 * 60 * 1000; // 24 hours
+
+ /** Map from our local model name to OpenRouter model ID */
+ const MODEL_ID_MAP: Record<string, string> = {
+   // Anthropic
+   'claude-opus-4-5-20250929': 'anthropic/claude-opus-4.5',
+   'claude-sonnet-4-5-20250929': 'anthropic/claude-sonnet-4.5',
+   'claude-sonnet-4-20250514': 'anthropic/claude-sonnet-4',
+   'claude-3-5-haiku-20241022': 'anthropic/claude-3.5-haiku',
+   // DeepSeek — map to current versions on OpenRouter
+   'deepseek-chat': 'deepseek/deepseek-v3.2',
+   'deepseek-coder': 'deepseek/deepseek-v3.2',
+   'deepseek-reasoner': 'deepseek/deepseek-r1-0528',
+   // Kimi
+   'kimi-k2.5': 'moonshotai/kimi-k2.5',
  };

+ // Fallback: zero cost — forces OpenRouter fetch for real pricing
+ const FALLBACK_COSTS: Record<string, TokenCost> = {};
+
+ interface PricingCache {
+   fetchedAt: number;
+   models: Record<string, TokenCost>;
+ }
+
+ let pricingCache: PricingCache | null = null;
+
+ function loadCachedPricing(): PricingCache | null {
+   try {
+     const raw = readFileSync(PRICING_CACHE_PATH, 'utf-8');
+     const parsed = JSON.parse(raw) as PricingCache;
+     if (Date.now() - parsed.fetchedAt < CACHE_TTL_MS) {
+       return parsed;
+     }
+   } catch {
+     // No cache or expired
+   }
+   return null;
+ }
+
+ async function fetchPricingFromOpenRouter(): Promise<Record<string, TokenCost>> {
+   const res = await fetch('https://openrouter.ai/api/v1/models');
+   if (!res.ok) throw new Error(`OpenRouter models API: HTTP ${res.status}`);
+   const json = await res.json() as { data?: Array<{ id: string; pricing?: { prompt?: string; completion?: string } }> };
+   const models: Record<string, TokenCost> = {};
+   for (const m of json.data ?? []) {
+     const promptPerToken = parseFloat(m.pricing?.prompt ?? '0');
+     const completionPerToken = parseFloat(m.pricing?.completion ?? '0');
+     if (promptPerToken > 0 || completionPerToken > 0) {
+       models[m.id] = {
+         promptCostPer1K: promptPerToken * 1000,
+         completionCostPer1K: completionPerToken * 1000,
+       };
+     }
+   }
+   return models;
+ }
+
+ /**
+  * Get pricing for all models. Uses 24h disk cache, fetches from OpenRouter on miss.
+  * Non-blocking: returns cached/fallback immediately, refreshes in background if stale.
+  */
+ function getPricing(): Record<string, TokenCost> {
+   if (pricingCache) return pricingCache.models;
+
+   const diskCache = loadCachedPricing();
+   if (diskCache) {
+     pricingCache = diskCache;
+     return diskCache.models;
+   }
+
+   // Trigger background fetch, return fallback for now
+   refreshPricingCache();
+   return FALLBACK_COSTS;
+ }
+
+ function refreshPricingCache(): void {
+   fetchPricingFromOpenRouter()
+     .then((models) => {
+       pricingCache = { fetchedAt: Date.now(), models };
+       try {
+         mkdirSync(dirname(PRICING_CACHE_PATH), { recursive: true });
+         writeFileSync(PRICING_CACHE_PATH, JSON.stringify(pricingCache));
+       } catch {
+         // Non-critical
+       }
+     })
+     .catch(() => {
+       // Silently fail, use fallback
+     });
+ }
+
+ function getCostForModel(model: string): TokenCost {
+   const pricing = getPricing();
+   // Try direct match on OpenRouter ID
+   const orId = MODEL_ID_MAP[model];
+   if (orId && pricing[orId]) return pricing[orId];
+   // Try direct key match (e.g., user passed "openai/gpt-4o")
+   if (pricing[model]) return pricing[model];
+   // Fuzzy: find first key containing the model name
+   for (const [key, cost] of Object.entries(pricing)) {
+     if (key.includes(model) || model.includes(key.split('/')[1] ?? '')) return cost;
+   }
+   // No pricing available — return zero (OpenRouter fetch pending or model not listed)
+   return { promptCostPer1K: 0, completionCostPer1K: 0 };
+ }
+
+ // ---------------------------------------------------------------------------
+ // TokenTracker
+ // ---------------------------------------------------------------------------
+
  export class TokenTracker {
    private model: string;
    private usage: TokenUsage = {
@@ -42,15 +164,39 @@ export class TokenTracker {
      callCount: 0,
    };

-   constructor(model: string = '
+   constructor(model: string = 'claude-sonnet-4-5-20250929') {
      this.model = model;
    }

-   addUsage(promptTokens: number, completionTokens: number): void {
+   addUsage(promptTokens: number, completionTokens: number, options?: { provider?: string; durationMs?: number }): void {
      this.usage.promptTokens += promptTokens;
      this.usage.completionTokens += completionTokens;
      this.usage.totalTokens += promptTokens + completionTokens;
      this.usage.callCount++;
+
+     const costs = getCostForModel(this.model);
+     const estimatedCost =
+       (promptTokens / 1000) * costs.promptCostPer1K +
+       (completionTokens / 1000) * costs.completionCostPer1K;
+
+     const entry: CallLogEntry = {
+       timestamp: new Date().toISOString(),
+       provider: options?.provider ?? 'unknown',
+       model: this.model,
+       promptTokens,
+       completionTokens,
+       totalTokens: promptTokens + completionTokens,
+       estimatedCost,
+       durationMs: options?.durationMs,
+       source: 'local-log',
+     };
+
+     try {
+       mkdirSync(dirname(CALL_LOG_PATH), { recursive: true });
+       appendFileSync(CALL_LOG_PATH, JSON.stringify(entry) + '\n');
+     } catch {
+       // Non-critical: don't break LLM calls if logging fails
+     }
    }

    getSummary(): TokenUsage {
@@ -58,7 +204,7 @@ export class TokenTracker {
    }

    getEstimatedCost(): number {
-     const costs =
+     const costs = getCostForModel(this.model);
      const promptCost =
        (this.usage.promptTokens / 1000) * costs.promptCostPer1K;
      const completionCost =
@@ -114,3 +260,15 @@ export function getGlobalTokenTracker(model?: string): TokenTracker {
  export function resetGlobalTokenTracker(): void {
    globalTracker?.reset();
  }
+
+ export function getCallLogPath(): string {
+   return CALL_LOG_PATH;
+ }
+
+ /** Force-refresh the pricing cache from OpenRouter. */
+ export async function refreshPricing(): Promise<void> {
+   const models = await fetchPricingFromOpenRouter();
+   pricingCache = { fetchedAt: Date.now(), models };
+   mkdirSync(dirname(PRICING_CACHE_PATH), { recursive: true });
+   writeFileSync(PRICING_CACHE_PATH, JSON.stringify(pricingCache));
+ }