@compilr-dev/agents 0.3.7 → 0.3.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -57,6 +57,14 @@ export declare class FireworksProvider extends OpenAICompatibleProvider {
|
|
|
57
57
|
* Fireworks AI uses standard OpenAI body format
|
|
58
58
|
*/
|
|
59
59
|
protected buildProviderSpecificBody(_options?: ChatOptions): Record<string, unknown>;
|
|
60
|
+
/**
 * Reads cache statistics out of the headers of a Fireworks response.
 * Fireworks reports these stats in response headers, not in the JSON body.
 *
 * @param headers Response headers to inspect
 * @returns An object that may carry `cacheReadTokens`
 * @see https://docs.fireworks.ai/guides/prompt-caching
 */
protected extractCacheStatsFromHeaders(headers: Headers): { cacheReadTokens?: number };
|
|
60
68
|
/**
|
|
61
69
|
* Map HTTP errors with Fireworks AI-specific messages
|
|
62
70
|
*/
|
|
@@ -65,6 +65,21 @@ export class FireworksProvider extends OpenAICompatibleProvider {
|
|
|
65
65
|
buildProviderSpecificBody(_options) {
|
|
66
66
|
return {};
|
|
67
67
|
}
|
|
68
|
+
/**
|
|
69
|
+
* Extract cache statistics from Fireworks response headers.
|
|
70
|
+
* Fireworks returns cache stats in headers rather than the JSON body.
|
|
71
|
+
* @see https://docs.fireworks.ai/guides/prompt-caching
|
|
72
|
+
*/
|
|
73
|
+
extractCacheStatsFromHeaders(headers) {
|
|
74
|
+
const cachedTokens = headers.get('fireworks-cached-prompt-tokens');
|
|
75
|
+
if (cachedTokens) {
|
|
76
|
+
const parsed = parseInt(cachedTokens, 10);
|
|
77
|
+
if (!isNaN(parsed) && parsed > 0) {
|
|
78
|
+
return { cacheReadTokens: parsed };
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
return {};
|
|
82
|
+
}
|
|
68
83
|
/**
|
|
69
84
|
* Map HTTP errors with Fireworks AI-specific messages
|
|
70
85
|
*/
|
|
@@ -149,6 +149,15 @@ export declare abstract class OpenAICompatibleProvider implements LLMProvider {
|
|
|
149
149
|
* @returns ProviderError with appropriate message
|
|
150
150
|
*/
|
|
151
151
|
protected abstract mapConnectionError(error: Error): ProviderError;
|
|
152
|
+
/**
 * Hook for pulling cache statistics out of response headers.
 * Subclasses override this for providers that report cache stats in headers
 * (e.g., Fireworks).
 *
 * @param _headers Response headers
 * @returns Partial LLMUsage with cache stats
 */
protected extractCacheStatsFromHeaders(_headers: Headers): { cacheReadTokens?: number };
|
|
152
161
|
/**
|
|
153
162
|
* Stream chat completion from the provider
|
|
154
163
|
*
|
|
@@ -42,6 +42,15 @@ export class OpenAICompatibleProvider {
|
|
|
42
42
|
this.defaultMaxTokens = config.maxTokens ?? DEFAULT_MAX_TOKENS;
|
|
43
43
|
this.timeout = config.timeout ?? DEFAULT_TIMEOUT;
|
|
44
44
|
}
|
|
45
|
+
/**
|
|
46
|
+
* Extract cache statistics from response headers.
|
|
47
|
+
* Override in subclasses for providers that return cache stats in headers (e.g., Fireworks).
|
|
48
|
+
* @param headers Response headers
|
|
49
|
+
* @returns Partial LLMUsage with cache stats
|
|
50
|
+
*/
|
|
51
|
+
extractCacheStatsFromHeaders(_headers) {
|
|
52
|
+
return {};
|
|
53
|
+
}
|
|
45
54
|
// ==================== SHARED IMPLEMENTATION ====================
|
|
46
55
|
/**
|
|
47
56
|
* Stream chat completion from the provider
|
|
@@ -106,6 +115,8 @@ export class OpenAICompatibleProvider {
|
|
|
106
115
|
const errorBody = await response.text();
|
|
107
116
|
throw this.mapHttpError(response.status, errorBody, model);
|
|
108
117
|
}
|
|
118
|
+
// Extract cache stats from headers (for providers like Fireworks)
|
|
119
|
+
const headerCacheStats = this.extractCacheStatsFromHeaders(response.headers);
|
|
109
120
|
const reader = response.body?.getReader();
|
|
110
121
|
if (!reader) {
|
|
111
122
|
throw new ProviderError('No response body', this.name);
|
|
@@ -153,12 +164,12 @@ export class OpenAICompatibleProvider {
|
|
|
153
164
|
}
|
|
154
165
|
}
|
|
155
166
|
}
|
|
156
|
-
// Yield done chunk with usage
|
|
167
|
+
// Yield done chunk with usage (merge header-based cache stats)
|
|
157
168
|
yield {
|
|
158
169
|
type: 'done',
|
|
159
170
|
usage: usage
|
|
160
|
-
? { ...usage, debugPayload }
|
|
161
|
-
: { inputTokens: 0, outputTokens: 0, debugPayload },
|
|
171
|
+
? { ...usage, ...headerCacheStats, debugPayload }
|
|
172
|
+
: { inputTokens: 0, outputTokens: 0, ...headerCacheStats, debugPayload },
|
|
162
173
|
};
|
|
163
174
|
}
|
|
164
175
|
catch (error) {
|