maestro-core 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cost.d.ts +60 -0
- package/dist/cost.d.ts.map +1 -0
- package/dist/cost.js +68 -0
- package/dist/cost.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +6 -0
- package/dist/index.js.map +1 -1
- package/dist/models.d.ts +69 -0
- package/dist/models.d.ts.map +1 -0
- package/dist/models.js +113 -0
- package/dist/models.js.map +1 -0
- package/dist/ports/quota-store.d.ts +2 -0
- package/dist/ports/quota-store.d.ts.map +1 -1
- package/dist/runtime/empty-recovery.d.ts +62 -0
- package/dist/runtime/empty-recovery.d.ts.map +1 -0
- package/dist/runtime/empty-recovery.js +34 -0
- package/dist/runtime/empty-recovery.js.map +1 -0
- package/dist/runtime/index.d.ts +6 -0
- package/dist/runtime/index.d.ts.map +1 -0
- package/dist/runtime/index.js +6 -0
- package/dist/runtime/index.js.map +1 -0
- package/dist/runtime/memory.d.ts +32 -0
- package/dist/runtime/memory.d.ts.map +1 -0
- package/dist/runtime/memory.js +15 -0
- package/dist/runtime/memory.js.map +1 -0
- package/dist/runtime/providers.d.ts +53 -0
- package/dist/runtime/providers.d.ts.map +1 -0
- package/dist/runtime/providers.js +124 -0
- package/dist/runtime/providers.js.map +1 -0
- package/dist/runtime/quota.d.ts +66 -0
- package/dist/runtime/quota.d.ts.map +1 -0
- package/dist/runtime/quota.js +102 -0
- package/dist/runtime/quota.js.map +1 -0
- package/dist/runtime/run-chat-turn.d.ts +145 -0
- package/dist/runtime/run-chat-turn.d.ts.map +1 -0
- package/dist/runtime/run-chat-turn.js +341 -0
- package/dist/runtime/run-chat-turn.js.map +1 -0
- package/dist/windows.d.ts +49 -0
- package/dist/windows.d.ts.map +1 -0
- package/dist/windows.js +63 -0
- package/dist/windows.js.map +1 -0
- package/package.json +3 -2
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import { APICallError, RetryError } from 'ai';
|
|
2
|
+
/**
|
|
3
|
+
* Provider-fallback helpers. Used by hosts that want OpenAI as a
|
|
4
|
+
* resilience fallback when the primary Anthropic call hits a transient
|
|
5
|
+
* failure mode.
|
|
6
|
+
*
|
|
7
|
+
* 0.2.0 ships the helpers but does NOT wrap `runChatTurn` with the
|
|
8
|
+
* retry loop — mid-stream provider switching is invasive enough to
|
|
9
|
+
* deserve its own design pass. Hosts compose retry themselves using
|
|
10
|
+
* these primitives:
|
|
11
|
+
*
|
|
12
|
+
* try {
|
|
13
|
+
* return await runChatTurn({ ..., models: anthropicModels })
|
|
14
|
+
* } catch (e) {
|
|
15
|
+
* if (shouldFallback(e)) {
|
|
16
|
+
* return runChatTurn({
|
|
17
|
+
* ...,
|
|
18
|
+
* models: {
|
|
19
|
+
* fast: mapModelIdToOpenAI(anthropicModels.fast),
|
|
20
|
+
* smart: mapModelIdToOpenAI(anthropicModels.smart),
|
|
21
|
+
* },
|
|
22
|
+
* })
|
|
23
|
+
* }
|
|
24
|
+
* throw e
|
|
25
|
+
* }
|
|
26
|
+
*
|
|
27
|
+
* Built-in retry wrapper tracked for 0.2.1.
|
|
28
|
+
*/
|
|
29
|
+
/**
 * Decide whether a failed provider call is worth retrying against a
 * different provider.
 *
 * Returns `true` for failures that look transient: network-layer errors,
 * timeouts, rate limits (HTTP 429 or a rate-limit phrase) and HTTP 5xx.
 * Returns `false` for everything else — non-`Error` values, intentional
 * quota denies, aborts, content-policy refusals and caller-side statuses
 * (400 / 401 / 403) — so a real problem doesn't get masked by a
 * successful fallback.
 *
 * A `RetryError` (the AI SDK wraps the underlying error after exhausting
 * its internal retries) is unwrapped through `lastError`, recursively, so
 * the decision is made on the root cause.
 *
 * When the error is an `APICallError` carrying an explicit 400 / 401 / 403
 * status, that status is authoritative: message-substring heuristics
 * (e.g. the word "timeout" inside a 400 validation message) cannot turn
 * it into a fallback.
 *
 * @param {unknown} err - The value caught from the provider call.
 * @returns {boolean} `true` when falling back to another provider is worthwhile.
 */
export function shouldFallback(err) {
    if (!(err instanceof Error))
        return false;
    // Intentional denies — never fallback.
    if (err.name === 'AiQuotaDeniedError')
        return false;
    // User cancelled — never fallback; the stream would arrive after abort.
    if (err.name === 'AbortError')
        return false;
    // Unwrap RetryError from the AI SDK and judge the underlying error.
    if (RetryError.isInstance(err)) {
        if (err.reason === 'abort')
            return false;
        const underlying = err.lastError;
        return underlying instanceof Error ? shouldFallback(underlying) : false;
    }
    const message = err.message.toLowerCase();
    // Content policy violation — real signal; OpenAI also refuses this content.
    if (message.includes('content_policy') || message.includes('content policy')) {
        return false;
    }
    // A structured caller-side status beats any wording in the message:
    // without this guard a 400 whose text mentions "timeout" or a 401 whose
    // text mentions "rate limit" would be misclassified as transient.
    const isApiCallError = APICallError.isInstance(err);
    const apiStatus = isApiCallError ? (err.statusCode ?? 0) : 0;
    if (apiStatus === 400 || apiStatus === 401 || apiStatus === 403)
        return false; // caller-side / auth
    // Network layer failures — transient infrastructure issues.
    if (message.includes('econnrefused') ||
        message.includes('econnreset') ||
        message.includes('enotfound') ||
        message.includes('etimedout') ||
        message.includes('fetch failed') ||
        err.name === 'NetworkError' ||
        err.name === 'FetchError') {
        return true;
    }
    // Explicit timeout — likely provider degradation.
    if (err.name === 'TimeoutError' || message.includes('timeout')) {
        return true;
    }
    // Rate-limit phrase — providers sometimes format 429s as text-only
    // messages without a clean status code in the string. Match by
    // phrase before falling through to numeric extraction.
    if (message.includes('rate limit') || message.includes('rate_limit')) {
        return true;
    }
    // Prefer the SDK's status code; only parse the message as a last resort.
    const status = isApiCallError ? apiStatus : extractStatusCodeFromMessage(err);
    if (status === 429)
        return true; // rate limit
    if (status >= 500 && status < 600)
        return true; // server error
    return false; // conservative default (includes 400 / 401 / 403 parsed from text)
}
|
|
94
|
+
/**
 * Map an Anthropic model id to its OpenAI equivalent for the fallback
 * path. Capability + cost parity:
 *   - Haiku   → gpt-4o-mini (fast, cheap)
 *   - Sonnet  → gpt-4o (capable, mid-range)
 *   - Opus    → gpt-4o (closest available equivalent)
 *   - Unknown → gpt-4o-mini (conservative default — cheaper)
 *
 * Matching is a case-insensitive substring test, so ids with date
 * suffixes (e.g. `claude-haiku-4-5-20251001`) are handled.
 *
 * This helper is meant to be called while recovering from a provider
 * failure, so it never throws: anything that is not a non-empty string
 * (null, undefined, a number, a model object, …) is treated as "unknown".
 *
 * @param {string | null | undefined} anthropicModelId - Anthropic model id.
 * @returns {'gpt-4o' | 'gpt-4o-mini'} The OpenAI model id to use instead.
 */
export function mapModelIdToOpenAI(anthropicModelId) {
    // Guard on type, not truthiness: a truthy non-string would otherwise
    // blow up in `.toLowerCase()` and mask the original provider error.
    if (typeof anthropicModelId !== 'string' || anthropicModelId === '')
        return 'gpt-4o-mini';
    const id = anthropicModelId.toLowerCase();
    return id.includes('sonnet') || id.includes('opus') ? 'gpt-4o' : 'gpt-4o-mini';
}
|
|
114
|
+
/**
 * Last-resort status-code parser. Finds the first standalone 4xx / 5xx
 * number in `err.message` (e.g. "503 Service Unavailable") for errors
 * that are not `APICallError` instances.
 *
 * A three-digit group that is the tail of a larger formatted number —
 * preceded by a digit plus `.` or `,`, as in "1,500 tokens" or
 * "0.429 seconds" — is not a status code and is skipped.
 *
 * @param {Error} err - Error whose message is scanned.
 * @returns {number} The parsed status code, or 0 when none is found.
 */
function extractStatusCodeFromMessage(err) {
    // `(?<!\d[.,])` rejects the fractional / thousands part of a number;
    // `\b…\b` keeps the match from being a slice of a longer digit run.
    const match = /(?<!\d[.,])\b([45]\d{2})\b/.exec(err.message);
    return match ? Number.parseInt(match[1], 10) : 0;
}
|
|
124
|
+
//# sourceMappingURL=providers.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"providers.js","sourceRoot":"","sources":["../../src/runtime/providers.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,MAAM,IAAI,CAAA;AAE7C;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAEH;;;;;;;;;;GAUG;AACH,MAAM,UAAU,cAAc,CAAC,GAAY;IACvC,IAAI,CAAC,CAAC,GAAG,YAAY,KAAK,CAAC;QAAE,OAAO,KAAK,CAAA;IAEzC,uCAAuC;IACvC,IAAI,GAAG,CAAC,IAAI,KAAK,oBAAoB;QAAE,OAAO,KAAK,CAAA;IAEnD,wEAAwE;IACxE,IAAI,GAAG,CAAC,IAAI,KAAK,YAAY;QAAE,OAAO,KAAK,CAAA;IAE3C,kDAAkD;IAClD,IAAI,UAAU,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;QAC7B,IAAI,GAAG,CAAC,MAAM,KAAK,OAAO;YAAE,OAAO,KAAK,CAAA;QACxC,MAAM,UAAU,GAAG,GAAG,CAAC,SAAS,CAAA;QAChC,IAAI,UAAU,YAAY,KAAK,EAAE,CAAC;YAC9B,OAAO,cAAc,CAAC,UAAU,CAAC,CAAA;QACrC,CAAC;QACD,OAAO,KAAK,CAAA;IAChB,CAAC;IAED,MAAM,OAAO,GAAG,GAAG,CAAC,OAAO,CAAC,WAAW,EAAE,CAAA;IAEzC,4EAA4E;IAC5E,IAAI,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAC,EAAE,CAAC;QAC3E,OAAO,KAAK,CAAA;IAChB,CAAC;IAED,4DAA4D;IAC5D,IACI,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAC;QAChC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC;QAC7B,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC;QAC7B,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAC;QAChC,GAAG,CAAC,IAAI,KAAK,cAAc;QAC3B,GAAG,CAAC,IAAI,KAAK,YAAY,EAC3B,CAAC;QACC,OAAO,IAAI,CAAA;IACf,CAAC;IAED,kDAAkD;IAClD,IAAI,GAAG,CAAC,IAAI,KAAK,cAAc,IAAI,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;QAC7D,OAAO,IAAI,CAAA;IACf,CAAC;IAED,mEAAmE;IACnE,+DAA+D;IAC/D,uDAAuD;IACvD,IAAI,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC;QACnE,OAAO,IAAI,CAAA;IACf,CAAC;IAED,MAAM,MAAM,GAAG,YAAY,CAAC,UAAU,CAAC,GAAG,CAAC;QACvC,CAAC,CAAC,CAAC,GAAG,CAAC,UAAU,IAAI,CAAC,CAAC;QACvB,CAAC,CAAC,4BAA4B,CAAC,GAAG,CAAC,CAAA;IAEvC,IAAI,MAAM,KAAK,GAAG;QAAE,OAAO,IAAI,CAAA,CAAC,aAAa;IAC7C,IAAI,MAAM,IAAI,GAAG,IAAI,MAAM,GAAG,GAAG;QAAE,OAAO,IAAI,CAAA,CAAC,eAAe;IAC9D,IAAI,MAAM,KAAK,GAAG,IAAI,MAAM,KAAK,GAAG,IAAI,MAAM,KAAK,GAAG;QAAE,OAAO,KAAK,CAAA,CAAC,qBAAqB;IAE1F,OAAO,KAAK,CAAA,CAAC,uBAAuB;AACxC,CAAC;AAED;;;;;;;;;;;GAWG;AACH,MAAM,UAAU,kBAAkB,CAAC,gBAA2C;IAC1E,IAAI,CAAC,gBAAgB;QAAE,OAAO,aAAa,CAAA;IAC3C,MAAM,EAAE,GAAG,
gBAAgB,CAAC,WAAW,EAAE,CAAA;IACzC,IAAI,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC;QAAE,OAAO,QAAQ,CAAA;IACjE,OAAO,aAAa,CAAA;AACxB,CAAC;AAED;;;;GAIG;AACH,SAAS,4BAA4B,CAAC,GAAU;IAC5C,MAAM,KAAK,GAAG,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAA;IAClD,MAAM,IAAI,GAAG,KAAK,EAAE,CAAC,CAAC,CAAC,CAAA;IACvB,OAAO,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;AACxC,CAAC"}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import type { QuotaState, QuotaStore } from '../ports/quota-store.js';
|
|
2
|
+
/**
|
|
3
|
+
* Why this reason set: each value names the COUNTER that tripped,
|
|
4
|
+
* not the window or unit. Hosts compose their own UI-facing
|
|
5
|
+
* `tokens_per_day` / `cost_per_hour` strings from `reason + window`
|
|
6
|
+
* at render time. Keeping the kernel reason terse lets hosts use
|
|
7
|
+
* any window granularity (daily, hourly, monthly) without the
|
|
8
|
+
* kernel knowing about it.
|
|
9
|
+
*/
|
|
10
|
+
export type AiQuotaDenyReason = 'input_tokens' | 'output_tokens' | 'tool_calls' | 'cost_usd_micro';
|
|
11
|
+
export interface AiQuotaDenyPayload {
|
|
12
|
+
reason: AiQuotaDenyReason;
|
|
13
|
+
/** Ceiling that was tripped, in the natural unit of `reason` (tokens, calls, micro-USD). */
|
|
14
|
+
ceiling: number;
|
|
15
|
+
/** Counter value at check time. May exceed `ceiling` due to race windows + post-call increments. */
|
|
16
|
+
current: number;
|
|
17
|
+
/** When the current window resets. Surface on the deny UX. */
|
|
18
|
+
windowEnd: Date;
|
|
19
|
+
tenantId: string;
|
|
20
|
+
surface: string;
|
|
21
|
+
}
|
|
22
|
+
/**
|
|
23
|
+
* Thrown by `runChatTurn` when the pre-call quota check finds a
|
|
24
|
+
* counter at or above its ceiling. Hosts catch this at the route
|
|
25
|
+
* boundary and translate to whatever HTTP error shape their client
|
|
26
|
+
* renders (e.g. barbeiro's `buildQuotaDenyPayload` → 429 with
|
|
27
|
+
* localized copy).
|
|
28
|
+
*
|
|
29
|
+
* Captured as a class (not a discriminated union) so `instanceof`
|
|
30
|
+
* works across module boundaries and frame-level `instanceof` checks
|
|
31
|
+
* survive bundler transforms. `payload` carries the structured data.
|
|
32
|
+
*/
|
|
33
|
+
export declare class AiQuotaDeniedError extends Error {
|
|
34
|
+
readonly name = "AiQuotaDeniedError";
|
|
35
|
+
readonly payload: AiQuotaDenyPayload;
|
|
36
|
+
constructor(payload: AiQuotaDenyPayload);
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* Inspect a `QuotaState` and throw on the first counter that has
|
|
40
|
+
* reached or exceeded its ceiling. Order of evaluation is fixed —
|
|
41
|
+
* `input_tokens` → `output_tokens` → `tool_calls` → `cost_usd_micro` —
|
|
42
|
+
* so the deny reason rendered to the user is stable across deploys.
|
|
43
|
+
*
|
|
44
|
+
* A `null`/undefined ceiling means "unbounded" and is skipped (the
|
|
45
|
+
* tenant has no cap on that counter). Counters with zero ceiling are
|
|
46
|
+
* treated the same as unbounded — a zero cap is almost always a
|
|
47
|
+
* misseed; failing every call would surface the bug as user pain
|
|
48
|
+
* instead of a dashboard alert.
|
|
49
|
+
*/
|
|
50
|
+
export declare function enforceQuotaOrThrow(args: {
|
|
51
|
+
tenantId: string;
|
|
52
|
+
surface: string;
|
|
53
|
+
state: QuotaState;
|
|
54
|
+
}): void;
|
|
55
|
+
/**
|
|
56
|
+
* Convenience: query the port and enforce in one call. The pre-call
|
|
57
|
+
* check inside `runChatTurn` uses this. Fail-open behaviour (swallow
|
|
58
|
+
* port errors and proceed) is the CALLER's choice — `runChatTurn`
|
|
59
|
+
* decides via its `failOpenOnQuotaError` flag.
|
|
60
|
+
*/
|
|
61
|
+
export declare function checkAndEnforce(args: {
|
|
62
|
+
quotaStore: QuotaStore;
|
|
63
|
+
tenantId: string;
|
|
64
|
+
surface: string;
|
|
65
|
+
}): Promise<void>;
|
|
66
|
+
//# sourceMappingURL=quota.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"quota.d.ts","sourceRoot":"","sources":["../../src/runtime/quota.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAA;AAErE;;;;;;;GAOG;AACH,MAAM,MAAM,iBAAiB,GACvB,cAAc,GACd,eAAe,GACf,YAAY,GACZ,gBAAgB,CAAA;AAEtB,MAAM,WAAW,kBAAkB;IAC/B,MAAM,EAAE,iBAAiB,CAAA;IACzB,4FAA4F;IAC5F,OAAO,EAAE,MAAM,CAAA;IACf,oGAAoG;IACpG,OAAO,EAAE,MAAM,CAAA;IACf,8DAA8D;IAC9D,SAAS,EAAE,IAAI,CAAA;IACf,QAAQ,EAAE,MAAM,CAAA;IAChB,OAAO,EAAE,MAAM,CAAA;CAClB;AAED;;;;;;;;;;GAUG;AACH,qBAAa,kBAAmB,SAAQ,KAAK;IACzC,SAAkB,IAAI,wBAAuB;IAC7C,QAAQ,CAAC,OAAO,EAAE,kBAAkB,CAAA;gBAExB,OAAO,EAAE,kBAAkB;CAQ1C;AAED;;;;;;;;;;;GAWG;AACH,wBAAgB,mBAAmB,CAAC,IAAI,EAAE;IACtC,QAAQ,EAAE,MAAM,CAAA;IAChB,OAAO,EAAE,MAAM,CAAA;IACf,KAAK,EAAE,UAAU,CAAA;CACpB,GAAG,IAAI,CA2DP;AAED;;;;;GAKG;AACH,wBAAsB,eAAe,CAAC,IAAI,EAAE;IACxC,UAAU,EAAE,UAAU,CAAA;IACtB,QAAQ,EAAE,MAAM,CAAA;IAChB,OAAO,EAAE,MAAM,CAAA;CAClB,GAAG,OAAO,CAAC,IAAI,CAAC,CAUhB"}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
/**
 * Thrown by `runChatTurn` when the pre-call quota check finds a
 * counter at or above its ceiling. Hosts catch this at the route
 * boundary and translate to whatever HTTP error shape their client
 * renders (e.g. barbeiro's `buildQuotaDenyPayload` → 429 with
 * localized copy).
 *
 * Captured as a class (not a discriminated union) so `instanceof`
 * checks work; `payload` carries the structured data and `name` is
 * always `'AiQuotaDeniedError'` for name-based checks.
 *
 * The message has the form
 * `AI quota exceeded: <reason> on <surface> for tenant <tenantId> (<current>/<ceiling>)`.
 */
export class AiQuotaDeniedError extends Error {
    name = 'AiQuotaDeniedError';
    payload;
    /**
     * @param {object} payload - Structured deny details; stored as-is on `this.payload`.
     */
    constructor(payload) {
        super(`AI quota exceeded: ${payload.reason} on ${payload.surface} for tenant ${payload.tenantId} (${payload.current}/${payload.ceiling})`);
        this.payload = payload;
        // Restore the prototype chain across bundler transforms. Use
        // `new.target` rather than this class's own prototype so that
        // subclasses keep their prototype (and therefore their methods
        // and `instanceof` identity).
        Object.setPrototypeOf(this, new.target.prototype);
    }
}
|
|
22
|
+
/**
 * Inspect a `QuotaState` and throw on the first counter that has
 * reached or exceeded its ceiling. Order of evaluation is fixed —
 * `input_tokens` → `output_tokens` → `tool_calls` → `cost_usd_micro` —
 * so the deny reason rendered to the user is stable across deploys.
 *
 * A `null`/undefined ceiling means "unbounded" and is skipped (the
 * tenant has no cap on that counter). Ceilings that are zero or
 * negative are treated the same as unbounded — a zero cap is almost
 * always a misseed; failing every call would surface the bug as user
 * pain instead of a dashboard alert.
 *
 * @param {object} args
 * @param {string} args.tenantId - Copied into the deny payload.
 * @param {string} args.surface - Copied into the deny payload.
 * @param {object} args.state - Provides `ceilings`, `used` and `windowEnd`.
 * @returns {void}
 * @throws {AiQuotaDeniedError} For the first counter with `used >= ceiling`.
 */
export function enforceQuotaOrThrow(args) {
    const { ceilings, used, windowEnd } = args.state;
    // [deny reason, key on `ceilings`, key on `used`]. Array order IS the
    // evaluation order documented above — keep it stable.
    const counters = [
        ['input_tokens', 'maxTokensIn', 'tokensIn'],
        ['output_tokens', 'maxTokensOut', 'tokensOut'],
        ['tool_calls', 'maxCallsPerWindow', 'calls'],
        ['cost_usd_micro', 'maxUsdMicro', 'usdMicro'],
    ];
    for (const [reason, ceilingKey, usedKey] of counters) {
        const ceiling = ceilings[ceilingKey];
        // Non-numeric and non-positive ceilings mean "no cap". Written as
        // `!(ceiling > 0)` so a NaN ceiling is skipped as well.
        if (typeof ceiling !== 'number' || !(ceiling > 0))
            continue;
        // `used` is only read once a real cap exists for this counter.
        const current = used[usedKey];
        if (current >= ceiling) {
            throw new AiQuotaDeniedError({
                reason,
                ceiling,
                current,
                windowEnd,
                tenantId: args.tenantId,
                surface: args.surface,
            });
        }
    }
}
|
|
85
|
+
/**
 * One-shot helper: fetch the current quota state from the port, then
 * run the ceiling check on it. This is what the pre-call gate inside
 * `runChatTurn` uses.
 *
 * Nothing is caught here — a rejection from `quotaStore.check` and a
 * throw from `enforceQuotaOrThrow` both propagate. Whether to fail open
 * on port errors is the CALLER's choice (`runChatTurn` decides via its
 * `failOpenOnQuotaError` flag).
 *
 * @param {object} args
 * @param {object} args.quotaStore - Port whose `check({ tenantId, surface })` yields the state.
 * @param {string} args.tenantId
 * @param {string} args.surface
 * @returns {Promise<void>}
 */
export async function checkAndEnforce(args) {
    const { quotaStore, tenantId, surface } = args;
    const state = await quotaStore.check({ tenantId, surface });
    enforceQuotaOrThrow({ tenantId, surface, state });
}
|
|
102
|
+
//# sourceMappingURL=quota.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"quota.js","sourceRoot":"","sources":["../../src/runtime/quota.ts"],"names":[],"mappings":"AA4BA;;;;;;;;;;GAUG;AACH,MAAM,OAAO,kBAAmB,SAAQ,KAAK;IACvB,IAAI,GAAG,oBAAoB,CAAA;IACpC,OAAO,CAAoB;IAEpC,YAAY,OAA2B;QACnC,KAAK,CACD,sBAAsB,OAAO,CAAC,MAAM,OAAO,OAAO,CAAC,OAAO,eAAe,OAAO,CAAC,QAAQ,KAAK,OAAO,CAAC,OAAO,IAAI,OAAO,CAAC,OAAO,GAAG,CACtI,CAAA;QACD,IAAI,CAAC,OAAO,GAAG,OAAO,CAAA;QACtB,yDAAyD;QACzD,MAAM,CAAC,cAAc,CAAC,IAAI,EAAE,kBAAkB,CAAC,SAAS,CAAC,CAAA;IAC7D,CAAC;CACJ;AAED;;;;;;;;;;;GAWG;AACH,MAAM,UAAU,mBAAmB,CAAC,IAInC;IACG,MAAM,EAAE,QAAQ,EAAE,IAAI,EAAE,SAAS,EAAE,GAAG,IAAI,CAAC,KAAK,CAAA;IAEhD,IACI,OAAO,QAAQ,CAAC,WAAW,KAAK,QAAQ;QACxC,QAAQ,CAAC,WAAW,GAAG,CAAC;QACxB,IAAI,CAAC,QAAQ,IAAI,QAAQ,CAAC,WAAW,EACvC,CAAC;QACC,MAAM,IAAI,kBAAkB,CAAC;YACzB,MAAM,EAAE,cAAc;YACtB,OAAO,EAAE,QAAQ,CAAC,WAAW;YAC7B,OAAO,EAAE,IAAI,CAAC,QAAQ;YACtB,SAAS;YACT,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,OAAO,EAAE,IAAI,CAAC,OAAO;SACxB,CAAC,CAAA;IACN,CAAC;IACD,IACI,OAAO,QAAQ,CAAC,YAAY,KAAK,QAAQ;QACzC,QAAQ,CAAC,YAAY,GAAG,CAAC;QACzB,IAAI,CAAC,SAAS,IAAI,QAAQ,CAAC,YAAY,EACzC,CAAC;QACC,MAAM,IAAI,kBAAkB,CAAC;YACzB,MAAM,EAAE,eAAe;YACvB,OAAO,EAAE,QAAQ,CAAC,YAAY;YAC9B,OAAO,EAAE,IAAI,CAAC,SAAS;YACvB,SAAS;YACT,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,OAAO,EAAE,IAAI,CAAC,OAAO;SACxB,CAAC,CAAA;IACN,CAAC;IACD,IACI,OAAO,QAAQ,CAAC,iBAAiB,KAAK,QAAQ;QAC9C,QAAQ,CAAC,iBAAiB,GAAG,CAAC;QAC9B,IAAI,CAAC,KAAK,IAAI,QAAQ,CAAC,iBAAiB,EAC1C,CAAC;QACC,MAAM,IAAI,kBAAkB,CAAC;YACzB,MAAM,EAAE,YAAY;YACpB,OAAO,EAAE,QAAQ,CAAC,iBAAiB;YACnC,OAAO,EAAE,IAAI,CAAC,KAAK;YACnB,SAAS;YACT,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,OAAO,EAAE,IAAI,CAAC,OAAO;SACxB,CAAC,CAAA;IACN,CAAC;IACD,IACI,OAAO,QAAQ,CAAC,WAAW,KAAK,QAAQ;QACxC,QAAQ,CAAC,WAAW,GAAG,CAAC;QACxB,IAAI,CAAC,QAAQ,IAAI,QAAQ,CAAC,WAAW,EACvC,CAAC;QACC,MAAM,IAAI,kBAAkB,CAAC;YACzB,MAAM,EAAE,gBAAgB;YACxB,OAAO,EAAE,QAAQ,CAAC,WAAW;YAC7B,OAAO,EAAE,IAAI,CAAC,QAAQ;YACtB,SAAS;YACT,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,OAAO,EAAE,IAAI,CAAC,OAAO;SACxB,CAAC,CAAA;IACN,CAAC;AACL,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CA
AC,IAIrC;IACG,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC;QACtC,QAAQ,EAAE,IAAI,CAAC,QAAQ;QACvB,OAAO,EAAE,IAAI,CAAC,OAAO;KACxB,CAAC,CAAA;IACF,mBAAmB,CAAC;QAChB,QAAQ,EAAE,IAAI,CAAC,QAAQ;QACvB,OAAO,EAAE,IAAI,CAAC,OAAO;QACrB,KAAK;KACR,CAAC,CAAA;AACN,CAAC"}
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
import { type UIMessage } from 'ai';
|
|
2
|
+
import type { BaseToolContext } from '../context.js';
|
|
3
|
+
import { type ModelTier } from '../models.js';
|
|
4
|
+
import type { AuditStore } from '../ports/audit-store.js';
|
|
5
|
+
import { type Clock } from '../ports/clock.js';
|
|
6
|
+
import type { ModelKeyProvider } from '../ports/key-provider.js';
|
|
7
|
+
import { type Logger } from '../ports/logger.js';
|
|
8
|
+
import type { MemoryStore } from '../ports/memory-store.js';
|
|
9
|
+
import type { QuotaStore } from '../ports/quota-store.js';
|
|
10
|
+
import { type TelemetrySink } from '../ports/telemetry-sink.js';
|
|
11
|
+
import type { TurnRecord, TurnStore } from '../ports/turn-store.js';
|
|
12
|
+
import type { AgentToolDefinition } from '../tool.js';
|
|
13
|
+
/**
|
|
14
|
+
* Public chat-turn entry point. One call replaces the ~300 LoC of stream
|
|
15
|
+
* orchestration a host route would otherwise write by hand
|
|
16
|
+
* (build tools, call streamText, persist turn, emit telemetry, handle
|
|
17
|
+
* errors, return SSE response).
|
|
18
|
+
*
|
|
19
|
+
* Current scope (slice 4):
|
|
20
|
+
* ✓ Model selection via `selectChatModel`
|
|
21
|
+
* ✓ Provider key resolution via `ModelKeyProvider` port
|
|
22
|
+
* ✓ AI SDK tool building via the `buildAiSdkTools` adapter
|
|
23
|
+
* ✓ Turn persistence via `TurnStore` port (pending → completed | failed | aborted)
|
|
24
|
+
* ✓ Telemetry emit via `TelemetrySink` port (default Noop)
|
|
25
|
+
* ✓ Cost estimate via `estimateCost`
|
|
26
|
+
* ✓ SSE Response ready to return from a Next.js route
|
|
27
|
+
* ✓ Pre-call quota gate via `QuotaStore.check` (throws `AiQuotaDeniedError` on deny)
|
|
28
|
+
* ✓ Post-call quota record via `QuotaStore.record` (fire-and-forget)
|
|
29
|
+
* ✓ Memory load via `MemoryStore.load` (formatted into dynamic system segment)
|
|
30
|
+
* ✓ Anthropic prompt-cache breakpoints via `applyCacheBreakpoints`
|
|
31
|
+
*
|
|
32
|
+
* Deferred to later slices / releases:
|
|
33
|
+
* ☐ Empty-recovery classifier — exposed as a helper in slice 5;
|
|
34
|
+
* calling routes decide what to do with the signal.
|
|
35
|
+
* ☐ OpenAI fallback retry wrapper inside runChatTurn — slice 5
|
|
36
|
+
* ships the helper primitives (`shouldFallback`, `mapModelIdToOpenAI`)
|
|
37
|
+
* so hosts can compose the retry themselves. Built-in retry
|
|
38
|
+
* wrapper deferred to 0.2.1 — mid-stream switching is invasive.
|
|
39
|
+
*/
|
|
40
|
+
export interface RunChatTurnPorts {
|
|
41
|
+
turnStore: TurnStore;
|
|
42
|
+
keyProvider: ModelKeyProvider;
|
|
43
|
+
auditStore?: AuditStore;
|
|
44
|
+
/** Optional. When supplied, drives the pre-call quota gate (`check`) and the post-call usage record (`record`). */
|
|
45
|
+
quotaStore?: QuotaStore;
|
|
46
|
+
/** Optional. When supplied, `load` is called and the result is formatted into the dynamic system segment. */
|
|
47
|
+
memoryStore?: MemoryStore;
|
|
48
|
+
telemetry?: TelemetrySink;
|
|
49
|
+
clock?: Clock;
|
|
50
|
+
logger?: Logger;
|
|
51
|
+
}
|
|
52
|
+
export interface RunChatTurnArgs<TCtx extends BaseToolContext> {
|
|
53
|
+
/** Stable id grouping this turn into a conversation. */
|
|
54
|
+
threadId: string;
|
|
55
|
+
/** Per-request context. */
|
|
56
|
+
ctx: TCtx;
|
|
57
|
+
/** UI messages from the client (`@ai-sdk/react` shape). */
|
|
58
|
+
messages: UIMessage[];
|
|
59
|
+
/** Eligible tool registry — host pre-filters for surface/availability. */
|
|
60
|
+
tools: readonly AgentToolDefinition<any, any, TCtx>[];
|
|
61
|
+
/**
|
|
62
|
+
* Split system prompt for Anthropic prompt-cache hits.
|
|
63
|
+
*
|
|
64
|
+
* `static` — tenant-invariant content. Hashed for the cache key.
|
|
65
|
+
* MUST NOT contain per-tenant interpolated strings;
|
|
66
|
+
* numbers / IDs are fine if they live in `dynamic`
|
|
67
|
+
* instead.
|
|
68
|
+
* `dynamic` — tenant-specific content (timezone label, business
|
|
69
|
+
* name in prose, current time, memory facts). Rendered
|
|
70
|
+
* after the cache breakpoint so it never influences
|
|
71
|
+
* the cache key. Optional — omit if there's nothing
|
|
72
|
+
* tenant-specific to inject.
|
|
73
|
+
*
|
|
74
|
+
* Memory facts loaded via the `MemoryStore` port are auto-appended
|
|
75
|
+
* to `dynamic` before the cache split — hosts don't need to format
|
|
76
|
+
* them in by hand.
|
|
77
|
+
*/
|
|
78
|
+
systemPrompt: {
|
|
79
|
+
static: string;
|
|
80
|
+
dynamic?: string;
|
|
81
|
+
};
|
|
82
|
+
/** Per-tier model ids. Host resolves from its env layer. */
|
|
83
|
+
models: {
|
|
84
|
+
fast: string;
|
|
85
|
+
smart: string;
|
|
86
|
+
force?: string | null;
|
|
87
|
+
};
|
|
88
|
+
/** Optional hint that bypasses the model heuristic for this turn. */
|
|
89
|
+
modelHint?: {
|
|
90
|
+
tier?: ModelTier;
|
|
91
|
+
};
|
|
92
|
+
/** Forwarded to streamText for client-side cancellation. */
|
|
93
|
+
abortSignal?: AbortSignal;
|
|
94
|
+
/** Required ports + optional advanced ports. */
|
|
95
|
+
ports: RunChatTurnPorts;
|
|
96
|
+
/** Side-effect hook after the assistant turn row is finalised. */
|
|
97
|
+
onTurnFinalized?: (turn: TurnRecord) => void | Promise<void>;
|
|
98
|
+
/**
|
|
99
|
+
* When true (default) and `ports.quotaStore` is supplied, a thrown
|
|
100
|
+
* error from the port's `check` method is logged at warn and the
|
|
101
|
+
* call proceeds anyway. Matches the barbeiro convention — never
|
|
102
|
+
* block paying customers on a transient Redis blip. Pre-call
|
|
103
|
+
* `AiQuotaDeniedError` throws (which are intentional) propagate
|
|
104
|
+
* regardless of this flag.
|
|
105
|
+
*
|
|
106
|
+
* Set to false in environments where the deny path must be
|
|
107
|
+
* strictly authoritative (compliance, internal-test fixtures).
|
|
108
|
+
*/
|
|
109
|
+
failOpenOnQuotaError?: boolean;
|
|
110
|
+
/**
|
|
111
|
+
* Optional namespace passed to `MemoryStore.load` so a host that
|
|
112
|
+
* partitions facts (e.g. `'preferences'` vs `'facts'`) can scope
|
|
113
|
+
* the load. Omit for the default unscoped lookup. Has no effect
|
|
114
|
+
* when `ports.memoryStore` is not supplied.
|
|
115
|
+
*/
|
|
116
|
+
memoryNamespace?: string;
|
|
117
|
+
/**
|
|
118
|
+
* Optional host-supplied turn id. When provided, the kernel uses
|
|
119
|
+
* it verbatim for the assistant `TurnRecord.id`. When omitted, the
|
|
120
|
+
* kernel generates one via the existing
|
|
121
|
+
* `turn_<epoch>_<requestId?>_<rand>` shape.
|
|
122
|
+
*
|
|
123
|
+
* Hosts whose persistence layer uses a different id space
|
|
124
|
+
* (incrementing integers, externally-supplied UUIDs) pass their
|
|
125
|
+
* id here so the port impl can update the row by its real key
|
|
126
|
+
* instead of maintaining a `kernelTurnId → hostRowId` mapping. The
|
|
127
|
+
* port impl is responsible for parsing the id back to its native
|
|
128
|
+
* shape (e.g. `Number(turn.id)` for integer primary keys).
|
|
129
|
+
*
|
|
130
|
+
* Added in 0.2.1; pre-existing callers continue to get
|
|
131
|
+
* kernel-generated ids unchanged.
|
|
132
|
+
*/
|
|
133
|
+
turnId?: string;
|
|
134
|
+
/**
|
|
135
|
+
* Max steps in the tool-use loop. Default `5` — leaves headroom for
|
|
136
|
+
* a few tool round-trips per turn without runaway loops. AI SDK
|
|
137
|
+
* counts each model response as a step; without this hint, the
|
|
138
|
+
* SDK stops after the FIRST response, meaning tool results never
|
|
139
|
+
* feed back to the model (the user sees the empty bubble after a
|
|
140
|
+
* tool call). Set higher for agents that do deep multi-step work.
|
|
141
|
+
*/
|
|
142
|
+
maxSteps?: number;
|
|
143
|
+
}
|
|
144
|
+
/**
 * Runs one chat turn and resolves to the `Response` to return to the
 * client. See `RunChatTurnArgs` for the accepted options and
 * `RunChatTurnPorts` for the ports it works against.
 */
export declare function runChatTurn<TCtx extends BaseToolContext>(args: RunChatTurnArgs<TCtx>): Promise<Response>;
|
|
145
|
+
//# sourceMappingURL=run-chat-turn.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"run-chat-turn.d.ts","sourceRoot":"","sources":["../../src/runtime/run-chat-turn.ts"],"names":[],"mappings":"AACA,OAAO,EAAmD,KAAK,SAAS,EAAE,MAAM,IAAI,CAAA;AAIpF,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAA;AAEpD,OAAO,EAAmB,KAAK,SAAS,EAAE,MAAM,cAAc,CAAA;AAC9D,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAA;AACzD,OAAO,EAAE,KAAK,KAAK,EAAe,MAAM,mBAAmB,CAAA;AAC3D,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,0BAA0B,CAAA;AAChE,OAAO,EAAE,KAAK,MAAM,EAAgB,MAAM,oBAAoB,CAAA;AAC9D,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,0BAA0B,CAAA;AAC3D,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAA;AACzD,OAAO,EAAqB,KAAK,aAAa,EAAE,MAAM,4BAA4B,CAAA;AAClF,OAAO,KAAK,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,wBAAwB,CAAA;AACnE,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,YAAY,CAAA;AAKrD;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,MAAM,WAAW,gBAAgB;IAC7B,SAAS,EAAE,SAAS,CAAA;IACpB,WAAW,EAAE,gBAAgB,CAAA;IAC7B,UAAU,CAAC,EAAE,UAAU,CAAA;IACvB,gEAAgE;IAChE,UAAU,CAAC,EAAE,UAAU,CAAA;IACvB,gEAAgE;IAChE,WAAW,CAAC,EAAE,WAAW,CAAA;IACzB,SAAS,CAAC,EAAE,aAAa,CAAA;IACzB,KAAK,CAAC,EAAE,KAAK,CAAA;IACb,MAAM,CAAC,EAAE,MAAM,CAAA;CAClB;AAED,MAAM,WAAW,eAAe,CAAC,IAAI,SAAS,eAAe;IACzD,wDAAwD;IACxD,QAAQ,EAAE,MAAM,CAAA;IAChB,2BAA2B;IAC3B,GAAG,EAAE,IAAI,CAAA;IACT,2DAA2D;IAC3D,QAAQ,EAAE,SAAS,EAAE,CAAA;IACrB,0EAA0E;IAC1E,KAAK,EAAE,SAAS,mBAAmB,CAAC,GAAG,EAAE,GAAG,EAAE,IAAI,CAAC,EAAE,CAAA;IACrD;;;;;;;;;;;;;;;;OAgBG;IACH,YAAY,EAAE;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,OAAO,CAAC,EAAE,MAAM,CAAA;KAAE,CAAA;IAClD,4DAA4D;IAC5D,MAAM,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;KAAE,CAAA;IAC9D,qEAAqE;IACrE,SAAS,CAAC,EAAE;QAAE,IAAI,CAAC,EAAE,SAAS,CAAA;KAAE,CAAA;IAChC,4DAA4D;IAC5D,WAAW,CAAC,EAAE,WAAW,CAAA;IACzB,gDAAgD;IAChD,KAAK,EAAE,gBAAgB,CAAA;IACvB,kEAAkE;IAClE,eAAe,CAAC,EAAE,CAAC,IAAI,EAAE,UAAU,KAAK,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IAC5D;;;;;;;;;;OAUG;IACH,oBAAoB,CAAC,EAAE,OAAO,CAAA;IAC9B;;;;;OAKG;IACH,eAAe,CAAC,EAAE,MAAM,CAAA;IACxB;;;;;;;;;;;;;;;OAeG;IACH,MAAM,CAAC,EAAE,MAAM,CAAA;IACf;;;;;;;OAOG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAA;CACpB;AAED,wBAAsB,WAAW
,CAAC,IAAI,SAAS,eAAe,EAC1D,IAAI,EAAE,eAAe,CAAC,IAAI,CAAC,GAC5B,OAAO,CAAC,QAAQ,CAAC,CAwUnB"}
|