@seanhogg/builderforce-sdk 0.9.0 → 2026.6.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +60 -3
- package/dist/index.cjs +171 -3
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +185 -10
- package/dist/index.d.ts +185 -10
- package/dist/index.mjs +165 -2
- package/dist/index.mjs.map +1 -1
- package/package.json +6 -4
package/dist/index.d.cts
CHANGED
|
@@ -8,19 +8,37 @@ type ChatRole = 'system' | 'user' | 'assistant' | 'tool';
|
|
|
8
8
|
* upstream — e.g. all `openrouter` means a saturated shared key, not a
|
|
9
9
|
* model-specific issue.
|
|
10
10
|
*/
|
|
11
|
+
/**
|
|
12
|
+
* Coarse failure class for one failover attempt. Branch on this instead of
|
|
13
|
+
* regex-sniffing the error message. `'schema'` means the upstream rejected the
|
|
14
|
+
* `response_format.json_schema` as too complex for its constrained-decoding
|
|
15
|
+
* engine (see `FailoverEvent.reason === 'schema_too_complex'`); `'content_filter'`
|
|
16
|
+
* means a safety system blocked the generation. Open string union for
|
|
17
|
+
* forward-compat — a newer gateway may add classes an older SDK doesn't list.
|
|
18
|
+
*/
|
|
19
|
+
type FailoverKind = 'rate_limit' | 'timeout' | 'auth' | 'server_error' | 'client_error' | 'schema' | 'content_filter' | 'network' | 'skipped' | (string & {});
|
|
11
20
|
interface FailoverEvent {
|
|
12
21
|
model: string;
|
|
13
|
-
/** `'openrouter' | 'cerebras' | 'nvidia' | 'ollama'
|
|
22
|
+
/** `'openrouter' | 'cerebras' | 'nvidia' | 'ollama' | 'googleai' | …` */
|
|
14
23
|
vendor: string;
|
|
15
|
-
/**
|
|
24
|
+
/** Gateway-normalized status, or 0 for embedded errors / network failures.
|
|
25
|
+
* For a schema rejection this is `422` (the request-error class); the REAL
|
|
26
|
+
* upstream status is in `upstreamStatus`. */
|
|
16
27
|
code: number;
|
|
17
28
|
/** Wall-clock time the gateway spent on this attempt, ms. Present on newer
|
|
18
29
|
* gateway versions; absent on older ones. */
|
|
19
30
|
durationMs?: number;
|
|
20
|
-
/** Coarse failure class —
|
|
21
|
-
*
|
|
22
|
-
|
|
23
|
-
|
|
31
|
+
/** Coarse failure class — see {@link FailoverKind}. The full upstream error
|
|
32
|
+
* text is NOT exposed to callers; quote `traceId` to support for that. */
|
|
33
|
+
kind?: FailoverKind;
|
|
34
|
+
/** Stable machine-readable cause slug when one applies — e.g.
|
|
35
|
+
* `'schema_too_complex'`. Branch on this for structured handling instead of
|
|
36
|
+
* parsing `message`. Absent for unclassified failures. */
|
|
37
|
+
reason?: string;
|
|
38
|
+
/** The REAL upstream HTTP status before the gateway normalized it into `code`
|
|
39
|
+
* — e.g. a Gemini schema 400 surfaces as `code: 422` with `upstreamStatus: 400`.
|
|
40
|
+
* Absent when `code` already IS the upstream status. */
|
|
41
|
+
upstreamStatus?: number;
|
|
24
42
|
}
|
|
25
43
|
interface TextContentPart {
|
|
26
44
|
type: 'text';
|
|
@@ -153,9 +171,15 @@ interface ChatCompletionCreateParams extends PerCallOptions {
|
|
|
153
171
|
* gateway-side schema validation with retry across the failover chain. */
|
|
154
172
|
response_format?: ResponseFormat;
|
|
155
173
|
/**
|
|
156
|
-
*
|
|
157
|
-
*
|
|
158
|
-
*
|
|
174
|
+
* Telemetry slug — persisted to `llm_usage_log.use_case` and echoed back in
|
|
175
|
+
* `_builderforce.useCase`. The taxonomy is yours, BUT a few well-known patterns
|
|
176
|
+
* also *influence routing* (the gateway substring-matches them):
|
|
177
|
+
* - `…ocr…` → prefers OCR/vision-capable models.
|
|
178
|
+
* - quality-critical work (`resume`, `cover_letter`, `tailor`, `proposal`,
|
|
179
|
+
* `cv`, …) → leads with the best models your PLAN unlocks (premium writers
|
|
180
|
+
* on paid plans). Failover + the funded reliability backstop still apply.
|
|
181
|
+
* Slugs that don't match any pattern are pure telemetry. Routing is always
|
|
182
|
+
* gateway-owned; this only nudges model selection.
|
|
159
183
|
*/
|
|
160
184
|
useCase?: string;
|
|
161
185
|
/** Free-form key/value pairs persisted to `llm_usage_log.metadata` for billing
|
|
@@ -236,6 +260,15 @@ interface ChatCompletionResponse {
|
|
|
236
260
|
effectivePlan?: string;
|
|
237
261
|
/** Number of vendor retries the gateway performed for json_schema conformance. */
|
|
238
262
|
schemaRetries?: number;
|
|
263
|
+
/**
|
|
264
|
+
* `true` when the gateway AUTO-DOWNGRADED a too-complex `response_format.json_schema`
|
|
265
|
+
* to loose `json_object` and re-ran the cascade so you still got a structured
|
|
266
|
+
* result instead of a terminal `schema_too_complex` error. The strict-schema
|
|
267
|
+
* guarantee was relaxed — **validate the returned JSON yourself** (it parses,
|
|
268
|
+
* but wasn't constrained-decoded against your schema). Pre-empt the round-trip
|
|
269
|
+
* with `deriveResponseFormat` when you know the schema is large.
|
|
270
|
+
*/
|
|
271
|
+
schemaDowngraded?: boolean;
|
|
239
272
|
/** Echo of `request.useCase` (opaque telemetry slug). */
|
|
240
273
|
useCase?: string;
|
|
241
274
|
/** Echo of `request.metadata` for caller-side billing trace-back. */
|
|
@@ -630,4 +663,146 @@ declare class BuilderforceClient {
|
|
|
630
663
|
constructor(options: BuilderforceClientOptions);
|
|
631
664
|
}
|
|
632
665
|
|
|
633
|
-
|
|
666
|
+
/**
|
|
667
|
+
* Coarse, stable error class for a failed gateway call — keyed off the gateway's
|
|
668
|
+
* OWN failure taxonomy (`error.code` + `terminal` + the failover breakdown), NOT
|
|
669
|
+
* raw HTTP-status guessing. Branch on this instead of reinventing a classifier
|
|
670
|
+
* per consumer (which inevitably drifts).
|
|
671
|
+
*
|
|
672
|
+
* rate_limit — the gateway's whole cascade was rate-limited (429
|
|
673
|
+
* `cascade_exhausted`). Retry later (`retryAfter`).
|
|
674
|
+
* token_cap — a per-TENANT cap was hit (plan/monthly/host/claw token
|
|
675
|
+
* or image-credit limit). TERMINAL for this billing
|
|
676
|
+
* window — a different model won't help.
|
|
677
|
+
* schema_too_complex — every candidate rejected the `response_format.json_schema`
|
|
678
|
+
* as too complex for its constrained-decoding engine.
|
|
679
|
+
* TERMINAL: simplify the schema or drop to `json_object`.
|
|
680
|
+
* invalid_request — malformed payload (400/422) every model rejected. TERMINAL.
|
|
681
|
+
* auth — bad/missing API key (401/403). TERMINAL.
|
|
682
|
+
* model_unavailable — a strict-pinned model is on cooldown / unconfigured (503).
|
|
683
|
+
* Not terminal: drop the pin or pick another model.
|
|
684
|
+
* timeout — the request (or a single vendor attempt) timed out (408).
|
|
685
|
+
* service_unavailable — infrastructure ceiling (503 `worker_subrequest_exhausted`)
|
|
686
|
+
* or transient upstream outage (5xx). Retry after a backoff.
|
|
687
|
+
* content_filter — a safety system blocked the generation.
|
|
688
|
+
* network — the request never reached the gateway (DNS/TLS/reset).
|
|
689
|
+
* aborted — the caller's AbortSignal fired (499 / AbortError).
|
|
690
|
+
* unknown — none of the above matched.
|
|
691
|
+
*/
|
|
692
|
+
type ErrorKind = 'rate_limit' | 'token_cap' | 'schema_too_complex' | 'invalid_request' | 'auth' | 'model_unavailable' | 'timeout' | 'service_unavailable' | 'content_filter' | 'network' | 'aborted' | 'unknown';
|
|
693
|
+
interface ErrorClassification {
|
|
694
|
+
kind: ErrorKind;
|
|
695
|
+
/**
|
|
696
|
+
* `true` when retrying the SAME request on a DIFFERENT model will NOT help —
|
|
697
|
+
* the consumer's own failover chain should short-circuit. Sourced from the
|
|
698
|
+
* gateway's `error.terminal` flag when present, with a kind-based fallback.
|
|
699
|
+
*/
|
|
700
|
+
terminal: boolean;
|
|
701
|
+
/**
|
|
702
|
+
* `true` when the SAME request is safe to retry as-is (idempotently), usually
|
|
703
|
+
* after `retryAfter` seconds — e.g. a transient rate-limit/outage/timeout.
|
|
704
|
+
* `false` for deterministic rejections (schema, invalid request, auth, caps).
|
|
705
|
+
*/
|
|
706
|
+
retryable: boolean;
|
|
707
|
+
/** Seconds the caller should wait before retrying, when the gateway supplied it. */
|
|
708
|
+
retryAfter?: number;
|
|
709
|
+
/** HTTP status, when the error reached the gateway. */
|
|
710
|
+
status?: number;
|
|
711
|
+
/** Gateway error code slug, when present (`schema_too_complex`, `plan_token_limit_exceeded`, …). */
|
|
712
|
+
code?: string;
|
|
713
|
+
/** Human-readable message (the gateway's, or the thrown error's). */
|
|
714
|
+
message: string;
|
|
715
|
+
}
|
|
716
|
+
/**
|
|
717
|
+
* Classify any caught error from a Builderforce SDK call into a structured,
|
|
718
|
+
* actionable verdict. Accepts `unknown` so a consumer can pass a raw `catch`
|
|
719
|
+
* binding — non-`BuilderforceApiError` values (network throws, `AbortError`,
|
|
720
|
+
* plain `Error`) are classified too.
|
|
721
|
+
*
|
|
722
|
+
* This is the FIRST-PARTY classifier the gateway feedback asked for: keyed off
|
|
723
|
+
* the gateway's own taxonomy so every consumer agrees on what "terminal" and
|
|
724
|
+
* "retryable" mean instead of hand-rolling `429/408/401/5xx → kind` guesses that
|
|
725
|
+
* drift apart.
|
|
726
|
+
*/
|
|
727
|
+
declare function classifyError(err: unknown): ErrorClassification;
|
|
728
|
+
|
|
729
|
+
/**
|
|
730
|
+
* deriveResponseFormat — pick the strongest `response_format` a request can SAFELY
|
|
731
|
+
* use given how complex its JSON-Schema is and (optionally) which vendor will
|
|
732
|
+
* serve it.
|
|
733
|
+
*
|
|
734
|
+
* The problem this solves: a strict `json_schema` gives the best conformance, but
|
|
735
|
+
* some vendors' constrained-decoding engines reject a schema that's too complex
|
|
736
|
+
* (Gemini's "too many states for serving"). The gateway now surfaces that as a
|
|
737
|
+
* terminal `schema_too_complex` error — but the cleaner fix is to NOT send a
|
|
738
|
+
* strict schema a vendor can't honour in the first place. This utility is the
|
|
739
|
+
* pre-flight guard: it emits `{ type: 'json_schema', strict }` when the schema is
|
|
740
|
+
* within the (vendor-specific or conservative-default) complexity ceiling, and
|
|
741
|
+
* falls back to `{ type: 'json_object' }` (loose JSON mode — universally
|
|
742
|
+
* supported) when it isn't.
|
|
743
|
+
*
|
|
744
|
+
* The SDK is zero-dependency, so this takes a plain JSON-Schema object — convert
|
|
745
|
+
* a Zod schema first with `zod-to-json-schema` (`deriveResponseFormat(zodToJsonSchema(MySchema), …)`).
|
|
746
|
+
*/
|
|
747
|
+
interface DeriveResponseFormatOptions {
|
|
748
|
+
/** Schema name sent as `json_schema.name` (default `'response'`). */
|
|
749
|
+
name?: string;
|
|
750
|
+
/** Set `json_schema.strict` when a strict schema is emitted (default `true`). */
|
|
751
|
+
strict?: boolean;
|
|
752
|
+
/**
|
|
753
|
+
* The vendor that will serve the request, when known (the consumer pinned a
|
|
754
|
+
* `model`). Selects that vendor's specific complexity ceiling. Omit when
|
|
755
|
+
* routing is gateway-owned — the conservative default ceiling (the lowest
|
|
756
|
+
* common denominator across vendors) is used so the schema is accepted
|
|
757
|
+
* whichever vendor the gateway picks.
|
|
758
|
+
*/
|
|
759
|
+
vendor?: string;
|
|
760
|
+
/**
|
|
761
|
+
* Override the complexity ceiling (max schema complexity `score`; see
|
|
762
|
+
* {@link estimateSchemaComplexity}). Above this, loose `json_object` is emitted.
|
|
763
|
+
* Wins over the vendor/default ceiling.
|
|
764
|
+
*/
|
|
765
|
+
maxComplexity?: number;
|
|
766
|
+
}
|
|
767
|
+
interface SchemaComplexity {
|
|
768
|
+
/** Total schema nodes — each property, array `items`, and enum value counts as one. */
|
|
769
|
+
nodes: number;
|
|
770
|
+
/** Deepest nesting level reached. */
|
|
771
|
+
maxDepth: number;
|
|
772
|
+
/** Total enum values across the whole schema (the main driver of constrained-
|
|
773
|
+
* decoding state blow-up). */
|
|
774
|
+
totalEnumValues: number;
|
|
775
|
+
/** Single rolled-up score compared against the ceiling: `nodes + totalEnumValues`. */
|
|
776
|
+
score: number;
|
|
777
|
+
}
|
|
778
|
+
/**
|
|
779
|
+
* Conservative cross-vendor ceiling, used when no `vendor` is supplied (gateway-
|
|
780
|
+
* owned routing). Tuned below the lowest-ceiling vendor (Gemini's constrained-
|
|
781
|
+
* decoding "too many states" limit) so a schema that passes here is accepted by
|
|
782
|
+
* ANY vendor the gateway might route to.
|
|
783
|
+
*/
|
|
784
|
+
declare const DEFAULT_SCHEMA_COMPLEXITY_CEILING = 80;
|
|
785
|
+
/**
|
|
786
|
+
* Estimate a JSON-Schema's complexity. The dominant cost for constrained decoding
|
|
787
|
+
* is the number of distinct states the engine must track, which grows with the
|
|
788
|
+
* node count and (especially) the total number of enum values. Pure + cheap.
|
|
789
|
+
*/
|
|
790
|
+
declare function estimateSchemaComplexity(schema: unknown): SchemaComplexity;
|
|
791
|
+
/**
|
|
792
|
+
* True when a strict `json_schema` is safe for the given schema + vendor (i.e.
|
|
793
|
+
* within the complexity ceiling, and the vendor isn't strict-schema-incapable).
|
|
794
|
+
* Exposed so callers can branch (e.g. log a downgrade) without re-deriving.
|
|
795
|
+
*/
|
|
796
|
+
declare function canUseStrictSchema(schema: unknown, opts?: DeriveResponseFormatOptions): boolean;
|
|
797
|
+
/**
|
|
798
|
+
* Derive the strongest safe `response_format`:
|
|
799
|
+
* • within the ceiling → `{ type: 'json_schema', json_schema: { name, schema, strict } }`
|
|
800
|
+
* • over the ceiling → `{ type: 'json_object' }` (loose JSON; instruct the
|
|
801
|
+
* model to follow the shape in your prompt)
|
|
802
|
+
*
|
|
803
|
+
* Pure — returns a value the consumer drops straight into
|
|
804
|
+
* `chat.completions.create({ response_format })`.
|
|
805
|
+
*/
|
|
806
|
+
declare function deriveResponseFormat(schema: Record<string, unknown>, opts?: DeriveResponseFormatOptions): ResponseFormat;
|
|
807
|
+
|
|
808
|
+
export { type AiCapability, BuilderforceApiError, BuilderforceClient, type BuilderforceClientOptions, type ChatCompletionChunk, type ChatCompletionCreateParams, type ChatCompletionResponse, ChatCompletionStream, type ChatMessage, type ChatRole, type ContentPart, DEFAULT_SCHEMA_COMPLEXITY_CEILING, type DeriveResponseFormatOptions, type EmbeddingObject, EmbeddingsApi, type EmbeddingsCreateParams, type EmbeddingsResponse, type ErrorClassification, type ErrorKind, type FailoverEvent, type FailoverKind, type FunctionDefinition, type ImageGenerationCreateParams, type ImageGenerationDataEntry, type ImageGenerationResponse, type ImageUrlContentPart, ImagesApi, type JsonSchemaSpec, type ModelInfo, ModelsApi, type ModelsListResponse, type PerCallOptions, type ResponseFormat, type SchemaComplexity, type TextContentPart, type ToolCall, type ToolCallDelta, type ToolCallFunction, type ToolChoice, type ToolSpec, type UsageByDay, type UsageByModel, type UsageByUser, type UsageGetParams, type UsageResponse, canUseStrictSchema, classifyError, deriveResponseFormat, estimateSchemaComplexity };
|
package/dist/index.d.ts
CHANGED
|
@@ -8,19 +8,37 @@ type ChatRole = 'system' | 'user' | 'assistant' | 'tool';
|
|
|
8
8
|
* upstream — e.g. all `openrouter` means a saturated shared key, not a
|
|
9
9
|
* model-specific issue.
|
|
10
10
|
*/
|
|
11
|
+
/**
|
|
12
|
+
* Coarse failure class for one failover attempt. Branch on this instead of
|
|
13
|
+
* regex-sniffing the error message. `'schema'` means the upstream rejected the
|
|
14
|
+
* `response_format.json_schema` as too complex for its constrained-decoding
|
|
15
|
+
* engine (see `FailoverEvent.reason === 'schema_too_complex'`); `'content_filter'`
|
|
16
|
+
* means a safety system blocked the generation. Open string union for
|
|
17
|
+
* forward-compat — a newer gateway may add classes an older SDK doesn't list.
|
|
18
|
+
*/
|
|
19
|
+
type FailoverKind = 'rate_limit' | 'timeout' | 'auth' | 'server_error' | 'client_error' | 'schema' | 'content_filter' | 'network' | 'skipped' | (string & {});
|
|
11
20
|
interface FailoverEvent {
|
|
12
21
|
model: string;
|
|
13
|
-
/** `'openrouter' | 'cerebras' | 'nvidia' | 'ollama'
|
|
22
|
+
/** `'openrouter' | 'cerebras' | 'nvidia' | 'ollama' | 'googleai' | …` */
|
|
14
23
|
vendor: string;
|
|
15
|
-
/**
|
|
24
|
+
/** Gateway-normalized status, or 0 for embedded errors / network failures.
|
|
25
|
+
* For a schema rejection this is `422` (the request-error class); the REAL
|
|
26
|
+
* upstream status is in `upstreamStatus`. */
|
|
16
27
|
code: number;
|
|
17
28
|
/** Wall-clock time the gateway spent on this attempt, ms. Present on newer
|
|
18
29
|
* gateway versions; absent on older ones. */
|
|
19
30
|
durationMs?: number;
|
|
20
|
-
/** Coarse failure class —
|
|
21
|
-
*
|
|
22
|
-
|
|
23
|
-
|
|
31
|
+
/** Coarse failure class — see {@link FailoverKind}. The full upstream error
|
|
32
|
+
* text is NOT exposed to callers; quote `traceId` to support for that. */
|
|
33
|
+
kind?: FailoverKind;
|
|
34
|
+
/** Stable machine-readable cause slug when one applies — e.g.
|
|
35
|
+
* `'schema_too_complex'`. Branch on this for structured handling instead of
|
|
36
|
+
* parsing `message`. Absent for unclassified failures. */
|
|
37
|
+
reason?: string;
|
|
38
|
+
/** The REAL upstream HTTP status before the gateway normalized it into `code`
|
|
39
|
+
* — e.g. a Gemini schema 400 surfaces as `code: 422` with `upstreamStatus: 400`.
|
|
40
|
+
* Absent when `code` already IS the upstream status. */
|
|
41
|
+
upstreamStatus?: number;
|
|
24
42
|
}
|
|
25
43
|
interface TextContentPart {
|
|
26
44
|
type: 'text';
|
|
@@ -153,9 +171,15 @@ interface ChatCompletionCreateParams extends PerCallOptions {
|
|
|
153
171
|
* gateway-side schema validation with retry across the failover chain. */
|
|
154
172
|
response_format?: ResponseFormat;
|
|
155
173
|
/**
|
|
156
|
-
*
|
|
157
|
-
*
|
|
158
|
-
*
|
|
174
|
+
* Telemetry slug — persisted to `llm_usage_log.use_case` and echoed back in
|
|
175
|
+
* `_builderforce.useCase`. The taxonomy is yours, BUT a few well-known patterns
|
|
176
|
+
* also *influence routing* (the gateway substring-matches them):
|
|
177
|
+
* - `…ocr…` → prefers OCR/vision-capable models.
|
|
178
|
+
* - quality-critical work (`resume`, `cover_letter`, `tailor`, `proposal`,
|
|
179
|
+
* `cv`, …) → leads with the best models your PLAN unlocks (premium writers
|
|
180
|
+
* on paid plans). Failover + the funded reliability backstop still apply.
|
|
181
|
+
* Slugs that don't match any pattern are pure telemetry. Routing is always
|
|
182
|
+
* gateway-owned; this only nudges model selection.
|
|
159
183
|
*/
|
|
160
184
|
useCase?: string;
|
|
161
185
|
/** Free-form key/value pairs persisted to `llm_usage_log.metadata` for billing
|
|
@@ -236,6 +260,15 @@ interface ChatCompletionResponse {
|
|
|
236
260
|
effectivePlan?: string;
|
|
237
261
|
/** Number of vendor retries the gateway performed for json_schema conformance. */
|
|
238
262
|
schemaRetries?: number;
|
|
263
|
+
/**
|
|
264
|
+
* `true` when the gateway AUTO-DOWNGRADED a too-complex `response_format.json_schema`
|
|
265
|
+
* to loose `json_object` and re-ran the cascade so you still got a structured
|
|
266
|
+
* result instead of a terminal `schema_too_complex` error. The strict-schema
|
|
267
|
+
* guarantee was relaxed — **validate the returned JSON yourself** (it parses,
|
|
268
|
+
* but wasn't constrained-decoded against your schema). Pre-empt the round-trip
|
|
269
|
+
* with `deriveResponseFormat` when you know the schema is large.
|
|
270
|
+
*/
|
|
271
|
+
schemaDowngraded?: boolean;
|
|
239
272
|
/** Echo of `request.useCase` (opaque telemetry slug). */
|
|
240
273
|
useCase?: string;
|
|
241
274
|
/** Echo of `request.metadata` for caller-side billing trace-back. */
|
|
@@ -630,4 +663,146 @@ declare class BuilderforceClient {
|
|
|
630
663
|
constructor(options: BuilderforceClientOptions);
|
|
631
664
|
}
|
|
632
665
|
|
|
633
|
-
|
|
666
|
+
/**
|
|
667
|
+
* Coarse, stable error class for a failed gateway call — keyed off the gateway's
|
|
668
|
+
* OWN failure taxonomy (`error.code` + `terminal` + the failover breakdown), NOT
|
|
669
|
+
* raw HTTP-status guessing. Branch on this instead of reinventing a classifier
|
|
670
|
+
* per consumer (which inevitably drifts).
|
|
671
|
+
*
|
|
672
|
+
* rate_limit — the gateway's whole cascade was rate-limited (429
|
|
673
|
+
* `cascade_exhausted`). Retry later (`retryAfter`).
|
|
674
|
+
* token_cap — a per-TENANT cap was hit (plan/monthly/host/claw token
|
|
675
|
+
* or image-credit limit). TERMINAL for this billing
|
|
676
|
+
* window — a different model won't help.
|
|
677
|
+
* schema_too_complex — every candidate rejected the `response_format.json_schema`
|
|
678
|
+
* as too complex for its constrained-decoding engine.
|
|
679
|
+
* TERMINAL: simplify the schema or drop to `json_object`.
|
|
680
|
+
* invalid_request — malformed payload (400/422) every model rejected. TERMINAL.
|
|
681
|
+
* auth — bad/missing API key (401/403). TERMINAL.
|
|
682
|
+
* model_unavailable — a strict-pinned model is on cooldown / unconfigured (503).
|
|
683
|
+
* Not terminal: drop the pin or pick another model.
|
|
684
|
+
* timeout — the request (or a single vendor attempt) timed out (408).
|
|
685
|
+
* service_unavailable — infrastructure ceiling (503 `worker_subrequest_exhausted`)
|
|
686
|
+
* or transient upstream outage (5xx). Retry after a backoff.
|
|
687
|
+
* content_filter — a safety system blocked the generation.
|
|
688
|
+
* network — the request never reached the gateway (DNS/TLS/reset).
|
|
689
|
+
* aborted — the caller's AbortSignal fired (499 / AbortError).
|
|
690
|
+
* unknown — none of the above matched.
|
|
691
|
+
*/
|
|
692
|
+
type ErrorKind = 'rate_limit' | 'token_cap' | 'schema_too_complex' | 'invalid_request' | 'auth' | 'model_unavailable' | 'timeout' | 'service_unavailable' | 'content_filter' | 'network' | 'aborted' | 'unknown';
|
|
693
|
+
interface ErrorClassification {
|
|
694
|
+
kind: ErrorKind;
|
|
695
|
+
/**
|
|
696
|
+
* `true` when retrying the SAME request on a DIFFERENT model will NOT help —
|
|
697
|
+
* the consumer's own failover chain should short-circuit. Sourced from the
|
|
698
|
+
* gateway's `error.terminal` flag when present, with a kind-based fallback.
|
|
699
|
+
*/
|
|
700
|
+
terminal: boolean;
|
|
701
|
+
/**
|
|
702
|
+
* `true` when the SAME request is safe to retry as-is (idempotently), usually
|
|
703
|
+
* after `retryAfter` seconds — e.g. a transient rate-limit/outage/timeout.
|
|
704
|
+
* `false` for deterministic rejections (schema, invalid request, auth, caps).
|
|
705
|
+
*/
|
|
706
|
+
retryable: boolean;
|
|
707
|
+
/** Seconds the caller should wait before retrying, when the gateway supplied it. */
|
|
708
|
+
retryAfter?: number;
|
|
709
|
+
/** HTTP status, when the error reached the gateway. */
|
|
710
|
+
status?: number;
|
|
711
|
+
/** Gateway error code slug, when present (`schema_too_complex`, `plan_token_limit_exceeded`, …). */
|
|
712
|
+
code?: string;
|
|
713
|
+
/** Human-readable message (the gateway's, or the thrown error's). */
|
|
714
|
+
message: string;
|
|
715
|
+
}
|
|
716
|
+
/**
|
|
717
|
+
* Classify any caught error from a Builderforce SDK call into a structured,
|
|
718
|
+
* actionable verdict. Accepts `unknown` so a consumer can pass a raw `catch`
|
|
719
|
+
* binding — non-`BuilderforceApiError` values (network throws, `AbortError`,
|
|
720
|
+
* plain `Error`) are classified too.
|
|
721
|
+
*
|
|
722
|
+
* This is the FIRST-PARTY classifier the gateway feedback asked for: keyed off
|
|
723
|
+
* the gateway's own taxonomy so every consumer agrees on what "terminal" and
|
|
724
|
+
* "retryable" mean instead of hand-rolling `429/408/401/5xx → kind` guesses that
|
|
725
|
+
* drift apart.
|
|
726
|
+
*/
|
|
727
|
+
declare function classifyError(err: unknown): ErrorClassification;
|
|
728
|
+
|
|
729
|
+
/**
|
|
730
|
+
* deriveResponseFormat — pick the strongest `response_format` a request can SAFELY
|
|
731
|
+
* use given how complex its JSON-Schema is and (optionally) which vendor will
|
|
732
|
+
* serve it.
|
|
733
|
+
*
|
|
734
|
+
* The problem this solves: a strict `json_schema` gives the best conformance, but
|
|
735
|
+
* some vendors' constrained-decoding engines reject a schema that's too complex
|
|
736
|
+
* (Gemini's "too many states for serving"). The gateway now surfaces that as a
|
|
737
|
+
* terminal `schema_too_complex` error — but the cleaner fix is to NOT send a
|
|
738
|
+
* strict schema a vendor can't honour in the first place. This utility is the
|
|
739
|
+
* pre-flight guard: it emits `{ type: 'json_schema', strict }` when the schema is
|
|
740
|
+
* within the (vendor-specific or conservative-default) complexity ceiling, and
|
|
741
|
+
* falls back to `{ type: 'json_object' }` (loose JSON mode — universally
|
|
742
|
+
* supported) when it isn't.
|
|
743
|
+
*
|
|
744
|
+
* The SDK is zero-dependency, so this takes a plain JSON-Schema object — convert
|
|
745
|
+
* a Zod schema first with `zod-to-json-schema` (`deriveResponseFormat(zodToJsonSchema(MySchema), …)`).
|
|
746
|
+
*/
|
|
747
|
+
interface DeriveResponseFormatOptions {
|
|
748
|
+
/** Schema name sent as `json_schema.name` (default `'response'`). */
|
|
749
|
+
name?: string;
|
|
750
|
+
/** Set `json_schema.strict` when a strict schema is emitted (default `true`). */
|
|
751
|
+
strict?: boolean;
|
|
752
|
+
/**
|
|
753
|
+
* The vendor that will serve the request, when known (the consumer pinned a
|
|
754
|
+
* `model`). Selects that vendor's specific complexity ceiling. Omit when
|
|
755
|
+
* routing is gateway-owned — the conservative default ceiling (the lowest
|
|
756
|
+
* common denominator across vendors) is used so the schema is accepted
|
|
757
|
+
* whichever vendor the gateway picks.
|
|
758
|
+
*/
|
|
759
|
+
vendor?: string;
|
|
760
|
+
/**
|
|
761
|
+
* Override the complexity ceiling (max schema complexity `score`; see
|
|
762
|
+
* {@link estimateSchemaComplexity}). Above this, loose `json_object` is emitted.
|
|
763
|
+
* Wins over the vendor/default ceiling.
|
|
764
|
+
*/
|
|
765
|
+
maxComplexity?: number;
|
|
766
|
+
}
|
|
767
|
+
interface SchemaComplexity {
|
|
768
|
+
/** Total schema nodes — each property, array `items`, and enum value counts as one. */
|
|
769
|
+
nodes: number;
|
|
770
|
+
/** Deepest nesting level reached. */
|
|
771
|
+
maxDepth: number;
|
|
772
|
+
/** Total enum values across the whole schema (the main driver of constrained-
|
|
773
|
+
* decoding state blow-up). */
|
|
774
|
+
totalEnumValues: number;
|
|
775
|
+
/** Single rolled-up score compared against the ceiling: `nodes + totalEnumValues`. */
|
|
776
|
+
score: number;
|
|
777
|
+
}
|
|
778
|
+
/**
|
|
779
|
+
* Conservative cross-vendor ceiling, used when no `vendor` is supplied (gateway-
|
|
780
|
+
* owned routing). Tuned below the lowest-ceiling vendor (Gemini's constrained-
|
|
781
|
+
* decoding "too many states" limit) so a schema that passes here is accepted by
|
|
782
|
+
* ANY vendor the gateway might route to.
|
|
783
|
+
*/
|
|
784
|
+
declare const DEFAULT_SCHEMA_COMPLEXITY_CEILING = 80;
|
|
785
|
+
/**
|
|
786
|
+
* Estimate a JSON-Schema's complexity. The dominant cost for constrained decoding
|
|
787
|
+
* is the number of distinct states the engine must track, which grows with the
|
|
788
|
+
* node count and (especially) the total number of enum values. Pure + cheap.
|
|
789
|
+
*/
|
|
790
|
+
declare function estimateSchemaComplexity(schema: unknown): SchemaComplexity;
|
|
791
|
+
/**
|
|
792
|
+
* True when a strict `json_schema` is safe for the given schema + vendor (i.e.
|
|
793
|
+
* within the complexity ceiling, and the vendor isn't strict-schema-incapable).
|
|
794
|
+
* Exposed so callers can branch (e.g. log a downgrade) without re-deriving.
|
|
795
|
+
*/
|
|
796
|
+
declare function canUseStrictSchema(schema: unknown, opts?: DeriveResponseFormatOptions): boolean;
|
|
797
|
+
/**
|
|
798
|
+
* Derive the strongest safe `response_format`:
|
|
799
|
+
* • within the ceiling → `{ type: 'json_schema', json_schema: { name, schema, strict } }`
|
|
800
|
+
* • over the ceiling → `{ type: 'json_object' }` (loose JSON; instruct the
|
|
801
|
+
* model to follow the shape in your prompt)
|
|
802
|
+
*
|
|
803
|
+
* Pure — returns a value the consumer drops straight into
|
|
804
|
+
* `chat.completions.create({ response_format })`.
|
|
805
|
+
*/
|
|
806
|
+
declare function deriveResponseFormat(schema: Record<string, unknown>, opts?: DeriveResponseFormatOptions): ResponseFormat;
|
|
807
|
+
|
|
808
|
+
export { type AiCapability, BuilderforceApiError, BuilderforceClient, type BuilderforceClientOptions, type ChatCompletionChunk, type ChatCompletionCreateParams, type ChatCompletionResponse, ChatCompletionStream, type ChatMessage, type ChatRole, type ContentPart, DEFAULT_SCHEMA_COMPLEXITY_CEILING, type DeriveResponseFormatOptions, type EmbeddingObject, EmbeddingsApi, type EmbeddingsCreateParams, type EmbeddingsResponse, type ErrorClassification, type ErrorKind, type FailoverEvent, type FailoverKind, type FunctionDefinition, type ImageGenerationCreateParams, type ImageGenerationDataEntry, type ImageGenerationResponse, type ImageUrlContentPart, ImagesApi, type JsonSchemaSpec, type ModelInfo, ModelsApi, type ModelsListResponse, type PerCallOptions, type ResponseFormat, type SchemaComplexity, type TextContentPart, type ToolCall, type ToolCallDelta, type ToolCallFunction, type ToolChoice, type ToolSpec, type UsageByDay, type UsageByModel, type UsageByUser, type UsageGetParams, type UsageResponse, canUseStrictSchema, classifyError, deriveResponseFormat, estimateSchemaComplexity };
|
package/dist/index.mjs
CHANGED
|
@@ -258,7 +258,9 @@ var BuilderforceApiError = class extends Error {
|
|
|
258
258
|
vendor: e.vendor,
|
|
259
259
|
code: e.code,
|
|
260
260
|
...typeof ev.durationMs === "number" ? { durationMs: ev.durationMs } : {},
|
|
261
|
-
...typeof ev.kind === "string" ? { kind: ev.kind } : {}
|
|
261
|
+
...typeof ev.kind === "string" ? { kind: ev.kind } : {},
|
|
262
|
+
...typeof ev.reason === "string" ? { reason: ev.reason } : {},
|
|
263
|
+
...typeof ev.upstreamStatus === "number" ? { upstreamStatus: ev.upstreamStatus } : {}
|
|
262
264
|
});
|
|
263
265
|
}
|
|
264
266
|
}
|
|
@@ -437,12 +439,173 @@ var BuilderforceClient = class {
|
|
|
437
439
|
this.usage = new UsageApi(http);
|
|
438
440
|
}
|
|
439
441
|
};
|
|
442
|
+
|
|
443
|
+
// src/application/classifyError.ts
|
|
444
|
+
var TOKEN_CAP_CODES = /* @__PURE__ */ new Set([
|
|
445
|
+
"plan_token_limit_exceeded",
|
|
446
|
+
"plan_monthly_token_limit_exceeded",
|
|
447
|
+
"agent_host_token_limit_exceeded",
|
|
448
|
+
"claw_token_limit_exceeded",
|
|
449
|
+
"image_credit_limit_exceeded"
|
|
450
|
+
]);
|
|
451
|
+
function classifyError(err) {
|
|
452
|
+
if (!(err instanceof BuilderforceApiError)) {
|
|
453
|
+
const name = err?.name;
|
|
454
|
+
const message2 = err instanceof Error ? err.message : String(err);
|
|
455
|
+
if (name === "AbortError") {
|
|
456
|
+
return { kind: "aborted", terminal: true, retryable: false, message: message2 };
|
|
457
|
+
}
|
|
458
|
+
if (err instanceof TypeError) {
|
|
459
|
+
return { kind: "network", terminal: false, retryable: true, message: message2 };
|
|
460
|
+
}
|
|
461
|
+
return { kind: "unknown", terminal: false, retryable: false, message: message2 };
|
|
462
|
+
}
|
|
463
|
+
const { status, code, terminal, retryAfter, message } = err;
|
|
464
|
+
const base = {
|
|
465
|
+
...retryAfter !== void 0 ? { retryAfter } : {},
|
|
466
|
+
...status !== void 0 ? { status } : {},
|
|
467
|
+
...code !== void 0 ? { code } : {},
|
|
468
|
+
message
|
|
469
|
+
};
|
|
470
|
+
if (code === "schema_too_complex" || lastFailoverReason(err) === "schema_too_complex") {
|
|
471
|
+
return { kind: "schema_too_complex", terminal: terminal ?? true, retryable: false, ...base };
|
|
472
|
+
}
|
|
473
|
+
if (code && TOKEN_CAP_CODES.has(code)) {
|
|
474
|
+
return { kind: "token_cap", terminal: terminal ?? true, retryable: false, ...base };
|
|
475
|
+
}
|
|
476
|
+
if (code === "model_unavailable") {
|
|
477
|
+
return { kind: "model_unavailable", terminal: terminal ?? false, retryable: false, ...base };
|
|
478
|
+
}
|
|
479
|
+
if (code === "worker_subrequest_exhausted") {
|
|
480
|
+
return { kind: "service_unavailable", terminal: false, retryable: true, ...base };
|
|
481
|
+
}
|
|
482
|
+
if (code === "aborted") {
|
|
483
|
+
return { kind: "aborted", terminal: true, retryable: false, ...base };
|
|
484
|
+
}
|
|
485
|
+
if (code === "content_filter") {
|
|
486
|
+
return { kind: "content_filter", terminal: terminal ?? true, retryable: false, ...base };
|
|
487
|
+
}
|
|
488
|
+
if (status === 408 || code === "timeout") {
|
|
489
|
+
return { kind: "timeout", terminal: false, retryable: true, ...base };
|
|
490
|
+
}
|
|
491
|
+
if (status === 401 || status === 403) {
|
|
492
|
+
return { kind: "auth", terminal: terminal ?? true, retryable: false, ...base };
|
|
493
|
+
}
|
|
494
|
+
if (status === 429) {
|
|
495
|
+
return { kind: "rate_limit", terminal: terminal ?? false, retryable: !(terminal ?? false), ...base };
|
|
496
|
+
}
|
|
497
|
+
if (status === 400 || status === 422) {
|
|
498
|
+
return { kind: "invalid_request", terminal: terminal ?? true, retryable: false, ...base };
|
|
499
|
+
}
|
|
500
|
+
if (status === 503) {
|
|
501
|
+
return { kind: "service_unavailable", terminal: false, retryable: true, ...base };
|
|
502
|
+
}
|
|
503
|
+
if (status !== void 0 && status >= 500) {
|
|
504
|
+
return { kind: "service_unavailable", terminal: false, retryable: true, ...base };
|
|
505
|
+
}
|
|
506
|
+
return { kind: "unknown", terminal: terminal ?? false, retryable: false, ...base };
|
|
507
|
+
}
|
|
508
|
+
function lastFailoverReason(err) {
|
|
509
|
+
const f = err.failovers;
|
|
510
|
+
if (!f || f.length === 0) return void 0;
|
|
511
|
+
return f[f.length - 1]?.reason;
|
|
512
|
+
}
|
|
513
|
+
|
|
514
|
+
// src/application/deriveResponseFormat.ts
|
|
515
|
+
var DEFAULT_SCHEMA_COMPLEXITY_CEILING = 80;
|
|
516
|
+
var VENDOR_SCHEMA_CEILINGS = {
|
|
517
|
+
// Low constrained-decoding ceiling — the vendor that motivated this guard.
|
|
518
|
+
googleai: 60,
|
|
519
|
+
google: 60,
|
|
520
|
+
gemini: 60,
|
|
521
|
+
// High-ceiling, robust strict-schema vendors.
|
|
522
|
+
openai: 600,
|
|
523
|
+
anthropic: 600,
|
|
524
|
+
cerebras: 300,
|
|
525
|
+
nvidia: 300,
|
|
526
|
+
openrouter: 200
|
|
527
|
+
};
|
|
528
|
+
var MAX_SCHEMA_WALK_DEPTH = 64;
|
|
529
|
+
function estimateSchemaComplexity(schema) {
|
|
530
|
+
let nodes = 0;
|
|
531
|
+
let totalEnumValues = 0;
|
|
532
|
+
let maxDepth = 0;
|
|
533
|
+
const walk = (node, depth) => {
|
|
534
|
+
if (depth > MAX_SCHEMA_WALK_DEPTH || node === null || typeof node !== "object") return;
|
|
535
|
+
if (depth > maxDepth) maxDepth = depth;
|
|
536
|
+
const s = node;
|
|
537
|
+
const enumVals = s["enum"];
|
|
538
|
+
if (Array.isArray(enumVals)) totalEnumValues += enumVals.length;
|
|
539
|
+
const props = s["properties"];
|
|
540
|
+
if (props && typeof props === "object") {
|
|
541
|
+
for (const key of Object.keys(props)) {
|
|
542
|
+
nodes += 1;
|
|
543
|
+
walk(props[key], depth + 1);
|
|
544
|
+
}
|
|
545
|
+
}
|
|
546
|
+
const items = s["items"];
|
|
547
|
+
if (Array.isArray(items)) items.forEach((it) => {
|
|
548
|
+
nodes += 1;
|
|
549
|
+
walk(it, depth + 1);
|
|
550
|
+
});
|
|
551
|
+
else if (items && typeof items === "object") {
|
|
552
|
+
nodes += 1;
|
|
553
|
+
walk(items, depth + 1);
|
|
554
|
+
}
|
|
555
|
+
for (const comb of ["anyOf", "oneOf", "allOf"]) {
|
|
556
|
+
const arr = s[comb];
|
|
557
|
+
if (Array.isArray(arr)) arr.forEach((sub) => {
|
|
558
|
+
nodes += 1;
|
|
559
|
+
walk(sub, depth + 1);
|
|
560
|
+
});
|
|
561
|
+
}
|
|
562
|
+
for (const defsKey of ["$defs", "definitions"]) {
|
|
563
|
+
const defs = s[defsKey];
|
|
564
|
+
if (defs && typeof defs === "object") {
|
|
565
|
+
for (const key of Object.keys(defs)) walk(defs[key], depth + 1);
|
|
566
|
+
}
|
|
567
|
+
}
|
|
568
|
+
};
|
|
569
|
+
walk(schema, 0);
|
|
570
|
+
return { nodes, maxDepth, totalEnumValues, score: nodes + totalEnumValues };
|
|
571
|
+
}
|
|
572
|
+
function ceilingFor(opts) {
|
|
573
|
+
if (opts?.maxComplexity != null && opts.maxComplexity >= 0) return opts.maxComplexity;
|
|
574
|
+
if (opts?.vendor) {
|
|
575
|
+
const v = opts.vendor.toLowerCase();
|
|
576
|
+
if (v in VENDOR_SCHEMA_CEILINGS) return VENDOR_SCHEMA_CEILINGS[v];
|
|
577
|
+
}
|
|
578
|
+
return DEFAULT_SCHEMA_COMPLEXITY_CEILING;
|
|
579
|
+
}
|
|
580
|
+
function canUseStrictSchema(schema, opts) {
|
|
581
|
+
const ceiling = ceilingFor(opts);
|
|
582
|
+
if (ceiling === 0) return false;
|
|
583
|
+
return estimateSchemaComplexity(schema).score <= ceiling;
|
|
584
|
+
}
|
|
585
|
+
function deriveResponseFormat(schema, opts) {
|
|
586
|
+
if (!canUseStrictSchema(schema, opts)) {
|
|
587
|
+
return { type: "json_object" };
|
|
588
|
+
}
|
|
589
|
+
return {
|
|
590
|
+
type: "json_schema",
|
|
591
|
+
json_schema: {
|
|
592
|
+
name: opts?.name ?? "response",
|
|
593
|
+
schema,
|
|
594
|
+
strict: opts?.strict ?? true
|
|
595
|
+
}
|
|
596
|
+
};
|
|
597
|
+
}
|
|
440
598
|
export {
|
|
441
599
|
BuilderforceApiError,
|
|
442
600
|
BuilderforceClient,
|
|
443
601
|
ChatCompletionStream,
|
|
602
|
+
DEFAULT_SCHEMA_COMPLEXITY_CEILING,
|
|
444
603
|
EmbeddingsApi,
|
|
445
604
|
ImagesApi,
|
|
446
|
-
ModelsApi
|
|
605
|
+
ModelsApi,
|
|
606
|
+
canUseStrictSchema,
|
|
607
|
+
classifyError,
|
|
608
|
+
deriveResponseFormat,
|
|
609
|
+
estimateSchemaComplexity
|
|
447
610
|
};
|
|
448
611
|
//# sourceMappingURL=index.mjs.map
|