@seanhogg/builderforce-sdk 2026.6.29 → 2026.6.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -8,19 +8,37 @@ type ChatRole = 'system' | 'user' | 'assistant' | 'tool';
8
8
  * upstream — e.g. all `openrouter` means a saturated shared key, not a
9
9
  * model-specific issue.
10
10
  */
11
+ /**
12
+ * Coarse failure class for one failover attempt. Branch on this instead of
13
+ * regex-sniffing the error message. `'schema'` means the upstream rejected the
14
+ * `response_format.json_schema` as too complex for its constrained-decoding
15
+ * engine (see `FailoverEvent.reason === 'schema_too_complex'`); `'content_filter'`
16
+ * means a safety system blocked the generation. Open string union for
17
+ * forward-compat — a newer gateway may add classes an older SDK doesn't list.
18
+ */
19
+ type FailoverKind = 'rate_limit' | 'timeout' | 'auth' | 'server_error' | 'client_error' | 'schema' | 'content_filter' | 'network' | 'skipped' | (string & {});
11
20
  interface FailoverEvent {
12
21
  model: string;
13
- /** `'openrouter' | 'cerebras' | 'nvidia' | 'ollama'` */
22
+ /** `'openrouter' | 'cerebras' | 'nvidia' | 'ollama' | 'googleai' | …` */
14
23
  vendor: string;
15
- /** HTTP status code, or 0 for embedded errors / network failures. */
24
+ /** Gateway-normalized status, or 0 for embedded errors / network failures.
25
+ * For a schema rejection this is `422` (the request-error class); the REAL
26
+ * upstream status is in `upstreamStatus`. */
16
27
  code: number;
17
28
  /** Wall-clock time the gateway spent on this attempt, ms. Present on newer
18
29
  * gateway versions; absent on older ones. */
19
30
  durationMs?: number;
20
- /** Coarse failure class — `'rate_limit' | 'timeout' | 'auth' | 'server_error'
21
- * | 'client_error' | 'network' | 'skipped'`. The full upstream error text is
22
- * NOT exposed to callers; quote `traceId` to support for that. */
23
- kind?: string;
31
+ /** Coarse failure class — see {@link FailoverKind}. The full upstream error
32
+ * text is NOT exposed to callers; quote `traceId` to support for that. */
33
+ kind?: FailoverKind;
34
+ /** Stable machine-readable cause slug when one applies — e.g.
35
+ * `'schema_too_complex'`. Branch on this for structured handling instead of
36
+ * parsing `message`. Absent for unclassified failures. */
37
+ reason?: string;
38
+ /** The REAL upstream HTTP status before the gateway normalized it into `code`
39
+ * — e.g. a Gemini schema 400 surfaces as `code: 422` with `upstreamStatus: 400`.
40
+ * Absent when `code` already IS the upstream status. */
41
+ upstreamStatus?: number;
24
42
  }
25
43
  interface TextContentPart {
26
44
  type: 'text';
@@ -153,9 +171,15 @@ interface ChatCompletionCreateParams extends PerCallOptions {
153
171
  * gateway-side schema validation with retry across the failover chain. */
154
172
  response_format?: ResponseFormat;
155
173
  /**
156
- * Opaque telemetry slug. The gateway treats this as a free-form string —
157
- * persisted to `llm_usage_log.use_case` and echoed back in `_builderforce.useCase`
158
- * for confirmation, but **never used for routing**. The taxonomy is yours.
174
+ * Telemetry slug persisted to `llm_usage_log.use_case` and echoed back in
175
+ * `_builderforce.useCase`. The taxonomy is yours, BUT a few well-known patterns
176
+ * also *influence routing* (the gateway substring-matches them):
177
+ * - `…ocr…` → prefers OCR/vision-capable models.
178
+ * - quality-critical work (`resume`, `cover_letter`, `tailor`, `proposal`,
179
+ * `cv`, …) → leads with the best models your PLAN unlocks (premium writers
180
+ * on paid plans). Failover + the funded reliability backstop still apply.
181
+ * Slugs that don't match any pattern are pure telemetry. Routing is always
182
+ * gateway-owned; this only nudges model selection.
159
183
  */
160
184
  useCase?: string;
161
185
  /** Free-form key/value pairs persisted to `llm_usage_log.metadata` for billing
@@ -236,6 +260,15 @@ interface ChatCompletionResponse {
236
260
  effectivePlan?: string;
237
261
  /** Number of vendor retries the gateway performed for json_schema conformance. */
238
262
  schemaRetries?: number;
263
+ /**
264
+ * `true` when the gateway AUTO-DOWNGRADED a too-complex `response_format.json_schema`
265
+ * to loose `json_object` and re-ran the cascade so you still got a structured
266
+ * result instead of a terminal `schema_too_complex` error. The strict-schema
267
+ * guarantee was relaxed — **validate the returned JSON yourself** (it parses,
268
+ * but wasn't constrained-decoded against your schema). Pre-empt the round-trip
269
+ * with `deriveResponseFormat` when you know the schema is large.
270
+ */
271
+ schemaDowngraded?: boolean;
239
272
  /** Echo of `request.useCase` (telemetry slug; well-known patterns may also nudge routing). */
240
273
  useCase?: string;
241
274
  /** Echo of `request.metadata` for caller-side billing trace-back. */
@@ -630,4 +663,146 @@ declare class BuilderforceClient {
630
663
  constructor(options: BuilderforceClientOptions);
631
664
  }
632
665
 
633
- export { type AiCapability, BuilderforceApiError, BuilderforceClient, type BuilderforceClientOptions, type ChatCompletionChunk, type ChatCompletionCreateParams, type ChatCompletionResponse, ChatCompletionStream, type ChatMessage, type ChatRole, type ContentPart, type EmbeddingObject, EmbeddingsApi, type EmbeddingsCreateParams, type EmbeddingsResponse, type FailoverEvent, type FunctionDefinition, type ImageGenerationCreateParams, type ImageGenerationDataEntry, type ImageGenerationResponse, type ImageUrlContentPart, ImagesApi, type JsonSchemaSpec, type ModelInfo, ModelsApi, type ModelsListResponse, type PerCallOptions, type ResponseFormat, type TextContentPart, type ToolCall, type ToolCallDelta, type ToolCallFunction, type ToolChoice, type ToolSpec, type UsageByDay, type UsageByModel, type UsageByUser, type UsageGetParams, type UsageResponse };
666
+ /**
667
+ * Coarse, stable error class for a failed gateway call — keyed off the gateway's
668
+ * OWN failure taxonomy (`error.code` + `terminal` + the failover breakdown), NOT
669
+ * raw HTTP-status guessing. Branch on this instead of reinventing a classifier
670
+ * per consumer (which inevitably drifts).
671
+ *
672
+ * rate_limit — the gateway's whole cascade was rate-limited (429
673
+ * `cascade_exhausted`). Retry later (`retryAfter`).
674
+ * token_cap — a per-TENANT cap was hit (plan/monthly/host/claw token
675
+ * or image-credit limit). TERMINAL for this billing
676
+ * window — a different model won't help.
677
+ * schema_too_complex — every candidate rejected the `response_format.json_schema`
678
+ * as too complex for its constrained-decoding engine.
679
+ * TERMINAL: simplify the schema or drop to `json_object`.
680
+ * invalid_request — malformed payload (400/422) every model rejected. TERMINAL.
681
+ * auth — bad/missing API key (401/403). TERMINAL.
682
+ * model_unavailable — a strict-pinned model is on cooldown / unconfigured (503).
683
+ * Not terminal: drop the pin or pick another model.
684
+ * timeout — the request (or a single vendor attempt) timed out (408).
685
+ * service_unavailable — infrastructure ceiling (503 `worker_subrequest_exhausted`)
686
+ * or transient upstream outage (5xx). Retry after a backoff.
687
+ * content_filter — a safety system blocked the generation.
688
+ * network — the request never reached the gateway (DNS/TLS/reset).
689
+ * aborted — the caller's AbortSignal fired (499 / AbortError).
690
+ * unknown — none of the above matched.
691
+ */
692
+ type ErrorKind = 'rate_limit' | 'token_cap' | 'schema_too_complex' | 'invalid_request' | 'auth' | 'model_unavailable' | 'timeout' | 'service_unavailable' | 'content_filter' | 'network' | 'aborted' | 'unknown';
693
+ interface ErrorClassification {
694
+ kind: ErrorKind;
695
+ /**
696
+ * `true` when retrying the SAME request on a DIFFERENT model will NOT help —
697
+ * the consumer's own failover chain should short-circuit. Sourced from the
698
+ * gateway's `error.terminal` flag when present, with a kind-based fallback.
699
+ */
700
+ terminal: boolean;
701
+ /**
702
+ * `true` when the SAME request is safe to retry as-is (idempotently), usually
703
+ * after `retryAfter` seconds — e.g. a transient rate-limit/outage/timeout.
704
+ * `false` for deterministic rejections (schema, invalid request, auth, caps).
705
+ */
706
+ retryable: boolean;
707
+ /** Seconds the caller should wait before retrying, when the gateway supplied it. */
708
+ retryAfter?: number;
709
+ /** HTTP status, when the error reached the gateway. */
710
+ status?: number;
711
+ /** Gateway error code slug, when present (`schema_too_complex`, `plan_token_limit_exceeded`, …). */
712
+ code?: string;
713
+ /** Human-readable message (the gateway's, or the thrown error's). */
714
+ message: string;
715
+ }
716
+ /**
717
+ * Classify any caught error from a Builderforce SDK call into a structured,
718
+ * actionable verdict. Accepts `unknown` so a consumer can pass a raw `catch`
719
+ * binding — non-`BuilderforceApiError` values (network throws, `AbortError`,
720
+ * plain `Error`) are classified too.
721
+ *
722
+ * This is the FIRST-PARTY classifier the gateway feedback asked for: keyed off
723
+ * the gateway's own taxonomy so every consumer agrees on what "terminal" and
724
+ * "retryable" mean instead of hand-rolling `429/408/401/5xx → kind` guesses that
725
+ * drift apart.
726
+ */
727
+ declare function classifyError(err: unknown): ErrorClassification;
728
+
729
+ /**
730
+ * deriveResponseFormat — pick the strongest `response_format` a request can SAFELY
731
+ * use given how complex its JSON-Schema is and (optionally) which vendor will
732
+ * serve it.
733
+ *
734
+ * The problem this solves: a strict `json_schema` gives the best conformance, but
735
+ * some vendors' constrained-decoding engines reject a schema that's too complex
736
+ * (Gemini's "too many states for serving"). The gateway now surfaces that as a
737
+ * terminal `schema_too_complex` error — but the cleaner fix is to NOT send a
738
+ * strict schema a vendor can't honour in the first place. This utility is the
739
+ * pre-flight guard: it emits `{ type: 'json_schema', strict }` when the schema is
740
+ * within the (vendor-specific or conservative-default) complexity ceiling, and
741
+ * falls back to `{ type: 'json_object' }` (loose JSON mode — universally
742
+ * supported) when it isn't.
743
+ *
744
+ * The SDK is zero-dependency, so this takes a plain JSON-Schema object — convert
745
+ * a Zod schema first with `zod-to-json-schema` (`deriveResponseFormat(zodToJsonSchema(MySchema), …)`).
746
+ */
747
+ interface DeriveResponseFormatOptions {
748
+ /** Schema name sent as `json_schema.name` (default `'response'`). */
749
+ name?: string;
750
+ /** Set `json_schema.strict` when a strict schema is emitted (default `true`). */
751
+ strict?: boolean;
752
+ /**
753
+ * The vendor that will serve the request, when known (the consumer pinned a
754
+ * `model`). Selects that vendor's specific complexity ceiling. Omit when
755
+ * routing is gateway-owned — the conservative default ceiling (the lowest
756
+ * common denominator across vendors) is used so the schema is accepted
757
+ * whichever vendor the gateway picks.
758
+ */
759
+ vendor?: string;
760
+ /**
761
+ * Override the complexity ceiling (max complexity `score`; see
762
+ * {@link estimateSchemaComplexity}). Above this, loose `json_object` is emitted.
763
+ * Wins over the vendor/default ceiling.
764
+ */
765
+ maxComplexity?: number;
766
+ }
767
+ interface SchemaComplexity {
768
+ /** Total schema nodes — every property, array `items`, and enum value counts one. */
769
+ nodes: number;
770
+ /** Deepest nesting level reached. */
771
+ maxDepth: number;
772
+ /** Total enum values across the whole schema (the main driver of constrained-
773
+ * decoding state blow-up). */
774
+ totalEnumValues: number;
775
+ /** Single rolled-up score compared against the ceiling: `nodes + totalEnumValues`. */
776
+ score: number;
777
+ }
778
+ /**
779
+ * Conservative cross-vendor ceiling, used when no `vendor` is supplied (gateway-
780
+ * owned routing). Tuned below the lowest-ceiling vendor (Gemini's constrained-
781
+ * decoding "too many states" limit) so a schema that passes here is accepted by
782
+ * ANY vendor the gateway might route to.
783
+ */
784
+ declare const DEFAULT_SCHEMA_COMPLEXITY_CEILING = 80;
785
+ /**
786
+ * Estimate a JSON-Schema's complexity. The dominant cost for constrained decoding
787
+ * is the number of distinct states the engine must track, which grows with the
788
+ * node count and (especially) the total number of enum values. Pure + cheap.
789
+ */
790
+ declare function estimateSchemaComplexity(schema: unknown): SchemaComplexity;
791
+ /**
792
+ * True when a strict `json_schema` is safe for the given schema + vendor (i.e.
793
+ * within the complexity ceiling, and the vendor isn't strict-schema-incapable).
794
+ * Exposed so callers can branch (e.g. log a downgrade) without re-deriving.
795
+ */
796
+ declare function canUseStrictSchema(schema: unknown, opts?: DeriveResponseFormatOptions): boolean;
797
+ /**
798
+ * Derive the strongest safe `response_format`:
799
+ * • within the ceiling → `{ type: 'json_schema', json_schema: { name, schema, strict } }`
800
+ * • over the ceiling → `{ type: 'json_object' }` (loose JSON; instruct the
801
+ * model to follow the shape in your prompt)
802
+ *
803
+ * Pure — returns a value the consumer drops straight into
804
+ * `chat.completions.create({ response_format })`.
805
+ */
806
+ declare function deriveResponseFormat(schema: Record<string, unknown>, opts?: DeriveResponseFormatOptions): ResponseFormat;
807
+
808
+ export { type AiCapability, BuilderforceApiError, BuilderforceClient, type BuilderforceClientOptions, type ChatCompletionChunk, type ChatCompletionCreateParams, type ChatCompletionResponse, ChatCompletionStream, type ChatMessage, type ChatRole, type ContentPart, DEFAULT_SCHEMA_COMPLEXITY_CEILING, type DeriveResponseFormatOptions, type EmbeddingObject, EmbeddingsApi, type EmbeddingsCreateParams, type EmbeddingsResponse, type ErrorClassification, type ErrorKind, type FailoverEvent, type FailoverKind, type FunctionDefinition, type ImageGenerationCreateParams, type ImageGenerationDataEntry, type ImageGenerationResponse, type ImageUrlContentPart, ImagesApi, type JsonSchemaSpec, type ModelInfo, ModelsApi, type ModelsListResponse, type PerCallOptions, type ResponseFormat, type SchemaComplexity, type TextContentPart, type ToolCall, type ToolCallDelta, type ToolCallFunction, type ToolChoice, type ToolSpec, type UsageByDay, type UsageByModel, type UsageByUser, type UsageGetParams, type UsageResponse, canUseStrictSchema, classifyError, deriveResponseFormat, estimateSchemaComplexity };
package/dist/index.d.ts CHANGED
@@ -8,19 +8,37 @@ type ChatRole = 'system' | 'user' | 'assistant' | 'tool';
8
8
  * upstream — e.g. all `openrouter` means a saturated shared key, not a
9
9
  * model-specific issue.
10
10
  */
11
+ /**
12
+ * Coarse failure class for one failover attempt. Branch on this instead of
13
+ * regex-sniffing the error message. `'schema'` means the upstream rejected the
14
+ * `response_format.json_schema` as too complex for its constrained-decoding
15
+ * engine (see `FailoverEvent.reason === 'schema_too_complex'`); `'content_filter'`
16
+ * means a safety system blocked the generation. Open string union for
17
+ * forward-compat — a newer gateway may add classes an older SDK doesn't list.
18
+ */
19
+ type FailoverKind = 'rate_limit' | 'timeout' | 'auth' | 'server_error' | 'client_error' | 'schema' | 'content_filter' | 'network' | 'skipped' | (string & {});
11
20
  interface FailoverEvent {
12
21
  model: string;
13
- /** `'openrouter' | 'cerebras' | 'nvidia' | 'ollama'` */
22
+ /** `'openrouter' | 'cerebras' | 'nvidia' | 'ollama' | 'googleai' | …` */
14
23
  vendor: string;
15
- /** HTTP status code, or 0 for embedded errors / network failures. */
24
+ /** Gateway-normalized status, or 0 for embedded errors / network failures.
25
+ * For a schema rejection this is `422` (the request-error class); the REAL
26
+ * upstream status is in `upstreamStatus`. */
16
27
  code: number;
17
28
  /** Wall-clock time the gateway spent on this attempt, ms. Present on newer
18
29
  * gateway versions; absent on older ones. */
19
30
  durationMs?: number;
20
- /** Coarse failure class — `'rate_limit' | 'timeout' | 'auth' | 'server_error'
21
- * | 'client_error' | 'network' | 'skipped'`. The full upstream error text is
22
- * NOT exposed to callers; quote `traceId` to support for that. */
23
- kind?: string;
31
+ /** Coarse failure class — see {@link FailoverKind}. The full upstream error
32
+ * text is NOT exposed to callers; quote `traceId` to support for that. */
33
+ kind?: FailoverKind;
34
+ /** Stable machine-readable cause slug when one applies — e.g.
35
+ * `'schema_too_complex'`. Branch on this for structured handling instead of
36
+ * parsing `message`. Absent for unclassified failures. */
37
+ reason?: string;
38
+ /** The REAL upstream HTTP status before the gateway normalized it into `code`
39
+ * — e.g. a Gemini schema 400 surfaces as `code: 422` with `upstreamStatus: 400`.
40
+ * Absent when `code` already IS the upstream status. */
41
+ upstreamStatus?: number;
24
42
  }
25
43
  interface TextContentPart {
26
44
  type: 'text';
@@ -153,9 +171,15 @@ interface ChatCompletionCreateParams extends PerCallOptions {
153
171
  * gateway-side schema validation with retry across the failover chain. */
154
172
  response_format?: ResponseFormat;
155
173
  /**
156
- * Opaque telemetry slug. The gateway treats this as a free-form string —
157
- * persisted to `llm_usage_log.use_case` and echoed back in `_builderforce.useCase`
158
- * for confirmation, but **never used for routing**. The taxonomy is yours.
174
+ * Telemetry slug persisted to `llm_usage_log.use_case` and echoed back in
175
+ * `_builderforce.useCase`. The taxonomy is yours, BUT a few well-known patterns
176
+ * also *influence routing* (the gateway substring-matches them):
177
+ * - `…ocr…` → prefers OCR/vision-capable models.
178
+ * - quality-critical work (`resume`, `cover_letter`, `tailor`, `proposal`,
179
+ * `cv`, …) → leads with the best models your PLAN unlocks (premium writers
180
+ * on paid plans). Failover + the funded reliability backstop still apply.
181
+ * Slugs that don't match any pattern are pure telemetry. Routing is always
182
+ * gateway-owned; this only nudges model selection.
159
183
  */
160
184
  useCase?: string;
161
185
  /** Free-form key/value pairs persisted to `llm_usage_log.metadata` for billing
@@ -236,6 +260,15 @@ interface ChatCompletionResponse {
236
260
  effectivePlan?: string;
237
261
  /** Number of vendor retries the gateway performed for json_schema conformance. */
238
262
  schemaRetries?: number;
263
+ /**
264
+ * `true` when the gateway AUTO-DOWNGRADED a too-complex `response_format.json_schema`
265
+ * to loose `json_object` and re-ran the cascade so you still got a structured
266
+ * result instead of a terminal `schema_too_complex` error. The strict-schema
267
+ * guarantee was relaxed — **validate the returned JSON yourself** (it parses,
268
+ * but wasn't constrained-decoded against your schema). Pre-empt the round-trip
269
+ * with `deriveResponseFormat` when you know the schema is large.
270
+ */
271
+ schemaDowngraded?: boolean;
239
272
  /** Echo of `request.useCase` (telemetry slug; well-known patterns may also nudge routing). */
240
273
  useCase?: string;
241
274
  /** Echo of `request.metadata` for caller-side billing trace-back. */
@@ -630,4 +663,146 @@ declare class BuilderforceClient {
630
663
  constructor(options: BuilderforceClientOptions);
631
664
  }
632
665
 
633
- export { type AiCapability, BuilderforceApiError, BuilderforceClient, type BuilderforceClientOptions, type ChatCompletionChunk, type ChatCompletionCreateParams, type ChatCompletionResponse, ChatCompletionStream, type ChatMessage, type ChatRole, type ContentPart, type EmbeddingObject, EmbeddingsApi, type EmbeddingsCreateParams, type EmbeddingsResponse, type FailoverEvent, type FunctionDefinition, type ImageGenerationCreateParams, type ImageGenerationDataEntry, type ImageGenerationResponse, type ImageUrlContentPart, ImagesApi, type JsonSchemaSpec, type ModelInfo, ModelsApi, type ModelsListResponse, type PerCallOptions, type ResponseFormat, type TextContentPart, type ToolCall, type ToolCallDelta, type ToolCallFunction, type ToolChoice, type ToolSpec, type UsageByDay, type UsageByModel, type UsageByUser, type UsageGetParams, type UsageResponse };
666
+ /**
667
+ * Coarse, stable error class for a failed gateway call — keyed off the gateway's
668
+ * OWN failure taxonomy (`error.code` + `terminal` + the failover breakdown), NOT
669
+ * raw HTTP-status guessing. Branch on this instead of reinventing a classifier
670
+ * per consumer (which inevitably drifts).
671
+ *
672
+ * rate_limit — the gateway's whole cascade was rate-limited (429
673
+ * `cascade_exhausted`). Retry later (`retryAfter`).
674
+ * token_cap — a per-TENANT cap was hit (plan/monthly/host/claw token
675
+ * or image-credit limit). TERMINAL for this billing
676
+ * window — a different model won't help.
677
+ * schema_too_complex — every candidate rejected the `response_format.json_schema`
678
+ * as too complex for its constrained-decoding engine.
679
+ * TERMINAL: simplify the schema or drop to `json_object`.
680
+ * invalid_request — malformed payload (400/422) every model rejected. TERMINAL.
681
+ * auth — bad/missing API key (401/403). TERMINAL.
682
+ * model_unavailable — a strict-pinned model is on cooldown / unconfigured (503).
683
+ * Not terminal: drop the pin or pick another model.
684
+ * timeout — the request (or a single vendor attempt) timed out (408).
685
+ * service_unavailable — infrastructure ceiling (503 `worker_subrequest_exhausted`)
686
+ * or transient upstream outage (5xx). Retry after a backoff.
687
+ * content_filter — a safety system blocked the generation.
688
+ * network — the request never reached the gateway (DNS/TLS/reset).
689
+ * aborted — the caller's AbortSignal fired (499 / AbortError).
690
+ * unknown — none of the above matched.
691
+ */
692
+ type ErrorKind = 'rate_limit' | 'token_cap' | 'schema_too_complex' | 'invalid_request' | 'auth' | 'model_unavailable' | 'timeout' | 'service_unavailable' | 'content_filter' | 'network' | 'aborted' | 'unknown';
693
+ interface ErrorClassification {
694
+ kind: ErrorKind;
695
+ /**
696
+ * `true` when retrying the SAME request on a DIFFERENT model will NOT help —
697
+ * the consumer's own failover chain should short-circuit. Sourced from the
698
+ * gateway's `error.terminal` flag when present, with a kind-based fallback.
699
+ */
700
+ terminal: boolean;
701
+ /**
702
+ * `true` when the SAME request is safe to retry as-is (idempotently), usually
703
+ * after `retryAfter` seconds — e.g. a transient rate-limit/outage/timeout.
704
+ * `false` for deterministic rejections (schema, invalid request, auth, caps).
705
+ */
706
+ retryable: boolean;
707
+ /** Seconds the caller should wait before retrying, when the gateway supplied it. */
708
+ retryAfter?: number;
709
+ /** HTTP status, when the error reached the gateway. */
710
+ status?: number;
711
+ /** Gateway error code slug, when present (`schema_too_complex`, `plan_token_limit_exceeded`, …). */
712
+ code?: string;
713
+ /** Human-readable message (the gateway's, or the thrown error's). */
714
+ message: string;
715
+ }
716
+ /**
717
+ * Classify any caught error from a Builderforce SDK call into a structured,
718
+ * actionable verdict. Accepts `unknown` so a consumer can pass a raw `catch`
719
+ * binding — non-`BuilderforceApiError` values (network throws, `AbortError`,
720
+ * plain `Error`) are classified too.
721
+ *
722
+ * This is the FIRST-PARTY classifier the gateway feedback asked for: keyed off
723
+ * the gateway's own taxonomy so every consumer agrees on what "terminal" and
724
+ * "retryable" mean instead of hand-rolling `429/408/401/5xx → kind` guesses that
725
+ * drift apart.
726
+ */
727
+ declare function classifyError(err: unknown): ErrorClassification;
728
+
729
+ /**
730
+ * deriveResponseFormat — pick the strongest `response_format` a request can SAFELY
731
+ * use given how complex its JSON-Schema is and (optionally) which vendor will
732
+ * serve it.
733
+ *
734
+ * The problem this solves: a strict `json_schema` gives the best conformance, but
735
+ * some vendors' constrained-decoding engines reject a schema that's too complex
736
+ * (Gemini's "too many states for serving"). The gateway now surfaces that as a
737
+ * terminal `schema_too_complex` error — but the cleaner fix is to NOT send a
738
+ * strict schema a vendor can't honour in the first place. This utility is the
739
+ * pre-flight guard: it emits `{ type: 'json_schema', strict }` when the schema is
740
+ * within the (vendor-specific or conservative-default) complexity ceiling, and
741
+ * falls back to `{ type: 'json_object' }` (loose JSON mode — universally
742
+ * supported) when it isn't.
743
+ *
744
+ * The SDK is zero-dependency, so this takes a plain JSON-Schema object — convert
745
+ * a Zod schema first with `zod-to-json-schema` (`deriveResponseFormat(zodToJsonSchema(MySchema), …)`).
746
+ */
747
+ interface DeriveResponseFormatOptions {
748
+ /** Schema name sent as `json_schema.name` (default `'response'`). */
749
+ name?: string;
750
+ /** Set `json_schema.strict` when a strict schema is emitted (default `true`). */
751
+ strict?: boolean;
752
+ /**
753
+ * The vendor that will serve the request, when known (the consumer pinned a
754
+ * `model`). Selects that vendor's specific complexity ceiling. Omit when
755
+ * routing is gateway-owned — the conservative default ceiling (the lowest
756
+ * common denominator across vendors) is used so the schema is accepted
757
+ * whichever vendor the gateway picks.
758
+ */
759
+ vendor?: string;
760
+ /**
761
+ * Override the complexity ceiling (max complexity `score`; see
762
+ * {@link estimateSchemaComplexity}). Above this, loose `json_object` is emitted.
763
+ * Wins over the vendor/default ceiling.
764
+ */
765
+ maxComplexity?: number;
766
+ }
767
+ interface SchemaComplexity {
768
+ /** Total schema nodes — every property, array `items`, and enum value counts one. */
769
+ nodes: number;
770
+ /** Deepest nesting level reached. */
771
+ maxDepth: number;
772
+ /** Total enum values across the whole schema (the main driver of constrained-
773
+ * decoding state blow-up). */
774
+ totalEnumValues: number;
775
+ /** Single rolled-up score compared against the ceiling: `nodes + totalEnumValues`. */
776
+ score: number;
777
+ }
778
+ /**
779
+ * Conservative cross-vendor ceiling, used when no `vendor` is supplied (gateway-
780
+ * owned routing). Tuned below the lowest-ceiling vendor (Gemini's constrained-
781
+ * decoding "too many states" limit) so a schema that passes here is accepted by
782
+ * ANY vendor the gateway might route to.
783
+ */
784
+ declare const DEFAULT_SCHEMA_COMPLEXITY_CEILING = 80;
785
+ /**
786
+ * Estimate a JSON-Schema's complexity. The dominant cost for constrained decoding
787
+ * is the number of distinct states the engine must track, which grows with the
788
+ * node count and (especially) the total number of enum values. Pure + cheap.
789
+ */
790
+ declare function estimateSchemaComplexity(schema: unknown): SchemaComplexity;
791
+ /**
792
+ * True when a strict `json_schema` is safe for the given schema + vendor (i.e.
793
+ * within the complexity ceiling, and the vendor isn't strict-schema-incapable).
794
+ * Exposed so callers can branch (e.g. log a downgrade) without re-deriving.
795
+ */
796
+ declare function canUseStrictSchema(schema: unknown, opts?: DeriveResponseFormatOptions): boolean;
797
+ /**
798
+ * Derive the strongest safe `response_format`:
799
+ * • within the ceiling → `{ type: 'json_schema', json_schema: { name, schema, strict } }`
800
+ * • over the ceiling → `{ type: 'json_object' }` (loose JSON; instruct the
801
+ * model to follow the shape in your prompt)
802
+ *
803
+ * Pure — returns a value the consumer drops straight into
804
+ * `chat.completions.create({ response_format })`.
805
+ */
806
+ declare function deriveResponseFormat(schema: Record<string, unknown>, opts?: DeriveResponseFormatOptions): ResponseFormat;
807
+
808
+ export { type AiCapability, BuilderforceApiError, BuilderforceClient, type BuilderforceClientOptions, type ChatCompletionChunk, type ChatCompletionCreateParams, type ChatCompletionResponse, ChatCompletionStream, type ChatMessage, type ChatRole, type ContentPart, DEFAULT_SCHEMA_COMPLEXITY_CEILING, type DeriveResponseFormatOptions, type EmbeddingObject, EmbeddingsApi, type EmbeddingsCreateParams, type EmbeddingsResponse, type ErrorClassification, type ErrorKind, type FailoverEvent, type FailoverKind, type FunctionDefinition, type ImageGenerationCreateParams, type ImageGenerationDataEntry, type ImageGenerationResponse, type ImageUrlContentPart, ImagesApi, type JsonSchemaSpec, type ModelInfo, ModelsApi, type ModelsListResponse, type PerCallOptions, type ResponseFormat, type SchemaComplexity, type TextContentPart, type ToolCall, type ToolCallDelta, type ToolCallFunction, type ToolChoice, type ToolSpec, type UsageByDay, type UsageByModel, type UsageByUser, type UsageGetParams, type UsageResponse, canUseStrictSchema, classifyError, deriveResponseFormat, estimateSchemaComplexity };
package/dist/index.mjs CHANGED
@@ -258,7 +258,9 @@ var BuilderforceApiError = class extends Error {
258
258
  vendor: e.vendor,
259
259
  code: e.code,
260
260
  ...typeof ev.durationMs === "number" ? { durationMs: ev.durationMs } : {},
261
- ...typeof ev.kind === "string" ? { kind: ev.kind } : {}
261
+ ...typeof ev.kind === "string" ? { kind: ev.kind } : {},
262
+ ...typeof ev.reason === "string" ? { reason: ev.reason } : {},
263
+ ...typeof ev.upstreamStatus === "number" ? { upstreamStatus: ev.upstreamStatus } : {}
262
264
  });
263
265
  }
264
266
  }
@@ -437,12 +439,173 @@ var BuilderforceClient = class {
437
439
  this.usage = new UsageApi(http);
438
440
  }
439
441
  };
442
+
443
+ // src/application/classifyError.ts
444
/**
 * Error codes that `classifyError` reports as `kind: "token_cap"`.
 */
var TOKEN_CAP_CODES = /* @__PURE__ */ new Set([
  "plan_token_limit_exceeded",
  "plan_monthly_token_limit_exceeded",
  "agent_host_token_limit_exceeded",
  "claw_token_limit_exceeded",
  "image_credit_limit_exceeded"
]);

/**
 * Map any thrown value onto a coarse classification record.
 *
 * Values that are not `BuilderforceApiError` instances are classified from
 * their `name` / prototype only and yield `{ kind, terminal, retryable, message }`.
 * For `BuilderforceApiError` instances the error `code` is consulted first,
 * then the HTTP `status`; the result additionally carries `retryAfter`,
 * `status` and `code` whenever those are defined on the error.
 *
 * @param err The caught value (may be anything, including `null`).
 * @returns An object with `kind`, `terminal`, `retryable` and `message`,
 *   plus the optional passthrough fields described above.
 */
function classifyError(err) {
  if (!(err instanceof BuilderforceApiError)) {
    const errName = err?.name;
    const text = err instanceof Error ? err.message : String(err);
    let kind = "unknown";
    if (errName === "AbortError") {
      kind = "aborted";
    } else if (err instanceof TypeError) {
      kind = "network";
    }
    // Only an abort is terminal here, and only a network failure is retryable.
    return { kind, terminal: kind === "aborted", retryable: kind === "network", message: text };
  }

  const { status, code, terminal: terminalHint, retryAfter, message } = err;

  // Passthrough fields appended after kind/terminal/retryable on every result.
  const details = {
    ...(retryAfter === void 0 ? {} : { retryAfter }),
    ...(status === void 0 ? {} : { status }),
    ...(code === void 0 ? {} : { code }),
    message
  };
  const result = (kind, terminal, retryable) => ({ kind, terminal, retryable, ...details });

  // --- Code-driven classes: these take precedence over the HTTP status. ---
  if (code === "schema_too_complex" || lastFailoverReason(err) === "schema_too_complex") {
    return result("schema_too_complex", terminalHint ?? true, false);
  }
  if (code && TOKEN_CAP_CODES.has(code)) {
    return result("token_cap", terminalHint ?? true, false);
  }
  if (code === "model_unavailable") {
    return result("model_unavailable", terminalHint ?? false, false);
  }
  if (code === "worker_subrequest_exhausted") {
    return result("service_unavailable", false, true);
  }
  if (code === "aborted") {
    return result("aborted", true, false);
  }
  if (code === "content_filter") {
    return result("content_filter", terminalHint ?? true, false);
  }
  if (code === "timeout") {
    return result("timeout", false, true);
  }

  // --- Status-driven classes. ---
  switch (status) {
    case 408:
      return result("timeout", false, true);
    case 401:
    case 403:
      return result("auth", terminalHint ?? true, false);
    case 429: {
      // A rate limit is retryable unless the error itself is flagged terminal.
      const isTerminal = terminalHint ?? false;
      return result("rate_limit", isTerminal, !isTerminal);
    }
    case 400:
    case 422:
      return result("invalid_request", terminalHint ?? true, false);
    default:
      break;
  }
  // Every 5xx (503 included) is reported as a retryable service outage.
  if (status !== void 0 && status >= 500) {
    return result("service_unavailable", false, true);
  }
  return result("unknown", terminalHint ?? false, false);
}
508
/**
 * Read the `reason` of the final entry in `err.failovers`.
 *
 * @param err Object that may carry a `failovers` array.
 * @returns The last entry's `reason`, or `undefined` when there are no
 *   failovers or the last entry has no reason.
 */
function lastFailoverReason(err) {
  const attempts = err.failovers;
  const count = attempts ? attempts.length : 0;
  return count === 0 ? void 0 : attempts[count - 1]?.reason;
}
513
+
514
+ // src/application/deriveResponseFormat.ts
515
/** Complexity ceiling used when neither `maxComplexity` nor a known vendor is given. */
var DEFAULT_SCHEMA_COMPLEXITY_CEILING = 80;

/** Per-vendor complexity ceilings, keyed by lower-cased vendor name. */
var VENDOR_SCHEMA_CEILINGS = {
  // Low constrained-decoding ceiling — the vendor that motivated this guard.
  googleai: 60,
  google: 60,
  gemini: 60,
  // High-ceiling, robust strict-schema vendors.
  openai: 600,
  anthropic: 600,
  cerebras: 300,
  nvidia: 300,
  openrouter: 200
};

/** Recursion cut-off for the schema walk; nodes deeper than this are ignored. */
var MAX_SCHEMA_WALK_DEPTH = 64;

/**
 * Estimate how complex a JSON Schema is by walking it.
 *
 * Counted as one node each: every entry of `properties`, every `items`
 * schema (single or tuple form) and every member of `anyOf` / `oneOf` /
 * `allOf`. Entries of `$defs` / `definitions` are walked but not themselves
 * counted. The lengths of all `enum` arrays are summed separately.
 *
 * NOTE(review): there is no visited-set, only the depth cut-off, so a schema
 * object graph containing cycles is re-walked until `MAX_SCHEMA_WALK_DEPTH`.
 *
 * @param schema Root schema object.
 * @returns `{ nodes, maxDepth, totalEnumValues, score }` where
 *   `score = nodes + totalEnumValues`.
 */
function estimateSchemaComplexity(schema) {
  let nodes = 0;
  let totalEnumValues = 0;
  let maxDepth = 0;
  const walk = (node, depth) => {
    if (depth > MAX_SCHEMA_WALK_DEPTH || node === null || typeof node !== "object") return;
    if (depth > maxDepth) maxDepth = depth;
    const s = node;
    // Count one node for a child schema, then descend into it.
    const visitChild = (child) => {
      nodes += 1;
      walk(child, depth + 1);
    };
    const enumVals = s["enum"];
    if (Array.isArray(enumVals)) totalEnumValues += enumVals.length;
    const props = s["properties"];
    if (props && typeof props === "object") {
      Object.values(props).forEach(visitChild);
    }
    const items = s["items"];
    if (Array.isArray(items)) {
      items.forEach(visitChild);
    } else if (items && typeof items === "object") {
      visitChild(items);
    }
    for (const comb of ["anyOf", "oneOf", "allOf"]) {
      const arr = s[comb];
      if (Array.isArray(arr)) arr.forEach(visitChild);
    }
    for (const defsKey of ["$defs", "definitions"]) {
      const defs = s[defsKey];
      if (defs && typeof defs === "object") {
        // Definitions are traversed without adding a node for the entry itself.
        for (const key of Object.keys(defs)) walk(defs[key], depth + 1);
      }
    }
  };
  walk(schema, 0);
  return { nodes, maxDepth, totalEnumValues, score: nodes + totalEnumValues };
}

/**
 * Resolve the complexity ceiling for a call.
 *
 * Precedence: an explicit non-negative `opts.maxComplexity`, then the
 * per-vendor table (case-insensitive), then the default ceiling.
 *
 * @param opts Optional `{ maxComplexity?, vendor? }`.
 * @returns The numeric ceiling to compare a schema score against.
 */
function ceilingFor(opts) {
  if (opts?.maxComplexity != null && opts.maxComplexity >= 0) return opts.maxComplexity;
  if (opts?.vendor) {
    const v = opts.vendor.toLowerCase();
    // Own-property check: a bare `in` would also match names inherited from
    // Object.prototype (e.g. "constructor", "__proto__") and hand back a
    // non-numeric ceiling, making every `score <= ceiling` comparison false.
    if (Object.prototype.hasOwnProperty.call(VENDOR_SCHEMA_CEILINGS, v)) {
      return VENDOR_SCHEMA_CEILINGS[v];
    }
  }
  return DEFAULT_SCHEMA_COMPLEXITY_CEILING;
}

/**
 * Decide whether a schema's score is within the resolved ceiling.
 *
 * @param schema Root schema object.
 * @param opts Optional `{ maxComplexity?, vendor? }`; a ceiling of `0`
 *   always yields `false`.
 * @returns `true` when the schema score is at or below the ceiling.
 */
function canUseStrictSchema(schema, opts) {
  const ceiling = ceilingFor(opts);
  if (ceiling === 0) return false;
  return estimateSchemaComplexity(schema).score <= ceiling;
}

/**
 * Build a `response_format` value for a schema.
 *
 * @param schema Root schema object.
 * @param opts Optional `{ maxComplexity?, vendor?, name?, strict? }`.
 * @returns `{ type: "json_object" }` when the schema is over the ceiling,
 *   otherwise a `json_schema` format wrapping `schema`, with `name`
 *   defaulting to `"response"` and `strict` defaulting to `true`.
 */
function deriveResponseFormat(schema, opts) {
  if (!canUseStrictSchema(schema, opts)) {
    return { type: "json_object" };
  }
  return {
    type: "json_schema",
    json_schema: {
      name: opts?.name ?? "response",
      schema,
      strict: opts?.strict ?? true
    }
  };
}
440
598
  export {
441
599
  BuilderforceApiError,
442
600
  BuilderforceClient,
443
601
  ChatCompletionStream,
602
+ DEFAULT_SCHEMA_COMPLEXITY_CEILING,
444
603
  EmbeddingsApi,
445
604
  ImagesApi,
446
- ModelsApi
605
+ ModelsApi,
606
+ canUseStrictSchema,
607
+ classifyError,
608
+ deriveResponseFormat,
609
+ estimateSchemaComplexity
447
610
  };
448
611
  //# sourceMappingURL=index.mjs.map