@ohm_studio/sdk-core 0.6.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/client.js CHANGED
@@ -1,9 +1,48 @@
1
- import { OHMAbortError, OHMAuthError, OHMConfigError, OHMError, OHMRateLimitError, OHMServerError, OHMValidationError, } from "./errors";
1
+ import { OHMAbortError, OHMAuthError, OHMConfigError, OHMError, OHMNetworkError, OHMNotFoundError, OHMQuotaExceededError, OHMRateLimitError, OHMServerError, OHMTimeoutError, OHMValidationError, } from "./errors";
2
2
  import { backoffMs, isRetriableStatus } from "./retry";
3
3
  import { mockResponseFor } from "./mock";
4
4
  const DEFAULT_BASE_URL = "https://api.ohm.doctor";
5
5
  const DEFAULT_TIMEOUT_MS = 60_000;
6
6
  const DEFAULT_MAX_RETRIES = 2;
7
+ /**
8
+ * Best-effort `User-Agent` for Node. Browsers + RN reject custom UA
9
+ * via fetch (forbidden header), so we only set it on Node-flavoured
10
+ * runtimes. Empty string disables the header entirely.
11
+ */
12
+ function buildUserAgent(sdkVersion) {
13
+ // @ts-ignore — runtime probe
14
+ const proc = typeof process !== "undefined" ? process : undefined;
15
+ if (proc?.versions?.node && !proc.versions.bun) {
16
+ return `ohm-sdk/${sdkVersion} (node/${proc.versions.node}; ${proc.platform} ${proc.arch})`;
17
+ }
18
+ return "";
19
+ }
20
+ /** Generate a UUID v4. Uses native crypto.randomUUID() when available. */
21
+ function uuidv4() {
22
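+ // Global crypto.randomUUID: Node 19+ (flag-gated before that) and modern browsers in secure contexts. Other hosts — older Node, possibly RN without a crypto polyfill (verify) — use the fallback below.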
23
+ const g = globalThis;
24
+ if (g.crypto?.randomUUID)
25
+ return g.crypto.randomUUID();
26
+ // Fallback for very old hosts — Math.random is acceptable for an
27
+ // idempotency key (server only uses it as a dedupe token, not a secret).
28
+ return "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx".replace(/[xy]/g, (c) => {
29
+ const r = (Math.random() * 16) | 0;
30
+ const v = c === "x" ? r : (r & 0x3) | 0x8;
31
+ return v.toString(16);
32
+ });
33
+ }
34
+ /** Safe-fire a hook; swallow exceptions so user bugs don't break requests. */
35
+ function safeHook(fn, arg) {
36
+ if (!fn)
37
+ return;
38
+ try {
39
+ fn(arg);
40
+ }
41
+ catch (err) {
42
+ // eslint-disable-next-line no-console
43
+ console.warn("[ohm-sdk] hook threw — swallowing:", err);
44
+ }
45
+ }
7
46
  /**
8
47
  * Platform-agnostic core client. Subclasses (sdk-js, sdk-react-native)
9
48
  * supply the platform-specific multipart/audio adapter via the `attachAudio`
@@ -14,11 +53,19 @@ export class OHMCoreClient {
14
53
  apiKey;
15
54
  jwt;
16
55
  timeoutMs;
56
+ totalTimeoutMs;
17
57
  maxRetries;
18
58
  fetchImpl;
19
59
  onUsage;
60
+ hooks;
61
+ disableAutoIdempotency;
62
+ userAgent;
20
63
  _mock;
21
64
  _mockResponses;
65
+ /** Cached options for `withOverrides`. */
66
+ _opts;
67
+ /** SDK version stamped on `X-OHM-Client` + `User-Agent`. */
68
+ static SDK_VERSION = "0.8.0";
22
69
  constructor(init) {
23
70
  // Accept either a bare `ohms_live_…` string (`new OHM("…")`) or the
24
71
  // full options object. Most customers want the one-liner.
@@ -30,16 +77,71 @@ export class OHMCoreClient {
30
77
  message: "OHM client requires an apiKey. Pass it as `new OHM('ohms_live_…')` or `new OHM({ apiKey: '…' })`.",
31
78
  });
32
79
  }
80
+ this._opts = opts;
33
81
  this.baseUrl = (opts.baseUrl || DEFAULT_BASE_URL).replace(/\/+$/, "");
34
82
  this.apiKey = opts.apiKey;
35
83
  this.jwt = opts.jwt;
36
84
  this.timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS;
85
+ this.totalTimeoutMs = opts.totalTimeoutMs;
37
86
  this.maxRetries = opts.maxRetries ?? DEFAULT_MAX_RETRIES;
38
87
  this.fetchImpl = opts.fetch ?? globalThis.fetch.bind(globalThis);
39
88
  this.onUsage = opts.onUsage;
89
+ this.hooks = opts.hooks;
90
+ this.disableAutoIdempotency = !!opts.disableAutoIdempotency;
91
+ this.userAgent = buildUserAgent(OHMCoreClient.SDK_VERSION);
40
92
  this._mock = !!opts.mock;
41
93
  this._mockResponses = opts.mockResponses;
42
94
  }
95
+ /**
96
+ * Returns the SDK version string. Useful when forwarding the SDK
97
+ * version to your own telemetry pipeline.
98
+ */
99
+ static getVersion() {
100
+ return OHMCoreClient.SDK_VERSION;
101
+ }
102
+ /**
103
+ * Returns a new client with overridden options for one call. The
104
+ * underlying auth + base URL are inherited; you typically only
105
+ * override `timeoutMs` / `maxRetries` / `totalTimeoutMs` for a
106
+ * single known-slow call.
107
+ *
108
+ * @example
109
+ * const slow = ohm.withOverrides({ timeoutMs: 5 * 60_000 });
110
+ * await slow.audio.extract({ apiSlug, file: bigAudio });
111
+ */
112
+ withOverrides(overrides) {
113
+ // Build a new instance of the same subclass with merged options.
114
+ // `this.constructor` preserves the subclass identity, so the clone uses the
115
+ // runtime constructor (sdk-js / sdk-rn / etc).
116
+ const Ctor = this.constructor;
117
+ return new Ctor({ ...this._opts, ...overrides });
118
+ }
119
+ /**
120
+ * Establish a TCP/TLS connection to the API ahead of the first real
121
+ * call. Drops cold-start latency from ~500 ms to ~150 ms on real-world
122
+ * mobile networks. Safe to call multiple times; no-op in mock mode.
123
+ *
124
+ * const ohm = new OHM({ apiKey });
125
+ * void ohm.warmUp(); // fire-and-forget at app boot
126
+ * // ...
127
+ * await ohm.extract({ ... }); // already-warm connection
128
+ */
129
+ async warmUp() {
130
+ if (this._mock)
131
+ return;
132
+ try {
133
+ await this.fetchImpl(`${this.baseUrl}/api/health`, {
134
+ method: "GET",
135
+ // Short, non-retried, abort-able. We don't care about the
136
+ // response body — only that the TLS handshake completes.
137
+ signal: AbortSignal.timeout(5_000),
138
+ });
139
+ }
140
+ catch {
141
+ // Warm-up is best-effort. If the network is offline the real
142
+ // call will surface its own OHMNetworkError.
143
+ }
144
+ }
43
145
  /**
44
146
  * Audio surface — speech-to-text and audio-to-structured-JSON.
45
147
  * Subclasses fill in the platform-specific multipart adapter (browser
@@ -125,7 +227,7 @@ export class OHMCoreClient {
125
227
  * step finishes. Backend uses Server-Sent Events.
126
228
  *
127
229
  * @example
128
- * const stream = ohm.audio.extract.stream({ apiSlug, file });
230
+ * const stream = ohm.audio.extractStream({ apiSlug, file });
129
231
  * for await (const chunk of stream) {
130
232
  * if (chunk.type === "transcript") setT(chunk.transcript);
131
233
  * if (chunk.type === "data") setData(chunk.data);
@@ -270,9 +372,14 @@ export class OHMCoreClient {
270
372
  * CANCELLED — caller checks status). Never returns mid-state.
271
373
  */
272
374
  poll: async (jobId, options = {}) => {
273
- const intervalMs = options.intervalMs ?? 2000;
375
+ const initialIntervalMs = options.intervalMs ?? 2000;
376
+ const maxIntervalMs = options.maxIntervalMs ?? 30_000;
274
377
  const maxWaitMs = options.maxWaitMs ?? 15 * 60_000;
275
378
  const start = Date.now();
379
+ // Exponential backoff capped at maxIntervalMs — protects the
380
+ // worker from a chatty client when a job stays PROCESSING for
381
+ // 10+ minutes. Grows 1.5× per poll (defaults, in seconds): 2 → 3 → 4.5 → ... → 30.
382
+ let interval = initialIntervalMs;
276
383
  // eslint-disable-next-line no-constant-condition
277
384
  while (true) {
278
385
  if (options.signal?.aborted)
@@ -292,7 +399,8 @@ export class OHMCoreClient {
292
399
  status: 0,
293
400
  });
294
401
  }
295
- await new Promise((r) => setTimeout(r, intervalMs));
402
+ await new Promise((r) => setTimeout(r, interval));
403
+ interval = Math.min(maxIntervalMs, Math.round(interval * 1.5));
296
404
  }
297
405
  },
298
406
  },
@@ -353,6 +461,67 @@ export class OHMCoreClient {
353
461
  idempotencyKey: input.idempotencyKey,
354
462
  });
355
463
  }
464
+ /**
465
+ * Bulk-extract a batch of text inputs concurrently. Partial failures
466
+ * do NOT fail the batch — each input gets a discriminated-union
467
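+ * result (`{ ok: true, data }` or `{ ok: false, error, input }`). Exception: if `options.signal` is aborted while inputs are still unstarted, the whole call rejects with OHMAbortError.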
468
+ *
469
+ * Use when replaying historical transcripts, batch-tagging lab
470
+ * reports, or anywhere "10 000 of these need to extract this week".
471
+ *
472
+ * Default concurrency is 4 — enough to amortise network round-trips
473
+ * without blowing the per-key rate limit. Pass a higher cap when you
474
+ * know your key's quota is generous.
475
+ *
476
+ * @example
477
+ * const results = await ohm.extractBulk(transcripts.map(t => ({
478
+ * apiSlug: "opd-clinic",
479
+ * text: t,
480
+ * })), {
481
+ * concurrency: 8,
482
+ * onProgress: (done, total) => console.log(`${done}/${total}`),
483
+ * });
484
+ * const errored = results.filter(r => !r.ok);
485
+ */
486
+ async extractBulk(inputs, options = {}) {
487
+ const concurrency = Math.max(1, options.concurrency ?? 4);
488
+ const results = new Array(inputs.length);
489
+ let cursor = 0;
490
+ let done = 0;
491
+ const total = inputs.length;
492
+ const signal = options.signal;
493
+ const worker = async () => {
494
+ while (cursor < total) {
495
+ if (signal?.aborted) {
496
+ throw new OHMAbortError();
497
+ }
498
+ const idx = cursor++;
499
+ const input = inputs[idx];
500
+ try {
501
+ const data = await this.extract({ ...input, signal });
502
+ results[idx] = { ok: true, data };
503
+ }
504
+ catch (err) {
505
+ results[idx] = {
506
+ ok: false,
507
+ error: err instanceof Error ? err : new Error(String(err)),
508
+ input,
509
+ };
510
+ }
511
+ finally {
512
+ done++;
513
+ try {
514
+ options.onProgress?.(done, total);
515
+ }
516
+ catch {
517
+ /* ignore */
518
+ }
519
+ }
520
+ }
521
+ };
522
+ await Promise.all(Array.from({ length: Math.min(concurrency, total) }, () => worker()));
523
+ return results;
524
+ }
356
525
  /**
357
526
  * One-line convenience: pass a transcript, get back the data field.
358
527
  * Equivalent to `(await ohm.extract({ apiSlug, text })).data` — the most
@@ -519,7 +688,7 @@ export class OHMCoreClient {
519
688
  return this.requestRaw(method, path, init, options);
520
689
  }
521
690
  /**
522
- * Default SSE-based streaming for audio.extract.stream. Subclasses
691
+ * Default SSE-based streaming for audio.extractStream. Subclasses
523
692
  * override `runMultipart` to construct the FormData body for their
524
693
  * platform; this method reuses that body and parses an SSE event stream
525
694
  * off the response.
@@ -537,7 +706,7 @@ export class OHMCoreClient {
537
706
  headers.Authorization = `Bearer ${this.apiKey}`;
538
707
  else if (this.jwt)
539
708
  headers.Authorization = `Bearer ${this.jwt}`;
540
- headers["X-OHM-Client"] = "@ohm_studio/sdk-core@0.6.0";
709
+ headers["X-OHM-Client"] = "@ohm_studio/sdk-core@0.7.0";
541
710
  if (opts.idempotencyKey) {
542
711
  headers["Idempotency-Key"] = opts.idempotencyKey;
543
712
  }
@@ -614,6 +783,11 @@ export class OHMCoreClient {
614
783
  });
615
784
  }
616
785
  async requestRaw(method, path, init, options) {
786
+ // ── Resolve per-call overrides (or fall back to client defaults).
787
+ const perAttemptTimeout = options?.timeoutMs ?? this.timeoutMs;
788
+ const maxRetries = options?.maxRetries ?? this.maxRetries;
789
+ const totalDeadline = options?.totalTimeoutMs ?? this.totalTimeoutMs;
790
+ const deadlineAt = totalDeadline != null ? Date.now() + totalDeadline : undefined;
617
791
  const url = `${this.baseUrl}${path}`;
618
792
  const headers = new Headers(init.headers || {});
619
793
  if (this.apiKey)
@@ -621,13 +795,45 @@ export class OHMCoreClient {
621
795
  else if (this.jwt)
622
796
  headers.set("Authorization", `Bearer ${this.jwt}`);
623
797
  if (!headers.has("X-OHM-Client")) {
624
- headers.set("X-OHM-Client", "@ohm_studio/sdk-core@0.6.0");
798
+ headers.set("X-OHM-Client", `@ohm_studio/sdk-core@${OHMCoreClient.SDK_VERSION}`);
799
+ }
800
+ // User-Agent — Node only. Browsers + RN reject custom UA.
801
+ if (this.userAgent && !headers.has("User-Agent")) {
802
+ try {
803
+ headers.set("User-Agent", this.userAgent);
804
+ }
805
+ catch {
806
+ // Some hosts forbid setting User-Agent — swallow.
807
+ }
808
+ }
809
+ // ── Idempotency-Key: caller-supplied wins. Otherwise auto-generate
810
+ // for unsafe methods (POST/PATCH/PUT/DELETE) unless explicitly
811
+ // disabled. `null` from the caller is an explicit opt-out.
812
+ const isUnsafe = method === "POST" ||
813
+ method === "PATCH" ||
814
+ method === "PUT" ||
815
+ method === "DELETE";
816
+ let idempotencyKey;
817
+ if (options?.idempotencyKey === null) {
818
+ // explicit opt-out — leave header off
625
819
  }
626
- // Idempotency-Key server short-circuits same-key retries within
627
- // 24 h to the cached response. Stripe / Twilio convention.
628
- if (options?.idempotencyKey) {
629
- headers.set("Idempotency-Key", options.idempotencyKey);
820
+ else if (typeof options?.idempotencyKey === "string") {
821
+ idempotencyKey = options.idempotencyKey;
630
822
  }
823
+ else if (isUnsafe && !this.disableAutoIdempotency) {
824
+ idempotencyKey = uuidv4();
825
+ }
826
+ if (idempotencyKey) {
827
+ headers.set("Idempotency-Key", idempotencyKey);
828
+ }
829
+ // ── Body inspection for keepalive eligibility. Browser fetch
830
+ // enforces a 64 KB cap on keepalive bodies; we play it safe at
831
+ // 60 KB and skip multipart bodies entirely.
832
+ const bodyAsString = typeof init.body === "string" ? init.body : "";
833
+ const keepaliveEligible = isUnsafe &&
834
+ bodyAsString.length > 0 &&
835
+ bodyAsString.length < 60_000 &&
836
+ headers.get("content-type")?.includes("application/json") === true;
631
837
  // Caller-supplied signal short-circuits before any work is started —
632
838
  // matches DOM fetch() semantics and avoids burning a retry on an
633
839
  // already-cancelled request.
@@ -635,25 +841,45 @@ export class OHMCoreClient {
635
841
  throw new OHMAbortError();
636
842
  }
637
843
  let lastError;
638
- for (let attempt = 0; attempt <= this.maxRetries; attempt++) {
844
+ for (let attempt = 0; attempt <= maxRetries; attempt++) {
845
+ // ── Deadline check before each attempt.
846
+ if (deadlineAt != null && Date.now() >= deadlineAt) {
847
+ throw new OHMTimeoutError({
848
+ message: `Total request deadline (${totalDeadline}ms) exceeded after ${attempt} attempt(s)`,
849
+ });
850
+ }
851
+ // ── Per-attempt timeout: never exceed the remaining deadline.
852
+ const remaining = deadlineAt != null ? Math.max(50, deadlineAt - Date.now()) : Infinity;
853
+ const attemptTimeout = Math.min(perAttemptTimeout, remaining);
639
854
  const ac = new AbortController();
640
- const t = setTimeout(() => ac.abort(), this.timeoutMs);
641
- // Bridge the caller's signal into our internal AbortController so
642
- // either source (timeout, user cancel) trips the same fetch abort.
855
+ const t = setTimeout(() => ac.abort(), attemptTimeout);
643
856
  let onCallerAbort;
644
857
  if (options?.signal) {
645
858
  onCallerAbort = () => ac.abort();
646
859
  options.signal.addEventListener("abort", onCallerAbort, { once: true });
647
860
  }
861
+ safeHook(this.hooks?.onRequest, {
862
+ method,
863
+ url,
864
+ attempt,
865
+ idempotencyKey,
866
+ });
648
867
  const tStart = Date.now();
649
868
  try {
650
- const res = await this.fetchImpl(url, {
869
+ const fetchInit = {
651
870
  ...init,
652
871
  method,
653
872
  headers,
654
873
  signal: ac.signal,
655
- });
874
+ };
875
+ if (keepaliveEligible) {
876
+ fetchInit.keepalive = true;
877
+ }
878
+ const res = await this.fetchImpl(url, fetchInit);
656
879
  const latencyMs = Date.now() - tStart;
880
+ const requestId = res.headers.get("x-request-id") ||
881
+ res.headers.get("x-ohm-request-id") ||
882
+ undefined;
657
883
  this.onUsage?.({
658
884
  endpoint: path,
659
885
  method,
@@ -662,30 +888,78 @@ export class OHMCoreClient {
662
888
  latencyMs,
663
889
  retries: attempt,
664
890
  });
891
+ safeHook(this.hooks?.onResponse, {
892
+ method,
893
+ url,
894
+ status: res.status,
895
+ ok: res.ok,
896
+ attempt,
897
+ latencyMs,
898
+ requestId,
899
+ });
665
900
  if (res.ok) {
666
- // 204 no-content tolerance
667
901
  if (res.status === 204)
668
902
  return undefined;
669
903
  return (await res.json());
670
904
  }
671
- if (isRetriableStatus(res.status) && attempt < this.maxRetries) {
672
- await sleep(backoffMs(attempt));
905
+ if (isRetriableStatus(res.status) && attempt < maxRetries) {
906
+ const sleepMs = Math.min(backoffMs(attempt), deadlineAt != null ? Math.max(0, deadlineAt - Date.now() - 50) : Infinity);
907
+ if (deadlineAt != null && sleepMs <= 0) {
908
+ // No headroom for another attempt — fail now rather than
909
+ // sleeping into a guaranteed deadline-exceeded outcome.
910
+ throw new OHMTimeoutError({
911
+ message: `Total request deadline (${totalDeadline}ms) would be exceeded by retry sleep`,
912
+ });
913
+ }
914
+ await sleep(sleepMs);
673
915
  continue;
674
916
  }
675
917
  throw await this.parseError(res);
676
918
  }
677
919
  catch (err) {
678
920
  clearTimeout(t);
921
+ // Hooks first — even for errors we want to surface them to
922
+ // observability before we throw or retry.
923
+ const willRetryDecision = (() => {
924
+ if (err instanceof OHMError)
925
+ return false;
926
+ if (options?.signal?.aborted)
927
+ return false;
928
+ const e = err;
929
+ if (e?.name === "AbortError" || e?.code === "ABORT_ERR")
930
+ return false;
931
+ return attempt < maxRetries;
932
+ })();
933
+ safeHook(this.hooks?.onError, {
934
+ method,
935
+ url,
936
+ attempt,
937
+ error: err instanceof Error ? err : new Error(String(err)),
938
+ willRetry: willRetryDecision,
939
+ });
679
940
  if (err instanceof OHMError)
680
941
  throw err;
681
- // If the caller cancelled, surface that immediately — don't
682
- // burn retries on a request the user no longer wants.
683
942
  if (options?.signal?.aborted) {
684
943
  throw new OHMAbortError();
685
944
  }
945
+ const e = err;
946
+ if (e?.name === "AbortError" || e?.code === "ABORT_ERR") {
947
+ // Distinguish total-deadline timeout from per-attempt timeout
948
+ // in the error message — easier triage in support tickets.
949
+ const msg = deadlineAt != null && Date.now() >= deadlineAt
950
+ ? `Total request deadline (${totalDeadline}ms) exceeded`
951
+ : `Request timed out after ${attemptTimeout}ms`;
952
+ throw new OHMTimeoutError({ message: msg });
953
+ }
686
954
  lastError = err;
687
- if (attempt < this.maxRetries) {
688
- await sleep(backoffMs(attempt));
955
+ if (attempt < maxRetries) {
956
+ const sleepMs = Math.min(backoffMs(attempt), deadlineAt != null ? Math.max(0, deadlineAt - Date.now() - 50) : Infinity);
957
+ if (deadlineAt != null && sleepMs <= 0) {
958
+ throw new OHMTimeoutError({
959
+ message: `Total request deadline (${totalDeadline}ms) would be exceeded by retry sleep`,
960
+ });
961
+ }
962
+ await sleep(sleepMs);
689
963
  continue;
690
964
  }
691
965
  }
@@ -698,43 +972,109 @@ export class OHMCoreClient {
698
972
  }
699
973
  if (lastError instanceof OHMError)
700
974
  throw lastError;
701
- throw new OHMServerError({
975
+ // After all retries exhausted, lastError is a transport-layer
976
+ // failure (DNS / TCP / TLS / dropped connection). Surface as
977
+ // OHMNetworkError so customers can pattern-match for "queue
978
+ // locally" behaviour.
979
+ throw new OHMNetworkError({
702
980
  message: lastError?.message || "Request failed after retries",
703
- status: 0,
704
981
  });
705
982
  }
706
983
  async parseError(res) {
707
984
  let body = {};
985
+ let rawBody;
708
986
  try {
709
- body = await res.json();
987
+ const text = await res.text();
988
+ rawBody = text;
989
+ if (text) {
990
+ try {
991
+ const parsed = JSON.parse(text);
992
+ body = parsed;
993
+ rawBody = parsed;
994
+ }
995
+ catch {
996
+ // body wasn't JSON; that's fine for the error path
997
+ }
998
+ }
710
999
  }
711
1000
  catch {
712
- // body wasn't JSON; that's fine for the error path
1001
+ // body wasn't readable; that's fine for the error path
713
1002
  }
1003
+ // Capture headers as a plain dict so customers can `console.log` or
1004
+ // forward to their telemetry without dragging a Headers instance.
1005
+ const responseHeaders = {};
1006
+ res.headers.forEach((value, key) => {
1007
+ responseHeaders[key.toLowerCase()] = value;
1008
+ });
714
1009
  const requestId = res.headers.get("x-request-id") || res.headers.get("x-ohm-request-id") || undefined;
715
1010
  const message = body?.message || `HTTP ${res.status}`;
1011
+ const base = {
1012
+ message,
1013
+ status: res.status,
1014
+ requestId,
1015
+ responseHeaders,
1016
+ responseBody: rawBody,
1017
+ };
1018
+ // 401 / 403 → auth
716
1019
  if (res.status === 401 || res.status === 403) {
717
- return new OHMAuthError({ message, status: res.status, requestId });
1020
+ return new OHMAuthError(base);
718
1021
  }
1022
+ // 404 → not found (slug, job id, …). Server may include
1023
+ // `availableSlugs` to power a customer-side picker.
1024
+ if (res.status === 404) {
1025
+ return new OHMNotFoundError({
1026
+ ...base,
1027
+ availableSlugs: body?.availableSlugs,
1028
+ });
1029
+ }
1030
+ // 422 / 400 → validation. Server provides a `fields[]` array of
1031
+ // failing JSON-Schema paths.
719
1032
  if (res.status === 422 || res.status === 400) {
720
1033
  return new OHMValidationError({
721
- message,
722
- status: res.status,
723
- requestId,
1034
+ ...base,
724
1035
  fields: body?.fields,
725
1036
  });
726
1037
  }
1038
+ // 429 → rate limit. Distinct from the org-wide quota class; this
1039
+ // is per-key rate limiting that resets every minute.
727
1040
  if (res.status === 429) {
1041
+ // Some providers signal quota exhaustion via 429 with a body
1042
+ // marker. When the server explicitly tags it as a quota issue,
1043
+ // surface it as the more specific class so customers can show
1044
+ // an upgrade-plan modal instead of a "slow down" toast.
1045
+ if (body?.code === "quota_exceeded" || body?.errorCode === "quota_exceeded") {
1046
+ return new OHMQuotaExceededError({
1047
+ ...base,
1048
+ resetAt: body?.resetAt,
1049
+ quotaKind: body?.quotaKind,
1050
+ });
1051
+ }
728
1052
  return new OHMRateLimitError({
729
- message,
730
- status: res.status,
731
- requestId,
1053
+ ...base,
732
1054
  retryAfterSec: Number(res.headers.get("retry-after")) ||
733
1055
  body?.retryAfterSec ||
734
1056
  undefined,
735
1057
  });
736
1058
  }
737
- return new OHMServerError({ message, status: res.status, requestId });
1059
+ // 402 → payment required (Stripe convention); surfaced as a quota-exceeded error.
1060
+ if (res.status === 402) {
1061
+ return new OHMQuotaExceededError({
1062
+ ...base,
1063
+ resetAt: body?.resetAt,
1064
+ quotaKind: body?.quotaKind,
1065
+ });
1066
+ }
1067
+ // 504 (gateway timeout) / 408 (request timeout) → timeout class so customers can pattern
1068
+ // match for "give it another try" UX.
1069
+ if (res.status === 504 || res.status === 408) {
1070
+ return new OHMTimeoutError({
1071
+ message,
1072
+ status: res.status,
1073
+ responseHeaders,
1074
+ responseBody: rawBody,
1075
+ });
1076
+ }
1077
+ return new OHMServerError(base);
738
1078
  }
739
1079
  }
740
1080
  function sleep(ms) {