@halo-sdk/gateway 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,31 @@
1
+ # @halo-sdk/gateway
2
+
3
+ Cache-aware AI gateway for Halo SDK. A `ModelAdapter` that fronts a pool of providers with:
4
+
5
+ - **Sticky routing** — requests stay on the last provider that succeeded, so that provider's prefix cache stays warm. The gateway only moves off it on failure (not round-robin, which would cold-start a different cache every call).
6
+ - **Fallback chains** — on error it tries the remaining providers in order.
7
+
8
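+ Every routing decision emits a `route` event through the optional `onEvent` callback, which is how the gateway feeds the S5 observability spine; when `onEvent` is not set, no events are emitted.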
9
+
10
+ ## Usage
11
+
12
+ ```ts
13
+ import { GatewayAdapter } from "@halo-sdk/gateway";
14
+ import { AnthropicAdapter, OpenAIAdapter } from "@halo-sdk/adapters";
15
+
16
+ const gateway = new GatewayAdapter({
17
+ routes: [
18
+ { name: "anthropic", adapter: new AnthropicAdapter({ apiKey: A }) },
19
+ { name: "openai", adapter: new OpenAIAdapter({ apiKey: O }) }, // fallback
20
+ ],
21
+ onEvent: (e) => console.log(e), // wire to an agent's event bus
22
+ });
23
+
24
+ const agent = halo.agent({ adapter: gateway /* ... */ });
25
+ ```
26
+
27
+ It composes with the `@halo-sdk/otel` decorators — wrap each route's adapter with `withRetry` / `withTelemetry` as needed.
28
+
29
+ ## Reserved extension point
30
+
31
+ The `middleware` option (`GatewayMiddleware`) is the designated integration seam for a forthcoming maintainer-defined capability. Implementations must emit their activity onto the S5 event stream. This interface will change when that capability lands.
package/dist/index.cjs ADDED
@@ -0,0 +1,115 @@
1
+ "use strict";
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
+ var __export = (target, all) => { // Bundler helper: expose every entry of `all` on `target` as an enumerable getter.
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true }); // `all[name]` is itself the getter function, so the exported value is resolved on access.
9
+ };
10
+ var __copyProps = (to, from, except, desc) => { // Bundler helper: mirror the own properties of `from` onto `to` as getters and return `to`.
11
+ if (from && typeof from === "object" || typeof from === "function") { // Only objects and functions have properties to copy; anything else leaves `to` untouched.
12
+ for (let key of __getOwnPropNames(from))
13
+ if (!__hasOwnProp.call(to, key) && key !== except) // Never overwrite a key `to` already owns, and skip the single excluded key.
14
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable }); // Getter reads through to `from`; enumerability follows the source descriptor (enumerable when there is none).
15
+ }
16
+ return to;
17
+ };
18
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
+
20
+ // src/index.ts
21
+ var index_exports = {};
22
+ __export(index_exports, {
23
+ GatewayAdapter: () => GatewayAdapter
24
+ });
25
+ module.exports = __toCommonJS(index_exports);
26
+ var import_core = require("@halo-sdk/core");
27
/**
 * Model adapter that fronts an ordered pool of provider routes.
 *
 * Routing is sticky: each call starts with the route that last served a call
 * and only moves on to the other routes (in their configured order) when that
 * attempt fails. `route` events are reported through the optional `onEvent`
 * callback.
 */
var GatewayAdapter = class {
  _routes;
  _onEvent;
  _middleware;
  // Index into `_routes` of the route tried first; starts at the first route.
  _preferred = 0;

  /**
   * @param {object} opts
   * @param {Array<{ name: string, adapter: object }>} opts.routes Routes in priority order.
   * @param {Function} [opts.onEvent] Receives every `route` event.
   * @param {object} [opts.middleware] Optional `beforeRequest` / `afterResponse` hooks.
   * @throws {Error} When `opts.routes` is empty.
   */
  constructor(opts) {
    if (!opts.routes.length) throw new Error("GatewayAdapter requires at least one route.");
    this._routes = opts.routes;
    this._onEvent = opts.onEvent;
    this._middleware = opts.middleware;
  }

  // Surface the currently-preferred provider's capabilities/pricing/identity.
  get _primary() {
    return this._routes[this._preferred].adapter;
  }
  get modelId() {
    return this._primary.modelId;
  }
  get contextWindow() {
    return this._primary.contextWindow;
  }
  get capabilities() {
    return this._primary.capabilities;
  }
  get pricing() {
    return this._primary.pricing;
  }

  /** Provider order the gateway will try this call: preferred first, then the rest. */
  _order() {
    const order = [this._preferred];
    for (let i = 0; i < this._routes.length; i++) if (i !== this._preferred) order.push(i);
    return order;
  }

  /**
   * Reports a routing decision to `onEvent`; does nothing when no callback was configured.
   *
   * @param {{ name: string, adapter: object }} route Route the decision is about.
   * @param {string} reason `"sticky"` or `"fallback:<previous error message>"`.
   */
  _emitRoute(route, reason) {
    this._onEvent?.({
      type: "route",
      ts: (0, import_core.eventNow)(),
      provider: route.name,
      model: route.adapter.modelId,
      reason
    });
  }

  /**
   * Sends a chat request to the first route that answers.
   *
   * Failures of `beforeRequest`, the `route` event sink, or the provider call
   * fall through to the next route. Once a provider has answered, the route
   * becomes preferred and the result is final: an error thrown by the
   * `afterResponse` hook propagates to the caller instead of re-sending the
   * already-answered request to another provider.
   *
   * @param {object} params Forwarded unchanged to the route adapter's `chat`.
   * @returns {Promise<object>} The result of the adapter that answered.
   * @throws {Error} The last failure when every route failed.
   */
  async chat(params) {
    let lastErr = new Error("no routes");
    const order = this._order();
    for (let oi = 0; oi < order.length; oi++) {
      const idx = order[oi];
      const route = this._routes[idx];
      let result;
      try {
        await this._middleware?.beforeRequest?.({ route, params });
        this._emitRoute(route, oi === 0 ? "sticky" : `fallback:${lastErr.message}`);
        result = await route.adapter.chat(params);
      } catch (err) {
        lastErr = err instanceof Error ? err : new Error(String(err));
        continue;
      }
      // Stay sticky on the provider that worked, whatever the hook does next.
      this._preferred = idx;
      await this._middleware?.afterResponse?.({ route, usage: result.usage });
      return result;
    }
    throw lastErr;
  }

  /**
   * Streams a response from the first route whose stream opens successfully.
   *
   * A route counts as opened once its first `next()` settles without throwing;
   * failures up to that point fall through to the next route. After that the
   * stream is committed to the route: the `route` event is emitted, the route
   * becomes preferred, and later errors (a mid-stream provider error or a
   * throwing `afterResponse` hook) propagate to the consumer. Falling back at
   * that stage would replay a second provider's output after chunks the
   * consumer has already received.
   *
   * @param {object} params Forwarded unchanged to the route adapter's `stream`.
   * @throws {Error} The last failure when no route could open a stream.
   */
  async *stream(params) {
    const order = this._order();
    let lastErr = new Error("no routes");
    for (let oi = 0; oi < order.length; oi++) {
      const idx = order[oi];
      const route = this._routes[idx];
      let gen;
      let first;
      try {
        await this._middleware?.beforeRequest?.({ route, params });
        // Pull the first chunk eagerly so connection errors are caught here for fallback.
        gen = route.adapter.stream(params);
        first = await gen.next();
      } catch (err) {
        lastErr = err instanceof Error ? err : new Error(String(err));
        continue;
      }
      let handedOff = false;
      try {
        this._emitRoute(route, oi === 0 ? "sticky" : `fallback:${lastErr.message}`);
        this._preferred = idx;
        if (!first.done) yield first.value;
        handedOff = true;
        yield* gen;
      } finally {
        // `yield*` forwards return/throw to `gen` by itself; before it takes over
        // (event sink threw, or the consumer stopped on the first chunk) the
        // upstream stream has to be closed here or it stays open.
        if (!handedOff) await gen.return?.();
      }
      await this._middleware?.afterResponse?.({ route });
      return;
    }
    throw lastErr;
  }
};
111
+ // Annotate the CommonJS export names for ESM import in node:
112
+ 0 && (module.exports = {
113
+ GatewayAdapter
114
+ });
115
+ //# sourceMappingURL=index.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/index.ts"],"sourcesContent":["import {\n eventNow,\n type ChatParams,\n type HaloEvent,\n type ModelAdapter,\n type ModelCapabilities,\n type PricingInfo,\n type ToolCall,\n type TurnChunk,\n type Usage,\n} from \"@halo-sdk/core\";\n\n/** A named provider in the gateway's routing pool. */\nexport interface GatewayRoute {\n /** Human-readable provider name (for route events + diagnostics). */\n name: string;\n adapter: ModelAdapter;\n}\n\n/**\n * Reserved extension seam.\n *\n * The gateway is the designated integration point for a forthcoming\n * maintainer-defined capability (TBD). Implementations receive every request\n * and response and **must** emit their activity onto the S5 event stream via\n * the gateway's route/observer callback rather than logging out of band. This\n * interface and its call sites will change when that capability lands.\n */\nexport interface GatewayMiddleware {\n beforeRequest?(ctx: { route: GatewayRoute; params: ChatParams }): void | Promise<void>;\n afterResponse?(ctx: { route: GatewayRoute; usage?: Usage }): void | Promise<void>;\n}\n\nexport interface GatewayOptions {\n /** Providers in priority order; the first is the default route. */\n routes: GatewayRoute[];\n /**\n * Sink for `route` {@link HaloEvent}s. Wire it to an agent's observer, e.g.\n * `new GatewayAdapter({ routes, onEvent: (e) => bus.emit(e) })`.\n */\n onEvent?: (event: Extract<HaloEvent, { type: \"route\" }>) => void;\n /** Reserved extension seam (see {@link GatewayMiddleware}). 
*/\n middleware?: GatewayMiddleware;\n}\n\n/**\n * A cache-aware {@link ModelAdapter} that fronts a pool of providers.\n *\n * - **Sticky routing** — requests stay on the last provider that succeeded, so\n * that provider's prefix cache stays warm; the gateway only moves on failure.\n * - **Fallback chains** — on error it tries the remaining providers in order;\n * a fallback never silently discards a warm cache because routing is sticky,\n * not round-robin.\n *\n * Composes with the `@halo-sdk/otel` decorators (wrap each route's adapter) and\n * emits a `route` event (onto the S5 spine via `onEvent`) for every decision.\n */\nexport class GatewayAdapter implements ModelAdapter {\n private readonly _routes: GatewayRoute[];\n private readonly _onEvent: ((event: Extract<HaloEvent, { type: \"route\" }>) => void) | undefined;\n private readonly _middleware: GatewayMiddleware | undefined;\n private _preferred = 0;\n\n constructor(opts: GatewayOptions) {\n if (!opts.routes.length) throw new Error(\"GatewayAdapter requires at least one route.\");\n this._routes = opts.routes;\n this._onEvent = opts.onEvent;\n this._middleware = opts.middleware;\n }\n\n // Surface the currently-preferred provider's capabilities/pricing/identity.\n private get _primary(): ModelAdapter {\n return this._routes[this._preferred]!.adapter;\n }\n get modelId(): string {\n return this._primary.modelId;\n }\n get contextWindow(): number {\n return this._primary.contextWindow;\n }\n get capabilities(): ModelCapabilities {\n return this._primary.capabilities;\n }\n get pricing(): PricingInfo | undefined {\n return this._primary.pricing;\n }\n\n /** Provider order the gateway will try this call: preferred first, then the rest. 
*/\n private _order(): number[] {\n const order = [this._preferred];\n for (let i = 0; i < this._routes.length; i++) if (i !== this._preferred) order.push(i);\n return order;\n }\n\n private _emitRoute(route: GatewayRoute, reason: string): void {\n this._onEvent?.({\n type: \"route\",\n ts: eventNow(),\n provider: route.name,\n model: route.adapter.modelId,\n reason,\n });\n }\n\n async chat(params: ChatParams): Promise<{\n content: string;\n toolCalls: ToolCall[];\n usage: Usage;\n reasoning?: { text: string; signature?: string };\n }> {\n let lastErr: Error = new Error(\"no routes\");\n const order = this._order();\n for (let oi = 0; oi < order.length; oi++) {\n const idx = order[oi]!;\n const route = this._routes[idx]!;\n try {\n await this._middleware?.beforeRequest?.({ route, params });\n this._emitRoute(route, oi === 0 ? \"sticky\" : `fallback:${lastErr.message}`);\n const result = await route.adapter.chat(params);\n await this._middleware?.afterResponse?.({ route, usage: result.usage });\n this._preferred = idx; // stay sticky on the provider that worked\n return result;\n } catch (err: unknown) {\n lastErr = err instanceof Error ? err : new Error(String(err));\n }\n }\n throw lastErr;\n }\n\n async *stream(params: ChatParams): AsyncGenerator<TurnChunk> {\n const order = this._order();\n let lastErr: Error = new Error(\"no routes\");\n for (let oi = 0; oi < order.length; oi++) {\n const idx = order[oi]!;\n const route = this._routes[idx]!;\n try {\n await this._middleware?.beforeRequest?.({ route, params });\n // Open the stream eagerly so connection errors are caught here for fallback.\n const gen = route.adapter.stream(params);\n const first = await gen.next();\n this._emitRoute(route, oi === 0 ? \"sticky\" : `fallback:${lastErr.message}`);\n this._preferred = idx;\n if (!first.done) yield first.value;\n yield* gen;\n await this._middleware?.afterResponse?.({ route });\n return;\n } catch (err: unknown) {\n lastErr = err instanceof Error ? 
err : new Error(String(err));\n }\n }\n throw lastErr;\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,kBAUO;AA+CA,IAAM,iBAAN,MAA6C;AAAA,EACjC;AAAA,EACA;AAAA,EACA;AAAA,EACT,aAAa;AAAA,EAErB,YAAY,MAAsB;AAChC,QAAI,CAAC,KAAK,OAAO,OAAQ,OAAM,IAAI,MAAM,6CAA6C;AACtF,SAAK,UAAU,KAAK;AACpB,SAAK,WAAW,KAAK;AACrB,SAAK,cAAc,KAAK;AAAA,EAC1B;AAAA;AAAA,EAGA,IAAY,WAAyB;AACnC,WAAO,KAAK,QAAQ,KAAK,UAAU,EAAG;AAAA,EACxC;AAAA,EACA,IAAI,UAAkB;AACpB,WAAO,KAAK,SAAS;AAAA,EACvB;AAAA,EACA,IAAI,gBAAwB;AAC1B,WAAO,KAAK,SAAS;AAAA,EACvB;AAAA,EACA,IAAI,eAAkC;AACpC,WAAO,KAAK,SAAS;AAAA,EACvB;AAAA,EACA,IAAI,UAAmC;AACrC,WAAO,KAAK,SAAS;AAAA,EACvB;AAAA;AAAA,EAGQ,SAAmB;AACzB,UAAM,QAAQ,CAAC,KAAK,UAAU;AAC9B,aAAS,IAAI,GAAG,IAAI,KAAK,QAAQ,QAAQ,IAAK,KAAI,MAAM,KAAK,WAAY,OAAM,KAAK,CAAC;AACrF,WAAO;AAAA,EACT;AAAA,EAEQ,WAAW,OAAqB,QAAsB;AAC5D,SAAK,WAAW;AAAA,MACd,MAAM;AAAA,MACN,QAAI,sBAAS;AAAA,MACb,UAAU,MAAM;AAAA,MAChB,OAAO,MAAM,QAAQ;AAAA,MACrB;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,KAAK,QAKR;AACD,QAAI,UAAiB,IAAI,MAAM,WAAW;AAC1C,UAAM,QAAQ,KAAK,OAAO;AAC1B,aAAS,KAAK,GAAG,KAAK,MAAM,QAAQ,MAAM;AACxC,YAAM,MAAM,MAAM,EAAE;AACpB,YAAM,QAAQ,KAAK,QAAQ,GAAG;AAC9B,UAAI;AACF,cAAM,KAAK,aAAa,gBAAgB,EAAE,OAAO,OAAO,CAAC;AACzD,aAAK,WAAW,OAAO,OAAO,IAAI,WAAW,YAAY,QAAQ,OAAO,EAAE;AAC1E,cAAM,SAAS,MAAM,MAAM,QAAQ,KAAK,MAAM;AAC9C,cAAM,KAAK,aAAa,gBAAgB,EAAE,OAAO,OAAO,OAAO,MAAM,CAAC;AACtE,aAAK,aAAa;AAClB,eAAO;AAAA,MACT,SAAS,KAAc;AACrB,kBAAU,eAAe,QAAQ,MAAM,IAAI,MAAM,OAAO,GAAG,CAAC;AAAA,MAC9D;AAAA,IACF;AACA,UAAM;AAAA,EACR;AAAA,EAEA,OAAO,OAAO,QAA+C;AAC3D,UAAM,QAAQ,KAAK,OAAO;AAC1B,QAAI,UAAiB,IAAI,MAAM,WAAW;AAC1C,aAAS,KAAK,GAAG,KAAK,MAAM,QAAQ,MAAM;AACxC,YAAM,MAAM,MAAM,EAAE;AACpB,YAAM,QAAQ,KAAK,QAAQ,GAAG;AAC9B,UAAI;AACF,cAAM,KAAK,aAAa,gBAAgB,EAAE,OAAO,OAAO,CAAC;AAEzD,cAAM,MAAM,MAAM,QAAQ,OAAO,MAAM;AACvC,cAAM,QAAQ,MAAM,IAAI,KAAK;AAC7B,aAAK,WAAW,OAAO,OAAO,IAAI,WAAW,YAAY,QAAQ,OAAO,EAAE;AAC1E,aAAK,aAAa;AAClB,YAAI,CAAC,MAAM,KAAM,OAAM,MAAM;AAC7B,eAAO;AACP,cAAM,KAAK,aAAa,gBAAgB,EAAE,MAAM,CAAC;AACjD;AAAA,MACF,SAAS,KAAc;AACrB,kBAAU,eAAe,QAA
Q,MAAM,IAAI,MAAM,OAAO,GAAG,CAAC;AAAA,MAC9D;AAAA,IACF;AACA,UAAM;AAAA,EACR;AACF;","names":[]}
@@ -0,0 +1,77 @@
1
+ import { type ChatParams, type HaloEvent, type ModelAdapter, type ModelCapabilities, type PricingInfo, type ToolCall, type TurnChunk, type Usage } from "@halo-sdk/core";
2
+ /** A named provider in the gateway's routing pool. */
3
+ export interface GatewayRoute {
4
+ /** Human-readable provider name (for route events + diagnostics). */
5
+ name: string;
6
+ adapter: ModelAdapter; // Adapter the gateway delegates chat()/stream() calls to when this route is tried.
7
+ }
8
+ /**
9
+ * Reserved extension seam.
10
+ *
11
+ * The gateway is the designated integration point for a forthcoming
12
+ * maintainer-defined capability (TBD). Implementations receive every request
13
+ * and response and **must** emit their activity onto the S5 event stream via
14
+ * the gateway's route/observer callback rather than logging out of band. This
15
+ * interface and its call sites will change when that capability lands.
16
+ */
17
+ export interface GatewayMiddleware {
18
+ beforeRequest?(ctx: { // Awaited before each route attempt, with the route about to be tried and the caller's params.
19
+ route: GatewayRoute;
20
+ params: ChatParams;
21
+ }): void | Promise<void>;
22
+ afterResponse?(ctx: { // Awaited after a route has produced its response.
23
+ route: GatewayRoute;
24
+ usage?: Usage; // Supplied by chat(); stream() invokes the hook without it.
25
+ }): void | Promise<void>;
26
+ }
27
+ export interface GatewayOptions {
28
+ /** Providers in priority order; the first is the default route. Must be non-empty: the constructor throws otherwise. */
29
+ routes: GatewayRoute[];
30
+ /**
31
+ * Sink for `route` {@link HaloEvent}s. Wire it to an agent's observer, e.g.
32
+ * `new GatewayAdapter({ routes, onEvent: (e) => bus.emit(e) })`.
33
+ */
34
+ onEvent?: (event: Extract<HaloEvent, {
35
+ type: "route";
36
+ }>) => void;
37
+ /** Reserved extension seam (see {@link GatewayMiddleware}). */
38
+ middleware?: GatewayMiddleware;
39
+ }
40
+ /**
41
+ * A cache-aware {@link ModelAdapter} that fronts a pool of providers.
42
+ *
43
+ * - **Sticky routing** — requests stay on the last provider that succeeded, so
44
+ * that provider's prefix cache stays warm; the gateway only moves on failure.
45
+ * - **Fallback chains** — on error it tries the remaining providers in order;
46
+ * a fallback never silently discards a warm cache because routing is sticky,
47
+ * not round-robin.
48
+ *
49
+ * Composes with the `@halo-sdk/otel` decorators (wrap each route's adapter) and
50
+ * emits a `route` event (onto the S5 spine via `onEvent`) for every decision.
51
+ */
52
+ export declare class GatewayAdapter implements ModelAdapter {
53
+ private readonly _routes;
54
+ private readonly _onEvent;
55
+ private readonly _middleware;
56
+ private _preferred; // Index of the route tried first on the next call; starts at 0.
57
+ constructor(opts: GatewayOptions); // Throws an Error when `opts.routes` is empty.
58
+ private get _primary(); // Adapter of the currently preferred route.
59
+ get modelId(): string; // modelId, contextWindow, capabilities and pricing all mirror the currently preferred route's adapter.
60
+ get contextWindow(): number;
61
+ get capabilities(): ModelCapabilities;
62
+ get pricing(): PricingInfo | undefined;
63
+ /** Provider order the gateway will try this call: preferred first, then the rest. */
64
+ private _order;
65
+ private _emitRoute; // Forwards a `route` event to `onEvent` when one was configured.
66
+ chat(params: ChatParams): Promise<{ // Tries the preferred route first, then the others in order; rejects with the last error when every route fails.
67
+ content: string;
68
+ toolCalls: ToolCall[];
69
+ usage: Usage;
70
+ reasoning?: {
71
+ text: string;
72
+ signature?: string;
73
+ };
74
+ }>;
75
+ stream(params: ChatParams): AsyncGenerator<TurnChunk>; // Streaming counterpart of chat(), using the same route order.
76
+ }
77
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAEL,KAAK,UAAU,EACf,KAAK,SAAS,EACd,KAAK,YAAY,EACjB,KAAK,iBAAiB,EACtB,KAAK,WAAW,EAChB,KAAK,QAAQ,EACb,KAAK,SAAS,EACd,KAAK,KAAK,EACX,MAAM,gBAAgB,CAAC;AAExB,sDAAsD;AACtD,MAAM,WAAW,YAAY;IAC3B,qEAAqE;IACrE,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,YAAY,CAAC;CACvB;AAED;;;;;;;;GAQG;AACH,MAAM,WAAW,iBAAiB;IAChC,aAAa,CAAC,CAAC,GAAG,EAAE;QAAE,KAAK,EAAE,YAAY,CAAC;QAAC,MAAM,EAAE,UAAU,CAAA;KAAE,GAAG,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACvF,aAAa,CAAC,CAAC,GAAG,EAAE;QAAE,KAAK,EAAE,YAAY,CAAC;QAAC,KAAK,CAAC,EAAE,KAAK,CAAA;KAAE,GAAG,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;CACnF;AAED,MAAM,WAAW,cAAc;IAC7B,mEAAmE;IACnE,MAAM,EAAE,YAAY,EAAE,CAAC;IACvB;;;OAGG;IACH,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,OAAO,CAAC,SAAS,EAAE;QAAE,IAAI,EAAE,OAAO,CAAA;KAAE,CAAC,KAAK,IAAI,CAAC;IACjE,+DAA+D;IAC/D,UAAU,CAAC,EAAE,iBAAiB,CAAC;CAChC;AAED;;;;;;;;;;;GAWG;AACH,qBAAa,cAAe,YAAW,YAAY;IACjD,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAiB;IACzC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAuE;IAChG,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAgC;IAC5D,OAAO,CAAC,UAAU,CAAK;gBAEX,IAAI,EAAE,cAAc;IAQhC,OAAO,KAAK,QAAQ,GAEnB;IACD,IAAI,OAAO,IAAI,MAAM,CAEpB;IACD,IAAI,aAAa,IAAI,MAAM,CAE1B;IACD,IAAI,YAAY,IAAI,iBAAiB,CAEpC;IACD,IAAI,OAAO,IAAI,WAAW,GAAG,SAAS,CAErC;IAED,qFAAqF;IACrF,OAAO,CAAC,MAAM;IAMd,OAAO,CAAC,UAAU;IAUZ,IAAI,CAAC,MAAM,EAAE,UAAU,GAAG,OAAO,CAAC;QACtC,OAAO,EAAE,MAAM,CAAC;QAChB,SAAS,EAAE,QAAQ,EAAE,CAAC;QACtB,KAAK,EAAE,KAAK,CAAC;QACb,SAAS,CAAC,EAAE;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,SAAS,CAAC,EAAE,MAAM,CAAA;SAAE,CAAC;KAClD,CAAC;IAoBK,MAAM,CAAC,MAAM,EAAE,UAAU,GAAG,cAAc,CAAC,SAAS,CAAC;CAuB7D"}
package/dist/index.js ADDED
@@ -0,0 +1,92 @@
1
+ // src/index.ts
2
+ import {
3
+ eventNow
4
+ } from "@halo-sdk/core";
5
/**
 * Model adapter that fronts an ordered pool of provider routes.
 *
 * Routing is sticky: each call starts with the route that last served a call
 * and only moves on to the other routes (in their configured order) when that
 * attempt fails. `route` events are reported through the optional `onEvent`
 * callback.
 */
var GatewayAdapter = class {
  _routes;
  _onEvent;
  _middleware;
  // Index into `_routes` of the route tried first; starts at the first route.
  _preferred = 0;

  /**
   * @param {object} opts
   * @param {Array<{ name: string, adapter: object }>} opts.routes Routes in priority order.
   * @param {Function} [opts.onEvent] Receives every `route` event.
   * @param {object} [opts.middleware] Optional `beforeRequest` / `afterResponse` hooks.
   * @throws {Error} When `opts.routes` is empty.
   */
  constructor(opts) {
    if (!opts.routes.length) throw new Error("GatewayAdapter requires at least one route.");
    this._routes = opts.routes;
    this._onEvent = opts.onEvent;
    this._middleware = opts.middleware;
  }

  // Surface the currently-preferred provider's capabilities/pricing/identity.
  get _primary() {
    return this._routes[this._preferred].adapter;
  }
  get modelId() {
    return this._primary.modelId;
  }
  get contextWindow() {
    return this._primary.contextWindow;
  }
  get capabilities() {
    return this._primary.capabilities;
  }
  get pricing() {
    return this._primary.pricing;
  }

  /** Provider order the gateway will try this call: preferred first, then the rest. */
  _order() {
    const order = [this._preferred];
    for (let i = 0; i < this._routes.length; i++) if (i !== this._preferred) order.push(i);
    return order;
  }

  /**
   * Reports a routing decision to `onEvent`; does nothing when no callback was configured.
   *
   * @param {{ name: string, adapter: object }} route Route the decision is about.
   * @param {string} reason `"sticky"` or `"fallback:<previous error message>"`.
   */
  _emitRoute(route, reason) {
    this._onEvent?.({
      type: "route",
      ts: eventNow(),
      provider: route.name,
      model: route.adapter.modelId,
      reason
    });
  }

  /**
   * Sends a chat request to the first route that answers.
   *
   * Failures of `beforeRequest`, the `route` event sink, or the provider call
   * fall through to the next route. Once a provider has answered, the route
   * becomes preferred and the result is final: an error thrown by the
   * `afterResponse` hook propagates to the caller instead of re-sending the
   * already-answered request to another provider.
   *
   * @param {object} params Forwarded unchanged to the route adapter's `chat`.
   * @returns {Promise<object>} The result of the adapter that answered.
   * @throws {Error} The last failure when every route failed.
   */
  async chat(params) {
    let lastErr = new Error("no routes");
    const order = this._order();
    for (let oi = 0; oi < order.length; oi++) {
      const idx = order[oi];
      const route = this._routes[idx];
      let result;
      try {
        await this._middleware?.beforeRequest?.({ route, params });
        this._emitRoute(route, oi === 0 ? "sticky" : `fallback:${lastErr.message}`);
        result = await route.adapter.chat(params);
      } catch (err) {
        lastErr = err instanceof Error ? err : new Error(String(err));
        continue;
      }
      // Stay sticky on the provider that worked, whatever the hook does next.
      this._preferred = idx;
      await this._middleware?.afterResponse?.({ route, usage: result.usage });
      return result;
    }
    throw lastErr;
  }

  /**
   * Streams a response from the first route whose stream opens successfully.
   *
   * A route counts as opened once its first `next()` settles without throwing;
   * failures up to that point fall through to the next route. After that the
   * stream is committed to the route: the `route` event is emitted, the route
   * becomes preferred, and later errors (a mid-stream provider error or a
   * throwing `afterResponse` hook) propagate to the consumer. Falling back at
   * that stage would replay a second provider's output after chunks the
   * consumer has already received.
   *
   * @param {object} params Forwarded unchanged to the route adapter's `stream`.
   * @throws {Error} The last failure when no route could open a stream.
   */
  async *stream(params) {
    const order = this._order();
    let lastErr = new Error("no routes");
    for (let oi = 0; oi < order.length; oi++) {
      const idx = order[oi];
      const route = this._routes[idx];
      let gen;
      let first;
      try {
        await this._middleware?.beforeRequest?.({ route, params });
        // Pull the first chunk eagerly so connection errors are caught here for fallback.
        gen = route.adapter.stream(params);
        first = await gen.next();
      } catch (err) {
        lastErr = err instanceof Error ? err : new Error(String(err));
        continue;
      }
      let handedOff = false;
      try {
        this._emitRoute(route, oi === 0 ? "sticky" : `fallback:${lastErr.message}`);
        this._preferred = idx;
        if (!first.done) yield first.value;
        handedOff = true;
        yield* gen;
      } finally {
        // `yield*` forwards return/throw to `gen` by itself; before it takes over
        // (event sink threw, or the consumer stopped on the first chunk) the
        // upstream stream has to be closed here or it stays open.
        if (!handedOff) await gen.return?.();
      }
      await this._middleware?.afterResponse?.({ route });
      return;
    }
    throw lastErr;
  }
};
89
+ export {
90
+ GatewayAdapter
91
+ };
92
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/index.ts"],"sourcesContent":["import {\n eventNow,\n type ChatParams,\n type HaloEvent,\n type ModelAdapter,\n type ModelCapabilities,\n type PricingInfo,\n type ToolCall,\n type TurnChunk,\n type Usage,\n} from \"@halo-sdk/core\";\n\n/** A named provider in the gateway's routing pool. */\nexport interface GatewayRoute {\n /** Human-readable provider name (for route events + diagnostics). */\n name: string;\n adapter: ModelAdapter;\n}\n\n/**\n * Reserved extension seam.\n *\n * The gateway is the designated integration point for a forthcoming\n * maintainer-defined capability (TBD). Implementations receive every request\n * and response and **must** emit their activity onto the S5 event stream via\n * the gateway's route/observer callback rather than logging out of band. This\n * interface and its call sites will change when that capability lands.\n */\nexport interface GatewayMiddleware {\n beforeRequest?(ctx: { route: GatewayRoute; params: ChatParams }): void | Promise<void>;\n afterResponse?(ctx: { route: GatewayRoute; usage?: Usage }): void | Promise<void>;\n}\n\nexport interface GatewayOptions {\n /** Providers in priority order; the first is the default route. */\n routes: GatewayRoute[];\n /**\n * Sink for `route` {@link HaloEvent}s. Wire it to an agent's observer, e.g.\n * `new GatewayAdapter({ routes, onEvent: (e) => bus.emit(e) })`.\n */\n onEvent?: (event: Extract<HaloEvent, { type: \"route\" }>) => void;\n /** Reserved extension seam (see {@link GatewayMiddleware}). 
*/\n middleware?: GatewayMiddleware;\n}\n\n/**\n * A cache-aware {@link ModelAdapter} that fronts a pool of providers.\n *\n * - **Sticky routing** — requests stay on the last provider that succeeded, so\n * that provider's prefix cache stays warm; the gateway only moves on failure.\n * - **Fallback chains** — on error it tries the remaining providers in order;\n * a fallback never silently discards a warm cache because routing is sticky,\n * not round-robin.\n *\n * Composes with the `@halo-sdk/otel` decorators (wrap each route's adapter) and\n * emits a `route` event (onto the S5 spine via `onEvent`) for every decision.\n */\nexport class GatewayAdapter implements ModelAdapter {\n private readonly _routes: GatewayRoute[];\n private readonly _onEvent: ((event: Extract<HaloEvent, { type: \"route\" }>) => void) | undefined;\n private readonly _middleware: GatewayMiddleware | undefined;\n private _preferred = 0;\n\n constructor(opts: GatewayOptions) {\n if (!opts.routes.length) throw new Error(\"GatewayAdapter requires at least one route.\");\n this._routes = opts.routes;\n this._onEvent = opts.onEvent;\n this._middleware = opts.middleware;\n }\n\n // Surface the currently-preferred provider's capabilities/pricing/identity.\n private get _primary(): ModelAdapter {\n return this._routes[this._preferred]!.adapter;\n }\n get modelId(): string {\n return this._primary.modelId;\n }\n get contextWindow(): number {\n return this._primary.contextWindow;\n }\n get capabilities(): ModelCapabilities {\n return this._primary.capabilities;\n }\n get pricing(): PricingInfo | undefined {\n return this._primary.pricing;\n }\n\n /** Provider order the gateway will try this call: preferred first, then the rest. 
*/\n private _order(): number[] {\n const order = [this._preferred];\n for (let i = 0; i < this._routes.length; i++) if (i !== this._preferred) order.push(i);\n return order;\n }\n\n private _emitRoute(route: GatewayRoute, reason: string): void {\n this._onEvent?.({\n type: \"route\",\n ts: eventNow(),\n provider: route.name,\n model: route.adapter.modelId,\n reason,\n });\n }\n\n async chat(params: ChatParams): Promise<{\n content: string;\n toolCalls: ToolCall[];\n usage: Usage;\n reasoning?: { text: string; signature?: string };\n }> {\n let lastErr: Error = new Error(\"no routes\");\n const order = this._order();\n for (let oi = 0; oi < order.length; oi++) {\n const idx = order[oi]!;\n const route = this._routes[idx]!;\n try {\n await this._middleware?.beforeRequest?.({ route, params });\n this._emitRoute(route, oi === 0 ? \"sticky\" : `fallback:${lastErr.message}`);\n const result = await route.adapter.chat(params);\n await this._middleware?.afterResponse?.({ route, usage: result.usage });\n this._preferred = idx; // stay sticky on the provider that worked\n return result;\n } catch (err: unknown) {\n lastErr = err instanceof Error ? err : new Error(String(err));\n }\n }\n throw lastErr;\n }\n\n async *stream(params: ChatParams): AsyncGenerator<TurnChunk> {\n const order = this._order();\n let lastErr: Error = new Error(\"no routes\");\n for (let oi = 0; oi < order.length; oi++) {\n const idx = order[oi]!;\n const route = this._routes[idx]!;\n try {\n await this._middleware?.beforeRequest?.({ route, params });\n // Open the stream eagerly so connection errors are caught here for fallback.\n const gen = route.adapter.stream(params);\n const first = await gen.next();\n this._emitRoute(route, oi === 0 ? \"sticky\" : `fallback:${lastErr.message}`);\n this._preferred = idx;\n if (!first.done) yield first.value;\n yield* gen;\n await this._middleware?.afterResponse?.({ route });\n return;\n } catch (err: unknown) {\n lastErr = err instanceof Error ? 
err : new Error(String(err));\n }\n }\n throw lastErr;\n }\n}\n"],"mappings":";AAAA;AAAA,EACE;AAAA,OASK;AA+CA,IAAM,iBAAN,MAA6C;AAAA,EACjC;AAAA,EACA;AAAA,EACA;AAAA,EACT,aAAa;AAAA,EAErB,YAAY,MAAsB;AAChC,QAAI,CAAC,KAAK,OAAO,OAAQ,OAAM,IAAI,MAAM,6CAA6C;AACtF,SAAK,UAAU,KAAK;AACpB,SAAK,WAAW,KAAK;AACrB,SAAK,cAAc,KAAK;AAAA,EAC1B;AAAA;AAAA,EAGA,IAAY,WAAyB;AACnC,WAAO,KAAK,QAAQ,KAAK,UAAU,EAAG;AAAA,EACxC;AAAA,EACA,IAAI,UAAkB;AACpB,WAAO,KAAK,SAAS;AAAA,EACvB;AAAA,EACA,IAAI,gBAAwB;AAC1B,WAAO,KAAK,SAAS;AAAA,EACvB;AAAA,EACA,IAAI,eAAkC;AACpC,WAAO,KAAK,SAAS;AAAA,EACvB;AAAA,EACA,IAAI,UAAmC;AACrC,WAAO,KAAK,SAAS;AAAA,EACvB;AAAA;AAAA,EAGQ,SAAmB;AACzB,UAAM,QAAQ,CAAC,KAAK,UAAU;AAC9B,aAAS,IAAI,GAAG,IAAI,KAAK,QAAQ,QAAQ,IAAK,KAAI,MAAM,KAAK,WAAY,OAAM,KAAK,CAAC;AACrF,WAAO;AAAA,EACT;AAAA,EAEQ,WAAW,OAAqB,QAAsB;AAC5D,SAAK,WAAW;AAAA,MACd,MAAM;AAAA,MACN,IAAI,SAAS;AAAA,MACb,UAAU,MAAM;AAAA,MAChB,OAAO,MAAM,QAAQ;AAAA,MACrB;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,KAAK,QAKR;AACD,QAAI,UAAiB,IAAI,MAAM,WAAW;AAC1C,UAAM,QAAQ,KAAK,OAAO;AAC1B,aAAS,KAAK,GAAG,KAAK,MAAM,QAAQ,MAAM;AACxC,YAAM,MAAM,MAAM,EAAE;AACpB,YAAM,QAAQ,KAAK,QAAQ,GAAG;AAC9B,UAAI;AACF,cAAM,KAAK,aAAa,gBAAgB,EAAE,OAAO,OAAO,CAAC;AACzD,aAAK,WAAW,OAAO,OAAO,IAAI,WAAW,YAAY,QAAQ,OAAO,EAAE;AAC1E,cAAM,SAAS,MAAM,MAAM,QAAQ,KAAK,MAAM;AAC9C,cAAM,KAAK,aAAa,gBAAgB,EAAE,OAAO,OAAO,OAAO,MAAM,CAAC;AACtE,aAAK,aAAa;AAClB,eAAO;AAAA,MACT,SAAS,KAAc;AACrB,kBAAU,eAAe,QAAQ,MAAM,IAAI,MAAM,OAAO,GAAG,CAAC;AAAA,MAC9D;AAAA,IACF;AACA,UAAM;AAAA,EACR;AAAA,EAEA,OAAO,OAAO,QAA+C;AAC3D,UAAM,QAAQ,KAAK,OAAO;AAC1B,QAAI,UAAiB,IAAI,MAAM,WAAW;AAC1C,aAAS,KAAK,GAAG,KAAK,MAAM,QAAQ,MAAM;AACxC,YAAM,MAAM,MAAM,EAAE;AACpB,YAAM,QAAQ,KAAK,QAAQ,GAAG;AAC9B,UAAI;AACF,cAAM,KAAK,aAAa,gBAAgB,EAAE,OAAO,OAAO,CAAC;AAEzD,cAAM,MAAM,MAAM,QAAQ,OAAO,MAAM;AACvC,cAAM,QAAQ,MAAM,IAAI,KAAK;AAC7B,aAAK,WAAW,OAAO,OAAO,IAAI,WAAW,YAAY,QAAQ,OAAO,EAAE;AAC1E,aAAK,aAAa;AAClB,YAAI,CAAC,MAAM,KAAM,OAAM,MAAM;AAC7B,eAAO;AACP,cAAM,KAAK,aAAa,gBAAgB,EAAE,MAAM,CAAC;AACjD;AAAA,MACF,SAAS,KAAc;AACrB,kBAAU,eAAe,QAAQ,MAAM,IAAI,MAAM,OAAO,GAAG,CAAC
;AAAA,MAC9D;AAAA,IACF;AACA,UAAM;AAAA,EACR;AACF;","names":[]}
package/package.json ADDED
@@ -0,0 +1,51 @@
1
+ {
2
+ "name": "@halo-sdk/gateway",
3
+ "version": "1.0.0",
4
+ "description": "Cache-aware AI gateway for Halo SDK — sticky routing + fallback chains that don't throw away a warm prefix cache",
5
+ "keywords": [
6
+ "ai",
7
+ "fallback",
8
+ "gateway",
9
+ "llm",
10
+ "prefix-cache",
11
+ "routing"
12
+ ],
13
+ "license": "MIT",
14
+ "repository": {
15
+ "type": "git",
16
+ "url": "https://github.com/halo-sdk/halo-ai",
17
+ "directory": "packages/gateway"
18
+ },
19
+ "files": [
20
+ "dist"
21
+ ],
22
+ "type": "module",
23
+ "main": "./dist/index.js",
24
+ "types": "./dist/index.d.ts",
25
+ "exports": {
26
+ ".": {
27
+ "types": "./dist/index.d.ts",
28
+ "import": "./dist/index.js",
29
+ "require": "./dist/index.cjs"
30
+ }
31
+ },
32
+ "publishConfig": {
33
+ "access": "public"
34
+ },
35
+ "devDependencies": {
36
+ "typescript": "^5.8.0",
37
+ "vitest": "^3.0.0",
38
+ "@halo-sdk/core": "1.1.0"
39
+ },
40
+ "peerDependencies": {
41
+ "@halo-sdk/core": ">=1.1.0"
42
+ },
43
+ "scripts": {
44
+ "build": "tsc --build --emitDeclarationOnly && tsup",
45
+ "dev": "tsup --watch",
46
+ "clean": "del-cli dist *.tsbuildinfo",
47
+ "publint": "publint",
48
+ "test": "vitest run",
49
+ "test:watch": "vitest"
50
+ }
51
+ }