@modelrelay/sdk 0.24.0 → 0.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,285 +1,115 @@
1
1
  # ModelRelay TypeScript SDK
2
2
 
3
- Typed client for Node.js that wraps the ModelRelay API for **consuming** LLM/usage endpoints. Use secret API keys or bearer tokens issued by your backend; publishable-key frontend token flows have been removed.
4
-
5
- ## Installation
6
-
7
3
  ```bash
8
4
  bun add @modelrelay/sdk
9
- # or: npm install @modelrelay/sdk
10
5
  ```
11
6
 
12
- ## Quick Start
7
+ ## Streaming Chat
13
8
 
14
9
  ```ts
15
10
  import { ModelRelay } from "@modelrelay/sdk";
16
11
 
17
- // Use a secret key or bearer token from your backend.
18
- const mr = new ModelRelay({
19
- key: "mr_sk_..."
20
- });
12
+ const mr = new ModelRelay({ key: "mr_sk_..." });
21
13
 
22
- // Stream chat completions.
23
14
  const stream = await mr.chat.completions.create({
24
- model: "grok-4-1-fast-reasoning",
25
- messages: [{ role: "user", content: "Hello" }]
15
+ model: "claude-sonnet-4-20250514",
16
+ messages: [{ role: "user", content: "Hello" }],
26
17
  });
27
18
 
28
19
  for await (const event of stream) {
29
20
  if (event.type === "message_delta" && event.textDelta) {
30
- console.log(event.textDelta);
21
+ process.stdout.write(event.textDelta);
31
22
  }
32
23
  }
33
24
  ```
34
25
 
35
- ### Server-side usage
36
-
37
- Provide a secret API key or bearer token:
26
+ ## Structured Outputs with Zod
38
27
 
39
28
  ```ts
40
- const mr = new ModelRelay({ key: "mr_sk_..." });
41
- const completion = await mr.chat.completions.create(
42
- { model: "grok-4-1-fast-reasoning", messages: [{ role: "user", content: "Hi" }], stream: false }
43
- );
44
- console.log(completion.content.join(""));
45
- ```
46
-
47
- ## Scripts (run with Bun)
48
-
49
- - `bun run build` — bundle CJS + ESM outputs with type declarations.
50
- - `bun run test` — run unit tests.
51
- - `bun run lint` — typecheck the source without emitting files.
29
+ import { z } from "zod";
52
30
 
53
- ## Configuration
54
-
55
- - **Environments**: `environment: "production" | "staging" | "sandbox"` or override `baseUrl`.
56
- - **Auth**: pass a secret/publishable `key` or a bearer `token`. Publishable keys mint frontend tokens automatically.
57
- - **Timeouts & retries**: `connectTimeoutMs` (default 5s per attempt) and `timeoutMs` (default 60s overall; set `0` to disable). Per-call overrides available on `chat.completions.create`. `retry` config (`{ maxAttempts, baseBackoffMs, maxBackoffMs, retryPost }` or `false`) controls exponential backoff with jitter.
58
- - **Headers & metadata**: `defaultHeaders` are sent with every request; `defaultMetadata` merges into every chat request and can be overridden per-call via `metadata`.
59
- - **Client header**: set `clientHeader` to override the telemetry header (defaults to `modelrelay-ts/<version>`).
60
-
61
- ### Timeouts & retry examples
62
-
63
- ```ts
64
- // Shorten connect + request timeouts globally
65
- const mr = new ModelRelay({
66
- key: "mr_sk_...",
67
- connectTimeoutMs: 3_000,
68
- timeoutMs: 20_000,
69
- retry: { maxAttempts: 4, baseBackoffMs: 200, maxBackoffMs: 2_000 }
31
+ const Person = z.object({
32
+ name: z.string(),
33
+ age: z.number(),
70
34
  });
71
35
 
72
- // Per-call overrides (blocking)
73
- await mr.chat.completions.create(
74
- { model: "grok-4-1-fast-reasoning", messages: [{ role: "user", content: "Hi" }], stream: false },
75
- { timeoutMs: 5_000, retry: false }
76
- );
36
+ const result = await mr.chat.completions.structured(Person, {
37
+ model: "claude-sonnet-4-20250514",
38
+ messages: [{ role: "user", content: "Extract: John Doe is 30" }],
39
+ });
77
40
 
78
- // Streaming: keep connect timeout but disable request timeout
79
- const stream = await mr.chat.completions.create(
80
- { model: "grok-4-1-fast-reasoning", messages: [{ role: "user", content: "Hi" }] },
81
- { connectTimeoutMs: 2_000 } // request timeout is already disabled for streams by default
82
- );
41
+ console.log(result.value); // { name: "John Doe", age: 30 }
83
42
  ```
84
43
 
85
- ### Typed models, stop reasons, and message roles
44
+ ## Streaming Structured Outputs
86
45
 
87
- - Models are plain strings (e.g., `"gpt-4o"`), so new models do not require SDK updates.
88
- - Stop reasons are parsed into the `StopReason` union (e.g., `StopReasons.EndTurn`); unknown values surface as `{ other: "<raw>" }`.
89
- - Message roles use a typed union (`MessageRole`) with constants available via `MessageRoles`.
90
- - Usage backfills `totalTokens` when the backend omits it, ensuring consistent accounting.
46
+ Build progressive UIs that render fields as they complete:
91
47
 
92
48
  ```ts
93
- import { MessageRoles } from "@modelrelay/sdk";
94
-
95
- // Use typed role constants
96
- const messages = [
97
- { role: MessageRoles.System, content: "You are helpful." },
98
- { role: MessageRoles.User, content: "Hello!" },
99
- ];
100
-
101
- // Available roles: User, Assistant, System, Tool
102
- ```
103
-
104
- ### Customer-attributed requests
105
-
106
- For customer-attributed requests, the customer's tier determines which model to use.
107
- Use `forCustomer()` instead of providing a model:
49
+ const Article = z.object({
50
+ title: z.string(),
51
+ summary: z.string(),
52
+ body: z.string(),
53
+ });
108
54
 
109
- ```ts
110
- // Customer-attributed: tier determines model, no model parameter needed
111
- const stream = await mr.chat.forCustomer("customer-123").create({
112
- messages: [{ role: "user", content: "Hello!" }]
55
+ const stream = await mr.chat.completions.streamStructured(Article, {
56
+ model: "claude-sonnet-4-20250514",
57
+ messages: [{ role: "user", content: "Write an article about TypeScript" }],
113
58
  });
114
59
 
115
60
  for await (const event of stream) {
116
- if (event.type === "message_delta" && event.textDelta) {
117
- console.log(event.textDelta);
61
+ // Render fields as soon as they're complete
62
+ if (event.completeFields.has("title")) {
63
+ renderTitle(event.payload.title); // Safe to display
118
64
  }
119
- }
120
-
121
- // Non-streaming
122
- const completion = await mr.chat.forCustomer("customer-123").create(
123
- { messages: [{ role: "user", content: "Hello!" }] },
124
- { stream: false }
125
- );
126
- ```
127
-
128
- This provides compile-time separation between:
129
- - **Direct/PAYGO requests** (`chat.completions.create({ model, ... })`) — model is required
130
- - **Customer-attributed requests** (`chat.forCustomer(id).create(...)`) — tier determines model
131
-
132
- ### Structured outputs (`response_format`)
133
-
134
- Request structured JSON instead of free-form text when the backend supports it:
135
-
136
- ```ts
137
- import { ModelRelay, type ResponseFormat } from "@modelrelay/sdk";
138
-
139
- const mr = new ModelRelay({ key: "mr_sk_..." });
140
-
141
- const format: ResponseFormat = {
142
- type: "json_schema",
143
- json_schema: {
144
- name: "summary",
145
- schema: {
146
- type: "object",
147
- properties: { headline: { type: "string" } },
148
- additionalProperties: false,
149
- },
150
- strict: true,
151
- },
152
- };
153
-
154
- const completion = await mr.chat.completions.create(
155
- {
156
- model: "gpt-4o-mini",
157
- messages: [{ role: "user", content: "Summarize ModelRelay" }],
158
- responseFormat: format,
159
- stream: false,
160
- },
161
- { stream: false },
162
- );
163
-
164
- console.log(completion.content[0]); // JSON string matching your schema
165
- ```
166
-
167
- ### Structured streaming (NDJSON + response_format)
168
-
169
- Use the structured streaming contract for `/llm/proxy` to stream schema-valid
170
- JSON payloads over NDJSON:
171
-
172
- ```ts
173
- type Item = { id: string; label: string };
174
- type RecommendationPayload = { items: Item[] };
175
-
176
- const format: ResponseFormat = {
177
- type: "json_schema",
178
- json_schema: {
179
- name: "recommendations",
180
- schema: {
181
- type: "object",
182
- properties: { items: { type: "array", items: { type: "object" } } },
183
- },
184
- },
185
- };
186
-
187
- const stream = await mr.chat.completions.streamJSON<RecommendationPayload>({
188
- model: "grok-4-1-fast",
189
- messages: [{ role: "user", content: "Recommend items for my user" }],
190
- responseFormat: format,
191
- });
192
-
193
- for await (const evt of stream) {
194
- if (evt.type === "update") {
195
- // Progressive UI: evt.payload is a partial but schema-valid payload.
196
- renderPartial(evt.payload.items);
65
+ if (event.completeFields.has("summary")) {
66
+ renderSummary(event.payload.summary);
197
67
  }
198
- if (evt.type === "completion") {
199
- renderFinal(evt.payload.items);
68
+
69
+ // Show streaming preview of incomplete fields
70
+ if (!event.completeFields.has("body")) {
71
+ renderBodyPreview(event.payload.body + "▋");
200
72
  }
201
73
  }
202
-
203
- // Prefer a single blocking result but still want structured validation?
204
- const final = await stream.collect();
205
- console.log(final.items.length);
206
74
  ```
207
75
 
208
- ### Telemetry & metrics hooks
76
+ ## Customer-Attributed Requests
209
77
 
210
- Provide lightweight callbacks to observe latency and usage without extra deps:
78
+ For metered billing, use `forCustomer()` — the customer's tier determines the model:
211
79
 
212
80
  ```ts
213
- const calls: string[] = [];
214
- const mr = new ModelRelay({
215
- key: "mr_sk_...",
216
- metrics: {
217
- httpRequest: (m) => calls.push(`http ${m.context.path} ${m.status} ${m.latencyMs}ms`),
218
- streamFirstToken: (m) => calls.push(`first-token ${m.latencyMs}ms`),
219
- usage: (m) => calls.push(`usage ${m.usage.totalTokens}`)
220
- },
221
- trace: {
222
- streamEvent: ({ event }) => calls.push(`event ${event.type}`),
223
- requestFinish: ({ status, latencyMs }) => calls.push(`finished ${status} in ${latencyMs}`)
224
- }
81
+ const stream = await mr.chat.forCustomer("customer-123").create({
82
+ messages: [{ role: "user", content: "Hello" }],
225
83
  });
226
-
227
- // Per-call overrides
228
- await mr.chat.completions.create(
229
- { model: "echo-1", messages: [{ role: "user", content: "hi" }] },
230
- { metrics: { usage: console.log }, trace: { streamEvent: console.debug } }
231
- );
232
84
  ```
233
85
 
234
- ### Error categories
235
-
236
- - **ConfigError**: missing key/token, invalid base URL, or request validation issues.
237
- - **TransportError**: network/connect/request/timeout failures (`kind` is one of `connect | timeout | request | other`), includes retry metadata when retries were attempted.
238
- - **APIError**: Non-2xx responses with `status`, `code`, `fields`, `requestId`, and optional `retries` metadata.
239
-
240
- ## API surface
241
-
242
- - `chat.completions.create(params, options?)`
243
- - Supports streaming (default) or blocking JSON (`stream: false`).
244
- - Accepts per-call `requestId`, `headers`, `metadata`, `timeoutMs`, and `retry` overrides.
245
- - `apiKeys.list() | create() | delete(id)` — manage API keys when using secret keys or bearer tokens.
246
- - `customers` — manage customers with a secret key (see below).
247
-
248
- ## Backend Customer Management
249
-
250
- Use a secret key (`mr_sk_*`) to manage customers from your backend:
86
+ ## Customer Management (Backend)
251
87
 
252
88
  ```ts
253
- import { ModelRelay } from "@modelrelay/sdk";
254
-
255
- const mr = new ModelRelay({ key: "mr_sk_..." });
256
-
257
- // Create or update a customer (upsert by external_id)
89
+ // Create/update customer
258
90
  const customer = await mr.customers.upsert({
259
- tier_id: "your-tier-uuid",
260
- external_id: "github-user-12345", // your app's user ID
91
+ tier_id: "tier-uuid",
92
+ external_id: "your-user-id",
261
93
  email: "user@example.com",
262
94
  });
263
95
 
264
- // List all customers
265
- const customers = await mr.customers.list();
266
-
267
- // Get a specific customer
268
- const customer = await mr.customers.get("customer-uuid");
269
-
270
- // Create a checkout session for subscription billing
271
- const session = await mr.customers.createCheckoutSession("customer-uuid", {
272
- success_url: "https://myapp.com/billing/success",
273
- cancel_url: "https://myapp.com/billing/cancel",
96
+ // Create checkout session for subscription billing
97
+ const session = await mr.customers.createCheckoutSession(customer.id, {
98
+ success_url: "https://myapp.com/success",
99
+ cancel_url: "https://myapp.com/cancel",
274
100
  });
275
- // Redirect user to session.url to complete payment
276
101
 
277
102
  // Check subscription status
278
- const status = await mr.customers.getSubscription("customer-uuid");
279
- if (status.active) {
280
- // Grant access
281
- }
103
+ const status = await mr.customers.getSubscription(customer.id);
104
+ ```
105
+
106
+ ## Configuration
282
107
 
283
- // Delete a customer
284
- await mr.customers.delete("customer-uuid");
108
+ ```ts
109
+ const mr = new ModelRelay({
110
+ key: "mr_sk_...",
111
+ environment: "production", // or "staging", "sandbox"
112
+ timeoutMs: 30_000,
113
+ retry: { maxAttempts: 3 },
114
+ });
285
115
  ```