@codefionn/llmleaf-client 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts ADDED
@@ -0,0 +1,77 @@
1
+ // @codefionn/llmleaf-client — official TypeScript/JavaScript client for the llmleaf LLM proxy.
2
+ //
3
+ // The wire is OpenAI/OpenRouter-shaped JSON over HTTP (see clients/SPEC.md). The proto
4
+ // (clients/proto/llmleaf/v1/llmleaf.proto) is the typed source of truth; the protobuf-es
5
+ // codegen artifact lives under ./gen and the hand-written transport here maps the public
6
+ // model to/from the wire JSON.
7
+
8
+ export { LlmleafClient } from "./client.js";
9
+ export type {
10
+ LlmleafClientOptions,
11
+ FetchLike,
12
+ TranscriptionFile,
13
+ } from "./client.js";
14
+
15
+ export { ApiError } from "./error.js";
16
+
17
+ // Enums (re-exported straight from the generated descriptor file) + the generic
18
+ // wire-token mapping helpers.
19
+ export {
20
+ Role,
21
+ FinishReason,
22
+ BatchStatus,
23
+ enumToWire,
24
+ enumFromWire,
25
+ } from "./enums.js";
26
+
27
+ // The public typed model.
28
+ export type {
29
+ Usage,
30
+ TextPart,
31
+ ImageUrlPart,
32
+ ContentPart,
33
+ MessageContent,
34
+ FunctionCall,
35
+ ToolCall,
36
+ ChatMessage,
37
+ FunctionDef,
38
+ ToolDef,
39
+ ToolChoice,
40
+ ResponseFormat,
41
+ ChatRequest,
42
+ Choice,
43
+ ChatResponse,
44
+ FunctionCallDelta,
45
+ ToolCallDelta,
46
+ Delta,
47
+ ChunkChoice,
48
+ ChatCompletionChunk,
49
+ EmbeddingRequest,
50
+ Embedding,
51
+ EmbeddingResponse,
52
+ SpeechRequest,
53
+ SpeechResult,
54
+ Voice,
55
+ VoicesResponse,
56
+ TranscriptionRequest,
57
+ TranscriptionResponse,
58
+ Architecture,
59
+ Pricing,
60
+ TopProvider,
61
+ ModelEndpoint,
62
+ ModelEntry,
63
+ ListModelsResponse,
64
+ ModelType,
65
+ ListModelsOptions,
66
+ BatchRequestItem,
67
+ BatchCreateRequest,
68
+ BatchCounts,
69
+ BatchHandle,
70
+ BatchResponse,
71
+ BatchError,
72
+ BatchResultLine,
73
+ } from "./types.js";
74
+
75
+ // The generated protobuf-es descriptors/types remain importable for advanced users
76
+ // who want the codegen artifact directly (e.g. binary/JSON reflection via @bufbuild/protobuf).
77
+ export * as gen from "./gen/llmleaf/v1/llmleaf_pb.js";
package/src/stream.ts ADDED
@@ -0,0 +1,82 @@
1
+ // Streaming helpers: decode the response body's ReadableStream into lines, then
2
+ // surface SSE `data:` frames (chat streaming) or NDJSON objects (batch results).
3
+ //
4
+ // These are runtime-agnostic: they only use the WHATWG ReadableStream + TextDecoder
5
+ // that Node 20+, Deno, Bun and browsers all provide.
6
+
7
/**
 * Yield decoded text lines from a byte stream.
 *
 * Bytes are decoded incrementally as UTF-8, so a multi-byte character split across
 * two chunks is decoded correctly. Lines are split on `\n` and a single trailing `\r`
 * is stripped from each line, which also covers `\r\n` pairs split across chunks.
 * A final unterminated line is yielded only when it is non-empty.
 *
 * When the consumer stops early (`break` / `return`) or reading throws, the stream is
 * cancelled before the reader lock is released, so the source is told that no more
 * data is wanted instead of being left open.
 *
 * @param body - Byte stream to read; it stays locked while the generator is active.
 * @returns An async generator of lines without their line terminators.
 */
async function* iterLines(
  body: ReadableStream<Uint8Array>,
): AsyncGenerator<string, void, unknown> {
  const reader = body.getReader();
  const decoder = new TextDecoder("utf-8");
  let buffer = "";
  // Becomes true only once the source reported `done`; anything else is an early exit.
  let exhausted = false;
  try {
    for (;;) {
      const { done, value } = await reader.read();
      if (done) {
        exhausted = true;
        break;
      }
      buffer += decoder.decode(value, { stream: true });
      let nl: number;
      while ((nl = buffer.indexOf("\n")) !== -1) {
        let line = buffer.slice(0, nl);
        buffer = buffer.slice(nl + 1);
        if (line.endsWith("\r")) line = line.slice(0, -1);
        yield line;
      }
    }
    // Flush any bytes the streaming decoder was still holding back.
    buffer += decoder.decode();
    if (buffer.endsWith("\r")) buffer = buffer.slice(0, -1);
    if (buffer.length > 0) yield buffer;
  } finally {
    if (!exhausted) {
      try {
        await reader.cancel();
      } catch {
        // Best effort: a failing cancel must not mask the error (if any) that got us here.
      }
    }
    reader.releaseLock();
  }
}
34
+
35
/**
 * Turn a `text/event-stream` body into the raw payload string of each `data:` frame.
 *
 * - A blank line dispatches the frame accumulated so far (if any).
 * - Lines that do not start with `data:` (comments starting with `:`, `event:`, `id:`,
 *   `retry:`, ...) are skipped.
 * - The `data:` prefix and at most one following space are removed from each line.
 * - Several `data:` lines in one frame are joined with `\n`.
 * - The sentinel line `data: [DONE]` ends the generator immediately and is never
 *   yielded, so callers never JSON-parse it.
 *
 * @param body - Byte stream carrying the event stream.
 * @returns An async generator of frame payloads (not parsed as JSON here).
 */
export async function* parseSseData(
  body: ReadableStream<Uint8Array>,
): AsyncGenerator<string, void, unknown> {
  let pending: string[] = [];

  for await (const rawLine of iterLines(body)) {
    // Blank line: frame separator.
    if (rawLine.length === 0) {
      if (pending.length === 0) continue;
      const frame = pending.join("\n");
      pending = [];
      yield frame;
      continue;
    }

    // Comments (":...") and every other non-data field fail this check and are ignored.
    if (!rawLine.startsWith("data:")) continue;

    const afterColon = rawLine.slice(5);
    const payload = afterColon.startsWith(" ") ? afterColon.slice(1) : afterColon;

    // End-of-stream sentinel: stop without yielding.
    if (payload === "[DONE]") return;

    pending.push(payload);
  }

  // The stream ended without a closing blank line: flush what is left.
  if (pending.length > 0) {
    const tail = pending.join("\n");
    if (tail !== "[DONE]") yield tail;
  }
}
70
+
71
/**
 * Decode an `application/x-ndjson` body, yielding one parsed JSON value per line.
 *
 * Each line is trimmed first; lines that are empty after trimming are skipped.
 * A line that is not valid JSON makes `JSON.parse` throw out of the generator.
 *
 * @param body - Byte stream carrying newline-delimited JSON.
 * @returns An async generator of parsed values, typed `unknown` for the caller to narrow.
 */
export async function* parseNdjson(
  body: ReadableStream<Uint8Array>,
): AsyncGenerator<unknown, void, unknown> {
  for await (const rawLine of iterLines(body)) {
    const text = rawLine.trim();
    if (text.length > 0) {
      yield JSON.parse(text);
    }
  }
}
package/src/types.ts ADDED
@@ -0,0 +1,384 @@
1
+ // Public, hand-written TypeScript surface mirroring proto/llmleaf/v1/llmleaf.proto.
2
+ //
3
+ // Why interfaces and not the generated protobuf-es classes directly? protobuf-es's
4
+ // own JSON codec emits camelCase keys and STRING enum NAMES (e.g. "ASSISTANT"), which
5
+ // do NOT match the OpenAI/OpenRouter wire (snake_case keys + lowercase tokens like
6
+ // "assistant"), and it has no notion of the "free-form JSON carried as a raw string"
7
+ // or "content is string-or-array" conventions in SPEC.md. So these plain interfaces
8
+ // are the ergonomic public model; src/wire.ts maps them to/from the actual wire JSON.
9
+ //
10
+ // The generated descriptors/enums remain the committed codegen artifact (src/gen) and
11
+ // are the single source of truth these shapes track. The three closed-set enums are
12
+ // re-used straight from the generated file via src/enums.ts.
13
+
14
+ import type { Role, FinishReason, BatchStatus } from "./enums.js";
15
+
16
+ export type { Role, FinishReason, BatchStatus };
17
+
18
+ // ---------------------------------------------------------------------------
19
+ // Common
20
+ // ---------------------------------------------------------------------------
21
+
22
/** Token counters (and optional cost) reported alongside a response. */
export interface Usage {
  promptTokens: number;
  completionTokens: number;
  totalTokens: number;
  /** llmleaf addition; absent when the model has no known price. */
  costUsd?: number;
}
29
+
30
+ // ---------------------------------------------------------------------------
31
+ // Chat
32
+ // ---------------------------------------------------------------------------
33
+
34
/** Text entry of a content-part array; tagged with `type: "text"`. */
export interface TextPart {
  type: "text";
  text: string;
}

/** Image entry of a content-part array; tagged with `type: "image_url"`. */
export interface ImageUrlPart {
  type: "image_url";
  imageUrl: {
    url: string;
    /** One of "auto" | "low" | "high". */
    detail?: string;
  };
}

/** Any single content part; discriminate on the `type` field. */
export type ContentPart = TextPart | ImageUrlPart;

/** Wire `content` is either a plain string or an array of content parts. */
export type MessageContent = string | ContentPart[];
52
+
53
/** Function name plus arguments, as carried inside a {@link ToolCall}. */
export interface FunctionCall {
  name: string;
  /** JSON-encoded arguments string (OpenAI shape). */
  arguments: string;
}

/** One tool call attached to a chat message. */
export interface ToolCall {
  id: string;
  /** Always "function" today. */
  type: string;
  function: FunctionCall;
}

/** A single chat message; used in request `messages` and in response choices. */
export interface ChatMessage {
  role: Role;
  content?: MessageContent;
  name?: string;
  toolCalls?: ToolCall[];
  /** Set when `role` is TOOL. */
  toolCallId?: string;
}
74
+
75
/** Function definition wrapped by a {@link ToolDef}. */
export interface FunctionDef {
  name: string;
  description?: string;
  /** Raw JSON Schema object, serialized as a JSON string. */
  parameters?: string;
}

/** Tool entry of a chat request's `tools` list. */
export interface ToolDef {
  /** "function" */
  type: string;
  function: FunctionDef;
}

/** Either "auto" | "none" | "required", or an object naming one specific function. */
export type ToolChoice =
  | string
  | {
      type: "function";
      function: { name: string };
    };

/** Requested output format of a chat completion. */
export interface ResponseFormat {
  /** "text" | "json_object" | "json_schema" */
  type: string;
  /** Raw JSON object, serialized as a JSON string, when `type` is "json_schema". */
  jsonSchema?: string;
}

/** Body of a chat completion request. */
export interface ChatRequest {
  model: string;
  messages: ChatMessage[];
  stream?: boolean;
  temperature?: number;
  topP?: number;
  /** Legacy name. */
  maxTokens?: number;
  /** Modern name (takes precedence). */
  maxCompletionTokens?: number;
  stop?: string[];
  n?: number;
  seed?: number;
  frequencyPenalty?: number;
  presencePenalty?: number;
  tools?: ToolDef[];
  toolChoice?: ToolChoice;
  responseFormat?: ResponseFormat;
  /** "low" | "medium" | "high" */
  reasoningEffort?: string;
  /**
   * Dialect-specific passthrough: a raw JSON object, serialized as a JSON string,
   * merged at the top level.
   */
  extra?: string;
}
126
+
127
/** One entry of a chat response's `choices` list. */
export interface Choice {
  index: number;
  message: ChatMessage;
  finishReason?: FinishReason;
}

/** Non-streaming chat completion response. */
export interface ChatResponse {
  id: string;
  /** "chat.completion" */
  object: string;
  /** Unix seconds. */
  created: number;
  model: string;
  choices: Choice[];
  usage?: Usage;
}
143
+
144
+ // Streaming
145
+
146
/** Partial function call inside a {@link ToolCallDelta}; every field is optional. */
export interface FunctionCallDelta {
  name?: string;
  arguments?: string;
}

/** Partial tool call inside a streamed {@link Delta}; only `index` is required. */
export interface ToolCallDelta {
  index: number;
  id?: string;
  type?: string;
  function?: FunctionCallDelta;
}

/** Incremental message update carried by one streamed chunk choice. */
export interface Delta {
  /** First chunk only. */
  role?: Role;
  /** Incremental text. */
  content?: string;
  toolCalls?: ToolCallDelta[];
}

/** One entry of a streamed chunk's `choices` list. */
export interface ChunkChoice {
  index: number;
  delta: Delta;
  finishReason?: FinishReason;
}

/** A single chunk of a streamed chat completion. */
export interface ChatCompletionChunk {
  id: string;
  /** "chat.completion.chunk" */
  object: string;
  created: number;
  model: string;
  choices: ChunkChoice[];
  /** Terminal chunk only. */
  usage?: Usage;
}
182
+
183
+ // ---------------------------------------------------------------------------
184
+ // Embeddings
185
+ // ---------------------------------------------------------------------------
186
+
187
/** Body of an embeddings request. */
export interface EmbeddingRequest {
  model: string;
  /** The wire accepts a string or an array of strings; this model always uses an array. */
  input: string[];
  dimensions?: number;
  /** "float" | "base64" */
  encodingFormat?: string;
  /** Raw JSON object passthrough, serialized as a JSON string. */
  extra?: string;
}

/** One embedding vector of an {@link EmbeddingResponse}. */
export interface Embedding {
  /** "embedding" */
  object: string;
  index: number;
  /** Always decoded to floats, even when encoding_format == "base64". */
  embedding: number[];
}

/** Response of an embeddings request. */
export interface EmbeddingResponse {
  /** "list" */
  object: string;
  data: Embedding[];
  model: string;
  usage?: Usage;
}
213
+
214
+ // ---------------------------------------------------------------------------
215
+ // Audio
216
+ // ---------------------------------------------------------------------------
217
+
218
/** Body of a speech request. */
export interface SpeechRequest {
  model: string;
  input: string;
  voice: string;
  /** mp3|opus|aac|flac|wav|pcm */
  responseFormat?: string;
  speed?: number;
  /** Raw JSON object passthrough, serialized as a JSON string. */
  extra?: string;
}

/** Raw audio bytes plus the Content-Type the server reported. */
export interface SpeechResult {
  bytes: Uint8Array;
  contentType: string;
}

/** One selectable voice of a {@link VoicesResponse}. */
export interface Voice {
  /** Value to put in `SpeechRequest.voice`. */
  id: string;
  name?: string;
  /** BCP-47 tags. */
  languages: string[];
}

/** Voices listed for one model. */
export interface VoicesResponse {
  model: string;
  voices: Voice[];
}
247
+
248
/** Options of a transcription request (the audio file itself is not part of this shape). */
export interface TranscriptionRequest {
  model: string;
  /** ISO-639-1 hint. */
  language?: string;
  /** Decoding bias. */
  prompt?: string;
  /** json|text|verbose_json|srt|vtt */
  responseFormat?: string;
  temperature?: number;
}

/** Result of a transcription request; only `text` is always present. */
export interface TranscriptionResponse {
  text: string;
  /** "transcribe" (verbose_json). */
  task?: string;
  language?: string;
  duration?: number;
  usage?: Usage;
}
267
+
268
+ // ---------------------------------------------------------------------------
269
+ // Model catalog
270
+ // ---------------------------------------------------------------------------
271
+
272
/** Modality and tokenizer information of a catalog model. */
export interface Architecture {
  inputModalities: string[];
  outputModalities: string[];
  /** "text->text" | "text->audio" | ... */
  modality?: string;
  tokenizer: string;
  instructType?: string;
}

/** Price information of a catalog model. */
export interface Pricing {
  /** USD per token, decimal string. */
  prompt: string;
  // NOTE(review): presumably the same unit/format as `prompt` — confirm against the spec.
  completion: string;
}

/** Limits and moderation flag reported under a model's `topProvider`. */
export interface TopProvider {
  contextLength?: number;
  maxCompletionTokens?: number;
  isModerated: boolean;
  /** llmleaf extension. */
  maxThinkingTokens?: number;
}

/** Admin-only fallback-chain entry (present only with a valid admin token). */
export interface ModelEndpoint {
  provider: string;
  model: string;
  down: boolean;
  /** "route" | "prefix" */
  source: string;
}

/** One model of the catalog. */
export interface ModelEntry {
  id: string;
  canonicalSlug: string;
  name: string;
  created: number;
  description: string;
  contextLength?: number;
  architecture?: Architecture;
  pricing?: Pricing;
  topProvider?: TopProvider;
  supportedParameters: string[];
  unsupportedParameters: string[];
  /** Raw JSON object, serialized as a JSON string. */
  defaultParameters?: string;
  /** Admin-only. */
  endpoints: ModelEndpoint[];
}

/** Response of the model listing call. */
export interface ListModelsResponse {
  data: ModelEntry[];
}
325
+
326
/** Model-kind filter accepted by `LlmleafClient.listModels` (declared in ./client). */
export type ModelType = "all" | "llm" | "tts" | "stt" | "embedding";

/** Optional filters for listing models. */
export interface ListModelsOptions {
  type?: ModelType;
  /** Substring search. */
  search?: string;
  /** When true, send the admin token so per-model `endpoints` are included. */
  admin?: boolean;
}
336
+
337
+ // ---------------------------------------------------------------------------
338
+ // Batches
339
+ // ---------------------------------------------------------------------------
340
+
341
/** One chat request of a batch, tagged with a caller-chosen `customId`. */
export interface BatchRequestItem {
  customId: string;
  body: ChatRequest;
}

/** Body of a batch creation request. */
export interface BatchCreateRequest {
  requests: BatchRequestItem[];
}

/** Per-state request counters of a batch. */
export interface BatchCounts {
  total: number;
  processing: number;
  succeeded: number;
  errored: number;
  canceled: number;
  expired: number;
}

/** Identity and status of a batch. */
export interface BatchHandle {
  id: string;
  status: BatchStatus;
  counts?: BatchCounts;
  // NOTE(review): the three timestamps below are presumably unix seconds like
  // `ChatResponse.created` — confirm against the spec.
  createdAt?: number;
  expiresAt?: number;
  endedAt?: number;
  /** e.g. "/v1/chat/completions" */
  endpoint?: string;
}

/** Response part of a batch result line: a status code plus the chat response body. */
export interface BatchResponse {
  statusCode: number;
  body: ChatResponse;
}

/** Error part of a batch result line. */
export interface BatchError {
  code: string;
  message: string;
}

/** One line of batch results; `response` and `error` are both optional. */
export interface BatchResultLine {
  customId: string;
  response?: BatchResponse;
  error?: BatchError;
}