@ebowwa/ai 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +106 -0
- package/dist/client.d.ts +179 -0
- package/dist/client.js +492 -0
- package/dist/index.d.ts +20 -0
- package/dist/index.js +18 -0
- package/dist/prompts.d.ts +222 -0
- package/dist/prompts.js +462 -0
- package/dist/schemas/ai.d.ts +1335 -0
- package/dist/schemas/ai.js +416 -0
- package/dist/schemas/glm.d.ts +16 -0
- package/dist/schemas/glm.js +25 -0
- package/dist/schemas/index.d.ts +5 -0
- package/dist/schemas/index.js +5 -0
- package/dist/types.d.ts +13 -0
- package/dist/types.js +13 -0
- package/package.json +78 -0
package/dist/client.js
ADDED
@@ -0,0 +1,492 @@
/**
 * GLM-4.7 AI Client using Z.AI OpenAI-compatible endpoint
 *
 * Based on documentation from: https://api.z.ai/api/coding/paas/v4
 *
 * Usage with any OpenAI-compatible client library:
 * - Base URL: https://api.z.ai/api/coding/paas/v4
 * - Models: GLM-4.7, GLM-4.6, GLM-4.5, GLM-4.5-air
 *
 * === LAYER 1: OpenAI Protocol Implementation ===
 *
 * This client implements the OpenAI-compatible protocol with:
 * - 30s default timeout (configurable)
 * - 3 retries with exponential backoff (1s → 2s → 4s, max 10s)
 * - Specific error types: GLMTimeoutError, GLMAuthError, GLMRateLimitError, GLMNetworkError
 * - AbortController for cancellation
 * - Runtime validation with Zod schemas
 * - SSE streaming support (NOW IMPLEMENTED)
 *
 * LAYER 2 (CLI Wrapper) concerns:
 * - MCP plugin system → See docs/CLI-ARCHITECTURE.md
 * - Session persistence → See docs/CLI-ARCHITECTURE.md
 * - TUI/Ink rendering → See docs/CLI-ARCHITECTURE.md
 *
 * === STREAMING (Layer 1 - OpenAI Protocol) ===
 *
 * Streaming is now fully implemented with SSE (Server-Sent Events) parsing:
 * - streamChatCompletion() - full chat streaming with StreamChunk objects
 * - streamGenerate() - simple prompt streaming
 * - streamGenerateWithSystem() - streaming with system prompt
 *
 * Each method returns an AsyncGenerator that yields StreamChunk objects:
 * - type: "text" - contains incremental content in `content` field
 * - type: "done" - stream complete
 * - type: "error" - error occurred (check `error` field)
 *
 * Final chunk includes usage information (prompt_tokens, completion_tokens, total_tokens)
 */
import { z } from "zod";
import { ChatCompletionOptionsSchema, ChatMessageSchema, TokenUsageSchema, LatencyInfoSchema, ChatCompletionResponseSchema, RawChatCompletionResponseSchema, } from "./schemas/ai.js";
import { parseSSEStream } from "./schemas/ai.js";
const GLM_API_BASE = "https://api.z.ai/api/coding/paas/v4";
const DEFAULT_TIMEOUT = 30000; // 30 seconds
const DEFAULT_MAX_RETRIES = 3;
/**
 * Custom error types for better error handling
 */
export class GLMTimeoutError extends Error {
    constructor(message) {
        super(message);
        this.name = "GLMTimeoutError";
    }
}
export class GLMAuthError extends Error {
    constructor(message) {
        super(message);
        this.name = "GLMAuthError";
    }
}
export class GLMRateLimitError extends Error {
    constructor(message) {
        super(message);
        this.name = "GLMRateLimitError";
    }
}
export class GLMNetworkError extends Error {
    cause;
    constructor(message, cause) {
        super(message);
        this.cause = cause;
        this.name = "GLMNetworkError";
    }
}
/**
 * Resolve GLM API key from environment or explicit parameter
 */
function resolveApiKey(apiKey) {
    if (apiKey)
        return apiKey;
    if (process.env.Z_AI_API_KEY)
        return process.env.Z_AI_API_KEY;
    if (process.env.ZAI_API_KEY)
        return process.env.ZAI_API_KEY;
    if (process.env.GLM_API_KEY)
        return process.env.GLM_API_KEY;
    throw new Error("GLM API key not found. Set Z_AI_API_KEY, ZAI_API_KEY, or GLM_API_KEY environment variable.");
}
/**
 * Format milliseconds to human-readable string
 */
function formatLatency(ms) {
    if (ms < 1000)
        return `${ms.toFixed(0)}ms`;
    if (ms < 60000)
        return `${(ms / 1000).toFixed(2)}s`;
    return `${(ms / 60000).toFixed(2)}m`;
}
/**
 * Sleep for a specified duration (for retry backoff)
 */
function sleep(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
}
/**
 * Calculate exponential backoff delay
 */
function calculateBackoff(retryCount, baseDelay = 1000) {
    return Math.min(baseDelay * Math.pow(2, retryCount), 10000); // Max 10 seconds
}
/**
 * Create a timeout promise that rejects after specified duration
 */
function createTimeoutPromise(ms) {
    return new Promise((_, reject) => {
        setTimeout(() => reject(new GLMTimeoutError(`Request timeout after ${ms}ms`)), ms);
    });
}
/**
 * GLM-4.7 Client for OpenAI-compatible API
 * Includes timeout handling, retry logic, and detailed error reporting
 */
export class GLMClient {
    apiKey;
    baseURL;
    fetchImpl;
    constructor(apiKey, baseURL, fetchImpl) {
        this.apiKey = resolveApiKey(apiKey);
        this.baseURL = baseURL || GLM_API_BASE;
        // Use injected fetch for testing, otherwise use imported fetch
        this.fetchImpl = fetchImpl || fetch;
    }
    /**
     * Make a request to the GLM API with timeout and retry logic (for non-streaming)
     */
    async requestWithRetry(endpoint, options = {}, timeout = DEFAULT_TIMEOUT, maxRetries = DEFAULT_MAX_RETRIES, retryCount = 0) {
        const startTime = performance.now();
        try {
            // Create abort controller for timeout
            const controller = new AbortController();
            const timeoutId = setTimeout(() => controller.abort(), timeout);
            // Race between fetch and timeout
            const response = (await Promise.race([
                this.fetchImpl(`${this.baseURL}${endpoint}`, {
                    ...options,
                    signal: controller.signal,
                    headers: {
                        Authorization: `Bearer ${this.apiKey}`,
                        "Content-Type": "application/json",
                        ...options.headers,
                    },
                }),
                createTimeoutPromise(timeout),
            ]));
            clearTimeout(timeoutId);
            // Handle specific HTTP status codes
            if (response.status === 401) {
                throw new GLMAuthError("Invalid API key or unauthorized access");
            }
            if (response.status === 429) {
                throw new GLMRateLimitError("Rate limit exceeded, please retry later");
            }
            if (!response.ok) {
                const errorText = await response.text();
                throw new Error(`GLM API error: ${response.status} ${response.statusText} - ${errorText}`);
            }
            const data = await response.json();
            const latencyMs = performance.now() - startTime;
            return { data, latencyMs };
        }
        catch (error) {
            const latencyMs = performance.now() - startTime;
            // Don't retry on auth errors or if we've exhausted retries
            if (error instanceof GLMAuthError || retryCount >= maxRetries) {
                throw error;
            }
            // Retry on timeout, network errors, or rate limit (with backoff)
            if (error instanceof GLMTimeoutError ||
                error instanceof GLMRateLimitError ||
                (error instanceof TypeError && error.message.includes("fetch"))) {
                const backoffDelay = calculateBackoff(retryCount);
                console.warn(`GLM API request failed (attempt ${retryCount + 1}/${maxRetries + 1}), ` +
                    `retrying in ${backoffDelay}ms...`);
                await sleep(backoffDelay);
                return this.requestWithRetry(endpoint, options, timeout, maxRetries, retryCount + 1);
            }
            // Wrap other errors in GLMNetworkError
            throw new GLMNetworkError(`Network error: ${error.message}`, error);
        }
    }
    /**
     * Make a streaming request to the GLM API with timeout and retry logic
     * Returns the raw Response object for SSE parsing
     */
    async requestStream(endpoint, options = {}, timeout = DEFAULT_TIMEOUT, maxRetries = DEFAULT_MAX_RETRIES, retryCount = 0) {
        try {
            // Create abort controller for timeout
            const controller = new AbortController();
            const timeoutId = setTimeout(() => controller.abort(), timeout);
            // Race between fetch and timeout
            const response = (await Promise.race([
                this.fetchImpl(`${this.baseURL}${endpoint}`, {
                    ...options,
                    signal: controller.signal,
                    headers: {
                        Authorization: `Bearer ${this.apiKey}`,
                        "Content-Type": "application/json",
                        ...options.headers,
                    },
                }),
                createTimeoutPromise(timeout),
            ]));
            clearTimeout(timeoutId);
            // Handle specific HTTP status codes
            if (response.status === 401) {
                throw new GLMAuthError("Invalid API key or unauthorized access");
            }
            if (response.status === 429) {
                throw new GLMRateLimitError("Rate limit exceeded, please retry later");
            }
            if (!response.ok) {
                const errorText = await response.text();
                throw new Error(`GLM API error: ${response.status} ${response.statusText} - ${errorText}`);
            }
            return response;
        }
        catch (error) {
            // Don't retry on auth errors or if we've exhausted retries
            if (error instanceof GLMAuthError || retryCount >= maxRetries) {
                throw error;
            }
            // Retry on timeout, network errors, or rate limit (with backoff)
            if (error instanceof GLMTimeoutError ||
                error instanceof GLMRateLimitError ||
                (error instanceof TypeError && error.message.includes("fetch"))) {
                const backoffDelay = calculateBackoff(retryCount);
                console.warn(`GLM API streaming request failed (attempt ${retryCount + 1}/${maxRetries + 1}), ` +
                    `retrying in ${backoffDelay}ms...`);
                await sleep(backoffDelay);
                return this.requestStream(endpoint, options, timeout, maxRetries, retryCount + 1);
            }
            // Wrap other errors in GLMNetworkError
            throw new GLMNetworkError(`Network error: ${error.message}`, error);
        }
    }
    /**
     * Public request method (backward compatibility)
     */
    async request(endpoint, options = {}) {
        return this.requestWithRetry(endpoint, options, DEFAULT_TIMEOUT);
    }
    /**
     * Validate and parse chat messages using Zod schema
     */
    validateMessages(messages) {
        const result = z.array(ChatMessageSchema).min(1).safeParse(messages);
        if (!result.success) {
            throw new Error(`Invalid chat messages: ${result.error.message}`);
        }
        return result.data;
    }
    /**
     * Validate and parse chat completion options using Zod schema
     */
    parseOptions(options) {
        const result = ChatCompletionOptionsSchema.safeParse(options);
        if (!result.success) {
            throw new Error(`Invalid chat completion options: ${result.error.message}`);
        }
        return result.data;
    }
    /**
     * Convert raw API response to internal format with Zod validation
     */
    convertResponse(raw, latencyMs) {
        // Validate raw API response
        const rawResult = RawChatCompletionResponseSchema.safeParse(raw);
        if (!rawResult.success) {
            throw new Error(`Invalid API response: ${rawResult.error.message}`);
        }
        const rawResponse = rawResult.data;
        // Convert snake_case usage to camelCase
        const usage = rawResponse.usage
            ? {
                promptTokens: rawResponse.usage.prompt_tokens,
                completionTokens: rawResponse.usage.completion_tokens,
                totalTokens: rawResponse.usage.total_tokens,
            }
            : undefined;
        // Validate usage if present
        if (usage) {
            const usageResult = TokenUsageSchema.safeParse(usage);
            if (!usageResult.success) {
                throw new Error(`Invalid token usage: ${usageResult.error.message}`);
            }
        }
        // Create latency info
        const latency = {
            totalMs: latencyMs,
            formatted: formatLatency(latencyMs),
        };
        const latencyResult = LatencyInfoSchema.safeParse(latency);
        if (!latencyResult.success) {
            throw new Error(`Invalid latency info: ${latencyResult.error.message}`);
        }
        // Build final response
        const response = {
            id: rawResponse.id,
            object: rawResponse.object,
            created: rawResponse.created,
            model: rawResponse.model,
            choices: rawResponse.choices,
            usage,
            latency: latencyResult.data,
        };
        // Validate final response
        const finalResult = ChatCompletionResponseSchema.safeParse(response);
        if (!finalResult.success) {
            throw new Error(`Invalid response format: ${finalResult.error.message}`);
        }
        return finalResult.data;
    }
    /**
     * Create a chat completion (non-streaming)
     * Maps API response (snake_case) to internal types (camelCase)
     * Includes latency tracking and configurable timeout
     */
    async chatCompletion(messages, options = {}) {
        // Validate inputs using Zod schemas
        const validatedMessages = this.validateMessages(messages);
        const validatedOptions = this.parseOptions(options);
        const { model = "GLM-4.7", temperature = 0.7, maxTokens, stream = false, timeout = DEFAULT_TIMEOUT, maxRetries = DEFAULT_MAX_RETRIES, } = validatedOptions;
        const body = {
            model,
            messages: validatedMessages,
            temperature,
        };
        if (maxTokens !== undefined) {
            body.max_tokens = maxTokens;
        }
        if (stream) {
            body.stream = true;
        }
        // Raw API response has snake_case usage fields
        const { data: raw, latencyMs } = await this.requestWithRetry("/chat/completions", {
            method: "POST",
            body: JSON.stringify(body),
        }, timeout, maxRetries);
        // Convert and validate response using Zod
        return this.convertResponse(raw, latencyMs);
    }
    /**
     * Simple generate method for quick prompts (non-streaming)
     */
    async generate(prompt, options = {}) {
        const response = await this.chatCompletion([{ role: "user", content: prompt }], options);
        return response.choices[0]?.message?.content || "";
    }
    /**
     * Generate with system prompt (non-streaming)
     */
    async generateWithSystem(systemPrompt, userPrompt, options = {}) {
        const response = await this.chatCompletion([
            { role: "system", content: systemPrompt },
            { role: "user", content: userPrompt },
        ], options);
        return response.choices[0]?.message?.content || "";
    }
    /**
     * Stream a chat completion
     *
     * Returns an async generator that yields StreamChunk objects as they arrive.
     * Each chunk contains incremental text content, and the final chunk includes
     * usage information.
     *
     * @param messages - The chat messages to send
     * @param options - Chat completion options (timeout, maxRetries, etc.)
     * @returns Async generator of StreamChunk objects
     *
     * @example
     * ```ts
     * const chunks = [];
     * for await (const chunk of client.streamChatCompletion(messages)) {
     *   if (chunk.type === 'text') {
     *     chunks.push(chunk.content);
     *     process.stdout.write(chunk.content);
     *   } else if (chunk.type === 'done') {
     *     console.log('\nStream complete!');
     *   }
     * }
     * ```
     */
    async *streamChatCompletion(messages, options = {}) {
        // Validate inputs using Zod schemas
        const validatedMessages = this.validateMessages(messages);
        const validatedOptions = this.parseOptions(options);
        const { model = "GLM-4.7", temperature = 0.7, maxTokens, timeout = DEFAULT_TIMEOUT, maxRetries = DEFAULT_MAX_RETRIES, } = validatedOptions;
        const body = {
            model,
            messages: validatedMessages,
            temperature,
            stream: true, // Enable streaming
        };
        if (maxTokens !== undefined) {
            body.max_tokens = maxTokens;
        }
        try {
            // Make the streaming request
            const response = await this.requestStream("/chat/completions", {
                method: "POST",
                body: JSON.stringify(body),
            }, timeout, maxRetries);
            // Parse the SSE stream and yield chunks
            for await (const chunk of parseSSEStream(response)) {
                yield chunk;
            }
        }
        catch (error) {
            yield {
                type: "error",
                error: error instanceof Error ? error.message : String(error),
            };
        }
    }
    /**
     * Stream a simple prompt
     *
     * Returns an async generator that yields chunks of text as they arrive.
     * Simplified interface for single-prompt streaming.
     *
     * @param prompt - The prompt to send
     * @param options - Chat completion options (timeout, maxRetries, etc.)
     * @returns Async generator of StreamChunk objects
     *
     * @example
     * ```ts
     * for await (const chunk of client.streamGenerate("Tell me a story")) {
     *   if (chunk.type === 'text') {
     *     process.stdout.write(chunk.content);
     *   }
     * }
     * ```
     */
    async *streamGenerate(prompt, options = {}) {
        yield* this.streamChatCompletion([{ role: "user", content: prompt }], options);
    }
    /**
     * Stream with system prompt
     *
     * Returns an async generator that yields chunks of text as they arrive.
     * Includes both system and user prompts.
     *
     * @param systemPrompt - The system prompt
     * @param userPrompt - The user prompt
     * @param options - Chat completion options (timeout, maxRetries, etc.)
     * @returns Async generator of StreamChunk objects
     *
     * @example
     * ```ts
     * for await (const chunk of client.streamGenerateWithSystem(
     *   "You are a helpful assistant",
     *   "Explain quantum computing"
     * )) {
     *   if (chunk.type === 'text') {
     *     process.stdout.write(chunk.content);
     *   }
     * }
     * ```
     */
    async *streamGenerateWithSystem(systemPrompt, userPrompt, options = {}) {
        yield* this.streamChatCompletion([
            { role: "system", content: systemPrompt },
            { role: "user", content: userPrompt },
        ], options);
    }
}
/**
 * Create a singleton instance for use in the app
 */
let glmClient = null;
export function getGLMClient() {
    if (glmClient)
        return glmClient;
    try {
        glmClient = new GLMClient();
        console.log("✓ GLM-4.7 API client initialized");
        return glmClient;
    }
    catch (error) {
        console.warn("GLM API key not configured - AI features unavailable");
        return null;
    }
}
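
A brief usage sketch follows (illustrative only, not part of the published files): it assumes the package entry point resolves to dist/index.js, that an API key is present in Z_AI_API_KEY (or ZAI_API_KEY / GLM_API_KEY), and that the code runs in an ES module with top-level await.

import { GLMClient } from "@ebowwa/ai";

// The constructor reads the key from the environment and targets the Z.AI coding endpoint by default.
const client = new GLMClient();

// Non-streaming: generate() wraps chatCompletion() with a single user message;
// retries and the 30s default timeout are handled inside requestWithRetry().
const answer = await client.generate("Summarize SSE in one sentence.", {
  model: "GLM-4.7",
  temperature: 0.7,
});
console.log(answer);

// Streaming: "text" chunks carry incremental content; a "done" chunk ends the stream.
for await (const chunk of client.streamGenerate("Tell me a short story")) {
  if (chunk.type === "text") {
    process.stdout.write(chunk.content ?? "");
  } else if (chunk.type === "error") {
    console.error(chunk.error);
  }
}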
package/dist/index.d.ts
ADDED
@@ -0,0 +1,20 @@
/**
 * AI module exports
 *
 * Exports:
 * - client: GLMClient class and related error types
 * - types: Type definitions (now re-exported from schema)
 * - prompts: Prompt building utilities
 * - schema: Zod schemas and validation helpers
 *
 * Streaming support:
 * - streamChatCompletion: Stream chat completions with SSE
 * - streamGenerate: Stream simple prompts
 * - streamGenerateWithSystem: Stream with system prompt
 * - StreamChunk: Type for stream chunks
 */
export * from "./client.js";
export * from "./types.js";
export * from "./prompts.js";
export type * from "./schemas/ai.js";
export type { StreamChunk, StreamChunkType, StreamDelta, RawStreamChunk, } from "./schemas/ai.js";
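
Because the declaration file re-exports StreamChunk and related types, streaming consumers can be typed against them. A minimal TypeScript sketch (not part of the package; the chunk field names "type", "content", and "error" are assumed from the doc comments in client.js):

import { GLMClient } from "@ebowwa/ai";
import type { StreamChunk } from "@ebowwa/ai";

// Collects streamed text into one string; throws on the first error chunk.
async function collectStream(stream: AsyncIterable<StreamChunk>): Promise<string> {
  let text = "";
  for await (const chunk of stream) {
    if (chunk.type === "text") {
      text += chunk.content ?? "";
    } else if (chunk.type === "error") {
      throw new Error(chunk.error);
    }
  }
  return text;
}

const client = new GLMClient();
console.log(await collectStream(client.streamGenerate("Write a haiku about retries.")));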
package/dist/index.js
ADDED
@@ -0,0 +1,18 @@
/**
 * AI module exports
 *
 * Exports:
 * - client: GLMClient class and related error types
 * - types: Type definitions (now re-exported from schema)
 * - prompts: Prompt building utilities
 * - schema: Zod schemas and validation helpers
 *
 * Streaming support:
 * - streamChatCompletion: Stream chat completions with SSE
 * - streamGenerate: Stream simple prompts
 * - streamGenerateWithSystem: Stream with system prompt
 * - StreamChunk: Type for stream chunks
 */
export * from "./client.js";
export * from "./types.js";
export * from "./prompts.js";
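
Since index.js re-exports everything from client.js, the getGLMClient() singleton should also be reachable from the package root. A hedged sketch (again assuming dist/index.js is the published entry point):

import { getGLMClient } from "@ebowwa/ai";

// Returns a shared GLMClient, or null when no API key is configured (a warning is logged instead of throwing).
const glm = getGLMClient();
if (glm) {
  const reply = await glm.generateWithSystem(
    "You are a helpful assistant",
    "Explain quantum computing in two sentences",
  );
  console.log(reply);
}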