@ebowwa/ai 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/client.js ADDED
@@ -0,0 +1,492 @@
+ /**
+  * GLM-4.7 AI Client using Z.AI OpenAI-compatible endpoint
+  *
+  * Based on documentation from: https://api.z.ai/api/coding/paas/v4
+  *
+  * Usage with any OpenAI-compatible client library:
+  * - Base URL: https://api.z.ai/api/coding/paas/v4
+  * - Models: GLM-4.7, GLM-4.6, GLM-4.5, GLM-4.5-air
+  *
+  * === LAYER 1: OpenAI Protocol Implementation ===
+  *
+  * This client implements the OpenAI-compatible protocol with:
+  * - 30s default timeout (configurable)
+  * - 3 retries with exponential backoff (1s → 2s → 4s, max 10s)
+  * - Specific error types: GLMTimeoutError, GLMAuthError, GLMRateLimitError, GLMNetworkError
+  * - AbortController for cancellation
+  * - Runtime validation with Zod schemas
+  * - SSE streaming support (NOW IMPLEMENTED)
+  *
+  * LAYER 2 (CLI Wrapper) concerns:
+  * - MCP plugin system → See docs/CLI-ARCHITECTURE.md
+  * - Session persistence → See docs/CLI-ARCHITECTURE.md
+  * - TUI/Ink rendering → See docs/CLI-ARCHITECTURE.md
+  *
+  * === STREAMING (Layer 1 - OpenAI Protocol) ===
+  *
+  * Streaming is now fully implemented with SSE (Server-Sent Events) parsing:
+  * - streamChatCompletion() - full chat streaming with StreamChunk objects
+  * - streamGenerate() - simple prompt streaming
+  * - streamGenerateWithSystem() - streaming with system prompt
+  *
+  * Each method returns an AsyncGenerator that yields StreamChunk objects:
+  * - type: "text" - contains incremental content in `content` field
+  * - type: "done" - stream complete
+  * - type: "error" - error occurred (check `error` field)
+  *
+  * Final chunk includes usage information (prompt_tokens, completion_tokens, total_tokens)
+  */
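+ /*
+  * Illustrative usage sketch for the exports below. It assumes Z_AI_API_KEY is
+  * set in the environment and imports this file directly from the dist output;
+  * adapt the import specifier to however the package is wired in your project.
+  *
+  *   import { GLMClient } from "./client.js";
+  *
+  *   const client = new GLMClient();
+  *
+  *   // Non-streaming helper: resolves to the assistant's text content
+  *   const text = await client.generate("Summarize SSE in one sentence.");
+  *
+  *   // Streaming: yields StreamChunk objects until a "done" chunk arrives
+  *   for await (const chunk of client.streamGenerate("Write a haiku")) {
+  *     if (chunk.type === "text") process.stdout.write(chunk.content);
+  *   }
+  */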
+ import { z } from "zod";
+ import { ChatCompletionOptionsSchema, ChatMessageSchema, TokenUsageSchema, LatencyInfoSchema, ChatCompletionResponseSchema, RawChatCompletionResponseSchema, } from "./schemas/ai.js";
+ import { parseSSEStream } from "./schemas/ai.js";
+ const GLM_API_BASE = "https://api.z.ai/api/coding/paas/v4";
+ const DEFAULT_TIMEOUT = 30000; // 30 seconds
+ const DEFAULT_MAX_RETRIES = 3;
+ /**
+  * Custom error types for better error handling
+  */
+ export class GLMTimeoutError extends Error {
+   constructor(message) {
+     super(message);
+     this.name = "GLMTimeoutError";
+   }
+ }
+ export class GLMAuthError extends Error {
+   constructor(message) {
+     super(message);
+     this.name = "GLMAuthError";
+   }
+ }
+ export class GLMRateLimitError extends Error {
+   constructor(message) {
+     super(message);
+     this.name = "GLMRateLimitError";
+   }
+ }
+ export class GLMNetworkError extends Error {
+   cause;
+   constructor(message, cause) {
+     super(message);
+     this.cause = cause;
+     this.name = "GLMNetworkError";
+   }
+ }
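+ /*
+  * Error-handling sketch: each failure mode has its own class, so callers can
+  * branch with instanceof. The generate() call and logging below are purely
+  * illustrative.
+  *
+  *   try {
+  *     const reply = await client.generate("ping");
+  *   } catch (err) {
+  *     if (err instanceof GLMAuthError) {
+  *       console.error("Check Z_AI_API_KEY / ZAI_API_KEY / GLM_API_KEY");
+  *     } else if (err instanceof GLMTimeoutError || err instanceof GLMRateLimitError) {
+  *       console.error("Transient failure after retries were exhausted:", err.message);
+  *     } else if (err instanceof GLMNetworkError) {
+  *       console.error("Network failure:", err.cause);
+  *     } else {
+  *       throw err; // unexpected, rethrow
+  *     }
+  *   }
+  */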
+ /**
+  * Resolve GLM API key from environment or explicit parameter
+  */
+ function resolveApiKey(apiKey) {
+   if (apiKey)
+     return apiKey;
+   if (process.env.Z_AI_API_KEY)
+     return process.env.Z_AI_API_KEY;
+   if (process.env.ZAI_API_KEY)
+     return process.env.ZAI_API_KEY;
+   if (process.env.GLM_API_KEY)
+     return process.env.GLM_API_KEY;
+   throw new Error("GLM API key not found. Set Z_AI_API_KEY, ZAI_API_KEY, or GLM_API_KEY environment variable.");
+ }
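+ /*
+  * Resolution order, illustrated (an explicit argument wins over any env var):
+  *
+  *   new GLMClient("my-key")          // explicit key, environment ignored
+  *   Z_AI_API_KEY=... node app.js     // checked first
+  *   ZAI_API_KEY=...  node app.js     // checked second
+  *   GLM_API_KEY=...  node app.js     // checked last
+  */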
+ /**
+  * Format milliseconds to human-readable string
+  */
+ function formatLatency(ms) {
+   if (ms < 1000)
+     return `${ms.toFixed(0)}ms`;
+   if (ms < 60000)
+     return `${(ms / 1000).toFixed(2)}s`;
+   return `${(ms / 60000).toFixed(2)}m`;
+ }
+ /**
+  * Sleep for a specified duration (for retry backoff)
+  */
+ function sleep(ms) {
+   return new Promise((resolve) => setTimeout(resolve, ms));
+ }
+ /**
+  * Calculate exponential backoff delay
+  */
+ function calculateBackoff(retryCount, baseDelay = 1000) {
+   return Math.min(baseDelay * Math.pow(2, retryCount), 10000); // Max 10 seconds
+ }
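+ /*
+  * With the default 1000ms base delay the retry schedule works out to:
+  *
+  *   calculateBackoff(0) === 1000   // 1s before the 1st retry
+  *   calculateBackoff(1) === 2000   // 2s before the 2nd retry
+  *   calculateBackoff(2) === 4000   // 4s before the 3rd retry
+  *   calculateBackoff(4) === 10000  // capped at 10s thereafter
+  */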
+ /**
+  * Create a timeout promise that rejects after specified duration
+  */
+ function createTimeoutPromise(ms) {
+   return new Promise((_, reject) => {
+     setTimeout(() => reject(new GLMTimeoutError(`Request timeout after ${ms}ms`)), ms);
+   });
+ }
+ /**
+  * GLM-4.7 Client for OpenAI-compatible API
+  * Includes timeout handling, retry logic, and detailed error reporting
+  */
+ export class GLMClient {
+   apiKey;
+   baseURL;
+   fetchImpl;
+   constructor(apiKey, baseURL, fetchImpl) {
+     this.apiKey = resolveApiKey(apiKey);
+     this.baseURL = baseURL || GLM_API_BASE;
+     // Use injected fetch for testing, otherwise use imported fetch
+     this.fetchImpl = fetchImpl || fetch;
+   }
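+   /*
+    * The fetchImpl parameter exists so tests can inject a stub instead of the
+    * global fetch. A minimal sketch; the response body shape here is an
+    * assumption, the authoritative shape is RawChatCompletionResponseSchema:
+    *
+    *   const fakeFetch = async () =>
+    *     new Response(JSON.stringify({
+    *       id: "test", object: "chat.completion", created: 0, model: "GLM-4.7",
+    *       choices: [{ index: 0, message: { role: "assistant", content: "hi" }, finish_reason: "stop" }],
+    *       usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
+    *     }), { status: 200, headers: { "Content-Type": "application/json" } });
+    *
+    *   const client = new GLMClient("test-key", undefined, fakeFetch);
+    */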
+   /**
+    * Make a request to the GLM API with timeout and retry logic (for non-streaming)
+    */
+   async requestWithRetry(endpoint, options = {}, timeout = DEFAULT_TIMEOUT, maxRetries = DEFAULT_MAX_RETRIES, retryCount = 0) {
+     const startTime = performance.now();
+     try {
+       // Create abort controller for timeout
+       const controller = new AbortController();
+       const timeoutId = setTimeout(() => controller.abort(), timeout);
+       // Race between fetch and timeout
+       const response = (await Promise.race([
+         this.fetchImpl(`${this.baseURL}${endpoint}`, {
+           ...options,
+           signal: controller.signal,
+           headers: {
+             Authorization: `Bearer ${this.apiKey}`,
+             "Content-Type": "application/json",
+             ...options.headers,
+           },
+         }),
+         createTimeoutPromise(timeout),
+       ]));
+       clearTimeout(timeoutId);
+       // Handle specific HTTP status codes
+       if (response.status === 401) {
+         throw new GLMAuthError("Invalid API key or unauthorized access");
+       }
+       if (response.status === 429) {
+         throw new GLMRateLimitError("Rate limit exceeded, please retry later");
+       }
+       if (!response.ok) {
+         const errorText = await response.text();
+         throw new Error(`GLM API error: ${response.status} ${response.statusText} - ${errorText}`);
+       }
+       const data = await response.json();
+       const latencyMs = performance.now() - startTime;
+       return { data, latencyMs };
+     }
+     catch (error) {
+       const latencyMs = performance.now() - startTime;
+       // Don't retry on auth errors or if we've exhausted retries
+       if (error instanceof GLMAuthError || retryCount >= maxRetries) {
+         throw error;
+       }
+       // Retry on timeout, network errors, or rate limit (with backoff)
+       if (error instanceof GLMTimeoutError ||
+         error instanceof GLMRateLimitError ||
+         (error instanceof TypeError && error.message.includes("fetch"))) {
+         const backoffDelay = calculateBackoff(retryCount);
+         console.warn(`GLM API request failed (attempt ${retryCount + 1}/${maxRetries + 1}), ` +
+           `retrying in ${backoffDelay}ms...`);
+         await sleep(backoffDelay);
+         return this.requestWithRetry(endpoint, options, timeout, maxRetries, retryCount + 1);
+       }
+       // Wrap other errors in GLMNetworkError
+       throw new GLMNetworkError(`Network error: ${error.message}`, error);
+     }
+   }
+   /**
+    * Make a streaming request to the GLM API with timeout and retry logic
+    * Returns the raw Response object for SSE parsing
+    */
+   async requestStream(endpoint, options = {}, timeout = DEFAULT_TIMEOUT, maxRetries = DEFAULT_MAX_RETRIES, retryCount = 0) {
+     try {
+       // Create abort controller for timeout
+       const controller = new AbortController();
+       const timeoutId = setTimeout(() => controller.abort(), timeout);
+       // Race between fetch and timeout
+       const response = (await Promise.race([
+         this.fetchImpl(`${this.baseURL}${endpoint}`, {
+           ...options,
+           signal: controller.signal,
+           headers: {
+             Authorization: `Bearer ${this.apiKey}`,
+             "Content-Type": "application/json",
+             ...options.headers,
+           },
+         }),
+         createTimeoutPromise(timeout),
+       ]));
+       clearTimeout(timeoutId);
+       // Handle specific HTTP status codes
+       if (response.status === 401) {
+         throw new GLMAuthError("Invalid API key or unauthorized access");
+       }
+       if (response.status === 429) {
+         throw new GLMRateLimitError("Rate limit exceeded, please retry later");
+       }
+       if (!response.ok) {
+         const errorText = await response.text();
+         throw new Error(`GLM API error: ${response.status} ${response.statusText} - ${errorText}`);
+       }
+       return response;
+     }
+     catch (error) {
+       // Don't retry on auth errors or if we've exhausted retries
+       if (error instanceof GLMAuthError || retryCount >= maxRetries) {
+         throw error;
+       }
+       // Retry on timeout, network errors, or rate limit (with backoff)
+       if (error instanceof GLMTimeoutError ||
+         error instanceof GLMRateLimitError ||
+         (error instanceof TypeError && error.message.includes("fetch"))) {
+         const backoffDelay = calculateBackoff(retryCount);
+         console.warn(`GLM API streaming request failed (attempt ${retryCount + 1}/${maxRetries + 1}), ` +
+           `retrying in ${backoffDelay}ms...`);
+         await sleep(backoffDelay);
+         return this.requestStream(endpoint, options, timeout, maxRetries, retryCount + 1);
+       }
+       // Wrap other errors in GLMNetworkError
+       throw new GLMNetworkError(`Network error: ${error.message}`, error);
+     }
+   }
+   /**
+    * Public request method (backward compatibility)
+    */
+   async request(endpoint, options = {}) {
+     return this.requestWithRetry(endpoint, options, DEFAULT_TIMEOUT);
+   }
+   /**
+    * Validate and parse chat messages using Zod schema
+    */
+   validateMessages(messages) {
+     const result = z.array(ChatMessageSchema).min(1).safeParse(messages);
+     if (!result.success) {
+       throw new Error(`Invalid chat messages: ${result.error.message}`);
+     }
+     return result.data;
+   }
+   /**
+    * Validate and parse chat completion options using Zod schema
+    */
+   parseOptions(options) {
+     const result = ChatCompletionOptionsSchema.safeParse(options);
+     if (!result.success) {
+       throw new Error(`Invalid chat completion options: ${result.error.message}`);
+     }
+     return result.data;
+   }
+   /**
+    * Convert raw API response to internal format with Zod validation
+    */
+   convertResponse(raw, latencyMs) {
+     // Validate raw API response
+     const rawResult = RawChatCompletionResponseSchema.safeParse(raw);
+     if (!rawResult.success) {
+       throw new Error(`Invalid API response: ${rawResult.error.message}`);
+     }
+     const rawResponse = rawResult.data;
+     // Convert snake_case usage to camelCase
+     const usage = rawResponse.usage
+       ? {
+         promptTokens: rawResponse.usage.prompt_tokens,
+         completionTokens: rawResponse.usage.completion_tokens,
+         totalTokens: rawResponse.usage.total_tokens,
+       }
+       : undefined;
+     // Validate usage if present
+     if (usage) {
+       const usageResult = TokenUsageSchema.safeParse(usage);
+       if (!usageResult.success) {
+         throw new Error(`Invalid token usage: ${usageResult.error.message}`);
+       }
+     }
+     // Create latency info
+     const latency = {
+       totalMs: latencyMs,
+       formatted: formatLatency(latencyMs),
+     };
+     const latencyResult = LatencyInfoSchema.safeParse(latency);
+     if (!latencyResult.success) {
+       throw new Error(`Invalid latency info: ${latencyResult.error.message}`);
+     }
+     // Build final response
+     const response = {
+       id: rawResponse.id,
+       object: rawResponse.object,
+       created: rawResponse.created,
+       model: rawResponse.model,
+       choices: rawResponse.choices,
+       usage,
+       latency: latencyResult.data,
+     };
+     // Validate final response
+     const finalResult = ChatCompletionResponseSchema.safeParse(response);
+     if (!finalResult.success) {
+       throw new Error(`Invalid response format: ${finalResult.error.message}`);
+     }
+     return finalResult.data;
+   }
+   /**
+    * Create a chat completion (non-streaming)
+    * Maps API response (snake_case) to internal types (camelCase)
+    * Includes latency tracking and configurable timeout
+    */
+   async chatCompletion(messages, options = {}) {
+     // Validate inputs using Zod schemas
+     const validatedMessages = this.validateMessages(messages);
+     const validatedOptions = this.parseOptions(options);
+     const { model = "GLM-4.7", temperature = 0.7, maxTokens, stream = false, timeout = DEFAULT_TIMEOUT, maxRetries = DEFAULT_MAX_RETRIES, } = validatedOptions;
+     const body = {
+       model,
+       messages: validatedMessages,
+       temperature,
+     };
+     if (maxTokens !== undefined) {
+       body.max_tokens = maxTokens;
+     }
+     if (stream) {
+       body.stream = true;
+     }
+     // Raw API response has snake_case usage fields
+     const { data: raw, latencyMs } = await this.requestWithRetry("/chat/completions", {
+       method: "POST",
+       body: JSON.stringify(body),
+     }, timeout, maxRetries);
+     // Convert and validate response using Zod
+     return this.convertResponse(raw, latencyMs);
+   }
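+   /*
+    * chatCompletion usage sketch (option names as accepted by
+    * ChatCompletionOptionsSchema; values are illustrative):
+    *
+    *   const response = await client.chatCompletion(
+    *     [
+    *       { role: "system", content: "You are terse." },
+    *       { role: "user", content: "Name three prime numbers." },
+    *     ],
+    *     { model: "GLM-4.7", temperature: 0.2, maxTokens: 64, timeout: 15000 },
+    *   );
+    *   console.log(response.choices[0]?.message?.content);
+    *   console.log(response.usage?.totalTokens, response.latency.formatted);
+    */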
+   /**
+    * Simple generate method for quick prompts (non-streaming)
+    */
+   async generate(prompt, options = {}) {
+     const response = await this.chatCompletion([{ role: "user", content: prompt }], options);
+     return response.choices[0]?.message?.content || "";
+   }
+   /**
+    * Generate with system prompt (non-streaming)
+    */
+   async generateWithSystem(systemPrompt, userPrompt, options = {}) {
+     const response = await this.chatCompletion([
+       { role: "system", content: systemPrompt },
+       { role: "user", content: userPrompt },
+     ], options);
+     return response.choices[0]?.message?.content || "";
+   }
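+   /*
+    * Convenience helpers, illustrated: both resolve directly to the assistant's
+    * text (an empty string if the API returns no choices):
+    *
+    *   const answer = await client.generate("What is SSE?");
+    *   const styled = await client.generateWithSystem(
+    *     "Answer like a pirate.",
+    *     "What is SSE?",
+    *   );
+    */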
+   /**
+    * Stream a chat completion
+    *
+    * Returns an async generator that yields StreamChunk objects as they arrive.
+    * Each chunk contains incremental text content, and the final chunk includes
+    * usage information.
+    *
+    * @param messages - The chat messages to send
+    * @param options - Chat completion options (timeout, maxRetries, etc.)
+    * @returns Async generator of StreamChunk objects
+    *
+    * @example
+    * ```ts
+    * const chunks = [];
+    * for await (const chunk of client.streamChatCompletion(messages)) {
+    *   if (chunk.type === 'text') {
+    *     chunks.push(chunk.content);
+    *     process.stdout.write(chunk.content);
+    *   } else if (chunk.type === 'done') {
+    *     console.log('\nStream complete!');
+    *   }
+    * }
+    * ```
+    */
+   async *streamChatCompletion(messages, options = {}) {
+     // Validate inputs using Zod schemas
+     const validatedMessages = this.validateMessages(messages);
+     const validatedOptions = this.parseOptions(options);
+     const { model = "GLM-4.7", temperature = 0.7, maxTokens, timeout = DEFAULT_TIMEOUT, maxRetries = DEFAULT_MAX_RETRIES, } = validatedOptions;
+     const body = {
+       model,
+       messages: validatedMessages,
+       temperature,
+       stream: true, // Enable streaming
+     };
+     if (maxTokens !== undefined) {
+       body.max_tokens = maxTokens;
+     }
+     try {
+       // Make the streaming request
+       const response = await this.requestStream("/chat/completions", {
+         method: "POST",
+         body: JSON.stringify(body),
+       }, timeout, maxRetries);
+       // Parse the SSE stream and yield chunks
+       for await (const chunk of parseSSEStream(response)) {
+         yield chunk;
+       }
+     }
+     catch (error) {
+       yield {
+         type: "error",
+         error: error instanceof Error ? error.message : String(error),
+       };
+     }
+   }
+   /**
+    * Stream a simple prompt
+    *
+    * Returns an async generator that yields chunks of text as they arrive.
+    * Simplified interface for single-prompt streaming.
+    *
+    * @param prompt - The prompt to send
+    * @param options - Chat completion options (timeout, maxRetries, etc.)
+    * @returns Async generator of StreamChunk objects
+    *
+    * @example
+    * ```ts
+    * for await (const chunk of client.streamGenerate("Tell me a story")) {
+    *   if (chunk.type === 'text') {
+    *     process.stdout.write(chunk.content);
+    *   }
+    * }
+    * ```
+    */
+   async *streamGenerate(prompt, options = {}) {
+     yield* this.streamChatCompletion([{ role: "user", content: prompt }], options);
+   }
+   /**
+    * Stream with system prompt
+    *
+    * Returns an async generator that yields chunks of text as they arrive.
+    * Includes both system and user prompts.
+    *
+    * @param systemPrompt - The system prompt
+    * @param userPrompt - The user prompt
+    * @param options - Chat completion options (timeout, maxRetries, etc.)
+    * @returns Async generator of StreamChunk objects
+    *
+    * @example
+    * ```ts
+    * for await (const chunk of client.streamGenerateWithSystem(
+    *   "You are a helpful assistant",
+    *   "Explain quantum computing"
+    * )) {
+    *   if (chunk.type === 'text') {
+    *     process.stdout.write(chunk.content);
+    *   }
+    * }
+    * ```
+    */
+   async *streamGenerateWithSystem(systemPrompt, userPrompt, options = {}) {
+     yield* this.streamChatCompletion([
+       { role: "system", content: systemPrompt },
+       { role: "user", content: userPrompt },
+     ], options);
+   }
+ }
+ /**
+  * Create a singleton instance for use in the app
+  */
+ let glmClient = null;
+ export function getGLMClient() {
+   if (glmClient)
+     return glmClient;
+   try {
+     glmClient = new GLMClient();
+     console.log("✓ GLM-4.7 API client initialized");
+     return glmClient;
+   }
+   catch (error) {
+     console.warn("GLM API key not configured - AI features unavailable");
+     return null;
+   }
+ }
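+ /*
+  * Singleton usage sketch: getGLMClient() returns null when no API key is
+  * configured, so callers should guard before use:
+  *
+  *   const client = getGLMClient();
+  *   if (client) {
+  *     const reply = await client.generate("Hello");
+  *   } else {
+  *     // AI features unavailable; fall back to non-AI behavior
+  *   }
+  */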
@@ -0,0 +1,20 @@
+ /**
+  * AI module exports
+  *
+  * Exports:
+  * - client: GLMClient class and related error types
+  * - types: Type definitions (now re-exported from schema)
+  * - prompts: Prompt building utilities
+  * - schema: Zod schemas and validation helpers
+  *
+  * Streaming support:
+  * - streamChatCompletion: Stream chat completions with SSE
+  * - streamGenerate: Stream simple prompts
+  * - streamGenerateWithSystem: Stream with system prompt
+  * - StreamChunk: Type for stream chunks
+  */
+ export * from "./client.js";
+ export * from "./types.js";
+ export * from "./prompts.js";
+ export type * from "./schemas/ai.js";
+ export type { StreamChunk, StreamChunkType, StreamDelta, RawStreamChunk, } from "./schemas/ai.js";
package/dist/index.js ADDED
@@ -0,0 +1,18 @@
+ /**
+  * AI module exports
+  *
+  * Exports:
+  * - client: GLMClient class and related error types
+  * - types: Type definitions (now re-exported from schema)
+  * - prompts: Prompt building utilities
+  * - schema: Zod schemas and validation helpers
+  *
+  * Streaming support:
+  * - streamChatCompletion: Stream chat completions with SSE
+  * - streamGenerate: Stream simple prompts
+  * - streamGenerateWithSystem: Stream with system prompt
+  * - StreamChunk: Type for stream chunks
+  */
+ export * from "./client.js";
+ export * from "./types.js";
+ export * from "./prompts.js";