@kaitranntt/ccs 3.3.0 → 3.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,488 @@
+ #!/usr/bin/env node
+ 'use strict';
+
+ const http = require('http');
+ const https = require('https');
+ const GlmtTransformer = require('./glmt-transformer');
+ const SSEParser = require('./sse-parser');
+ const DeltaAccumulator = require('./delta-accumulator');
+
+ /**
+  * GlmtProxy - Embedded HTTP proxy for GLM thinking support
+  *
+  * Architecture:
+  * - Intercepts Claude CLI → Z.AI calls
+  * - Transforms Anthropic format → OpenAI format
+  * - Converts reasoning_content → thinking blocks
+  * - Supports both streaming and buffered modes
+  *
+  * Lifecycle:
+  * - Spawned by bin/ccs.js when the 'glmt' profile is detected
+  * - Binds to 127.0.0.1:random_port (security + avoids conflicts)
+  * - Terminates when the parent process exits
+  *
+  * Debugging:
+  * - Verbose: pass --verbose to see request/response logs
+  * - Debug: set CCS_DEBUG_LOG=1 to write logs to ~/.ccs/logs/
+  *
+  * Usage:
+  *   const proxy = new GlmtProxy({ verbose: true });
+  *   await proxy.start();
+  */
+ class GlmtProxy {
+   constructor(config = {}) {
+     this.transformer = new GlmtTransformer({
+       verbose: config.verbose,
+       debugLog: config.debugLog || process.env.CCS_DEBUG_LOG === '1'
+     });
+     // Use ANTHROPIC_BASE_URL from the environment (set by settings.json) or fall back to the Z.AI default
+     this.upstreamUrl = process.env.ANTHROPIC_BASE_URL || 'https://api.z.ai/api/coding/paas/v4/chat/completions';
+     this.server = null;
+     this.port = null;
+     this.verbose = config.verbose || false;
+     this.timeout = config.timeout || 120000; // 120s default
+     this.streamingEnabled = process.env.CCS_GLMT_STREAMING !== 'disabled';
+     this.forceStreaming = process.env.CCS_GLMT_STREAMING === 'force';
+   }
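The two flags above give CCS_GLMT_STREAMING three effective settings. As an illustration only (this helper is not part of the package), here is the decision they drive, mirroring the useStreaming branch in handleRequest below:

    // Sketch: restates the constructor flags; not in the package itself.
    function resolveMode(clientWantsStream, env = process.env.CCS_GLMT_STREAMING) {
      const streamingEnabled = env !== 'disabled';
      const forceStreaming = env === 'force';
      // Same condition as handleRequest's useStreaming branch
      return (clientWantsStream && streamingEnabled) || forceStreaming ? 'streaming' : 'buffered';
    }

    resolveMode(true);              // 'streaming' (default)
    resolveMode(true, 'disabled');  // 'buffered'  (streaming opt-out)
    resolveMode(false, 'force');    // 'streaming' (forced)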
+
+   /**
+    * Start HTTP server on a random port
+    * @returns {Promise<number>} Port number
+    */
+   async start() {
+     return new Promise((resolve, reject) => {
+       this.server = http.createServer((req, res) => {
+         this.handleRequest(req, res);
+       });
+
+       // Bind to 127.0.0.1:0 (random port for security + to avoid conflicts)
+       this.server.listen(0, '127.0.0.1', () => {
+         this.port = this.server.address().port;
+         // Signal parent process
+         console.log(`PROXY_READY:${this.port}`);
+
+         // Info message (only show in verbose mode)
+         if (this.verbose) {
+           const mode = this.streamingEnabled ? 'streaming mode' : 'buffered mode';
+           console.error(`[glmt] Proxy listening on port ${this.port} (${mode})`);
+         }
+
+         // Debug mode notice
+         if (this.transformer.debugLog) {
+           console.error(`[glmt] Debug logging enabled: ${this.transformer.debugLogDir}`);
+           console.error(`[glmt] WARNING: Debug logs contain full request/response data`);
+         }
+
+         this.log('Verbose logging enabled');
+         resolve(this.port);
+       });
+
+       this.server.on('error', (error) => {
+         console.error('[glmt-proxy] Server error:', error);
+         reject(error);
+       });
+     });
+   }
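For context on the PROXY_READY handshake: a parent such as bin/ccs.js can watch stdout for that line to learn the port. A minimal sketch, assuming Node's child_process and a same-directory script path; the actual spawn logic in bin/ccs.js is not part of this file:

    // Sketch only: spawnGlmtProxy is a hypothetical helper, not part of the package.
    const { spawn } = require('child_process');

    function spawnGlmtProxy(scriptPath = require.resolve('./glmt-proxy')) {
      return new Promise((resolve, reject) => {
        const child = spawn(process.execPath, [scriptPath], {
          stdio: ['ignore', 'pipe', 'inherit'] // capture stdout for the handshake
        });
        child.stdout.on('data', (data) => {
          const match = data.toString().match(/PROXY_READY:(\d+)/);
          if (match) resolve({ child, port: Number(match[1]) });
        });
        child.on('error', reject);
      });
    }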
+
+   /**
+    * Handle incoming HTTP request
+    * @param {http.IncomingMessage} req - Request
+    * @param {http.ServerResponse} res - Response
+    */
+   async handleRequest(req, res) {
+     const startTime = Date.now();
+     this.log(`Request: ${req.method} ${req.url}`);
+
+     try {
+       // Only accept POST requests
+       if (req.method !== 'POST') {
+         res.writeHead(405, { 'Content-Type': 'application/json' });
+         res.end(JSON.stringify({ error: 'Method not allowed' }));
+         return;
+       }
+
+       // Read request body
+       const body = await this._readBody(req);
+       this.log(`Request body size: ${body.length} bytes`);
+
+       // Parse JSON with error handling
+       let anthropicRequest;
+       try {
+         anthropicRequest = JSON.parse(body);
+       } catch (jsonError) {
+         res.writeHead(400, { 'Content-Type': 'application/json' });
+         res.end(JSON.stringify({
+           error: {
+             type: 'invalid_request_error',
+             message: 'Invalid JSON in request body: ' + jsonError.message
+           }
+         }));
+         return;
+       }
+
+       // Log thinking parameter for debugging
+       if (anthropicRequest.thinking) {
+         this.log(`Request contains thinking parameter: ${JSON.stringify(anthropicRequest.thinking)}`);
+       } else {
+         this.log(`Request does NOT contain thinking parameter (will use message tags or default)`);
+       }
+
+       // Branch: streaming or buffered
+       const useStreaming = (anthropicRequest.stream && this.streamingEnabled) || this.forceStreaming;
+
+       if (useStreaming) {
+         await this._handleStreamingRequest(req, res, anthropicRequest, startTime);
+       } else {
+         await this._handleBufferedRequest(req, res, anthropicRequest, startTime);
+       }
+
+     } catch (error) {
+       console.error('[glmt-proxy] Request error:', error.message);
+       const duration = Date.now() - startTime;
+       this.log(`Request failed after ${duration}ms: ${error.message}`);
+
+       // A streaming failure may land here after SSE headers were already sent,
+       // in which case a JSON 500 can no longer be written
+       if (res.headersSent) {
+         if (!res.writableEnded) res.end();
+         return;
+       }
+       res.writeHead(500, { 'Content-Type': 'application/json' });
+       res.end(JSON.stringify({
+         error: {
+           type: 'proxy_error',
+           message: error.message
+         }
+       }));
+     }
+   }
+
+   /**
+    * Handle buffered (non-streaming) request
+    * @private
+    */
+   async _handleBufferedRequest(req, res, anthropicRequest, startTime) {
+     // Transform to OpenAI format
+     const { openaiRequest, thinkingConfig } =
+       this.transformer.transformRequest(anthropicRequest);
+
+     this.log(`Transformed request, thinking: ${thinkingConfig.thinking}`);
+
+     // Forward to Z.AI
+     const openaiResponse = await this._forwardToUpstream(
+       openaiRequest,
+       req.headers
+     );
+
+     this.log('Received response from upstream');
+
+     // Transform back to Anthropic format
+     const anthropicResponse = this.transformer.transformResponse(
+       openaiResponse,
+       thinkingConfig
+     );
+
+     // Return to Claude CLI
+     res.writeHead(200, {
+       'Content-Type': 'application/json',
+       'Access-Control-Allow-Origin': '*'
+     });
+     res.end(JSON.stringify(anthropicResponse));
+
+     const duration = Date.now() - startTime;
+     this.log(`Request completed in ${duration}ms`);
+   }
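To make the buffered round trip concrete: the proxy accepts an Anthropic-style body and expects an OpenAI-style body back, with the model's reasoning carried in reasoning_content as noted in the header comment. The shapes below are illustrative only; the exact field mapping lives in glmt-transformer.js, which is not part of this file:

    // Illustrative shapes only; not the transformer's actual output.
    // Anthropic-style request from Claude CLI (thinking parameter as logged in handleRequest):
    const exampleAnthropicRequest = {
      model: 'glm-4.6', // hypothetical model name
      max_tokens: 1024,
      thinking: { type: 'enabled', budget_tokens: 512 },
      messages: [{ role: 'user', content: 'Why is the sky blue?' }]
    };

    // OpenAI-style response from upstream, reasoning carried in reasoning_content:
    const exampleOpenaiResponse = {
      choices: [{
        message: {
          role: 'assistant',
          reasoning_content: '...model reasoning...',
          content: 'Rayleigh scattering: shorter wavelengths scatter more...'
        },
        finish_reason: 'stop'
      }]
    };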
+
+   /**
+    * Handle streaming request
+    * @private
+    */
+   async _handleStreamingRequest(req, res, anthropicRequest, startTime) {
+     this.log('Using streaming mode');
+
+     // Transform request
+     const { openaiRequest, thinkingConfig } =
+       this.transformer.transformRequest(anthropicRequest);
+
+     // Force streaming
+     openaiRequest.stream = true;
+
+     // Set SSE headers
+     res.writeHead(200, {
+       'Content-Type': 'text/event-stream',
+       'Cache-Control': 'no-cache',
+       'Connection': 'keep-alive',
+       'Access-Control-Allow-Origin': '*',
+       'X-Accel-Buffering': 'no' // Disable proxy buffering
+     });
+
+     // Disable Nagle's algorithm to prevent buffering at the socket level
+     if (res.socket) {
+       res.socket.setNoDelay(true);
+     }
+
+     this.log('Starting SSE stream to Claude CLI (socket buffering disabled)');
+
+     // Forward and stream
+     await this._forwardAndStreamUpstream(
+       openaiRequest,
+       req.headers,
+       res,
+       thinkingConfig,
+       startTime
+     );
+   }
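For reference, the events written back to Claude CLI use standard SSE framing: an `event: name` line, a `data: <JSON>` line, then a blank line (see the write loop in _forwardAndStreamUpstream below). The event and delta names here are illustrative only, since the actual mapping lives in GlmtTransformer.transformDelta, which is not in this file:

    event: content_block_delta
    data: {"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":"First, consider..."}}

    event: content_block_delta
    data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Rayleigh scattering"}}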
+
+   /**
+    * Read request body
+    * @param {http.IncomingMessage} req - Request
+    * @returns {Promise<string>} Body content
+    * @private
+    */
+   _readBody(req) {
+     return new Promise((resolve, reject) => {
+       const chunks = [];
+       const maxSize = 10 * 1024 * 1024; // 10MB limit
+       let totalSize = 0;
+
+       req.on('data', chunk => {
+         totalSize += chunk.length;
+         if (totalSize > maxSize) {
+           reject(new Error('Request body too large (max 10MB)'));
+           req.destroy(); // Stop reading; otherwise the socket keeps buffering data after rejection
+           return;
+         }
+         chunks.push(chunk);
+       });
+
+       req.on('end', () => resolve(Buffer.concat(chunks).toString()));
+       req.on('error', reject);
+     });
+   }
+
+   /**
+    * Forward request to Z.AI upstream
+    * @param {Object} openaiRequest - OpenAI format request
+    * @param {Object} originalHeaders - Original request headers
+    * @returns {Promise<Object>} OpenAI response
+    * @private
+    */
+   _forwardToUpstream(openaiRequest, originalHeaders) {
+     return new Promise((resolve, reject) => {
+       const url = new URL(this.upstreamUrl);
+       const requestBody = JSON.stringify(openaiRequest);
+
+       const options = {
+         hostname: url.hostname,
+         port: url.port || 443,
+         path: url.pathname || '/api/coding/paas/v4/chat/completions',
+         method: 'POST',
+         headers: {
+           'Content-Type': 'application/json',
+           'Content-Length': Buffer.byteLength(requestBody),
+           // Preserve auth header from original request
+           'Authorization': originalHeaders['authorization'] || '',
+           'User-Agent': 'CCS-GLMT-Proxy/1.0'
+         }
+       };
+
+       // Debug logging
+       this.log(`Forwarding to: ${url.hostname}${url.pathname}`);
+
+       // Set timeout (the callback closes over `req`, which is assigned below before the timer can fire)
+       const timeoutHandle = setTimeout(() => {
+         req.destroy();
+         reject(new Error('Upstream request timeout'));
+       }, this.timeout);
+
+       const req = https.request(options, (res) => {
+         clearTimeout(timeoutHandle);
+
+         const chunks = [];
+         res.on('data', chunk => chunks.push(chunk));
+
+         res.on('end', () => {
+           try {
+             const body = Buffer.concat(chunks).toString();
+             this.log(`Upstream response size: ${body.length} bytes`);
+
+             // Check for non-200 status
+             if (res.statusCode !== 200) {
+               reject(new Error(
+                 `Upstream error: ${res.statusCode} ${res.statusMessage}\n${body}`
+               ));
+               return;
+             }
+
+             const response = JSON.parse(body);
+             resolve(response);
+           } catch (error) {
+             reject(new Error('Invalid JSON from upstream: ' + error.message));
+           }
+         });
+       });
+
+       req.on('error', (error) => {
+         clearTimeout(timeoutHandle);
+         reject(error);
+       });
+
+       req.write(requestBody);
+       req.end();
+     });
+   }
+
+   /**
+    * Forward request to Z.AI and stream response
+    * @param {Object} openaiRequest - OpenAI format request
+    * @param {Object} originalHeaders - Original request headers
+    * @param {http.ServerResponse} clientRes - Response to Claude CLI
+    * @param {Object} thinkingConfig - Thinking configuration
+    * @param {number} startTime - Request start time
+    * @returns {Promise<void>}
+    * @private
+    */
+   async _forwardAndStreamUpstream(openaiRequest, originalHeaders, clientRes, thinkingConfig, startTime) {
+     return new Promise((resolve, reject) => {
+       const url = new URL(this.upstreamUrl);
+       const requestBody = JSON.stringify(openaiRequest);
+
+       const options = {
+         hostname: url.hostname,
+         port: url.port || 443,
+         path: url.pathname || '/api/coding/paas/v4/chat/completions',
+         method: 'POST',
+         headers: {
+           'Content-Type': 'application/json',
+           'Content-Length': Buffer.byteLength(requestBody),
+           'Authorization': originalHeaders['authorization'] || '',
+           'User-Agent': 'CCS-GLMT-Proxy/1.0',
+           'Accept': 'text/event-stream'
+         }
+       };
+
+       this.log(`Forwarding streaming request to: ${url.hostname}${url.pathname}`);
+
+       // C-03 Fix: Apply timeout to streaming requests
+       const timeoutHandle = setTimeout(() => {
+         req.destroy();
+         reject(new Error(`Streaming request timeout after ${this.timeout}ms`));
+       }, this.timeout);
+
+       const req = https.request(options, (upstreamRes) => {
+         clearTimeout(timeoutHandle);
+         if (upstreamRes.statusCode !== 200) {
+           let body = '';
+           upstreamRes.on('data', chunk => body += chunk);
+           upstreamRes.on('end', () => {
+             reject(new Error(`Upstream error: ${upstreamRes.statusCode}\n${body}`));
+           });
+           return;
+         }
+
+         const parser = new SSEParser();
+         const accumulator = new DeltaAccumulator(thinkingConfig);
+
+         upstreamRes.on('data', (chunk) => {
+           try {
+             const events = parser.parse(chunk);
+
+             events.forEach(event => {
+               // Transform OpenAI delta → Anthropic events
+               const anthropicEvents = this.transformer.transformDelta(event, accumulator);
+
+               // Forward to Claude CLI with immediate flush
+               anthropicEvents.forEach(evt => {
+                 const eventLine = `event: ${evt.event}\n`;
+                 const dataLine = `data: ${JSON.stringify(evt.data)}\n\n`;
+                 clientRes.write(eventLine + dataLine);
+
+                 // Flush immediately if the method is available (HTTP/2 or custom servers)
+                 if (typeof clientRes.flush === 'function') {
+                   clientRes.flush();
+                 }
+               });
+             });
+           } catch (error) {
+             this.log(`Error processing chunk: ${error.message}`);
+           }
+         });
+
+         upstreamRes.on('end', () => {
+           const duration = Date.now() - startTime;
+           this.log(`Streaming completed in ${duration}ms`);
+           clientRes.end();
+           resolve();
+         });
+
+         upstreamRes.on('error', (error) => {
+           clearTimeout(timeoutHandle);
+           this.log(`Upstream stream error: ${error.message}`);
+           clientRes.write(`event: error\n`);
+           clientRes.write(`data: ${JSON.stringify({ error: error.message })}\n\n`);
+           clientRes.end();
+           reject(error);
+         });
+       });
+
+       req.on('error', (error) => {
+         clearTimeout(timeoutHandle);
+         this.log(`Request error: ${error.message}`);
+         clientRes.write(`event: error\n`);
+         clientRes.write(`data: ${JSON.stringify({ error: error.message })}\n\n`);
+         clientRes.end();
+         reject(error);
+       });
+
+       req.write(requestBody);
+       req.end();
+     });
+   }
+
+   /**
+    * Stop proxy server
+    */
+   stop() {
+     if (this.server) {
+       this.log('Stopping proxy server');
+       this.server.close();
+     }
+   }
+
+   /**
+    * Log message if verbose
+    * @param {string} message - Message to log
+    * @private
+    */
+   log(message) {
+     if (this.verbose) {
+       console.error(`[glmt-proxy] ${message}`);
+     }
+   }
+ }
+
+ // Main entry point
+ if (require.main === module) {
+   const args = process.argv.slice(2);
+   const verbose = args.includes('--verbose') || args.includes('-v');
+
+   const proxy = new GlmtProxy({ verbose });
+
+   proxy.start().catch(error => {
+     console.error('[glmt-proxy] Failed to start:', error);
+     process.exit(1);
+   });
+
+   // Cleanup on signals
+   process.on('SIGTERM', () => {
+     proxy.stop();
+     process.exit(0);
+   });
+
+   process.on('SIGINT', () => {
+     proxy.stop();
+     process.exit(0);
+   });
+
+   // Last-resort cleanup on unexpected errors
+   process.on('uncaughtException', (error) => {
+     console.error('[glmt-proxy] Uncaught exception:', error);
+     proxy.stop();
+     process.exit(1);
+   });
+ }
+
+ module.exports = GlmtProxy;