@kaitranntt/ccs 3.3.0 → 3.4.0

package/README.md CHANGED
@@ -205,16 +205,27 @@ Commands and skills symlinked from `~/.ccs/shared/` - no duplication across prof
  |---------|-----------------|-------------------|
  | **Endpoint** | Anthropic-compatible | OpenAI-compatible |
  | **Thinking** | No | Yes (reasoning_content) |
- | **Streaming** | Yes | No (buffered) |
+ | **Streaming** | Yes | **Yes (v3.4+)** |
+ | **TTFB** | <500ms | <500ms (streaming), 2-10s (buffered) |
  | **Use Case** | Fast responses | Complex reasoning |

+ ### Streaming Support (v3.4)
+
+ **GLMT now supports real-time streaming** with incremental delivery of reasoning content.
+
+ - **Default**: Streaming enabled (TTFB <500ms)
+ - **Disable**: Set `CCS_GLMT_STREAMING=disabled` for buffered mode
+ - **Force**: Set `CCS_GLMT_STREAMING=force` to override client preferences
+
+ **Confirmed working**: Z.AI (tested with 1498 reasoning chunks)
+
  ### How It Works

  1. CCS spawns embedded HTTP proxy on localhost
- 2. Proxy converts Anthropic format → OpenAI format
+ 2. Proxy converts Anthropic format → OpenAI format (streaming or buffered)
  3. Forwards to Z.AI with reasoning parameters
- 4. Converts `reasoning_content` → thinking blocks
- 5. Thinking appears in Claude Code UI
+ 4. Converts `reasoning_content` → thinking blocks (incremental or complete)
+ 5. Thinking appears in Claude Code UI in real time

  ### Control Tags

@@ -235,6 +246,14 @@ nano ~/.ccs/glmt.settings.json
  }
  ```

+ ### Security Limits
+
+ **DoS protection** (v3.4):
+ - SSE buffer: 1MB max per event
+ - Content buffer: 10MB max per block (thinking/text)
+ - Content blocks: 100 max per message
+ - Request timeout: 120s (both streaming and buffered)
+
  ### Debugging

  **Enable verbose logging**:
@@ -249,6 +268,12 @@ ccs glmt --verbose "your prompt"
  # Logs: ~/.ccs/logs/
  ```

+ **Check streaming mode**:
+ ```bash
+ # Disable streaming for debugging
+ CCS_GLMT_STREAMING=disabled ccs glmt "test"
+ ```
+
  **Check reasoning content**:
  ```bash
  cat ~/.ccs/logs/*response-openai.json | jq '.choices[0].message.reasoning_content'
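Taken together, the `CCS_GLMT_STREAMING` settings documented above reduce to three invocations. A minimal sketch (values straight from the README; the prompt text is a placeholder):

```bash
# Default: streaming enabled (TTFB <500ms)
ccs glmt "your prompt"

# Buffered mode, the pre-3.4 behavior (TTFB 2-10s)
CCS_GLMT_STREAMING=disabled ccs glmt "your prompt"

# Force streaming regardless of the client's stream preference
CCS_GLMT_STREAMING=force ccs glmt "your prompt"
```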
package/VERSION CHANGED
@@ -1 +1 @@
- 3.3.0
+ 3.4.0
package/bin/delta-accumulator.js ADDED
@@ -0,0 +1,155 @@
+ #!/usr/bin/env node
+ 'use strict';
+
+ /**
+  * DeltaAccumulator - Maintain state across streaming deltas
+  *
+  * Tracks:
+  * - Message metadata (id, model, role)
+  * - Content blocks (thinking, text)
+  * - Current block index
+  * - Accumulated content
+  *
+  * Usage:
+  *   const acc = new DeltaAccumulator(thinkingConfig);
+  *   const events = transformer.transformDelta(openaiEvent, acc);
+  */
+ class DeltaAccumulator {
+   constructor(thinkingConfig = {}, options = {}) {
+     this.thinkingConfig = thinkingConfig;
+     this.messageId = 'msg_' + Date.now() + '_' + Math.random().toString(36).substring(7);
+     this.model = null;
+     this.role = 'assistant';
+
+     // Content blocks
+     this.contentBlocks = [];
+     this.currentBlockIndex = -1;
+
+     // Buffers
+     this.thinkingBuffer = '';
+     this.textBuffer = '';
+
+     // C-02 Fix: Limits to prevent unbounded accumulation
+     this.maxBlocks = options.maxBlocks || 100;
+     this.maxBufferSize = options.maxBufferSize || 10 * 1024 * 1024; // 10MB
+
+     // State flags
+     this.messageStarted = false;
+     this.finalized = false;
+
+     // Statistics
+     this.inputTokens = 0;
+     this.outputTokens = 0;
+     this.finishReason = null;
+   }
+
+   /**
+    * Get current content block
+    * @returns {Object|null} Current block or null
+    */
+   getCurrentBlock() {
+     if (this.currentBlockIndex >= 0 && this.currentBlockIndex < this.contentBlocks.length) {
+       return this.contentBlocks[this.currentBlockIndex];
+     }
+     return null;
+   }
+
+   /**
+    * Start new content block
+    * @param {string} type - Block type ('thinking' or 'text')
+    * @returns {Object} New block
+    */
+   startBlock(type) {
+     // C-02 Fix: Enforce max blocks limit
+     if (this.contentBlocks.length >= this.maxBlocks) {
+       throw new Error(`Maximum ${this.maxBlocks} content blocks exceeded (DoS protection)`);
+     }
+
+     this.currentBlockIndex++;
+     const block = {
+       index: this.currentBlockIndex,
+       type: type,
+       content: '',
+       started: true,
+       stopped: false
+     };
+     this.contentBlocks.push(block);
+
+     // Reset buffer for new block
+     if (type === 'thinking') {
+       this.thinkingBuffer = '';
+     } else if (type === 'text') {
+       this.textBuffer = '';
+     }
+
+     return block;
+   }
+
+   /**
+    * Add delta to current block
+    * @param {string} delta - Content delta
+    */
+   addDelta(delta) {
+     const block = this.getCurrentBlock();
+     if (block) {
+       if (block.type === 'thinking') {
+         // C-02 Fix: Enforce buffer size limit
+         if (this.thinkingBuffer.length + delta.length > this.maxBufferSize) {
+           throw new Error(`Thinking buffer exceeded ${this.maxBufferSize} bytes (DoS protection)`);
+         }
+         this.thinkingBuffer += delta;
+         block.content = this.thinkingBuffer;
+       } else if (block.type === 'text') {
+         // C-02 Fix: Enforce buffer size limit
+         if (this.textBuffer.length + delta.length > this.maxBufferSize) {
+           throw new Error(`Text buffer exceeded ${this.maxBufferSize} bytes (DoS protection)`);
+         }
+         this.textBuffer += delta;
+         block.content = this.textBuffer;
+       }
+     }
+   }
+
+   /**
+    * Mark current block as stopped
+    */
+   stopCurrentBlock() {
+     const block = this.getCurrentBlock();
+     if (block) {
+       block.stopped = true;
+     }
+   }
+
+   /**
+    * Update usage statistics
+    * @param {Object} usage - Usage object from OpenAI
+    */
+   updateUsage(usage) {
+     if (usage) {
+       this.inputTokens = usage.prompt_tokens || usage.input_tokens || 0;
+       this.outputTokens = usage.completion_tokens || usage.output_tokens || 0;
+     }
+   }
+
+   /**
+    * Get summary of accumulated state
+    * @returns {Object} Summary
+    */
+   getSummary() {
+     return {
+       messageId: this.messageId,
+       model: this.model,
+       role: this.role,
+       blockCount: this.contentBlocks.length,
+       currentIndex: this.currentBlockIndex,
+       messageStarted: this.messageStarted,
+       finalized: this.finalized,
+       usage: {
+         input_tokens: this.inputTokens,
+         output_tokens: this.outputTokens
+       }
+     };
+   }
+ }
+
+ module.exports = DeltaAccumulator;
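For orientation, a minimal sketch of driving `DeltaAccumulator` directly, with the README's "Security Limits" passed as the documented constructor options (the `thinkingConfig` shape is an assumption; the class only stores it):

```js
const DeltaAccumulator = require('./delta-accumulator');

// Limits mirror the README's DoS protections (these are also the class defaults)
const acc = new DeltaAccumulator(
  { thinking: true },                                  // thinkingConfig shape assumed
  { maxBlocks: 100, maxBufferSize: 10 * 1024 * 1024 }  // 100 blocks, 10MB per buffer
);

acc.startBlock('thinking');
acc.addDelta('First, consider the input… ');
acc.stopCurrentBlock();

acc.startBlock('text');
acc.addDelta('Final answer.');
acc.stopCurrentBlock();

console.log(acc.getSummary());
// → { messageId: 'msg_…', blockCount: 2, currentIndex: 1, … }
```

Exceeding either limit makes `startBlock`/`addDelta` throw rather than let the buffers grow without bound.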
package/bin/glmt-proxy.js CHANGED
@@ -4,6 +4,8 @@
  const http = require('http');
  const https = require('https');
  const GlmtTransformer = require('./glmt-transformer');
+ const SSEParser = require('./sse-parser');
+ const DeltaAccumulator = require('./delta-accumulator');

  /**
   * GlmtProxy - Embedded HTTP proxy for GLM thinking support
@@ -12,7 +14,7 @@ const GlmtTransformer = require('./glmt-transformer');
   * - Intercepts Claude CLI → Z.AI calls
   * - Transforms Anthropic format → OpenAI format
   * - Converts reasoning_content → thinking blocks
-  * - Buffered mode only (streaming not supported)
+  * - Supports both streaming and buffered modes
   *
   * Lifecycle:
   * - Spawned by bin/ccs.js when 'glmt' profile detected
@@ -30,11 +32,14 @@ const GlmtTransformer = require('./glmt-transformer');
  class GlmtProxy {
    constructor(config = {}) {
      this.transformer = new GlmtTransformer({ verbose: config.verbose });
-     this.upstreamUrl = 'https://api.z.ai/api/coding/paas/v4/chat/completions';
+     // Use ANTHROPIC_BASE_URL from environment (set by settings.json) or fallback to Z.AI default
+     this.upstreamUrl = process.env.ANTHROPIC_BASE_URL || 'https://api.z.ai/api/coding/paas/v4/chat/completions';
      this.server = null;
      this.port = null;
      this.verbose = config.verbose || false;
      this.timeout = config.timeout || 120000; // 120s default
+     this.streamingEnabled = process.env.CCS_GLMT_STREAMING !== 'disabled';
+     this.forceStreaming = process.env.CCS_GLMT_STREAMING === 'force';
    }

  /**
@@ -52,8 +57,12 @@ class GlmtProxy {
      this.port = this.server.address().port;
      // Signal parent process
      console.log(`PROXY_READY:${this.port}`);
-     // One-time info message (always shown)
-     console.error(`[glmt] Proxy listening on port ${this.port} (buffered mode)`);
+
+     // Info message (only show in verbose mode)
+     if (this.verbose) {
+       const mode = this.streamingEnabled ? 'streaming mode' : 'buffered mode';
+       console.error(`[glmt] Proxy listening on port ${this.port} (${mode})`);
+     }

      // Debug mode notice
      if (this.transformer.debugLog) {
@@ -108,35 +117,14 @@ class GlmtProxy {
        return;
      }

-     // Transform to OpenAI format
-     const { openaiRequest, thinkingConfig } =
-       this.transformer.transformRequest(anthropicRequest);
-
-     this.log(`Transformed request, thinking: ${thinkingConfig.thinking}`);
-
-     // Forward to Z.AI
-     const openaiResponse = await this._forwardToUpstream(
-       openaiRequest,
-       req.headers
-     );
-
-     this.log(`Received response from upstream`);
+     // Branch: streaming or buffered
+     const useStreaming = (anthropicRequest.stream && this.streamingEnabled) || this.forceStreaming;

-     // Transform back to Anthropic format
-     const anthropicResponse = this.transformer.transformResponse(
-       openaiResponse,
-       thinkingConfig
-     );
-
-     // Return to Claude CLI
-     res.writeHead(200, {
-       'Content-Type': 'application/json',
-       'Access-Control-Allow-Origin': '*'
-     });
-     res.end(JSON.stringify(anthropicResponse));
-
-     const duration = Date.now() - startTime;
-     this.log(`Request completed in ${duration}ms`);
+     if (useStreaming) {
+       await this._handleStreamingRequest(req, res, anthropicRequest, startTime);
+     } else {
+       await this._handleBufferedRequest(req, res, anthropicRequest, startTime);
+     }

    } catch (error) {
      console.error('[glmt-proxy] Request error:', error.message);
@@ -153,6 +141,76 @@ class GlmtProxy {
      }
    }

+   /**
+    * Handle buffered (non-streaming) request
+    * @private
+    */
+   async _handleBufferedRequest(req, res, anthropicRequest, startTime) {
+     // Transform to OpenAI format
+     const { openaiRequest, thinkingConfig } =
+       this.transformer.transformRequest(anthropicRequest);
+
+     this.log(`Transformed request, thinking: ${thinkingConfig.thinking}`);
+
+     // Forward to Z.AI
+     const openaiResponse = await this._forwardToUpstream(
+       openaiRequest,
+       req.headers
+     );
+
+     this.log(`Received response from upstream`);
+
+     // Transform back to Anthropic format
+     const anthropicResponse = this.transformer.transformResponse(
+       openaiResponse,
+       thinkingConfig
+     );
+
+     // Return to Claude CLI
+     res.writeHead(200, {
+       'Content-Type': 'application/json',
+       'Access-Control-Allow-Origin': '*'
+     });
+     res.end(JSON.stringify(anthropicResponse));
+
+     const duration = Date.now() - startTime;
+     this.log(`Request completed in ${duration}ms`);
+   }
+
+   /**
+    * Handle streaming request
+    * @private
+    */
+   async _handleStreamingRequest(req, res, anthropicRequest, startTime) {
+     this.log('Using streaming mode');
+
+     // Transform request
+     const { openaiRequest, thinkingConfig } =
+       this.transformer.transformRequest(anthropicRequest);
+
+     // Force streaming
+     openaiRequest.stream = true;
+
+     // Set SSE headers
+     res.writeHead(200, {
+       'Content-Type': 'text/event-stream',
+       'Cache-Control': 'no-cache',
+       'Connection': 'keep-alive',
+       'Access-Control-Allow-Origin': '*'
+     });
+
+     this.log('Starting SSE stream to Claude CLI');
+
+     // Forward and stream
+     await this._forwardAndStreamUpstream(
+       openaiRequest,
+       req.headers,
+       res,
+       thinkingConfig,
+       startTime
+     );
+   }
+
  /**
   * Read request body
   * @param {http.IncomingMessage} req - Request
@@ -194,7 +252,7 @@
      const options = {
        hostname: url.hostname,
        port: url.port || 443,
-       path: '/api/coding/paas/v4/chat/completions', // OpenAI-compatible endpoint
+       path: url.pathname || '/api/coding/paas/v4/chat/completions',
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
@@ -206,7 +264,7 @@
      };

      // Debug logging
-     this.log(`Forwarding to: ${url.hostname}${options.path}`);
+     this.log(`Forwarding to: ${url.hostname}${url.pathname}`);

      // Set timeout
      const timeoutHandle = setTimeout(() => {
@@ -251,6 +309,108 @@
      });
    }

+   /**
+    * Forward request to Z.AI and stream response
+    * @param {Object} openaiRequest - OpenAI format request
+    * @param {Object} originalHeaders - Original request headers
+    * @param {http.ServerResponse} clientRes - Response to Claude CLI
+    * @param {Object} thinkingConfig - Thinking configuration
+    * @param {number} startTime - Request start time
+    * @returns {Promise<void>}
+    * @private
+    */
+   async _forwardAndStreamUpstream(openaiRequest, originalHeaders, clientRes, thinkingConfig, startTime) {
+     return new Promise((resolve, reject) => {
+       const url = new URL(this.upstreamUrl);
+       const requestBody = JSON.stringify(openaiRequest);
+
+       const options = {
+         hostname: url.hostname,
+         port: url.port || 443,
+         path: url.pathname || '/api/coding/paas/v4/chat/completions',
+         method: 'POST',
+         headers: {
+           'Content-Type': 'application/json',
+           'Content-Length': Buffer.byteLength(requestBody),
+           'Authorization': originalHeaders['authorization'] || '',
+           'User-Agent': 'CCS-GLMT-Proxy/1.0',
+           'Accept': 'text/event-stream'
+         }
+       };
+
+       this.log(`Forwarding streaming request to: ${url.hostname}${url.pathname}`);
+
+       // C-03 Fix: Apply timeout to streaming requests
+       const timeoutHandle = setTimeout(() => {
+         req.destroy();
+         reject(new Error(`Streaming request timeout after ${this.timeout}ms`));
+       }, this.timeout);
+
+       const req = https.request(options, (upstreamRes) => {
+         clearTimeout(timeoutHandle);
+         if (upstreamRes.statusCode !== 200) {
+           let body = '';
+           upstreamRes.on('data', chunk => body += chunk);
+           upstreamRes.on('end', () => {
+             reject(new Error(`Upstream error: ${upstreamRes.statusCode}\n${body}`));
+           });
+           return;
+         }
+
+         const parser = new SSEParser();
+         const accumulator = new DeltaAccumulator(thinkingConfig);
+
+         upstreamRes.on('data', (chunk) => {
+           try {
+             const events = parser.parse(chunk);
+
+             events.forEach(event => {
+               // Transform OpenAI delta → Anthropic events
+               const anthropicEvents = this.transformer.transformDelta(event, accumulator);
+
+               // Forward to Claude CLI
+               anthropicEvents.forEach(evt => {
+                 const eventLine = `event: ${evt.event}\n`;
+                 const dataLine = `data: ${JSON.stringify(evt.data)}\n\n`;
+                 clientRes.write(eventLine + dataLine);
+               });
+             });
+           } catch (error) {
+             this.log(`Error processing chunk: ${error.message}`);
+           }
+         });
+
+         upstreamRes.on('end', () => {
+           const duration = Date.now() - startTime;
+           this.log(`Streaming completed in ${duration}ms`);
+           clientRes.end();
+           resolve();
+         });
+
+         upstreamRes.on('error', (error) => {
+           clearTimeout(timeoutHandle);
+           this.log(`Upstream stream error: ${error.message}`);
+           clientRes.write(`event: error\n`);
+           clientRes.write(`data: ${JSON.stringify({ error: error.message })}\n\n`);
+           clientRes.end();
+           reject(error);
+         });
+       });
+
+       req.on('error', (error) => {
+         clearTimeout(timeoutHandle);
+         this.log(`Request error: ${error.message}`);
+         clientRes.write(`event: error\n`);
+         clientRes.write(`data: ${JSON.stringify({ error: error.message })}\n\n`);
+         clientRes.end();
+         reject(error);
+       });
+
+       req.write(requestBody);
+       req.end();
+     });
+   }
+
  /**
   * Stop proxy server
   */
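One detail worth noting in `_forwardAndStreamUpstream` above: `clearTimeout` fires as soon as upstream response headers arrive, so the 120s guard bounds time-to-first-byte rather than total stream duration. A reduced, self-contained sketch of that pattern (hypothetical helper name; not part of the package):

```js
'use strict';
const https = require('https');

// Rejects if no response headers arrive within `ms`. Once headers arrive,
// the timer is cleared and the stream itself may run indefinitely.
function streamWithHeaderTimeout(options, body, ms, onChunk) {
  return new Promise((resolve, reject) => {
    const timer = setTimeout(() => {
      req.destroy(); // `req` is initialized synchronously below, before the timer can fire
      reject(new Error(`No response headers after ${ms}ms`));
    }, ms);

    const req = https.request(options, (res) => {
      clearTimeout(timer); // headers received: stop the clock
      res.on('data', onChunk);
      res.on('end', resolve);
      res.on('error', reject);
    });

    req.on('error', (err) => { clearTimeout(timer); reject(err); });
    req.write(body);
    req.end();
  });
}

module.exports = streamWithHeaderTimeout;
```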
package/bin/glmt-transformer.js CHANGED
@@ -5,6 +5,8 @@ const crypto = require('crypto');
  const fs = require('fs');
  const path = require('path');
  const os = require('os');
+ const SSEParser = require('./sse-parser');
+ const DeltaAccumulator = require('./delta-accumulator');

  /**
   * GlmtTransformer - Convert between Anthropic and OpenAI formats with thinking support
@@ -73,10 +75,10 @@ class GlmtTransformer {
      openaiRequest.top_p = anthropicRequest.top_p;
    }

-   // 5. Handle streaming (not yet supported)
-   // Silently override to buffered mode
-   if (anthropicRequest.stream) {
-     openaiRequest.stream = false;
+   // 5. Handle streaming
+   // Keep stream parameter from request
+   if (anthropicRequest.stream !== undefined) {
+     openaiRequest.stream = anthropicRequest.stream;
    }

    // 6. Inject reasoning parameters
@@ -421,6 +423,251 @@ class GlmtTransformer {
      return { checks, passed, total, valid: passed === total };
    }

+   /**
+    * Transform OpenAI streaming delta to Anthropic events
+    * @param {Object} openaiEvent - Parsed SSE event from Z.AI
+    * @param {DeltaAccumulator} accumulator - State accumulator
+    * @returns {Array<Object>} Array of Anthropic SSE events
+    */
+   transformDelta(openaiEvent, accumulator) {
+     const events = [];
+
+     // Handle [DONE] marker
+     if (openaiEvent.event === 'done') {
+       return this.finalizeDelta(accumulator);
+     }
+
+     const choice = openaiEvent.data?.choices?.[0];
+     if (!choice) return events;
+
+     const delta = choice.delta;
+     if (!delta) return events;
+
+     // Message start
+     if (!accumulator.messageStarted) {
+       if (openaiEvent.data.model) {
+         accumulator.model = openaiEvent.data.model;
+       }
+       events.push(this._createMessageStartEvent(accumulator));
+       accumulator.messageStarted = true;
+     }
+
+     // Role
+     if (delta.role) {
+       accumulator.role = delta.role;
+     }
+
+     // Reasoning content delta (Z.AI streams incrementally - confirmed in Phase 02)
+     if (delta.reasoning_content) {
+       const currentBlock = accumulator.getCurrentBlock();
+
+       if (!currentBlock || currentBlock.type !== 'thinking') {
+         // Start thinking block
+         const block = accumulator.startBlock('thinking');
+         events.push(this._createContentBlockStartEvent(block));
+       }
+
+       accumulator.addDelta(delta.reasoning_content);
+       events.push(this._createThinkingDeltaEvent(
+         accumulator.getCurrentBlock(),
+         delta.reasoning_content
+       ));
+     }
+
+     // Text content delta
+     if (delta.content) {
+       const currentBlock = accumulator.getCurrentBlock();
+
+       // Close thinking block if transitioning from thinking to text
+       if (currentBlock && currentBlock.type === 'thinking' && !currentBlock.stopped) {
+         events.push(this._createSignatureDeltaEvent(currentBlock));
+         events.push(this._createContentBlockStopEvent(currentBlock));
+         accumulator.stopCurrentBlock();
+       }
+
+       if (!accumulator.getCurrentBlock() || accumulator.getCurrentBlock().type !== 'text') {
+         // Start text block
+         const block = accumulator.startBlock('text');
+         events.push(this._createContentBlockStartEvent(block));
+       }
+
+       accumulator.addDelta(delta.content);
+       events.push(this._createTextDeltaEvent(
+         accumulator.getCurrentBlock(),
+         delta.content
+       ));
+     }
+
+     // Usage update (appears in final chunk usually)
+     if (openaiEvent.data.usage) {
+       accumulator.updateUsage(openaiEvent.data.usage);
+     }
+
+     // Finish reason
+     if (choice.finish_reason) {
+       accumulator.finishReason = choice.finish_reason;
+     }
+
+     return events;
+   }
+
+   /**
+    * Finalize streaming and generate closing events
+    * @param {DeltaAccumulator} accumulator - State accumulator
+    * @returns {Array<Object>} Final Anthropic SSE events
+    */
+   finalizeDelta(accumulator) {
+     if (accumulator.finalized) {
+       return []; // Already finalized
+     }
+
+     const events = [];
+
+     // Close current content block if any
+     const currentBlock = accumulator.getCurrentBlock();
+     if (currentBlock && !currentBlock.stopped) {
+       if (currentBlock.type === 'thinking') {
+         events.push(this._createSignatureDeltaEvent(currentBlock));
+       }
+       events.push(this._createContentBlockStopEvent(currentBlock));
+       accumulator.stopCurrentBlock();
+     }
+
+     // Message delta (stop reason + usage)
+     events.push({
+       event: 'message_delta',
+       data: {
+         type: 'message_delta',
+         delta: {
+           stop_reason: this._mapStopReason(accumulator.finishReason || 'stop')
+         },
+         usage: {
+           output_tokens: accumulator.outputTokens
+         }
+       }
+     });
+
+     // Message stop
+     events.push({
+       event: 'message_stop',
+       data: {
+         type: 'message_stop'
+       }
+     });
+
+     accumulator.finalized = true;
+     return events;
+   }
+
+   /**
+    * Create message_start event
+    * @private
+    */
+   _createMessageStartEvent(accumulator) {
+     return {
+       event: 'message_start',
+       data: {
+         type: 'message_start',
+         message: {
+           id: accumulator.messageId,
+           type: 'message',
+           role: accumulator.role,
+           content: [],
+           model: accumulator.model || 'glm-4.6',
+           stop_reason: null,
+           usage: {
+             input_tokens: accumulator.inputTokens,
+             output_tokens: 0
+           }
+         }
+       }
+     };
+   }
+
+   /**
+    * Create content_block_start event
+    * @private
+    */
+   _createContentBlockStartEvent(block) {
+     return {
+       event: 'content_block_start',
+       data: {
+         type: 'content_block_start',
+         index: block.index,
+         content_block: {
+           type: block.type,
+           [block.type]: ''
+         }
+       }
+     };
+   }
+
+   /**
+    * Create thinking_delta event
+    * @private
+    */
+   _createThinkingDeltaEvent(block, delta) {
+     return {
+       event: 'content_block_delta',
+       data: {
+         type: 'content_block_delta',
+         index: block.index,
+         delta: {
+           type: 'thinking_delta',
+           thinking: delta
+         }
+       }
+     };
+   }
+
+   /**
+    * Create text_delta event
+    * @private
+    */
+   _createTextDeltaEvent(block, delta) {
+     return {
+       event: 'content_block_delta',
+       data: {
+         type: 'content_block_delta',
+         index: block.index,
+         delta: {
+           type: 'text_delta',
+           text: delta
+         }
+       }
+     };
+   }
+
+   /**
+    * Create signature_delta event
+    * @private
+    */
+   _createSignatureDeltaEvent(block) {
+     const signature = this._generateThinkingSignature(block.content);
+     return {
+       event: 'signature_delta',
+       data: {
+         type: 'signature_delta',
+         index: block.index,
+         signature: signature
+       }
+     };
+   }
+
+   /**
+    * Create content_block_stop event
+    * @private
+    */
+   _createContentBlockStopEvent(block) {
+     return {
+       event: 'content_block_stop',
+       data: {
+         type: 'content_block_stop',
+         index: block.index
+       }
+     };
+   }
+
  /**
   * Log message if verbose
   * @param {string} message - Message to log
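To make the delta flow concrete, a minimal sketch that drives `transformDelta` with two fabricated Z.AI chunks followed by the `[DONE]` marker (module paths assume the `bin/` layout above; the chunk payloads are invented for illustration):

```js
const GlmtTransformer = require('./glmt-transformer');
const DeltaAccumulator = require('./delta-accumulator');

const transformer = new GlmtTransformer({ verbose: false });
const acc = new DeltaAccumulator({ thinking: true }); // thinkingConfig shape assumed

const fakeChunks = [
  { event: 'message', data: { model: 'glm-4.6', choices: [{ delta: { role: 'assistant', reasoning_content: 'Let me think…' } }] } },
  { event: 'message', data: { choices: [{ delta: { content: 'Answer.' }, finish_reason: 'stop' }], usage: { completion_tokens: 5 } } },
  { event: 'done', data: null }
];

for (const chunk of fakeChunks) {
  for (const evt of transformer.transformDelta(chunk, acc)) {
    console.log(evt.event);
  }
}
// Expected order: message_start, content_block_start, content_block_delta (thinking),
// signature_delta, content_block_stop, content_block_start, content_block_delta (text),
// content_block_stop, message_delta, message_stop
```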
package/bin/sse-parser.js ADDED
@@ -0,0 +1,96 @@
+ #!/usr/bin/env node
+ 'use strict';
+
+ /**
+  * SSEParser - Parse Server-Sent Events (SSE) stream
+  *
+  * Handles:
+  * - Incomplete events across chunks
+  * - Multiple events in single chunk
+  * - Malformed data (skip gracefully)
+  * - [DONE] marker
+  *
+  * Usage:
+  *   const parser = new SSEParser();
+  *   stream.on('data', chunk => {
+  *     const events = parser.parse(chunk);
+  *     events.forEach(event => { ... });
+  *   });
+  */
+ class SSEParser {
+   constructor(options = {}) {
+     this.buffer = '';
+     this.eventCount = 0;
+     this.maxBufferSize = options.maxBufferSize || 1024 * 1024; // 1MB default
+   }
+
+   /**
+    * Parse chunk and extract SSE events
+    * @param {Buffer|string} chunk - Data chunk from stream
+    * @returns {Array<Object>} Array of parsed events
+    */
+   parse(chunk) {
+     this.buffer += chunk.toString();
+
+     // C-01 Fix: Prevent unbounded buffer growth (DoS protection)
+     if (this.buffer.length > this.maxBufferSize) {
+       throw new Error(`SSE buffer exceeded ${this.maxBufferSize} bytes (DoS protection)`);
+     }
+
+     const lines = this.buffer.split('\n');
+
+     // Keep incomplete line in buffer
+     this.buffer = lines.pop() || '';
+
+     const events = [];
+     let currentEvent = { event: 'message', data: '' };
+
+     for (const line of lines) {
+       if (line.startsWith('event: ')) {
+         currentEvent.event = line.substring(7).trim();
+       } else if (line.startsWith('data: ')) {
+         const data = line.substring(6);
+
+         if (data === '[DONE]') {
+           this.eventCount++;
+           events.push({
+             event: 'done',
+             data: null,
+             index: this.eventCount
+           });
+           currentEvent = { event: 'message', data: '' };
+         } else {
+           try {
+             currentEvent.data = JSON.parse(data);
+             this.eventCount++;
+             currentEvent.index = this.eventCount;
+             events.push(currentEvent);
+             currentEvent = { event: 'message', data: '' };
+           } catch (e) {
+             // H-01 Fix: Log parse errors for debugging
+             if (typeof console !== 'undefined' && console.error) {
+               console.error('[SSEParser] Malformed JSON event:', e.message, 'Data:', data.substring(0, 100));
+             }
+           }
+         }
+       } else if (line.startsWith('id: ')) {
+         currentEvent.id = line.substring(4).trim();
+       } else if (line.startsWith('retry: ')) {
+         currentEvent.retry = parseInt(line.substring(7), 10);
+       }
+       // Empty lines separate events (already handled by JSON parsing)
+     }
+
+     return events;
+   }
+
+   /**
+    * Reset parser state (for reuse)
+    */
+   reset() {
+     this.buffer = '';
+     this.eventCount = 0;
+   }
+ }
+
+ module.exports = SSEParser;
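A minimal sketch of the parser's main job, reassembling an event split across chunk boundaries (the chunk strings are fabricated; `./sse-parser` assumes the `bin/` layout above):

```js
const SSEParser = require('./sse-parser');

const parser = new SSEParser(); // default 1MB buffer cap

// One JSON event split mid-string across two network chunks, then [DONE].
const chunks = [
  'data: {"choices":[{"del',
  'ta":{"content":"hi"}}]}\n\ndata: [DONE]\n\n'
];

for (const chunk of chunks) {
  for (const event of parser.parse(chunk)) {
    console.log(event.event, event.index);
  }
}
// Chunk 1 yields nothing (the incomplete line stays buffered);
// chunk 2 yields 'message' 1, then 'done' 2.
```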
package/lib/ccs CHANGED
@@ -2,7 +2,7 @@
  set -euo pipefail

  # Version (updated by scripts/bump-version.sh)
- CCS_VERSION="3.3.0"
+ CCS_VERSION="3.4.0"
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  readonly CONFIG_FILE="${CCS_CONFIG:-$HOME/.ccs/config.json}"
  readonly PROFILES_JSON="$HOME/.ccs/profiles.json"
package/lib/ccs.ps1 CHANGED
@@ -12,7 +12,7 @@ param(
  $ErrorActionPreference = "Stop"

  # Version (updated by scripts/bump-version.sh)
- $CcsVersion = "3.3.0"
+ $CcsVersion = "3.4.0"
  $ScriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path
  $ConfigFile = if ($env:CCS_CONFIG) { $env:CCS_CONFIG } else { "$env:USERPROFILE\.ccs\config.json" }
  $ProfilesJson = "$env:USERPROFILE\.ccs\profiles.json"
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@kaitranntt/ccs",
-   "version": "3.3.0",
+   "version": "3.4.0",
    "description": "Claude Code Switch - Instant profile switching between Claude Sonnet 4.5 and GLM 4.6",
    "keywords": [
      "cli",