@kaitranntt/ccs 3.3.0 → 3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -4
- package/VERSION +1 -1
- package/bin/delta-accumulator.js +155 -0
- package/bin/glmt-proxy.js +194 -34
- package/bin/glmt-transformer.js +251 -4
- package/bin/sse-parser.js +96 -0
- package/lib/ccs +1 -1
- package/lib/ccs.ps1 +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -205,16 +205,27 @@ Commands and skills symlinked from `~/.ccs/shared/` - no duplication across prof
|
|
|
205
205
|
|---------|-----------------|-------------------|
|
|
206
206
|
| **Endpoint** | Anthropic-compatible | OpenAI-compatible |
|
|
207
207
|
| **Thinking** | No | Yes (reasoning_content) |
|
|
208
|
-
| **Streaming** | Yes |
|
|
208
|
+
| **Streaming** | Yes | **Yes (v3.4+)** |
|
|
209
|
+
| **TTFB** | <500ms | <500ms (streaming), 2-10s (buffered) |
|
|
209
210
|
| **Use Case** | Fast responses | Complex reasoning |
|
|
210
211
|
|
|
212
|
+
### Streaming Support (v3.4)
|
|
213
|
+
|
|
214
|
+
**GLMT now supports real-time streaming** with incremental reasoning content delivery.
|
|
215
|
+
|
|
216
|
+
- **Default**: Streaming enabled (TTFB <500ms)
|
|
217
|
+
- **Disable**: Set `CCS_GLMT_STREAMING=disabled` for buffered mode
|
|
218
|
+
- **Force**: Set `CCS_GLMT_STREAMING=force` to override client preferences
|
|
219
|
+
|
|
220
|
+
**Confirmed working**: Z.AI (1498 reasoning chunks tested)
|
|
221
|
+
|
|
211
222
|
### How It Works
|
|
212
223
|
|
|
213
224
|
1. CCS spawns embedded HTTP proxy on localhost
|
|
214
|
-
2. Proxy converts Anthropic format → OpenAI format
|
|
225
|
+
2. Proxy converts Anthropic format → OpenAI format (streaming or buffered)
|
|
215
226
|
3. Forwards to Z.AI with reasoning parameters
|
|
216
|
-
4. Converts `reasoning_content` → thinking blocks
|
|
217
|
-
5. Thinking appears in Claude Code UI
|
|
227
|
+
4. Converts `reasoning_content` → thinking blocks (incremental or complete)
|
|
228
|
+
5. Thinking appears in Claude Code UI in real-time
|
|
218
229
|
|
|
219
230
|
### Control Tags
|
|
220
231
|
|
|
@@ -235,6 +246,14 @@ nano ~/.ccs/glmt.settings.json
|
|
|
235
246
|
}
|
|
236
247
|
```
|
|
237
248
|
|
|
249
|
+
### Security Limits
|
|
250
|
+
|
|
251
|
+
**DoS protection** (v3.4):
|
|
252
|
+
- SSE buffer: 1MB max per event
|
|
253
|
+
- Content buffer: 10MB max per block (thinking/text)
|
|
254
|
+
- Content blocks: 100 max per message
|
|
255
|
+
- Request timeout: 120s (both streaming and buffered)
|
|
256
|
+
|
|
238
257
|
### Debugging
|
|
239
258
|
|
|
240
259
|
**Enable verbose logging**:
|
|
@@ -249,6 +268,12 @@ ccs glmt --verbose "your prompt"
|
|
|
249
268
|
# Logs: ~/.ccs/logs/
|
|
250
269
|
```
|
|
251
270
|
|
|
271
|
+
**Check streaming mode**:
|
|
272
|
+
```bash
|
|
273
|
+
# Disable streaming for debugging
|
|
274
|
+
CCS_GLMT_STREAMING=disabled ccs glmt "test"
|
|
275
|
+
```
|
|
276
|
+
|
|
252
277
|
**Check reasoning content**:
|
|
253
278
|
```bash
|
|
254
279
|
cat ~/.ccs/logs/*response-openai.json | jq '.choices[0].message.reasoning_content'
|
package/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
3.
|
|
1
|
+
3.4.0
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* DeltaAccumulator - Maintain state across streaming deltas
|
|
6
|
+
*
|
|
7
|
+
* Tracks:
|
|
8
|
+
* - Message metadata (id, model, role)
|
|
9
|
+
* - Content blocks (thinking, text)
|
|
10
|
+
* - Current block index
|
|
11
|
+
* - Accumulated content
|
|
12
|
+
*
|
|
13
|
+
* Usage:
|
|
14
|
+
* const acc = new DeltaAccumulator(thinkingConfig);
|
|
15
|
+
* const events = transformer.transformDelta(openaiEvent, acc);
|
|
16
|
+
*/
|
|
17
|
+
class DeltaAccumulator {
  /**
   * Accumulates state across streaming deltas: message metadata, the ordered
   * list of content blocks (thinking/text), per-block buffers, and usage stats.
   *
   * @param {Object} thinkingConfig - Thinking configuration carried through the stream
   * @param {Object} options - Optional limits
   * @param {number} [options.maxBlocks=100] - Maximum content blocks per message
   * @param {number} [options.maxBufferSize=10485760] - Maximum characters per block buffer
   */
  constructor(thinkingConfig = {}, options = {}) {
    this.thinkingConfig = thinkingConfig;
    this.messageId = 'msg_' + Date.now() + '_' + Math.random().toString(36).substring(7);
    this.model = null;
    this.role = 'assistant';

    // Ordered content blocks and pointer to the active one (-1 = none yet).
    this.contentBlocks = [];
    this.currentBlockIndex = -1;

    // Running buffers for the active thinking/text block.
    this.thinkingBuffer = '';
    this.textBuffer = '';

    // C-02 Fix: Limits to prevent unbounded accumulation
    this.maxBlocks = options.maxBlocks || 100;
    this.maxBufferSize = options.maxBufferSize || 10 * 1024 * 1024; // 10MB

    // Stream lifecycle flags.
    this.messageStarted = false;
    this.finalized = false;

    // Usage statistics reported by upstream.
    this.inputTokens = 0;
    this.outputTokens = 0;
    this.finishReason = null;
  }

  /**
   * Return the block currently being filled, if any.
   * @returns {Object|null} Current block or null when none is active
   */
  getCurrentBlock() {
    const i = this.currentBlockIndex;
    return (i >= 0 && i < this.contentBlocks.length) ? this.contentBlocks[i] : null;
  }

  /**
   * Open a fresh content block and make it the current one.
   * @param {string} type - Block type ('thinking' or 'text')
   * @returns {Object} The newly created block
   * @throws {Error} When the block-count limit is exceeded
   */
  startBlock(type) {
    // C-02 Fix: Enforce max blocks limit
    if (this.contentBlocks.length >= this.maxBlocks) {
      throw new Error(`Maximum ${this.maxBlocks} content blocks exceeded (DoS protection)`);
    }

    this.currentBlockIndex += 1;
    const block = {
      index: this.currentBlockIndex,
      type,
      content: '',
      started: true,
      stopped: false
    };
    this.contentBlocks.push(block);

    // A fresh block starts with an empty buffer of the matching kind.
    if (type === 'thinking') {
      this.thinkingBuffer = '';
    } else if (type === 'text') {
      this.textBuffer = '';
    }

    return block;
  }

  /**
   * Append a streamed delta to the current block's buffer and mirror the
   * accumulated value onto the block's `content` field.
   * @param {string} delta - Content fragment from upstream
   * @throws {Error} When a per-block buffer limit is exceeded
   */
  addDelta(delta) {
    const block = this.getCurrentBlock();
    if (!block) {
      return; // No active block: delta is dropped, matching accumulator contract.
    }

    if (block.type === 'thinking') {
      // C-02 Fix: Enforce buffer size limit
      if (this.thinkingBuffer.length + delta.length > this.maxBufferSize) {
        throw new Error(`Thinking buffer exceeded ${this.maxBufferSize} bytes (DoS protection)`);
      }
      this.thinkingBuffer += delta;
      block.content = this.thinkingBuffer;
    } else if (block.type === 'text') {
      // C-02 Fix: Enforce buffer size limit
      if (this.textBuffer.length + delta.length > this.maxBufferSize) {
        throw new Error(`Text buffer exceeded ${this.maxBufferSize} bytes (DoS protection)`);
      }
      this.textBuffer += delta;
      block.content = this.textBuffer;
    }
  }

  /**
   * Flag the current block as stopped (no further deltas expected).
   */
  stopCurrentBlock() {
    const block = this.getCurrentBlock();
    if (block) {
      block.stopped = true;
    }
  }

  /**
   * Record usage statistics from an upstream usage payload.
   * Accepts both OpenAI (prompt/completion) and Anthropic (input/output) key names.
   * @param {Object} usage - Usage object from OpenAI
   */
  updateUsage(usage) {
    if (!usage) {
      return;
    }
    this.inputTokens = usage.prompt_tokens || usage.input_tokens || 0;
    this.outputTokens = usage.completion_tokens || usage.output_tokens || 0;
  }

  /**
   * Snapshot of the accumulated state, mainly for logging/debugging.
   * @returns {Object} Summary of message metadata, block counts, and usage
   */
  getSummary() {
    const usage = {
      input_tokens: this.inputTokens,
      output_tokens: this.outputTokens
    };
    return {
      messageId: this.messageId,
      model: this.model,
      role: this.role,
      blockCount: this.contentBlocks.length,
      currentIndex: this.currentBlockIndex,
      messageStarted: this.messageStarted,
      finalized: this.finalized,
      usage
    };
  }
}
|
|
154
|
+
|
|
155
|
+
module.exports = DeltaAccumulator;
|
package/bin/glmt-proxy.js
CHANGED
|
@@ -4,6 +4,8 @@
|
|
|
4
4
|
const http = require('http');
|
|
5
5
|
const https = require('https');
|
|
6
6
|
const GlmtTransformer = require('./glmt-transformer');
|
|
7
|
+
const SSEParser = require('./sse-parser');
|
|
8
|
+
const DeltaAccumulator = require('./delta-accumulator');
|
|
7
9
|
|
|
8
10
|
/**
|
|
9
11
|
* GlmtProxy - Embedded HTTP proxy for GLM thinking support
|
|
@@ -12,7 +14,7 @@ const GlmtTransformer = require('./glmt-transformer');
|
|
|
12
14
|
* - Intercepts Claude CLI → Z.AI calls
|
|
13
15
|
* - Transforms Anthropic format → OpenAI format
|
|
14
16
|
* - Converts reasoning_content → thinking blocks
|
|
15
|
-
* -
|
|
17
|
+
* - Supports both streaming and buffered modes
|
|
16
18
|
*
|
|
17
19
|
* Lifecycle:
|
|
18
20
|
* - Spawned by bin/ccs.js when 'glmt' profile detected
|
|
@@ -30,11 +32,14 @@ const GlmtTransformer = require('./glmt-transformer');
|
|
|
30
32
|
class GlmtProxy {
|
|
31
33
|
constructor(config = {}) {
|
|
32
34
|
this.transformer = new GlmtTransformer({ verbose: config.verbose });
|
|
33
|
-
|
|
35
|
+
// Use ANTHROPIC_BASE_URL from environment (set by settings.json) or fallback to Z.AI default
|
|
36
|
+
this.upstreamUrl = process.env.ANTHROPIC_BASE_URL || 'https://api.z.ai/api/coding/paas/v4/chat/completions';
|
|
34
37
|
this.server = null;
|
|
35
38
|
this.port = null;
|
|
36
39
|
this.verbose = config.verbose || false;
|
|
37
40
|
this.timeout = config.timeout || 120000; // 120s default
|
|
41
|
+
this.streamingEnabled = process.env.CCS_GLMT_STREAMING !== 'disabled';
|
|
42
|
+
this.forceStreaming = process.env.CCS_GLMT_STREAMING === 'force';
|
|
38
43
|
}
|
|
39
44
|
|
|
40
45
|
/**
|
|
@@ -52,8 +57,12 @@ class GlmtProxy {
|
|
|
52
57
|
this.port = this.server.address().port;
|
|
53
58
|
// Signal parent process
|
|
54
59
|
console.log(`PROXY_READY:${this.port}`);
|
|
55
|
-
|
|
56
|
-
|
|
60
|
+
|
|
61
|
+
// Info message (only show in verbose mode)
|
|
62
|
+
if (this.verbose) {
|
|
63
|
+
const mode = this.streamingEnabled ? 'streaming mode' : 'buffered mode';
|
|
64
|
+
console.error(`[glmt] Proxy listening on port ${this.port} (${mode})`);
|
|
65
|
+
}
|
|
57
66
|
|
|
58
67
|
// Debug mode notice
|
|
59
68
|
if (this.transformer.debugLog) {
|
|
@@ -108,35 +117,14 @@ class GlmtProxy {
|
|
|
108
117
|
return;
|
|
109
118
|
}
|
|
110
119
|
|
|
111
|
-
//
|
|
112
|
-
const
|
|
113
|
-
this.transformer.transformRequest(anthropicRequest);
|
|
114
|
-
|
|
115
|
-
this.log(`Transformed request, thinking: ${thinkingConfig.thinking}`);
|
|
116
|
-
|
|
117
|
-
// Forward to Z.AI
|
|
118
|
-
const openaiResponse = await this._forwardToUpstream(
|
|
119
|
-
openaiRequest,
|
|
120
|
-
req.headers
|
|
121
|
-
);
|
|
122
|
-
|
|
123
|
-
this.log(`Received response from upstream`);
|
|
120
|
+
// Branch: streaming or buffered
|
|
121
|
+
const useStreaming = (anthropicRequest.stream && this.streamingEnabled) || this.forceStreaming;
|
|
124
122
|
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
// Return to Claude CLI
|
|
132
|
-
res.writeHead(200, {
|
|
133
|
-
'Content-Type': 'application/json',
|
|
134
|
-
'Access-Control-Allow-Origin': '*'
|
|
135
|
-
});
|
|
136
|
-
res.end(JSON.stringify(anthropicResponse));
|
|
137
|
-
|
|
138
|
-
const duration = Date.now() - startTime;
|
|
139
|
-
this.log(`Request completed in ${duration}ms`);
|
|
123
|
+
if (useStreaming) {
|
|
124
|
+
await this._handleStreamingRequest(req, res, anthropicRequest, startTime);
|
|
125
|
+
} else {
|
|
126
|
+
await this._handleBufferedRequest(req, res, anthropicRequest, startTime);
|
|
127
|
+
}
|
|
140
128
|
|
|
141
129
|
} catch (error) {
|
|
142
130
|
console.error('[glmt-proxy] Request error:', error.message);
|
|
@@ -153,6 +141,76 @@ class GlmtProxy {
|
|
|
153
141
|
}
|
|
154
142
|
}
|
|
155
143
|
|
|
144
|
+
/**
|
|
145
|
+
* Handle buffered (non-streaming) request
|
|
146
|
+
* @private
|
|
147
|
+
*/
|
|
148
|
+
async _handleBufferedRequest(req, res, anthropicRequest, startTime) {
|
|
149
|
+
// Transform to OpenAI format
|
|
150
|
+
const { openaiRequest, thinkingConfig } =
|
|
151
|
+
this.transformer.transformRequest(anthropicRequest);
|
|
152
|
+
|
|
153
|
+
this.log(`Transformed request, thinking: ${thinkingConfig.thinking}`);
|
|
154
|
+
|
|
155
|
+
// Forward to Z.AI
|
|
156
|
+
const openaiResponse = await this._forwardToUpstream(
|
|
157
|
+
openaiRequest,
|
|
158
|
+
req.headers
|
|
159
|
+
);
|
|
160
|
+
|
|
161
|
+
this.log(`Received response from upstream`);
|
|
162
|
+
|
|
163
|
+
// Transform back to Anthropic format
|
|
164
|
+
const anthropicResponse = this.transformer.transformResponse(
|
|
165
|
+
openaiResponse,
|
|
166
|
+
thinkingConfig
|
|
167
|
+
);
|
|
168
|
+
|
|
169
|
+
// Return to Claude CLI
|
|
170
|
+
res.writeHead(200, {
|
|
171
|
+
'Content-Type': 'application/json',
|
|
172
|
+
'Access-Control-Allow-Origin': '*'
|
|
173
|
+
});
|
|
174
|
+
res.end(JSON.stringify(anthropicResponse));
|
|
175
|
+
|
|
176
|
+
const duration = Date.now() - startTime;
|
|
177
|
+
this.log(`Request completed in ${duration}ms`);
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
/**
|
|
181
|
+
* Handle streaming request
|
|
182
|
+
* @private
|
|
183
|
+
*/
|
|
184
|
+
async _handleStreamingRequest(req, res, anthropicRequest, startTime) {
|
|
185
|
+
this.log('Using streaming mode');
|
|
186
|
+
|
|
187
|
+
// Transform request
|
|
188
|
+
const { openaiRequest, thinkingConfig } =
|
|
189
|
+
this.transformer.transformRequest(anthropicRequest);
|
|
190
|
+
|
|
191
|
+
// Force streaming
|
|
192
|
+
openaiRequest.stream = true;
|
|
193
|
+
|
|
194
|
+
// Set SSE headers
|
|
195
|
+
res.writeHead(200, {
|
|
196
|
+
'Content-Type': 'text/event-stream',
|
|
197
|
+
'Cache-Control': 'no-cache',
|
|
198
|
+
'Connection': 'keep-alive',
|
|
199
|
+
'Access-Control-Allow-Origin': '*'
|
|
200
|
+
});
|
|
201
|
+
|
|
202
|
+
this.log('Starting SSE stream to Claude CLI');
|
|
203
|
+
|
|
204
|
+
// Forward and stream
|
|
205
|
+
await this._forwardAndStreamUpstream(
|
|
206
|
+
openaiRequest,
|
|
207
|
+
req.headers,
|
|
208
|
+
res,
|
|
209
|
+
thinkingConfig,
|
|
210
|
+
startTime
|
|
211
|
+
);
|
|
212
|
+
}
|
|
213
|
+
|
|
156
214
|
/**
|
|
157
215
|
* Read request body
|
|
158
216
|
* @param {http.IncomingMessage} req - Request
|
|
@@ -194,7 +252,7 @@ class GlmtProxy {
|
|
|
194
252
|
const options = {
|
|
195
253
|
hostname: url.hostname,
|
|
196
254
|
port: url.port || 443,
|
|
197
|
-
path: '/api/coding/paas/v4/chat/completions',
|
|
255
|
+
path: url.pathname || '/api/coding/paas/v4/chat/completions',
|
|
198
256
|
method: 'POST',
|
|
199
257
|
headers: {
|
|
200
258
|
'Content-Type': 'application/json',
|
|
@@ -206,7 +264,7 @@ class GlmtProxy {
|
|
|
206
264
|
};
|
|
207
265
|
|
|
208
266
|
// Debug logging
|
|
209
|
-
this.log(`Forwarding to: ${url.hostname}${
|
|
267
|
+
this.log(`Forwarding to: ${url.hostname}${url.pathname}`);
|
|
210
268
|
|
|
211
269
|
// Set timeout
|
|
212
270
|
const timeoutHandle = setTimeout(() => {
|
|
@@ -251,6 +309,108 @@ class GlmtProxy {
|
|
|
251
309
|
});
|
|
252
310
|
}
|
|
253
311
|
|
|
312
|
+
  /**
   * Forward request to Z.AI and stream response
   *
   * Pipes the upstream OpenAI-format SSE stream through SSEParser and
   * GlmtTransformer.transformDelta, writing Anthropic-format SSE events to
   * the client as they arrive. The [DONE] marker is surfaced by SSEParser as
   * an 'event: done' and finalized inside transformDelta.
   *
   * @param {Object} openaiRequest - OpenAI format request
   * @param {Object} originalHeaders - Original request headers
   * @param {http.ServerResponse} clientRes - Response to Claude CLI
   * @param {Object} thinkingConfig - Thinking configuration
   * @param {number} startTime - Request start time
   * @returns {Promise<void>}
   * @private
   */
  async _forwardAndStreamUpstream(openaiRequest, originalHeaders, clientRes, thinkingConfig, startTime) {
    return new Promise((resolve, reject) => {
      const url = new URL(this.upstreamUrl);
      const requestBody = JSON.stringify(openaiRequest);

      const options = {
        hostname: url.hostname,
        port: url.port || 443,
        // Falls back to the Z.AI default path when the configured URL has none.
        path: url.pathname || '/api/coding/paas/v4/chat/completions',
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Content-Length': Buffer.byteLength(requestBody),
          // Pass through the caller's Authorization header verbatim.
          'Authorization': originalHeaders['authorization'] || '',
          'User-Agent': 'CCS-GLMT-Proxy/1.0',
          'Accept': 'text/event-stream'
        }
      };

      this.log(`Forwarding streaming request to: ${url.hostname}${url.pathname}`);

      // C-03 Fix: Apply timeout to streaming requests
      // NOTE(review): this timer is cleared as soon as response headers arrive
      // (see the request callback below), so it bounds time-to-first-byte only;
      // a stream that stalls after headers is never timed out — confirm intended.
      const timeoutHandle = setTimeout(() => {
        // 'req' is assigned synchronously below, before any timer can fire.
        req.destroy();
        reject(new Error(`Streaming request timeout after ${this.timeout}ms`));
      }, this.timeout);

      const req = https.request(options, (upstreamRes) => {
        clearTimeout(timeoutHandle);
        // Non-200: buffer the error body and reject (no SSE relay).
        if (upstreamRes.statusCode !== 200) {
          let body = '';
          upstreamRes.on('data', chunk => body += chunk);
          upstreamRes.on('end', () => {
            reject(new Error(`Upstream error: ${upstreamRes.statusCode}\n${body}`));
          });
          return;
        }

        // One parser + accumulator pair per request; both are stateful.
        const parser = new SSEParser();
        const accumulator = new DeltaAccumulator(thinkingConfig);

        upstreamRes.on('data', (chunk) => {
          try {
            const events = parser.parse(chunk);

            events.forEach(event => {
              // Transform OpenAI delta → Anthropic events
              const anthropicEvents = this.transformer.transformDelta(event, accumulator);

              // Forward to Claude CLI
              anthropicEvents.forEach(evt => {
                const eventLine = `event: ${evt.event}\n`;
                const dataLine = `data: ${JSON.stringify(evt.data)}\n\n`;
                clientRes.write(eventLine + dataLine);
              });
            });
          } catch (error) {
            // Parse/transform failures (including DoS-limit throws) are logged
            // and the chunk dropped; the stream itself continues.
            this.log(`Error processing chunk: ${error.message}`);
          }
        });

        upstreamRes.on('end', () => {
          const duration = Date.now() - startTime;
          this.log(`Streaming completed in ${duration}ms`);
          clientRes.end();
          resolve();
        });

        upstreamRes.on('error', (error) => {
          clearTimeout(timeoutHandle);
          this.log(`Upstream stream error: ${error.message}`);
          // Surface the failure to the client as an SSE error event, then close.
          clientRes.write(`event: error\n`);
          clientRes.write(`data: ${JSON.stringify({ error: error.message })}\n\n`);
          clientRes.end();
          reject(error);
        });
      });

      req.on('error', (error) => {
        clearTimeout(timeoutHandle);
        this.log(`Request error: ${error.message}`);
        clientRes.write(`event: error\n`);
        clientRes.write(`data: ${JSON.stringify({ error: error.message })}\n\n`);
        clientRes.end();
        reject(error);
      });

      req.write(requestBody);
      req.end();
    });
  }
|
|
413
|
+
|
|
254
414
|
/**
|
|
255
415
|
* Stop proxy server
|
|
256
416
|
*/
|
package/bin/glmt-transformer.js
CHANGED
|
@@ -5,6 +5,8 @@ const crypto = require('crypto');
|
|
|
5
5
|
const fs = require('fs');
|
|
6
6
|
const path = require('path');
|
|
7
7
|
const os = require('os');
|
|
8
|
+
const SSEParser = require('./sse-parser');
|
|
9
|
+
const DeltaAccumulator = require('./delta-accumulator');
|
|
8
10
|
|
|
9
11
|
/**
|
|
10
12
|
* GlmtTransformer - Convert between Anthropic and OpenAI formats with thinking support
|
|
@@ -73,10 +75,10 @@ class GlmtTransformer {
|
|
|
73
75
|
openaiRequest.top_p = anthropicRequest.top_p;
|
|
74
76
|
}
|
|
75
77
|
|
|
76
|
-
// 5. Handle streaming
|
|
77
|
-
//
|
|
78
|
-
if (anthropicRequest.stream) {
|
|
79
|
-
openaiRequest.stream =
|
|
78
|
+
// 5. Handle streaming
|
|
79
|
+
// Keep stream parameter from request
|
|
80
|
+
if (anthropicRequest.stream !== undefined) {
|
|
81
|
+
openaiRequest.stream = anthropicRequest.stream;
|
|
80
82
|
}
|
|
81
83
|
|
|
82
84
|
// 6. Inject reasoning parameters
|
|
@@ -421,6 +423,251 @@ class GlmtTransformer {
|
|
|
421
423
|
return { checks, passed, total, valid: passed === total };
|
|
422
424
|
}
|
|
423
425
|
|
|
426
|
+
  /**
   * Transform OpenAI streaming delta to Anthropic events
   *
   * Stateful: block transitions (message_start, content_block_start/stop)
   * are derived by comparing each delta against the accumulator's current
   * block, so event ordering within the returned array matters.
   *
   * @param {Object} openaiEvent - Parsed SSE event from Z.AI
   * @param {DeltaAccumulator} accumulator - State accumulator
   * @returns {Array<Object>} Array of Anthropic SSE events
   */
  transformDelta(openaiEvent, accumulator) {
    const events = [];

    // Handle [DONE] marker
    if (openaiEvent.event === 'done') {
      return this.finalizeDelta(accumulator);
    }

    const choice = openaiEvent.data?.choices?.[0];
    if (!choice) return events;

    const delta = choice.delta;
    if (!delta) return events;

    // Message start
    // Emitted exactly once, on the first chunk that carries a choice delta.
    if (!accumulator.messageStarted) {
      if (openaiEvent.data.model) {
        accumulator.model = openaiEvent.data.model;
      }
      events.push(this._createMessageStartEvent(accumulator));
      accumulator.messageStarted = true;
    }

    // Role
    if (delta.role) {
      accumulator.role = delta.role;
    }

    // Reasoning content delta (Z.AI streams incrementally - confirmed in Phase 02)
    // NOTE: truthiness check means empty-string deltas are skipped.
    if (delta.reasoning_content) {
      const currentBlock = accumulator.getCurrentBlock();

      if (!currentBlock || currentBlock.type !== 'thinking') {
        // Start thinking block
        const block = accumulator.startBlock('thinking');
        events.push(this._createContentBlockStartEvent(block));
      }

      accumulator.addDelta(delta.reasoning_content);
      events.push(this._createThinkingDeltaEvent(
        accumulator.getCurrentBlock(),
        delta.reasoning_content
      ));
    }

    // Text content delta
    if (delta.content) {
      const currentBlock = accumulator.getCurrentBlock();

      // Close thinking block if transitioning from thinking to text
      // (signature_delta must precede content_block_stop).
      if (currentBlock && currentBlock.type === 'thinking' && !currentBlock.stopped) {
        events.push(this._createSignatureDeltaEvent(currentBlock));
        events.push(this._createContentBlockStopEvent(currentBlock));
        accumulator.stopCurrentBlock();
      }

      // After a stop, getCurrentBlock() still returns the stopped thinking
      // block, so the type check below correctly triggers a new text block.
      if (!accumulator.getCurrentBlock() || accumulator.getCurrentBlock().type !== 'text') {
        // Start text block
        const block = accumulator.startBlock('text');
        events.push(this._createContentBlockStartEvent(block));
      }

      accumulator.addDelta(delta.content);
      events.push(this._createTextDeltaEvent(
        accumulator.getCurrentBlock(),
        delta.content
      ));
    }

    // Usage update (appears in final chunk usually)
    if (openaiEvent.data.usage) {
      accumulator.updateUsage(openaiEvent.data.usage);
    }

    // Finish reason
    // Stored only; the message_delta carrying it is emitted by finalizeDelta.
    if (choice.finish_reason) {
      accumulator.finishReason = choice.finish_reason;
    }

    return events;
  }
|
|
513
|
+
|
|
514
|
+
/**
|
|
515
|
+
* Finalize streaming and generate closing events
|
|
516
|
+
* @param {DeltaAccumulator} accumulator - State accumulator
|
|
517
|
+
* @returns {Array<Object>} Final Anthropic SSE events
|
|
518
|
+
*/
|
|
519
|
+
finalizeDelta(accumulator) {
|
|
520
|
+
if (accumulator.finalized) {
|
|
521
|
+
return []; // Already finalized
|
|
522
|
+
}
|
|
523
|
+
|
|
524
|
+
const events = [];
|
|
525
|
+
|
|
526
|
+
// Close current content block if any
|
|
527
|
+
const currentBlock = accumulator.getCurrentBlock();
|
|
528
|
+
if (currentBlock && !currentBlock.stopped) {
|
|
529
|
+
if (currentBlock.type === 'thinking') {
|
|
530
|
+
events.push(this._createSignatureDeltaEvent(currentBlock));
|
|
531
|
+
}
|
|
532
|
+
events.push(this._createContentBlockStopEvent(currentBlock));
|
|
533
|
+
accumulator.stopCurrentBlock();
|
|
534
|
+
}
|
|
535
|
+
|
|
536
|
+
// Message delta (stop reason + usage)
|
|
537
|
+
events.push({
|
|
538
|
+
event: 'message_delta',
|
|
539
|
+
data: {
|
|
540
|
+
type: 'message_delta',
|
|
541
|
+
delta: {
|
|
542
|
+
stop_reason: this._mapStopReason(accumulator.finishReason || 'stop')
|
|
543
|
+
},
|
|
544
|
+
usage: {
|
|
545
|
+
output_tokens: accumulator.outputTokens
|
|
546
|
+
}
|
|
547
|
+
}
|
|
548
|
+
});
|
|
549
|
+
|
|
550
|
+
// Message stop
|
|
551
|
+
events.push({
|
|
552
|
+
event: 'message_stop',
|
|
553
|
+
data: {
|
|
554
|
+
type: 'message_stop'
|
|
555
|
+
}
|
|
556
|
+
});
|
|
557
|
+
|
|
558
|
+
accumulator.finalized = true;
|
|
559
|
+
return events;
|
|
560
|
+
}
|
|
561
|
+
|
|
562
|
+
/**
|
|
563
|
+
* Create message_start event
|
|
564
|
+
* @private
|
|
565
|
+
*/
|
|
566
|
+
_createMessageStartEvent(accumulator) {
|
|
567
|
+
return {
|
|
568
|
+
event: 'message_start',
|
|
569
|
+
data: {
|
|
570
|
+
type: 'message_start',
|
|
571
|
+
message: {
|
|
572
|
+
id: accumulator.messageId,
|
|
573
|
+
type: 'message',
|
|
574
|
+
role: accumulator.role,
|
|
575
|
+
content: [],
|
|
576
|
+
model: accumulator.model || 'glm-4.6',
|
|
577
|
+
stop_reason: null,
|
|
578
|
+
usage: {
|
|
579
|
+
input_tokens: accumulator.inputTokens,
|
|
580
|
+
output_tokens: 0
|
|
581
|
+
}
|
|
582
|
+
}
|
|
583
|
+
}
|
|
584
|
+
};
|
|
585
|
+
}
|
|
586
|
+
|
|
587
|
+
/**
|
|
588
|
+
* Create content_block_start event
|
|
589
|
+
* @private
|
|
590
|
+
*/
|
|
591
|
+
_createContentBlockStartEvent(block) {
|
|
592
|
+
return {
|
|
593
|
+
event: 'content_block_start',
|
|
594
|
+
data: {
|
|
595
|
+
type: 'content_block_start',
|
|
596
|
+
index: block.index,
|
|
597
|
+
content_block: {
|
|
598
|
+
type: block.type,
|
|
599
|
+
[block.type]: ''
|
|
600
|
+
}
|
|
601
|
+
}
|
|
602
|
+
};
|
|
603
|
+
}
|
|
604
|
+
|
|
605
|
+
/**
|
|
606
|
+
* Create thinking_delta event
|
|
607
|
+
* @private
|
|
608
|
+
*/
|
|
609
|
+
_createThinkingDeltaEvent(block, delta) {
|
|
610
|
+
return {
|
|
611
|
+
event: 'content_block_delta',
|
|
612
|
+
data: {
|
|
613
|
+
type: 'content_block_delta',
|
|
614
|
+
index: block.index,
|
|
615
|
+
delta: {
|
|
616
|
+
type: 'thinking_delta',
|
|
617
|
+
thinking: delta
|
|
618
|
+
}
|
|
619
|
+
}
|
|
620
|
+
};
|
|
621
|
+
}
|
|
622
|
+
|
|
623
|
+
/**
|
|
624
|
+
* Create text_delta event
|
|
625
|
+
* @private
|
|
626
|
+
*/
|
|
627
|
+
_createTextDeltaEvent(block, delta) {
|
|
628
|
+
return {
|
|
629
|
+
event: 'content_block_delta',
|
|
630
|
+
data: {
|
|
631
|
+
type: 'content_block_delta',
|
|
632
|
+
index: block.index,
|
|
633
|
+
delta: {
|
|
634
|
+
type: 'text_delta',
|
|
635
|
+
text: delta
|
|
636
|
+
}
|
|
637
|
+
}
|
|
638
|
+
};
|
|
639
|
+
}
|
|
640
|
+
|
|
641
|
+
/**
|
|
642
|
+
* Create signature_delta event
|
|
643
|
+
* @private
|
|
644
|
+
*/
|
|
645
|
+
_createSignatureDeltaEvent(block) {
|
|
646
|
+
const signature = this._generateThinkingSignature(block.content);
|
|
647
|
+
return {
|
|
648
|
+
event: 'signature_delta',
|
|
649
|
+
data: {
|
|
650
|
+
type: 'signature_delta',
|
|
651
|
+
index: block.index,
|
|
652
|
+
signature: signature
|
|
653
|
+
}
|
|
654
|
+
};
|
|
655
|
+
}
|
|
656
|
+
|
|
657
|
+
/**
|
|
658
|
+
* Create content_block_stop event
|
|
659
|
+
* @private
|
|
660
|
+
*/
|
|
661
|
+
_createContentBlockStopEvent(block) {
|
|
662
|
+
return {
|
|
663
|
+
event: 'content_block_stop',
|
|
664
|
+
data: {
|
|
665
|
+
type: 'content_block_stop',
|
|
666
|
+
index: block.index
|
|
667
|
+
}
|
|
668
|
+
};
|
|
669
|
+
}
|
|
670
|
+
|
|
424
671
|
/**
|
|
425
672
|
* Log message if verbose
|
|
426
673
|
* @param {string} message - Message to log
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* SSEParser - Parse Server-Sent Events (SSE) stream
|
|
6
|
+
*
|
|
7
|
+
* Handles:
|
|
8
|
+
* - Incomplete events across chunks
|
|
9
|
+
* - Multiple events in single chunk
|
|
10
|
+
* - Malformed data (skip gracefully)
|
|
11
|
+
* - [DONE] marker
|
|
12
|
+
*
|
|
13
|
+
* Usage:
|
|
14
|
+
* const parser = new SSEParser();
|
|
15
|
+
* stream.on('data', chunk => {
|
|
16
|
+
* const events = parser.parse(chunk);
|
|
17
|
+
* events.forEach(event => { ... });
|
|
18
|
+
* });
|
|
19
|
+
*/
|
|
20
|
+
class SSEParser {
  /**
   * Incremental Server-Sent Events parser.
   *
   * Handles events split across chunks, multiple events per chunk, the
   * `[DONE]` marker, malformed JSON (skipped with a log line), CRLF line
   * endings, and the SSE-spec-optional space after the field colon.
   *
   * @param {Object} options - Optional limits
   * @param {number} [options.maxBufferSize=1048576] - Max characters retained
   *   for a single incomplete event between chunks (DoS protection)
   */
  constructor(options = {}) {
    this.buffer = '';
    this.eventCount = 0;
    this.maxBufferSize = options.maxBufferSize || 1024 * 1024; // 1MB default
  }

  /**
   * Parse a chunk and extract complete SSE events.
   *
   * Fix: the DoS limit is enforced on the retained incomplete remainder,
   * not on the raw concatenated chunk. The previous pre-split check threw
   * on any chunk whose total size exceeded the limit even when it consisted
   * entirely of small, complete events.
   *
   * @param {Buffer|string} chunk - Data chunk from stream
   * @returns {Array<Object>} Array of parsed events ({event, data, index, [id], [retry]})
   * @throws {Error} When a single incomplete event exceeds maxBufferSize
   */
  parse(chunk) {
    const lines = (this.buffer + chunk.toString()).split('\n');

    // Keep incomplete line in buffer
    this.buffer = lines.pop() || '';

    // C-01 Fix: Prevent unbounded buffer growth (DoS protection).
    // Checked after complete lines are flushed so legitimate large chunks
    // containing many complete events are not rejected.
    if (this.buffer.length > this.maxBufferSize) {
      throw new Error(`SSE buffer exceeded ${this.maxBufferSize} bytes (DoS protection)`);
    }

    const events = [];
    let currentEvent = { event: 'message', data: '' };

    for (const rawLine of lines) {
      // Fix: tolerate CRLF-delimited streams (SSE permits \r\n line endings);
      // previously a trailing \r broke the `[DONE]` comparison.
      const line = rawLine.endsWith('\r') ? rawLine.slice(0, -1) : rawLine;

      if (line.startsWith('event:')) {
        currentEvent.event = this._fieldValue(line, 6).trim();
      } else if (line.startsWith('data:')) {
        const data = this._fieldValue(line, 5);

        if (data === '[DONE]') {
          this.eventCount++;
          events.push({
            event: 'done',
            data: null,
            index: this.eventCount
          });
          currentEvent = { event: 'message', data: '' };
        } else {
          try {
            currentEvent.data = JSON.parse(data);
            this.eventCount++;
            currentEvent.index = this.eventCount;
            events.push(currentEvent);
            currentEvent = { event: 'message', data: '' };
          } catch (e) {
            // H-01 Fix: Log parse errors for debugging
            if (typeof console !== 'undefined' && console.error) {
              console.error('[SSEParser] Malformed JSON event:', e.message, 'Data:', data.substring(0, 100));
            }
          }
        }
      } else if (line.startsWith('id:')) {
        currentEvent.id = this._fieldValue(line, 3).trim();
      } else if (line.startsWith('retry:')) {
        currentEvent.retry = parseInt(this._fieldValue(line, 6), 10);
      }
      // Empty lines separate events (already handled by JSON parsing)
    }

    return events;
  }

  /**
   * Extract an SSE field value: everything after the field name, minus the
   * single optional leading space the spec permits ("data: x" and "data:x"
   * both yield "x").
   * @param {string} line - Full field line (CR already stripped)
   * @param {number} nameLength - Length of the field name including the colon
   * @returns {string} Field value
   * @private
   */
  _fieldValue(line, nameLength) {
    const value = line.substring(nameLength);
    return value.startsWith(' ') ? value.substring(1) : value;
  }

  /**
   * Reset parser state (for reuse)
   */
  reset() {
    this.buffer = '';
    this.eventCount = 0;
  }
}
|
|
95
|
+
|
|
96
|
+
module.exports = SSEParser;
|
package/lib/ccs
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
set -euo pipefail
|
|
3
3
|
|
|
4
4
|
# Version (updated by scripts/bump-version.sh)
|
|
5
|
-
CCS_VERSION="3.
|
|
5
|
+
CCS_VERSION="3.4.0"
|
|
6
6
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
7
7
|
readonly CONFIG_FILE="${CCS_CONFIG:-$HOME/.ccs/config.json}"
|
|
8
8
|
readonly PROFILES_JSON="$HOME/.ccs/profiles.json"
|
package/lib/ccs.ps1
CHANGED
|
@@ -12,7 +12,7 @@ param(
|
|
|
12
12
|
$ErrorActionPreference = "Stop"
|
|
13
13
|
|
|
14
14
|
# Version (updated by scripts/bump-version.sh)
|
|
15
|
-
$CcsVersion = "3.
|
|
15
|
+
$CcsVersion = "3.4.0"
|
|
16
16
|
$ScriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path
|
|
17
17
|
$ConfigFile = if ($env:CCS_CONFIG) { $env:CCS_CONFIG } else { "$env:USERPROFILE\.ccs\config.json" }
|
|
18
18
|
$ProfilesJson = "$env:USERPROFILE\.ccs\profiles.json"
|