@kaitranntt/ccs 3.4.0 → 3.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -205,9 +205,21 @@ Commands and skills symlinked from `~/.ccs/shared/` - no duplication across prof
205
205
  |---------|-----------------|-------------------|
206
206
  | **Endpoint** | Anthropic-compatible | OpenAI-compatible |
207
207
  | **Thinking** | No | Yes (reasoning_content) |
208
+ | **Tool Support** | Basic | **Full (v3.5+)** |
209
+ | **MCP Tools** | Limited | **Working (v3.5+)** |
208
210
  | **Streaming** | Yes | **Yes (v3.4+)** |
209
211
  | **TTFB** | <500ms | <500ms (streaming), 2-10s (buffered) |
210
- | **Use Case** | Fast responses | Complex reasoning |
212
+ | **Use Case** | Fast responses | Complex reasoning + tools |
213
+
214
+ ### Tool Support (v3.5)
215
+
216
+ **GLMT now fully supports MCP tools and function calling**:
217
+
218
+ - **Bidirectional Transformation**: Anthropic tools ↔ OpenAI function calling
219
+ - **MCP Integration**: MCP tools execute correctly (no XML tag output)
220
+ - **Streaming Tool Calls**: Real-time tool calls with input_json deltas
221
+ - **Backward Compatible**: Works seamlessly with existing thinking support
222
+ - **No Configuration**: Tool support works automatically
211
223
 
212
224
  ### Streaming Support (v3.4)
213
225
 
@@ -216,21 +228,42 @@ Commands and skills symlinked from `~/.ccs/shared/` - no duplication across prof
216
228
  - **Default**: Streaming enabled (TTFB <500ms)
217
229
  - **Disable**: Set `CCS_GLMT_STREAMING=disabled` for buffered mode
218
230
  - **Force**: Set `CCS_GLMT_STREAMING=force` to override client preferences
231
+ - **Thinking parameter**: Claude CLI `thinking` parameter support
232
+ - Respects `thinking.type` and `budget_tokens`
233
+ - Precedence: CLI parameter > message tags > default
219
234
 
220
- **Confirmed working**: Z.AI (1498 reasoning chunks tested)
235
+ **Confirmed working**: Z.AI (1498 reasoning chunks tested, tool calls verified)
221
236
 
222
237
  ### How It Works
223
238
 
224
239
  1. CCS spawns embedded HTTP proxy on localhost
225
240
  2. Proxy converts Anthropic format → OpenAI format (streaming or buffered)
226
- 3. Forwards to Z.AI with reasoning parameters
227
- 4. Converts `reasoning_content` thinking blocks (incremental or complete)
228
- 5. Thinking appears in Claude Code UI in real-time
241
+ 3. Transforms Anthropic tools → OpenAI function calling format
242
+ 4. Forwards to Z.AI with reasoning parameters and tools
243
+ 5. Converts `reasoning_content` thinking blocks (incremental or complete)
244
+ 6. Converts OpenAI `tool_calls` → Anthropic tool_use blocks
245
+ 7. Thinking and tool calls appear in Claude Code UI in real-time
229
246
 
230
247
  ### Control Tags
231
248
 
232
249
  - `<Thinking:On|Off>` - Enable/disable reasoning blocks (default: On)
233
- - `<Effort:Low|Medium|High>` - Control reasoning depth (default: Medium)
250
+ - `<Effort:Low|Medium|High>` - Control reasoning depth (deprecated - Z.AI only supports binary thinking)
251
+
252
+ ### Environment Variables
253
+
254
+ **GLMT-specific**:
255
+ - `CCS_GLMT_FORCE_ENGLISH=true` - Force English output (default: true)
256
+ - `CCS_GLMT_THINKING_BUDGET=8192` - Control thinking on/off based on task type
257
+ - 0 or "unlimited": Always enable thinking
258
+ - 1-2048: Disable thinking (fast execution)
259
+ - 2049-8192: Enable for reasoning tasks only (default)
260
+ - >8192: Always enable thinking
261
+ - `CCS_GLMT_STREAMING=disabled` - Force buffered mode
262
+ - `CCS_GLMT_STREAMING=force` - Force streaming (override client)
263
+
264
+ **General**:
265
+ - `CCS_DEBUG_LOG=1` - Enable debug file logging
266
+ - `CCS_CLAUDE_PATH=/path/to/claude` - Custom Claude CLI path
234
267
 
235
268
  ### API Key Setup
236
269
 
@@ -376,6 +409,7 @@ irm ccs.kaitran.ca/uninstall | iex
376
409
  - [Configuration](./docs/en/configuration.md)
377
410
  - [Usage Examples](./docs/en/usage.md)
378
411
  - [System Architecture](./docs/system-architecture.md)
412
+ - [GLMT Control Mechanisms](./docs/glmt-controls.md)
379
413
  - [Troubleshooting](./docs/en/troubleshooting.md)
380
414
  - [Contributing](./CONTRIBUTING.md)
381
415
 
package/VERSION CHANGED
@@ -1 +1 @@
1
- 3.4.0
1
+ 3.4.2
@@ -2,9 +2,9 @@
2
2
 
3
3
  const { spawn } = require('child_process');
4
4
  const ProfileRegistry = require('./profile-registry');
5
- const InstanceManager = require('./instance-manager');
6
- const { colored } = require('./helpers');
7
- const { detectClaudeCli } = require('./claude-detector');
5
+ const InstanceManager = require('../management/instance-manager');
6
+ const { colored } = require('../utils/helpers');
7
+ const { detectClaudeCli } = require('../utils/claude-detector');
8
8
 
9
9
  /**
10
10
  * Auth Commands (Simplified)
package/bin/ccs.js CHANGED
@@ -5,11 +5,11 @@ const { spawn } = require('child_process');
5
5
  const path = require('path');
6
6
  const fs = require('fs');
7
7
  const os = require('os');
8
- const { error, colored } = require('./helpers');
9
- const { detectClaudeCli, showClaudeNotFoundError } = require('./claude-detector');
10
- const { getSettingsPath, getConfigPath } = require('./config-manager');
11
- const { ErrorManager } = require('./error-manager');
12
- const RecoveryManager = require('./recovery-manager');
8
+ const { error, colored } = require('./utils/helpers');
9
+ const { detectClaudeCli, showClaudeNotFoundError } = require('./utils/claude-detector');
10
+ const { getSettingsPath, getConfigPath } = require('./utils/config-manager');
11
+ const { ErrorManager } = require('./utils/error-manager');
12
+ const RecoveryManager = require('./management/recovery-manager');
13
13
 
14
14
  // Version (sync with package.json)
15
15
  const CCS_VERSION = require('../package.json').version;
@@ -194,7 +194,7 @@ function handleUninstallCommand() {
194
194
  }
195
195
 
196
196
  async function handleDoctorCommand() {
197
- const Doctor = require('./doctor');
197
+ const Doctor = require('./management/doctor');
198
198
  const doctor = new Doctor();
199
199
 
200
200
  await doctor.runAllChecks();
@@ -216,7 +216,7 @@ function detectProfile(args) {
216
216
 
217
217
  // Execute Claude CLI with embedded proxy (for GLMT profile)
218
218
  async function execClaudeWithProxy(claudeCli, profileName, args) {
219
- const { getSettingsPath } = require('./config-manager');
219
+ const { getSettingsPath } = require('./utils/config-manager');
220
220
 
221
221
  // 1. Read settings to get API key
222
222
  const settingsPath = getSettingsPath(profileName);
@@ -233,9 +233,10 @@ async function execClaudeWithProxy(claudeCli, profileName, args) {
233
233
  const verbose = args.includes('--verbose') || args.includes('-v');
234
234
 
235
235
  // 2. Spawn embedded proxy with verbose flag
236
- const proxyPath = path.join(__dirname, 'glmt-proxy.js');
236
+ const proxyPath = path.join(__dirname, 'glmt', 'glmt-proxy.js');
237
237
  const proxyArgs = verbose ? ['--verbose'] : [];
238
- const proxy = spawn('node', [proxyPath, ...proxyArgs], {
238
+ // Use process.execPath for Windows compatibility (CVE-2024-27980)
239
+ const proxy = spawn(process.execPath, [proxyPath, ...proxyArgs], {
239
240
  stdio: ['ignore', 'pipe', verbose ? 'pipe' : 'inherit']
240
241
  });
241
242
 
@@ -286,16 +287,34 @@ async function execClaudeWithProxy(claudeCli, profileName, args) {
286
287
 
287
288
  // 4. Spawn Claude CLI with proxy URL
288
289
  const envVars = {
289
- ...process.env,
290
290
  ANTHROPIC_BASE_URL: `http://127.0.0.1:${port}`,
291
291
  ANTHROPIC_AUTH_TOKEN: apiKey,
292
292
  ANTHROPIC_MODEL: 'glm-4.6'
293
293
  };
294
294
 
295
- const claude = spawn(claudeCli, args, {
296
- stdio: 'inherit',
297
- env: envVars
298
- });
295
+ // Use existing execClaude helper for consistent Windows handling
296
+ const isWindows = process.platform === 'win32';
297
+ const needsShell = isWindows && /\.(cmd|bat|ps1)$/i.test(claudeCli);
298
+ const env = { ...process.env, ...envVars };
299
+
300
+ let claude;
301
+ if (needsShell) {
302
+ // When shell needed: concatenate into string to avoid DEP0190 warning
303
+ const cmdString = [claudeCli, ...args].map(escapeShellArg).join(' ');
304
+ claude = spawn(cmdString, {
305
+ stdio: 'inherit',
306
+ windowsHide: true,
307
+ shell: true,
308
+ env
309
+ });
310
+ } else {
311
+ // When no shell needed: use array form (faster, no shell overhead)
312
+ claude = spawn(claudeCli, args, {
313
+ stdio: 'inherit',
314
+ windowsHide: true,
315
+ env
316
+ });
317
+ }
299
318
 
300
319
  // 5. Cleanup: kill proxy when Claude exits
301
320
  claude.on('exit', (code, signal) => {
@@ -358,7 +377,7 @@ async function main() {
358
377
 
359
378
  // Special case: auth command (multi-account management)
360
379
  if (firstArg === 'auth') {
361
- const AuthCommands = require('./auth-commands');
380
+ const AuthCommands = require('./auth/auth-commands');
362
381
  const authCommands = new AuthCommands();
363
382
  await authCommands.route(args.slice(1));
364
383
  return;
@@ -383,10 +402,10 @@ async function main() {
383
402
  }
384
403
 
385
404
  // Use ProfileDetector to determine profile type
386
- const ProfileDetector = require('./profile-detector');
387
- const InstanceManager = require('./instance-manager');
388
- const ProfileRegistry = require('./profile-registry');
389
- const { getSettingsPath } = require('./config-manager');
405
+ const ProfileDetector = require('./auth/profile-detector');
406
+ const InstanceManager = require('./management/instance-manager');
407
+ const ProfileRegistry = require('./auth/profile-registry');
408
+ const { getSettingsPath } = require('./utils/config-manager');
390
409
 
391
410
  const detector = new ProfileDetector();
392
411
 
@@ -0,0 +1,114 @@
1
+ #!/usr/bin/env node
2
+ 'use strict';
3
+
4
+ /**
5
+ * BudgetCalculator - Control thinking enable/disable based on task complexity
6
+ *
7
+ * Purpose: Z.AI API only supports binary thinking (on/off), not reasoning_effort levels.
8
+ * This module decides when to enable thinking based on task type and budget preferences.
9
+ *
10
+ * Usage:
11
+ * const calculator = new BudgetCalculator();
12
+ * const shouldThink = calculator.shouldEnableThinking(taskType, envBudget);
13
+ *
14
+ * Configuration:
15
+ * CCS_GLMT_THINKING_BUDGET:
16
+ * - 0 or "unlimited": Always enable thinking (power user mode)
17
+ * - 1-2048: Disable thinking (fast execution, low budget)
18
+ * - 2049-8192: Enable thinking for reasoning tasks only (default)
19
+ * - >8192: Always enable thinking (high budget)
20
+ *
21
+ * Task type mapping:
22
+ * - reasoning: Enable thinking (planning, design, analysis)
23
+ * - execution: Disable thinking (fix, implement, debug) unless high budget
24
+ * - mixed: Enable thinking unless the budget is low (1-2048)
25
+ */
26
class BudgetCalculator {
  /**
   * @param {Object} [options]
   * @param {number} [options.defaultBudget=8192] - Budget applied when no usable
   *   value is supplied to shouldEnableThinking. 0 means "unlimited" and is
   *   honored as given.
   */
  constructor(options = {}) {
    this.budgetThresholds = {
      low: 2048,    // at or below (and non-zero): thinking disabled
      medium: 8192  // above: thinking always enabled; in between: task-aware
    };

    // 0 is a legitimate budget ("unlimited"), so it must not be swallowed by a
    // plain falsy fallback. Every other falsy value still falls back to 8192.
    const { defaultBudget } = options;
    this.defaultBudget = defaultBudget === 0 || defaultBudget ? defaultBudget : 8192;
  }

  /**
   * Determine if thinking should be enabled based on task type and budget
   * @param {string} taskType - 'reasoning', 'execution', or 'mixed'
   * @param {string|number} envBudget - CCS_GLMT_THINKING_BUDGET value
   * @returns {boolean} True if thinking should be enabled
   */
  shouldEnableThinking(taskType, envBudget) {
    const budget = this._parseBudget(envBudget);

    // Unlimited budget (0): always think
    if (budget === 0) {
      return true;
    }

    // Low budget (<= 2048): never think (fast execution mode)
    if (budget <= this.budgetThresholds.low) {
      return false;
    }

    // High budget (> 8192): always think
    if (budget > this.budgetThresholds.medium) {
      return true;
    }

    // Medium budget (2049-8192): only 'execution' tasks skip thinking;
    // 'reasoning', 'mixed' and any unrecognised task type keep it enabled.
    return taskType !== 'execution';
  }

  /**
   * Parse budget from environment variable or use default
   * @param {string|number} envBudget - Budget value
   * @returns {number} Parsed budget (0 = unlimited); negative values clamp to 0
   * @private
   */
  _parseBudget(envBudget) {
    // Explicit checks instead of a falsy test: numeric 0 is a valid budget.
    if (envBudget === undefined || envBudget === null || envBudget === '') {
      return this.defaultBudget;
    }

    if (typeof envBudget === 'string') {
      // Trim so that padded values such as " unlimited " are still recognised.
      const text = envBudget.trim();
      if (text.toLowerCase() === 'unlimited') {
        return 0;
      }
      const parsed = Number.parseInt(text, 10);
      if (Number.isNaN(parsed)) {
        return this.defaultBudget;
      }
      return Math.max(parsed, 0);
    }

    if (typeof envBudget === 'number') {
      // NaN fails every threshold comparison, so treat it as "no value given".
      if (Number.isNaN(envBudget)) {
        return this.defaultBudget;
      }
      return Math.max(envBudget, 0);
    }

    // Any other type (boolean, object, ...) is not a budget.
    return this.defaultBudget;
  }

  /**
   * Get human-readable budget description
   * @param {number} budget - Budget value
   * @returns {string} Description
   */
  getBudgetDescription(budget) {
    if (budget === 0) return 'unlimited (always think)';
    if (budget <= this.budgetThresholds.low) return 'low (fast execution, no thinking)';
    if (budget <= this.budgetThresholds.medium) return 'medium (task-aware thinking)';
    return 'high (always think)';
  }
}
113
+
114
+ module.exports = BudgetCalculator;
@@ -25,6 +25,10 @@ class DeltaAccumulator {
25
25
  this.contentBlocks = [];
26
26
  this.currentBlockIndex = -1;
27
27
 
28
+ // Tool calls tracking
29
+ this.toolCalls = [];
30
+ this.toolCallsIndex = {};
31
+
28
32
  // Buffers
29
33
  this.thinkingBuffer = '';
30
34
  this.textBuffer = '';
@@ -33,9 +37,14 @@ class DeltaAccumulator {
33
37
  this.maxBlocks = options.maxBlocks || 100;
34
38
  this.maxBufferSize = options.maxBufferSize || 10 * 1024 * 1024; // 10MB
35
39
 
40
+ // Loop detection configuration
41
+ this.loopDetectionThreshold = options.loopDetectionThreshold || 3;
42
+ this.loopDetected = false;
43
+
36
44
  // State flags
37
45
  this.messageStarted = false;
38
46
  this.finalized = false;
47
+ this.usageReceived = false; // Track if usage data has arrived
39
48
 
40
49
  // Statistics
41
50
  this.inputTokens = 0;
@@ -56,7 +65,7 @@ class DeltaAccumulator {
56
65
 
57
66
  /**
58
67
  * Start new content block
59
- * @param {string} type - Block type ('thinking' or 'text')
68
+ * @param {string} type - Block type ('thinking', 'text', or 'tool_use')
60
69
  * @returns {Object} New block
61
70
  */
62
71
  startBlock(type) {
@@ -75,7 +84,7 @@ class DeltaAccumulator {
75
84
  };
76
85
  this.contentBlocks.push(block);
77
86
 
78
- // Reset buffer for new block
87
+ // Reset buffer for new block (tool_use doesn't use buffers)
79
88
  if (type === 'thinking') {
80
89
  this.thinkingBuffer = '';
81
90
  } else if (type === 'text') {
@@ -128,9 +137,104 @@ class DeltaAccumulator {
128
137
  if (usage) {
129
138
  this.inputTokens = usage.prompt_tokens || usage.input_tokens || 0;
130
139
  this.outputTokens = usage.completion_tokens || usage.output_tokens || 0;
140
+ this.usageReceived = true; // Mark that we've received usage data
141
+ }
142
+ }
143
+
144
+ /**
145
+ * Add or update tool call delta
146
+ * @param {Object} toolCallDelta - Tool call delta from OpenAI
147
+ */
148
+ addToolCallDelta(toolCallDelta) {
149
+ const index = toolCallDelta.index;
150
+
151
+ // Initialize tool call if not exists
152
+ if (!this.toolCallsIndex[index]) {
153
+ const toolCall = {
154
+ index: index,
155
+ id: '',
156
+ type: 'function',
157
+ function: {
158
+ name: '',
159
+ arguments: ''
160
+ }
161
+ };
162
+ this.toolCalls.push(toolCall);
163
+ this.toolCallsIndex[index] = toolCall;
164
+ }
165
+
166
+ const toolCall = this.toolCallsIndex[index];
167
+
168
+ // Update id if present
169
+ if (toolCallDelta.id) {
170
+ toolCall.id = toolCallDelta.id;
171
+ }
172
+
173
+ // Update type if present
174
+ if (toolCallDelta.type) {
175
+ toolCall.type = toolCallDelta.type;
176
+ }
177
+
178
+ // Update function name if present
179
+ if (toolCallDelta.function?.name) {
180
+ toolCall.function.name += toolCallDelta.function.name;
181
+ }
182
+
183
+ // Update function arguments if present
184
+ if (toolCallDelta.function?.arguments) {
185
+ toolCall.function.arguments += toolCallDelta.function.arguments;
131
186
  }
132
187
  }
133
188
 
189
+ /**
190
+ * Get all tool calls
191
+ * @returns {Array} Tool calls array
192
+ */
193
+ getToolCalls() {
194
+ return this.toolCalls;
195
+ }
196
+
197
+ /**
198
+ * Check for planning loop pattern
199
+ * Loop = N consecutive thinking blocks with no tool calls
200
+ * @returns {boolean} True if loop detected
201
+ */
202
+ checkForLoop() {
203
+ // Already detected loop
204
+ if (this.loopDetected) {
205
+ return true;
206
+ }
207
+
208
+ // Need minimum blocks to detect pattern
209
+ if (this.contentBlocks.length < this.loopDetectionThreshold) {
210
+ return false;
211
+ }
212
+
213
+ // Get last N blocks
214
+ const recentBlocks = this.contentBlocks.slice(-this.loopDetectionThreshold);
215
+
216
+ // Check if all recent blocks are thinking blocks
217
+ const allThinking = recentBlocks.every(b => b.type === 'thinking');
218
+
219
+ // Check if no tool calls have been made at all
220
+ const noToolCalls = this.toolCalls.length === 0;
221
+
222
+ // Loop detected if: all recent blocks are thinking AND no tool calls yet
223
+ if (allThinking && noToolCalls) {
224
+ this.loopDetected = true;
225
+ return true;
226
+ }
227
+
228
+ return false;
229
+ }
230
+
231
+ /**
232
+ * Reset loop detection state (for testing)
233
+ */
234
+ resetLoopDetection() {
235
+ this.loopDetected = false;
236
+ }
237
+
134
238
  /**
135
239
  * Get summary of accumulated state
136
240
  * @returns {Object} Summary
@@ -142,8 +246,10 @@ class DeltaAccumulator {
142
246
  role: this.role,
143
247
  blockCount: this.contentBlocks.length,
144
248
  currentIndex: this.currentBlockIndex,
249
+ toolCallCount: this.toolCalls.length,
145
250
  messageStarted: this.messageStarted,
146
251
  finalized: this.finalized,
252
+ loopDetected: this.loopDetected,
147
253
  usage: {
148
254
  input_tokens: this.inputTokens,
149
255
  output_tokens: this.outputTokens
@@ -31,7 +31,10 @@ const DeltaAccumulator = require('./delta-accumulator');
31
31
  */
32
32
  class GlmtProxy {
33
33
  constructor(config = {}) {
34
- this.transformer = new GlmtTransformer({ verbose: config.verbose });
34
+ this.transformer = new GlmtTransformer({
35
+ verbose: config.verbose,
36
+ debugLog: config.debugLog || process.env.CCS_DEBUG_LOG === '1'
37
+ });
35
38
  // Use ANTHROPIC_BASE_URL from environment (set by settings.json) or fallback to Z.AI default
36
39
  this.upstreamUrl = process.env.ANTHROPIC_BASE_URL || 'https://api.z.ai/api/coding/paas/v4/chat/completions';
37
40
  this.server = null;
@@ -117,6 +120,13 @@ class GlmtProxy {
117
120
  return;
118
121
  }
119
122
 
123
+ // Log thinking parameter for debugging
124
+ if (anthropicRequest.thinking) {
125
+ this.log(`Request contains thinking parameter: ${JSON.stringify(anthropicRequest.thinking)}`);
126
+ } else {
127
+ this.log(`Request does NOT contain thinking parameter (will use message tags or default)`);
128
+ }
129
+
120
130
  // Branch: streaming or buffered
121
131
  const useStreaming = (anthropicRequest.stream && this.streamingEnabled) || this.forceStreaming;
122
132
 
@@ -196,10 +206,16 @@ class GlmtProxy {
196
206
  'Content-Type': 'text/event-stream',
197
207
  'Cache-Control': 'no-cache',
198
208
  'Connection': 'keep-alive',
199
- 'Access-Control-Allow-Origin': '*'
209
+ 'Access-Control-Allow-Origin': '*',
210
+ 'X-Accel-Buffering': 'no' // Disable proxy buffering
200
211
  });
201
212
 
202
- this.log('Starting SSE stream to Claude CLI');
213
+ // Disable Nagle's algorithm to prevent buffering at socket level
214
+ if (res.socket) {
215
+ res.socket.setNoDelay(true);
216
+ }
217
+
218
+ this.log('Starting SSE stream to Claude CLI (socket buffering disabled)');
203
219
 
204
220
  // Forward and stream
205
221
  await this._forwardAndStreamUpstream(
@@ -368,11 +384,16 @@ class GlmtProxy {
368
384
  // Transform OpenAI delta → Anthropic events
369
385
  const anthropicEvents = this.transformer.transformDelta(event, accumulator);
370
386
 
371
- // Forward to Claude CLI
387
+ // Forward to Claude CLI with immediate flush
372
388
  anthropicEvents.forEach(evt => {
373
389
  const eventLine = `event: ${evt.event}\n`;
374
390
  const dataLine = `data: ${JSON.stringify(evt.data)}\n\n`;
375
391
  clientRes.write(eventLine + dataLine);
392
+
393
+ // Flush immediately if method available (HTTP/2 or custom servers)
394
+ if (typeof clientRes.flush === 'function') {
395
+ clientRes.flush();
396
+ }
376
397
  });
377
398
  });
378
399
  } catch (error) {