@kaitranntt/ccs 3.4.0 → 3.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,13 +7,18 @@ const path = require('path');
  const os = require('os');
  const SSEParser = require('./sse-parser');
  const DeltaAccumulator = require('./delta-accumulator');
+ const LocaleEnforcer = require('./locale-enforcer');
+ const BudgetCalculator = require('./budget-calculator');
+ const TaskClassifier = require('./task-classifier');

  /**
-  * GlmtTransformer - Convert between Anthropic and OpenAI formats with thinking support
+  * GlmtTransformer - Convert between Anthropic and OpenAI formats with thinking and tool support
   *
   * Features:
-  * - Request: Anthropic → OpenAI (inject reasoning params)
+  * - Request: Anthropic → OpenAI (inject reasoning params, transform tools)
   * - Response: OpenAI reasoning_content → Anthropic thinking blocks
+  * - Tool Support: Anthropic tools ↔ OpenAI function calling (bidirectional)
+  * - Streaming: Real-time tool calls with input_json deltas
   * - Debug mode: Log raw data to ~/.ccs/logs/ (CCS_DEBUG_LOG=1)
   * - Verbose mode: Console logging with timestamps
   * - Validation: Self-test transformation results
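
The control tags mentioned above are plain-text markers inside a user message; the tag grammar below is taken from the `_hasThinkingTags` regexes later in this diff, message text is illustrative:

    // { role: 'user', content: '<Thinking:On> Refactor this module.' }
    // { role: 'user', content: '<Effort:High> Prove this invariant holds.' }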
@@ -38,6 +43,18 @@ class GlmtTransformer {
      'GLM-4.5': 96000,
      'GLM-4.5-air': 16000
    };
+   // Effort level thresholds (budget_tokens)
+   this.EFFORT_LOW_THRESHOLD = 2048;
+   this.EFFORT_HIGH_THRESHOLD = 8192;
+
+   // Initialize locale enforcer
+   this.localeEnforcer = new LocaleEnforcer({
+     forceEnglish: process.env.CCS_GLMT_FORCE_ENGLISH !== 'false'
+   });
+
+   // Initialize budget calculator and task classifier
+   this.budgetCalculator = new BudgetCalculator();
+   this.taskClassifier = new TaskClassifier();
  }

  /**
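
A quick sketch of how the constructor's environment switch resolves (sketch only; the diff shows neither GlmtTransformer's constructor signature nor the BudgetCalculator/TaskClassifier internals, which live in files not included here):

    // Enforcement is on unless the variable is the literal string 'false'
    process.env.CCS_GLMT_FORCE_ENGLISH = 'false';  // -> forceEnglish: false
    delete process.env.CCS_GLMT_FORCE_ENGLISH;     // -> forceEnglish: true
    process.env.CCS_GLMT_FORCE_ENGLISH = '0';      // -> still true ('0' !== 'false')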
@@ -50,24 +67,71 @@ class GlmtTransformer {
    this._writeDebugLog('request-anthropic', anthropicRequest);

    try {
-     // 1. Extract thinking control from messages
+     // 1. Extract thinking control from messages (tags like <Thinking:On|Off>)
      const thinkingConfig = this._extractThinkingControl(
        anthropicRequest.messages || []
      );
-     this.log(`Extracted thinking control: ${JSON.stringify(thinkingConfig)}`);
+     const hasControlTags = this._hasThinkingTags(anthropicRequest.messages || []);
+
+     // 2. Classify task type for intelligent thinking control
+     const taskType = this.taskClassifier.classify(anthropicRequest.messages || []);
+     this.log(`Task classified as: ${taskType}`);
+
+     // 3. Check budget and decide if thinking should be enabled
+     const envBudget = process.env.CCS_GLMT_THINKING_BUDGET;
+     const shouldThink = this.budgetCalculator.shouldEnableThinking(taskType, envBudget);
+     this.log(`Budget decision: thinking=${shouldThink} (budget: ${envBudget || 'default'}, type: ${taskType})`);
+
+     // Apply budget-based thinking control ONLY if:
+     // - No Claude CLI thinking parameter AND
+     // - No control tags in messages AND
+     // - Budget env var is explicitly set
+     if (!anthropicRequest.thinking && !hasControlTags && envBudget) {
+       thinkingConfig.thinking = shouldThink;
+       this.log('Applied budget-based thinking control');
+     }

-     // 2. Map model
+     // 4. Check anthropicRequest.thinking parameter (takes precedence over budget)
+     // Claude CLI sends this when alwaysThinkingEnabled is configured
+     if (anthropicRequest.thinking) {
+       if (anthropicRequest.thinking.type === 'enabled') {
+         thinkingConfig.thinking = true;
+         this.log('Claude CLI explicitly enabled thinking (overrides budget)');
+       } else if (anthropicRequest.thinking.type === 'disabled') {
+         thinkingConfig.thinking = false;
+         this.log('Claude CLI explicitly disabled thinking (overrides budget)');
+       } else {
+         this.log(`Warning: Unknown thinking type: ${anthropicRequest.thinking.type}`);
+       }
+     }
+
+     this.log(`Final thinking control: ${JSON.stringify(thinkingConfig)}`);
+
+     // 3. Map model
      const glmModel = this._mapModel(anthropicRequest.model);

-     // 3. Convert to OpenAI format
+     // 4. Inject locale instruction before sanitization
+     const messagesWithLocale = this.localeEnforcer.injectInstruction(
+       anthropicRequest.messages || []
+     );
+
+     // 5. Convert to OpenAI format
      const openaiRequest = {
        model: glmModel,
-       messages: this._sanitizeMessages(anthropicRequest.messages || []),
+       messages: this._sanitizeMessages(messagesWithLocale),
        max_tokens: this._getMaxTokens(glmModel),
        stream: anthropicRequest.stream ?? false
      };

-     // 4. Preserve optional parameters
+     // 5.5. Transform tools parameter if present
+     if (anthropicRequest.tools && anthropicRequest.tools.length > 0) {
+       openaiRequest.tools = this._transformTools(anthropicRequest.tools);
+       // Always use "auto" as Z.AI doesn't support other modes
+       openaiRequest.tool_choice = "auto";
+       this.log(`Transformed ${anthropicRequest.tools.length} tools for OpenAI format`);
+     }
+
+     // 6. Preserve optional parameters
      if (anthropicRequest.temperature !== undefined) {
        openaiRequest.temperature = anthropicRequest.temperature;
      }
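
Net precedence of the steps above: an explicit `thinking` parameter from Claude CLI always wins (it is applied last and overwrites), then in-message control tags, and the budget heuristic fires only when neither is present and CCS_GLMT_THINKING_BUDGET is set. A condensed, hypothetical restatement of that resolution order:

    // Illustrative only - mirrors the branch logic above
    function resolveThinking({ thinking }, hasControlTags, tagValue, envBudget, budgetValue) {
      if (thinking) return thinking.type === 'enabled'; // 1. explicit param overrides all
      if (hasControlTags) return tagValue;              // 2. <Thinking:On|Off> tags
      if (envBudget) return budgetValue;                // 3. budget heuristic
      return undefined;                                 // 4. fall through to defaults
    }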
@@ -75,13 +139,13 @@ class GlmtTransformer {
        openaiRequest.top_p = anthropicRequest.top_p;
      }

-     // 5. Handle streaming
+     // 7. Handle streaming
      // Keep stream parameter from request
      if (anthropicRequest.stream !== undefined) {
        openaiRequest.stream = anthropicRequest.stream;
      }

-     // 6. Inject reasoning parameters
+     // 8. Inject reasoning parameters
      this._injectReasoningParams(openaiRequest, thinkingConfig);

      // Log transformed request
@@ -153,11 +217,19 @@ class GlmtTransformer {
    // Handle tool_calls if present
    if (message.tool_calls && message.tool_calls.length > 0) {
      message.tool_calls.forEach(toolCall => {
+       let parsedInput;
+       try {
+         parsedInput = JSON.parse(toolCall.function.arguments || '{}');
+       } catch (parseError) {
+         this.log(`Warning: Invalid JSON in tool arguments: ${parseError.message}`);
+         parsedInput = { _error: 'Invalid JSON', _raw: toolCall.function.arguments };
+       }
+
        content.push({
          type: 'tool_use',
          id: toolCall.id,
          name: toolCall.function.name,
-         input: JSON.parse(toolCall.function.arguments || '{}')
+         input: parsedInput
        });
      });
    }
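
The effect of the new guard, with an illustrative truncated payload: previously a malformed arguments string made `JSON.parse` throw inside the `forEach`; now the block survives with a sentinel input.

    const toolCall = { id: 'call_1', function: { name: 'get_weather', arguments: '{"city": "Par' } };
    // parsedInput -> { _error: 'Invalid JSON', _raw: '{"city": "Par' }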
@@ -169,9 +241,9 @@ class GlmtTransformer {
      content: content,
      model: openaiResponse.model || 'glm-4.6',
      stop_reason: this._mapStopReason(choice.finish_reason),
-     usage: openaiResponse.usage || {
-       input_tokens: 0,
-       output_tokens: 0
+     usage: {
+       input_tokens: openaiResponse.usage?.prompt_tokens || 0,
+       output_tokens: openaiResponse.usage?.completion_tokens || 0
      }
    };

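Previously the OpenAI usage object was passed through verbatim when present, so Anthropic-side consumers saw OpenAI key names; the new code always re-keys to the Anthropic schema (token counts illustrative):

    // { prompt_tokens: 1200, completion_tokens: 345, total_tokens: 1545 }
    //   is now reported as
    // { input_tokens: 1200, output_tokens: 345 }
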
@@ -207,57 +279,109 @@ class GlmtTransformer {

  /**
   * Sanitize messages for OpenAI API compatibility
-  * Remove thinking blocks and unsupported content types
+  * Convert tool_result blocks to separate tool messages
+  * Filter out thinking blocks
   * @param {Array} messages - Messages array
   * @returns {Array} Sanitized messages
   * @private
   */
  _sanitizeMessages(messages) {
-   return messages.map(msg => {
-     // If content is a string, return as-is
+   const result = [];
+
+   for (const msg of messages) {
+     // If content is a string, add as-is
      if (typeof msg.content === 'string') {
-       return msg;
+       result.push(msg);
+       continue;
      }

-     // If content is an array, filter out unsupported types
+     // If content is an array, process blocks
      if (Array.isArray(msg.content)) {
-       const sanitizedContent = msg.content
-         .filter(block => {
-           // Keep only text content for OpenAI
-           // Filter out: thinking, tool_use, tool_result, etc.
-           return block.type === 'text';
-         })
-         .map(block => {
-           // Return just the text content
-           return block;
+       // Separate tool_result blocks from other content
+       const toolResults = msg.content.filter(block => block.type === 'tool_result');
+       const textBlocks = msg.content.filter(block => block.type === 'text');
+       const toolUseBlocks = msg.content.filter(block => block.type === 'tool_use');
+
+       // CRITICAL: Tool messages must come BEFORE user text in OpenAI API
+       // Convert tool_result blocks to OpenAI tool messages FIRST
+       for (const toolResult of toolResults) {
+         result.push({
+           role: 'tool',
+           tool_call_id: toolResult.tool_use_id,
+           content: typeof toolResult.content === 'string'
+             ? toolResult.content
+             : JSON.stringify(toolResult.content)
          });
+       }
+
+       // Add text content as user/assistant message AFTER tool messages
+       if (textBlocks.length > 0) {
+         const textContent = textBlocks.length === 1
+           ? textBlocks[0].text
+           : textBlocks.map(b => b.text).join('\n');

-       // If we filtered everything out, return empty string
-       if (sanitizedContent.length === 0) {
-         return {
+         result.push({
            role: msg.role,
-           content: ''
-         };
+           content: textContent
+         });
        }

-       // If only one text block, convert to string
-       if (sanitizedContent.length === 1 && sanitizedContent[0].type === 'text') {
-         return {
+       // Add tool_use blocks (assistant's tool calls) - skip for now, they're in assistant messages
+       // OpenAI handles these differently in response, not request
+
+       // If no content at all, add empty message (but not if we added tool messages)
+       if (textBlocks.length === 0 && toolResults.length === 0 && toolUseBlocks.length === 0) {
+         result.push({
            role: msg.role,
-           content: sanitizedContent[0].text
-         };
+           content: ''
+         });
        }

-       // Return array of text blocks
-       return {
-         role: msg.role,
-         content: sanitizedContent
-       };
+       continue;
      }

      // Fallback: return message as-is
-     return msg;
-   });
+     result.push(msg);
+   }
+
+   return result;
+ }
+
+ /**
+  * Transform Anthropic tools to OpenAI tools format
+  * @param {Array} anthropicTools - Anthropic tools array
+  * @returns {Array} OpenAI tools array
+  * @private
+  */
+ _transformTools(anthropicTools) {
+   return anthropicTools.map(tool => ({
+     type: 'function',
+     function: {
+       name: tool.name,
+       description: tool.description,
+       parameters: tool.input_schema || {}
+     }
+   }));
+ }
+
+ /**
+  * Check if messages contain thinking control tags
+  * @param {Array} messages - Messages array
+  * @returns {boolean} True if tags found
+  * @private
+  */
+ _hasThinkingTags(messages) {
+   for (const msg of messages) {
+     if (msg.role !== 'user') continue;
+     const content = msg.content;
+     if (typeof content !== 'string') continue;
+
+     // Check for control tags
+     if (/<Thinking:(On|Off)>/i.test(content) || /<Effort:(Low|Medium|High)>/i.test(content)) {
+       return true;
+     }
+   }
+   return false;
  }

  /**
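
Worked example of the new ordering and the tool-schema mapping (hypothetical tool and values, derived by reading the code above):

    // _sanitizeMessages: a user turn mixing tool_result and text
    // [{ role: 'user', content: [
    //     { type: 'text', text: 'Result received, continue.' },
    //     { type: 'tool_result', tool_use_id: 'call_1', content: '72F' } ] }]
    // becomes, tool message first:
    // [{ role: 'tool', tool_call_id: 'call_1', content: '72F' },
    //  { role: 'user', content: 'Result received, continue.' }]

    // _transformTools: Anthropic tool -> OpenAI function wrapper
    // { name: 'get_weather', description: 'Look up weather', input_schema: { type: 'object' } }
    // becomes
    // { type: 'function', function: { name: 'get_weather',
    //     description: 'Look up weather', parameters: { type: 'object' } } }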
@@ -432,9 +556,30 @@ class GlmtTransformer {
  transformDelta(openaiEvent, accumulator) {
    const events = [];

+   // Debug logging for streaming deltas
+   if (this.debugLog && openaiEvent.data) {
+     this._writeDebugLog('delta-openai', openaiEvent.data);
+   }
+
    // Handle [DONE] marker
+   // Only finalize if we haven't already (deferred finalization may have already triggered)
    if (openaiEvent.event === 'done') {
-     return this.finalizeDelta(accumulator);
+     if (!accumulator.finalized) {
+       return this.finalizeDelta(accumulator);
+     }
+     return []; // Already finalized
+   }
+
+   // Usage update (appears in final chunk, may be before choice data)
+   // Process this BEFORE early returns to ensure we capture usage
+   if (openaiEvent.data?.usage) {
+     accumulator.updateUsage(openaiEvent.data.usage);
+
+     // If we have both usage AND finish_reason, finalize immediately
+     if (accumulator.finishReason) {
+       events.push(...this.finalizeDelta(accumulator));
+       return events; // Early return after finalization
+     }
    }

    const choice = openaiEvent.data?.choices?.[0];
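
Context for the reordering: when usage reporting is enabled on a stream, the totals typically arrive in a trailing chunk whose choices array is empty, i.e. after the chunk carrying finish_reason, so whichever of the two arrives second now triggers finalization (chunk shapes illustrative; `accumulator.finalized` is presumably set by finalizeDelta, which is not shown in this diff):

    // chunk N:   { choices: [{ delta: {}, finish_reason: 'stop' }] }
    // chunk N+1: { choices: [], usage: { prompt_tokens: 120, completion_tokens: 40 } }
    // the later [DONE] marker then becomes a no-op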
@@ -498,14 +643,97 @@ class GlmtTransformer {
      ));
    }

-   // Usage update (appears in final chunk usually)
-   if (openaiEvent.data.usage) {
-     accumulator.updateUsage(openaiEvent.data.usage);
+   // Check for planning loop after each thinking block completes
+   if (accumulator.checkForLoop()) {
+     this.log('WARNING: Planning loop detected - 3 consecutive thinking blocks with no tool calls');
+     this.log('Forcing early finalization to prevent unbounded planning');
+
+     // Close current block if any
+     const currentBlock = accumulator.getCurrentBlock();
+     if (currentBlock && !currentBlock.stopped) {
+       if (currentBlock.type === 'thinking') {
+         events.push(this._createSignatureDeltaEvent(currentBlock));
+       }
+       events.push(this._createContentBlockStopEvent(currentBlock));
+       accumulator.stopCurrentBlock();
+     }
+
+     // Force finalization
+     events.push(...this.finalizeDelta(accumulator));
+     return events;
+   }
+
+   // Tool calls deltas
+   if (delta.tool_calls && delta.tool_calls.length > 0) {
+     // Close current content block ONCE before processing any tool calls
+     const currentBlock = accumulator.getCurrentBlock();
+     if (currentBlock && !currentBlock.stopped) {
+       if (currentBlock.type === 'thinking') {
+         events.push(this._createSignatureDeltaEvent(currentBlock));
+       }
+       events.push(this._createContentBlockStopEvent(currentBlock));
+       accumulator.stopCurrentBlock();
+     }
+
+     // Process each tool call delta
+     for (const toolCallDelta of delta.tool_calls) {
+       // Track tool call state
+       const isNewToolCall = !accumulator.toolCallsIndex[toolCallDelta.index];
+       accumulator.addToolCallDelta(toolCallDelta);
+
+       // Emit tool use events (start + input_json deltas)
+       if (isNewToolCall) {
+         // Start new tool_use block in accumulator
+         const block = accumulator.startBlock('tool_use');
+         const toolCall = accumulator.toolCallsIndex[toolCallDelta.index];
+
+         events.push({
+           event: 'content_block_start',
+           data: {
+             type: 'content_block_start',
+             index: block.index,
+             content_block: {
+               type: 'tool_use',
+               id: toolCall.id || `tool_${toolCallDelta.index}`,
+               name: toolCall.function.name || ''
+             }
+           }
+         });
+       }
+
+       // Emit input_json delta if arguments present
+       if (toolCallDelta.function?.arguments) {
+         const currentToolBlock = accumulator.getCurrentBlock();
+         if (currentToolBlock && currentToolBlock.type === 'tool_use') {
+           events.push({
+             event: 'content_block_delta',
+             data: {
+               type: 'content_block_delta',
+               index: currentToolBlock.index,
+               delta: {
+                 type: 'input_json_delta',
+                 partial_json: toolCallDelta.function.arguments
+               }
+             }
+           });
+         }
+       }
+     }
    }

    // Finish reason
    if (choice.finish_reason) {
      accumulator.finishReason = choice.finish_reason;
+
+     // If we have both finish_reason AND usage, finalize immediately
+     if (accumulator.usageReceived) {
+       events.push(...this.finalizeDelta(accumulator));
+     }
+   }
+
+   // Debug logging for generated events
+   if (this.debugLog && events.length > 0) {
+     this._writeDebugLog('delta-anthropic-events', { events, accumulator: accumulator.getSummary() });
    }

    return events;
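
Assuming the usual OpenAI chunking for streamed tool calls (the first delta carries index/id/name, later deltas carry argument fragments), the emitted Anthropic-side sequence for one call looks like:

    // delta.tool_calls = [{ index: 0, id: 'call_1', function: { name: 'get_weather', arguments: '' } }]
    //   -> content_block_start { content_block: { type: 'tool_use', id: 'call_1', name: 'get_weather' } }
    // delta.tool_calls = [{ index: 0, function: { arguments: '{"city":"Paris"}' } }]
    //   -> content_block_delta { delta: { type: 'input_json_delta', partial_json: '{"city":"Paris"}' } }
    // content_block_stop follows at finalization or when the next block opens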
@@ -523,7 +751,7 @@ class GlmtTransformer {

    const events = [];

-   // Close current content block if any
+   // Close current content block if any (including tool_use blocks)
    const currentBlock = accumulator.getCurrentBlock();
    if (currentBlock && !currentBlock.stopped) {
      if (currentBlock.type === 'thinking') {
@@ -533,6 +761,9 @@ class GlmtTransformer {
      accumulator.stopCurrentBlock();
    }

+   // No need to manually stop tool_use blocks - they're now tracked in contentBlocks
+   // and will be stopped by the logic above if they're the current block
+
    // Message delta (stop reason + usage)
    events.push({
      event: 'message_delta',
@@ -542,6 +773,7 @@ class GlmtTransformer {
          stop_reason: this._mapStopReason(accumulator.finishReason || 'stop')
        },
        usage: {
+         input_tokens: accumulator.inputTokens,
          output_tokens: accumulator.outputTokens
        }
      }
@@ -639,17 +871,20 @@ class GlmtTransformer {
  }

  /**
-  * Create signature_delta event
+  * Create thinking signature delta event
   * @private
   */
  _createSignatureDeltaEvent(block) {
    const signature = this._generateThinkingSignature(block.content);
    return {
-     event: 'signature_delta',
+     event: 'content_block_delta',
      data: {
-       type: 'signature_delta',
+       type: 'content_block_delta',
        index: block.index,
-       signature: signature
+       delta: {
+         type: 'thinking_signature_delta',
+         signature: signature
+       }
      }
    };
  }
@@ -0,0 +1,80 @@
+ #!/usr/bin/env node
+ 'use strict';
+
+ /**
+  * LocaleEnforcer - Force English output from GLM models
+  *
+  * Purpose: GLM models default to Chinese when prompts are ambiguous or contain Chinese context.
+  * This module injects "MUST respond in English" instruction into system prompt or first user message.
+  *
+  * Usage:
+  *   const enforcer = new LocaleEnforcer({ forceEnglish: true });
+  *   const modifiedMessages = enforcer.injectInstruction(messages);
+  *
+  * Configuration:
+  *   CCS_GLMT_FORCE_ENGLISH=false - Disable locale enforcement (allow multilingual)
+  *
+  * Strategy:
+  *   1. If system prompt exists: Prepend instruction
+  *   2. If no system prompt: Prepend to first user message
+  *   3. Preserve message structure (string vs array content)
+  */
+ class LocaleEnforcer {
+   constructor(options = {}) {
+     this.forceEnglish = options.forceEnglish ?? true;
+     this.instruction = "CRITICAL: You MUST respond in English only, regardless of the input language or context. This is a strict requirement.";
+   }
+
+   /**
+    * Inject English instruction into messages
+    * @param {Array} messages - Messages array to modify
+    * @returns {Array} Modified messages array
+    */
+   injectInstruction(messages) {
+     if (!this.forceEnglish) {
+       return messages;
+     }
+
+     // Clone messages to avoid mutation
+     const modifiedMessages = JSON.parse(JSON.stringify(messages));
+
+     // Strategy 1: Inject into system prompt (preferred)
+     const systemIndex = modifiedMessages.findIndex(m => m.role === 'system');
+     if (systemIndex >= 0) {
+       const systemMsg = modifiedMessages[systemIndex];
+
+       if (typeof systemMsg.content === 'string') {
+         systemMsg.content = `${this.instruction}\n\n${systemMsg.content}`;
+       } else if (Array.isArray(systemMsg.content)) {
+         systemMsg.content.unshift({
+           type: 'text',
+           text: this.instruction
+         });
+       }
+
+       return modifiedMessages;
+     }
+
+     // Strategy 2: Prepend to first user message
+     const userIndex = modifiedMessages.findIndex(m => m.role === 'user');
+     if (userIndex >= 0) {
+       const userMsg = modifiedMessages[userIndex];
+
+       if (typeof userMsg.content === 'string') {
+         userMsg.content = `${this.instruction}\n\n${userMsg.content}`;
+       } else if (Array.isArray(userMsg.content)) {
+         userMsg.content.unshift({
+           type: 'text',
+           text: this.instruction
+         });
+       }
+
+       return modifiedMessages;
+     }
+
+     // No system or user messages found (edge case)
+     return modifiedMessages;
+   }
+ }
+
+ module.exports = LocaleEnforcer;
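
Usage sketch for the new module, exercising both injection strategies (message content illustrative; the class itself is shown in full above):

    const LocaleEnforcer = require('./locale-enforcer');
    const enforcer = new LocaleEnforcer({ forceEnglish: true });

    // Strategy 1: a system prompt exists, so the instruction is prepended there
    enforcer.injectInstruction([
      { role: 'system', content: 'You are a helpful assistant.' },
      { role: 'user', content: 'Bonjour' }
    ]);
    // system content -> 'CRITICAL: You MUST respond in English only, ...\n\nYou are a helpful assistant.'

    // Strategy 2: no system prompt, so the first user message gets the prefix
    enforcer.injectInstruction([{ role: 'user', content: 'Bonjour' }]);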