@exreve/exk 1.0.76 → 1.0.78

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -56,7 +56,7 @@ const symlinkAsync = promisify(fsSymlink);
56
56
  // AI config - loaded from server after registration, stored in ~/.talk-to-code/ai-config.json
57
57
  // (Do not read ANTHROPIC_* / CLAUDE_MODEL from the host environment — only this file + code default model.)
58
58
  const AI_CONFIG_PATH = path.join(os.homedir(), '.talk-to-code', 'ai-config.json');
59
- const DEFAULT_AI_MODEL = 'glm-5.1';
59
+ const DEFAULT_AI_MODEL = 'glm-5.2';
60
60
  /** TTL cache for ai-config.json reads to avoid hitting disk on every call */
61
61
  let _aiConfigCache = null;
62
62
  const AI_CONFIG_TTL_MS = 5_000;
@@ -66,12 +66,12 @@ const PROVIDERS = {
66
66
  zai: {
67
67
  apiKey: process.env.ZHIPU_API_KEY || '',
68
68
  baseUrl: process.env.CLI_AI_BASE_URL || 'https://api.z.ai/api/anthropic',
69
- models: ['glm-5.1', 'glm-4.7', 'glm-4.5-air'],
69
+ models: ['glm-5.2'],
70
70
  },
71
71
  minimax: {
72
72
  apiKey: '',
73
73
  baseUrl: 'https://api.minimax.io/anthropic',
74
- models: ['MiniMax-M2.7', 'MiniMax-M2.7-highspeed'],
74
+ models: ['MiniMax-M3'],
75
75
  },
76
76
  openrouter: {
77
77
  apiKey: '',
@@ -739,13 +739,13 @@ export class AgentSessionManager {
739
739
  resumeSessionId = session.sdkSessionId;
740
740
  }
741
741
  }
742
- // Enable auto-compaction: trigger when context reaches ~150k (of 200k), keep default target
742
+ // Enable auto-compaction: trigger when context reaches ~800k (of 1M), keep default target
743
743
  if (effectiveSettings) {
744
744
  effectiveSettings.autoCompactEnabled = true;
745
- effectiveSettings.autoCompactWindow = 150000;
745
+ effectiveSettings.autoCompactWindow = 800000;
746
746
  }
747
747
  else {
748
- effectiveSettings = { autoCompactEnabled: true, autoCompactWindow: 150000 };
748
+ effectiveSettings = { autoCompactEnabled: true, autoCompactWindow: 800000 };
749
749
  }
750
750
  // Build backend config
751
751
  const backendConfig = {
@@ -262,7 +262,7 @@ async function runSdkBenchmark(provider) {
262
262
  // ── Main ────────────────────────────────────────────────────────────
263
263
  async function main() {
264
264
  console.log(`\n${BOLD}╔══════════════════════════════════════════════════════════════════════╗${RESET}`);
265
- console.log(`${BOLD}║ SDK Benchmark: MiniMax M2.7-highspeed vs Cerebras zai-glm-4.7 ║${RESET}`);
265
+ console.log(`${BOLD}║ SDK Benchmark: MiniMax M3 vs Cerebras zai-glm-4.7 ║${RESET}`);
266
266
  console.log(`${BOLD}║ Task: Generate a complete HTML real estate page ║${RESET}`);
267
267
  console.log(`${BOLD}╚══════════════════════════════════════════════════════════════════════╝${RESET}\n`);
268
268
  const config = loadConfig();
@@ -278,8 +278,8 @@ async function main() {
278
278
  const providers = [];
279
279
  if (minimaxKey) {
280
280
  providers.push({
281
- name: 'MiniMax M2.7-highspeed',
282
- model: 'MiniMax-M2.7-highspeed',
281
+ name: 'MiniMax M3',
282
+ model: 'MiniMax-M3',
283
283
  apiKey: minimaxKey,
284
284
  baseUrl: 'https://api.minimax.io/anthropic',
285
285
  needsProxy: false,
@@ -169,6 +169,34 @@ export class ClaudeBackend {
169
169
  return CACHED_CLAUDE_PATH;
170
170
  }
171
171
  async *executePrompt(prompt, config) {
172
+ const { cwd, apiKey, model, env, settings, signal, attachmentDir, routingSessionId, routingPromptId, resumeSessionId } = config;
173
+ // Retry config for 529 overloaded errors
174
+ const MAX_RETRIES = 2;
175
+ const RETRY_DELAY_MS = 3000;
176
+ for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
177
+ if (attempt > 0) {
178
+ console.log(`[ClaudeBackend] Retry ${attempt}/${MAX_RETRIES} after 529 delay...`);
179
+ await new Promise(r => setTimeout(r, RETRY_DELAY_MS * attempt));
180
+ }
181
+ try {
182
+ const stream = this._executePromptOnce(prompt, {
183
+ cwd, apiKey, model, env, settings, signal, attachmentDir, routingSessionId, routingPromptId, resumeSessionId,
184
+ });
185
+ for await (const event of stream) {
186
+ yield event;
187
+ }
188
+ return; // Success — exit retry loop
189
+ }
190
+ catch (err) {
191
+ const is529 = err?.message?.includes('529') || err?.message?.includes('overloaded_error');
192
+ if (!is529 || attempt === MAX_RETRIES) {
193
+ throw err; // Not retryable or out of retries
194
+ }
195
+ console.log(`[ClaudeBackend] 529 overloaded detected, will retry: ${err.message}`);
196
+ }
197
+ }
198
+ }
199
+ async *_executePromptOnce(prompt, config) {
172
200
  const { cwd, apiKey, model, env, settings, signal, attachmentDir, routingSessionId, routingPromptId, resumeSessionId } = config;
173
201
  // Build MCP server for this query
174
202
  const mcpServer = createModuleMcpServer({
@@ -188,6 +216,8 @@ export class ClaudeBackend {
188
216
  settingSources: ['project'],
189
217
  permissionMode: 'bypassPermissions',
190
218
  allowDangerouslySkipPermissions: true,
219
+ // Enable 1M token context window (GLM-5.2 supports 1M context via z.ai)
220
+ betas: ['context-1m-2025-08-07'],
191
221
  mcpServers: { 'claude-voice-modules': mcpServer },
192
222
  ...(pathToClaudeCodeExecutable ? { pathToClaudeCodeExecutable } : {}),
193
223
  spawnClaudeCodeProcess: (spawnOptions) => {
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@exreve/exk",
3
- "version": "1.0.76",
3
+ "version": "1.0.78",
4
4
  "description": "exk - Control Claude CLI with voice and programmable interfaces",
5
5
  "type": "module",
6
6
  "bin": {