coze_lab 0.1.37 → 0.1.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -8,7 +8,7 @@ Configure local AI agents (Claude Code, Codex, OpenClaw) to report traces to Coz
8
8
  # First-time setup — triggers browser OAuth authorization
9
9
  npx coze_lab --agent=<type>
10
10
 
11
- # Per-agent setup. Cloud/local is inferred from coze-bridge config.
11
+ # Per-agent setup. Cloud/local is inferred from coze-bridge config or CLOUD_ENV=1.
12
12
  npx coze_lab --agent-id=<agentId>
13
13
 
14
14
  # Auth-only commands (no agent configuration)
@@ -23,8 +23,8 @@ npx coze_lab --logout # Clear cached credentials
23
23
  | Parameter | Required | Values / Effect |
24
24
  |-----------|----------|-----------------|
25
25
  | `--agent` | ✓ (for setup) | `claude-code`, `codex`, `openclaw` |
26
- | `--agent-id` | — | Resolve `~/.coze/agents/<agentId>/config.json` and write per-agent config. `deployType=cloud` enables cloud mode automatically |
27
- | `--cloud` | — | Backward-compatible override for old callers. New callers should rely on `--agent-id` + config `deployType` |
26
+ | `--agent-id` | — | Resolve `~/.coze/agents/<agentId>/config.json` and write per-agent config. Cloud mode is inferred from `deployType=cloud`, `CLOUD_ENV=1`, or bridge cloud-only fields |
27
+ | `--cloud` | — | Backward-compatible override for old manual callers. New callers should rely on `--agent-id` auto-detection |
28
28
  | `--codex-home` | — | Override Codex config home for non-cloud/custom runs |
29
29
  | `--login` | — | Run the Device Code login flow only |
30
30
  | `--status` | — | Print local token status (valid / expiring / expired) |
@@ -56,7 +56,7 @@ npx coze_lab --logout # Clear cached credentials
56
56
  | `codex` | `~/.codex/hooks/cozeloop_hook.py` | `~/.codex/hooks.json` | `~/.codex/hooks/cozeloop.env` |
57
57
  | `openclaw` | — (Node.js plugin) | `~/.openclaw/openclaw.json` | inline in config |
58
58
 
59
- For cloud Codex with `--agent-id=<agentId>` and config `deployType=cloud`, Codex hooks are written to
59
+ For cloud Codex with `--agent-id=<agentId>`, Codex hooks are written to
60
60
  `~/.coze/agents/<agentId>/codex-home` by default. The directory is created if it
61
61
  does not already exist, so callers do not need to pass `--codex-home` for the
62
62
  standard coze-bridge layout.
package/index.js CHANGED
@@ -53,6 +53,11 @@ function getCloudTokenInfo() {
53
53
  return { token: '', source: '', traceUsable: false };
54
54
  }
55
55
 
56
+ function isCloudRuntimeEnv() {
57
+ const v = readEnv('CLOUD_ENV').toLowerCase();
58
+ return v === '1' || v === 'true' || v === 'yes' || v === 'cloud';
59
+ }
60
+
56
61
  // ─── 1. Color helpers ────────────────────────────────────────────────────────
57
62
  const C = {
58
63
  reset: '\x1b[0m',
@@ -156,6 +161,30 @@ function parseArgs() {
156
161
 
157
162
  const VALID_AGENTS = ['claude-code', 'codex', 'openclaw'];
158
163
 
164
+ function hasCloudSessionToken(cfg) {
165
+ const sessions = cfg?.sessions;
166
+ if (!sessions || typeof sessions !== 'object' || Array.isArray(sessions)) return false;
167
+ return Object.values(sessions).some((record) => (
168
+ record
169
+ && typeof record === 'object'
170
+ && typeof record.modelToken === 'string'
171
+ && record.modelToken.trim()
172
+ ));
173
+ }
174
+
175
+ function hasCloudModelInfo(cfg) {
176
+ return !!(cfg?.modelInfo && typeof cfg.modelInfo === 'object' && !Array.isArray(cfg.modelInfo));
177
+ }
178
+
179
+ function inferDeployTypeFromAgentConfig(cfg) {
180
+ if (cfg?.deployType === 'cloud') return { deployType: 'cloud', reason: 'config.deployType=cloud' };
181
+ if (isCloudRuntimeEnv()) return { deployType: 'cloud', reason: 'env CLOUD_ENV=1' };
182
+ if (cfg?.deployType === 'local') return { deployType: 'local', reason: 'config.deployType=local' };
183
+ if (hasCloudSessionToken(cfg)) return { deployType: 'cloud', reason: 'config.sessions[*].modelToken' };
184
+ if (hasCloudModelInfo(cfg)) return { deployType: 'cloud', reason: 'config.modelInfo' };
185
+ return { deployType: 'local', reason: 'no cloud signal' };
186
+ }
187
+
159
188
  // resolveAgent 读 ~/.coze/agents/<agentId>/config.json,返回 { framework, workspace, deployType, agentId, root }。
160
189
  // soft=true 时,config 不存在 / 解析失败 / framework 非法均返回 null(不退出),供云端回退到显式 --agent。
161
190
  function resolveAgent(agentId, soft) {
@@ -186,8 +215,8 @@ function resolveAgent(agentId, soft) {
186
215
  `支持的类型: ${VALID_AGENTS.join(', ')}`,
187
216
  ]);
188
217
  }
189
- const deployType = cfg.deployType === 'cloud' ? 'cloud' : 'local';
190
- return { framework, workspace: cfg.workspace || '', deployType, agentId, root };
218
+ const inferred = inferDeployTypeFromAgentConfig(cfg);
219
+ return { framework, workspace: cfg.workspace || '', deployType: inferred.deployType, deployReason: inferred.reason, agentId, root };
191
220
  }
192
221
 
193
222
  function validateArgs(args) {
@@ -197,11 +226,12 @@ function validateArgs(args) {
197
226
  if (args['refresh']) return { refresh: true };
198
227
  if (args['verify']) return { verify: true, pairCode: args['pair-code'] };
199
228
 
200
- // --agent-id:优先读 coze-bridge 的 ~/.coze/agents/<id>/config.json 拿 framework/workspace/deployType
201
- // deployType=cloud 时自动进入云端注入路径;显式 --cloud 仍保留,供 config 缺失时兜底。
229
+ // --agent-id:优先读 coze-bridge 的 ~/.coze/agents/<id>/config.json 拿 framework/workspace。
230
+ // 云端判定优先看 deployType / CLOUD_ENV;兼容老 config 时再看 cloud-only 落盘字段。
202
231
  if (args['agent-id']) {
203
232
  const explicitCloud = !!args['cloud'];
204
- const resolved = resolveAgent(args['agent-id'], explicitCloud /* soft */);
233
+ const runtimeCloud = isCloudRuntimeEnv();
234
+ const resolved = resolveAgent(args['agent-id'], explicitCloud || runtimeCloud /* soft */);
205
235
  if (resolved) {
206
236
  const cloud = explicitCloud || resolved.deployType === 'cloud';
207
237
  return {
@@ -210,20 +240,22 @@ function validateArgs(args) {
210
240
  workspace: resolved.workspace,
211
241
  agentRoot: resolved.root,
212
242
  deployType: resolved.deployType,
243
+ deployReason: explicitCloud ? '--cloud' : resolved.deployReason,
213
244
  'codex-home': args['codex-home'],
214
245
  pairCode: args['pair-code'],
215
246
  cloud,
216
247
  force: !!args['force'],
217
248
  };
218
249
  }
219
- // 显式 --cloud 且 config.json 缺失:回退到显式 --agent(workspace 在 main 推断)。
220
- if (!args['agent'] || !VALID_AGENTS.includes(args['agent'])) {
250
+ // 显式 --cloud 或 CLOUD_ENV=1 且 config.json 缺失:回退到显式 --agent
251
+ // (workspace 在 main 推断)。没有云端信号时仍按本地 config 缺失报错。
252
+ if ((!explicitCloud && !runtimeCloud) || !args['agent'] || !VALID_AGENTS.includes(args['agent'])) {
221
253
  errorBox([
222
254
  `ERROR: 未找到 agent "${args['agent-id']}" 的 config.json,且未显式指定 --agent`,
223
255
  '',
224
256
  '新调用方应确认 coze-bridge 已在目标环境写入该 agent config。',
225
- '如需兼容旧手工命令,可显式拼上 framework 和 --cloud:',
226
- ` npx coze_lab --cloud --agent-id=${args['agent-id']} --agent=claude-code|codex|openclaw`,
257
+ '如需兼容旧手工命令,可显式拼上 framework:',
258
+ ` npx coze_lab --agent-id=${args['agent-id']} --agent=claude-code|codex|openclaw`,
227
259
  ]);
228
260
  }
229
261
  return {
@@ -231,6 +263,7 @@ function validateArgs(args) {
231
263
  agentId: args['agent-id'],
232
264
  workspace: args['workspace'] || '',
233
265
  deployType: 'cloud',
266
+ deployReason: explicitCloud ? '--cloud' : 'env CLOUD_ENV=1',
234
267
  'codex-home': args['codex-home'],
235
268
  pairCode: args['pair-code'],
236
269
  cloud: true,
@@ -462,14 +495,27 @@ function writeHookScript(dest, content) {
462
495
  }
463
496
  }
464
497
 
498
+ // atomicWriteFileSync 原子写文件:先写到同目录下的临时文件(带 pid),fsync 后 rename
499
+ // 覆盖目标。rename 在同一文件系统上是原子操作,避免读-改-写过程中崩溃留下半截/空文件。
500
+ function atomicWriteFileSync(filepath, data, options) {
501
+ const dir = path.dirname(filepath);
502
+ const tmp = path.join(dir, `.tmp.${process.pid}.${Date.now()}.${path.basename(filepath)}`);
503
+ try {
504
+ fs.writeFileSync(tmp, data, options);
505
+ fs.renameSync(tmp, filepath);
506
+ } catch (e) {
507
+ // 失败时清掉残留临时文件,保持目标文件原样(要么旧内容,要么不存在)。
508
+ try { fs.unlinkSync(tmp); } catch { /* ignore */ }
509
+ throw e;
510
+ }
511
+ }
512
+
465
513
  function mergeJson(filepath, mergeFn) {
466
514
  let existing = {};
467
515
  if (fs.existsSync(filepath)) {
468
- try {
469
- existing = JSON.parse(fs.readFileSync(filepath, 'utf8'));
470
- } catch {
471
- warnBox([`Warning: Could not parse existing ${filepath}`, '', 'Starting with a fresh configuration.']);
472
- }
516
+ // 解析失败时绝不退回空对象继续合并——那会丢掉文件里已有的 hook/凭证配置。
517
+ // 直接抛错,让调用方保留原文件内容、让用户感知到坏文件,而不是静默覆盖。
518
+ existing = JSON.parse(fs.readFileSync(filepath, 'utf8'));
473
519
  }
474
520
  return mergeFn(existing);
475
521
  }
@@ -499,32 +545,46 @@ function writeClaudeCodeHook(patToken, workspaceId, pythonCmd, configBaseDir, cl
499
545
  const refreshScript = path.join(hooksDir, 'cozeloop_refresh.py');
500
546
  writeHookScript(refreshScript, readScript('shared/cozeloop_refresh.py'));
501
547
 
502
- // 2. Merge settings.json — Stop (trace) + UserPromptSubmit (refresh)。用绝对路径。
548
+ // 2. Merge settings.json — Stop (trace 收尾) + PostToolUse (trace 增量) + UserPromptSubmit (refresh)。用绝对路径。
503
549
  const hookCmd = `${pythonCmd} ${hookScript}`;
504
550
  const refreshCmd = `${pythonCmd} ${refreshScript}`;
505
551
 
506
552
  ensureDir(claudeDir);
507
- const settings = mergeJson(settingsPath, (existing) => {
508
- if (!existing.hooks) existing.hooks = {};
509
-
510
- // Stop hook trace upload
511
- if (!existing.hooks.Stop) existing.hooks.Stop = [];
512
- existing.hooks.Stop = existing.hooks.Stop.filter(
513
- entry => !entry.hooks?.some(h => h.command?.includes('cozeloop_hook.py'))
514
- );
515
- existing.hooks.Stop.push({ matcher: '', hooks: [{ type: 'command', command: hookCmd }] });
516
-
517
- // UserPromptSubmit hook token refresh before each user message
518
- if (!existing.hooks.UserPromptSubmit) existing.hooks.UserPromptSubmit = [];
519
- existing.hooks.UserPromptSubmit = existing.hooks.UserPromptSubmit.filter(
520
- entry => !entry.hooks?.some(h => h.command?.includes('cozeloop_refresh.py'))
521
- );
522
- existing.hooks.UserPromptSubmit.push({ matcher: '', hooks: [{ type: 'command', command: refreshCmd }] });
523
-
524
- return existing;
525
- });
553
+ let settings;
554
+ try {
555
+ settings = mergeJson(settingsPath, (existing) => {
556
+ if (!existing.hooks) existing.hooks = {};
557
+
558
+ // Stop hook — trace upload(收尾,全量上报最后一批)
559
+ if (!existing.hooks.Stop) existing.hooks.Stop = [];
560
+ existing.hooks.Stop = existing.hooks.Stop.filter(
561
+ entry => !entry.hooks?.some(h => h.command?.includes('cozeloop_hook.py'))
562
+ );
563
+ existing.hooks.Stop.push({ matcher: '', hooks: [{ type: 'command', command: hookCmd }] });
564
+
565
+ // PostToolUse hook — trace 增量上报(任务进行中即上报已完成 turn,脚本内置节流+turn边界控制)。
566
+ // 没有它,长任务(单 turn 跑十几分钟)期间 Stop 不触发,trace 全程查不到。
567
+ if (!existing.hooks.PostToolUse) existing.hooks.PostToolUse = [];
568
+ existing.hooks.PostToolUse = existing.hooks.PostToolUse.filter(
569
+ entry => !entry.hooks?.some(h => h.command?.includes('cozeloop_hook.py'))
570
+ );
571
+ existing.hooks.PostToolUse.push({ matcher: '', hooks: [{ type: 'command', command: hookCmd }] });
572
+
573
+ // UserPromptSubmit hook — token refresh before each user message
574
+ if (!existing.hooks.UserPromptSubmit) existing.hooks.UserPromptSubmit = [];
575
+ existing.hooks.UserPromptSubmit = existing.hooks.UserPromptSubmit.filter(
576
+ entry => !entry.hooks?.some(h => h.command?.includes('cozeloop_refresh.py'))
577
+ );
578
+ existing.hooks.UserPromptSubmit.push({ matcher: '', hooks: [{ type: 'command', command: refreshCmd }] });
579
+
580
+ return existing;
581
+ });
582
+ } catch (e) {
583
+ // 现存 settings.json 解析失败:保留原文件,提示用户手动修复,绝不用空配置覆盖。
584
+ errorBox([`ERROR: Existing ${settingsPath} is not valid JSON`, '', e.message, '', '请修复或删除该文件后重试,避免覆盖丢失现有 hook 配置。']);
585
+ }
526
586
  try {
527
- fs.writeFileSync(settingsPath, JSON.stringify(settings, null, 2));
587
+ atomicWriteFileSync(settingsPath, JSON.stringify(settings, null, 2));
528
588
  } catch (e) {
529
589
  errorBox([`ERROR: Cannot write ${settingsPath}`, '', e.message]);
530
590
  }
@@ -533,42 +593,47 @@ function writeClaudeCodeHook(patToken, workspaceId, pythonCmd, configBaseDir, cl
533
593
  // 3. Write credentials into <baseDir>/.claude/settings.local.json
534
594
  ensureDir(path.join(baseDir, '.claude'));
535
595
  ensureDir(path.dirname(logFile));
536
- const localSettings = mergeJson(localSettingsPath, (existing) => {
537
- if (!existing.env) existing.env = {};
538
- existing.env.COZELOOP_WORKSPACE_ID = workspaceId;
539
- existing.env.COZELOOP_HOOK_LOG = logFile;
540
- if (cloud) {
541
- const loopToken = readEnv('COZELOOP_API_TOKEN');
542
- const cozeToken = readEnv('COZE_API_TOKEN');
543
- existing.env.COZELAB_ONBOARD_CLOUD = '1';
544
- if (loopToken) {
545
- existing.env.COZELOOP_API_TOKEN = loopToken;
546
- delete existing.env.COZE_API_TOKEN;
547
- } else if (cozeToken) {
548
- existing.env.COZE_API_TOKEN = cozeToken;
596
+ let localSettings;
597
+ try {
598
+ localSettings = mergeJson(localSettingsPath, (existing) => {
599
+ if (!existing.env) existing.env = {};
600
+ existing.env.COZELOOP_WORKSPACE_ID = workspaceId;
601
+ existing.env.COZELOOP_HOOK_LOG = logFile;
602
+ if (cloud) {
603
+ const loopToken = readEnv('COZELOOP_API_TOKEN');
604
+ const cozeToken = readEnv('COZE_API_TOKEN');
605
+ existing.env.COZELAB_ONBOARD_CLOUD = '1';
606
+ if (loopToken) {
607
+ existing.env.COZELOOP_API_TOKEN = loopToken;
608
+ delete existing.env.COZE_API_TOKEN;
609
+ } else if (cozeToken) {
610
+ existing.env.COZE_API_TOKEN = cozeToken;
611
+ delete existing.env.COZELOOP_API_TOKEN;
612
+ }
613
+ } else {
549
614
  delete existing.env.COZELOOP_API_TOKEN;
615
+ delete existing.env.COZE_API_TOKEN;
616
+ delete existing.env.COZELAB_ONBOARD_CLOUD;
550
617
  }
551
- } else {
552
- delete existing.env.COZELOOP_API_TOKEN;
553
- delete existing.env.COZE_API_TOKEN;
554
- delete existing.env.COZELAB_ONBOARD_CLOUD;
555
- }
556
- const loopBaseUrl = readEnv('COZELOOP_API_BASE_URL');
557
- const otelEndpoint = readEnv('OTEL_ENDPOINT');
558
- if (loopBaseUrl) {
559
- existing.env.COZELOOP_API_BASE_URL = loopBaseUrl;
560
- delete existing.env.OTEL_ENDPOINT;
561
- } else if (otelEndpoint) {
562
- existing.env.OTEL_ENDPOINT = otelEndpoint;
563
- delete existing.env.COZELOOP_API_BASE_URL;
564
- }
565
- // 切正式环境:移除历史注入的 PPE 泳道 env(旧版本曾写入 x_tt_env/x_use_ppe)
566
- delete existing.env.x_tt_env;
567
- delete existing.env.x_use_ppe;
568
- return existing;
569
- });
618
+ const loopBaseUrl = readEnv('COZELOOP_API_BASE_URL');
619
+ const otelEndpoint = readEnv('OTEL_ENDPOINT');
620
+ if (loopBaseUrl) {
621
+ existing.env.COZELOOP_API_BASE_URL = loopBaseUrl;
622
+ delete existing.env.OTEL_ENDPOINT;
623
+ } else if (otelEndpoint) {
624
+ existing.env.OTEL_ENDPOINT = otelEndpoint;
625
+ delete existing.env.COZELOOP_API_BASE_URL;
626
+ }
627
+ // 切正式环境:移除历史注入的 PPE 泳道 env(旧版本曾写入 x_tt_env/x_use_ppe)
628
+ delete existing.env.x_tt_env;
629
+ delete existing.env.x_use_ppe;
630
+ return existing;
631
+ });
632
+ } catch (e) {
633
+ errorBox([`ERROR: Existing ${localSettingsPath} is not valid JSON`, '', e.message, '', '请修复或删除该文件后重试,避免覆盖丢失现有凭证配置。']);
634
+ }
570
635
  try {
571
- fs.writeFileSync(localSettingsPath, JSON.stringify(localSettings, null, 2));
636
+ atomicWriteFileSync(localSettingsPath, JSON.stringify(localSettings, null, 2));
572
637
  } catch (e) {
573
638
  errorBox([`ERROR: Cannot write credentials to ${localSettingsPath}`, '', e.message]);
574
639
  }
@@ -629,7 +694,7 @@ function writeCodexHook(token, workspaceId, pythonCmd, codexHome, cloud) {
629
694
  }
630
695
  ok(`Credentials written to ${envFile} (chmod 600)`);
631
696
 
632
- // 3. Merge hooks.json — Stop (trace) + SessionStart (refresh)
697
+ // 3. Merge hooks.json — Stop (trace 收尾) + PostToolUse (trace 增量) + SessionStart (refresh)
633
698
  // 命令用绝对路径(CODEX_HOME 不一定是 ~/.codex)。
634
699
  const hookCmd = `set -a && . ${envFile} && set +a && ${pythonCmd} ${hookScript}`;
635
700
  const refreshCmd = `${pythonCmd} ${refreshScript}`;
@@ -637,13 +702,21 @@ function writeCodexHook(token, workspaceId, pythonCmd, codexHome, cloud) {
637
702
  const hooks = mergeJson(hooksJson, (existing) => {
638
703
  if (!existing.hooks) existing.hooks = {};
639
704
 
640
- // Stop hook — trace upload
705
+ // Stop hook — trace upload(turn 结束,收尾全量上报)
641
706
  if (!existing.hooks.Stop) existing.hooks.Stop = [];
642
707
  existing.hooks.Stop = existing.hooks.Stop.filter(
643
708
  entry => !entry.hooks?.some(h => h.command?.includes('cozeloop_hook.py'))
644
709
  );
645
710
  existing.hooks.Stop.push({ matcher: null, hooks: [{ type: 'command', command: hookCmd, timeout: 60 }] });
646
711
 
712
+ // PostToolUse hook — trace 增量上报(turn 进行中即上报已完成 turn,脚本内置节流+turn边界控制)。
713
+ // 没有它,单个长 turn 内(多轮工具调用持续数分钟)期间 Stop 不触发,trace 全程查不到。
714
+ if (!existing.hooks.PostToolUse) existing.hooks.PostToolUse = [];
715
+ existing.hooks.PostToolUse = existing.hooks.PostToolUse.filter(
716
+ entry => !entry.hooks?.some(h => h.command?.includes('cozeloop_hook.py'))
717
+ );
718
+ existing.hooks.PostToolUse.push({ matcher: null, hooks: [{ type: 'command', command: hookCmd, timeout: 60 }] });
719
+
647
720
  // SessionStart hook — token refresh
648
721
  if (!existing.hooks.SessionStart) existing.hooks.SessionStart = [];
649
722
  existing.hooks.SessionStart = existing.hooks.SessionStart.filter(
@@ -680,16 +753,31 @@ function resolveCodexHome(args) {
680
753
  function resolveHomeDir(cloud) {
681
754
  const h = os.homedir();
682
755
  if (!cloud) return h;
683
- // 优先 $HOME,其次 os.homedir(),再回退云端常见 root home。
684
- const candidates = [process.env.HOME, h, '/root'].filter(Boolean);
756
+ // 优先级:$HOME > os.homedir()(非空才用)> 显式候选目录。
757
+ // 不再无脑回退 /root —— 云端真实 home 可能是 /home/appuser,回退 /root 会把
758
+ // openclaw.json/凭证写错位置。仅在 HOME 与 os.homedir() 都拿不到时才探测候选。
759
+ const envHome = (process.env.HOME || '').trim();
760
+ if (envHome) {
761
+ info(`resolveHomeDir: using $HOME=${envHome}`);
762
+ return envHome;
763
+ }
764
+ if (h) {
765
+ info(`resolveHomeDir: using os.homedir()=${h}`);
766
+ return h;
767
+ }
768
+ // HOME 与 os.homedir() 都为空:探测云端常见 home 候选,挑第一个已有 coze/openclaw 配置的。
769
+ const candidates = ['/home/appuser', '/root'];
685
770
  for (const c of candidates) {
686
771
  try {
687
772
  if (fs.existsSync(path.join(c, '.coze')) || fs.existsSync(path.join(c, '.openclaw'))) {
773
+ info(`resolveHomeDir: HOME/os.homedir() empty, detected home=${c}`);
688
774
  return c;
689
775
  }
690
776
  } catch { /* ignore */ }
691
777
  }
692
- return h || '/root';
778
+ const fallback = candidates[candidates.length - 1];
779
+ warn(`resolveHomeDir: cannot determine home (HOME/os.homedir() empty, no candidate has coze config), falling back to ${fallback}`);
780
+ return fallback;
693
781
  }
694
782
 
695
783
  function normalizeTraceAgentIds(ids) {
@@ -1446,8 +1534,9 @@ function loadCredentials() {
1446
1534
  }
1447
1535
 
1448
1536
  function saveCredentials(creds) {
1449
- fs.mkdirSync(path.dirname(CREDS_PATH), { recursive: true });
1450
- fs.writeFileSync(CREDS_PATH, JSON.stringify(creds, null, 2), { mode: 0o600 });
1537
+ // 0o700:凭证目录仅 owner 可读/进入,其他用户无法枚举 ~/.cozeloop 内容。
1538
+ fs.mkdirSync(path.dirname(CREDS_PATH), { recursive: true, mode: 0o700 });
1539
+ atomicWriteFileSync(CREDS_PATH, JSON.stringify(creds, null, 2), { mode: 0o600 });
1451
1540
  }
1452
1541
 
1453
1542
  function deleteCredentials() {
@@ -1486,7 +1575,7 @@ async function refreshToken(creds) {
1486
1575
  const updated = {
1487
1576
  access_token: data.access_token,
1488
1577
  refresh_token: data.refresh_token ?? creds.refresh_token,
1489
- expires_at: (data.expires_in ?? 0) * 1000, // expires_in is a unix timestamp in seconds
1578
+ expires_at: (data.expires_in ?? 0) * 1000, // expires_at stored in milliseconds (Python 端按毫秒读)
1490
1579
  workspace_id: creds.workspace_id ?? WORKSPACE_ID, // preserve workspace_id
1491
1580
  };
1492
1581
  saveCredentials(updated);
@@ -1569,7 +1658,7 @@ async function deviceCodeAuth() {
1569
1658
  const creds = {
1570
1659
  access_token: pollData.access_token,
1571
1660
  refresh_token: pollData.refresh_token,
1572
- expires_at: (pollData.expires_in ?? 0) * 1000, // unix timestamp in seconds
1661
+ expires_at: (pollData.expires_in ?? 0) * 1000, // expires_at stored in milliseconds (Python 端按毫秒读)
1573
1662
  workspace_id: WORKSPACE_ID,
1574
1663
  };
1575
1664
  saveCredentials(creds);
@@ -1784,14 +1873,13 @@ async function main() {
1784
1873
  // 云端模式:开启结构化输出 + errorBox 抛异常(而非 exit)。
1785
1874
  CLOUD_MODE = !!args.cloud;
1786
1875
  // per-agent 路由时 agent 的 workspace(claude-code 用它做配置根目录)。
1787
- // per-agent 路由时 agent 的 workspace(claude-code 用它做配置根目录)。
1788
1876
  // 云端 claude-code 未显式传 workspace 时,按约定路径 ~/.coze/agents/<id>/workspace 推断。
1789
1877
  let agentWorkspace = args.workspace || '';
1790
1878
  if (args.cloud && args.agentId && agent === 'claude-code' && !agentWorkspace) {
1791
1879
  agentWorkspace = path.join(os.homedir(), '.coze', 'agents', args.agentId, 'workspace');
1792
1880
  }
1793
1881
  if (args.agentId) {
1794
- info(`目标 agent: ${args.agentId} (framework=${agent}, workspace=${agentWorkspace || 'N/A'})`);
1882
+ info(`目标 agent: ${args.agentId} (framework=${agent}, workspace=${agentWorkspace || 'N/A'}, deploy=${args.cloud ? 'cloud' : 'local'}, reason=${args.deployReason || 'n/a'})`);
1795
1883
  console.log('');
1796
1884
  }
1797
1885
 
@@ -1945,20 +2033,30 @@ async function main() {
1945
2033
  successBox(summaryLines);
1946
2034
  }
1947
2035
 
1948
- main().catch(e => {
1949
- // 云端模式:失败时输出结构化结果(含 logid),exit 0 但带 COZE_LAB_RESULT 行。
1950
- if (CLOUD_MODE) {
1951
- if (cloudResult.inject !== 'ok') {
1952
- // hook 配置前的任何失败 → 注入失败。
1953
- cloudResult.inject = 'fail';
1954
- } else if (cloudResult.verify === 'skip') {
1955
- // 注入已成功,但验证阶段异常崩溃(未走到正常的 ok/fail 判定)→ 记为验证失败。
1956
- cloudResult.verify = 'fail';
2036
+ // 仅在作为 CLI 直接运行时执行 main();被 require(单元测试)时只导出内部函数。
2037
+ if (require.main === module) {
2038
+ main().catch(e => {
2039
+ // 云端模式:失败时输出结构化结果(含 logid),exit 0 但带 COZE_LAB_RESULT 行。
2040
+ if (CLOUD_MODE) {
2041
+ if (cloudResult.inject !== 'ok') {
2042
+ // hook 配置前的任何失败 → 注入失败。
2043
+ cloudResult.inject = 'fail';
2044
+ } else if (cloudResult.verify === 'skip') {
2045
+ // 注入已成功,但验证阶段异常崩溃(未走到正常的 ok/fail 判定)→ 记为验证失败。
2046
+ cloudResult.verify = 'fail';
2047
+ }
2048
+ if (!cloudResult.message) cloudResult.message = e && e.message ? e.message : 'unexpected failure';
2049
+ if (!cloudResult.logid) cloudResult.logid = extractLogid(cloudResult.message);
2050
+ emitCloudResult();
2051
+ process.exit(1);
1957
2052
  }
1958
- if (!cloudResult.message) cloudResult.message = e && e.message ? e.message : 'unexpected failure';
1959
- if (!cloudResult.logid) cloudResult.logid = extractLogid(cloudResult.message);
1960
- emitCloudResult();
1961
- process.exit(1);
1962
- }
1963
- errorBox(['ERROR: Unexpected failure', '', e.message]);
1964
- });
2053
+ errorBox(['ERROR: Unexpected failure', '', e.message]);
2054
+ });
2055
+ }
2056
+
2057
+ module.exports = {
2058
+ resolveHomeDir,
2059
+ mergeJson,
2060
+ atomicWriteFileSync,
2061
+ isExpired,
2062
+ };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "coze_lab",
3
- "version": "0.1.37",
3
+ "version": "0.1.40",
4
4
  "description": "Configure local AI agents (Claude Code, Codex, OpenClaw) to report traces to CozeLoop",
5
5
  "keywords": [
6
6
  "cozeloop",
@@ -127,6 +127,10 @@ _DEFAULT_WORKSPACE_ID = "7649231955045072915" # hardcoded spaceID fallback
127
127
  _COZE_CTX_OPEN = "<coze-context>"
128
128
  _COZE_CTX_CLOSE = "</coze-context>"
129
129
 
130
+ # 中途事件(PostToolUse)增量上报的最小间隔(秒)。密集工具调用下用它节流,避免每秒多次 flush。
131
+ # 终态事件(Stop)不受此限制。
132
+ INCREMENTAL_UPLOAD_MIN_INTERVAL = float(os.environ.get("COZELOOP_INCREMENTAL_MIN_INTERVAL", "10"))
133
+
130
134
 
131
135
  def _content_to_text(content: Any) -> str:
132
136
  """Flatten Claude message content (str | list[dict] | dict) to plain text."""
@@ -438,8 +442,8 @@ def _token_from_credentials() -> Optional[str]:
438
442
  creds = _load_credentials()
439
443
  if not creds:
440
444
  return None
441
- expires_at_sec = creds.get("expires_at", 0) / 1000
442
- remaining = expires_at_sec - time.time()
445
+ # expires_at 由 index.js 以毫秒存储;先减去当前毫秒时间再换算成秒,与 _REFRESH_THRESHOLD() 比较。
446
+ remaining = (creds.get("expires_at", 0) - time.time() * 1000) / 1000
443
447
  if remaining > _REFRESH_THRESHOLD:
444
448
  debug_log(f"Cached token valid, expires in {int(remaining)}s.")
445
449
  return creds.get("access_token")
@@ -1091,7 +1095,13 @@ def send_turns_to_cozeloop(turns: List[Dict[str, Any]], session_id: str, history
1091
1095
  else:
1092
1096
  print("[CozeLoop] 警告: 未找到有效 Token,上报可能失败", file=sys.stderr)
1093
1097
  creds = _load_credentials()
1094
- workspace_id = (creds or {}).get("workspace_id") or os.environ.get("COZELOOP_WORKSPACE_ID", "") or _DEFAULT_WORKSPACE_ID
1098
+ # 云端模式:sandbox 注入的 COZELOOP_WORKSPACE_ID 必须优先于 credentials.json。
1099
+ # 否则残留的本地 credentials.json workspace_id 会覆盖云端注入,导致 trace 上报错位。
1100
+ is_cloud = os.environ.get("COZELAB_ONBOARD_CLOUD", "").lower() in ("1", "true", "yes")
1101
+ if is_cloud:
1102
+ workspace_id = os.environ.get("COZELOOP_WORKSPACE_ID", "") or (creds or {}).get("workspace_id") or _DEFAULT_WORKSPACE_ID
1103
+ else:
1104
+ workspace_id = (creds or {}).get("workspace_id") or os.environ.get("COZELOOP_WORKSPACE_ID", "") or _DEFAULT_WORKSPACE_ID
1095
1105
  os.environ["COZELOOP_WORKSPACE_ID"] = workspace_id
1096
1106
  upload_events: List[str] = []
1097
1107
  client_kwargs = {
@@ -1150,6 +1160,16 @@ def send_turns_to_cozeloop(turns: List[Dict[str, Any]], session_id: str, history
1150
1160
  t = coze_context_tags(um.get("content") if um else None)
1151
1161
  if t:
1152
1162
  coze_tags = t
1163
+ # Drop empty-valued coze_* tags: the backend pairs traces by exact tag
1164
+ # match (coze_message_id / coze_agent_id), where an empty string is
1165
+ # indistinguishable from "absent" yet still bloats the span — never
1166
+ # emit one. Log when a pairing key is missing so a silent pairing miss
1167
+ # is debuggable.
1168
+ coze_tags = {k: v for k, v in coze_tags.items() if isinstance(v, str) and v.strip()}
1169
+ if not coze_tags.get("coze_message_id"):
1170
+ hook_log(f"coze-context missing coze_message_id session={session_id}")
1171
+ if not coze_tags.get("coze_session_id"):
1172
+ hook_log(f"coze-context missing coze_session_id session={session_id}")
1153
1173
  if coze_tags:
1154
1174
  root_tags.update(coze_tags)
1155
1175
  root_baggage.update(coze_tags)
@@ -1561,7 +1581,19 @@ def send_turns_to_cozeloop(turns: List[Dict[str, Any]], session_id: str, history
1561
1581
  debug_log(f"An error occurred while sending traces to CozeLoop: {e}")
1562
1582
  return None
1563
1583
  finally:
1564
- # Crucial: close the client to ensure all buffered traces are sent.
1584
+ # Flush buffered spans BEFORE closing so a slow/failed close() can't drop
1585
+ # them. close() also flushes, but flushing first surfaces the upload
1586
+ # failure (via trace_finish_event_processor -> upload_events) while the
1587
+ # client is still usable.
1588
+ # KNOWN RISK: when flush/close reports a 401, the block below re-sends the
1589
+ # ENTIRE turn batch with a refreshed token. The SDK has no per-span ack, so
1590
+ # any batch the first attempt already delivered before the auth failure is
1591
+ # uploaded a second time -> duplicate trace. Acceptable for now (a missing
1592
+ # trace is worse than a dup); revisit if the SDK gains partial-ack/resume.
1593
+ try:
1594
+ client.flush()
1595
+ except Exception as _flush_err:
1596
+ debug_log(f"client.flush() before close failed: {_flush_err}")
1565
1597
  client.close()
1566
1598
  debug_log("CozeLoop client closed.")
1567
1599
 
@@ -1614,6 +1646,11 @@ def main():
1614
1646
  # Read hook input from stdin (Claude Code provides transcript_path, session_id, etc.)
1615
1647
  hook_input = read_hook_stdin()
1616
1648
 
1649
+ # 触发事件类型:Stop=会话结束(收尾,全量上报);PostToolUse=任务进行中(增量上报)。
1650
+ # 缺省按 Stop 处理,兼容旧注册与手动调用。
1651
+ hook_event = (hook_input.get("hook_event_name") or "Stop").strip()
1652
+ is_terminal_event = hook_event not in ("PostToolUse", "PreToolUse")
1653
+
1617
1654
  # Determine conversation file: prefer stdin, fallback to file scan
1618
1655
  conversation_file = hook_input.get("transcript_path")
1619
1656
  if conversation_file:
@@ -1637,6 +1674,16 @@ def main():
1637
1674
  state = load_state(state_file)
1638
1675
  last_processed_line = state.get("last_processed_line", 0)
1639
1676
 
1677
+ # 节流:PostToolUse 在密集工具调用下会高频触发。距上次上报不足 INCREMENTAL_UPLOAD_MIN_INTERVAL
1678
+ # 秒则跳过本次增量上报,避免每秒多次 flush 抬高上报量/成本。终态事件(Stop)永不被节流,
1679
+ # 保证任务结束时一定收尾上报最后一批。
1680
+ if not is_terminal_event:
1681
+ now_ts = time.time()
1682
+ last_upload_ts = state.get("last_upload_ts", 0)
1683
+ if now_ts - last_upload_ts < INCREMENTAL_UPLOAD_MIN_INTERVAL:
1684
+ debug_log(f"throttled: event={hook_event} since_last={now_ts - last_upload_ts:.1f}s < {INCREMENTAL_UPLOAD_MIN_INTERVAL}s, skip")
1685
+ return
1686
+
1640
1687
  # Read new messages from the file
1641
1688
  new_messages = read_new_messages(conversation_file, last_processed_line)
1642
1689
 
@@ -1685,18 +1732,38 @@ def main():
1685
1732
  if not has_coze_ctx:
1686
1733
  debug_log("No coze-context found in any turn (incl. history), skipping upload.")
1687
1734
  return
1688
- print(f"[CozeLoop] 开始上报: session={session_id}, turns={len(turns)}", file=sys.stderr)
1689
- uploaded = send_turns_to_cozeloop(turns, session_id, history_turns)
1735
+
1736
+ # turn 边界控制:中途事件(PostToolUse)触发时,最后一个 turn 往往仍在进行中
1737
+ # (后续还会追加 step)。若此刻就上报并推进其行号,同一逻辑 turn 会在下次触发时
1738
+ # 因缺了起始 user 消息而被拆成新的 root span。故中途事件只上报“已完成”的 turn
1739
+ # (= 除最后一个之外的所有 turn),把最后一个留到下次/收尾。终态事件(Stop)上报全部。
1740
+ if is_terminal_event:
1741
+ turns_to_send = turns
1742
+ else:
1743
+ turns_to_send = turns[:-1]
1744
+ if not turns_to_send:
1745
+ debug_log(f"event={hook_event}: no completed turn to send yet (turns={len(turns)}), defer")
1746
+ return
1747
+
1748
+ print(f"[CozeLoop] 开始上报: session={session_id}, event={hook_event}, turns={len(turns_to_send)}/{len(turns)}", file=sys.stderr)
1749
+ uploaded = send_turns_to_cozeloop(turns_to_send, session_id, history_turns)
1690
1750
  if uploaded is None:
1691
1751
  debug_log("Send failed, state not advanced.")
1692
1752
  return
1693
1753
 
1694
- # Update state with the new last processed line number
1695
- last_line_in_batch = max(msg.get("_line_number", 0) for msg in new_messages)
1696
- state["last_processed_line"] = last_line_in_batch + 1
1754
+ # 推进 last_processed_line:只推进到已上报 turn 覆盖的最后一行。中途事件保留了最后一个
1755
+ # 未完成 turn,故推进到“倒数第二个 turn 的末行”,让未完成 turn 的所有行下次重新读取。
1756
+ if is_terminal_event:
1757
+ last_line_in_batch = max(msg.get("_line_number", 0) for msg in new_messages)
1758
+ state["last_processed_line"] = last_line_in_batch + 1
1759
+ else:
1760
+ # turns_to_send 是 turns[:-1],下一个未发送 turn 的起始行即新的水位线。
1761
+ next_turn_start = turns[-1].get("start_line", 0)
1762
+ state["last_processed_line"] = next_turn_start
1763
+ state["last_upload_ts"] = time.time()
1697
1764
  save_state(state_file, state)
1698
- print(f"[CozeLoop] 上报完成 ✓ session={session_id}, turns={len(turns)}", file=sys.stderr)
1699
- debug_log(f"State updated. Last processed line: {state['last_processed_line']}")
1765
+ print(f"[CozeLoop] 上报完成 ✓ session={session_id}, turns={len(turns_to_send)}", file=sys.stderr)
1766
+ debug_log(f"State updated. event={hook_event} last_processed_line={state['last_processed_line']}")
1700
1767
 
1701
1768
  debug_log("Hook finished.")
1702
1769
 
@@ -50,6 +50,10 @@ _REFRESH_LOCK_STALE = 30
50
50
  _DEFAULT_WORKSPACE_ID = "7649231955045072915" # hardcoded spaceID fallback
51
51
  _OTEL_SUFFIX = "/v1/loop/opentelemetry"
52
52
 
53
+ # 中途事件(PostToolUse)增量上报的最小间隔(秒)。密集工具调用下用它节流,避免每次工具调用都 flush。
54
+ # 终态事件(Stop/SubagentStop)不受此限制。
55
+ INCREMENTAL_UPLOAD_MIN_INTERVAL = float(os.environ.get("COZELOOP_INCREMENTAL_MIN_INTERVAL", "10"))
56
+
53
57
 
54
58
  # --- coze-context parsing -------------------------------------------------
55
59
  # User messages may embed a block like:
@@ -351,7 +355,8 @@ def _token_from_credentials():
351
355
  creds = _load_credentials()
352
356
  if not creds:
353
357
  return None
354
- remaining = creds.get("expires_at", 0) / 1000 - time.time()
358
+ # expires_at 由 index.js 以毫秒存储;先减去当前毫秒时间再换算成秒,与 _REFRESH_THRESHOLD 比较。
359
+ remaining = (creds.get("expires_at", 0) - time.time() * 1000) / 1000
355
360
  if remaining > _REFRESH_THRESHOLD:
356
361
  return creds.get("access_token")
357
362
  if creds.get("refresh_token"):
@@ -1102,7 +1107,13 @@ def send_turns_to_cozeloop(turns: List[Dict[str, Any]], session_id: str, model_n
1102
1107
  )
1103
1108
  print("[CozeLoop] 警告: 未找到有效 Token,上报可能失败", file=sys.stderr)
1104
1109
  creds = _load_credentials()
1105
- workspace_id = (creds or {}).get("workspace_id") or os.environ.get("COZELOOP_WORKSPACE_ID", "") or _DEFAULT_WORKSPACE_ID
1110
+ # 云端模式:sandbox 注入的 COZELOOP_WORKSPACE_ID 必须优先于 credentials.json。
1111
+ # 否则残留的本地 credentials.json workspace_id 会覆盖云端注入,导致 trace 上报错位。
1112
+ is_cloud = os.environ.get("COZELAB_ONBOARD_CLOUD", "").lower() in ("1", "true", "yes")
1113
+ if is_cloud:
1114
+ workspace_id = os.environ.get("COZELOOP_WORKSPACE_ID", "") or (creds or {}).get("workspace_id") or _DEFAULT_WORKSPACE_ID
1115
+ else:
1116
+ workspace_id = (creds or {}).get("workspace_id") or os.environ.get("COZELOOP_WORKSPACE_ID", "") or _DEFAULT_WORKSPACE_ID
1106
1117
  os.environ["COZELOOP_WORKSPACE_ID"] = workspace_id
1107
1118
  hook_log(f"workspace_id={workspace_id}")
1108
1119
  upload_events: List[str] = []
@@ -1151,6 +1162,16 @@ def send_turns_to_cozeloop(turns: List[Dict[str, Any]], session_id: str, model_n
1151
1162
  t = {f"coze_{k}": v for k, v in turn_coze_context(turn).items()}
1152
1163
  if t:
1153
1164
  coze_tags = t
1165
+ # Drop empty-valued coze_* tags: the backend pairs traces by exact tag
1166
+ # match (coze_message_id / coze_session_id), where an empty string is
1167
+ # indistinguishable from "absent" yet still bloats the span — never
1168
+ # emit one. Log when a pairing key is missing so a silent pairing miss
1169
+ # is debuggable.
1170
+ coze_tags = {k: v for k, v in coze_tags.items() if isinstance(v, str) and v.strip()}
1171
+ if not coze_tags.get("coze_message_id"):
1172
+ hook_log(f"coze-context missing coze_message_id session_id={session_id}")
1173
+ if not coze_tags.get("coze_session_id"):
1174
+ hook_log(f"coze-context missing coze_session_id session_id={session_id}")
1154
1175
  if coze_tags:
1155
1176
  root_tags.update(coze_tags)
1156
1177
  root_baggage.update(coze_tags)
@@ -1488,6 +1509,19 @@ def send_turns_to_cozeloop(turns: List[Dict[str, Any]], session_id: str, model_n
1488
1509
  debug_log(f"An error occurred while sending traces to CozeLoop: {e}")
1489
1510
  return None
1490
1511
  finally:
1512
+ # Flush buffered spans BEFORE closing so a slow/failed close() can't drop
1513
+ # them. close() also flushes, but flushing first surfaces the upload
1514
+ # failure (via trace_finish_event_processor -> upload_events) while the
1515
+ # client is still usable.
1516
+ # KNOWN RISK: when flush/close reports a 401, the block below re-sends the
1517
+ # ENTIRE turn batch with a refreshed token. The SDK has no per-span ack, so
1518
+ # any batch the first attempt already delivered before the auth failure is
1519
+ # uploaded a second time -> duplicate trace. Acceptable for now (a missing
1520
+ # trace is worse than a dup); revisit if the SDK gains partial-ack/resume.
1521
+ try:
1522
+ client.flush()
1523
+ except Exception as _flush_err:
1524
+ debug_log(f"client.flush() before close failed: {_flush_err}")
1491
1525
  client.close()
1492
1526
  hook_log("client closed")
1493
1527
  debug_log("CozeLoop client closed.")
@@ -1545,6 +1579,11 @@ def main():
1545
1579
 
1546
1580
  debug_log(f"Hook input: {json.dumps(hook_input, ensure_ascii=False)}")
1547
1581
 
1582
+ # 触发事件类型:Stop/SubagentStop=turn 结束(收尾,全量上报);PostToolUse/PreToolUse=turn 进行中
1583
+ # (增量上报,仅上报已完成 turn + 节流)。缺省按终态处理,兼容旧注册与手动调用。
1584
+ hook_event = (hook_input.get("hook_event_name") or "Stop").strip()
1585
+ is_terminal_event = hook_event not in ("PostToolUse", "PreToolUse")
1586
+
1548
1587
  # Get transcript path
1549
1588
  transcript_path = hook_input.get("transcript_path")
1550
1589
  if not transcript_path:
@@ -1633,7 +1672,12 @@ def main():
1633
1672
 
1634
1673
  # Send turns to CozeLoop — only if at least one turn carries coze-context.
1635
1674
  if turns:
1636
- has_coze_ctx = any(
1675
+ # coze-context 只出现在首个 turn 的 user 消息里。增量上报推进后,后续批次的 turns
1676
+ # 已不含首 turn,直接判断会误判为"无 coze-context"而跳过。故用 state 持久化"本会话
1677
+ # 曾见过 coze-context"标记:一旦见过,后续增量批次都视为应上报(与 claude-code 用
1678
+ # history_turns 判断等效)。
1679
+ seen_ctx = bool(state.get("seen_coze_context"))
1680
+ has_coze_ctx = seen_ctx or any(
1637
1681
  turn_coze_context(t)
1638
1682
  for t in turns
1639
1683
  )
@@ -1641,17 +1685,48 @@ def main():
1641
1685
  hook_log(f"skip no coze-context turns={len(turns)} session_id={session_id}")
1642
1686
  debug_log("No coze-context found in any turn, skipping upload.")
1643
1687
  return
1688
+ if not seen_ctx:
1689
+ state["seen_coze_context"] = True
1690
+
1691
+ # 节流:PostToolUse 在密集工具调用下高频触发。距上次上报不足间隔则跳过本次增量上报。
1692
+ # 终态事件(Stop/SubagentStop)永不被节流,保证 turn 结束时一定收尾。
1693
+ if not is_terminal_event:
1694
+ now_ts = time.time()
1695
+ last_upload_ts = state.get("last_upload_ts", 0)
1696
+ if now_ts - last_upload_ts < INCREMENTAL_UPLOAD_MIN_INTERVAL:
1697
+ hook_log(f"throttled event={hook_event} since_last={now_ts - last_upload_ts:.1f}s session_id={session_id}")
1698
+ debug_log(f"throttled: event={hook_event}, skip incremental upload")
1699
+ return
1700
+
1701
+ # turn 边界控制:中途事件(PostToolUse)触发时,最后一个 turn 往往仍在进行中
1702
+ # (task_complete 尚未到达,后续还会追加内容)。只上报已完成 turn(turns[:-1]),
1703
+ # 把最后一个留到下次/收尾,避免同一 turn 被拆成多个 trace。终态事件上报全部。
1704
+ if is_terminal_event:
1705
+ turns_to_send = turns
1706
+ else:
1707
+ turns_to_send = turns[:-1]
1708
+ if not turns_to_send:
1709
+ hook_log(f"defer no completed turn event={hook_event} turns={len(turns)} session_id={session_id}")
1710
+ debug_log(f"event={hook_event}: no completed turn to send yet, defer")
1711
+ return
1712
+
1644
1713
  history_context = state.get("conversation_history", [])
1645
1714
  updated_history = send_turns_to_cozeloop(
1646
- turns, session_id, model_name,
1715
+ turns_to_send, session_id, model_name,
1647
1716
  history_context=history_context,
1648
1717
  )
1649
1718
  if updated_history is not None:
1650
- last_line = max(e.get("_line_number", 0) for e in entries) + 1
1719
+ # 推进 last_processed_line:终态推进到所有 entry 末行;中途保留最后一个未完成 turn,
1720
+ # 推进到该 turn 的起始行,让它的所有 entry 下次重新读取。
1721
+ if is_terminal_event:
1722
+ last_line = max(e.get("_line_number", 0) for e in entries) + 1
1723
+ else:
1724
+ last_line = turns[-1].get("start_line", max(e.get("_line_number", 0) for e in entries) + 1)
1651
1725
  state["last_processed_line"] = last_line
1652
1726
  state["conversation_history"] = updated_history
1727
+ state["last_upload_ts"] = time.time()
1653
1728
  save_state(state_file, state)
1654
- hook_log(f"state advanced last_line={last_line} session_id={session_id}")
1729
+ hook_log(f"state advanced event={hook_event} last_line={last_line} sent={len(turns_to_send)}/{len(turns)} session_id={session_id}")
1655
1730
  debug_log(f"State updated, last processed line: {last_line}")
1656
1731
  else:
1657
1732
  hook_log(f"send failed state not advanced session_id={session_id}")
@@ -404,6 +404,11 @@ class CozeloopIngestExporter {
404
404
  this.serviceName = config.serviceName;
405
405
  this.onAuthFailure = config.onAuthFailure;
406
406
  this.shutdownRequested = false;
407
+ // In-flight postSpans() promises. export() returns void (OTel contract),
408
+ // so without tracking these the process can exit while an HTTP POST is
409
+ // still pending and silently drop the span batch. forceFlush() awaits
410
+ // this set so dispose()/shutdown() can guarantee delivery.
411
+ this.pendingPromises = new Set();
407
412
  fileLog(this.logFile, `[ingest] exporter ready url=${this.url} workspaceId=${this.workspaceId}`);
408
413
  }
409
414
  export(spans, resultCallback) {
@@ -411,13 +416,17 @@ class CozeloopIngestExporter {
411
416
  resultCallback({ code: EXPORT_SUCCESS });
412
417
  return;
413
418
  }
414
- this.postSpans(spans)
419
+ const pending = this.postSpans(spans)
415
420
  .then(() => resultCallback({ code: EXPORT_SUCCESS }))
416
421
  .catch((err) => {
417
422
  this.logger?.error?.(`[CozeloopTrace] CozeLoop ingest export failed: ${err?.message || err}`);
418
423
  fileLog(this.logFile, `[ingest] export FAILED url=${this.url} spans=${spans.length} err=${err?.message || err}`);
419
424
  resultCallback({ code: EXPORT_FAILED, error: err });
425
+ })
426
+ .finally(() => {
427
+ this.pendingPromises.delete(pending);
420
428
  });
429
+ this.pendingPromises.add(pending);
421
430
  }
422
431
  async postSpans(spans) {
423
432
  const body = {
@@ -486,10 +495,15 @@ class CozeloopIngestExporter {
486
495
  fileLog(this.logFile, `[ingest] OK HTTP ${res.status} spans=${body.spans.length}${retry ? " retry=1" : ""}`);
487
496
  }
488
497
  async forceFlush() {
489
- return;
498
+ // Wait for every in-flight POST to settle. allSettled (not all) so one
499
+ // failed batch doesn't abort the wait on the others.
500
+ if (this.pendingPromises.size > 0) {
501
+ await Promise.allSettled([...this.pendingPromises]);
502
+ }
490
503
  }
491
504
  async shutdown() {
492
505
  this.shutdownRequested = true;
506
+ await this.forceFlush();
493
507
  }
494
508
  }
495
509
 
@@ -888,6 +902,9 @@ export class CozeloopExporter {
888
902
  async dispose() {
889
903
  if (this.provider) {
890
904
  try {
905
+ // Flush in-flight batches before tearing the provider down so a
906
+ // pending HTTP POST is never killed by process exit.
907
+ await this.flush();
891
908
  await this.provider.shutdown();
892
909
  }
893
910
  catch (err) {
@@ -70,7 +70,11 @@ function parseCozeContext(input) {
70
70
  continue;
71
71
  const key = line.slice(0, sep).trim();
72
72
  const value = line.slice(sep + 1).trim();
73
- if (key)
73
+ // Tag keys map directly onto backend trace-query field names, so only
74
+ // accept the same charset the keys are written in (letters/digits/_).
75
+ // Lines whose key carries any other char are malformed — skip them
76
+ // rather than emit a tag the backend Eq filter could never hit.
77
+ if (key && /^[A-Za-z0-9_]+$/.test(key))
74
78
  out["coze_" + key] = value;
75
79
  }
76
80
  return out;
@@ -1409,14 +1413,17 @@ const cozeloopTracePlugin = {
1409
1413
  };
1410
1414
  // OpenClaw runtime 周期性发送的心跳轮询消息,不是真实对话,整条 trace 丢弃。
1411
1415
  const HEARTBEAT_INPUT = "[OpenClaw heartbeat poll]";
1416
+ // 宽松匹配:trim 后只要包含心跳标记即算心跳,避免 runtime 加前后缀/包裹时漏判。
1417
+ const isHeartbeatText = (text) => typeof text === "string"
1418
+ && text.trim().includes(HEARTBEAT_INPUT);
1412
1419
  const isHeartbeatInput = (input) => {
1413
1420
  if (typeof input === "string") {
1414
- return input.trim() === HEARTBEAT_INPUT;
1421
+ return isHeartbeatText(input);
1415
1422
  }
1416
1423
  // content 可能是 [{type:'text', text:'...'}] 形式
1417
1424
  if (Array.isArray(input)) {
1418
1425
  return input.some((p) => p && typeof p === "object"
1419
- && typeof p.text === "string" && p.text.trim() === HEARTBEAT_INPUT);
1426
+ && isHeartbeatText(p.text));
1420
1427
  }
1421
1428
  return false;
1422
1429
  };
@@ -1428,6 +1435,7 @@ const cozeloopTracePlugin = {
1428
1435
  const lastUserFallback = lastUserFallbackFor(ctx, channelId);
1429
1436
  const heartbeatInput = ctx.userInput
1430
1437
  || lastUserFallback?.userInput || (lastUserFallback ? lastUserInput : undefined);
1438
+ const currentIsHeartbeat = isHeartbeatInput(ctx.userInput);
1431
1439
  if (isHeartbeatInput(heartbeatInput)) {
1432
1440
  if (config.debug) {
1433
1441
  api.logger.info(`[CozeloopTrace] skip heartbeat poll trace, traceId=${ctx.traceId}`);
@@ -1439,13 +1447,24 @@ const cozeloopTracePlugin = {
1439
1447
  // /loop follow-ups and later turns have none. Fall back to the per-
1440
1448
  // session cache (and global last-seen) instead of dropping the trace.
1441
1449
  // Only skip when NO context has ever been seen for this session.
1450
+ //
1451
+ // EXCEPTION: never apply the cache fallback to a heartbeat poll — the
1452
+ // cache could resurrect a previous real turn's coze-context and make
1453
+ // the heartbeat masquerade as that conversation (a phantom trace).
1454
+ // Claude-code / codex hooks only look at the current turn; match that.
1442
1455
  const ocSessionId = ctx.openclawSessionId || lastOpenclawSessionId;
1443
- rememberCozeContext(ctx.userInput, ocSessionId);
1444
- let cozeCtx = resolveCozeContext(ctx.userInput, ocSessionId);
1445
- if (Object.keys(cozeCtx).length === 0) {
1446
- // Try the fallback user inputs too before giving up.
1447
- const fallbackInput = lastUserFallback?.userInput || (lastUserFallback ? lastUserInput : undefined);
1448
- cozeCtx = resolveCozeContext(fallbackInput, ocSessionId);
1456
+ let cozeCtx;
1457
+ if (currentIsHeartbeat) {
1458
+ cozeCtx = parseCozeContext(ctx.userInput);
1459
+ }
1460
+ else {
1461
+ rememberCozeContext(ctx.userInput, ocSessionId);
1462
+ cozeCtx = resolveCozeContext(ctx.userInput, ocSessionId);
1463
+ if (Object.keys(cozeCtx).length === 0) {
1464
+ // Try the fallback user inputs too before giving up.
1465
+ const fallbackInput = lastUserFallback?.userInput || (lastUserFallback ? lastUserInput : undefined);
1466
+ cozeCtx = resolveCozeContext(fallbackInput, ocSessionId);
1467
+ }
1449
1468
  }
1450
1469
  if (Object.keys(cozeCtx).length === 0) {
1451
1470
  return;
@@ -1477,6 +1496,20 @@ const cozeloopTracePlugin = {
1477
1496
  if (!ctx.userInput) {
1478
1497
  ctx.userInput = lastUserFallback?.userInput || (lastUserFallback ? lastUserInput : undefined);
1479
1498
  }
1499
+ // Drop empty-valued coze_* tags: the backend pairs traces by exact
1500
+ // tag match (coze_message_id / coze_session_id), and an empty-string
1501
+ // tag is indistinguishable from "absent" there while still bloating
1502
+ // the span — so never emit one. Log when the pairing keys are missing
1503
+ // so a silent pairing miss is debuggable.
1504
+ const cozeTags = {};
1505
+ for (const [k, v] of Object.entries(cozeCtx)) {
1506
+ if (typeof v === "string" && v.trim() !== "") {
1507
+ cozeTags[k] = v;
1508
+ }
1509
+ }
1510
+ if (config.debug && (!cozeTags.coze_message_id || !cozeTags.coze_session_id)) {
1511
+ api.logger.info(`[CozeloopTrace] coze-context missing pairing key(s): message_id=${cozeTags.coze_message_id ? "ok" : "MISSING"} session_id=${cozeTags.coze_session_id ? "ok" : "MISSING"}, traceId=${ctx.traceId}`);
1512
+ }
1480
1513
  const rootSpanData = {
1481
1514
  name: "openclaw_request",
1482
1515
  type: "entry",
@@ -1486,7 +1519,7 @@ const cozeloopTracePlugin = {
1486
1519
  "run.id": ctx.runId,
1487
1520
  "turn.id": ctx.turnId,
1488
1521
  "openclaw.channel_id": channelId,
1489
- ...cozeCtx,
1522
+ ...cozeTags,
1490
1523
  },
1491
1524
  input: ctx.userInput,
1492
1525
  traceId: ctx.traceId,
@@ -44,7 +44,8 @@ def refresh(rt):
44
44
  def main():
45
45
  creds = load()
46
46
  if not creds: return
47
- remaining = (creds.get("expires_at",0)/1000) - time.time()
47
+ # expires_at 由 index.js 以毫秒存储;先减去当前毫秒时间再换算成秒,与 THRESHOLD 比较。
48
+ remaining = (creds.get("expires_at",0) - time.time()*1000)/1000
48
49
  if remaining > THRESHOLD: return # still fresh
49
50
  print(f"[cozeloop_refresh] token expiring in {int(remaining)}s, refreshing...", file=sys.stderr)
50
51
  if creds.get("refresh_token"):