npm - @indigoai-us/hq-cloud - Versions diffs - 6.3.1 → 6.3.3 - Mend

@indigoai-us/hq-cloud 6.3.1 → 6.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/dist/cli/sync.js +43 -1
package/dist/cli/sync.js.map +1 -1
package/dist/cli/sync.test.js +98 -0
package/dist/cli/sync.test.js.map +1 -1
package/dist/skill-telemetry.d.ts +42 -6
package/dist/skill-telemetry.d.ts.map +1 -1
package/dist/skill-telemetry.js +253 -10
package/dist/skill-telemetry.js.map +1 -1
package/dist/skill-telemetry.test.js +287 -1
package/dist/skill-telemetry.test.js.map +1 -1
package/package.json +1 -1
package/src/cli/sync.test.ts +114 -0
package/src/cli/sync.ts +43 -1
package/src/skill-telemetry.test.ts +433 -0
package/src/skill-telemetry.ts +260 -10

package/src/skill-telemetry.ts CHANGED Viewed

@@ -25,12 +25,24 @@
  *
  * Codex CLI is captured too, from its own rollout logs at
  * `~/.codex/sessions/YYYY/MM/DD/rollout-<ISO>-<uuid>.jsonl`. Codex records cwd +
- * sessionId ONCE in a leading `session_meta` line (not on every row), and a
- * typed slash command — including an HQ skill, e.g. `/indigo:hello-world` — is
- * logged verbatim as a later `event_msg` `user_message` (Codex does not expand
- * it). That parallels Claude's typed path exactly; Codex has no discrete "skill
- * tool_use" event, so only the typed source is captured for it. Both runtimes
- * funnel into the same wire shape, scope filter, batcher, and per-file cursor.
+ * sessionId ONCE in a leading `session_meta` line (not on every row). Two Codex
+ * paths feed the same wire shape, scope filter, batcher, and per-file cursor:
+ *   - Typed (`source: "typed"`) — a slash command, including an HQ skill, e.g.
+ *     `/indigo:hello-world`, is logged verbatim as a later `event_msg`
+ *     `user_message` (Codex does not expand it). Parallels Claude's typed path.
+ *   - Model-driven (`source: "model"`) — Codex has no discrete "Skill tool_use"
+ *     event like Claude. Instead it *runs* a skill by reading its instruction
+ *     file: the model issues a shell command that reads `…/skills/<name>/
+ *     SKILL.md`. Codex logs that exec in one of two shapes depending on CLI
+ *     version — an `event_msg` `exec_command_end` (with `turn_id`, `cwd`, and a
+ *     `parsed_cmd` it tags `type: "read"`) or a `response_item` `function_call`
+ *     named `exec_command` (command + `workdir` in its `arguments`). Both are
+ *     handled. We treat the read as one invocation of `<name>`. A single use
+ *     re-reads the file several times (line ranges, greps) and a version may log
+ *     both shapes for one exec, so events are deduped per (sessionId, turn_id,
+ *     skill) — at most one per Codex turn. Edits to a SKILL.md travel via
+ *     `apply_patch` (authoring, not using) and never reach this path, so skill
+ *     development is not miscounted as usage.
  *
  * Privacy: raw `<command-args>` / `input.args` content is NEVER sent to the
  * cloud — only a `hasArgs` boolean. This matches the message-stripping posture
@@ -265,6 +277,18 @@ export function parseCodexSessionMeta(
   return { sessionId, cwd };
 }
/**
 * Read the `turn_id` carried by a Codex rollout row, if it has one.
 *
 * The id is looked up at `row.payload.turn_id`. The scan loop uses it to keep
 * a "current turn" so exec rows that carry no `turn_id` of their own can be
 * attributed to the most recent turn seen.
 *
 * @param row A parsed rollout line of unknown shape.
 * @returns The `turn_id` string, or `undefined` when `row` or its `payload`
 *   is not a plain (non-array) object, or `turn_id` is not a string.
 */
export function codexRowTurnId(row: unknown): string | undefined {
  const isPlainObject = (value: unknown): value is Record<string, unknown> =>
    typeof value === "object" && value !== null && !Array.isArray(value);

  if (!isPlainObject(row)) return undefined;
  const { payload } = row;
  if (!isPlainObject(payload)) return undefined;

  const turnId = payload.turn_id;
  return typeof turnId === "string" ? turnId : undefined;
}
 /**
  * Extract a typed skill/slash-command invocation from a Codex `event_msg`
  * `user_message` row. Session context (cwd, sessionId) lives in the file's
@@ -309,6 +333,206 @@ export function extractCodexSkillEvents(
   ];
 }
/**
 * Matches a `…/skills/<name>/SKILL.md` path inside a shell command and
 * captures `<name>` as group 1.
 *
 * `skills/` must sit at the start of the string or directly after a `/`, and
 * may be followed by any number of intermediate directories
 * (`.agents/skills/…`, `.codex/skills/hq/…`). `<name>` is always the directory
 * immediately above `SKILL.md`, so a `skills/hq/<name>/` layout resolves to
 * `<name>`, not `hq`.
 *
 * Each intermediate segment is `[^/\s'"]+` followed by one or more slashes.
 * Because a segment can never contain `/`, there is exactly one way to split a
 * path into segments, so a long path that does not end in `SKILL.md` fails in
 * polynomial time. A segment class that also accepts `/` would let the lazy
 * group partition the path in exponentially many ways (catastrophic
 * backtracking).
 */
const CODEX_SKILL_FILE =
  /(?:^|\/)skills\/(?:[^/\s'"]+\/+)*?([^/\s'"]+)\/SKILL\.md\b/;
/**
 * Flags that tell an interpreter "the next argument is the script to run":
 * POSIX shells (`-c`, `-lc`, `-ic`, …), PowerShell (`-Command`), and
 * `cmd.exe` (`/c`).
 */
const CODEX_SHELL_SCRIPT_FLAG = /^(?:-[a-z]*c|-[Cc]ommand|\/[cC])$/;

/**
 * Pull the shell command text out of a Codex exec `command` value.
 *
 * Accepted shapes:
 *   - a bare string — returned unchanged;
 *   - a shell-wrapper array such as `["/bin/zsh", "-lc", "<cmd>"]` — the
 *     trailing string (the script) is returned;
 *   - a plain argv array such as `["cat", "skills/x/SKILL.md"]` — the string
 *     elements are joined with single spaces so the leading verb survives for
 *     later read-verb checks. Returning only the last element here would drop
 *     the verb and make a genuine read unrecognizable.
 *
 * Non-string array elements are ignored.
 *
 * @param command The raw `command` value from a rollout row.
 * @returns The command text, or `""` when none can be recovered.
 */
function codexCommandString(command: unknown): string {
  if (typeof command === "string") return command;
  if (!Array.isArray(command)) return "";

  const parts = command.filter((part): part is string => typeof part === "string");
  if (parts.length === 0) return "";

  // A script flag anywhere before the final element marks the wrapper form:
  // interpreter + flags lead, and the actual command is the trailing string.
  const isShellWrapper = parts
    .slice(0, -1)
    .some((part) => CODEX_SHELL_SCRIPT_FLAG.test(part));
  if (isShellWrapper) return parts[parts.length - 1] ?? "";

  return parts.join(" ");
}
/**
 * Classify a Codex exec from the `parsed_cmd` array attached to it.
 *
 * Only entries that are objects with a string `type` take part; everything
 * else is ignored.
 *
 * @param parsedCmd The row's `parsed_cmd` value (any shape).
 * @returns
 *   - `"nonread"` — at least one typed entry has a `type` other than `"read"`;
 *   - `"read"`    — there is at least one typed entry and all are `"read"`;
 *   - `"unknown"` — `parsedCmd` is not an array, or has no typed entries. The
 *                   caller then falls back to inspecting the command text.
 */
function classifyCodexExec(parsedCmd: unknown): "read" | "nonread" | "unknown" {
  if (!Array.isArray(parsedCmd)) return "unknown";

  // Collect the string `type` of every entry that has one.
  const kinds = parsedCmd.flatMap((entry: unknown): string[] => {
    if (typeof entry !== "object" || entry === null) return [];
    const kind = (entry as Record<string, unknown>).type;
    return typeof kind === "string" ? [kind] : [];
  });

  if (kinds.some((kind) => kind !== "read")) return "nonread";
  return kinds.length > 0 ? "read" : "unknown";
}
// A leading read verb in the command — the positive signal that an exec is
// inspecting SKILL.md rather than rewriting it. The verb must sit at the head
// of a simple command: at the start of the string or right after `;`, `&`, or
// `|` (which also covers `cd dir; verb …` and `cd dir && verb …`). Leading
// whitespace and any number of `NAME=value` environment assignments in front
// of the verb are skipped.
const CODEX_READ_VERB =
  /(?:^|[;&|])\s*(?:[A-Za-z_][A-Za-z0-9_]*=\S*\s+)*(sed|cat|head|tail|nl|rg|grep|less|bat|wc|awk|cut|fold|view|print)\b/;
// Writing into the skill file disqualifies regardless of a read verb elsewhere.
// Three write forms are recognized, each requiring the `…/SKILL.md` path to be
// the actual target of the write:
//   - `>` / `>>` whose immediately following word is the path. An unrelated
//     redirect such as `2>/dev/null` earlier on the line does not count.
//   - `tee` with the path among its arguments, within the same simple command
//     (the scan does not cross `;`, `&`, or `|`).
//   - `sed` with an in-place flag (`-i`, `-i.bak`, `--in-place`) given before
//     the script, followed later on the line by the path.
const CODEX_WRITE_TO_SKILL =
  /(?:>>?\s*|\btee\b[^\n;&|]*?\s|\bsed\s+(?:-\S+\s+)*?(?:-[A-Za-z]*i\S*|--in-place\S*)\s[^\n]*?)["']?[^\s'";&|<>]*\/SKILL\.md\b/;
/**
 * Names of Codex tool calls (the `response_item` / `function_call` form) that
 * run a shell command. `apply_patch` is deliberately absent: it edits a file
 * rather than reading one.
 */
const CODEX_EXEC_TOOL_NAMES: readonly string[] = [
  "exec_command",
  "shell",
  "local_shell",
  "local_shell_call",
  "bash",
  "container.exec",
];
const CODEX_EXEC_TOOLS = new Set(CODEX_EXEC_TOOL_NAMES);
/**
 * Normalize a Codex exec row into `{ cmd, turnId, cwd, parsedCmd }`, or return
 * `null` when the row is not an exec this module understands.
 *
 * Two row shapes are recognized:
 *   - `event_msg` / `exec_command_end` — the command comes from
 *     `payload.command`; `turn_id` and `cwd` are taken from the payload when
 *     they are strings, otherwise from `ctx`; `payload.parsed_cmd` is passed
 *     through untouched.
 *   - `response_item` / `function_call` whose `name` is in `CODEX_EXEC_TOOLS` —
 *     the command comes from `arguments` (a JSON string or an already-parsed
 *     object) under `cmd` or, failing that, `command`; the run directory is
 *     `arguments.workdir` when it is a string, otherwise `ctx.cwd`. This shape
 *     yields no `parsed_cmd`, and its turn is always `ctx.turnId`.
 *
 * @param obj The full rollout row.
 * @param payload The row's `payload`, already verified to be a plain object.
 * @param ctx Session-level fallbacks for `cwd` and the running `turnId`.
 * @returns The normalized exec, or `null` when the shape is unrecognized, the
 *   tool name is not an exec tool, `arguments` is a string that is not valid
 *   JSON, or no non-empty command can be recovered.
 */
function codexExecParams(
  obj: Record<string, unknown>,
  payload: Record<string, unknown>,
  ctx: { cwd?: string; turnId?: string },
): { cmd: string; turnId?: string; cwd?: string; parsedCmd: unknown } | null {
  const rowType = obj.type;
  const payloadType = payload.type;

  // Shape 1: completed exec event — carries its own turn/cwd/classification.
  if (rowType === "event_msg" && payloadType === "exec_command_end") {
    const command = codexCommandString(payload.command);
    if (!command) return null;
    const ownTurn = payload.turn_id;
    const ownCwd = payload.cwd;
    return {
      cmd: command,
      turnId: typeof ownTurn === "string" ? ownTurn : ctx.turnId,
      cwd: typeof ownCwd === "string" ? ownCwd : ctx.cwd,
      parsedCmd: payload.parsed_cmd,
    };
  }

  // Shape 2: tool call — anything else is not an exec.
  if (rowType !== "response_item" || payloadType !== "function_call") return null;

  const toolName = typeof payload.name === "string" ? payload.name : "";
  if (!CODEX_EXEC_TOOLS.has(toolName)) return null;

  // `arguments` is either a JSON string or an already-decoded value.
  let decoded: unknown = payload.arguments;
  if (typeof decoded === "string") {
    try {
      decoded = JSON.parse(decoded);
    } catch {
      return null;
    }
  }
  // Anything that is not a plain object is treated as "no arguments".
  const args: Record<string, unknown> =
    decoded && typeof decoded === "object" && !Array.isArray(decoded)
      ? (decoded as Record<string, unknown>)
      : {};

  const rawCommand = args.cmd ?? args.command;
  const command =
    typeof rawCommand === "string" ? rawCommand : codexCommandString(rawCommand);
  if (!command) return null;

  const workdir = args.workdir;
  return {
    cmd: command,
    turnId: ctx.turnId,
    cwd: typeof workdir === "string" ? workdir : ctx.cwd,
    parsedCmd: undefined,
  };
}
/**
 * Extract a model-driven skill invocation from a Codex exec row: a shell
 * command that *reads* a skill's `…/skills/<name>/SKILL.md`. Returns zero or
 * one event, tagged `source: "model"` with `hasArgs: false`.
 *
 * A row produces an event only when all of the following hold:
 *   1. `codexExecParams` recognizes it as an exec;
 *   2. the command text matches `CODEX_SKILL_FILE`;
 *   3. the command text does not match `CODEX_WRITE_TO_SKILL`;
 *   4. `classifyCodexExec` says `"read"`, or says `"unknown"` and the command
 *      text matches `CODEX_READ_VERB`;
 *   5. the captured skill name is non-empty after trimming;
 *   6. when `seen` is supplied, the key `sessionId:turnId:skill` (missing
 *      parts rendered as empty strings) has not been seen before.
 *
 * @param row A parsed rollout line of unknown shape.
 * @param ctx Session context: `sessionId`, fallback `cwd`, and the running
 *   `turnId` tracked by the caller.
 * @param seen Optional dedup set, mutated in place. When omitted, no dedup is
 *   applied.
 * @returns An array holding the single event, or an empty array.
 */
export function extractCodexSkillToolEvents(
  row: unknown,
  ctx: { sessionId?: string; cwd?: string; turnId?: string },
  seen?: Set<string>,
): SkillEvent[] {
  if (typeof row !== "object" || row === null || Array.isArray(row)) return [];
  const record = row as Record<string, unknown>;

  const body = record.payload;
  if (typeof body !== "object" || body === null || Array.isArray(body)) return [];

  const exec = codexExecParams(record, body as Record<string, unknown>, ctx);
  if (!exec) return [];

  const command = exec.cmd;
  const match = CODEX_SKILL_FILE.exec(command);
  if (!match) return [];

  // A write into the skill file is never a use, whatever else the command does.
  if (CODEX_WRITE_TO_SKILL.test(command)) return [];

  // Codex's own classification leads; the command text is consulted only when
  // no classification is available.
  const verdict = classifyCodexExec(exec.parsedCmd);
  const isRead =
    verdict === "read" || (verdict === "unknown" && CODEX_READ_VERB.test(command));
  if (!isRead) return [];

  const skill = match[1].trim();
  if (!skill) return [];

  const { sessionId } = ctx;
  const { turnId, cwd } = exec;
  const timestamp =
    typeof record.timestamp === "string" ? record.timestamp : undefined;

  // One event per (session, turn, skill). With no turn id the key degrades to
  // (session, skill), so repeat reads with no turn id collapse to the first
  // one for as long as the same `seen` set is in use.
  if (seen) {
    const dedupKey = [sessionId ?? "", turnId ?? "", skill].join(":");
    if (seen.has(dedupKey)) return [];
    seen.add(dedupKey);
  }

  // Deterministic uuid built only from (session, turn, skill), or from
  // (session, timestamp, skill) when there is no turn id, so the same row
  // yields the same id on every scan. Without a session id, or with neither a
  // turn id nor a timestamp, no uuid is produced.
  let uuid: string | undefined;
  if (sessionId !== undefined) {
    if (turnId !== undefined) {
      uuid = `codex:skill:${sessionId}:${turnId}:${skill}`;
    } else if (timestamp !== undefined) {
      uuid = `codex:skill:${sessionId}:${timestamp}:${skill}`;
    }
  }

  const event = {
    skill,
    source: "model",
    sessionId,
    timestamp,
    cwd,
    uuid,
    hasArgs: false,
  } as const;
  return [event];
}
 /** Shape the event for the wire. Drops raw args unless explicitly enabled. */
 function toWireRow(ev: SkillEvent): Record<string, unknown> {
   const row: Record<string, unknown> = {
@@ -512,6 +736,14 @@ export async function collectAndSendSkillTelemetry(
     // `session_meta` line, which we read from the top regardless of the cursor.
     const codexCtx =
       kind === "codex" ? await readCodexSessionContext(filePath) : undefined;
+    // Per-file dedup of model-driven Codex skill loads: a single skill use
+    // re-reads SKILL.md several times within one turn, so collapse them to one
+    // event per (session, turn, skill). Scoped per file = per Codex session.
+    const codexSeen = kind === "codex" ? new Set<string>() : undefined;
+    // Running turn id for Codex: the `function_call` exec shape carries no
+    // turn_id of its own, so we track the latest one seen (from `turn_context`,
+    // which precedes a turn's execs) and attribute those execs to it.
+    let codexTurnId: string | undefined;
     // Compute the absolute end-byte offset of each line in the read region.
     const segments = content.split("\n");
@@ -529,14 +761,32 @@ export async function collectAndSendSkillTelemetry(
       } catch {
         continue;
       }
+      if (kind === "codex") {
+        const t = codexRowTurnId(parsed);
+        if (t !== undefined) codexTurnId = t;
+      }
       const events =
         kind === "codex"
-          ? extractCodexSkillEvents(parsed, codexCtx ?? {})
+          ? [
+              ...extractCodexSkillEvents(parsed, codexCtx ?? {}),
+              ...extractCodexSkillToolEvents(
+                parsed,
+                { ...(codexCtx ?? {}), turnId: codexTurnId },
+                codexSeen,
+              ),
+            ]
           : extractSkillEvents(parsed);
       for (const ev of events) {
-        // Scope filter: only emit invocations made from the HQ project.
-        if (scopeCwd !== undefined && (ev.cwd === undefined || normalizePath(ev.cwd) !== scopeCwd)) {
-          continue;
+        // Scope filter: only emit invocations made from the HQ project — its
+        // root or any path beneath it (worktrees, nested apps), so a session run
+        // from `<hqRoot>/.claude/worktrees/…` still counts. Sibling repos that
+        // merely share a path prefix (`<hqRoot>-other`) are excluded by the
+        // trailing-slash boundary.
+        if (scopeCwd !== undefined) {
+          const c = ev.cwd === undefined ? undefined : normalizePath(ev.cwd);
+          if (c === undefined || (c !== scopeCwd && !c.startsWith(`${scopeCwd}/`))) {
+            continue;
+          }
         }
         sourced.push({ row: toWireRow(ev), filePath, endOffset });
         fileScans[filePath].eventCount++;