npm - @xiaolei.shawn/mcp-server - Versions diffs - 0.2.0 → 0.2.1 - Mend

@xiaolei.shawn/mcp-server 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

package/README.md +49 -0
package/dist/__tests__/ingest.test.d.ts +1 -0
package/dist/__tests__/ingest.test.js +105 -0
package/dist/adapters/codex.d.ts +2 -0
package/dist/adapters/codex.js +322 -0
package/dist/adapters/cursor.d.ts +2 -0
package/dist/adapters/cursor.js +279 -0
package/dist/adapters/index.d.ts +3 -0
package/dist/adapters/index.js +20 -0
package/dist/adapters/types.d.ts +34 -0
package/dist/adapters/types.js +1 -0
package/dist/config.d.ts +4 -0
package/dist/config.js +20 -0
package/dist/dashboard.js +333 -2
package/dist/event-envelope.d.ts +35 -3
package/dist/index.js +71 -2
package/dist/ingest.d.ts +17 -0
package/dist/ingest.js +419 -0
package/dist/store.d.ts +2 -2
package/dist/store.js +6 -4
package/dist/tools.d.ts +1066 -22
package/dist/tools.js +563 -0
package/package.json +15 -9

package/README.md CHANGED Viewed

@@ -24,6 +24,7 @@ You can point the built-in dashboard server to any static bundle via `AL_DASHBOA
 - Session storage on local disk (`AL_SESSIONS_DIR`)
 - Local gateway API for middleware (`/api/gateway/*`)
 - Export session JSON with normalized snapshot (`agentlens export`)
+- Raw log adapter ingestion (`agentlens ingest`, `/api/ingest`) with duplicate suppression
 ## Install
@@ -55,6 +56,18 @@ agentlens mcp
 - `record_decision`
 - `record_assumption`
 - `record_verification`
+- `record_artifact_created`
+- `record_intent_transition`
+- `record_risk_signal`
+- `record_verification_run`
+- `record_diff_summary`
+- `record_decision_link`
+- `record_assumption_lifecycle`
+- `record_blocker`
+- `record_token_usage_checkpoint`
+- `record_session_quality`
+- `record_replay_bookmark`
+- `record_hotspot`
 - `record_session_end`
 ### Gateway tools
@@ -80,6 +93,7 @@ API endpoints:
 - `POST /api/gateway/begin`
 - `POST /api/gateway/act`
 - `POST /api/gateway/end`
+- `POST /api/ingest`
 If web assets are available (default `../webapp/dist`), they are served by the same server.
@@ -140,6 +154,41 @@ Export by session id:
 agentlens export --session sess_1771256059058_2bd2bd8f --out ./session.json
 ```
+## Ingest raw logs via adapters
+Example: ingest Codex raw JSONL and convert to canonical events:
+```bash
+agentlens ingest --input /path/to/rollout.jsonl --adapter codex_jsonl
+```
+Example: ingest Cursor raw logs that contain `<user_query>`, `<think>`, and `Tool call/Tool result` blocks:
+```bash
+agentlens ingest --input /path/to/cursor-log.txt --adapter cursor_raw
+```
+Auto-detect adapter and merge into an existing session with dedupe:
+```bash
+agentlens ingest --input /path/to/raw.jsonl --adapter auto --merge-session sess_123
+```
+Notes:
+- Ingest writes canonical events to `<session_id>.jsonl`.
+- Original raw content is preserved in `<session_id>.<adapter>.raw.jsonl`.
+- **Merge and dedupe**: When merging into an existing session (e.g. raw log + MCP-canonical events), ingest uses **semantic dedupe** so the same logical event (intent, tool call, artifact, etc.) is not duplicated even if timestamps or payload details differ. Merged events are written in **time order** with contiguous `seq` for accurate recommendations/risk/hotspot analysis.
+- Duplicate events are skipped by default; whether duplicates are matched by exact key or by semantic key depends on whether the ingest merges into an existing session.
+- Codex adapter preserves user prompts, reasoning summaries, assistant outputs, tool calls/results, and normalized token checkpoints.
+- Cursor adapter preserves user queries, `<think>` reasoning traces, tool call/result traces, and token counters when present.
+- If `--merge-session` is omitted, ingest automatically attempts a **fingerprint match**:
+  - Primary signal: normalized user prompt / intent similarity
+  - Secondary signal: timestamp proximity (recent sessions weighted higher)
+  - Min confidence: `AL_INGEST_FINGERPRINT_MIN_CONFIDENCE` (default `0.62`)
+  - Max time window (hours): `AL_INGEST_FINGERPRINT_MAX_WINDOW_HOURS` (default `72`)
+- Ingest output includes `merge_strategy` (`explicit_merge`, `adapted_session_id`, `fingerprint_match`, `new_session`) and optional `merge_confidence`.
 ## Publish checklist
 1. Update version in `package.json`.

package/dist/__tests__/ingest.test.d.ts ADDED Viewed

	@@ -0,0 +1 @@
1	+ export {};

package/dist/__tests__/ingest.test.js ADDED Viewed

@@ -0,0 +1,105 @@
+/**
+ * Ingest and merge logic tests.
+ * Run from mcp-server: pnpm run build && pnpm test
+ */
+import { mkdtempSync, readFileSync, rmSync } from "node:fs";
+import { join } from "node:path";
+import { tmpdir } from "node:os";
+import { describe, it, before, after } from "node:test";
+import assert from "node:assert";
+import { ingestRawContent, ingestRawFile } from "../ingest.js";
+import { readSessionEvents } from "../store.js";
+import { adaptRawContent } from "../adapters/index.js";
+const FIXTURES_DIR = join(process.cwd(), "fixtures");
+function fixturePath(name) {
+    return join(FIXTURES_DIR, name);
+}
+describe("adapters", () => {
+    it("cursor_raw adapts sample and produces session_start, intent, tool_call, session_end", () => {
+        const raw = readFileSync(fixturePath("cursor_sample.txt"), "utf-8");
+        const adapted = adaptRawContent(raw, "cursor_raw");
+        assert.strictEqual(adapted.source, "cursor_raw");
+        assert.ok(adapted.session_id?.startsWith("cursor_"));
+        const kinds = adapted.events.map((e) => e.kind);
+        assert.ok(kinds.includes("session_start"), "has session_start");
+        assert.ok(kinds.includes("intent"), "has intent");
+        assert.ok(kinds.includes("session_end"), "has session_end");
+        const hasToolOrArtifact = kinds.includes("tool_call") || kinds.includes("artifact_created");
+        assert.ok(hasToolOrArtifact, "has tool_call or artifact_created");
+    });
+    it("codex_jsonl adapts sample and produces session_start, intent, session_end", () => {
+        const raw = readFileSync(fixturePath("codex_sample.jsonl"), "utf-8");
+        const adapted = adaptRawContent(raw, "codex_jsonl");
+        assert.strictEqual(adapted.source, "codex_jsonl");
+        const kinds = adapted.events.map((e) => e.kind);
+        assert.ok(kinds.includes("session_start"), "has session_start");
+        assert.ok(kinds.includes("intent"), "has intent");
+        assert.ok(kinds.includes("session_end"), "has session_end");
+    });
+    it("auto adapter selects cursor_raw for cursor-style content", () => {
+        const raw = readFileSync(fixturePath("cursor_sample.txt"), "utf-8");
+        const adapted = adaptRawContent(raw, "auto");
+        assert.strictEqual(adapted.source, "cursor_raw");
+    });
+    it("auto adapter selects codex_jsonl for codex JSONL content", () => {
+        const raw = readFileSync(fixturePath("codex_sample.jsonl"), "utf-8");
+        const adapted = adaptRawContent(raw, "auto");
+        assert.strictEqual(adapted.source, "codex_jsonl");
+    });
+});
+describe("ingest", () => {
+    let sessionsDir;
+    const originalSessionsDir = process.env.AL_SESSIONS_DIR;
+    before(() => {
+        sessionsDir = mkdtempSync(join(tmpdir(), "agentlens-test-"));
+        process.env.AL_SESSIONS_DIR = sessionsDir;
+    });
+    after(() => {
+        process.env.AL_SESSIONS_DIR = originalSessionsDir;
+        rmSync(sessionsDir, { recursive: true, force: true });
+    });
+    it("ingest creates new session and inserts events", () => {
+        const raw = readFileSync(fixturePath("cursor_sample.txt"), "utf-8");
+        const result = ingestRawContent(raw, { adapter: "cursor_raw" });
+        assert.ok(result.session_id);
+        assert.strictEqual(result.adapter, "cursor_raw");
+        assert.strictEqual(result.merge_strategy, "new_session");
+        assert.ok(result.inserted > 0, "inserted > 0");
+        const events = readSessionEvents(result.session_id);
+        assert.strictEqual(events.length, result.inserted);
+    });
+    it("ingest with merge_session_id merges into existing session with semantic dedupe", () => {
+        const raw = readFileSync(fixturePath("cursor_sample.txt"), "utf-8");
+        const first = ingestRawContent(raw, { adapter: "cursor_raw" });
+        const countAfterFirst = readSessionEvents(first.session_id).length;
+        const second = ingestRawContent(raw, {
+            adapter: "cursor_raw",
+            merge_session_id: first.session_id,
+        });
+        assert.strictEqual(second.session_id, first.session_id);
+        assert.strictEqual(second.merge_strategy, "explicit_merge");
+        assert.ok(second.skipped_duplicates > 0, "semantic dedupe skips most events when same content merged");
+        assert.ok(second.inserted <= 1, "at most one new event (e.g. token_usage with different ts) when same content merged");
+        const eventsAfterMerge = readSessionEvents(first.session_id);
+        assert.ok(eventsAfterMerge.length <= countAfterFirst + 1, "event count grows by at most one after merge of same content");
+    });
+    it("merged session is ordered by ts and seq is contiguous", () => {
+        const raw = readFileSync(fixturePath("cursor_sample.txt"), "utf-8");
+        const first = ingestRawContent(raw, { adapter: "cursor_raw" });
+        const sessionId = first.session_id;
+        const events = readSessionEvents(sessionId);
+        for (let i = 1; i < events.length; i++) {
+            const a = events[i - 1];
+            const b = events[i];
+            assert.ok(a.ts <= b.ts || (a.ts === b.ts && (a.seq ?? 0) <= (b.seq ?? 0)), `events ordered: ${a.seq} (${a.ts}) before ${b.seq} (${b.ts})`);
+            assert.strictEqual(b.seq, (a.seq ?? 0) + 1, "seq contiguous");
+        }
+    });
+    it("ingest from file path works", () => {
+        const path = fixturePath("codex_sample.jsonl");
+        const result = ingestRawFile(path, { adapter: "codex_jsonl" });
+        assert.ok(result.session_id);
+        assert.strictEqual(result.adapter, "codex_jsonl");
+        assert.ok(result.inserted > 0);
+    });
+});

package/dist/adapters/codex.d.ts ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ import type { RawAdapter } from "./types.js";
2	+ export declare const codexJsonlAdapter: RawAdapter;

package/dist/adapters/codex.js ADDED Viewed

@@ -0,0 +1,322 @@
+function toObject(value) {
+    return value && typeof value === "object" && !Array.isArray(value)
+        ? value
+        : {};
+}
+function toIso(ts, fallback) {
+    if (typeof ts !== "string" || ts.trim() === "")
+        return fallback;
+    const parsed = new Date(ts);
+    return Number.isNaN(parsed.getTime()) ? fallback : parsed.toISOString();
+}
+function short(value, max = 800) {
+    if (typeof value !== "string")
+        return undefined;
+    const s = value.trim();
+    if (!s)
+        return undefined;
+    return s.length > max ? `${s.slice(0, max)}...` : s;
+}
+function sanitizeText(value, max = 3000) {
+    if (typeof value !== "string")
+        return undefined;
+    const text = value.trim();
+    if (!text)
+        return undefined;
+    return text.length > max ? `${text.slice(0, max)}...` : text;
+}
+function normalizeTokenUsage(value) {
+    const info = toObject(value);
+    const total = toObject(info.total_token_usage);
+    const last = toObject(info.last_token_usage);
+    const primary = Object.keys(last).length > 0 ? last : total;
+    if (Object.keys(primary).length === 0)
+        return undefined;
+    const prompt = primary.input_tokens;
+    const completion = primary.output_tokens;
+    const totalTokens = primary.total_tokens;
+    return {
+        prompt_tokens: typeof prompt === "number" ? prompt : undefined,
+        completion_tokens: typeof completion === "number" ? completion : undefined,
+        total_tokens: typeof totalTokens === "number" ? totalTokens : undefined,
+        input_tokens: typeof primary.input_tokens === "number" ? primary.input_tokens : undefined,
+        cached_input_tokens: typeof primary.cached_input_tokens === "number" ? primary.cached_input_tokens : undefined,
+        output_tokens: typeof primary.output_tokens === "number" ? primary.output_tokens : undefined,
+        reasoning_output_tokens: typeof primary.reasoning_output_tokens === "number" ? primary.reasoning_output_tokens : undefined,
+        source_model_context_window: typeof info.model_context_window === "number" ? info.model_context_window : undefined,
+    };
+}
+function parseLines(content) {
+    return content
+        .split("\n")
+        .map((line) => line.trim())
+        .filter((line) => line.length > 0)
+        .map((line, i) => {
+        let parsed;
+        try {
+            parsed = JSON.parse(line);
+        }
+        catch {
+            throw new Error(`Invalid JSONL line ${i + 1}`);
+        }
+        if (!parsed || typeof parsed !== "object")
+            throw new Error(`Invalid record at line ${i + 1}`);
+        return parsed;
+    });
+}
+function mapResponseItem(record, intentId, now) {
+    const payload = toObject(record.payload);
+    const itemType = short(payload.type) ?? "unknown";
+    const ts = toIso(record.timestamp, now);
+    if (itemType === "function_call" || itemType === "custom_tool_call" || itemType === "web_search_call") {
+        const action = short(payload.name) ?? short(toObject(payload.action).type) ?? itemType;
+        return [
+            {
+                kind: "tool_call",
+                ts,
+                actor: { type: "agent", id: "codex" },
+                scope: intentId ? { intent_id: intentId } : undefined,
+                payload: {
+                    category: itemType === "web_search_call" ? "search" : "tool",
+                    action,
+                    target: short(payload.arguments, 1600) ?? short(payload.input, 1600),
+                    details: {
+                        call_id: short(payload.call_id),
+                        status: short(payload.status),
+                        source: "codex_response_item",
+                    },
+                },
+                derived: true,
+                confidence: 0.85,
+                visibility: "raw",
+            },
+        ];
+    }
+    if (itemType === "function_call_output" || itemType === "custom_tool_call_output") {
+        return [
+            {
+                kind: "tool_call",
+                ts,
+                actor: { type: "tool", id: "codex-tool" },
+                scope: intentId ? { intent_id: intentId } : undefined,
+                payload: {
+                    category: "execution",
+                    action: itemType,
+                    target: short(payload.call_id),
+                    details: {
+                        output: short(payload.output, 3500),
+                        source: "codex_response_item",
+                    },
+                },
+                derived: true,
+                confidence: 0.8,
+                visibility: "raw",
+            },
+        ];
+    }
+    if (itemType === "reasoning") {
+        const summary = Array.isArray(payload.summary)
+            ? payload.summary
+                .map((entry) => {
+                const record = toObject(entry);
+                return sanitizeText(record.text, 500);
+            })
+                .filter((s) => Boolean(s))
+                .join(" ")
+            : undefined;
+        const encrypted = sanitizeText(payload.encrypted_content, 400);
+        if (!summary && !encrypted)
+            return [];
+        return [
+            {
+                kind: "artifact_created",
+                ts,
+                actor: { type: "agent", id: "codex" },
+                scope: intentId ? { intent_id: intentId, module: "reasoning" } : { module: "reasoning" },
+                payload: {
+                    artifact_type: "reasoning",
+                    summary,
+                    encrypted_content_preview: encrypted,
+                    source: "codex_response_item",
+                },
+                derived: true,
+                confidence: 0.9,
+                visibility: "debug",
+            },
+        ];
+    }
+    if (itemType === "message") {
+        const content = Array.isArray(payload.content) ? payload.content : [];
+        const texts = content
+            .map((entry) => sanitizeText(toObject(entry).text))
+            .filter((s) => Boolean(s));
+        const merged = texts.join("\n").trim();
+        if (!merged)
+            return [];
+        return [
+            {
+                kind: "artifact_created",
+                ts,
+                actor: { type: "agent", id: "codex" },
+                scope: intentId ? { intent_id: intentId, module: "assistant_output" } : { module: "assistant_output" },
+                payload: {
+                    artifact_type: "assistant_message",
+                    role: short(payload.role),
+                    phase: short(payload.phase),
+                    text: sanitizeText(merged, 3200),
+                    source: "codex_response_item",
+                },
+                derived: true,
+                confidence: 0.85,
+                visibility: "review",
+            },
+        ];
+    }
+    return [];
+}
+export const codexJsonlAdapter = {
+    name: "codex_jsonl",
+    canAdapt(content) {
+        const sample = content.slice(0, 3000);
+        return sample.includes("\"type\":\"session_meta\"") || sample.includes("\"type\": \"session_meta\"");
+    },
+    adapt(content) {
+        const records = parseLines(content);
+        const now = new Date().toISOString();
+        const sessionMeta = records.find((r) => r.type === "session_meta");
+        if (!sessionMeta) {
+            throw new Error("No session_meta found in Codex JSONL.");
+        }
+        const meta = toObject(sessionMeta.payload);
+        const sessionId = short(meta.id) ?? `codex_${Date.now()}`;
+        const start = toIso(meta.timestamp, toIso(sessionMeta.timestamp, now));
+        let intentCounter = 0;
+        let activeIntentId;
+        const events = [];
+        events.push({
+            kind: "session_start",
+            ts: start,
+            actor: { type: "system", id: "codex" },
+            payload: {
+                goal: short(meta.user_goal) ?? short(meta.goal) ?? "Imported Codex session",
+                user_prompt: short(meta.user_prompt, 3000),
+                source: "codex_jsonl",
+            },
+            visibility: "review",
+            derived: true,
+            confidence: 0.95,
+        });
+        for (const record of records) {
+            if (record.type === "event_msg") {
+                const p = toObject(record.payload);
+                if (p.type === "user_message") {
+                    const message = short(p.message, 3000);
+                    if (message) {
+                        intentCounter += 1;
+                        activeIntentId = `intent_${sessionId}_${intentCounter}`;
+                        events.push({
+                            kind: "intent",
+                            ts: toIso(record.timestamp, now),
+                            actor: { type: "user", id: "codex-user" },
+                            scope: { intent_id: activeIntentId },
+                            payload: {
+                                intent_id: activeIntentId,
+                                title: message.split("\n")[0]?.slice(0, 120) || "User message",
+                                description: message,
+                                source: "codex_event_msg",
+                            },
+                            visibility: "review",
+                            derived: true,
+                            confidence: 0.85,
+                        });
+                    }
+                }
+                else if (p.type === "token_count") {
+                    const usage = normalizeTokenUsage(p.info);
+                    events.push({
+                        kind: "token_usage_checkpoint",
+                        ts: toIso(record.timestamp, now),
+                        actor: { type: "system", id: "codex" },
+                        scope: activeIntentId ? { intent_id: activeIntentId, module: "llm" } : { module: "llm" },
+                        payload: {
+                            source: "codex_event_msg",
+                            usage,
+                            raw: p.info,
+                        },
+                        visibility: "raw",
+                        derived: true,
+                        confidence: 0.75,
+                    });
+                }
+                else if (p.type === "agent_reasoning") {
+                    const reasoning = sanitizeText(p.text, 3500);
+                    if (reasoning) {
+                        events.push({
+                            kind: "artifact_created",
+                            ts: toIso(record.timestamp, now),
+                            actor: { type: "agent", id: "codex" },
+                            scope: activeIntentId
+                                ? { intent_id: activeIntentId, module: "reasoning" }
+                                : { module: "reasoning" },
+                            payload: {
+                                artifact_type: "reasoning",
+                                text: reasoning,
+                                source: "codex_event_msg",
+                            },
+                            visibility: "debug",
+                            derived: true,
+                            confidence: 0.9,
+                        });
+                    }
+                }
+                else if (p.type === "agent_message") {
+                    const assistantMessage = sanitizeText(p.message, 3500);
+                    if (assistantMessage) {
+                        events.push({
+                            kind: "artifact_created",
+                            ts: toIso(record.timestamp, now),
+                            actor: { type: "agent", id: "codex" },
+                            scope: activeIntentId
+                                ? { intent_id: activeIntentId, module: "assistant_output" }
+                                : { module: "assistant_output" },
+                            payload: {
+                                artifact_type: "assistant_message",
+                                text: assistantMessage,
+                                source: "codex_event_msg",
+                            },
+                            visibility: "review",
+                            derived: true,
+                            confidence: 0.85,
+                        });
+                    }
+                }
+            }
+            else if (record.type === "response_item") {
+                events.push(...mapResponseItem(record, activeIntentId, now));
+            }
+        }
+        const endTs = toIso(records[records.length - 1]?.timestamp, now);
+        events.push({
+            kind: "session_end",
+            ts: endTs,
+            actor: { type: "system", id: "codex" },
+            payload: {
+                outcome: "unknown",
+                summary: "Imported from raw Codex JSONL",
+                source: "codex_jsonl",
+            },
+            visibility: "review",
+            derived: true,
+            confidence: 0.9,
+        });
+        return {
+            source: "codex_jsonl",
+            session_id: sessionId,
+            goal: short(meta.user_goal) ?? short(meta.goal),
+            user_prompt: short(meta.user_prompt, 3000),
+            started_at: start,
+            ended_at: endTs,
+            events,
+        };
+    },
+};

package/dist/adapters/cursor.d.ts ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ import type { RawAdapter } from "./types.js";
2	+ export declare const cursorRawAdapter: RawAdapter;