@xiaolei.shawn/mcp-server 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -24,6 +24,7 @@ You can point the built-in dashboard server to any static bundle via `AL_DASHBOA
24
24
  - Session storage on local disk (`AL_SESSIONS_DIR`)
25
25
  - Local gateway API for middleware (`/api/gateway/*`)
26
26
  - Export session JSON with normalized snapshot (`agentlens export`)
27
+ - Raw log adapter ingestion (`agentlens ingest`, `/api/ingest`) with duplicate suppression
27
28
 
28
29
  ## Install
29
30
 
@@ -55,6 +56,18 @@ agentlens mcp
55
56
  - `record_decision`
56
57
  - `record_assumption`
57
58
  - `record_verification`
59
+ - `record_artifact_created`
60
+ - `record_intent_transition`
61
+ - `record_risk_signal`
62
+ - `record_verification_run`
63
+ - `record_diff_summary`
64
+ - `record_decision_link`
65
+ - `record_assumption_lifecycle`
66
+ - `record_blocker`
67
+ - `record_token_usage_checkpoint`
68
+ - `record_session_quality`
69
+ - `record_replay_bookmark`
70
+ - `record_hotspot`
58
71
  - `record_session_end`
59
72
 
60
73
  ### Gateway tools
@@ -80,6 +93,7 @@ API endpoints:
80
93
  - `POST /api/gateway/begin`
81
94
  - `POST /api/gateway/act`
82
95
  - `POST /api/gateway/end`
96
+ - `POST /api/ingest`
83
97
 
84
98
  If web assets are available (default `../webapp/dist`), they are served by the same server.
85
99
 
@@ -140,6 +154,41 @@ Export by session id:
140
154
  agentlens export --session sess_1771256059058_2bd2bd8f --out ./session.json
141
155
  ```
142
156
 
157
+ ## Ingest raw logs via adapters
158
+
159
+ Example: ingest Codex raw JSONL and convert to canonical events:
160
+
161
+ ```bash
162
+ agentlens ingest --input /path/to/rollout.jsonl --adapter codex_jsonl
163
+ ```
164
+
165
+ Example: ingest Cursor raw logs that contain `<user_query>`, `<think>`, and `Tool call/Tool result` blocks:
166
+
167
+ ```bash
168
+ agentlens ingest --input /path/to/cursor-log.txt --adapter cursor_raw
169
+ ```
170
+
171
+ Auto-detect the adapter and merge into an existing session, skipping duplicate events:
172
+
173
+ ```bash
174
+ agentlens ingest --input /path/to/raw.jsonl --adapter auto --merge-session sess_123
175
+ ```
176
+
177
+ Notes:
178
+
179
+ - Ingest writes canonical events to `<session_id>.jsonl`.
180
+ - Original raw content is preserved in `<session_id>.<adapter>.raw.jsonl`.
181
+ - **Merge and dedupe**: When merging into an existing session (e.g. raw log + MCP-canonical events), ingest uses **semantic dedupe** so the same logical event (intent, tool call, artifact, etc.) is not duplicated even if timestamps or payload details differ. Merged events are written in **time order** with contiguous `seq` for accurate recommendations/risk/hotspot analysis.
182
+ - Duplicate events are skipped by default, matched by exact key or, when merging into an existing session, by semantic key.
183
+ - Codex adapter preserves user prompts, reasoning summaries, assistant outputs, tool calls/results, and normalized token checkpoints.
184
+ - Cursor adapter preserves user queries, `<think>` reasoning traces, tool call/result traces, and token counters when present.
185
+ - If `--merge-session` is omitted, ingest automatically attempts a **fingerprint match** against existing sessions:
186
+ - Primary signal: normalized user prompt / intent similarity
187
+ - Secondary signal: timestamp proximity (recent sessions weighted higher)
188
+ - Min confidence: `AL_INGEST_FINGERPRINT_MIN_CONFIDENCE` (default `0.62`)
189
+ - Max time window (hours): `AL_INGEST_FINGERPRINT_MAX_WINDOW_HOURS` (default `72`)
190
+ - Ingest output includes `merge_strategy` (`explicit_merge`, `adapted_session_id`, `fingerprint_match`, `new_session`) and optional `merge_confidence`.
191
+
143
192
  ## Publish checklist
144
193
 
145
194
  1. Update version in `package.json`.
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,144 @@
1
+ /**
2
+ * Ingest and merge logic tests.
3
+ * Run from mcp-server: pnpm run build && pnpm test
4
+ */
5
+ import { mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs";
6
+ import { join } from "node:path";
7
+ import { tmpdir } from "node:os";
8
+ import { describe, it, before, after } from "node:test";
9
+ import assert from "node:assert";
10
+ import { ingestRawContent, ingestRawFile } from "../ingest.js";
11
+ import { readSessionEvents } from "../store.js";
12
+ import { adaptRawContent } from "../adapters/index.js";
13
+ const FIXTURES_DIR = join(process.cwd(), "fixtures");
14
+ function fixturePath(name) { // Resolve a fixture file name to its path inside FIXTURES_DIR.
15
+ return join(FIXTURES_DIR, name); // FIXTURES_DIR is derived from process.cwd(), so the result depends on the directory the tests are launched from.
16
+ }
17
+ describe("adapters", () => { // Adapter-level checks: each test feeds a fixture file to adaptRawContent and inspects the adapted result.
18
+ it("cursor_raw adapts sample and produces session_start, intent, tool_call, session_end", () => {
19
+ const raw = readFileSync(fixturePath("cursor_sample.txt"), "utf-8");
20
+ const adapted = adaptRawContent(raw, "cursor_raw");
21
+ assert.strictEqual(adapted.source, "cursor_raw");
22
+ assert.ok(adapted.session_id?.startsWith("cursor_")); // optional chaining: a missing session_id evaluates to undefined and fails the assertion
23
+ const kinds = adapted.events.map((e) => e.kind);
24
+ assert.ok(kinds.includes("session_start"), "has session_start");
25
+ assert.ok(kinds.includes("intent"), "has intent");
26
+ assert.ok(kinds.includes("session_end"), "has session_end");
27
+ const hasToolOrArtifact = kinds.includes("tool_call") || kinds.includes("artifact_created"); // looser than the test title: artifact_created alone also satisfies this check
28
+ assert.ok(hasToolOrArtifact, "has tool_call or artifact_created");
29
+ });
30
+ it("codex_jsonl adapts sample and produces session_start, intent, session_end", () => {
31
+ const raw = readFileSync(fixturePath("codex_sample.jsonl"), "utf-8");
32
+ const adapted = adaptRawContent(raw, "codex_jsonl");
33
+ assert.strictEqual(adapted.source, "codex_jsonl");
34
+ const kinds = adapted.events.map((e) => e.kind);
35
+ assert.ok(kinds.includes("session_start"), "has session_start");
36
+ assert.ok(kinds.includes("intent"), "has intent");
37
+ assert.ok(kinds.includes("session_end"), "has session_end");
38
+ });
39
+ it("auto adapter selects cursor_raw for cursor-style content", () => { // "auto" leaves the adapter choice to adaptRawContent; only the reported source is checked
40
+ const raw = readFileSync(fixturePath("cursor_sample.txt"), "utf-8");
41
+ const adapted = adaptRawContent(raw, "auto");
42
+ assert.strictEqual(adapted.source, "cursor_raw");
43
+ });
44
+ it("auto adapter selects codex_jsonl for codex JSONL content", () => {
45
+ const raw = readFileSync(fixturePath("codex_sample.jsonl"), "utf-8");
46
+ const adapted = adaptRawContent(raw, "auto");
47
+ assert.strictEqual(adapted.source, "codex_jsonl");
48
+ });
49
+ });
50
+ describe("ingest", () => {
51
+ let sessionsDir;
52
+ const originalSessionsDir = process.env.AL_SESSIONS_DIR;
53
+ before(() => {
54
+ sessionsDir = mkdtempSync(join(tmpdir(), "agentlens-test-"));
55
+ process.env.AL_SESSIONS_DIR = sessionsDir;
56
+ });
57
+ after(() => {
58
+ process.env.AL_SESSIONS_DIR = originalSessionsDir;
59
+ rmSync(sessionsDir, { recursive: true, force: true });
60
+ });
61
+ it("ingest creates new session and inserts events", () => {
62
+ const raw = readFileSync(fixturePath("cursor_sample.txt"), "utf-8");
63
+ const result = ingestRawContent(raw, { adapter: "cursor_raw" });
64
+ assert.ok(result.session_id);
65
+ assert.strictEqual(result.adapter, "cursor_raw");
66
+ assert.strictEqual(result.merge_strategy, "new_session");
67
+ assert.ok(result.inserted > 0, "inserted > 0");
68
+ const events = readSessionEvents(result.session_id);
69
+ assert.strictEqual(events.length, result.inserted);
70
+ });
71
+ it("ingest with merge_session_id merges into existing session with semantic dedupe", () => {
72
+ const raw = readFileSync(fixturePath("cursor_sample.txt"), "utf-8");
73
+ const first = ingestRawContent(raw, { adapter: "cursor_raw" });
74
+ const countAfterFirst = readSessionEvents(first.session_id).length;
75
+ const second = ingestRawContent(raw, {
76
+ adapter: "cursor_raw",
77
+ merge_session_id: first.session_id,
78
+ });
79
+ assert.strictEqual(second.session_id, first.session_id);
80
+ assert.strictEqual(second.merge_strategy, "explicit_merge");
81
+ assert.ok(second.skipped_duplicates > 0, "semantic dedupe skips most events when same content merged");
82
+ assert.ok(second.inserted <= 1, "at most one new event (e.g. token_usage with different ts) when same content merged");
83
+ const eventsAfterMerge = readSessionEvents(first.session_id);
84
+ assert.ok(eventsAfterMerge.length <= countAfterFirst + 1, "event count grows by at most one after merge of same content");
85
+ });
86
+ it("merged session is ordered by ts and seq is contiguous", () => {
87
+ const raw = readFileSync(fixturePath("cursor_sample.txt"), "utf-8");
88
+ const first = ingestRawContent(raw, { adapter: "cursor_raw" });
89
+ const sessionId = first.session_id;
90
+ const events = readSessionEvents(sessionId);
91
+ for (let i = 1; i < events.length; i++) {
92
+ const a = events[i - 1];
93
+ const b = events[i];
94
+ assert.ok(a.ts <= b.ts || (a.ts === b.ts && (a.seq ?? 0) <= (b.seq ?? 0)), `events ordered: ${a.seq} (${a.ts}) before ${b.seq} (${b.ts})`);
95
+ assert.strictEqual(b.seq, (a.seq ?? 0) + 1, "seq contiguous");
96
+ }
97
+ });
98
+ it("ingest from file path works", () => {
99
+ const path = fixturePath("codex_sample.jsonl");
100
+ const result = ingestRawFile(path, { adapter: "codex_jsonl" });
101
+ assert.ok(result.session_id);
102
+ assert.strictEqual(result.adapter, "codex_jsonl");
103
+ assert.ok(result.inserted > 0);
104
+ });
105
+ it("merge raw log from different day: time window filters out all raw events", () => {
106
+ const sessionId = "sess_merge_target_time_window";
107
+ const sessionStartTs = "2026-03-02T20:55:12.151Z";
108
+ const sessionEndTs = "2026-03-02T20:57:42.004Z";
109
+ const sessionLines = [
110
+ JSON.stringify({
111
+ id: `${sessionId}:1:aa`,
112
+ session_id: sessionId,
113
+ seq: 1,
114
+ ts: sessionStartTs,
115
+ kind: "session_start",
116
+ actor: { type: "agent" },
117
+ payload: { goal: "Test" },
118
+ schema_version: 1,
119
+ }),
120
+ JSON.stringify({
121
+ id: `${sessionId}:2:bb`,
122
+ session_id: sessionId,
123
+ seq: 2,
124
+ ts: sessionEndTs,
125
+ kind: "session_end",
126
+ actor: { type: "agent" },
127
+ payload: { outcome: "completed" },
128
+ schema_version: 1,
129
+ }),
130
+ ].join("\n") + "\n";
131
+ writeFileSync(join(sessionsDir, `${sessionId}.jsonl`), sessionLines, "utf-8");
132
+ const raw = readFileSync(fixturePath("codex_sample.jsonl"), "utf-8");
133
+ const result = ingestRawContent(raw, {
134
+ adapter: "codex_jsonl",
135
+ merge_session_id: sessionId,
136
+ });
137
+ assert.strictEqual(result.session_id, sessionId);
138
+ assert.strictEqual(result.merge_strategy, "explicit_merge");
139
+ assert.strictEqual(result.inserted, 0, "no raw events fall in Mar 2 window");
140
+ assert.ok(result.filtered_out_by_time_window !== undefined && result.filtered_out_by_time_window > 0, "raw events (Feb 24) were filtered out by time window");
141
+ const eventsAfter = readSessionEvents(sessionId);
142
+ assert.strictEqual(eventsAfter.length, 2, "session still has only session_start and session_end");
143
+ });
144
+ });
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,118 @@
1
+ import assert from "node:assert";
2
+ import { describe, it } from "node:test";
3
+ import { deriveIntentTokenBreakdown, generateFollowupArtifacts } from "../local-analysis.js";
4
+ function baseEvent(overrides) { // Build a test event from fixed defaults; pass only the fields a test needs to change.
5
+ return {
6
+ id: "e-1",
7
+ session_id: "sess-test",
8
+ seq: 1,
9
+ ts: "2026-03-03T00:00:00.000Z", // every event keeps this timestamp unless a caller overrides ts
10
+ kind: "intent",
11
+ actor: { type: "agent" },
12
+ payload: {},
13
+ schema_version: 1,
14
+ ...overrides, // spread last so caller-supplied fields replace the defaults above
15
+ };
16
+ }
17
+ describe("local-analysis", () => { // Exercises generateFollowupArtifacts and deriveIntentTokenBreakdown on hand-built event lists.
18
+ it("generates per-intent artifacts with deterministic template", () => {
19
+ const events = [ // six events, all scoped to intent_a; none overrides ts, so only seq distinguishes them
20
+ baseEvent({
21
+ id: "i1",
22
+ seq: 1,
23
+ kind: "intent",
24
+ scope: { intent_id: "intent_a" },
25
+ payload: { intent_id: "intent_a", title: "Implement feature A" },
26
+ }),
27
+ baseEvent({ // t1..t3 are three identical read_file calls on src/a.ts (presumably to model repeated reads; confirm against local-analysis)
28
+ id: "t1",
29
+ seq: 2,
30
+ kind: "tool_call",
31
+ scope: { intent_id: "intent_a" },
32
+ payload: { category: "tool", action: "read_file", target: "src/a.ts" },
33
+ }),
34
+ baseEvent({
35
+ id: "t2",
36
+ seq: 3,
37
+ kind: "tool_call",
38
+ scope: { intent_id: "intent_a" },
39
+ payload: { category: "tool", action: "read_file", target: "src/a.ts" },
40
+ }),
41
+ baseEvent({
42
+ id: "t3",
43
+ seq: 4,
44
+ kind: "tool_call",
45
+ scope: { intent_id: "intent_a" },
46
+ payload: { category: "tool", action: "read_file", target: "src/a.ts" },
47
+ }),
48
+ baseEvent({
49
+ id: "v1",
50
+ seq: 5,
51
+ kind: "verification",
52
+ scope: { intent_id: "intent_a" },
53
+ payload: { type: "test", result: "fail" },
54
+ }),
55
+ baseEvent({
56
+ id: "r1",
57
+ seq: 6,
58
+ kind: "risk_signal",
59
+ scope: { intent_id: "intent_a", file: "src/a.ts" },
60
+ payload: { level: "high", reasons: ["regression risk"] },
61
+ }),
62
+ ];
63
+ const result = generateFollowupArtifacts(events, {
64
+ mode: "per_intent",
65
+ strictness: "soft",
66
+ focus: "risk",
67
+ });
68
+ assert.equal(result.artifacts.length, 1); // a single intent in the input is expected to yield a single artifact
69
+ assert.equal(result.artifacts[0].intent_id, "intent_a");
70
+ assert.equal(result.artifacts[0].rule_template_id, "high_risk_guardrail");
71
+ assert.ok(result.artifacts[0].value_claims.risk_mitigation.length > 0);
72
+ });
73
+ it("derives token breakdown context/output split", () => {
74
+ const events = [
75
+ baseEvent({
76
+ id: "i1",
77
+ seq: 1,
78
+ kind: "intent",
79
+ scope: { intent_id: "intent_a" },
80
+ payload: { intent_id: "intent_a", title: "Intent A" },
81
+ }),
82
+ baseEvent({
83
+ id: "ctx1",
84
+ seq: 2,
85
+ kind: "tool_call",
86
+ scope: { intent_id: "intent_a" },
87
+ payload: { category: "search", action: "search_docs", target: "api docs" },
88
+ }),
89
+ baseEvent({
90
+ id: "tok1",
91
+ seq: 3,
92
+ kind: "token_usage_checkpoint",
93
+ scope: { intent_id: "intent_a" },
94
+ payload: { usage: { prompt_tokens: 100, completion_tokens: 50, total_tokens: 150 } },
95
+ }),
96
+ baseEvent({
97
+ id: "out1",
98
+ seq: 4,
99
+ kind: "file_op",
100
+ scope: { intent_id: "intent_a", file: "src/a.ts" },
101
+ payload: { category: "file", action: "edit", target: "src/a.ts" },
102
+ }),
103
+ baseEvent({
104
+ id: "tok2",
105
+ seq: 5,
106
+ kind: "token_usage_checkpoint",
107
+ scope: { intent_id: "intent_a" },
108
+ payload: { usage: { total_tokens: 90 } }, // only a total here, no prompt/completion split
109
+ }),
110
+ ];
111
+ const result = deriveIntentTokenBreakdown(events);
112
+ assert.equal(result.intent_breakdown.length, 1);
113
+ assert.equal(result.totals.total_tokens, 240); // 150 (tok1) + 90 (tok2)
114
+ assert.equal(result.intent_breakdown[0].context_tokens + // the three buckets together must account for every token
115
+ result.intent_breakdown[0].output_tokens +
116
+ result.intent_breakdown[0].unknown_tokens, 240);
117
+ });
118
+ });
@@ -0,0 +1,2 @@
1
+ import type { RawAdapter } from "./types.js";
2
+ export declare const codexJsonlAdapter: RawAdapter; // Type-only declaration of the Codex JSONL adapter value; presumably the adapter selected by the "codex_jsonl" adapter name (confirm in adapters/index).