@xiaolei.shawn/mcp-server 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -24,6 +24,7 @@ You can point the built-in dashboard server to any static bundle via `AL_DASHBOA
24
24
  - Session storage on local disk (`AL_SESSIONS_DIR`)
25
25
  - Local gateway API for middleware (`/api/gateway/*`)
26
26
  - Export session JSON with normalized snapshot (`agentlens export`)
27
+ - Raw log adapter ingestion (`agentlens ingest`, `/api/ingest`) with duplicate suppression
27
28
 
28
29
  ## Install
29
30
 
@@ -55,6 +56,18 @@ agentlens mcp
55
56
  - `record_decision`
56
57
  - `record_assumption`
57
58
  - `record_verification`
59
+ - `record_artifact_created`
60
+ - `record_intent_transition`
61
+ - `record_risk_signal`
62
+ - `record_verification_run`
63
+ - `record_diff_summary`
64
+ - `record_decision_link`
65
+ - `record_assumption_lifecycle`
66
+ - `record_blocker`
67
+ - `record_token_usage_checkpoint`
68
+ - `record_session_quality`
69
+ - `record_replay_bookmark`
70
+ - `record_hotspot`
58
71
  - `record_session_end`
59
72
 
60
73
  ### Gateway tools
@@ -80,6 +93,7 @@ API endpoints:
80
93
  - `POST /api/gateway/begin`
81
94
  - `POST /api/gateway/act`
82
95
  - `POST /api/gateway/end`
96
+ - `POST /api/ingest`
83
97
 
84
98
  If web assets are available (default `../webapp/dist`), they are served by the same server.
85
99
 
@@ -140,6 +154,41 @@ Export by session id:
140
154
  agentlens export --session sess_1771256059058_2bd2bd8f --out ./session.json
141
155
  ```
142
156
 
157
+ ## Ingest raw logs via adapters
158
+
159
+ Example: ingest Codex raw JSONL and convert to canonical events:
160
+
161
+ ```bash
162
+ agentlens ingest --input /path/to/rollout.jsonl --adapter codex_jsonl
163
+ ```
164
+
165
+ Example: ingest Cursor raw logs that contain `<user_query>`, `<think>`, and `Tool call/Tool result` blocks:
166
+
167
+ ```bash
168
+ agentlens ingest --input /path/to/cursor-log.txt --adapter cursor_raw
169
+ ```
170
+
171
+ Auto-detect adapter and merge into an existing session with dedupe:
172
+
173
+ ```bash
174
+ agentlens ingest --input /path/to/raw.jsonl --adapter auto --merge-session sess_123
175
+ ```
176
+
177
+ Notes:
178
+
179
+ - Ingest writes canonical events to `<session_id>.jsonl`.
180
+ - Original raw content is preserved in `<session_id>.<adapter>.raw.jsonl`.
181
+ - **Merge and dedupe**: When merging into an existing session (e.g. raw log + MCP-canonical events), ingest uses **semantic dedupe** so the same logical event (intent, tool call, artifact, etc.) is not duplicated even if timestamps or payload details differ. Merged events are written in **time order** with contiguous `seq` for accurate recommendations/risk/hotspot analysis.
182
+ - Duplicate events are skipped by default (matched by exact or semantic key, depending on whether the ingest is a merge).
183
+ - Codex adapter preserves user prompts, reasoning summaries, assistant outputs, tool calls/results, and normalized token checkpoints.
184
+ - Cursor adapter preserves user queries, `<think>` reasoning traces, tool call/result traces, and token counters when present.
185
+ - If `--merge-session` is omitted, ingest automatically attempts a **fingerprint match** against existing sessions:
186
+ - Primary signal: normalized user prompt / intent similarity
187
+ - Secondary signal: timestamp proximity (recent sessions weighted higher)
188
+ - Min confidence: `AL_INGEST_FINGERPRINT_MIN_CONFIDENCE` (default `0.62`)
189
+ - Max time window (hours): `AL_INGEST_FINGERPRINT_MAX_WINDOW_HOURS` (default `72`)
190
+ - Ingest output includes `merge_strategy` (`explicit_merge`, `adapted_session_id`, `fingerprint_match`, `new_session`) and optional `merge_confidence`.
191
+
143
192
  ## Publish checklist
144
193
 
145
194
  1. Update version in `package.json`.
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,105 @@
1
+ /**
2
+ * Ingest and merge logic tests.
3
+ * Run from mcp-server: pnpm run build && pnpm test
4
+ */
5
+ import { mkdtempSync, readFileSync, rmSync } from "node:fs";
6
+ import { join } from "node:path";
7
+ import { tmpdir } from "node:os";
8
+ import { describe, it, before, after } from "node:test";
9
+ import assert from "node:assert";
10
+ import { ingestRawContent, ingestRawFile } from "../ingest.js";
11
+ import { readSessionEvents } from "../store.js";
12
+ import { adaptRawContent } from "../adapters/index.js";
13
/**
 * Directory that holds the sample raw logs used by these tests.
 * It is resolved against the current working directory, so the suite has to
 * be started from the directory that contains `fixtures/`.
 */
const FIXTURES_DIR = join(process.cwd(), "fixtures");
/**
 * Builds the path of a fixture file.
 *
 * @param name File name inside the fixtures directory.
 * @returns The fixture path joined onto `FIXTURES_DIR`.
 */
function fixturePath(name) {
    const resolved = join(FIXTURES_DIR, name);
    return resolved;
}
17
/**
 * Adapter-level checks: each sample fixture is passed straight to
 * `adaptRawContent` and the source name and produced event kinds are inspected.
 */
describe("adapters", () => {
    /** Reads a fixture as UTF-8 and runs it through the requested adapter. */
    const adaptFixture = (fixtureName, adapterName) => {
        const raw = readFileSync(fixturePath(fixtureName), "utf-8");
        return adaptRawContent(raw, adapterName);
    };
    /** Lists the `kind` of every adapted event, in order. */
    const kindsOf = (adapted) => adapted.events.map((event) => event.kind);
    /** Asserts that each of the session-framing kinds is present. */
    const assertFramingKinds = (kinds) => {
        for (const required of ["session_start", "intent", "session_end"]) {
            assert.ok(kinds.includes(required), `has ${required}`);
        }
    };
    it("cursor_raw adapts sample and produces session_start, intent, tool_call, session_end", () => {
        const adapted = adaptFixture("cursor_sample.txt", "cursor_raw");
        assert.strictEqual(adapted.source, "cursor_raw");
        assert.ok(adapted.session_id?.startsWith("cursor_"));
        const kinds = kindsOf(adapted);
        assertFramingKinds(kinds);
        // Either kind is accepted as evidence that tool activity was captured.
        const hasToolOrArtifact = kinds.some((kind) => kind === "tool_call" || kind === "artifact_created");
        assert.ok(hasToolOrArtifact, "has tool_call or artifact_created");
    });
    it("codex_jsonl adapts sample and produces session_start, intent, session_end", () => {
        const adapted = adaptFixture("codex_sample.jsonl", "codex_jsonl");
        assert.strictEqual(adapted.source, "codex_jsonl");
        assertFramingKinds(kindsOf(adapted));
    });
    it("auto adapter selects cursor_raw for cursor-style content", () => {
        const adapted = adaptFixture("cursor_sample.txt", "auto");
        assert.strictEqual(adapted.source, "cursor_raw");
    });
    it("auto adapter selects codex_jsonl for codex JSONL content", () => {
        const adapted = adaptFixture("codex_sample.jsonl", "auto");
        assert.strictEqual(adapted.source, "codex_jsonl");
    });
});
50
/**
 * End-to-end ingest checks. The suite creates a throwaway directory, points
 * `AL_SESSIONS_DIR` at it while the tests run, and restores the previous
 * environment afterwards.
 */
describe("ingest", () => {
    let sessionsDir;
    const originalSessionsDir = process.env.AL_SESSIONS_DIR;
    before(() => {
        sessionsDir = mkdtempSync(join(tmpdir(), "agentlens-test-"));
        process.env.AL_SESSIONS_DIR = sessionsDir;
    });
    after(() => {
        // Values assigned to process.env are coerced to strings, so writing back
        // `undefined` would leave the literal text "undefined" in the variable.
        // An originally-unset variable has to be deleted instead.
        if (originalSessionsDir === undefined) {
            delete process.env.AL_SESSIONS_DIR;
        }
        else {
            process.env.AL_SESSIONS_DIR = originalSessionsDir;
        }
        // `sessionsDir` stays undefined when the `before` hook failed early.
        if (sessionsDir) {
            rmSync(sessionsDir, { recursive: true, force: true });
        }
    });
    it("ingest creates new session and inserts events", () => {
        const raw = readFileSync(fixturePath("cursor_sample.txt"), "utf-8");
        const result = ingestRawContent(raw, { adapter: "cursor_raw" });
        assert.ok(result.session_id);
        assert.strictEqual(result.adapter, "cursor_raw");
        assert.strictEqual(result.merge_strategy, "new_session");
        assert.ok(result.inserted > 0, "inserted > 0");
        // Everything reported as inserted must be readable back from the store.
        const events = readSessionEvents(result.session_id);
        assert.strictEqual(events.length, result.inserted);
    });
    it("ingest with merge_session_id merges into existing session with semantic dedupe", () => {
        const raw = readFileSync(fixturePath("cursor_sample.txt"), "utf-8");
        const first = ingestRawContent(raw, { adapter: "cursor_raw" });
        const countAfterFirst = readSessionEvents(first.session_id).length;
        // Re-ingest the identical content into the session that was just created.
        const second = ingestRawContent(raw, {
            adapter: "cursor_raw",
            merge_session_id: first.session_id,
        });
        assert.strictEqual(second.session_id, first.session_id);
        assert.strictEqual(second.merge_strategy, "explicit_merge");
        assert.ok(second.skipped_duplicates > 0, "semantic dedupe skips most events when same content merged");
        assert.ok(second.inserted <= 1, "at most one new event (e.g. token_usage with different ts) when same content merged");
        const eventsAfterMerge = readSessionEvents(first.session_id);
        assert.ok(eventsAfterMerge.length <= countAfterFirst + 1, "event count grows by at most one after merge of same content");
    });
    // NOTE(review): despite the title, this test performs a single ingest and no
    // merge; consider adding a merge step so the ordering claim covers merged data.
    it("merged session is ordered by ts and seq is contiguous", () => {
        const raw = readFileSync(fixturePath("cursor_sample.txt"), "utf-8");
        const first = ingestRawContent(raw, { adapter: "cursor_raw" });
        const sessionId = first.session_id;
        const events = readSessionEvents(sessionId);
        for (let i = 1; i < events.length; i++) {
            const a = events[i - 1];
            const b = events[i];
            // Strictly earlier timestamp, or an equal timestamp with seq as tie-breaker.
            // Assumes `ts` values compare chronologically with `<` — TODO confirm.
            assert.ok(a.ts < b.ts || (a.ts === b.ts && (a.seq ?? 0) <= (b.seq ?? 0)), `events ordered: ${a.seq} (${a.ts}) before ${b.seq} (${b.ts})`);
            assert.strictEqual(b.seq, (a.seq ?? 0) + 1, "seq contiguous");
        }
    });
    it("ingest from file path works", () => {
        const path = fixturePath("codex_sample.jsonl");
        const result = ingestRawFile(path, { adapter: "codex_jsonl" });
        assert.ok(result.session_id);
        assert.strictEqual(result.adapter, "codex_jsonl");
        assert.ok(result.inserted > 0);
    });
});
@@ -0,0 +1,2 @@
1
+ import type { RawAdapter } from "./types.js";
2
+ export declare const codexJsonlAdapter: RawAdapter;
@@ -0,0 +1,322 @@
1
/**
 * Coerces an arbitrary value into something safe to read properties from.
 *
 * @param value Any value, typically a field of a parsed JSON record.
 * @returns `value` itself when it is a non-null, non-array object; otherwise a
 *   fresh empty object.
 */
function toObject(value) {
    if (value === null || typeof value !== "object" || Array.isArray(value)) {
        return {};
    }
    return value;
}
6
/**
 * Normalizes a timestamp string to ISO-8601 via `Date`.
 *
 * @param ts Candidate timestamp; anything that is not a non-blank string is rejected.
 * @param fallback Value returned when `ts` is rejected or cannot be parsed.
 * @returns `new Date(ts).toISOString()` for a parseable string, else `fallback`.
 */
function toIso(ts, fallback) {
    const usable = typeof ts === "string" && ts.trim() !== "";
    if (usable) {
        const parsed = new Date(ts);
        // An unparseable string yields an Invalid Date whose time value is NaN.
        if (!Number.isNaN(parsed.getTime())) {
            return parsed.toISOString();
        }
    }
    return fallback;
}
12
/**
 * Trims a string and caps its length, appending `...` when it was cut.
 *
 * @param value Candidate text; non-strings are rejected.
 * @param max Maximum number of UTF-16 code units kept before the ellipsis (default 800).
 * @returns The trimmed text (possibly truncated), or `undefined` when `value`
 *   is not a string or is blank after trimming.
 */
function short(value, max = 800) {
    if (typeof value !== "string")
        return undefined;
    const s = value.trim();
    if (!s)
        return undefined;
    if (s.length <= max)
        return s;
    let end = max;
    // Cutting right after a high surrogate would strand half of an astral
    // character (e.g. an emoji) and leave malformed UTF-16 in the output, so
    // back off by one code unit in that case.
    const lastKept = s.charCodeAt(end - 1);
    if (lastKept >= 0xd800 && lastKept <= 0xdbff)
        end -= 1;
    return `${s.slice(0, end)}...`;
}
20
/**
 * Trims free-form text and caps its length, appending `...` when it was cut.
 *
 * @param value Candidate text; non-strings are rejected.
 * @param max Maximum number of UTF-16 code units kept before the ellipsis (default 3000).
 * @returns The trimmed text (possibly truncated), or `undefined` when `value`
 *   is not a string or is blank after trimming.
 */
function sanitizeText(value, max = 3000) {
    if (typeof value !== "string")
        return undefined;
    const text = value.trim();
    if (!text)
        return undefined;
    if (text.length <= max)
        return text;
    let end = max;
    // Never end the kept prefix on a high surrogate: that would split an astral
    // character in two and leave malformed UTF-16 in the output.
    const lastKept = text.charCodeAt(end - 1);
    if (lastKept >= 0xd800 && lastKept <= 0xdbff)
        end -= 1;
    return `${text.slice(0, end)}...`;
}
28
/**
 * Flattens a token-count info object into a usage record.
 *
 * Counters are read from `last_token_usage` when that object has any keys and
 * from `total_token_usage` otherwise. Values that are not numbers are reported
 * as `undefined`.
 *
 * @param value The raw `info` object of a token-count record.
 * @returns The usage record, or `undefined` when neither usage object has keys.
 */
function normalizeTokenUsage(value) {
    const info = toObject(value);
    const totalUsage = toObject(info.total_token_usage);
    const lastUsage = toObject(info.last_token_usage);
    const counters = Object.keys(lastUsage).length > 0 ? lastUsage : totalUsage;
    if (Object.keys(counters).length === 0) {
        return undefined;
    }
    const numberOrUndefined = (candidate) => (typeof candidate === "number" ? candidate : undefined);
    return {
        // prompt/completion mirror input/output under the alternative names.
        prompt_tokens: numberOrUndefined(counters.input_tokens),
        completion_tokens: numberOrUndefined(counters.output_tokens),
        total_tokens: numberOrUndefined(counters.total_tokens),
        input_tokens: numberOrUndefined(counters.input_tokens),
        cached_input_tokens: numberOrUndefined(counters.cached_input_tokens),
        output_tokens: numberOrUndefined(counters.output_tokens),
        reasoning_output_tokens: numberOrUndefined(counters.reasoning_output_tokens),
        // The context window lives on the info object, not on the counters.
        source_model_context_window: numberOrUndefined(info.model_context_window),
    };
}
49
/**
 * Parses JSONL text into an array of records.
 *
 * Lines are trimmed (which also drops a trailing `\r`) and blank lines are
 * skipped. Line numbers in error messages refer to the line's position in the
 * original input, counting blank lines, so they match what an editor shows.
 *
 * @param content Raw JSONL text.
 * @returns The parsed values, one per non-blank line, in input order.
 * @throws {Error} `Invalid JSONL line N` when a line is not valid JSON, or
 *   `Invalid record at line N` when it parses to a falsy or non-object value.
 */
function parseLines(content) {
    const records = [];
    for (const [index, rawLine] of content.split("\n").entries()) {
        const line = rawLine.trim();
        if (line.length === 0)
            continue;
        // 1-based position in the unfiltered input.
        const lineNumber = index + 1;
        let parsed;
        try {
            parsed = JSON.parse(line);
        }
        catch {
            throw new Error(`Invalid JSONL line ${lineNumber}`);
        }
        if (!parsed || typeof parsed !== "object")
            throw new Error(`Invalid record at line ${lineNumber}`);
        records.push(parsed);
    }
    return records;
}
67
/**
 * Converts one `response_item` record into canonical events.
 *
 * The payload's `type` selects the mapping:
 * - `function_call`, `custom_tool_call`, `web_search_call`: a `tool_call` event from the agent.
 * - `function_call_output`, `custom_tool_call_output`: a `tool_call` event from the tool.
 * - `reasoning`: an `artifact_created` event, skipped when there is neither summary nor encrypted content.
 * - `message`: an `artifact_created` event, skipped when no text is present.
 * - anything else: no event.
 *
 * @param record The raw record; its `payload` and `timestamp` fields are read.
 * @param intentId Id of the active intent, if any; attached to the event scope.
 * @param now Fallback timestamp used when the record has no usable one.
 * @returns An array holding zero or one event.
 */
function mapResponseItem(record, intentId, now) {
    const payload = toObject(record.payload);
    const itemType = short(payload.type) ?? "unknown";
    const ts = toIso(record.timestamp, now);
    // Scope for artifact events: always tagged with a module, plus the intent when known.
    const moduleScope = (module) => (intentId ? { intent_id: intentId, module } : { module });
    // Scope for tool events: only the intent, or nothing at all.
    const intentScope = intentId ? { intent_id: intentId } : undefined;
    switch (itemType) {
        case "function_call":
        case "custom_tool_call":
        case "web_search_call": {
            const action = short(payload.name) ?? short(toObject(payload.action).type) ?? itemType;
            const toolCall = {
                kind: "tool_call",
                ts,
                actor: { type: "agent", id: "codex" },
                scope: intentScope,
                payload: {
                    category: itemType === "web_search_call" ? "search" : "tool",
                    action,
                    target: short(payload.arguments, 1600) ?? short(payload.input, 1600),
                    details: {
                        call_id: short(payload.call_id),
                        status: short(payload.status),
                        source: "codex_response_item",
                    },
                },
                derived: true,
                confidence: 0.85,
                visibility: "raw",
            };
            return [toolCall];
        }
        case "function_call_output":
        case "custom_tool_call_output": {
            const toolResult = {
                kind: "tool_call",
                ts,
                actor: { type: "tool", id: "codex-tool" },
                scope: intentScope,
                payload: {
                    category: "execution",
                    action: itemType,
                    target: short(payload.call_id),
                    details: {
                        output: short(payload.output, 3500),
                        source: "codex_response_item",
                    },
                },
                derived: true,
                confidence: 0.8,
                visibility: "raw",
            };
            return [toolResult];
        }
        case "reasoning": {
            // Stays undefined when `summary` is not an array; becomes "" when it
            // is an array without any usable text.
            let summary;
            if (Array.isArray(payload.summary)) {
                summary = payload.summary
                    .map((part) => sanitizeText(toObject(part).text, 500))
                    .filter(Boolean)
                    .join(" ");
            }
            const encrypted = sanitizeText(payload.encrypted_content, 400);
            if (!summary && !encrypted) {
                return [];
            }
            const reasoningArtifact = {
                kind: "artifact_created",
                ts,
                actor: { type: "agent", id: "codex" },
                scope: moduleScope("reasoning"),
                payload: {
                    artifact_type: "reasoning",
                    summary,
                    encrypted_content_preview: encrypted,
                    source: "codex_response_item",
                },
                derived: true,
                confidence: 0.9,
                visibility: "debug",
            };
            return [reasoningArtifact];
        }
        case "message": {
            const parts = Array.isArray(payload.content) ? payload.content : [];
            const merged = parts
                .map((part) => sanitizeText(toObject(part).text))
                .filter(Boolean)
                .join("\n")
                .trim();
            if (!merged) {
                return [];
            }
            const messageArtifact = {
                kind: "artifact_created",
                ts,
                actor: { type: "agent", id: "codex" },
                scope: moduleScope("assistant_output"),
                payload: {
                    artifact_type: "assistant_message",
                    role: short(payload.role),
                    phase: short(payload.phase),
                    text: sanitizeText(merged, 3200),
                    source: "codex_response_item",
                },
                derived: true,
                confidence: 0.85,
                visibility: "review",
            };
            return [messageArtifact];
        }
        default:
            return [];
    }
}
177
/**
 * Adapter that turns raw Codex JSONL into canonical session events.
 */
export const codexJsonlAdapter = {
    name: "codex_jsonl",
    /**
     * Cheap detection: looks for a `session_meta` type marker (with or without
     * a space after the colon) in the first 3000 characters.
     *
     * @param content Raw log text.
     * @returns `true` when one of the markers is found.
     */
    canAdapt(content) {
        const head = content.slice(0, 3000);
        const markers = ["\"type\":\"session_meta\"", "\"type\": \"session_meta\""];
        return markers.some((marker) => head.includes(marker));
    },
    /**
     * Parses the JSONL and builds the event list: a `session_start`, then one
     * or more events per recognized record in input order, then a `session_end`.
     *
     * Each `user_message` with text opens a new intent; later events are scoped
     * to the most recent one. Records without a usable timestamp are stamped
     * with the time of the import.
     *
     * @param content Raw JSONL text.
     * @returns The adapted session: source, ids, goal, prompt, time range and events.
     * @throws {Error} When a line cannot be parsed or no `session_meta` record exists.
     */
    adapt(content) {
        const records = parseLines(content);
        const now = new Date().toISOString();
        const sessionMeta = records.find((entry) => entry.type === "session_meta");
        if (!sessionMeta) {
            throw new Error("No session_meta found in Codex JSONL.");
        }
        const meta = toObject(sessionMeta.payload);
        const sessionId = short(meta.id) ?? `codex_${Date.now()}`;
        // Prefer the timestamp inside the payload, then the record's own, then the import time.
        const start = toIso(meta.timestamp, toIso(sessionMeta.timestamp, now));
        const goal = short(meta.user_goal) ?? short(meta.goal);
        const userPrompt = short(meta.user_prompt, 3000);
        let intentCounter = 0;
        let activeIntentId;
        // Scope shared by non-intent events: module tag plus the active intent, if any.
        const scopeFor = (module) => (activeIntentId ? { intent_id: activeIntentId, module } : { module });
        const events = [];
        events.push({
            kind: "session_start",
            ts: start,
            actor: { type: "system", id: "codex" },
            payload: {
                goal: goal ?? "Imported Codex session",
                user_prompt: userPrompt,
                source: "codex_jsonl",
            },
            visibility: "review",
            derived: true,
            confidence: 0.95,
        });
        for (const record of records) {
            if (record.type === "response_item") {
                events.push(...mapResponseItem(record, activeIntentId, now));
                continue;
            }
            if (record.type !== "event_msg") {
                continue;
            }
            const p = toObject(record.payload);
            const ts = toIso(record.timestamp, now);
            switch (p.type) {
                case "user_message": {
                    const message = short(p.message, 3000);
                    if (!message) {
                        break;
                    }
                    intentCounter += 1;
                    activeIntentId = `intent_${sessionId}_${intentCounter}`;
                    events.push({
                        kind: "intent",
                        ts,
                        actor: { type: "user", id: "codex-user" },
                        scope: { intent_id: activeIntentId },
                        payload: {
                            intent_id: activeIntentId,
                            // The first line, capped at 120 characters, serves as the title.
                            title: message.split("\n")[0]?.slice(0, 120) || "User message",
                            description: message,
                            source: "codex_event_msg",
                        },
                        visibility: "review",
                        derived: true,
                        confidence: 0.85,
                    });
                    break;
                }
                case "token_count": {
                    // A checkpoint is emitted even when the usage cannot be normalized.
                    const usage = normalizeTokenUsage(p.info);
                    events.push({
                        kind: "token_usage_checkpoint",
                        ts,
                        actor: { type: "system", id: "codex" },
                        scope: scopeFor("llm"),
                        payload: {
                            source: "codex_event_msg",
                            usage,
                            raw: p.info,
                        },
                        visibility: "raw",
                        derived: true,
                        confidence: 0.75,
                    });
                    break;
                }
                case "agent_reasoning": {
                    const reasoning = sanitizeText(p.text, 3500);
                    if (!reasoning) {
                        break;
                    }
                    events.push({
                        kind: "artifact_created",
                        ts,
                        actor: { type: "agent", id: "codex" },
                        scope: scopeFor("reasoning"),
                        payload: {
                            artifact_type: "reasoning",
                            text: reasoning,
                            source: "codex_event_msg",
                        },
                        visibility: "debug",
                        derived: true,
                        confidence: 0.9,
                    });
                    break;
                }
                case "agent_message": {
                    const assistantMessage = sanitizeText(p.message, 3500);
                    if (!assistantMessage) {
                        break;
                    }
                    events.push({
                        kind: "artifact_created",
                        ts,
                        actor: { type: "agent", id: "codex" },
                        scope: scopeFor("assistant_output"),
                        payload: {
                            artifact_type: "assistant_message",
                            text: assistantMessage,
                            source: "codex_event_msg",
                        },
                        visibility: "review",
                        derived: true,
                        confidence: 0.85,
                    });
                    break;
                }
                default:
                    break;
            }
        }
        // The session ends at the timestamp of the last record in the file.
        const endTs = toIso(records[records.length - 1]?.timestamp, now);
        events.push({
            kind: "session_end",
            ts: endTs,
            actor: { type: "system", id: "codex" },
            payload: {
                outcome: "unknown",
                summary: "Imported from raw Codex JSONL",
                source: "codex_jsonl",
            },
            visibility: "review",
            derived: true,
            confidence: 0.9,
        });
        return {
            source: "codex_jsonl",
            session_id: sessionId,
            goal,
            user_prompt: userPrompt,
            started_at: start,
            ended_at: endTs,
            events,
        };
    },
};
@@ -0,0 +1,2 @@
1
+ import type { RawAdapter } from "./types.js";
2
+ export declare const cursorRawAdapter: RawAdapter;