memex-mvp 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/parse.js ADDED
@@ -0,0 +1,175 @@
1
+ /**
2
+ * Shared dialogue-only parser for Claude Code / Cowork JSONL.
3
+ *
4
+ * Used by both the MCP server (server.js, importing inbox files) and the
5
+ * ingest daemon (ingest.js, reading deltas from raw source files).
6
+ */
7
+
8
/**
 * Top-level event types that are never dialogue. Records whose `type` is in
 * this set are meant to be skipped when parsing.
 */
export const CLAUDE_CODE_SKIP_TYPES = new Set([
  'queue-operation',
  'ai-title',
  'summary',
]);
10
+
11
/**
 * Opening phrases of the auto-generated user messages produced by /compact,
 * /resume, and continuation flows. They're real messages (we keep them in
 * the index), but they're never useful as conversation titles.
 */
export const CONTINUATION_PREFIXES = [
  'This session is being continued',
  'Continue from where you left off',
  'Please continue from where you left off',
];

/**
 * Report whether a message is continuation boilerplate or a tag-wrapped
 * artefact, i.e. text that should not be used as a conversation title.
 *
 * Leading whitespace is ignored, so a message that begins with a blank line
 * or indentation before the boilerplate is still recognised.
 *
 * @param {unknown} text - Candidate message text.
 * @returns {boolean} `true` when the text starts with one of
 *   CONTINUATION_PREFIXES or with `<`; `false` otherwise, including for any
 *   non-string input.
 */
export function isContinuationBoilerplate(text) {
  // A non-string has no prefix to inspect. Answer "not boilerplate" rather
  // than letting startsWith throw a TypeError on null/undefined/numbers.
  if (typeof text !== 'string') return false;

  const head = text.trimStart();

  // XML/tag-wrapped artefacts (uploaded_files, system-reminder, command-name…)
  if (head.startsWith('<')) return true;

  return CONTINUATION_PREFIXES.some((prefix) => head.startsWith(prefix));
}
26
+
27
/**
 * Reduce a record's raw `content` value to dialogue-only text.
 *
 * A string is returned unchanged. For an array, bare string items and
 * `{type:'text', text:<string>}` blocks are kept and joined with newlines;
 * every other block (tool_use, tool_result, thinking, redacted_thinking,
 * image, and any unknown block type) is dropped. Any other value yields ''.
 *
 * @param {unknown} rawContent - The `content` (or flat `text`) of a record.
 * @returns {string} The concatenated dialogue text, possibly empty.
 */
function collectDialogueText(rawContent) {
  if (typeof rawContent === 'string') return rawContent;
  if (!Array.isArray(rawContent)) return '';

  const parts = [];
  for (const block of rawContent) {
    if (typeof block === 'string') {
      parts.push(block);
    } else if (
      block &&
      typeof block === 'object' &&
      block.type === 'text' &&
      typeof block.text === 'string'
    ) {
      parts.push(block.text);
    }
  }
  return parts.join('\n');
}

/**
 * Extract a clean dialogue message from a Claude Code JSONL record.
 *
 * Handles both:
 *   1. Legacy flat shape (original spec):
 *        {"role":"user","content":"...","timestamp":"..."}
 *   2. Real nested shape (current Claude Code / Cowork on disk):
 *        {"type":"user","message":{"role":"user","content":"..."},"timestamp":"..."}
 *        {"parentUuid":"...","message":{"role":"assistant","content":[{type:"text",text:"..."},...]}}
 *
 * Filters out everything that isn't human-readable dialogue:
 *   - queue-operation / ai-title / summary events
 *   - attachment-only records (deferred_tools_delta, skill_listing, plan_mode)
 *   - tool_use / tool_result / thinking / redacted_thinking / image content blocks
 *   - encrypted thinking signatures (multi-kilobyte base64 blobs)
 *   - records whose remaining text is empty or whitespace-only
 *
 * Compaction handling:
 *   A record with role 'user' that carries isCompactSummary:true or
 *   isVisibleInTranscriptOnly:true (the synthetic summary fed back into model
 *   context by /compact) is returned with role='summary' so the importer can
 *   route it away from FTS5 indexing — otherwise the summary would
 *   double-count against the original raw discussion it summarises.
 *
 * @param {unknown} obj - One parsed JSONL record.
 * @returns {{role: string, text: string, id: *, timestamp: *, uuid: *, parentUuid: *} | null}
 *   The extracted message, or null when the record should be skipped. Fields
 *   that are missing on the record come back as null. `text` is returned
 *   untrimmed.
 */
export function extractMessageFromRecord(obj) {
  if (!obj || typeof obj !== 'object') return null;

  // Skip non-dialogue top-level event types
  if (CLAUDE_CODE_SKIP_TYPES.has(obj.type)) return null;

  // Skip attachment-only records (Claude Code harness bookkeeping)
  if (obj.attachment && !obj.message) return null;

  // Resolve role/content from either nested or flat shape
  const nested = obj.message;
  const fromNested = Boolean(nested) && typeof nested === 'object';
  let role = fromNested ? nested.role : obj.role;
  if (!role || typeof role !== 'string') return null;

  let rawContent;
  if (fromNested) {
    rawContent = nested.content;
  } else if (obj.content !== undefined) {
    rawContent = obj.content;
  } else {
    // Flat records may carry their body under `text` instead of `content`.
    rawContent = obj.text;
  }

  const text = collectDialogueText(rawContent);
  if (!text.trim()) return null;

  // Claude Code marks the synthetic /compact summary message with
  // isCompactSummary:true (and isVisibleInTranscriptOnly:true). Re-tag
  // those as role='summary' so the importer can keep them in the messages
  // table for retrieval but exclude them from FTS5 — otherwise searching
  // for any topic discussed before a compaction would return both the
  // original raw turns AND the compressed summary mention, polluting rank.
  const isSyntheticSummary =
    obj.isCompactSummary === true || obj.isVisibleInTranscriptOnly === true;
  if (role === 'user' && isSyntheticSummary) {
    role = 'summary';
  }

  return {
    role,
    text,
    // The nested message id wins over the record-level id.
    id: (fromNested && nested.id) || obj.id || null,
    // The record-level timestamp wins over the nested one.
    timestamp: obj.timestamp || (fromNested && nested.timestamp) || null,
    uuid: obj.uuid || null,
    parentUuid: obj.parentUuid || null,
  };
}
119
+
120
/**
 * Detect a compact_boundary record.
 *
 * Claude Code writes two record types when /compact (or auto-compact) fires:
 *   1. {type:"system", subtype:"compact_boundary", compactMetadata:{...}, ...}
 *        — boundary marker. parentUuid is reset to null. compactMetadata
 *        carries {trigger, preTokens, postTokens, durationMs,
 *        logicalParentUuid, preCompactDiscoveredTools}.
 *   2. {type:"user", isCompactSummary:true, message:{...}} — the
 *        AI-generated summary fed back into model context (not a boundary;
 *        this function returns null for it).
 *
 * We also recognise the daemon's inbox-emitted shape
 *   {type:"compact-boundary", metadata:{...}, ...} so server.js can import
 *   either the raw on-disk format or the daemon's snapshot.
 *
 * @param {unknown} obj - One parsed JSONL record.
 * @returns {{timestamp: *, uuid: *, parentUuid: *, logicalParentUuid: *, metadata: *, id: *, raw: object} | null}
 *   null when the record isn't a boundary. Otherwise the boundary fields,
 *   where missing values are null, `metadata` is the record's
 *   compactMetadata/metadata (or a fresh `{}` when absent), and `raw` is the
 *   input record itself (same reference, not a copy).
 */
export function extractCompactBoundary(obj) {
  if (!obj || typeof obj !== 'object') return null;

  // The two accepted shapes differ only in where the metadata lives.
  let metadata;
  if (obj.type === 'system' && obj.subtype === 'compact_boundary') {
    metadata = obj.compactMetadata || {};
  } else if (obj.type === 'compact-boundary') {
    metadata = obj.metadata || {};
  } else {
    return null;
  }

  return {
    timestamp: obj.timestamp || null,
    uuid: obj.uuid || null,
    parentUuid: obj.parentUuid || null,
    // A record-level logicalParentUuid wins over the one inside the metadata.
    logicalParentUuid: obj.logicalParentUuid || metadata.logicalParentUuid || null,
    metadata,
    id: obj.id || null,
    raw: obj,
  };
}
163
+
164
/**
 * Pull the title out of an ai-title record, if the record is one.
 *
 * @param {unknown} obj - One parsed JSONL record.
 * @returns {string | null} The trimmed `aiTitle` when `obj.type` is
 *   'ai-title' and `aiTitle` is a string with non-whitespace content;
 *   otherwise null.
 */
export function extractAiTitle(obj) {
  if (!obj || obj.type !== 'ai-title') return null;

  const { aiTitle } = obj;
  if (typeof aiTitle !== 'string') return null;

  const title = aiTitle.trim();
  return title ? title : null;
}
Binary file
package/package.json ADDED
@@ -0,0 +1,70 @@
1
+ {
2
+ "name": "memex-mvp",
3
+ "version": "0.5.0",
4
+ "description": "Local-first MCP server for cross-agent AI memory. One SQLite + FTS5 corpus across Claude Code, Cowork, Cursor, Continue, Zed, Obsidian, and Telegram — passively captured, verbatim, searchable from any MCP-compatible client.",
5
+ "type": "module",
6
+ "main": "server.js",
7
+ "bin": {
8
+ "memex": "./server.js",
9
+ "memex-sync": "./ingest.js",
10
+ "memex-ingest": "./ingest.js",
11
+ "memex-bot": "./bot/index.js"
12
+ },
13
+ "files": [
14
+ "server.js",
15
+ "ingest.js",
16
+ "lib/",
17
+ "bot/",
18
+ "HELP.md",
19
+ "README.md",
20
+ "LICENSE"
21
+ ],
22
+ "scripts": {
23
+ "start": "node server.js",
24
+ "sync": "node ingest.js",
25
+ "ingest": "node ingest.js",
26
+ "bot": "node bot/index.js",
27
+ "test": "node test/parser.test.js && node test/bot-inbox.test.js && node test/search-sort.test.js",
28
+ "prepublishOnly": "npm test"
29
+ },
30
+ "engines": {
31
+ "node": ">=20.0.0 <25.0.0"
32
+ },
33
+ "dependencies": {
34
+ "@modelcontextprotocol/sdk": "^1.0.0",
35
+ "better-sqlite3": "^11.0.0",
36
+ "chokidar": "^3.6.0"
37
+ },
38
+ "keywords": [
39
+ "mcp",
40
+ "mcp-server",
41
+ "claude",
42
+ "claude-code",
43
+ "claude-cowork",
44
+ "cursor",
45
+ "memex",
46
+ "memory",
47
+ "ai-memory",
48
+ "local-first",
49
+ "verbatim",
50
+ "chat-archive",
51
+ "fts5",
52
+ "sqlite",
53
+ "telegram",
54
+ "openclaw"
55
+ ],
56
+ "author": {
57
+ "name": "parallelclaw",
58
+ "email": "sedelev@gmail.com",
59
+ "url": "https://memex.parallelclaw.ai"
60
+ },
61
+ "license": "MIT",
62
+ "homepage": "https://memex.parallelclaw.ai",
63
+ "repository": {
64
+ "type": "git",
65
+ "url": "git+https://github.com/parallelclaw/memex-mvp.git"
66
+ },
67
+ "bugs": {
68
+ "url": "https://github.com/parallelclaw/memex-mvp/issues"
69
+ }
70
+ }