mempalace-code 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mempalace/README.md +40 -0
- mempalace/__init__.py +6 -0
- mempalace/__main__.py +5 -0
- mempalace/cli.py +811 -0
- mempalace/config.py +149 -0
- mempalace/convo_miner.py +415 -0
- mempalace/dialect.py +1075 -0
- mempalace/entity_detector.py +853 -0
- mempalace/entity_registry.py +639 -0
- mempalace/export.py +378 -0
- mempalace/general_extractor.py +521 -0
- mempalace/knowledge_graph.py +410 -0
- mempalace/layers.py +515 -0
- mempalace/mcp_server.py +873 -0
- mempalace/migrate.py +153 -0
- mempalace/miner.py +1285 -0
- mempalace/normalize.py +328 -0
- mempalace/onboarding.py +489 -0
- mempalace/palace_graph.py +225 -0
- mempalace/py.typed +0 -0
- mempalace/room_detector_local.py +310 -0
- mempalace/searcher.py +305 -0
- mempalace/spellcheck.py +269 -0
- mempalace/split_mega_files.py +309 -0
- mempalace/storage.py +807 -0
- mempalace/version.py +3 -0
- mempalace_code-1.0.0.dist-info/METADATA +489 -0
- mempalace_code-1.0.0.dist-info/RECORD +32 -0
- mempalace_code-1.0.0.dist-info/WHEEL +4 -0
- mempalace_code-1.0.0.dist-info/entry_points.txt +2 -0
- mempalace_code-1.0.0.dist-info/licenses/LICENSE +192 -0
- mempalace_code-1.0.0.dist-info/licenses/NOTICE +17 -0
mempalace/cli.py
ADDED
|
@@ -0,0 +1,811 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
MemPalace — Give your AI a memory. No API key required.
|
|
4
|
+
|
|
5
|
+
Two ways to ingest:
|
|
6
|
+
Projects: mempalace mine ~/projects/my_app (code, docs, notes)
|
|
7
|
+
Conversations: mempalace mine ~/chats/ --mode convos (Claude, ChatGPT, Slack)
|
|
8
|
+
|
|
9
|
+
Same palace. Same search. Different ingest strategies.
|
|
10
|
+
|
|
11
|
+
Commands:
|
|
12
|
+
mempalace init <dir> Detect rooms from folder structure
|
|
13
|
+
mempalace split <dir> Split concatenated mega-files into per-session files
|
|
14
|
+
mempalace mine <dir> Mine project files (default)
|
|
15
|
+
mempalace mine <dir> --mode convos Mine conversation exports
|
|
16
|
+
mempalace search "query" Find anything, exact words
|
|
17
|
+
mempalace wake-up Show L0 + L1 wake-up context
|
|
18
|
+
mempalace wake-up --wing my_app Wake-up for a specific project
|
|
19
|
+
mempalace status Show what's been filed
|
|
20
|
+
mempalace diary write --agent <name> --entry "<text>" Write a diary entry
|
|
21
|
+
|
|
22
|
+
Examples:
|
|
23
|
+
mempalace init ~/projects/my_app
|
|
24
|
+
mempalace mine ~/projects/my_app
|
|
25
|
+
mempalace mine ~/chats/claude-sessions --mode convos
|
|
26
|
+
mempalace search "why did we switch to GraphQL"
|
|
27
|
+
mempalace search "pricing discussion" --wing my_app --room costs
|
|
28
|
+
mempalace diary write --agent claude-code --entry "Finished feature X" --topic dev
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
import os
|
|
32
|
+
import sys
|
|
33
|
+
import argparse
|
|
34
|
+
from pathlib import Path
|
|
35
|
+
|
|
36
|
+
from .config import MempalaceConfig
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def fetch_model(model_name: str, force: bool = False) -> None:
    """Download *model_name* into the HuggingFace Hub cache.

    Shared by ``cmd_fetch_model`` and ``cmd_init``.  When *force* is True
    the cached model directory is deleted first so a fresh copy is
    retrieved.
    """
    import shutil
    from sentence_transformers import SentenceTransformer

    # Resolve the cache directory at call time: HF_HOME may be changed via
    # os.environ after interpreter start (e.g. in tests), whereas
    # huggingface_hub.constants.HF_HUB_CACHE is a module-level string frozen
    # at import time and never updates afterwards.
    hub_cache = Path(os.environ.get("HF_HOME", Path.home() / ".cache" / "huggingface")) / "hub"
    # Standard Hub layout: models--{org}--{model}
    model_dir = hub_cache / f"models--sentence-transformers--{model_name}"

    if force and model_dir.exists():
        print(f" Removing cached model: {model_dir}")
        shutil.rmtree(model_dir)

    print(f" Downloading model '{model_name}' …")
    SentenceTransformer(model_name)

    # Report where the model landed and how big it is on disk.
    if not model_dir.exists():
        print(f" Model ready (cache path not found at expected location: {model_dir})")
        return
    total_bytes = sum(p.stat().st_size for p in model_dir.rglob("*") if p.is_file())
    print(f" Cached at: {model_dir}")
    print(f" Size on disk: {total_bytes / (1024 * 1024):.1f} MB")
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def cmd_fetch_model(args):
    """CLI handler: download the embedding model for offline use."""
    from .storage import DEFAULT_EMBED_MODEL

    # Fall back to the package default when --model was not given.
    name = args.model if args.model else DEFAULT_EMBED_MODEL
    try:
        fetch_model(name, force=args.force)
    except Exception as exc:
        print(f" Error downloading model: {exc}", file=sys.stderr)
        sys.exit(1)
    print(" Done — embedding model is ready for offline use.")
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def cmd_init(args):
    """Initialize a project directory for MemPalace.

    Pass 1 auto-detects people/projects from file content and, after user
    confirmation, saves them to ``<project>/entities.json`` for the miner.
    Pass 2 derives rooms from the folder structure.  Finally the global
    config is written and, unless ``--skip-model-download`` was given, the
    embedding model is prefetched (failure there is a warning, not fatal).
    """
    import json
    # NOTE: Path is already imported at module level; the original also
    # re-imported it locally — that redundant shadowing import is removed.
    from .entity_detector import scan_for_detection, detect_entities, confirm_entities
    from .room_detector_local import detect_rooms_local

    # Pass 1: auto-detect people and projects from file content.
    print(f"\n Scanning for entities in: {args.dir}")
    files = scan_for_detection(args.dir)
    if files:
        print(f" Reading {len(files)} files...")
        detected = detect_entities(files)
        total = len(detected["people"]) + len(detected["projects"]) + len(detected["uncertain"])
        if total > 0:
            confirmed = confirm_entities(detected, yes=getattr(args, "yes", False))
            # Persist confirmed entities so the miner can pick them up later.
            if confirmed["people"] or confirmed["projects"]:
                entities_path = Path(args.dir).expanduser().resolve() / "entities.json"
                with open(entities_path, "w") as f:
                    json.dump(confirmed, f, indent=2)
                print(f" Entities saved: {entities_path}")
        else:
            print(" No entities detected — proceeding with directory-based rooms.")

    # Pass 2: detect rooms from folder structure.
    detect_rooms_local(project_dir=args.dir, yes=getattr(args, "yes", False))
    MempalaceConfig().init()

    if not getattr(args, "skip_model_download", False):
        from .storage import DEFAULT_EMBED_MODEL

        print("\n Downloading embedding model (~80 MB)…")
        try:
            fetch_model(DEFAULT_EMBED_MODEL)
        except Exception as exc:
            # Non-fatal: init still succeeds; the model can be fetched later.
            print(f" Warning: model download failed: {exc}", file=sys.stderr)
            print(
                " Run 'mempalace fetch-model' manually when network is available.", file=sys.stderr
            )
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def cmd_mine(args):
    """Mine a directory into the palace, dispatching on ``--mode``."""
    palace_path = os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path

    # Flatten repeated --include-ignored flags and comma-separated values
    # into a single list of non-empty, stripped paths.
    include_ignored = [
        part.strip()
        for raw in (args.include_ignored or [])
        for part in raw.split(",")
        if part.strip()
    ]

    if args.mode == "convos":
        # Conversation exports (Claude, ChatGPT, Slack, …).
        from .convo_miner import mine_convos

        mine_convos(
            convo_dir=args.dir,
            palace_path=palace_path,
            wing=args.wing,
            agent=args.agent,
            limit=args.limit,
            dry_run=args.dry_run,
            extract_mode=args.extract,
        )
    else:
        # Default: project files (code, docs, notes).
        from .miner import mine

        mine(
            project_dir=args.dir,
            palace_path=palace_path,
            wing_override=args.wing,
            agent=args.agent,
            limit=args.limit,
            dry_run=args.dry_run,
            respect_gitignore=not args.no_gitignore,
            include_ignored=include_ignored,
            incremental=not args.full,
        )
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def cmd_search(args):
    """Run a palace search and print results; exit 1 on SearchError."""
    from .searcher import search, SearchError

    if args.palace:
        palace_path = os.path.expanduser(args.palace)
    else:
        palace_path = MempalaceConfig().palace_path

    try:
        search(
            query=args.query,
            palace_path=palace_path,
            wing=args.wing,
            room=args.room,
            n_results=args.results,
        )
    except SearchError:
        # search() is expected to have reported the problem already.
        sys.exit(1)
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def cmd_wakeup(args):
    """Show L0 (identity) + L1 (essential story) — the wake-up context."""
    from .layers import MemoryStack

    if args.palace:
        palace_path = os.path.expanduser(args.palace)
    else:
        palace_path = MempalaceConfig().palace_path

    text = MemoryStack(palace_path=palace_path).wake_up(wing=args.wing)
    # Rough token estimate: ~4 characters per token.
    approx_tokens = len(text) // 4
    print(f"Wake-up text (~{approx_tokens} tokens):")
    print("=" * 50)
    print(text)
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def cmd_split(args):
    """Split concatenated transcript mega-files into per-session files.

    Delegates to ``split_mega_files.main`` by temporarily rewriting
    ``sys.argv`` so that module's own argparse sees the translated flags.
    (The original also re-imported ``sys`` locally, shadowing the
    module-level import — that redundant import is removed.)
    """
    from .split_mega_files import main as split_main

    # Rebuild argv for split_mega_files' argparse.
    argv = ["--source", args.dir]
    if args.output_dir:
        argv += ["--output-dir", args.output_dir]
    if args.dry_run:
        argv.append("--dry-run")
    if args.min_sessions != 2:  # 2 matches split_mega_files' own default
        argv += ["--min-sessions", str(args.min_sessions)]

    old_argv = sys.argv
    sys.argv = ["mempalace split"] + argv
    try:
        split_main()
    finally:
        # Always restore argv, even if splitting raised.
        sys.argv = old_argv
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def cmd_status(args):
    """Print a summary of what has been filed in the palace."""
    from .miner import status

    if args.palace:
        palace_path = os.path.expanduser(args.palace)
    else:
        palace_path = MempalaceConfig().palace_path
    status(palace_path=palace_path)
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def cmd_diary_write(args):
    """Store a single diary entry verbatim in the palace's diary room."""
    import hashlib
    from datetime import datetime
    from .storage import open_store
    from .version import __version__

    palace_path = os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path

    agent_name = args.agent
    entry = args.entry
    # Default wing is derived from the agent name, e.g. "wing_claude_code".
    wing = args.wing if args.wing else f"wing_{agent_name.lower().replace(' ', '_')}"

    try:
        store = open_store(palace_path, create=True)
    except Exception as e:
        print(f"Cannot open palace at {palace_path}: {e}", file=sys.stderr)
        sys.exit(1)

    now = datetime.now()
    # Drawer ID combines wing, timestamp, and a short hash of the entry
    # prefix so simultaneous entries with different text still get unique IDs.
    digest = hashlib.md5(entry[:50].encode()).hexdigest()[:8]
    entry_id = f"diary_{wing}_{now.strftime('%Y%m%d_%H%M%S')}_{digest}"

    metadata = {
        "wing": wing,
        "room": "diary",
        "hall": "hall_diary",
        "topic": args.topic,
        "type": "diary_entry",
        "agent": agent_name,
        "filed_at": now.isoformat(),
        "date": now.strftime("%Y-%m-%d"),
        "extractor_version": __version__,
        "chunker_strategy": "diary_v1",
    }

    try:
        store.add(ids=[entry_id], documents=[entry], metadatas=[metadata])
    except Exception as e:
        print(str(e), file=sys.stderr)
        sys.exit(1)
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def cmd_diary(args):
    """Route 'diary' subcommands; anything but 'write' shows help and exits 2."""
    if args.diary_command != "write":
        # _diary_parser is attached by main() before dispatch.
        args._diary_parser.print_help()
        sys.exit(2)
    cmd_diary_write(args)
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
def cmd_migrate_storage(args):
    """Migrate a ChromaDB palace to a LanceDB palace."""
    from .migrate import migrate_chroma_to_lance, VerificationError

    try:
        counts = migrate_chroma_to_lance(
            src_path=args.src_palace,
            dst_path=args.dst_palace,
            backup_dir=args.backup_dir,
            force=args.force,
            embed_model=args.embed_model,
            verify=args.verify,
            no_backup=False,
        )
    except VerificationError as e:
        # Post-migration count verification (--verify) failed.
        print(f"Verification failed: {e}", file=sys.stderr)
        sys.exit(1)
    except RuntimeError as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)

    src_count, dst_count = counts
    print(f"Source drawers: {src_count} Destination drawers: {dst_count}")
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def cmd_repair(args):
    """Rebuild palace — extract all drawers, backup, and re-insert.

    Recovers from vector-index corruption: every drawer (id, document,
    metadata) is read out in batches, the palace directory is backed up
    to ``<palace>.backup``, the old directory is removed, and a fresh
    store is rebuilt from the extracted data.

    Fix over the original: the open store handle is released before
    ``shutil.rmtree(palace_path)`` — deleting a directory that still has
    open database files fails on Windows and can corrupt some backends.
    """
    import shutil
    from .storage import open_store

    palace_path = os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path

    if not os.path.isdir(palace_path):
        print(f"\n No palace found at {palace_path}")
        return

    print(f"\n{'=' * 55}")
    print(" MemPalace Repair")
    print(f"{'=' * 55}\n")
    print(f" Palace: {palace_path}")

    # Try to read existing drawers.
    try:
        store = open_store(palace_path, create=False)
        total = store.count()
        print(f" Drawers found: {total}")
    except Exception as e:
        print(f" Error reading palace: {e}")
        print(" Cannot recover — palace may need to be re-mined from source files.")
        return

    if total == 0:
        print(" Nothing to repair.")
        return

    # Extract all drawers in batches.
    print("\n Extracting drawers...")
    batch_size = 5000
    all_ids = []
    all_docs = []
    all_metas = []
    offset = 0
    while offset < total:
        batch = store.get(limit=batch_size, offset=offset, include=["documents", "metadatas"])
        all_ids.extend(batch["ids"])
        all_docs.extend(batch["documents"])
        all_metas.extend(batch["metadatas"])
        offset += batch_size
    print(f" Extracted {len(all_ids)} drawers")

    # Release the store handle BEFORE touching the directory on disk
    # (close() if the backend provides one, then drop our reference).
    close = getattr(store, "close", None)
    if callable(close):
        close()
    del store

    # Backup and rebuild.
    backup_path = palace_path + ".backup"
    if os.path.exists(backup_path):
        shutil.rmtree(backup_path)
    print(f" Backing up to {backup_path}...")
    shutil.copytree(palace_path, backup_path)

    print(" Rebuilding palace from extracted data...")
    # Remove old data and recreate.
    shutil.rmtree(palace_path)
    new_store = open_store(palace_path, create=True)

    filed = 0
    for i in range(0, len(all_ids), batch_size):
        batch_ids = all_ids[i : i + batch_size]
        batch_docs = all_docs[i : i + batch_size]
        batch_metas = all_metas[i : i + batch_size]
        new_store.add(documents=batch_docs, ids=batch_ids, metadatas=batch_metas)
        filed += len(batch_ids)
        print(f" Re-filed {filed}/{len(all_ids)} drawers...")

    print(f"\n Repair complete. {filed} drawers rebuilt.")
    print(f" Backup saved at {backup_path}")
    print(f"\n{'=' * 55}\n")
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def cmd_compress(args):
    """Compress drawers in a wing using AAAK Dialect.

    Reads drawers in batches (optionally filtered by --wing), compresses
    each document with the Dialect, prints per-drawer previews in --dry-run
    mode, and otherwise upserts the compressed text back over the original
    drawer IDs (destructive: the original documents are replaced).
    """
    from .storage import open_store
    from .dialect import Dialect

    palace_path = os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path

    # Load dialect (with optional entity config).  When --config is absent,
    # look for entities.json in the CWD first, then inside the palace.
    config_path = args.config
    if not config_path:
        for candidate in ["entities.json", os.path.join(palace_path, "entities.json")]:
            if os.path.exists(candidate):
                config_path = candidate
                break

    if config_path and os.path.exists(config_path):
        dialect = Dialect.from_config(config_path)
        print(f" Loaded entity config: {config_path}")
    else:
        dialect = Dialect()

    # Connect to palace
    try:
        store = open_store(palace_path, create=False)
    except Exception:
        print(f"\n No palace found at {palace_path}")
        print(" Run: mempalace init <dir> then mempalace mine <dir>")
        sys.exit(1)

    # Query drawers in batches of _BATCH until a short/empty page is seen.
    where = {"wing": args.wing} if args.wing else None
    _BATCH = 500
    docs, metas, ids = [], [], []
    offset = 0
    while True:
        try:
            batch = store.get(
                include=["documents", "metadatas"], limit=_BATCH, offset=offset, where=where
            )
        except Exception as e:
            # A failure on the very first page is fatal; a failure after
            # some pages were read just ends the pagination (best-effort).
            if not docs:
                print(f"\n Error reading drawers: {e}")
                sys.exit(1)
            break
        batch_docs = batch.get("documents", [])
        if not batch_docs:
            break
        docs.extend(batch_docs)
        metas.extend(batch.get("metadatas", []))
        ids.extend(batch.get("ids", []))
        offset += len(batch_docs)
        if len(batch_docs) < _BATCH:
            break

    if not docs:
        wing_label = f" in wing '{args.wing}'" if args.wing else ""
        print(f"\n No drawers found{wing_label}.")
        return

    print(
        f"\n Compressing {len(docs)} drawers"
        + (f" in wing '{args.wing}'" if args.wing else "")
        + "..."
    )
    print()

    # Character totals across all drawers, used for the final summary.
    total_original = 0
    total_compressed = 0
    compressed_entries = []

    for doc, meta, doc_id in zip(docs, metas, ids):
        compressed = dialect.compress(doc, metadata=meta)
        # stats: assumed to carry original_chars / compressed_chars /
        # original_tokens / compressed_tokens / ratio — TODO confirm
        # against dialect.compression_stats.
        stats = dialect.compression_stats(doc, compressed)

        total_original += stats["original_chars"]
        total_compressed += stats["compressed_chars"]

        compressed_entries.append((doc_id, compressed, meta, stats))

        if args.dry_run:
            # Per-drawer preview: location, token reduction, and the
            # compressed text itself.
            wing_name = meta.get("wing", "?")
            room_name = meta.get("room", "?")
            source = Path(meta.get("source_file", "?")).name
            print(f" [{wing_name}/{room_name}] {source}")
            print(
                f" {stats['original_tokens']}t -> {stats['compressed_tokens']}t ({stats['ratio']:.1f}x)"
            )
            print(f" {compressed}")
            print()

    # Store compressed versions (unless dry-run)
    if not args.dry_run:
        try:
            # Upsert compressed drawers back into the main store, one at a
            # time, preserving the original ID and annotating the metadata
            # with compression bookkeeping.
            for doc_id, compressed, meta, stats in compressed_entries:
                comp_meta = dict(meta)
                comp_meta["compression_ratio"] = round(stats["ratio"], 1)
                comp_meta["original_tokens"] = stats["original_tokens"]
                store.upsert(
                    ids=[doc_id],
                    documents=[compressed],
                    metadatas=[comp_meta],
                )
            print(f" Stored {len(compressed_entries)} compressed drawers.")
        except Exception as e:
            print(f" Error storing compressed drawers: {e}")
            sys.exit(1)

    # Summary.  max(..., 1) guards against division by zero when every
    # document compressed to the empty string.  Token totals are estimated
    # by feeding a synthetic string of the same length to count_tokens.
    ratio = total_original / max(total_compressed, 1)
    orig_tokens = Dialect.count_tokens("x" * total_original)
    comp_tokens = Dialect.count_tokens("x" * total_compressed)
    print(f" Total: {orig_tokens:,}t -> {comp_tokens:,}t ({ratio:.1f}x compression)")
    if args.dry_run:
        print(" (dry run -- nothing stored)")
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
def cmd_export(args):
    """Export drawers (and optionally KG triples) to a JSONL file."""
    from .storage import open_store
    from .knowledge_graph import KnowledgeGraph
    from .export import write_jsonl

    palace_path = args.palace or MempalaceConfig().palace_path
    store = open_store(palace_path, create=False)
    # The knowledge graph is only loaded when --with-kg was requested.
    kg = KnowledgeGraph() if args.with_kg else None

    print(f" Exporting from: {palace_path}")
    export_opts = {
        "path": args.out,
        "store": store,
        "kg": kg,
        "only_manual": args.only_manual,
        "wing": args.wing,
        "room": args.room,
        "since": args.since,
        "include_vectors": args.with_embeddings,
        "include_kg": args.with_kg,
        "pretty": args.pretty,
        "palace_path": palace_path,
    }
    summary = write_jsonl(**export_opts)
    print(
        f" Exported {summary['drawer_count']} drawers, {summary['kg_count']} KG triples → {args.out}"
    )
|
|
516
|
+
|
|
517
|
+
|
|
518
|
+
def cmd_import(args):
    """Import drawers (and optionally KG triples) from a JSONL export."""
    from .storage import open_store
    from .knowledge_graph import KnowledgeGraph
    from .export import import_jsonl

    palace_path = args.palace or MempalaceConfig().palace_path
    store = open_store(palace_path, create=True)
    # KG import is skipped entirely when --skip-kg was requested.
    kg = None if args.skip_kg else KnowledgeGraph()

    print(f" Importing into: {palace_path}")
    if args.dry_run:
        print(" (dry run — nothing will be written)")

    import_opts = {
        "path": args.jsonl_file,
        "store": store,
        "kg": kg,
        "skip_dedup": args.skip_dedup,
        "skip_kg": args.skip_kg,
        "dry_run": args.dry_run,
        "wing_override": args.wing_override,
    }
    summary = import_jsonl(**import_opts)

    print(f" Imported drawers: {summary['imported_drawers']}")
    print(f" Skipped duplicates: {summary['skipped_duplicates']}")
    print(f" Imported KG triples:{summary['imported_triples']}")
    if args.dry_run:
        print(" (dry run — no changes made)")
    for warning in summary["warnings"]:
        print(f" WARNING: {warning}")
|
|
548
|
+
|
|
549
|
+
|
|
550
|
+
def main():
    """Parse CLI arguments and dispatch to the matching ``cmd_*`` handler.

    Builds one argparse subparser per command; the module docstring is
    reused as the epilog so ``mempalace --help`` shows the full command
    overview and examples.
    """
    parser = argparse.ArgumentParser(
        description="MemPalace — Give your AI a memory. No API key required.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    # --palace is a global option shared by every subcommand.
    parser.add_argument(
        "--palace",
        default=None,
        help="Where the palace lives (default: from ~/.mempalace/config.json or ~/.mempalace/palace)",
    )

    sub = parser.add_subparsers(dest="command")

    # init
    p_init = sub.add_parser("init", help="Detect rooms from your folder structure")
    p_init.add_argument("dir", help="Project directory to set up")
    p_init.add_argument(
        "--yes", action="store_true", help="Auto-accept all detected entities (non-interactive)"
    )
    p_init.add_argument(
        "--skip-model-download",
        action="store_true",
        dest="skip_model_download",
        help="Skip automatic embedding model download (run 'fetch-model' later)",
    )

    # mine
    p_mine = sub.add_parser("mine", help="Mine files into the palace")
    p_mine.add_argument("dir", help="Directory to mine")
    p_mine.add_argument(
        "--mode",
        choices=["projects", "convos"],
        default="projects",
        help="Ingest mode: 'projects' for code/docs (default), 'convos' for chat exports",
    )
    p_mine.add_argument("--wing", default=None, help="Wing name (default: directory name)")
    p_mine.add_argument(
        "--no-gitignore",
        action="store_true",
        help="Don't respect .gitignore files when scanning project files",
    )
    p_mine.add_argument(
        "--include-ignored",
        action="append",
        default=[],
        help="Always scan these project-relative paths even if ignored; repeat or pass comma-separated paths",
    )
    p_mine.add_argument(
        "--agent",
        default="mempalace",
        help="Your name — recorded on every drawer (default: mempalace)",
    )
    p_mine.add_argument("--limit", type=int, default=0, help="Max files to process (0 = all)")
    p_mine.add_argument(
        "--dry-run", action="store_true", help="Show what would be filed without filing"
    )
    p_mine.add_argument(
        "--full",
        action="store_true",
        help="Force full rebuild — re-mine all files even if content is unchanged",
    )
    p_mine.add_argument(
        "--extract",
        choices=["exchange", "general"],
        default="exchange",
        help="Extraction strategy for convos mode: 'exchange' (default) or 'general' (5 memory types)",
    )

    # search
    p_search = sub.add_parser("search", help="Find anything, exact words")
    p_search.add_argument("query", help="What to search for")
    p_search.add_argument("--wing", default=None, help="Limit to one project")
    p_search.add_argument("--room", default=None, help="Limit to one room")
    p_search.add_argument("--results", type=int, default=5, help="Number of results")

    # compress
    p_compress = sub.add_parser(
        "compress", help="Compress drawers using AAAK Dialect (~30x reduction)"
    )
    p_compress.add_argument("--wing", default=None, help="Wing to compress (default: all wings)")
    p_compress.add_argument(
        "--dry-run", action="store_true", help="Preview compression without storing"
    )
    p_compress.add_argument(
        "--config", default=None, help="Entity config JSON (e.g. entities.json)"
    )

    # wake-up
    p_wakeup = sub.add_parser("wake-up", help="Show L0 + L1 wake-up context (~600-900 tokens)")
    p_wakeup.add_argument("--wing", default=None, help="Wake-up for a specific project/wing")

    # split
    p_split = sub.add_parser(
        "split",
        help="Split concatenated transcript mega-files into per-session files (run before mine)",
    )
    p_split.add_argument("dir", help="Directory containing transcript files")
    p_split.add_argument(
        "--output-dir",
        default=None,
        help="Write split files here (default: same directory as source files)",
    )
    p_split.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be split without writing files",
    )
    p_split.add_argument(
        "--min-sessions",
        type=int,
        default=2,
        help="Only split files containing at least N sessions (default: 2)",
    )

    # diary (nested subcommands: currently only 'write')
    p_diary = sub.add_parser("diary", help="Diary commands")
    diary_sub = p_diary.add_subparsers(dest="diary_command")

    p_diary_write = diary_sub.add_parser("write", help="Write a diary entry")
    p_diary_write.add_argument("--agent", required=True, help="Agent name (e.g. claude-code)")
    p_diary_write.add_argument(
        "--entry", required=True, help="Diary entry content (stored verbatim)"
    )
    p_diary_write.add_argument("--topic", default="general", help="Topic tag (default: general)")
    p_diary_write.add_argument(
        "--wing", default=None, help="Override target wing (default: wing_<agent>)"
    )

    # migrate-storage
    p_migrate = sub.add_parser(
        "migrate-storage",
        help="Migrate a ChromaDB palace to LanceDB (requires mempalace[chroma])",
    )
    p_migrate.add_argument("src_palace", help="Source ChromaDB palace path")
    p_migrate.add_argument("dst_palace", help="Destination LanceDB palace path")
    p_migrate.add_argument(
        "--backup-dir",
        default=None,
        help="Directory for the source backup tar.gz (default: parent of src_palace)",
    )
    p_migrate.add_argument(
        "--force",
        action="store_true",
        help="Allow appending to a non-empty destination palace",
    )
    p_migrate.add_argument(
        "--embed-model",
        default=None,
        help="Embedding model for the destination (default: all-MiniLM-L6-v2)",
    )
    p_migrate.add_argument(
        "--verify",
        action="store_true",
        help="Verify per-wing counts after migration; exit non-zero on mismatch",
    )

    # repair (no options beyond the global --palace)
    sub.add_parser(
        "repair",
        help="Rebuild palace vector index from stored data (fixes segfaults after corruption)",
    )

    # status
    sub.add_parser("status", help="Show what's been filed")

    # fetch-model
    p_fetch = sub.add_parser("fetch-model", help="Download the embedding model (~80 MB)")
    p_fetch.add_argument(
        "--model",
        default=None,
        help="Model name (default: all-MiniLM-L6-v2)",
    )
    p_fetch.add_argument(
        "--force",
        action="store_true",
        help="Re-download even if already cached",
    )

    # export
    p_export = sub.add_parser("export", help="Export drawers (and KG) to a JSONL file for backup")
    p_export.add_argument(
        "--out",
        required=True,
        metavar="FILE",
        help="Output JSONL file path (use '-' for stdout)",
    )
    p_export.add_argument(
        "--only-manual",
        action="store_true",
        help="Export only manually-added drawers (chunker_strategy in manual_v1, diary_v1)",
    )
    p_export.add_argument("--wing", default=None, help="Limit export to one wing")
    p_export.add_argument("--room", default=None, help="Limit export to one room")
    p_export.add_argument(
        "--since",
        default=None,
        metavar="DATE",
        help="Export only drawers filed on or after this ISO date (e.g. 2026-01-01)",
    )
    p_export.add_argument("--with-kg", action="store_true", help="Include KG triples in export")
    p_export.add_argument(
        "--with-embeddings",
        action="store_true",
        help="Include raw embedding vectors (larger file)",
    )
    p_export.add_argument("--pretty", action="store_true", help="Pretty-print JSON (larger file)")

    # import
    p_import = sub.add_parser("import", help="Import drawers (and KG) from a JSONL export file")
    p_import.add_argument("jsonl_file", help="JSONL export file to import (use '-' for stdin)")
    p_import.add_argument(
        "--skip-dedup",
        action="store_true",
        help="Skip duplicate detection (import all records regardless of similarity)",
    )
    p_import.add_argument("--skip-kg", action="store_true", help="Skip KG triple import")
    p_import.add_argument(
        "--dry-run",
        action="store_true",
        help="Preview what would be imported without writing anything",
    )
    p_import.add_argument(
        "--wing-override",
        default=None,
        metavar="WING",
        help="Override the wing for all imported drawers",
    )

    args = parser.parse_args()

    # No subcommand: show help and exit 0.
    if not args.command:
        parser.print_help()
        return

    # 'diary' with no sub-subcommand: show the diary help and exit 2.
    if args.command == "diary" and not args.diary_command:
        p_diary.print_help()
        sys.exit(2)

    # Give cmd_diary access to the diary parser for its own help fallback.
    if args.command == "diary":
        args._diary_parser = p_diary

    # Command name -> handler function.
    dispatch = {
        "init": cmd_init,
        "mine": cmd_mine,
        "split": cmd_split,
        "search": cmd_search,
        "compress": cmd_compress,
        "wake-up": cmd_wakeup,
        "migrate-storage": cmd_migrate_storage,
        "repair": cmd_repair,
        "status": cmd_status,
        "diary": cmd_diary,
        "fetch-model": cmd_fetch_model,
        "export": cmd_export,
        "import": cmd_import,
    }
    dispatch[args.command](args)
|
|
808
|
+
|
|
809
|
+
|
|
810
|
+
# Script entry point: `python -m mempalace.cli` or direct execution.
if __name__ == "__main__":
    main()
|