mempalace-code 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mempalace/config.py ADDED
@@ -0,0 +1,149 @@
1
+ """
2
+ MemPalace configuration system.
3
+
4
+ Priority: env vars > config file (~/.mempalace/config.json) > defaults
5
+ """
6
+
7
+ import json
8
+ import os
9
+ from pathlib import Path
10
+
11
# Palace data directory used when neither an environment variable nor the
# config file provides one.
DEFAULT_PALACE_PATH = os.path.expanduser("~/.mempalace/palace")

# Collection name used when the config file does not set "collection_name".
DEFAULT_COLLECTION_NAME = "mempalace_drawers"

# Wing names used when the config file does not set "topic_wings".
DEFAULT_TOPIC_WINGS = [
    "emotions", "consciousness", "memory", "technical",
    "identity", "family", "creative",
]

# Keyword lists used when the config file does not set "hall_keywords".
# The keys are the same names, in the same order, as DEFAULT_TOPIC_WINGS.
DEFAULT_HALL_KEYWORDS = {
    "emotions": [
        "scared", "afraid", "worried", "happy", "sad",
        "love", "hate", "feel", "cry", "tears",
    ],
    "consciousness": [
        "consciousness", "conscious", "aware", "real",
        "genuine", "soul", "exist", "alive",
    ],
    "memory": ["memory", "remember", "forget", "recall", "archive", "palace", "store"],
    "technical": [
        "code", "python", "script", "bug", "error",
        "function", "api", "database", "server",
    ],
    "identity": ["identity", "name", "who am i", "persona", "self"],
    "family": ["family", "kids", "children", "daughter", "son", "parent", "mother", "father"],
    "creative": ["game", "gameplay", "player", "app", "design", "art", "music", "story"],
}
63
+
64
+
65
class MempalaceConfig:
    """Configuration manager for MemPalace.

    Settings come from ``config.json`` in the config directory, with
    module-level defaults for anything the file does not set. Only
    ``palace_path`` can additionally be overridden by an environment
    variable. A missing, unreadable or malformed config file is treated as
    empty rather than raising.
    """

    def __init__(self, config_dir=None):
        """Initialize config and load ``config.json`` if it is usable.

        Args:
            config_dir: Override config directory (useful for testing).
                Defaults to ~/.mempalace.
        """
        self._config_dir = (
            Path(config_dir) if config_dir else Path(os.path.expanduser("~/.mempalace"))
        )
        self._config_file = self._config_dir / "config.json"
        self._people_map_file = self._config_dir / "people_map.json"

        loaded = self._read_json_object(self._config_file)
        self._file_config = loaded if loaded is not None else {}

    @staticmethod
    def _read_json_object(path):
        """Return the JSON object stored at *path*, or None if it is unusable.

        "Unusable" covers a missing or unreadable file, bytes that are not
        valid UTF-8, invalid JSON, and valid JSON whose top level is not an
        object. Every accessor calls ``.get`` on the result, so anything
        other than a dict would crash later.
        """
        try:
            # Files written by this class are pure ASCII (json.dump escapes
            # non-ASCII by default), so reading as UTF-8 is always compatible.
            with open(path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            return None
        return data if isinstance(data, dict) else None

    @property
    def palace_path(self):
        """Path to the memory palace data directory.

        Priority: ``MEMPALACE_PALACE_PATH``, then ``MEMPAL_PALACE_PATH``,
        then the config file, then ``DEFAULT_PALACE_PATH``. Empty
        environment values are ignored.
        """
        env_val = os.environ.get("MEMPALACE_PALACE_PATH") or os.environ.get("MEMPAL_PALACE_PATH")
        if env_val:
            return env_val
        return self._file_config.get("palace_path", DEFAULT_PALACE_PATH)

    @property
    def collection_name(self):
        """ChromaDB collection name (config file, else the default)."""
        return self._file_config.get("collection_name", DEFAULT_COLLECTION_NAME)

    @property
    def people_map(self):
        """Mapping of name variants to canonical names.

        ``people_map.json`` is re-read on every access and wins when it
        holds a JSON object; otherwise the ``people_map`` entry of the
        config file is used, falling back to an empty dict.
        """
        from_file = self._read_json_object(self._people_map_file)
        if from_file is not None:
            return from_file
        return self._file_config.get("people_map", {})

    @property
    def topic_wings(self):
        """List of topic wing names (config file, else the default)."""
        return self._file_config.get("topic_wings", DEFAULT_TOPIC_WINGS)

    @property
    def hall_keywords(self):
        """Mapping of hall names to keyword lists (config file, else the default)."""
        return self._file_config.get("hall_keywords", DEFAULT_HALL_KEYWORDS)

    def init(self):
        """Create config directory and write default config.json if it doesn't exist.

        An existing config file is never overwritten.

        Returns:
            Path of the config file.
        """
        self._config_dir.mkdir(parents=True, exist_ok=True)
        if not self._config_file.exists():
            default_config = {
                "palace_path": DEFAULT_PALACE_PATH,
                "collection_name": DEFAULT_COLLECTION_NAME,
                "topic_wings": DEFAULT_TOPIC_WINGS,
                "hall_keywords": DEFAULT_HALL_KEYWORDS,
            }
            with open(self._config_file, "w", encoding="utf-8") as f:
                json.dump(default_config, f, indent=2)
        return self._config_file

    def save_people_map(self, people_map):
        """Write people_map.json to config directory, replacing any existing file.

        Args:
            people_map: Dict mapping name variants to canonical names.

        Returns:
            Path of the written file.
        """
        self._config_dir.mkdir(parents=True, exist_ok=True)
        with open(self._people_map_file, "w", encoding="utf-8") as f:
            json.dump(people_map, f, indent=2)
        return self._people_map_file
@@ -0,0 +1,415 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ convo_miner.py — Mine conversations into the palace.
4
+
5
+ Ingests chat exports (Claude Code, ChatGPT, Slack, plain text transcripts).
6
+ Normalizes format, chunks by exchange pair (Q+A = one unit), files to palace.
7
+
8
+ Same palace as project mining. Different ingest strategy.
9
+ """
10
+
11
+ import os
12
+ import sys
13
+ import time
14
+ import hashlib
15
+ from pathlib import Path
16
+ from datetime import datetime
17
+ from collections import defaultdict
18
+
19
+ from .storage import open_store
20
+ from .normalize import normalize
21
+ from .miner import BATCH_SIZE, add_drawers_batch
22
+ from .version import __version__
23
+
24
+
25
# Suffixes (compared lower-cased) of files that might contain conversations.
CONVO_EXTENSIONS = {".txt", ".md", ".json", ".jsonl"}

# Directory names that are never descended into while scanning.
SKIP_DIRS = {
    ".git", "node_modules", "__pycache__",
    ".venv", "venv", "env",
    "dist", "build", ".next",
    ".mempalace", "tool-results", "memory",
}

# Size threshold, in characters, below which files and chunks are discarded.
MIN_CHUNK_SIZE = 30
49
+
50
+
51
+ # =============================================================================
52
+ # CHUNKING — exchange pairs for conversations
53
+ # =============================================================================
54
+
55
+
56
def chunk_exchanges(content: str) -> list:
    """
    Split conversation text into chunks.

    When at least three lines start with ">" (ignoring surrounding
    whitespace) the text is chunked by exchange pair: one ">" turn plus the
    response that follows. Otherwise paragraph chunking is used.
    """
    all_lines = content.split("\n")

    marker_count = 0
    for raw in all_lines:
        if raw.strip().startswith(">"):
            marker_count += 1

    if marker_count < 3:
        return _chunk_by_paragraph(content)
    return _chunk_by_exchange(all_lines)
68
+
69
+
70
+ def _chunk_by_exchange(lines: list) -> list:
71
+ """One user turn (>) + the AI response that follows = one chunk."""
72
+ chunks = []
73
+ i = 0
74
+
75
+ while i < len(lines):
76
+ line = lines[i]
77
+ if line.strip().startswith(">"):
78
+ user_turn = line.strip()
79
+ i += 1
80
+
81
+ ai_lines = []
82
+ while i < len(lines):
83
+ next_line = lines[i]
84
+ if next_line.strip().startswith(">") or next_line.strip().startswith("---"):
85
+ break
86
+ if next_line.strip():
87
+ ai_lines.append(next_line.strip())
88
+ i += 1
89
+
90
+ ai_response = " ".join(ai_lines[:8])
91
+ content = f"{user_turn}\n{ai_response}" if ai_response else user_turn
92
+
93
+ if len(content.strip()) > MIN_CHUNK_SIZE:
94
+ chunks.append(
95
+ {
96
+ "content": content,
97
+ "chunk_index": len(chunks),
98
+ }
99
+ )
100
+ else:
101
+ i += 1
102
+
103
+ return chunks
104
+
105
+
106
+ def _chunk_by_paragraph(content: str) -> list:
107
+ """Fallback: chunk by paragraph breaks."""
108
+ chunks = []
109
+ paragraphs = [p.strip() for p in content.split("\n\n") if p.strip()]
110
+
111
+ # If no paragraph breaks and long content, chunk by line groups
112
+ if len(paragraphs) <= 1 and content.count("\n") > 20:
113
+ lines = content.split("\n")
114
+ for i in range(0, len(lines), 25):
115
+ group = "\n".join(lines[i : i + 25]).strip()
116
+ if len(group) > MIN_CHUNK_SIZE:
117
+ chunks.append({"content": group, "chunk_index": len(chunks)})
118
+ return chunks
119
+
120
+ for para in paragraphs:
121
+ if len(para) > MIN_CHUNK_SIZE:
122
+ chunks.append({"content": para, "chunk_index": len(chunks)})
123
+
124
+ return chunks
125
+
126
+
127
+ # =============================================================================
128
+ # ROOM DETECTION — topic-based for conversations
129
+ # =============================================================================
130
+
131
# Room name -> keywords searched for (as substrings) when scoring a
# conversation. Dictionary order matters: it decides ties between rooms.
TOPIC_KEYWORDS = {
    "technical": [
        "code", "python", "function", "bug", "error", "api", "database",
        "server", "deploy", "git", "test", "debug", "refactor",
    ],
    "architecture": [
        "architecture", "design", "pattern", "structure", "schema",
        "interface", "module", "component", "service", "layer",
    ],
    "planning": [
        "plan", "roadmap", "milestone", "deadline", "priority",
        "sprint", "backlog", "scope", "requirement", "spec",
    ],
    "decisions": [
        "decided", "chose", "picked", "switched", "migrated",
        "replaced", "trade-off", "alternative", "option", "approach",
    ],
    "problems": [
        "problem", "issue", "broken", "failed", "crash",
        "stuck", "workaround", "fix", "solved", "resolved",
    ],
}
196
+
197
+
198
def detect_convo_room(content: str) -> str:
    """Pick the room whose keywords best match the conversation.

    Only the first 3000 characters are examined, case-insensitively. A
    room's score is the number of its keywords that occur as substrings.
    The highest score wins, ties go to the room listed first in
    TOPIC_KEYWORDS, and "general" is returned when nothing matches.
    """
    sample = content[:3000].lower()

    best_room = "general"
    best_score = 0
    for room, keywords in TOPIC_KEYWORDS.items():
        hits = len([kw for kw in keywords if kw in sample])
        # Strict comparison keeps the earliest room when scores are equal.
        if hits > best_score:
            best_room, best_score = room, hits

    return best_room
209
+
210
+
211
+ # =============================================================================
212
+ # PALACE OPERATIONS
213
+ # =============================================================================
214
+
215
+
216
def get_collection(palace_path: str):
    """Ensure the palace directory exists, then open its drawer store.

    The store is opened through ``open_store`` with ``create=True``.
    """
    os.makedirs(palace_path, exist_ok=True)
    store = open_store(palace_path, create=True)
    return store
220
+
221
+
222
def file_already_mined(collection, source_file: str) -> bool:
    """Report whether the store already holds a drawer for *source_file*.

    Asks the collection for at most one record whose ``source_file``
    metadata matches. Any error raised by the lookup is treated as
    "not mined", so the caller will process the file.
    """
    query = {"source_file": source_file}
    try:
        found = collection.get(where=query, limit=1)
        matching_ids = found.get("ids", [])
        return len(matching_ids) > 0
    except Exception:
        # Best effort: a failing lookup must not abort the mining run.
        return False
228
+
229
+
230
+ # =============================================================================
231
+ # SCAN FOR CONVERSATION FILES
232
+ # =============================================================================
233
+
234
+
235
def scan_convos(convo_dir: str) -> list:
    """Collect candidate conversation files beneath *convo_dir*.

    Directories named in SKIP_DIRS are not descended into. Files ending in
    ".meta.json" are ignored, and the rest are kept when their lower-cased
    suffix is in CONVO_EXTENSIONS. Returns a list of ``Path`` objects in
    the order ``os.walk`` yields them.
    """
    base = Path(convo_dir).expanduser().resolve()
    found = []

    for current, subdirs, names in os.walk(base):
        # Assigning through the slice prunes the walk in place.
        subdirs[:] = [name for name in subdirs if name not in SKIP_DIRS]
        current_dir = Path(current)
        found.extend(
            current_dir / name
            for name in names
            if not name.endswith(".meta.json")
            and (current_dir / name).suffix.lower() in CONVO_EXTENSIONS
        )

    return found
248
+
249
+
250
+ # =============================================================================
251
+ # MINE CONVERSATIONS
252
+ # =============================================================================
253
+
254
+
255
def mine_convos(
    convo_dir: str,
    palace_path: str,
    wing: str = None,
    agent: str = "mempalace",
    limit: int = 0,
    dry_run: bool = False,
    extract_mode: str = "exchange",
):
    """Mine a directory of conversation files into the palace.

    Each file found by ``scan_convos`` is normalized, chunked and turned
    into drawer specs, which are written in batches of ``BATCH_SIZE``.
    Files that already have a drawer in the store are counted as skipped.
    Files that cannot be normalized, are shorter than ``MIN_CHUNK_SIZE``
    or yield no chunks are passed over and not counted as processed.
    Progress and a summary are printed to stdout.

    Args:
        convo_dir: Directory to scan for conversation files.
        palace_path: Directory of the palace store to write to.
        wing: Wing recorded on every drawer. Defaults to the source
            directory name, lower-cased, with spaces and dashes replaced
            by underscores.
        agent: Value stored as ``added_by`` in each drawer's metadata.
        limit: When greater than zero, only the first ``limit`` files
            found are considered.
        dry_run: Report what would be filed without opening the store.
        extract_mode:
            "exchange" — default exchange-pair chunking (Q+A = one unit)
            "general" — general extractor: decisions, preferences, milestones, problems, emotions
    """
    general_mode = extract_mode == "general"
    if general_mode:
        # Only this mode needs these names, so import them once up front
        # instead of on every loop iteration.
        from collections import Counter

        from .general_extractor import extract_memories

    convo_path = Path(convo_dir).expanduser().resolve()
    if not wing:
        wing = convo_path.name.lower().replace(" ", "_").replace("-", "_")

    files = scan_convos(convo_dir)
    if limit > 0:
        files = files[:limit]

    print(f"\n{'=' * 55}")
    print(" MemPalace Mine — Conversations")
    print(f"{'=' * 55}")
    print(f" Wing: {wing}")
    print(f" Source: {convo_path}")
    print(f" Files: {len(files)}")
    print(f" Palace: {palace_path}")
    if dry_run:
        print(" DRY RUN — nothing will be filed")
    print(f"{'-' * 55}\n")

    # A dry run never touches the store.
    collection = get_collection(palace_path) if not dry_run else None

    total_drawers = 0
    files_skipped = 0
    files_processed = 0
    room_counts = defaultdict(int)
    batch_buffer: list = []

    def flush_batch() -> None:
        """Write the buffered specs and add the batch result to the total."""
        nonlocal total_drawers
        # presumably add_drawers_batch returns the number of drawers written
        total_drawers += add_drawers_batch(collection, batch_buffer)
        batch_buffer.clear()

    for i, filepath in enumerate(files, 1):
        source_file = str(filepath)

        # Skip if already filed
        if not dry_run and file_already_mined(collection, source_file):
            files_skipped += 1
            continue

        # Normalize format; unreadable or unsupported files are passed over.
        try:
            content = normalize(source_file)
        except (OSError, ValueError):
            continue

        if not content or len(content.strip()) < MIN_CHUNK_SIZE:
            continue

        # Chunk — either exchange pairs or general extraction
        chunks = extract_memories(content) if general_mode else chunk_exchanges(content)
        if not chunks:
            continue

        # Exchange mode files the whole file under one room; general mode
        # uses each chunk's memory_type as its room.
        room = None if general_mode else detect_convo_room(content)

        # Only files that actually yield drawers count as processed.
        files_processed += 1

        if dry_run:
            if general_mode:
                type_counts = Counter(c.get("memory_type", "general") for c in chunks)
                types_str = ", ".join(f"{t}:{n}" for t, n in type_counts.most_common())
                print(f" [DRY RUN] {filepath.name} → {len(chunks)} memories ({types_str})")
                for c in chunks:
                    room_counts[c.get("memory_type", "general")] += 1
            else:
                print(f" [DRY RUN] {filepath.name} → room:{room} ({len(chunks)} drawers)")
                room_counts[room] += 1
            total_drawers += len(chunks)
            continue

        if not general_mode:
            room_counts[room] += 1

        # Build specs for this file; accumulate into the batch buffer
        file_spec_count = 0
        for chunk in chunks:
            if general_mode:
                # Same fallback as the dry run, so a chunk without a
                # memory_type never produces a None room.
                chunk_room = chunk.get("memory_type", "general")
                room_counts[chunk_room] += 1
            else:
                chunk_room = room

            # MD5 is used only to derive a stable identifier, not for security.
            digest = hashlib.md5(
                (source_file + str(chunk["chunk_index"])).encode(),
                usedforsecurity=False,
            ).hexdigest()[:16]
            batch_buffer.append(
                {
                    "id": f"drawer_{wing}_{chunk_room}_{digest}",
                    "content": chunk["content"],
                    "metadata": {
                        "wing": wing,
                        "room": chunk_room,
                        "source_file": source_file,
                        "chunk_index": chunk["chunk_index"],
                        "added_by": agent,
                        "filed_at": datetime.now().isoformat(),
                        "ingest_mode": "convos",
                        "extract_mode": extract_mode,
                        "extractor_version": __version__,
                        "chunker_strategy": "convo_turn_v1",
                    },
                }
            )
            file_spec_count += 1

        print(f" ✓ [{i:4}/{len(files)}] {filepath.name[:50]:50} +{file_spec_count}")
        if len(batch_buffer) >= BATCH_SIZE:
            flush_batch()

    if not dry_run:
        # Write whatever is left in the buffer, then optimize the store.
        flush_batch()
        t0 = time.time()
        print(" >> Optimizing storage...", end="", flush=True)
        collection.optimize()
        print(f" done ({time.time() - t0:.1f}s)", flush=True)

    print(f"\n{'=' * 55}")
    print(" Done.")
    print(f" Files processed: {files_processed}")
    print(f" Files skipped (already filed): {files_skipped}")
    print(f" Drawers filed: {total_drawers}")
    if room_counts:
        # General mode counts one per memory; exchange mode one per file.
        unit = "memories" if general_mode else "files"
        print("\n By room:")
        for room_name, count in sorted(room_counts.items(), key=lambda x: x[1], reverse=True):
            print(f" {room_name:20} {count} {unit}")
    print('\n Next: mempalace search "what you\'re looking for"')
    print(f"{'=' * 55}\n")
407
+
408
+
409
if __name__ == "__main__":
    # Command-line entry point. The options advertised in the usage line
    # are parsed here so they take effect instead of being ignored.
    # NOTE(review): this module uses relative imports, so it presumably has
    # to be started as a package module (python -m ...) — confirm and
    # update the usage text accordingly.
    if len(sys.argv) < 2:
        print("Usage: python convo_miner.py <convo_dir> [--palace PATH] [--limit N] [--dry-run]")
        sys.exit(1)

    # Local imports: only the entry point needs them.
    import argparse

    from .config import MempalaceConfig

    parser = argparse.ArgumentParser(description="Mine conversations into the palace.")
    parser.add_argument("convo_dir", help="directory containing conversation files")
    parser.add_argument("--palace", default=None, help="palace directory (default: configured path)")
    parser.add_argument("--limit", type=int, default=0, help="process at most N files (0 = no limit)")
    parser.add_argument("--dry-run", action="store_true", help="report without filing anything")
    args = parser.parse_args()

    mine_convos(
        args.convo_dir,
        palace_path=args.palace or MempalaceConfig().palace_path,
        limit=args.limit,
        dry_run=args.dry_run,
    )