opencode-semantic-memory 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
opencode_memory/cli.py ADDED
@@ -0,0 +1,794 @@
1
+ """CLI entry point for opencode-memory."""
2
+
3
+ import argparse
4
+ import sys
5
+ from pathlib import Path
6
+
7
+ from opencode_memory.server import main as server_main
8
+
9
+
10
def _build_cli_parser() -> argparse.ArgumentParser:
    """Build the argument parser with every ``opencode-memory`` subcommand."""
    parser = argparse.ArgumentParser(
        description="OpenCode Memory - Persistent semantic memory for OpenCode sessions"
    )
    parser.add_argument(
        "--version",
        action="version",
        version="opencode-memory 0.1.0",
    )

    subparsers = parser.add_subparsers(dest="command", help="Commands")

    serve_parser = subparsers.add_parser("serve", help="Start the MCP server")
    serve_parser.add_argument(
        "--no-daemon",
        action="store_true",
        help="Disable background ingestion daemon",
    )

    ingest_parser = subparsers.add_parser("ingest", help="Ingest files into memory")
    ingest_parser.add_argument("path", help="Path to file or directory to ingest")
    ingest_parser.add_argument(
        "--recursive", "-r", action="store_true", help="Recursively ingest directories"
    )

    subparsers.add_parser("stats", help="Show memory statistics")

    migrate_parser = subparsers.add_parser(
        "migrate", help="Migrate existing .opencode/ notes into memory"
    )
    migrate_parser.add_argument(
        "--path",
        type=str,
        default="~/gitlab_projects/.opencode",
        help="Path to .opencode directory (default: ~/gitlab_projects/.opencode)",
    )
    migrate_parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be migrated without actually migrating",
    )

    enrich_parser = subparsers.add_parser("enrich", help="Enrich entities with GitLab metadata")
    enrich_parser.add_argument(
        "--limit", "-l", type=int, default=50, help="Maximum entities to enrich"
    )
    enrich_parser.add_argument(
        "--stale-hours", type=int, default=24, help="Consider entities stale after this many hours"
    )

    cleanup_parser = subparsers.add_parser(
        "cleanup", help="Archive old/expired memories (conservative, preserves searchability)"
    )
    cleanup_parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be archived without actually archiving",
    )
    cleanup_parser.add_argument(
        "--resolved-blockers-days",
        type=int,
        default=90,
        help="Archive resolved blockers older than N days (default: 90)",
    )
    cleanup_parser.add_argument(
        "--conversations-days",
        type=int,
        default=180,
        help="Archive conversation summaries older than N days (default: 180)",
    )

    backfill_parser = subparsers.add_parser(
        "backfill-projects", help="Backfill project field for existing memories and re-embed"
    )
    backfill_parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be updated without actually updating",
    )
    backfill_parser.add_argument(
        "--batch-size",
        type=int,
        default=100,
        help="Process memories in batches of this size (default: 100)",
    )

    extract_parser = subparsers.add_parser(
        "extract-knowledge",
        help="Use LLM to extract procedures, directives, and tips from conversations",
    )
    extract_parser.add_argument(
        "--limit",
        "-l",
        type=int,
        default=10,
        help="Maximum conversations to process (default: 10)",
    )
    extract_parser.add_argument(
        "--since-days",
        type=int,
        default=None,
        help="Only process conversations from last N days (default: all)",
    )
    extract_parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be extracted without storing",
    )
    extract_parser.add_argument(
        "--project",
        type=str,
        help="Filter to specific project (e.g., 'gitlab-org/gitlab')",
    )

    return parser


def _ingest_markdown(target: str, recursive: bool) -> None:
    """Parse markdown under *target* and store its entities and memories.

    *target* may be a single file (ingested as-is) or a directory, in which
    case ``*.md`` files are collected, recursively when *recursive* is true.
    Any file with a ``node_modules`` path component is skipped.

    Exits with status 1 when *target* is neither a file nor a directory.
    """
    path = Path(target).expanduser()
    if path.is_file():
        files = [path]
    elif path.is_dir():
        files = list(path.rglob("*.md") if recursive else path.glob("*.md"))
    else:
        print(f"Path not found: {path}", file=sys.stderr)
        sys.exit(1)

    # Project imports are deferred until the path is known to be valid so a
    # typo fails fast without constructing the storage/embedding objects.
    from opencode_memory.config import Config
    from opencode_memory.ingestion.embeddings import EmbeddingEngine
    from opencode_memory.ingestion.parser import MarkdownParser
    from opencode_memory.models import Entity
    from opencode_memory.storage.sqlite import SQLiteStorage
    from opencode_memory.storage.vectors import VectorStorage

    config = Config.load()
    sqlite = SQLiteStorage(config.db_path)
    embeddings = EmbeddingEngine()
    vectors = VectorStorage(config.vectors_path, embeddings.dimension)
    parser_instance = MarkdownParser()

    for file_path in files:
        if "node_modules" in file_path.parts:
            continue
        print(f"Ingesting: {file_path}")
        doc = parser_instance.parse_file(file_path)

        entity_ids = []
        for entity_type, ref in doc.entities:
            entity_ids.append(sqlite.upsert_entity(Entity(type=entity_type, ref=ref)))

        for memory in doc.memories:
            memory_id = sqlite.insert_memory(memory, entity_ids)
            # Build the embedding text once; it is both embedded and stored.
            text = memory.embedding_content()
            vectors.add(f"mem_{memory_id}", memory_id, text, embeddings.embed(text))
        print(f" - {len(doc.entities)} entities, {len(doc.memories)} memories")

    print("Done!")


def main() -> None:
    """Main CLI entry point.

    Parses ``sys.argv`` and dispatches to the selected subcommand. Running
    with no subcommand behaves like ``serve``.
    """
    parser = _build_cli_parser()
    args = parser.parse_args()

    if args.command == "serve" or args.command is None:
        # ``no_daemon`` only exists on the ``serve`` sub-parser, so it is
        # absent when the command was omitted entirely.
        enable_daemon = not getattr(args, "no_daemon", False)
        server_main(enable_daemon=enable_daemon)
    elif args.command == "ingest":
        _ingest_markdown(args.path, args.recursive)
    elif args.command == "migrate":
        _migrate_opencode_notes(args.path, args.dry_run)
    elif args.command == "stats":
        _show_stats()
    elif args.command == "enrich":
        _enrich_entities(args.limit, args.stale_hours)
    elif args.command == "cleanup":
        _cleanup_memories(
            dry_run=args.dry_run,
            resolved_blockers_days=args.resolved_blockers_days,
            conversations_days=args.conversations_days,
        )
    elif args.command == "backfill-projects":
        _backfill_projects(dry_run=args.dry_run, batch_size=args.batch_size)
    elif args.command == "extract-knowledge":
        _extract_knowledge(
            limit=args.limit,
            since_days=args.since_days,
            dry_run=args.dry_run,
            project=args.project,
        )
    else:
        # Defensive fallback for a command name with no handler above.
        parser.print_help()
201
+
202
+
203
def _show_stats() -> None:
    """Show memory statistics.

    Reads counts directly from the memory database and, when the local HTTP
    server answers on ``127.0.0.1:9824``, adds its daemon, cache and
    embedding-queue details. Prints a notice and returns when the database
    file does not exist.
    """
    import json
    import sqlite3
    import urllib.request
    from contextlib import closing

    from opencode_memory.config import Config

    config = Config.load()
    db_path = config.db_path
    vectors_path = config.vectors_path

    if not db_path.exists():
        print("No memory database found.")
        return

    # Try to get live stats from HTTP server (may be slow if MCP sessions are active)
    server_stats = None
    try:
        with urllib.request.urlopen("http://127.0.0.1:9824/stats", timeout=15) as resp:
            server_stats = json.loads(resp.read().decode())
    except Exception:
        # Deliberately best-effort: server not running, busy, or returning
        # something unparsable all mean "fall back to direct DB access".
        pass

    # sqlite3's own context manager only commits/rolls back; ``closing`` is
    # what actually releases the connection.
    with closing(sqlite3.connect(db_path)) as conn:
        conn.row_factory = sqlite3.Row

        cursor = conn.execute("SELECT COUNT(*) as count FROM memories WHERE resolved_at IS NULL")
        total_memories = cursor.fetchone()["count"]

        cursor = conn.execute(
            "SELECT category, COUNT(*) as count FROM memories WHERE resolved_at IS NULL GROUP BY category ORDER BY count DESC"
        )
        categories = cursor.fetchall()

        cursor = conn.execute("SELECT COUNT(*) as count FROM entities")
        total_entities = cursor.fetchone()["count"]

        cursor = conn.execute(
            "SELECT type, COUNT(*) as count FROM entities GROUP BY type ORDER BY count DESC"
        )
        entity_types = cursor.fetchall()

        cursor = conn.execute(
            "SELECT COUNT(DISTINCT source_file) as count FROM memories WHERE source_file LIKE 'opencode:session:%'"
        )
        sessions_ingested = cursor.fetchone()["count"]

        cursor = conn.execute(
            "SELECT COUNT(*) as count FROM memories WHERE resolved_at IS NOT NULL"
        )
        resolved_count = cursor.fetchone()["count"]

        # Get link stats
        link_count = 0
        link_types = []
        try:
            cursor = conn.execute("SELECT COUNT(*) as count FROM memory_links")
            link_count = cursor.fetchone()["count"]
            cursor = conn.execute(
                "SELECT link_type, COUNT(*) as count FROM memory_links GROUP BY link_type"
            )
            link_types = cursor.fetchall()
        except sqlite3.OperationalError:
            pass  # Table may not exist in older databases

    opencode_db = Path("~/.local/share/opencode/opencode.db").expanduser()
    total_sessions = 0
    if opencode_db.exists():
        try:
            # Read-only URI so the stats command can never modify OpenCode's DB.
            with closing(sqlite3.connect(f"file:{opencode_db}?mode=ro", uri=True)) as oc_conn:
                total_sessions = oc_conn.execute("SELECT COUNT(*) FROM session").fetchone()[0]
        except sqlite3.Error:
            # An unreadable or incompatible OpenCode DB should not abort the
            # report; the session total simply stays at 0.
            total_sessions = 0

    db_size = db_path.stat().st_size / (1024 * 1024)
    vectors_size = (
        sum(f.stat().st_size for f in vectors_path.rglob("*") if f.is_file()) / (1024 * 1024)
        if vectors_path.exists()
        else 0
    )

    print("=" * 60)
    print("OpenCode Memory Statistics")
    print("=" * 60)
    print()

    # Server status
    if server_stats:
        daemon = server_stats.get("daemon", {})
        print(f"Server: running (daemon: {'active' if daemon.get('running') else 'stopped'})")
    else:
        print("Server: not running (showing database stats only)")
    print()

    print(f"Total memories: {total_memories}")
    print(f"Total entities: {total_entities}")
    print(f"Resolved blockers: {resolved_count}")
    print(f"Memory links: {link_count}")
    print()

    print("Memories by category:")
    for row in categories:
        print(f" {row['category']:15} {row['count']:>6}")
    print()

    print("Entities by type:")
    for row in entity_types:
        print(f" {row['type']:15} {row['count']:>6}")
    print()

    if link_types:
        print("Links by type:")
        for row in link_types:
            print(f" {row['link_type']:15} {row['count']:>6}")
        print()

    print(f"Sessions ingested: {sessions_ingested} / {total_sessions}")
    if total_sessions > 0:
        pct = (sessions_ingested / total_sessions) * 100
        print(f"Ingestion progress: {pct:.1f}%")
    print()

    # Storage
    print("Storage:")
    print(f" Database: {db_size:>8.2f} MB")
    print(f" Vector store: {vectors_size:>8.2f} MB")
    print(f" Total: {db_size + vectors_size:>8.2f} MB")

    # Cache stats from server
    if server_stats:
        print()
        cache = server_stats.get("cache", {})
        if cache:
            print("Cache:")
            print(f" Size: {cache.get('size', 0)} / {cache.get('max_size', 0)}")
            print(f" Hit rate: {cache.get('hit_rate', 0) * 100:.1f}%")
            print(f" Hits: {cache.get('hits', 0)}")
            print(f" Misses: {cache.get('misses', 0)}")

        eq = server_stats.get("embedding_queue", {})
        if eq:
            print()
            print("Embedding queue:")
            print(f" Status: {eq.get('status', 'unknown')}")
            print(f" Pending: {eq.get('pending', 0)}")
347
+
348
+
349
def _enrich_entities(limit: int, stale_hours: int) -> None:
    """Enrich stale entities with GitLab metadata.

    Args:
        limit: Maximum number of stale entities to fetch for enrichment.
        stale_hours: Passed to storage as the maximum age, in hours, before
            an entity counts as stale.

    Exits with status 1 when ``GITLAB_TOKEN`` is not set in the environment.
    """
    import asyncio
    import os

    # Check the credential before importing project modules so a missing
    # token fails immediately.
    if not os.environ.get("GITLAB_TOKEN"):
        print("Error: GITLAB_TOKEN environment variable not set", file=sys.stderr)
        sys.exit(1)

    from opencode_memory.config import Config
    from opencode_memory.enrichment.gitlab import GitLabEnricher
    from opencode_memory.storage.sqlite import SQLiteStorage

    config = Config.load()
    sqlite = SQLiteStorage(config.db_path)
    enricher = GitLabEnricher()

    stale_entities = sqlite.get_stale_entities(max_age_hours=stale_hours, limit=limit)

    if not stale_entities:
        print("No stale entities to enrich.")
        return

    print(f"Found {len(stale_entities)} stale entities to enrich...")

    async def do_enrich() -> int:
        """Enrich each stale entity in turn; return how many were updated."""
        enriched_count = 0
        try:
            for entity in stale_entities:
                try:
                    enriched = await enricher.enrich_entity(entity)
                    # Only persist (and report) entities that actually gained data.
                    if enriched.title or enriched.metadata:
                        sqlite.upsert_entity(enriched)
                        enriched_count += 1
                        print(
                            f" {entity.type.value:6} {entity.ref:12} -> {enriched.title or '(no title)'}"
                        )
                except Exception as e:
                    # One failing entity must not stop the rest of the batch.
                    print(f" {entity.type.value:6} {entity.ref:12} -> Error: {e}")
        finally:
            # Close the enricher even if the loop is interrupted or cancelled.
            await enricher.close()
        return enriched_count

    total_enriched = asyncio.run(do_enrich())
    print(f"\nEnriched {total_enriched} of {len(stale_entities)} entities.")
393
+
394
+
395
def _cleanup_memories(dry_run: bool, resolved_blockers_days: int, conversations_days: int) -> None:
    """Archive old/expired memories.

    This is a conservative cleanup that:
    - Archives (not deletes) memories for audit/search purposes
    - Only targets: expired memories, old resolved blockers, very old conversations
    - Does NOT touch: decisions, facts, procedures, active blockers

    Args:
        dry_run: When true, print the candidates and thresholds and return
            without archiving anything.
        resolved_blockers_days: Minimum age in days for resolved blockers.
        conversations_days: Minimum age in days for conversation summaries.

    Exits with status 1 when either day threshold is negative.
    """
    # Reject negative ages up front: presumably they would make every
    # candidate qualify for archiving (the storage helpers are not shown here).
    for flag, days in (
        ("--resolved-blockers-days", resolved_blockers_days),
        ("--conversations-days", conversations_days),
    ):
        if days < 0:
            print(f"Error: {flag} must be zero or greater (got {days})", file=sys.stderr)
            sys.exit(1)

    from opencode_memory.config import Config
    from opencode_memory.storage.sqlite import SQLiteStorage

    config = Config.load()
    sqlite = SQLiteStorage(config.db_path)

    stats = sqlite.get_cleanup_stats()

    print("=" * 50)
    print("Memory Cleanup" + (" (DRY RUN)" if dry_run else ""))
    print("=" * 50)
    print()
    print("Cleanup candidates:")
    print(f" Expired memories: {stats['expired']}")
    print(f" Resolved blockers: {stats['resolved_blockers']}")
    # NOTE(review): the "(90d+)" label is hard-coded while --conversations-days
    # defaults to 180; confirm which window get_cleanup_stats() really uses.
    print(f" Old conversations (90d+): {stats['old_conversations']}")
    print(f" Already archived: {stats['archived_total']}")
    print()

    if dry_run:
        print("Would archive:")
        print(f" - All {stats['expired']} expired memories")
        print(f" - Resolved blockers older than {resolved_blockers_days} days")
        print(f" - Conversation summaries older than {conversations_days} days")
        print()
        print("Run without --dry-run to proceed.")
        return

    archived_expired = sqlite.archive_expired_memories()
    print(f"Archived {archived_expired} expired memories")

    archived_blockers = sqlite.archive_old_resolved_blockers(days_old=resolved_blockers_days)
    print(f"Archived {archived_blockers} old resolved blockers")

    archived_conversations = sqlite.archive_old_conversations(days_old=conversations_days)
    print(f"Archived {archived_conversations} old conversations")

    total = archived_expired + archived_blockers + archived_conversations
    print()
    print(f"Total archived: {total}")
    print()
    print("Note: Archived memories remain searchable via memory_search with include_archived=true")
445
+
446
+
447
def _migrate_opencode_notes(opencode_path: str, dry_run: bool) -> None:
    """Migrate existing .opencode/ notes into memory.

    Scans for markdown files in the .opencode directory structure and ingests
    them into the memory system with progress reporting.

    Args:
        opencode_path: Directory to scan; ``~`` is expanded.
        dry_run: When true, only list the files that would be migrated.

    Files under a ``node_modules`` or ``archive`` directory *inside* the
    scanned root are skipped. Exits with status 1 when the root is not an
    existing directory.
    """
    path = Path(opencode_path).expanduser()
    if not path.is_dir():
        print(f"Error: Directory not found: {path}", file=sys.stderr)
        sys.exit(1)

    skipped_dirs = {"node_modules", "archive"}
    # Test components relative to the root: a root that itself lives under a
    # directory called "archive" must not exclude every file beneath it.
    md_files = sorted(
        f for f in path.rglob("*.md") if skipped_dirs.isdisjoint(f.relative_to(path).parts)
    )

    if not md_files:
        print(f"No markdown files found in {path}")
        return

    print("=" * 50)
    print("OpenCode Notes Migration" + (" (DRY RUN)" if dry_run else ""))
    print("=" * 50)
    print()
    print(f"Source: {path}")
    print(f"Files found: {len(md_files)}")
    print()

    if dry_run:
        print("Files to migrate:")
        for f in md_files:
            rel = f.relative_to(path)
            print(f" {rel}")
        print()
        print("Run without --dry-run to proceed.")
        return

    # Project imports are deferred so the checks and the dry-run above do not
    # need to import the storage or embedding modules.
    from opencode_memory.config import Config
    from opencode_memory.ingestion.embeddings import EmbeddingEngine
    from opencode_memory.ingestion.parser import MarkdownParser
    from opencode_memory.models import Entity
    from opencode_memory.storage.sqlite import SQLiteStorage
    from opencode_memory.storage.vectors import VectorStorage

    config = Config.load()
    sqlite = SQLiteStorage(config.db_path)
    embeddings = EmbeddingEngine()
    vectors = VectorStorage(config.vectors_path, embeddings.dimension)
    parser_instance = MarkdownParser()

    total_entities = 0
    total_memories = 0
    failed_files = 0

    for i, file_path in enumerate(md_files, 1):
        rel = file_path.relative_to(path)
        print(f"[{i}/{len(md_files)}] {rel}")

        try:
            doc = parser_instance.parse_file(file_path)

            entity_ids = []
            for entity_type, ref in doc.entities:
                entity_ids.append(sqlite.upsert_entity(Entity(type=entity_type, ref=ref)))
            total_entities += len(doc.entities)

            for memory in doc.memories:
                memory_id = sqlite.insert_memory(memory, entity_ids)
                # Build the embedding text once; it is both embedded and stored.
                text = memory.embedding_content()
                vectors.add(f"mem_{memory_id}", memory_id, text, embeddings.embed(text))
                # Count per stored memory so a mid-file failure is not lost.
                total_memories += 1

            print(f" {len(doc.entities)} entities, {len(doc.memories)} memories")
        except Exception as e:
            # Keep going: one unreadable note must not abort the migration.
            failed_files += 1
            print(f" Error: {e}")

    print()
    print("=" * 50)
    print("Migration complete!")
    print(f" Files processed: {len(md_files)}")
    if failed_files:
        print(f" Files failed: {failed_files}")
    print(f" Entities found: {total_entities}")
    print(f" Memories created: {total_memories}")
    print()
    print("Tip: Run 'opencode-memory stats' to see updated statistics")
533
+
534
+
535
def _backfill_projects(dry_run: bool = False, batch_size: int = 100) -> None:
    """Backfill project field for existing memories and re-embed with project prefix.

    Args:
        dry_run: When true, only report the detected project distribution.
        batch_size: Number of memories re-embedded between progress lines;
            must be at least 1.

    Exits with status 1 when *batch_size* is smaller than 1.
    """
    import sqlite3
    from contextlib import ExitStack, closing

    # Validate before touching the database: 0 would make range() raise only
    # after the UPDATEs were committed, and a negative step re-embeds nothing.
    if batch_size < 1:
        print(f"Error: --batch-size must be at least 1 (got {batch_size})", file=sys.stderr)
        sys.exit(1)

    from opencode_memory.config import Config
    from opencode_memory.ingestion.embeddings import EmbeddingEngine
    from opencode_memory.project import detect_project_from_path, detect_project_from_git
    from opencode_memory.storage.sqlite import SQLiteStorage
    from opencode_memory.storage.vectors import VectorStorage

    config = Config.load()
    # Not used directly below; presumably constructing it prepares the
    # database schema - TODO confirm before removing.
    sqlite = SQLiteStorage(config.db_path)

    if dry_run:
        print("DRY RUN - no changes will be made")
        print()

    session_prefix = "opencode:session:"
    opencode_db = Path.home() / ".local/share/opencode/opencode.db"

    with ExitStack() as stack:
        # ``closing`` guarantees the connections are released on every exit
        # path, including the early returns below.
        conn = stack.enter_context(closing(sqlite3.connect(config.db_path)))
        conn.row_factory = sqlite3.Row

        cursor = conn.execute(
            "SELECT id, source_file, project, content FROM memories WHERE project IS NULL"
        )
        memories_to_update = cursor.fetchall()

        print(f"Found {len(memories_to_update)} memories without project")

        if not memories_to_update:
            print("Nothing to backfill!")
            return

        # Open OpenCode's database once (read-only) instead of once per row.
        oc_conn = None
        if opencode_db.exists():
            try:
                oc_conn = stack.enter_context(
                    closing(sqlite3.connect(f"file:{opencode_db}?mode=ro", uri=True))
                )
            except sqlite3.Error:
                oc_conn = None  # Best-effort: session memories stay unresolved

        session_projects: dict[str, str | None] = {}

        def project_for_session(session_id: str) -> str | None:
            """Resolve (and cache) the project of an OpenCode session."""
            if session_id in session_projects:
                return session_projects[session_id]
            detected = None
            if oc_conn is not None:
                try:
                    oc_row = oc_conn.execute(
                        "SELECT directory FROM session WHERE id = ?", (session_id,)
                    ).fetchone()
                    if oc_row and oc_row[0]:
                        detected = detect_project_from_path(
                            oc_row[0]
                        ) or detect_project_from_git(oc_row[0])
                except Exception:
                    # Detection is best-effort; an unresolved session is
                    # reported as "(unknown)" rather than aborting the run.
                    detected = None
            session_projects[session_id] = detected
            return detected

        project_counts: dict[str | None, int] = {}
        updates: list[tuple[str | None, int]] = []

        for row in memories_to_update:
            source_file = row["source_file"]
            memory_id = row["id"]

            project = None
            if source_file:
                if source_file.startswith(session_prefix):
                    # removeprefix strips only the leading marker, unlike
                    # str.replace which would also hit later occurrences.
                    project = project_for_session(source_file.removeprefix(session_prefix))
                else:
                    project = detect_project_from_path(source_file)

            updates.append((project, memory_id))
            project_counts[project] = project_counts.get(project, 0) + 1

        print()
        print("Project distribution:")
        for project, count in sorted(project_counts.items(), key=lambda x: -x[1]):
            print(f" {project or '(unknown)'}: {count}")

        if dry_run:
            print()
            print("DRY RUN complete - no changes made")
            return

        print()
        print("Updating database...")

        # Only memories whose project could be detected are written back.
        resolved = [(project, memory_id) for project, memory_id in updates if project]
        conn.executemany("UPDATE memories SET project = ? WHERE id = ?", resolved)
        conn.commit()

        print(f"Updated {len(resolved)} memories with project")

        print()
        print("Re-embedding memories with project prefix...")

        embeddings = EmbeddingEngine()
        vectors = VectorStorage(config.vectors_path, embeddings.dimension)

        cursor = conn.execute("SELECT id, content, project FROM memories WHERE project IS NOT NULL")
        all_with_project = cursor.fetchall()

        reembedded = 0
        for i in range(0, len(all_with_project), batch_size):
            batch = all_with_project[i : i + batch_size]
            for row in batch:
                memory_id = row["id"]
                content = row["content"]
                project = row["project"]

                prefixed_content = f"[{project}] {content}"
                embedding = embeddings.embed(prefixed_content)
                vectors.add(f"mem_{memory_id}", memory_id, prefixed_content, embedding)
                reembedded += 1

            print(f" Re-embedded {min(i + batch_size, len(all_with_project))}/{len(all_with_project)}")

    print()
    print(f"Backfill complete! Re-embedded {reembedded} memories with project prefix")
649
+
650
+
651
def _parse_extraction_items(response: str) -> list[dict]:
    """Return the JSON objects in *response* that have ``category`` and ``content``.

    Each line is considered on its own; lines that do not start with ``{`` or
    that are not valid JSON are ignored.
    """
    import json

    items = []
    for raw_line in response.split("\n"):
        line = raw_line.strip()
        if not line.startswith("{"):
            continue
        try:
            item = json.loads(line)
        except json.JSONDecodeError:
            continue
        if isinstance(item, dict) and "category" in item and "content" in item:
            items.append(item)
    return items


def _extract_knowledge(
    limit: int,
    since_days: int | None,
    dry_run: bool,
    project: str | None,
) -> None:
    """Extract procedures, directives, and tips from conversations using LLM.

    Args:
        limit: Maximum number of unprocessed conversations to fetch.
        since_days: Passed through to the conversation query; ``None`` is the
            CLI default ("all").
        dry_run: When true, print what was extracted without storing it.
        project: When set, only conversations whose ``project`` equals this
            value are processed.

    Exits with status 1 when the ``opencode`` executable cannot be found.
    """
    import asyncio

    from opencode_memory.config import Config
    from opencode_memory.extraction import (
        EXTRACTION_PROMPT,
        _find_opencode,
        call_opencode,
        get_unprocessed_conversations,
    )
    from opencode_memory.ingestion.embeddings import EmbeddingEngine
    from opencode_memory.models import LinkType, Memory, MemoryCategory, MemoryLink
    from opencode_memory.storage.sqlite import SQLiteStorage
    from opencode_memory.storage.vectors import VectorStorage

    if not _find_opencode():
        # Errors go to stderr, matching the other subcommands.
        print("Error: opencode not found in PATH or ~/.opencode/bin/", file=sys.stderr)
        print("Install from https://opencode.ai", file=sys.stderr)
        sys.exit(1)

    config = Config.load()
    sqlite = SQLiteStorage(config.db_path)
    embeddings = EmbeddingEngine()
    vectors = VectorStorage(config.vectors_path, embeddings.dimension)

    conversations = get_unprocessed_conversations(sqlite, since_days=since_days, limit=limit)

    # The project filter runs after `limit` was applied by the query above, so
    # fewer than `limit` conversations may remain.
    if project:
        conversations = [c for c in conversations if c.get("project") == project]

    if not conversations:
        print("No unprocessed conversations found matching criteria.")
        return

    print(f"Found {len(conversations)} conversations to process\n")

    max_prompt_chars = 15000
    category_map = {
        "procedure": MemoryCategory.PROCEDURE,
        "directive": MemoryCategory.DIRECTIVE,
        "decision": MemoryCategory.DECISION,
        "fact": MemoryCategory.FACT,
    }

    async def process_one(conv: dict) -> int:
        """Run extraction for one conversation; return the number of items."""
        conv_id = conv["id"]
        content = conv["content"]
        conv_project = conv.get("project")

        if len(content) > max_prompt_chars:
            content = content[:max_prompt_chars] + "\n\n[... truncated ...]"

        try:
            response = await call_opencode(EXTRACTION_PROMPT + content)
        except Exception as e:
            print(f" Error calling opencode: {e}")
            return 0

        count = 0
        for item in _parse_extraction_items(response):
            category_str = item.get("category", "fact")
            # Model output is untrusted: a non-string category (e.g. a list)
            # would be unhashable, so fall back to FACT like any unknown value.
            if isinstance(category_str, str):
                category = category_map.get(category_str, MemoryCategory.FACT)
            else:
                category = MemoryCategory.FACT

            # "what" may be present but null, which a plain .get() default
            # does not cover.
            summary = item.get("what") or "No summary"
            print(f" [{category_str}] {str(summary)[:50]}")

            if dry_run:
                count += 1
                continue

            memory = Memory(
                category=category,
                content=item.get("content", ""),
                what=item.get("what"),
                why=item.get("why"),
                learned=item.get("learned"),
                project=conv_project,
                source_file=conv.get("source_file"),
            )

            memory_id = sqlite.insert_memory(memory)

            # Build the embedding text once; it is both embedded and stored.
            text = memory.embedding_content()
            vectors.add(f"mem_{memory_id}", memory_id, text, embeddings.embed(text))

            # Link the new memory back to the conversation it came from.
            link = MemoryLink(
                source_memory_id=conv_id,
                target_memory_id=memory_id,
                link_type=LinkType.EXTENDS,
                strength=0.9,
                reason="Knowledge extracted from conversation via LLM",
            )
            sqlite.insert_link(link)

            count += 1

        return count

    total_extracted = 0
    for conv in conversations:
        title = conv.get("what") or "Untitled"
        print(f"Processing: {title[:60]}...")

        extracted_count = asyncio.run(process_one(conv))
        if extracted_count > 0:
            print(f" Extracted {extracted_count} items")
            total_extracted += extracted_count
        else:
            print(" No knowledge extracted")
        print()

    print()
    if dry_run:
        print(f"DRY RUN complete - would have extracted ~{total_extracted} items")
    else:
        print(f"Extraction complete! Stored {total_extracted} new memories")
791
+
792
+
793
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()