opencode-semantic-memory 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opencode_memory/__init__.py +3 -0
- opencode_memory/cache.py +261 -0
- opencode_memory/cli.py +794 -0
- opencode_memory/config.py +89 -0
- opencode_memory/daemon.py +879 -0
- opencode_memory/enrichment/__init__.py +0 -0
- opencode_memory/enrichment/gitlab.py +237 -0
- opencode_memory/extraction.py +225 -0
- opencode_memory/historical_ingest.py +142 -0
- opencode_memory/http_server.py +464 -0
- opencode_memory/ingestion/__init__.py +7 -0
- opencode_memory/ingestion/embeddings.py +211 -0
- opencode_memory/ingestion/extractors.py +287 -0
- opencode_memory/ingestion/opencode_db.py +448 -0
- opencode_memory/ingestion/parser.py +344 -0
- opencode_memory/ingestion/watcher.py +88 -0
- opencode_memory/linking/__init__.py +5 -0
- opencode_memory/linking/linker.py +323 -0
- opencode_memory/metrics.py +273 -0
- opencode_memory/models.py +171 -0
- opencode_memory/project.py +86 -0
- opencode_memory/query/__init__.py +5 -0
- opencode_memory/query/hybrid.py +196 -0
- opencode_memory/server.py +2795 -0
- opencode_memory/session/__init__.py +5 -0
- opencode_memory/session/registry.py +57 -0
- opencode_memory/storage/__init__.py +6 -0
- opencode_memory/storage/sqlite.py +1608 -0
- opencode_memory/storage/vectors.py +199 -0
- opencode_semantic_memory-0.1.0.dist-info/METADATA +531 -0
- opencode_semantic_memory-0.1.0.dist-info/RECORD +33 -0
- opencode_semantic_memory-0.1.0.dist-info/WHEEL +4 -0
- opencode_semantic_memory-0.1.0.dist-info/entry_points.txt +3 -0
opencode_memory/cli.py
ADDED
|
@@ -0,0 +1,794 @@
|
|
|
1
|
+
"""CLI entry point for opencode-memory."""
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import sys
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from opencode_memory.server import main as server_main
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _build_parser() -> argparse.ArgumentParser:
    """Create the top-level argument parser and register every subcommand."""
    parser = argparse.ArgumentParser(
        description="OpenCode Memory - Persistent semantic memory for OpenCode sessions"
    )
    parser.add_argument(
        "--version",
        action="version",
        version="opencode-memory 0.1.0",
    )

    subparsers = parser.add_subparsers(dest="command", help="Commands")

    serve_parser = subparsers.add_parser("serve", help="Start the MCP server")
    serve_parser.add_argument(
        "--no-daemon",
        action="store_true",
        help="Disable background ingestion daemon",
    )

    ingest_parser = subparsers.add_parser("ingest", help="Ingest files into memory")
    ingest_parser.add_argument("path", help="Path to file or directory to ingest")
    ingest_parser.add_argument(
        "--recursive", "-r", action="store_true", help="Recursively ingest directories"
    )

    subparsers.add_parser("stats", help="Show memory statistics")

    migrate_parser = subparsers.add_parser(
        "migrate", help="Migrate existing .opencode/ notes into memory"
    )
    migrate_parser.add_argument(
        "--path",
        type=str,
        default="~/gitlab_projects/.opencode",
        help="Path to .opencode directory (default: ~/gitlab_projects/.opencode)",
    )
    migrate_parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be migrated without actually migrating",
    )

    enrich_parser = subparsers.add_parser("enrich", help="Enrich entities with GitLab metadata")
    enrich_parser.add_argument(
        "--limit", "-l", type=int, default=50, help="Maximum entities to enrich"
    )
    enrich_parser.add_argument(
        "--stale-hours", type=int, default=24, help="Consider entities stale after this many hours"
    )

    cleanup_parser = subparsers.add_parser(
        "cleanup", help="Archive old/expired memories (conservative, preserves searchability)"
    )
    cleanup_parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be archived without actually archiving",
    )
    cleanup_parser.add_argument(
        "--resolved-blockers-days",
        type=int,
        default=90,
        help="Archive resolved blockers older than N days (default: 90)",
    )
    cleanup_parser.add_argument(
        "--conversations-days",
        type=int,
        default=180,
        help="Archive conversation summaries older than N days (default: 180)",
    )

    backfill_parser = subparsers.add_parser(
        "backfill-projects", help="Backfill project field for existing memories and re-embed"
    )
    backfill_parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be updated without actually updating",
    )
    backfill_parser.add_argument(
        "--batch-size",
        type=int,
        default=100,
        help="Process memories in batches of this size (default: 100)",
    )

    extract_parser = subparsers.add_parser(
        "extract-knowledge",
        help="Use LLM to extract procedures, directives, and tips from conversations",
    )
    extract_parser.add_argument(
        "--limit",
        "-l",
        type=int,
        default=10,
        help="Maximum conversations to process (default: 10)",
    )
    extract_parser.add_argument(
        "--since-days",
        type=int,
        default=None,
        help="Only process conversations from last N days (default: all)",
    )
    extract_parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be extracted without storing",
    )
    extract_parser.add_argument(
        "--project",
        type=str,
        help="Filter to specific project (e.g., 'gitlab-org/gitlab')",
    )

    return parser


def _ingest_path(raw_path: str, recursive: bool) -> None:
    """Ingest one markdown file, or the ``*.md`` files found in a directory.

    Args:
        raw_path: File or directory given on the command line; ``~`` is expanded.
        recursive: When ``raw_path`` is a directory, descend into subdirectories.

    Exits with status 1 (message on stderr) when the path is neither a file
    nor a directory.
    """
    path = Path(raw_path).expanduser()
    if path.is_file():
        # An explicitly named file is always ingested.
        files = [path]
    elif path.is_dir():
        candidates = path.rglob("*.md") if recursive else path.glob("*.md")
        # Test only the parts *below* the scanned directory: an ancestor that
        # happens to be called "node_modules" must not exclude every file.
        files = [f for f in candidates if "node_modules" not in f.relative_to(path).parts]
    else:
        print(f"Path not found: {path}", file=sys.stderr)
        sys.exit(1)

    # Project imports and storage/embedding setup are deferred until the path
    # has been validated, so a typo fails fast without initialising anything.
    from opencode_memory.config import Config
    from opencode_memory.ingestion.embeddings import EmbeddingEngine
    from opencode_memory.ingestion.parser import MarkdownParser
    from opencode_memory.models import Entity
    from opencode_memory.storage.sqlite import SQLiteStorage
    from opencode_memory.storage.vectors import VectorStorage

    config = Config.load()
    sqlite = SQLiteStorage(config.db_path)
    embeddings = EmbeddingEngine()
    vectors = VectorStorage(config.vectors_path, embeddings.dimension)
    parser_instance = MarkdownParser()

    for file_path in files:
        print(f"Ingesting: {file_path}")
        doc = parser_instance.parse_file(file_path)

        # Every memory of a document is linked to all entities found in it.
        entity_ids = [
            sqlite.upsert_entity(Entity(type=entity_type, ref=ref))
            for entity_type, ref in doc.entities
        ]

        for memory in doc.memories:
            memory_id = sqlite.insert_memory(memory, entity_ids)
            text = memory.embedding_content()
            vectors.add(f"mem_{memory_id}", memory_id, text, embeddings.embed(text))
        print(f" - {len(doc.entities)} entities, {len(doc.memories)} memories")

    print("Done!")


def main() -> None:
    """Main CLI entry point.

    Parses ``sys.argv`` and dispatches to the selected subcommand. Running
    without a subcommand behaves like ``serve``.
    """
    parser = _build_parser()
    args = parser.parse_args()

    if args.command == "serve" or args.command is None:
        # With no subcommand the namespace has no ``no_daemon`` attribute,
        # hence getattr with a default.
        enable_daemon = not getattr(args, "no_daemon", False)
        server_main(enable_daemon=enable_daemon)
    elif args.command == "ingest":
        _ingest_path(args.path, args.recursive)
    elif args.command == "migrate":
        _migrate_opencode_notes(args.path, args.dry_run)
    elif args.command == "stats":
        _show_stats()
    elif args.command == "enrich":
        _enrich_entities(args.limit, args.stale_hours)
    elif args.command == "cleanup":
        _cleanup_memories(
            dry_run=args.dry_run,
            resolved_blockers_days=args.resolved_blockers_days,
            conversations_days=args.conversations_days,
        )
    elif args.command == "backfill-projects":
        _backfill_projects(dry_run=args.dry_run, batch_size=args.batch_size)
    elif args.command == "extract-knowledge":
        _extract_knowledge(
            limit=args.limit,
            since_days=args.since_days,
            dry_run=args.dry_run,
            project=args.project,
        )
    else:
        # Defensive fallback; argparse rejects unknown subcommands itself.
        parser.print_help()
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def _show_stats() -> None:
    """Show memory statistics.

    Reads counts straight from the memory database and, when the local HTTP
    server answers, adds its live daemon/cache/embedding-queue figures.
    Prints a notice and returns early when no memory database exists yet.
    """
    import json
    import sqlite3
    import urllib.request
    from contextlib import closing

    from opencode_memory.config import Config

    config = Config.load()
    db_path = config.db_path
    vectors_path = config.vectors_path

    if not db_path.exists():
        print("No memory database found.")
        return

    # Try to get live stats from HTTP server (may be slow if MCP sessions are active)
    server_stats = None
    try:
        with urllib.request.urlopen("http://127.0.0.1:9824/stats", timeout=15) as resp:
            server_stats = json.loads(resp.read().decode())
    except Exception:
        # Deliberately best-effort: server not running or busy, fall back to
        # direct DB access.
        pass

    # ``with sqlite3.connect(...)`` alone only commits/rolls back; closing()
    # is what actually releases the connection.
    with closing(sqlite3.connect(db_path)) as conn:
        conn.row_factory = sqlite3.Row

        cursor = conn.execute("SELECT COUNT(*) as count FROM memories WHERE resolved_at IS NULL")
        total_memories = cursor.fetchone()["count"]

        cursor = conn.execute(
            "SELECT category, COUNT(*) as count FROM memories WHERE resolved_at IS NULL GROUP BY category ORDER BY count DESC"
        )
        categories = cursor.fetchall()

        cursor = conn.execute("SELECT COUNT(*) as count FROM entities")
        total_entities = cursor.fetchone()["count"]

        cursor = conn.execute(
            "SELECT type, COUNT(*) as count FROM entities GROUP BY type ORDER BY count DESC"
        )
        entity_types = cursor.fetchall()

        cursor = conn.execute(
            "SELECT COUNT(DISTINCT source_file) as count FROM memories WHERE source_file LIKE 'opencode:session:%'"
        )
        sessions_ingested = cursor.fetchone()["count"]

        cursor = conn.execute(
            "SELECT COUNT(*) as count FROM memories WHERE resolved_at IS NOT NULL"
        )
        resolved_count = cursor.fetchone()["count"]

        # Get link stats
        link_count = 0
        link_types = []
        try:
            cursor = conn.execute("SELECT COUNT(*) as count FROM memory_links")
            link_count = cursor.fetchone()["count"]
            cursor = conn.execute(
                "SELECT link_type, COUNT(*) as count FROM memory_links GROUP BY link_type"
            )
            link_types = cursor.fetchall()
        except sqlite3.Error:
            pass  # Table may not exist in older databases

    opencode_db = Path("~/.local/share/opencode/opencode.db").expanduser()
    total_sessions = 0
    if opencode_db.exists():
        try:
            # Opened read-only: this database belongs to another application.
            with closing(sqlite3.connect(f"file:{opencode_db}?mode=ro", uri=True)) as oc_conn:
                cursor = oc_conn.execute("SELECT COUNT(*) FROM session")
                total_sessions = cursor.fetchone()[0]
        except sqlite3.Error:
            # An unreadable or differently-shaped external DB should not
            # abort the report; the session total just stays at 0.
            total_sessions = 0

    db_size = db_path.stat().st_size / (1024 * 1024)
    vectors_size = (
        sum(f.stat().st_size for f in vectors_path.rglob("*") if f.is_file()) / (1024 * 1024)
        if vectors_path.exists()
        else 0
    )

    print("=" * 60)
    print("OpenCode Memory Statistics")
    print("=" * 60)
    print()

    # Server status
    if server_stats:
        daemon = server_stats.get("daemon", {})
        print(f"Server: running (daemon: {'active' if daemon.get('running') else 'stopped'})")
    else:
        print("Server: not running (showing database stats only)")
    print()

    print(f"Total memories: {total_memories}")
    print(f"Total entities: {total_entities}")
    print(f"Resolved blockers: {resolved_count}")
    print(f"Memory links: {link_count}")
    print()

    print("Memories by category:")
    for row in categories:
        print(f" {row['category']:15} {row['count']:>6}")
    print()

    print("Entities by type:")
    for row in entity_types:
        print(f" {row['type']:15} {row['count']:>6}")
    print()

    if link_types:
        print("Links by type:")
        for row in link_types:
            print(f" {row['link_type']:15} {row['count']:>6}")
        print()

    print(f"Sessions ingested: {sessions_ingested} / {total_sessions}")
    if total_sessions > 0:
        pct = (sessions_ingested / total_sessions) * 100
        print(f"Ingestion progress: {pct:.1f}%")
    print()

    # Storage
    print("Storage:")
    print(f" Database: {db_size:>8.2f} MB")
    print(f" Vector store: {vectors_size:>8.2f} MB")
    print(f" Total: {db_size + vectors_size:>8.2f} MB")

    # Cache stats from server
    if server_stats:
        print()
        cache = server_stats.get("cache", {})
        if cache:
            print("Cache:")
            print(f" Size: {cache.get('size', 0)} / {cache.get('max_size', 0)}")
            print(f" Hit rate: {cache.get('hit_rate', 0) * 100:.1f}%")
            print(f" Hits: {cache.get('hits', 0)}")
            print(f" Misses: {cache.get('misses', 0)}")

        eq = server_stats.get("embedding_queue", {})
        if eq:
            print()
            print("Embedding queue:")
            print(f" Status: {eq.get('status', 'unknown')}")
            print(f" Pending: {eq.get('pending', 0)}")
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
def _enrich_entities(limit: int, stale_hours: int) -> None:
    """Enrich stale entities with GitLab metadata.

    Args:
        limit: Maximum number of stale entities to fetch and enrich.
        stale_hours: Passed to the storage layer as ``max_age_hours`` when
            selecting stale entities.

    Exits with status 1 when ``GITLAB_TOKEN`` is not set in the environment.
    """
    import asyncio
    import os

    from opencode_memory.config import Config
    from opencode_memory.enrichment.gitlab import GitLabEnricher
    from opencode_memory.storage.sqlite import SQLiteStorage

    if not os.environ.get("GITLAB_TOKEN"):
        print("Error: GITLAB_TOKEN environment variable not set", file=sys.stderr)
        sys.exit(1)

    config = Config.load()
    sqlite = SQLiteStorage(config.db_path)
    enricher = GitLabEnricher()

    stale_entities = sqlite.get_stale_entities(max_age_hours=stale_hours, limit=limit)

    if not stale_entities:
        print("No stale entities to enrich.")
        return

    print(f"Found {len(stale_entities)} stale entities to enrich...")

    async def do_enrich() -> int:
        """Enrich each stale entity in turn and return how many were stored."""
        enriched_count = 0
        try:
            for entity in stale_entities:
                try:
                    enriched = await enricher.enrich_entity(entity)
                    # Only persist when the enricher actually produced something.
                    if enriched.title or enriched.metadata:
                        sqlite.upsert_entity(enriched)
                        enriched_count += 1
                        print(
                            f" {entity.type.value:6} {entity.ref:12} -> {enriched.title or '(no title)'}"
                        )
                except Exception as e:
                    # One failing entity must not stop the rest of the batch.
                    print(f" {entity.type.value:6} {entity.ref:12} -> Error: {e}")
        finally:
            # Runs even on cancellation / Ctrl-C, which ``except Exception``
            # does not catch, so the enricher is always closed.
            await enricher.close()
        return enriched_count

    enriched_total = asyncio.run(do_enrich())
    print(f"\nEnriched {enriched_total} of {len(stale_entities)} entities.")
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
def _cleanup_memories(dry_run: bool, resolved_blockers_days: int, conversations_days: int) -> None:
    """Archive memories that are expired or past their retention window.

    The cleanup is intentionally conservative:
    - memories are archived rather than deleted, so they stay available for
      audit/search purposes
    - only expired memories, old resolved blockers and very old conversations
      are targeted
    - decisions, facts, procedures and active blockers are left alone

    Args:
        dry_run: Print the candidates and thresholds, then return without
            archiving anything.
        resolved_blockers_days: Age threshold (days) for resolved blockers.
        conversations_days: Age threshold (days) for conversation summaries.
    """
    from opencode_memory.config import Config
    from opencode_memory.storage.sqlite import SQLiteStorage

    storage = SQLiteStorage(Config.load().db_path)
    stats = storage.get_cleanup_stats()

    rule = "=" * 50
    title = "Memory Cleanup"
    if dry_run:
        title = title + " (DRY RUN)"

    print(rule)
    print(title)
    print(rule)
    print()
    print("Cleanup candidates:")
    print(f" Expired memories: {stats['expired']}")
    print(f" Resolved blockers: {stats['resolved_blockers']}")
    print(f" Old conversations (90d+): {stats['old_conversations']}")
    print(f" Already archived: {stats['archived_total']}")
    print()

    if not dry_run:
        # Each step archives first and reports its own count immediately.
        expired_n = storage.archive_expired_memories()
        print(f"Archived {expired_n} expired memories")

        blockers_n = storage.archive_old_resolved_blockers(days_old=resolved_blockers_days)
        print(f"Archived {blockers_n} old resolved blockers")

        conversations_n = storage.archive_old_conversations(days_old=conversations_days)
        print(f"Archived {conversations_n} old conversations")

        total = sum((expired_n, blockers_n, conversations_n))
        print()
        print(f"Total archived: {total}")
        print()
        print("Note: Archived memories remain searchable via memory_search with include_archived=true")
        return

    # Dry run: describe what would happen and stop.
    print("Would archive:")
    print(f" - All {stats['expired']} expired memories")
    print(f" - Resolved blockers older than {resolved_blockers_days} days")
    print(f" - Conversation summaries older than {conversations_days} days")
    print()
    print("Run without --dry-run to proceed.")
|
|
445
|
+
|
|
446
|
+
|
|
447
|
+
def _migrate_opencode_notes(opencode_path: str, dry_run: bool) -> None:
|
|
448
|
+
"""Migrate existing .opencode/ notes into memory.
|
|
449
|
+
|
|
450
|
+
Scans for markdown files in the .opencode directory structure and ingests
|
|
451
|
+
them into the memory system with progress reporting.
|
|
452
|
+
"""
|
|
453
|
+
from pathlib import Path
|
|
454
|
+
|
|
455
|
+
from opencode_memory.config import Config
|
|
456
|
+
from opencode_memory.ingestion.embeddings import EmbeddingEngine
|
|
457
|
+
from opencode_memory.ingestion.parser import MarkdownParser
|
|
458
|
+
from opencode_memory.models import Entity
|
|
459
|
+
from opencode_memory.storage.sqlite import SQLiteStorage
|
|
460
|
+
from opencode_memory.storage.vectors import VectorStorage
|
|
461
|
+
|
|
462
|
+
path = Path(opencode_path).expanduser()
|
|
463
|
+
if not path.exists():
|
|
464
|
+
print(f"Error: Directory not found: {path}", file=sys.stderr)
|
|
465
|
+
sys.exit(1)
|
|
466
|
+
|
|
467
|
+
md_files = [
|
|
468
|
+
f for f in path.rglob("*.md") if "node_modules" not in f.parts and "archive" not in f.parts
|
|
469
|
+
]
|
|
470
|
+
|
|
471
|
+
if not md_files:
|
|
472
|
+
print(f"No markdown files found in {path}")
|
|
473
|
+
return
|
|
474
|
+
|
|
475
|
+
print("=" * 50)
|
|
476
|
+
print("OpenCode Notes Migration" + (" (DRY RUN)" if dry_run else ""))
|
|
477
|
+
print("=" * 50)
|
|
478
|
+
print()
|
|
479
|
+
print(f"Source: {path}")
|
|
480
|
+
print(f"Files found: {len(md_files)}")
|
|
481
|
+
print()
|
|
482
|
+
|
|
483
|
+
if dry_run:
|
|
484
|
+
print("Files to migrate:")
|
|
485
|
+
for f in sorted(md_files):
|
|
486
|
+
rel = f.relative_to(path)
|
|
487
|
+
print(f" {rel}")
|
|
488
|
+
print()
|
|
489
|
+
print("Run without --dry-run to proceed.")
|
|
490
|
+
return
|
|
491
|
+
|
|
492
|
+
config = Config.load()
|
|
493
|
+
sqlite = SQLiteStorage(config.db_path)
|
|
494
|
+
embeddings = EmbeddingEngine()
|
|
495
|
+
vectors = VectorStorage(config.vectors_path, embeddings.dimension)
|
|
496
|
+
parser_instance = MarkdownParser()
|
|
497
|
+
|
|
498
|
+
total_entities = 0
|
|
499
|
+
total_memories = 0
|
|
500
|
+
|
|
501
|
+
for i, file_path in enumerate(sorted(md_files), 1):
|
|
502
|
+
rel = file_path.relative_to(path)
|
|
503
|
+
print(f"[{i}/{len(md_files)}] {rel}")
|
|
504
|
+
|
|
505
|
+
try:
|
|
506
|
+
doc = parser_instance.parse_file(file_path)
|
|
507
|
+
|
|
508
|
+
entity_ids = []
|
|
509
|
+
for entity_type, ref in doc.entities:
|
|
510
|
+
entity = Entity(type=entity_type, ref=ref)
|
|
511
|
+
entity_id = sqlite.upsert_entity(entity)
|
|
512
|
+
entity_ids.append(entity_id)
|
|
513
|
+
total_entities += len(doc.entities)
|
|
514
|
+
|
|
515
|
+
for memory in doc.memories:
|
|
516
|
+
memory_id = sqlite.insert_memory(memory, entity_ids)
|
|
517
|
+
embedding = embeddings.embed(memory.embedding_content())
|
|
518
|
+
vectors.add(f"mem_{memory_id}", memory_id, memory.embedding_content(), embedding)
|
|
519
|
+
total_memories += len(doc.memories)
|
|
520
|
+
|
|
521
|
+
print(f" {len(doc.entities)} entities, {len(doc.memories)} memories")
|
|
522
|
+
except Exception as e:
|
|
523
|
+
print(f" Error: {e}")
|
|
524
|
+
|
|
525
|
+
print()
|
|
526
|
+
print("=" * 50)
|
|
527
|
+
print("Migration complete!")
|
|
528
|
+
print(f" Files processed: {len(md_files)}")
|
|
529
|
+
print(f" Entities found: {total_entities}")
|
|
530
|
+
print(f" Memories created: {total_memories}")
|
|
531
|
+
print()
|
|
532
|
+
print("Tip: Run 'opencode-memory stats' to see updated statistics")
|
|
533
|
+
|
|
534
|
+
|
|
535
|
+
def _backfill_projects(dry_run: bool = False, batch_size: int = 100) -> None:
|
|
536
|
+
"""Backfill project field for existing memories and re-embed with project prefix."""
|
|
537
|
+
import sqlite3
|
|
538
|
+
|
|
539
|
+
from opencode_memory.config import Config
|
|
540
|
+
from opencode_memory.ingestion.embeddings import EmbeddingEngine
|
|
541
|
+
from opencode_memory.project import detect_project_from_path, detect_project_from_git
|
|
542
|
+
from opencode_memory.storage.sqlite import SQLiteStorage
|
|
543
|
+
from opencode_memory.storage.vectors import VectorStorage
|
|
544
|
+
|
|
545
|
+
config = Config.load()
|
|
546
|
+
sqlite = SQLiteStorage(config.db_path)
|
|
547
|
+
|
|
548
|
+
if dry_run:
|
|
549
|
+
print("DRY RUN - no changes will be made")
|
|
550
|
+
print()
|
|
551
|
+
|
|
552
|
+
conn = sqlite3.connect(config.db_path)
|
|
553
|
+
conn.row_factory = sqlite3.Row
|
|
554
|
+
|
|
555
|
+
cursor = conn.execute(
|
|
556
|
+
"SELECT id, source_file, project, content FROM memories WHERE project IS NULL"
|
|
557
|
+
)
|
|
558
|
+
memories_to_update = cursor.fetchall()
|
|
559
|
+
|
|
560
|
+
print(f"Found {len(memories_to_update)} memories without project")
|
|
561
|
+
|
|
562
|
+
if not memories_to_update:
|
|
563
|
+
print("Nothing to backfill!")
|
|
564
|
+
return
|
|
565
|
+
|
|
566
|
+
project_counts: dict[str | None, int] = {}
|
|
567
|
+
updates: list[tuple[str | None, int]] = []
|
|
568
|
+
|
|
569
|
+
for row in memories_to_update:
|
|
570
|
+
source_file = row["source_file"]
|
|
571
|
+
memory_id = row["id"]
|
|
572
|
+
|
|
573
|
+
project = None
|
|
574
|
+
if source_file:
|
|
575
|
+
if source_file.startswith("opencode:session:"):
|
|
576
|
+
session_id = source_file.replace("opencode:session:", "")
|
|
577
|
+
opencode_db = Path.home() / ".local/share/opencode/opencode.db"
|
|
578
|
+
if opencode_db.exists():
|
|
579
|
+
try:
|
|
580
|
+
oc_conn = sqlite3.connect(f"file:{opencode_db}?mode=ro", uri=True)
|
|
581
|
+
oc_cursor = oc_conn.execute(
|
|
582
|
+
"SELECT directory FROM session WHERE id = ?", (session_id,)
|
|
583
|
+
)
|
|
584
|
+
oc_row = oc_cursor.fetchone()
|
|
585
|
+
if oc_row and oc_row[0]:
|
|
586
|
+
project = detect_project_from_path(
|
|
587
|
+
oc_row[0]
|
|
588
|
+
) or detect_project_from_git(oc_row[0])
|
|
589
|
+
oc_conn.close()
|
|
590
|
+
except Exception:
|
|
591
|
+
pass
|
|
592
|
+
else:
|
|
593
|
+
project = detect_project_from_path(source_file)
|
|
594
|
+
|
|
595
|
+
updates.append((project, memory_id))
|
|
596
|
+
project_counts[project] = project_counts.get(project, 0) + 1
|
|
597
|
+
|
|
598
|
+
print()
|
|
599
|
+
print("Project distribution:")
|
|
600
|
+
for project, count in sorted(project_counts.items(), key=lambda x: -x[1]):
|
|
601
|
+
print(f" {project or '(unknown)'}: {count}")
|
|
602
|
+
|
|
603
|
+
if dry_run:
|
|
604
|
+
print()
|
|
605
|
+
print("DRY RUN complete - no changes made")
|
|
606
|
+
return
|
|
607
|
+
|
|
608
|
+
print()
|
|
609
|
+
print("Updating database...")
|
|
610
|
+
|
|
611
|
+
updated = 0
|
|
612
|
+
for project, memory_id in updates:
|
|
613
|
+
if project:
|
|
614
|
+
conn.execute(
|
|
615
|
+
"UPDATE memories SET project = ? WHERE id = ?",
|
|
616
|
+
(project, memory_id),
|
|
617
|
+
)
|
|
618
|
+
updated += 1
|
|
619
|
+
conn.commit()
|
|
620
|
+
|
|
621
|
+
print(f"Updated {updated} memories with project")
|
|
622
|
+
|
|
623
|
+
print()
|
|
624
|
+
print("Re-embedding memories with project prefix...")
|
|
625
|
+
|
|
626
|
+
embeddings = EmbeddingEngine()
|
|
627
|
+
vectors = VectorStorage(config.vectors_path, embeddings.dimension)
|
|
628
|
+
|
|
629
|
+
cursor = conn.execute("SELECT id, content, project FROM memories WHERE project IS NOT NULL")
|
|
630
|
+
all_with_project = cursor.fetchall()
|
|
631
|
+
|
|
632
|
+
reembedded = 0
|
|
633
|
+
for i in range(0, len(all_with_project), batch_size):
|
|
634
|
+
batch = all_with_project[i : i + batch_size]
|
|
635
|
+
for row in batch:
|
|
636
|
+
memory_id = row["id"]
|
|
637
|
+
content = row["content"]
|
|
638
|
+
project = row["project"]
|
|
639
|
+
|
|
640
|
+
prefixed_content = f"[{project}] {content}"
|
|
641
|
+
embedding = embeddings.embed(prefixed_content)
|
|
642
|
+
vectors.add(f"mem_{memory_id}", memory_id, prefixed_content, embedding)
|
|
643
|
+
reembedded += 1
|
|
644
|
+
|
|
645
|
+
print(f" Re-embedded {min(i + batch_size, len(all_with_project))}/{len(all_with_project)}")
|
|
646
|
+
|
|
647
|
+
print()
|
|
648
|
+
print(f"Backfill complete! Re-embedded {reembedded} memories with project prefix")
|
|
649
|
+
|
|
650
|
+
|
|
651
|
+
def _extract_knowledge(
    limit: int,
    since_days: int | None,
    dry_run: bool,
    project: str | None,
) -> None:
    """Extract procedures, directives, and tips from conversations using LLM.

    Each unprocessed conversation is sent to ``opencode`` together with the
    extraction prompt; the reply is read as one JSON object per line. Every
    object carrying both ``category`` and ``content`` becomes a new memory
    that is embedded and linked back to its conversation.

    Args:
        limit: Maximum number of conversations requested from storage.
        since_days: Only consider conversations from the last N days;
            ``None`` means no age restriction.
        dry_run: Print what would be extracted without storing anything.
        project: When given, keep only conversations whose ``project``
            equals this value.

    Exits with status 1 when the ``opencode`` executable cannot be found.
    """
    import asyncio
    import json

    from opencode_memory.config import Config
    from opencode_memory.extraction import (
        EXTRACTION_PROMPT,
        _find_opencode,
        call_opencode,
        get_unprocessed_conversations,
    )
    from opencode_memory.ingestion.embeddings import EmbeddingEngine
    from opencode_memory.models import LinkType, Memory, MemoryCategory, MemoryLink
    from opencode_memory.storage.sqlite import SQLiteStorage
    from opencode_memory.storage.vectors import VectorStorage

    if not _find_opencode():
        # Fatal errors go to stderr, matching the other subcommands.
        print("Error: opencode not found in PATH or ~/.opencode/bin/", file=sys.stderr)
        print("Install from https://opencode.ai", file=sys.stderr)
        sys.exit(1)

    config = Config.load()
    sqlite = SQLiteStorage(config.db_path)

    conversations = get_unprocessed_conversations(sqlite, since_days=since_days, limit=limit)

    if project:
        # NOTE(review): the project filter runs after ``limit`` was applied,
        # so fewer than ``limit`` (possibly zero) matching conversations may
        # be processed even though more exist. Fixing that needs a project
        # filter inside get_unprocessed_conversations.
        conversations = [c for c in conversations if c.get("project") == project]

    if not conversations:
        print("No unprocessed conversations found matching criteria.")
        return

    print(f"Found {len(conversations)} conversations to process\n")

    # The embedding engine and vector store are only needed when something is
    # actually stored, so a dry run does not construct them.
    embeddings = None
    vectors = None
    if not dry_run:
        embeddings = EmbeddingEngine()
        vectors = VectorStorage(config.vectors_path, embeddings.dimension)

    # Unknown or malformed categories fall back to FACT.
    category_map = {
        "procedure": MemoryCategory.PROCEDURE,
        "directive": MemoryCategory.DIRECTIVE,
        "decision": MemoryCategory.DECISION,
        "fact": MemoryCategory.FACT,
    }

    total_extracted = 0

    async def process_one(conv: dict) -> int:
        """Run extraction for one conversation and return the item count."""
        conv_id = conv["id"]
        content = conv["content"]
        conv_project = conv.get("project")

        # Cap the prompt size; the marker tells the model the text was cut.
        if len(content) > 15000:
            content = content[:15000] + "\n\n[... truncated ...]"

        full_prompt = EXTRACTION_PROMPT + content

        try:
            response = await call_opencode(full_prompt)
        except Exception as e:
            print(f" Error calling opencode: {e}")
            return 0

        # Keep only lines that parse as JSON objects with the required keys;
        # anything else in the reply is ignored.
        extracted = []
        for line in response.split("\n"):
            line = line.strip()
            if not line or not line.startswith("{"):
                continue
            try:
                item = json.loads(line)
            except json.JSONDecodeError:
                continue
            if "category" in item and "content" in item:
                extracted.append(item)

        if not extracted:
            return 0

        count = 0
        for item in extracted:
            category_str = item.get("category", "fact")
            # The value comes from LLM output: guard against a non-string
            # (e.g. a list) that cannot be used as a dict key.
            if isinstance(category_str, str):
                category = category_map.get(category_str, MemoryCategory.FACT)
            else:
                category = MemoryCategory.FACT

            # "what" may be absent, null or not a string in the model output.
            summary = str(item.get("what") or "No summary")
            print(f" [{category_str}] {summary[:50]}")

            if dry_run:
                count += 1
                continue

            memory = Memory(
                category=category,
                content=item.get("content", ""),
                what=item.get("what"),
                why=item.get("why"),
                learned=item.get("learned"),
                project=conv_project,
                source_file=conv.get("source_file"),
            )

            memory_id = sqlite.insert_memory(memory)

            embedding = embeddings.embed(memory.embedding_content())
            vectors.add(f"mem_{memory_id}", memory_id, memory.embedding_content(), embedding)

            # Link the new memory back to the conversation it came from.
            link = MemoryLink(
                source_memory_id=conv_id,
                target_memory_id=memory_id,
                link_type=LinkType.EXTENDS,
                strength=0.9,
                reason="Knowledge extracted from conversation via LLM",
            )
            sqlite.insert_link(link)

            count += 1

        return count

    for conv in conversations:
        title = conv.get("what") or "Untitled"
        print(f"Processing: {title[:60]}...")

        extracted_count = asyncio.run(process_one(conv))
        if extracted_count > 0:
            print(f" Extracted {extracted_count} items")
            total_extracted += extracted_count
        else:
            print(" No knowledge extracted")
        print()

    print()
    if dry_run:
        print(f"DRY RUN complete - would have extracted ~{total_extracted} items")
    else:
        print(f"Extraction complete! Stored {total_extracted} new memories")
|
|
791
|
+
|
|
792
|
+
|
|
793
|
+
# Run the CLI when this module is executed directly rather than imported.
if __name__ == "__main__":
    main()
|