oghma 0.0.1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
oghma/storage.py ADDED
@@ -0,0 +1,753 @@
1
+ import json
2
+ import logging
3
+ import sqlite3
4
+ from contextlib import contextmanager
5
+ from pathlib import Path
6
+ from typing import Any, TypedDict
7
+
8
+ from oghma.config import Config, get_db_path
9
+
10
+ try:
11
+ import sqlite_vec
12
+ except ImportError: # pragma: no cover - optional runtime dependency in tests
13
+ sqlite_vec = None
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
+ class MemoryRecord(TypedDict):
19
+ id: int
20
+ content: str
21
+ category: str
22
+ source_tool: str
23
+ source_file: str
24
+ source_session: str | None
25
+ confidence: float
26
+ created_at: str
27
+ updated_at: str
28
+ status: str
29
+ has_embedding: int
30
+ metadata: dict[str, Any]
31
+
32
+
33
class ExtractionStateRecord(TypedDict):
    """Shape of one row from ``extraction_state``.

    Tracks per-source-file incremental-extraction progress: the file's
    mtime/size at the last run and how many messages were seen.
    """

    id: int
    source_path: str
    last_mtime: float
    last_size: int
    last_extracted_at: str
    message_count: int
40
+
41
+
42
+ class ExtractionLogRecord(TypedDict):
43
+ id: int
44
+ source_path: str
45
+ memories_extracted: int
46
+ tokens_used: int
47
+ duration_ms: int
48
+ error: str | None
49
+ created_at: str
50
+
51
+
52
class Storage:
    """SQLite-backed persistence layer for extracted memories.

    Owns three tables (``memories``, ``extraction_state``, ``extraction_log``),
    an FTS5 external-content index (``memories_fts``) kept in sync via
    triggers, and — when the optional ``sqlite-vec`` extension is available —
    a ``memories_vec`` virtual table used for vector and hybrid search.

    Every public method opens its own short-lived connection, so instances
    are cheap and hold no open handles between calls.
    """

    # Hybrid search tuning constants.
    MIN_HYBRID_QUERY_LENGTH = 3  # shorter queries fall back to keyword search
    VECTOR_K_MULTIPLIER = 4  # vector candidate pool = limit * multiplier
    VECTOR_K_MIN = 25  # ...but never fewer than this many candidates
    RRF_K_DEFAULT = 60  # conventional reciprocal-rank-fusion damping constant

    def __init__(
        self,
        db_path: str | None = None,
        config: Config | None = None,
        read_only: bool = False,
    ):
        """Open (and, unless read-only, initialize) the database.

        Args:
            db_path: Explicit database file path; defaults to the configured
                path from ``get_db_path(config)``.
            config: Optional config mapping; only ``embedding.dimensions``
                (default 1536) is read here.
            read_only: When True, connect via a ``mode=ro`` URI and skip
                schema creation entirely.

        Raises:
            FileNotFoundError: If ``read_only`` is True and the file is absent.
        """
        self.db_path = db_path or get_db_path(config)
        self.read_only = read_only
        self._config = config
        self.embedding_dimensions = (
            config.get("embedding", {}).get("dimensions", 1536) if config else 1536
        )
        self._vec_available = sqlite_vec is not None
        # May be flipped off later if the extension fails to load at runtime.
        self._vector_search_enabled = self._vec_available

        if self.read_only:
            db_file = Path(self.db_path)
            if not db_file.exists():
                raise FileNotFoundError(f"Database not found: {self.db_path}")
            self._connection_target = f"file:{db_file.resolve()}?mode=ro"
            self._use_uri = True
        else:
            self._connection_target = self.db_path
            self._use_uri = False
            self._init_db()

    @contextmanager
    def _get_connection(self):
        """Yield a configured connection; commit on success, rollback on error.

        Read-only connections never commit or rollback. The connection is
        always closed on exit.
        """
        conn = sqlite3.connect(self._connection_target, uri=self._use_uri)
        conn.row_factory = sqlite3.Row
        self._configure_connection(conn)
        try:
            yield conn
            if not self.read_only:
                conn.commit()
        except Exception:
            if not self.read_only:
                conn.rollback()
            raise
        finally:
            conn.close()

    def _configure_connection(self, conn: sqlite3.Connection) -> None:
        """Load the sqlite-vec extension into *conn* if it was importable.

        On load failure, vector search is disabled for the rest of this
        instance's lifetime (keyword search still works).
        """
        if not self._vec_available:
            return
        try:
            sqlite_vec.load(conn)
        except Exception:
            self._vector_search_enabled = False
            logger.warning(
                "sqlite-vec extension failed to load; vector search disabled",
                exc_info=True,
            )

    def _fallback_keyword_search(
        self,
        *,
        query: str,
        category: str | None,
        source_tool: str | None,
        status: str,
        limit: int,
        offset: int,
        reason: str,
        exc_info: bool = False,
    ) -> list[MemoryRecord]:
        """Log why hybrid/vector search degraded, then run keyword search.

        ``exc_info=True`` escalates the log level to WARNING and attaches the
        current exception traceback.
        """
        log_fn = logger.warning if exc_info else logger.info
        log_fn("Hybrid/vector search fell back to keyword search: %s", reason, exc_info=exc_info)
        return self.search_memories(
            query=query,
            category=category,
            source_tool=source_tool,
            status=status,
            limit=limit,
            offset=offset,
        )

    def _ensure_column(
        self,
        cursor: sqlite3.Cursor,
        table_name: str,
        column_name: str,
        definition: str,
    ) -> None:
        """Add *column_name* to *table_name* if missing (lightweight migration).

        Identifiers are interpolated into SQL, so this must only ever be
        called with trusted, code-controlled names.
        """
        cursor.execute(f"PRAGMA table_info({table_name})")
        columns = {row[1] for row in cursor.fetchall()}
        if column_name not in columns:
            cursor.execute(f"ALTER TABLE {table_name} ADD COLUMN {column_name} {definition}")

    def _serialize_embedding(self, embedding: list[float]) -> Any:
        """Serialize an embedding for storage in ``memories_vec``.

        Prefers sqlite-vec's compact float32 blob; falls back to JSON text
        when the helper is unavailable.
        """
        if sqlite_vec and hasattr(sqlite_vec, "serialize_float32"):
            return sqlite_vec.serialize_float32(embedding)
        return json.dumps(embedding)

    def _init_db(self) -> None:
        """Create the schema if needed and apply in-place migrations.

        Idempotent: safe to run on every writable ``Storage`` construction.
        """
        Path(self.db_path).parent.mkdir(parents=True, exist_ok=True)

        with self._get_connection() as conn:
            cursor = conn.cursor()

            cursor.execute("""
                CREATE TABLE IF NOT EXISTS memories (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    content TEXT NOT NULL,
                    category TEXT NOT NULL,
                    source_tool TEXT NOT NULL,
                    source_file TEXT NOT NULL,
                    source_session TEXT,
                    confidence REAL DEFAULT 1.0,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    status TEXT DEFAULT 'active',
                    metadata JSON
                )
            """)
            # Column added after initial release; backfill for old databases.
            self._ensure_column(cursor, "memories", "has_embedding", "INTEGER DEFAULT 0")

            cursor.execute("""
                CREATE INDEX IF NOT EXISTS idx_memories_category ON memories(category)
            """)

            cursor.execute("""
                CREATE INDEX IF NOT EXISTS idx_memories_source_tool ON memories(source_tool)
            """)

            cursor.execute("""
                CREATE INDEX IF NOT EXISTS idx_memories_created_at ON memories(created_at DESC)
            """)

            cursor.execute("""
                CREATE INDEX IF NOT EXISTS idx_memories_status ON memories(status)
            """)

            # External-content FTS5 index over memories (stores no duplicate
            # text; rows are pulled from `memories` via content_rowid).
            cursor.execute("""
                CREATE VIRTUAL TABLE IF NOT EXISTS memories_fts USING fts5(
                    content,
                    category,
                    source_tool,
                    content=memories,
                    content_rowid=id
                )
            """)

            if self._vector_search_enabled:
                cursor.execute(
                    f"""
                    CREATE VIRTUAL TABLE IF NOT EXISTS memories_vec USING vec0(
                        memory_id INTEGER PRIMARY KEY,
                        embedding float[{self.embedding_dimensions}]
                    )
                    """
                )

            cursor.execute("""
                CREATE TABLE IF NOT EXISTS extraction_state (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    source_path TEXT UNIQUE NOT NULL,
                    last_mtime REAL NOT NULL,
                    last_size INTEGER NOT NULL,
                    last_extracted_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    message_count INTEGER DEFAULT 0
                )
            """)

            cursor.execute(
                """
                CREATE INDEX IF NOT EXISTS idx_extraction_state_path
                ON extraction_state(source_path)
                """
            )

            cursor.execute("""
                CREATE TABLE IF NOT EXISTS extraction_log (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    source_path TEXT NOT NULL,
                    memories_extracted INTEGER DEFAULT 0,
                    tokens_used INTEGER DEFAULT 0,
                    duration_ms INTEGER DEFAULT 0,
                    error TEXT,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                )
            """)

            cursor.execute("""
                CREATE INDEX IF NOT EXISTS idx_extraction_log_path ON extraction_log(source_path)
            """)

            cursor.execute(
                """
                CREATE INDEX IF NOT EXISTS idx_extraction_log_created_at
                ON extraction_log(created_at DESC)
                """
            )

            # memories_fts is an external-content FTS5 table, so rows must be
            # removed with the special 'delete' command; a plain
            # `DELETE FROM memories_fts` against an external-content index
            # corrupts it (SQLite FTS5 docs, "External Content Tables").
            # Drop-and-recreate so databases created by earlier versions with
            # the broken trigger bodies are repaired on upgrade.
            cursor.execute("DROP TRIGGER IF EXISTS memories_fts_insert")
            cursor.execute("""
                CREATE TRIGGER memories_fts_insert AFTER INSERT ON memories BEGIN
                    INSERT INTO memories_fts(rowid, content, category, source_tool)
                    VALUES (NEW.id, NEW.content, NEW.category, NEW.source_tool);
                END
            """)

            cursor.execute("DROP TRIGGER IF EXISTS memories_fts_delete")
            cursor.execute("""
                CREATE TRIGGER memories_fts_delete AFTER DELETE ON memories BEGIN
                    INSERT INTO memories_fts(memories_fts, rowid, content, category, source_tool)
                    VALUES ('delete', OLD.id, OLD.content, OLD.category, OLD.source_tool);
                END
            """)

            cursor.execute("DROP TRIGGER IF EXISTS memories_fts_update")
            cursor.execute("""
                CREATE TRIGGER memories_fts_update AFTER UPDATE ON memories BEGIN
                    INSERT INTO memories_fts(memories_fts, rowid, content, category, source_tool)
                    VALUES ('delete', OLD.id, OLD.content, OLD.category, OLD.source_tool);
                    INSERT INTO memories_fts(rowid, content, category, source_tool)
                    VALUES (NEW.id, NEW.content, NEW.category, NEW.source_tool);
                END
            """)

    def add_memory(
        self,
        content: str,
        category: str,
        source_tool: str,
        source_file: str,
        source_session: str | None = None,
        confidence: float = 1.0,
        metadata: dict[str, Any] | None = None,
        embedding: list[float] | None = None,
    ) -> int:
        """Insert a memory row (and its embedding, when possible).

        The embedding is only stored — and ``has_embedding`` only set — when
        vector search is enabled, so the flag always matches ``memories_vec``.

        Returns:
            The new memory's rowid (0 if SQLite reports none).
        """
        with self._get_connection() as conn:
            cursor = conn.cursor()
            metadata_json = json.dumps(metadata) if metadata else None

            cursor.execute(
                """
                INSERT INTO memories
                (content, category, source_tool, source_file,
                 source_session, confidence, metadata, has_embedding)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    content,
                    category,
                    source_tool,
                    source_file,
                    source_session,
                    confidence,
                    metadata_json,
                    1 if embedding is not None and self._vector_search_enabled else 0,
                ),
            )
            memory_id = cursor.lastrowid or 0

            if embedding is not None and self._vector_search_enabled:
                cursor.execute(
                    "INSERT OR REPLACE INTO memories_vec (memory_id, embedding) VALUES (?, ?)",
                    (memory_id, self._serialize_embedding(embedding)),
                )

            return memory_id

    def search_memories(
        self,
        query: str,
        category: str | None = None,
        source_tool: str | None = None,
        status: str = "active",
        limit: int = 10,
        offset: int = 0,
    ) -> list[MemoryRecord]:
        """Full-text (FTS5 MATCH) search over memories, newest first.

        Note: *query* is passed straight to FTS5, so FTS query syntax
        (quotes, AND/OR, prefixes) is honored; malformed syntax raises
        ``sqlite3.OperationalError`` from the driver.
        """
        with self._get_connection() as conn:
            cursor = conn.cursor()

            sql = """
                SELECT m.*
                FROM memories m
                WHERE m.id IN (
                    SELECT rowid FROM memories_fts WHERE memories_fts MATCH ?
                )
                AND m.status = ?
            """
            params: list[str | int] = [query, status]

            if category:
                sql += " AND m.category = ?"
                params.append(category)

            if source_tool:
                sql += " AND m.source_tool = ?"
                params.append(source_tool)

            sql += " ORDER BY m.created_at DESC LIMIT ? OFFSET ?"
            params.extend([limit, offset])

            cursor.execute(sql, params)
            rows = cursor.fetchall()

            return [self._row_to_memory_record(row) for row in rows]

    def _row_to_memory_record(self, row: sqlite3.Row) -> MemoryRecord:
        """Convert a ``memories`` row into a ``MemoryRecord`` dict.

        Tolerates pre-migration rows without ``has_embedding`` and NULL
        ``metadata`` (normalized to 0 / ``{}`` respectively).
        """
        return {
            "id": row["id"],
            "content": row["content"],
            "category": row["category"],
            "source_tool": row["source_tool"],
            "source_file": row["source_file"],
            "source_session": row["source_session"],
            "confidence": row["confidence"],
            "created_at": row["created_at"],
            "updated_at": row["updated_at"],
            "status": row["status"],
            "has_embedding": row["has_embedding"] if "has_embedding" in row.keys() else 0,
            "metadata": json.loads(row["metadata"]) if row["metadata"] else {},
        }

    def upsert_memory_embedding(self, memory_id: int, embedding: list[float]) -> bool:
        """Store/replace the embedding for an existing memory.

        Returns:
            True on success; False when vector search is disabled or the
            memory id does not exist.
        """
        if not self._vector_search_enabled:
            return False

        with self._get_connection() as conn:
            cursor = conn.cursor()
            cursor.execute("SELECT id FROM memories WHERE id = ?", (memory_id,))
            if cursor.fetchone() is None:
                return False

            cursor.execute(
                "INSERT OR REPLACE INTO memories_vec (memory_id, embedding) VALUES (?, ?)",
                (memory_id, self._serialize_embedding(embedding)),
            )
            cursor.execute(
                (
                    "UPDATE memories SET has_embedding = 1, "
                    "updated_at = CURRENT_TIMESTAMP WHERE id = ?"
                ),
                (memory_id,),
            )
            return True

    def get_memories_without_embeddings(self, limit: int = 100) -> list[MemoryRecord]:
        """Return up to *limit* active memories still awaiting an embedding."""
        with self._get_connection() as conn:
            cursor = conn.cursor()
            cursor.execute(
                """
                SELECT * FROM memories
                WHERE status = 'active' AND has_embedding = 0
                ORDER BY id ASC
                LIMIT ?
                """,
                (limit,),
            )
            rows = cursor.fetchall()
            return [self._row_to_memory_record(row) for row in rows]

    def get_embedding_progress(self) -> tuple[int, int]:
        """Return ``(embedded, total)`` counts over active memories."""
        with self._get_connection() as conn:
            cursor = conn.cursor()
            cursor.execute("SELECT COUNT(*) FROM memories WHERE status = 'active'")
            total_row = cursor.fetchone()
            cursor.execute(
                "SELECT COUNT(*) FROM memories WHERE status = 'active' AND has_embedding = 1"
            )
            done_row = cursor.fetchone()
            total = int(total_row[0]) if total_row else 0
            done = int(done_row[0]) if done_row else 0
            return done, total

    def search_memories_hybrid(
        self,
        query: str,
        query_embedding: list[float] | None = None,
        category: str | None = None,
        source_tool: str | None = None,
        status: str = "active",
        limit: int = 10,
        offset: int = 0,
        search_mode: str = "hybrid",
        rrf_k: int = RRF_K_DEFAULT,
    ) -> list[MemoryRecord]:
        """Search memories by keyword, vector similarity, or both (RRF-fused).

        Falls back to plain keyword search — with a logged reason — whenever
        vector search cannot help: sqlite-vec missing, query too short,
        no query embedding, no embedded rows, or a SQL error.

        Args:
            query: FTS5 query text (also used by the keyword leg of hybrid).
            query_embedding: Pre-computed embedding for *query*; required for
                vector/hybrid modes to actually use vectors.
            search_mode: One of ``"keyword"``, ``"vector"``, ``"hybrid"``.
            rrf_k: Reciprocal-rank-fusion damping constant.

        Raises:
            ValueError: On an unknown *search_mode*.
        """
        if search_mode not in {"keyword", "vector", "hybrid"}:
            raise ValueError("search_mode must be one of: keyword, vector, hybrid")

        if search_mode == "keyword":
            return self.search_memories(
                query=query,
                category=category,
                source_tool=source_tool,
                status=status,
                limit=limit,
                offset=offset,
            )

        if not self._vector_search_enabled:
            return self._fallback_keyword_search(
                query=query,
                category=category,
                source_tool=source_tool,
                status=status,
                limit=limit,
                offset=offset,
                reason="sqlite-vec unavailable",
            )

        if len(query.strip()) < self.MIN_HYBRID_QUERY_LENGTH:
            return self._fallback_keyword_search(
                query=query,
                category=category,
                source_tool=source_tool,
                status=status,
                limit=limit,
                offset=offset,
                reason=f"query shorter than {self.MIN_HYBRID_QUERY_LENGTH} chars",
            )

        if not query_embedding:
            return self._fallback_keyword_search(
                query=query,
                category=category,
                source_tool=source_tool,
                status=status,
                limit=limit,
                offset=offset,
                reason="query embedding missing",
            )

        with self._get_connection() as conn:
            cursor = conn.cursor()

            # Vector legs are pointless with zero embedded rows; degrade early.
            cursor.execute(
                "SELECT COUNT(*) FROM memories WHERE status = ? AND has_embedding = 1",
                (status,),
            )
            row = cursor.fetchone()
            if not row or row[0] == 0:
                return self._fallback_keyword_search(
                    query=query,
                    category=category,
                    source_tool=source_tool,
                    status=status,
                    limit=limit,
                    offset=offset,
                    reason="no embedded memories available",
                )

            filters = ""
            filter_params: list[str] = [status]
            if category:
                filters += " AND m.category = ?"
                filter_params.append(category)
            if source_tool:
                filters += " AND m.source_tool = ?"
                filter_params.append(source_tool)

            # Over-fetch candidates so post-filtering still fills `limit`.
            vector_k = max(limit * self.VECTOR_K_MULTIPLIER, self.VECTOR_K_MIN)
            vec_query = self._serialize_embedding(query_embedding)

            try:
                if search_mode == "vector":
                    sql = f"""
                        WITH vec AS (
                            SELECT m.id AS memory_id
                            FROM memories_vec v
                            JOIN memories m ON m.id = v.memory_id
                            WHERE v.embedding MATCH ? AND k = ?
                              AND m.status = ?
                              {filters}
                            ORDER BY v.distance
                            LIMIT ?
                        )
                        SELECT m.*
                        FROM vec
                        JOIN memories m ON m.id = vec.memory_id
                        ORDER BY m.created_at DESC
                        LIMIT ? OFFSET ?
                    """
                    params: list[Any] = [
                        vec_query,
                        vector_k,
                        *filter_params,
                        vector_k,
                        limit,
                        offset,
                    ]
                else:
                    # Hybrid: rank the FTS leg and the vector leg separately,
                    # then fuse with equal-weight reciprocal rank fusion.
                    sql = f"""
                        WITH
                        fts AS (
                            SELECT
                                m.id AS memory_id,
                                ROW_NUMBER() OVER (ORDER BY bm25(memories_fts)) AS fts_rank
                            FROM memories_fts
                            JOIN memories m ON m.id = memories_fts.rowid
                            WHERE memories_fts MATCH ?
                              AND m.status = ?
                              {filters}
                            LIMIT ?
                        ),
                        vec AS (
                            SELECT
                                m.id AS memory_id,
                                ROW_NUMBER() OVER (ORDER BY v.distance) AS vec_rank
                            FROM memories_vec v
                            JOIN memories m ON m.id = v.memory_id
                            WHERE v.embedding MATCH ? AND k = ?
                              AND m.status = ?
                              {filters}
                            LIMIT ?
                        ),
                        rrf AS (
                            SELECT memory_id, (1.0 / (? + fts_rank)) * 0.5 AS score FROM fts
                            UNION ALL
                            SELECT memory_id, (1.0 / (? + vec_rank)) * 0.5 AS score FROM vec
                        ),
                        ranked AS (
                            SELECT memory_id, SUM(score) AS rrf_score
                            FROM rrf
                            GROUP BY memory_id
                        )
                        SELECT m.*
                        FROM ranked
                        JOIN memories m ON m.id = ranked.memory_id
                        ORDER BY ranked.rrf_score DESC, m.created_at DESC
                        LIMIT ? OFFSET ?
                    """
                    params = [
                        query,
                        *filter_params,
                        vector_k,
                        vec_query,
                        vector_k,
                        *filter_params,
                        vector_k,
                        rrf_k,
                        rrf_k,
                        limit,
                        offset,
                    ]

                cursor.execute(sql, params)
                rows = cursor.fetchall()
                return [self._row_to_memory_record(row) for row in rows]
            except sqlite3.Error:
                return self._fallback_keyword_search(
                    query=query,
                    category=category,
                    source_tool=source_tool,
                    status=status,
                    limit=limit,
                    offset=offset,
                    reason="sqlite query error",
                    exc_info=True,
                )

    def get_memory_by_id(self, memory_id: int) -> MemoryRecord | None:
        """Return the memory with *memory_id*, or None if absent."""
        with self._get_connection() as conn:
            cursor = conn.cursor()
            cursor.execute("SELECT * FROM memories WHERE id = ?", (memory_id,))
            row = cursor.fetchone()

            if row is None:
                return None

            return self._row_to_memory_record(row)

    def update_memory_status(self, memory_id: int, status: str) -> bool:
        """Set a memory's status (e.g. 'active'/'archived'); True if a row changed."""
        with self._get_connection() as conn:
            cursor = conn.cursor()
            cursor.execute(
                "UPDATE memories SET status = ?, updated_at = CURRENT_TIMESTAMP WHERE id = ?",
                (status, memory_id),
            )
            return cursor.rowcount > 0

    def get_extraction_state(self, source_path: str) -> ExtractionStateRecord | None:
        """Return the incremental-extraction state for *source_path*, if any."""
        with self._get_connection() as conn:
            cursor = conn.cursor()
            cursor.execute("SELECT * FROM extraction_state WHERE source_path = ?", (source_path,))
            row = cursor.fetchone()

            if row is None:
                return None

            return {
                "id": row["id"],
                "source_path": row["source_path"],
                "last_mtime": row["last_mtime"],
                "last_size": row["last_size"],
                "last_extracted_at": row["last_extracted_at"],
                "message_count": row["message_count"],
            }

    def update_extraction_state(
        self,
        source_path: str,
        last_mtime: float,
        last_size: int,
        message_count: int = 0,
    ) -> None:
        """Upsert the extraction state for *source_path* (keyed by path)."""
        with self._get_connection() as conn:
            cursor = conn.cursor()
            cursor.execute(
                """
                INSERT INTO extraction_state
                (source_path, last_mtime, last_size, message_count, last_extracted_at)
                VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP)
                ON CONFLICT(source_path) DO UPDATE SET
                    last_mtime = excluded.last_mtime,
                    last_size = excluded.last_size,
                    message_count = excluded.message_count,
                    last_extracted_at = CURRENT_TIMESTAMP
                """,
                (source_path, last_mtime, last_size, message_count),
            )

    def log_extraction(
        self,
        source_path: str,
        memories_extracted: int = 0,
        tokens_used: int = 0,
        duration_ms: int = 0,
        error: str | None = None,
    ) -> int:
        """Append an extraction audit entry; returns its rowid (0 if unknown)."""
        with self._get_connection() as conn:
            cursor = conn.cursor()
            cursor.execute(
                """
                INSERT INTO extraction_log
                (source_path, memories_extracted, tokens_used, duration_ms, error)
                VALUES (?, ?, ?, ?, ?)
                """,
                (source_path, memories_extracted, tokens_used, duration_ms, error),
            )
            return cursor.lastrowid or 0

    def get_memory_count(self, status: str = "active") -> int:
        """Count memories with the given status."""
        with self._get_connection() as conn:
            cursor = conn.cursor()
            cursor.execute("SELECT COUNT(*) FROM memories WHERE status = ?", (status,))
            row = cursor.fetchone()
            # Coerce for a guaranteed int return, matching get_embedding_progress.
            return int(row[0]) if row else 0

    def get_all_memories(
        self, status: str = "active", category: str | None = None
    ) -> list[MemoryRecord]:
        """Return every memory with *status* (optionally one category), newest first."""
        with self._get_connection() as conn:
            cursor = conn.cursor()

            sql = "SELECT * FROM memories WHERE status = ?"
            params: list[str] = [status]

            if category:
                sql += " AND category = ?"
                params.append(category)

            sql += " ORDER BY created_at DESC"
            cursor.execute(sql, params)
            rows = cursor.fetchall()

            return [self._row_to_memory_record(row) for row in rows]

    def get_all_extraction_states(self) -> list[ExtractionStateRecord]:
        """Return the extraction state of every tracked source file."""
        with self._get_connection() as conn:
            cursor = conn.cursor()
            cursor.execute("SELECT * FROM extraction_state")
            rows = cursor.fetchall()

            return [
                {
                    "id": row["id"],
                    "source_path": row["source_path"],
                    "last_mtime": row["last_mtime"],
                    "last_size": row["last_size"],
                    "last_extracted_at": row["last_extracted_at"],
                    "message_count": row["message_count"],
                }
                for row in rows
            ]

    def get_recent_extraction_logs(self, limit: int = 10) -> list[ExtractionLogRecord]:
        """Return the *limit* most recent extraction log entries, newest first."""
        with self._get_connection() as conn:
            cursor = conn.cursor()
            cursor.execute(
                "SELECT * FROM extraction_log ORDER BY created_at DESC LIMIT ?",
                (limit,),
            )
            rows = cursor.fetchall()

            return [
                {
                    "id": row["id"],
                    "source_path": row["source_path"],
                    "memories_extracted": row["memories_extracted"],
                    "tokens_used": row["tokens_used"],
                    "duration_ms": row["duration_ms"],
                    "error": row["error"],
                    "created_at": row["created_at"],
                }
                for row in rows
            ]