brainlayer-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. brainlayer/__init__.py +3 -0
  2. brainlayer/cli/__init__.py +1545 -0
  3. brainlayer/cli/wizard.py +132 -0
  4. brainlayer/cli_new.py +151 -0
  5. brainlayer/client.py +164 -0
  6. brainlayer/clustering.py +736 -0
  7. brainlayer/daemon.py +1105 -0
  8. brainlayer/dashboard/README.md +129 -0
  9. brainlayer/dashboard/__init__.py +5 -0
  10. brainlayer/dashboard/app.py +151 -0
  11. brainlayer/dashboard/search.py +229 -0
  12. brainlayer/dashboard/views.py +230 -0
  13. brainlayer/embeddings.py +131 -0
  14. brainlayer/engine.py +550 -0
  15. brainlayer/index_new.py +87 -0
  16. brainlayer/mcp/__init__.py +1558 -0
  17. brainlayer/migrate.py +205 -0
  18. brainlayer/paths.py +43 -0
  19. brainlayer/pipeline/__init__.py +47 -0
  20. brainlayer/pipeline/analyze_communication.py +508 -0
  21. brainlayer/pipeline/brain_graph.py +567 -0
  22. brainlayer/pipeline/chat_tags.py +63 -0
  23. brainlayer/pipeline/chunk.py +422 -0
  24. brainlayer/pipeline/classify.py +472 -0
  25. brainlayer/pipeline/cluster_sampling.py +73 -0
  26. brainlayer/pipeline/enrichment.py +810 -0
  27. brainlayer/pipeline/extract.py +66 -0
  28. brainlayer/pipeline/extract_claude_desktop.py +149 -0
  29. brainlayer/pipeline/extract_corrections.py +231 -0
  30. brainlayer/pipeline/extract_markdown.py +195 -0
  31. brainlayer/pipeline/extract_whatsapp.py +227 -0
  32. brainlayer/pipeline/git_overlay.py +301 -0
  33. brainlayer/pipeline/longitudinal_analyzer.py +568 -0
  34. brainlayer/pipeline/obsidian_export.py +455 -0
  35. brainlayer/pipeline/operation_grouping.py +486 -0
  36. brainlayer/pipeline/plan_linking.py +313 -0
  37. brainlayer/pipeline/sanitize.py +549 -0
  38. brainlayer/pipeline/semantic_style.py +574 -0
  39. brainlayer/pipeline/session_enrichment.py +472 -0
  40. brainlayer/pipeline/style_embed.py +67 -0
  41. brainlayer/pipeline/style_index.py +139 -0
  42. brainlayer/pipeline/temporal_chains.py +203 -0
  43. brainlayer/pipeline/time_batcher.py +248 -0
  44. brainlayer/pipeline/unified_timeline.py +569 -0
  45. brainlayer/storage.py +66 -0
  46. brainlayer/store.py +155 -0
  47. brainlayer/taxonomy.json +80 -0
  48. brainlayer/vector_store.py +1891 -0
  49. brainlayer-1.0.0.dist-info/METADATA +313 -0
  50. brainlayer-1.0.0.dist-info/RECORD +53 -0
  51. brainlayer-1.0.0.dist-info/WHEEL +4 -0
  52. brainlayer-1.0.0.dist-info/entry_points.txt +4 -0
  53. brainlayer-1.0.0.dist-info/licenses/LICENSE +190 -0
brainlayer/engine.py ADDED
@@ -0,0 +1,550 @@
1
+ """Think/Recall Engine — Intelligence layer for BrainLayer.
2
+
3
+ Turns BrainLayer from "search your conversations" into "AI that remembers everything."
4
+
5
+ Three capabilities:
6
+ - think(context) — given current task, retrieve relevant past decisions/patterns
7
+ - recall(file_path|topic) — proactive retrieval based on what you're working on
8
+ - sessions(project, days) — browse sessions by date/project
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ import logging
15
+ from dataclasses import dataclass, field
16
+ from datetime import datetime, timedelta
17
+ from typing import Any
18
+
19
+ from .vector_store import VectorStore
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
# Intent categories for grouping think results.
# Each set lists the `intent` metadata values that select one ThinkResult bucket.
DECISION_INTENTS = {"deciding", "designing"}  # -> ThinkResult.decisions
DEBUG_INTENTS = {"debugging"}  # -> ThinkResult.bugs
IMPLEMENT_INTENTS = {"implementing", "configuring"}  # -> ThinkResult.patterns
# NOTE(review): categorize_by_intent does not read REVIEW_INTENTS; items with
# these intents fall through to the generic `context` bucket.
REVIEW_INTENTS = {"reviewing", "discussing"}
28
+
29
+
30
@dataclass
class ThinkResult:
    """Memories returned by think(), split into four buckets.

    `total` is the number of memories retrieved; `format()` treats a total of
    zero as "nothing found" regardless of what the bucket lists contain.
    """

    decisions: list[dict[str, Any]] = field(default_factory=list)
    patterns: list[dict[str, Any]] = field(default_factory=list)
    bugs: list[dict[str, Any]] = field(default_factory=list)
    context: list[dict[str, Any]] = field(default_factory=list)
    query: str = ""
    total: int = 0

    def format(self) -> str:
        """Render the buckets as a markdown document for an MCP response.

        Returns:
            A fixed "nothing found" sentence when `total` is 0; otherwise a
            title, one headed section per non-empty bucket (in the order
            decisions, patterns, bugs, context) and a trailing count line.
        """
        if self.total == 0:
            return "No relevant memories found."

        # Heading text paired with the bucket it introduces, in output order.
        # Every heading except the first starts with a blank line.
        sections = (
            ("### Decisions & Design", self.decisions),
            ("\n### Patterns & Implementations", self.patterns),
            ("\n### Related Bugs & Fixes", self.bugs),
            ("\n### Related Context", self.context),
        )

        lines = [f"## Relevant Memories for: {self.query}\n"]
        for heading, bucket in sections:
            if not bucket:
                continue
            lines.append(heading)
            lines.extend(_format_memory_item(entry) for entry in bucket)

        lines.append(f"\n*{self.total} memories retrieved*")
        return "\n".join(lines)
70
+
71
+
72
@dataclass
class RecallResult:
    """Everything recall() gathered about one file path or topic."""

    file_history: list[dict[str, Any]] = field(default_factory=list)
    related_chunks: list[dict[str, Any]] = field(default_factory=list)
    session_summaries: list[dict[str, Any]] = field(default_factory=list)
    target: str = ""

    def format(self) -> str:
        """Render the recall data as markdown for an MCP response.

        Returns:
            A "no recall data" sentence when both `file_history` and
            `related_chunks` are empty; otherwise a title followed by the
            file-history, sessions and related-knowledge sections that have
            entries. Missing or empty values are shown as "?".
        """
        if not (self.file_history or self.related_chunks):
            return f"No recall data found for '{self.target}'."

        out = [f"## Recall: {self.target}\n"]

        if self.file_history:
            out.append("### File History")
            for event in self.file_history:
                # Timestamps are cut to 19 chars and session ids to 8.
                when = (event.get("timestamp") or "?")[:19]
                what = event.get("action", "?")
                short_id = (event.get("session_id") or "?")[:8]
                out.append(f"- **{what}** at {when} (session: {short_id})")

        if self.session_summaries:
            out.append("\n### Sessions That Touched This")
            for summary in self.session_summaries:
                short_id = (summary.get("session_id") or "?")[:8]
                branch = summary.get("branch") or "?"
                plan = summary.get("plan_name") or ""
                started = (summary.get("started_at") or "?")[:19]

                pieces = [f"- {short_id} | {branch}"]
                if plan:
                    pieces.append(f" | plan: {plan}")
                pieces.append(f" | {started}")
                out.append("".join(pieces))

        if self.related_chunks:
            out.append("\n### Related Knowledge")
            out.extend(_format_memory_item(chunk) for chunk in self.related_chunks)

        return "\n".join(out)
115
+
116
+
117
@dataclass
class SessionInfo:
    """One session record; every field defaults to an empty value.

    Timestamps are kept as the strings they were stored as, not parsed.
    """

    # Identity and location of the session.
    session_id: str = ""
    project: str = ""
    branch: str = ""
    # Start / end timestamps as strings.
    started_at: str = ""
    ended_at: str = ""
    # Plan the session belonged to, if any, and the phase within it.
    plan_name: str = ""
    plan_phase: str = ""
    # File paths recorded as changed during the session.
    files_changed: list[str] = field(default_factory=list)
129
+
130
+
131
def _format_memory_item(item: dict[str, Any]) -> str:
    """Format a single memory item as one compact markdown bullet.

    The bullet has the shape ``- [YYYY-MM-DD] (project) text``. The date and
    project prefixes are emitted only when present, and the text is wrapped
    in ``**`` when the item's importance is 7 or higher.

    Args:
        item: Memory dict. Reads the keys "summary", "content", "created_at",
            "project" and "importance"; any of them may be missing or None.

    Returns:
        The markdown bullet line (without a trailing newline).
    """
    # `.get(key) or ""` rather than `.get(key, "")`: a key that is present
    # with a None value must be treated like a missing one, otherwise the
    # len()/slice below raises TypeError.
    summary = item.get("summary") or ""
    content = item.get("content") or ""
    date = (item.get("created_at") or "")[:10]
    project = item.get("project") or ""
    importance = item.get("importance")

    # Prefer the summary; otherwise show at most 200 characters of content.
    if summary:
        display = summary
    elif len(content) > 200:
        display = content[:200] + "..."
    else:
        display = content

    # Skip the bold markers when there is no text to wrap, so an empty
    # high-importance item does not produce a bare "****".
    if display and importance is not None and importance >= 7:
        display = f"**{display}**"

    line = "- "
    if date:
        line += f"[{date}] "
    if project:
        line += f"({project}) "
    return line + display
153
+
154
+
155
def categorize_by_intent(items: list[dict[str, Any]]) -> ThinkResult:
    """Sort search hits into ThinkResult buckets according to their intent.

    Args:
        items: Memory dicts; each item's "intent" value selects its bucket.

    Returns:
        A ThinkResult whose `total` equals ``len(items)``. Items whose intent
        matches none of the decision/debug/implement sets land in `context`.
    """
    grouped = ThinkResult()

    for entry in items:
        label = entry.get("intent", "")

        # Pick the destination list first, then append once.
        if label in DECISION_INTENTS:
            bucket = grouped.decisions
        elif label in DEBUG_INTENTS:
            bucket = grouped.bugs
        elif label in IMPLEMENT_INTENTS:
            bucket = grouped.patterns
        else:
            bucket = grouped.context
        bucket.append(entry)

    grouped.total = len(items)
    return grouped
173
+
174
+
175
def think(
    context: str,
    store: VectorStore,
    embed_fn: Any,
    project: str | None = None,
    max_results: int = 10,
) -> ThinkResult:
    """Retrieve past knowledge relevant to the current task.

    Args:
        context: Free-text description of the current task/context.
        store: VectorStore instance; its hybrid_search() is queried.
        embed_fn: Callable mapping the query text to an embedding vector.
        project: Optional project filter passed through to the search.
        max_results: Maximum number of results requested from the search.

    Returns:
        ThinkResult with the hits categorized by intent. An empty or
        whitespace-only context, or a search with no documents, yields an
        empty ThinkResult.
    """
    query = (context or "").strip()
    if not query:
        # Echo back the caller's original (unstripped) text, or "" for None.
        return ThinkResult(query=context or "")

    vector = embed_fn(query)

    # importance_min=3.0 keeps low-importance noise out of the results.
    hits = store.hybrid_search(
        query_embedding=vector,
        query_text=query,
        n_results=max_results,
        project_filter=project,
        importance_min=3.0,
    )

    documents = hits["documents"][0]
    if not documents:
        return ThinkResult(query=query)

    # Pair each document with its metadata and flatten into one dict per hit.
    memories = [
        {
            "content": text,
            "summary": info.get("summary"),
            "intent": info.get("intent", ""),
            "importance": info.get("importance"),
            "project": info.get("project", ""),
            "created_at": info.get("created_at", ""),
            "content_type": info.get("content_type", ""),
            "tags": info.get("tags", []),
        }
        for text, info in zip(documents, hits["metadatas"][0])
    ]

    outcome = categorize_by_intent(memories)
    outcome.query = query
    return outcome
233
+
234
+
235
def _search_related_chunks(
    store: VectorStore,
    embed_fn: Any,
    embed_text: str,
    query_text: str,
    project: str | None,
    max_results: int,
) -> list[dict[str, Any]]:
    """Run one hybrid search and flatten its first result set into memory dicts."""
    query_embedding = embed_fn(embed_text)
    search_results = store.hybrid_search(
        query_embedding=query_embedding,
        query_text=query_text,
        n_results=max_results,
        project_filter=project,
    )
    return [
        {
            "content": doc,
            "summary": meta.get("summary"),
            "intent": meta.get("intent", ""),
            "importance": meta.get("importance"),
            "project": meta.get("project", ""),
            "created_at": meta.get("created_at", ""),
        }
        for doc, meta in zip(search_results["documents"][0], search_results["metadatas"][0])
    ]


def recall(
    store: VectorStore,
    embed_fn: Any | None = None,
    file_path: str | None = None,
    topic: str | None = None,
    project: str | None = None,
    max_results: int = 10,
) -> RecallResult:
    """Proactive smart retrieval based on file or topic.

    When `file_path` is given it takes precedence and `topic` is ignored.
    Topic recall runs only when `embed_fn` is supplied; file recall still
    returns the file timeline and sessions without it.

    Args:
        store: VectorStore instance.
        embed_fn: Function that takes text and returns an embedding vector
            (required for topic recall and for file-related knowledge).
        file_path: File path to recall context for.
        topic: Topic to recall context for.
        project: Optional project filter.
        max_results: Maximum search results; the file timeline is fetched
            with twice this limit.

    Returns:
        RecallResult with file history, sessions, and related knowledge.
    """
    target = file_path or topic or ""
    result = RecallResult(target=target)

    if file_path:
        # File interaction timeline.
        timeline = store.get_file_timeline(file_path, project=project, limit=max_results * 2)
        result.file_history = timeline

        # De-duplicate session ids while keeping timeline order. A set has
        # arbitrary iteration order, so which five sessions were kept (and
        # their order) used to change from run to run.
        session_ids = list(
            dict.fromkeys(t.get("session_id") for t in timeline if t.get("session_id"))
        )
        for sid in session_ids[:5]:
            ctx = store.get_session_context(sid)
            if ctx:
                result.session_summaries.append(ctx)

        if embed_fn:
            # Search on the bare filename; rsplit returns the whole string
            # when the path contains no "/".
            fname = file_path.rsplit("/", 1)[-1]
            result.related_chunks.extend(
                _search_related_chunks(
                    store, embed_fn, f"working on {fname}", fname, project, max_results
                )
            )

    elif topic and embed_fn:
        # Topic-based recall: search for related discussions.
        result.related_chunks.extend(
            _search_related_chunks(store, embed_fn, topic, topic, project, max_results)
        )

    return result
316
+
317
+
318
def sessions(
    store: VectorStore,
    project: str | None = None,
    days: int = 7,
    limit: int = 20,
) -> list[SessionInfo]:
    """Return recent rows of the session_context table, newest first.

    Args:
        store: VectorStore instance; its `conn` is queried directly.
        project: Optional exact-match project filter.
        days: How many days back to look, relative to the local clock.
        limit: Maximum number of sessions to return.

    Returns:
        List of SessionInfo objects ordered by started_at descending. NULL
        columns become empty strings; a files_changed value that is missing,
        not valid JSON, or not a JSON list becomes an empty list.
    """
    db = store.conn.cursor()

    cutoff = (datetime.now() - timedelta(days=days)).isoformat()

    # Conditions and their bound values grow together; LIMIT is bound last.
    conditions = ["started_at >= ?"]
    bindings: list = [cutoff]
    if project:
        conditions.append("project = ?")
        bindings.append(project)
    bindings.append(limit)

    sql = f"""
        SELECT session_id, project, branch, started_at, ended_at,
               plan_name, plan_phase, files_changed
        FROM session_context
        WHERE {" AND ".join(conditions)}
        ORDER BY started_at DESC
        LIMIT ?
    """

    found = []
    for record in list(db.execute(sql, bindings)):
        raw_files = record[7]

        changed = []
        if raw_files:
            try:
                changed = json.loads(raw_files)
            except (json.JSONDecodeError, TypeError):
                pass
        if not isinstance(changed, list):
            changed = []

        found.append(
            SessionInfo(
                session_id=record[0] or "",
                project=record[1] or "",
                branch=record[2] or "",
                started_at=record[3] or "",
                ended_at=record[4] or "",
                plan_name=record[5] or "",
                plan_phase=record[6] or "",
                files_changed=changed,
            )
        )

    return found
382
+
383
+
384
+ @dataclass
385
+ class CurrentContext:
386
+ """Current working context — what the user is working on right now."""
387
+
388
+ recent_sessions: list[SessionInfo] = field(default_factory=list)
389
+ active_projects: list[str] = field(default_factory=list)
390
+ recent_files: list[str] = field(default_factory=list)
391
+ active_branches: list[str] = field(default_factory=list)
392
+ active_plan: str = ""
393
+
394
+ def format(self) -> str:
395
+ """Format as concise markdown — designed for voice/quick context."""
396
+ if not self.recent_sessions and not self.active_projects and not self.recent_files:
397
+ return "No recent session context available."
398
+
399
+ parts = ["## Current Context\n"]
400
+
401
+ if self.active_projects:
402
+ parts.append(f"**Projects:** {', '.join(self.active_projects)}")
403
+ if self.active_branches:
404
+ parts.append(f"**Branches:** {', '.join(self.active_branches)}")
405
+ if self.active_plan:
406
+ parts.append(f"**Plan:** {self.active_plan}")
407
+
408
+ if self.recent_files:
409
+ parts.append(f"\n**Recent files ({len(self.recent_files)}):**")
410
+ for f in self.recent_files[:10]:
411
+ # Show just the filename, not full path
412
+ name = f.rsplit("/", 1)[-1] if "/" in f else f
413
+ parts.append(f"- {name}")
414
+
415
+ if self.recent_sessions:
416
+ latest = self.recent_sessions[0]
417
+ parts.append(f"\n**Latest session:** {latest.session_id[:8]}")
418
+ if latest.started_at:
419
+ parts.append(f"**Started:** {latest.started_at[:19]}")
420
+ if latest.project:
421
+ parts.append(f"**Project:** {latest.project}")
422
+ if latest.branch:
423
+ parts.append(f"**Branch:** {latest.branch}")
424
+
425
+ return "\n".join(parts)
426
+
427
+
428
def current_context(
    store: VectorStore,
    hours: int = 24,
) -> CurrentContext:
    """Get current working context: what the user has been doing recently.

    Designed for voice assistants and quick context injection. Lightweight:
    it only runs SQL, no embedding model is needed.

    Data sources, in order:
        1. session_context table, via sessions() (may be sparse)
        2. chunks table, for recently active projects
        3. file_interactions table, for recently touched files
        4. chunks.source_file, only when step 3 found no files

    Args:
        store: VectorStore instance; its `conn` is queried directly.
        hours: How many hours back to look (default: 24). The session lookup
            works in whole days, so `hours` is rounded up to days for it.

    Returns:
        CurrentContext with up to 10 recent sessions, up to 5 projects and
        5 branches, the plan of the most recent session that has one, and up
        to 20 recent files ordered by most recent activity.
    """
    result = CurrentContext()
    cursor = store.conn.cursor()
    # NOTE(review): naive local time, compared as a string against stored
    # timestamps. Confirm the stored values use the same convention.
    date_from = (datetime.now() - timedelta(hours=hours)).isoformat()

    # 1. Sessions first (richest data). -(-a // b) is ceiling division, so
    #    any partial day still counts as a full day; minimum 1.
    days = max(1, -(-hours // 24))
    recent = sessions(store, days=days, limit=10)
    result.recent_sessions = recent

    # 2. Recently active projects straight from the chunks table. This
    #    catches sessions that have no session_context row yet.
    chunk_projects = list(
        cursor.execute(
            """
            SELECT project
            FROM chunks
            WHERE created_at >= ? AND project IS NOT NULL
            GROUP BY project
            ORDER BY MAX(created_at) DESC
            LIMIT 10
            """,
            (date_from,),
        )
    )

    # Collect distinct projects/branches/plans, preserving session order
    # (sessions() returns newest first).
    projects = []
    branches = []
    plans = []
    for s in recent:
        if s.project and s.project not in projects:
            projects.append(s.project)
        if s.branch and s.branch not in branches:
            branches.append(s.branch)
        if s.plan_name and s.plan_name not in plans:
            plans.append(s.plan_name)

    # Merge in projects seen only in the chunks table.
    for row in chunk_projects:
        if row[0] and row[0] not in projects:
            projects.append(row[0])

    result.active_projects = projects[:5]
    result.active_branches = branches[:5]
    if plans:
        result.active_plan = plans[0]  # Most recent plan

    # 3. Recent files from file_interactions. GROUP BY + MAX(timestamp)
    #    ranks each file by its latest interaction. SELECT DISTINCT with
    #    ORDER BY on a non-selected column leaves it undefined which row's
    #    timestamp is used, so LIMIT could drop recently touched files.
    rows = list(
        cursor.execute(
            """
            SELECT file_path
            FROM file_interactions
            WHERE timestamp >= ?
            GROUP BY file_path
            ORDER BY MAX(timestamp) DESC
            LIMIT 20
            """,
            (date_from,),
        )
    )
    result.recent_files = [r[0] for r in rows if r[0]]

    # 4. Fallback: source files of recent chunks, ranked the same way.
    if not result.recent_files:
        file_rows = list(
            cursor.execute(
                """
                SELECT source_file
                FROM chunks
                WHERE created_at >= ? AND source_file IS NOT NULL
                GROUP BY source_file
                ORDER BY MAX(created_at) DESC
                LIMIT 20
                """,
                (date_from,),
            )
        )
        result.recent_files = [r[0] for r in file_rows if r[0]]

    return result
528
+
529
+
530
def format_sessions(session_list: list[SessionInfo], days: int = 7) -> str:
    """Render a list of sessions as a markdown bullet list.

    Args:
        session_list: Sessions to show, in the order they should appear.
        days: Look-back window, used only in the heading/empty message.

    Returns:
        A "no sessions" sentence for an empty list; otherwise a heading, one
        bullet per session and a trailing count line.
    """
    if not session_list:
        return f"No sessions found in the last {days} days."

    lines = [f"## Recent Sessions (last {days} days)\n"]

    for entry in session_list:
        started = entry.started_at[:19] if entry.started_at else "?"

        # Columns are joined with " | "; optional ones are added only when set.
        columns = [
            f"**{entry.session_id[:8]}**",
            f"{entry.project or '?'}",
            f"{entry.branch or '?'}",
        ]
        if entry.plan_name:
            plan = f"plan: {entry.plan_name}"
            if entry.plan_phase:
                plan += f"/{entry.plan_phase}"
            columns.append(plan)
        columns.append(f"{started}")
        if entry.files_changed:
            columns.append(f"{len(entry.files_changed)} files")

        lines.append("- " + " | ".join(columns))

    lines.append(f"\n*{len(session_list)} sessions*")
    return "\n".join(lines)
brainlayer/index_new.py ADDED
@@ -0,0 +1,87 @@
1
+ """New indexing pipeline using sqlite-vec and sentence-transformers."""
2
+
3
+ import logging
4
+ from pathlib import Path
5
+ from typing import Callable, List, Optional
6
+
7
+ from .embeddings import embed_chunks
8
+ from .pipeline.chunk import Chunk
9
+ from .vector_store import VectorStore
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+ from .paths import DEFAULT_DB_PATH
14
+
15
+
16
def _first_jsonl_timestamp(source_file: str) -> Optional[str]:
    """Return the "timestamp" of the first JSON-object line that has one, else None."""
    import json as _json

    try:
        # Decode explicitly as UTF-8. The locale default can be a legacy
        # code page, where non-ASCII content fails to decode and the
        # timestamp would be lost without any trace.
        with open(source_file, encoding="utf-8") as _f:
            for _line in _f:
                _line = _line.strip()
                if not _line:
                    continue
                _data = _json.loads(_line)
                # Only JSON objects can carry a "timestamp" key; scalars and
                # arrays are skipped instead of raising.
                if isinstance(_data, dict) and "timestamp" in _data:
                    return _data["timestamp"]
    except (OSError, ValueError) as exc:
        # Best effort: an unreadable or non-JSONL source is not an error,
        # the caller falls back to the current time. ValueError covers both
        # JSON and Unicode decode failures.
        logger.debug("No timestamp read from %s: %s", source_file, exc)
    return None


def index_chunks_to_sqlite(
    chunks: List[Chunk],
    source_file: str,
    project: Optional[str] = None,
    db_path: Path = DEFAULT_DB_PATH,
    on_progress: Optional[Callable[[int, int], None]] = None,
) -> int:
    """Embed chunks and upsert them into the sqlite-vec database.

    All chunks of one call share a single `created_at`: the first
    "timestamp" found in the source file when it is JSONL, otherwise the
    current UTC time.

    Args:
        chunks: Chunks to index.
        source_file: Path of the file the chunks came from; also used as the
            prefix of each chunk id (``"<source_file>:<index>"``).
        project: Optional project name stored with every chunk.
        db_path: Database file to open.
        on_progress: Optional callback forwarded to embed_chunks().

    Returns:
        0 when there is nothing to index or embedding produced nothing;
        otherwise the value returned by VectorStore.upsert_chunks().
    """
    if not chunks:
        return 0

    # Generate embeddings
    embedded_chunks = embed_chunks(chunks, on_progress=on_progress)

    if not embedded_chunks:
        return 0

    # Prefer the timestamp recorded in the source; fall back to "now" (UTC).
    created_at = _first_jsonl_timestamp(source_file)
    if not created_at:
        from datetime import datetime, timezone

        created_at = datetime.now(timezone.utc).isoformat()

    # Prepare parallel lists for the vector store: row data and embeddings.
    chunk_data = []
    embeddings = []

    for i, ec in enumerate(embedded_chunks):
        chunk = ec.chunk

        chunk_data.append(
            {
                "id": f"{source_file}:{i}",
                "content": chunk.content,
                "metadata": chunk.metadata,
                "source_file": source_file,
                "project": project,
                "content_type": chunk.content_type.value,
                "value_type": chunk.value.value,
                "char_count": chunk.char_count,
                "created_at": created_at,
            }
        )
        embeddings.append(ec.embedding)

    # Store in database
    with VectorStore(db_path) as store:
        return store.upsert_chunks(chunk_data, embeddings)
82
+
83
+
84
def get_stats(db_path: Path = DEFAULT_DB_PATH) -> dict:
    """Open the vector store at `db_path` and return its get_stats() result.

    The store is used as a context manager for the duration of the call.
    """
    with VectorStore(db_path) as db:
        return db.get_stats()