brainlayer 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. brainlayer/__init__.py +3 -0
  2. brainlayer/cli/__init__.py +1545 -0
  3. brainlayer/cli/wizard.py +132 -0
  4. brainlayer/cli_new.py +151 -0
  5. brainlayer/client.py +164 -0
  6. brainlayer/clustering.py +736 -0
  7. brainlayer/daemon.py +1105 -0
  8. brainlayer/dashboard/README.md +129 -0
  9. brainlayer/dashboard/__init__.py +5 -0
  10. brainlayer/dashboard/app.py +151 -0
  11. brainlayer/dashboard/search.py +229 -0
  12. brainlayer/dashboard/views.py +230 -0
  13. brainlayer/embeddings.py +131 -0
  14. brainlayer/engine.py +550 -0
  15. brainlayer/index_new.py +87 -0
  16. brainlayer/mcp/__init__.py +1558 -0
  17. brainlayer/migrate.py +205 -0
  18. brainlayer/paths.py +43 -0
  19. brainlayer/pipeline/__init__.py +47 -0
  20. brainlayer/pipeline/analyze_communication.py +508 -0
  21. brainlayer/pipeline/brain_graph.py +567 -0
  22. brainlayer/pipeline/chat_tags.py +63 -0
  23. brainlayer/pipeline/chunk.py +422 -0
  24. brainlayer/pipeline/classify.py +472 -0
  25. brainlayer/pipeline/cluster_sampling.py +73 -0
  26. brainlayer/pipeline/enrichment.py +810 -0
  27. brainlayer/pipeline/extract.py +66 -0
  28. brainlayer/pipeline/extract_claude_desktop.py +149 -0
  29. brainlayer/pipeline/extract_corrections.py +231 -0
  30. brainlayer/pipeline/extract_markdown.py +195 -0
  31. brainlayer/pipeline/extract_whatsapp.py +227 -0
  32. brainlayer/pipeline/git_overlay.py +301 -0
  33. brainlayer/pipeline/longitudinal_analyzer.py +568 -0
  34. brainlayer/pipeline/obsidian_export.py +455 -0
  35. brainlayer/pipeline/operation_grouping.py +486 -0
  36. brainlayer/pipeline/plan_linking.py +313 -0
  37. brainlayer/pipeline/sanitize.py +549 -0
  38. brainlayer/pipeline/semantic_style.py +574 -0
  39. brainlayer/pipeline/session_enrichment.py +472 -0
  40. brainlayer/pipeline/style_embed.py +67 -0
  41. brainlayer/pipeline/style_index.py +139 -0
  42. brainlayer/pipeline/temporal_chains.py +203 -0
  43. brainlayer/pipeline/time_batcher.py +248 -0
  44. brainlayer/pipeline/unified_timeline.py +569 -0
  45. brainlayer/storage.py +66 -0
  46. brainlayer/store.py +155 -0
  47. brainlayer/taxonomy.json +80 -0
  48. brainlayer/vector_store.py +1891 -0
  49. brainlayer-1.0.0.dist-info/METADATA +313 -0
  50. brainlayer-1.0.0.dist-info/RECORD +53 -0
  51. brainlayer-1.0.0.dist-info/WHEEL +4 -0
  52. brainlayer-1.0.0.dist-info/entry_points.txt +4 -0
  53. brainlayer-1.0.0.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,230 @@
1
+ """Dashboard views for Home and Memory interfaces."""
2
+
3
+ import logging
4
+ from typing import Any, Dict, List, Optional
5
+
6
+ from rich import box
7
+ from rich.align import Align
8
+ from rich.columns import Columns
9
+ from rich.console import Group
10
+ from rich.panel import Panel
11
+ from rich.table import Table
12
+ from rich.text import Text
13
+
14
+ from .search import HybridSearchEngine
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
class HomeView:
    """Landing view of the dashboard: headline counts plus indexed collections."""

    def __init__(self, stats: Dict[str, Any]):
        # Stats mapping; render() reads "total_chunks", "projects" and "content_types".
        self.stats = stats

    @staticmethod
    def _bulleted(items, style: str) -> Text:
        """Join ``items`` into a single Text line, bullet-separated, each item in ``style``."""
        line = Text()
        for position, item in enumerate(items):
            if position > 0:
                line.append(" • ")
            line.append(item, style=style)
        return line

    def render(self) -> Panel:
        """Build the "Home" panel: statistics, collections and a status line."""
        # Two-column, header-less table holding the headline numbers.
        summary = Table(show_header=False, box=box.SIMPLE)
        summary.add_column("Metric", style="bold")
        summary.add_column("Value", style="cyan")

        chunk_count = self.stats.get("total_chunks", 0)
        project_names = self.stats.get("projects", [])
        type_names = self.stats.get("content_types", [])

        summary.add_row("Total Chunks", f"{chunk_count:,}")
        summary.add_row("Projects", str(len(project_names)))
        summary.add_row("Content Types", str(len(type_names)))

        # Projects: at most five names, then a "+N more" marker for the rest.
        if project_names:
            projects_line = self._bulleted(project_names[:5], "green")
            hidden = len(project_names) - 5
            if hidden > 0:
                projects_line.append(f" • +{hidden} more", style="dim")
        else:
            projects_line = Text()
            projects_line.append("No projects indexed", style="dim")

        # Content types: every entry is listed.
        if type_names:
            types_line = self._bulleted(type_names, "yellow")
        else:
            types_line = Text()
            types_line.append("No content types", style="dim")

        # Side-by-side panels: numbers on the left, collections on the right.
        statistics_panel = Panel(summary, title="Statistics", box=box.ROUNDED)
        collections_body = Text.assemble(
            "Projects:\n",
            projects_line,
            "\n\n",
            "Content Types:\n",
            types_line,
        )
        collections_panel = Panel(collections_body, title="Collections", box=box.ROUNDED)
        side_by_side = Columns([statistics_panel, collections_panel], equal=True)

        # Status line depends on whether anything has been indexed yet.
        if total_is_empty := (chunk_count == 0):
            status = Text("No data indexed. Run 'brainlayer index' to get started.", style="yellow")
        else:
            status = Text(
                f"Ready to search {chunk_count:,} chunks across {len(project_names)} projects",
                style="green",
            )
        footer = Panel(Align.center(status), box=box.ROUNDED, style="dim")

        # Group stacks arbitrary renderables; Text.assemble only accepts text.
        body = Group(side_by_side, Text(""), footer)
        return Panel(body, title="Home", box=box.ROUNDED)
85
+
86
+
87
class MemoryView:
    """Memory view with search interface and filtering.

    Keeps the hits of the most recent ``search`` call in ``search_results`` and
    renders them together with a static search panel and a filter summary
    built from ``stats``.
    """

    def __init__(self, search_engine: HybridSearchEngine, vector_store, stats: Dict[str, Any]):
        self.search_engine = search_engine
        self.vector_store = vector_store  # sqlite-vec VectorStore (or None)
        self.stats = stats
        self.current_query = ""
        self.current_filter = None
        self.search_results = []

    def render(self) -> Panel:
        """Render the memory view: search and filter panels on top, results below."""
        search_panel = self._render_search_interface()
        filters_panel = self._render_filters()
        results_panel = self._render_results()

        # Combine into layout using Group (Text.assemble only works with text)
        top_row = Columns([search_panel, filters_panel], equal=True)
        main_content = Group(top_row, Text(""), results_panel)

        return Panel(main_content, title="Memory Search", box=box.ROUNDED)

    def _render_search_interface(self) -> Panel:
        """Render the (static) search prompt panel."""
        content = Text.assemble(
            "Search Query:\n",
            Text("Enter search terms to find relevant chunks", style="dim"),
            "\n\n",
            "Search Type: ",
            Text("Hybrid (BM25 + Semantic)", style="green"),
            "\n",
            "Status: ",
            Text("Ready", style="cyan"),
        )

        return Panel(content, title="Search", box=box.ROUNDED)

    def _render_filters(self) -> Panel:
        """Render the projects and content types available as filters."""
        projects = self.stats.get("projects", [])
        content_types = self.stats.get("content_types", [])

        content = Text("Available Filters:\n\n")

        # Projects filter: first three names, then a "+N more" marker.
        content.append("Projects:\n", style="bold")
        if projects:
            for project in projects[:3]:
                content.append(f"• {project}\n", style="green")
            if len(projects) > 3:
                content.append(f"• +{len(projects) - 3} more\n", style="dim")
        else:
            content.append("• No projects\n", style="dim")

        content.append("\n")

        # Content types filter: every type is listed.
        content.append("Content Types:\n", style="bold")
        if content_types:
            for ctype in content_types:
                content.append(f"• {ctype}\n", style="yellow")
        else:
            content.append("• No types\n", style="dim")

        return Panel(content, title="Filters", box=box.ROUNDED)

    @staticmethod
    def _display(value, fallback: str) -> str:
        """Return ``value`` as a string, or ``fallback`` when it is ``None``."""
        return fallback if value is None else str(value)

    @staticmethod
    def _first_batch(results, key: str) -> list:
        """Return the first inner list stored under ``key``, or ``[]`` if absent or empty."""
        batches = results.get(key)
        if batches is None or len(batches) == 0:
            return []
        first = batches[0]
        return [] if first is None else first

    def _render_results(self) -> Panel:
        """Render up to five of the stored search results as a table."""
        if not self.search_results:
            content = Align.center(Text("No search performed yet.\nEnter a query to see results.", style="dim italic"))
        else:
            results_table = Table(show_header=True, box=box.SIMPLE)
            results_table.add_column("Score", width=8)
            results_table.add_column("Project", width=15)
            results_table.add_column("Type", width=12)
            results_table.add_column("Content", min_width=40)

            for result in self.search_results[:5]:  # Show top 5
                # Null fields must not crash rendering: a stored None would
                # otherwise fail on slicing or float formatting.
                score_value = result.get("score")
                score = f"{0 if score_value is None else score_value:.3f}"
                project = self._display(result.get("project"), "unknown")[:14]
                content_type = self._display(result.get("content_type"), "unknown")[:11]
                text = self._display(result.get("content"), "")
                content_preview = text[:80] + "..." if len(text) > 80 else text

                results_table.add_row(score, project, content_type, content_preview)

            content = results_table

        return Panel(content, title="Results", box=box.ROUNDED)

    def search(self, query: str, project_filter: Optional[str] = None) -> List[Dict[str, Any]]:
        """Perform search and update ``search_results``.

        Args:
            query: Free-text query; a blank query clears the results.
            project_filter: Optional project name forwarded to the backend.

        Returns:
            The new result list. Each entry has ``content``, ``project``,
            ``content_type`` and ``score`` keys. Any backend error is logged
            as a warning and yields an empty list.
        """
        if not query or not query.strip():
            self.search_results = []
            return []

        try:
            # Use core hybrid search (FTS5 + semantic via RRF) if available
            if self.vector_store and hasattr(self.vector_store, "hybrid_search"):
                query_embedding = self.search_engine.embedding_model.embed_query(query)
                results = self.vector_store.hybrid_search(
                    query_embedding=query_embedding,
                    query_text=query,
                    n_results=10,
                    project_filter=project_filter,
                )
            else:
                results = self.search_engine.search(
                    self.vector_store, query, n_results=10, project_filter=project_filter
                )

            # Convert to display format. Results are read as nested lists with
            # one inner list per query; only the first is used.
            documents = self._first_batch(results, "documents")
            metadatas = self._first_batch(results, "metadatas")
            distances = self._first_batch(results, "distances")

            hits = []
            for index, doc in enumerate(documents):
                # Documents drive the loop so that a missing or short
                # metadatas/distances list degrades to defaults instead of
                # silently dropping hits (zip would truncate to the shortest).
                meta = metadatas[index] if index < len(metadatas) else None
                if meta is None:
                    meta = {}
                distance = distances[index] if index < len(distances) else None
                hits.append(
                    {
                        "content": doc,
                        "project": self._display(meta.get("project"), "unknown"),
                        "content_type": self._display(meta.get("content_type"), "unknown"),
                        "score": 1.0 - distance if distance is not None else 1.0,
                    }
                )

            self.search_results = hits
            return self.search_results

        except Exception as e:
            # Best-effort UI boundary: a failed search must not take down the dashboard.
            logger.warning("Search error: %s", e)
            self.search_results = []
            return []
@@ -0,0 +1,131 @@
1
+ """Fast embeddings using sentence-transformers with bge-large-en-v1.5."""
2
+
3
+ import logging
4
+ from dataclasses import dataclass
5
+ from typing import Callable, List, Optional
6
+
7
+ import torch
8
+ from sentence_transformers import SentenceTransformer
9
+
10
+ from .pipeline.chunk import Chunk
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
# Use bge-large-en-v1.5 for high-quality embeddings (1024 dims, 63.5 MTEB score)
DEFAULT_MODEL = "BAAI/bge-large-en-v1.5"
EMBEDDING_DIM = 1024  # bge-large dimension
# Cap applied to chunk and query text before encoding.
# NOTE(review): the inline note says "context length", but this value is
# compared against len() of Python strings, i.e. characters — confirm
# whether a token limit was intended.
MAX_EMBEDDING_CHARS = 512  # context length
# Instruction prepended to search queries (not to chunks) before encoding.
BGE_QUERY_PREFIX = "Represent this sentence for searching relevant passages: "
19
+
20
+
21
@dataclass
class EmbeddedChunk:
    """A chunk with its embedding vector.

    Created by ``EmbeddingModel.embed_chunks``, one instance per chunk that
    was encoded successfully.
    """

    # The source chunk the embedding was computed from.
    chunk: Chunk
    # Embedding as a plain list of floats (the encoder output after ``tolist()``).
    embedding: List[float]
27
+
28
+
29
class EmbeddingModel:
    """Sentence-transformers embedding model, loaded lazily on first use."""

    def __init__(self, model_name: str = DEFAULT_MODEL):
        self.model_name = model_name
        # Populated by _load_model() on the first embed call.
        self._model: Optional[SentenceTransformer] = None

    def _load_model(self) -> SentenceTransformer:
        """Load the model on first use and return the cached instance afterwards."""
        if self._model is None:
            logger.info("Loading embedding model: %s", self.model_name)
            # Use Apple's MPS backend when torch reports it, otherwise the CPU.
            device = "mps" if torch.backends.mps.is_available() else "cpu"
            self._model = SentenceTransformer(self.model_name, device=device)
        return self._model

    def embed_chunks(
        self,
        chunks: List[Chunk],
        batch_size: int = 32,
        on_progress: Optional[Callable[[int, int], None]] = None,
    ) -> List[EmbeddedChunk]:
        """Generate embeddings for chunks.

        Args:
            chunks: Chunks to embed; an empty list returns ``[]`` without
                loading the model.
            batch_size: Number of chunks encoded per ``encode`` call; must be
                at least 1.
            on_progress: Optional callback invoked after each successful batch
                with ``(embedded_so_far, total)``.

        Returns:
            One ``EmbeddedChunk`` per chunk that was encoded. A batch that
            fails is logged and skipped, so the result may be shorter than
            ``chunks``.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if not chunks:
            return []
        if batch_size < 1:
            # A negative size would make the range below empty and silently
            # return no embeddings; 0 already raised ValueError from range().
            raise ValueError("batch_size must be a positive integer")

        model = self._load_model()
        results: List[EmbeddedChunk] = []
        total = len(chunks)

        # Over-long content keeps its leading part and gets an ellipsis marker.
        texts = [
            chunk.content[: MAX_EMBEDDING_CHARS - 50] + "..."
            if len(chunk.content) > MAX_EMBEDDING_CHARS
            else chunk.content
            for chunk in chunks
        ]

        # Generate embeddings in batches
        for start in range(0, total, batch_size):
            stop = start + batch_size
            try:
                embeddings = model.encode(texts[start:stop], convert_to_numpy=True, show_progress_bar=False)

                for chunk, embedding in zip(chunks[start:stop], embeddings):
                    results.append(EmbeddedChunk(chunk=chunk, embedding=embedding.tolist()))

                if on_progress:
                    on_progress(len(results), total)

            except Exception as e:
                # Best effort: skip the failing batch and keep going.
                logger.error("Failed to embed batch: %s", e)

        return results

    def embed_query(self, query: str) -> List[float]:
        """Generate embedding for search query with BGE prefix.

        Args:
            query: Search text; truncated to ``MAX_EMBEDDING_CHARS`` characters
                (ellipsis included) before the prefix is added.

        Returns:
            The query embedding as a list of floats.

        Raises:
            RuntimeError: If encoding fails; the original error is chained.
        """
        model = self._load_model()

        # Truncate if too long
        if len(query) > MAX_EMBEDDING_CHARS:
            query = query[: MAX_EMBEDDING_CHARS - 3] + "..."

        # BGE models need query prefix for optimal retrieval
        prefixed_query = f"{BGE_QUERY_PREFIX}{query}"

        try:
            # Progress bar disabled, matching embed_chunks, so that encoding a
            # query never writes a progress bar into the caller's terminal UI.
            embedding = model.encode([prefixed_query], convert_to_numpy=True, show_progress_bar=False)[0]
            return embedding.tolist()
        except Exception as e:
            raise RuntimeError(f"Failed to embed query: {e}") from e
103
+
104
+
105
# Process-wide cached model wrapper; replaced when a different model is requested.
_embedding_model: Optional[EmbeddingModel] = None


def get_embedding_model(model_name: str = DEFAULT_MODEL) -> EmbeddingModel:
    """Return the shared ``EmbeddingModel`` for ``model_name``, creating it if needed."""
    global _embedding_model
    cached = _embedding_model
    # Reuse the cached wrapper only when it was built for the same model name.
    if cached is not None and cached.model_name == model_name:
        return cached
    _embedding_model = EmbeddingModel(model_name)
    return _embedding_model
115
+
116
+
117
def embed_chunks(
    chunks: List[Chunk],
    model_name: str = DEFAULT_MODEL,
    batch_size: int = 32,
    on_progress: Optional[Callable[[int, int], None]] = None,
) -> List[EmbeddedChunk]:
    """Embed ``chunks`` with the shared model for ``model_name``."""
    shared_model = get_embedding_model(model_name)
    embedded = shared_model.embed_chunks(chunks, batch_size, on_progress)
    return embedded
126
+
127
+
128
def embed_query(query: str, model_name: str = DEFAULT_MODEL) -> List[float]:
    """Embed a search query with the shared model for ``model_name``."""
    return get_embedding_model(model_name).embed_query(query)