PyPI - brainlayer - Versions diffs - 1.0.0__py3-none-any.whl - Mend

brainlayer 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53)

brainlayer/__init__.py +3 -0
brainlayer/cli/__init__.py +1545 -0
brainlayer/cli/wizard.py +132 -0
brainlayer/cli_new.py +151 -0
brainlayer/client.py +164 -0
brainlayer/clustering.py +736 -0
brainlayer/daemon.py +1105 -0
brainlayer/dashboard/README.md +129 -0
brainlayer/dashboard/__init__.py +5 -0
brainlayer/dashboard/app.py +151 -0
brainlayer/dashboard/search.py +229 -0
brainlayer/dashboard/views.py +230 -0
brainlayer/embeddings.py +131 -0
brainlayer/engine.py +550 -0
brainlayer/index_new.py +87 -0
brainlayer/mcp/__init__.py +1558 -0
brainlayer/migrate.py +205 -0
brainlayer/paths.py +43 -0
brainlayer/pipeline/__init__.py +47 -0
brainlayer/pipeline/analyze_communication.py +508 -0
brainlayer/pipeline/brain_graph.py +567 -0
brainlayer/pipeline/chat_tags.py +63 -0
brainlayer/pipeline/chunk.py +422 -0
brainlayer/pipeline/classify.py +472 -0
brainlayer/pipeline/cluster_sampling.py +73 -0
brainlayer/pipeline/enrichment.py +810 -0
brainlayer/pipeline/extract.py +66 -0
brainlayer/pipeline/extract_claude_desktop.py +149 -0
brainlayer/pipeline/extract_corrections.py +231 -0
brainlayer/pipeline/extract_markdown.py +195 -0
brainlayer/pipeline/extract_whatsapp.py +227 -0
brainlayer/pipeline/git_overlay.py +301 -0
brainlayer/pipeline/longitudinal_analyzer.py +568 -0
brainlayer/pipeline/obsidian_export.py +455 -0
brainlayer/pipeline/operation_grouping.py +486 -0
brainlayer/pipeline/plan_linking.py +313 -0
brainlayer/pipeline/sanitize.py +549 -0
brainlayer/pipeline/semantic_style.py +574 -0
brainlayer/pipeline/session_enrichment.py +472 -0
brainlayer/pipeline/style_embed.py +67 -0
brainlayer/pipeline/style_index.py +139 -0
brainlayer/pipeline/temporal_chains.py +203 -0
brainlayer/pipeline/time_batcher.py +248 -0
brainlayer/pipeline/unified_timeline.py +569 -0
brainlayer/storage.py +66 -0
brainlayer/store.py +155 -0
brainlayer/taxonomy.json +80 -0
brainlayer/vector_store.py +1891 -0
brainlayer-1.0.0.dist-info/METADATA +313 -0
brainlayer-1.0.0.dist-info/RECORD +53 -0
brainlayer-1.0.0.dist-info/WHEEL +4 -0
brainlayer-1.0.0.dist-info/entry_points.txt +4 -0
brainlayer-1.0.0.dist-info/licenses/LICENSE +190 -0

brainlayer/dashboard/views.py ADDED Viewed

@@ -0,0 +1,230 @@
+"""Dashboard views for Home and Memory interfaces."""
+import logging
+from typing import Any, Dict, List, Optional
+from rich import box
+from rich.align import Align
+from rich.columns import Columns
+from rich.console import Group
+from rich.panel import Panel
+from rich.table import Table
+from rich.text import Text
+from .search import HybridSearchEngine
+logger = logging.getLogger(__name__)  # module-level logger; MemoryView.search reports search failures through it
+class HomeView:
+    """Home dashboard view showing system statistics."""
+    def __init__(self, stats: Dict[str, Any]):
+        self.stats = stats
+    def render(self) -> Panel:
+        """Render the home view with statistics."""
+        # Create statistics table
+        stats_table = Table(show_header=False, box=box.SIMPLE)
+        stats_table.add_column("Metric", style="bold")
+        stats_table.add_column("Value", style="cyan")
+        total_chunks = self.stats.get("total_chunks", 0)
+        projects = self.stats.get("projects", [])
+        content_types = self.stats.get("content_types", [])
+        stats_table.add_row("Total Chunks", f"{total_chunks:,}")
+        stats_table.add_row("Projects", str(len(projects)))
+        stats_table.add_row("Content Types", str(len(content_types)))
+        # Create projects list
+        projects_text = Text()
+        if projects:
+            for i, project in enumerate(projects[:5]):  # Show top 5
+                if i > 0:
+                    projects_text.append(" • ")
+                projects_text.append(project, style="green")
+            if len(projects) > 5:
+                projects_text.append(f" • +{len(projects) - 5} more", style="dim")
+        else:
+            projects_text.append("No projects indexed", style="dim")
+        # Create content types list
+        types_text = Text()
+        if content_types:
+            for i, content_type in enumerate(content_types):
+                if i > 0:
+                    types_text.append(" • ")
+                types_text.append(content_type, style="yellow")
+        else:
+            types_text.append("No content types", style="dim")
+        # Combine into columns
+        left_panel = Panel(stats_table, title="Statistics", box=box.ROUNDED)
+        right_content = Text.assemble("Projects:\n", projects_text, "\n\n", "Content Types:\n", types_text)
+        right_panel = Panel(right_content, title="Collections", box=box.ROUNDED)
+        columns = Columns([left_panel, right_panel], equal=True)
+        # Status message
+        if total_chunks == 0:
+            status_msg = Text("No data indexed. Run 'brainlayer index' to get started.", style="yellow")
+        else:
+            status_msg = Text(
+                f"Ready to search {total_chunks:,} chunks across {len(projects)} projects",
+                style="green",
+            )
+        status_panel = Panel(Align.center(status_msg), box=box.ROUNDED, style="dim")
+        # Combine all elements using Group (Text.assemble only works with text)
+        main_content = Group(columns, Text(""), status_panel)
+        return Panel(main_content, title="Home", box=box.ROUNDED)
+class MemoryView:
+    """Memory view with search interface and filtering."""
+    def __init__(self, search_engine: HybridSearchEngine, vector_store, stats: Dict[str, Any]):
+        self.search_engine = search_engine
+        self.vector_store = vector_store  # sqlite-vec VectorStore (or None)
+        self.stats = stats
+        self.current_query = ""
+        self.current_filter = None
+        self.search_results = []
+    def render(self) -> Panel:
+        """Render the memory view with search interface."""
+        # Search interface
+        search_panel = self._render_search_interface()
+        # Filters
+        filters_panel = self._render_filters()
+        # Results
+        results_panel = self._render_results()
+        # Combine into layout using Group (Text.assemble only works with text)
+        top_row = Columns([search_panel, filters_panel], equal=True)
+        main_content = Group(top_row, Text(""), results_panel)
+        return Panel(main_content, title="Memory Search", box=box.ROUNDED)
+    def _render_search_interface(self) -> Panel:
+        """Render search input interface."""
+        content = Text.assemble(
+            "Search Query:\n",
+            Text("Enter search terms to find relevant chunks", style="dim"),
+            "\n\n",
+            "Search Type: ",
+            Text("Hybrid (BM25 + Semantic)", style="green"),
+            "\n",
+            "Status: ",
+            Text("Ready", style="cyan"),
+        )
+        return Panel(content, title="Search", box=box.ROUNDED)
+    def _render_filters(self) -> Panel:
+        """Render collection filters."""
+        projects = self.stats.get("projects", [])
+        content_types = self.stats.get("content_types", [])
+        content = Text("Available Filters:\n\n")
+        # Projects filter
+        content.append("Projects:\n", style="bold")
+        if projects:
+            for project in projects[:3]:  # Show top 3
+                content.append(f"• {project}\n", style="green")
+            if len(projects) > 3:
+                content.append(f"• +{len(projects) - 3} more\n", style="dim")
+        else:
+            content.append("• No projects\n", style="dim")
+        content.append("\n")
+        # Content types filter
+        content.append("Content Types:\n", style="bold")
+        if content_types:
+            for ctype in content_types:
+                content.append(f"• {ctype}\n", style="yellow")
+        else:
+            content.append("• No types\n", style="dim")
+        return Panel(content, title="Filters", box=box.ROUNDED)
+    def _render_results(self) -> Panel:
+        """Render search results."""
+        if not self.search_results:
+            content = Align.center(Text("No search performed yet.\nEnter a query to see results.", style="dim italic"))
+        else:
+            # Create results table
+            results_table = Table(show_header=True, box=box.SIMPLE)
+            results_table.add_column("Score", width=8)
+            results_table.add_column("Project", width=15)
+            results_table.add_column("Type", width=12)
+            results_table.add_column("Content", min_width=40)
+            for i, result in enumerate(self.search_results[:5]):  # Show top 5
+                score = f"{result.get('score', 0):.3f}"
+                project = result.get("project", "unknown")[:14]
+                content_type = result.get("content_type", "unknown")[:11]
+                content_preview = (
+                    result.get("content", "")[:80] + "..."
+                    if len(result.get("content", "")) > 80
+                    else result.get("content", "")
+                )
+                results_table.add_row(score, project, content_type, content_preview)
+            content = results_table
+        return Panel(content, title="Results", box=box.ROUNDED)
+    def search(self, query: str, project_filter: Optional[str] = None) -> List[Dict[str, Any]]:
+        """Perform search and update results."""
+        if not query.strip():
+            self.search_results = []
+            return []
+        try:
+            # Use core hybrid search (FTS5 + semantic via RRF) if available
+            if self.vector_store and hasattr(self.vector_store, "hybrid_search"):
+                query_embedding = self.search_engine.embedding_model.embed_query(query)
+                results = self.vector_store.hybrid_search(
+                    query_embedding=query_embedding,
+                    query_text=query,
+                    n_results=10,
+                    project_filter=project_filter,
+                )
+            else:
+                results = self.search_engine.search(
+                    self.vector_store, query, n_results=10, project_filter=project_filter
+                )
+            # Convert to display format
+            documents = results.get("documents", [[]])[0]
+            metadatas = results.get("metadatas", [[]])[0]
+            distances = results.get("distances", [[]])[0]
+            self.search_results = []
+            for doc, meta, distance in zip(documents, metadatas, distances):
+                self.search_results.append(
+                    {
+                        "content": doc,
+                        "project": meta.get("project", "unknown"),
+                        "content_type": meta.get("content_type", "unknown"),
+                        "score": 1.0 - distance if distance is not None else 1.0,
+                    }
+                )
+            return self.search_results
+        except Exception as e:
+            logger.warning("Search error: %s", e)
+            self.search_results = []
+            return []

brainlayer/embeddings.py ADDED Viewed

@@ -0,0 +1,131 @@
+"""Fast embeddings using sentence-transformers with bge-large-en-v1.5."""
+import logging
+from dataclasses import dataclass
+from typing import Callable, List, Optional
+import torch
+from sentence_transformers import SentenceTransformer
+from .pipeline.chunk import Chunk
+logger = logging.getLogger(__name__)  # module-level logger used by EmbeddingModel
+# Use bge-large-en-v1.5 for high-quality embeddings (1024 dims, 63.5 MTEB score)
+DEFAULT_MODEL = "BAAI/bge-large-en-v1.5"  # default for every model_name parameter in this module
+EMBEDDING_DIM = 1024  # bge-large dimension
+MAX_EMBEDDING_CHARS = 512  # max characters kept per text before encoding (enforced with len()/slicing); NOTE(review): originally labelled "context length" - confirm a token limit was not intended
+BGE_QUERY_PREFIX = "Represent this sentence for searching relevant passages: "  # prepended to the query text in EmbeddingModel.embed_query
+@dataclass
+class EmbeddedChunk:
+    """A chunk with its embedding vector."""
+    chunk: Chunk  # the source chunk the vector was computed from
+    embedding: List[float]  # encoder output converted to a plain list via .tolist()
+class EmbeddingModel:
+    """Sentence-transformers embedding model."""
+    def __init__(self, model_name: str = DEFAULT_MODEL):
+        self.model_name = model_name
+        self._model: Optional[SentenceTransformer] = None
+    def _load_model(self) -> SentenceTransformer:
+        """Load model on first use."""
+        if self._model is None:
+            logger.info(f"Loading embedding model: {self.model_name}")
+            device = "mps" if torch.backends.mps.is_available() else "cpu"
+            self._model = SentenceTransformer(self.model_name, device=device)
+        return self._model
+    def embed_chunks(
+        self,
+        chunks: List[Chunk],
+        batch_size: int = 32,
+        on_progress: Optional[Callable[[int, int], None]] = None,
+    ) -> List[EmbeddedChunk]:
+        """Generate embeddings for chunks."""
+        if not chunks:
+            return []
+        model = self._load_model()
+        results = []
+        total = len(chunks)
+        # Prepare texts with truncation
+        texts = []
+        for chunk in chunks:
+            content = chunk.content
+            if len(content) > MAX_EMBEDDING_CHARS:
+                # Keep first part for context
+                content = content[: MAX_EMBEDDING_CHARS - 50] + "..."
+            texts.append(content)
+        # Generate embeddings in batches
+        for i in range(0, len(texts), batch_size):
+            batch_texts = texts[i : i + batch_size]
+            batch_chunks = chunks[i : i + batch_size]
+            try:
+                embeddings = model.encode(batch_texts, convert_to_numpy=True, show_progress_bar=False)
+                for chunk, embedding in zip(batch_chunks, embeddings):
+                    results.append(EmbeddedChunk(chunk=chunk, embedding=embedding.tolist()))
+                if on_progress:
+                    on_progress(len(results), total)
+            except Exception as e:
+                logger.error(f"Failed to embed batch: {e}")
+                continue
+        return results
+    def embed_query(self, query: str) -> List[float]:
+        """Generate embedding for search query with BGE prefix."""
+        model = self._load_model()
+        # Truncate if too long
+        if len(query) > MAX_EMBEDDING_CHARS:
+            query = query[: MAX_EMBEDDING_CHARS - 3] + "..."
+        # BGE models need query prefix for optimal retrieval
+        prefixed_query = f"{BGE_QUERY_PREFIX}{query}"
+        try:
+            embedding = model.encode([prefixed_query], convert_to_numpy=True)[0]
+            return embedding.tolist()
+        except Exception as e:
+            raise RuntimeError(f"Failed to embed query: {e}") from e
+# Global model instance
+_embedding_model: Optional[EmbeddingModel] = None
+def get_embedding_model(model_name: str = DEFAULT_MODEL) -> EmbeddingModel:
+    """Get global embedding model instance."""
+    global _embedding_model
+    if _embedding_model is None or _embedding_model.model_name != model_name:
+        _embedding_model = EmbeddingModel(model_name)
+    return _embedding_model
+def embed_chunks(
+    chunks: List[Chunk],
+    model_name: str = DEFAULT_MODEL,
+    batch_size: int = 32,
+    on_progress: Optional[Callable[[int, int], None]] = None,
+) -> List[EmbeddedChunk]:
+    """Generate embeddings for chunks using global model."""
+    model = get_embedding_model(model_name)
+    return model.embed_chunks(chunks, batch_size, on_progress)
+def embed_query(query: str, model_name: str = DEFAULT_MODEL) -> List[float]:
+    """Generate embedding for search query using global model."""
+    model = get_embedding_model(model_name)
+    return model.embed_query(query)