brainlayer 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. brainlayer/__init__.py +3 -0
  2. brainlayer/cli/__init__.py +1545 -0
  3. brainlayer/cli/wizard.py +132 -0
  4. brainlayer/cli_new.py +151 -0
  5. brainlayer/client.py +164 -0
  6. brainlayer/clustering.py +736 -0
  7. brainlayer/daemon.py +1105 -0
  8. brainlayer/dashboard/README.md +129 -0
  9. brainlayer/dashboard/__init__.py +5 -0
  10. brainlayer/dashboard/app.py +151 -0
  11. brainlayer/dashboard/search.py +229 -0
  12. brainlayer/dashboard/views.py +230 -0
  13. brainlayer/embeddings.py +131 -0
  14. brainlayer/engine.py +550 -0
  15. brainlayer/index_new.py +87 -0
  16. brainlayer/mcp/__init__.py +1558 -0
  17. brainlayer/migrate.py +205 -0
  18. brainlayer/paths.py +43 -0
  19. brainlayer/pipeline/__init__.py +47 -0
  20. brainlayer/pipeline/analyze_communication.py +508 -0
  21. brainlayer/pipeline/brain_graph.py +567 -0
  22. brainlayer/pipeline/chat_tags.py +63 -0
  23. brainlayer/pipeline/chunk.py +422 -0
  24. brainlayer/pipeline/classify.py +472 -0
  25. brainlayer/pipeline/cluster_sampling.py +73 -0
  26. brainlayer/pipeline/enrichment.py +810 -0
  27. brainlayer/pipeline/extract.py +66 -0
  28. brainlayer/pipeline/extract_claude_desktop.py +149 -0
  29. brainlayer/pipeline/extract_corrections.py +231 -0
  30. brainlayer/pipeline/extract_markdown.py +195 -0
  31. brainlayer/pipeline/extract_whatsapp.py +227 -0
  32. brainlayer/pipeline/git_overlay.py +301 -0
  33. brainlayer/pipeline/longitudinal_analyzer.py +568 -0
  34. brainlayer/pipeline/obsidian_export.py +455 -0
  35. brainlayer/pipeline/operation_grouping.py +486 -0
  36. brainlayer/pipeline/plan_linking.py +313 -0
  37. brainlayer/pipeline/sanitize.py +549 -0
  38. brainlayer/pipeline/semantic_style.py +574 -0
  39. brainlayer/pipeline/session_enrichment.py +472 -0
  40. brainlayer/pipeline/style_embed.py +67 -0
  41. brainlayer/pipeline/style_index.py +139 -0
  42. brainlayer/pipeline/temporal_chains.py +203 -0
  43. brainlayer/pipeline/time_batcher.py +248 -0
  44. brainlayer/pipeline/unified_timeline.py +569 -0
  45. brainlayer/storage.py +66 -0
  46. brainlayer/store.py +155 -0
  47. brainlayer/taxonomy.json +80 -0
  48. brainlayer/vector_store.py +1891 -0
  49. brainlayer-1.0.0.dist-info/METADATA +313 -0
  50. brainlayer-1.0.0.dist-info/RECORD +53 -0
  51. brainlayer-1.0.0.dist-info/WHEEL +4 -0
  52. brainlayer-1.0.0.dist-info/entry_points.txt +4 -0
  53. brainlayer-1.0.0.dist-info/licenses/LICENSE +190 -0
brainlayer/migrate.py ADDED
@@ -0,0 +1,205 @@
1
+ """Migration script to convert ChromaDB data to sqlite-vec."""
2
+
3
+ import logging
4
+ import os
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ # Disable ChromaDB telemetry
9
+ os.environ["ANONYMIZED_TELEMETRY"] = "false"
10
+
11
+ try:
12
+ import chromadb
13
+ from chromadb.config import Settings
14
+
15
+ CHROMADB_AVAILABLE = True
16
+ except ImportError:
17
+ CHROMADB_AVAILABLE = False
18
+
19
+ from .embeddings import get_embedding_model
20
+ from .vector_store import VectorStore
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+ # Paths
25
+ CHROMADB_PATH = Path.home() / ".local" / "share" / "brainlayer" / "chromadb.backup"
26
+ SQLITE_PATH = Path.home() / ".local" / "share" / "brainlayer" / "brainlayer.db"
27
+
28
+
29
# Embeddings of exactly this width are copied as-is; anything else is re-embedded.
_EMBEDDING_DIM = 1024
# Rows pulled from ChromaDB per request, to avoid loading a whole collection at once.
_FETCH_BATCH = 5000
# Rows handed to VectorStore.upsert_chunks per call.
_INSERT_BATCH = 1000
# Texts encoded per embedding-model call.
_REEMBED_BATCH = 32
# Metadata keys that become top-level chunk fields instead of staying in "metadata".
_PROMOTED_KEYS = frozenset({"source_file", "project", "content_type", "value_type", "char_count"})


def _column(batch, key, size):
    """Return ``batch[key]``, or ``size`` ``None`` values if the column is missing or empty.

    Uses ``is None`` / ``len()`` rather than truthiness so that array-like
    columns (e.g. a NumPy matrix of embeddings) do not raise ``ValueError``.
    """
    values = batch.get(key)
    if values is None or len(values) == 0:
        return [None] * size
    return values


def _usable_embedding(embedding):
    """Return ``embedding`` as a plain list if it has the expected width, else ``None``.

    ``None`` tells the caller that the chunk has to be re-embedded.
    """
    if embedding is None:
        return None
    try:
        width = len(embedding)
    except TypeError:
        return None
    if width != _EMBEDDING_DIM:
        return None
    # Normalise array-likes to lists, matching what the re-embedding path stores.
    return embedding.tolist() if hasattr(embedding, "tolist") else list(embedding)


def _chunk_from_record(chunk_id, document, metadata):
    """Build the chunk dict passed to ``upsert_chunks`` from one ChromaDB record."""
    metadata = metadata or {}
    # A record without a document would otherwise break len() below.
    content = "" if document is None else document
    return {
        "id": chunk_id,
        "content": content,
        "metadata": {k: v for k, v in metadata.items() if k not in _PROMOTED_KEYS},
        "source_file": metadata.get("source_file", "unknown"),
        "project": metadata.get("project"),
        "content_type": metadata.get("content_type"),
        "value_type": metadata.get("value_type"),
        "char_count": metadata.get("char_count", len(content)),
    }


def migrate_from_chromadb() -> bool:
    """Migrate data from ChromaDB to sqlite-vec.

    Reads every collection under ``CHROMADB_PATH`` in batches, converts each
    record into a chunk dict and upserts it, together with its embedding,
    into a ``VectorStore`` at ``SQLITE_PATH``. Embeddings that are missing or
    not ``_EMBEDDING_DIM`` wide are recomputed with the embedding model; if
    that fails, a zero vector is stored so the chunk's text is still migrated.

    NOTE(review): ``SQLITE_PATH`` is fixed to the ``brainlayer`` directory,
    while ``paths.get_db_path`` prefers the legacy ``zikaron`` database when
    it exists -- confirm the migrated database is the one the application
    actually reads.

    Returns:
        ``True`` if the migration ran to completion, ``False`` if it was
        skipped (ChromaDB missing, no data directory, no collections) or
        failed. Failures are logged and printed, never raised.
    """
    if not CHROMADB_AVAILABLE:
        print("ChromaDB not available, skipping migration")
        return False

    if not CHROMADB_PATH.exists():
        print("No existing ChromaDB found, skipping migration")
        return False

    print(f"Migrating from ChromaDB at {CHROMADB_PATH}")

    vector_store = None
    try:
        # Connect to ChromaDB
        client = chromadb.PersistentClient(path=str(CHROMADB_PATH), settings=Settings(anonymized_telemetry=False))

        # Get all collections
        collections = client.list_collections()
        if not collections:
            print("No collections found in ChromaDB")
            return False

        collection_names = [c.name for c in collections]
        total_all = sum(c.count() for c in collections)
        print(f"Found {len(collections)} collections with {total_all} total chunks: {collection_names}")

        # Create sqlite-vec store
        print(f"Creating sqlite-vec database at {SQLITE_PATH}")
        vector_store = VectorStore(SQLITE_PATH)

        # Lazy-load embedding model only if needed
        embedding_model = None

        for collection in collections:
            total_count = collection.count()
            print(f"\n--- Migrating collection '{collection.name}' ({total_count} chunks) ---")

            migrated = 0
            need_reembed_total = 0

            # Fetch and insert in batches to avoid OOM
            for offset in range(0, total_count, _FETCH_BATCH):
                batch_data = collection.get(
                    include=["documents", "metadatas", "embeddings"],
                    limit=_FETCH_BATCH,
                    offset=offset,
                )

                ids = batch_data["ids"]
                if not ids:
                    break

                documents = _column(batch_data, "documents", len(ids))
                metadatas = _column(batch_data, "metadatas", len(ids))
                embeddings = _column(batch_data, "embeddings", len(ids))

                chunks = []
                embeddings_batch = []
                need_reembed = []  # positions within this batch lacking a usable vector

                for i, (chunk_id, document, metadata, embedding) in enumerate(
                    zip(ids, documents, metadatas, embeddings)
                ):
                    chunks.append(_chunk_from_record(chunk_id, document, metadata))
                    vector = _usable_embedding(embedding)
                    embeddings_batch.append(vector)
                    if vector is None:
                        need_reembed.append(i)

                # Re-embed chunks with wrong/missing dimensions
                if need_reembed:
                    need_reembed_total += len(need_reembed)
                    if embedding_model is None:
                        print("Loading bge-large-en-v1.5 for re-embedding...")
                        embedding_model = get_embedding_model()

                    for rb_start in range(0, len(need_reembed), _REEMBED_BATCH):
                        rb_indices = need_reembed[rb_start : rb_start + _REEMBED_BATCH]
                        rb_texts = [chunks[idx]["content"] for idx in rb_indices]
                        try:
                            rb_embs = embedding_model._load_model().encode(
                                rb_texts, convert_to_numpy=True, show_progress_bar=False
                            )
                            for idx, rb_emb in zip(rb_indices, rb_embs):
                                embeddings_batch[idx] = rb_emb.tolist()
                        except Exception as e:
                            # Deliberate best effort: keep the text and store a
                            # zero vector rather than aborting the migration.
                            logger.exception("Failed to re-embed batch: %s", e)
                            for idx in rb_indices:
                                embeddings_batch[idx] = [0.0] * _EMBEDDING_DIM

                # Insert this batch into sqlite-vec
                for ins_start in range(0, len(chunks), _INSERT_BATCH):
                    ins_end = ins_start + _INSERT_BATCH
                    vector_store.upsert_chunks(chunks[ins_start:ins_end], embeddings_batch[ins_start:ins_end])

                migrated += len(chunks)
                reembed_note = f" ({len(need_reembed)} re-embedded)" if need_reembed else ""
                print(f"  {migrated}/{total_count} chunks{reembed_note}")

            if need_reembed_total:
                print(f"  ({need_reembed_total} total re-embedded in this collection)")

        # Clear the handle first so the ``finally`` clause cannot close it twice.
        store, vector_store = vector_store, None
        store.close()

        # Verify migration through a fresh connection
        verifier = VectorStore(SQLITE_PATH)
        try:
            final_count = verifier.count()
        finally:
            verifier.close()

        print(f"\nMigration complete: {final_count} chunks in sqlite-vec (from {total_all} in ChromaDB)")

        return True

    except Exception as e:
        logger.exception("Migration failed: %s", e)
        print(f"Migration failed: {e}")
        return False

    finally:
        # Release the store if the migration aborted part-way through.
        if vector_store is not None:
            try:
                vector_store.close()
            except Exception as close_error:
                logger.warning("Failed to close vector store: %s", close_error)
171
+
172
+
173
def main():
    """Main migration entry point.

    Configures logging, asks for confirmation before replacing an existing
    database at ``SQLITE_PATH``, runs ``migrate_from_chromadb`` and exits
    with status 1 if the migration failed or was skipped.

    Only ``y`` or ``yes`` (any case, surrounding whitespace ignored) confirms
    the overwrite. Any other answer, or a closed stdin, cancels without
    touching the existing database.
    """
    logging.basicConfig(level=logging.INFO)

    print("זיכרון - Migration Tool")
    print("=" * 50)

    if SQLITE_PATH.exists():
        try:
            response = input(f"sqlite-vec database already exists at {SQLITE_PATH}. Overwrite? (y/N): ")
        except EOFError:
            # Non-interactive run: take the safe default (N), never delete data.
            response = ""

        if response.strip().lower() not in ("y", "yes"):
            print("Migration cancelled")
            return

        # Remove existing database and WAL/SHM files
        stale_files = [SQLITE_PATH]
        stale_files.extend(SQLITE_PATH.parent / (SQLITE_PATH.name + suffix) for suffix in ("-shm", "-wal"))
        for stale in stale_files:
            try:
                stale.unlink()
            except FileNotFoundError:
                # Already gone (sidecar never existed, or removed concurrently).
                pass

    success = migrate_from_chromadb()

    if success:
        print("\nMigration completed successfully!")
        print("You can now use the new fast daemon service:")
        print("  brainlayer search 'your query'")
    else:
        print("\nMigration failed or skipped")
        sys.exit(1)
202
+
203
+
204
+ if __name__ == "__main__":
205
+ main()
brainlayer/paths.py ADDED
@@ -0,0 +1,43 @@
1
+ """Centralized data paths for BrainLayer.
2
+
3
+ The database currently lives at ~/.local/share/zikaron/ (legacy naming from
4
+ before the BrainLayer extraction). An env var override is supported for custom
5
+ installations.
6
+
7
+ Resolution order:
8
+ 1. BRAINLAYER_DB env var (full path to .db file)
9
+ 2. ~/.local/share/zikaron/zikaron.db (current production location)
10
+ 3. ~/.local/share/brainlayer/brainlayer.db (future default)
11
+ """
12
+
13
+ import os
14
+ from pathlib import Path
15
+
16
+ # Legacy path (where data actually lives today)
17
+ _LEGACY_DB_PATH = Path.home() / ".local" / "share" / "zikaron" / "zikaron.db"
18
+
19
+ # New canonical path (for fresh installs)
20
+ _CANONICAL_DB_PATH = Path.home() / ".local" / "share" / "brainlayer" / "brainlayer.db"
21
+
22
+
23
def get_db_path() -> Path:
    """Resolve the BrainLayer database path.

    Resolution order:
      1. ``BRAINLAYER_DB`` env var, if set and non-empty. A leading ``~`` is
         expanded, so values that never passed through a shell (service
         files, ``.env`` files) still point into the home directory. The
         path is returned as given otherwise; it is not checked for
         existence and its parent directory is not created.
      2. The legacy zikaron path, if that file exists.
      3. The canonical brainlayer path, whose parent directory is created
         if missing.

    Returns:
        Path to the database file to use.
    """
    env = os.environ.get("BRAINLAYER_DB")
    if env:
        return Path(env).expanduser()

    if _LEGACY_DB_PATH.exists():
        return _LEGACY_DB_PATH

    # Ensure parent dir exists for fresh installs
    _CANONICAL_DB_PATH.parent.mkdir(parents=True, exist_ok=True)
    return _CANONICAL_DB_PATH
40
+
41
+
42
+ # Convenience: pre-resolved default for import
43
+ DEFAULT_DB_PATH = get_db_path()
@@ -0,0 +1,47 @@
1
+ """Pipeline stages for processing Claude Code conversations."""
2
+
3
+ from .chunk import chunk_content
4
+ from .classify import classify_content
5
+ from .enrichment import build_external_prompt
6
+ from .extract import extract_system_prompts
7
+ from .extract_markdown import (
8
+ classify_by_path,
9
+ extract_markdown_content,
10
+ find_markdown_files,
11
+ parse_markdown,
12
+ )
13
+ from .sanitize import (
14
+ Replacement,
15
+ SanitizeConfig,
16
+ Sanitizer,
17
+ SanitizeResult,
18
+ )
19
+ from .semantic_style import (
20
+ SemanticStyleAnalysis,
21
+ SemanticStyleAnalyzer,
22
+ TopicCluster,
23
+ analyze_semantic_style,
24
+ )
25
+
26
+ __all__ = [
27
+ "extract_system_prompts",
28
+ "classify_content",
29
+ "chunk_content",
30
+ # Markdown extraction
31
+ "find_markdown_files",
32
+ "parse_markdown",
33
+ "classify_by_path",
34
+ "extract_markdown_content",
35
+ # Semantic style analysis
36
+ "SemanticStyleAnalyzer",
37
+ "SemanticStyleAnalysis",
38
+ "TopicCluster",
39
+ "analyze_semantic_style",
40
+ # PII sanitization
41
+ "Sanitizer",
42
+ "SanitizeConfig",
43
+ "SanitizeResult",
44
+ "Replacement",
45
+ # External enrichment (sanitized)
46
+ "build_external_prompt",
47
+ ]