arkaos 2.0.2 → 2.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/VERSION +1 -1
  2. package/config/constitution.yaml +2 -0
  3. package/config/hooks/user-prompt-submit-v2.sh +11 -0
  4. package/core/knowledge/__init__.py +6 -0
  5. package/core/knowledge/__pycache__/__init__.cpython-313.pyc +0 -0
  6. package/core/knowledge/__pycache__/chunker.cpython-313.pyc +0 -0
  7. package/core/knowledge/__pycache__/embedder.cpython-313.pyc +0 -0
  8. package/core/knowledge/__pycache__/indexer.cpython-313.pyc +0 -0
  9. package/core/knowledge/__pycache__/vector_store.cpython-313.pyc +0 -0
  10. package/core/knowledge/chunker.py +121 -0
  11. package/core/knowledge/embedder.py +52 -0
  12. package/core/knowledge/indexer.py +97 -0
  13. package/core/knowledge/vector_store.py +213 -0
  14. package/core/obsidian/__pycache__/__init__.cpython-313.pyc +0 -0
  15. package/core/obsidian/__pycache__/templates.cpython-313.pyc +0 -0
  16. package/core/obsidian/__pycache__/writer.cpython-313.pyc +0 -0
  17. package/core/runtime/__pycache__/subagent.cpython-313.pyc +0 -0
  18. package/core/runtime/subagent.py +5 -0
  19. package/core/squads/__pycache__/schema.cpython-313.pyc +0 -0
  20. package/core/squads/schema.py +3 -0
  21. package/core/squads/templates/project-squad.yaml +28 -0
  22. package/core/synapse/__pycache__/engine.cpython-313.pyc +0 -0
  23. package/core/synapse/__pycache__/layers.cpython-313.pyc +0 -0
  24. package/core/synapse/engine.py +5 -1
  25. package/core/synapse/layers.py +95 -9
  26. package/core/tasks/__pycache__/schema.cpython-313.pyc +0 -0
  27. package/core/tasks/schema.py +1 -0
  28. package/core/workflow/__pycache__/engine.cpython-313.pyc +0 -0
  29. package/departments/dev/agents/research-assistant.yaml +51 -0
  30. package/departments/kb/agents/data-collector.yaml +51 -0
  31. package/departments/ops/agents/doc-writer.yaml +51 -0
  32. package/departments/pm/agents/pm-director.yaml +1 -1
  33. package/installer/cli.js +36 -0
  34. package/installer/init.js +105 -0
  35. package/installer/migrate.js +4 -1
  36. package/package.json +1 -1
  37. package/pyproject.toml +5 -1
package/VERSION CHANGED
@@ -1 +1 @@
1
- 2.0.2
1
+ 2.0.3
@@ -60,6 +60,8 @@ enforcement_levels:
60
60
 
61
61
  quality_gate:
62
62
  description: "Mandatory pre-delivery review. Nothing ships without APPROVED verdict."
63
+ trigger: "After the last execution phase, before delivery to user"
64
+ frequency: "Once per workflow execution, not per phase"
63
65
  agents:
64
66
  orchestrator:
65
67
  id: cqo-marta
@@ -7,6 +7,17 @@
7
7
 
8
8
  input=$(cat)
9
9
 
10
+ # ─── V1 Migration Detection ─────────────────────────────────────────────
11
+ V1_PATHS=("$HOME/.claude/skills/arka-os" "$HOME/.claude/skills/arkaos")
12
+ MIGRATION_MARKER="$HOME/.arkaos/migrated-from-v1"
13
+
14
+ for v1_path in "${V1_PATHS[@]}"; do
15
+ if [ -d "$v1_path" ] && [ ! -f "$MIGRATION_MARKER" ]; then
16
+ echo "{\"additionalContext\": \"[MIGRATION] ArkaOS v1 detected at $v1_path. Run: npx arkaos migrate — This will backup v1, preserve your data, and install v2. See: https://github.com/andreagroferreira/arka-os#install\"}"
17
+ exit 0
18
+ fi
19
+ done
20
+
10
21
  # ─── Performance Timing ──────────────────────────────────────────────────
11
22
  _HOOK_START_NS=$(date +%s%N 2>/dev/null || echo "0")
12
23
  _hook_ms() {
@@ -0,0 +1,6 @@
1
+ """Knowledge system — vector store, chunking, embedding, and retrieval."""
2
+
3
+ from core.knowledge.chunker import chunk_markdown
4
+ from core.knowledge.vector_store import VectorStore
5
+
6
+ __all__ = ["VectorStore", "chunk_markdown"]
@@ -0,0 +1,121 @@
1
+ """Markdown chunker — split documents into embeddable chunks.
2
+
3
+ Splits on paragraph boundaries, respects heading structure,
4
+ and maintains overlap for context continuity.
5
+ """
6
+
7
+ import re
8
+ from dataclasses import dataclass
9
+
10
+
11
+ @dataclass
12
+ class Chunk:
13
+ """A text chunk ready for embedding."""
14
+ text: str
15
+ heading: str = "" # Current heading context
16
+ index: int = 0 # Position in document
17
+ source: str = "" # Source file path
18
+
19
+ @property
20
+ def token_estimate(self) -> int:
21
+ return len(self.text.split())
22
+
23
+
24
+ def chunk_markdown(
25
+ content: str,
26
+ max_tokens: int = 512,
27
+ overlap_tokens: int = 50,
28
+ source: str = "",
29
+ ) -> list[Chunk]:
30
+ """Split markdown content into chunks at paragraph boundaries.
31
+
32
+ Args:
33
+ content: Markdown text to chunk.
34
+ max_tokens: Maximum tokens per chunk.
35
+ overlap_tokens: Token overlap between consecutive chunks.
36
+ source: Source file path for metadata.
37
+
38
+ Returns:
39
+ List of Chunk objects.
40
+ """
41
+ # Strip frontmatter
42
+ body = content
43
+ if content.startswith("---"):
44
+ end = content.find("---", 3)
45
+ if end != -1:
46
+ body = content[end + 3:].strip()
47
+
48
+ # Split into paragraphs (double newline) preserving headings
49
+ blocks = re.split(r'\n\n+', body)
50
+ blocks = [b.strip() for b in blocks if b.strip()]
51
+
52
+ chunks: list[Chunk] = []
53
+ current_heading = ""
54
+ current_text = ""
55
+ current_tokens = 0
56
+
57
+ for block in blocks:
58
+ # Track headings
59
+ heading_match = re.match(r'^(#{1,6})\s+(.+)', block)
60
+ if heading_match:
61
+ current_heading = heading_match.group(2)
62
+
63
+ block_tokens = len(block.split())
64
+
65
+ # If single block exceeds max, split it
66
+ if block_tokens > max_tokens:
67
+ if current_text:
68
+ chunks.append(Chunk(
69
+ text=current_text.strip(),
70
+ heading=current_heading,
71
+ index=len(chunks),
72
+ source=source,
73
+ ))
74
+ current_text = ""
75
+ current_tokens = 0
76
+
77
+ # Split large block by sentences
78
+ sentences = re.split(r'(?<=[.!?])\s+', block)
79
+ for sentence in sentences:
80
+ sent_tokens = len(sentence.split())
81
+ if current_tokens + sent_tokens > max_tokens and current_text:
82
+ chunks.append(Chunk(
83
+ text=current_text.strip(),
84
+ heading=current_heading,
85
+ index=len(chunks),
86
+ source=source,
87
+ ))
88
+ # Overlap: keep last few words
89
+ words = current_text.split()
90
+ current_text = " ".join(words[-overlap_tokens:]) + " " if len(words) > overlap_tokens else ""
91
+ current_tokens = len(current_text.split())
92
+ current_text += sentence + " "
93
+ current_tokens += sent_tokens
94
+ continue
95
+
96
+ # Check if adding this block exceeds limit
97
+ if current_tokens + block_tokens > max_tokens and current_text:
98
+ chunks.append(Chunk(
99
+ text=current_text.strip(),
100
+ heading=current_heading,
101
+ index=len(chunks),
102
+ source=source,
103
+ ))
104
+ # Overlap
105
+ words = current_text.split()
106
+ current_text = " ".join(words[-overlap_tokens:]) + " " if len(words) > overlap_tokens else ""
107
+ current_tokens = len(current_text.split())
108
+
109
+ current_text += block + "\n\n"
110
+ current_tokens += block_tokens
111
+
112
+ # Final chunk
113
+ if current_text.strip():
114
+ chunks.append(Chunk(
115
+ text=current_text.strip(),
116
+ heading=current_heading,
117
+ index=len(chunks),
118
+ source=source,
119
+ ))
120
+
121
+ return chunks
@@ -0,0 +1,52 @@
1
+ """Embedding wrapper — local embeddings via fastembed.
2
+
3
+ Graceful degradation: if fastembed is not installed, returns None
4
+ and the vector store falls back to keyword matching.
5
+ """
6
+
7
+ from typing import Optional
8
+
9
+ # Lazy import — fastembed is optional
10
+ _model = None
11
+ _model_name = "BAAI/bge-small-en-v1.5" # 384 dims, fast, good quality
12
+ EMBEDDING_DIMS = 384
13
+
14
+
15
+ def get_model():
16
+ """Get or create the embedding model (lazy singleton)."""
17
+ global _model
18
+ if _model is None:
19
+ try:
20
+ from fastembed import TextEmbedding
21
+ _model = TextEmbedding(_model_name)
22
+ except ImportError:
23
+ return None
24
+ return _model
25
+
26
+
27
+ def embed(text: str) -> Optional[list[float]]:
28
+ """Embed a single text. Returns None if fastembed unavailable."""
29
+ model = get_model()
30
+ if model is None:
31
+ return None
32
+ results = list(model.embed([text]))
33
+ return results[0].tolist() if results else None
34
+
35
+
36
+ def embed_batch(texts: list[str]) -> Optional[list[list[float]]]:
37
+ """Embed multiple texts. Returns None if fastembed unavailable."""
38
+ if not texts:
39
+ return []
40
+ model = get_model()
41
+ if model is None:
42
+ return None
43
+ return [emb.tolist() for emb in model.embed(texts)]
44
+
45
+
46
+ def is_available() -> bool:
47
+ """Check if embedding model is available."""
48
+ try:
49
+ from fastembed import TextEmbedding
50
+ return True
51
+ except ImportError:
52
+ return False
@@ -0,0 +1,97 @@
1
+ """Knowledge indexer — walk directories and index markdown files.
2
+
3
+ Supports incremental indexing (skips already-indexed files by hash).
4
+ """
5
+
6
+ import hashlib
7
+ from pathlib import Path
8
+ from typing import Callable, Optional
9
+
10
+ from core.knowledge.chunker import chunk_markdown
11
+ from core.knowledge.vector_store import VectorStore
12
+
13
+
14
+ def file_hash(path: Path) -> str:
15
+ """Compute SHA-256 hash of file content."""
16
+ return hashlib.sha256(path.read_bytes()).hexdigest()[:16]
17
+
18
+
19
+ def index_directory(
20
+ directory: str | Path,
21
+ store: VectorStore,
22
+ pattern: str = "**/*.md",
23
+ on_progress: Optional[Callable[[int, int, str], None]] = None,
24
+ max_tokens: int = 512,
25
+ skip_indexed: bool = True,
26
+ ) -> dict:
27
+ """Index all markdown files in a directory.
28
+
29
+ Args:
30
+ directory: Root directory to scan.
31
+ store: VectorStore to index into.
32
+ pattern: Glob pattern for files.
33
+ on_progress: Callback(current, total, filename).
34
+ max_tokens: Max tokens per chunk.
35
+ skip_indexed: Skip files already indexed (by hash).
36
+
37
+ Returns:
38
+ Dict with: files_scanned, files_indexed, files_skipped, chunks_created.
39
+ """
40
+ root = Path(directory)
41
+ if not root.exists():
42
+ return {"files_scanned": 0, "files_indexed": 0, "files_skipped": 0, "chunks_created": 0}
43
+
44
+ files = sorted(root.glob(pattern))
45
+ # Skip hidden dirs (.obsidian, .git)
46
+ files = [f for f in files if not any(part.startswith(".") for part in f.relative_to(root).parts)]
47
+
48
+ total = len(files)
49
+ indexed = 0
50
+ skipped = 0
51
+ chunks_created = 0
52
+
53
+ for i, filepath in enumerate(files):
54
+ if on_progress:
55
+ on_progress(i + 1, total, filepath.name)
56
+
57
+ fhash = file_hash(filepath)
58
+
59
+ if skip_indexed and store.is_file_indexed(fhash):
60
+ skipped += 1
61
+ continue
62
+
63
+ try:
64
+ content = filepath.read_text(encoding="utf-8")
65
+ except (OSError, UnicodeDecodeError):
66
+ skipped += 1
67
+ continue
68
+
69
+ # Skip very small files
70
+ if len(content.split()) < 20:
71
+ skipped += 1
72
+ continue
73
+
74
+ # Remove old chunks for this file (re-index)
75
+ store.remove_file(str(filepath))
76
+
77
+ # Chunk and index
78
+ chunks = chunk_markdown(content, max_tokens=max_tokens, source=str(filepath))
79
+ if chunks:
80
+ texts = [c.text for c in chunks]
81
+ headings = [c.heading for c in chunks]
82
+ count = store.index_chunks(
83
+ texts=texts,
84
+ headings=headings,
85
+ source=str(filepath),
86
+ file_hash=fhash,
87
+ metadata={"relative_path": str(filepath.relative_to(root))},
88
+ )
89
+ chunks_created += count
90
+ indexed += 1
91
+
92
+ return {
93
+ "files_scanned": total,
94
+ "files_indexed": indexed,
95
+ "files_skipped": skipped,
96
+ "chunks_created": chunks_created,
97
+ }
@@ -0,0 +1,213 @@
1
+ """Vector store — SQLite-VSS backed semantic search.
2
+
3
+ Stores document chunks with embeddings for fast similarity search.
4
+ Graceful degradation: works without sqlite-vss (brute-force fallback).
5
+ """
6
+
7
+ import json
8
+ import sqlite3
9
+ import time
10
+ from pathlib import Path
11
+ from typing import Any, Optional
12
+
13
+ from core.knowledge.embedder import embed, embed_batch, EMBEDDING_DIMS
14
+
15
+
16
+ def _load_vss(db: sqlite3.Connection) -> bool:
17
+ """Try to load sqlite-vss extension."""
18
+ try:
19
+ db.enable_load_extension(True)
20
+ import sqlite_vss
21
+ sqlite_vss.load(db)
22
+ return True
23
+ except (ImportError, Exception):
24
+ return False
25
+
26
+
27
+ class VectorStore:
28
+ """SQLite-VSS backed vector store for knowledge retrieval."""
29
+
30
+ def __init__(self, db_path: str | Path = ":memory:") -> None:
31
+ self._db_path = str(db_path)
32
+ self._db = sqlite3.connect(self._db_path)
33
+ self._db.row_factory = sqlite3.Row
34
+ self._vss_available = _load_vss(self._db)
35
+ self._init_schema()
36
+
37
+ def _init_schema(self) -> None:
38
+ """Create tables if they don't exist."""
39
+ self._db.executescript("""
40
+ CREATE TABLE IF NOT EXISTS chunks (
41
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
42
+ text TEXT NOT NULL,
43
+ heading TEXT DEFAULT '',
44
+ source TEXT DEFAULT '',
45
+ file_hash TEXT DEFAULT '',
46
+ metadata TEXT DEFAULT '{}',
47
+ created_at REAL DEFAULT (unixepoch('now')),
48
+ embedding BLOB
49
+ );
50
+ CREATE INDEX IF NOT EXISTS idx_chunks_source ON chunks(source);
51
+ CREATE INDEX IF NOT EXISTS idx_chunks_hash ON chunks(file_hash);
52
+ """)
53
+ if self._vss_available:
54
+ try:
55
+ self._db.execute(
56
+ f"CREATE VIRTUAL TABLE IF NOT EXISTS vss_chunks USING vss0(embedding({EMBEDDING_DIMS}))"
57
+ )
58
+ except Exception:
59
+ self._vss_available = False
60
+ self._db.commit()
61
+
62
+ def index_chunks(
63
+ self,
64
+ texts: list[str],
65
+ headings: list[str] | None = None,
66
+ source: str = "",
67
+ file_hash: str = "",
68
+ metadata: dict[str, Any] | None = None,
69
+ ) -> int:
70
+ """Index multiple text chunks with embeddings.
71
+
72
+ Returns number of chunks indexed.
73
+ """
74
+ if not texts:
75
+ return 0
76
+
77
+ embeddings = embed_batch(texts)
78
+ meta_json = json.dumps(metadata or {})
79
+ count = 0
80
+
81
+ for i, text in enumerate(texts):
82
+ heading = headings[i] if headings and i < len(headings) else ""
83
+ emb_blob = None
84
+
85
+ if embeddings and i < len(embeddings):
86
+ emb_blob = _vec_to_blob(embeddings[i])
87
+
88
+ cursor = self._db.execute(
89
+ "INSERT INTO chunks (text, heading, source, file_hash, metadata, embedding) VALUES (?, ?, ?, ?, ?, ?)",
90
+ (text, heading, source, file_hash, meta_json, emb_blob),
91
+ )
92
+
93
+ if self._vss_available and emb_blob:
94
+ self._db.execute(
95
+ "INSERT INTO vss_chunks (rowid, embedding) VALUES (?, ?)",
96
+ (cursor.lastrowid, emb_blob),
97
+ )
98
+ count += 1
99
+
100
+ self._db.commit()
101
+ return count
102
+
103
+ def search(self, query: str, top_k: int = 5) -> list[dict]:
104
+ """Search for similar chunks.
105
+
106
+ Returns list of dicts with: text, heading, source, score, metadata.
107
+ """
108
+ # Check if store has any data
109
+ total = self._db.execute("SELECT COUNT(*) as cnt FROM chunks").fetchone()["cnt"]
110
+ if total == 0:
111
+ return []
112
+
113
+ query_emb = embed(query)
114
+
115
+ if query_emb and self._vss_available:
116
+ try:
117
+ return self._vss_search(query_emb, top_k)
118
+ except Exception:
119
+ return self._keyword_search(query, top_k)
120
+
121
+ # Fallback: keyword search
122
+ return self._keyword_search(query, top_k)
123
+
124
+ def _vss_search(self, query_emb: list[float], top_k: int) -> list[dict]:
125
+ """Vector similarity search via sqlite-vss."""
126
+ query_blob = _vec_to_blob(query_emb)
127
+ rows = self._db.execute("""
128
+ SELECT c.text, c.heading, c.source, c.metadata, v.distance
129
+ FROM vss_chunks v
130
+ JOIN chunks c ON c.id = v.rowid
131
+ WHERE vss_search(v.embedding, vss_search_params(?, ?))
132
+ """, (query_blob, top_k)).fetchall()
133
+
134
+ return [
135
+ {
136
+ "text": r["text"],
137
+ "heading": r["heading"],
138
+ "source": r["source"],
139
+ "score": 1.0 - r["distance"], # Convert distance to similarity
140
+ "metadata": json.loads(r["metadata"]),
141
+ }
142
+ for r in rows
143
+ ]
144
+
145
+ def _keyword_search(self, query: str, top_k: int) -> list[dict]:
146
+ """Fallback keyword search when VSS unavailable."""
147
+ words = query.lower().split()
148
+ if not words:
149
+ return []
150
+
151
+ conditions = " OR ".join(["lower(text) LIKE ?" for _ in words])
152
+ params = [f"%{w}%" for w in words[:5]] # Max 5 keywords
153
+
154
+ rows = self._db.execute(
155
+ f"SELECT text, heading, source, metadata FROM chunks WHERE {conditions} LIMIT ?",
156
+ params + [top_k],
157
+ ).fetchall()
158
+
159
+ return [
160
+ {
161
+ "text": r["text"],
162
+ "heading": r["heading"],
163
+ "source": r["source"],
164
+ "score": 0.5, # No real score for keyword search
165
+ "metadata": json.loads(r["metadata"]),
166
+ }
167
+ for r in rows
168
+ ]
169
+
170
+ def is_file_indexed(self, file_hash: str) -> bool:
171
+ """Check if a file has already been indexed."""
172
+ row = self._db.execute(
173
+ "SELECT COUNT(*) as cnt FROM chunks WHERE file_hash = ?", (file_hash,)
174
+ ).fetchone()
175
+ return row["cnt"] > 0
176
+
177
+ def remove_file(self, source: str) -> int:
178
+ """Remove all chunks from a source file."""
179
+ if self._vss_available:
180
+ rows = self._db.execute("SELECT id FROM chunks WHERE source = ?", (source,)).fetchall()
181
+ for r in rows:
182
+ self._db.execute("DELETE FROM vss_chunks WHERE rowid = ?", (r["id"],))
183
+ deleted = self._db.execute("DELETE FROM chunks WHERE source = ?", (source,)).rowcount
184
+ self._db.commit()
185
+ return deleted
186
+
187
+ def get_stats(self) -> dict:
188
+ """Get store statistics."""
189
+ total = self._db.execute("SELECT COUNT(*) as cnt FROM chunks").fetchone()["cnt"]
190
+ sources = self._db.execute("SELECT COUNT(DISTINCT source) as cnt FROM chunks").fetchone()["cnt"]
191
+ return {
192
+ "total_chunks": total,
193
+ "total_files": sources,
194
+ "vss_available": self._vss_available,
195
+ "db_path": self._db_path,
196
+ }
197
+
198
+ def clear(self) -> None:
199
+ """Remove all data."""
200
+ if self._vss_available:
201
+ self._db.execute("DELETE FROM vss_chunks")
202
+ self._db.execute("DELETE FROM chunks")
203
+ self._db.commit()
204
+
205
+ def close(self) -> None:
206
+ """Close database connection."""
207
+ self._db.close()
208
+
209
+
210
+ def _vec_to_blob(vec: list[float]) -> bytes:
211
+ """Convert float vector to bytes for SQLite storage."""
212
+ import struct
213
+ return struct.pack(f"{len(vec)}f", *vec)
@@ -102,6 +102,11 @@ class SubagentDispatcher:
102
102
  The dispatcher creates HandoffArtifacts from agent definitions
103
103
  and task descriptions, then delegates to the runtime adapter
104
104
  for actual execution.
105
+
106
+ Nesting policy: Maximum 1 level of nesting (agent -> subagent).
107
+ Sub-subagent dispatch is not recommended -- creates context fragmentation
108
+ and debugging complexity. If a subagent needs help, it should escalate
109
+ to its squad lead rather than spawning another subagent.
105
110
  """
106
111
 
107
112
  def __init__(self) -> None:
@@ -35,6 +35,9 @@ class SquadMember(BaseModel):
35
35
  borrowed: bool = False # Borrowed from another department?
36
36
  source_department: str = "" # Original department if borrowed
37
37
  availability: float = 1.0 # 0.0-1.0, for shared agents
38
+ # Tier 2 agents can collaborate directly within project squads
39
+ # without requiring Tier 1 approval for each interaction.
40
+ can_collaborate_directly: bool = True
38
41
 
39
42
 
40
43
  class SquadWorkflow(BaseModel):
@@ -0,0 +1,28 @@
1
+ # Project Squad Template
2
+ # Copy and customize for cross-department projects
3
+ id: project-{name}
4
+ name: "{Project Name} Squad"
5
+ description: "Cross-department squad for {project description}"
6
+ department: "" # No single department — cross-cutting
7
+ squad_type: project
8
+ topology: stream-aligned
9
+
10
+ members:
11
+ # Borrow from department squads
12
+ - agent_id: "{lead-agent-id}"
13
+ role: "Project Lead"
14
+ is_lead: true
15
+ borrowed: true # Borrowed from department squad
16
+ availability: 0.5 # 50% allocation
17
+
18
+ - agent_id: "{specialist-id}"
19
+ role: "Technical Implementation"
20
+ borrowed: true
21
+ availability: 0.3
22
+
23
+ # Project squads:
24
+ # - Created by COO (Sofia) or any Squad Lead
25
+ # - Agents are borrowed, not moved
26
+ # - Max 10 members (Two-Pizza Team)
27
+ # - Dissolved when project completes
28
+ # - Quality Gate still mandatory
@@ -10,6 +10,7 @@ Design goals:
10
10
 
11
11
  import time
12
12
  from dataclasses import dataclass, field
13
+ from typing import Any
13
14
 
14
15
  from core.synapse.layers import Layer, LayerResult, PromptContext
15
16
  from core.synapse.cache import LayerCache
@@ -152,6 +153,7 @@ def create_default_engine(
152
153
  constitution_compressed: str = "",
153
154
  commands: list[dict] | None = None,
154
155
  agents_registry: dict[str, dict] | None = None,
156
+ vector_store: Any = None,
155
157
  ) -> SynapseEngine:
156
158
  """Create a SynapseEngine with all 8 default layers.
157
159
 
@@ -166,7 +168,7 @@ def create_default_engine(
166
168
  from core.synapse.layers import (
167
169
  ConstitutionLayer, DepartmentLayer, AgentLayer,
168
170
  ProjectLayer, BranchLayer, CommandHintsLayer,
169
- QualityGateLayer, TimeLayer,
171
+ QualityGateLayer, TimeLayer, KnowledgeRetrievalLayer,
170
172
  )
171
173
 
172
174
  engine = SynapseEngine()
@@ -176,6 +178,8 @@ def create_default_engine(
176
178
  engine.register_layer(DepartmentLayer())
177
179
  engine.register_layer(AgentLayer(agents_registry=agents_registry))
178
180
  engine.register_layer(ProjectLayer())
181
+ if vector_store is not None:
182
+ engine.register_layer(KnowledgeRetrievalLayer(vector_store=vector_store))
179
183
  engine.register_layer(BranchLayer())
180
184
  engine.register_layer(CommandHintsLayer(commands=commands))
181
185
  engine.register_layer(QualityGateLayer())
@@ -1,17 +1,18 @@
1
- """Synapse layer definitions — the 8 context layers.
1
+ """Synapse layer definitions — the 9 context layers.
2
2
 
3
3
  Each layer extracts a specific type of context and compresses it
4
4
  for injection into the prompt. Layers are pluggable and ordered.
5
5
 
6
6
  Layer Architecture:
7
- L0: Constitution — Compressed governance rules (TTL: 300s)
8
- L1: Department — Detected department from input (no cache)
9
- L2: Agent — Agent profile + last gotchas (TTL: 30s)
10
- L3: Project — Active project context (TTL: 30s)
11
- L4: Branch Current git branch (no cache)
12
- L5: Command Hints Matching commands from registry (TTL: 30s)
13
- L6: Quality Gate QG status and last verdicts (TTL: 60s)
14
- L7: Time Time-of-day signal (no cache)
7
+ L0: Constitution — Compressed governance rules (TTL: 300s)
8
+ L1: Department — Detected department from input (no cache)
9
+ L2: Agent — Agent profile + last gotchas (TTL: 30s)
10
+ L3: Project — Active project context (TTL: 30s)
11
+ L3.5: KnowledgeRetrieval Semantic search from vector DB (TTL: 30s)
12
+ L4: Branch Current git branch (no cache)
13
+ L5: Command Hints Matching commands from registry (TTL: 30s)
14
+ L6: Quality Gate QG status and last verdicts (TTL: 60s)
15
+ L7: Time — Time-of-day signal (no cache)
15
16
  """
16
17
 
17
18
  import re
@@ -439,3 +440,88 @@ class TimeLayer(Layer):
439
440
  layer_id=self.id, tag=tag, content=period,
440
441
  tokens_est=1, compute_ms=ms, cached=False,
441
442
  )
443
+
444
+
445
+ # --- L3.5: Knowledge Retrieval ---
446
+
447
+ class KnowledgeRetrievalLayer(Layer):
448
+ """L3.5: Semantic knowledge retrieval from vector DB.
449
+
450
+ Searches the local vector store for chunks relevant to the user's
451
+ input and injects them as context. Gracefully skips if vector store
452
+ is unavailable or empty.
453
+ """
454
+
455
+ def __init__(self, vector_store: Any = None, max_chunks: int = 3, max_tokens: int = 400) -> None:
456
+ self._store = vector_store
457
+ self._max_chunks = max_chunks
458
+ self._max_tokens = max_tokens
459
+
460
+ @property
461
+ def id(self) -> str:
462
+ return "L3.5"
463
+
464
+ @property
465
+ def name(self) -> str:
466
+ return "KnowledgeRetrieval"
467
+
468
+ @property
469
+ def cache_ttl(self) -> int:
470
+ return 30
471
+
472
+ @property
473
+ def priority(self) -> int:
474
+ return 35
475
+
476
+ def compute(self, ctx: PromptContext) -> LayerResult:
477
+ start = time.time()
478
+
479
+ if not self._store or not ctx.user_input:
480
+ return LayerResult(
481
+ layer_id=self.id, tag="", content="",
482
+ tokens_est=0, compute_ms=0, cached=False,
483
+ )
484
+
485
+ try:
486
+ results = self._store.search(ctx.user_input, top_k=self._max_chunks)
487
+ except Exception:
488
+ return LayerResult(
489
+ layer_id=self.id, tag="", content="",
490
+ tokens_est=0, compute_ms=0, cached=False,
491
+ )
492
+
493
+ if not results:
494
+ ms = int((time.time() - start) * 1000)
495
+ return LayerResult(
496
+ layer_id=self.id, tag="", content="",
497
+ tokens_est=0, compute_ms=ms, cached=False,
498
+ )
499
+
500
+ # Build compact knowledge context
501
+ snippets = []
502
+ total_tokens = 0
503
+ for r in results:
504
+ text = r["text"][:200].replace("\n", " ").strip()
505
+ tokens = len(text.split())
506
+ if total_tokens + tokens > self._max_tokens:
507
+ break
508
+ source = r.get("source", "").split("/")[-1] if r.get("source") else ""
509
+ snippet = f"{source}: {text}" if source else text
510
+ snippets.append(snippet)
511
+ total_tokens += tokens
512
+
513
+ if not snippets:
514
+ ms = int((time.time() - start) * 1000)
515
+ return LayerResult(
516
+ layer_id=self.id, tag="", content="",
517
+ tokens_est=0, compute_ms=ms, cached=False,
518
+ )
519
+
520
+ content = " | ".join(snippets)
521
+ tag = f"[knowledge:{len(snippets)} chunks]"
522
+ ms = int((time.time() - start) * 1000)
523
+
524
+ return LayerResult(
525
+ layer_id=self.id, tag=tag, content=content,
526
+ tokens_est=total_tokens, compute_ms=ms, cached=False,
527
+ )
@@ -29,6 +29,7 @@ class TaskType(str, Enum):
29
29
  RESEARCH = "research" # Background research
30
30
  GENERATION = "generation" # AI content/image generation
31
31
  EXPORT = "export" # Export to external system
32
+ KB_INDEX = "kb_index" # Index documents into vector store
32
33
  CUSTOM = "custom"
33
34
 
34
35
 
@@ -0,0 +1,51 @@
1
+ id: research-assistant
2
+ name: Maria
3
+ role: Research Assistant
4
+ department: dev
5
+ tier: 3
6
+
7
+ behavioral_dna:
8
+ disc:
9
+ primary: C
10
+ secondary: S
11
+ communication_style: "Thorough, detail-oriented, presents findings systematically"
12
+ under_pressure: "Digs deeper into data before responding"
13
+ motivator: "Understanding the full picture"
14
+ enneagram:
15
+ type: 5
16
+ wing: 6
17
+ core_motivation: "To understand and be competent"
18
+ core_fear: "Being ignorant or uninformed"
19
+ subtype: social
20
+ big_five:
21
+ openness: 90
22
+ conscientiousness: 85
23
+ extraversion: 30
24
+ agreeableness: 70
25
+ neuroticism: 35
26
+ mbti:
27
+ type: INTP
28
+
29
+ authority:
30
+ veto: false
31
+ approve_budget: false
32
+ approve_architecture: false
33
+ approve_quality: false
34
+ block_release: false
35
+ block_delivery: false
36
+ orchestrate: false
37
+ delegates_to: []
38
+ escalates_to: tech-lead-paulo
39
+
40
+ expertise:
41
+ domains: ["research", "documentation", "analysis", "literature-review"]
42
+ frameworks: ["Systematic Review", "PRISMA", "Research Methodology"]
43
+ depth: proficient
44
+ years_equivalent: 5
45
+
46
+ communication:
47
+ language: en
48
+ tone: "Precise and informative"
49
+ vocabulary_level: specialist
50
+ preferred_format: "Structured reports with citations"
51
+ avoid: ["assumptions without evidence", "vague conclusions"]
@@ -0,0 +1,51 @@
1
+ id: data-collector
2
+ name: Tomas Jr
3
+ role: Data Collector
4
+ department: kb
5
+ tier: 3
6
+
7
+ behavioral_dna:
8
+ disc:
9
+ primary: C
10
+ secondary: D
11
+ communication_style: "Data-driven, factual, structured"
12
+ under_pressure: "Relies on systematic data collection"
13
+ motivator: "Complete and accurate data"
14
+ enneagram:
15
+ type: 6
16
+ wing: 5
17
+ core_motivation: "To have reliable information"
18
+ core_fear: "Making decisions on incomplete data"
19
+ subtype: self-preservation
20
+ big_five:
21
+ openness: 70
22
+ conscientiousness: 88
23
+ extraversion: 35
24
+ agreeableness: 65
25
+ neuroticism: 40
26
+ mbti:
27
+ type: ISTJ
28
+
29
+ authority:
30
+ veto: false
31
+ approve_budget: false
32
+ approve_architecture: false
33
+ approve_quality: false
34
+ block_release: false
35
+ block_delivery: false
36
+ orchestrate: false
37
+ delegates_to: []
38
+ escalates_to: kb-lead-clara
39
+
40
+ expertise:
41
+ domains: ["data-collection", "web-scraping", "API-integration", "data-validation"]
42
+ frameworks: ["ETL", "Data Quality Framework"]
43
+ depth: proficient
44
+ years_equivalent: 4
45
+
46
+ communication:
47
+ language: en
48
+ tone: "Factual and precise"
49
+ vocabulary_level: specialist
50
+ preferred_format: "Data tables with quality scores"
51
+ avoid: ["subjective interpretations", "unverified claims"]
@@ -0,0 +1,51 @@
1
+ id: doc-writer
2
+ name: Isabel
3
+ role: Documentation Writer
4
+ department: ops
5
+ tier: 3
6
+
7
+ behavioral_dna:
8
+ disc:
9
+ primary: S
10
+ secondary: C
11
+ communication_style: "Clear, structured, audience-aware"
12
+ under_pressure: "Focuses on clarity and completeness"
13
+ motivator: "Making complex things accessible"
14
+ enneagram:
15
+ type: 1
16
+ wing: 2
17
+ core_motivation: "To produce correct, helpful documentation"
18
+ core_fear: "Publishing inaccurate information"
19
+ subtype: social
20
+ big_five:
21
+ openness: 75
22
+ conscientiousness: 92
23
+ extraversion: 40
24
+ agreeableness: 80
25
+ neuroticism: 30
26
+ mbti:
27
+ type: ISFJ
28
+
29
+ authority:
30
+ veto: false
31
+ approve_budget: false
32
+ approve_architecture: false
33
+ approve_quality: false
34
+ block_release: false
35
+ block_delivery: false
36
+ orchestrate: false
37
+ delegates_to: []
38
+ escalates_to: ops-lead-daniel
39
+
40
+ expertise:
41
+ domains: ["technical-writing", "API-docs", "user-guides", "SOPs"]
42
+ frameworks: ["Diátaxis", "Google Developer Documentation Style"]
43
+ depth: proficient
44
+ years_equivalent: 5
45
+
46
+ communication:
47
+ language: en
48
+ tone: "Clear, concise, helpful"
49
+ vocabulary_level: accessible
50
+ preferred_format: "Step-by-step guides with examples"
51
+ avoid: ["jargon without explanation", "walls of text"]
@@ -42,7 +42,7 @@ authority:
42
42
  - product-owner
43
43
  - scrum-master
44
44
  - project-coordinator
45
- escalates_to: cto-marco
45
+ escalates_to: coo-sofia
46
46
 
47
47
  expertise:
48
48
  domains:
package/installer/cli.js CHANGED
@@ -36,6 +36,7 @@ ArkaOS v${VERSION} — The Operating System for AI Agent Teams
36
36
  Usage:
37
37
  npx arkaos install Install ArkaOS in current environment
38
38
  npx arkaos install --runtime <runtime> Install for specific runtime
39
+ npx arkaos init Initialize project config (.arkaos.json)
39
40
  npx arkaos update Update to latest version
40
41
  npx arkaos migrate Migrate from v1 to v2
41
42
  npx arkaos doctor Run health checks
@@ -57,6 +58,8 @@ Runtimes:
57
58
  Examples:
58
59
  npx arkaos install Auto-detect runtime and install
59
60
  npx arkaos install --runtime codex Install for Codex CLI specifically
61
+ npx arkaos index Index knowledge base (Obsidian vault)
62
+ npx arkaos search "query" Search indexed knowledge
60
63
  npx arkaos doctor Verify installation health
61
64
  `);
62
65
  process.exit(0);
@@ -69,6 +72,12 @@ async function main() {
69
72
  await install({ runtime, path: values.path, force: values.force });
70
73
  break;
71
74
 
75
+ case "init": {
76
+ const { init } = await import("./init.js");
77
+ await init({ path: values.path || process.cwd() });
78
+ break;
79
+ }
80
+
72
81
  case "doctor":
73
82
  const { doctor } = await import("./doctor.js");
74
83
  await doctor();
@@ -89,6 +98,33 @@ async function main() {
89
98
  await migrate();
90
99
  break;
91
100
 
101
+ case "index": {
102
+ const { execSync } = await import("node:child_process");
103
+ const indexArgs = positionals.slice(1).join(" ");
104
+ const repoRoot = dirname(fileURLToPath(import.meta.url)).replace(/\/installer$/, "");
105
+ try {
106
+ execSync(`python3 "${repoRoot}/scripts/knowledge-index.py" ${indexArgs || "--vault"}`, {
107
+ stdio: "inherit",
108
+ env: { ...process.env, ARKAOS_ROOT: repoRoot },
109
+ });
110
+ } catch { process.exit(1); }
111
+ break;
112
+ }
113
+
114
+ case "search": {
115
+ const { execSync } = await import("node:child_process");
116
+ const query = positionals.slice(1).join(" ");
117
+ if (!query) { console.error("Usage: npx arkaos search \"your query\""); process.exit(1); }
118
+ const repoRoot2 = dirname(fileURLToPath(import.meta.url)).replace(/\/installer$/, "");
119
+ try {
120
+ execSync(`python3 "${repoRoot2}/scripts/knowledge-index.py" --search "${query}"`, {
121
+ stdio: "inherit",
122
+ env: { ...process.env, ARKAOS_ROOT: repoRoot2 },
123
+ });
124
+ } catch { process.exit(1); }
125
+ break;
126
+ }
127
+
92
128
  default:
93
129
  console.error(`Unknown command: ${command}`);
94
130
  console.error('Run "npx arkaos help" for usage information.');
@@ -0,0 +1,105 @@
1
+ import { existsSync, readFileSync, writeFileSync, mkdirSync } from "node:fs";
2
+ import { join, basename } from "node:path";
3
+ import { execSync } from "node:child_process";
4
+
5
+ export async function init({ path }) {
6
+ const projectDir = path || process.cwd();
7
+ const configPath = join(projectDir, ".arkaos.json");
8
+ const projectName = basename(projectDir);
9
+
10
+ console.log(`\n ArkaOS Project Init — ${projectName}\n`);
11
+
12
+ // Detect existing config
13
+ if (existsSync(configPath)) {
14
+ const existing = JSON.parse(readFileSync(configPath, "utf-8"));
15
+ console.log(` Config already exists: ${configPath}`);
16
+ console.log(` Department: ${existing.department || "auto"}`);
17
+ console.log(` Stack: ${existing.stack || "auto"}`);
18
+ console.log(`\n To reconfigure, delete .arkaos.json and run again.\n`);
19
+ return;
20
+ }
21
+
22
+ // Auto-detect stack
23
+ const stack = detectStack(projectDir);
24
+ console.log(` Detected stack: ${stack}`);
25
+
26
+ // Auto-detect department
27
+ const department = detectDepartment(projectDir, stack);
28
+ console.log(` Default department: ${department}`);
29
+
30
+ // Create config
31
+ const config = {
32
+ name: projectName,
33
+ department,
34
+ stack,
35
+ created: new Date().toISOString(),
36
+ arkaos_version: "2",
37
+ settings: {
38
+ quality_gate: true,
39
+ obsidian_output: true,
40
+ auto_index: true,
41
+ },
42
+ };
43
+
44
+ writeFileSync(configPath, JSON.stringify(config, null, 2) + "\n");
45
+ console.log(` Created: ${configPath}`);
46
+
47
+ // Create .claude/settings.local.json if Claude Code project
48
+ const claudeDir = join(projectDir, ".claude");
49
+ const localSettings = join(claudeDir, "settings.local.json");
50
+ if (!existsSync(localSettings)) {
51
+ mkdirSync(claudeDir, { recursive: true });
52
+ writeFileSync(localSettings, JSON.stringify({
53
+ permissions: {},
54
+ hooks: {},
55
+ }, null, 2) + "\n");
56
+ console.log(` Created: .claude/settings.local.json`);
57
+ }
58
+
59
+ console.log(`
60
+ Project initialized for ArkaOS.
61
+
62
+ Config: .arkaos.json
63
+ Stack: ${stack}
64
+ Dept: ${department}
65
+
66
+ ArkaOS will auto-detect this project's context via Synapse L3.
67
+ Use /dev, /mkt, /brand etc. or just describe what you need.
68
+ `);
69
+ }
70
+
71
+ function detectStack(dir) {
72
+ if (existsSync(join(dir, "composer.json"))) return "laravel";
73
+ if (existsSync(join(dir, "nuxt.config.ts")) || existsSync(join(dir, "nuxt.config.js"))) return "nuxt";
74
+ if (existsSync(join(dir, "next.config.js")) || existsSync(join(dir, "next.config.ts")) || existsSync(join(dir, "next.config.mjs"))) return "nextjs";
75
+ if (existsSync(join(dir, "vite.config.ts"))) {
76
+ try {
77
+ const pkg = JSON.parse(readFileSync(join(dir, "package.json"), "utf-8"));
78
+ if (pkg.dependencies?.vue) return "vue";
79
+ if (pkg.dependencies?.react) return "react";
80
+ } catch {}
81
+ return "vite";
82
+ }
83
+ if (existsSync(join(dir, "package.json"))) {
84
+ try {
85
+ const pkg = JSON.parse(readFileSync(join(dir, "package.json"), "utf-8"));
86
+ if (pkg.dependencies?.react) return "react";
87
+ if (pkg.dependencies?.vue) return "vue";
88
+ if (pkg.dependencies?.express) return "node-express";
89
+ return "node";
90
+ } catch {}
91
+ }
92
+ if (existsSync(join(dir, "pyproject.toml")) || existsSync(join(dir, "setup.py"))) return "python";
93
+ if (existsSync(join(dir, "Gemfile"))) return "ruby";
94
+ if (existsSync(join(dir, "go.mod"))) return "go";
95
+ if (existsSync(join(dir, "Cargo.toml"))) return "rust";
96
+ return "unknown";
97
+ }
98
+
99
+ function detectDepartment(dir, stack) {
100
+ // Code projects default to dev
101
+ if (["laravel", "nuxt", "nextjs", "react", "vue", "node", "python", "ruby", "go", "rust", "node-express", "vite"].includes(stack)) {
102
+ return "dev";
103
+ }
104
+ return "general";
105
+ }
@@ -1,4 +1,4 @@
1
- import { existsSync, readFileSync, renameSync, mkdirSync } from "node:fs";
1
+ import { existsSync, readFileSync, renameSync, mkdirSync, writeFileSync } from "node:fs";
2
2
  import { join } from "node:path";
3
3
  import { homedir } from "node:os";
4
4
  import { execSync } from "node:child_process";
@@ -105,6 +105,9 @@ export async function migrate() {
105
105
  process.exit(1);
106
106
  }
107
107
 
108
+ // Mark as migrated so hook stops alerting
109
+ writeFileSync(join(V2_PATH, "migrated-from-v1"), new Date().toISOString());
110
+
108
111
  console.log(`
109
112
  Migration complete!
110
113
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "arkaos",
3
- "version": "2.0.2",
3
+ "version": "2.0.3",
4
4
  "description": "The Operating System for AI Agent Teams",
5
5
  "type": "module",
6
6
  "bin": {
package/pyproject.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "arkaos-core"
3
- version = "2.0.2"
3
+ version = "2.0.3"
4
4
  description = "Core engine for ArkaOS — The Operating System for AI Agent Teams"
5
5
  readme = "README.md"
6
6
  license = {text = "MIT"}
@@ -28,6 +28,10 @@ dependencies = [
28
28
  ]
29
29
 
30
30
  [project.optional-dependencies]
31
+ knowledge = [
32
+ "fastembed>=0.8.0",
33
+ "sqlite-vss>=0.1.2",
34
+ ]
31
35
  dev = [
32
36
  "pytest>=8.0",
33
37
  "pytest-cov>=5.0",