npm - arkaos - Versions diffs - 2.0.2 → 2.1.0 - Mend

arkaos 2.0.2 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

package/VERSION +1 -1
package/config/constitution.yaml +2 -0
package/config/hooks/user-prompt-submit-v2.sh +11 -0
package/core/budget/__pycache__/__init__.cpython-313.pyc +0 -0
package/core/budget/__pycache__/manager.cpython-313.pyc +0 -0
package/core/budget/__pycache__/schema.cpython-313.pyc +0 -0
package/core/knowledge/__init__.py +6 -0
package/core/knowledge/__pycache__/__init__.cpython-313.pyc +0 -0
package/core/knowledge/__pycache__/chunker.cpython-313.pyc +0 -0
package/core/knowledge/__pycache__/embedder.cpython-313.pyc +0 -0
package/core/knowledge/__pycache__/indexer.cpython-313.pyc +0 -0
package/core/knowledge/__pycache__/ingest.cpython-313.pyc +0 -0
package/core/knowledge/__pycache__/vector_store.cpython-313.pyc +0 -0
package/core/knowledge/chunker.py +121 -0
package/core/knowledge/embedder.py +52 -0
package/core/knowledge/indexer.py +97 -0
package/core/knowledge/ingest.py +270 -0
package/core/knowledge/vector_store.py +213 -0
package/core/obsidian/__pycache__/__init__.cpython-313.pyc +0 -0
package/core/obsidian/__pycache__/templates.cpython-313.pyc +0 -0
package/core/obsidian/__pycache__/writer.cpython-313.pyc +0 -0
package/core/orchestration/__pycache__/__init__.cpython-313.pyc +0 -0
package/core/orchestration/__pycache__/patterns.cpython-313.pyc +0 -0
package/core/orchestration/__pycache__/protocol.cpython-313.pyc +0 -0
package/core/runtime/__pycache__/subagent.cpython-313.pyc +0 -0
package/core/runtime/subagent.py +5 -0
package/core/squads/__pycache__/schema.cpython-313.pyc +0 -0
package/core/squads/schema.py +3 -0
package/core/squads/templates/project-squad.yaml +28 -0
package/core/synapse/__pycache__/engine.cpython-313.pyc +0 -0
package/core/synapse/__pycache__/layers.cpython-313.pyc +0 -0
package/core/synapse/engine.py +5 -1
package/core/synapse/layers.py +95 -9
package/core/tasks/__pycache__/schema.cpython-313.pyc +0 -0
package/core/tasks/schema.py +1 -0
package/core/workflow/__pycache__/engine.cpython-313.pyc +0 -0
package/core/workflow/__pycache__/schema.cpython-313.pyc +0 -0
package/departments/dev/agents/research-assistant.yaml +51 -0
package/departments/kb/agents/data-collector.yaml +51 -0
package/departments/ops/agents/doc-writer.yaml +51 -0
package/departments/pm/agents/pm-director.yaml +1 -1
package/installer/cli.js +49 -0
package/installer/init.js +105 -0
package/installer/migrate.js +4 -1
package/package.json +1 -1
package/pyproject.toml +16 -1

package/core/knowledge/vector_store.py ADDED Viewed

@@ -0,0 +1,213 @@
+"""Vector store — SQLite-VSS backed semantic search.
+Stores document chunks with embeddings for fast similarity search.
+Graceful degradation: works without sqlite-vss (falls back to keyword search).
+"""
+import json
+import sqlite3
+import time
+from pathlib import Path
+from typing import Any, Optional
+from core.knowledge.embedder import embed, embed_batch, EMBEDDING_DIMS
+def _load_vss(db: sqlite3.Connection) -> bool:
+    """Try to load sqlite-vss extension."""
+    try:
+        db.enable_load_extension(True)
+        import sqlite_vss
+        sqlite_vss.load(db)
+        return True
+    except (ImportError, Exception):
+        return False
+class VectorStore:
+    """SQLite-VSS backed vector store for knowledge retrieval."""
+    def __init__(self, db_path: str | Path = ":memory:") -> None:
+        self._db_path = str(db_path)
+        self._db = sqlite3.connect(self._db_path)
+        self._db.row_factory = sqlite3.Row
+        self._vss_available = _load_vss(self._db)
+        self._init_schema()
+    def _init_schema(self) -> None:
+        """Create tables if they don't exist."""
+        self._db.executescript("""
+            CREATE TABLE IF NOT EXISTS chunks (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                text TEXT NOT NULL,
+                heading TEXT DEFAULT '',
+                source TEXT DEFAULT '',
+                file_hash TEXT DEFAULT '',
+                metadata TEXT DEFAULT '{}',
+                created_at REAL DEFAULT (unixepoch('now')),
+                embedding BLOB
+            );
+            CREATE INDEX IF NOT EXISTS idx_chunks_source ON chunks(source);
+            CREATE INDEX IF NOT EXISTS idx_chunks_hash ON chunks(file_hash);
+        """)
+        if self._vss_available:
+            try:
+                self._db.execute(
+                    f"CREATE VIRTUAL TABLE IF NOT EXISTS vss_chunks USING vss0(embedding({EMBEDDING_DIMS}))"
+                )
+            except Exception:
+                self._vss_available = False
+        self._db.commit()
+    def index_chunks(
+        self,
+        texts: list[str],
+        headings: list[str] | None = None,
+        source: str = "",
+        file_hash: str = "",
+        metadata: dict[str, Any] | None = None,
+    ) -> int:
+        """Index multiple text chunks with embeddings.
+        Returns number of chunks indexed.
+        """
+        if not texts:
+            return 0
+        embeddings = embed_batch(texts)
+        meta_json = json.dumps(metadata or {})
+        count = 0
+        for i, text in enumerate(texts):
+            heading = headings[i] if headings and i < len(headings) else ""
+            emb_blob = None
+            if embeddings and i < len(embeddings):
+                emb_blob = _vec_to_blob(embeddings[i])
+            cursor = self._db.execute(
+                "INSERT INTO chunks (text, heading, source, file_hash, metadata, embedding) VALUES (?, ?, ?, ?, ?, ?)",
+                (text, heading, source, file_hash, meta_json, emb_blob),
+            )
+            if self._vss_available and emb_blob:
+                self._db.execute(
+                    "INSERT INTO vss_chunks (rowid, embedding) VALUES (?, ?)",
+                    (cursor.lastrowid, emb_blob),
+                )
+            count += 1
+        self._db.commit()
+        return count
+    def search(self, query: str, top_k: int = 5) -> list[dict]:
+        """Search for similar chunks.
+        Returns list of dicts with: text, heading, source, score, metadata.
+        """
+        # Check if store has any data
+        total = self._db.execute("SELECT COUNT(*) as cnt FROM chunks").fetchone()["cnt"]
+        if total == 0:
+            return []
+        query_emb = embed(query)
+        if query_emb and self._vss_available:
+            try:
+                return self._vss_search(query_emb, top_k)
+            except Exception:
+                return self._keyword_search(query, top_k)
+        # Fallback: keyword search
+        return self._keyword_search(query, top_k)
+    def _vss_search(self, query_emb: list[float], top_k: int) -> list[dict]:
+        """Vector similarity search via sqlite-vss."""
+        query_blob = _vec_to_blob(query_emb)
+        rows = self._db.execute("""
+            SELECT c.text, c.heading, c.source, c.metadata, v.distance
+            FROM vss_chunks v
+            JOIN chunks c ON c.id = v.rowid
+            WHERE vss_search(v.embedding, vss_search_params(?, ?))
+        """, (query_blob, top_k)).fetchall()
+        return [
+            {
+                "text": r["text"],
+                "heading": r["heading"],
+                "source": r["source"],
+                "score": 1.0 - r["distance"],  # Convert distance to similarity
+                "metadata": json.loads(r["metadata"]),
+            }
+            for r in rows
+        ]
+    def _keyword_search(self, query: str, top_k: int) -> list[dict]:
+        """Fallback keyword search when VSS unavailable."""
+        words = query.lower().split()
+        if not words:
+            return []
+        conditions = " OR ".join(["lower(text) LIKE ?" for _ in words])
+        params = [f"%{w}%" for w in words[:5]]  # Max 5 keywords
+        rows = self._db.execute(
+            f"SELECT text, heading, source, metadata FROM chunks WHERE {conditions} LIMIT ?",
+            params + [top_k],
+        ).fetchall()
+        return [
+            {
+                "text": r["text"],
+                "heading": r["heading"],
+                "source": r["source"],
+                "score": 0.5,  # No real score for keyword search
+                "metadata": json.loads(r["metadata"]),
+            }
+            for r in rows
+        ]
+    def is_file_indexed(self, file_hash: str) -> bool:
+        """Check if a file has already been indexed."""
+        row = self._db.execute(
+            "SELECT COUNT(*) as cnt FROM chunks WHERE file_hash = ?", (file_hash,)
+        ).fetchone()
+        return row["cnt"] > 0
+    def remove_file(self, source: str) -> int:
+        """Remove all chunks from a source file."""
+        if self._vss_available:
+            rows = self._db.execute("SELECT id FROM chunks WHERE source = ?", (source,)).fetchall()
+            for r in rows:
+                self._db.execute("DELETE FROM vss_chunks WHERE rowid = ?", (r["id"],))
+        deleted = self._db.execute("DELETE FROM chunks WHERE source = ?", (source,)).rowcount
+        self._db.commit()
+        return deleted
+    def get_stats(self) -> dict:
+        """Get store statistics."""
+        total = self._db.execute("SELECT COUNT(*) as cnt FROM chunks").fetchone()["cnt"]
+        sources = self._db.execute("SELECT COUNT(DISTINCT source) as cnt FROM chunks").fetchone()["cnt"]
+        return {
+            "total_chunks": total,
+            "total_files": sources,
+            "vss_available": self._vss_available,
+            "db_path": self._db_path,
+        }
+    def clear(self) -> None:
+        """Remove all data."""
+        if self._vss_available:
+            self._db.execute("DELETE FROM vss_chunks")
+        self._db.execute("DELETE FROM chunks")
+        self._db.commit()
+    def close(self) -> None:
+        """Close database connection."""
+        self._db.close()
+def _vec_to_blob(vec: list[float]) -> bytes:
+    """Convert float vector to bytes for SQLite storage."""
+    import struct
+    return struct.pack(f"{len(vec)}f", *vec)

package/core/obsidian/__pycache__/__init__.cpython-313.pyc CHANGED Viewed

Binary file

package/core/obsidian/__pycache__/templates.cpython-313.pyc CHANGED Viewed

Binary file

package/core/obsidian/__pycache__/writer.cpython-313.pyc CHANGED Viewed

Binary file

package/core/orchestration/__pycache__/__init__.cpython-313.pyc CHANGED Viewed

Binary file

package/core/orchestration/__pycache__/patterns.cpython-313.pyc CHANGED Viewed

Binary file

package/core/orchestration/__pycache__/protocol.cpython-313.pyc CHANGED Viewed

Binary file

package/core/runtime/__pycache__/subagent.cpython-313.pyc CHANGED Viewed

Binary file

package/core/runtime/subagent.py CHANGED Viewed

@@ -102,6 +102,11 @@ class SubagentDispatcher:
     The dispatcher creates HandoffArtifacts from agent definitions
     and task descriptions, then delegates to the runtime adapter
     for actual execution.
+    Nesting policy: Maximum 1 level of nesting (agent -> subagent).
+    Sub-subagent dispatch is not recommended -- creates context fragmentation
+    and debugging complexity. If a subagent needs help, it should escalate
+    to its squad lead rather than spawning another subagent.
     """
     def __init__(self) -> None:

package/core/squads/__pycache__/schema.cpython-313.pyc CHANGED Viewed

Binary file

package/core/squads/schema.py CHANGED Viewed

@@ -35,6 +35,9 @@ class SquadMember(BaseModel):
     borrowed: bool = False           # Borrowed from another department?
     source_department: str = ""      # Original department if borrowed
     availability: float = 1.0        # 0.0-1.0, for shared agents
+    # Tier 2 agents can collaborate directly within project squads
+    # without requiring Tier 1 approval for each interaction.
+    can_collaborate_directly: bool = True
 class SquadWorkflow(BaseModel):

package/core/squads/templates/project-squad.yaml ADDED Viewed

@@ -0,0 +1,28 @@
+# Project Squad Template
+# Copy and customize for cross-department projects
+id: project-{name}
+name: "{Project Name} Squad"
+description: "Cross-department squad for {project description}"
+department: ""  # No single department — cross-cutting
+squad_type: project
+topology: stream-aligned
+members:
+  # Borrow from department squads
+  - agent_id: "{lead-agent-id}"
+    role: "Project Lead"
+    is_lead: true
+    borrowed: true     # Borrowed from department squad
+    availability: 0.5  # 50% allocation
+  - agent_id: "{specialist-id}"
+    role: "Technical Implementation"
+    borrowed: true
+    availability: 0.3
+# Project squads:
+# - Created by COO (Sofia) or any Squad Lead
+# - Agents are borrowed, not moved
+# - Max 10 members (Two-Pizza Team)
+# - Dissolved when project completes
+# - Quality Gate still mandatory

package/core/synapse/__pycache__/engine.cpython-313.pyc CHANGED Viewed

Binary file

package/core/synapse/__pycache__/layers.cpython-313.pyc CHANGED Viewed

Binary file

package/core/synapse/engine.py CHANGED Viewed

@@ -10,6 +10,7 @@ Design goals:
 import time
 from dataclasses import dataclass, field
+from typing import Any
 from core.synapse.layers import Layer, LayerResult, PromptContext
 from core.synapse.cache import LayerCache
@@ -152,6 +153,7 @@ def create_default_engine(
     constitution_compressed: str = "",
     commands: list[dict] | None = None,
     agents_registry: dict[str, dict] | None = None,
+    vector_store: Any = None,
 ) -> SynapseEngine:
     """Create a SynapseEngine with all 8 default layers.
@@ -166,7 +168,7 @@ def create_default_engine(
     from core.synapse.layers import (
         ConstitutionLayer, DepartmentLayer, AgentLayer,
         ProjectLayer, BranchLayer, CommandHintsLayer,
-        QualityGateLayer, TimeLayer,
+        QualityGateLayer, TimeLayer, KnowledgeRetrievalLayer,
     )
     engine = SynapseEngine()
@@ -176,6 +178,8 @@ def create_default_engine(
     engine.register_layer(DepartmentLayer())
     engine.register_layer(AgentLayer(agents_registry=agents_registry))
     engine.register_layer(ProjectLayer())
+    if vector_store is not None:
+        engine.register_layer(KnowledgeRetrievalLayer(vector_store=vector_store))
     engine.register_layer(BranchLayer())
     engine.register_layer(CommandHintsLayer(commands=commands))
     engine.register_layer(QualityGateLayer())

package/core/synapse/layers.py CHANGED Viewed

@@ -1,17 +1,18 @@
-"""Synapse layer definitions — the 8 context layers.
+"""Synapse layer definitions — the 9 context layers.
 Each layer extracts a specific type of context and compresses it
 for injection into the prompt. Layers are pluggable and ordered.
 Layer Architecture:
-  L0: Constitution  — Compressed governance rules (TTL: 300s)
-  L1: Department    — Detected department from input (no cache)
-  L2: Agent         — Agent profile + last gotchas (TTL: 30s)
-  L3: Project       — Active project context (TTL: 30s)
-  L4: Branch        — Current git branch (no cache)
-  L5: Command Hints — Matching commands from registry (TTL: 30s)
-  L6: Quality Gate  — QG status and last verdicts (TTL: 60s)
-  L7: Time          — Time-of-day signal (no cache)
+  L0:   Constitution       — Compressed governance rules (TTL: 300s)
+  L1:   Department         — Detected department from input (no cache)
+  L2:   Agent              — Agent profile + last gotchas (TTL: 30s)
+  L3:   Project            — Active project context (TTL: 30s)
+  L3.5: KnowledgeRetrieval — Semantic search from vector DB (TTL: 30s)
+  L4:   Branch             — Current git branch (no cache)
+  L5:   Command Hints      — Matching commands from registry (TTL: 30s)
+  L6:   Quality Gate       — QG status and last verdicts (TTL: 60s)
+  L7:   Time               — Time-of-day signal (no cache)
 """
 import re
@@ -439,3 +440,88 @@ class TimeLayer(Layer):
             layer_id=self.id, tag=tag, content=period,
             tokens_est=1, compute_ms=ms, cached=False,
         )
+# --- L3.5: Knowledge Retrieval ---
+class KnowledgeRetrievalLayer(Layer):
+    """L3.5: Semantic knowledge retrieval from vector DB.
+    Searches the local vector store for chunks relevant to the user's
+    input and injects them as context. Gracefully skips if vector store
+    is unavailable or empty.
+    """
+    def __init__(self, vector_store: Any = None, max_chunks: int = 3, max_tokens: int = 400) -> None:
+        self._store = vector_store
+        self._max_chunks = max_chunks
+        self._max_tokens = max_tokens
+    @property
+    def id(self) -> str:
+        return "L3.5"
+    @property
+    def name(self) -> str:
+        return "KnowledgeRetrieval"
+    @property
+    def cache_ttl(self) -> int:
+        return 30
+    @property
+    def priority(self) -> int:
+        return 35
+    def compute(self, ctx: PromptContext) -> LayerResult:
+        start = time.time()
+        if not self._store or not ctx.user_input:
+            return LayerResult(
+                layer_id=self.id, tag="", content="",
+                tokens_est=0, compute_ms=0, cached=False,
+            )
+        try:
+            results = self._store.search(ctx.user_input, top_k=self._max_chunks)
+        except Exception:
+            return LayerResult(
+                layer_id=self.id, tag="", content="",
+                tokens_est=0, compute_ms=0, cached=False,
+            )
+        if not results:
+            ms = int((time.time() - start) * 1000)
+            return LayerResult(
+                layer_id=self.id, tag="", content="",
+                tokens_est=0, compute_ms=ms, cached=False,
+            )
+        # Build compact knowledge context
+        snippets = []
+        total_tokens = 0
+        for r in results:
+            text = r["text"][:200].replace("\n", " ").strip()
+            tokens = len(text.split())
+            if total_tokens + tokens > self._max_tokens:
+                break
+            source = r.get("source", "").split("/")[-1] if r.get("source") else ""
+            snippet = f"{source}: {text}" if source else text
+            snippets.append(snippet)
+            total_tokens += tokens
+        if not snippets:
+            ms = int((time.time() - start) * 1000)
+            return LayerResult(
+                layer_id=self.id, tag="", content="",
+                tokens_est=0, compute_ms=ms, cached=False,
+            )
+        content = " | ".join(snippets)
+        tag = f"[knowledge:{len(snippets)} chunks]"
+        ms = int((time.time() - start) * 1000)
+        return LayerResult(
+            layer_id=self.id, tag=tag, content=content,
+            tokens_est=total_tokens, compute_ms=ms, cached=False,
+        )

package/core/tasks/__pycache__/schema.cpython-313.pyc CHANGED Viewed

Binary file

package/core/tasks/schema.py CHANGED Viewed

@@ -29,6 +29,7 @@ class TaskType(str, Enum):
     RESEARCH = "research"             # Background research
     GENERATION = "generation"         # AI content/image generation
     EXPORT = "export"                 # Export to external system
+    KB_INDEX = "kb_index"           # Index documents into vector store
     CUSTOM = "custom"

package/core/workflow/__pycache__/engine.cpython-313.pyc CHANGED Viewed

Binary file

package/core/workflow/__pycache__/schema.cpython-313.pyc CHANGED Viewed

Binary file

package/departments/dev/agents/research-assistant.yaml ADDED Viewed

@@ -0,0 +1,51 @@
+id: research-assistant
+name: Maria
+role: Research Assistant
+department: dev
+tier: 3
+behavioral_dna:
+  disc:
+    primary: C
+    secondary: S
+    communication_style: "Thorough, detail-oriented, presents findings systematically"
+    under_pressure: "Digs deeper into data before responding"
+    motivator: "Understanding the full picture"
+  enneagram:
+    type: 5
+    wing: 6
+    core_motivation: "To understand and be competent"
+    core_fear: "Being ignorant or uninformed"
+    subtype: social
+  big_five:
+    openness: 90
+    conscientiousness: 85
+    extraversion: 30
+    agreeableness: 70
+    neuroticism: 35
+  mbti:
+    type: INTP
+authority:
+  veto: false
+  approve_budget: false
+  approve_architecture: false
+  approve_quality: false
+  block_release: false
+  block_delivery: false
+  orchestrate: false
+  delegates_to: []
+  escalates_to: tech-lead-paulo
+expertise:
+  domains: ["research", "documentation", "analysis", "literature-review"]
+  frameworks: ["Systematic Review", "PRISMA", "Research Methodology"]
+  depth: proficient
+  years_equivalent: 5
+communication:
+  language: en
+  tone: "Precise and informative"
+  vocabulary_level: specialist
+  preferred_format: "Structured reports with citations"
+  avoid: ["assumptions without evidence", "vague conclusions"]

package/departments/kb/agents/data-collector.yaml ADDED Viewed

@@ -0,0 +1,51 @@
+id: data-collector
+name: Tomas Jr
+role: Data Collector
+department: kb
+tier: 3
+behavioral_dna:
+  disc:
+    primary: C
+    secondary: D
+    communication_style: "Data-driven, factual, structured"
+    under_pressure: "Relies on systematic data collection"
+    motivator: "Complete and accurate data"
+  enneagram:
+    type: 6
+    wing: 5
+    core_motivation: "To have reliable information"
+    core_fear: "Making decisions on incomplete data"
+    subtype: self-preservation
+  big_five:
+    openness: 70
+    conscientiousness: 88
+    extraversion: 35
+    agreeableness: 65
+    neuroticism: 40
+  mbti:
+    type: ISTJ
+authority:
+  veto: false
+  approve_budget: false
+  approve_architecture: false
+  approve_quality: false
+  block_release: false
+  block_delivery: false
+  orchestrate: false
+  delegates_to: []
+  escalates_to: kb-lead-clara
+expertise:
+  domains: ["data-collection", "web-scraping", "API-integration", "data-validation"]
+  frameworks: ["ETL", "Data Quality Framework"]
+  depth: proficient
+  years_equivalent: 4
+communication:
+  language: en
+  tone: "Factual and precise"
+  vocabulary_level: specialist
+  preferred_format: "Data tables with quality scores"
+  avoid: ["subjective interpretations", "unverified claims"]

package/departments/ops/agents/doc-writer.yaml ADDED Viewed

@@ -0,0 +1,51 @@
+id: doc-writer
+name: Isabel
+role: Documentation Writer
+department: ops
+tier: 3
+behavioral_dna:
+  disc:
+    primary: S
+    secondary: C
+    communication_style: "Clear, structured, audience-aware"
+    under_pressure: "Focuses on clarity and completeness"
+    motivator: "Making complex things accessible"
+  enneagram:
+    type: 1
+    wing: 2
+    core_motivation: "To produce correct, helpful documentation"
+    core_fear: "Publishing inaccurate information"
+    subtype: social
+  big_five:
+    openness: 75
+    conscientiousness: 92
+    extraversion: 40
+    agreeableness: 80
+    neuroticism: 30
+  mbti:
+    type: ISFJ
+authority:
+  veto: false
+  approve_budget: false
+  approve_architecture: false
+  approve_quality: false
+  block_release: false
+  block_delivery: false
+  orchestrate: false
+  delegates_to: []
+  escalates_to: ops-lead-daniel
+expertise:
+  domains: ["technical-writing", "API-docs", "user-guides", "SOPs"]
+  frameworks: ["Diátaxis", "Google Developer Documentation Style"]
+  depth: proficient
+  years_equivalent: 5
+communication:
+  language: en
+  tone: "Clear, concise, helpful"
+  vocabulary_level: accessible
+  preferred_format: "Step-by-step guides with examples"
+  avoid: ["jargon without explanation", "walls of text"]

package/departments/pm/agents/pm-director.yaml CHANGED Viewed

@@ -42,7 +42,7 @@ authority:
     - product-owner
     - scrum-master
     - project-coordinator
-  escalates_to: cto-marco
+  escalates_to: coo-sofia
 expertise:
   domains: