npm - arkaos - Versions diffs - 3.78.0 → 4.0.0 - Mend

arkaos 3.78.0 → 4.0.0

This diff compares the contents of two publicly available package versions, each of which has been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Files changed (77)

package/VERSION +1 -1
package/config/agent-allowlists/laravel.yaml +1 -0
package/config/agent-allowlists/node.yaml +1 -0
package/config/agent-allowlists/nuxt.yaml +1 -0
package/config/agent-allowlists/python.yaml +1 -0
package/core/agents/__pycache__/registry_gen.cpython-313.pyc +0 -0
package/core/agents/__pycache__/schema.cpython-313.pyc +0 -0
package/core/agents/registry_gen.py +6 -1
package/core/agents/schema.py +4 -0
package/core/cognition/__pycache__/reorganizer.cpython-313.pyc +0 -0
package/core/cognition/reorganizer.py +37 -7
package/core/governance/__pycache__/design_system_lint.cpython-313.pyc +0 -0
package/core/governance/__pycache__/design_system_lint_cli.cpython-313.pyc +0 -0
package/core/knowledge/__pycache__/agent_match.cpython-313.pyc +0 -0
package/core/knowledge/__pycache__/chunker.cpython-313.pyc +0 -0
package/core/knowledge/__pycache__/ingest.cpython-313.pyc +0 -0
package/core/knowledge/__pycache__/sources.cpython-313.pyc +0 -0
package/core/knowledge/__pycache__/vector_store.cpython-313.pyc +0 -0
package/core/knowledge/agent_match.py +114 -0
package/core/knowledge/chunker.py +45 -0
package/core/knowledge/ingest.py +156 -78
package/core/knowledge/sources.py +138 -0
package/core/knowledge/vector_store.py +52 -0
package/core/squads/__pycache__/loader.cpython-313.pyc +0 -0
package/core/squads/loader.py +25 -0
package/core/sync/__pycache__/agent_provisioner.cpython-313.pyc +0 -0
package/core/sync/agent_provisioner.py +19 -8
package/dashboard/app/components/KnowledgeSourcesList.vue +40 -13
package/dashboard/app/pages/cognition.vue +9 -4
package/dashboard/app/pages/knowledge/[id].vue +669 -0
package/dashboard/app/pages/knowledge/index.vue +1281 -0
package/dashboard/app/types/index.d.ts +1 -1
package/departments/brand/agents/ux-designer.yaml +15 -1
package/departments/brand/agents/ux-researcher.yaml +73 -0
package/departments/brand/agents/ux-strategist.yaml +72 -0
package/departments/dev/agents/ai-engineering/ai-engineering-lead.yaml +76 -0
package/departments/dev/agents/architect.yaml +9 -3
package/departments/dev/agents/backend-core/laravel-eng.yaml +76 -0
package/departments/dev/agents/backend-core/node-ts-eng.yaml +76 -0
package/departments/dev/agents/backend-core/python-eng.yaml +76 -0
package/departments/dev/agents/backend-dev.yaml +10 -4
package/departments/dev/agents/data-platform/etl-eng.yaml +74 -0
package/departments/dev/agents/dba.yaml +7 -3
package/departments/dev/references/backend-knowledge-and-tools.md +70 -0
package/departments/ecom/agents/retention-manager.yaml +13 -1
package/departments/leadership/agents/culture-coach.yaml +20 -0
package/departments/leadership/agents/hr-specialist.yaml +18 -0
package/departments/leadership/agents/leadership-director.yaml +10 -0
package/departments/org/agents/chief-of-staff.yaml +76 -0
package/departments/org/agents/coo.yaml +11 -0
package/departments/org/agents/okr-steward.yaml +71 -0
package/departments/org/agents/org-designer.yaml +23 -0
package/departments/org/skills/okr-cadence/SKILL.md +34 -0
package/departments/org/skills/principles-audit/SKILL.md +36 -0
package/departments/pm/agents/pm-director.yaml +21 -8
package/departments/pm/agents/product-owner.yaml +24 -2
package/departments/pm/agents/scrum-master.yaml +21 -0
package/departments/pm/agents/strategic-pm.yaml +72 -0
package/departments/pm/skills/discovery-plan/SKILL.md +7 -1
package/departments/quality/agents/cqo.yaml +8 -0
package/departments/saas/agents/cs-manager.yaml +19 -2
package/departments/saas/agents/growth-engineer.yaml +14 -1
package/departments/saas/agents/metrics-analyst.yaml +17 -1
package/departments/saas/agents/revops-lead.yaml +73 -0
package/departments/saas/skills/leaky-bucket/SKILL.md +28 -0
package/departments/saas/skills/voc-loop/SKILL.md +29 -0
package/departments/sales/agents/sales-director.yaml +9 -0
package/departments/sales/agents/sdr.yaml +72 -0
package/departments/strategy/agents/decision-quality.yaml +72 -0
package/departments/strategy/agents/strategy-director.yaml +13 -0
package/departments/strategy/skills/premortem/SKILL.md +33 -0
package/knowledge/agents-registry-v2.json +1218 -78
package/package.json +1 -1
package/pyproject.toml +1 -1
package/scripts/__pycache__/dashboard-api.cpython-313.pyc +0 -0
package/scripts/dashboard-api.py +376 -13
package/dashboard/app/pages/knowledge.vue +0 -918

package/core/knowledge/ingest.py CHANGED Viewed

@@ -6,12 +6,14 @@ the vector store. Reports progress via callback for real-time UI updates.
 import os
 import re
+import subprocess
 import tempfile
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Callable, Optional
 from core.knowledge.chunker import chunk_markdown
+from core.knowledge.sources import source_id
 from core.knowledge.vector_store import VectorStore
@@ -25,6 +27,11 @@ class IngestResult:
     title: str = ""
     error: str = ""
     success: bool = True
+    duration: int = 0
+    language: str = ""
+    media_path: str = ""
+    thumbnail_path: str = ""
+    transcript: str = ""
 ProgressCallback = Callable[[int, str], None]  # (percent, message)
@@ -38,15 +45,20 @@ def detect_source_type(source: str) -> str:
     if any(domain in source_lower for domain in ["youtube.com", "youtu.be"]):
         return "youtube"
-    # Web URLs
+    # Video: a URL or file path ending in a video container extension.
+    # Checked *before* the generic web fallback so a non-youtube CDN clip
+    # (https://.../clip.mp4) resolves to "video", per PR1 spec Task 2.3.
+    ext = Path(source.split("?", 1)[0]).suffix.lower()
+    if ext in IngestEngine.VIDEO_EXTS:
+        return "video"
+    # Web URLs (no recognised media extension)
     if source_lower.startswith(("http://", "https://")):
         return "web"
-    # File extensions
-    ext = Path(source).suffix.lower()
     if ext == ".pdf":
         return "pdf"
-    if ext in (".mp3", ".wav", ".m4a", ".ogg", ".flac", ".webm"):
+    if ext in (".mp3", ".wav", ".m4a", ".ogg", ".flac"):
         return "audio"
     if ext in (".md", ".txt", ".rst"):
         return "markdown"
@@ -57,11 +69,19 @@ def detect_source_type(source: str) -> str:
 class IngestEngine:
     """Processes content from various sources into the vector store."""
-    def __init__(self, store: VectorStore, media_dir: str | Path = "") -> None:
+    VIDEO_EXTS = (".mp4", ".mov", ".webm", ".mkv", ".avi")
+    def __init__(self, store: VectorStore, media_dir: str | Path = "", registry=None) -> None:
         self._store = store
+        self._registry = registry
         self._media_dir = Path(media_dir) if media_dir else Path.home() / ".arkaos" / "media"
         self._media_dir.mkdir(parents=True, exist_ok=True)
+    @staticmethod
+    def detect_source_type(source: str) -> str:
+        """Detect the source type for a URL or file path (class-level alias)."""
+        return detect_source_type(source)
     def ingest(
         self,
         source: str,
@@ -87,6 +107,7 @@ class IngestEngine:
             "youtube": self._process_youtube,
             "pdf": self._process_pdf,
             "audio": self._process_audio,
+            "video": self._process_video,
             "web": self._process_web,
             "markdown": self._process_markdown,
         }
@@ -96,11 +117,13 @@ class IngestEngine:
             return IngestResult(source=source, source_type=source_type, error=f"Unsupported type: {source_type}", success=False)
         try:
-            text, title = processor(source, progress)
+            text, title, extra = self._invoke_processor(processor, source, progress)
         except Exception as e:
+            self._register_failure(source, source_type, str(e))
             return IngestResult(source=source, source_type=source_type, error=str(e), success=False)
         if not text or len(text.strip()) < 50:
+            self._register_failure(source, source_type, "Extracted text too short")
             return IngestResult(source=source, source_type=source_type, error="Extracted text too short", success=False)
         # Chunk and index
@@ -110,7 +133,9 @@ class IngestEngine:
         if total_chunks == 0:
             progress(100, "No chunks to index")
-            return IngestResult(source=source, source_type=source_type, text_length=len(text), chunks_created=0, title=title, success=True)
+            empty = self._make_result(source, source_type, text, title, 0, extra)
+            self._register_success(empty)
+            return empty
         # Index in batches with granular progress (85→99%)
         texts = [c.text for c in chunks]
@@ -149,100 +174,153 @@ class IngestEngine:
         except Exception:
             pass
+        result = self._make_result(source, source_type, text, title, count, extra)
+        self._register_success(result)
+        return result
+    @staticmethod
+    def _invoke_processor(
+        processor: Callable, source: str, progress: ProgressCallback
+    ) -> tuple[str, str, dict]:
+        """Call a processor, normalizing 2-tuple and 3-tuple returns."""
+        out = processor(source, progress)
+        if len(out) == 3:
+            return out[0], out[1], out[2] or {}
+        return out[0], out[1], {}
+    @staticmethod
+    def _make_result(
+        source: str, source_type: str, text: str, title: str,
+        count: int, extra: dict,
+    ) -> IngestResult:
+        """Assemble a successful IngestResult including media metadata."""
         return IngestResult(
-            source=source,
-            source_type=source_type,
-            text_length=len(text),
-            chunks_created=count,
-            title=title,
-            success=True,
+            source=source, source_type=source_type, text_length=len(text),
+            chunks_created=count, title=title, success=True, transcript=text,
+            duration=int(extra.get("duration", 0)),
+            language=extra.get("language", ""),
+            media_path=extra.get("media_path", ""),
+            thumbnail_path=extra.get("thumbnail_path", ""),
         )
-    def _process_youtube(self, url: str, progress: ProgressCallback) -> tuple[str, str]:
-        """Download YouTube video and transcribe audio.
+    def _register_success(self, result: IngestResult) -> None:
+        """Persist a successful ingest to the source registry, if present."""
+        if self._registry is None:
+            return
+        self._registry.upsert(
+            result.source, type=result.source_type, title=result.title,
+            duration=result.duration, language=result.language,
+            thumbnail_path=result.thumbnail_path, media_path=result.media_path,
+            transcript=result.transcript, chunk_count=result.chunks_created,
+            status="ready",
+        )
+    def _register_failure(self, source: str, stype: str, error: str) -> None:
+        """Persist a failed ingest to the source registry, if present."""
+        if self._registry is None:
+            return
+        self._registry.upsert(source, type=stype, status="failed", error=error)
-        5 distinct phases with clear progress:
-        Phase 1: Fetch video info (0-5%)
-        Phase 2: Download video (5-25%)
-        Phase 3: Extract audio (25-35%)
-        Phase 4: Transcribe audio (35-65%)
-        Phase 5: Return text for chunking/indexing (handled by caller, 75-100%)
+    def _process_youtube(self, url: str, progress: ProgressCallback) -> tuple[str, str, dict]:
+        """Download a YouTube video (kept as media) and transcribe it.
+        Phase 1: Fetch info (title, duration, language, thumbnail).
+        Phase 2: Download best video+audio merged to mp4 (kept as media).
+        Phase 3: Extract a WAV audio track for transcription.
+        Phase 4: Transcribe. Returns (text, title, extra-metadata).
         """
         try:
-            import yt_dlp
+            import yt_dlp  # noqa: F401
         except ImportError:
             raise RuntimeError("yt-dlp not installed. Run: pip install yt-dlp")
-        # === Phase 1: Fetch video info ===
         progress(2, "Phase 1/4 — Fetching video info...")
+        info = self._youtube_info(url, progress)
+        title = info.get("title", "YouTube Video")
+        progress(8, "Phase 2/4 — Downloading video...")
+        video_path = self._download_video(url, progress)
+        progress(40, "Phase 3/4 — Extracting audio from video...")
+        audio_path = self._extract_audio(video_path)
+        progress(50, "Phase 4/4 — Transcribing audio (this may take a while)...")
+        text = self._transcribe_audio(str(audio_path))
+        if not text or len(text.strip()) < 20:
+            raise RuntimeError("Transcription produced no usable text")
+        progress(70, f"Phase 4/4 — Transcribed: {len(text.split())} words")
+        return text, title, self._youtube_extra(info, video_path)
+    @staticmethod
+    def _youtube_extra(info: dict, video_path: Path) -> dict:
+        """Build the extra-metadata dict from yt-dlp info + saved video."""
+        return {
+            "duration": int(info.get("duration") or 0),
+            "language": info.get("language") or "",
+            "thumbnail_path": info.get("thumbnail") or "",
+            "media_path": str(video_path),
+        }
+    def _youtube_info(self, url: str, progress: ProgressCallback) -> dict:
+        """Fetch YouTube metadata without downloading."""
+        import yt_dlp
         try:
             with yt_dlp.YoutubeDL({"quiet": True, "no_warnings": True}) as ydl:
                 info = ydl.extract_info(url, download=False)
-                title = info.get("title", "YouTube Video")
-                duration = info.get("duration", 0)
-                progress(5, f"Phase 1/4 — Found: {title} ({duration}s)")
+                progress(5, f"Phase 1/4 — Found: {info.get('title')}")
+                return info
         except Exception as e:
             raise RuntimeError(f"YouTube access failed: {str(e)[:200]}")
-        # === Phase 2: Download video + extract audio ===
-        progress(8, f"Phase 2/4 — Downloading video...")
-        audio_path = str(self._media_dir / "yt_audio.wav")
+    def _download_video(self, url: str, progress: ProgressCallback) -> Path:
+        """Download best video+audio merged to mp4, keyed by stable id."""
+        import yt_dlp
+        stable_id = source_id(url)
+        out = self._media_dir / stable_id
         ydl_opts = {
-            "format": "bestaudio/best",
-            "outtmpl": str(self._media_dir / "yt_audio.%(ext)s"),
-            "postprocessors": [{
-                "key": "FFmpegExtractAudio",
-                "preferredcodec": "wav",
-                "preferredquality": "16",
-            }],
+            "format": "bestvideo*+bestaudio/best",
+            "merge_output_format": "mp4",
+            "outtmpl": str(out) + ".%(ext)s",
             "quiet": True,
             "no_warnings": True,
-            "progress_hooks": [lambda d: progress(
-                8 + int((d.get("downloaded_bytes", 0) / max(d.get("total_bytes", 1), 1)) * 17),
-                f"Phase 2/4 — Downloading... {d.get('_percent_str', '').strip()}"
-            ) if d.get("status") == "downloading" else None],
+            "progress_hooks": [self._dl_hook(progress)],
         }
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
             ydl.extract_info(url, download=True)
+        return self._media_dir / f"{stable_id}.mp4"
+    @staticmethod
+    def _dl_hook(progress: ProgressCallback) -> Callable:
+        """Build a yt-dlp progress hook mapping download % to 8-38%."""
+        def hook(d: dict) -> None:
+            if d.get("status") != "downloading":
+                return
+            ratio = d.get("downloaded_bytes", 0) / max(d.get("total_bytes", 1), 1)
+            progress(8 + int(ratio * 30),
+                     f"Phase 2/4 — Downloading... {d.get('_percent_str', '').strip()}")
+        return hook
+    def _extract_audio(self, video_path: Path) -> Path:
+        """Extract a 16kHz mono WAV track from a video via ffmpeg."""
+        audio_path = video_path.with_suffix(".wav")
+        subprocess.run(
+            ["ffmpeg", "-y", "-i", str(video_path), "-vn",
+             "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1",
+             str(audio_path)],
+            check=True, capture_output=True,
+        )
+        return audio_path
-        # === Phase 3: Extract audio (FFmpeg post-processing) ===
-        progress(28, "Phase 3/4 — Extracting audio from video...")
-        # Verify audio file exists
-        if not os.path.exists(audio_path):
-            # Try to find the downloaded file with different extension
-            for ext in ["wav", "m4a", "webm", "mp3", "opus"]:
-                alt = str(self._media_dir / f"yt_audio.{ext}")
-                if os.path.exists(alt):
-                    audio_path = alt
-                    break
-            else:
-                raise RuntimeError("Audio extraction failed — no output file found")
-        audio_size_mb = os.path.getsize(audio_path) / (1024 * 1024)
-        progress(35, f"Phase 3/4 — Audio extracted ({audio_size_mb:.1f} MB)")
-        # === Phase 4: Transcribe audio ===
-        progress(38, "Phase 4/4 — Transcribing audio (this may take a while)...")
-        text = self._transcribe_audio(audio_path)
-        if not text or len(text.strip()) < 20:
-            raise RuntimeError("Transcription produced no usable text")
-        word_count = len(text.split())
-        progress(70, f"Phase 4/4 — Transcribed: {word_count} words")
-        # Rename audio to include title for easy identification
-        safe_title = "".join(c if c.isalnum() or c in " -_" else "" for c in title)[:50].strip()
-        final_audio = self._media_dir / f"{safe_title}.wav"
-        try:
-            import shutil
-            shutil.move(audio_path, str(final_audio))
-        except Exception:
-            final_audio = Path(audio_path)
-        return text, title
+    def _process_video(self, path: str, progress: ProgressCallback) -> tuple[str, str, dict]:
+        """Ingest a local video file; the video itself is the media."""
+        filepath = Path(path)
+        if not filepath.exists():
+            raise FileNotFoundError(f"Video not found: {path}")
+        progress(30, "Transcribing video...")
+        text = self._transcribe_audio(str(filepath))
+        title = filepath.stem.replace("-", " ").replace("_", " ")
+        return text, title, {"media_path": str(filepath)}
     def _process_pdf(self, path: str, progress: ProgressCallback) -> tuple[str, str]:
         """Extract text from PDF."""

package/core/knowledge/sources.py ADDED Viewed

@@ -0,0 +1,138 @@
+"""Source registry for the knowledge base.
+Stores rich per-source metadata (title, duration, media path, transcript,
+thumbnail, status) in a dedicated ``sources`` table living inside the same
+``knowledge.db`` as the vector store. This module is purely additive: it
+never touches the ``chunks`` table owned by :class:`VectorStore`.
+"""
+from __future__ import annotations
+import hashlib
+import sqlite3
+import threading
+from pathlib import Path
+from typing import Optional
+_SCHEMA = """
+CREATE TABLE IF NOT EXISTS sources (
+    id TEXT PRIMARY KEY,
+    source TEXT NOT NULL,
+    type TEXT DEFAULT '',
+    title TEXT DEFAULT '',
+    duration INTEGER DEFAULT 0,
+    language TEXT DEFAULT '',
+    thumbnail_path TEXT DEFAULT '',
+    media_path TEXT DEFAULT '',
+    transcript TEXT DEFAULT '',
+    chunk_count INTEGER DEFAULT 0,
+    status TEXT DEFAULT 'pending',
+    error TEXT DEFAULT '',
+    created_at REAL DEFAULT (unixepoch('now')),
+    updated_at REAL DEFAULT (unixepoch('now'))
+)
+"""
+_COLUMNS = (
+    "id", "source", "type", "title", "duration", "language",
+    "thumbnail_path", "media_path", "transcript", "chunk_count",
+    "status", "error", "created_at", "updated_at",
+)
+def source_id(source: str) -> str:
+    """Return a stable id for a source string: ``src-`` + sha1[:12]."""
+    digest = hashlib.sha1(source.encode("utf-8")).hexdigest()
+    return f"src-{digest[:12]}"
+class SourceRegistry:
+    """SQLite-backed registry of knowledge sources and their metadata."""
+    def __init__(self, db_path: str | Path = "") -> None:
+        """Open (or create) the sources table in the knowledge database."""
+        self._db_path = str(db_path) if db_path else self._default_path()
+        self._lock = threading.Lock()
+        self._conn = sqlite3.connect(self._db_path, check_same_thread=False)
+        self._conn.execute("PRAGMA journal_mode=WAL")
+        self._conn.execute(_SCHEMA)
+        self._conn.commit()
+    @staticmethod
+    def _default_path() -> str:
+        home = Path.home() / ".arkaos"
+        home.mkdir(parents=True, exist_ok=True)
+        return str(home / "knowledge.db")
+    def upsert(
+        self,
+        source: str,
+        *,
+        type: str = "",
+        title: str = "",
+        duration: int = 0,
+        language: str = "",
+        thumbnail_path: str = "",
+        media_path: str = "",
+        transcript: str = "",
+        chunk_count: int = 0,
+        status: str = "ready",
+        error: str = "",
+    ) -> str:
+        """Insert or replace a source row by id; return its stable id."""
+        sid = source_id(source)
+        params = (
+            sid, source, type, title, duration, language, thumbnail_path,
+            media_path, transcript, chunk_count, status, error, sid,
+        )
+        with self._lock:
+            self._conn.execute(self._upsert_sql(), params)
+            self._conn.commit()
+        return sid
+    @staticmethod
+    def _upsert_sql() -> str:
+        """SQL that preserves created_at on update via a COALESCE subquery."""
+        return (
+            "INSERT OR REPLACE INTO sources "
+            "(id, source, type, title, duration, language, thumbnail_path, "
+            "media_path, transcript, chunk_count, status, error, "
+            "created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, "
+            "?, ?, COALESCE((SELECT created_at FROM sources WHERE id = ?), "
+            "unixepoch('now')), unixepoch('now'))"
+        )
+    def get(self, source_id_: str) -> Optional[dict]:
+        """Return the full row for a source id as a dict, or None."""
+        row = self._conn.execute(
+            "SELECT * FROM sources WHERE id = ?", (source_id_,)
+        ).fetchone()
+        return self._row_to_dict(row) if row else None
+    def get_by_source(self, source: str) -> Optional[dict]:
+        """Return the row matching a raw source string, or None."""
+        return self.get(source_id(source))
+    def list(self) -> list[dict]:
+        """Return all source rows, newest updated first."""
+        rows = self._conn.execute(
+            "SELECT * FROM sources ORDER BY updated_at DESC"
+        ).fetchall()
+        return [self._row_to_dict(r) for r in rows]
+    def delete(self, source_id_: str) -> bool:
+        """Delete a source row; return True if a row was removed."""
+        with self._lock:
+            cur = self._conn.execute(
+                "DELETE FROM sources WHERE id = ?", (source_id_,)
+            )
+            self._conn.commit()
+        return cur.rowcount > 0
+    @staticmethod
+    def _row_to_dict(row: tuple) -> dict:
+        """Map a SELECT * tuple to a column-keyed dict."""
+        return dict(zip(_COLUMNS, row))
+    def close(self) -> None:
+        """Close the database connection."""
+        self._conn.close()

package/core/knowledge/vector_store.py CHANGED Viewed

@@ -296,6 +296,58 @@ class VectorStore:
         ).fetchall()
         return [{"source": r["source"], "chunks": int(r["chunks"])} for r in rows]
+    def distinct_sources(self) -> list[str]:
+        """Return the distinct non-empty source strings, noisiest first.
+        Read-only reverse-lookup helper: the dashboard only has a
+        sha1-based source_id and must recover the raw source string to
+        serve chunks-only (pre-registry) sources. Reuses the same SELECT
+        shape as :meth:`list_sources`.
+        """
+        rows = self._db.execute(
+            "SELECT source, COUNT(*) AS chunks FROM chunks "
+            "WHERE source IS NOT NULL AND source != '' "
+            "GROUP BY source ORDER BY chunks DESC"
+        ).fetchall()
+        return [r["source"] for r in rows]
+    def chunks_for_source(self, source: str) -> list[dict]:
+        """Return all chunks for a source as text/heading/metadata dicts.
+        Ordered by ``id`` ASC (insertion / ingest order) so callers that
+        re-join the text — e.g. :meth:`transcript_for_source` — read the
+        chunks back in their original sequence.
+        """
+        rows = self._db.execute(
+            "SELECT text, heading, metadata FROM chunks "
+            "WHERE source = ? ORDER BY id",
+            (source,),
+        ).fetchall()
+        return [
+            {
+                "text": r["text"],
+                "heading": r["heading"],
+                "metadata": json.loads(r["metadata"]) if r["metadata"] else {},
+            }
+            for r in rows
+        ]
+    def transcript_for_source(self, source: str) -> str:
+        """Reconstruct a source's transcript from its indexed chunks.
+        Read-only. Joins the chunk texts (in ingest order, via
+        :meth:`chunks_for_source`) via :func:`~core.knowledge.chunker.stitch_chunks`,
+        which dedupes the token-overlap window the chunker keeps between
+        consecutive chunks so the seams don't repeat ~50 tokens of text.
+        Returns "" when the source has no chunks. Used to surface a transcript
+        for legacy sources ingested before the SourceRegistry, which have
+        chunks but no stored transcript.
+        """
+        from core.knowledge.chunker import stitch_chunks
+        chunks = self.chunks_for_source(source)
+        return stitch_chunks([c["text"] for c in chunks])
     def clear(self) -> None:
         """Remove all data."""
         if self._vec_available:

package/core/squads/__pycache__/loader.cpython-313.pyc CHANGED Viewed

Binary file

package/core/squads/loader.py CHANGED Viewed

@@ -38,3 +38,28 @@ def load_all_squads(base_dir: str | Path) -> list[Squad]:
             warnings.warn(f"Failed to load squad: {squad_file}: {e}")
     return squads
+def load_matrix_squads(squads_dir: str | Path) -> list[Squad]:
+    """Load cross-department matrix squads (missions + transversal).
+    These implement "Autonomy by Missions, not Departments": stream-aligned
+    mission squads that own an outcome end-to-end, and transversal
+    platform/enabling squads (RevOps, People & Org, Governance). Members are
+    borrowed from their home departments — agents keep their department home.
+    Discovers one level of categorised subdirectories (e.g.
+    squads/missions/*.yaml, squads/transversal/*.yaml) — not arbitrary depth,
+    so stray YAML elsewhere under the tree is never mistaken for a squad.
+    """
+    squads_dir = Path(squads_dir)
+    squads = []
+    for squad_file in sorted(squads_dir.glob("*/*.yaml")):
+        try:
+            squads.append(load_squad(squad_file))
+        except Exception as e:
+            import warnings
+            warnings.warn(f"Failed to load matrix squad: {squad_file}: {e}")
+    return squads

package/core/sync/__pycache__/agent_provisioner.cpython-313.pyc CHANGED Viewed

Binary file

package/core/sync/agent_provisioner.py CHANGED Viewed

@@ -139,12 +139,23 @@ def _find_agent_file(core: Path, name: str, suffix: str) -> Path | None:
     departments_root = (core / "departments").resolve()
     if not departments_root.exists():
         return None
-    for dept in departments_root.iterdir():
-        candidate = (dept / "agents" / f"{name}{suffix}").resolve()
-        try:
-            candidate.relative_to(departments_root)
-        except ValueError:
-            continue
-        if candidate.exists():
-            return candidate
+    # Top-level agents/<name> first (fast, deterministic), then sub-squad
+    # subdirectories (e.g. agents/backend-core/<name>.yaml). `name` is already
+    # validated above, so the glob cannot traverse outside the agents tree.
+    direct = (departments_root.glob(f"*/agents/{name}{suffix}"))
+    nested = (departments_root.glob(f"*/agents/**/{name}{suffix}"))
+    for candidate in [*sorted(direct), *sorted(nested)]:
+        resolved = _safe_resolve(candidate, departments_root)
+        if resolved is not None:
+            return resolved
     return None
+def _safe_resolve(candidate: Path, root: Path) -> Path | None:
+    """Resolve a candidate path, returning it only if it exists inside root."""
+    resolved = candidate.resolve()
+    try:
+        resolved.relative_to(root)
+    except ValueError:
+        return None
+    return resolved if resolved.exists() else None