npm - arkaos - Versions diffs - 3.78.0 → 4.0.1 - Mend

arkaos 3.78.0 → 4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (87)

package/README.md +42 -30
package/VERSION +1 -1
package/arka/SKILL.md +2 -2
package/config/agent-allowlists/laravel.yaml +1 -0
package/config/agent-allowlists/node.yaml +1 -0
package/config/agent-allowlists/nuxt.yaml +1 -0
package/config/agent-allowlists/python.yaml +1 -0
package/core/agents/__pycache__/registry_gen.cpython-313.pyc +0 -0
package/core/agents/__pycache__/schema.cpython-313.pyc +0 -0
package/core/agents/registry_gen.py +6 -1
package/core/agents/schema.py +4 -0
package/core/cognition/__pycache__/reorganizer.cpython-313.pyc +0 -0
package/core/cognition/reorganizer.py +37 -7
package/core/governance/__pycache__/design_system_lint.cpython-313.pyc +0 -0
package/core/governance/__pycache__/design_system_lint_cli.cpython-313.pyc +0 -0
package/core/knowledge/__pycache__/agent_match.cpython-313.pyc +0 -0
package/core/knowledge/__pycache__/chunker.cpython-313.pyc +0 -0
package/core/knowledge/__pycache__/ingest.cpython-313.pyc +0 -0
package/core/knowledge/__pycache__/sources.cpython-313.pyc +0 -0
package/core/knowledge/__pycache__/vector_store.cpython-313.pyc +0 -0
package/core/knowledge/agent_match.py +114 -0
package/core/knowledge/chunker.py +45 -0
package/core/knowledge/ingest.py +156 -78
package/core/knowledge/sources.py +138 -0
package/core/knowledge/vector_store.py +52 -0
package/core/squads/__pycache__/loader.cpython-313.pyc +0 -0
package/core/squads/loader.py +25 -0
package/core/sync/__pycache__/agent_provisioner.cpython-313.pyc +0 -0
package/core/sync/agent_provisioner.py +19 -8
package/dashboard/app/components/KnowledgeSourcesList.vue +40 -13
package/dashboard/app/pages/cognition.vue +9 -4
package/dashboard/app/pages/knowledge/[id].vue +669 -0
package/dashboard/app/pages/knowledge/index.vue +1281 -0
package/dashboard/app/types/index.d.ts +1 -1
package/departments/brand/agents/ux-designer.yaml +15 -1
package/departments/brand/agents/ux-researcher.yaml +73 -0
package/departments/brand/agents/ux-strategist.yaml +72 -0
package/departments/dev/agents/ai-engineering/ai-engineering-lead.yaml +76 -0
package/departments/dev/agents/architect.yaml +9 -3
package/departments/dev/agents/backend-core/laravel-eng.yaml +76 -0
package/departments/dev/agents/backend-core/node-ts-eng.yaml +76 -0
package/departments/dev/agents/backend-core/python-eng.yaml +76 -0
package/departments/dev/agents/backend-dev.yaml +10 -4
package/departments/dev/agents/data-platform/etl-eng.yaml +74 -0
package/departments/dev/agents/dba.yaml +7 -3
package/departments/dev/references/backend-knowledge-and-tools.md +70 -0
package/departments/ecom/agents/retention-manager.yaml +13 -1
package/departments/leadership/agents/culture-coach.yaml +20 -0
package/departments/leadership/agents/hr-specialist.yaml +18 -0
package/departments/leadership/agents/leadership-director.yaml +10 -0
package/departments/org/agents/chief-of-staff.yaml +76 -0
package/departments/org/agents/coo.yaml +11 -0
package/departments/org/agents/okr-steward.yaml +71 -0
package/departments/org/agents/org-designer.yaml +23 -0
package/departments/org/skills/okr-cadence/SKILL.md +34 -0
package/departments/org/skills/principles-audit/SKILL.md +36 -0
package/departments/pm/agents/pm-director.yaml +21 -8
package/departments/pm/agents/product-owner.yaml +24 -2
package/departments/pm/agents/scrum-master.yaml +21 -0
package/departments/pm/agents/strategic-pm.yaml +72 -0
package/departments/pm/skills/discovery-plan/SKILL.md +7 -1
package/departments/quality/agents/cqo.yaml +8 -0
package/departments/saas/agents/cs-manager.yaml +19 -2
package/departments/saas/agents/growth-engineer.yaml +14 -1
package/departments/saas/agents/metrics-analyst.yaml +17 -1
package/departments/saas/agents/revops-lead.yaml +73 -0
package/departments/saas/skills/leaky-bucket/SKILL.md +28 -0
package/departments/saas/skills/voc-loop/SKILL.md +29 -0
package/departments/sales/agents/sales-director.yaml +9 -0
package/departments/sales/agents/sdr.yaml +72 -0
package/departments/strategy/agents/decision-quality.yaml +72 -0
package/departments/strategy/agents/strategy-director.yaml +13 -0
package/departments/strategy/skills/premortem/SKILL.md +33 -0
package/knowledge/agents-registry-v2.json +1218 -78
package/package.json +1 -1
package/pyproject.toml +1 -1
package/scripts/__pycache__/dashboard-api.cpython-313.pyc +0 -0
package/scripts/bench/__init__.py +5 -0
package/scripts/bench/__pycache__/__init__.cpython-313.pyc +0 -0
package/scripts/bench/__pycache__/harness.cpython-313.pyc +0 -0
package/scripts/bench/__pycache__/run.cpython-313.pyc +0 -0
package/scripts/bench/harness.py +138 -0
package/scripts/bench/run.py +136 -0
package/scripts/dashboard-api.py +376 -13
package/scripts/tools/__pycache__/docs_stats.cpython-313.pyc +0 -0
package/scripts/tools/docs_stats.py +154 -0
package/dashboard/app/pages/knowledge.vue +0 -918

package/README.md CHANGED Viewed

@@ -2,13 +2,16 @@
 **The Operating System for AI Agent Teams.**
-106 agents. 17 departments. 250+ skills. Enterprise frameworks. Multi-runtime. One install.
+82 agents. 17 departments. 267 skills. Enterprise frameworks. Multi-runtime. One install.
 ```bash
 npx arkaos install
 ```
-[![npm](https://img.shields.io/npm/v/arkaos)](https://www.npmjs.com/package/arkaos) [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE) [![Tests](https://img.shields.io/badge/tests-3025%20passing-brightgreen)]()
+[![npm](https://img.shields.io/npm/v/arkaos)](https://www.npmjs.com/package/arkaos) [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE) [![Tests](https://img.shields.io/badge/tests-4500%2B%20passing-brightgreen)]()
+> All counts in this document are generated by `python scripts/tools/docs_stats.py`
+> and locked by a test — they cannot drift from the repository.
 ---
@@ -99,7 +102,7 @@ In plain language. No special syntax required.
 ### 2. ArkaOS routes to the right squad
-The Synapse engine (8-layer context injection in <1ms) analyzes your request and routes it to the correct department. Each department has a lead agent who orchestrates specialists.
+The Synapse engine (12-layer context injection, ~87ms cold / ~83ms warm — see [Benchmarks](wiki/11-Benchmarks.md)) analyzes your request and routes it to the correct department. Each department has a lead agent who orchestrates specialists.
 ### 3. Agents execute with enterprise frameworks
@@ -132,24 +135,27 @@ Every decision, solution, and pattern is captured. The Cognitive Layer curates i
 | Department | Prefix | Agents | What It Does |
 |-----------|--------|--------|-------------|
-| **Development** | `/dev` | 10 | Full-stack features, APIs, architecture, security, CI/CD |
+| **Development** | `/dev` | 15 | Full-stack features, APIs, architecture, security, CI/CD |
+| **Brand & Design** | `/brand` | 10 | Brand identity, UX/UI, design systems, naming |
 | **Marketing** | `/mkt` | 4 | SEO, paid ads, email campaigns, growth loops |
-| **Brand & Design** | `/brand` | 4 | Brand identity, UX/UI, design systems, naming |
-| **Finance** | `/fin` | 3 | DCF valuation, unit economics, budgets, investor prep |
-| **Strategy** | `/strat` | 3 | Market analysis, competitive intelligence, business models |
+| **Strategy** | `/strat` | 4 | Market analysis, competitive intelligence, business models |
 | **E-Commerce** | `/ecom` | 4 | Store optimization, CRO, pricing, RFM segmentation |
-| **Knowledge** | `/kb` | 3 | Research, Zettelkasten, persona building, ingestion |
-| **Operations** | `/ops` | 4 | Automation, SOPs, compliance (GDPR, ISO, SOC 2) |
-| **Project Mgmt** | `/pm` | 3 | Scrum, Shape Up, discovery, roadmaps |
-| **SaaS** | `/saas` | 4 | Idea validation, metrics, PLG strategy, scaffolding |
-| **Landing Pages** | `/landing` | 4 | Sales copy, funnels, offers, page generation |
+| **Knowledge** | `/kb` | 4 | Research, Zettelkasten, persona building, ingestion |
+| **Project Mgmt** | `/pm` | 4 | Scrum, Shape Up, discovery, roadmaps |
 | **Content** | `/content` | 4 | Viral hooks, scripts, repurposing, content calendars |
-| **Communities** | `/community` | 2 | Groups, membership, gamification, engagement |
-| **Sales** | `/sales` | 2 | Pipeline management, SPIN selling, negotiation |
-| **Leadership** | `/lead` | 2 | Team health, OKRs, culture, hiring frameworks |
-| **Organization** | `/org` | 1 | Org design, team topologies, matrix structure |
+| **Sales** | `/sales` | 4 | Pipeline management, SPIN selling, negotiation |
+| **SaaS** | `/saas` | 5 | Idea validation, metrics, PLG strategy, scaffolding |
+| **Organization** | `/org` | 5 | Org design, team topologies, matrix structure |
+| **Landing Pages** | `/landing` | 4 | Sales copy, funnels, offers, page generation |
+| **Finance** | `/fin` | 3 | DCF valuation, unit economics, budgets, investor prep |
+| **Operations** | `/ops` | 3 | Automation, SOPs, compliance (GDPR, ISO, SOC 2) |
+| **Communities** | `/community` | 3 | Groups, membership, gamification, engagement |
+| **Leadership** | `/lead` | 3 | Team health, OKRs, culture, hiring frameworks |
 | **Quality Gate** | (auto) | 3 | Mandatory review on every workflow. Veto power. |
+> 82 agents across 17 departments (81 unique; `cro-specialist` is shared by
+> E-Commerce and Landing in the matrix structure).
 ---
 ## Cognitive Layer (v2.10)
@@ -386,7 +392,7 @@ python scripts/tools/okr_cascade.py growth --json
 User Input
   │
   ▼
-Synapse v2 (8-layer context injection, <1ms, cached)
+Synapse v2 (12-layer context injection, ~87ms cold / ~83ms warm)
   │
   ▼
 Orchestrator (/do → department routing)
@@ -408,13 +414,13 @@ Output (Obsidian vault + structured deliverables)
 | System | Purpose |
 |--------|---------|
-| **Synapse v2** | 8-layer context injection (<1ms, with caching) |
+| **Synapse v2** | 12-layer context injection (~87ms cold, ~83ms warm; cacheable layers are sub-millisecond) |
 | **Workflow Engine** | YAML workflows with phases, gates, parallelization |
 | **Agent Schema** | 4-framework behavioral DNA with consistency validation |
 | **Squad Framework** | Department squads + ad-hoc project squads (matrix) |
 | **Cognitive Layer** | Memory, Dreaming, Research, Scheduler |
 | **Living Specs** | Bidirectional spec/code sync |
-| **Governance** | Constitution with 14 non-negotiable rules |
+| **Governance** | Constitution with 25 non-negotiable rules (+ 11 must, 8 should) |
 | **Multi-Runtime** | Claude Code, Codex, Gemini, Cursor adapters |
 ### Tech Stack
@@ -427,22 +433,28 @@ Output (Obsidian vault + structured deliverables)
 | Workflows | YAML |
 | Agent Definitions | YAML |
 | Knowledge | Obsidian + SQLite-VSS |
-| Tests | pytest (1,993 tests) |
+| Tests | pytest (4,500+ tests) |
 ---
 ## Documentation
-Full documentation is available on the **[GitHub Wiki](https://github.com/andreagroferreira/arka-os/wiki)**:
+Full documentation lives in two places in this repository:
+**[`wiki/`](wiki/Home.md)** — the user-facing guide (step-by-step, features, benchmarks):
+- [Home](wiki/Home.md) — the index of everything
+- [Getting Started](wiki/01-Getting-Started.md) — install and run your first command
+- [Core Concepts](wiki/02-Core-Concepts.md) — squads, agents, tiers, behavioral DNA
+- [The 13-Phase Flow](wiki/03-The-13-Phase-Flow.md) — how every request is handled
+- [Departments](wiki/04-Departments/) — one page per department
+- [Commands Reference](wiki/05-Commands-Reference.md)
+- [Cognitive Layer](wiki/06-Cognitive-Layer.md) — memory, dreaming, research
+- [Benchmarks](wiki/11-Benchmarks.md) — measured, reproducible numbers
+- [Competitive Analysis](wiki/12-Competitive-Analysis.md) and [Benefits & ROI](wiki/13-Benefits-ROI.md)
-- [Getting Started](https://github.com/andreagroferreira/arka-os/wiki/Getting-Started)
-- [Installation Guide](https://github.com/andreagroferreira/arka-os/wiki/Installation)
-- [Departments & Agents](https://github.com/andreagroferreira/arka-os/wiki/Departments)
-- [Cognitive Layer](https://github.com/andreagroferreira/arka-os/wiki/Cognitive-Layer)
-- [Ecosystem Management](https://github.com/andreagroferreira/arka-os/wiki/Ecosystems)
-- [Configuration](https://github.com/andreagroferreira/arka-os/wiki/Configuration)
-- [Creating Projects](https://github.com/andreagroferreira/arka-os/wiki/Creating-Projects)
-- [Update & Sync](https://github.com/andreagroferreira/arka-os/wiki/Update-and-Sync)
+**[`docs/`](docs/)** — the technical/contributor reference (architecture, API,
+agent schema, core engine, ADRs).
 ---
@@ -493,7 +505,7 @@ Department commands: `/dev`, `/mkt`, `/brand`, `/fin`, `/strat`, `/ecom`, `/kb`,
 ## Contributing
-See [CONTRIBUTING.md](.github/CONTRIBUTING.md). PRs welcome — all changes require passing the full test suite (3,473+ tests as of v2.46.x) and Quality Gate review (Marta CQO + Eduardo Copy + Francisca Tech, all Opus).
+See [CONTRIBUTING.md](.github/CONTRIBUTING.md). PRs welcome — all changes require passing the full test suite (4,500+ tests as of v4.0.0) and Quality Gate review (Marta CQO + Eduardo Copy + Francisca Tech, all Opus).
 ## License

package/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 3.78.0
1	+ 4.0.1

package/arka/SKILL.md CHANGED Viewed

@@ -21,10 +21,10 @@ treat them as your default source. External research supplements, it
 does not replace the vault.
 <!-- arka:kb-first-prefix end -->
-# ArkaOS v2 — Main Orchestrator
+# ArkaOS — Main Orchestrator
 > **The Operating System for AI Agent Teams**
-> 65 agents. 17 departments. 244+ skills. Multi-runtime. Dashboard. Knowledge RAG.
+> 82 agents. 17 departments. 267 skills. Multi-runtime. Dashboard. Knowledge RAG.
 ## ⛔ Mandatory 13-phase flow (NON-NEGOTIABLE)

package/config/agent-allowlists/laravel.yaml CHANGED Viewed

@@ -1,6 +1,7 @@
 stack: laravel
 baseline:
   - backend-dev
+  - laravel-eng
   - senior-dev
   - architect
   - qa

package/config/agent-allowlists/node.yaml CHANGED Viewed

@@ -1,6 +1,7 @@
 stack: node
 baseline:
   - backend-dev
+  - node-ts-eng
   - senior-dev
   - architect
   - qa

package/config/agent-allowlists/nuxt.yaml CHANGED Viewed

@@ -2,6 +2,7 @@ stack: nuxt
 baseline:
   - frontend-dev
   - backend-dev
+  - node-ts-eng
   - architect
   - qa
   - devops

package/config/agent-allowlists/python.yaml CHANGED Viewed

@@ -1,5 +1,6 @@
 stack: python
 baseline:
+  - python-eng
   - senior-dev
   - architect
   - qa

package/core/agents/__pycache__/registry_gen.cpython-313.pyc ADDED Viewed

Binary file

package/core/agents/__pycache__/schema.cpython-313.pyc CHANGED Viewed

Binary file

package/core/agents/registry_gen.py CHANGED Viewed

@@ -27,7 +27,9 @@ def generate_registry(departments_dir: str | Path, output_path: str | Path) -> d
     agents = []
     errors = []
-    for yaml_file in sorted(departments_dir.glob("*/agents/*.yaml")):
+    # Recursive: also picks up sub-squad subdirectories
+    # (e.g. dev/agents/backend-core/*.yaml, brand/agents/design-ops/*.yaml).
+    for yaml_file in sorted(departments_dir.glob("*/agents/**/*.yaml")):
         try:
             agent = load_agent(yaml_file)
             entry = {
@@ -36,6 +38,8 @@ def generate_registry(departments_dir: str | Path, output_path: str | Path) -> d
                 "role": agent.role,
                 "department": agent.department,
                 "tier": agent.tier,
+                "parent_squad": agent.parent_squad,
+                "sub_squad_role": agent.sub_squad_role,
                 "disc": {
                     "primary": agent.behavioral_dna.disc.primary.value,
                     "secondary": agent.behavioral_dna.disc.secondary.value,
@@ -60,6 +64,7 @@ def generate_registry(departments_dir: str | Path, output_path: str | Path) -> d
                 },
                 "expertise_domains": agent.expertise.domains[:5],
                 "frameworks": agent.expertise.frameworks[:5],
+                "knowledge_sources": agent.expertise.knowledge_sources,
                 "file": str(yaml_file.relative_to(departments_dir.parent)),
                 "memory_path": agent.memory_path,
             }

package/core/agents/schema.py CHANGED Viewed

@@ -237,6 +237,10 @@ class Expertise(BaseModel):
     """What the agent knows."""
     domains: list[str] = Field(default_factory=list)
     frameworks: list[str] = Field(default_factory=list)
+    knowledge_sources: list[str] = Field(
+        default_factory=list,
+        description="Obsidian KB notes ([[wikilinks]]) that ground this agent's expertise.",
+    )
     depth: str = "expert"  # novice, intermediate, expert, master
     years_equivalent: int = 10

package/core/cognition/__pycache__/reorganizer.cpython-313.pyc CHANGED Viewed

Binary file

package/core/cognition/reorganizer.py CHANGED Viewed

@@ -291,26 +291,56 @@ def _safe_int(value: object) -> int:
         return 0
-def _redact(text: str) -> str:
+def redact_clients(text: str) -> str:
+    """Redact configured client identifiers from arbitrary text.
+    Public, reusable wrapper around the module's compiled redaction regex
+    (loaded from ``~/.arkaos/redaction-clients.json``). Returns the text
+    unchanged when no patterns are configured. Used by any propose-only
+    producer — the reorganizer report and the agent-attribution proposal —
+    so client names never leak into a generated artifact.
+    """
     if _REDACT_RE is None:
         return text
     return _REDACT_RE.sub(_REDACT_TOKEN, text)
-def _md_escape(text: str) -> str:
-    """Escape markdown control characters that would distort a table row.
+def _redact(text: str) -> str:
+    return redact_clients(text)
+def md_escape(text: str) -> str:
+    """Neutralise untrusted text for an HTML-rendering markdown viewer.
-    Titles and excerpts come from frontmatter the operator controls, but
-    a `|` in a title silently shifts table columns and corrupts the raw-
-    artifact table. Escape pipes, newlines, and stray backticks.
+    Public, reusable wrapper. Titles and excerpts come from untrusted
+    sources (web page titles, YouTube/PDF metadata) and land in .md files
+    that the user opens in viewers such as Obsidian, which render raw HTML
+    inside markdown. To prevent stored HTML/JS injection (CWE-79) we
+    HTML-escape ``<``/``>`` to ``&lt;``/``&gt;`` so any tag renders as
+    literal text. We also escape pipes/backslashes (a ``|`` silently shifts
+    table columns), strip backticks, and flatten newlines so an untrusted
+    title can never distort or execute inside the rendered artifact. Used by
+    the reorganizer report and the agent-attribution proposal.
+    Callers redact client names *before* escaping, so the trusted
+    ``<redacted-client>`` marker may already be present; it is preserved
+    verbatim while every other angle bracket is neutralised.
     """
-    return (
+    escaped = (
         text.replace("\\", "\\\\")
+            .replace("<", "&lt;")
+            .replace(">", "&gt;")
             .replace("|", "\\|")
             .replace("\n", " ")
             .replace("\r", " ")
             .replace("`", "")
     )
+    escaped_token = _REDACT_TOKEN.replace("<", "&lt;").replace(">", "&gt;")
+    return escaped.replace(escaped_token, _REDACT_TOKEN)
+def _md_escape(text: str) -> str:
+    return md_escape(text)
 def _body_excerpt(body: str) -> str:

package/core/governance/__pycache__/design_system_lint.cpython-313.pyc CHANGED Viewed

Binary file

package/core/governance/__pycache__/design_system_lint_cli.cpython-313.pyc CHANGED Viewed

Binary file

package/core/knowledge/__pycache__/agent_match.cpython-313.pyc ADDED Viewed

Binary file

package/core/knowledge/__pycache__/chunker.cpython-313.pyc CHANGED Viewed

Binary file

package/core/knowledge/__pycache__/ingest.cpython-313.pyc CHANGED Viewed

Binary file

package/core/knowledge/__pycache__/sources.cpython-313.pyc ADDED Viewed

Binary file

package/core/knowledge/__pycache__/vector_store.cpython-313.pyc CHANGED Viewed

Binary file

package/core/knowledge/agent_match.py ADDED Viewed

@@ -0,0 +1,114 @@
+"""Semantic agent attribution for a knowledge source (PR3).
+Given the knowledge text of a source, suggest WHICH agents should learn
+from it by comparing the source text against each agent's expertise
+profile via local embeddings (``core.knowledge.embedder``).
+Pure and propose-only: this module reads agent dicts passed in by the
+caller and NEVER writes agent YAMLs. It degrades gracefully — when the
+embedder is unavailable (fastembed missing) or the source text is empty
+it returns an empty list, and the caller surfaces a reason.
+The registry stores ``expertise_domains`` and ``frameworks`` as flat
+list keys on each agent dict (not nested under an ``expertise`` object).
+"""
+from __future__ import annotations
+import math
+from core.knowledge import embedder
+_PROFILE_FIELDS = ("expertise_domains", "frameworks")
+_MAX_MATCHED_TERMS = 5
+def agent_profile_text(agent: dict) -> str:
+    """Build one searchable string from an agent's role + expertise.
+    Concatenates role, expertise domains, frameworks, and (optionally)
+    name into a single space-joined string suitable for embedding. Empty
+    fields are skipped so a sparse agent still yields a clean profile.
+    """
+    parts: list[str] = []
+    name = str(agent.get("name") or "").strip()
+    role = str(agent.get("role") or "").strip()
+    if role:
+        parts.append(role)
+    for field in _PROFILE_FIELDS:
+        parts.extend(str(v).strip() for v in agent.get(field) or [] if str(v).strip())
+    if name:
+        parts.append(name)
+    return " ".join(parts)
+def cosine(a: list[float], b: list[float]) -> float:
+    """Cosine similarity of two vectors; 0.0 if either is empty/zero-norm."""
+    if not a or not b or len(a) != len(b):
+        return 0.0
+    dot = sum(x * y for x, y in zip(a, b))
+    norm_a = math.sqrt(sum(x * x for x in a))
+    norm_b = math.sqrt(sum(y * y for y in b))
+    if norm_a == 0.0 or norm_b == 0.0:
+        return 0.0
+    return dot / (norm_a * norm_b)
+def _profile_texts(agents: list[dict]) -> list[str]:
+    """Profile text for each agent, preserving order."""
+    return [agent_profile_text(agent) for agent in agents]
+def _explain_match(source_text: str, agent: dict) -> list[str]:
+    """Up to 5 expertise/framework terms that textually appear in source.
+    A cheap, case-insensitive substring "why" explanation — independent of
+    the embedding similarity — so the proposal can show concrete overlap.
+    """
+    haystack = source_text.lower()
+    matched: list[str] = []
+    for field in _PROFILE_FIELDS:
+        for term in agent.get(field) or []:
+            clean = str(term).strip()
+            if clean and clean.lower() in haystack and clean not in matched:
+                matched.append(clean)
+                if len(matched) >= _MAX_MATCHED_TERMS:
+                    return matched
+    return matched
+def _build_result(source_text: str, agent: dict, score: float) -> dict:
+    """Shape one ranked match for the API response."""
+    return {
+        "id": agent.get("id", ""),
+        "name": agent.get("name", ""),
+        "department": agent.get("department", ""),
+        "role": agent.get("role", ""),
+        "score": round(score, 3),
+        "matched_terms": _explain_match(source_text, agent),
+    }
+def match_agents(source_text: str, agents: list[dict], top_n: int = 5) -> list[dict]:
+    """Rank agents by semantic similarity of their expertise to the source.
+    Returns ``[]`` when ``source_text`` is empty or the embedder is
+    unavailable (caller surfaces a reason). Embeds the source once and the
+    agent profiles in a single batch, then sorts by cosine descending and
+    returns the top ``top_n`` results, each with id/name/department/role/
+    score (0..1, 3dp) and matched_terms. Never writes anything.
+    """
+    if not source_text.strip() or not agents:
+        return []
+    source_vec = embedder.embed(source_text)
+    if source_vec is None:
+        return []
+    agent_vecs = embedder.embed_batch(_profile_texts(agents))
+    if agent_vecs is None:
+        return []
+    scored = [
+        _build_result(source_text, agent, cosine(source_vec, vec))
+        for agent, vec in zip(agents, agent_vecs)
+    ]
+    scored.sort(key=lambda r: r["score"], reverse=True)
+    return scored[: max(top_n, 0)]

package/core/knowledge/chunker.py CHANGED Viewed

@@ -119,3 +119,48 @@ def chunk_markdown(
         ))
     return chunks
+# Minimum contiguous token run treated as a real overlap. The chunker seeds
+# each chunk with the previous chunk's last ``overlap_tokens`` words (default
+# 50), so genuine seams are tens of tokens long. Requiring >= 5 avoids
+# stripping on a 1-2 word coincidence (e.g. both chunks ending/starting "the").
+_MIN_OVERLAP_TOKENS = 5
+def _overlap_token_count(prev: list[str], cur: list[str], window: int) -> int:
+    """Return length of the longest suffix of ``prev`` that prefixes ``cur``.
+    Compares whitespace tokens (never mid-word). Searches the largest possible
+    overlap first within ``window`` and returns the first match, so the result
+    is the true chunker overlap window rather than a short coincidence. Returns
+    0 when no run of at least ``_MIN_OVERLAP_TOKENS`` tokens matches.
+    """
+    max_len = min(len(prev), len(cur), window)
+    for length in range(max_len, _MIN_OVERLAP_TOKENS - 1, -1):
+        if prev[-length:] == cur[:length]:
+            return length
+    return 0
+def stitch_chunks(texts: list[str], max_overlap_tokens: int = 200) -> str:
+    """Re-join overlapping chunks into a clean transcript, deduping seams.
+    ``chunk_markdown`` prepends each chunk with a token-overlap window copied
+    from the previous chunk. Naively joining the chunks therefore repeats that
+    window at every boundary. This detects the actual overlap per adjacent pair
+    (longest suffix-of-prev == prefix-of-cur, on token boundaries, capped at
+    ``max_overlap_tokens``) and strips it from the later chunk before joining
+    with blank lines. No overlap detected -> plain join, so content is never
+    lost. Single chunk returns as-is; empty list returns "".
+    """
+    parts = [t for t in texts if t]
+    if not parts:
+        return ""
+    result = [parts[0]]
+    for cur in parts[1:]:
+        prev_tokens = result[-1].split()
+        cur_tokens = cur.split()
+        overlap = _overlap_token_count(prev_tokens, cur_tokens, max_overlap_tokens)
+        result.append(" ".join(cur_tokens[overlap:]) if overlap else cur)
+    return "\n\n".join(p for p in result if p)