npm - arkaos - Versions diffs - 2.22.0 → 2.22.1 - Mend

arkaos 2.22.0 → 2.22.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

package/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 2.22.0
1	+ 2.22.1

package/core/cognition/__pycache__/auto_documentor.cpython-313.pyc CHANGED Viewed

Binary file

package/core/cognition/auto_documentor.py CHANGED Viewed

@@ -21,16 +21,28 @@ from __future__ import annotations
 import json
 import re
+from contextlib import contextmanager
 from dataclasses import dataclass, field
+from datetime import datetime, timezone
 from pathlib import Path
 from typing import Iterable
 from core.obsidian import cataloger as _cataloger
 from core.obsidian import relator as _relator
 from core.obsidian.writer import ObsidianWriter
+from core.shared import safe_session_id as _safe_session_id_module
+try:
+    import fcntl  # POSIX only
+    _HAS_FLOCK = True
+except ImportError:
+    _HAS_FLOCK = False
-SAFE_SESSION_ID_RE = re.compile(r"^[A-Za-z0-9._-]{1,128}$")
+# Re-export for backward compatibility with any external importers.
+SAFE_SESSION_ID_RE = _safe_session_id_module.SAFE_SESSION_ID_RE
+AUTO_DOC_TELEMETRY_PATH = Path.home() / ".arkaos" / "telemetry" / "auto_doc.jsonl"
 _URL_RE = re.compile(r"https?://[^\s\)\]\"']+")
 _FILE_PATH_RE = re.compile(r"(?:^|[\s`'])(/[A-Za-z0-9_./\-]+\.[A-Za-z0-9]+)")
@@ -58,8 +70,8 @@ _SYSTEM_PROMPT = (
     "(150-300 words) summarising the session. Structure: short intro, "
     "then markdown sections for Key Facts, Decisions, and Sources. "
     "Preserve every URL and file path verbatim. Use Obsidian wikilinks "
-    "([[Topic]]) for reusable concepts. No preamble, no sign-off, no "
-    "meta commentary about the model or prompt. Output only markdown."
+    "([[Topic]]) for reusable concepts. Do not include preamble, sign-off, "
+    "or meta commentary about the model or prompt. Output only markdown."
 )
@@ -329,18 +341,52 @@ def _build_synthesis_prompt(learning: Learning) -> str:
     return "\n".join(lines)
+def _extract_key_facts(learning: Learning, limit: int = 5) -> list[str]:
+    """Pull up to ``limit`` bullet candidates from the learning content.
+    Used by the template fallback so both the LLM and template paths
+    produce a ``## Key Facts`` section in the same order as
+    ``_SYSTEM_PROMPT`` requires.
+    Every non-empty line is a candidate once its leading bullet marker
+    (``-``, ``*``, ``•``) is stripped. Lines that start with ``#``,
+    ``>`` or a backtick, and lines shorter than 8 characters, are
+    skipped; survivors are clipped to 240 characters.
+    Returns an empty list when there is no content or ``limit`` is not
+    positive.
+    """
+    text = (learning.content or "").strip()
+    if not text or limit <= 0:
+        return []
+    bullets: list[str] = []
+    # Blank lines fall out through the emptiness check below, so
+    # paragraph boundaries need no separate handling.
+    for raw in text.splitlines():
+        line = raw.strip().lstrip("-*• ").strip()
+        # Headings, blockquotes and backtick-led lines aren't facts.
+        if not line or line.startswith(("#", ">", "`")):
+            continue
+        if len(line) < 8:
+            continue
+        bullets.append(line[:240])
+        if len(bullets) >= limit:
+            break
+    return bullets
 def _template_synthesize(learning: Learning) -> str:
-    parts = [f"# {learning.topic}", ""]
+    # Section order mirrors _SYSTEM_PROMPT: Key Facts → Decisions →
+    # Sources. Keeping both synthesis paths aligned means downstream
+    # consumers (MOC generation, relator) never branch on provider.
+    parts: list[str] = [f"# {learning.topic}", ""]
     parts.append(f"> {_AUTO_DOC_SUFFIX}.")
     parts.append("")
     if learning.content.strip():
         parts.append(learning.content.strip())
         parts.append("")
-    if learning.sources:
-        parts.append("## Sources")
+    key_facts = _extract_key_facts(learning)
+    if key_facts:
+        parts.append("## Key Facts")
         parts.append("")
-        for src in learning.sources[:20]:
-            parts.append(f"- {src}")
+        for fact in key_facts:
+            parts.append(f"- {fact}")
         parts.append("")
     if learning.decisions:
         parts.append("## Decisions")
@@ -348,6 +394,12 @@ def _template_synthesize(learning: Learning) -> str:
         for dec in learning.decisions[:10]:
             parts.append(f"- {dec}")
         parts.append("")
+    if learning.sources:
+        parts.append("## Sources")
+        parts.append("")
+        for src in learning.sources[:20]:
+            parts.append(f"- {src}")
+        parts.append("")
     return "\n".join(parts).rstrip() + "\n"
@@ -391,13 +443,78 @@ def _document_one(
     meta.setdefault("auto_documented", True)
     try:
         plan = _cataloger.plan(body, meta)
-    except ValueError:
+    except ValueError as exc:
+        _log_auto_doc_event(
+            session_id=session_id,
+            event="classification-failed",
+            topic=learning.topic,
+            reason=str(exc),
+        )
+        return None
+    if plan is None:
+        _log_auto_doc_event(
+            session_id=session_id,
+            event="succeeded-empty",
+            topic=learning.topic,
+            reason="cataloger returned no plan",
+        )
         return None
     note_path = _cataloger.execute(plan, body, writer)
     _relate_note(note_path, body, vault_path, plan)
+    _log_auto_doc_event(
+        session_id=session_id,
+        event="succeeded-wrote-note",
+        topic=learning.topic,
+        reason=str(note_path),
+    )
     return note_path
+@contextmanager
+def _locked_append(path: Path):
+    """Append to ``path`` under an exclusive advisory lock (POSIX flock).
+    Mirrors the pattern in ``core/workflow/flow_enforcer._locked_append``
+    — see that module for the platform-fallback rationale.
+    Creates the parent directory if needed and yields a UTF-8 text
+    handle opened in append mode. When ``fcntl`` could not be imported
+    the handle is yielded without any locking.
+    """
+    path.parent.mkdir(parents=True, exist_ok=True)
+    fh = path.open("a", encoding="utf-8")
+    try:
+        if _HAS_FLOCK:
+            fcntl.flock(fh.fileno(), fcntl.LOCK_EX)
+        yield fh
+        # Push buffered text to the OS while the lock is still held;
+        # otherwise the bytes only land at close(), after LOCK_UN, and
+        # the lock never covers the actual write.
+        fh.flush()
+    finally:
+        if _HAS_FLOCK:
+            try:
+                fcntl.flock(fh.fileno(), fcntl.LOCK_UN)
+            except OSError:
+                # Best effort: close() below releases the lock anyway.
+                pass
+        fh.close()
+def _log_auto_doc_event(
+    *,
+    session_id: str,
+    event: str,
+    topic: str,
+    reason: str,
+) -> None:
+    """Record one auto-doc telemetry line; swallow filesystem errors.
+    Writes a JSON object (UTC timestamp, session id, event, topic
+    clipped to 120 chars, reason clipped to 240 chars) as a single line
+    to ``AUTO_DOC_TELEMETRY_PATH`` via ``_locked_append``.
+    """
+    stamp = datetime.now(timezone.utc).isoformat()
+    record = {
+        "ts": stamp,
+        "session_id": session_id,
+        "event": event,
+        "topic": topic[:120],
+        "reason": reason[:240],
+    }
+    try:
+        with _locked_append(AUTO_DOC_TELEMETRY_PATH) as sink:
+            sink.write(f"{json.dumps(record)}\n")
+    except OSError:
+        # A telemetry hiccup must never take the doc job down with it.
+        pass
 def _relate_note(note_path: Path, body: str, vault_path: Path, plan) -> None:
     try:
         related = _relator.find_related(
@@ -428,6 +545,4 @@ def _append_related_block(note_path: Path, related) -> None:
 def _safe_session_id(session_id: str) -> bool:
-    if not isinstance(session_id, str) or not session_id:
-        return False
-    return bool(SAFE_SESSION_ID_RE.match(session_id))
+    return _safe_session_id_module.safe_session_id(session_id) is not None

package/core/jobs/__pycache__/auto_doc_worker.cpython-313.pyc CHANGED Viewed

Binary file

package/core/jobs/auto_doc_worker.py CHANGED Viewed

@@ -28,7 +28,6 @@ from __future__ import annotations
 import argparse
 import json
 import os
-import re
 import sys
 import time
 import uuid
@@ -36,9 +35,12 @@ from datetime import datetime, timezone
 from pathlib import Path
 from typing import Optional
+from core.shared import safe_session_id as _safe_session_id_module
 MAX_ATTEMPTS = 3
-SAFE_SESSION_ID_RE = re.compile(r"^[A-Za-z0-9._-]{1,128}$")
+# Re-export for backward compatibility with any external importers.
+SAFE_SESSION_ID_RE = _safe_session_id_module.SAFE_SESSION_ID_RE
 _QUEUE_SUBDIRS = ("pending", "processing", "completed", "failed")
@@ -77,7 +79,7 @@ def enqueue_job(
     """Write a pending job file. Returns the job id."""
     root = queue_root or _queue_root()
     _ensure_queue(root)
-    safe = session_id if SAFE_SESSION_ID_RE.match(session_id or "") else "unknown"
+    safe = _safe_session_id_module.safe_session_id(session_id or "") or "unknown"
     job_id = f"{int(time.time())}-{uuid.uuid4().hex[:12]}"
     payload = {
         "job_id": job_id,

package/core/runtime/__pycache__/__init__.cpython-313.pyc CHANGED Viewed

Binary file

package/core/runtime/__pycache__/base.cpython-313.pyc CHANGED Viewed

Binary file

package/core/runtime/__pycache__/claude_code.cpython-313.pyc CHANGED Viewed

Binary file

package/core/runtime/__pycache__/codex_cli.cpython-313.pyc CHANGED Viewed

Binary file

package/core/runtime/__pycache__/cursor.cpython-313.pyc CHANGED Viewed

Binary file

package/core/runtime/__pycache__/gemini_cli.cpython-313.pyc CHANGED Viewed

Binary file

package/core/runtime/__pycache__/llm_cost_telemetry.cpython-313.pyc CHANGED Viewed

Binary file

package/core/runtime/__pycache__/llm_cost_telemetry_cli.cpython-313.pyc CHANGED Viewed

Binary file

package/core/runtime/__pycache__/llm_provider.cpython-313.pyc CHANGED Viewed

Binary file

package/core/runtime/__pycache__/pricing.cpython-313.pyc CHANGED Viewed

Binary file

package/core/runtime/claude_code.py CHANGED Viewed

@@ -120,7 +120,7 @@ class ClaudeCodeAdapter(RuntimeAdapter):
         max_tokens: int = 2000,
         system: str = "",
     ) -> "LLMResponse":
-        from core.runtime.llm_provider import LLMResponse, LLMUnavailable
+        from core.runtime.llm_provider import LLMUnavailable
         binary = shutil.which("claude")
         if binary is None:
@@ -131,27 +131,28 @@ class ClaudeCodeAdapter(RuntimeAdapter):
         cmd = [binary, "-p", prompt, "--output-format", "json"]
         if system:
             cmd.extend(["--append-system-prompt", system])
-        try:
-            proc = subprocess.run(
-                cmd,
-                capture_output=True,
-                text=True,
-                timeout=60,
-                check=False,
-            )
-        except subprocess.TimeoutExpired as exc:
-            raise LLMUnavailable("claude CLI timed out after 60s") from exc
-        except OSError as exc:
-            raise LLMUnavailable(f"claude CLI subprocess failed: {exc}") from exc
+        proc = _run_claude_cli(cmd)
         if proc.returncode != 0:
             raise LLMUnavailable(
                 f"claude CLI exited {proc.returncode}: {proc.stderr.strip()[:200]}"
             )
-        return _parse_claude_json(proc.stdout)
+        return _parse_claude_cli_output(proc.stdout)
+def _run_claude_cli(cmd: list[str]) -> subprocess.CompletedProcess:
+    """Run the ``claude`` CLI command and return the completed process.
+    Output is captured as text with a 60-second timeout; a non-zero
+    exit status is returned to the caller rather than raised.
+    Raises:
+        LLMUnavailable: on ``subprocess.TimeoutExpired`` or ``OSError``.
+    """
+    from core.runtime.llm_provider import LLMUnavailable
+    run_options = {
+        "capture_output": True,
+        "text": True,
+        "timeout": 60,
+        "check": False,
+    }
+    try:
+        completed = subprocess.run(cmd, **run_options)
+    except subprocess.TimeoutExpired as exc:
+        raise LLMUnavailable("claude CLI timed out after 60s") from exc
+    except OSError as exc:
+        raise LLMUnavailable(f"claude CLI subprocess failed: {exc}") from exc
+    return completed
-def _parse_claude_json(stdout: str) -> "LLMResponse":
+def _parse_claude_cli_output(stdout: str) -> "LLMResponse":
     from core.runtime.llm_provider import LLMResponse
     payload = json.loads(stdout) if stdout.strip() else {}
@@ -170,3 +171,8 @@ def _parse_claude_json(stdout: str) -> "LLMResponse":
         cached_tokens=cache_read,
         model=model,
     )
+# Backward compatibility alias — tests and external importers that used
+# the old helper name continue to work without modification.
+_parse_claude_json = _parse_claude_cli_output

package/core/runtime/codex_cli.py CHANGED Viewed

@@ -94,11 +94,28 @@ class CodexCliAdapter(RuntimeAdapter):
                 "codex CLI not found on PATH — install Codex CLI to "
                 "enable headless completion."
             )
-        # TODO(llm-agnostic): Verify Codex CLI headless invocation
-        # syntax (`codex exec "<prompt>"` was the working hypothesis
-        # but has not been confirmed for the current release). Until
-        # then, refuse rather than guess. Tracked in Task #12 report.
+        # TODO(llm-agnostic): Implement real headless completion.
+        #
+        # Status as of 2026-04-20: Codex CLI is NOT installed on the
+        # development machine, so actual invocation syntax could not
+        # be verified. Until a local install is available, refuse
+        # rather than ship guessed arguments.
+        #
+        # Verification checklist for whoever picks this up:
+        #   1. Install:   npm install -g @openai/codex-cli
+        #   2. Discover:  codex --help   (confirm non-interactive flag)
+        #   3. Pattern:   likely `codex exec "<prompt>"` or
+        #                 `codex --prompt "<prompt>" --format json`
+        #   4. Wire the subprocess call (mirror the Gemini adapter —
+        #      list-form args, 60s timeout, stderr clipped, JSON parse
+        #      with plain-text fallback, token estimate on miss).
+        #
+        # SubagentProvider cleanly falls back to anthropic-direct or
+        # stub when this raises, so the chain keeps working.
         raise NotImplementedError(
-            "Codex CLI headless completion not yet wired — verify CLI "
-            "syntax before enabling. See core/runtime/codex_cli.py TODO."
+            "Codex CLI headless mode requires local `codex` CLI. "
+            "Install: `npm install -g @openai/codex-cli` (verified 2026-04-20). "
+            "Verify syntax: `codex --help`. "
+            "See TODO(llm-agnostic) in this file. "
+            "SubagentProvider will cleanly fall back to anthropic-direct or stub."
         )

package/core/runtime/gemini_cli.py CHANGED Viewed

@@ -1,9 +1,26 @@
 """Gemini CLI runtime adapter.
 Google's Gemini CLI. Uses GEMINI.md for instructions and activate_skill for skills.
+Headless invocation reference (verified against
+https://github.com/google-gemini/gemini-cli docs — Context7 query
+on 2026-04-20):
+    gemini -p "<prompt>" --output-format json
+The JSON payload contains a ``response`` key (the model's text) and a
+``stats`` block with ``totalTokenCount`` / token counts. On failure the
+payload includes an ``error`` block with diagnostic details. If JSON
+parsing fails we fall back to treating stdout as raw text and estimate
+tokens via a ``len(text) // 4`` heuristic — better than losing cost
+telemetry entirely.
 """
+from __future__ import annotations
+import json
 import shutil
+import subprocess
 from pathlib import Path
 from os.path import expanduser
 from typing import TYPE_CHECKING
@@ -14,6 +31,11 @@ if TYPE_CHECKING:
     from core.runtime.llm_provider import LLMResponse
+_TIMEOUT_SECONDS = 60
+_TOKEN_ESTIMATE_DIVISOR = 4  # Rough chars-per-token heuristic.
+_STDERR_CLIP = 200
 class GeminiCliAdapter(RuntimeAdapter):
     """Adapter for Google's Gemini CLI."""
@@ -73,10 +95,7 @@ class GeminiCliAdapter(RuntimeAdapter):
         raise NotImplementedError("Use Gemini CLI's native content search")
     def headless_supported(self) -> bool:
-        # Gemini CLI headless invocation syntax is not verified for the
-        # current release. Returning False lets SubagentProvider fall
-        # back gracefully rather than shell out blindly.
-        return False
+        return shutil.which("gemini") is not None
     def headless_complete(
         self,
@@ -85,17 +104,122 @@ class GeminiCliAdapter(RuntimeAdapter):
         max_tokens: int = 2000,
         system: str = "",
     ) -> "LLMResponse":
+        from core.runtime.llm_provider import LLMUnavailable
         binary = shutil.which("gemini")
         if binary is None:
             raise NotImplementedError(
                 "gemini CLI not found on PATH — install Gemini CLI to "
                 "enable headless completion."
             )
-        # TODO(llm-agnostic): Verify Gemini CLI's headless invocation
-        # (`gemini -p "<prompt>"` was the working hypothesis). Until
-        # confirmed for the shipped CLI version, refuse rather than
-        # guess. Tracked in Task #12 report.
-        raise NotImplementedError(
-            "Gemini CLI headless completion not yet wired — verify CLI "
-            "syntax before enabling. See core/runtime/gemini_cli.py TODO."
+        effective_prompt = _merge_system_prompt(prompt, system)
+        cmd = [binary, "-p", effective_prompt, "--output-format", "json"]
+        proc = _run_gemini_cli(cmd)
+        if proc.returncode != 0:
+            stderr_tail = proc.stderr.strip()[:_STDERR_CLIP]
+            raise LLMUnavailable(
+                f"gemini CLI exited {proc.returncode}: {stderr_tail}"
+            )
+        return _parse_gemini_cli_output(proc.stdout)
+def _merge_system_prompt(prompt: str, system: str) -> str:
+    """Fold an optional system prompt into the single ``-p`` prompt.
+    The prompt is returned untouched when ``system`` is empty; otherwise
+    the system text comes first, separated from the prompt by a
+    ``---`` rule on its own line.
+    """
+    # Gemini CLI's -p flag takes one prompt string, so the system text
+    # is prepended to keep behaviour in line with Claude Code.
+    return f"{system}\n\n---\n\n{prompt}" if system else prompt
+def _run_gemini_cli(cmd: list[str]) -> subprocess.CompletedProcess:
+    """Run the ``gemini`` CLI command and return the completed process.
+    Output is captured as text and the call is bounded by
+    ``_TIMEOUT_SECONDS``; a non-zero exit status is returned to the
+    caller rather than raised.
+    Raises:
+        LLMUnavailable: on ``subprocess.TimeoutExpired`` or ``OSError``.
+    """
+    from core.runtime.llm_provider import LLMUnavailable
+    try:
+        completed = subprocess.run(
+            cmd,
+            check=False,
+            timeout=_TIMEOUT_SECONDS,
+            text=True,
+            capture_output=True,
+        )
+    except subprocess.TimeoutExpired as exc:
+        message = f"gemini CLI timed out after {_TIMEOUT_SECONDS}s"
+        raise LLMUnavailable(message) from exc
+    except OSError as exc:
+        raise LLMUnavailable(f"gemini CLI subprocess failed: {exc}") from exc
+    return completed
+def _parse_gemini_cli_output(stdout: str) -> "LLMResponse":
+    """Turn the ``gemini`` CLI's stdout into an ``LLMResponse``.
+    Blank output yields an empty response with zero token counts. A
+    JSON object is handed to ``_response_from_json_payload``; anything
+    else is treated as raw text by ``_response_from_plain_text``.
+    """
+    from core.runtime.llm_provider import LLMResponse
+    body = stdout.strip()
+    if body:
+        decoded = _safe_loads(body)
+        if decoded is not None:
+            return _response_from_json_payload(decoded)
+        # Not a JSON object: keep the text and estimate the tokens.
+        return _response_from_plain_text(body)
+    return LLMResponse(
+        text="", tokens_in=0, tokens_out=0, cached_tokens=0, model=""
+    )
+def _safe_loads(text: str) -> dict | None:
+    """Decode ``text`` as JSON, returning it only if it is an object.
+    Returns ``None`` for malformed JSON and for valid JSON whose top
+    level is not a dict.
+    """
+    try:
+        decoded = json.loads(text)
+    except ValueError:
+        # json.JSONDecodeError subclasses ValueError, so this covers both.
+        return None
+    if isinstance(decoded, dict):
+        return decoded
+    return None
+def _response_from_plain_text(text: str) -> "LLMResponse":
+    """Wrap raw (non-JSON) CLI output in an ``LLMResponse``.
+    Output tokens are estimated as ``len(text) // _TOKEN_ESTIMATE_DIVISOR``
+    with a floor of 1; input and cached tokens are reported as 0 and
+    the model name is left empty.
+    """
+    from core.runtime.llm_provider import LLMResponse
+    approx_tokens = len(text) // _TOKEN_ESTIMATE_DIVISOR
+    if approx_tokens < 1:
+        approx_tokens = 1
+    return LLMResponse(
+        text=text,
+        tokens_in=0,
+        tokens_out=approx_tokens,
+        cached_tokens=0,
+        model="",
+    )
+def _response_from_json_payload(payload: dict) -> "LLMResponse":
+    """Build an ``LLMResponse`` from a decoded Gemini CLI JSON object.
+    Text comes from ``response`` (falling back to ``result``), token
+    counts from ``_extract_token_counts`` and the model name from
+    ``model``; cached tokens are always reported as 0.
+    Raises:
+        LLMUnavailable: ``payload["error"]`` is a non-empty dict. Its
+            ``message`` (or the whole dict when that is missing) is
+            clipped to ``_STDERR_CLIP`` characters in the exception text.
+    """
+    from core.runtime.llm_provider import LLMResponse, LLMUnavailable
+    failure = payload.get("error")
+    if isinstance(failure, dict) and failure:
+        detail = str(failure.get("message") or failure).strip()[:_STDERR_CLIP]
+        raise LLMUnavailable(f"gemini CLI returned error: {detail}")
+    body = str(payload.get("response") or payload.get("result") or "")
+    prompt_tokens, completion_tokens = _extract_token_counts(payload, body)
+    return LLMResponse(
+        text=body,
+        tokens_in=prompt_tokens,
+        tokens_out=completion_tokens,
+        cached_tokens=0,
+        model=str(payload.get("model") or ""),
+    )
+def _extract_token_counts(payload: dict, text: str) -> tuple[int, int]:
+    """Return ``(tokens_in, tokens_out)`` for a Gemini CLI JSON payload.
+    Counts are read from the ``stats`` block (or ``usageMetadata``).
+    When per-side counts are both zero the rolled-up
+    ``totalTokenCount`` is reported as output tokens. When the payload
+    carries no usable stats block at all, output tokens are estimated
+    from ``len(text)`` with a floor of 1.
+    """
+    stats = payload.get("stats") or payload.get("usageMetadata")
+    # Require a non-empty dict: defaulting a missing block to ``{}``
+    # would take this branch and report (0, 0) instead of an estimate.
+    if isinstance(stats, dict) and stats:
+        tokens_in = int(stats.get("promptTokenCount") or stats.get("input_tokens") or 0)
+        tokens_out = int(
+            stats.get("candidatesTokenCount")
+            or stats.get("output_tokens")
+            or 0
         )
+        # Fall back to the rolled-up total when per-side counts are absent.
+        if tokens_in == 0 and tokens_out == 0:
+            total = int(stats.get("totalTokenCount") or 0)
+            if total > 0:
+                return 0, total
+        return tokens_in, tokens_out
+    # No stats block at all — estimate output from text length.
+    return 0, max(1, len(text) // _TOKEN_ESTIMATE_DIVISOR)

package/core/runtime/llm_provider.py CHANGED Viewed

@@ -199,6 +199,19 @@ class AnthropicDirectProvider:
             }
         ]
+    def _build_anthropic_payload(
+        self, prompt: str, system: str, max_tokens: int, model: str
+    ) -> dict[str, object]:
+        """Assemble the keyword arguments for ``messages.create``.
+        The prompt is sent as a single user message. A ``system`` entry
+        is added only when ``_build_system_blocks`` returns something
+        truthy for ``system``.
+        """
+        user_turn = {"role": "user", "content": prompt}
+        request: dict[str, object] = {
+            "model": model,
+            "max_tokens": max_tokens,
+            "messages": [user_turn],
+        }
+        blocks = self._build_system_blocks(system)
+        if not blocks:
+            return request
+        request["system"] = blocks
+        return request
     def complete(
         self,
         prompt: str,
@@ -213,15 +226,7 @@ class AnthropicDirectProvider:
                 "cannot select a model."
             )
         client = self._build_client()
-        payload: dict[str, object] = {
-            "model": model,
-            "max_tokens": max_tokens,
-            "messages": [{"role": "user", "content": prompt}],
-        }
-        system_blocks = self._build_system_blocks(system)
-        if system_blocks:
-            payload["system"] = system_blocks
+        payload = self._build_anthropic_payload(prompt, system, max_tokens, model)
         try:
             raw = client.messages.create(**payload)  # type: ignore[attr-defined]
         except Exception as exc:  # noqa: BLE001

package/core/shared/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+"""Cross-cutting primitives shared by multiple ArkaOS core packages.
+Keep this package lean — only primitives that two or more sibling
+packages already duplicate belong here. It is NOT a dumping ground for
+utilities; each addition must delete a duplicate elsewhere.
+"""

package/core/shared/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file

package/core/shared/__pycache__/safe_session_id.cpython-313.pyc ADDED Viewed

Binary file

package/core/shared/safe_session_id.py ADDED Viewed

@@ -0,0 +1,41 @@
+"""Shared session-id allowlist — path-traversal / injection guard.
+A session id is considered safe iff it matches ``[A-Za-z0-9._-]{1,128}``.
+Any other character (``/``, ``\\``, whitespace, control char, unicode,
+NUL, ``..``) rejects — callers MUST treat ``None`` as "do not use this
+id for any filesystem or shell path".
+Why this lives here: the exact same regex + helper was duplicated in 6
+modules (flow_enforcer, marker_cache, research_gate, kb_cache,
+auto_documentor, auto_doc_worker). A single source of truth prevents
+drift — if the allowlist ever tightens, it tightens everywhere.
+Historic aliases remain at each call site as module-level re-exports
+so external importers that did ``from core.workflow.flow_enforcer
+import SAFE_SESSION_ID_RE`` continue to work.
+"""
+from __future__ import annotations
+import re
+# ``\Z`` rather than ``$``: ``$`` also matches just before a trailing
+# newline, which would let an id such as "abc\n" through ``.match()``.
+SAFE_SESSION_ID_RE = re.compile(r"^[A-Za-z0-9._-]{1,128}\Z")
+def safe_session_id(session_id: str) -> str | None:
+    """Validate ``session_id`` against the strict allowlist.
+    Returns the id unchanged when safe, or ``None`` when it contains
+    path separators, ``..`` traversal fragments, whitespace, unicode,
+    NUL bytes, or any character outside ``[A-Za-z0-9._-]``. Length is
+    capped at 128 characters to prevent pathological filesystem paths.
+    Callers MUST treat ``None`` as reject — never construct a path or
+    shell argument from the raw input when this returns ``None``.
+    """
+    if not session_id or not isinstance(session_id, str):
+        return None
+    # fullmatch so the whole id, including any trailing newline, must
+    # satisfy the allowlist.
+    if not SAFE_SESSION_ID_RE.fullmatch(session_id):
+        return None
+    # "." and ".." are built only from allowlisted characters but are
+    # directory references, so the character class cannot reject them.
+    if session_id == "." or ".." in session_id:
+        return None
+    return session_id

package/core/synapse/__pycache__/kb_cache.cpython-313.pyc CHANGED Viewed

Binary file

package/core/synapse/__pycache__/layers.cpython-313.pyc CHANGED Viewed

Binary file

package/core/synapse/kb_cache.py CHANGED Viewed

@@ -23,15 +23,17 @@ Turn-scoped marker (record_obsidian_query / read_obsidian_query):
 import hashlib
 import json
 import os
-import re
 import threading
 import time
 import uuid
 from pathlib import Path
 from typing import Any, Optional
+from core.shared import safe_session_id as _safe_session_id_module
-SAFE_SESSION_ID_RE = re.compile(r"^[A-Za-z0-9._-]{1,128}$")
+# Re-export for backward compatibility with any external importers.
+SAFE_SESSION_ID_RE = _safe_session_id_module.SAFE_SESSION_ID_RE
 KB_QUERY_MARKER_DIR = Path("/tmp/arkaos-kb-query")
 _MAX_QUERIES_PER_TURN = 32
 _MAX_QUERY_LEN = 512
@@ -428,11 +430,10 @@ def _kb_query_dir() -> Path:
 def _kb_query_path(session_id: str) -> Optional[Path]:
-    if not session_id or not isinstance(session_id, str):
-        return None
-    if not SAFE_SESSION_ID_RE.match(session_id):
+    safe = _safe_session_id_module.safe_session_id(session_id)
+    if safe is None:
         return None
-    return _kb_query_dir() / f"{session_id}.json"
+    return _kb_query_dir() / f"{safe}.json"
 def record_obsidian_query(session_id: str, query: str, hit_count: int = 0) -> None:

package/core/synapse/layers.py CHANGED Viewed

@@ -795,6 +795,11 @@ class SessionContextLayer(Layer):
 _WIKILINK_RE = re.compile(r"\[\[([^\]|#]+)(?:\|[^\]]+)?\]\]")
 _FRONTMATTER_RE = re.compile(r"^---\n.*?\n---\n", re.DOTALL)
 _KB_CONFIG_PATH = Path.home() / ".arkaos" / "config.json"
+# Cap the number of fallback notes loaded so that large Obsidian vaults
+# do not cause O(vault size) file reads. 2000 notes, taken in sorted
+# path order, is well above any realistic top-N retrieval need (Jaccard
+# ranking only surfaces the top few) while still bounding worst-case
+# latency — see `_load_fallback_notes`.
+_MAX_FALLBACK_NOTES = 2000
 _KB_STOPWORDS: frozenset[str] = frozenset({
     "a", "an", "the", "and", "or", "but", "in", "on", "at", "to", "for", "of",
     "with", "by", "from", "as", "is", "was", "are", "were", "be", "been", "being",
@@ -929,6 +934,8 @@ def _load_fallback_notes(vault_path: Optional[Path]) -> list[dict]:
         return []
     notes: list[dict] = []
     for md in sorted(vault_path.rglob("*.md")):
+        if len(notes) >= _MAX_FALLBACK_NOTES:
+            break
         try:
             raw = md.read_text(encoding="utf-8")
         except (OSError, UnicodeDecodeError):

package/core/workflow/__pycache__/flow_enforcer.cpython-313.pyc CHANGED Viewed

Binary file

package/core/workflow/__pycache__/marker_cache.cpython-313.pyc CHANGED Viewed

Binary file

package/core/workflow/__pycache__/research_gate.cpython-313.pyc CHANGED Viewed

Binary file

package/core/workflow/flow_enforcer.py CHANGED Viewed

@@ -22,6 +22,7 @@ from dataclasses import asdict, dataclass
 from datetime import datetime, timezone
 from pathlib import Path
+from core.shared import safe_session_id as _safe_session_id_module
 from core.workflow import marker_cache
 try:
@@ -58,7 +59,11 @@ GATED_TOOLS: frozenset[str] = frozenset({"Write", "Edit", "MultiEdit"})
 ROUTING_RE = re.compile(r"\[arka:routing\]\s*[\w-]+\s*->\s*\w+", re.IGNORECASE)
 TRIVIAL_RE = re.compile(r"\[arka:trivial\]\s*\S+", re.IGNORECASE)
 PHASE_RE = re.compile(r"\[arka:phase:\d+\]", re.IGNORECASE)
-SAFE_SESSION_ID_RE = re.compile(r"^[A-Za-z0-9._-]{1,128}$")
+# Re-export for backward compatibility with any external importers that
+# relied on the module-level symbols before the core.shared extraction.
+SAFE_SESSION_ID_RE = _safe_session_id_module.SAFE_SESSION_ID_RE
+_safe_session_id = _safe_session_id_module.safe_session_id
 ASSISTANT_WINDOW = 6
 CONFIG_PATH = Path.home() / ".arkaos" / "config.json"
@@ -67,19 +72,6 @@ TELEMETRY_PATH = Path.home() / ".arkaos" / "telemetry" / "enforcement.jsonl"
 FLOW_REQUIRED_DIR = Path("/tmp/arkaos-wf-required")
-def _safe_session_id(session_id: str) -> str | None:
-    """Validate session_id against a strict allowlist (prevents path traversal).
-    Returns the id if safe, or None if it contains path separators, dots-dots,
-    or characters outside `[A-Za-z0-9._-]`. Callers MUST treat None as reject.
-    """
-    if not session_id or not isinstance(session_id, str):
-        return None
-    if not SAFE_SESSION_ID_RE.match(session_id):
-        return None
-    return session_id
 @dataclass
 class Decision:
     """Outcome of enforcement evaluation for a single tool call."""

package/core/workflow/marker_cache.py CHANGED Viewed

@@ -13,13 +13,15 @@ ADR compliance (docs/adr/2026-04-17-binding-flow-enforcement.md):
 import json
 import os
-import re
 import threading
 import time
 import uuid
 from dataclasses import dataclass
 from pathlib import Path
+from core.shared import safe_session_id as _safe_session_id_module
 def _resolve_cache_dir() -> Path:
     override = os.environ.get("ARKA_MARKER_CACHE_DIR", "").strip()
     if override:
@@ -28,7 +30,8 @@ def _resolve_cache_dir() -> Path:
 MARKER_CACHE_DIR = _resolve_cache_dir()
-SAFE_SESSION_ID_RE = re.compile(r"^[A-Za-z0-9._-]{1,128}$")
+# Re-export for backward compatibility with any external importers.
+SAFE_SESSION_ID_RE = _safe_session_id_module.SAFE_SESSION_ID_RE
 VALID_MARKER_TYPES: frozenset[str] = frozenset({"routing", "trivial", "phase"})
 _MAX_LABEL_LEN = 64
@@ -51,12 +54,7 @@ class MarkerRecord:
         }
-def _safe_session_id(session_id: str) -> str | None:
-    if not session_id or not isinstance(session_id, str):
-        return None
-    if not SAFE_SESSION_ID_RE.match(session_id):
-        return None
-    return session_id
+_safe_session_id = _safe_session_id_module.safe_session_id
 def _cache_path(session_id: str) -> Path | None:

package/core/workflow/research_gate.py CHANGED Viewed

@@ -27,6 +27,7 @@ from dataclasses import asdict, dataclass, field
 from datetime import datetime, timezone
 from pathlib import Path
+from core.shared import safe_session_id as _safe_session_id_module
 from core.synapse import kb_cache
 try:
@@ -48,7 +49,8 @@ RESEARCH_EXTERNAL_TOOLS: frozenset[str] = frozenset({
     "mcp__firecrawl__firecrawl_extract",
 })
-SAFE_SESSION_ID_RE = re.compile(r"^[A-Za-z0-9._-]{1,128}$")
+# Re-export for backward compatibility with any external importers.
+SAFE_SESSION_ID_RE = _safe_session_id_module.SAFE_SESSION_ID_RE
 CONFIG_PATH = Path.home() / ".arkaos" / "config.json"
 BYPASS_AUDIT_PATH = Path.home() / ".arkaos" / "audit" / "kb_first_bypass.log"
 TELEMETRY_PATH = Path.home() / ".arkaos" / "telemetry" / "kb_first.jsonl"
@@ -100,12 +102,7 @@ def _locked_append(path: Path):
         fh.close()
-def _safe_session_id(session_id: str) -> str | None:
-    if not session_id or not isinstance(session_id, str):
-        return None
-    if not SAFE_SESSION_ID_RE.match(session_id):
-        return None
-    return session_id
+_safe_session_id = _safe_session_id_module.safe_session_id
 def _feature_flag_on() -> bool:
@@ -163,9 +160,14 @@ def _mark_violation(session_id: str, tool: str) -> None:
         return
     path.parent.mkdir(parents=True, exist_ok=True)
     entry = json.dumps({"tool": tool, "ts": datetime.now(timezone.utc).isoformat()})
+    # Race contract: two concurrent tool calls on the same session may
+    # both observe "no prior violation" and both emit the first-violation
+    # nudge. This is intentional — a nudge is cheap and both calls were
+    # genuinely first-ish. Deny is reserved for the SECOND violation
+    # after the first marker is on disk, which is what a plain
+    # ``write_text`` (non-exclusive, last-writer-wins) gives us. Tested
+    # by ``test_concurrent_violation_markers_race_safe``.
     try:
-        # O_CREAT|O_EXCL would be stricter, but we want idempotent writes
-        # from a concurrent race — last writer wins, both see "first".
         path.write_text(entry, encoding="utf-8")
     except OSError:
         pass

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "arkaos",
-  "version": "2.22.0",
+  "version": "2.22.1",
   "description": "The Operating System for AI Agent Teams",
   "type": "module",
   "bin": {

package/pyproject.toml CHANGED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "arkaos-core"
-version = "2.22.0"
+version = "2.22.1"
 description = "Core engine for ArkaOS — The Operating System for AI Agent Teams"
 readme = "README.md"
 license = {text = "MIT"}