npm - okstra - Versions diffs - 0.64.1 → 0.66.0 - Mend

okstra 0.64.1 → 0.66.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

package/bin/okstra +1 -0
package/docs/kr/architecture.md +2 -0
package/docs/kr/cli.md +12 -4
package/docs/kr/performance-improvement-plan-v2.md +2 -1
package/docs/project-structure-overview.md +1 -0
package/docs/superpowers/plans/2026-06-10-p6-token-usage-incremental.md +1029 -0
package/docs/superpowers/specs/2026-06-10-blocking-contract-posthoc-conformance-design.md +168 -0
package/package.json +1 -1
package/runtime/BUILD.json +2 -2
package/runtime/agents/SKILL.md +4 -2
package/runtime/agents/workers/claude-worker.md +1 -1
package/runtime/agents/workers/codex-worker.md +1 -0
package/runtime/agents/workers/gemini-worker.md +1 -0
package/runtime/bin/lib/okstra/cli.sh +4 -0
package/runtime/bin/lib/okstra/globals.sh +1 -0
package/runtime/bin/lib/okstra/usage.sh +4 -1
package/runtime/bin/okstra.sh +1 -0
package/runtime/prompts/profiles/_implementation-executor.md +1 -0
package/runtime/python/okstra_ctl/clarification_items.py +96 -37
package/runtime/python/okstra_ctl/context_cost.py +86 -8
package/runtime/python/okstra_ctl/locks.py +32 -0
package/runtime/python/okstra_ctl/migrate.py +45 -6
package/runtime/python/okstra_ctl/models.py +5 -0
package/runtime/python/okstra_ctl/pr_template.py +2 -7
package/runtime/python/okstra_ctl/render_final_report.py +2 -1
package/runtime/python/okstra_ctl/run.py +58 -44
package/runtime/python/okstra_ctl/run_context.py +3 -8
package/runtime/python/okstra_ctl/seeding.py +25 -18
package/runtime/python/okstra_ctl/wizard.py +9 -11
package/runtime/python/okstra_ctl/worktree.py +13 -0
package/runtime/python/okstra_project/dirs.py +10 -1
package/runtime/python/okstra_token_usage/claude.py +226 -61
package/runtime/python/okstra_token_usage/cli.py +10 -1
package/runtime/python/okstra_token_usage/collect.py +34 -27
package/runtime/python/okstra_token_usage/cursor.py +93 -0
package/runtime/python/okstra_token_usage/paths.py +29 -2
package/runtime/python/okstra_token_usage/pricing.py +7 -3
package/runtime/skills/okstra-coding-preflight/clean-code.md +15 -0
package/runtime/skills/okstra-inspect/SKILL.md +16 -11
package/runtime/skills/okstra-run/templates/pr-body.template.md +13 -16
package/runtime/skills/okstra-schedule/SKILL.md +3 -3
package/runtime/skills/okstra-team-contract/SKILL.md +1 -1
package/runtime/validators/lib/fixtures.sh +73 -10
package/runtime/validators/lib/runners.sh +4 -0
package/runtime/validators/validate-run.py +53 -0
package/runtime/validators/validate_session_conformance.py +430 -0
package/src/migrate.mjs +31 -0

package/runtime/validators/validate_session_conformance.py ADDED Viewed

@@ -0,0 +1,430 @@
+"""agents/SKILL.md BLOCKING 계약 3종의 post-hoc conformance 검사.
+설계: docs/superpowers/specs/2026-06-10-blocking-contract-posthoc-conformance-design.md
+| 검사 | 선언 위치 | 증거 |
+|------|----------|------|
+| 1. lead PROGRESS 체크포인트 라인 | agents/SKILL.md "Progress reporting (BLOCKING)" | lead 세션 jsonl 의 assistant text 블록 |
+| 2. claude-worker 5분 heartbeat | agents/workers/claude-worker.md "Heartbeat" | audit 사이드카의 `- PROGRESS: <stage> <ISO>` 라인 |
+| 3. implementation sidecar entry guard | agents/SKILL.md "Entry guard (BLOCKING)" | lead 세션 jsonl 의 Read tool_use |
+스캔 규칙 (false-pass 방지가 핵심):
+- `type == "assistant"` 레코드만 본다 — Skill 호출 시 SKILL.md 본문(체크포인트
+  라인 예시 포함)이 tool_result(user 레코드)로 transcript 에 주입되므로,
+  assistant 외 레코드를 보면 즉시 false pass 가 난다. `isSidechain` 레코드도 제외.
+- run 윈도우(`resolve_run_window`)로 스코핑한다 — in-session lead 는 세션 전체
+  jsonl 에 기록되므로, 같은 세션의 직전 run 이 남긴 라인이 증거로 오인된다.
+  (실측: dev-9692 lead 세션 1개에 implementation run 001–003 이 모두 들어 있음.)
+- validator 실행 시점(Phase 7) **이후** 에 출력되는 체크포인트
+  (`phase-7-teardown`, `complete`)는 구조적으로 검사 불가 — 요구하지 않는다.
+"""
+from __future__ import annotations
+import json
+import os
+import re
+import sys
+from dataclasses import dataclass, field
+from datetime import datetime
+from pathlib import Path
# Worker statuses for which the lead is considered to have dispatched the worker.
_DISPATCHED_STATUSES = {"completed", "timeout", "error", "in-progress"}
# The dispatched statuses minus `in-progress`.
_ATTEMPTED_STATUSES = _DISPATCHED_STATUSES - {"in-progress"}

# The lead's checkpoint line — accepted only when line-anchored inside an
# assistant text block.
_PROGRESS_LINE_RE = re.compile(r"^PROGRESS:[ \t]+(?P<phase>\S+)(?P<rest>.*)$", re.MULTILINE)

# Heartbeat line of the claude-worker audit sidecar (claude-worker.md "Heartbeat").
_HEARTBEAT_LINE_RE = re.compile(
    r"^-[ \t]*PROGRESS:[ \t]*(?P<stage>\S+)[ \t]+(?P<ts>\S+)[ \t]*$", re.MULTILINE
)

# The contractual cadence is 5 minutes. A fixed 60-second grace is added to
# absorb the delay between the timestamp measured right before the append and
# the actual write.
_HEARTBEAT_MAX_GAP_SECONDS = (5 + 1) * 60

# Implementation profile sidecars the lead must Read before entering Phase 5/6.
# The absolute path differs per layer (repo / runtime / installed copy), but the
# basename is the same everywhere.
_SIDECAR_BASENAMES = (
    "_implementation-executor.md",
    "_implementation-verifier.md",
    "_implementation-deliverable.md",
)
@dataclass
class SessionConformanceResult:
    """Outcome of the conformance checks, expressed as a list of failure messages."""

    # One message per detected violation; stays empty when nothing failed.
    errors: list[str] = field(default_factory=list)

    @property
    def ok(self) -> bool:
        """Return True when no failure message has been recorded."""
        return len(self.errors) == 0
@dataclass
class _LeadEvidence:
    """Evidence gathered from the lead session jsonl file(s) for one run."""

    # Checkpoint lines as (timestamp, phase-id, full line) tuples.
    progress: list[tuple[str, str, str]] = field(default_factory=list)
    # Sidecar basename -> timestamps of the `Read` tool calls that touched it.
    sidecar_reads: dict[str, list[str]] = field(default_factory=dict)
    # The jsonl files that contributed to this evidence.
    scanned_files: list[Path] = field(default_factory=list)
    # The (since, until) run window the scan was scoped to.
    window: tuple[str | None, str | None] = (None, None)
def _ensure_token_usage_importable() -> None:
    """Put the directory that holds the ``okstra_token_usage`` package on ``sys.path``.

    Candidate directories, in order: ``<parent of this file's dir>/scripts``,
    ``<parent of this file's dir>/python``, then ``$OKSTRA_PYTHONPATH`` when it is
    set to a non-blank value. The first candidate that is a directory containing
    an ``okstra_token_usage`` directory wins and is inserted at the front of
    ``sys.path`` unless it is already listed; later candidates are not examined.
    Per the original note, this mirrors the candidates used by
    ``_import_token_usage`` in validate-run.py.
    """
    layer_root = Path(__file__).resolve().parent.parent
    search_roots = [layer_root / "scripts", layer_root / "python"]

    override = os.environ.get("OKSTRA_PYTHONPATH", "").strip()
    if override:
        search_roots.append(Path(override))

    for root in search_roots:
        if not root.is_dir() or not (root / "okstra_token_usage").is_dir():
            continue
        root_str = str(root)
        if root_str not in sys.path:
            sys.path.insert(0, root_str)
        return
def _norm(value: str) -> str:
    """Lower-case *value* and strip every character outside ``a-z0-9``.

    A falsy *value* (``""`` or ``None``) yields the empty string.
    """
    lowered = value.lower() if value else ""
    return re.sub(r"[^a-z0-9]", "", lowered)
def _is_report_writer(worker: dict) -> bool:
    """Return True when the worker's normalized ``role`` or ``workerId`` contains ``reportwriter``."""
    for key in ("role", "workerId"):
        if "reportwriter" in _norm(str(worker.get(key, ""))):
            return True
    return False
def _worker_needles(worker: dict) -> list[str]:
    """Build the normalized substrings that identify *worker* in a checkpoint line.

    The declared line format is ``worker=<role>``; matching on normalized text
    absorbs spelling differences between role and workerId
    (``Claude worker`` <-> ``claude-worker``).

    Returns:
        Up to two needles, in this order: the normalized ``role`` and the
        normalized ``workerId`` with ``"worker"`` appended. Empty values are
        left out.
    """
    role_needle = _norm(str(worker.get("role", "")))
    id_part = _norm(str(worker.get("workerId", "")))
    id_needle = (id_part + "worker") if id_part else ""
    return [needle for needle in (role_needle, id_needle) if needle]
def _scan_one_jsonl(
    path: Path, since: str | None, until: str | None
) -> tuple[list[tuple[str, str, str]], dict[str, list[str]], str | None]:
    """Extract (progress, sidecar reads, agentName) from a single session jsonl file.

    Only records with ``type == "assistant"`` that are not sidechain records are
    inspected. A record carrying a timestamp is kept only when
    ``ts_in_window(ts, since, until)`` accepts it; a record without a usable
    timestamp is not window-filtered.

    Malformed input never aborts the scan: undecodable bytes are replaced while
    reading, lines that are not valid JSON or not JSON objects are skipped, and
    blocks with unexpected shapes are ignored.

    Args:
        path: The jsonl transcript to read.
        since: Lower bound of the run window, forwarded to ``ts_in_window``.
        until: Upper bound of the run window, forwarded to ``ts_in_window``.

    Returns:
        ``(progress, reads, agent_name)`` where ``progress`` is a list of
        ``(timestamp, phase-id, stripped line)`` tuples, ``reads`` maps a sidecar
        basename to the timestamps of its ``Read`` tool calls, and ``agent_name``
        is the first truthy ``agentName`` seen in any record (``None`` if absent).
        All three are empty/``None`` when the file cannot be opened.
    """
    from okstra_token_usage.paths import ts_in_window

    progress: list[tuple[str, str, str]] = []
    reads: dict[str, list[str]] = {}
    agent_name: str | None = None
    try:
        # Decoding happens while iterating the file, not inside json.loads, so
        # a strict decoder would raise outside the per-line try below.
        # errors="replace" keeps one bad byte from aborting the whole scan.
        fh = path.open(encoding="utf-8", errors="replace")
    except OSError:
        return progress, reads, agent_name
    with fh:
        for raw in fh:
            try:
                rec = json.loads(raw)
            except ValueError:  # json.JSONDecodeError is a ValueError subclass
                continue
            if not isinstance(rec, dict):
                continue  # valid JSON but not a record (e.g. a bare list or string)
            if agent_name is None and rec.get("agentName"):
                agent_name = rec["agentName"]
            if rec.get("type") != "assistant" or rec.get("isSidechain"):
                continue
            ts = rec.get("timestamp") or ""
            if not isinstance(ts, str):
                ts = ""  # keep timestamps comparable/sortable as strings
            if ts and not ts_in_window(ts, since, until):
                continue
            msg = rec.get("message")
            content = msg.get("content") if isinstance(msg, dict) else None
            if not isinstance(content, list):
                continue
            for block in content:
                if not isinstance(block, dict):
                    continue
                block_type = block.get("type")
                if block_type == "text":
                    text = block.get("text")
                    if not isinstance(text, str):
                        continue
                    for m in _PROGRESS_LINE_RE.finditer(text):
                        progress.append((ts, m.group("phase"), m.group(0).strip()))
                elif block_type == "tool_use" and block.get("name") == "Read":
                    tool_input = block.get("input")
                    file_path = tool_input.get("file_path") if isinstance(tool_input, dict) else None
                    base = Path(str(file_path or "")).name
                    if base in _SIDECAR_BASENAMES:
                        reads.setdefault(base, []).append(ts)
    return progress, reads, agent_name
def _collect_lead_evidence(
    team_state: dict,
    team_state_path: Path,
    project_root: Path,
    projects_root: Path | None,
) -> tuple[_LeadEvidence | None, str | None]:
    """Scan the lead-candidate jsonl files and gather the evidence they contain.

    Candidates = {the recorded ``lead.sessionId``} ∪ {sessions carrying the team
    tag but no ``agentName``}. The latter covers a lead session forked through
    ``claude --resume`` (new sessionId, no agentName); worker sessions carry an
    agentName and are therefore excluded.

    Returns:
        ``(evidence, None)`` on success, or ``(None, message)`` when no candidate
        file was scanned.
    """
    from okstra_token_usage.claude import find_claude_team_sessions
    from okstra_token_usage.collect import resolve_run_window, resolve_team_name
    from okstra_token_usage.paths import claude_project_dir

    since, until = resolve_run_window(team_state_path, team_state)
    lead_sid = (team_state.get("lead") or {}).get("sessionId") or ""
    team_name = resolve_team_name(team_state)
    sessions = find_claude_team_sessions(
        project_root, team_name, lead_sid, projects_root=projects_root
    )

    evidence = _LeadEvidence(window=(since, until))
    for sid, jsonl_path in sorted(sessions.items()):
        found_progress, found_reads, agent_name = _scan_one_jsonl(jsonl_path, since, until)
        is_lead_candidate = not agent_name or sid == lead_sid
        if not is_lead_candidate:
            # A session stamped with an agentName is a worker — not a lead candidate.
            continue
        evidence.scanned_files.append(jsonl_path)
        evidence.progress.extend(found_progress)
        for basename, stamps in found_reads.items():
            evidence.sidecar_reads.setdefault(basename, []).extend(stamps)

    if evidence.scanned_files:
        evidence.progress.sort()
        for stamps in evidence.sidecar_reads.values():
            stamps.sort()
        return evidence, None

    proj_dir = claude_project_dir(project_root, projects_root)
    message = (
        f"lead session jsonl not found under {proj_dir} "
        f"(lead.sessionId={lead_sid or '<empty>'}) — PROGRESS checkpoint / "
        "implementation entry-guard conformance cannot be verified, which "
        "fails the run (same principle as the token-usage accuracy contract)."
    )
    return None, message
def _convergence_rounds_ran(run_dir: Path, suffix: str | None) -> bool:
    """Tell whether this run's convergence state artifact recorded at least one round.

    The artifact is ``<run_dir>/state/convergence-<suffix>.json``. An
    auto-disabled convergence (``totalRounds: 0``) or a missing artifact means
    the phase-5.5 checkpoint line is not required, so both return False.

    Args:
        run_dir: The run directory that holds the ``state`` folder.
        suffix: The run artifact suffix; a falsy value returns False immediately.

    Returns:
        True only when the artifact is a JSON object whose ``totalRounds`` is a
        number >= 1. Unreadable, undecodable, malformed, or unexpectedly typed
        artifacts return False rather than raising.
    """
    if not suffix:
        return False
    path = run_dir / "state" / f"convergence-{suffix}.json"
    try:
        # Explicit UTF-8: the locale default is not guaranteed to decode JSON.
        doc = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return False
    if not isinstance(doc, dict):
        return False
    rounds = doc.get("totalRounds") or 0
    # A non-numeric value (e.g. the string "3") would raise on `>=`.
    return isinstance(rounds, (int, float)) and rounds >= 1
def _phase_mentions_worker(lines: list[tuple[str, str]], needles: list[str]) -> bool:
    """Return True when any needle occurs in the normalized text of any ``(ts, line)`` entry."""
    for _ts, line in lines:
        normalized = _norm(line)
        if any(needle in normalized for needle in needles):
            return True
    return False
def _check_worker_checkpoint_lines(
    by_phase: dict[str, list[tuple[str, str]]],
    analysis_workers: list[dict],
    errors: list[str],
) -> None:
    """Check the per-worker phase-4-dispatch / phase-5-collect lines (SKILL.md: once per worker).

    A worker whose status is in ``_ATTEMPTED_STATUSES`` needs a
    ``phase-4-dispatch`` line naming it; a ``completed`` worker additionally
    needs a ``phase-5-collect`` line. Each miss appends one message to *errors*.
    """
    dispatch_lines = by_phase.get("phase-4-dispatch", [])
    collect_lines = by_phase.get("phase-5-collect", [])

    for worker in analysis_workers:
        role = str(worker.get("role", "")).strip() or "<unknown role>"
        status = str(worker.get("status", "")).strip()
        needles = _worker_needles(worker)

        was_attempted = status in _ATTEMPTED_STATUSES
        if was_attempted and not _phase_mentions_worker(dispatch_lines, needles):
            errors.append(
                f"PROGRESS checkpoint missing: no `phase-4-dispatch worker=<role>` "
                f"line names worker `{role}` — one line per dispatched worker, "
                "agents/SKILL.md 'Progress reporting (BLOCKING)'."
            )

        was_completed = status == "completed"
        if was_completed and not _phase_mentions_worker(collect_lines, needles):
            errors.append(
                f"PROGRESS checkpoint missing: no `phase-5-collect worker=<role>` "
                f"line names completed worker `{role}` — one line per collected "
                "result, agents/SKILL.md 'Progress reporting (BLOCKING)'."
            )
def _check_progress_checkpoints(
    evidence: _LeadEvidence,
    team_state: dict,
    run_dir: Path,
    suffix: str | None,
    errors: list[str],
) -> None:
    """Check 1 — verify the lead emitted the required PROGRESS checkpoint lines.

    Which phases are required depends on the worker list in *team_state* and on
    whether the convergence artifact recorded any round. Each missing checkpoint
    appends one message to *errors*.
    """
    lines_by_phase: dict[str, list[tuple[str, str]]] = {}
    for ts, phase, line in evidence.progress:
        lines_by_phase.setdefault(phase, []).append((ts, line))

    def require(phase: str, condition: bool, detail: str) -> None:
        """Record an error when *phase* is required (*condition*) but has no line."""
        if not condition or phase in lines_by_phase:
            return
        errors.append(
            f"PROGRESS checkpoint missing: `{phase}` ({detail}) — "
            "agents/SKILL.md 'Progress reporting (BLOCKING)'."
        )

    # Phase 1 needs two lines: one that does not mention "complete" (the start
    # line) and one that does.
    intake_texts = [line.lower() for _ts, line in lines_by_phase.get("phase-1-intake", [])]
    saw_intake_start = any("complete" not in text for text in intake_texts)
    saw_intake_complete = any("complete" in text for text in intake_texts)
    if not saw_intake_start:
        errors.append(
            "PROGRESS checkpoint missing: `phase-1-intake reading task bundle` "
            "(start-of-Phase-1 line) — agents/SKILL.md 'Progress reporting (BLOCKING)'."
        )
    if not saw_intake_complete:
        errors.append(
            "PROGRESS checkpoint missing: `phase-1-intake complete` "
            "(after all intake reads) — agents/SKILL.md 'Progress reporting (BLOCKING)'."
        )

    workers = [w for w in (team_state.get("workers") or []) if isinstance(w, dict)]
    analysis_workers: list[dict] = []
    report_writers: list[dict] = []
    for w in workers:
        (report_writers if _is_report_writer(w) else analysis_workers).append(w)

    def was_dispatched(w: dict) -> bool:
        return str(w.get("status", "")).strip() in _DISPATCHED_STATUSES

    any_dispatched = any(was_dispatched(w) for w in workers)

    require("phase-2-prompts", bool(workers), "before any Write to assigned prompt paths")
    require("phase-3-team-create", any_dispatched, "immediately before the TeamCreate call")
    _check_worker_checkpoint_lines(lines_by_phase, analysis_workers, errors)
    require(
        "phase-5.5-convergence",
        _convergence_rounds_ran(run_dir, suffix),
        "at the start of each convergence round (state artifact records totalRounds >= 1)",
    )

    report_writer = report_writers[0] if report_writers else None
    require(
        "phase-6-synthesis",
        report_writer is not None and was_dispatched(report_writer),
        "at the start of Phase 6 (report-writer dispatch)",
    )
    require("phase-7-persist", True, "at the start of Phase 7")
def _parse_iso(ts: str) -> datetime | None:
    """Parse an ISO-8601 timestamp into a timezone-aware datetime.

    A trailing ``Z`` is accepted as UTC. A timestamp without any offset is
    interpreted as UTC as well: the heartbeat contract specifies UTC stamps, and
    returning a naive value would make subtraction against an aware one raise
    ``TypeError``.

    Args:
        ts: The timestamp text.

    Returns:
        An aware ``datetime``, or ``None`` when *ts* is not valid ISO-8601.
    """
    from datetime import timezone

    try:
        parsed = datetime.fromisoformat(ts.replace("Z", "+00:00"))
    except ValueError:
        return None
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed
def _check_heartbeat_sidecar(path: Path, errors: list[str]) -> None:
    """Validate the heartbeat lines of one claude-worker audit sidecar.

    Checks, each appending one message to *errors* on failure:
      * the file is readable and decodable as UTF-8;
      * it has at least one ``- PROGRESS: <stage> <ts>`` line;
      * the first stage is ``started``;
      * when the matching worker result file exists, a ``write-result-start``
        stage line is present;
      * every timestamp parses, never goes backwards, and never trails its
        predecessor by more than ``_HEARTBEAT_MAX_GAP_SECONDS``.

    Args:
        path: The audit sidecar file.
        errors: Accumulator the failure messages are appended to.
    """
    from datetime import timezone

    rel = path.name
    try:
        content = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        # An undecodable sidecar is reported like an unreadable one instead of
        # crashing the validator.
        errors.append(f"claude-worker audit sidecar unreadable: {rel} ({exc})")
        return
    entries = [(m.group("stage"), m.group("ts")) for m in _HEARTBEAT_LINE_RE.finditer(content)]
    if not entries:
        errors.append(
            f"`{rel}` has no `- PROGRESS: <stage> <ISO-8601-UTC>` heartbeat lines — "
            "the claude-worker MUST write `started` immediately and append one "
            "line per stage at <= 5-minute cadence (agents/workers/claude-worker.md "
            "'Heartbeat', agents/SKILL.md Common Mistakes)."
        )
        return
    if entries[0][0] != "started":
        errors.append(
            f"`{rel}`: first heartbeat stage must be `started` "
            f"(found `{entries[0][0]}`) — the sidecar is written BEFORE the "
            "per-file reads, with a `- PROGRESS: started <ISO>` line."
        )
    # When the result file exists, the final stage marker must be present too.
    # It is not required for a worker interrupted by a timeout (no result file)
    # — only the cadence of the stretch before the hang is examined.
    # Only the first "-audit-" is dropped: a task type or suffix containing the
    # same token must survive in the result file name.
    result_file = path.with_name(rel.replace("-audit-", "-", 1))
    if result_file.exists() and not any(s == "write-result-start" for s, _ in entries):
        errors.append(
            f"`{rel}`: heartbeat is missing the `write-result-start` stage line "
            f"although the worker result `{result_file.name}` exists — every "
            "stage must append its own line (agents/workers/claude-worker.md 'Heartbeat')."
        )
    prev: datetime | None = None
    for stage, raw_ts in entries:
        ts = _parse_iso(raw_ts)
        if ts is None:
            errors.append(
                f"`{rel}`: heartbeat line for stage `{stage}` has an unparseable "
                f"ISO-8601 timestamp `{raw_ts}`."
            )
            continue
        if ts.tzinfo is None:
            # The contract specifies UTC stamps; reading an offset-less value as
            # UTC keeps the subtraction below from raising TypeError when aware
            # and naive stamps are mixed in one file.
            ts = ts.replace(tzinfo=timezone.utc)
        if prev is not None:
            gap = (ts - prev).total_seconds()
            if gap < 0:
                errors.append(
                    f"`{rel}`: heartbeat timestamps regress at stage `{stage}` ({raw_ts})."
                )
            elif gap > _HEARTBEAT_MAX_GAP_SECONDS:
                errors.append(
                    f"`{rel}`: heartbeat gap before stage `{stage}` is {int(gap)}s — "
                    "the append cadence MUST NOT exceed 5 minutes (+60s grace); emit "
                    "`- PROGRESS: in-stage:<stage> <ISO>` during long stages."
                )
        prev = ts
def _check_heartbeat_sidecars(run_dir: Path, task_type: str, errors: list[str]) -> None:
    """Check 2 — validate the heartbeat content of every existing audit sidecar.

    Sidecar *existence* is, per the original note, already enforced by
    ``validate_worker_results_audit`` in validate-run.py, so only the sidecars
    that are present under ``<run_dir>/worker-results`` are inspected here. A
    missing ``worker-results`` directory is a no-op.
    """
    results_dir = run_dir / "worker-results"
    if results_dir.is_dir():
        pattern = f"claude-worker-audit-{task_type}-*.md"
        for sidecar in sorted(results_dir.glob(pattern)):
            _check_heartbeat_sidecar(sidecar, errors)
def _check_implementation_sidecar_reads(evidence: _LeadEvidence, errors: list[str]) -> None:
    """Check 3 — entry guard for the implementation profile sidecars.

    Each sidecar must have at least one ``Read`` in the evidence, and its
    earliest read must come before the first checkpoint line of its anchor
    phase (when such a line with a timestamp exists). Timestamps are compared
    as strings. The fresh-read rule (a previous run's memory does not count) is
    covered by run-window scoping: only reads inside this window are evidence.
    """
    anchor_phases = ("phase-6-synthesis", "phase-7-persist")
    first_seen: dict[str, str] = {}
    # evidence.progress is sorted by timestamp, so the first hit per phase is
    # its earliest occurrence.
    for ts, phase, _line in evidence.progress:
        if ts and phase in anchor_phases and phase not in first_seen:
            first_seen[phase] = ts

    # (sidecar basename, anchor phase it must precede, phase it is read at)
    expectations = (
        ("_implementation-executor.md", "phase-6-synthesis", "Phase 5"),
        ("_implementation-verifier.md", "phase-6-synthesis", "Phase 5"),
        ("_implementation-deliverable.md", "phase-7-persist", "Phase 6"),
    )
    for basename, anchor_phase, read_at in expectations:
        stamps = evidence.sidecar_reads.get(basename) or []
        if not stamps:
            errors.append(
                f"implementation entry guard: no `Read` of `{basename}` found in "
                f"the lead session jsonl within this run's window — the sidecar "
                f"MUST be read fresh at {read_at} every implementation run "
                "(agents/SKILL.md 'Entry guard (BLOCKING)')."
            )
            continue

        anchor_ts = first_seen.get(anchor_phase)
        if not anchor_ts:
            continue
        earliest = min(stamps)
        if earliest >= anchor_ts:
            errors.append(
                f"implementation entry guard: `{basename}` was first Read at "
                f"{earliest}, not before the first `PROGRESS: {anchor_phase}` "
                f"line ({anchor_ts}) — it must be loaded at {read_at}, before "
                "that checkpoint (agents/SKILL.md 'Entry guard (BLOCKING)')."
            )
def validate_session_conformance(
    *,
    team_state: dict,
    team_state_path: Path,
    project_root: Path,
    report_path: Path,
    task_type: str,
    claude_projects_dir: Path | None = None,
) -> SessionConformanceResult:
    """Run the three post-hoc conformance checks and return the failures found.

    ``claude_projects_dir`` is an injection seam for tests/diagnostics (per the
    original note, the default is the real ``~/.claude/projects``). Check 2
    (heartbeat) only reads sidecars on disk, so it runs even when the lead
    jsonl cannot be found. The run directory is taken to be the grandparent of
    *report_path* (the parent of ``reports/``). The entry-guard check runs only
    when *task_type* is ``"implementation"``.
    """
    outcome = SessionConformanceResult()
    _ensure_token_usage_importable()
    try:
        from okstra_token_usage.collect import run_artifact_suffix
    except ImportError as exc:  # pragma: no cover — installed copies always ship the package
        outcome.errors.append(f"okstra_token_usage import failed — {exc}")
        return outcome

    run_dir = report_path.parent.parent
    _check_heartbeat_sidecars(run_dir, task_type, outcome.errors)

    evidence, lookup_error = _collect_lead_evidence(
        team_state, team_state_path, project_root, claude_projects_dir
    )
    if lookup_error:
        outcome.errors.append(lookup_error)
    else:
        suffix = run_artifact_suffix(team_state_path)
        _check_progress_checkpoints(evidence, team_state, run_dir, suffix, outcome.errors)
        if task_type == "implementation":
            _check_implementation_sidecar_reads(evidence, outcome.errors)
    return outcome

package/src/migrate.mjs ADDED Viewed

@@ -0,0 +1,31 @@
+import { runPythonModule } from "./_python-helper.mjs";
// Help text printed verbatim for `--help` / `-h`.
const USAGE = `okstra migrate — move legacy .project-docs/okstra/ to .okstra/ (one-shot)
A thin shim over \`python3 -m okstra_ctl.migrate\`. Dry-run by default:
prints the migration plan as JSON and exits without touching anything.
Usage:
  okstra migrate [--apply] [--cwd <dir>] [--quiet]
Options:
  --apply   Execute the migration (git mv + .gitignore + okstra-home
            registry row updates). Without it, preview only.
  --cwd     Project root to migrate. Default: current directory.
  --quiet   Single-line JSON output.
Exits 1 when the migration is refused (.okstra/ already exists, or no
legacy .project-docs/okstra/ found).
`;

/**
 * Entry point of `okstra migrate`.
 *
 * Prints the usage text and returns 0 when a help flag is present; otherwise
 * forwards the arguments unchanged to the `okstra_ctl.migrate` Python module.
 *
 * @param {string[]} args Command-line arguments after the subcommand.
 * @returns {Promise<number>} The Python module's exit code, or 1 when it
 *   reported none.
 */
export async function run(args) {
  const wantsHelp = ["--help", "-h"].some((flag) => args.includes(flag));
  if (wantsHelp) {
    process.stdout.write(USAGE);
    return 0;
  }

  const result = await runPythonModule({ module: "okstra_ctl.migrate", args });
  return result.code ?? 1;
}