claude-controller 0.1.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/README.md +2 -2
  2. package/bin/autoloop.sh +382 -0
  3. package/bin/ctl +1189 -0
  4. package/bin/native-app.py +6 -3
  5. package/bin/watchdog.sh +357 -0
  6. package/cognitive/__init__.py +14 -0
  7. package/cognitive/__pycache__/__init__.cpython-314.pyc +0 -0
  8. package/cognitive/__pycache__/dispatcher.cpython-314.pyc +0 -0
  9. package/cognitive/__pycache__/evaluator.cpython-314.pyc +0 -0
  10. package/cognitive/__pycache__/goal_engine.cpython-314.pyc +0 -0
  11. package/cognitive/__pycache__/learning.cpython-314.pyc +0 -0
  12. package/cognitive/__pycache__/orchestrator.cpython-314.pyc +0 -0
  13. package/cognitive/__pycache__/planner.cpython-314.pyc +0 -0
  14. package/cognitive/dispatcher.py +192 -0
  15. package/cognitive/evaluator.py +289 -0
  16. package/cognitive/goal_engine.py +232 -0
  17. package/cognitive/learning.py +189 -0
  18. package/cognitive/orchestrator.py +303 -0
  19. package/cognitive/planner.py +207 -0
  20. package/cognitive/prompts/analyst.md +31 -0
  21. package/cognitive/prompts/coder.md +22 -0
  22. package/cognitive/prompts/reviewer.md +33 -0
  23. package/cognitive/prompts/tester.md +21 -0
  24. package/cognitive/prompts/writer.md +25 -0
  25. package/config.sh +6 -1
  26. package/dag/__init__.py +5 -0
  27. package/dag/__pycache__/__init__.cpython-314.pyc +0 -0
  28. package/dag/__pycache__/graph.cpython-314.pyc +0 -0
  29. package/dag/graph.py +222 -0
  30. package/lib/jobs.sh +12 -1
  31. package/package.json +11 -5
  32. package/postinstall.sh +1 -1
  33. package/service/controller.sh +43 -11
  34. package/web/audit.py +122 -0
  35. package/web/checkpoint.py +80 -0
  36. package/web/config.py +2 -5
  37. package/web/handler.py +634 -473
  38. package/web/handler_fs.py +153 -0
  39. package/web/handler_goals.py +203 -0
  40. package/web/handler_jobs.py +372 -0
  41. package/web/handler_memory.py +203 -0
  42. package/web/handler_sessions.py +132 -0
  43. package/web/jobs.py +585 -13
  44. package/web/personas.py +419 -0
  45. package/web/pipeline.py +981 -0
  46. package/web/presets.py +506 -0
  47. package/web/projects.py +246 -0
  48. package/web/static/api.js +141 -0
  49. package/web/static/app.js +25 -1937
  50. package/web/static/attachments.js +144 -0
  51. package/web/static/base.css +497 -0
  52. package/web/static/context.js +204 -0
  53. package/web/static/dirs.js +246 -0
  54. package/web/static/form.css +763 -0
  55. package/web/static/goals.css +363 -0
  56. package/web/static/goals.js +300 -0
  57. package/web/static/i18n.js +625 -0
  58. package/web/static/index.html +215 -13
  59. package/web/static/{styles.css → jobs.css} +746 -1141
  60. package/web/static/jobs.js +1270 -0
  61. package/web/static/memoryview.js +117 -0
  62. package/web/static/personas.js +228 -0
  63. package/web/static/pipeline.css +338 -0
  64. package/web/static/pipelines.js +487 -0
  65. package/web/static/presets.js +244 -0
  66. package/web/static/send.js +135 -0
  67. package/web/static/settings-style.css +291 -0
  68. package/web/static/settings.js +81 -0
  69. package/web/static/stream.js +534 -0
  70. package/web/static/utils.js +131 -0
  71. package/web/webhook.py +210 -0
@@ -0,0 +1,289 @@
1
+ """
2
+ Evaluator — 자동 평가기
3
+ Worker 산출물의 품질을 자동으로 검증한다.
4
+
5
+ 평가 파이프라인:
6
+ 1. 정적 분석 (lint, type check)
7
+ 2. 테스트 실행
8
+ 3. AI 코드 리뷰 (Reviewer Worker)
9
+ 4. 성공 기준 검증
10
+
11
+ Gate 모드에서는 각 단계 후 사용자 승인을 요청한다.
12
+ """
13
+
14
+ import json
15
+ import os
16
+ import subprocess
17
+ from dataclasses import dataclass, field
18
+ from pathlib import Path
19
+ from typing import Optional
20
+
21
+
22
@dataclass
class EvalResult:
    """Outcome of a single evaluation step."""
    step: str
    passed: bool
    details: str
    score: float = 0.0  # normalized quality score in the range [0.0, 1.0]


@dataclass
class EvaluationReport:
    """Aggregated report over every evaluation step of a goal/task."""
    goal_id: str
    task_id: Optional[str]
    results: list[EvalResult] = field(default_factory=list)
    overall_pass: bool = False
    summary: str = ""

    @property
    def total_score(self) -> float:
        """Mean of the per-step scores; 0.0 when no steps were recorded."""
        if not self.results:
            return 0.0
        scores = [entry.score for entry in self.results]
        return sum(scores) / len(scores)

    def to_dict(self) -> dict:
        """Serialize the report into a JSON-friendly dict."""
        step_dicts = [
            {
                "step": entry.step,
                "passed": entry.passed,
                "details": entry.details,
                "score": entry.score,
            }
            for entry in self.results
        ]
        return {
            "goal_id": self.goal_id,
            "task_id": self.task_id,
            "overall_pass": self.overall_pass,
            "total_score": round(self.total_score, 2),
            "summary": self.summary,
            "results": step_dicts,
        }
63
+
64
+
65
class Evaluator:
    """Automatically evaluates task artifacts.

    Evaluation pipeline:
      1. static analysis (lint / type check)
      2. test execution
      3. AI code review (Reviewer worker)
      4. success-criteria verification

    In gate mode the caller is expected to ask for user approval after each
    step; this class only produces the per-step results.
    """

    def __init__(self, claude_bin: str, cwd: str):
        # claude_bin: path to the Claude CLI binary used for AI evaluation.
        # cwd: project directory where linters/tests are executed.
        self.claude_bin = claude_bin
        self.cwd = cwd

    def evaluate_task(
        self,
        goal_id: str,
        task_id: str,
        worker_type: str,
        changed_files: Optional[list[str]] = None,
    ) -> EvaluationReport:
        """Evaluate the output of a single task.

        Steps depend on the worker type: "coder" -> lint + tests,
        "tester" -> tests, "reviewer" -> no re-evaluation.
        NOTE: an unknown worker type yields an empty result list, and
        all() over an empty list is True, so such tasks pass vacuously —
        behavior preserved from the original.
        """
        report = EvaluationReport(goal_id=goal_id, task_id=task_id)

        if worker_type == "coder":
            self._eval_lint(report, changed_files)
            self._eval_tests(report)
        elif worker_type == "tester":
            self._eval_tests(report)
        elif worker_type == "reviewer":
            # The reviewer's own output is not evaluated again.
            report.results.append(EvalResult(
                step="review_complete",
                passed=True,
                details="리뷰 완료",
                score=1.0,
            ))

        report.overall_pass = all(r.passed for r in report.results)
        report.summary = self._generate_summary(report)
        return report

    def evaluate_goal(
        self,
        goal_id: str,
        success_criteria: list[str],
    ) -> EvaluationReport:
        """Verify the goal-level success criteria.

        Hands Claude the list of success criteria and asks it to judge
        whether each one is met. A response that cannot be parsed is
        recorded as a failing "criteria_parse" step instead of raising.
        """
        report = EvaluationReport(goal_id=goal_id, task_id=None)

        prompt = self._build_criteria_prompt(success_criteria)
        result = self._call_claude_eval(prompt)

        try:
            data = json.loads(result)
            for criterion in data.get("criteria", []):
                report.results.append(EvalResult(
                    step=f"criterion: {criterion['name']}",
                    passed=criterion.get("met", False),
                    details=criterion.get("reason", ""),
                    score=1.0 if criterion.get("met") else 0.0,
                ))
        except (json.JSONDecodeError, KeyError):
            report.results.append(EvalResult(
                step="criteria_parse",
                passed=False,
                details=f"평가 응답 파싱 실패: {result[:200]}",
                score=0.0,
            ))

        report.overall_pass = all(r.passed for r in report.results)
        report.summary = self._generate_summary(report)
        return report

    def _eval_lint(self, report: EvaluationReport, changed_files: Optional[list[str]] = None):
        """Run whatever linters the project has available.

        *changed_files* is currently unused — presumably reserved for
        targeted linting; verify against callers before removing.
        """
        checks = []

        # Python: prefer ruff, fall back to flake8.
        if self._has_command("ruff"):
            checks.append(("ruff check .", "ruff"))
        elif self._has_command("flake8"):
            checks.append(("flake8 .", "flake8"))

        # JavaScript/TypeScript: locally-installed eslint.
        if Path(self.cwd, "node_modules/.bin/eslint").exists():
            checks.append(("npx eslint .", "eslint"))

        if not checks:
            # No linter installed — record a neutral half-credit skip.
            report.results.append(EvalResult(
                step="lint",
                passed=True,
                details="린터 미설치 — 건너뜀",
                score=0.5,
            ))
            return

        for cmd, name in checks:
            try:
                result = subprocess.run(
                    cmd, shell=True, cwd=self.cwd,
                    capture_output=True, text=True, timeout=60,
                )
                passed = result.returncode == 0
                report.results.append(EvalResult(
                    step=f"lint_{name}",
                    passed=passed,
                    details=result.stdout[:500] if not passed else "통과",
                    score=1.0 if passed else 0.0,
                ))
            except subprocess.TimeoutExpired:
                report.results.append(EvalResult(
                    step=f"lint_{name}",
                    passed=False,
                    details="타임아웃 (60초)",
                    score=0.0,
                ))

    def _eval_tests(self, report: EvaluationReport):
        """Detect and run the project's test suite(s)."""
        test_cmds = []

        # Detect test commands by project type.
        if Path(self.cwd, "pytest.ini").exists() or Path(self.cwd, "pyproject.toml").exists():
            test_cmds.append(("python -m pytest --tb=short -q", "pytest"))
        if Path(self.cwd, "package.json").exists():
            test_cmds.append(("npm test", "npm_test"))

        if not test_cmds:
            # No test configuration — record a neutral half-credit skip.
            report.results.append(EvalResult(
                step="test",
                passed=True,
                details="테스트 설정 없음 — 건너뜀",
                score=0.5,
            ))
            return

        for cmd, name in test_cmds:
            try:
                result = subprocess.run(
                    cmd, shell=True, cwd=self.cwd,
                    capture_output=True, text=True, timeout=120,
                )
                passed = result.returncode == 0
                report.results.append(EvalResult(
                    step=f"test_{name}",
                    passed=passed,
                    # Tail of stdout usually contains the failure summary.
                    details=result.stdout[-500:] if not passed else "모든 테스트 통과",
                    score=1.0 if passed else 0.0,
                ))
            except subprocess.TimeoutExpired:
                report.results.append(EvalResult(
                    step=f"test_{name}",
                    passed=False,
                    details="타임아웃 (120초)",
                    score=0.0,
                ))

    def _build_criteria_prompt(self, criteria: list[str]) -> str:
        """Build the prompt asking Claude to verify the success criteria."""
        criteria_text = "\n".join(f"- {c}" for c in criteria)
        return f"""다음 성공 기준의 충족 여부를 코드베이스를 분석하여 판단하세요.

## 성공 기준
{criteria_text}

## 출력 형식 (JSON만)
```json
{{
  "criteria": [
    {{"name": "기준 내용", "met": true/false, "reason": "판단 근거"}}
  ]
}}
```
"""

    def _call_claude_eval(self, prompt: str) -> str:
        """Invoke the Claude CLI and return its textual answer.

        Bug fix: the original let subprocess.TimeoutExpired (timeout=180)
        propagate out of the evaluator; a timeout — or a missing CLI
        binary — now degrades to an empty criteria list, matching how a
        nonzero exit code is handled.
        """
        cmd = [
            self.claude_bin,
            "-p", prompt,
            "--output-format", "json",
            "--allowedTools", "Read,Glob,Grep,Bash",
        ]

        try:
            result = subprocess.run(
                cmd, cwd=self.cwd,
                capture_output=True, text=True,
                timeout=180,
            )
        except (subprocess.TimeoutExpired, FileNotFoundError):
            return json.dumps({"criteria": []})

        if result.returncode != 0:
            return json.dumps({"criteria": []})

        # Unwrap the CLI's JSON envelope to get the text payload.
        try:
            outer = json.loads(result.stdout)
            if "result" in outer:
                return outer["result"]
            for block in outer.get("content", []):
                if block.get("type") == "text":
                    return block["text"]
        except (json.JSONDecodeError, TypeError):
            pass

        return result.stdout

    def _generate_summary(self, report: EvaluationReport) -> str:
        """One-line human-readable summary of the report."""
        total = len(report.results)
        passed = sum(1 for r in report.results if r.passed)
        failed_steps = [r.step for r in report.results if not r.passed]

        if report.overall_pass:
            return f"모든 평가 통과 ({passed}/{total})"
        else:
            return f"평가 실패 ({passed}/{total}) — 실패 항목: {', '.join(failed_steps)}"

    def _has_command(self, cmd: str) -> bool:
        """Return True if *cmd* resolves on PATH.

        Bug fix: the original returned True whenever `which` itself ran,
        ignoring its exit status — so every command appeared installed.
        `which` exits nonzero when the name is not found; check it.
        """
        try:
            result = subprocess.run(
                ["which", cmd], capture_output=True, timeout=5,
            )
            return result.returncode == 0
        except (subprocess.TimeoutExpired, FileNotFoundError):
            return False
@@ -0,0 +1,232 @@
1
+ """
2
+ Goal Engine — 목표 관리자
3
+ 추상적 목표를 구조화하고, 진행 상태를 추적하며, 완료 조건을 판단한다.
4
+
5
+ 사용 흐름:
6
+ 1. create_goal("테스트 커버리지 80%로 올려") → goal_id
7
+ 2. Planner가 DAG 생성 → attach_dag(goal_id, dag)
8
+ 3. Dispatcher가 실행 → update_task_status(goal_id, task_id, status)
9
+ 4. 모든 태스크 완료 → evaluate_completion(goal_id) → True/False
10
+ """
11
+
12
+ import json
13
+ import os
14
+ import time
15
+ import uuid
16
+ from enum import Enum
17
+ from pathlib import Path
18
+ from typing import Optional
19
+
20
+
21
class GoalStatus(str, Enum):
    """Lifecycle states of a goal."""

    PENDING = "pending"            # created, no plan yet
    PLANNING = "planning"          # the Planner is building the DAG
    READY = "ready"                # DAG ready, waiting to execute
    RUNNING = "running"            # tasks are executing
    GATE_WAITING = "gate_waiting"  # gate mode: waiting for user approval
    EVALUATING = "evaluating"      # the Evaluator is verifying results
    COMPLETED = "completed"        # goal achieved
    FAILED = "failed"              # goal not achieved
    CANCELLED = "cancelled"        # cancelled by the user
31
+
32
+
33
class ExecutionMode(str, Enum):
    """How much autonomy the goal execution is given."""

    FULL_AUTO = "full_auto"  # fully autonomous
    GATE = "gate"            # step-by-step approval
    WATCH = "watch"          # autonomous, but observable/interruptible
    PAIR = "pair"            # joint review per task
38
+
39
+
40
class GoalEngine:
    """Creates goals, tracks progress, and decides completion.

    Each goal is persisted as one JSON file under <data_dir>/goals/.
    """

    def __init__(self, data_dir: str):
        # Ensure the goals directory exists (created on first use).
        self.data_dir = Path(data_dir)
        self.goals_dir = self.data_dir / "goals"
        self.goals_dir.mkdir(parents=True, exist_ok=True)

    def create_goal(
        self,
        objective: str,
        mode: ExecutionMode = ExecutionMode.GATE,
        context: Optional[dict] = None,
        budget_usd: float = 5.0,
        max_tasks: int = 20,
    ) -> dict:
        """Create and persist a new goal.

        Args:
            objective: Natural-language goal (e.g. "raise test coverage to 80%").
            mode: Execution mode.
            context: Extra context (cwd, target_files, ...).
            budget_usd: Cost ceiling; the goal is auto-failed when exceeded.
            max_tasks: Maximum number of tasks.
        Returns:
            The created goal dict.
        """
        goal_id = f"goal-{int(time.time())}-{uuid.uuid4().hex[:8]}"
        goal = {
            "id": goal_id,
            "objective": objective,
            "mode": mode.value,
            "status": GoalStatus.PENDING.value,
            "context": context or {},
            "budget_usd": budget_usd,
            "max_tasks": max_tasks,
            "success_criteria": [],  # filled in by the Planner
            "dag": None,  # DAG produced by the Planner
            "progress": {
                "total_tasks": 0,
                "completed_tasks": 0,
                "failed_tasks": 0,
                "cost_usd": 0.0,
            },
            "memory_refs": [],  # memory IDs referenced/created while running this goal
            "created_at": time.time(),
            "updated_at": time.time(),
            "completed_at": None,
        }
        self._save_goal(goal)
        return goal

    def get_goal(self, goal_id: str) -> Optional[dict]:
        """Load a goal by id; returns None if it does not exist."""
        path = self.goals_dir / f"{goal_id}.json"
        if not path.exists():
            return None
        # Bug fix: read explicitly as UTF-8. Goal files hold non-ASCII text
        # (written with ensure_ascii=False); the locale default encoding is
        # not UTF-8 on every platform.
        with open(path, encoding="utf-8") as f:
            return json.load(f)

    def list_goals(self, status: Optional[str] = None) -> list[dict]:
        """Return all goals, newest first; optionally filtered by status."""
        goals = []
        for path in sorted(self.goals_dir.glob("goal-*.json"), reverse=True):
            with open(path, encoding="utf-8") as f:  # see get_goal: explicit UTF-8
                goal = json.load(f)
            if status is None or goal["status"] == status:
                goals.append(goal)
        return goals

    def update_status(self, goal_id: str, status: GoalStatus) -> dict:
        """Change the goal's status; stamps completed_at on terminal states.

        Raises:
            ValueError: if the goal does not exist.
        """
        goal = self.get_goal(goal_id)
        if goal is None:
            raise ValueError(f"Goal not found: {goal_id}")
        goal["status"] = status.value
        goal["updated_at"] = time.time()
        if status in (GoalStatus.COMPLETED, GoalStatus.FAILED, GoalStatus.CANCELLED):
            goal["completed_at"] = time.time()
        self._save_goal(goal)
        return goal

    def attach_dag(self, goal_id: str, dag: dict, success_criteria: list[str]) -> dict:
        """Attach the Planner's DAG and success criteria; moves goal to READY.

        Raises:
            ValueError: if the goal does not exist.
        """
        goal = self.get_goal(goal_id)
        if goal is None:
            raise ValueError(f"Goal not found: {goal_id}")
        goal["dag"] = dag
        goal["success_criteria"] = success_criteria
        goal["progress"]["total_tasks"] = len(dag.get("tasks", []))
        goal["status"] = GoalStatus.READY.value
        goal["updated_at"] = time.time()
        self._save_goal(goal)
        return goal

    def update_task_status(
        self, goal_id: str, task_id: str, status: str, cost_usd: float = 0.0
    ) -> dict:
        """Update one task inside the DAG and recompute goal progress.

        Also enforces the budget: exceeding budget_usd fails the goal.
        Raises:
            ValueError: if the goal does not exist.
        """
        goal = self.get_goal(goal_id)
        if goal is None:
            raise ValueError(f"Goal not found: {goal_id}")

        # Update the matching task inside the DAG (first match wins).
        if goal["dag"]:
            for task in goal["dag"].get("tasks", []):
                if task["id"] == task_id:
                    task["status"] = status
                    task["cost_usd"] = task.get("cost_usd", 0) + cost_usd
                    break

        # Recompute progress counters from the task list.
        tasks = goal["dag"].get("tasks", []) if goal["dag"] else []
        goal["progress"]["completed_tasks"] = sum(
            1 for t in tasks if t.get("status") == "completed"
        )
        goal["progress"]["failed_tasks"] = sum(
            1 for t in tasks if t.get("status") == "failed"
        )
        goal["progress"]["cost_usd"] += cost_usd
        goal["updated_at"] = time.time()

        # Budget check: hard-fail the goal once the ceiling is exceeded.
        if goal["progress"]["cost_usd"] > goal["budget_usd"]:
            goal["status"] = GoalStatus.FAILED.value
            goal["completed_at"] = time.time()

        self._save_goal(goal)
        return goal

    def evaluate_completion(self, goal_id: str) -> dict:
        """Decide whether the goal has been achieved.

        Returns:
            { "achieved": bool, "all_tasks_done": bool, "failed_tasks": [...],
              "total_cost_usd": float, "criteria": [...] }
        Raises:
            ValueError: if the goal does not exist.
        """
        goal = self.get_goal(goal_id)
        if goal is None:
            raise ValueError(f"Goal not found: {goal_id}")

        tasks = goal["dag"].get("tasks", []) if goal["dag"] else []
        # NOTE: with an empty task list, all() is vacuously True, so a goal
        # with no DAG counts as achieved — preserved from the original.
        all_done = all(t.get("status") == "completed" for t in tasks)
        any_failed = any(t.get("status") == "failed" for t in tasks)

        result = {
            "achieved": all_done and not any_failed,
            "all_tasks_done": all_done,
            "failed_tasks": [t["id"] for t in tasks if t.get("status") == "failed"],
            "total_cost_usd": goal["progress"]["cost_usd"],
            "criteria": goal["success_criteria"],
        }

        if result["achieved"]:
            self.update_status(goal_id, GoalStatus.COMPLETED)
        elif any_failed and not any(
            t.get("status") in ("pending", "running") for t in tasks
        ):
            # Something failed and nothing is left to run -> terminal failure.
            self.update_status(goal_id, GoalStatus.FAILED)

        return result

    def get_next_tasks(self, goal_id: str) -> list[dict]:
        """Return the DAG tasks that are ready to run (all deps completed)."""
        goal = self.get_goal(goal_id)
        if not goal or not goal["dag"]:
            return []

        tasks = goal["dag"].get("tasks", [])
        task_map = {t["id"]: t for t in tasks}
        ready = []

        for task in tasks:
            # Only tasks that have never started (no status, or "pending").
            if task.get("status") not in (None, "pending"):
                continue
            deps = task.get("depends_on", [])
            # Unknown dependency ids resolve to {} and block the task.
            if all(
                task_map.get(d, {}).get("status") == "completed" for d in deps
            ):
                ready.append(task)

        return ready

    def cancel_goal(self, goal_id: str) -> dict:
        """Cancel the goal (terminal state)."""
        return self.update_status(goal_id, GoalStatus.CANCELLED)

    def _save_goal(self, goal: dict):
        """Atomically persist a goal (write temp file, then rename)."""
        path = self.goals_dir / f"{goal['id']}.json"
        tmp_path = path.with_suffix(".tmp")
        # Bug fix: write explicitly as UTF-8; ensure_ascii=False emits raw
        # non-ASCII characters, which fails under non-UTF-8 locale defaults.
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(goal, f, indent=2, ensure_ascii=False)
        os.replace(str(tmp_path), str(path))