claude-controller 0.1.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/README.md +2 -2
  2. package/bin/autoloop.sh +382 -0
  3. package/bin/ctl +1189 -0
  4. package/bin/native-app.py +6 -3
  5. package/bin/watchdog.sh +357 -0
  6. package/cognitive/__init__.py +14 -0
  7. package/cognitive/__pycache__/__init__.cpython-314.pyc +0 -0
  8. package/cognitive/__pycache__/dispatcher.cpython-314.pyc +0 -0
  9. package/cognitive/__pycache__/evaluator.cpython-314.pyc +0 -0
  10. package/cognitive/__pycache__/goal_engine.cpython-314.pyc +0 -0
  11. package/cognitive/__pycache__/learning.cpython-314.pyc +0 -0
  12. package/cognitive/__pycache__/orchestrator.cpython-314.pyc +0 -0
  13. package/cognitive/__pycache__/planner.cpython-314.pyc +0 -0
  14. package/cognitive/dispatcher.py +192 -0
  15. package/cognitive/evaluator.py +289 -0
  16. package/cognitive/goal_engine.py +232 -0
  17. package/cognitive/learning.py +189 -0
  18. package/cognitive/orchestrator.py +303 -0
  19. package/cognitive/planner.py +207 -0
  20. package/cognitive/prompts/analyst.md +31 -0
  21. package/cognitive/prompts/coder.md +22 -0
  22. package/cognitive/prompts/reviewer.md +33 -0
  23. package/cognitive/prompts/tester.md +21 -0
  24. package/cognitive/prompts/writer.md +25 -0
  25. package/config.sh +6 -1
  26. package/dag/__init__.py +5 -0
  27. package/dag/__pycache__/__init__.cpython-314.pyc +0 -0
  28. package/dag/__pycache__/graph.cpython-314.pyc +0 -0
  29. package/dag/graph.py +222 -0
  30. package/lib/jobs.sh +12 -1
  31. package/package.json +11 -5
  32. package/postinstall.sh +1 -1
  33. package/service/controller.sh +43 -11
  34. package/web/audit.py +122 -0
  35. package/web/checkpoint.py +80 -0
  36. package/web/config.py +2 -5
  37. package/web/handler.py +634 -473
  38. package/web/handler_fs.py +153 -0
  39. package/web/handler_goals.py +203 -0
  40. package/web/handler_jobs.py +372 -0
  41. package/web/handler_memory.py +203 -0
  42. package/web/handler_sessions.py +132 -0
  43. package/web/jobs.py +585 -13
  44. package/web/personas.py +419 -0
  45. package/web/pipeline.py +981 -0
  46. package/web/presets.py +506 -0
  47. package/web/projects.py +246 -0
  48. package/web/static/api.js +141 -0
  49. package/web/static/app.js +25 -1937
  50. package/web/static/attachments.js +144 -0
  51. package/web/static/base.css +497 -0
  52. package/web/static/context.js +204 -0
  53. package/web/static/dirs.js +246 -0
  54. package/web/static/form.css +763 -0
  55. package/web/static/goals.css +363 -0
  56. package/web/static/goals.js +300 -0
  57. package/web/static/i18n.js +625 -0
  58. package/web/static/index.html +215 -13
  59. package/web/static/{styles.css → jobs.css} +746 -1141
  60. package/web/static/jobs.js +1270 -0
  61. package/web/static/memoryview.js +117 -0
  62. package/web/static/personas.js +228 -0
  63. package/web/static/pipeline.css +338 -0
  64. package/web/static/pipelines.js +487 -0
  65. package/web/static/presets.js +244 -0
  66. package/web/static/send.js +135 -0
  67. package/web/static/settings-style.css +291 -0
  68. package/web/static/settings.js +81 -0
  69. package/web/static/stream.js +534 -0
  70. package/web/static/utils.js +131 -0
  71. package/web/webhook.py +210 -0
@@ -0,0 +1,289 @@
1
+ """
2
+ Evaluator — 자동 평가기
3
+ Worker 산출물의 품질을 자동으로 검증한다.
4
+
5
+ 평가 파이프라인:
6
+ 1. 정적 분석 (lint, type check)
7
+ 2. 테스트 실행
8
+ 3. AI 코드 리뷰 (Reviewer Worker)
9
+ 4. 성공 기준 검증
10
+
11
+ Gate 모드에서는 각 단계 후 사용자 승인을 요청한다.
12
+ """
13
+
14
+ import json
15
+ import os
16
+ import subprocess
17
+ from dataclasses import dataclass, field
18
+ from pathlib import Path
19
+ from typing import Optional
20
+
21
+
22
@dataclass
class EvalResult:
    """Outcome of a single evaluation step."""
    step: str
    passed: bool
    details: str
    score: float = 0.0  # normalized quality score in the range [0.0, 1.0]


@dataclass
class EvaluationReport:
    """Aggregated report over every evaluation step of a goal/task."""
    goal_id: str
    task_id: Optional[str]
    results: list[EvalResult] = field(default_factory=list)
    overall_pass: bool = False
    summary: str = ""

    @property
    def total_score(self) -> float:
        """Mean of the per-step scores; 0.0 when no steps were recorded."""
        if not self.results:
            return 0.0
        scores = [entry.score for entry in self.results]
        return sum(scores) / len(scores)

    def to_dict(self) -> dict:
        """Serialize the report into a JSON-friendly dict."""
        step_dicts = [
            {
                "step": entry.step,
                "passed": entry.passed,
                "details": entry.details,
                "score": entry.score,
            }
            for entry in self.results
        ]
        return {
            "goal_id": self.goal_id,
            "task_id": self.task_id,
            "overall_pass": self.overall_pass,
            "total_score": round(self.total_score, 2),
            "summary": self.summary,
            "results": step_dicts,
        }
63
+
64
+
65
class Evaluator:
    """Automatically evaluates task artifacts.

    Evaluation pipeline:
      1. static analysis (lint / type check)
      2. test execution
      3. AI code review (Reviewer worker)
      4. success-criteria verification

    In gate mode the caller is expected to ask for user approval after each
    step; this class only produces the per-step results.
    """

    def __init__(self, claude_bin: str, cwd: str):
        # claude_bin: path to the Claude CLI binary used for AI evaluation.
        # cwd: project directory where linters/tests are executed.
        self.claude_bin = claude_bin
        self.cwd = cwd

    def evaluate_task(
        self,
        goal_id: str,
        task_id: str,
        worker_type: str,
        changed_files: Optional[list[str]] = None,
    ) -> EvaluationReport:
        """Evaluate the output of a single task.

        Steps depend on the worker type: "coder" -> lint + tests,
        "tester" -> tests, "reviewer" -> no re-evaluation.
        NOTE: an unknown worker type yields an empty result list, and
        all() over an empty list is True, so such tasks pass vacuously —
        behavior preserved from the original.
        """
        report = EvaluationReport(goal_id=goal_id, task_id=task_id)

        if worker_type == "coder":
            self._eval_lint(report, changed_files)
            self._eval_tests(report)
        elif worker_type == "tester":
            self._eval_tests(report)
        elif worker_type == "reviewer":
            # The reviewer's own output is not evaluated again.
            report.results.append(EvalResult(
                step="review_complete",
                passed=True,
                details="리뷰 완료",
                score=1.0,
            ))

        report.overall_pass = all(r.passed for r in report.results)
        report.summary = self._generate_summary(report)
        return report

    def evaluate_goal(
        self,
        goal_id: str,
        success_criteria: list[str],
    ) -> EvaluationReport:
        """Verify the goal-level success criteria.

        Hands Claude the list of success criteria and asks it to judge
        whether each one is met. A response that cannot be parsed is
        recorded as a failing "criteria_parse" step instead of raising.
        """
        report = EvaluationReport(goal_id=goal_id, task_id=None)

        prompt = self._build_criteria_prompt(success_criteria)
        result = self._call_claude_eval(prompt)

        try:
            data = json.loads(result)
            for criterion in data.get("criteria", []):
                report.results.append(EvalResult(
                    step=f"criterion: {criterion['name']}",
                    passed=criterion.get("met", False),
                    details=criterion.get("reason", ""),
                    score=1.0 if criterion.get("met") else 0.0,
                ))
        except (json.JSONDecodeError, KeyError):
            report.results.append(EvalResult(
                step="criteria_parse",
                passed=False,
                details=f"평가 응답 파싱 실패: {result[:200]}",
                score=0.0,
            ))

        report.overall_pass = all(r.passed for r in report.results)
        report.summary = self._generate_summary(report)
        return report

    def _eval_lint(self, report: EvaluationReport, changed_files: Optional[list[str]] = None):
        """Run whatever linters the project has available.

        *changed_files* is currently unused — presumably reserved for
        targeted linting; verify against callers before removing.
        """
        checks = []

        # Python: prefer ruff, fall back to flake8.
        if self._has_command("ruff"):
            checks.append(("ruff check .", "ruff"))
        elif self._has_command("flake8"):
            checks.append(("flake8 .", "flake8"))

        # JavaScript/TypeScript: locally-installed eslint.
        if Path(self.cwd, "node_modules/.bin/eslint").exists():
            checks.append(("npx eslint .", "eslint"))

        if not checks:
            # No linter installed — record a neutral half-credit skip.
            report.results.append(EvalResult(
                step="lint",
                passed=True,
                details="린터 미설치 — 건너뜀",
                score=0.5,
            ))
            return

        for cmd, name in checks:
            try:
                result = subprocess.run(
                    cmd, shell=True, cwd=self.cwd,
                    capture_output=True, text=True, timeout=60,
                )
                passed = result.returncode == 0
                report.results.append(EvalResult(
                    step=f"lint_{name}",
                    passed=passed,
                    details=result.stdout[:500] if not passed else "통과",
                    score=1.0 if passed else 0.0,
                ))
            except subprocess.TimeoutExpired:
                report.results.append(EvalResult(
                    step=f"lint_{name}",
                    passed=False,
                    details="타임아웃 (60초)",
                    score=0.0,
                ))

    def _eval_tests(self, report: EvaluationReport):
        """Detect and run the project's test suite(s)."""
        test_cmds = []

        # Detect test commands by project type.
        if Path(self.cwd, "pytest.ini").exists() or Path(self.cwd, "pyproject.toml").exists():
            test_cmds.append(("python -m pytest --tb=short -q", "pytest"))
        if Path(self.cwd, "package.json").exists():
            test_cmds.append(("npm test", "npm_test"))

        if not test_cmds:
            # No test configuration — record a neutral half-credit skip.
            report.results.append(EvalResult(
                step="test",
                passed=True,
                details="테스트 설정 없음 — 건너뜀",
                score=0.5,
            ))
            return

        for cmd, name in test_cmds:
            try:
                result = subprocess.run(
                    cmd, shell=True, cwd=self.cwd,
                    capture_output=True, text=True, timeout=120,
                )
                passed = result.returncode == 0
                report.results.append(EvalResult(
                    step=f"test_{name}",
                    passed=passed,
                    # Tail of stdout usually contains the failure summary.
                    details=result.stdout[-500:] if not passed else "모든 테스트 통과",
                    score=1.0 if passed else 0.0,
                ))
            except subprocess.TimeoutExpired:
                report.results.append(EvalResult(
                    step=f"test_{name}",
                    passed=False,
                    details="타임아웃 (120초)",
                    score=0.0,
                ))

    def _build_criteria_prompt(self, criteria: list[str]) -> str:
        """Build the prompt asking Claude to verify the success criteria."""
        criteria_text = "\n".join(f"- {c}" for c in criteria)
        return f"""다음 성공 기준의 충족 여부를 코드베이스를 분석하여 판단하세요.

## 성공 기준
{criteria_text}

## 출력 형식 (JSON만)
```json
{{
  "criteria": [
    {{"name": "기준 내용", "met": true/false, "reason": "판단 근거"}}
  ]
}}
```
"""

    def _call_claude_eval(self, prompt: str) -> str:
        """Invoke the Claude CLI and return its textual answer.

        Bug fix: the original let subprocess.TimeoutExpired (timeout=180)
        propagate out of the evaluator; a timeout — or a missing CLI
        binary — now degrades to an empty criteria list, matching how a
        nonzero exit code is handled.
        """
        cmd = [
            self.claude_bin,
            "-p", prompt,
            "--output-format", "json",
            "--allowedTools", "Read,Glob,Grep,Bash",
        ]

        try:
            result = subprocess.run(
                cmd, cwd=self.cwd,
                capture_output=True, text=True,
                timeout=180,
            )
        except (subprocess.TimeoutExpired, FileNotFoundError):
            return json.dumps({"criteria": []})

        if result.returncode != 0:
            return json.dumps({"criteria": []})

        # Unwrap the CLI's JSON envelope to get the text payload.
        try:
            outer = json.loads(result.stdout)
            if "result" in outer:
                return outer["result"]
            for block in outer.get("content", []):
                if block.get("type") == "text":
                    return block["text"]
        except (json.JSONDecodeError, TypeError):
            pass

        return result.stdout

    def _generate_summary(self, report: EvaluationReport) -> str:
        """One-line human-readable summary of the report."""
        total = len(report.results)
        passed = sum(1 for r in report.results if r.passed)
        failed_steps = [r.step for r in report.results if not r.passed]

        if report.overall_pass:
            return f"모든 평가 통과 ({passed}/{total})"
        else:
            return f"평가 실패 ({passed}/{total}) — 실패 항목: {', '.join(failed_steps)}"

    def _has_command(self, cmd: str) -> bool:
        """Return True if *cmd* resolves on PATH.

        Bug fix: the original returned True whenever `which` itself ran,
        ignoring its exit status — so every command appeared installed.
        `which` exits nonzero when the name is not found; check it.
        """
        try:
            result = subprocess.run(
                ["which", cmd], capture_output=True, timeout=5,
            )
            return result.returncode == 0
        except (subprocess.TimeoutExpired, FileNotFoundError):
            return False
@@ -0,0 +1,232 @@
1
+ """
2
+ Goal Engine — 목표 관리자
3
+ 추상적 목표를 구조화하고, 진행 상태를 추적하며, 완료 조건을 판단한다.
4
+
5
+ 사용 흐름:
6
+ 1. create_goal("테스트 커버리지 80%로 올려") → goal_id
7
+ 2. Planner가 DAG 생성 → attach_dag(goal_id, dag)
8
+ 3. Dispatcher가 실행 → update_task_status(goal_id, task_id, status)
9
+ 4. 모든 태스크 완료 → evaluate_completion(goal_id) → True/False
10
+ """
11
+
12
+ import json
13
+ import os
14
+ import time
15
+ import uuid
16
+ from enum import Enum
17
+ from pathlib import Path
18
+ from typing import Optional
19
+
20
+
21
class GoalStatus(str, Enum):
    """Lifecycle states of a goal."""

    PENDING = "pending"            # created, no plan yet
    PLANNING = "planning"          # the Planner is building the DAG
    READY = "ready"                # DAG ready, waiting to execute
    RUNNING = "running"            # tasks are executing
    GATE_WAITING = "gate_waiting"  # gate mode: waiting for user approval
    EVALUATING = "evaluating"      # the Evaluator is verifying results
    COMPLETED = "completed"        # goal achieved
    FAILED = "failed"              # goal not achieved
    CANCELLED = "cancelled"        # cancelled by the user
31
+
32
+
33
class ExecutionMode(str, Enum):
    """How much autonomy the goal execution is given."""

    FULL_AUTO = "full_auto"  # fully autonomous
    GATE = "gate"            # step-by-step approval
    WATCH = "watch"          # autonomous, but observable/interruptible
    PAIR = "pair"            # joint review per task
38
+
39
+
40
class GoalEngine:
    """Creates goals, tracks progress, and decides completion.

    Each goal is persisted as one JSON file under <data_dir>/goals/.
    """

    def __init__(self, data_dir: str):
        # Ensure the goals directory exists (created on first use).
        self.data_dir = Path(data_dir)
        self.goals_dir = self.data_dir / "goals"
        self.goals_dir.mkdir(parents=True, exist_ok=True)

    def create_goal(
        self,
        objective: str,
        mode: ExecutionMode = ExecutionMode.GATE,
        context: Optional[dict] = None,
        budget_usd: float = 5.0,
        max_tasks: int = 20,
    ) -> dict:
        """Create and persist a new goal.

        Args:
            objective: Natural-language goal (e.g. "raise test coverage to 80%").
            mode: Execution mode.
            context: Extra context (cwd, target_files, ...).
            budget_usd: Cost ceiling; the goal is auto-failed when exceeded.
            max_tasks: Maximum number of tasks.
        Returns:
            The created goal dict.
        """
        goal_id = f"goal-{int(time.time())}-{uuid.uuid4().hex[:8]}"
        goal = {
            "id": goal_id,
            "objective": objective,
            "mode": mode.value,
            "status": GoalStatus.PENDING.value,
            "context": context or {},
            "budget_usd": budget_usd,
            "max_tasks": max_tasks,
            "success_criteria": [],  # filled in by the Planner
            "dag": None,  # DAG produced by the Planner
            "progress": {
                "total_tasks": 0,
                "completed_tasks": 0,
                "failed_tasks": 0,
                "cost_usd": 0.0,
            },
            "memory_refs": [],  # memory IDs referenced/created while running this goal
            "created_at": time.time(),
            "updated_at": time.time(),
            "completed_at": None,
        }
        self._save_goal(goal)
        return goal

    def get_goal(self, goal_id: str) -> Optional[dict]:
        """Load a goal by id; returns None if it does not exist."""
        path = self.goals_dir / f"{goal_id}.json"
        if not path.exists():
            return None
        # Bug fix: read explicitly as UTF-8. Goal files hold non-ASCII text
        # (written with ensure_ascii=False); the locale default encoding is
        # not UTF-8 on every platform.
        with open(path, encoding="utf-8") as f:
            return json.load(f)

    def list_goals(self, status: Optional[str] = None) -> list[dict]:
        """Return all goals, newest first; optionally filtered by status."""
        goals = []
        for path in sorted(self.goals_dir.glob("goal-*.json"), reverse=True):
            with open(path, encoding="utf-8") as f:  # see get_goal: explicit UTF-8
                goal = json.load(f)
            if status is None or goal["status"] == status:
                goals.append(goal)
        return goals

    def update_status(self, goal_id: str, status: GoalStatus) -> dict:
        """Change the goal's status; stamps completed_at on terminal states.

        Raises:
            ValueError: if the goal does not exist.
        """
        goal = self.get_goal(goal_id)
        if goal is None:
            raise ValueError(f"Goal not found: {goal_id}")
        goal["status"] = status.value
        goal["updated_at"] = time.time()
        if status in (GoalStatus.COMPLETED, GoalStatus.FAILED, GoalStatus.CANCELLED):
            goal["completed_at"] = time.time()
        self._save_goal(goal)
        return goal

    def attach_dag(self, goal_id: str, dag: dict, success_criteria: list[str]) -> dict:
        """Attach the Planner's DAG and success criteria; moves goal to READY.

        Raises:
            ValueError: if the goal does not exist.
        """
        goal = self.get_goal(goal_id)
        if goal is None:
            raise ValueError(f"Goal not found: {goal_id}")
        goal["dag"] = dag
        goal["success_criteria"] = success_criteria
        goal["progress"]["total_tasks"] = len(dag.get("tasks", []))
        goal["status"] = GoalStatus.READY.value
        goal["updated_at"] = time.time()
        self._save_goal(goal)
        return goal

    def update_task_status(
        self, goal_id: str, task_id: str, status: str, cost_usd: float = 0.0
    ) -> dict:
        """Update one task inside the DAG and recompute goal progress.

        Also enforces the budget: exceeding budget_usd fails the goal.
        Raises:
            ValueError: if the goal does not exist.
        """
        goal = self.get_goal(goal_id)
        if goal is None:
            raise ValueError(f"Goal not found: {goal_id}")

        # Update the matching task inside the DAG (first match wins).
        if goal["dag"]:
            for task in goal["dag"].get("tasks", []):
                if task["id"] == task_id:
                    task["status"] = status
                    task["cost_usd"] = task.get("cost_usd", 0) + cost_usd
                    break

        # Recompute progress counters from the task list.
        tasks = goal["dag"].get("tasks", []) if goal["dag"] else []
        goal["progress"]["completed_tasks"] = sum(
            1 for t in tasks if t.get("status") == "completed"
        )
        goal["progress"]["failed_tasks"] = sum(
            1 for t in tasks if t.get("status") == "failed"
        )
        goal["progress"]["cost_usd"] += cost_usd
        goal["updated_at"] = time.time()

        # Budget check: hard-fail the goal once the ceiling is exceeded.
        if goal["progress"]["cost_usd"] > goal["budget_usd"]:
            goal["status"] = GoalStatus.FAILED.value
            goal["completed_at"] = time.time()

        self._save_goal(goal)
        return goal

    def evaluate_completion(self, goal_id: str) -> dict:
        """Decide whether the goal has been achieved.

        Returns:
            { "achieved": bool, "all_tasks_done": bool, "failed_tasks": [...],
              "total_cost_usd": float, "criteria": [...] }
        Raises:
            ValueError: if the goal does not exist.
        """
        goal = self.get_goal(goal_id)
        if goal is None:
            raise ValueError(f"Goal not found: {goal_id}")

        tasks = goal["dag"].get("tasks", []) if goal["dag"] else []
        # NOTE: with an empty task list, all() is vacuously True, so a goal
        # with no DAG counts as achieved — preserved from the original.
        all_done = all(t.get("status") == "completed" for t in tasks)
        any_failed = any(t.get("status") == "failed" for t in tasks)

        result = {
            "achieved": all_done and not any_failed,
            "all_tasks_done": all_done,
            "failed_tasks": [t["id"] for t in tasks if t.get("status") == "failed"],
            "total_cost_usd": goal["progress"]["cost_usd"],
            "criteria": goal["success_criteria"],
        }

        if result["achieved"]:
            self.update_status(goal_id, GoalStatus.COMPLETED)
        elif any_failed and not any(
            t.get("status") in ("pending", "running") for t in tasks
        ):
            # Something failed and nothing is left to run -> terminal failure.
            self.update_status(goal_id, GoalStatus.FAILED)

        return result

    def get_next_tasks(self, goal_id: str) -> list[dict]:
        """Return the DAG tasks that are ready to run (all deps completed)."""
        goal = self.get_goal(goal_id)
        if not goal or not goal["dag"]:
            return []

        tasks = goal["dag"].get("tasks", [])
        task_map = {t["id"]: t for t in tasks}
        ready = []

        for task in tasks:
            # Only tasks that have never started (no status, or "pending").
            if task.get("status") not in (None, "pending"):
                continue
            deps = task.get("depends_on", [])
            # Unknown dependency ids resolve to {} and block the task.
            if all(
                task_map.get(d, {}).get("status") == "completed" for d in deps
            ):
                ready.append(task)

        return ready

    def cancel_goal(self, goal_id: str) -> dict:
        """Cancel the goal (terminal state)."""
        return self.update_status(goal_id, GoalStatus.CANCELLED)

    def _save_goal(self, goal: dict):
        """Atomically persist a goal (write temp file, then rename)."""
        path = self.goals_dir / f"{goal['id']}.json"
        tmp_path = path.with_suffix(".tmp")
        # Bug fix: write explicitly as UTF-8; ensure_ascii=False emits raw
        # non-ASCII characters, which fails under non-UTF-8 locale defaults.
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(goal, f, indent=2, ensure_ascii=False)
        os.replace(str(tmp_path), str(path))