gobby 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gobby/__init__.py +3 -0
- gobby/adapters/__init__.py +30 -0
- gobby/adapters/base.py +93 -0
- gobby/adapters/claude_code.py +276 -0
- gobby/adapters/codex.py +1292 -0
- gobby/adapters/gemini.py +343 -0
- gobby/agents/__init__.py +37 -0
- gobby/agents/codex_session.py +120 -0
- gobby/agents/constants.py +112 -0
- gobby/agents/context.py +362 -0
- gobby/agents/definitions.py +133 -0
- gobby/agents/gemini_session.py +111 -0
- gobby/agents/registry.py +618 -0
- gobby/agents/runner.py +968 -0
- gobby/agents/session.py +259 -0
- gobby/agents/spawn.py +916 -0
- gobby/agents/spawners/__init__.py +77 -0
- gobby/agents/spawners/base.py +142 -0
- gobby/agents/spawners/cross_platform.py +266 -0
- gobby/agents/spawners/embedded.py +225 -0
- gobby/agents/spawners/headless.py +226 -0
- gobby/agents/spawners/linux.py +125 -0
- gobby/agents/spawners/macos.py +277 -0
- gobby/agents/spawners/windows.py +308 -0
- gobby/agents/tty_config.py +319 -0
- gobby/autonomous/__init__.py +32 -0
- gobby/autonomous/progress_tracker.py +447 -0
- gobby/autonomous/stop_registry.py +269 -0
- gobby/autonomous/stuck_detector.py +383 -0
- gobby/cli/__init__.py +67 -0
- gobby/cli/__main__.py +8 -0
- gobby/cli/agents.py +529 -0
- gobby/cli/artifacts.py +266 -0
- gobby/cli/daemon.py +329 -0
- gobby/cli/extensions.py +526 -0
- gobby/cli/github.py +263 -0
- gobby/cli/init.py +53 -0
- gobby/cli/install.py +614 -0
- gobby/cli/installers/__init__.py +37 -0
- gobby/cli/installers/antigravity.py +65 -0
- gobby/cli/installers/claude.py +363 -0
- gobby/cli/installers/codex.py +192 -0
- gobby/cli/installers/gemini.py +294 -0
- gobby/cli/installers/git_hooks.py +377 -0
- gobby/cli/installers/shared.py +737 -0
- gobby/cli/linear.py +250 -0
- gobby/cli/mcp.py +30 -0
- gobby/cli/mcp_proxy.py +698 -0
- gobby/cli/memory.py +304 -0
- gobby/cli/merge.py +384 -0
- gobby/cli/projects.py +79 -0
- gobby/cli/sessions.py +622 -0
- gobby/cli/tasks/__init__.py +30 -0
- gobby/cli/tasks/_utils.py +658 -0
- gobby/cli/tasks/ai.py +1025 -0
- gobby/cli/tasks/commits.py +169 -0
- gobby/cli/tasks/crud.py +685 -0
- gobby/cli/tasks/deps.py +135 -0
- gobby/cli/tasks/labels.py +63 -0
- gobby/cli/tasks/main.py +273 -0
- gobby/cli/tasks/search.py +178 -0
- gobby/cli/tui.py +34 -0
- gobby/cli/utils.py +513 -0
- gobby/cli/workflows.py +927 -0
- gobby/cli/worktrees.py +481 -0
- gobby/config/__init__.py +129 -0
- gobby/config/app.py +551 -0
- gobby/config/extensions.py +167 -0
- gobby/config/features.py +472 -0
- gobby/config/llm_providers.py +98 -0
- gobby/config/logging.py +66 -0
- gobby/config/mcp.py +346 -0
- gobby/config/persistence.py +247 -0
- gobby/config/servers.py +141 -0
- gobby/config/sessions.py +250 -0
- gobby/config/tasks.py +784 -0
- gobby/hooks/__init__.py +104 -0
- gobby/hooks/artifact_capture.py +213 -0
- gobby/hooks/broadcaster.py +243 -0
- gobby/hooks/event_handlers.py +723 -0
- gobby/hooks/events.py +218 -0
- gobby/hooks/git.py +169 -0
- gobby/hooks/health_monitor.py +171 -0
- gobby/hooks/hook_manager.py +856 -0
- gobby/hooks/hook_types.py +575 -0
- gobby/hooks/plugins.py +813 -0
- gobby/hooks/session_coordinator.py +396 -0
- gobby/hooks/verification_runner.py +268 -0
- gobby/hooks/webhooks.py +339 -0
- gobby/install/claude/commands/gobby/bug.md +51 -0
- gobby/install/claude/commands/gobby/chore.md +51 -0
- gobby/install/claude/commands/gobby/epic.md +52 -0
- gobby/install/claude/commands/gobby/eval.md +235 -0
- gobby/install/claude/commands/gobby/feat.md +49 -0
- gobby/install/claude/commands/gobby/nit.md +52 -0
- gobby/install/claude/commands/gobby/ref.md +52 -0
- gobby/install/claude/hooks/HOOK_SCHEMAS.md +632 -0
- gobby/install/claude/hooks/hook_dispatcher.py +364 -0
- gobby/install/claude/hooks/validate_settings.py +102 -0
- gobby/install/claude/hooks-template.json +118 -0
- gobby/install/codex/hooks/hook_dispatcher.py +153 -0
- gobby/install/codex/prompts/forget.md +7 -0
- gobby/install/codex/prompts/memories.md +7 -0
- gobby/install/codex/prompts/recall.md +7 -0
- gobby/install/codex/prompts/remember.md +13 -0
- gobby/install/gemini/hooks/hook_dispatcher.py +268 -0
- gobby/install/gemini/hooks-template.json +138 -0
- gobby/install/shared/plugins/code_guardian.py +456 -0
- gobby/install/shared/plugins/example_notify.py +331 -0
- gobby/integrations/__init__.py +10 -0
- gobby/integrations/github.py +145 -0
- gobby/integrations/linear.py +145 -0
- gobby/llm/__init__.py +40 -0
- gobby/llm/base.py +120 -0
- gobby/llm/claude.py +578 -0
- gobby/llm/claude_executor.py +503 -0
- gobby/llm/codex.py +322 -0
- gobby/llm/codex_executor.py +513 -0
- gobby/llm/executor.py +316 -0
- gobby/llm/factory.py +34 -0
- gobby/llm/gemini.py +258 -0
- gobby/llm/gemini_executor.py +339 -0
- gobby/llm/litellm.py +287 -0
- gobby/llm/litellm_executor.py +303 -0
- gobby/llm/resolver.py +499 -0
- gobby/llm/service.py +236 -0
- gobby/mcp_proxy/__init__.py +29 -0
- gobby/mcp_proxy/actions.py +175 -0
- gobby/mcp_proxy/daemon_control.py +198 -0
- gobby/mcp_proxy/importer.py +436 -0
- gobby/mcp_proxy/lazy.py +325 -0
- gobby/mcp_proxy/manager.py +798 -0
- gobby/mcp_proxy/metrics.py +609 -0
- gobby/mcp_proxy/models.py +139 -0
- gobby/mcp_proxy/registries.py +215 -0
- gobby/mcp_proxy/schema_hash.py +381 -0
- gobby/mcp_proxy/semantic_search.py +706 -0
- gobby/mcp_proxy/server.py +549 -0
- gobby/mcp_proxy/services/__init__.py +0 -0
- gobby/mcp_proxy/services/fallback.py +306 -0
- gobby/mcp_proxy/services/recommendation.py +224 -0
- gobby/mcp_proxy/services/server_mgmt.py +214 -0
- gobby/mcp_proxy/services/system.py +72 -0
- gobby/mcp_proxy/services/tool_filter.py +231 -0
- gobby/mcp_proxy/services/tool_proxy.py +309 -0
- gobby/mcp_proxy/stdio.py +565 -0
- gobby/mcp_proxy/tools/__init__.py +27 -0
- gobby/mcp_proxy/tools/agents.py +1103 -0
- gobby/mcp_proxy/tools/artifacts.py +207 -0
- gobby/mcp_proxy/tools/hub.py +335 -0
- gobby/mcp_proxy/tools/internal.py +337 -0
- gobby/mcp_proxy/tools/memory.py +543 -0
- gobby/mcp_proxy/tools/merge.py +422 -0
- gobby/mcp_proxy/tools/metrics.py +283 -0
- gobby/mcp_proxy/tools/orchestration/__init__.py +23 -0
- gobby/mcp_proxy/tools/orchestration/cleanup.py +619 -0
- gobby/mcp_proxy/tools/orchestration/monitor.py +380 -0
- gobby/mcp_proxy/tools/orchestration/orchestrate.py +746 -0
- gobby/mcp_proxy/tools/orchestration/review.py +736 -0
- gobby/mcp_proxy/tools/orchestration/utils.py +16 -0
- gobby/mcp_proxy/tools/session_messages.py +1056 -0
- gobby/mcp_proxy/tools/task_dependencies.py +219 -0
- gobby/mcp_proxy/tools/task_expansion.py +591 -0
- gobby/mcp_proxy/tools/task_github.py +393 -0
- gobby/mcp_proxy/tools/task_linear.py +379 -0
- gobby/mcp_proxy/tools/task_orchestration.py +77 -0
- gobby/mcp_proxy/tools/task_readiness.py +522 -0
- gobby/mcp_proxy/tools/task_sync.py +351 -0
- gobby/mcp_proxy/tools/task_validation.py +843 -0
- gobby/mcp_proxy/tools/tasks/__init__.py +25 -0
- gobby/mcp_proxy/tools/tasks/_context.py +112 -0
- gobby/mcp_proxy/tools/tasks/_crud.py +516 -0
- gobby/mcp_proxy/tools/tasks/_factory.py +176 -0
- gobby/mcp_proxy/tools/tasks/_helpers.py +129 -0
- gobby/mcp_proxy/tools/tasks/_lifecycle.py +517 -0
- gobby/mcp_proxy/tools/tasks/_lifecycle_validation.py +301 -0
- gobby/mcp_proxy/tools/tasks/_resolution.py +55 -0
- gobby/mcp_proxy/tools/tasks/_search.py +215 -0
- gobby/mcp_proxy/tools/tasks/_session.py +125 -0
- gobby/mcp_proxy/tools/workflows.py +973 -0
- gobby/mcp_proxy/tools/worktrees.py +1264 -0
- gobby/mcp_proxy/transports/__init__.py +0 -0
- gobby/mcp_proxy/transports/base.py +95 -0
- gobby/mcp_proxy/transports/factory.py +44 -0
- gobby/mcp_proxy/transports/http.py +139 -0
- gobby/mcp_proxy/transports/stdio.py +213 -0
- gobby/mcp_proxy/transports/websocket.py +136 -0
- gobby/memory/backends/__init__.py +116 -0
- gobby/memory/backends/mem0.py +408 -0
- gobby/memory/backends/memu.py +485 -0
- gobby/memory/backends/null.py +111 -0
- gobby/memory/backends/openmemory.py +537 -0
- gobby/memory/backends/sqlite.py +304 -0
- gobby/memory/context.py +87 -0
- gobby/memory/manager.py +1001 -0
- gobby/memory/protocol.py +451 -0
- gobby/memory/search/__init__.py +66 -0
- gobby/memory/search/text.py +127 -0
- gobby/memory/viz.py +258 -0
- gobby/prompts/__init__.py +13 -0
- gobby/prompts/defaults/expansion/system.md +119 -0
- gobby/prompts/defaults/expansion/user.md +48 -0
- gobby/prompts/defaults/external_validation/agent.md +72 -0
- gobby/prompts/defaults/external_validation/external.md +63 -0
- gobby/prompts/defaults/external_validation/spawn.md +83 -0
- gobby/prompts/defaults/external_validation/system.md +6 -0
- gobby/prompts/defaults/features/import_mcp.md +22 -0
- gobby/prompts/defaults/features/import_mcp_github.md +17 -0
- gobby/prompts/defaults/features/import_mcp_search.md +16 -0
- gobby/prompts/defaults/features/recommend_tools.md +32 -0
- gobby/prompts/defaults/features/recommend_tools_hybrid.md +35 -0
- gobby/prompts/defaults/features/recommend_tools_llm.md +30 -0
- gobby/prompts/defaults/features/server_description.md +20 -0
- gobby/prompts/defaults/features/server_description_system.md +6 -0
- gobby/prompts/defaults/features/task_description.md +31 -0
- gobby/prompts/defaults/features/task_description_system.md +6 -0
- gobby/prompts/defaults/features/tool_summary.md +17 -0
- gobby/prompts/defaults/features/tool_summary_system.md +6 -0
- gobby/prompts/defaults/research/step.md +58 -0
- gobby/prompts/defaults/validation/criteria.md +47 -0
- gobby/prompts/defaults/validation/validate.md +38 -0
- gobby/prompts/loader.py +346 -0
- gobby/prompts/models.py +113 -0
- gobby/py.typed +0 -0
- gobby/runner.py +488 -0
- gobby/search/__init__.py +23 -0
- gobby/search/protocol.py +104 -0
- gobby/search/tfidf.py +232 -0
- gobby/servers/__init__.py +7 -0
- gobby/servers/http.py +636 -0
- gobby/servers/models.py +31 -0
- gobby/servers/routes/__init__.py +23 -0
- gobby/servers/routes/admin.py +416 -0
- gobby/servers/routes/dependencies.py +118 -0
- gobby/servers/routes/mcp/__init__.py +24 -0
- gobby/servers/routes/mcp/hooks.py +135 -0
- gobby/servers/routes/mcp/plugins.py +121 -0
- gobby/servers/routes/mcp/tools.py +1337 -0
- gobby/servers/routes/mcp/webhooks.py +159 -0
- gobby/servers/routes/sessions.py +582 -0
- gobby/servers/websocket.py +766 -0
- gobby/sessions/__init__.py +13 -0
- gobby/sessions/analyzer.py +322 -0
- gobby/sessions/lifecycle.py +240 -0
- gobby/sessions/manager.py +563 -0
- gobby/sessions/processor.py +225 -0
- gobby/sessions/summary.py +532 -0
- gobby/sessions/transcripts/__init__.py +41 -0
- gobby/sessions/transcripts/base.py +125 -0
- gobby/sessions/transcripts/claude.py +386 -0
- gobby/sessions/transcripts/codex.py +143 -0
- gobby/sessions/transcripts/gemini.py +195 -0
- gobby/storage/__init__.py +21 -0
- gobby/storage/agents.py +409 -0
- gobby/storage/artifact_classifier.py +341 -0
- gobby/storage/artifacts.py +285 -0
- gobby/storage/compaction.py +67 -0
- gobby/storage/database.py +357 -0
- gobby/storage/inter_session_messages.py +194 -0
- gobby/storage/mcp.py +680 -0
- gobby/storage/memories.py +562 -0
- gobby/storage/merge_resolutions.py +550 -0
- gobby/storage/migrations.py +860 -0
- gobby/storage/migrations_legacy.py +1359 -0
- gobby/storage/projects.py +166 -0
- gobby/storage/session_messages.py +251 -0
- gobby/storage/session_tasks.py +97 -0
- gobby/storage/sessions.py +817 -0
- gobby/storage/task_dependencies.py +223 -0
- gobby/storage/tasks/__init__.py +42 -0
- gobby/storage/tasks/_aggregates.py +180 -0
- gobby/storage/tasks/_crud.py +449 -0
- gobby/storage/tasks/_id.py +104 -0
- gobby/storage/tasks/_lifecycle.py +311 -0
- gobby/storage/tasks/_manager.py +889 -0
- gobby/storage/tasks/_models.py +300 -0
- gobby/storage/tasks/_ordering.py +119 -0
- gobby/storage/tasks/_path_cache.py +110 -0
- gobby/storage/tasks/_queries.py +343 -0
- gobby/storage/tasks/_search.py +143 -0
- gobby/storage/workflow_audit.py +393 -0
- gobby/storage/worktrees.py +547 -0
- gobby/sync/__init__.py +29 -0
- gobby/sync/github.py +333 -0
- gobby/sync/linear.py +304 -0
- gobby/sync/memories.py +284 -0
- gobby/sync/tasks.py +641 -0
- gobby/tasks/__init__.py +8 -0
- gobby/tasks/build_verification.py +193 -0
- gobby/tasks/commits.py +633 -0
- gobby/tasks/context.py +747 -0
- gobby/tasks/criteria.py +342 -0
- gobby/tasks/enhanced_validator.py +226 -0
- gobby/tasks/escalation.py +263 -0
- gobby/tasks/expansion.py +626 -0
- gobby/tasks/external_validator.py +764 -0
- gobby/tasks/issue_extraction.py +171 -0
- gobby/tasks/prompts/expand.py +327 -0
- gobby/tasks/research.py +421 -0
- gobby/tasks/tdd.py +352 -0
- gobby/tasks/tree_builder.py +263 -0
- gobby/tasks/validation.py +712 -0
- gobby/tasks/validation_history.py +357 -0
- gobby/tasks/validation_models.py +89 -0
- gobby/tools/__init__.py +0 -0
- gobby/tools/summarizer.py +170 -0
- gobby/tui/__init__.py +5 -0
- gobby/tui/api_client.py +281 -0
- gobby/tui/app.py +327 -0
- gobby/tui/screens/__init__.py +25 -0
- gobby/tui/screens/agents.py +333 -0
- gobby/tui/screens/chat.py +450 -0
- gobby/tui/screens/dashboard.py +377 -0
- gobby/tui/screens/memory.py +305 -0
- gobby/tui/screens/metrics.py +231 -0
- gobby/tui/screens/orchestrator.py +904 -0
- gobby/tui/screens/sessions.py +412 -0
- gobby/tui/screens/tasks.py +442 -0
- gobby/tui/screens/workflows.py +289 -0
- gobby/tui/screens/worktrees.py +174 -0
- gobby/tui/widgets/__init__.py +21 -0
- gobby/tui/widgets/chat.py +210 -0
- gobby/tui/widgets/conductor.py +104 -0
- gobby/tui/widgets/menu.py +132 -0
- gobby/tui/widgets/message_panel.py +160 -0
- gobby/tui/widgets/review_gate.py +224 -0
- gobby/tui/widgets/task_tree.py +99 -0
- gobby/tui/widgets/token_budget.py +166 -0
- gobby/tui/ws_client.py +258 -0
- gobby/utils/__init__.py +3 -0
- gobby/utils/daemon_client.py +235 -0
- gobby/utils/git.py +222 -0
- gobby/utils/id.py +38 -0
- gobby/utils/json_helpers.py +161 -0
- gobby/utils/logging.py +376 -0
- gobby/utils/machine_id.py +135 -0
- gobby/utils/metrics.py +589 -0
- gobby/utils/project_context.py +182 -0
- gobby/utils/project_init.py +263 -0
- gobby/utils/status.py +256 -0
- gobby/utils/validation.py +80 -0
- gobby/utils/version.py +23 -0
- gobby/workflows/__init__.py +4 -0
- gobby/workflows/actions.py +1310 -0
- gobby/workflows/approval_flow.py +138 -0
- gobby/workflows/artifact_actions.py +103 -0
- gobby/workflows/audit_helpers.py +110 -0
- gobby/workflows/autonomous_actions.py +286 -0
- gobby/workflows/context_actions.py +394 -0
- gobby/workflows/definitions.py +130 -0
- gobby/workflows/detection_helpers.py +208 -0
- gobby/workflows/engine.py +485 -0
- gobby/workflows/evaluator.py +669 -0
- gobby/workflows/git_utils.py +96 -0
- gobby/workflows/hooks.py +169 -0
- gobby/workflows/lifecycle_evaluator.py +613 -0
- gobby/workflows/llm_actions.py +70 -0
- gobby/workflows/loader.py +333 -0
- gobby/workflows/mcp_actions.py +60 -0
- gobby/workflows/memory_actions.py +272 -0
- gobby/workflows/premature_stop.py +164 -0
- gobby/workflows/session_actions.py +139 -0
- gobby/workflows/state_actions.py +123 -0
- gobby/workflows/state_manager.py +104 -0
- gobby/workflows/stop_signal_actions.py +163 -0
- gobby/workflows/summary_actions.py +344 -0
- gobby/workflows/task_actions.py +249 -0
- gobby/workflows/task_enforcement_actions.py +901 -0
- gobby/workflows/templates.py +52 -0
- gobby/workflows/todo_actions.py +84 -0
- gobby/workflows/webhook.py +223 -0
- gobby/workflows/webhook_executor.py +399 -0
- gobby/worktrees/__init__.py +5 -0
- gobby/worktrees/git.py +690 -0
- gobby/worktrees/merge/__init__.py +20 -0
- gobby/worktrees/merge/conflict_parser.py +177 -0
- gobby/worktrees/merge/resolver.py +485 -0
- gobby-0.2.5.dist-info/METADATA +351 -0
- gobby-0.2.5.dist-info/RECORD +383 -0
- gobby-0.2.5.dist-info/WHEEL +5 -0
- gobby-0.2.5.dist-info/entry_points.txt +2 -0
- gobby-0.2.5.dist-info/licenses/LICENSE.md +193 -0
- gobby-0.2.5.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,764 @@
|
|
|
1
|
+
"""External validator for objective task validation.
|
|
2
|
+
|
|
3
|
+
Provides a separate validation path using either:
|
|
4
|
+
1. A fresh LLM context (direct API calls) - mode: "llm"
|
|
5
|
+
2. An in-process agent instance with tools - mode: "agent"
|
|
6
|
+
3. A spawned headless agent process - mode: "spawn"
|
|
7
|
+
|
|
8
|
+
All modes ensure the validator has no prior knowledge of the implementation.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
import logging
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import TYPE_CHECKING, Any
|
|
16
|
+
|
|
17
|
+
from gobby.config.app import TaskValidationConfig
|
|
18
|
+
from gobby.llm import LLMService
|
|
19
|
+
from gobby.prompts import PromptLoader
|
|
20
|
+
from gobby.tasks.commits import (
|
|
21
|
+
extract_mentioned_files,
|
|
22
|
+
extract_mentioned_symbols,
|
|
23
|
+
summarize_diff_for_validation,
|
|
24
|
+
)
|
|
25
|
+
from gobby.tasks.issue_extraction import parse_issues_from_response
|
|
26
|
+
from gobby.tasks.validation_models import Issue
|
|
27
|
+
from gobby.utils.json_helpers import extract_json_object
|
|
28
|
+
|
|
29
|
+
if TYPE_CHECKING:
    # Everything in this branch exists only for static type checkers. The
    # signatures in this module that mention AgentRunner / AgentSpawner quote
    # the annotation as a string, so these names are not needed at runtime.
    from typing import Protocol

    from gobby.agents.runner import AgentRunner

    class AgentSpawner(Protocol):
        """Protocol for agent spawning interface (gobby-agents).

        Structural type for the object passed as ``agent_spawner`` to the
        spawn-mode validator. Both methods return plain dictionaries; the
        spawn-mode validator in this module reads the keys ``success``,
        ``error`` and ``agent_id`` from the ``start_agent`` result and
        ``success``, ``status``, ``error`` and ``output`` from the
        ``get_agent_result`` result.
        """

        async def start_agent(self, **kwargs: Any) -> dict[str, Any]:
            """Start a new agent process.

            Args:
                **kwargs: Spawn options. The spawn-mode validator passes
                    ``prompt``, ``mode``, ``model``, ``provider``,
                    ``max_turns``, ``timeout``, ``parent_session_id`` and
                    ``session_context``.

            Returns:
                A result dictionary describing the spawn attempt.
            """
            ...

        async def get_agent_result(self, agent_id: str, **kwargs: Any) -> dict[str, Any]:
            """Get the result of a completed agent run.

            Args:
                agent_id: Identifier returned by ``start_agent``.
                **kwargs: Additional implementation-specific options.

            Returns:
                A result dictionary describing the agent run.
            """
            ...
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Default system prompt for external validators.
# Registered by _get_loader() as the fallback for the
# "external_validation/system" prompt.
DEFAULT_EXTERNAL_SYSTEM_PROMPT = (
    "You are an objective QA validator reviewing code changes. "
    "You have no prior context about this task - evaluate purely based on "
    "the acceptance criteria and the changes provided. "
    "Be thorough but fair in your assessment."
)

# Module-level loader (initialized lazily).
# Stays None until the first _get_loader() call creates the shared instance.
_loader: PromptLoader | None = None
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _get_loader(project_dir: Path | None = None) -> PromptLoader:
    """Return the shared PromptLoader, creating it on first use.

    The instance is cached in the module-level ``_loader``. ``project_dir``
    is only consulted when that cache is empty; once a loader exists, later
    calls return it unchanged regardless of the argument.

    Args:
        project_dir: Project directory handed to ``PromptLoader`` when the
            shared instance is first built.

    Returns:
        The cached ``PromptLoader`` instance.
    """
    global _loader

    # Fast path: a loader was already built by an earlier call.
    if _loader is not None:
        return _loader

    _loader = PromptLoader(project_dir=project_dir)
    # Register fallbacks for strangler fig pattern
    _loader.register_fallback(
        "external_validation/system",
        lambda: DEFAULT_EXTERNAL_SYSTEM_PROMPT,
    )
    return _loader
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@dataclass
class ExternalValidationResult:
    """Outcome of one external validation run.

    The QA loop uses ``status`` to pick its next step:
    - status="valid": Task can be marked complete
    - status="invalid": Task should be retried with issues as feedback
    - status="error": Validation failed (timeout, crash, etc.) - may retry or escalate
    - status="skipped": Validation was skipped (disabled in config)

    Attributes:
        status: Validation status - "valid", "invalid", "error", "skipped", or "pending"
        summary: Human-readable summary of validation result
        issues: List of structured issues found (actionable feedback for implementation agent)
        error: Error message if status is "error"
    """

    status: str
    summary: str
    issues: list[Issue] = field(default_factory=list)
    error: str | None = None

    @property
    def passed(self) -> bool:
        """True only when ``status`` equals ``"valid"``."""
        return self.status == "valid"

    def format_issues_for_feedback(self) -> str:
        """Render the issues as a Markdown section for the implementation agent.

        Returns:
            An empty string when there are no issues; otherwise a
            "## Validation Issues" heading followed by one "### Issue N"
            subsection per issue, joined with newlines.
        """
        if not self.issues:
            return ""

        # Sentinel distinguishing "attribute absent" from "attribute is None/empty".
        absent = object()

        chunks = ["## Validation Issues\n"]
        for number, issue in enumerate(self.issues, start=1):
            chunks.append(f"### Issue {number}: {issue.title}")

            # Severity and type are emitted whenever the attribute exists,
            # even if its value is empty or None.
            severity = getattr(issue, "severity", absent)
            if severity is not absent:
                chunks.append(f"**Severity:** {severity}")
            issue_type = getattr(issue, "issue_type", absent)
            if issue_type is not absent:
                chunks.append(f"**Type:** {issue_type}")

            # The remaining fields are emitted only when present and truthy.
            location = getattr(issue, "location", None)
            if location:
                chunks.append(f"**Location:** {location}")
            details = getattr(issue, "details", None)
            if details:
                chunks.append(f"\n{details}")
            suggested_fix = getattr(issue, "suggested_fix", None)
            if suggested_fix:
                chunks.append(f"\n**Suggested Fix:** {suggested_fix}")

            # Blank line separating consecutive issues.
            chunks.append("")

        return "\n".join(chunks)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
async def run_external_validation(
    config: TaskValidationConfig,
    llm_service: LLMService | None,
    task: dict[str, Any],
    changes_context: str,
    force_external: bool = False,
    agent_runner: "AgentRunner | None" = None,
    agent_spawner: "AgentSpawner | None" = None,
) -> ExternalValidationResult:
    """Run external validation with a fresh LLM context or agent.

    Creates a completely fresh validation context without any prior conversation,
    ensuring the validator is objective and has no knowledge of the implementation
    process.

    Three modes are supported, selected by ``config.external_validator_mode``:
    - "llm": Direct LLM API calls (default, backwards compatible)
    - "agent": In-process agent instance with tools for validation
    - "spawn": Spawned headless agent process via gobby-agents

    Any other mode value, or a config without the attribute, is handled as "llm".

    Args:
        config: Validation configuration
        llm_service: LLM service for making requests (used in llm mode)
        task: Task dictionary with id, title, description, validation_criteria
        changes_context: Code changes to validate (typically a git diff)
        force_external: If True, run external validation even if config.use_external_validator is False
        agent_runner: Agent runner for in-process validation (required for agent mode)
        agent_spawner: Agent spawner interface for headless agents (required for spawn mode)

    Returns:
        ExternalValidationResult with status, summary, and any issues found.
        A missing dependency for the selected mode yields status="error" with
        the ``error`` field populated rather than raising.
    """
    # Check if external validation should be skipped
    if not force_external and not config.use_external_validator:
        return ExternalValidationResult(
            status="skipped",
            summary="External validation skipped (disabled in config)",
            issues=[],
        )

    # Dispatch based on mode; getattr keeps configs without the attribute working.
    mode = getattr(config, "external_validator_mode", "llm")

    if mode == "spawn":
        return await _run_spawn_validation(
            config=config,
            task=task,
            changes_context=changes_context,
            agent_spawner=agent_spawner,
        )

    if mode == "agent":
        return await _run_agent_validation(
            config=config,
            task=task,
            changes_context=changes_context,
            agent_runner=agent_runner,
        )

    # "llm" mode (also the fallback for unrecognised mode values).
    if llm_service is None:
        # Mirror the agent/spawn paths: log the misconfiguration and fill in
        # `error`, which the result type documents as set whenever status is "error".
        logger.warning("LLM validation requested but no LLM service available")
        return ExternalValidationResult(
            status="error",
            summary="External validation requires llm_service for 'llm' mode",
            issues=[],
            error="LLM service required for llm mode",
        )

    return await _run_llm_validation(
        config=config,
        llm_service=llm_service,
        task=task,
        changes_context=changes_context,
    )
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
async def _run_llm_validation(
    config: TaskValidationConfig,
    llm_service: LLMService,
    task: dict[str, Any],
    changes_context: str,
) -> ExternalValidationResult:
    """Run validation using direct LLM API calls.

    Builds the validation prompt, sends it with the default external-validator
    system prompt to the provider named by ``config.provider``, and parses the
    text response into a result.

    Args:
        config: Validation configuration. ``external_validator_model`` is used
            when set, otherwise ``model``.
        llm_service: LLM service for making requests
        task: Task dictionary
        changes_context: Code changes to validate

    Returns:
        ExternalValidationResult. Any exception raised while obtaining the
        provider, generating text, or parsing the response is logged and
        converted into a status="error" result instead of propagating.
    """
    # Determine which model to use
    model = config.external_validator_model or config.model

    # Build the validation prompt
    prompt = _build_external_validation_prompt(task, changes_context)

    try:
        provider = llm_service.get_provider(config.provider)
        response = await provider.generate_text(
            prompt=prompt,
            # System prompt emphasizing objectivity. Uses the module constant
            # instead of a second inline copy so the two cannot drift apart.
            system_prompt=DEFAULT_EXTERNAL_SYSTEM_PROMPT,
            model=model,
        )

        return _parse_external_validation_response(response)

    except Exception as e:
        # Deliberately broad: validation failures must never crash the caller.
        logger.error(f"External validation failed: {e}")
        return ExternalValidationResult(
            status="error",
            summary=f"External validation failed: {str(e)}",
            issues=[],
            error=str(e),
        )
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
async def _run_agent_validation(
    config: TaskValidationConfig,
    task: dict[str, Any],
    changes_context: str,
    agent_runner: "AgentRunner | None" = None,
) -> ExternalValidationResult:
    """Run validation with an in-process agent.

    Builds an agent validation prompt and executes it through ``agent_runner``
    with ``mode="in_process"`` (at most 20 turns, 120 second timeout), then
    parses the agent's output for the verdict. No separate process is started
    here; spawning a headless agent is handled by the "spawn" mode.

    Args:
        config: Validation configuration. ``external_validator_model`` is used
            when set, otherwise ``model``; ``provider`` selects the provider.
        task: Task dictionary
        changes_context: Code changes to validate
        agent_runner: Agent runner used to execute the validation agent

    Returns:
        ExternalValidationResult. A missing runner, an agent run that reports
        status "error", or any raised exception yields status="error" with the
        ``error`` field populated.
    """
    if not agent_runner:
        logger.warning("Agent validation requested but no agent runner available")
        return ExternalValidationResult(
            status="error",
            summary="Agent validation not available (no agent runner)",
            issues=[],
            error="Agent runner required for agent mode",
        )

    try:
        # Imported locally rather than at module top; at module level the
        # runner module is only imported under TYPE_CHECKING.
        from gobby.agents.runner import AgentConfig

        # Build prompt for validation agent
        prompt = _build_agent_validation_prompt(task, changes_context)

        # Create agent config for in-process execution
        agent_config = AgentConfig(
            prompt=prompt,
            mode="in_process",  # Run in-process for direct result access
            max_turns=20,
            timeout=120.0,
            source="external_validator",
            model=config.external_validator_model or config.model,
            provider=config.provider,
        )

        # Run the agent directly
        result = await agent_runner.run(agent_config)

        if result.status == "error":
            # Use one message for both fields so `error` is never None on an
            # error result (the summary already fell back to "Unknown error").
            error_msg = result.error or "Unknown error"
            return ExternalValidationResult(
                status="error",
                summary=f"Validation agent failed: {error_msg}",
                issues=[],
                error=error_msg,
            )

        # Parse the agent's response for validation verdict
        return _parse_external_validation_response(result.output or "")

    except Exception as e:
        # Deliberately broad: validation failures must never crash the caller.
        logger.error(f"Agent validation failed: {e}")
        return ExternalValidationResult(
            status="error",
            summary=f"Agent validation failed: {str(e)}",
            issues=[],
            error=str(e),
        )
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
async def _run_spawn_validation(
    config: TaskValidationConfig,
    task: dict[str, Any],
    changes_context: str,
    agent_spawner: "AgentSpawner | None" = None,
) -> ExternalValidationResult:
    """Run validation by spawning a separate headless agent process.

    Starts an agent through ``agent_spawner.start_agent`` with
    ``mode="headless"`` and no parent session or session context, so the
    validator begins without the implementation agent's conversation. The
    result is then fetched with ``agent_spawner.get_agent_result`` and parsed.

    Args:
        config: Validation configuration. ``external_validator_model`` is used
            when set, otherwise ``model``; ``provider`` selects the provider.
        task: Task dictionary
        changes_context: Code changes to validate
        agent_spawner: Agent spawner interface (gobby-agents)

    Returns:
        ExternalValidationResult. A missing spawner, a failed spawn, a missing
        ``agent_id``, a failed or timed-out run, or any raised exception yields
        status="error" with the ``error`` field populated.
    """
    if not agent_spawner:
        logger.warning("Spawn validation requested but no agent spawner available")
        return ExternalValidationResult(
            status="error",
            summary="Spawn validation not available (no agent spawner)",
            issues=[],
            error="Agent spawner required for spawn mode",
        )

    try:
        # Build validation prompt with objective instructions
        prompt = _build_spawn_validation_prompt(task, changes_context)

        # Determine model to use
        model = config.external_validator_model or config.model

        # Spawn a headless agent with no parent context
        spawn_result = await agent_spawner.start_agent(
            prompt=prompt,
            mode="headless",
            model=model,
            provider=config.provider,
            max_turns=5,  # Validation should be quick
            timeout=120.0,
            # Critical: no parent session context to ensure fresh context
            parent_session_id=None,
            session_context=None,
        )

        if not spawn_result.get("success"):
            # `or` also covers an "error" key that is present but None/empty.
            error_msg = spawn_result.get("error") or "Failed to spawn validation agent"
            logger.error(f"Failed to spawn validation agent: {error_msg}")
            return ExternalValidationResult(
                status="error",
                summary=f"Failed to spawn validation agent: {error_msg}",
                issues=[],
                error=error_msg,
            )

        agent_id = spawn_result.get("agent_id")
        if not agent_id:
            return ExternalValidationResult(
                status="error",
                summary="Spawn succeeded but no agent_id returned",
                issues=[],
                error="No agent_id in spawn result",
            )

        # Single call to fetch the agent's result; there is no polling loop
        # here, so any waiting for completion is left to the spawner.
        result = await agent_spawner.get_agent_result(agent_id)

        if not result.get("success"):
            error_msg = result.get("error") or "Agent execution failed"
            # Timeouts and other failures differ only in the summary wording.
            if result.get("status", "error") == "timeout":
                summary = f"Validation agent timed out: {error_msg}"
            else:
                summary = f"Validation agent failed: {error_msg}"
            return ExternalValidationResult(
                status="error",
                summary=summary,
                issues=[],
                error=error_msg,
            )

        # Parse the agent's output. An explicit None is normalised to "" to
        # match the in-process agent path.
        return _parse_external_validation_response(result.get("output") or "")

    except Exception as e:
        # Deliberately broad: validation failures must never crash the caller.
        logger.error(f"Spawn validation failed: {e}")
        return ExternalValidationResult(
            status="error",
            summary=f"Spawn validation failed: {str(e)}",
            issues=[],
            error=str(e),
        )
|
|
424
|
+
|
|
425
|
+
|
|
426
|
+
def _build_spawn_validation_prompt(
    task: dict[str, Any],
    changes_context: str,
) -> str:
    """Compose the prompt handed to a spawned validation agent.

    The wording casts the agent as an objective, adversarial reviewer with
    no prior knowledge of the work and asks it to finish with a JSON verdict.

    Args:
        task: Task dictionary. The ``id``, ``title``, ``description``,
            ``validation_criteria`` and ``category`` keys are read here; the
            whole dictionary is also passed to the file/symbol extractors.
        changes_context: Code changes to validate. They are run through
            ``summarize_diff_for_validation`` before being embedded.

    Returns:
        The rendered prompt text.
    """
    identifier = task.get("id", "unknown")
    title = task.get("title", "Unknown Task")
    description = task.get("description", "")
    criteria = task.get("validation_criteria", "")
    category = task.get("category", "")

    # Files named by the task are handed to the summarizer as priorities;
    # an empty result is passed as None rather than as an empty collection.
    priority_files = extract_mentioned_files(task)
    summarized_changes = summarize_diff_for_validation(
        changes_context, priority_files=priority_files or None
    )

    # Explicit criteria win over the description; fall back to a generic ask.
    criteria_block = "No specific criteria provided. Evaluate for general correctness."
    if criteria:
        criteria_block = f"Acceptance Criteria:\n{criteria}"
    elif description:
        criteria_block = f"Task Description:\n{description}"

    # Optional sections render as empty strings when they have no content.
    category_block = f"\n\n## Task Category\n{category}" if category else ""

    priority_block = ""
    if priority_files:
        joined_files = ", ".join(priority_files)
        priority_block = (
            f"\n\n**Prioritized files based on task description:** {joined_files}"
        )

    mentioned_symbols = extract_mentioned_symbols(task)
    symbols_block = ""
    if mentioned_symbols:
        joined_symbols = ", ".join(mentioned_symbols)
        symbols_block = (
            f"\n\n**Key symbols to verify in the changes:** {joined_symbols}"
            "\nVerify these specific functions/classes are present and correctly implemented."
        )

    # Doubled braces below are literal braces in the rendered JSON example.
    return f"""You are an OBJECTIVE and ADVERSARIAL QA validator.

## Critical Instructions
- You have NO prior context about this task or its implementation
- Do NOT assume the implementation is correct
- Verify each criterion INDEPENDENTLY
- Be CRITICAL - look for what's missing or broken
- Your role is to find problems, not to approve

## Task Being Validated
ID: {identifier}
Title: {title}

{criteria_block}{category_block}{priority_block}{symbols_block}

## Code Changes to Validate
{summarized_changes}

## Validation Process
1. Review each acceptance criterion one by one
2. Check if the code changes actually satisfy each criterion
3. Look for edge cases, missing error handling, security issues
4. Verify tests exist and cover the requirements
5. Be thorough and skeptical

## Required Output
After your analysis, provide your verdict as a JSON object:

```json
{{
"status": "valid" | "invalid",
"summary": "Brief assessment explaining your verdict",
"issues": [
{{
"type": "acceptance_gap|test_failure|lint_error|type_error|security",
"severity": "blocker|major|minor",
"title": "Brief description of the issue",
"location": "file:line (if applicable)",
"details": "Full explanation of the problem",
"suggested_fix": "How to resolve (if known)"
}}
]
}}
```

If ALL criteria are FULLY met with no issues, return status "valid".
If there are ANY problems or gaps, return status "invalid" with detailed issues.

Begin your validation now. Be critical and thorough.
"""
|
|
534
|
+
|
|
535
|
+
|
|
536
|
+
def _build_agent_validation_prompt(
    task: dict[str, Any],
    changes_context: str,
) -> str:
    """Compose the validation prompt used in agent mode.

    Unlike the plain review prompt, this one tells the validator that it has
    tools available (reading files, running tests) and may use them.

    Args:
        task: Task dictionary. The ``title``, ``description`` and
            ``validation_criteria`` keys are read here; the whole dictionary
            is also passed to the file/symbol extractors.
        changes_context: Code changes to validate. They are run through
            ``summarize_diff_for_validation`` before being embedded.

    Returns:
        The rendered prompt text.
    """
    title = task.get("title", "Unknown Task")
    description = task.get("description", "")
    criteria = task.get("validation_criteria", "")

    # Files named by the task are handed to the summarizer as priorities;
    # an empty result is passed as None rather than as an empty collection.
    priority_files = extract_mentioned_files(task)
    summarized_changes = summarize_diff_for_validation(
        changes_context, priority_files=priority_files or None
    )

    # Explicit criteria win over the description; fall back to a generic ask.
    criteria_block = "No specific criteria provided. Evaluate for general correctness."
    if criteria:
        criteria_block = f"Acceptance Criteria:\n{criteria}"
    elif description:
        criteria_block = f"Task Description:\n{description}"

    # Optional sections render as empty strings when they have no content.
    priority_block = ""
    if priority_files:
        joined_files = ", ".join(priority_files)
        priority_block = (
            f"\n\n**Prioritized files based on task description:** {joined_files}"
        )

    mentioned_symbols = extract_mentioned_symbols(task)
    symbols_block = ""
    if mentioned_symbols:
        joined_symbols = ", ".join(mentioned_symbols)
        symbols_block = (
            f"\n\n**Key symbols to verify in the changes:** {joined_symbols}"
            "\nVerify these specific functions/classes are present and correctly implemented."
        )

    # Doubled braces below are literal braces in the rendered JSON example.
    return f"""You are an objective QA validator. You have NO prior context about this task.

## Your Role
Validate whether the code changes satisfy the acceptance criteria. You have access to tools to:
- Read files to verify implementation details
- Run tests if needed
- Check for common issues

## Task Being Validated
Title: {title}

{criteria_block}{priority_block}{symbols_block}

## Code Changes to Validate
{summarized_changes}

## Instructions
1. Review the changes against the acceptance criteria
2. Use tools if needed to verify specific requirements
3. Check for correctness, completeness, and potential issues
4. Be objective and thorough

## Required Output
After your analysis, provide your verdict as a JSON object:

```json
{{
"status": "valid" | "invalid",
"summary": "Brief assessment of the changes",
"issues": [
{{
"type": "acceptance_gap|test_failure|lint_error|type_error|security",
"severity": "blocker|major|minor",
"title": "Brief description",
"location": "file:line (if applicable)",
"details": "Full explanation",
"suggested_fix": "How to resolve (if applicable)"
}}
]
}}
```

If all criteria are met, return status "valid" with an empty issues array.
If there are problems, return status "invalid" with detailed issues.

Begin your validation now.
"""
|
|
633
|
+
|
|
634
|
+
|
|
635
|
+
def _build_external_validation_prompt(
    task: dict[str, Any],
    changes_context: str,
) -> str:
    """Compose the review prompt for external validation.

    Args:
        task: Task dictionary. The ``title``, ``description`` and
            ``validation_criteria`` keys are read here; the whole dictionary
            is also passed to the file/symbol extractors.
        changes_context: Code changes to validate. They are run through
            ``summarize_diff_for_validation`` before being embedded.

    Returns:
        The rendered prompt text, ending with the JSON output contract.
    """
    title = task.get("title", "Unknown Task")
    description = task.get("description", "")
    criteria = task.get("validation_criteria", "")

    # Files named by the task are handed to the summarizer as priorities;
    # an empty result is passed as None rather than as an empty collection.
    priority_files = extract_mentioned_files(task)
    summarized_changes = summarize_diff_for_validation(
        changes_context, priority_files=priority_files or None
    )

    # Explicit criteria win over the description; fall back to a generic ask.
    criteria_block = "No specific criteria provided. Evaluate for general correctness."
    if criteria:
        criteria_block = f"Acceptance Criteria:\n{criteria}"
    elif description:
        criteria_block = f"Task Description:\n{description}"

    # Optional sections render as empty strings when they have no content.
    priority_block = ""
    if priority_files:
        joined_files = ", ".join(priority_files)
        priority_block = (
            f"\n\n**Prioritized files based on task description:** {joined_files}"
        )

    mentioned_symbols = extract_mentioned_symbols(task)
    symbols_block = ""
    if mentioned_symbols:
        joined_symbols = ", ".join(mentioned_symbols)
        symbols_block = (
            f"\n\n**Key symbols to verify in the changes:** {joined_symbols}"
            "\nVerify these specific functions/classes are present and correctly implemented."
        )

    # Doubled braces below are literal braces in the rendered JSON example.
    return f"""You are reviewing code changes for the following task.

## Task
Title: {title}

{criteria_block}{priority_block}{symbols_block}

## Code Changes to Validate
{summarized_changes}

## Instructions
1. Review each change against the acceptance criteria
2. Check for correctness, completeness, and potential issues
3. Be objective - you have no prior context about this implementation

## Output Format
Return your assessment as a JSON object:

```json
{{
"status": "valid" | "invalid",
"summary": "Brief assessment of the changes",
"issues": [
{{
"type": "acceptance_gap|test_failure|lint_error|type_error|security",
"severity": "blocker|major|minor",
"title": "Brief description",
"location": "file:line (if applicable)",
"details": "Full explanation",
"suggested_fix": "How to resolve (if applicable)"
}}
]
}}
```

If all criteria are met, return status "valid" with an empty issues array.
If there are problems, return status "invalid" with detailed issues.
"""
|
|
721
|
+
|
|
722
|
+
|
|
723
|
+
def _parse_external_validation_response(response: str) -> ExternalValidationResult:
    """Turn a validator's raw text reply into an ExternalValidationResult.

    The reply is expected to contain a JSON object with ``status``,
    ``summary`` and ``issues`` keys. Because the text comes from an LLM, the
    fields are normalised defensively instead of being trusted verbatim.

    Args:
        response: Raw LLM response.

    Returns:
        ExternalValidationResult. ``status`` is ``"error"`` when the response
        is empty or no JSON object could be extracted from it. Otherwise it
        is the reported status, trimmed and lower-cased, or ``"pending"``
        when the reply carries no usable status.
    """
    if not response or not response.strip():
        return ExternalValidationResult(
            status="error",
            summary="Empty response from validator",
            issues=[],
            error="Empty response",
        )

    # Extract JSON from response using shared utility. Anything that is not
    # a mapping is treated like "nothing found" so the .get() calls below
    # cannot raise on an unexpected payload shape.
    data = extract_json_object(response)
    if not isinstance(data, dict):
        logger.warning("Failed to parse external validation response")
        return ExternalValidationResult(
            status="error",
            summary="Failed to parse validator response",
            issues=[],
            error="No valid JSON found in response",
        )

    # Models sometimes answer "Valid" or " invalid "; fold those onto the
    # lowercase tokens used everywhere else. A missing, null, blank or
    # non-string status keeps the existing "pending" fallback.
    raw_status = data.get("status")
    if isinstance(raw_status, str) and raw_status.strip():
        status = raw_status.strip().lower()
    else:
        status = "pending"

    # An explicit JSON null must not leak through as None.
    summary = data.get("summary")
    if summary is None:
        summary = ""

    # Parse issues using the issue extraction module. The issues are
    # re-wrapped in their own JSON document for the parser; a null value is
    # replaced by an empty list first.
    raw_issues = data.get("issues")
    if raw_issues is None:
        raw_issues = []
    issues_response = json.dumps({"issues": raw_issues})
    issues = parse_issues_from_response(issues_response)

    return ExternalValidationResult(
        status=status,
        summary=summary,
        issues=issues,
    )
|