gobby 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gobby/__init__.py +3 -0
- gobby/adapters/__init__.py +30 -0
- gobby/adapters/base.py +93 -0
- gobby/adapters/claude_code.py +276 -0
- gobby/adapters/codex.py +1292 -0
- gobby/adapters/gemini.py +343 -0
- gobby/agents/__init__.py +37 -0
- gobby/agents/codex_session.py +120 -0
- gobby/agents/constants.py +112 -0
- gobby/agents/context.py +362 -0
- gobby/agents/definitions.py +133 -0
- gobby/agents/gemini_session.py +111 -0
- gobby/agents/registry.py +618 -0
- gobby/agents/runner.py +968 -0
- gobby/agents/session.py +259 -0
- gobby/agents/spawn.py +916 -0
- gobby/agents/spawners/__init__.py +77 -0
- gobby/agents/spawners/base.py +142 -0
- gobby/agents/spawners/cross_platform.py +266 -0
- gobby/agents/spawners/embedded.py +225 -0
- gobby/agents/spawners/headless.py +226 -0
- gobby/agents/spawners/linux.py +125 -0
- gobby/agents/spawners/macos.py +277 -0
- gobby/agents/spawners/windows.py +308 -0
- gobby/agents/tty_config.py +319 -0
- gobby/autonomous/__init__.py +32 -0
- gobby/autonomous/progress_tracker.py +447 -0
- gobby/autonomous/stop_registry.py +269 -0
- gobby/autonomous/stuck_detector.py +383 -0
- gobby/cli/__init__.py +67 -0
- gobby/cli/__main__.py +8 -0
- gobby/cli/agents.py +529 -0
- gobby/cli/artifacts.py +266 -0
- gobby/cli/daemon.py +329 -0
- gobby/cli/extensions.py +526 -0
- gobby/cli/github.py +263 -0
- gobby/cli/init.py +53 -0
- gobby/cli/install.py +614 -0
- gobby/cli/installers/__init__.py +37 -0
- gobby/cli/installers/antigravity.py +65 -0
- gobby/cli/installers/claude.py +363 -0
- gobby/cli/installers/codex.py +192 -0
- gobby/cli/installers/gemini.py +294 -0
- gobby/cli/installers/git_hooks.py +377 -0
- gobby/cli/installers/shared.py +737 -0
- gobby/cli/linear.py +250 -0
- gobby/cli/mcp.py +30 -0
- gobby/cli/mcp_proxy.py +698 -0
- gobby/cli/memory.py +304 -0
- gobby/cli/merge.py +384 -0
- gobby/cli/projects.py +79 -0
- gobby/cli/sessions.py +622 -0
- gobby/cli/tasks/__init__.py +30 -0
- gobby/cli/tasks/_utils.py +658 -0
- gobby/cli/tasks/ai.py +1025 -0
- gobby/cli/tasks/commits.py +169 -0
- gobby/cli/tasks/crud.py +685 -0
- gobby/cli/tasks/deps.py +135 -0
- gobby/cli/tasks/labels.py +63 -0
- gobby/cli/tasks/main.py +273 -0
- gobby/cli/tasks/search.py +178 -0
- gobby/cli/tui.py +34 -0
- gobby/cli/utils.py +513 -0
- gobby/cli/workflows.py +927 -0
- gobby/cli/worktrees.py +481 -0
- gobby/config/__init__.py +129 -0
- gobby/config/app.py +551 -0
- gobby/config/extensions.py +167 -0
- gobby/config/features.py +472 -0
- gobby/config/llm_providers.py +98 -0
- gobby/config/logging.py +66 -0
- gobby/config/mcp.py +346 -0
- gobby/config/persistence.py +247 -0
- gobby/config/servers.py +141 -0
- gobby/config/sessions.py +250 -0
- gobby/config/tasks.py +784 -0
- gobby/hooks/__init__.py +104 -0
- gobby/hooks/artifact_capture.py +213 -0
- gobby/hooks/broadcaster.py +243 -0
- gobby/hooks/event_handlers.py +723 -0
- gobby/hooks/events.py +218 -0
- gobby/hooks/git.py +169 -0
- gobby/hooks/health_monitor.py +171 -0
- gobby/hooks/hook_manager.py +856 -0
- gobby/hooks/hook_types.py +575 -0
- gobby/hooks/plugins.py +813 -0
- gobby/hooks/session_coordinator.py +396 -0
- gobby/hooks/verification_runner.py +268 -0
- gobby/hooks/webhooks.py +339 -0
- gobby/install/claude/commands/gobby/bug.md +51 -0
- gobby/install/claude/commands/gobby/chore.md +51 -0
- gobby/install/claude/commands/gobby/epic.md +52 -0
- gobby/install/claude/commands/gobby/eval.md +235 -0
- gobby/install/claude/commands/gobby/feat.md +49 -0
- gobby/install/claude/commands/gobby/nit.md +52 -0
- gobby/install/claude/commands/gobby/ref.md +52 -0
- gobby/install/claude/hooks/HOOK_SCHEMAS.md +632 -0
- gobby/install/claude/hooks/hook_dispatcher.py +364 -0
- gobby/install/claude/hooks/validate_settings.py +102 -0
- gobby/install/claude/hooks-template.json +118 -0
- gobby/install/codex/hooks/hook_dispatcher.py +153 -0
- gobby/install/codex/prompts/forget.md +7 -0
- gobby/install/codex/prompts/memories.md +7 -0
- gobby/install/codex/prompts/recall.md +7 -0
- gobby/install/codex/prompts/remember.md +13 -0
- gobby/install/gemini/hooks/hook_dispatcher.py +268 -0
- gobby/install/gemini/hooks-template.json +138 -0
- gobby/install/shared/plugins/code_guardian.py +456 -0
- gobby/install/shared/plugins/example_notify.py +331 -0
- gobby/integrations/__init__.py +10 -0
- gobby/integrations/github.py +145 -0
- gobby/integrations/linear.py +145 -0
- gobby/llm/__init__.py +40 -0
- gobby/llm/base.py +120 -0
- gobby/llm/claude.py +578 -0
- gobby/llm/claude_executor.py +503 -0
- gobby/llm/codex.py +322 -0
- gobby/llm/codex_executor.py +513 -0
- gobby/llm/executor.py +316 -0
- gobby/llm/factory.py +34 -0
- gobby/llm/gemini.py +258 -0
- gobby/llm/gemini_executor.py +339 -0
- gobby/llm/litellm.py +287 -0
- gobby/llm/litellm_executor.py +303 -0
- gobby/llm/resolver.py +499 -0
- gobby/llm/service.py +236 -0
- gobby/mcp_proxy/__init__.py +29 -0
- gobby/mcp_proxy/actions.py +175 -0
- gobby/mcp_proxy/daemon_control.py +198 -0
- gobby/mcp_proxy/importer.py +436 -0
- gobby/mcp_proxy/lazy.py +325 -0
- gobby/mcp_proxy/manager.py +798 -0
- gobby/mcp_proxy/metrics.py +609 -0
- gobby/mcp_proxy/models.py +139 -0
- gobby/mcp_proxy/registries.py +215 -0
- gobby/mcp_proxy/schema_hash.py +381 -0
- gobby/mcp_proxy/semantic_search.py +706 -0
- gobby/mcp_proxy/server.py +549 -0
- gobby/mcp_proxy/services/__init__.py +0 -0
- gobby/mcp_proxy/services/fallback.py +306 -0
- gobby/mcp_proxy/services/recommendation.py +224 -0
- gobby/mcp_proxy/services/server_mgmt.py +214 -0
- gobby/mcp_proxy/services/system.py +72 -0
- gobby/mcp_proxy/services/tool_filter.py +231 -0
- gobby/mcp_proxy/services/tool_proxy.py +309 -0
- gobby/mcp_proxy/stdio.py +565 -0
- gobby/mcp_proxy/tools/__init__.py +27 -0
- gobby/mcp_proxy/tools/agents.py +1103 -0
- gobby/mcp_proxy/tools/artifacts.py +207 -0
- gobby/mcp_proxy/tools/hub.py +335 -0
- gobby/mcp_proxy/tools/internal.py +337 -0
- gobby/mcp_proxy/tools/memory.py +543 -0
- gobby/mcp_proxy/tools/merge.py +422 -0
- gobby/mcp_proxy/tools/metrics.py +283 -0
- gobby/mcp_proxy/tools/orchestration/__init__.py +23 -0
- gobby/mcp_proxy/tools/orchestration/cleanup.py +619 -0
- gobby/mcp_proxy/tools/orchestration/monitor.py +380 -0
- gobby/mcp_proxy/tools/orchestration/orchestrate.py +746 -0
- gobby/mcp_proxy/tools/orchestration/review.py +736 -0
- gobby/mcp_proxy/tools/orchestration/utils.py +16 -0
- gobby/mcp_proxy/tools/session_messages.py +1056 -0
- gobby/mcp_proxy/tools/task_dependencies.py +219 -0
- gobby/mcp_proxy/tools/task_expansion.py +591 -0
- gobby/mcp_proxy/tools/task_github.py +393 -0
- gobby/mcp_proxy/tools/task_linear.py +379 -0
- gobby/mcp_proxy/tools/task_orchestration.py +77 -0
- gobby/mcp_proxy/tools/task_readiness.py +522 -0
- gobby/mcp_proxy/tools/task_sync.py +351 -0
- gobby/mcp_proxy/tools/task_validation.py +843 -0
- gobby/mcp_proxy/tools/tasks/__init__.py +25 -0
- gobby/mcp_proxy/tools/tasks/_context.py +112 -0
- gobby/mcp_proxy/tools/tasks/_crud.py +516 -0
- gobby/mcp_proxy/tools/tasks/_factory.py +176 -0
- gobby/mcp_proxy/tools/tasks/_helpers.py +129 -0
- gobby/mcp_proxy/tools/tasks/_lifecycle.py +517 -0
- gobby/mcp_proxy/tools/tasks/_lifecycle_validation.py +301 -0
- gobby/mcp_proxy/tools/tasks/_resolution.py +55 -0
- gobby/mcp_proxy/tools/tasks/_search.py +215 -0
- gobby/mcp_proxy/tools/tasks/_session.py +125 -0
- gobby/mcp_proxy/tools/workflows.py +973 -0
- gobby/mcp_proxy/tools/worktrees.py +1264 -0
- gobby/mcp_proxy/transports/__init__.py +0 -0
- gobby/mcp_proxy/transports/base.py +95 -0
- gobby/mcp_proxy/transports/factory.py +44 -0
- gobby/mcp_proxy/transports/http.py +139 -0
- gobby/mcp_proxy/transports/stdio.py +213 -0
- gobby/mcp_proxy/transports/websocket.py +136 -0
- gobby/memory/backends/__init__.py +116 -0
- gobby/memory/backends/mem0.py +408 -0
- gobby/memory/backends/memu.py +485 -0
- gobby/memory/backends/null.py +111 -0
- gobby/memory/backends/openmemory.py +537 -0
- gobby/memory/backends/sqlite.py +304 -0
- gobby/memory/context.py +87 -0
- gobby/memory/manager.py +1001 -0
- gobby/memory/protocol.py +451 -0
- gobby/memory/search/__init__.py +66 -0
- gobby/memory/search/text.py +127 -0
- gobby/memory/viz.py +258 -0
- gobby/prompts/__init__.py +13 -0
- gobby/prompts/defaults/expansion/system.md +119 -0
- gobby/prompts/defaults/expansion/user.md +48 -0
- gobby/prompts/defaults/external_validation/agent.md +72 -0
- gobby/prompts/defaults/external_validation/external.md +63 -0
- gobby/prompts/defaults/external_validation/spawn.md +83 -0
- gobby/prompts/defaults/external_validation/system.md +6 -0
- gobby/prompts/defaults/features/import_mcp.md +22 -0
- gobby/prompts/defaults/features/import_mcp_github.md +17 -0
- gobby/prompts/defaults/features/import_mcp_search.md +16 -0
- gobby/prompts/defaults/features/recommend_tools.md +32 -0
- gobby/prompts/defaults/features/recommend_tools_hybrid.md +35 -0
- gobby/prompts/defaults/features/recommend_tools_llm.md +30 -0
- gobby/prompts/defaults/features/server_description.md +20 -0
- gobby/prompts/defaults/features/server_description_system.md +6 -0
- gobby/prompts/defaults/features/task_description.md +31 -0
- gobby/prompts/defaults/features/task_description_system.md +6 -0
- gobby/prompts/defaults/features/tool_summary.md +17 -0
- gobby/prompts/defaults/features/tool_summary_system.md +6 -0
- gobby/prompts/defaults/research/step.md +58 -0
- gobby/prompts/defaults/validation/criteria.md +47 -0
- gobby/prompts/defaults/validation/validate.md +38 -0
- gobby/prompts/loader.py +346 -0
- gobby/prompts/models.py +113 -0
- gobby/py.typed +0 -0
- gobby/runner.py +488 -0
- gobby/search/__init__.py +23 -0
- gobby/search/protocol.py +104 -0
- gobby/search/tfidf.py +232 -0
- gobby/servers/__init__.py +7 -0
- gobby/servers/http.py +636 -0
- gobby/servers/models.py +31 -0
- gobby/servers/routes/__init__.py +23 -0
- gobby/servers/routes/admin.py +416 -0
- gobby/servers/routes/dependencies.py +118 -0
- gobby/servers/routes/mcp/__init__.py +24 -0
- gobby/servers/routes/mcp/hooks.py +135 -0
- gobby/servers/routes/mcp/plugins.py +121 -0
- gobby/servers/routes/mcp/tools.py +1337 -0
- gobby/servers/routes/mcp/webhooks.py +159 -0
- gobby/servers/routes/sessions.py +582 -0
- gobby/servers/websocket.py +766 -0
- gobby/sessions/__init__.py +13 -0
- gobby/sessions/analyzer.py +322 -0
- gobby/sessions/lifecycle.py +240 -0
- gobby/sessions/manager.py +563 -0
- gobby/sessions/processor.py +225 -0
- gobby/sessions/summary.py +532 -0
- gobby/sessions/transcripts/__init__.py +41 -0
- gobby/sessions/transcripts/base.py +125 -0
- gobby/sessions/transcripts/claude.py +386 -0
- gobby/sessions/transcripts/codex.py +143 -0
- gobby/sessions/transcripts/gemini.py +195 -0
- gobby/storage/__init__.py +21 -0
- gobby/storage/agents.py +409 -0
- gobby/storage/artifact_classifier.py +341 -0
- gobby/storage/artifacts.py +285 -0
- gobby/storage/compaction.py +67 -0
- gobby/storage/database.py +357 -0
- gobby/storage/inter_session_messages.py +194 -0
- gobby/storage/mcp.py +680 -0
- gobby/storage/memories.py +562 -0
- gobby/storage/merge_resolutions.py +550 -0
- gobby/storage/migrations.py +860 -0
- gobby/storage/migrations_legacy.py +1359 -0
- gobby/storage/projects.py +166 -0
- gobby/storage/session_messages.py +251 -0
- gobby/storage/session_tasks.py +97 -0
- gobby/storage/sessions.py +817 -0
- gobby/storage/task_dependencies.py +223 -0
- gobby/storage/tasks/__init__.py +42 -0
- gobby/storage/tasks/_aggregates.py +180 -0
- gobby/storage/tasks/_crud.py +449 -0
- gobby/storage/tasks/_id.py +104 -0
- gobby/storage/tasks/_lifecycle.py +311 -0
- gobby/storage/tasks/_manager.py +889 -0
- gobby/storage/tasks/_models.py +300 -0
- gobby/storage/tasks/_ordering.py +119 -0
- gobby/storage/tasks/_path_cache.py +110 -0
- gobby/storage/tasks/_queries.py +343 -0
- gobby/storage/tasks/_search.py +143 -0
- gobby/storage/workflow_audit.py +393 -0
- gobby/storage/worktrees.py +547 -0
- gobby/sync/__init__.py +29 -0
- gobby/sync/github.py +333 -0
- gobby/sync/linear.py +304 -0
- gobby/sync/memories.py +284 -0
- gobby/sync/tasks.py +641 -0
- gobby/tasks/__init__.py +8 -0
- gobby/tasks/build_verification.py +193 -0
- gobby/tasks/commits.py +633 -0
- gobby/tasks/context.py +747 -0
- gobby/tasks/criteria.py +342 -0
- gobby/tasks/enhanced_validator.py +226 -0
- gobby/tasks/escalation.py +263 -0
- gobby/tasks/expansion.py +626 -0
- gobby/tasks/external_validator.py +764 -0
- gobby/tasks/issue_extraction.py +171 -0
- gobby/tasks/prompts/expand.py +327 -0
- gobby/tasks/research.py +421 -0
- gobby/tasks/tdd.py +352 -0
- gobby/tasks/tree_builder.py +263 -0
- gobby/tasks/validation.py +712 -0
- gobby/tasks/validation_history.py +357 -0
- gobby/tasks/validation_models.py +89 -0
- gobby/tools/__init__.py +0 -0
- gobby/tools/summarizer.py +170 -0
- gobby/tui/__init__.py +5 -0
- gobby/tui/api_client.py +281 -0
- gobby/tui/app.py +327 -0
- gobby/tui/screens/__init__.py +25 -0
- gobby/tui/screens/agents.py +333 -0
- gobby/tui/screens/chat.py +450 -0
- gobby/tui/screens/dashboard.py +377 -0
- gobby/tui/screens/memory.py +305 -0
- gobby/tui/screens/metrics.py +231 -0
- gobby/tui/screens/orchestrator.py +904 -0
- gobby/tui/screens/sessions.py +412 -0
- gobby/tui/screens/tasks.py +442 -0
- gobby/tui/screens/workflows.py +289 -0
- gobby/tui/screens/worktrees.py +174 -0
- gobby/tui/widgets/__init__.py +21 -0
- gobby/tui/widgets/chat.py +210 -0
- gobby/tui/widgets/conductor.py +104 -0
- gobby/tui/widgets/menu.py +132 -0
- gobby/tui/widgets/message_panel.py +160 -0
- gobby/tui/widgets/review_gate.py +224 -0
- gobby/tui/widgets/task_tree.py +99 -0
- gobby/tui/widgets/token_budget.py +166 -0
- gobby/tui/ws_client.py +258 -0
- gobby/utils/__init__.py +3 -0
- gobby/utils/daemon_client.py +235 -0
- gobby/utils/git.py +222 -0
- gobby/utils/id.py +38 -0
- gobby/utils/json_helpers.py +161 -0
- gobby/utils/logging.py +376 -0
- gobby/utils/machine_id.py +135 -0
- gobby/utils/metrics.py +589 -0
- gobby/utils/project_context.py +182 -0
- gobby/utils/project_init.py +263 -0
- gobby/utils/status.py +256 -0
- gobby/utils/validation.py +80 -0
- gobby/utils/version.py +23 -0
- gobby/workflows/__init__.py +4 -0
- gobby/workflows/actions.py +1310 -0
- gobby/workflows/approval_flow.py +138 -0
- gobby/workflows/artifact_actions.py +103 -0
- gobby/workflows/audit_helpers.py +110 -0
- gobby/workflows/autonomous_actions.py +286 -0
- gobby/workflows/context_actions.py +394 -0
- gobby/workflows/definitions.py +130 -0
- gobby/workflows/detection_helpers.py +208 -0
- gobby/workflows/engine.py +485 -0
- gobby/workflows/evaluator.py +669 -0
- gobby/workflows/git_utils.py +96 -0
- gobby/workflows/hooks.py +169 -0
- gobby/workflows/lifecycle_evaluator.py +613 -0
- gobby/workflows/llm_actions.py +70 -0
- gobby/workflows/loader.py +333 -0
- gobby/workflows/mcp_actions.py +60 -0
- gobby/workflows/memory_actions.py +272 -0
- gobby/workflows/premature_stop.py +164 -0
- gobby/workflows/session_actions.py +139 -0
- gobby/workflows/state_actions.py +123 -0
- gobby/workflows/state_manager.py +104 -0
- gobby/workflows/stop_signal_actions.py +163 -0
- gobby/workflows/summary_actions.py +344 -0
- gobby/workflows/task_actions.py +249 -0
- gobby/workflows/task_enforcement_actions.py +901 -0
- gobby/workflows/templates.py +52 -0
- gobby/workflows/todo_actions.py +84 -0
- gobby/workflows/webhook.py +223 -0
- gobby/workflows/webhook_executor.py +399 -0
- gobby/worktrees/__init__.py +5 -0
- gobby/worktrees/git.py +690 -0
- gobby/worktrees/merge/__init__.py +20 -0
- gobby/worktrees/merge/conflict_parser.py +177 -0
- gobby/worktrees/merge/resolver.py +485 -0
- gobby-0.2.5.dist-info/METADATA +351 -0
- gobby-0.2.5.dist-info/RECORD +383 -0
- gobby-0.2.5.dist-info/WHEEL +5 -0
- gobby-0.2.5.dist-info/entry_points.txt +2 -0
- gobby-0.2.5.dist-info/licenses/LICENSE.md +193 -0
- gobby-0.2.5.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,712 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Task validation module.
|
|
3
|
+
|
|
4
|
+
Handles validating task completion against acceptance criteria
|
|
5
|
+
using LLM providers.
|
|
6
|
+
|
|
7
|
+
Multi-strategy context gathering:
|
|
8
|
+
1. Current uncommitted changes (staged + unstaged)
|
|
9
|
+
2. Multi-commit window (last N commits, configurable)
|
|
10
|
+
3. File-based analysis (read files mentioned in criteria)
|
|
11
|
+
|
|
12
|
+
TODO: Add strategy 4 - codebase grep for test files related to the task.
|
|
13
|
+
Implementation location: get_validation_context_smart() after Strategy 3.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import logging
|
|
17
|
+
import re
|
|
18
|
+
import subprocess # nosec B404 - subprocess needed for validation commands
|
|
19
|
+
from dataclasses import dataclass
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
from typing import Literal
|
|
22
|
+
|
|
23
|
+
from gobby.config.app import TaskValidationConfig
|
|
24
|
+
from gobby.config.tasks import PatternCriteriaConfig
|
|
25
|
+
from gobby.llm import LLMService
|
|
26
|
+
from gobby.prompts import PromptLoader
|
|
27
|
+
from gobby.tasks.criteria import PatternCriteriaInjector
|
|
28
|
+
from gobby.utils.json_helpers import extract_json_object
|
|
29
|
+
|
|
30
|
+
logger = logging.getLogger(__name__)
|
|
31
|
+
|
|
32
|
+
# Default prompts (fallbacks for strangler fig pattern)
|
|
33
|
+
DEFAULT_VALIDATE_PROMPT = """Validate if the following changes satisfy the requirements.
|
|
34
|
+
|
|
35
|
+
Task: {title}
|
|
36
|
+
{category_section}{criteria_text}
|
|
37
|
+
|
|
38
|
+
{changes_section}
|
|
39
|
+
IMPORTANT: Return ONLY a JSON object, nothing else. No explanation, no preamble.
|
|
40
|
+
Format: {{"status": "valid", "feedback": "..."}} or {{"status": "invalid", "feedback": "..."}}
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
DEFAULT_CRITERIA_PROMPT = """Generate validation criteria for this task.
|
|
44
|
+
|
|
45
|
+
Task: {title}
|
|
46
|
+
Description: {description}
|
|
47
|
+
|
|
48
|
+
CRITICAL RULES - You MUST follow these:
|
|
49
|
+
1. **Only stated requirements** - Include ONLY requirements explicitly written in the title or description
|
|
50
|
+
2. **No invented values** - Do NOT invent specific numbers, timeouts, thresholds, or limits unless they appear in the task
|
|
51
|
+
3. **No invented edge cases** - Do NOT add edge cases, error scenarios, or boundary conditions beyond what's described
|
|
52
|
+
4. **Proportional detail** - Vague tasks get vague criteria; detailed tasks get detailed criteria
|
|
53
|
+
5. **When in doubt, leave it out** - If something isn't mentioned, don't include it
|
|
54
|
+
|
|
55
|
+
For vague requirements like "fix X" or "add Y", use criteria like:
|
|
56
|
+
- "X no longer produces the reported error/warning"
|
|
57
|
+
- "Y functionality works as expected"
|
|
58
|
+
- "Existing tests continue to pass"
|
|
59
|
+
- "No regressions introduced"
|
|
60
|
+
|
|
61
|
+
DO NOT generate criteria like:
|
|
62
|
+
- "timeout defaults to 30 seconds" (unless 30 seconds is in the task description)
|
|
63
|
+
- "handles edge case Z" (unless Z is mentioned in the task)
|
|
64
|
+
- "logs with format X" (unless that format is specified)
|
|
65
|
+
|
|
66
|
+
Format as markdown checkboxes:
|
|
67
|
+
## Deliverable
|
|
68
|
+
- [ ] What the task explicitly asks for
|
|
69
|
+
|
|
70
|
+
## Functional Requirements
|
|
71
|
+
- [ ] Only requirements stated in the description
|
|
72
|
+
|
|
73
|
+
## Verification
|
|
74
|
+
- [ ] Tests pass (if applicable)
|
|
75
|
+
- [ ] No regressions
|
|
76
|
+
"""
|
|
77
|
+
|
|
78
|
+
# Default number of commits to look back when gathering context
|
|
79
|
+
DEFAULT_COMMIT_WINDOW = 10
|
|
80
|
+
# Default upper bound, in characters, used for the ``max_chars`` parameter of the context helpers below
DEFAULT_MAX_CHARS = 50000
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def run_git_command(
|
|
84
|
+
cmd: list[str],
|
|
85
|
+
cwd: str | Path | None = None,
|
|
86
|
+
timeout: int = 10,
|
|
87
|
+
) -> subprocess.CompletedProcess[str] | None:
|
|
88
|
+
"""Run git command with standardized exception handling.
|
|
89
|
+
|
|
90
|
+
Returns CompletedProcess on success, None on exception (logs debug).
|
|
91
|
+
Caller is responsible for checking returncode and processing stdout.
|
|
92
|
+
|
|
93
|
+
Args:
|
|
94
|
+
cmd: Git command as list of strings (e.g., ["git", "diff"])
|
|
95
|
+
cwd: Working directory for the command
|
|
96
|
+
timeout: Command timeout in seconds (default: 10)
|
|
97
|
+
|
|
98
|
+
Returns:
|
|
99
|
+
CompletedProcess on success, None if exception occurred
|
|
100
|
+
"""
|
|
101
|
+
try:
|
|
102
|
+
return subprocess.run( # nosec B603 - cmd passed from internal callers with hardcoded git commands
|
|
103
|
+
cmd,
|
|
104
|
+
capture_output=True,
|
|
105
|
+
text=True,
|
|
106
|
+
timeout=timeout,
|
|
107
|
+
cwd=cwd,
|
|
108
|
+
)
|
|
109
|
+
except Exception as e:
|
|
110
|
+
logger.debug(f"Git command failed ({' '.join(cmd)}): {e}")
|
|
111
|
+
return None
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def get_last_commit_diff(
    max_chars: int = DEFAULT_MAX_CHARS,
    cwd: str | Path | None = None,
) -> str | None:
    """Return the diff introduced by the most recent commit.

    Args:
        max_chars: Character budget; longer diffs are cut and marked as truncated
        cwd: Directory in which git is executed (project repo path)

    Returns:
        The HEAD~1..HEAD diff, or None when git fails or the diff is empty
    """
    completed = run_git_command(["git", "diff", "HEAD~1..HEAD"], cwd=cwd)

    # Guard clauses: no process result, a failing exit status, or blank output.
    if completed is None:
        return None
    if completed.returncode != 0:
        return None

    output: str = completed.stdout
    if not output.strip():
        return None

    if len(output) <= max_chars:
        return output
    return output[:max_chars] + "\n\n... [diff truncated] ..."
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def get_recent_commits(
    n: int = DEFAULT_COMMIT_WINDOW,
    cwd: str | Path | None = None,
) -> list[dict[str, str]]:
    """List the most recent commits as SHA/subject pairs.

    Args:
        n: How many commits to ask git for
        cwd: Directory in which git is executed (project repo path)

    Returns:
        One dict per commit with 'sha' and 'subject' keys; an empty list
        when git fails or prints nothing
    """
    log_cmd = ["git", "log", f"-{n}", "--pretty=format:%H|%s"]
    completed = run_git_command(log_cmd, cwd=cwd)
    if completed is None or completed.returncode != 0:
        return []

    log_output = completed.stdout.strip()
    if not log_output:
        return []

    # Each line is "<sha>|<subject>"; split at the first "|" only, because the
    # subject may contain further pipes. Lines without a separator are ignored.
    pieces = (row.partition("|") for row in log_output.split("\n"))
    return [{"sha": sha, "subject": subject} for sha, sep, subject in pieces if sep]
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def get_multi_commit_diff(
    commit_count: int = DEFAULT_COMMIT_WINDOW,
    max_chars: int = DEFAULT_MAX_CHARS,
    cwd: str | Path | None = None,
) -> str | None:
    """Get combined diff from the last N commits.

    If the repository's first-parent history is shorter than the requested
    window, ``HEAD~N`` is not a valid revision and the plain diff fails. In
    that case the window is clamped to the deepest ancestor that exists, so
    young repositories and shallow clones still produce a diff.

    Args:
        commit_count: Number of commits to include in diff
        max_chars: Maximum characters to return
        cwd: Working directory for git commands (project repo path)

    Returns:
        Combined diff string, or None if not available
    """
    if commit_count < 1:
        # HEAD~0..HEAD is always empty and a negative offset is not a valid
        # revision, so there is nothing to diff.
        return None

    result = run_git_command(["git", "diff", f"HEAD~{commit_count}..HEAD"], cwd=cwd, timeout=30)

    if result is not None and result.returncode != 0:
        # Most likely HEAD~N does not exist. Count the commits on the
        # first-parent chain (the chain that "~" walks) and retry with the
        # deepest ancestor available.
        depth = run_git_command(
            ["git", "rev-list", "--count", "--first-parent", "HEAD"], cwd=cwd
        )
        if depth is None or depth.returncode != 0:
            return None
        try:
            deepest = int(depth.stdout.strip()) - 1
        except ValueError:
            return None
        if not 0 < deepest < commit_count:
            # Either only a root commit exists, or history was long enough
            # and the failure had another cause; nothing more to try.
            return None
        result = run_git_command(["git", "diff", f"HEAD~{deepest}..HEAD"], cwd=cwd, timeout=30)

    if result is None or result.returncode != 0 or not result.stdout.strip():
        return None

    diff: str = result.stdout
    if len(diff) > max_chars:
        diff = diff[:max_chars] + "\n\n... [diff truncated] ..."

    return diff
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def get_commits_since(
    since_sha: str,
    max_chars: int = DEFAULT_MAX_CHARS,
    cwd: str | Path | None = None,
) -> str | None:
    """Return the diff between a given commit and HEAD.

    Args:
        since_sha: Commit SHA that marks the start of the range
        max_chars: Character budget; longer diffs are cut and marked as truncated
        cwd: Directory in which git is executed (project repo path)

    Returns:
        The diff text, or None when git fails or the diff is empty
    """
    revision_range = f"{since_sha}..HEAD"
    outcome = run_git_command(["git", "diff", revision_range], cwd=cwd, timeout=30)

    succeeded = outcome is not None and outcome.returncode == 0
    if not succeeded or not outcome.stdout.strip():
        return None

    text: str = outcome.stdout
    return text if len(text) <= max_chars else text[:max_chars] + "\n\n... [diff truncated] ..."
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def extract_file_patterns_from_text(text: str) -> list[str]:
    """Extract file paths and patterns from text (criteria, description, title).

    Looks for:
    - Explicit file paths (src/foo/bar.py, tests/test_foo.py, .github/ci.yml)
    - Module references (gobby.tasks.validation -> src/gobby/tasks/validation.py)
    - Test patterns (test_validation -> tests/**/test_validation*.py)
    - Class names ending in Manager/Validator/Plugin/Handler/Service
      (LLMService -> **/llm_service*.py)

    URLs are ignored when looking for explicit file paths.

    Args:
        text: Text to search for file patterns

    Returns:
        Sorted list of unique file path patterns (may include globs)
    """
    patterns: set[str] = set()

    # Remove URLs before the path scan. The path regex would otherwise pick up
    # fragments such as "example.com" or "docs/page.html" from inside them.
    path_text = re.sub(r"\b(?:https?://|www\.)\S+", " ", text)

    # Match explicit file paths like src/foo/bar.py or ./tests/test_x.py.
    # A match starts with at most one "." or "/" character.
    file_path_re = re.compile(r"[./]?[\w\-]+(?:/[\w\-]+)*\.\w+")
    for match in file_path_re.findall(path_text):
        patterns.add(match.lstrip("./"))
        if match.startswith("."):
            # A leading dot usually denotes a hidden file or directory
            # (.github/..., .env.example); keep that spelling too, since the
            # stripped form alone would never match it on disk.
            patterns.add(match)

    # Match module references like gobby.tasks.validation
    module_re = re.compile(r"\b(gobby(?:\.\w+)+)\b")
    for match in module_re.findall(text):
        # Convert module path to file path
        patterns.add("src/" + match.replace(".", "/") + ".py")

    # Extract test file hints from test_ prefixed words
    test_re = re.compile(r"\btest_(\w+)\b")
    for match in test_re.findall(text):
        patterns.add(f"tests/**/test_{match}*.py")

    # Class names hint at a module named after them in snake_case
    class_re = re.compile(r"\b([A-Z][a-zA-Z0-9]+(?:Manager|Validator|Plugin|Handler|Service))\b")
    for match in class_re.findall(text):
        # Two-pass CamelCase -> snake_case that keeps acronyms together
        # (LLMService -> llm_service rather than l_l_m_service).
        snake = re.sub(r"(.)([A-Z][a-z]+)", r"\1_\2", match)
        snake = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", snake).lower()
        patterns.add(f"**/{snake}*.py")

    # Sorted so that callers which cap the number of files pick the same
    # ones on every run (set iteration order varies between processes).
    return sorted(patterns)
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def find_matching_files(
|
|
263
|
+
patterns: list[str],
|
|
264
|
+
base_dir: str | Path = ".",
|
|
265
|
+
max_files: int = 10,
|
|
266
|
+
) -> list[Path]:
|
|
267
|
+
"""Find files matching the given patterns.
|
|
268
|
+
|
|
269
|
+
Args:
|
|
270
|
+
patterns: List of file path patterns (may include globs)
|
|
271
|
+
base_dir: Base directory to search from
|
|
272
|
+
max_files: Maximum number of files to return
|
|
273
|
+
|
|
274
|
+
Returns:
|
|
275
|
+
List of Path objects for matching files
|
|
276
|
+
"""
|
|
277
|
+
base = Path(base_dir)
|
|
278
|
+
found: list[Path] = []
|
|
279
|
+
|
|
280
|
+
for pattern in patterns:
|
|
281
|
+
if len(found) >= max_files:
|
|
282
|
+
break
|
|
283
|
+
|
|
284
|
+
# Handle glob patterns
|
|
285
|
+
if "*" in pattern:
|
|
286
|
+
try:
|
|
287
|
+
matches = list(base.glob(pattern))
|
|
288
|
+
for match in matches[: max_files - len(found)]:
|
|
289
|
+
if match.is_file() and match not in found:
|
|
290
|
+
found.append(match)
|
|
291
|
+
except Exception as e:
|
|
292
|
+
logger.debug(f"Failed to glob pattern {pattern}: {e}")
|
|
293
|
+
else:
|
|
294
|
+
# Direct file path
|
|
295
|
+
path = base / pattern
|
|
296
|
+
if path.is_file() and path not in found:
|
|
297
|
+
found.append(path)
|
|
298
|
+
|
|
299
|
+
return found
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def read_files_content(
    files: list[Path],
    max_chars: int = DEFAULT_MAX_CHARS,
) -> str:
    """Read content from multiple files.

    Each file is rendered as a "=== path ===" header followed by its text.
    Only as much of a file as still fits the character budget is read, so a
    very large file is never loaded into memory in full.

    Args:
        files: List of file paths to read
        max_chars: Maximum total characters of file text to return (headers
            and truncation markers are not counted against the budget)

    Returns:
        Concatenated file contents with headers
    """
    content_parts: list[str] = []
    total_chars = 0

    for file_path in files:
        if total_chars >= max_chars:
            content_parts.append("\n... [additional files truncated] ...")
            break

        remaining = max_chars - total_chars

        try:
            with file_path.open(encoding="utf-8") as handle:
                # One character beyond the budget is enough to tell whether
                # the file has to be truncated.
                content = handle.read(remaining + 1)
        except Exception as e:
            # Unreadable or non-UTF-8 files are reported inline, not fatal
            logger.debug("Failed to read %s: %s", file_path, e)
            content_parts.append(f"=== {file_path} ===\n(Error reading file: {e})\n")
            continue

        if len(content) > remaining:
            content = content[:remaining] + "\n... [file truncated] ..."

        content_parts.append(f"=== {file_path} ===\n{content}\n")
        total_chars += len(content)

    return "\n".join(content_parts)
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
def get_validation_context_smart(
    task_title: str,
    validation_criteria: str | None = None,
    task_description: str | None = None,
    commit_window: int = DEFAULT_COMMIT_WINDOW,
    max_chars: int = DEFAULT_MAX_CHARS,
    cwd: str | Path | None = None,
) -> str | None:
    """Assemble validation context from several sources.

    Sources are consulted in this order, each drawing on a shared budget:
    1. Uncommitted changes (staged, then unstaged)
    2. Combined diff of the last N commits plus a short commit list
    3. Contents of files referenced in the title, criteria or description

    TODO: Add strategy 4 - codebase grep for test files related to the task.
    Implementation location: after Strategy 3 below.

    Args:
        task_title: Title of the task
        validation_criteria: Validation criteria text, if any
        task_description: Task description text, if any
        commit_window: How many commits to look back
        max_chars: Maximum characters to return
        cwd: Directory in which git is executed (project repo path)

    Returns:
        The assembled context, or None when no source produced anything
    """
    sections: list[str] = []
    budget = max_chars

    # Strategy 1: Current uncommitted changes. Each diff may use at most half
    # of whatever budget is left when it is processed.
    staged_result = run_git_command(["git", "diff", "--cached"], cwd=cwd)
    staged_text = staged_result.stdout if staged_result is not None else ""
    if staged_text.strip():
        content = staged_text[: budget // 2]
        budget -= len(content)
        sections.append(f"=== STAGED CHANGES ===\n{content}")

    unstaged_result = run_git_command(["git", "diff"], cwd=cwd)
    unstaged_text = unstaged_result.stdout if unstaged_result is not None else ""
    if unstaged_text.strip():
        content = unstaged_text[: budget // 2]
        budget -= len(content)
        sections.append(f"=== UNSTAGED CHANGES ===\n{content}")

    # Strategy 2: Multi-commit window, only while a reasonable budget remains
    if budget > 5000:
        multi_diff = get_multi_commit_diff(commit_window, budget // 2, cwd=cwd)
        if multi_diff:
            # Short list of the newest commits to orient the reader
            recent = get_recent_commits(commit_window, cwd=cwd)
            summary_lines = [f" - {c['sha'][:8]}: {c['subject'][:60]}" for c in recent[:5]]
            commit_summary = "\n".join(summary_lines)

            sections.append(
                f"=== RECENT COMMITS (last {commit_window}) ===\n"
                f"{commit_summary}\n\n"
                f"=== COMBINED DIFF ===\n{multi_diff}"
            )
            budget -= len(multi_diff) + len(commit_summary)

    # Strategy 3: File-based analysis of paths mentioned in the task info
    if budget > 2000:
        search_text = f"{task_title} {validation_criteria or ''} {task_description or ''}"
        patterns = extract_file_patterns_from_text(search_text)
        files = find_matching_files(patterns, base_dir=cwd or ".", max_files=5) if patterns else []
        if files:
            file_content = read_files_content(files, budget)
            sections.append(f"=== RELEVANT FILES ===\n{file_content}")

    if not sections:
        return None

    # Final safety net: headers and markers are not part of the budget above
    combined = "\n\n".join(sections)
    if len(combined) <= max_chars:
        return combined
    return combined[:max_chars] + "\n\n... [context truncated] ..."
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
def get_git_diff(
    max_chars: int = 50000,
    fallback_to_last_commit: bool = True,
    cwd: str | Path | None = None,
) -> str | None:
    """Return the repository's pending changes as one labelled diff string.

    Staged and unstaged diffs are collected first. When neither contains
    any text and ``fallback_to_last_commit`` is set, the diff of the most
    recent commit is returned instead.

    Args:
        max_chars: Upper bound on the length of the combined uncommitted
            diff; longer output is cut and a truncation marker appended.
            Also forwarded to ``get_last_commit_diff`` for the fallback.
        fallback_to_last_commit: Whether to fall back to the last commit's
            diff when there are no uncommitted changes.
        cwd: Directory in which the git commands are run.

    Returns:
        The labelled diff text, or None when both git invocations failed
        or there is nothing to report.
    """
    unstaged = run_git_command(["git", "diff"], cwd=cwd)
    staged = run_git_command(["git", "diff", "--cached"], cwd=cwd)

    def _succeeded(result) -> bool:
        # A missing result or a non-zero exit code both count as failure.
        return result is not None and result.returncode == 0

    # Give up only when *both* invocations failed.
    if not (_succeeded(unstaged) or _succeeded(staged)):
        return None

    # Staged changes are listed before unstaged ones.
    labelled_results = (
        ("=== STAGED CHANGES ===\n", staged),
        ("=== UNSTAGED CHANGES ===\n", unstaged),
    )
    sections = [
        header + result.stdout
        for header, result in labelled_results
        if result and result.stdout.strip()
    ]

    if not sections:
        if not fallback_to_last_commit:
            return None
        # Nothing uncommitted: report the most recent commit instead.
        last_commit = get_last_commit_diff(max_chars, cwd=cwd)
        return f"=== LAST COMMIT ===\n{last_commit}" if last_commit else None

    combined = "\n".join(sections)
    if len(combined) <= max_chars:
        return combined
    return combined[:max_chars] + "\n\n... [diff truncated] ..."
|
|
472
|
+
|
|
473
|
+
|
|
474
|
+
@dataclass
|
|
475
|
+
class ValidationResult:
|
|
476
|
+
"""Result of task validation."""
|
|
477
|
+
|
|
478
|
+
status: Literal["valid", "invalid", "pending"]
|
|
479
|
+
feedback: str | None = None
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
class TaskValidator:
|
|
483
|
+
"""Validates task completion using LLM."""
|
|
484
|
+
|
|
485
|
+
def __init__(
    self,
    config: TaskValidationConfig,
    llm_service: LLMService,
    project_dir: Path | None = None,
):
    """Store the validator's collaborators and prepare its prompt loader.

    Args:
        config: Validation settings (enabled flag, provider, model, prompts).
        llm_service: Service used to obtain the LLM provider.
        project_dir: Passed through to ``PromptLoader`` as ``project_dir``.
    """
    loader = PromptLoader(project_dir=project_dir)

    # Register fallbacks for strangler fig pattern: the built-in prompts
    # are registered under the two template names used by this class.
    loader.register_fallback("validation/validate", lambda: DEFAULT_VALIDATE_PROMPT)
    loader.register_fallback("validation/criteria", lambda: DEFAULT_CRITERIA_PROMPT)

    self.llm_service = llm_service
    self.config = config
    self._loader = loader
|
|
498
|
+
|
|
499
|
+
async def gather_validation_context(self, file_paths: list[str]) -> str:
    """
    Read the given files and concatenate them into one context string.

    Each file contributes a ``--- <path> ---`` header followed by its
    UTF-8 decoded content. A file that cannot be read is logged and
    represented by an inline error note instead of aborting the whole
    operation. Note that the reads themselves are ordinary blocking
    ``open()`` calls.

    Args:
        file_paths: List of absolute file paths to read.

    Returns:
        The per-file sections joined by newlines (empty string for an
        empty list).
    """
    sections: list[str] = []
    for file_path in file_paths:
        header = f"--- {file_path} ---\n"
        try:
            with open(file_path, encoding="utf-8") as handle:
                text = handle.read()
        except Exception as exc:
            # Best effort: keep going and record the failure in the output.
            logger.warning(f"Failed to read file {file_path} for validation: {exc}")
            sections.append(f"{header}(Error reading file: {exc})\n")
        else:
            sections.append(f"{header}{text}\n")
    return "\n".join(sections)
|
|
519
|
+
|
|
520
|
+
async def validate_task(
    self,
    task_id: str,
    title: str,
    description: str | None,
    changes_summary: str,
    validation_criteria: str | None = None,
    context_files: list[str] | None = None,
    category: str | None = None,
) -> ValidationResult:
    """
    Validate task completion by asking the configured LLM for a verdict.

    Args:
        task_id: Task ID (used in log messages).
        title: Task title.
        description: Task description (used as fallback if no validation_criteria).
        changes_summary: Summary of changes made (files, diffs, etc.).
        validation_criteria: Specific criteria to validate against (optional).
        context_files: List of files to read for context (optional).
        category: Task domain category (e.g., 'manual', 'code', 'test').

    Returns:
        ValidationResult with status and feedback. The status is always one
        of "valid", "invalid" or "pending": "pending" is returned when
        validation is disabled, when neither description nor criteria are
        given, when the LLM call or response parsing fails, and when the
        LLM reports a status outside the allowed set.
    """
    if not self.config.enabled:
        return ValidationResult(status="pending", feedback="Validation disabled")

    if not description and not validation_criteria:
        logger.warning(f"Cannot validate task {task_id}: missing description and criteria")
        return ValidationResult(
            status="pending", feedback="Missing task description and validation criteria"
        )

    logger.info(f"Validating task {task_id}: {title}")

    # Gather context if provided
    file_context = ""
    if context_files:
        file_context = await self.gather_validation_context(context_files)
    # Cap the file context once so every use below shares the same limit.
    capped_context = file_context[:50000]

    # Explicit criteria take precedence over the task description.
    criteria_text = (
        f"Validation Criteria:\n{validation_criteria}"
        if validation_criteria
        else f"Task Description:\n{description}"
    )

    # Detect if changes_summary is a git diff
    is_git_diff = changes_summary.startswith("Git diff") or "@@" in changes_summary

    if is_git_diff:
        changes_section = (
            "Code Changes (git diff):\n"
            "Analyze these ACTUAL code changes to verify the implementation.\n\n"
            f"{changes_summary}\n\n"
        )
    else:
        changes_section = f"Changes Summary:\n{changes_summary}\n\n"

    # Build test strategy section if a category was provided
    category_section = ""
    if category:
        category_section = f"Test Strategy: {category}\n"
        if category.lower() == "manual":
            category_section += (
                "NOTE: This task uses MANUAL testing. Do NOT require automated test files. "
                "Validation should focus on whether the implementation is correct, "
                "not whether automated tests exist.\n\n"
            )
        else:
            category_section += "\n"

    # Build prompt using PromptLoader or legacy config
    if self.config.prompt:
        # Legacy inline config (deprecated): the configured prompt is used
        # verbatim, with only the file context appended.
        prompt = self.config.prompt
        if capped_context:
            prompt += f"\nFile Context:\n{capped_context}\n"
    else:
        # Use PromptLoader
        prompt_path = self.config.prompt_path or "validation/validate"
        template_context = {
            "title": title,
            "category_section": category_section,
            "criteria_text": criteria_text,
            "changes_section": changes_section,
            "file_context": capped_context,
        }
        try:
            prompt = self._loader.render(prompt_path, template_context)
        except FileNotFoundError:
            logger.debug(f"Prompt template '{prompt_path}' not found, using fallback")
            prompt = DEFAULT_VALIDATE_PROMPT.format(**template_context)
            # NOTE(review): file context is appended only on this fallback
            # path, presumably because the built-in prompt has no
            # file-context placeholder - confirm against the template.
            if capped_context:
                prompt += f"\nFile Context:\n{capped_context}\n"

    try:
        provider = self.llm_service.get_provider(self.config.provider)
        response_content = await provider.generate_text(
            prompt=prompt,
            system_prompt=self.config.system_prompt,
            model=self.config.model,
        )

        if not response_content or not response_content.strip():
            logger.warning(f"Empty LLM response for task {task_id} validation")
            return ValidationResult(
                status="pending", feedback="Validation failed: Empty response from LLM"
            )

        logger.debug(f"Validation LLM response for {task_id}: {response_content[:200]}...")

        # Extract JSON using shared utility
        result_data = extract_json_object(response_content)
        if result_data is None:
            logger.warning(f"Failed to parse JSON from validation response for {task_id}")
            return ValidationResult(
                status="pending", feedback="Validation failed: Could not parse response"
            )

        # The status comes from the LLM and Literal annotations are not
        # enforced at runtime, so normalise it and reject anything outside
        # the allowed set rather than leaking it to callers.
        raw_status = result_data.get("status", "pending")
        status = raw_status.strip().lower() if isinstance(raw_status, str) else None
        if status not in ("valid", "invalid", "pending"):
            logger.warning(
                f"Unexpected validation status {raw_status!r} for task {task_id}; "
                "treating as pending"
            )
            status = "pending"

        return ValidationResult(status=status, feedback=result_data.get("feedback"))

    except Exception as e:
        # Any provider or parsing error degrades to a "pending" verdict.
        logger.error(f"Failed to validate task {task_id}: {e}")
        return ValidationResult(status="pending", feedback=f"Validation failed: {str(e)}")
|
|
648
|
+
|
|
649
|
+
async def generate_criteria(
|
|
650
|
+
self,
|
|
651
|
+
title: str,
|
|
652
|
+
description: str | None = None,
|
|
653
|
+
labels: list[str] | None = None,
|
|
654
|
+
) -> str | None:
|
|
655
|
+
"""
|
|
656
|
+
Generate validation criteria from task title and description.
|
|
657
|
+
|
|
658
|
+
When labels are provided (e.g., 'tdd', 'strangler-fig', 'refactoring'),
|
|
659
|
+
pattern-specific criteria from PatternCriteriaConfig are appended to
|
|
660
|
+
the LLM-generated criteria.
|
|
661
|
+
|
|
662
|
+
Args:
|
|
663
|
+
title: Task title
|
|
664
|
+
description: Task description (optional)
|
|
665
|
+
labels: Task labels for pattern criteria injection (optional)
|
|
666
|
+
|
|
667
|
+
Returns:
|
|
668
|
+
Generated validation criteria string, or None if generation fails
|
|
669
|
+
"""
|
|
670
|
+
if not self.config.enabled:
|
|
671
|
+
return None
|
|
672
|
+
|
|
673
|
+
# Build prompt using PromptLoader or legacy config
|
|
674
|
+
template_context = {
|
|
675
|
+
"title": title,
|
|
676
|
+
"description": description or "(no description)",
|
|
677
|
+
}
|
|
678
|
+
|
|
679
|
+
if self.config.criteria_prompt:
|
|
680
|
+
# Legacy inline config (deprecated)
|
|
681
|
+
prompt = self.config.criteria_prompt.format(**template_context)
|
|
682
|
+
else:
|
|
683
|
+
# Use PromptLoader
|
|
684
|
+
prompt_path = self.config.criteria_prompt_path or "validation/criteria"
|
|
685
|
+
try:
|
|
686
|
+
prompt = self._loader.render(prompt_path, template_context)
|
|
687
|
+
except FileNotFoundError:
|
|
688
|
+
logger.debug(f"Prompt template '{prompt_path}' not found, using fallback")
|
|
689
|
+
prompt = DEFAULT_CRITERIA_PROMPT.format(**template_context)
|
|
690
|
+
|
|
691
|
+
try:
|
|
692
|
+
provider = self.llm_service.get_provider(self.config.provider)
|
|
693
|
+
response = await provider.generate_text(
|
|
694
|
+
prompt=prompt,
|
|
695
|
+
system_prompt=self.config.criteria_system_prompt,
|
|
696
|
+
model=self.config.model,
|
|
697
|
+
)
|
|
698
|
+
llm_criteria = response.strip()
|
|
699
|
+
|
|
700
|
+
# Inject pattern-specific criteria if labels are provided
|
|
701
|
+
if labels:
|
|
702
|
+
pattern_config = PatternCriteriaConfig()
|
|
703
|
+
injector = PatternCriteriaInjector(pattern_config=pattern_config)
|
|
704
|
+
pattern_criteria = injector.inject_for_labels(labels=labels)
|
|
705
|
+
|
|
706
|
+
if pattern_criteria:
|
|
707
|
+
llm_criteria = f"{llm_criteria}\n\n{pattern_criteria}"
|
|
708
|
+
|
|
709
|
+
return llm_criteria
|
|
710
|
+
except Exception as e:
|
|
711
|
+
logger.error(f"Failed to generate validation criteria: {e}")
|
|
712
|
+
return None
|