gobby 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (383) hide show
  1. gobby/__init__.py +3 -0
  2. gobby/adapters/__init__.py +30 -0
  3. gobby/adapters/base.py +93 -0
  4. gobby/adapters/claude_code.py +276 -0
  5. gobby/adapters/codex.py +1292 -0
  6. gobby/adapters/gemini.py +343 -0
  7. gobby/agents/__init__.py +37 -0
  8. gobby/agents/codex_session.py +120 -0
  9. gobby/agents/constants.py +112 -0
  10. gobby/agents/context.py +362 -0
  11. gobby/agents/definitions.py +133 -0
  12. gobby/agents/gemini_session.py +111 -0
  13. gobby/agents/registry.py +618 -0
  14. gobby/agents/runner.py +968 -0
  15. gobby/agents/session.py +259 -0
  16. gobby/agents/spawn.py +916 -0
  17. gobby/agents/spawners/__init__.py +77 -0
  18. gobby/agents/spawners/base.py +142 -0
  19. gobby/agents/spawners/cross_platform.py +266 -0
  20. gobby/agents/spawners/embedded.py +225 -0
  21. gobby/agents/spawners/headless.py +226 -0
  22. gobby/agents/spawners/linux.py +125 -0
  23. gobby/agents/spawners/macos.py +277 -0
  24. gobby/agents/spawners/windows.py +308 -0
  25. gobby/agents/tty_config.py +319 -0
  26. gobby/autonomous/__init__.py +32 -0
  27. gobby/autonomous/progress_tracker.py +447 -0
  28. gobby/autonomous/stop_registry.py +269 -0
  29. gobby/autonomous/stuck_detector.py +383 -0
  30. gobby/cli/__init__.py +67 -0
  31. gobby/cli/__main__.py +8 -0
  32. gobby/cli/agents.py +529 -0
  33. gobby/cli/artifacts.py +266 -0
  34. gobby/cli/daemon.py +329 -0
  35. gobby/cli/extensions.py +526 -0
  36. gobby/cli/github.py +263 -0
  37. gobby/cli/init.py +53 -0
  38. gobby/cli/install.py +614 -0
  39. gobby/cli/installers/__init__.py +37 -0
  40. gobby/cli/installers/antigravity.py +65 -0
  41. gobby/cli/installers/claude.py +363 -0
  42. gobby/cli/installers/codex.py +192 -0
  43. gobby/cli/installers/gemini.py +294 -0
  44. gobby/cli/installers/git_hooks.py +377 -0
  45. gobby/cli/installers/shared.py +737 -0
  46. gobby/cli/linear.py +250 -0
  47. gobby/cli/mcp.py +30 -0
  48. gobby/cli/mcp_proxy.py +698 -0
  49. gobby/cli/memory.py +304 -0
  50. gobby/cli/merge.py +384 -0
  51. gobby/cli/projects.py +79 -0
  52. gobby/cli/sessions.py +622 -0
  53. gobby/cli/tasks/__init__.py +30 -0
  54. gobby/cli/tasks/_utils.py +658 -0
  55. gobby/cli/tasks/ai.py +1025 -0
  56. gobby/cli/tasks/commits.py +169 -0
  57. gobby/cli/tasks/crud.py +685 -0
  58. gobby/cli/tasks/deps.py +135 -0
  59. gobby/cli/tasks/labels.py +63 -0
  60. gobby/cli/tasks/main.py +273 -0
  61. gobby/cli/tasks/search.py +178 -0
  62. gobby/cli/tui.py +34 -0
  63. gobby/cli/utils.py +513 -0
  64. gobby/cli/workflows.py +927 -0
  65. gobby/cli/worktrees.py +481 -0
  66. gobby/config/__init__.py +129 -0
  67. gobby/config/app.py +551 -0
  68. gobby/config/extensions.py +167 -0
  69. gobby/config/features.py +472 -0
  70. gobby/config/llm_providers.py +98 -0
  71. gobby/config/logging.py +66 -0
  72. gobby/config/mcp.py +346 -0
  73. gobby/config/persistence.py +247 -0
  74. gobby/config/servers.py +141 -0
  75. gobby/config/sessions.py +250 -0
  76. gobby/config/tasks.py +784 -0
  77. gobby/hooks/__init__.py +104 -0
  78. gobby/hooks/artifact_capture.py +213 -0
  79. gobby/hooks/broadcaster.py +243 -0
  80. gobby/hooks/event_handlers.py +723 -0
  81. gobby/hooks/events.py +218 -0
  82. gobby/hooks/git.py +169 -0
  83. gobby/hooks/health_monitor.py +171 -0
  84. gobby/hooks/hook_manager.py +856 -0
  85. gobby/hooks/hook_types.py +575 -0
  86. gobby/hooks/plugins.py +813 -0
  87. gobby/hooks/session_coordinator.py +396 -0
  88. gobby/hooks/verification_runner.py +268 -0
  89. gobby/hooks/webhooks.py +339 -0
  90. gobby/install/claude/commands/gobby/bug.md +51 -0
  91. gobby/install/claude/commands/gobby/chore.md +51 -0
  92. gobby/install/claude/commands/gobby/epic.md +52 -0
  93. gobby/install/claude/commands/gobby/eval.md +235 -0
  94. gobby/install/claude/commands/gobby/feat.md +49 -0
  95. gobby/install/claude/commands/gobby/nit.md +52 -0
  96. gobby/install/claude/commands/gobby/ref.md +52 -0
  97. gobby/install/claude/hooks/HOOK_SCHEMAS.md +632 -0
  98. gobby/install/claude/hooks/hook_dispatcher.py +364 -0
  99. gobby/install/claude/hooks/validate_settings.py +102 -0
  100. gobby/install/claude/hooks-template.json +118 -0
  101. gobby/install/codex/hooks/hook_dispatcher.py +153 -0
  102. gobby/install/codex/prompts/forget.md +7 -0
  103. gobby/install/codex/prompts/memories.md +7 -0
  104. gobby/install/codex/prompts/recall.md +7 -0
  105. gobby/install/codex/prompts/remember.md +13 -0
  106. gobby/install/gemini/hooks/hook_dispatcher.py +268 -0
  107. gobby/install/gemini/hooks-template.json +138 -0
  108. gobby/install/shared/plugins/code_guardian.py +456 -0
  109. gobby/install/shared/plugins/example_notify.py +331 -0
  110. gobby/integrations/__init__.py +10 -0
  111. gobby/integrations/github.py +145 -0
  112. gobby/integrations/linear.py +145 -0
  113. gobby/llm/__init__.py +40 -0
  114. gobby/llm/base.py +120 -0
  115. gobby/llm/claude.py +578 -0
  116. gobby/llm/claude_executor.py +503 -0
  117. gobby/llm/codex.py +322 -0
  118. gobby/llm/codex_executor.py +513 -0
  119. gobby/llm/executor.py +316 -0
  120. gobby/llm/factory.py +34 -0
  121. gobby/llm/gemini.py +258 -0
  122. gobby/llm/gemini_executor.py +339 -0
  123. gobby/llm/litellm.py +287 -0
  124. gobby/llm/litellm_executor.py +303 -0
  125. gobby/llm/resolver.py +499 -0
  126. gobby/llm/service.py +236 -0
  127. gobby/mcp_proxy/__init__.py +29 -0
  128. gobby/mcp_proxy/actions.py +175 -0
  129. gobby/mcp_proxy/daemon_control.py +198 -0
  130. gobby/mcp_proxy/importer.py +436 -0
  131. gobby/mcp_proxy/lazy.py +325 -0
  132. gobby/mcp_proxy/manager.py +798 -0
  133. gobby/mcp_proxy/metrics.py +609 -0
  134. gobby/mcp_proxy/models.py +139 -0
  135. gobby/mcp_proxy/registries.py +215 -0
  136. gobby/mcp_proxy/schema_hash.py +381 -0
  137. gobby/mcp_proxy/semantic_search.py +706 -0
  138. gobby/mcp_proxy/server.py +549 -0
  139. gobby/mcp_proxy/services/__init__.py +0 -0
  140. gobby/mcp_proxy/services/fallback.py +306 -0
  141. gobby/mcp_proxy/services/recommendation.py +224 -0
  142. gobby/mcp_proxy/services/server_mgmt.py +214 -0
  143. gobby/mcp_proxy/services/system.py +72 -0
  144. gobby/mcp_proxy/services/tool_filter.py +231 -0
  145. gobby/mcp_proxy/services/tool_proxy.py +309 -0
  146. gobby/mcp_proxy/stdio.py +565 -0
  147. gobby/mcp_proxy/tools/__init__.py +27 -0
  148. gobby/mcp_proxy/tools/agents.py +1103 -0
  149. gobby/mcp_proxy/tools/artifacts.py +207 -0
  150. gobby/mcp_proxy/tools/hub.py +335 -0
  151. gobby/mcp_proxy/tools/internal.py +337 -0
  152. gobby/mcp_proxy/tools/memory.py +543 -0
  153. gobby/mcp_proxy/tools/merge.py +422 -0
  154. gobby/mcp_proxy/tools/metrics.py +283 -0
  155. gobby/mcp_proxy/tools/orchestration/__init__.py +23 -0
  156. gobby/mcp_proxy/tools/orchestration/cleanup.py +619 -0
  157. gobby/mcp_proxy/tools/orchestration/monitor.py +380 -0
  158. gobby/mcp_proxy/tools/orchestration/orchestrate.py +746 -0
  159. gobby/mcp_proxy/tools/orchestration/review.py +736 -0
  160. gobby/mcp_proxy/tools/orchestration/utils.py +16 -0
  161. gobby/mcp_proxy/tools/session_messages.py +1056 -0
  162. gobby/mcp_proxy/tools/task_dependencies.py +219 -0
  163. gobby/mcp_proxy/tools/task_expansion.py +591 -0
  164. gobby/mcp_proxy/tools/task_github.py +393 -0
  165. gobby/mcp_proxy/tools/task_linear.py +379 -0
  166. gobby/mcp_proxy/tools/task_orchestration.py +77 -0
  167. gobby/mcp_proxy/tools/task_readiness.py +522 -0
  168. gobby/mcp_proxy/tools/task_sync.py +351 -0
  169. gobby/mcp_proxy/tools/task_validation.py +843 -0
  170. gobby/mcp_proxy/tools/tasks/__init__.py +25 -0
  171. gobby/mcp_proxy/tools/tasks/_context.py +112 -0
  172. gobby/mcp_proxy/tools/tasks/_crud.py +516 -0
  173. gobby/mcp_proxy/tools/tasks/_factory.py +176 -0
  174. gobby/mcp_proxy/tools/tasks/_helpers.py +129 -0
  175. gobby/mcp_proxy/tools/tasks/_lifecycle.py +517 -0
  176. gobby/mcp_proxy/tools/tasks/_lifecycle_validation.py +301 -0
  177. gobby/mcp_proxy/tools/tasks/_resolution.py +55 -0
  178. gobby/mcp_proxy/tools/tasks/_search.py +215 -0
  179. gobby/mcp_proxy/tools/tasks/_session.py +125 -0
  180. gobby/mcp_proxy/tools/workflows.py +973 -0
  181. gobby/mcp_proxy/tools/worktrees.py +1264 -0
  182. gobby/mcp_proxy/transports/__init__.py +0 -0
  183. gobby/mcp_proxy/transports/base.py +95 -0
  184. gobby/mcp_proxy/transports/factory.py +44 -0
  185. gobby/mcp_proxy/transports/http.py +139 -0
  186. gobby/mcp_proxy/transports/stdio.py +213 -0
  187. gobby/mcp_proxy/transports/websocket.py +136 -0
  188. gobby/memory/backends/__init__.py +116 -0
  189. gobby/memory/backends/mem0.py +408 -0
  190. gobby/memory/backends/memu.py +485 -0
  191. gobby/memory/backends/null.py +111 -0
  192. gobby/memory/backends/openmemory.py +537 -0
  193. gobby/memory/backends/sqlite.py +304 -0
  194. gobby/memory/context.py +87 -0
  195. gobby/memory/manager.py +1001 -0
  196. gobby/memory/protocol.py +451 -0
  197. gobby/memory/search/__init__.py +66 -0
  198. gobby/memory/search/text.py +127 -0
  199. gobby/memory/viz.py +258 -0
  200. gobby/prompts/__init__.py +13 -0
  201. gobby/prompts/defaults/expansion/system.md +119 -0
  202. gobby/prompts/defaults/expansion/user.md +48 -0
  203. gobby/prompts/defaults/external_validation/agent.md +72 -0
  204. gobby/prompts/defaults/external_validation/external.md +63 -0
  205. gobby/prompts/defaults/external_validation/spawn.md +83 -0
  206. gobby/prompts/defaults/external_validation/system.md +6 -0
  207. gobby/prompts/defaults/features/import_mcp.md +22 -0
  208. gobby/prompts/defaults/features/import_mcp_github.md +17 -0
  209. gobby/prompts/defaults/features/import_mcp_search.md +16 -0
  210. gobby/prompts/defaults/features/recommend_tools.md +32 -0
  211. gobby/prompts/defaults/features/recommend_tools_hybrid.md +35 -0
  212. gobby/prompts/defaults/features/recommend_tools_llm.md +30 -0
  213. gobby/prompts/defaults/features/server_description.md +20 -0
  214. gobby/prompts/defaults/features/server_description_system.md +6 -0
  215. gobby/prompts/defaults/features/task_description.md +31 -0
  216. gobby/prompts/defaults/features/task_description_system.md +6 -0
  217. gobby/prompts/defaults/features/tool_summary.md +17 -0
  218. gobby/prompts/defaults/features/tool_summary_system.md +6 -0
  219. gobby/prompts/defaults/research/step.md +58 -0
  220. gobby/prompts/defaults/validation/criteria.md +47 -0
  221. gobby/prompts/defaults/validation/validate.md +38 -0
  222. gobby/prompts/loader.py +346 -0
  223. gobby/prompts/models.py +113 -0
  224. gobby/py.typed +0 -0
  225. gobby/runner.py +488 -0
  226. gobby/search/__init__.py +23 -0
  227. gobby/search/protocol.py +104 -0
  228. gobby/search/tfidf.py +232 -0
  229. gobby/servers/__init__.py +7 -0
  230. gobby/servers/http.py +636 -0
  231. gobby/servers/models.py +31 -0
  232. gobby/servers/routes/__init__.py +23 -0
  233. gobby/servers/routes/admin.py +416 -0
  234. gobby/servers/routes/dependencies.py +118 -0
  235. gobby/servers/routes/mcp/__init__.py +24 -0
  236. gobby/servers/routes/mcp/hooks.py +135 -0
  237. gobby/servers/routes/mcp/plugins.py +121 -0
  238. gobby/servers/routes/mcp/tools.py +1337 -0
  239. gobby/servers/routes/mcp/webhooks.py +159 -0
  240. gobby/servers/routes/sessions.py +582 -0
  241. gobby/servers/websocket.py +766 -0
  242. gobby/sessions/__init__.py +13 -0
  243. gobby/sessions/analyzer.py +322 -0
  244. gobby/sessions/lifecycle.py +240 -0
  245. gobby/sessions/manager.py +563 -0
  246. gobby/sessions/processor.py +225 -0
  247. gobby/sessions/summary.py +532 -0
  248. gobby/sessions/transcripts/__init__.py +41 -0
  249. gobby/sessions/transcripts/base.py +125 -0
  250. gobby/sessions/transcripts/claude.py +386 -0
  251. gobby/sessions/transcripts/codex.py +143 -0
  252. gobby/sessions/transcripts/gemini.py +195 -0
  253. gobby/storage/__init__.py +21 -0
  254. gobby/storage/agents.py +409 -0
  255. gobby/storage/artifact_classifier.py +341 -0
  256. gobby/storage/artifacts.py +285 -0
  257. gobby/storage/compaction.py +67 -0
  258. gobby/storage/database.py +357 -0
  259. gobby/storage/inter_session_messages.py +194 -0
  260. gobby/storage/mcp.py +680 -0
  261. gobby/storage/memories.py +562 -0
  262. gobby/storage/merge_resolutions.py +550 -0
  263. gobby/storage/migrations.py +860 -0
  264. gobby/storage/migrations_legacy.py +1359 -0
  265. gobby/storage/projects.py +166 -0
  266. gobby/storage/session_messages.py +251 -0
  267. gobby/storage/session_tasks.py +97 -0
  268. gobby/storage/sessions.py +817 -0
  269. gobby/storage/task_dependencies.py +223 -0
  270. gobby/storage/tasks/__init__.py +42 -0
  271. gobby/storage/tasks/_aggregates.py +180 -0
  272. gobby/storage/tasks/_crud.py +449 -0
  273. gobby/storage/tasks/_id.py +104 -0
  274. gobby/storage/tasks/_lifecycle.py +311 -0
  275. gobby/storage/tasks/_manager.py +889 -0
  276. gobby/storage/tasks/_models.py +300 -0
  277. gobby/storage/tasks/_ordering.py +119 -0
  278. gobby/storage/tasks/_path_cache.py +110 -0
  279. gobby/storage/tasks/_queries.py +343 -0
  280. gobby/storage/tasks/_search.py +143 -0
  281. gobby/storage/workflow_audit.py +393 -0
  282. gobby/storage/worktrees.py +547 -0
  283. gobby/sync/__init__.py +29 -0
  284. gobby/sync/github.py +333 -0
  285. gobby/sync/linear.py +304 -0
  286. gobby/sync/memories.py +284 -0
  287. gobby/sync/tasks.py +641 -0
  288. gobby/tasks/__init__.py +8 -0
  289. gobby/tasks/build_verification.py +193 -0
  290. gobby/tasks/commits.py +633 -0
  291. gobby/tasks/context.py +747 -0
  292. gobby/tasks/criteria.py +342 -0
  293. gobby/tasks/enhanced_validator.py +226 -0
  294. gobby/tasks/escalation.py +263 -0
  295. gobby/tasks/expansion.py +626 -0
  296. gobby/tasks/external_validator.py +764 -0
  297. gobby/tasks/issue_extraction.py +171 -0
  298. gobby/tasks/prompts/expand.py +327 -0
  299. gobby/tasks/research.py +421 -0
  300. gobby/tasks/tdd.py +352 -0
  301. gobby/tasks/tree_builder.py +263 -0
  302. gobby/tasks/validation.py +712 -0
  303. gobby/tasks/validation_history.py +357 -0
  304. gobby/tasks/validation_models.py +89 -0
  305. gobby/tools/__init__.py +0 -0
  306. gobby/tools/summarizer.py +170 -0
  307. gobby/tui/__init__.py +5 -0
  308. gobby/tui/api_client.py +281 -0
  309. gobby/tui/app.py +327 -0
  310. gobby/tui/screens/__init__.py +25 -0
  311. gobby/tui/screens/agents.py +333 -0
  312. gobby/tui/screens/chat.py +450 -0
  313. gobby/tui/screens/dashboard.py +377 -0
  314. gobby/tui/screens/memory.py +305 -0
  315. gobby/tui/screens/metrics.py +231 -0
  316. gobby/tui/screens/orchestrator.py +904 -0
  317. gobby/tui/screens/sessions.py +412 -0
  318. gobby/tui/screens/tasks.py +442 -0
  319. gobby/tui/screens/workflows.py +289 -0
  320. gobby/tui/screens/worktrees.py +174 -0
  321. gobby/tui/widgets/__init__.py +21 -0
  322. gobby/tui/widgets/chat.py +210 -0
  323. gobby/tui/widgets/conductor.py +104 -0
  324. gobby/tui/widgets/menu.py +132 -0
  325. gobby/tui/widgets/message_panel.py +160 -0
  326. gobby/tui/widgets/review_gate.py +224 -0
  327. gobby/tui/widgets/task_tree.py +99 -0
  328. gobby/tui/widgets/token_budget.py +166 -0
  329. gobby/tui/ws_client.py +258 -0
  330. gobby/utils/__init__.py +3 -0
  331. gobby/utils/daemon_client.py +235 -0
  332. gobby/utils/git.py +222 -0
  333. gobby/utils/id.py +38 -0
  334. gobby/utils/json_helpers.py +161 -0
  335. gobby/utils/logging.py +376 -0
  336. gobby/utils/machine_id.py +135 -0
  337. gobby/utils/metrics.py +589 -0
  338. gobby/utils/project_context.py +182 -0
  339. gobby/utils/project_init.py +263 -0
  340. gobby/utils/status.py +256 -0
  341. gobby/utils/validation.py +80 -0
  342. gobby/utils/version.py +23 -0
  343. gobby/workflows/__init__.py +4 -0
  344. gobby/workflows/actions.py +1310 -0
  345. gobby/workflows/approval_flow.py +138 -0
  346. gobby/workflows/artifact_actions.py +103 -0
  347. gobby/workflows/audit_helpers.py +110 -0
  348. gobby/workflows/autonomous_actions.py +286 -0
  349. gobby/workflows/context_actions.py +394 -0
  350. gobby/workflows/definitions.py +130 -0
  351. gobby/workflows/detection_helpers.py +208 -0
  352. gobby/workflows/engine.py +485 -0
  353. gobby/workflows/evaluator.py +669 -0
  354. gobby/workflows/git_utils.py +96 -0
  355. gobby/workflows/hooks.py +169 -0
  356. gobby/workflows/lifecycle_evaluator.py +613 -0
  357. gobby/workflows/llm_actions.py +70 -0
  358. gobby/workflows/loader.py +333 -0
  359. gobby/workflows/mcp_actions.py +60 -0
  360. gobby/workflows/memory_actions.py +272 -0
  361. gobby/workflows/premature_stop.py +164 -0
  362. gobby/workflows/session_actions.py +139 -0
  363. gobby/workflows/state_actions.py +123 -0
  364. gobby/workflows/state_manager.py +104 -0
  365. gobby/workflows/stop_signal_actions.py +163 -0
  366. gobby/workflows/summary_actions.py +344 -0
  367. gobby/workflows/task_actions.py +249 -0
  368. gobby/workflows/task_enforcement_actions.py +901 -0
  369. gobby/workflows/templates.py +52 -0
  370. gobby/workflows/todo_actions.py +84 -0
  371. gobby/workflows/webhook.py +223 -0
  372. gobby/workflows/webhook_executor.py +399 -0
  373. gobby/worktrees/__init__.py +5 -0
  374. gobby/worktrees/git.py +690 -0
  375. gobby/worktrees/merge/__init__.py +20 -0
  376. gobby/worktrees/merge/conflict_parser.py +177 -0
  377. gobby/worktrees/merge/resolver.py +485 -0
  378. gobby-0.2.5.dist-info/METADATA +351 -0
  379. gobby-0.2.5.dist-info/RECORD +383 -0
  380. gobby-0.2.5.dist-info/WHEEL +5 -0
  381. gobby-0.2.5.dist-info/entry_points.txt +2 -0
  382. gobby-0.2.5.dist-info/licenses/LICENSE.md +193 -0
  383. gobby-0.2.5.dist-info/top_level.txt +1 -0
@@ -0,0 +1,712 @@
1
+ """
2
+ Task validation module.
3
+
4
+ Handles validating task completion against acceptance criteria
5
+ using LLM providers.
6
+
7
+ Multi-strategy context gathering:
8
+ 1. Current uncommitted changes (staged + unstaged)
9
+ 2. Multi-commit window (last N commits, configurable)
10
+ 3. File-based analysis (read files mentioned in criteria)
11
+
12
+ TODO: Add strategy 4 - codebase grep for test files related to the task.
13
+ Implementation location: get_validation_context_smart() after Strategy 3.
14
+ """
15
+
16
+ import logging
17
+ import re
18
+ import subprocess # nosec B404 - subprocess needed for validation commands
19
+ from dataclasses import dataclass
20
+ from pathlib import Path
21
+ from typing import Literal
22
+
23
+ from gobby.config.app import TaskValidationConfig
24
+ from gobby.config.tasks import PatternCriteriaConfig
25
+ from gobby.llm import LLMService
26
+ from gobby.prompts import PromptLoader
27
+ from gobby.tasks.criteria import PatternCriteriaInjector
28
+ from gobby.utils.json_helpers import extract_json_object
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
+ # Default prompts (fallbacks for strangler fig pattern)
33
+ DEFAULT_VALIDATE_PROMPT = """Validate if the following changes satisfy the requirements.
34
+
35
+ Task: {title}
36
+ {category_section}{criteria_text}
37
+
38
+ {changes_section}
39
+ IMPORTANT: Return ONLY a JSON object, nothing else. No explanation, no preamble.
40
+ Format: {{"status": "valid", "feedback": "..."}} or {{"status": "invalid", "feedback": "..."}}
41
+ """
42
+
43
+ DEFAULT_CRITERIA_PROMPT = """Generate validation criteria for this task.
44
+
45
+ Task: {title}
46
+ Description: {description}
47
+
48
+ CRITICAL RULES - You MUST follow these:
49
+ 1. **Only stated requirements** - Include ONLY requirements explicitly written in the title or description
50
+ 2. **No invented values** - Do NOT invent specific numbers, timeouts, thresholds, or limits unless they appear in the task
51
+ 3. **No invented edge cases** - Do NOT add edge cases, error scenarios, or boundary conditions beyond what's described
52
+ 4. **Proportional detail** - Vague tasks get vague criteria; detailed tasks get detailed criteria
53
+ 5. **When in doubt, leave it out** - If something isn't mentioned, don't include it
54
+
55
+ For vague requirements like "fix X" or "add Y", use criteria like:
56
+ - "X no longer produces the reported error/warning"
57
+ - "Y functionality works as expected"
58
+ - "Existing tests continue to pass"
59
+ - "No regressions introduced"
60
+
61
+ DO NOT generate criteria like:
62
+ - "timeout defaults to 30 seconds" (unless 30 seconds is in the task description)
63
+ - "handles edge case Z" (unless Z is mentioned in the task)
64
+ - "logs with format X" (unless that format is specified)
65
+
66
+ Format as markdown checkboxes:
67
+ ## Deliverable
68
+ - [ ] What the task explicitly asks for
69
+
70
+ ## Functional Requirements
71
+ - [ ] Only requirements stated in the description
72
+
73
+ ## Verification
74
+ - [ ] Tests pass (if applicable)
75
+ - [ ] No regressions
76
+ """
77
+
78
+ # Default number of commits to look back when gathering context
79
+ DEFAULT_COMMIT_WINDOW = 10
80
+ DEFAULT_MAX_CHARS = 50000
81
+
82
+
83
+ def run_git_command(
84
+ cmd: list[str],
85
+ cwd: str | Path | None = None,
86
+ timeout: int = 10,
87
+ ) -> subprocess.CompletedProcess[str] | None:
88
+ """Run git command with standardized exception handling.
89
+
90
+ Returns CompletedProcess on success, None on exception (logs debug).
91
+ Caller is responsible for checking returncode and processing stdout.
92
+
93
+ Args:
94
+ cmd: Git command as list of strings (e.g., ["git", "diff"])
95
+ cwd: Working directory for the command
96
+ timeout: Command timeout in seconds (default: 10)
97
+
98
+ Returns:
99
+ CompletedProcess on success, None if exception occurred
100
+ """
101
+ try:
102
+ return subprocess.run( # nosec B603 - cmd passed from internal callers with hardcoded git commands
103
+ cmd,
104
+ capture_output=True,
105
+ text=True,
106
+ timeout=timeout,
107
+ cwd=cwd,
108
+ )
109
+ except Exception as e:
110
+ logger.debug(f"Git command failed ({' '.join(cmd)}): {e}")
111
+ return None
112
+
113
+
114
def get_last_commit_diff(
    max_chars: int = DEFAULT_MAX_CHARS,
    cwd: str | Path | None = None,
) -> str | None:
    """Return the diff introduced by the most recent commit.

    Args:
        max_chars: Upper bound on the returned text; longer diffs are cut
            at this length and a truncation marker is appended
        cwd: Working directory for git commands (project repo path)

    Returns:
        Output of ``git diff HEAD~1..HEAD``, or None when the command could
        not run, exited non-zero, or printed nothing but whitespace
    """
    proc = run_git_command(["git", "diff", "HEAD~1..HEAD"], cwd=cwd)
    if proc is None:
        return None
    if proc.returncode != 0:
        return None

    output: str = proc.stdout
    if not output.strip():
        return None

    if len(output) <= max_chars:
        return output
    return output[:max_chars] + "\n\n... [diff truncated] ..."
136
+
137
+
138
def get_recent_commits(
    n: int = DEFAULT_COMMIT_WINDOW,
    cwd: str | Path | None = None,
) -> list[dict[str, str]]:
    """List the most recent commits as SHA/subject pairs.

    Args:
        n: How many commits to ask ``git log`` for
        cwd: Working directory for git commands (project repo path)

    Returns:
        One dict per commit with 'sha' and 'subject' keys, in the order git
        printed them. Empty list when the command could not run, exited
        non-zero, or produced no output.
    """
    log = run_git_command(["git", "log", f"-{n}", "--pretty=format:%H|%s"], cwd=cwd)
    if log is None or log.returncode != 0:
        return []

    listing = log.stdout.strip()
    if not listing:
        return []

    # Each line is "<sha>|<subject>"; split on the first "|" only, because the
    # subject itself may contain that character. Lines without a separator
    # are ignored.
    split_lines = (raw.partition("|") for raw in listing.split("\n"))
    return [
        {"sha": sha, "subject": subject}
        for sha, separator, subject in split_lines
        if separator
    ]
162
+
163
+
164
def get_multi_commit_diff(
    commit_count: int = DEFAULT_COMMIT_WINDOW,
    max_chars: int = DEFAULT_MAX_CHARS,
    cwd: str | Path | None = None,
) -> str | None:
    """Get combined diff from the last N commits.

    ``HEAD~N`` only resolves when HEAD has at least N first-parent
    ancestors. For young or shallow repositories the requested window is
    therefore clamped to the history that actually exists, instead of
    letting ``git diff`` fail and returning nothing.

    Args:
        commit_count: Number of commits to include in diff (upper bound;
            reduced when the repository has less history)
        max_chars: Maximum characters to return
        cwd: Working directory for git commands (project repo path)

    Returns:
        Combined diff string, or None if not available (no usable history,
        git failure, or an empty diff)
    """
    window = commit_count

    # HEAD~N walks first parents, so count the same chain when clamping.
    counted = run_git_command(
        ["git", "rev-list", "--count", "--first-parent", "HEAD"], cwd=cwd
    )
    if counted is not None and counted.returncode == 0:
        try:
            # The oldest commit has no parent to diff against, hence "- 1".
            window = min(window, int(counted.stdout.strip()) - 1)
        except ValueError:
            # Unexpected output: keep the requested window and let the diff
            # command below decide whether it is valid.
            pass

    if window < 1:
        return None

    result = run_git_command(["git", "diff", f"HEAD~{window}..HEAD"], cwd=cwd, timeout=30)
    if result is None or result.returncode != 0 or not result.stdout.strip():
        return None

    diff: str = result.stdout
    if len(diff) > max_chars:
        diff = diff[:max_chars] + "\n\n... [diff truncated] ..."

    return diff
188
+
189
+
190
def get_commits_since(
    since_sha: str,
    max_chars: int = DEFAULT_MAX_CHARS,
    cwd: str | Path | None = None,
) -> str | None:
    """Get diff from a specific commit SHA to HEAD.

    Args:
        since_sha: Starting commit SHA (surrounding whitespace is ignored)
        max_chars: Maximum characters to return
        cwd: Working directory for git commands (project repo path)

    Returns:
        Diff string, or None if not available (empty or unsafe revision,
        git failure, or an empty diff)
    """
    revision = (since_sha or "").strip()
    # The revision is interpolated into a git argument. A value starting with
    # "-" would be parsed by git as an option rather than a revision, so it is
    # rejected outright; no valid revision begins with a dash.
    if not revision or revision.startswith("-"):
        return None

    result = run_git_command(["git", "diff", f"{revision}..HEAD"], cwd=cwd, timeout=30)
    if result is None or result.returncode != 0 or not result.stdout.strip():
        return None

    diff: str = result.stdout
    if len(diff) > max_chars:
        diff = diff[:max_chars] + "\n\n... [diff truncated] ..."

    return diff
214
+
215
+
216
def extract_file_patterns_from_text(text: str) -> list[str]:
    """Extract file paths and patterns from text (criteria, description, title).

    Looks for:
    - Explicit file paths (src/foo/bar.py, tests/test_foo.py, .github/ci.yml)
    - Module references (gobby.tasks.validation -> src/gobby/tasks/validation.py)
    - Test patterns (test_validation -> tests/**/test_validation*.py)
    - Class names ending in Manager/Validator/Plugin/Handler/Service
      (TaskValidator -> **/task_validator*.py)

    URLs are ignored when looking for explicit file paths.

    Args:
        text: Text to search for file patterns

    Returns:
        Sorted list of unique file path patterns (may include globs)
    """
    patterns: set[str] = set()

    # Remove URLs before scanning for paths. Filtering individual matches by
    # prefix does not work: the path regex splits "https://host/a.html" into
    # "host" and "a.html" fragments, and a prefix test on "http" would also
    # drop legitimate files such as "http_client.py".
    url_re = re.compile(r"\b(?:https?://|www\.)\S+")
    path_text = url_re.sub(" ", text)

    # Match explicit file paths like src/foo/bar.py or ./tests/test_x.py.
    # A leading dot is kept only when it starts a token (hidden file or
    # directory); a dot that follows a word character or another dot is
    # punctuation and is left out of the match.
    file_path_re = re.compile(r"(?:(?<![\w.])\.|/)?[\w\-]+(?:/[\w\-]+)*\.\w+")
    for match in file_path_re.findall(path_text):
        # Patterns are resolved relative to a base directory, so drop a
        # leading slash.
        patterns.add(match.lstrip("/"))

    # Match module references like gobby.tasks.validation
    module_re = re.compile(r"\b(gobby(?:\.\w+)+)\b")
    for match in module_re.findall(text):
        # Convert module path to file path
        patterns.add("src/" + match.replace(".", "/") + ".py")

    # Extract test file hints from test_ prefixed words
    test_re = re.compile(r"\btest_(\w+)\b")
    for match in test_re.findall(text):
        patterns.add(f"tests/**/test_{match}*.py")

    # Extract class names and guess the snake_case module that defines them.
    class_re = re.compile(r"\b([A-Z][a-zA-Z0-9]+(?:Manager|Validator|Plugin|Handler|Service))\b")
    # Word boundaries inside a CamelCase name: lower/digit -> upper, and the
    # end of an acronym (e.g. "LLM|Service"), so acronyms stay in one piece.
    camel_boundary_re = re.compile(r"(?<=[a-z0-9])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])")
    for match in class_re.findall(text):
        snake_name = camel_boundary_re.sub("_", match).lower()
        # These could be in any .py file, add as glob pattern hint
        patterns.add(f"**/{snake_name}*.py")

    # Sorted so the result does not depend on per-process set ordering.
    return sorted(patterns)
260
+
261
+
262
+ def find_matching_files(
263
+ patterns: list[str],
264
+ base_dir: str | Path = ".",
265
+ max_files: int = 10,
266
+ ) -> list[Path]:
267
+ """Find files matching the given patterns.
268
+
269
+ Args:
270
+ patterns: List of file path patterns (may include globs)
271
+ base_dir: Base directory to search from
272
+ max_files: Maximum number of files to return
273
+
274
+ Returns:
275
+ List of Path objects for matching files
276
+ """
277
+ base = Path(base_dir)
278
+ found: list[Path] = []
279
+
280
+ for pattern in patterns:
281
+ if len(found) >= max_files:
282
+ break
283
+
284
+ # Handle glob patterns
285
+ if "*" in pattern:
286
+ try:
287
+ matches = list(base.glob(pattern))
288
+ for match in matches[: max_files - len(found)]:
289
+ if match.is_file() and match not in found:
290
+ found.append(match)
291
+ except Exception as e:
292
+ logger.debug(f"Failed to glob pattern {pattern}: {e}")
293
+ else:
294
+ # Direct file path
295
+ path = base / pattern
296
+ if path.is_file() and path not in found:
297
+ found.append(path)
298
+
299
+ return found
300
+
301
+
302
def read_files_content(
    files: list[Path],
    max_chars: int = DEFAULT_MAX_CHARS,
) -> str:
    """Concatenate the text of several files, each under its own header.

    Files are read as UTF-8 in the given order. A character budget of
    ``max_chars`` is shared across file bodies (headers are not counted).
    A file that exceeds what is left is cut and marked as truncated. Once
    the budget is spent, one marker is added and the remaining files are
    skipped. A file that cannot be read contributes an error note instead
    of content.

    Args:
        files: List of file paths to read
        max_chars: Maximum total characters of file content to include

    Returns:
        The sections joined by newlines
    """
    sections: list[str] = []
    budget = max_chars  # characters of file content still allowed

    for source in files:
        if budget <= 0:
            sections.append("\n... [additional files truncated] ...")
            break

        header = f"=== {source} ==="
        try:
            body = source.read_text(encoding="utf-8")
            if len(body) > budget:
                body = f"{body[:budget]}\n... [file truncated] ..."
            sections.append(f"{header}\n{body}\n")
            # The truncation marker is charged to the budget as well.
            budget -= len(body)
        except Exception as e:
            logger.debug(f"Failed to read {source}: {e}")
            sections.append(f"{header}\n(Error reading file: {e})\n")

    return "\n".join(sections)
338
+
339
+
340
def get_validation_context_smart(
    task_title: str,
    validation_criteria: str | None = None,
    task_description: str | None = None,
    commit_window: int = DEFAULT_COMMIT_WINDOW,
    max_chars: int = DEFAULT_MAX_CHARS,
    cwd: str | Path | None = None,
) -> str | None:
    """Assemble validation context from several sources.

    Sources, in order:
    1. Current uncommitted changes (staged, then unstaged)
    2. Combined diff over the last ``commit_window`` commits
    3. Contents of files referenced by the title, criteria or description

    Each source draws on a shared character budget that starts at
    ``max_chars``; later sources are skipped when too little budget is left.

    TODO: Add strategy 4 - codebase grep for test files related to the task.
    Implementation location: after Strategy 3 below.

    Args:
        task_title: Task title for context
        validation_criteria: Validation criteria text
        task_description: Task description text
        commit_window: Number of commits to look back
        max_chars: Maximum characters to return
        cwd: Working directory for git commands (project repo path)

    Returns:
        Validation context string, or None if nothing found
    """
    sections: list[str] = []
    budget = max_chars

    # Strategy 1: Current uncommitted changes. Each diff may use up to half
    # of whatever budget is left when it is processed.
    uncommitted_sources = (
        ("STAGED", ["git", "diff", "--cached"]),
        ("UNSTAGED", ["git", "diff"]),
    )
    for label, git_args in uncommitted_sources:
        proc = run_git_command(git_args, cwd=cwd)
        if not proc or not proc.stdout.strip():
            continue
        excerpt = proc.stdout[: budget // 2]
        sections.append(f"=== {label} CHANGES ===\n{excerpt}")
        budget -= len(excerpt)

    # Strategy 2: Multi-commit window, only if we have room
    if budget > 5000:
        window_diff = get_multi_commit_diff(commit_window, budget // 2, cwd=cwd)
        if window_diff:
            # List up to five recent commits (short SHA, clipped subject)
            # ahead of the diff for orientation.
            recent = get_recent_commits(commit_window, cwd=cwd)
            summary_lines = [
                f" - {entry['sha'][:8]}: {entry['subject'][:60]}" for entry in recent[:5]
            ]
            commit_summary = "\n".join(summary_lines)

            sections.append(
                f"=== RECENT COMMITS (last {commit_window}) ===\n"
                f"{commit_summary}\n\n"
                f"=== COMBINED DIFF ===\n{window_diff}"
            )
            budget -= len(window_diff) + len(commit_summary)

    # Strategy 3: File-based analysis of paths mentioned in the task info
    if budget > 2000:
        search_text = f"{task_title} {validation_criteria or ''} {task_description or ''}"
        hints = extract_file_patterns_from_text(search_text)
        candidates = find_matching_files(hints, base_dir=cwd or ".", max_files=5) if hints else []
        if candidates:
            file_content = read_files_content(candidates, budget)
            sections.append(f"=== RELEVANT FILES ===\n{file_content}")

    if not sections:
        return None

    # Final safety cap: section headers are not charged to the budget, so the
    # joined text can still exceed max_chars.
    combined = "\n\n".join(sections)
    if len(combined) <= max_chars:
        return combined
    return combined[:max_chars] + "\n\n... [context truncated] ..."
422
+
423
+
424
def get_git_diff(
    max_chars: int = 50000,
    fallback_to_last_commit: bool = True,
    cwd: str | Path | None = None,
) -> str | None:
    """Return the repository's pending changes as one labelled diff string.

    Staged and unstaged diffs are collected first. When neither has any
    content and ``fallback_to_last_commit`` is set, the diff of the most
    recent commit is returned instead.

    Args:
        max_chars: Size limit for the combined uncommitted diff; longer output
            is cut at this length and a truncation marker is appended. Also
            forwarded to the last-commit fallback.
        fallback_to_last_commit: Whether to use the last commit's diff when
            there are no uncommitted changes.
        cwd: Working directory for the git commands (project repo path).

    Returns:
        The labelled diff text, or None when both git commands failed or
        there is nothing to report.
    """
    # Working tree is queried first, then the index.
    worktree = run_git_command(["git", "diff"], cwd=cwd)
    index = run_git_command(["git", "diff", "--cached"], cwd=cwd)

    def _succeeded(result) -> bool:
        return result is not None and result.returncode == 0

    # Give up only when *both* commands failed (no repo, or a git error).
    if not (_succeeded(worktree) or _succeeded(index)):
        return None

    # Staged changes are listed ahead of unstaged ones in the output.
    labelled = (
        ("=== STAGED CHANGES ===\n", index),
        ("=== UNSTAGED CHANGES ===\n", worktree),
    )
    sections = [
        header + result.stdout
        for header, result in labelled
        if result and result.stdout.strip()
    ]

    if sections:
        text = "\n".join(sections)
        if len(text) <= max_chars:
            return text
        return text[:max_chars] + "\n\n... [diff truncated] ..."

    # Nothing uncommitted: optionally report the most recent commit instead.
    if not fallback_to_last_commit:
        return None

    last_commit = get_last_commit_diff(max_chars, cwd=cwd)
    return f"=== LAST COMMIT ===\n{last_commit}" if last_commit else None
472
+
473
+
474
+ @dataclass
475
+ class ValidationResult:
476
+ """Result of task validation."""
477
+
478
+ status: Literal["valid", "invalid", "pending"]
479
+ feedback: str | None = None
480
+
481
+
482
class TaskValidator:
    """Validates task completion using LLM."""

    # Upper bound on how much gathered file context is placed into a prompt.
    _MAX_FILE_CONTEXT_CHARS = 50000

    def __init__(
        self,
        config: TaskValidationConfig,
        llm_service: LLMService,
        project_dir: Path | None = None,
    ):
        """Store collaborators and set up prompt loading.

        Args:
            config: Validation settings (enabled flag, provider, model, prompts).
            llm_service: Service used to look up the LLM provider.
            project_dir: Optional project directory handed to the PromptLoader.
        """
        self.config = config
        self.llm_service = llm_service
        self._loader = PromptLoader(project_dir=project_dir)

        # Register fallbacks for strangler fig pattern
        self._loader.register_fallback("validation/validate", lambda: DEFAULT_VALIDATE_PROMPT)
        self._loader.register_fallback("validation/criteria", lambda: DEFAULT_CRITERIA_PROMPT)

    @staticmethod
    def _normalize_status(raw: object) -> Literal["valid", "invalid", "pending"] | None:
        """Map a raw status value to a known status, or None if unrecognized.

        Matching ignores surrounding whitespace and letter case, so values
        such as ``"Valid"`` or ``" invalid "`` are accepted.
        """
        if not isinstance(raw, str):
            return None
        candidate = raw.strip().lower()
        if candidate == "valid":
            return "valid"
        if candidate == "invalid":
            return "invalid"
        if candidate == "pending":
            return "pending"
        return None

    async def gather_validation_context(self, file_paths: list[str]) -> str:
        """
        Gather context for validation from files.

        Each file is read as UTF-8 and emitted under a ``--- <path> ---``
        header. A file that cannot be read does not abort the run: a warning
        is logged and an error placeholder is emitted in its place. Note that
        the reads themselves are ordinary synchronous file I/O.

        Args:
            file_paths: List of absolute file paths to read.

        Returns:
            Concatenated file contents.
        """
        context: list[str] = []
        for path in file_paths:
            try:
                with open(path, encoding="utf-8") as f:
                    content = f.read()
                context.append(f"--- {path} ---\n{content}\n")
            except Exception as e:
                # Best effort: keep going and surface the failure in the context.
                logger.warning(f"Failed to read file {path} for validation: {e}")
                context.append(f"--- {path} ---\n(Error reading file: {e})\n")
        return "\n".join(context)

    async def validate_task(
        self,
        task_id: str,
        title: str,
        description: str | None,
        changes_summary: str,
        validation_criteria: str | None = None,
        context_files: list[str] | None = None,
        category: str | None = None,
    ) -> ValidationResult:
        """
        Validate task completion.

        Any failure along the way (validation disabled, missing inputs, empty
        or unparseable LLM response, unrecognized status, provider error) is
        reported as a ``"pending"`` result rather than raised.

        Args:
            task_id: Task ID
            title: Task title
            description: Task description (used as fallback if no validation_criteria)
            changes_summary: Summary of changes made (files, diffs, etc.)
            validation_criteria: Specific criteria to validate against (optional)
            context_files: List of files to read for context (optional)
            category: Task domain category (e.g., 'manual', 'code', 'test')

        Returns:
            ValidationResult with status and feedback
        """
        if not self.config.enabled:
            return ValidationResult(status="pending", feedback="Validation disabled")

        if not description and not validation_criteria:
            logger.warning(f"Cannot validate task {task_id}: missing description and criteria")
            return ValidationResult(
                status="pending", feedback="Missing task description and validation criteria"
            )

        logger.info(f"Validating task {task_id}: {title}")

        # Gather context if provided
        file_context = ""
        if context_files:
            file_context = await self.gather_validation_context(context_files)
        # Truncate once; every prompt path below uses the same bounded text.
        bounded_file_context = file_context[: self._MAX_FILE_CONTEXT_CHARS]

        # Explicit criteria win; the description is only a fallback.
        criteria_text = (
            f"Validation Criteria:\n{validation_criteria}"
            if validation_criteria
            else f"Task Description:\n{description}"
        )

        # Detect if changes_summary is a git diff
        is_git_diff = changes_summary.startswith("Git diff") or "@@" in changes_summary

        if is_git_diff:
            changes_section = (
                "Code Changes (git diff):\n"
                "Analyze these ACTUAL code changes to verify the implementation.\n\n"
                f"{changes_summary}\n\n"
            )
        else:
            changes_section = f"Changes Summary:\n{changes_summary}\n\n"

        # Build test strategy section if provided
        category_section = ""
        if category:
            category_section = f"Test Strategy: {category}\n"
            if category.lower() == "manual":
                category_section += (
                    "NOTE: This task uses MANUAL testing. Do NOT require automated test files. "
                    "Validation should focus on whether the implementation is correct, "
                    "not whether automated tests exist.\n\n"
                )
            else:
                category_section += "\n"

        # Build prompt using PromptLoader or legacy config
        if self.config.prompt:
            # Legacy inline config (deprecated)
            # NOTE(review): the legacy prompt is used verbatim here (no
            # .format() with title/criteria/changes), unlike criteria_prompt in
            # generate_criteria - confirm this is intended.
            prompt = self.config.prompt
            if file_context:
                prompt += f"\nFile Context:\n{bounded_file_context}\n"
        else:
            # Use PromptLoader
            prompt_path = self.config.prompt_path or "validation/validate"
            template_context = {
                "title": title,
                "category_section": category_section,
                "criteria_text": criteria_text,
                "changes_section": changes_section,
                "file_context": bounded_file_context if file_context else "",
            }
            try:
                prompt = self._loader.render(prompt_path, template_context)
            except FileNotFoundError:
                logger.debug(f"Prompt template '{prompt_path}' not found, using fallback")
                prompt = DEFAULT_VALIDATE_PROMPT.format(**template_context)
                if file_context:
                    prompt += f"\nFile Context:\n{bounded_file_context}\n"

        try:
            provider = self.llm_service.get_provider(self.config.provider)
            response_content = await provider.generate_text(
                prompt=prompt,
                system_prompt=self.config.system_prompt,
                model=self.config.model,
            )

            if not response_content or not response_content.strip():
                logger.warning(f"Empty LLM response for task {task_id} validation")
                return ValidationResult(
                    status="pending", feedback="Validation failed: Empty response from LLM"
                )

            logger.debug(f"Validation LLM response for {task_id}: {response_content[:200]}...")

            # Extract JSON using shared utility
            result_data = extract_json_object(response_content)
            if result_data is None:
                logger.warning(f"Failed to parse JSON from validation response for {task_id}")
                return ValidationResult(
                    status="pending", feedback="Validation failed: Could not parse response"
                )

            # The status comes from the model, so it is not trusted to be one
            # of the declared literals; anything unrecognized becomes "pending".
            raw_status = result_data.get("status", "pending")
            status = self._normalize_status(raw_status)
            if status is None:
                logger.warning(
                    f"Unrecognized validation status {raw_status!r} for task {task_id}; "
                    "treating as pending"
                )
                status = "pending"

            return ValidationResult(status=status, feedback=result_data.get("feedback"))

        except Exception as e:
            logger.error(f"Failed to validate task {task_id}: {e}")
            return ValidationResult(status="pending", feedback=f"Validation failed: {str(e)}")

    async def generate_criteria(
        self,
        title: str,
        description: str | None = None,
        labels: list[str] | None = None,
    ) -> str | None:
        """
        Generate validation criteria from task title and description.

        When labels are provided (e.g., 'tdd', 'strangler-fig', 'refactoring'),
        pattern-specific criteria from PatternCriteriaConfig are appended to
        the LLM-generated criteria. If the LLM returns nothing usable, any
        pattern-specific criteria are returned on their own.

        Args:
            title: Task title
            description: Task description (optional)
            labels: Task labels for pattern criteria injection (optional)

        Returns:
            Generated validation criteria string, or None if validation is
            disabled, generation fails, or no criteria text was produced
        """
        if not self.config.enabled:
            return None

        # Build prompt using PromptLoader or legacy config
        template_context = {
            "title": title,
            "description": description or "(no description)",
        }

        if self.config.criteria_prompt:
            # Legacy inline config (deprecated)
            prompt = self.config.criteria_prompt.format(**template_context)
        else:
            # Use PromptLoader
            prompt_path = self.config.criteria_prompt_path or "validation/criteria"
            try:
                prompt = self._loader.render(prompt_path, template_context)
            except FileNotFoundError:
                logger.debug(f"Prompt template '{prompt_path}' not found, using fallback")
                prompt = DEFAULT_CRITERIA_PROMPT.format(**template_context)

        try:
            provider = self.llm_service.get_provider(self.config.provider)
            response = await provider.generate_text(
                prompt=prompt,
                system_prompt=self.config.criteria_system_prompt,
                model=self.config.model,
            )
            # Guard against a None/empty response instead of failing on .strip().
            llm_criteria = (response or "").strip()
            if not llm_criteria:
                logger.warning("Empty LLM response while generating validation criteria")

            # Inject pattern-specific criteria if labels are provided
            pattern_criteria = None
            if labels:
                pattern_config = PatternCriteriaConfig()
                injector = PatternCriteriaInjector(pattern_config=pattern_config)
                pattern_criteria = injector.inject_for_labels(labels=labels)

            # Join only the sections that have content, so an empty LLM answer
            # never produces leading blank lines or an empty-string result.
            sections = [text for text in (llm_criteria, pattern_criteria) if text]
            if not sections:
                return None
            return "\n\n".join(sections)
        except Exception as e:
            logger.error(f"Failed to generate validation criteria: {e}")
            return None