foundry-mcp 0.3.3-py3-none-any.whl → 0.7.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. foundry_mcp/__init__.py +7 -1
  2. foundry_mcp/cli/commands/plan.py +10 -3
  3. foundry_mcp/cli/commands/review.py +19 -4
  4. foundry_mcp/cli/commands/specs.py +38 -208
  5. foundry_mcp/cli/output.py +3 -3
  6. foundry_mcp/config.py +235 -5
  7. foundry_mcp/core/ai_consultation.py +146 -9
  8. foundry_mcp/core/discovery.py +6 -6
  9. foundry_mcp/core/error_store.py +2 -2
  10. foundry_mcp/core/intake.py +933 -0
  11. foundry_mcp/core/llm_config.py +20 -2
  12. foundry_mcp/core/metrics_store.py +2 -2
  13. foundry_mcp/core/progress.py +70 -0
  14. foundry_mcp/core/prompts/fidelity_review.py +149 -4
  15. foundry_mcp/core/prompts/markdown_plan_review.py +5 -1
  16. foundry_mcp/core/prompts/plan_review.py +5 -1
  17. foundry_mcp/core/providers/claude.py +6 -47
  18. foundry_mcp/core/providers/codex.py +6 -57
  19. foundry_mcp/core/providers/cursor_agent.py +3 -44
  20. foundry_mcp/core/providers/gemini.py +6 -57
  21. foundry_mcp/core/providers/opencode.py +35 -5
  22. foundry_mcp/core/research/__init__.py +68 -0
  23. foundry_mcp/core/research/memory.py +425 -0
  24. foundry_mcp/core/research/models.py +437 -0
  25. foundry_mcp/core/research/workflows/__init__.py +22 -0
  26. foundry_mcp/core/research/workflows/base.py +204 -0
  27. foundry_mcp/core/research/workflows/chat.py +271 -0
  28. foundry_mcp/core/research/workflows/consensus.py +396 -0
  29. foundry_mcp/core/research/workflows/ideate.py +682 -0
  30. foundry_mcp/core/research/workflows/thinkdeep.py +405 -0
  31. foundry_mcp/core/responses.py +450 -0
  32. foundry_mcp/core/spec.py +2438 -236
  33. foundry_mcp/core/task.py +1064 -19
  34. foundry_mcp/core/testing.py +512 -123
  35. foundry_mcp/core/validation.py +313 -42
  36. foundry_mcp/dashboard/components/charts.py +0 -57
  37. foundry_mcp/dashboard/launcher.py +11 -0
  38. foundry_mcp/dashboard/views/metrics.py +25 -35
  39. foundry_mcp/dashboard/views/overview.py +1 -65
  40. foundry_mcp/resources/specs.py +25 -25
  41. foundry_mcp/schemas/intake-schema.json +89 -0
  42. foundry_mcp/schemas/sdd-spec-schema.json +33 -5
  43. foundry_mcp/server.py +38 -0
  44. foundry_mcp/tools/unified/__init__.py +4 -2
  45. foundry_mcp/tools/unified/authoring.py +2423 -267
  46. foundry_mcp/tools/unified/documentation_helpers.py +69 -6
  47. foundry_mcp/tools/unified/environment.py +235 -6
  48. foundry_mcp/tools/unified/error.py +18 -1
  49. foundry_mcp/tools/unified/lifecycle.py +8 -0
  50. foundry_mcp/tools/unified/plan.py +113 -1
  51. foundry_mcp/tools/unified/research.py +658 -0
  52. foundry_mcp/tools/unified/review.py +370 -16
  53. foundry_mcp/tools/unified/spec.py +367 -0
  54. foundry_mcp/tools/unified/task.py +1163 -48
  55. foundry_mcp/tools/unified/test.py +69 -8
  56. {foundry_mcp-0.3.3.dist-info → foundry_mcp-0.7.0.dist-info}/METADATA +7 -1
  57. {foundry_mcp-0.3.3.dist-info → foundry_mcp-0.7.0.dist-info}/RECORD +60 -48
  58. {foundry_mcp-0.3.3.dist-info → foundry_mcp-0.7.0.dist-info}/WHEEL +0 -0
  59. {foundry_mcp-0.3.3.dist-info → foundry_mcp-0.7.0.dist-info}/entry_points.txt +0 -0
  60. {foundry_mcp-0.3.3.dist-info → foundry_mcp-0.7.0.dist-info}/licenses/LICENSE +0 -0
--- a/foundry_mcp/core/llm_config.py
+++ b/foundry_mcp/core/llm_config.py
@@ -860,16 +860,18 @@ def reset_workflow_config() -> None:
 class WorkflowConsultationConfig:
     """Per-workflow consultation configuration overrides.
 
-    Allows individual workflows to specify minimum model requirements
-    and timeout overrides for AI consultations.
+    Allows individual workflows to specify minimum model requirements,
+    timeout overrides, and default review types for AI consultations.
 
     TOML Configuration Example:
         [consultation.workflows.fidelity_review]
        min_models = 2
        timeout_override = 600.0
+        default_review_type = "full"
 
         [consultation.workflows.plan_review]
        min_models = 3
+        default_review_type = "full"
 
     Attributes:
         min_models: Minimum number of models required for consensus (default: 1).
@@ -878,10 +880,17 @@ class WorkflowConsultationConfig:
         timeout_override: Optional timeout override in seconds. When set,
             overrides the default_timeout from ConsultationConfig
             for this specific workflow.
+        default_review_type: Default review type for this workflow (default: "full").
+            Valid values: "quick", "full", "security", "feasibility".
+            Used when no explicit review_type is provided in requests.
     """
 
     min_models: int = 1
     timeout_override: Optional[float] = None
+    default_review_type: str = "full"
+
+    # Valid review types
+    VALID_REVIEW_TYPES = {"quick", "full", "security", "feasibility"}
 
     def validate(self) -> None:
         """Validate the workflow consultation configuration.
@@ -897,6 +906,12 @@ class WorkflowConsultationConfig:
                 f"timeout_override must be positive if set, got {self.timeout_override}"
             )
 
+        if self.default_review_type not in self.VALID_REVIEW_TYPES:
+            raise ValueError(
+                f"default_review_type must be one of {sorted(self.VALID_REVIEW_TYPES)}, "
+                f"got '{self.default_review_type}'"
+            )
+
     @classmethod
     def from_dict(cls, data: Dict[str, Any]) -> "WorkflowConsultationConfig":
         """Create WorkflowConsultationConfig from a dictionary.
@@ -917,6 +932,9 @@ class WorkflowConsultationConfig:
             if value is not None:
                 config.timeout_override = float(value)
 
+        if "default_review_type" in data:
+            config.default_review_type = str(data["default_review_type"]).lower()
+
         return config
 
 
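For orientation, a small sketch of how the new field behaves end to end, based only on the `from_dict` and `validate` hunks above. The import path and the handling of `min_models` inside `from_dict` are assumptions, not shown in this diff:

```python
# Hedged sketch: mirrors the TOML example in the docstring above.
# Module path is an assumption; only the class behavior appears in the diff.
from foundry_mcp.core.llm_config import WorkflowConsultationConfig

cfg = WorkflowConsultationConfig.from_dict({
    "min_models": 2,
    "timeout_override": 600.0,
    "default_review_type": "Full",  # from_dict lower-cases this to "full"
})
cfg.validate()  # ok: "full" is in VALID_REVIEW_TYPES

bad = WorkflowConsultationConfig.from_dict({"default_review_type": "audit"})
bad.validate()
# ValueError: default_review_type must be one of
# ['feasibility', 'full', 'quick', 'security'], got 'audit'
```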
--- a/foundry_mcp/core/metrics_store.py
+++ b/foundry_mcp/core/metrics_store.py
@@ -180,7 +180,7 @@ class FileMetricsStore(MetricsStore):
     for efficient querying. Thread-safe with file locking for concurrent access.
 
     Directory structure:
-        .cache/foundry-mcp/metrics/
+        ~/.foundry-mcp/metrics/
             metrics.jsonl - Append-only metrics log
             index.json - Metric name -> metadata mapping
     """
@@ -628,7 +628,7 @@ def get_metrics_store(storage_path: Optional[str | Path] = None) -> MetricsStore
     if _metrics_store is None:
         if storage_path is None:
             # Default path
-            storage_path = Path.home() / ".cache" / "foundry-mcp" / "metrics"
+            storage_path = Path.home() / ".foundry-mcp" / "metrics"
         _metrics_store = FileMetricsStore(storage_path)
 
     return _metrics_store
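Net effect of the two metrics hunks: the default store moves out of `~/.cache` into a dedicated dot-directory. A minimal sketch of the new resolution; note that nothing shown here migrates data from the old `.cache` location:

```python
from pathlib import Path

# New default used by get_metrics_store() when storage_path is None:
storage_path = Path.home() / ".foundry-mcp" / "metrics"
print(storage_path)                    # e.g. /home/alice/.foundry-mcp/metrics
print(storage_path / "metrics.jsonl")  # append-only log per the docstring above
```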
--- a/foundry_mcp/core/progress.py
+++ b/foundry_mcp/core/progress.py
@@ -315,3 +315,73 @@ def get_task_counts_by_status(spec_data: Dict[str, Any]) -> Dict[str, int]:
             counts[status] += 1
 
     return counts
+
+
+def sync_computed_fields(spec_data: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Synchronize computed fields to their canonical top-level locations.
+
+    This function should be called after any task status change to ensure
+    progress_percentage, current_phase, and status are persisted to the spec.
+
+    Updates (in-place):
+    - progress_percentage: calculated from hierarchy counts
+    - current_phase: first in_progress phase, or first pending if none
+    - status: based on overall progress
+
+    Args:
+        spec_data: Spec data dictionary (modified in place)
+
+    Returns:
+        Dict with computed values for confirmation
+    """
+    if not spec_data:
+        return {}
+
+    hierarchy = spec_data.get("hierarchy", {})
+    root = hierarchy.get("spec-root", {})
+
+    # Calculate progress percentage
+    total = root.get("total_tasks", 0)
+    completed = root.get("completed_tasks", 0)
+    progress_pct = int((completed / total * 100)) if total > 0 else 0
+
+    # Determine current phase (first in_progress, or first pending if none)
+    current_phase = None
+    for key, node in hierarchy.items():
+        if node.get("type") == "phase":
+            if node.get("status") == "in_progress":
+                current_phase = key
+                break
+            elif current_phase is None and node.get("status") == "pending":
+                current_phase = key
+
+    # Determine overall status based on progress
+    if total == 0:
+        status = "pending"
+    elif completed == total:
+        status = "completed"
+    elif completed > 0:
+        status = "in_progress"
+    else:
+        status = "pending"
+
+    # Check if any task is blocked - if so, spec is blocked
+    for node in hierarchy.values():
+        if node.get("status") == "blocked":
+            status = "blocked"
+            break
+
+    # Update top-level fields (canonical location)
+    spec_data["progress_percentage"] = progress_pct
+    spec_data["current_phase"] = current_phase
+    spec_data["status"] = status
+
+    # Update last_updated timestamp
+    spec_data["last_updated"] = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
+
+    return {
+        "progress_percentage": progress_pct,
+        "current_phase": current_phase,
+        "status": status
+    }
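A quick sanity check of `sync_computed_fields` with a minimal hand-built `spec_data`; the hierarchy shape is inferred from the reads in the function body, so treat it as illustrative rather than a canonical spec document:

```python
spec_data = {
    "hierarchy": {
        "spec-root": {"type": "spec", "total_tasks": 4, "completed_tasks": 2},
        "phase-1": {"type": "phase", "status": "completed"},
        "phase-2": {"type": "phase", "status": "in_progress"},
    },
}

result = sync_computed_fields(spec_data)
# result == {"progress_percentage": 50,
#            "current_phase": "phase-2",
#            "status": "in_progress"}
# spec_data is updated in place with the same three fields,
# plus a refreshed "last_updated" timestamp.
```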
--- a/foundry_mcp/core/prompts/fidelity_review.py
+++ b/foundry_mcp/core/prompts/fidelity_review.py
@@ -9,6 +9,7 @@ Prompt IDs (PromptTemplate-based):
 - FIDELITY_REVIEW_V1: Main 6-section fidelity review prompt
 - FIDELITY_DEVIATION_ANALYSIS_V1: Analyze identified deviations
 - FIDELITY_COMPLIANCE_SUMMARY_V1: Generate compliance summary
+- FIDELITY_SYNTHESIS_PROMPT_V1: Multi-model response synthesis
 
 Legacy Prompt IDs (string templates for backward compatibility):
 - review_task: Compare task implementation against spec requirements
@@ -66,6 +67,65 @@ FIDELITY_RESPONSE_SCHEMA = """{
 }"""
 
 
+# JSON response schema for synthesized multi-model fidelity reviews
+FIDELITY_SYNTHESIZED_RESPONSE_SCHEMA = """{
+  "verdict": "pass|fail|partial|unknown",
+  "verdict_consensus": {
+    "votes": {
+      "pass": ["model names that voted pass"],
+      "fail": ["model names that voted fail"],
+      "partial": ["model names that voted partial"],
+      "unknown": ["model names that voted unknown"]
+    },
+    "agreement_level": "strong|moderate|weak|conflicted",
+    "notes": "Explanation of verdict determination"
+  },
+  "summary": "Synthesized overall findings.",
+  "requirement_alignment": {
+    "answer": "yes|no|partial",
+    "details": "Synthesized alignment assessment.",
+    "model_agreement": "unanimous|majority|split"
+  },
+  "success_criteria": {
+    "met": "yes|no|partial",
+    "details": "Synthesized verification status.",
+    "model_agreement": "unanimous|majority|split"
+  },
+  "deviations": [
+    {
+      "description": "Merged deviation description",
+      "justification": "Combined rationale",
+      "severity": "critical|high|medium|low",
+      "identified_by": ["model names that identified this"],
+      "agreement": "unanimous|majority|single"
+    }
+  ],
+  "test_coverage": {
+    "status": "sufficient|insufficient|not_applicable",
+    "details": "Synthesized test assessment",
+    "model_agreement": "unanimous|majority|split"
+  },
+  "code_quality": {
+    "issues": ["Merged quality concerns with model attribution"],
+    "details": "Synthesized commentary"
+  },
+  "documentation": {
+    "status": "adequate|inadequate|not_applicable",
+    "details": "Synthesized doc assessment",
+    "model_agreement": "unanimous|majority|split"
+  },
+  "issues": ["Deduplicated issues with model attribution"],
+  "recommendations": ["Prioritized actionable steps"],
+  "synthesis_metadata": {
+    "models_consulted": ["all model names"],
+    "models_succeeded": ["successful model names"],
+    "models_failed": ["failed model names"],
+    "synthesis_provider": "model that performed synthesis",
+    "agreement_level": "strong|moderate|weak|conflicted"
+  }
+}"""
+
+
 # =============================================================================
 # Severity Categorization Keywords
 # =============================================================================
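To make the shape concrete, an invented (and abridged) payload that follows the synthesized schema; a real response must carry every key in the schema above:

```python
# Illustrative values only - abridged to the consensus-related fields.
synthesized = {
    "verdict": "partial",
    "verdict_consensus": {
        "votes": {"pass": ["gemini"], "fail": [],
                  "partial": ["codex", "claude"], "unknown": []},
        "agreement_level": "moderate",
        "notes": "2 of 3 models voted partial; majority verdict used.",
    },
    "deviations": [{
        "description": "Retry logic missing from task runner",
        "justification": "Models cite the spec's bounded-retry requirement",
        "severity": "high",
        "identified_by": ["codex", "claude"],
        "agreement": "majority",
    }],
    "synthesis_metadata": {
        "models_consulted": ["gemini", "codex", "claude"],
        "models_succeeded": ["gemini", "codex", "claude"],
        "models_failed": [],
        "synthesis_provider": "claude",
        "agreement_level": "moderate",
    },
}
```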
@@ -166,6 +226,9 @@ CRITICAL CONSTRAINTS:
 - This is a READ-ONLY review - you MUST NOT write, create, or modify ANY files
 - Execute code or commands - ANALYSIS ONLY
 - Provide findings as structured JSON in your response
+- Do NOT focus on ownership, responsibility, or team assignment concerns
+- Avoid feedback like "who owns", "who verifies", "who is responsible for"
+- Focus on technical requirements and verification steps themselves, not who performs them
 
 Focus on:
 1. Requirement alignment - Does implementation match spec?
@@ -430,6 +493,82 @@ Respond with valid JSON:
 )
 
 
+# Multi-model synthesis prompt - consolidates multiple fidelity reviews
+FIDELITY_SYNTHESIS_PROMPT_V1 = PromptTemplate(
+    id="FIDELITY_SYNTHESIS_PROMPT_V1",
+    version="1.0",
+    system_prompt="""You are an expert at synthesizing multiple fidelity review results.
+Your task is to consolidate diverse perspectives into actionable consensus while preserving JSON format.
+
+Guidelines:
+- Attribute findings to specific models using the identified_by field
+- Merge similar deviations, noting which models identified each
+- Resolve verdict disagreements using majority vote or escalate to "partial" on conflict
+- Preserve unique insights from each model
+- Output valid JSON matching the required schema exactly
+- Do NOT focus on ownership, responsibility, or team assignment concerns
+- Focus on technical requirements and verification steps themselves, not who performs them""",
+    user_template="""You are synthesizing {num_models} independent AI fidelity reviews.
+
+**Specification:** {spec_title} (`{spec_id}`)
+**Review Scope:** {review_scope}
+
+**Your Task:** Read all JSON reviews below and create a unified synthesis.
+
+## Individual Model Reviews
+
+{model_reviews}
+
+## Synthesis Requirements
+
+1. **Verdict Consensus:**
+   - Count votes for each verdict (pass/fail/partial/unknown)
+   - Use majority vote for final verdict
+   - If tied or conflicted, use "partial" and note disagreement
+   - Record agreement_level: "strong" (all agree), "moderate" (majority agrees), "weak" (slight majority), "conflicted" (tied/split)
+
+2. **Deviation Merging:**
+   - Group similar deviations across models by description
+   - Use highest severity when models disagree on severity
+   - Track which models identified each deviation in identified_by array
+   - Mark agreement: "unanimous" (all models), "majority" (>50%), "single" (one model)
+
+3. **Issue Consolidation:**
+   - Deduplicate issues across models
+   - Preserve unique insights
+   - Note model agreement level for each finding
+
+4. **Attribution Rules:**
+   - "unanimous" = all successful models agree
+   - "majority" = >50% of successful models agree
+   - "single" = only one model identified this
+
+### Required Response Format
+
+Respond **only** with valid JSON matching the schema below. Do not include Markdown, prose, or additional commentary outside the JSON object.
+
+```json
+{response_schema}
+```
+
+Rules:
+- Use lowercase values for enumerated fields (verdict, status, severity, etc.)
+- Keep arrays as arrays (use [] when empty)
+- Populate identified_by with actual model names from the reviews
+- Never omit required fields from the schema
+- Use the actual provider names from the reviews (e.g., "gemini", "codex", "claude")""",
+    required_context=["spec_id", "spec_title", "review_scope", "num_models", "model_reviews"],
+    optional_context=["response_schema"],
+    metadata={
+        "workflow": "fidelity_review",
+        "author": "system",
+        "category": "synthesis",
+        "output_format": "json",
+        "description": "Multi-model fidelity review synthesis",
+    },
+)
+
+
 # =============================================================================
 # Template Registry (PromptTemplate-based)
 # =============================================================================
@@ -439,6 +578,7 @@ FIDELITY_REVIEW_TEMPLATES: Dict[str, PromptTemplate] = {
     "FIDELITY_REVIEW_V1": FIDELITY_REVIEW_V1,
     "FIDELITY_DEVIATION_ANALYSIS_V1": FIDELITY_DEVIATION_ANALYSIS_V1,
     "FIDELITY_COMPLIANCE_SUMMARY_V1": FIDELITY_COMPLIANCE_SUMMARY_V1,
+    "FIDELITY_SYNTHESIS_PROMPT_V1": FIDELITY_SYNTHESIS_PROMPT_V1,
 }
 
 
@@ -475,7 +615,7 @@ class FidelityReviewPromptBuilder(PromptBuilder):
         Args:
             prompt_id: Template identifier. Supports:
                 - PromptTemplate IDs: FIDELITY_REVIEW_V1, FIDELITY_DEVIATION_ANALYSIS_V1,
-                  FIDELITY_COMPLIANCE_SUMMARY_V1
+                  FIDELITY_COMPLIANCE_SUMMARY_V1, FIDELITY_SYNTHESIS_PROMPT_V1
             context: Template context variables
 
         Returns:
@@ -491,9 +631,12 @@ class FidelityReviewPromptBuilder(PromptBuilder):
         # Provide defaults for optional context
         render_context = dict(context)
 
-        # Add response schema default
+        # Add response schema default - use synthesized schema for synthesis prompt
         if "response_schema" not in render_context:
-            render_context["response_schema"] = FIDELITY_RESPONSE_SCHEMA
+            if prompt_id == "FIDELITY_SYNTHESIS_PROMPT_V1":
+                render_context["response_schema"] = FIDELITY_SYNTHESIZED_RESPONSE_SCHEMA
+            else:
+                render_context["response_schema"] = FIDELITY_RESPONSE_SCHEMA
 
         # Add empty defaults for optional fields
         if "spec_description" not in render_context:
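How the new branch plays out in practice, sketched under one assumption: the method documented above is callable as `build(prompt_id, context)` (its exact name is not visible in this hunk):

```python
builder = FidelityReviewPromptBuilder()  # constructor args, if any, assumed default
prompt = builder.build(  # method name assumed; signature per the Args above
    "FIDELITY_SYNTHESIS_PROMPT_V1",
    {
        "spec_id": "spec-001",
        "spec_title": "Payment intake",
        "review_scope": "phase-2 tasks",
        "num_models": 3,
        "model_reviews": "<individual JSON reviews go here>",
    },
)
# "response_schema" was not supplied, so the builder injects
# FIDELITY_SYNTHESIZED_RESPONSE_SCHEMA; any other prompt_id still
# falls back to FIDELITY_RESPONSE_SCHEMA.
```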
@@ -531,10 +674,12 @@ __all__ = [
     "FIDELITY_REVIEW_V1",
     "FIDELITY_DEVIATION_ANALYSIS_V1",
     "FIDELITY_COMPLIANCE_SUMMARY_V1",
+    "FIDELITY_SYNTHESIS_PROMPT_V1",
     # Template registries
     "FIDELITY_REVIEW_TEMPLATES",
-    # Response schema
+    # Response schemas
     "FIDELITY_RESPONSE_SCHEMA",
+    "FIDELITY_SYNTHESIZED_RESPONSE_SCHEMA",
     # Severity keywords
     "SEVERITY_KEYWORDS",
     "CRITICAL_KEYWORDS",
--- a/foundry_mcp/core/prompts/markdown_plan_review.py
+++ b/foundry_mcp/core/prompts/markdown_plan_review.py
@@ -71,6 +71,7 @@ What the plan does well.
 - Include all sections even if empty (write "None identified" for empty sections)
 - Be specific and actionable in all feedback
 - For clarity issues, use Questions section rather than creating a separate category
+- Do NOT generate feedback about ownership, responsibility, or team assignments (e.g., "who verifies", "who owns", "who is responsible")
 """
 
 
@@ -89,7 +90,10 @@ Guidelines:
 - Propose alternatives when better approaches exist
 - Focus on impact and prioritize feedback by potential consequences
 - Be collaborative, not adversarial
-- Remember: this is an early-stage plan, not a final spec"""
+- Remember: this is an early-stage plan, not a final spec
+- Do NOT focus on ownership, responsibility, or team assignment concerns
+- Avoid feedback like "who owns", "who verifies", "who is responsible for"
+- Focus on technical requirements and verification steps themselves, not who performs them"""
 
 
 # =============================================================================
--- a/foundry_mcp/core/prompts/plan_review.py
+++ b/foundry_mcp/core/prompts/plan_review.py
@@ -78,6 +78,7 @@ What the spec does well.
 - Be specific and actionable in all feedback
 - For clarity issues, use Questions section rather than creating a separate category
 - Attribution: In multi-model reviews, prefix items with "Flagged by [model-name]:" when applicable
+- Do NOT generate feedback about ownership, responsibility, or team assignments (e.g., "who verifies", "who owns", "who is responsible")
 """
 
 
@@ -94,7 +95,10 @@ Guidelines:
 - Ask clarifying questions for ambiguities
 - Propose alternatives when better approaches exist
 - Focus on impact and prioritize feedback by potential consequences
-- Be collaborative, not adversarial"""
+- Be collaborative, not adversarial
+- Do NOT focus on ownership, responsibility, or team assignment concerns
+- Avoid feedback like "who owns", "who verifies", "who is responsible for"
+- Focus on technical requirements and verification steps themselves, not who performs them"""
 
 
 # =============================================================================
--- a/foundry_mcp/core/providers/claude.py
+++ b/foundry_mcp/core/providers/claude.py
@@ -14,10 +14,7 @@ import os
 import subprocess
 from typing import Any, Dict, List, Optional, Protocol, Sequence
 
-logger = logging.getLogger(__name__)
-
 from .base import (
-    ModelDescriptor,
     ProviderCapability,
     ProviderContext,
     ProviderExecutionError,
@@ -34,6 +31,8 @@ from .base import (
 from .detectors import detect_provider_availability
 from .registry import register_provider
 
+logger = logging.getLogger(__name__)
+
 DEFAULT_BINARY = "claude"
 DEFAULT_TIMEOUT_SECONDS = 360
 AVAILABILITY_OVERRIDE_ENV = "CLAUDE_CLI_AVAILABLE_OVERRIDE"
@@ -181,34 +180,11 @@ def _default_runner(
     )
 
 
-CLAUDE_MODELS: List[ModelDescriptor] = [
-    ModelDescriptor(
-        id="sonnet",
-        display_name="Sonnet 4.5",
-        capabilities={
-            ProviderCapability.TEXT,
-            ProviderCapability.STREAMING,
-            ProviderCapability.VISION,
-            ProviderCapability.THINKING,
-        },
-        routing_hints={"tier": "default", "description": "Smartest model for daily use"},
-    ),
-    ModelDescriptor(
-        id="haiku",
-        display_name="Haiku 4.5",
-        capabilities={
-            ProviderCapability.TEXT,
-            ProviderCapability.STREAMING,
-        },
-        routing_hints={"tier": "fast", "description": "Fastest model for simple tasks"},
-    ),
-]
-
 CLAUDE_METADATA = ProviderMetadata(
     provider_id="claude",
     display_name="Anthropic Claude CLI",
-    models=CLAUDE_MODELS,
-    default_model="sonnet",
+    models=[],  # Model validation delegated to CLI
+    default_model="opus",
     capabilities={
         ProviderCapability.TEXT,
         ProviderCapability.STREAMING,
@@ -239,24 +215,7 @@ class ClaudeProvider(ProviderContext):
         self._binary = binary or os.environ.get(CUSTOM_BINARY_ENV, DEFAULT_BINARY)
         self._env = env
         self._timeout = timeout or DEFAULT_TIMEOUT_SECONDS
-        self._model = self._ensure_model(model or metadata.default_model or self._first_model_id())
-
-    def _first_model_id(self) -> str:
-        if not self.metadata.models:
-            raise ProviderUnavailableError(
-                "Claude provider metadata is missing model descriptors.",
-                provider=self.metadata.provider_id,
-            )
-        return self.metadata.models[0].id
-
-    def _ensure_model(self, candidate: str) -> str:
-        available = {descriptor.id for descriptor in self.metadata.models}
-        if candidate not in available:
-            raise ProviderExecutionError(
-                f"Unsupported Claude model '{candidate}'. Available: {', '.join(sorted(available))}",
-                provider=self.metadata.provider_id,
-            )
-        return candidate
+        self._model = model or metadata.default_model or "opus"
 
     def _validate_request(self, request: ProviderRequest) -> None:
         """Validate and normalize request, ignoring unsupported parameters."""
@@ -357,7 +316,7 @@
     def _resolve_model(self, request: ProviderRequest) -> str:
         model_override = request.metadata.get("model") if request.metadata else None
         if model_override:
-            return self._ensure_model(str(model_override))
+            return str(model_override)
         return self._model
 
     def _emit_stream_if_requested(self, content: str, *, stream: bool) -> None:
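The practical effect of deleting `_ensure_model`: a per-request override is now forwarded to the CLI untouched instead of being rejected against the two-entry descriptor list. A hedged sketch; `ProviderRequest`'s field names beyond `metadata`, and `ClaudeProvider`'s no-argument construction, are assumptions:

```python
provider = ClaudeProvider()  # assumes defaults for binary/env/timeout/model

request = ProviderRequest(
    prompt="Summarize the spec",            # field name assumed
    metadata={"model": "claude-sonnet-x"},  # hypothetical model id
)

# 0.3.3: raised ProviderExecutionError for anything outside {"sonnet", "haiku"}.
# 0.7.0: returns "claude-sonnet-x" as-is; the claude CLI decides if it exists.
model = provider._resolve_model(request)
```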
--- a/foundry_mcp/core/providers/codex.py
+++ b/foundry_mcp/core/providers/codex.py
@@ -15,10 +15,7 @@ import os
 import subprocess
 from typing import Any, Dict, List, Optional, Protocol, Sequence, Tuple
 
-logger = logging.getLogger(__name__)
-
 from .base import (
-    ModelDescriptor,
     ProviderCapability,
     ProviderContext,
     ProviderExecutionError,
@@ -35,6 +32,8 @@ from .base import (
 from .detectors import detect_provider_availability
 from .registry import register_provider
 
+logger = logging.getLogger(__name__)
+
 DEFAULT_BINARY = "codex"
 DEFAULT_TIMEOUT_SECONDS = 360
 AVAILABILITY_OVERRIDE_ENV = "CODEX_CLI_AVAILABLE_OVERRIDE"
@@ -228,44 +227,11 @@ def _default_runner(
     )
 
 
-CODEX_MODELS: List[ModelDescriptor] = [
-    ModelDescriptor(
-        id="gpt-5.1-codex",
-        display_name="GPT-5.1 Codex",
-        capabilities={
-            ProviderCapability.TEXT,
-            ProviderCapability.STREAMING,
-            ProviderCapability.FUNCTION_CALLING,
-        },
-        routing_hints={"tier": "primary", "optimized_for": "codex"},
-    ),
-    ModelDescriptor(
-        id="gpt-5.1-codex-mini",
-        display_name="GPT-5.1 Codex Mini",
-        capabilities={
-            ProviderCapability.TEXT,
-            ProviderCapability.STREAMING,
-            ProviderCapability.FUNCTION_CALLING,
-        },
-        routing_hints={"tier": "fast", "optimized_for": "codex"},
-    ),
-    ModelDescriptor(
-        id="gpt-5.1",
-        display_name="GPT-5.1",
-        capabilities={
-            ProviderCapability.TEXT,
-            ProviderCapability.STREAMING,
-            ProviderCapability.FUNCTION_CALLING,
-        },
-        routing_hints={"tier": "general"},
-    ),
-]
-
 CODEX_METADATA = ProviderMetadata(
     provider_id="codex",
     display_name="OpenAI Codex CLI",
-    models=CODEX_MODELS,
-    default_model="gpt-5.1-codex",
+    models=[],  # Model validation delegated to CLI
+    default_model="gpt-5.2",
     capabilities={ProviderCapability.TEXT, ProviderCapability.STREAMING, ProviderCapability.FUNCTION_CALLING},
     security_flags={"writes_allowed": False, "read_only": True, "sandbox": "read-only"},
     extra={
@@ -301,7 +267,7 @@ class CodexProvider(ProviderContext):
         self._binary = binary or os.environ.get(CUSTOM_BINARY_ENV, DEFAULT_BINARY)
         self._env = self._prepare_subprocess_env(env)
         self._timeout = timeout or DEFAULT_TIMEOUT_SECONDS
-        self._model = self._ensure_model(model or metadata.default_model or self._first_model_id())
+        self._model = model or metadata.default_model or "gpt-5.2"
 
     def _prepare_subprocess_env(self, custom_env: Optional[Dict[str, str]]) -> Dict[str, str]:
         """
@@ -323,23 +289,6 @@
 
         return subprocess_env
 
-    def _first_model_id(self) -> str:
-        if not self.metadata.models:
-            raise ProviderUnavailableError(
-                "Codex provider metadata is missing model descriptors.",
-                provider=self.metadata.provider_id,
-            )
-        return self.metadata.models[0].id
-
-    def _ensure_model(self, candidate: str) -> str:
-        available = {descriptor.id for descriptor in self.metadata.models}
-        if candidate not in available:
-            raise ProviderExecutionError(
-                f"Unsupported Codex model '{candidate}'. Available: {', '.join(sorted(available))}",
-                provider=self.metadata.provider_id,
-            )
-        return candidate
-
     def _validate_request(self, request: ProviderRequest) -> None:
         """Validate and normalize request, ignoring unsupported parameters."""
         unsupported: List[str] = []
@@ -531,7 +480,7 @@
 
     def _execute(self, request: ProviderRequest) -> ProviderResult:
         self._validate_request(request)
-        model = self._ensure_model(
+        model = (
             str(request.metadata.get("model")) if request.metadata and "model" in request.metadata else self._model
         )
         prompt = self._build_prompt(request)
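Codex follows the same delegation pattern: the hard-coded GPT-5.1 catalogue is gone, the default becomes "gpt-5.2", and `_execute` trusts any metadata override. A brief sketch under the same assumptions as the Claude example:

```python
provider = CodexProvider()  # assumes defaults for binary/env/timeout/model
print(provider._model)      # -> "gpt-5.2" (the new default_model)

request = ProviderRequest(
    prompt="Review this change",        # field name assumed
    metadata={"model": "gpt-5-turbo"},  # hypothetical id, forwarded as-is
)
# _execute() now reads the override directly; an unknown model fails in the
# codex CLI rather than in foundry-mcp's _ensure_model (which was removed).
```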