foundry-mcp 0.3.3__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff compares the contents of two publicly released package versions as they appear in their public registry. It is provided for informational purposes only.
- foundry_mcp/__init__.py +7 -1
- foundry_mcp/cli/commands/plan.py +10 -3
- foundry_mcp/cli/commands/review.py +19 -4
- foundry_mcp/cli/commands/specs.py +38 -208
- foundry_mcp/cli/output.py +3 -3
- foundry_mcp/config.py +235 -5
- foundry_mcp/core/ai_consultation.py +146 -9
- foundry_mcp/core/discovery.py +6 -6
- foundry_mcp/core/error_store.py +2 -2
- foundry_mcp/core/intake.py +933 -0
- foundry_mcp/core/llm_config.py +20 -2
- foundry_mcp/core/metrics_store.py +2 -2
- foundry_mcp/core/progress.py +70 -0
- foundry_mcp/core/prompts/fidelity_review.py +149 -4
- foundry_mcp/core/prompts/markdown_plan_review.py +5 -1
- foundry_mcp/core/prompts/plan_review.py +5 -1
- foundry_mcp/core/providers/claude.py +6 -47
- foundry_mcp/core/providers/codex.py +6 -57
- foundry_mcp/core/providers/cursor_agent.py +3 -44
- foundry_mcp/core/providers/gemini.py +6 -57
- foundry_mcp/core/providers/opencode.py +35 -5
- foundry_mcp/core/research/__init__.py +68 -0
- foundry_mcp/core/research/memory.py +425 -0
- foundry_mcp/core/research/models.py +437 -0
- foundry_mcp/core/research/workflows/__init__.py +22 -0
- foundry_mcp/core/research/workflows/base.py +204 -0
- foundry_mcp/core/research/workflows/chat.py +271 -0
- foundry_mcp/core/research/workflows/consensus.py +396 -0
- foundry_mcp/core/research/workflows/ideate.py +682 -0
- foundry_mcp/core/research/workflows/thinkdeep.py +405 -0
- foundry_mcp/core/responses.py +450 -0
- foundry_mcp/core/spec.py +2438 -236
- foundry_mcp/core/task.py +1064 -19
- foundry_mcp/core/testing.py +512 -123
- foundry_mcp/core/validation.py +313 -42
- foundry_mcp/dashboard/components/charts.py +0 -57
- foundry_mcp/dashboard/launcher.py +11 -0
- foundry_mcp/dashboard/views/metrics.py +25 -35
- foundry_mcp/dashboard/views/overview.py +1 -65
- foundry_mcp/resources/specs.py +25 -25
- foundry_mcp/schemas/intake-schema.json +89 -0
- foundry_mcp/schemas/sdd-spec-schema.json +33 -5
- foundry_mcp/server.py +38 -0
- foundry_mcp/tools/unified/__init__.py +4 -2
- foundry_mcp/tools/unified/authoring.py +2423 -267
- foundry_mcp/tools/unified/documentation_helpers.py +69 -6
- foundry_mcp/tools/unified/environment.py +235 -6
- foundry_mcp/tools/unified/error.py +18 -1
- foundry_mcp/tools/unified/lifecycle.py +8 -0
- foundry_mcp/tools/unified/plan.py +113 -1
- foundry_mcp/tools/unified/research.py +658 -0
- foundry_mcp/tools/unified/review.py +370 -16
- foundry_mcp/tools/unified/spec.py +367 -0
- foundry_mcp/tools/unified/task.py +1163 -48
- foundry_mcp/tools/unified/test.py +69 -8
- {foundry_mcp-0.3.3.dist-info → foundry_mcp-0.7.0.dist-info}/METADATA +7 -1
- {foundry_mcp-0.3.3.dist-info → foundry_mcp-0.7.0.dist-info}/RECORD +60 -48
- {foundry_mcp-0.3.3.dist-info → foundry_mcp-0.7.0.dist-info}/WHEEL +0 -0
- {foundry_mcp-0.3.3.dist-info → foundry_mcp-0.7.0.dist-info}/entry_points.txt +0 -0
- {foundry_mcp-0.3.3.dist-info → foundry_mcp-0.7.0.dist-info}/licenses/LICENSE +0 -0
foundry_mcp/core/llm_config.py
CHANGED
@@ -860,16 +860,18 @@ def reset_workflow_config() -> None:
 class WorkflowConsultationConfig:
     """Per-workflow consultation configuration overrides.
 
-    Allows individual workflows to specify minimum model requirements
-
+    Allows individual workflows to specify minimum model requirements,
+    timeout overrides, and default review types for AI consultations.
 
     TOML Configuration Example:
         [consultation.workflows.fidelity_review]
         min_models = 2
        timeout_override = 600.0
+        default_review_type = "full"
 
         [consultation.workflows.plan_review]
         min_models = 3
+        default_review_type = "full"
 
     Attributes:
         min_models: Minimum number of models required for consensus (default: 1).
@@ -878,10 +880,17 @@ class WorkflowConsultationConfig:
         timeout_override: Optional timeout override in seconds. When set,
             overrides the default_timeout from ConsultationConfig
             for this specific workflow.
+        default_review_type: Default review type for this workflow (default: "full").
+            Valid values: "quick", "full", "security", "feasibility".
+            Used when no explicit review_type is provided in requests.
     """
 
     min_models: int = 1
     timeout_override: Optional[float] = None
+    default_review_type: str = "full"
+
+    # Valid review types
+    VALID_REVIEW_TYPES = {"quick", "full", "security", "feasibility"}
 
     def validate(self) -> None:
         """Validate the workflow consultation configuration.
@@ -897,6 +906,12 @@ class WorkflowConsultationConfig:
                 f"timeout_override must be positive if set, got {self.timeout_override}"
             )
 
+        if self.default_review_type not in self.VALID_REVIEW_TYPES:
+            raise ValueError(
+                f"default_review_type must be one of {sorted(self.VALID_REVIEW_TYPES)}, "
+                f"got '{self.default_review_type}'"
+            )
+
     @classmethod
     def from_dict(cls, data: Dict[str, Any]) -> "WorkflowConsultationConfig":
         """Create WorkflowConsultationConfig from a dictionary.
@@ -917,6 +932,9 @@ class WorkflowConsultationConfig:
         if value is not None:
             config.timeout_override = float(value)
 
+        if "default_review_type" in data:
+            config.default_review_type = str(data["default_review_type"]).lower()
+
         return config
 
 
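The net behavior of the new field, folded into one sketch for brevity. This is a standalone illustration, not the packaged class: the real code splits the lowercasing (in from_dict) and the membership check (in validate) as shown in the hunks above.

```python
# Standalone sketch of the new default_review_type handling; the packaged
# class performs the lowercasing in from_dict() and the check in validate().
VALID_REVIEW_TYPES = {"quick", "full", "security", "feasibility"}

def parse_default_review_type(data: dict) -> str:
    # Incoming values are lowercased, so "Security" in TOML still validates.
    review_type = str(data.get("default_review_type", "full")).lower()
    if review_type not in VALID_REVIEW_TYPES:
        raise ValueError(
            f"default_review_type must be one of {sorted(VALID_REVIEW_TYPES)}, "
            f"got '{review_type}'"
        )
    return review_type

print(parse_default_review_type({"default_review_type": "Security"}))  # 'security'
# parse_default_review_type({"default_review_type": "exhaustive"}) raises ValueError
```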
foundry_mcp/core/metrics_store.py
CHANGED

@@ -180,7 +180,7 @@ class FileMetricsStore(MetricsStore):
     for efficient querying. Thread-safe with file locking for concurrent access.
 
     Directory structure:
-
+        ~/.foundry-mcp/metrics/
             metrics.jsonl - Append-only metrics log
             index.json - Metric name -> metadata mapping
     """
@@ -628,7 +628,7 @@ def get_metrics_store(storage_path: Optional[str | Path] = None) -> MetricsStore
     if _metrics_store is None:
         if storage_path is None:
             # Default path
-            storage_path = Path.home() / ".
+            storage_path = Path.home() / ".foundry-mcp" / "metrics"
         _metrics_store = FileMetricsStore(storage_path)
 
     return _metrics_store
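The practical effect of the second hunk: when no storage path is given, metrics now resolve under ~/.foundry-mcp/metrics (the 0.3.3 line is truncated in this view). A small sketch of the resolved layout:

```python
from pathlib import Path

# Default resolution performed by get_metrics_store(None), per the hunk above.
storage_path = Path.home() / ".foundry-mcp" / "metrics"
print(storage_path / "metrics.jsonl")  # append-only metrics log
print(storage_path / "index.json")     # metric name -> metadata mapping
```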
foundry_mcp/core/progress.py
CHANGED
@@ -315,3 +315,73 @@ def get_task_counts_by_status(spec_data: Dict[str, Any]) -> Dict[str, int]:
         counts[status] += 1
 
     return counts
+
+
+def sync_computed_fields(spec_data: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Synchronize computed fields to their canonical top-level locations.
+
+    This function should be called after any task status change to ensure
+    progress_percentage, current_phase, and status are persisted to the spec.
+
+    Updates (in-place):
+    - progress_percentage: calculated from hierarchy counts
+    - current_phase: first in_progress phase, or first pending if none
+    - status: based on overall progress
+
+    Args:
+        spec_data: Spec data dictionary (modified in place)
+
+    Returns:
+        Dict with computed values for confirmation
+    """
+    if not spec_data:
+        return {}
+
+    hierarchy = spec_data.get("hierarchy", {})
+    root = hierarchy.get("spec-root", {})
+
+    # Calculate progress percentage
+    total = root.get("total_tasks", 0)
+    completed = root.get("completed_tasks", 0)
+    progress_pct = int((completed / total * 100)) if total > 0 else 0
+
+    # Determine current phase (first in_progress, or first pending if none)
+    current_phase = None
+    for key, node in hierarchy.items():
+        if node.get("type") == "phase":
+            if node.get("status") == "in_progress":
+                current_phase = key
+                break
+            elif current_phase is None and node.get("status") == "pending":
+                current_phase = key
+
+    # Determine overall status based on progress
+    if total == 0:
+        status = "pending"
+    elif completed == total:
+        status = "completed"
+    elif completed > 0:
+        status = "in_progress"
+    else:
+        status = "pending"
+
+    # Check if any task is blocked - if so, spec is blocked
+    for node in hierarchy.values():
+        if node.get("status") == "blocked":
+            status = "blocked"
+            break
+
+    # Update top-level fields (canonical location)
+    spec_data["progress_percentage"] = progress_pct
+    spec_data["current_phase"] = current_phase
+    spec_data["status"] = status
+
+    # Update last_updated timestamp
+    spec_data["last_updated"] = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
+
+    return {
+        "progress_percentage": progress_pct,
+        "current_phase": current_phase,
+        "status": status
+    }
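A quick usage sketch of the new helper. The spec dict below is a contrived minimal example; its field names come straight from the function body above, though real spec nodes may carry additional keys.

```python
from foundry_mcp.core.progress import sync_computed_fields

# Contrived minimal spec: 2 of 4 tasks done, one phase mid-flight.
spec = {
    "hierarchy": {
        "spec-root": {"type": "spec", "total_tasks": 4, "completed_tasks": 2},
        "phase-1": {"type": "phase", "status": "completed"},
        "phase-2": {"type": "phase", "status": "in_progress"},
    },
}

result = sync_computed_fields(spec)
# result == {"progress_percentage": 50, "current_phase": "phase-2",
#            "status": "in_progress"}
# spec itself now carries the same three top-level fields plus a fresh
# "last_updated" UTC timestamp ending in "Z".
```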
foundry_mcp/core/prompts/fidelity_review.py
CHANGED

@@ -9,6 +9,7 @@ Prompt IDs (PromptTemplate-based):
 - FIDELITY_REVIEW_V1: Main 6-section fidelity review prompt
 - FIDELITY_DEVIATION_ANALYSIS_V1: Analyze identified deviations
 - FIDELITY_COMPLIANCE_SUMMARY_V1: Generate compliance summary
+- FIDELITY_SYNTHESIS_PROMPT_V1: Multi-model response synthesis
 
 Legacy Prompt IDs (string templates for backward compatibility):
 - review_task: Compare task implementation against spec requirements
@@ -66,6 +67,65 @@ FIDELITY_RESPONSE_SCHEMA = """{
 }"""
 
 
+# JSON response schema for synthesized multi-model fidelity reviews
+FIDELITY_SYNTHESIZED_RESPONSE_SCHEMA = """{
+  "verdict": "pass|fail|partial|unknown",
+  "verdict_consensus": {
+    "votes": {
+      "pass": ["model names that voted pass"],
+      "fail": ["model names that voted fail"],
+      "partial": ["model names that voted partial"],
+      "unknown": ["model names that voted unknown"]
+    },
+    "agreement_level": "strong|moderate|weak|conflicted",
+    "notes": "Explanation of verdict determination"
+  },
+  "summary": "Synthesized overall findings.",
+  "requirement_alignment": {
+    "answer": "yes|no|partial",
+    "details": "Synthesized alignment assessment.",
+    "model_agreement": "unanimous|majority|split"
+  },
+  "success_criteria": {
+    "met": "yes|no|partial",
+    "details": "Synthesized verification status.",
+    "model_agreement": "unanimous|majority|split"
+  },
+  "deviations": [
+    {
+      "description": "Merged deviation description",
+      "justification": "Combined rationale",
+      "severity": "critical|high|medium|low",
+      "identified_by": ["model names that identified this"],
+      "agreement": "unanimous|majority|single"
+    }
+  ],
+  "test_coverage": {
+    "status": "sufficient|insufficient|not_applicable",
+    "details": "Synthesized test assessment",
+    "model_agreement": "unanimous|majority|split"
+  },
+  "code_quality": {
+    "issues": ["Merged quality concerns with model attribution"],
+    "details": "Synthesized commentary"
+  },
+  "documentation": {
+    "status": "adequate|inadequate|not_applicable",
+    "details": "Synthesized doc assessment",
+    "model_agreement": "unanimous|majority|split"
+  },
+  "issues": ["Deduplicated issues with model attribution"],
+  "recommendations": ["Prioritized actionable steps"],
+  "synthesis_metadata": {
+    "models_consulted": ["all model names"],
+    "models_succeeded": ["successful model names"],
+    "models_failed": ["failed model names"],
+    "synthesis_provider": "model that performed synthesis",
+    "agreement_level": "strong|moderate|weak|conflicted"
+  }
+}"""
+
+
 # =============================================================================
 # Severity Categorization Keywords
 # =============================================================================
@@ -166,6 +226,9 @@ CRITICAL CONSTRAINTS:
 - This is a READ-ONLY review - you MUST NOT write, create, or modify ANY files
 - Execute code or commands - ANALYSIS ONLY
 - Provide findings as structured JSON in your response
+- Do NOT focus on ownership, responsibility, or team assignment concerns
+- Avoid feedback like "who owns", "who verifies", "who is responsible for"
+- Focus on technical requirements and verification steps themselves, not who performs them
 
 Focus on:
 1. Requirement alignment - Does implementation match spec?
@@ -430,6 +493,82 @@ Respond with valid JSON:
 )
 
 
+# Multi-model synthesis prompt - consolidates multiple fidelity reviews
+FIDELITY_SYNTHESIS_PROMPT_V1 = PromptTemplate(
+    id="FIDELITY_SYNTHESIS_PROMPT_V1",
+    version="1.0",
+    system_prompt="""You are an expert at synthesizing multiple fidelity review results.
+Your task is to consolidate diverse perspectives into actionable consensus while preserving JSON format.
+
+Guidelines:
+- Attribute findings to specific models using the identified_by field
+- Merge similar deviations, noting which models identified each
+- Resolve verdict disagreements using majority vote or escalate to "partial" on conflict
+- Preserve unique insights from each model
+- Output valid JSON matching the required schema exactly
+- Do NOT focus on ownership, responsibility, or team assignment concerns
+- Focus on technical requirements and verification steps themselves, not who performs them""",
+    user_template="""You are synthesizing {num_models} independent AI fidelity reviews.
+
+**Specification:** {spec_title} (`{spec_id}`)
+**Review Scope:** {review_scope}
+
+**Your Task:** Read all JSON reviews below and create a unified synthesis.
+
+## Individual Model Reviews
+
+{model_reviews}
+
+## Synthesis Requirements
+
+1. **Verdict Consensus:**
+   - Count votes for each verdict (pass/fail/partial/unknown)
+   - Use majority vote for final verdict
+   - If tied or conflicted, use "partial" and note disagreement
+   - Record agreement_level: "strong" (all agree), "moderate" (majority agrees), "weak" (slight majority), "conflicted" (tied/split)
+
+2. **Deviation Merging:**
+   - Group similar deviations across models by description
+   - Use highest severity when models disagree on severity
+   - Track which models identified each deviation in identified_by array
+   - Mark agreement: "unanimous" (all models), "majority" (>50%), "single" (one model)
+
+3. **Issue Consolidation:**
+   - Deduplicate issues across models
+   - Preserve unique insights
+   - Note model agreement level for each finding
+
+4. **Attribution Rules:**
+   - "unanimous" = all successful models agree
+   - "majority" = >50% of successful models agree
+   - "single" = only one model identified this
+
+### Required Response Format
+
+Respond **only** with valid JSON matching the schema below. Do not include Markdown, prose, or additional commentary outside the JSON object.
+
+```json
+{response_schema}
+```
+
+Rules:
+- Use lowercase values for enumerated fields (verdict, status, severity, etc.)
+- Keep arrays as arrays (use [] when empty)
+- Populate identified_by with actual model names from the reviews
+- Never omit required fields from the schema
+- Use the actual provider names from the reviews (e.g., "gemini", "codex", "claude")""",
+    required_context=["spec_id", "spec_title", "review_scope", "num_models", "model_reviews"],
+    optional_context=["response_schema"],
+    metadata={
+        "workflow": "fidelity_review",
+        "author": "system",
+        "category": "synthesis",
+        "output_format": "json",
+        "description": "Multi-model fidelity review synthesis",
+    },
+)
+
+
 # =============================================================================
 # Template Registry (PromptTemplate-based)
 # =============================================================================
@@ -439,6 +578,7 @@ FIDELITY_REVIEW_TEMPLATES: Dict[str, PromptTemplate] = {
     "FIDELITY_REVIEW_V1": FIDELITY_REVIEW_V1,
     "FIDELITY_DEVIATION_ANALYSIS_V1": FIDELITY_DEVIATION_ANALYSIS_V1,
     "FIDELITY_COMPLIANCE_SUMMARY_V1": FIDELITY_COMPLIANCE_SUMMARY_V1,
+    "FIDELITY_SYNTHESIS_PROMPT_V1": FIDELITY_SYNTHESIS_PROMPT_V1,
 }
 
 
@@ -475,7 +615,7 @@ class FidelityReviewPromptBuilder(PromptBuilder):
         Args:
             prompt_id: Template identifier. Supports:
                 - PromptTemplate IDs: FIDELITY_REVIEW_V1, FIDELITY_DEVIATION_ANALYSIS_V1,
-                  FIDELITY_COMPLIANCE_SUMMARY_V1
+                  FIDELITY_COMPLIANCE_SUMMARY_V1, FIDELITY_SYNTHESIS_PROMPT_V1
             context: Template context variables
 
         Returns:
@@ -491,9 +631,12 @@ class FidelityReviewPromptBuilder(PromptBuilder):
         # Provide defaults for optional context
         render_context = dict(context)
 
-        # Add response schema default
+        # Add response schema default - use synthesized schema for synthesis prompt
         if "response_schema" not in render_context:
-            render_context["response_schema"] = FIDELITY_RESPONSE_SCHEMA
+            if prompt_id == "FIDELITY_SYNTHESIS_PROMPT_V1":
+                render_context["response_schema"] = FIDELITY_SYNTHESIZED_RESPONSE_SCHEMA
+            else:
+                render_context["response_schema"] = FIDELITY_RESPONSE_SCHEMA
 
         # Add empty defaults for optional fields
         if "spec_description" not in render_context:
@@ -531,10 +674,12 @@ __all__ = [
     "FIDELITY_REVIEW_V1",
     "FIDELITY_DEVIATION_ANALYSIS_V1",
     "FIDELITY_COMPLIANCE_SUMMARY_V1",
+    "FIDELITY_SYNTHESIS_PROMPT_V1",
     # Template registries
     "FIDELITY_REVIEW_TEMPLATES",
-    # Response
+    # Response schemas
     "FIDELITY_RESPONSE_SCHEMA",
+    "FIDELITY_SYNTHESIZED_RESPONSE_SCHEMA",
     # Severity keywords
     "SEVERITY_KEYWORDS",
     "CRITICAL_KEYWORDS",
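The verdict-consensus rules in the synthesis prompt are carried out by the synthesis model, not by Python, but the intended decision procedure is mechanical enough to sketch. Illustrative only; names and thresholds follow the prompt text above:

```python
from collections import Counter

def synthesize_verdict(votes: dict) -> dict:
    """Majority vote wins; a tie falls back to "partial" and is marked
    conflicted. The prompt further grades non-tied outcomes into
    strong/moderate/weak; only unanimous vs. majority is modeled here."""
    ranked = Counter(votes.values()).most_common()
    top_verdict, top_count = ranked[0]
    if len(ranked) > 1 and ranked[1][1] == top_count:  # tied vote
        return {"verdict": "partial", "agreement_level": "conflicted"}
    level = "strong" if top_count == len(votes) else "moderate"
    return {"verdict": top_verdict, "agreement_level": level}

print(synthesize_verdict({"gemini": "pass", "codex": "pass", "claude": "partial"}))
# {'verdict': 'pass', 'agreement_level': 'moderate'}
```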
foundry_mcp/core/prompts/markdown_plan_review.py
CHANGED

@@ -71,6 +71,7 @@ What the plan does well.
 - Include all sections even if empty (write "None identified" for empty sections)
 - Be specific and actionable in all feedback
 - For clarity issues, use Questions section rather than creating a separate category
+- Do NOT generate feedback about ownership, responsibility, or team assignments (e.g., "who verifies", "who owns", "who is responsible")
 """
 
 
@@ -89,7 +90,10 @@ Guidelines:
 - Propose alternatives when better approaches exist
 - Focus on impact and prioritize feedback by potential consequences
 - Be collaborative, not adversarial
-- Remember: this is an early-stage plan, not a final spec
+- Remember: this is an early-stage plan, not a final spec
+- Do NOT focus on ownership, responsibility, or team assignment concerns
+- Avoid feedback like "who owns", "who verifies", "who is responsible for"
+- Focus on technical requirements and verification steps themselves, not who performs them"""
 
 
 # =============================================================================
foundry_mcp/core/prompts/plan_review.py
CHANGED

@@ -78,6 +78,7 @@ What the spec does well.
 - Be specific and actionable in all feedback
 - For clarity issues, use Questions section rather than creating a separate category
 - Attribution: In multi-model reviews, prefix items with "Flagged by [model-name]:" when applicable
+- Do NOT generate feedback about ownership, responsibility, or team assignments (e.g., "who verifies", "who owns", "who is responsible")
 """
 
 
@@ -94,7 +95,10 @@ Guidelines:
 - Ask clarifying questions for ambiguities
 - Propose alternatives when better approaches exist
 - Focus on impact and prioritize feedback by potential consequences
-- Be collaborative, not adversarial
+- Be collaborative, not adversarial
+- Do NOT focus on ownership, responsibility, or team assignment concerns
+- Avoid feedback like "who owns", "who verifies", "who is responsible for"
+- Focus on technical requirements and verification steps themselves, not who performs them"""
 
 
 # =============================================================================
foundry_mcp/core/providers/claude.py
CHANGED

@@ -14,10 +14,7 @@ import os
 import subprocess
 from typing import Any, Dict, List, Optional, Protocol, Sequence
 
-logger = logging.getLogger(__name__)
-
 from .base import (
-    ModelDescriptor,
     ProviderCapability,
     ProviderContext,
     ProviderExecutionError,
@@ -34,6 +31,8 @@ from .base import (
 from .detectors import detect_provider_availability
 from .registry import register_provider
 
+logger = logging.getLogger(__name__)
+
 DEFAULT_BINARY = "claude"
 DEFAULT_TIMEOUT_SECONDS = 360
 AVAILABILITY_OVERRIDE_ENV = "CLAUDE_CLI_AVAILABLE_OVERRIDE"
@@ -181,34 +180,11 @@ def _default_runner(
     )
 
 
-CLAUDE_MODELS: List[ModelDescriptor] = [
-    ModelDescriptor(
-        id="sonnet",
-        display_name="Sonnet 4.5",
-        capabilities={
-            ProviderCapability.TEXT,
-            ProviderCapability.STREAMING,
-            ProviderCapability.VISION,
-            ProviderCapability.THINKING,
-        },
-        routing_hints={"tier": "default", "description": "Smartest model for daily use"},
-    ),
-    ModelDescriptor(
-        id="haiku",
-        display_name="Haiku 4.5",
-        capabilities={
-            ProviderCapability.TEXT,
-            ProviderCapability.STREAMING,
-        },
-        routing_hints={"tier": "fast", "description": "Fastest model for simple tasks"},
-    ),
-]
-
 CLAUDE_METADATA = ProviderMetadata(
     provider_id="claude",
     display_name="Anthropic Claude CLI",
-    models=
-    default_model="
+    models=[],  # Model validation delegated to CLI
+    default_model="opus",
     capabilities={
         ProviderCapability.TEXT,
         ProviderCapability.STREAMING,
@@ -239,24 +215,7 @@ class ClaudeProvider(ProviderContext):
         self._binary = binary or os.environ.get(CUSTOM_BINARY_ENV, DEFAULT_BINARY)
         self._env = env
         self._timeout = timeout or DEFAULT_TIMEOUT_SECONDS
-        self._model =
-
-    def _first_model_id(self) -> str:
-        if not self.metadata.models:
-            raise ProviderUnavailableError(
-                "Claude provider metadata is missing model descriptors.",
-                provider=self.metadata.provider_id,
-            )
-        return self.metadata.models[0].id
-
-    def _ensure_model(self, candidate: str) -> str:
-        available = {descriptor.id for descriptor in self.metadata.models}
-        if candidate not in available:
-            raise ProviderExecutionError(
-                f"Unsupported Claude model '{candidate}'. Available: {', '.join(sorted(available))}",
-                provider=self.metadata.provider_id,
-            )
-        return candidate
+        self._model = model or metadata.default_model or "opus"
 
     def _validate_request(self, request: ProviderRequest) -> None:
         """Validate and normalize request, ignoring unsupported parameters."""
@@ -357,7 +316,7 @@ class ClaudeProvider(ProviderContext):
     def _resolve_model(self, request: ProviderRequest) -> str:
         model_override = request.metadata.get("model") if request.metadata else None
         if model_override:
-            return
+            return str(model_override)
         return self._model
 
     def _emit_stream_if_requested(self, content: str, *, stream: bool) -> None:
foundry_mcp/core/providers/codex.py
CHANGED

@@ -15,10 +15,7 @@ import os
 import subprocess
 from typing import Any, Dict, List, Optional, Protocol, Sequence, Tuple
 
-logger = logging.getLogger(__name__)
-
 from .base import (
-    ModelDescriptor,
     ProviderCapability,
     ProviderContext,
     ProviderExecutionError,
@@ -35,6 +32,8 @@ from .base import (
 from .detectors import detect_provider_availability
 from .registry import register_provider
 
+logger = logging.getLogger(__name__)
+
 DEFAULT_BINARY = "codex"
 DEFAULT_TIMEOUT_SECONDS = 360
 AVAILABILITY_OVERRIDE_ENV = "CODEX_CLI_AVAILABLE_OVERRIDE"
@@ -228,44 +227,11 @@ def _default_runner(
     )
 
 
-CODEX_MODELS: List[ModelDescriptor] = [
-    ModelDescriptor(
-        id="gpt-5.1-codex",
-        display_name="GPT-5.1 Codex",
-        capabilities={
-            ProviderCapability.TEXT,
-            ProviderCapability.STREAMING,
-            ProviderCapability.FUNCTION_CALLING,
-        },
-        routing_hints={"tier": "primary", "optimized_for": "codex"},
-    ),
-    ModelDescriptor(
-        id="gpt-5.1-codex-mini",
-        display_name="GPT-5.1 Codex Mini",
-        capabilities={
-            ProviderCapability.TEXT,
-            ProviderCapability.STREAMING,
-            ProviderCapability.FUNCTION_CALLING,
-        },
-        routing_hints={"tier": "fast", "optimized_for": "codex"},
-    ),
-    ModelDescriptor(
-        id="gpt-5.1",
-        display_name="GPT-5.1",
-        capabilities={
-            ProviderCapability.TEXT,
-            ProviderCapability.STREAMING,
-            ProviderCapability.FUNCTION_CALLING,
-        },
-        routing_hints={"tier": "general"},
-    ),
-]
-
 CODEX_METADATA = ProviderMetadata(
     provider_id="codex",
     display_name="OpenAI Codex CLI",
-    models=
-    default_model="gpt-5.
+    models=[],  # Model validation delegated to CLI
+    default_model="gpt-5.2",
     capabilities={ProviderCapability.TEXT, ProviderCapability.STREAMING, ProviderCapability.FUNCTION_CALLING},
     security_flags={"writes_allowed": False, "read_only": True, "sandbox": "read-only"},
     extra={
@@ -301,7 +267,7 @@ class CodexProvider(ProviderContext):
         self._binary = binary or os.environ.get(CUSTOM_BINARY_ENV, DEFAULT_BINARY)
         self._env = self._prepare_subprocess_env(env)
         self._timeout = timeout or DEFAULT_TIMEOUT_SECONDS
-        self._model =
+        self._model = model or metadata.default_model or "gpt-5.2"
 
     def _prepare_subprocess_env(self, custom_env: Optional[Dict[str, str]]) -> Dict[str, str]:
         """
@@ -323,23 +289,6 @@ class CodexProvider(ProviderContext):
 
         return subprocess_env
 
-    def _first_model_id(self) -> str:
-        if not self.metadata.models:
-            raise ProviderUnavailableError(
-                "Codex provider metadata is missing model descriptors.",
-                provider=self.metadata.provider_id,
-            )
-        return self.metadata.models[0].id
-
-    def _ensure_model(self, candidate: str) -> str:
-        available = {descriptor.id for descriptor in self.metadata.models}
-        if candidate not in available:
-            raise ProviderExecutionError(
-                f"Unsupported Codex model '{candidate}'. Available: {', '.join(sorted(available))}",
-                provider=self.metadata.provider_id,
-            )
-        return candidate
-
     def _validate_request(self, request: ProviderRequest) -> None:
         """Validate and normalize request, ignoring unsupported parameters."""
         unsupported: List[str] = []
@@ -531,7 +480,7 @@ class CodexProvider(ProviderContext):
 
     def _execute(self, request: ProviderRequest) -> ProviderResult:
         self._validate_request(request)
-        model =
+        model = (
            str(request.metadata.get("model")) if request.metadata and "model" in request.metadata else self._model
         )
         prompt = self._build_prompt(request)