foundry-mcp 0.3.3__py3-none-any.whl → 0.8.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- foundry_mcp/__init__.py +7 -1
- foundry_mcp/cli/__init__.py +0 -13
- foundry_mcp/cli/commands/plan.py +10 -3
- foundry_mcp/cli/commands/review.py +19 -4
- foundry_mcp/cli/commands/session.py +1 -8
- foundry_mcp/cli/commands/specs.py +38 -208
- foundry_mcp/cli/context.py +39 -0
- foundry_mcp/cli/output.py +3 -3
- foundry_mcp/config.py +615 -11
- foundry_mcp/core/ai_consultation.py +146 -9
- foundry_mcp/core/batch_operations.py +1196 -0
- foundry_mcp/core/discovery.py +7 -7
- foundry_mcp/core/error_store.py +2 -2
- foundry_mcp/core/intake.py +933 -0
- foundry_mcp/core/llm_config.py +28 -2
- foundry_mcp/core/metrics_store.py +2 -2
- foundry_mcp/core/naming.py +25 -2
- foundry_mcp/core/progress.py +70 -0
- foundry_mcp/core/prometheus.py +0 -13
- foundry_mcp/core/prompts/fidelity_review.py +149 -4
- foundry_mcp/core/prompts/markdown_plan_review.py +5 -1
- foundry_mcp/core/prompts/plan_review.py +5 -1
- foundry_mcp/core/providers/__init__.py +12 -0
- foundry_mcp/core/providers/base.py +39 -0
- foundry_mcp/core/providers/claude.py +51 -48
- foundry_mcp/core/providers/codex.py +70 -60
- foundry_mcp/core/providers/cursor_agent.py +25 -47
- foundry_mcp/core/providers/detectors.py +34 -7
- foundry_mcp/core/providers/gemini.py +69 -58
- foundry_mcp/core/providers/opencode.py +101 -47
- foundry_mcp/core/providers/package-lock.json +4 -4
- foundry_mcp/core/providers/package.json +1 -1
- foundry_mcp/core/providers/validation.py +128 -0
- foundry_mcp/core/research/__init__.py +68 -0
- foundry_mcp/core/research/memory.py +528 -0
- foundry_mcp/core/research/models.py +1220 -0
- foundry_mcp/core/research/providers/__init__.py +40 -0
- foundry_mcp/core/research/providers/base.py +242 -0
- foundry_mcp/core/research/providers/google.py +507 -0
- foundry_mcp/core/research/providers/perplexity.py +442 -0
- foundry_mcp/core/research/providers/semantic_scholar.py +544 -0
- foundry_mcp/core/research/providers/tavily.py +383 -0
- foundry_mcp/core/research/workflows/__init__.py +25 -0
- foundry_mcp/core/research/workflows/base.py +298 -0
- foundry_mcp/core/research/workflows/chat.py +271 -0
- foundry_mcp/core/research/workflows/consensus.py +539 -0
- foundry_mcp/core/research/workflows/deep_research.py +4020 -0
- foundry_mcp/core/research/workflows/ideate.py +682 -0
- foundry_mcp/core/research/workflows/thinkdeep.py +405 -0
- foundry_mcp/core/responses.py +690 -0
- foundry_mcp/core/spec.py +2439 -236
- foundry_mcp/core/task.py +1205 -31
- foundry_mcp/core/testing.py +512 -123
- foundry_mcp/core/validation.py +319 -43
- foundry_mcp/dashboard/components/charts.py +0 -57
- foundry_mcp/dashboard/launcher.py +11 -0
- foundry_mcp/dashboard/views/metrics.py +25 -35
- foundry_mcp/dashboard/views/overview.py +1 -65
- foundry_mcp/resources/specs.py +25 -25
- foundry_mcp/schemas/intake-schema.json +89 -0
- foundry_mcp/schemas/sdd-spec-schema.json +33 -5
- foundry_mcp/server.py +0 -14
- foundry_mcp/tools/unified/__init__.py +39 -18
- foundry_mcp/tools/unified/authoring.py +2371 -248
- foundry_mcp/tools/unified/documentation_helpers.py +69 -6
- foundry_mcp/tools/unified/environment.py +434 -32
- foundry_mcp/tools/unified/error.py +18 -1
- foundry_mcp/tools/unified/lifecycle.py +8 -0
- foundry_mcp/tools/unified/plan.py +133 -2
- foundry_mcp/tools/unified/provider.py +0 -40
- foundry_mcp/tools/unified/research.py +1283 -0
- foundry_mcp/tools/unified/review.py +374 -17
- foundry_mcp/tools/unified/review_helpers.py +16 -1
- foundry_mcp/tools/unified/server.py +9 -24
- foundry_mcp/tools/unified/spec.py +367 -0
- foundry_mcp/tools/unified/task.py +1664 -30
- foundry_mcp/tools/unified/test.py +69 -8
- {foundry_mcp-0.3.3.dist-info → foundry_mcp-0.8.10.dist-info}/METADATA +8 -1
- foundry_mcp-0.8.10.dist-info/RECORD +153 -0
- foundry_mcp/cli/flags.py +0 -266
- foundry_mcp/core/feature_flags.py +0 -592
- foundry_mcp-0.3.3.dist-info/RECORD +0 -135
- {foundry_mcp-0.3.3.dist-info → foundry_mcp-0.8.10.dist-info}/WHEEL +0 -0
- {foundry_mcp-0.3.3.dist-info → foundry_mcp-0.8.10.dist-info}/entry_points.txt +0 -0
- {foundry_mcp-0.3.3.dist-info → foundry_mcp-0.8.10.dist-info}/licenses/LICENSE +0 -0
foundry_mcp/core/llm_config.py
CHANGED
|
@@ -163,6 +163,14 @@ class ProviderSpec:
|
|
|
163
163
|
"[api]provider/model or [cli]transport[:backend/model|:model]"
|
|
164
164
|
)
|
|
165
165
|
|
|
166
|
+
@classmethod
def parse_flexible(cls, spec: str) -> "ProviderSpec":
    """Parse a provider spec, accepting bare provider IDs as a fallback.

    Surrounding whitespace is stripped first. A spec that starts with
    ``[`` is handed to :meth:`parse` unchanged; anything else is treated
    as a plain provider ID and wrapped in a ``cli``-type spec.

    Args:
        spec: Bracket-prefixed spec string, or a bare provider ID.

    Returns:
        The result of ``cls.parse`` for bracket-prefixed input, otherwise
        a new instance with ``type="cli"``, ``provider`` set to the
        lower-cased ID and ``raw`` set to the stripped input.
    """
    cleaned = spec.strip()
    is_bracketed = cleaned[:1] == "["
    if not is_bracketed:
        # Bare ID: no bracket prefix to parse, so build the spec directly.
        return cls(type="cli", provider=cleaned.lower(), raw=cleaned)
    return cls.parse(cleaned)
|
|
173
|
+
|
|
166
174
|
def validate(self) -> List[str]:
|
|
167
175
|
"""Validate the provider specification.
|
|
168
176
|
|
|
@@ -860,16 +868,18 @@ def reset_workflow_config() -> None:
|
|
|
860
868
|
class WorkflowConsultationConfig:
|
|
861
869
|
"""Per-workflow consultation configuration overrides.
|
|
862
870
|
|
|
863
|
-
Allows individual workflows to specify minimum model requirements
|
|
864
|
-
|
|
871
|
+
Allows individual workflows to specify minimum model requirements,
|
|
872
|
+
timeout overrides, and default review types for AI consultations.
|
|
865
873
|
|
|
866
874
|
TOML Configuration Example:
|
|
867
875
|
[consultation.workflows.fidelity_review]
|
|
868
876
|
min_models = 2
|
|
869
877
|
timeout_override = 600.0
|
|
878
|
+
default_review_type = "full"
|
|
870
879
|
|
|
871
880
|
[consultation.workflows.plan_review]
|
|
872
881
|
min_models = 3
|
|
882
|
+
default_review_type = "full"
|
|
873
883
|
|
|
874
884
|
Attributes:
|
|
875
885
|
min_models: Minimum number of models required for consensus (default: 1).
|
|
@@ -878,10 +888,17 @@ class WorkflowConsultationConfig:
|
|
|
878
888
|
timeout_override: Optional timeout override in seconds. When set,
|
|
879
889
|
overrides the default_timeout from ConsultationConfig
|
|
880
890
|
for this specific workflow.
|
|
891
|
+
default_review_type: Default review type for this workflow (default: "full").
|
|
892
|
+
Valid values: "quick", "full", "security", "feasibility".
|
|
893
|
+
Used when no explicit review_type is provided in requests.
|
|
881
894
|
"""
|
|
882
895
|
|
|
883
896
|
min_models: int = 1
|
|
884
897
|
timeout_override: Optional[float] = None
|
|
898
|
+
default_review_type: str = "full"
|
|
899
|
+
|
|
900
|
+
# Valid review types
|
|
901
|
+
VALID_REVIEW_TYPES = {"quick", "full", "security", "feasibility"}
|
|
885
902
|
|
|
886
903
|
def validate(self) -> None:
|
|
887
904
|
"""Validate the workflow consultation configuration.
|
|
@@ -897,6 +914,12 @@ class WorkflowConsultationConfig:
|
|
|
897
914
|
f"timeout_override must be positive if set, got {self.timeout_override}"
|
|
898
915
|
)
|
|
899
916
|
|
|
917
|
+
if self.default_review_type not in self.VALID_REVIEW_TYPES:
|
|
918
|
+
raise ValueError(
|
|
919
|
+
f"default_review_type must be one of {sorted(self.VALID_REVIEW_TYPES)}, "
|
|
920
|
+
f"got '{self.default_review_type}'"
|
|
921
|
+
)
|
|
922
|
+
|
|
900
923
|
@classmethod
|
|
901
924
|
def from_dict(cls, data: Dict[str, Any]) -> "WorkflowConsultationConfig":
|
|
902
925
|
"""Create WorkflowConsultationConfig from a dictionary.
|
|
@@ -917,6 +940,9 @@ class WorkflowConsultationConfig:
|
|
|
917
940
|
if value is not None:
|
|
918
941
|
config.timeout_override = float(value)
|
|
919
942
|
|
|
943
|
+
if "default_review_type" in data:
|
|
944
|
+
config.default_review_type = str(data["default_review_type"]).lower()
|
|
945
|
+
|
|
920
946
|
return config
|
|
921
947
|
|
|
922
948
|
|
|
@@ -180,7 +180,7 @@ class FileMetricsStore(MetricsStore):
|
|
|
180
180
|
for efficient querying. Thread-safe with file locking for concurrent access.
|
|
181
181
|
|
|
182
182
|
Directory structure:
|
|
183
|
-
|
|
183
|
+
~/.foundry-mcp/metrics/
|
|
184
184
|
metrics.jsonl - Append-only metrics log
|
|
185
185
|
index.json - Metric name -> metadata mapping
|
|
186
186
|
"""
|
|
@@ -628,7 +628,7 @@ def get_metrics_store(storage_path: Optional[str | Path] = None) -> MetricsStore
|
|
|
628
628
|
if _metrics_store is None:
|
|
629
629
|
if storage_path is None:
|
|
630
630
|
# Default path
|
|
631
|
-
storage_path = Path.home() / ".
|
|
631
|
+
storage_path = Path.home() / ".foundry-mcp" / "metrics"
|
|
632
632
|
_metrics_store = FileMetricsStore(storage_path)
|
|
633
633
|
|
|
634
634
|
return _metrics_store
|
foundry_mcp/core/naming.py
CHANGED
|
@@ -4,17 +4,34 @@ from __future__ import annotations
|
|
|
4
4
|
|
|
5
5
|
import asyncio
|
|
6
6
|
import functools
|
|
7
|
+
import json
|
|
7
8
|
import logging
|
|
8
9
|
import time
|
|
9
10
|
from typing import Any, Callable
|
|
10
11
|
|
|
11
12
|
from mcp.server.fastmcp import FastMCP
|
|
13
|
+
from mcp.types import TextContent
|
|
12
14
|
|
|
13
15
|
from foundry_mcp.core.observability import mcp_tool
|
|
14
16
|
|
|
15
17
|
logger = logging.getLogger(__name__)
|
|
16
18
|
|
|
17
19
|
|
|
20
|
+
def _minify_response(result: dict[str, Any]) -> TextContent:
    """Wrap a result dictionary in a ``TextContent`` holding compact JSON.

    The JSON is emitted without whitespace after ``,`` and ``:``
    separators. Values the ``json`` module cannot encode natively are
    passed through ``str`` instead of raising.

    Args:
        result: Dictionary to serialize.

    Returns:
        A ``TextContent`` of type ``"text"`` whose text is the minified
        JSON string.
    """
    compact_json = json.dumps(result, separators=(",", ":"), default=str)
    return TextContent(type="text", text=compact_json)
|
|
33
|
+
|
|
34
|
+
|
|
18
35
|
def canonical_tool(
|
|
19
36
|
mcp: FastMCP,
|
|
20
37
|
*,
|
|
@@ -45,7 +62,10 @@ def canonical_tool(
|
|
|
45
62
|
"""Async wrapper for async underlying functions."""
|
|
46
63
|
start_time = time.perf_counter()
|
|
47
64
|
try:
|
|
48
|
-
|
|
65
|
+
result = await func(*args, **kwargs)
|
|
66
|
+
if isinstance(result, dict):
|
|
67
|
+
return _minify_response(result)
|
|
68
|
+
return result
|
|
49
69
|
except Exception as e:
|
|
50
70
|
duration_ms = (time.perf_counter() - start_time) * 1000
|
|
51
71
|
_collect_tool_error(
|
|
@@ -64,7 +84,10 @@ def canonical_tool(
|
|
|
64
84
|
"""Sync wrapper for sync underlying functions."""
|
|
65
85
|
start_time = time.perf_counter()
|
|
66
86
|
try:
|
|
67
|
-
|
|
87
|
+
result = func(*args, **kwargs)
|
|
88
|
+
if isinstance(result, dict):
|
|
89
|
+
return _minify_response(result)
|
|
90
|
+
return result
|
|
68
91
|
except Exception as e:
|
|
69
92
|
duration_ms = (time.perf_counter() - start_time) * 1000
|
|
70
93
|
_collect_tool_error(
|
foundry_mcp/core/progress.py
CHANGED
|
@@ -315,3 +315,73 @@ def get_task_counts_by_status(spec_data: Dict[str, Any]) -> Dict[str, int]:
|
|
|
315
315
|
counts[status] += 1
|
|
316
316
|
|
|
317
317
|
return counts
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
def sync_computed_fields(spec_data: Dict[str, Any]) -> Dict[str, Any]:
    """Recompute derived spec fields and store them at the top level.

    Intended to be called after a task status change so the persisted
    spec carries up-to-date values.

    Fields written to ``spec_data`` (in place):
        - ``progress_percentage``: ``completed_tasks / total_tasks`` of the
          ``spec-root`` node as a truncated integer percentage (0 when
          there are no tasks).
        - ``current_phase``: key of the first ``in_progress`` phase node,
          or the first ``pending`` phase node when none is in progress,
          or ``None`` when neither exists.
        - ``status``: ``"blocked"`` if any hierarchy node is blocked;
          otherwise ``"completed"`` when every task is done,
          ``"in_progress"`` when at least one is done, else ``"pending"``.
        - ``last_updated``: current UTC time in ISO-8601 form with a
          trailing ``Z``.

    Missing, null or malformed ``hierarchy`` / ``spec-root`` entries and
    non-dict hierarchy nodes are treated as absent rather than raising.

    Args:
        spec_data: Spec data dictionary (modified in place).

    Returns:
        Dict with ``progress_percentage``, ``current_phase`` and
        ``status``; an empty dict when ``spec_data`` is empty or ``None``.
    """
    if not spec_data:
        return {}

    # A key that is present but null (or otherwise not a mapping) must not
    # crash the sync; fall back to an empty mapping instead.
    hierarchy = spec_data.get("hierarchy")
    if not isinstance(hierarchy, dict):
        hierarchy = {}
    root = hierarchy.get("spec-root")
    if not isinstance(root, dict):
        root = {}

    # `or 0` also covers counters that are stored as an explicit null.
    total = root.get("total_tasks") or 0
    completed = root.get("completed_tasks") or 0
    progress_pct = int(completed / total * 100) if total > 0 else 0

    # Single pass over the hierarchy: remember the first in-progress phase,
    # the first pending phase, and whether any node at all is blocked.
    in_progress_phase = None
    first_pending_phase = None
    any_blocked = False
    for key, node in hierarchy.items():
        if not isinstance(node, dict):
            continue
        node_status = node.get("status")
        if node_status == "blocked":
            any_blocked = True
        if node.get("type") != "phase":
            continue
        if node_status == "in_progress":
            if in_progress_phase is None:
                in_progress_phase = key
        elif node_status == "pending" and first_pending_phase is None:
            first_pending_phase = key

    # An in-progress phase always wins over an earlier pending one.
    current_phase = (
        in_progress_phase if in_progress_phase is not None else first_pending_phase
    )

    # A blocked node anywhere overrides the progress-derived status.
    if any_blocked:
        status = "blocked"
    elif total == 0:
        status = "pending"
    elif completed == total:
        status = "completed"
    elif completed > 0:
        status = "in_progress"
    else:
        status = "pending"

    # Update top-level fields (canonical location)
    spec_data["progress_percentage"] = progress_pct
    spec_data["current_phase"] = current_phase
    spec_data["status"] = status
    spec_data["last_updated"] = (
        datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
    )

    return {
        "progress_percentage": progress_pct,
        "current_phase": current_phase,
        "status": status,
    }
|
foundry_mcp/core/prometheus.py
CHANGED
|
@@ -159,7 +159,6 @@ class PrometheusExporter:
|
|
|
159
159
|
# Manifest/discovery metrics
|
|
160
160
|
self._manifest_tokens: Any = None
|
|
161
161
|
self._manifest_tool_count: Any = None
|
|
162
|
-
self._feature_flag_state: Any = None
|
|
163
162
|
|
|
164
163
|
# Health check metrics
|
|
165
164
|
self._health_status: Any = None
|
|
@@ -236,11 +235,6 @@ class PrometheusExporter:
|
|
|
236
235
|
"Tool count for the advertised tool manifest",
|
|
237
236
|
["manifest"], # unified|legacy
|
|
238
237
|
)
|
|
239
|
-
self._feature_flag_state = Gauge(
|
|
240
|
-
f"{ns}_feature_flag_state",
|
|
241
|
-
"Feature flag state (1=enabled, 0=disabled)",
|
|
242
|
-
["flag"],
|
|
243
|
-
)
|
|
244
238
|
|
|
245
239
|
# Health check metrics
|
|
246
240
|
self._health_status = Gauge(
|
|
@@ -396,13 +390,6 @@ class PrometheusExporter:
|
|
|
396
390
|
self._manifest_tokens.labels(manifest=manifest_label).set(int(tokens))
|
|
397
391
|
self._manifest_tool_count.labels(manifest=manifest_label).set(int(tool_count))
|
|
398
392
|
|
|
399
|
-
def record_feature_flag_state(self, flag: str, enabled: bool) -> None:
|
|
400
|
-
"""Record feature flag enabled/disabled state."""
|
|
401
|
-
if not self.is_enabled():
|
|
402
|
-
return
|
|
403
|
-
|
|
404
|
-
self._feature_flag_state.labels(flag=flag).set(1 if enabled else 0)
|
|
405
|
-
|
|
406
393
|
# -------------------------------------------------------------------------
|
|
407
394
|
# Health Check Metrics
|
|
408
395
|
# -------------------------------------------------------------------------
|
|
@@ -9,6 +9,7 @@ Prompt IDs (PromptTemplate-based):
|
|
|
9
9
|
- FIDELITY_REVIEW_V1: Main 6-section fidelity review prompt
|
|
10
10
|
- FIDELITY_DEVIATION_ANALYSIS_V1: Analyze identified deviations
|
|
11
11
|
- FIDELITY_COMPLIANCE_SUMMARY_V1: Generate compliance summary
|
|
12
|
+
- FIDELITY_SYNTHESIS_PROMPT_V1: Multi-model response synthesis
|
|
12
13
|
|
|
13
14
|
Legacy Prompt IDs (string templates for backward compatibility):
|
|
14
15
|
- review_task: Compare task implementation against spec requirements
|
|
@@ -66,6 +67,65 @@ FIDELITY_RESPONSE_SCHEMA = """{
|
|
|
66
67
|
}"""
|
|
67
68
|
|
|
68
69
|
|
|
70
|
+
# JSON response schema for synthesized multi-model fidelity reviews
|
|
71
|
+
FIDELITY_SYNTHESIZED_RESPONSE_SCHEMA = """{
|
|
72
|
+
"verdict": "pass|fail|partial|unknown",
|
|
73
|
+
"verdict_consensus": {
|
|
74
|
+
"votes": {
|
|
75
|
+
"pass": ["model names that voted pass"],
|
|
76
|
+
"fail": ["model names that voted fail"],
|
|
77
|
+
"partial": ["model names that voted partial"],
|
|
78
|
+
"unknown": ["model names that voted unknown"]
|
|
79
|
+
},
|
|
80
|
+
"agreement_level": "strong|moderate|weak|conflicted",
|
|
81
|
+
"notes": "Explanation of verdict determination"
|
|
82
|
+
},
|
|
83
|
+
"summary": "Synthesized overall findings.",
|
|
84
|
+
"requirement_alignment": {
|
|
85
|
+
"answer": "yes|no|partial",
|
|
86
|
+
"details": "Synthesized alignment assessment.",
|
|
87
|
+
"model_agreement": "unanimous|majority|split"
|
|
88
|
+
},
|
|
89
|
+
"success_criteria": {
|
|
90
|
+
"met": "yes|no|partial",
|
|
91
|
+
"details": "Synthesized verification status.",
|
|
92
|
+
"model_agreement": "unanimous|majority|split"
|
|
93
|
+
},
|
|
94
|
+
"deviations": [
|
|
95
|
+
{
|
|
96
|
+
"description": "Merged deviation description",
|
|
97
|
+
"justification": "Combined rationale",
|
|
98
|
+
"severity": "critical|high|medium|low",
|
|
99
|
+
"identified_by": ["model names that identified this"],
|
|
100
|
+
"agreement": "unanimous|majority|single"
|
|
101
|
+
}
|
|
102
|
+
],
|
|
103
|
+
"test_coverage": {
|
|
104
|
+
"status": "sufficient|insufficient|not_applicable",
|
|
105
|
+
"details": "Synthesized test assessment",
|
|
106
|
+
"model_agreement": "unanimous|majority|split"
|
|
107
|
+
},
|
|
108
|
+
"code_quality": {
|
|
109
|
+
"issues": ["Merged quality concerns with model attribution"],
|
|
110
|
+
"details": "Synthesized commentary"
|
|
111
|
+
},
|
|
112
|
+
"documentation": {
|
|
113
|
+
"status": "adequate|inadequate|not_applicable",
|
|
114
|
+
"details": "Synthesized doc assessment",
|
|
115
|
+
"model_agreement": "unanimous|majority|split"
|
|
116
|
+
},
|
|
117
|
+
"issues": ["Deduplicated issues with model attribution"],
|
|
118
|
+
"recommendations": ["Prioritized actionable steps"],
|
|
119
|
+
"synthesis_metadata": {
|
|
120
|
+
"models_consulted": ["all model names"],
|
|
121
|
+
"models_succeeded": ["successful model names"],
|
|
122
|
+
"models_failed": ["failed model names"],
|
|
123
|
+
"synthesis_provider": "model that performed synthesis",
|
|
124
|
+
"agreement_level": "strong|moderate|weak|conflicted"
|
|
125
|
+
}
|
|
126
|
+
}"""
|
|
127
|
+
|
|
128
|
+
|
|
69
129
|
# =============================================================================
|
|
70
130
|
# Severity Categorization Keywords
|
|
71
131
|
# =============================================================================
|
|
@@ -166,6 +226,9 @@ CRITICAL CONSTRAINTS:
|
|
|
166
226
|
- This is a READ-ONLY review - you MUST NOT write, create, or modify ANY files
|
|
167
227
|
- Execute code or commands - ANALYSIS ONLY
|
|
168
228
|
- Provide findings as structured JSON in your response
|
|
229
|
+
- Do NOT focus on ownership, responsibility, or team assignment concerns
|
|
230
|
+
- Avoid feedback like "who owns", "who verifies", "who is responsible for"
|
|
231
|
+
- Focus on technical requirements and verification steps themselves, not who performs them
|
|
169
232
|
|
|
170
233
|
Focus on:
|
|
171
234
|
1. Requirement alignment - Does implementation match spec?
|
|
@@ -430,6 +493,82 @@ Respond with valid JSON:
|
|
|
430
493
|
)
|
|
431
494
|
|
|
432
495
|
|
|
496
|
+
# Multi-model synthesis prompt - consolidates multiple fidelity reviews
|
|
497
|
+
FIDELITY_SYNTHESIS_PROMPT_V1 = PromptTemplate(
|
|
498
|
+
id="FIDELITY_SYNTHESIS_PROMPT_V1",
|
|
499
|
+
version="1.0",
|
|
500
|
+
system_prompt="""You are an expert at synthesizing multiple fidelity review results.
|
|
501
|
+
Your task is to consolidate diverse perspectives into actionable consensus while preserving JSON format.
|
|
502
|
+
|
|
503
|
+
Guidelines:
|
|
504
|
+
- Attribute findings to specific models using the identified_by field
|
|
505
|
+
- Merge similar deviations, noting which models identified each
|
|
506
|
+
- Resolve verdict disagreements using majority vote or escalate to "partial" on conflict
|
|
507
|
+
- Preserve unique insights from each model
|
|
508
|
+
- Output valid JSON matching the required schema exactly
|
|
509
|
+
- Do NOT focus on ownership, responsibility, or team assignment concerns
|
|
510
|
+
- Focus on technical requirements and verification steps themselves, not who performs them""",
|
|
511
|
+
user_template="""You are synthesizing {num_models} independent AI fidelity reviews.
|
|
512
|
+
|
|
513
|
+
**Specification:** {spec_title} (`{spec_id}`)
|
|
514
|
+
**Review Scope:** {review_scope}
|
|
515
|
+
|
|
516
|
+
**Your Task:** Read all JSON reviews below and create a unified synthesis.
|
|
517
|
+
|
|
518
|
+
## Individual Model Reviews
|
|
519
|
+
|
|
520
|
+
{model_reviews}
|
|
521
|
+
|
|
522
|
+
## Synthesis Requirements
|
|
523
|
+
|
|
524
|
+
1. **Verdict Consensus:**
|
|
525
|
+
- Count votes for each verdict (pass/fail/partial/unknown)
|
|
526
|
+
- Use majority vote for final verdict
|
|
527
|
+
- If tied or conflicted, use "partial" and note disagreement
|
|
528
|
+
- Record agreement_level: "strong" (all agree), "moderate" (majority agrees), "weak" (slight majority), "conflicted" (tied/split)
|
|
529
|
+
|
|
530
|
+
2. **Deviation Merging:**
|
|
531
|
+
- Group similar deviations across models by description
|
|
532
|
+
- Use highest severity when models disagree on severity
|
|
533
|
+
- Track which models identified each deviation in identified_by array
|
|
534
|
+
- Mark agreement: "unanimous" (all models), "majority" (>50%), "single" (one model)
|
|
535
|
+
|
|
536
|
+
3. **Issue Consolidation:**
|
|
537
|
+
- Deduplicate issues across models
|
|
538
|
+
- Preserve unique insights
|
|
539
|
+
- Note model agreement level for each finding
|
|
540
|
+
|
|
541
|
+
4. **Attribution Rules:**
|
|
542
|
+
- "unanimous" = all successful models agree
|
|
543
|
+
- "majority" = >50% of successful models agree
|
|
544
|
+
- "single" = only one model identified this
|
|
545
|
+
|
|
546
|
+
### Required Response Format
|
|
547
|
+
|
|
548
|
+
Respond **only** with valid JSON matching the schema below. Do not include Markdown, prose, or additional commentary outside the JSON object.
|
|
549
|
+
|
|
550
|
+
```json
|
|
551
|
+
{response_schema}
|
|
552
|
+
```
|
|
553
|
+
|
|
554
|
+
Rules:
|
|
555
|
+
- Use lowercase values for enumerated fields (verdict, status, severity, etc.)
|
|
556
|
+
- Keep arrays as arrays (use [] when empty)
|
|
557
|
+
- Populate identified_by with actual model names from the reviews
|
|
558
|
+
- Never omit required fields from the schema
|
|
559
|
+
- Use the actual provider names from the reviews (e.g., "gemini", "codex", "claude")""",
|
|
560
|
+
required_context=["spec_id", "spec_title", "review_scope", "num_models", "model_reviews"],
|
|
561
|
+
optional_context=["response_schema"],
|
|
562
|
+
metadata={
|
|
563
|
+
"workflow": "fidelity_review",
|
|
564
|
+
"author": "system",
|
|
565
|
+
"category": "synthesis",
|
|
566
|
+
"output_format": "json",
|
|
567
|
+
"description": "Multi-model fidelity review synthesis",
|
|
568
|
+
},
|
|
569
|
+
)
|
|
570
|
+
|
|
571
|
+
|
|
433
572
|
# =============================================================================
|
|
434
573
|
# Template Registry (PromptTemplate-based)
|
|
435
574
|
# =============================================================================
|
|
@@ -439,6 +578,7 @@ FIDELITY_REVIEW_TEMPLATES: Dict[str, PromptTemplate] = {
|
|
|
439
578
|
"FIDELITY_REVIEW_V1": FIDELITY_REVIEW_V1,
|
|
440
579
|
"FIDELITY_DEVIATION_ANALYSIS_V1": FIDELITY_DEVIATION_ANALYSIS_V1,
|
|
441
580
|
"FIDELITY_COMPLIANCE_SUMMARY_V1": FIDELITY_COMPLIANCE_SUMMARY_V1,
|
|
581
|
+
"FIDELITY_SYNTHESIS_PROMPT_V1": FIDELITY_SYNTHESIS_PROMPT_V1,
|
|
442
582
|
}
|
|
443
583
|
|
|
444
584
|
|
|
@@ -475,7 +615,7 @@ class FidelityReviewPromptBuilder(PromptBuilder):
|
|
|
475
615
|
Args:
|
|
476
616
|
prompt_id: Template identifier. Supports:
|
|
477
617
|
- PromptTemplate IDs: FIDELITY_REVIEW_V1, FIDELITY_DEVIATION_ANALYSIS_V1,
|
|
478
|
-
FIDELITY_COMPLIANCE_SUMMARY_V1
|
|
618
|
+
FIDELITY_COMPLIANCE_SUMMARY_V1, FIDELITY_SYNTHESIS_PROMPT_V1
|
|
479
619
|
context: Template context variables
|
|
480
620
|
|
|
481
621
|
Returns:
|
|
@@ -491,9 +631,12 @@ class FidelityReviewPromptBuilder(PromptBuilder):
|
|
|
491
631
|
# Provide defaults for optional context
|
|
492
632
|
render_context = dict(context)
|
|
493
633
|
|
|
494
|
-
# Add response schema default
|
|
634
|
+
# Add response schema default - use synthesized schema for synthesis prompt
|
|
495
635
|
if "response_schema" not in render_context:
|
|
496
|
-
|
|
636
|
+
if prompt_id == "FIDELITY_SYNTHESIS_PROMPT_V1":
|
|
637
|
+
render_context["response_schema"] = FIDELITY_SYNTHESIZED_RESPONSE_SCHEMA
|
|
638
|
+
else:
|
|
639
|
+
render_context["response_schema"] = FIDELITY_RESPONSE_SCHEMA
|
|
497
640
|
|
|
498
641
|
# Add empty defaults for optional fields
|
|
499
642
|
if "spec_description" not in render_context:
|
|
@@ -531,10 +674,12 @@ __all__ = [
|
|
|
531
674
|
"FIDELITY_REVIEW_V1",
|
|
532
675
|
"FIDELITY_DEVIATION_ANALYSIS_V1",
|
|
533
676
|
"FIDELITY_COMPLIANCE_SUMMARY_V1",
|
|
677
|
+
"FIDELITY_SYNTHESIS_PROMPT_V1",
|
|
534
678
|
# Template registries
|
|
535
679
|
"FIDELITY_REVIEW_TEMPLATES",
|
|
536
|
-
# Response
|
|
680
|
+
# Response schemas
|
|
537
681
|
"FIDELITY_RESPONSE_SCHEMA",
|
|
682
|
+
"FIDELITY_SYNTHESIZED_RESPONSE_SCHEMA",
|
|
538
683
|
# Severity keywords
|
|
539
684
|
"SEVERITY_KEYWORDS",
|
|
540
685
|
"CRITICAL_KEYWORDS",
|
|
@@ -71,6 +71,7 @@ What the plan does well.
|
|
|
71
71
|
- Include all sections even if empty (write "None identified" for empty sections)
|
|
72
72
|
- Be specific and actionable in all feedback
|
|
73
73
|
- For clarity issues, use Questions section rather than creating a separate category
|
|
74
|
+
- Do NOT generate feedback about ownership, responsibility, or team assignments (e.g., "who verifies", "who owns", "who is responsible")
|
|
74
75
|
"""
|
|
75
76
|
|
|
76
77
|
|
|
@@ -89,7 +90,10 @@ Guidelines:
|
|
|
89
90
|
- Propose alternatives when better approaches exist
|
|
90
91
|
- Focus on impact and prioritize feedback by potential consequences
|
|
91
92
|
- Be collaborative, not adversarial
|
|
92
|
-
- Remember: this is an early-stage plan, not a final spec
|
|
93
|
+
- Remember: this is an early-stage plan, not a final spec
|
|
94
|
+
- Do NOT focus on ownership, responsibility, or team assignment concerns
|
|
95
|
+
- Avoid feedback like "who owns", "who verifies", "who is responsible for"
|
|
96
|
+
- Focus on technical requirements and verification steps themselves, not who performs them"""
|
|
93
97
|
|
|
94
98
|
|
|
95
99
|
# =============================================================================
|
|
@@ -78,6 +78,7 @@ What the spec does well.
|
|
|
78
78
|
- Be specific and actionable in all feedback
|
|
79
79
|
- For clarity issues, use Questions section rather than creating a separate category
|
|
80
80
|
- Attribution: In multi-model reviews, prefix items with "Flagged by [model-name]:" when applicable
|
|
81
|
+
- Do NOT generate feedback about ownership, responsibility, or team assignments (e.g., "who verifies", "who owns", "who is responsible")
|
|
81
82
|
"""
|
|
82
83
|
|
|
83
84
|
|
|
@@ -94,7 +95,10 @@ Guidelines:
|
|
|
94
95
|
- Ask clarifying questions for ambiguities
|
|
95
96
|
- Propose alternatives when better approaches exist
|
|
96
97
|
- Focus on impact and prioritize feedback by potential consequences
|
|
97
|
-
- Be collaborative, not adversarial
|
|
98
|
+
- Be collaborative, not adversarial
|
|
99
|
+
- Do NOT focus on ownership, responsibility, or team assignment concerns
|
|
100
|
+
- Avoid feedback like "who owns", "who verifies", "who is responsible for"
|
|
101
|
+
- Focus on technical requirements and verification steps themselves, not who performs them"""
|
|
98
102
|
|
|
99
103
|
|
|
100
104
|
# =============================================================================
|
|
@@ -56,6 +56,7 @@ from foundry_mcp.core.providers.base import (
|
|
|
56
56
|
ProviderUnavailableError,
|
|
57
57
|
ProviderExecutionError,
|
|
58
58
|
ProviderTimeoutError,
|
|
59
|
+
ContextWindowError,
|
|
59
60
|
# ABC
|
|
60
61
|
ProviderContext,
|
|
61
62
|
)
|
|
@@ -124,6 +125,11 @@ from foundry_mcp.core.providers.validation import (
|
|
|
124
125
|
reset_rate_limiters,
|
|
125
126
|
# Execution wrapper
|
|
126
127
|
with_validation_and_resilience,
|
|
128
|
+
# Context window detection
|
|
129
|
+
CONTEXT_WINDOW_ERROR_PATTERNS,
|
|
130
|
+
is_context_window_error,
|
|
131
|
+
extract_token_counts,
|
|
132
|
+
create_context_window_guidance,
|
|
127
133
|
)
|
|
128
134
|
|
|
129
135
|
# ---------------------------------------------------------------------------
|
|
@@ -160,6 +166,7 @@ __all__ = [
|
|
|
160
166
|
"ProviderUnavailableError",
|
|
161
167
|
"ProviderExecutionError",
|
|
162
168
|
"ProviderTimeoutError",
|
|
169
|
+
"ContextWindowError",
|
|
163
170
|
# ABC
|
|
164
171
|
"ProviderContext",
|
|
165
172
|
# === Detection (detectors.py) ===
|
|
@@ -222,4 +229,9 @@ __all__ = [
|
|
|
222
229
|
"reset_rate_limiters",
|
|
223
230
|
# Execution wrapper
|
|
224
231
|
"with_validation_and_resilience",
|
|
232
|
+
# Context window detection
|
|
233
|
+
"CONTEXT_WINDOW_ERROR_PATTERNS",
|
|
234
|
+
"is_context_window_error",
|
|
235
|
+
"extract_token_counts",
|
|
236
|
+
"create_context_window_guidance",
|
|
225
237
|
]
|
|
@@ -277,6 +277,44 @@ class ProviderTimeoutError(ProviderError):
|
|
|
277
277
|
"""Raised when a provider exceeds its allotted execution time."""
|
|
278
278
|
|
|
279
279
|
|
|
280
|
+
class ContextWindowError(ProviderExecutionError):
    """Raised when a prompt exceeds the model's context window limit.

    Carries the token counts involved, when the caller knows them, so the
    size of the overflow can be reported.

    Attributes:
        prompt_tokens: Tokens in the prompt, or ``None`` if unknown.
        max_tokens: Maximum context window size, or ``None`` if unknown.
        truncation_needed: ``prompt_tokens - max_tokens`` when both counts
            are provided and non-zero; otherwise ``None``.
    """

    def __init__(
        self,
        message: str,
        *,
        provider: Optional[str] = None,
        prompt_tokens: Optional[int] = None,
        max_tokens: Optional[int] = None,
    ) -> None:
        """Initialize the error and derive the overflow size.

        Args:
            message: Error message describing the issue.
            provider: Provider ID that raised the error; forwarded to the
                base class.
            prompt_tokens: Number of tokens in the prompt (if known).
            max_tokens: Maximum tokens allowed (if known).
        """
        super().__init__(message, provider=provider)
        self.prompt_tokens = prompt_tokens
        self.max_tokens = max_tokens

        # The overflow is only computed when both counts are truthy; a
        # missing (or zero) count leaves it undefined.
        if prompt_tokens and max_tokens:
            overflow = prompt_tokens - max_tokens
        else:
            overflow = None
        self.truncation_needed = overflow
|
|
316
|
+
|
|
317
|
+
|
|
280
318
|
# =============================================================================
|
|
281
319
|
# Lifecycle Hooks
|
|
282
320
|
# =============================================================================
|
|
@@ -471,6 +509,7 @@ __all__ = [
|
|
|
471
509
|
"ProviderUnavailableError",
|
|
472
510
|
"ProviderExecutionError",
|
|
473
511
|
"ProviderTimeoutError",
|
|
512
|
+
"ContextWindowError",
|
|
474
513
|
# ABC
|
|
475
514
|
"ProviderContext",
|
|
476
515
|
]
|