multi-forge 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- forge/__init__.py +3 -0
- forge/_extensions/agents/.gitkeep +0 -0
- forge/_extensions/commands/.gitkeep +0 -0
- forge/_extensions/skills/analyze/SKILL.md +87 -0
- forge/_extensions/skills/challenge/SKILL.md +91 -0
- forge/_extensions/skills/consensus/SKILL.md +120 -0
- forge/_extensions/skills/consensus/resources/code_consensus_evaluation.md +94 -0
- forge/_extensions/skills/consensus/resources/consensus_evaluation.md +70 -0
- forge/_extensions/skills/consensus/resources/synthesis.md +101 -0
- forge/_extensions/skills/debate/SKILL.md +116 -0
- forge/_extensions/skills/debate/resources/code_debate_evaluation.md +101 -0
- forge/_extensions/skills/debate/resources/debate_evaluation.md +90 -0
- forge/_extensions/skills/panel/SKILL.md +141 -0
- forge/_extensions/skills/panel/resources/synthesis.md +103 -0
- forge/_extensions/skills/qa/SKILL.md +704 -0
- forge/_extensions/skills/qa/resources/checklist/0-enable.md +78 -0
- forge/_extensions/skills/qa/resources/checklist/1-preflight.md +24 -0
- forge/_extensions/skills/qa/resources/checklist/10-resume.md +143 -0
- forge/_extensions/skills/qa/resources/checklist/11-config.md +150 -0
- forge/_extensions/skills/qa/resources/checklist/12-search.md +58 -0
- forge/_extensions/skills/qa/resources/checklist/13-guard.md +237 -0
- forge/_extensions/skills/qa/resources/checklist/14-workflow.md +305 -0
- forge/_extensions/skills/qa/resources/checklist/15-skills.md +155 -0
- forge/_extensions/skills/qa/resources/checklist/16-handoff.md +224 -0
- forge/_extensions/skills/qa/resources/checklist/17-info.md +50 -0
- forge/_extensions/skills/qa/resources/checklist/18-disable.md +84 -0
- forge/_extensions/skills/qa/resources/checklist/19-uninstall.md +146 -0
- forge/_extensions/skills/qa/resources/checklist/2-extensions.md +188 -0
- forge/_extensions/skills/qa/resources/checklist/20-cleanup.md +36 -0
- forge/_extensions/skills/qa/resources/checklist/3-auth.md +234 -0
- forge/_extensions/skills/qa/resources/checklist/4-proxy.md +481 -0
- forge/_extensions/skills/qa/resources/checklist/5-session.md +541 -0
- forge/_extensions/skills/qa/resources/checklist/6-hooks.md +275 -0
- forge/_extensions/skills/qa/resources/checklist/7-costs.md +309 -0
- forge/_extensions/skills/qa/resources/checklist/8-status-line.md +174 -0
- forge/_extensions/skills/qa/resources/checklist/9-direct-commands.md +146 -0
- forge/_extensions/skills/qa/resources/checklist.md +103 -0
- forge/_extensions/skills/qa/resources/report-template.md +62 -0
- forge/_extensions/skills/qa/scripts/start-container.sh +529 -0
- forge/_extensions/skills/qa/scripts/walkthrough-state.py +1137 -0
- forge/_extensions/skills/review/SKILL.md +125 -0
- forge/_extensions/skills/review/references/claude-4.6.md +474 -0
- forge/_extensions/skills/review/references/claude-4.7.md +710 -0
- forge/_extensions/skills/review/references/gemini-3.1.md +546 -0
- forge/_extensions/skills/review/references/gpt-5.5.md +490 -0
- forge/_extensions/skills/review/references/skills-writing-guide.md +1588 -0
- forge/_extensions/skills/review/resources/code-anthropic.md +160 -0
- forge/_extensions/skills/review/resources/code-gemini.md +184 -0
- forge/_extensions/skills/review/resources/code-openai.md +203 -0
- forge/_extensions/skills/review/resources/code.md +160 -0
- forge/_extensions/skills/review-docs/SKILL.md +121 -0
- forge/_extensions/skills/review-docs/resources/docs-anthropic.md +170 -0
- forge/_extensions/skills/review-docs/resources/docs-gemini.md +204 -0
- forge/_extensions/skills/review-docs/resources/docs-openai.md +231 -0
- forge/_extensions/skills/review-docs/resources/docs.md +170 -0
- forge/_extensions/skills/smoke-test/SKILL.md +27 -0
- forge/_extensions/skills/smoke-test/scripts/smoke-test.sh +118 -0
- forge/_extensions/skills/understand/SKILL.md +148 -0
- forge/_extensions/skills/understand/resources/code-anthropic.md +163 -0
- forge/_extensions/skills/understand/resources/code-gemini.md +194 -0
- forge/_extensions/skills/understand/resources/code-openai.md +181 -0
- forge/_extensions/skills/understand/resources/code.md +163 -0
- forge/_extensions/skills/understand/resources/docs-anthropic.md +177 -0
- forge/_extensions/skills/understand/resources/docs-gemini.md +202 -0
- forge/_extensions/skills/understand/resources/docs-openai.md +191 -0
- forge/_extensions/skills/understand/resources/docs.md +177 -0
- forge/_extensions/skills/walkthrough/SKILL.md +599 -0
- forge/_extensions/skills/walkthrough/resources/checklist.md +765 -0
- forge/_extensions/skills/walkthrough/scripts/run-in-repo.sh +118 -0
- forge/_extensions/skills/walkthrough/scripts/setup-test-repo.sh +198 -0
- forge/_extensions/skills/walkthrough/scripts/walkthrough-state.py +1137 -0
- forge/backend/__init__.py +174 -0
- forge/backend/adapters/__init__.py +38 -0
- forge/backend/adapters/litellm.py +158 -0
- forge/backend/creation.py +89 -0
- forge/backend/registry.py +178 -0
- forge/cli/__init__.py +16 -0
- forge/cli/auth.py +483 -0
- forge/cli/backend.py +298 -0
- forge/cli/claude.py +411 -0
- forge/cli/config_cmd.py +303 -0
- forge/cli/extensions.py +1001 -0
- forge/cli/gc.py +165 -0
- forge/cli/guard.py +1018 -0
- forge/cli/guards.py +106 -0
- forge/cli/handoff.py +110 -0
- forge/cli/hooks/__init__.py +36 -0
- forge/cli/hooks/_group.py +20 -0
- forge/cli/hooks/_helpers.py +149 -0
- forge/cli/hooks/commands.py +1677 -0
- forge/cli/hooks/direct_commands.py +1304 -0
- forge/cli/hooks/install.py +232 -0
- forge/cli/hooks/policy.py +151 -0
- forge/cli/hooks/read_hygiene.py +74 -0
- forge/cli/hooks/verification.py +370 -0
- forge/cli/logs.py +406 -0
- forge/cli/main.py +292 -0
- forge/cli/proxy.py +1821 -0
- forge/cli/proxy_costs.py +313 -0
- forge/cli/search.py +416 -0
- forge/cli/session.py +892 -0
- forge/cli/session_addendum.py +81 -0
- forge/cli/session_fork.py +750 -0
- forge/cli/session_handoff.py +141 -0
- forge/cli/session_lifecycle.py +2053 -0
- forge/cli/session_manage.py +1336 -0
- forge/cli/session_memory.py +201 -0
- forge/cli/status_line.py +1398 -0
- forge/cli/workflow.py +1964 -0
- forge/config/__init__.py +110 -0
- forge/config/dataclass_utils.py +88 -0
- forge/config/defaults/__init__.py +0 -0
- forge/config/defaults/backends/__init__.py +0 -0
- forge/config/defaults/backends/litellm.yaml +196 -0
- forge/config/defaults/templates/__init__.py +0 -0
- forge/config/defaults/templates/litellm-anthropic-local.yaml +33 -0
- forge/config/defaults/templates/litellm-anthropic.yaml +24 -0
- forge/config/defaults/templates/litellm-gemini-flash-local.yaml +37 -0
- forge/config/defaults/templates/litellm-gemini-local.yaml +32 -0
- forge/config/defaults/templates/litellm-gemini-test.yaml +34 -0
- forge/config/defaults/templates/litellm-gemini.yaml +21 -0
- forge/config/defaults/templates/litellm-openai-codex-local.yaml +36 -0
- forge/config/defaults/templates/litellm-openai-local.yaml +38 -0
- forge/config/defaults/templates/litellm-openai.yaml +28 -0
- forge/config/defaults/templates/openrouter-anthropic.yaml +23 -0
- forge/config/defaults/templates/openrouter-deepseek.yaml +26 -0
- forge/config/defaults/templates/openrouter-gemini-flash.yaml +26 -0
- forge/config/defaults/templates/openrouter-gemini.yaml +23 -0
- forge/config/defaults/templates/openrouter-glm.yaml +23 -0
- forge/config/defaults/templates/openrouter-kimi.yaml +30 -0
- forge/config/defaults/templates/openrouter-minimax.yaml +26 -0
- forge/config/defaults/templates/openrouter-openai-codex.yaml +23 -0
- forge/config/defaults/templates/openrouter-openai.yaml +28 -0
- forge/config/defaults/templates/openrouter-qwen.yaml +25 -0
- forge/config/loader.py +675 -0
- forge/config/schema.py +448 -0
- forge/core/__init__.py +5 -0
- forge/core/auth/__init__.py +67 -0
- forge/core/auth/capabilities.py +219 -0
- forge/core/auth/credentials_file.py +244 -0
- forge/core/auth/protocols.py +18 -0
- forge/core/auth/secrets.py +243 -0
- forge/core/auth/template_secrets.py +112 -0
- forge/core/data/__init__.py +5 -0
- forge/core/data/model_catalog.yaml +1522 -0
- forge/core/data/pricing.yaml +140 -0
- forge/core/data/system_prompt_addendums/__init__.py +0 -0
- forge/core/data/system_prompt_addendums/gemini.md +330 -0
- forge/core/data/system_prompt_addendums/openai.md +328 -0
- forge/core/llm/__init__.py +231 -0
- forge/core/llm/clients/__init__.py +14 -0
- forge/core/llm/clients/base.py +115 -0
- forge/core/llm/clients/litellm.py +619 -0
- forge/core/llm/clients/openai_compat.py +244 -0
- forge/core/llm/clients/openrouter.py +234 -0
- forge/core/llm/credentials.py +439 -0
- forge/core/llm/detection.py +86 -0
- forge/core/llm/errors.py +44 -0
- forge/core/llm/protocols.py +80 -0
- forge/core/llm/types.py +176 -0
- forge/core/logging.py +146 -0
- forge/core/models/__init__.py +91 -0
- forge/core/models/catalog.py +467 -0
- forge/core/models/pricing.py +165 -0
- forge/core/models/types.py +167 -0
- forge/core/naming.py +212 -0
- forge/core/ops/__init__.py +73 -0
- forge/core/ops/context.py +141 -0
- forge/core/ops/gc.py +802 -0
- forge/core/ops/proxy.py +146 -0
- forge/core/ops/resolution.py +135 -0
- forge/core/ops/session.py +344 -0
- forge/core/ops/session_context.py +548 -0
- forge/core/paths.py +38 -0
- forge/core/process.py +54 -0
- forge/core/reactive/__init__.py +38 -0
- forge/core/reactive/cost_tracking.py +300 -0
- forge/core/reactive/env.py +180 -0
- forge/core/reactive/proxy.py +78 -0
- forge/core/reactive/routing.py +622 -0
- forge/core/reactive/session_runner.py +185 -0
- forge/core/reactive/structured_output.py +62 -0
- forge/core/reactive/tagger.py +94 -0
- forge/core/reactive/throttle.py +132 -0
- forge/core/state/__init__.py +59 -0
- forge/core/state/exceptions.py +59 -0
- forge/core/state/io.py +140 -0
- forge/core/state/lock.py +99 -0
- forge/core/state/timestamps.py +60 -0
- forge/core/transcript.py +78 -0
- forge/core/typing_helpers.py +24 -0
- forge/core/workqueue/__init__.py +67 -0
- forge/core/workqueue/queue.py +552 -0
- forge/core/workqueue/types.py +63 -0
- forge/guard/__init__.py +26 -0
- forge/guard/deterministic/__init__.py +26 -0
- forge/guard/deterministic/base.py +158 -0
- forge/guard/deterministic/coding_standards.py +256 -0
- forge/guard/deterministic/registry.py +148 -0
- forge/guard/deterministic/tdd.py +171 -0
- forge/guard/engine.py +216 -0
- forge/guard/protocols.py +91 -0
- forge/guard/queries.py +96 -0
- forge/guard/semantic/__init__.py +34 -0
- forge/guard/semantic/promotion.py +18 -0
- forge/guard/semantic/supervisor.py +813 -0
- forge/guard/semantic/verdict.py +183 -0
- forge/guard/store.py +124 -0
- forge/guard/team/__init__.py +6 -0
- forge/guard/team/config.py +24 -0
- forge/guard/team/handlers.py +209 -0
- forge/guard/team/prompts.py +41 -0
- forge/guard/types.py +125 -0
- forge/guard/workflow/__init__.py +17 -0
- forge/guard/workflow/branches.py +67 -0
- forge/guard/workflow/config.py +63 -0
- forge/guard/workflow/divergence.py +113 -0
- forge/guard/workflow/policy.py +87 -0
- forge/guard/workflow/stages.py +205 -0
- forge/install/__init__.py +55 -0
- forge/install/cli.py +281 -0
- forge/install/exceptions.py +163 -0
- forge/install/hooks.py +109 -0
- forge/install/installer.py +1037 -0
- forge/install/models.py +321 -0
- forge/install/preset.py +272 -0
- forge/install/settings_merge.py +831 -0
- forge/install/tracking.py +238 -0
- forge/install/version.py +141 -0
- forge/proxy/__init__.py +0 -0
- forge/proxy/base_client.py +181 -0
- forge/proxy/client_adapter.py +476 -0
- forge/proxy/client_factory.py +531 -0
- forge/proxy/converters.py +1206 -0
- forge/proxy/cost_logger.py +132 -0
- forge/proxy/cost_tracker.py +242 -0
- forge/proxy/data_models.py +338 -0
- forge/proxy/error_hints.py +92 -0
- forge/proxy/metrics.py +222 -0
- forge/proxy/model_spec.py +158 -0
- forge/proxy/proxies.py +333 -0
- forge/proxy/proxy_identity.py +134 -0
- forge/proxy/proxy_orchestrator.py +1018 -0
- forge/proxy/proxy_startup.py +54 -0
- forge/proxy/server.py +1561 -0
- forge/proxy/utils.py +537 -0
- forge/review/__init__.py +6 -0
- forge/review/adversarial.py +111 -0
- forge/review/consensus.py +236 -0
- forge/review/engine.py +356 -0
- forge/review/models.py +437 -0
- forge/review/resources/__init__.py +5 -0
- forge/review/resources/codereview-performance.md +85 -0
- forge/review/resources/codereview-quick.md +75 -0
- forge/review/resources/codereview-security.md +92 -0
- forge/review/resources/codereview.md +85 -0
- forge/review/resources/docreview-quick.md +75 -0
- forge/review/resources/docreview.md +86 -0
- forge/review/resources/thinkdeep.md +89 -0
- forge/review/routing.py +368 -0
- forge/review/synthesis.py +73 -0
- forge/runtime_config.py +438 -0
- forge/search/__init__.py +55 -0
- forge/search/bm25_store.py +264 -0
- forge/search/content_store.py +197 -0
- forge/search/engine.py +352 -0
- forge/search/exceptions.py +51 -0
- forge/search/extractor.py +234 -0
- forge/search/index_state.py +295 -0
- forge/search/store.py +215 -0
- forge/search/tokenizer.py +24 -0
- forge/session/__init__.py +130 -0
- forge/session/active.py +339 -0
- forge/session/artifacts.py +202 -0
- forge/session/claude/__init__.py +50 -0
- forge/session/claude/cleanup.py +105 -0
- forge/session/claude/invoke.py +236 -0
- forge/session/claude/paths.py +200 -0
- forge/session/cleanup.py +216 -0
- forge/session/config.py +34 -0
- forge/session/direct_model.py +107 -0
- forge/session/effective.py +169 -0
- forge/session/exceptions.py +255 -0
- forge/session/handoff.py +881 -0
- forge/session/handoff_agent.py +544 -0
- forge/session/hooks/__init__.py +35 -0
- forge/session/hooks/models.py +73 -0
- forge/session/hooks/session_start.py +507 -0
- forge/session/identity.py +84 -0
- forge/session/index.py +553 -0
- forge/session/manager.py +1506 -0
- forge/session/models.py +572 -0
- forge/session/overrides.py +344 -0
- forge/session/plan_resolution.py +286 -0
- forge/session/prev_sessions.py +128 -0
- forge/session/store.py +431 -0
- forge/session/validation.py +47 -0
- forge/session/worktree/__init__.py +65 -0
- forge/session/worktree/cleanup.py +262 -0
- forge/session/worktree/config_copy.py +203 -0
- forge/session/worktree/create.py +332 -0
- forge/sidecar/__init__.py +29 -0
- forge/sidecar/container.py +161 -0
- forge/sidecar/docker.py +86 -0
- forge/sidecar/secrets.py +19 -0
- multi_forge-0.2.0.dist-info/METADATA +242 -0
- multi_forge-0.2.0.dist-info/RECORD +311 -0
- multi_forge-0.2.0.dist-info/WHEEL +4 -0
- multi_forge-0.2.0.dist-info/entry_points.txt +2 -0
- multi_forge-0.2.0.dist-info/licenses/LICENSE +203 -0
- multi_forge-0.2.0.dist-info/licenses/NOTICE +14 -0
forge/proxy/utils.py
ADDED
|
@@ -0,0 +1,537 @@
|
|
|
1
|
+
"""Utility functions for logging and formatting.
|
|
2
|
+
|
|
3
|
+
Provides proxy request formatting,
|
|
4
|
+
and specialized tool usage event logging to JSON Lines file.
|
|
5
|
+
|
|
6
|
+
Structured JSONL logs are only written when the effective Forge log level is
|
|
7
|
+
"debug" (config.yaml log_level=debug or FORGE_DEBUG=1).
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import asyncio
|
|
11
|
+
import json
|
|
12
|
+
import logging
|
|
13
|
+
import os
|
|
14
|
+
import sys
|
|
15
|
+
from datetime import datetime, timezone
|
|
16
|
+
from typing import Any, Literal
|
|
17
|
+
|
|
18
|
+
from rich.pretty import pretty_repr
|
|
19
|
+
|
|
20
|
+
from forge.core.logging import get_effective_log_level
|
|
21
|
+
from forge.core.paths import get_forge_home
|
|
22
|
+
|
|
23
|
+
_logger = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _should_write_structured_logs() -> bool:
    """Tell whether structured JSONL logs should be written.

    Returns:
        True only when the effective Forge log level is exactly "debug".
    """
    effective_level = get_effective_log_level()
    return effective_level == "debug"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _pid_suffix() -> str:
    """Return the current process ID as a decimal string (used in log file names)."""
    pid = os.getpid()
    return f"{pid}"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class Colors:
    """ANSI escape sequences for styling terminal output."""

    # Text attributes
    RESET = "\033[0m"
    BOLD = "\033[1m"
    DIM = "\033[2m"
    UNDERLINE = "\033[4m"

    # Bright foreground colors
    RED = "\033[91m"
    GREEN = "\033[92m"
    YELLOW = "\033[93m"
    BLUE = "\033[94m"
    MAGENTA = "\033[95m"
    CYAN = "\033[96m"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def log_request_beautifully(
    method: str,
    path: str,
    original_model: str,
    mapped_model: str,
    num_messages: int,
    num_tools: int,
    status_code: int,
) -> None:
    """Summarize one proxied request on the terminal and in the module log.

    Two ANSI-colored lines are printed to stderr, but only when stderr is an
    interactive terminal. A plain (uncolored) INFO record is always sent to the
    module logger. If anything goes wrong while building the summary, the error
    is logged and a simpler plain INFO line is emitted instead.

    Args:
        method: HTTP method (GET, POST, etc.).
        path: Request path; any query string is dropped for display.
        original_model: Model name the client requested.
        mapped_model: Model name the request was mapped to.
        num_messages: Number of messages in the request.
        num_tools: Number of tools in the request.
        status_code: HTTP status code of the response (2xx is shown as success).
    """
    try:
        source_label = f"{Colors.CYAN}{original_model}{Colors.RESET}"
        endpoint = path.split("?")[0]
        # Green marks the target (mapped) model.
        target_label = f"{Colors.GREEN}{mapped_model}{Colors.RESET}"

        # A zero tool count is dimmed; anything else is highlighted.
        tools_color = Colors.MAGENTA if num_tools > 0 else Colors.DIM
        tools_label = f"{tools_color}{num_tools} tools{Colors.RESET}"
        messages_label = f"{Colors.BLUE}{num_messages} messages{Colors.RESET}"

        if 200 <= status_code < 300:
            status_color, status_symbol = Colors.GREEN, "✓"
        else:
            status_color, status_symbol = Colors.RED, "✗"
        status_label = f"{status_color}{status_symbol} {status_code}{Colors.RESET}"

        headline = f"{Colors.BOLD}{method} {endpoint}{Colors.RESET} {status_label}"
        detail = f" {source_label} → {target_label} ({messages_label}, {tools_label})"

        # ANSI-colored text must never reach file logs, so it is only printed
        # when stderr is an interactive terminal.
        if sys.stderr.isatty():
            for line in (headline, detail):
                print(line, file=sys.stderr)

        _logger.info(
            "Request processed: %s %s - %s (model=%s->%s, msgs=%s, tools=%s)",
            method,
            endpoint,
            status_code,
            original_model,
            mapped_model,
            num_messages,
            num_tools,
        )
    except Exception as e:
        # Summary logging is best-effort: report the problem, then fall back
        # to a minimal plain line built from the raw arguments.
        _logger.error("Error during request summary logging: %s", e)
        _logger.info(
            "%s %s %s | %s -> %s | %s msgs, %s tools",
            method,
            path,
            status_code,
            original_model,
            mapped_model,
            num_messages,
            num_tools,
        )
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def smart_format_str(obj: object, max_string: int = 500, max_length: int = 100, indent: int = 2) -> str:
    """Pretty-print *obj* to a string using rich's ``pretty_repr``.

    Args:
        obj: Object to render.
        max_string: Forwarded to ``pretty_repr`` as ``max_string``.
        max_length: Forwarded to ``pretty_repr`` as ``max_length``.
        indent: Forwarded to ``pretty_repr`` as ``indent_size``.

    Returns:
        The string produced by ``pretty_repr``.
    """
    rendered = pretty_repr(
        obj,
        max_string=max_string,
        max_length=max_length,
        indent_size=indent,
    )
    return rendered
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def smart_format_proto_str(obj: object, max_string: int = 500, max_length: int = 100, indent: int = 2) -> str:
    """Pretty-print a proto-like object after converting it with ``proto_to_dict``.

    Args:
        obj: Object to convert and render.
        max_string: Passed through to ``smart_format_str``.
        max_length: Passed through to ``smart_format_str``.
        indent: Passed through to ``smart_format_str``.

    Returns:
        The formatted string for the converted object.
    """
    return smart_format_str(
        proto_to_dict(obj),
        max_string=max_string,
        max_length=max_length,
        indent=indent,
    )
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def proto_to_dict(obj: object) -> dict[str, object] | list[dict[str, object]] | object:
|
|
135
|
+
"""Convert proto objects to dictionaries recursively.
|
|
136
|
+
|
|
137
|
+
This is used for logging/pretty-printing only.
|
|
138
|
+
"""
|
|
139
|
+
if hasattr(obj, "to_dict") and callable(obj.to_dict):
|
|
140
|
+
result = obj.to_dict()
|
|
141
|
+
return result if isinstance(result, dict) else {"value": result}
|
|
142
|
+
|
|
143
|
+
if isinstance(obj, (list, tuple)):
|
|
144
|
+
items = [proto_to_dict(item) for item in obj]
|
|
145
|
+
# best-effort: only keep dicts for this branch
|
|
146
|
+
dict_items = [item for item in items if isinstance(item, dict)]
|
|
147
|
+
return dict_items
|
|
148
|
+
|
|
149
|
+
if isinstance(obj, dict):
|
|
150
|
+
return {str(k): proto_to_dict(v) for k, v in obj.items()}
|
|
151
|
+
|
|
152
|
+
return obj
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
# Tool Events Logger for JSONL file
|
|
156
|
+
# asyncio.Lock serializes JSONL writes between coroutines on one event loop (it is not thread-safe)
|
|
157
|
+
_tool_events_lock = asyncio.Lock()
|
|
158
|
+
|
|
159
|
+
# Request/Response Logger for JSONL file
|
|
160
|
+
_request_response_lock = asyncio.Lock()
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
async def log_tool_event(
    request_id: str,
    tool_name: str | None,
    status: Literal["attempt", "success", "failure"],
    stage: Literal[
        "openai_request",
        "gemini_request",
        "gemini_response",
        "client_response",
        "client_execution_report",
    ],
    details: dict[str, Any] | None = None,
) -> None:
    """Append one tool-usage event to the per-day, per-process JSONL debug log.

    Does nothing unless structured logging is enabled (effective log level
    "debug"). Events are appended to
    ``<forge home>/logs/tool_events/<YYYYMMDD>_proxy.<pid>.jsonl``. Writes are
    serialized between coroutines by an ``asyncio.Lock``; that lock does not
    protect against writers in other threads.

    This is best-effort: any failure is logged and swallowed so that logging
    can never fail the request being served.

    Args:
        request_id: The unique identifier for the request.
        tool_name: The name of the tool being used (or None for general events).
        status: Whether this is an attempt, success, or failure.
        stage: Which part of the request/response cycle the event belongs to.
        details: Optional additional information about the event; omitted from
            the record when empty or None.
    """
    if not _should_write_structured_logs():
        return

    try:
        logs_dir = get_forge_home() / "logs" / "tool_events"
        logs_dir.mkdir(exist_ok=True, parents=True)

        # Read the clock once so the file's datestamp and the event timestamp
        # cannot disagree when the call straddles UTC midnight.
        now = datetime.now(timezone.utc)
        jsonl_path = logs_dir / f"{now.strftime('%Y%m%d')}_proxy.{_pid_suffix()}.jsonl"

        event: dict[str, Any] = {
            "timestamp": now.replace(microsecond=0).isoformat(),
            "request_id": request_id,
            "tool_name": tool_name,
            "status": status,
            "stage": stage,
        }

        if details:
            event["details"] = details

        from forge.core.state import open_secure_append

        # default=str matches log_tool_failure: a value in `details` that is
        # not JSON-serializable is stringified instead of dropping the event.
        line = json.dumps(event, default=str) + "\n"

        async with _tool_events_lock:
            with open_secure_append(jsonl_path) as f:
                f.write(line)

        _logger.debug(
            "Tool event logged: %s %s for %s (request_id=%s)",
            status,
            stage,
            tool_name or "unknown",
            request_id,
        )
    except Exception as e:
        # Log error but don't fail the request
        _logger.error("Failed to log tool event: %s (request_id=%s)", e, request_id, exc_info=True)
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
# Tool Failure Logger — opt-in via RuntimeConfig.log_tool_failures
|
|
229
|
+
_tool_failure_lock = asyncio.Lock()
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def _should_log_tool_failures() -> bool:
    """Return the ``log_tool_failures`` flag from the current runtime config."""
    # Imported lazily inside the function, as in the original layout.
    from forge.runtime_config import get_runtime_config

    runtime_config = get_runtime_config()
    return runtime_config.log_tool_failures
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
_TOOL_FAILURE_SCHEMA_VERSION = 1
|
|
239
|
+
_TOOL_INPUT_MAX_STR_LEN = 1024
|
|
240
|
+
_TOOL_INPUT_MAX_DEPTH = 8
|
|
241
|
+
_ERROR_MAX_LEN = 2000
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def _truncate_for_log(value: str | dict | list | None, max_len: int) -> str | dict | list | None:
|
|
245
|
+
"""Truncate a top-level string value (used for the error field)."""
|
|
246
|
+
if isinstance(value, str) and len(value) > max_len:
|
|
247
|
+
return value[:max_len] + f"... ({len(value)} chars)"
|
|
248
|
+
return value
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def _truncate_recursive(
    value: Any,
    max_str_len: int = _TOOL_INPUT_MAX_STR_LEN,
    max_depth: int = _TOOL_INPUT_MAX_DEPTH,
) -> Any:
    """Cap oversized strings anywhere inside nested dicts and lists.

    Edit/Write tool inputs can carry tens of KB of file content, so without a
    cap a single failure record could become a multi-MB JSONL line.

    Args:
        value: Value to bound; dicts and lists are walked recursively.
        max_str_len: Longest string kept verbatim; longer strings are cut and
            suffixed with ``"... (<original length> chars)"``.
        max_depth: Remaining nesting budget. Once it reaches zero the value is
            replaced by a fixed marker string, whatever its type.

    Returns:
        A bounded copy for str/dict/list inputs; any other value unchanged.
    """
    if max_depth <= 0:
        return "<truncated: max depth exceeded>"

    if isinstance(value, str):
        size = len(value)
        if size <= max_str_len:
            return value
        return f"{value[:max_str_len]}... ({size} chars)"

    if isinstance(value, (dict, list)):
        remaining = max_depth - 1
        if isinstance(value, dict):
            return {key: _truncate_recursive(item, max_str_len, remaining) for key, item in value.items()}
        return [_truncate_recursive(item, max_str_len, remaining) for item in value]

    return value
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def _truncate_error_for_log(error_content: str | dict | list | None) -> Any:
    """Bound a tool error payload before it is logged.

    Plain strings are capped with ``_truncate_for_log``; every other payload
    (including list/dict content blocks) goes through ``_truncate_recursive``.
    Both use ``_ERROR_MAX_LEN`` as the string limit.
    """
    if not isinstance(error_content, str):
        return _truncate_recursive(error_content, max_str_len=_ERROR_MAX_LEN)
    return _truncate_for_log(error_content, _ERROR_MAX_LEN)
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
async def log_tool_failure(
    *,
    request_id: str,
    mapped_model: str,
    tool_name: str | None,
    tool_use_id: str | None,
    tool_input: dict[str, Any] | None,
    error_content: str | dict | list | None,
) -> None:
    """Append one tool-failure record to the dedicated failures JSONL file.

    Opt-in through the ``log_tool_failures`` runtime setting; debug mode is not
    required. Records are appended to
    ``<forge home>/logs/tool_failures/<YYYYMMDD>_failures.<pid>.jsonl``. The
    tool input and the error payload are size-bounded before writing.

    This is best-effort: a failed write is logged as a warning and never
    propagates, so it cannot break the LLM response.

    Args:
        request_id: Identifier of the request the failure belongs to.
        mapped_model: Model name, stored under the ``model`` key.
        tool_name: Name of the failing tool, stored under the ``tool`` key.
        tool_use_id: Identifier of the tool-use, stored as given.
        tool_input: Tool arguments; truncated recursively before logging.
        error_content: Error payload; truncated before logging.
    """
    if not _should_log_tool_failures():
        return

    try:
        from forge.core.state import open_secure_append

        failures_dir = get_forge_home() / "logs" / "tool_failures"
        failures_dir.mkdir(exist_ok=True, parents=True)

        day_tag = datetime.now(timezone.utc).strftime("%Y%m%d")
        target = failures_dir / f"{day_tag}_failures.{_pid_suffix()}.jsonl"

        payload: dict[str, Any] = {
            "schema_version": _TOOL_FAILURE_SCHEMA_VERSION,
            "ts": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
            "request_id": request_id,
            "tool_use_id": tool_use_id,
            "model": mapped_model,
            "tool": tool_name,
            "tool_input": _truncate_recursive(tool_input),
            "error": _truncate_error_for_log(error_content),
        }

        # The lock serializes appends between coroutines in this process.
        async with _tool_failure_lock:
            with open_secure_append(target) as handle:
                # default=str stringifies any value json cannot encode natively.
                serialized = json.dumps(payload, default=str)
                handle.write(serialized + "\n")
    except Exception as e:
        _logger.warning("Failed to write tool failure log: %s", e)
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
def _redact_content(content: object) -> dict[str, object]:
    """Replace message/response content with a redaction marker.

    Only size and shape information survives:
      * ``None`` gives length 0.
      * A list gives its item count plus each item's ``type`` (for dict items
        ``item.get("type")``; for other items the ``type`` attribute, or
        ``"unknown"`` when there is none).
      * A string gives its own length; any other value gives the length of
        its ``str()`` form.
    """
    if content is None:
        return {"redacted": True, "length": 0}

    if isinstance(content, list):
        block_types = []
        for item in content:
            if isinstance(item, dict):
                block_types.append(item.get("type"))
            else:
                block_types.append(getattr(item, "type", "unknown"))
        return {"redacted": True, "items": len(content), "block_types": block_types}

    text = content if isinstance(content, str) else str(content)
    return {"redacted": True, "length": len(text)}
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def _redact_tools(tools: list) -> list[dict[str, object]]:
    """Keep tool names and structure while redacting descriptions and schemas.

    Dict tools keep their ``name``; a ``description`` or ``input_schema`` key,
    when present, is replaced by ``{"redacted": True}`` and all other keys are
    dropped. Non-dict tools are reduced to their ``name`` attribute (None when
    absent) plus a ``redacted`` flag.
    """

    def _redact_one(tool: object) -> dict[str, object]:
        if not isinstance(tool, dict):
            return {"name": getattr(tool, "name", None), "redacted": True}
        entry: dict[str, object] = {"name": tool.get("name")}
        for field in ("description", "input_schema"):
            if field in tool:
                entry[field] = {"redacted": True}
        return entry

    return [_redact_one(tool) for tool in tools]
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def _redact_body_for_log(body: dict[str, object] | None) -> dict[str, object] | None:
    """Build a log-safe copy of a request/response body.

    Top-level keys on a fixed allow-list (model, sampling parameters, usage,
    id, type, role, stop_reason, ...) are copied as-is. ``messages``,
    ``system``, ``tools`` and list-valued ``content`` are replaced by redaction
    markers that keep only structure (roles, block types, lengths, tool
    names). Every other key is dropped.

    Args:
        body: Request or response payload, or None.

    Returns:
        The redacted dict, or None when *body* is None.
    """
    if body is None:
        return None

    safe_keys = {
        "model",
        "temperature",
        "max_tokens",
        "top_p",
        "stream",
        "stop_sequences",
        "reasoning_effort",
        "verbosity",
        "usage",
        "id",
        "type",
        "role",
        "stop_reason",
    }

    def _field(item: object, name: str, fallback: object) -> object:
        # Dicts use .get() (None when missing); other objects use getattr
        # with the supplied fallback.
        if isinstance(item, dict):
            return item.get(name)
        return getattr(item, name, fallback)

    def _block_payload(block: object) -> object:
        # Prefer "text"; fall back to "content" when "text" is missing.
        if isinstance(block, dict):
            return block.get("text", block.get("content"))
        return getattr(block, "text", getattr(block, "content", None))

    result: dict[str, object] = {}
    for key, value in body.items():
        if key in safe_keys:
            result[key] = value

    messages = body.get("messages")
    if isinstance(messages, list):
        result["messages"] = [
            {
                "role": _field(msg, "role", "unknown"),
                "content": _redact_content(_field(msg, "content", None)),
            }
            for msg in messages
        ]

    if "system" in body:
        result["system"] = _redact_content(body["system"])

    tools = body.get("tools")
    if isinstance(tools, list):
        result["tools"] = _redact_tools(tools)

    content_blocks = body.get("content")
    if isinstance(content_blocks, list):
        result["content"] = [
            {
                "type": _field(block, "type", "unknown"),
                "content": _redact_content(_block_payload(block)),
            }
            for block in content_blocks
        ]

    return result
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
async def log_request_response(
    request_id: str,
    original_model: str,
    mapped_model: str,
    request_body: dict[str, object],
    response_body: dict[str, object] | None,
    status_code: int,
    duration_ms: float,
    error: str | None = None,
    num_messages: int | None = None,
    num_tools: int | None = None,
    tool_names: list[str] | None = None,
    has_system: bool = False,
    temperature: float | None = None,
    max_tokens: int | None = None,
    streaming: bool = False,
) -> None:
    """Append one redacted request/response record to the daily JSONL log.

    Does nothing unless ``_should_write_structured_logs()`` is true. The
    record is written under ``<forge home>/logs/requests`` to a file named
    from the UTC date and ``_pid_suffix()``. Both bodies are passed through
    ``_redact_body_for_log`` first, so the file is a debugging aid and not
    a replay fixture.

    After the write, a summary goes to the module logger: two INFO lines
    when ``status_code >= 400``, a single DEBUG line otherwise. Any
    exception raised along the way is caught and reported at ERROR level
    with a traceback; it is never propagated to the caller.

    Args:
        request_id: Unique request identifier.
        original_model: Model name as requested by the client.
        mapped_model: Model actually used after mapping.
        request_body: Request payload (redacted before it is written).
        response_body: Response payload (redacted before it is written);
            may be None.
        status_code: HTTP status code of the response.
        duration_ms: Request duration in milliseconds.
        error: Error message, if the request failed.
        num_messages: Number of messages in the request.
        num_tools: Number of tools in the request.
        tool_names: Names of the tools in the request.
        has_system: Whether the request carried a system message.
        temperature: Temperature parameter of the request.
        max_tokens: Max-tokens parameter of the request.
        streaming: Whether the request was a streaming one.
    """
    if not _should_write_structured_logs():
        return

    try:
        target_dir = get_forge_home() / "logs" / "requests"
        target_dir.mkdir(parents=True, exist_ok=True)

        day = datetime.now(timezone.utc).strftime("%Y%m%d")
        log_file = target_dir / f"{day}_requests.{_pid_suffix()}.jsonl"

        # Key order is kept stable: it determines the field order of each
        # JSONL line.
        record: dict[str, Any] = {
            "timestamp": datetime.now(timezone.utc).replace(microsecond=0).isoformat(),
            "request_id": request_id,
            "original_model": original_model,
            "mapped_model": mapped_model,
            "num_messages": num_messages,
            "num_tools": num_tools,
            "tool_names": tool_names,
            "has_system": has_system,
            "temperature": temperature,
            "max_tokens": max_tokens,
            "streaming": streaming,
            "status_code": status_code,
            "duration_ms": duration_ms,
            "error": error,
        }

        failed = status_code >= 400

        record["request_body"] = _redact_body_for_log(request_body)
        record["response_body"] = _redact_body_for_log(response_body)

        from forge.core.state import open_secure_append

        # The lock serialises appends issued by concurrent coroutines.
        async with _request_response_lock:
            with open_secure_append(log_file) as sink:
                sink.write(f"{json.dumps(record, default=str)}\n")

        if not failed:
            _logger.debug(
                "[%s] Request/Response logged: status=%s, model=%s->%s, messages=%s, tools=%s, duration=%sms",
                request_id,
                status_code,
                original_model,
                mapped_model,
                num_messages,
                num_tools,
                duration_ms,
            )
        else:
            _logger.info(
                "[%s] Request/Response logged (FAILURE): status=%s, model=%s->%s, "
                "messages=%s, tools=%s, duration=%sms, error=%s",
                request_id,
                status_code,
                original_model,
                mapped_model,
                num_messages,
                num_tools,
                duration_ms,
                error,
            )
            _logger.info(
                "[%s] Failed request details: tools=%s, temp=%s, max_tokens=%s",
                request_id,
                tool_names,
                temperature,
                max_tokens,
            )

    except Exception as exc:
        # Logging is best-effort: report the problem but never break the caller.
        _logger.error(
            "Failed to log request/response: %s (request_id=%s)",
            exc,
            request_id,
            exc_info=True,
        )
|
forge/review/__init__.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""Adversarial evaluation runner with stance injection.
|
|
2
|
+
|
|
3
|
+
Loads a resource containing ``{stance_prompt}``, replaces the marker with
|
|
4
|
+
each worker's stance prompt (plus ethical guardrail), and delegates to
|
|
5
|
+
``run_multi_review()`` for parallel fan-out.
|
|
6
|
+
|
|
7
|
+
Mandatory blinding: ``resume_id=None`` is hardcoded. Workers never see
|
|
8
|
+
conversation context — they evaluate the resource in isolation.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
from .engine import run_multi_review
|
|
16
|
+
from .models import AdversarialOutput, ModelSpec, StanceSpec
|
|
17
|
+
from .routing import WorkerRoutingPlan
|
|
18
|
+
|
|
19
|
+
# Placeholder that a resource template must contain; it is substituted with
# each worker's stance prompt.
STANCE_MARKER = "{stance_prompt}"

# Text appended to every stance prompt before it is injected into the template.
ETHICAL_GUARDRAIL = (
    "\n\n"
    "IMPORTANT: You are participating in a structured evaluation exercise. "
    "Evaluate the proposal on its technical merits. "
    "Do not fabricate evidence, misrepresent facts, or use manipulative reasoning. "
    "Your analysis must be honest and evidence-based regardless of your assigned stance."
)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def validate_resource(resource_path: str) -> str:
    """Load a resource template and verify it contains the stance marker.

    The file is decoded explicitly as UTF-8 so that the result does not
    depend on the platform's locale encoding.

    Args:
        resource_path: Filesystem path of the resource template.

    Returns:
        The full text of the resource.

    Raises:
        ValueError: If the text does not contain ``STANCE_MARKER``. A file
            that is not valid UTF-8 raises ``UnicodeDecodeError``, which is
            itself a ``ValueError`` subclass.
        OSError: If the file cannot be read (propagated from
            ``Path.read_text``).
    """
    content = Path(resource_path).read_text(encoding="utf-8")
    if STANCE_MARKER not in content:
        raise ValueError(f"Resource {resource_path} must contain '{STANCE_MARKER}' marker for stance injection.")
    return content
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def run_adversarial(
    resource_path: str,
    stances: list[StanceSpec],
    *,
    timeout_seconds: int = 600,
    cwd: str | None = None,
    via: str | None = None,
    routing_plan: WorkerRoutingPlan | None = None,
) -> AdversarialOutput:
    """Fan out one blind worker per stance over a stance-templated resource.

    The resource is loaded through ``validate_resource``. For every stance,
    each ``{stance_prompt}`` marker in the template is replaced by that
    stance's prompt followed by ``ETHICAL_GUARDRAIL``. The filled prompts are
    handed to ``run_multi_review`` with ``resume_id=None``, so workers are
    started without a conversation to resume.

    Args:
        resource_path: Path of the resource template to evaluate.
        stances: Stance specifications, one worker per entry.
        timeout_seconds: Timeout forwarded to ``run_multi_review``.
        cwd: Working directory forwarded to ``run_multi_review``.
        via: Proxy to route all workers through (passed to routing
            resolution). Ignored when ``routing_plan`` is provided.
        routing_plan: Pre-resolved routing plan. When provided, internal
            routing resolution is skipped.

    Returns:
        An ``AdversarialOutput`` carrying the resource path, the stances,
        the worker results and a worker-id -> stance-label map.

    Raises:
        ValueError: If the resource lacks the stance marker.
    """
    from forge.review.routing import resolve_invocation_routing

    template = validate_resource(resource_path)

    worker_specs: list[ModelSpec] = []
    # How many times each "<model>-<label>" id has been handed out so far.
    id_uses: dict[str, int] = {}
    for stance in stances:
        prompt_text = template.replace(STANCE_MARKER, stance.stance_prompt + ETHICAL_GUARDRAIL)
        label = stance.effective_label
        base_id = f"{stance.model.name}-{label}"

        previous_uses = id_uses.get(base_id, 0)
        id_uses[base_id] = previous_uses + 1
        # First use keeps the bare id; repeats get a "-1", "-2", ... suffix.
        if previous_uses == 0:
            worker_id = base_id
        else:
            worker_id = f"{base_id}-{previous_uses}"

        worker_specs.append(
            ModelSpec(
                name=stance.model.name,
                model_id=stance.model.model_id,
                family=stance.model.family,
                provider_refs=stance.model.provider_refs,
                description=f"{label} stance via {stance.model.name}",
                preferred_proxy=stance.model.preferred_proxy,
                prompt=prompt_text,
                worker_id=worker_id,
            )
        )

    if routing_plan is None:
        routing_plan = resolve_invocation_routing(worker_specs, via=via)

    # Mandatory blinding: resume_id is always None
    review = run_multi_review(
        prompt="",
        models=worker_specs,
        routing_plan=routing_plan,
        timeout_seconds=timeout_seconds,
        cwd=cwd,
        resume_id=None,
    )

    label_by_worker = {}
    for spec, stance in zip(worker_specs, stances):
        label_by_worker[spec.effective_worker_id] = stance.effective_label

    return AdversarialOutput(
        resource_path=resource_path,
        stances=[stance.stance for stance in stances],
        results=review.results,
        stance_map=label_by_worker,
    )
|