eval-protocol 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- development/__init__.py +1 -0
- development/normalize_sandbox_fusion.py +628 -0
- development/utils/__init__.py +1 -0
- development/utils/generate_api_key.py +31 -0
- development/utils/subprocess_manager.py +481 -0
- eval_protocol/__init__.py +86 -0
- eval_protocol/__main__.py +10 -0
- eval_protocol/_version.py +21 -0
- eval_protocol/adapters/__init__.py +1 -0
- eval_protocol/adapters/braintrust.py +8 -0
- eval_protocol/adapters/trl.py +8 -0
- eval_protocol/agent/__init__.py +29 -0
- eval_protocol/agent/models.py +69 -0
- eval_protocol/agent/orchestrator.py +893 -0
- eval_protocol/agent/resource_abc.py +89 -0
- eval_protocol/agent/resource_pool.py +184 -0
- eval_protocol/agent/resources/__init__.py +44 -0
- eval_protocol/agent/resources/bfcl_envs/__init__.py +1 -0
- eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +342 -0
- eval_protocol/agent/resources/bfcl_envs/math_api.py +40 -0
- eval_protocol/agent/resources/bfcl_envs/posting_api.py +157 -0
- eval_protocol/agent/resources/bfcl_sim_api_resource.py +314 -0
- eval_protocol/agent/resources/docker_resource.py +479 -0
- eval_protocol/agent/resources/filesystem_resource.py +371 -0
- eval_protocol/agent/resources/http_rollout_protocol.py +85 -0
- eval_protocol/agent/resources/http_rollout_resource.py +325 -0
- eval_protocol/agent/resources/python_state_resource.py +170 -0
- eval_protocol/agent/resources/sql_resource.py +271 -0
- eval_protocol/agent/task_manager.py +1064 -0
- eval_protocol/agent/tool_registry.py +111 -0
- eval_protocol/auth.py +156 -0
- eval_protocol/cli.py +425 -0
- eval_protocol/cli_commands/__init__.py +1 -0
- eval_protocol/cli_commands/agent_eval_cmd.py +264 -0
- eval_protocol/cli_commands/common.py +242 -0
- eval_protocol/cli_commands/deploy.py +486 -0
- eval_protocol/cli_commands/deploy_mcp.py +287 -0
- eval_protocol/cli_commands/preview.py +186 -0
- eval_protocol/cli_commands/run_eval_cmd.py +202 -0
- eval_protocol/common_utils.py +36 -0
- eval_protocol/config.py +180 -0
- eval_protocol/datasets/__init__.py +1 -0
- eval_protocol/datasets/loader.py +521 -0
- eval_protocol/evaluation.py +1045 -0
- eval_protocol/execution/__init__.py +1 -0
- eval_protocol/execution/pipeline.py +920 -0
- eval_protocol/gcp_tools.py +484 -0
- eval_protocol/generation/cache.py +141 -0
- eval_protocol/generation/clients/base.py +67 -0
- eval_protocol/generation/clients.py +248 -0
- eval_protocol/generic_server.py +165 -0
- eval_protocol/integrations/__init__.py +12 -0
- eval_protocol/integrations/braintrust.py +51 -0
- eval_protocol/integrations/deepeval.py +106 -0
- eval_protocol/integrations/openeval.py +40 -0
- eval_protocol/integrations/trl.py +187 -0
- eval_protocol/mcp/__init__.py +48 -0
- eval_protocol/mcp/adapter.py +131 -0
- eval_protocol/mcp/client/__init__.py +12 -0
- eval_protocol/mcp/client/connection.py +499 -0
- eval_protocol/mcp/clients.py +195 -0
- eval_protocol/mcp/execution/__init__.py +23 -0
- eval_protocol/mcp/execution/base_policy.py +227 -0
- eval_protocol/mcp/execution/fireworks_policy.py +209 -0
- eval_protocol/mcp/execution/manager.py +506 -0
- eval_protocol/mcp/execution/policy.py +421 -0
- eval_protocol/mcp/grid_renderer.py +54 -0
- eval_protocol/mcp/mcpgym.py +637 -0
- eval_protocol/mcp/process_manager.py +177 -0
- eval_protocol/mcp/session/__init__.py +11 -0
- eval_protocol/mcp/session/manager.py +228 -0
- eval_protocol/mcp/simple_process_manager.py +291 -0
- eval_protocol/mcp/simulation_server.py +458 -0
- eval_protocol/mcp/types.py +80 -0
- eval_protocol/mcp_agent/__init__.py +1 -0
- eval_protocol/mcp_agent/config.py +147 -0
- eval_protocol/mcp_agent/intermediary_server.py +542 -0
- eval_protocol/mcp_agent/main.py +210 -0
- eval_protocol/mcp_agent/orchestration/__init__.py +1 -0
- eval_protocol/mcp_agent/orchestration/base_client.py +132 -0
- eval_protocol/mcp_agent/orchestration/local_docker_client.py +702 -0
- eval_protocol/mcp_agent/orchestration/remote_http_client.py +304 -0
- eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +3 -0
- eval_protocol/mcp_agent/session.py +79 -0
- eval_protocol/mcp_env.py +304 -0
- eval_protocol/models.py +366 -0
- eval_protocol/packaging.py +219 -0
- eval_protocol/platform_api.py +360 -0
- eval_protocol/playback_policy.py +396 -0
- eval_protocol/resources.py +128 -0
- eval_protocol/reward_function.py +410 -0
- eval_protocol/rewards/__init__.py +94 -0
- eval_protocol/rewards/accuracy.py +454 -0
- eval_protocol/rewards/accuracy_length.py +173 -0
- eval_protocol/rewards/apps_coding_reward.py +331 -0
- eval_protocol/rewards/apps_execution_utils.py +149 -0
- eval_protocol/rewards/apps_testing_util.py +559 -0
- eval_protocol/rewards/bfcl_reward.py +313 -0
- eval_protocol/rewards/code_execution.py +1620 -0
- eval_protocol/rewards/code_execution_utils.py +72 -0
- eval_protocol/rewards/cpp_code.py +861 -0
- eval_protocol/rewards/deepcoder_reward.py +161 -0
- eval_protocol/rewards/format.py +129 -0
- eval_protocol/rewards/function_calling.py +541 -0
- eval_protocol/rewards/json_schema.py +422 -0
- eval_protocol/rewards/language_consistency.py +700 -0
- eval_protocol/rewards/lean_prover.py +479 -0
- eval_protocol/rewards/length.py +375 -0
- eval_protocol/rewards/list_comparison_math_reward.py +221 -0
- eval_protocol/rewards/math.py +762 -0
- eval_protocol/rewards/multiple_choice_math_reward.py +232 -0
- eval_protocol/rewards/reasoning_steps.py +249 -0
- eval_protocol/rewards/repetition.py +342 -0
- eval_protocol/rewards/tag_count.py +162 -0
- eval_protocol/rl_processing.py +82 -0
- eval_protocol/server.py +271 -0
- eval_protocol/typed_interface.py +260 -0
- eval_protocol/utils/__init__.py +8 -0
- eval_protocol/utils/batch_evaluation.py +217 -0
- eval_protocol/utils/batch_transformation.py +205 -0
- eval_protocol/utils/dataset_helpers.py +112 -0
- eval_protocol/utils/module_loader.py +56 -0
- eval_protocol/utils/packaging_utils.py +108 -0
- eval_protocol/utils/static_policy.py +305 -0
- eval_protocol-0.0.3.dist-info/METADATA +635 -0
- eval_protocol-0.0.3.dist-info/RECORD +130 -0
- eval_protocol-0.0.3.dist-info/WHEEL +5 -0
- eval_protocol-0.0.3.dist-info/entry_points.txt +4 -0
- eval_protocol-0.0.3.dist-info/licenses/LICENSE +201 -0
- eval_protocol-0.0.3.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,486 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CLI command for creating and deploying an evaluator,
|
|
3
|
+
or registering a pre-deployed remote evaluator.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import importlib # For dynamically importing modules
|
|
7
|
+
import json
|
|
8
|
+
import os # For os.path.join, os.makedirs, os.getcwd (already imported but good to be explicit if used extensively)
|
|
9
|
+
import secrets # For API key generation (already imported but good to be explicit)
|
|
10
|
+
import sys # For sys.executable
|
|
11
|
+
import time # For sleep
|
|
12
|
+
from pathlib import Path # For path operations
|
|
13
|
+
from typing import Any, Dict
|
|
14
|
+
|
|
15
|
+
import yaml # For saving config if save_config helper doesn't exist
|
|
16
|
+
|
|
17
|
+
# TODO: Consider moving subprocess_manager functions to a more central location if used by core CLI
|
|
18
|
+
try:
    from development.utils.subprocess_manager import (
        start_ngrok_and_get_url,  # Added ngrok function
        start_process,
        start_serveo_and_get_url,
        stop_process,
    )
except ImportError:
    # The optional ``development`` package is not importable, so define minimal
    # stand-ins exposing the same call signatures this module uses.
    import signal
    import socket
    import subprocess

    def start_process(command, log_path, env=None):
        """Start ``command`` with stdout and stderr redirected to ``log_path``.

        Args:
            command: Argument list handed to ``subprocess.Popen``.
            log_path: File receiving the child's combined output; it is
                truncated if it already exists.
            env: Optional environment mapping for the child process.

        Returns:
            The ``subprocess.Popen`` object, or ``None`` when the log file
            could not be opened or the process could not be launched.
        """
        try:
            # The child inherits the descriptor, so the parent's handle can be
            # closed as soon as Popen returns.
            with open(log_path, "w") as log_file:
                return subprocess.Popen(command, stdout=log_file, stderr=subprocess.STDOUT, env=env)
        except Exception as e:
            print(f"Error starting process: {e}")
            return None

    def stop_process(pid):
        """Best-effort termination of ``pid`` via SIGTERM.

        Never raises: a process that has already exited is silently ignored,
        any other failure is reported on stdout so that it is not lost.
        """
        try:
            os.kill(pid, signal.SIGTERM)
        except ProcessLookupError:
            # The process is already gone, which is the state we wanted.
            pass
        except Exception as e:
            print(f"Warning: could not stop process {pid}: {e}")

    def start_serveo_and_get_url(local_port, log_path):
        """Fallback Serveo tunnel: always reports the tunnel as unavailable.

        Returns:
            ``(None, None)`` in place of ``(tunnel_process, public_url)``.
        """
        print("Serveo tunneling not available - development module not found")
        return None, None

    def start_ngrok_and_get_url(local_port, log_path):
        """Fallback ngrok tunnel: always reports the tunnel as unavailable.

        Returns:
            ``(None, None)`` in place of ``(tunnel_process, public_url)``.
        """
        print("ngrok tunneling not available - development module not found")
        return None, None
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
from eval_protocol.auth import get_fireworks_account_id
|
|
62
|
+
from eval_protocol.config import GCPCloudRunConfig, RewardKitConfig
|
|
63
|
+
from eval_protocol.config import _config_file_path as global_loaded_config_path
|
|
64
|
+
from eval_protocol.config import get_config
|
|
65
|
+
from eval_protocol.evaluation import create_evaluation
|
|
66
|
+
from eval_protocol.gcp_tools import (
|
|
67
|
+
build_and_push_docker_image,
|
|
68
|
+
deploy_to_cloud_run,
|
|
69
|
+
ensure_artifact_registry_repo_exists,
|
|
70
|
+
ensure_gcp_secret,
|
|
71
|
+
)
|
|
72
|
+
from eval_protocol.packaging import generate_dockerfile_content
|
|
73
|
+
from eval_protocol.platform_api import ( # For catching errors from create_evaluation
|
|
74
|
+
PlatformAPIError,
|
|
75
|
+
create_or_update_fireworks_secret,
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
from .common import check_environment
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _establish_local_server_and_tunnel(args):
    """Start the local generic server and expose it through a public tunnel.

    Serveo is tried first, then ngrok. Logs are written below
    ``<cwd>/logs/reward-kit-local``.

    Args:
        args: Parsed CLI arguments; reads ``function_ref``, ``id`` and
            ``local_port``.

    Returns:
        ``(public_url, tunnel_provider_name, local_server_pid, tunnel_process_pid)``
        on success.
        ``(None, None, None, None)`` when ``--function-ref`` is missing or the
        local server could not be started (or died while initializing).
        ``(None, None, local_server_pid, None)`` when both tunnels failed; the
        local server has already been asked to stop in that case.
    """
    if not args.function_ref:
        print("Error: --function-ref is required for local-serve target.")
        return None, None, None, None

    evaluator_id = args.id
    function_ref = args.function_ref
    local_server_port = args.local_port

    log_dir = os.path.join(os.getcwd(), "logs", "reward-kit-local")
    os.makedirs(log_dir, exist_ok=True)
    generic_server_log_path = os.path.join(log_dir, f"generic_server_{evaluator_id}.log")

    server_env = None  # Run local server without API key protection
    print(f"Note: Local server for '{evaluator_id}' will run without API key protection.")

    print(f"Starting local reward function server for '{function_ref}' on port {local_server_port}...")
    server_command = [
        sys.executable,
        "-m",
        "eval_protocol.generic_server",
        function_ref,
        "--port",
        str(local_server_port),
    ]

    local_server_process = start_process(server_command, generic_server_log_path, env=server_env)

    if not local_server_process or local_server_process.poll() is not None:
        print(f"Error: Failed to start local generic server. Check log: {generic_server_log_path}")
        return None, None, None, None  # No server, no tunnel

    local_server_pid = local_server_process.pid
    print(f"Local server started (PID: {local_server_pid}). Log: {generic_server_log_path}")
    print("Waiting for server to initialize...")
    time.sleep(5)

    # The check above runs right after spawning, before the server had a chance
    # to fail (bad function reference, port already in use, ...). Re-check once
    # the grace period is over so we never tunnel to a dead server.
    if local_server_process.poll() is not None:
        print(f"Error: Failed to start local generic server. Check log: {generic_server_log_path}")
        return None, None, None, None

    # Attempt Serveo first
    print(f"Attempting Serveo tunnel for local port {local_server_port}...")
    serveo_log_path = os.path.join(log_dir, f"serveo_{evaluator_id}.log")
    serveo_tunnel_process, serveo_url = start_serveo_and_get_url(local_server_port, serveo_log_path)

    if serveo_url and serveo_tunnel_process:
        print(f"Serveo tunnel established: {serveo_url} (PID: {serveo_tunnel_process.pid}). Log: {serveo_log_path}")
        return serveo_url, "serveo", local_server_pid, serveo_tunnel_process.pid

    print(f"Serveo tunnel failed. Check log: {serveo_log_path}")
    if serveo_tunnel_process:
        # A process handle without a URL is unusable; stop it so it is not orphaned.
        stop_process(serveo_tunnel_process.pid)
    print("Attempting fallback to ngrok...")

    ngrok_log_path = os.path.join(log_dir, f"ngrok_{evaluator_id}.log")
    # Assuming ngrok authtoken is pre-configured by the user or via NGROK_AUTHTOKEN env var
    ngrok_tunnel_process, ngrok_url = start_ngrok_and_get_url(local_server_port, ngrok_log_path)

    if ngrok_url and ngrok_tunnel_process:
        print(f"ngrok tunnel established: {ngrok_url} (PID: {ngrok_tunnel_process.pid}). Log: {ngrok_log_path}")
        return ngrok_url, "ngrok", local_server_pid, ngrok_tunnel_process.pid

    print(f"ngrok tunnel also failed. Check log: {ngrok_log_path}")
    if ngrok_tunnel_process:
        stop_process(ngrok_tunnel_process.pid)

    # Both providers failed: stop the local server we started.
    stop_process(local_server_pid)
    return None, None, local_server_pid, None  # URL, provider, server_pid, tunnel_pid
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def _python_version_from_runtime(runtime: str) -> str:
    """Translate a runtime label into a dotted Python version.

    ``python310`` -> ``3.10``, ``python39`` -> ``3.9``, ``python3`` -> ``3``.
    A label that already carries a dot (``python3.10``) is passed through as
    ``3.10`` instead of being turned into ``3..10``.
    """
    if not runtime.startswith("python"):
        return runtime.replace("python", "")
    suffix = runtime[len("python") :]
    if "." in suffix or len(suffix) < 2:
        # Already dotted, major-only, or empty: nothing to split.
        return suffix
    return f"{suffix[0]}.{suffix[1:]}"


def _deploy_to_gcp_cloud_run(args, current_config, gcp_config_from_yaml):
    """Handle ``--target gcp-cloud-run`` up to and including service deployment.

    Steps: resolve project/region/Artifact Registry repo (CLI arguments win over
    the YAML config), generate a Dockerfile, build and push the image, set up
    API-key authentication when that auth mode is selected, then deploy.

    Args:
        args: Parsed CLI arguments; reads ``id``, ``function_ref``,
            ``gcp_project``, ``gcp_region``, ``gcp_ar_repo``, ``runtime`` and
            ``gcp_auth_mode``.
        current_config: Loaded configuration object; its
            ``evaluator_endpoint_keys`` mapping is read and may be updated.
        gcp_config_from_yaml: GCP section of the configuration, or ``None``.

    Returns:
        The Cloud Run service URL, or ``None`` if any step failed (an error has
        been printed in that case).
    """
    print(f"Starting GCP Cloud Run deployment for evaluator '{args.id}'...")

    # Resolve function_ref (must be from CLI for GCP)
    if not args.function_ref:  # This check is also in main, but good for helper too
        print("Error: --function-ref is required for GCP Cloud Run deployment.")
        return None

    # Dynamically import the reward function to get its requirements
    inline_requirements_content = None
    try:
        module_name, func_name = args.function_ref.rsplit(".", 1)
        reward_func = getattr(importlib.import_module(module_name), func_name)
        inline_requirements_content = getattr(reward_func, "_reward_function_requirements", None)
        if inline_requirements_content:
            print(f"Found inline requirements for {args.function_ref}")
    except Exception as e:
        # Continue without inline requirements if loading fails
        print(f"Warning: Could not load reward function {args.function_ref} to check for inline requirements: {e}")

    # Resolve GCP project_id
    gcp_project_id = args.gcp_project
    if not gcp_project_id and gcp_config_from_yaml:
        gcp_project_id = gcp_config_from_yaml.project_id
    if not gcp_project_id:
        print("Error: GCP Project ID must be provided via --gcp-project argument or in rewardkit.yaml.")
        return None

    # Resolve GCP region
    gcp_region = args.gcp_region
    if not gcp_region and gcp_config_from_yaml:
        gcp_region = gcp_config_from_yaml.region
    if not gcp_region:
        print("Error: GCP Region must be provided via --gcp-region argument or in rewardkit.yaml.")
        return None

    # Resolve GCP AR repo name, falling back to a fixed default
    gcp_ar_repo_name = args.gcp_ar_repo
    if not gcp_ar_repo_name and gcp_config_from_yaml:
        gcp_ar_repo_name = gcp_config_from_yaml.artifact_registry_repository
    if not gcp_ar_repo_name:
        gcp_ar_repo_name = "reward-kit-evaluators"

    print(f"Using GCP Project: {gcp_project_id}, Region: {gcp_region}, AR Repo: {gcp_ar_repo_name}")

    if not ensure_artifact_registry_repo_exists(
        project_id=gcp_project_id, region=gcp_region, repo_name=gcp_ar_repo_name
    ):
        print(f"Failed to ensure Artifact Registry repository '{gcp_ar_repo_name}' exists. Aborting.")
        return None

    dockerfile_content = generate_dockerfile_content(
        function_ref=args.function_ref,
        python_version=_python_version_from_runtime(args.runtime),
        eval_protocol_install_source=".",
        user_requirements_path=None,  # Explicitly None, inline_requirements_content will be used
        inline_requirements_content=inline_requirements_content,
        service_port=8080,
    )
    if not dockerfile_content:
        print("Failed to generate Dockerfile content. Aborting.")
        return None

    image_tag = "latest"
    image_name_tag = f"{gcp_region}-docker.pkg.dev/{gcp_project_id}/{gcp_ar_repo_name}/{args.id}:{image_tag}"
    build_context_dir = os.getcwd()

    if not build_and_push_docker_image(
        image_name_tag=image_name_tag,
        dockerfile_content=dockerfile_content,
        build_context_dir=build_context_dir,
        gcp_project_id=gcp_project_id,
    ):
        print(f"Failed to build and push Docker image {image_name_tag}. Aborting.")
        return None
    print(f"Successfully built and pushed Docker image: {image_name_tag}")

    gcp_env_vars: Dict[str, str] = {}
    parsed_gcp_secrets: Dict[str, Any] = {}
    # NOTE(review): this stays True for every auth mode. With a mode other than
    # "api-key" no application-level key is configured below, so the service
    # would be reachable without any authentication - confirm this is intended.
    allow_unauthenticated_gcp = True

    # Precedence: CLI flag > YAML default > "api-key"
    resolved_auth_mode = "api-key"
    if gcp_config_from_yaml and gcp_config_from_yaml.default_auth_mode:
        resolved_auth_mode = gcp_config_from_yaml.default_auth_mode
    if args.gcp_auth_mode is not None:
        resolved_auth_mode = args.gcp_auth_mode
    print(f"Using GCP Auth Mode for service: {resolved_auth_mode}")

    if resolved_auth_mode == "api-key":
        print("Configuring GCP Cloud Run service for API key authentication (application layer).")
        evaluator_id = args.id
        # NOTE(review): the path was bound by a ``from ... import`` when this
        # module was loaded; if the config module assigns it later, the value
        # seen here may be stale - verify against eval_protocol.config.
        config_path = global_loaded_config_path

        if current_config.evaluator_endpoint_keys and evaluator_id in current_config.evaluator_endpoint_keys:
            # Reuse the key already recorded for this evaluator.
            api_key_for_service = current_config.evaluator_endpoint_keys[evaluator_id]
            print(f"Using existing API key for '{evaluator_id}' from configuration for the service.")
        else:
            api_key_for_service = secrets.token_hex(32)
            print(f"Generated new API key for '{evaluator_id}' for the service.")
            if not current_config.evaluator_endpoint_keys:
                current_config.evaluator_endpoint_keys = {}
            current_config.evaluator_endpoint_keys[evaluator_id] = api_key_for_service
            if config_path:
                _save_config(current_config, config_path)
            else:
                print(f"Warning: No rewardkit.yaml found to save API key for '{evaluator_id}'.")

        # Keep only characters accepted in the GCP secret id built below.
        gcp_sanitized_eval_id = "".join(char for char in args.id if char.isalnum() or char in ["-", "_"])
        if not gcp_sanitized_eval_id:
            gcp_sanitized_eval_id = "rewardkit-evaluator"
        secret_id_for_auth_key = f"rk-eval-{gcp_sanitized_eval_id}-authkey"
        secret_labels = {"managed-by": "reward-kit", "evaluator-id": evaluator_id}

        api_key_secret_version_id = ensure_gcp_secret(
            project_id=gcp_project_id,
            secret_id=secret_id_for_auth_key,
            secret_value=api_key_for_service,
            labels=secret_labels,
        )
        if not api_key_secret_version_id:
            print(f"Error: Failed to store API key in GCP Secret Manager for '{evaluator_id}'. Aborting.")
            return None
        print(f"API key for service stored in GCP Secret Manager: {secret_id_for_auth_key}")
        parsed_gcp_secrets["RK_ENDPOINT_API_KEY"] = api_key_secret_version_id

        # Register this key with Fireworks secrets for the shim
        fireworks_account_id_for_secret = get_fireworks_account_id()
        if fireworks_account_id_for_secret:
            # Lowercase, keep [a-z0-9-], collapse repeated/edge dashes, cap at 40 chars.
            fw_eval_id_sanitized = args.id.lower()
            fw_eval_id_sanitized = "".join(char for char in fw_eval_id_sanitized if char.isalnum() or char == "-")
            fw_eval_id_sanitized = "-".join(filter(None, fw_eval_id_sanitized.split("-")))
            if not fw_eval_id_sanitized:
                fw_eval_id_sanitized = "evaluator"
            fw_eval_id_sanitized = fw_eval_id_sanitized[:40]
            fw_secret_key_name = f"rkeval-{fw_eval_id_sanitized}-shim-key"
            print(f"Registering API key on Fireworks platform as secret '{fw_secret_key_name}' for shim...")
            if create_or_update_fireworks_secret(
                account_id=fireworks_account_id_for_secret,
                key_name=fw_secret_key_name,
                secret_value=api_key_for_service,
            ):
                print(f"Successfully registered/updated secret '{fw_secret_key_name}' on Fireworks platform.")
            else:
                print(f"Warning: Failed to register/update secret '{fw_secret_key_name}' on Fireworks platform.")
        else:
            print("Warning: Fireworks Account ID not found, cannot store shim API key on Fireworks platform.")

    cloud_run_service_url = deploy_to_cloud_run(
        service_name=args.id,
        image_name_tag=image_name_tag,
        gcp_project_id=gcp_project_id,
        gcp_region=gcp_region,
        allow_unauthenticated=allow_unauthenticated_gcp,  # In api-key mode the app itself handles auth
        env_vars=gcp_env_vars if gcp_env_vars else None,
        secrets_to_mount=parsed_gcp_secrets,
    )

    if not cloud_run_service_url:
        print("Failed to deploy to Cloud Run or retrieve service URL. Aborting.")
        return None

    print(f"Successfully deployed to Cloud Run. Service URL: {cloud_run_service_url}")
    return cloud_run_service_url
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
# Helper to save config (can be moved to config.py later)
|
|
330
|
+
def _save_config(config_data: "RewardKitConfig", path: str) -> None:
    """Write ``config_data`` to ``path`` as YAML (best effort).

    The configuration is serialized *before* the destination is opened, so a
    failure while dumping leaves the existing file untouched instead of
    truncating it. Any error is reported as a warning and never raised.

    Args:
        config_data: Configuration model; ``model_dump(exclude_none=True)`` is
            what gets serialized.
        path: Destination file, overwritten on success.
    """
    # Basic save, ideally config.py would provide a robust method
    try:
        # With no stream argument ``yaml.dump`` returns the document as a string.
        serialized = yaml.dump(config_data.model_dump(exclude_none=True), sort_keys=False)
        with open(path, "w") as f:
            f.write(serialized)
        print(f"Config updated and saved to {path}")
    except Exception as e:
        print(f"Warning: Failed to save updated config to {path}: {e}")
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
def deploy_command(args):
    """Create and deploy an evaluator or register a remote one.

    Depending on ``args.target`` / ``args.remote_url`` this either
      * deploys to GCP Cloud Run and registers the resulting URL,
      * starts a local server plus tunnel and registers the tunnel URL,
      * registers an already running ``--remote-url``, or
      * packages ``--metrics-folders`` and uploads them (target ``fireworks``).

    Args:
        args: Parsed CLI arguments.

    Returns:
        ``0`` on success, ``1`` on any failure (an error has been printed).
    """
    # Check environment variables
    if not check_environment():
        return 1

    if not args.id:  # ID is always required
        print("Error: Evaluator ID (--id) is required.")
        return 1

    # Process HuggingFace key mapping if provided
    huggingface_message_key_map = None
    if args.huggingface_key_map:
        try:
            huggingface_message_key_map = json.loads(args.huggingface_key_map)
        except json.JSONDecodeError:
            print("Error: Invalid JSON format for --huggingface-key-map")
            return 1

    # State shared by the URL-producing targets and the registration step.
    service_url_to_register = None
    tunnel_provider = None
    # PIDs for cleanup if registration fails for local-serve
    local_server_pid_to_clean = None
    local_tunnel_pid_to_clean = None

    if args.target == "gcp-cloud-run":
        current_config = get_config()  # Needed by the helper
        gcp_config_from_yaml = current_config.gcp_cloud_run if current_config.gcp_cloud_run else None

        cloud_run_service_url = _deploy_to_gcp_cloud_run(args, current_config, gcp_config_from_yaml)
        if not cloud_run_service_url:
            return 1  # Error already printed by helper
        service_url_to_register = cloud_run_service_url

    elif args.target == "local-serve":
        url, tunnel_provider, server_pid, tunnel_pid = _establish_local_server_and_tunnel(args)
        if not url:
            # _establish_local_server_and_tunnel handles cleanup of server if tunnel fails completely
            return 1  # Error already printed by helper
        service_url_to_register = url
        local_server_pid_to_clean = server_pid
        local_tunnel_pid_to_clean = tunnel_pid
        print(f"Tunnel established using {tunnel_provider}.")

    elif args.remote_url:
        # This is for --target fireworks (default) but with --remote-url
        print(f"Registering remote URL: {args.remote_url} for evaluator '{args.id}'")
        if not args.remote_url.startswith(("http://", "https://")):
            print(f"Error: Invalid --remote-url '{args.remote_url}'. Must start with http:// or https://")
            return 1
        if args.metrics_folders:  # This check might be redundant if --target is explicit
            print("Info: --metrics-folders are ignored when deploying with --remote-url.")
        service_url_to_register = args.remote_url
        # No specific shim auth provided by this path.

    # Common registration step for targets that produce a URL
    if service_url_to_register:
        # Explicit success flag: cleanup must run whenever registration did not
        # complete, including failures that happen after create_evaluation returned.
        registered = False
        try:
            print(f"Registering URL '{service_url_to_register}' with Fireworks AI for evaluator '{args.id}'...")
            evaluator = create_evaluation(
                evaluator_id=args.id,
                remote_url=service_url_to_register,
                display_name=args.display_name or args.id,
                description=args.description or f"Evaluator for {args.id} at {service_url_to_register}",
                force=args.force,
                huggingface_dataset=args.huggingface_dataset,
                huggingface_split=args.huggingface_split,
                huggingface_message_key_map=huggingface_message_key_map,
                huggingface_prompt_key=args.huggingface_prompt_key,
                huggingface_response_key=args.huggingface_response_key,
            )
            evaluator_name = evaluator.get("name", args.id)
            print(
                f"Successfully registered evaluator '{evaluator_name}' on Fireworks AI, pointing to '{service_url_to_register}'."
            )
            registered = True
            if args.target == "local-serve":
                print(
                    f"Local server (PID: {local_server_pid_to_clean}) and {tunnel_provider} tunnel (PID: {local_tunnel_pid_to_clean}) are running."
                )
                # NOTE(review): this function returns right after printing; whether
                # the processes are really stopped at exit depends on the process
                # manager, which is not visible here - confirm.
                print("They will be stopped automatically when this command exits (e.g., Ctrl+C).")
            return 0
        except PlatformAPIError as e:
            print(f"Error registering URL with Fireworks AI: {e}")
        except Exception as e:
            print(f"An unexpected error occurred during Fireworks AI registration: {e}")
        finally:
            # If registration fails for local-serve, clean up the started processes
            if args.target == "local-serve" and not registered:
                print("Registration failed or was interrupted for local-serve. Cleaning up local processes...")
                if local_tunnel_pid_to_clean:
                    stop_process(local_tunnel_pid_to_clean)
                if local_server_pid_to_clean:
                    stop_process(local_server_pid_to_clean)
        return 1

    # Original behavior: deploy by packaging local metrics_folders
    # (target "fireworks" and no --remote-url).
    if args.target == "fireworks" and not args.remote_url:
        if not args.metrics_folders:
            print("Error: --metrics-folders are required for 'fireworks' target if --remote-url is not provided.")
            return 1
        for folder_spec in args.metrics_folders:
            if "=" not in folder_spec:
                print(f"Error: Metric folder format should be 'name=path', got '{folder_spec}'")
                return 1
        try:
            print(f"Packaging and deploying metrics for evaluator '{args.id}' to Fireworks AI...")
            evaluator = create_evaluation(
                evaluator_id=args.id,
                metric_folders=args.metrics_folders,
                display_name=args.display_name or args.id,
                description=args.description or f"Evaluator: {args.id}",
                force=args.force,
                huggingface_dataset=args.huggingface_dataset,
                huggingface_split=args.huggingface_split,
                huggingface_message_key_map=huggingface_message_key_map,
                huggingface_prompt_key=args.huggingface_prompt_key,
                huggingface_response_key=args.huggingface_response_key,
            )
            evaluator_name = evaluator.get("name", args.id)
            print(f"Successfully created/updated evaluator: {evaluator_name}")
            return 0
        except PlatformAPIError as e:
            print(f"Error creating/updating evaluator '{args.id}': {e}")
            return 1
        except Exception as e:
            print(f"Error creating/updating evaluator '{args.id}': {e}")
            return 1

    # No branch handled this combination: fail explicitly instead of returning
    # None, which a caller would treat as a successful exit status.
    print(f"Error: Unsupported deployment target '{args.target}' and no --remote-url provided.")
    return 1
|