eval-protocol 0.2.93.dev2__py3-none-any.whl → 0.2.93.dev4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eval_protocol/_version.py +3 -3
- eval_protocol/cli.py +20 -0
- eval_protocol/cli_commands/create_rft.py +435 -337
- eval_protocol/cli_commands/local_test.py +65 -56
- eval_protocol/cli_commands/upload.py +18 -455
- eval_protocol/cli_commands/utils.py +511 -0
- {eval_protocol-0.2.93.dev2.dist-info → eval_protocol-0.2.93.dev4.dist-info}/METADATA +1 -1
- {eval_protocol-0.2.93.dev2.dist-info → eval_protocol-0.2.93.dev4.dist-info}/RECORD +12 -11
- {eval_protocol-0.2.93.dev2.dist-info → eval_protocol-0.2.93.dev4.dist-info}/WHEEL +0 -0
- {eval_protocol-0.2.93.dev2.dist-info → eval_protocol-0.2.93.dev4.dist-info}/entry_points.txt +0 -0
- {eval_protocol-0.2.93.dev2.dist-info → eval_protocol-0.2.93.dev4.dist-info}/licenses/LICENSE +0 -0
- {eval_protocol-0.2.93.dev2.dist-info → eval_protocol-0.2.93.dev4.dist-info}/top_level.txt +0 -0
|
@@ -1,99 +1,37 @@
|
|
|
1
|
+
import argparse
|
|
1
2
|
import json
|
|
2
3
|
import os
|
|
3
4
|
import sys
|
|
4
5
|
import time
|
|
5
|
-
import argparse
|
|
6
6
|
from typing import Any, Dict, Optional
|
|
7
7
|
|
|
8
8
|
import requests
|
|
9
|
+
from pydantic import ValidationError
|
|
9
10
|
|
|
10
|
-
from ..auth import
|
|
11
|
-
get_fireworks_account_id,
|
|
12
|
-
get_fireworks_api_base,
|
|
13
|
-
get_fireworks_api_key,
|
|
14
|
-
verify_api_key_and_get_account_id,
|
|
15
|
-
)
|
|
11
|
+
from ..auth import get_fireworks_api_base, get_fireworks_api_key
|
|
16
12
|
from ..common_utils import get_user_agent
|
|
17
13
|
from ..fireworks_rft import (
|
|
18
|
-
_map_api_host_to_app_host,
|
|
19
14
|
build_default_output_model,
|
|
20
15
|
create_dataset_from_jsonl,
|
|
21
16
|
create_reinforcement_fine_tuning_job,
|
|
17
|
+
detect_dataset_builder,
|
|
18
|
+
materialize_dataset_via_builder,
|
|
22
19
|
)
|
|
23
|
-
from ..
|
|
24
|
-
from .upload import
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
def _extract_terminal_segment(resource_name: str) -> str:
|
|
39
|
-
"""Return the last path segment if a fully-qualified resource name is provided."""
|
|
40
|
-
try:
|
|
41
|
-
return resource_name.strip("/").split("/")[-1]
|
|
42
|
-
except Exception:
|
|
43
|
-
return resource_name
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
def _print_links(evaluator_id: str, dataset_id: str, job_name: Optional[str]) -> None:
|
|
47
|
-
api_base = get_fireworks_api_base()
|
|
48
|
-
app_base = _map_api_host_to_app_host(api_base)
|
|
49
|
-
print("\n📊 Dashboard Links:")
|
|
50
|
-
evaluator_slug = _extract_terminal_segment(evaluator_id)
|
|
51
|
-
print(f" Evaluator: {app_base}/dashboard/evaluators/{evaluator_slug}")
|
|
52
|
-
if dataset_id:
|
|
53
|
-
print(f" Dataset: {app_base}/dashboard/datasets/{dataset_id}")
|
|
54
|
-
if job_name:
|
|
55
|
-
# job_name likely like accounts/{account}/reinforcementFineTuningJobs/{id}
|
|
56
|
-
try:
|
|
57
|
-
job_id = job_name.strip().split("/")[-1]
|
|
58
|
-
print(f" RFT Job: {app_base}/dashboard/fine-tuning/reinforcement/{job_id}")
|
|
59
|
-
except Exception:
|
|
60
|
-
pass
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
def _auto_find_jsonl(cwd: str) -> Optional[str]:
|
|
64
|
-
"""Find a reasonable JSONL dataset file in the current project.
|
|
65
|
-
|
|
66
|
-
Priority order:
|
|
67
|
-
- dataset.jsonl in cwd
|
|
68
|
-
- data/dataset.jsonl
|
|
69
|
-
- first *.jsonl under cwd (depth-first, skipping common vendor/venv/build dirs)
|
|
70
|
-
Returns a RELATIVE path from cwd if possible.
|
|
71
|
-
"""
|
|
72
|
-
# Direct candidates
|
|
73
|
-
direct_candidates = [
|
|
74
|
-
os.path.join(cwd, "dataset.jsonl"),
|
|
75
|
-
os.path.join(cwd, "data", "dataset.jsonl"),
|
|
76
|
-
]
|
|
77
|
-
for p in direct_candidates:
|
|
78
|
-
if os.path.isfile(p):
|
|
79
|
-
try:
|
|
80
|
-
return os.path.relpath(p, cwd)
|
|
81
|
-
except Exception:
|
|
82
|
-
return p
|
|
83
|
-
|
|
84
|
-
# Walk and find any .jsonl
|
|
85
|
-
skip_dirs = {".venv", "venv", "node_modules", "dist", "build", "__pycache__", ".git", "vendor"}
|
|
86
|
-
for dirpath, dirnames, filenames in os.walk(cwd):
|
|
87
|
-
# prune
|
|
88
|
-
dirnames[:] = [d for d in dirnames if d not in skip_dirs and not d.startswith(".")]
|
|
89
|
-
for name in sorted(filenames):
|
|
90
|
-
if name.endswith(".jsonl"):
|
|
91
|
-
candidate = os.path.join(dirpath, name)
|
|
92
|
-
try:
|
|
93
|
-
return os.path.relpath(candidate, cwd)
|
|
94
|
-
except Exception:
|
|
95
|
-
return candidate
|
|
96
|
-
return None
|
|
20
|
+
from ..models import EvaluationRow
|
|
21
|
+
from .upload import upload_command
|
|
22
|
+
from .utils import (
|
|
23
|
+
_build_entry_point,
|
|
24
|
+
_build_trimmed_dataset_id,
|
|
25
|
+
_build_evaluator_dashboard_url,
|
|
26
|
+
_discover_and_select_tests,
|
|
27
|
+
_discover_tests,
|
|
28
|
+
_ensure_account_id,
|
|
29
|
+
_extract_terminal_segment,
|
|
30
|
+
_normalize_evaluator_id,
|
|
31
|
+
_print_links,
|
|
32
|
+
_resolve_selected_test,
|
|
33
|
+
)
|
|
34
|
+
from .local_test import run_evaluator_test
|
|
97
35
|
|
|
98
36
|
|
|
99
37
|
def _extract_jsonl_from_dataloader(test_file_path: str, test_func_name: str) -> Optional[str]:
|
|
@@ -205,83 +143,23 @@ def _extract_jsonl_from_input_dataset(test_file_path: str, test_func_name: str)
|
|
|
205
143
|
if isinstance(dataset_path, (list, tuple)) and len(dataset_path) > 0:
|
|
206
144
|
dataset_path = dataset_path[0]
|
|
207
145
|
if isinstance(dataset_path, str) and dataset_path:
|
|
146
|
+
candidate_paths = []
|
|
208
147
|
if os.path.isabs(dataset_path):
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
return cwd_path
|
|
148
|
+
candidate_paths.append(dataset_path)
|
|
149
|
+
else:
|
|
150
|
+
base_dir = os.path.dirname(os.path.abspath(test_file_path))
|
|
151
|
+
candidate_paths.append(os.path.abspath(os.path.join(base_dir, dataset_path)))
|
|
152
|
+
# Also try resolving from current working directory
|
|
153
|
+
candidate_paths.append(os.path.abspath(os.path.join(os.getcwd(), dataset_path)))
|
|
154
|
+
|
|
155
|
+
for candidate in candidate_paths:
|
|
156
|
+
if os.path.isfile(candidate) and _validate_dataset_jsonl(candidate):
|
|
157
|
+
return candidate
|
|
220
158
|
return None
|
|
221
159
|
except Exception:
|
|
222
160
|
return None
|
|
223
161
|
|
|
224
162
|
|
|
225
|
-
def _build_trimmed_dataset_id(evaluator_id: str) -> str:
|
|
226
|
-
"""Build a dataset id derived from evaluator_id, trimmed to 63 chars.
|
|
227
|
-
|
|
228
|
-
Format: <normalized-base>-dataset-YYYYMMDDHHMMSS, where base is trimmed to fit.
|
|
229
|
-
"""
|
|
230
|
-
# Normalize base similarly to evaluator id rules
|
|
231
|
-
from .upload import _normalize_evaluator_id # local import to avoid cycle at module import time
|
|
232
|
-
|
|
233
|
-
base = _normalize_evaluator_id(evaluator_id)
|
|
234
|
-
suffix = f"-dataset-{time.strftime('%Y%m%d%H%M%S')}"
|
|
235
|
-
max_total = 63
|
|
236
|
-
max_base_len = max_total - len(suffix)
|
|
237
|
-
if max_base_len < 1:
|
|
238
|
-
max_base_len = 1
|
|
239
|
-
if len(base) > max_base_len:
|
|
240
|
-
base = base[:max_base_len].rstrip("-")
|
|
241
|
-
if not base:
|
|
242
|
-
base = "dataset"
|
|
243
|
-
# Ensure first char is a letter
|
|
244
|
-
if not base:
|
|
245
|
-
base = "dataset"
|
|
246
|
-
if not base[0].isalpha():
|
|
247
|
-
base = f"eval-{base}"
|
|
248
|
-
if len(base) > max_base_len:
|
|
249
|
-
base = base[:max_base_len]
|
|
250
|
-
base = base.rstrip("-") or "dataset"
|
|
251
|
-
return f"{base}{suffix}"
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
def _resolve_selected_test(
|
|
255
|
-
project_root: str,
|
|
256
|
-
evaluator_id: Optional[str],
|
|
257
|
-
selected_tests: Optional[list] = None,
|
|
258
|
-
) -> tuple[Optional[str], Optional[str]]:
|
|
259
|
-
"""
|
|
260
|
-
Resolve a single test's source file path and function name to use downstream.
|
|
261
|
-
Priority:
|
|
262
|
-
1) If selected_tests provided and length == 1, use it.
|
|
263
|
-
2) Else discover tests; if exactly one test, use it.
|
|
264
|
-
3) Else, if evaluator_id provided, match by normalized '<file-stem>-<func-name>'.
|
|
265
|
-
Returns: (file_path, func_name) or (None, None) if unresolved.
|
|
266
|
-
"""
|
|
267
|
-
try:
|
|
268
|
-
tests = selected_tests if selected_tests is not None else _discover_tests(project_root)
|
|
269
|
-
if not tests:
|
|
270
|
-
return None, None
|
|
271
|
-
if len(tests) == 1:
|
|
272
|
-
return tests[0].file_path, tests[0].qualname.split(".")[-1]
|
|
273
|
-
if evaluator_id:
|
|
274
|
-
for t in tests:
|
|
275
|
-
func_name = t.qualname.split(".")[-1]
|
|
276
|
-
source_file_name = os.path.splitext(os.path.basename(t.file_path))[0]
|
|
277
|
-
candidate = _normalize_evaluator_id(f"{source_file_name}-{func_name}")
|
|
278
|
-
if candidate == evaluator_id:
|
|
279
|
-
return t.file_path, func_name
|
|
280
|
-
return None, None
|
|
281
|
-
except Exception:
|
|
282
|
-
return None, None
|
|
283
|
-
|
|
284
|
-
|
|
285
163
|
def _poll_evaluator_status(
|
|
286
164
|
evaluator_resource_name: str, api_key: str, api_base: str, timeout_minutes: int = 10
|
|
287
165
|
) -> bool:
|
|
@@ -343,45 +221,96 @@ def _poll_evaluator_status(
|
|
|
343
221
|
return False
|
|
344
222
|
|
|
345
223
|
|
|
346
|
-
def
|
|
347
|
-
|
|
348
|
-
non_interactive: bool = bool(getattr(args, "yes", False))
|
|
349
|
-
dry_run: bool = bool(getattr(args, "dry_run", False))
|
|
350
|
-
force: bool = bool(getattr(args, "force", False))
|
|
351
|
-
# Track the specifically chosen test (if any) to aid dataset inference later
|
|
352
|
-
selected_test_file_path: Optional[str] = None
|
|
353
|
-
selected_test_func_name: Optional[str] = None
|
|
224
|
+
def _validate_dataset_jsonl(jsonl_path: str, sample_limit: int = 50) -> bool:
|
|
225
|
+
"""Validate that a JSONL file contains rows compatible with EvaluationRow.
|
|
354
226
|
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
227
|
+
We stream up to `sample_limit` rows, ensuring each is JSON-decodable and can be
|
|
228
|
+
parsed by the EvaluationRow model. Returns True on success, False on any error.
|
|
229
|
+
"""
|
|
230
|
+
try:
|
|
231
|
+
if not os.path.isfile(jsonl_path):
|
|
232
|
+
print(f"Error: dataset JSONL not found at path: {jsonl_path}")
|
|
233
|
+
return False
|
|
234
|
+
|
|
235
|
+
row_count = 0
|
|
236
|
+
with open(jsonl_path, "r", encoding="utf-8") as f:
|
|
237
|
+
for line in f:
|
|
238
|
+
line = line.strip()
|
|
239
|
+
if not line:
|
|
240
|
+
continue
|
|
241
|
+
try:
|
|
242
|
+
data = json.loads(line)
|
|
243
|
+
except json.JSONDecodeError as e:
|
|
244
|
+
print(f"Error: dataset JSONL contains invalid JSON (line {row_count + 1}): {e}")
|
|
245
|
+
return False
|
|
359
246
|
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
247
|
+
try:
|
|
248
|
+
EvaluationRow.model_validate(data)
|
|
249
|
+
except ValidationError as e:
|
|
250
|
+
print(f"Error: dataset JSONL row {row_count + 1} is not a valid EvaluationRow: {e}")
|
|
251
|
+
return False
|
|
364
252
|
|
|
365
|
-
|
|
253
|
+
row_count += 1
|
|
254
|
+
if row_count >= sample_limit:
|
|
255
|
+
break
|
|
256
|
+
|
|
257
|
+
if row_count == 0:
|
|
258
|
+
print(f"Error: dataset JSONL at {jsonl_path} appears to be empty.")
|
|
259
|
+
return False
|
|
260
|
+
|
|
261
|
+
return True
|
|
262
|
+
except Exception as e:
|
|
263
|
+
print(f"Error validating dataset JSONL at {jsonl_path}: {e}")
|
|
264
|
+
return False
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def _validate_dataset(dataset_jsonl: Optional[str]) -> bool:
|
|
268
|
+
"""Validate dataset JSONL path when available; no-op when using dataset IDs only."""
|
|
269
|
+
if not dataset_jsonl:
|
|
270
|
+
return True
|
|
271
|
+
return _validate_dataset_jsonl(dataset_jsonl)
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def _validate_evaluator_locally(
|
|
275
|
+
project_root: str,
|
|
276
|
+
selected_test_file: Optional[str],
|
|
277
|
+
selected_test_func: Optional[str],
|
|
278
|
+
ignore_docker: bool,
|
|
279
|
+
docker_build_extra: str,
|
|
280
|
+
docker_run_extra: str,
|
|
281
|
+
) -> bool:
|
|
282
|
+
"""Run pytest locally for the selected evaluation test to validate the evaluator."""
|
|
283
|
+
if not selected_test_file or not selected_test_func:
|
|
284
|
+
# No local test associated; skip validation but warn the user.
|
|
285
|
+
print("Warning: Could not resolve a local evaluation test for this evaluator; skipping local validation.")
|
|
286
|
+
return True
|
|
287
|
+
|
|
288
|
+
pytest_target = _build_entry_point(project_root, selected_test_file, selected_test_func)
|
|
289
|
+
exit_code = run_evaluator_test(
|
|
290
|
+
project_root=project_root,
|
|
291
|
+
pytest_target=pytest_target,
|
|
292
|
+
ignore_docker=ignore_docker,
|
|
293
|
+
docker_build_extra=docker_build_extra,
|
|
294
|
+
docker_run_extra=docker_run_extra,
|
|
295
|
+
)
|
|
296
|
+
return exit_code == 0
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def _resolve_evaluator(
|
|
300
|
+
project_root: str,
|
|
301
|
+
evaluator_arg: Optional[str],
|
|
302
|
+
non_interactive: bool,
|
|
303
|
+
account_id: str,
|
|
304
|
+
) -> tuple[Optional[str], Optional[str], Optional[str], Optional[str]]:
|
|
305
|
+
"""Resolve evaluator id/resource and associated local test (file + func)."""
|
|
306
|
+
evaluator_id = evaluator_arg
|
|
307
|
+
selected_test_file_path: Optional[str] = None
|
|
308
|
+
selected_test_func_name: Optional[str] = None
|
|
366
309
|
|
|
367
|
-
# Resolve evaluator id/entry if omitted (reuse upload's selector flow)
|
|
368
|
-
project_root = os.getcwd()
|
|
369
310
|
if not evaluator_id:
|
|
370
|
-
|
|
371
|
-
tests = _discover_tests(project_root)
|
|
372
|
-
if not tests:
|
|
373
|
-
print("No evaluation tests found.")
|
|
374
|
-
print("\nHint: Make sure your tests use the @evaluation_test decorator.")
|
|
375
|
-
return 1
|
|
376
|
-
# Always interactive selection here
|
|
377
|
-
try:
|
|
378
|
-
selected_tests = _prompt_select(tests, non_interactive=non_interactive)
|
|
379
|
-
except Exception:
|
|
380
|
-
print("Error: Failed to open selector UI. Please pass --evaluator or --entry explicitly.")
|
|
381
|
-
return 1
|
|
311
|
+
selected_tests = _discover_and_select_tests(project_root, non_interactive=non_interactive)
|
|
382
312
|
if not selected_tests:
|
|
383
|
-
|
|
384
|
-
return 1
|
|
313
|
+
return None, None, None, None
|
|
385
314
|
if len(selected_tests) != 1:
|
|
386
315
|
if non_interactive and len(selected_tests) > 1:
|
|
387
316
|
print("Error: Multiple evaluation tests found in --yes (non-interactive) mode.")
|
|
@@ -400,7 +329,8 @@ def create_rft_command(args) -> int:
|
|
|
400
329
|
pass
|
|
401
330
|
else:
|
|
402
331
|
print("Error: Please select exactly one evaluation test for 'create rft'.")
|
|
403
|
-
return
|
|
332
|
+
return None, None, None, None
|
|
333
|
+
|
|
404
334
|
# Derive evaluator_id from user's single selection
|
|
405
335
|
chosen = selected_tests[0]
|
|
406
336
|
func_name = chosen.qualname.split(".")[-1]
|
|
@@ -410,129 +340,49 @@ def create_rft_command(args) -> int:
|
|
|
410
340
|
selected_test_file_path, selected_test_func_name = _resolve_selected_test(
|
|
411
341
|
project_root, evaluator_id, selected_tests=selected_tests
|
|
412
342
|
)
|
|
413
|
-
# Resolve evaluator resource name to fully-qualified format required by API.
|
|
414
|
-
# Allow users to pass either short id or fully-qualified resource.
|
|
415
|
-
if evaluator_id and evaluator_id.startswith("accounts/"):
|
|
416
|
-
evaluator_resource_name = evaluator_id
|
|
417
|
-
evaluator_id = _extract_terminal_segment(evaluator_id)
|
|
418
343
|
else:
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
"User-Agent": get_user_agent(),
|
|
429
|
-
}
|
|
430
|
-
resp = requests.get(f"{api_base}/v1/{evaluator_resource_name}", headers=headers, timeout=10)
|
|
431
|
-
if resp.ok:
|
|
432
|
-
state = resp.json().get("state", "STATE_UNSPECIFIED")
|
|
433
|
-
print(f"✓ Evaluator exists (state: {state}). Skipping upload (use --force to overwrite).")
|
|
434
|
-
# Poll for ACTIVE before proceeding
|
|
435
|
-
print(f"Waiting for evaluator '{evaluator_id}' to become ACTIVE...")
|
|
436
|
-
if not _poll_evaluator_status(
|
|
437
|
-
evaluator_resource_name=evaluator_resource_name,
|
|
438
|
-
api_key=api_key,
|
|
439
|
-
api_base=api_base,
|
|
440
|
-
timeout_minutes=10,
|
|
441
|
-
):
|
|
442
|
-
app_base = _map_api_host_to_app_host(api_base)
|
|
443
|
-
evaluator_slug = _extract_terminal_segment(evaluator_id)
|
|
444
|
-
dashboard_url = f"{app_base}/dashboard/evaluators/{evaluator_slug}"
|
|
445
|
-
print("\n❌ Evaluator is not ready within the timeout period.")
|
|
446
|
-
print(f"📊 Please check the evaluator status at: {dashboard_url}")
|
|
447
|
-
print(" Wait for it to become ACTIVE, then run 'eval-protocol create rft' again.")
|
|
448
|
-
return 1
|
|
449
|
-
skip_upload = True
|
|
450
|
-
# Populate selected test info for dataset inference later
|
|
451
|
-
st_path, st_func = _resolve_selected_test(project_root, evaluator_id)
|
|
452
|
-
if st_path and st_func:
|
|
453
|
-
selected_test_file_path = st_path
|
|
454
|
-
selected_test_func_name = st_func
|
|
455
|
-
except requests.exceptions.RequestException:
|
|
456
|
-
pass
|
|
457
|
-
|
|
458
|
-
# Ensure evaluator exists by invoking the upload flow programmatically
|
|
459
|
-
if not skip_upload:
|
|
460
|
-
try:
|
|
461
|
-
from .upload import upload_command
|
|
462
|
-
|
|
463
|
-
tests = _discover_tests(project_root)
|
|
464
|
-
selected_entry: Optional[str] = None
|
|
465
|
-
st_path, st_func = _resolve_selected_test(project_root, evaluator_id, selected_tests=tests)
|
|
466
|
-
if st_path and st_func:
|
|
467
|
-
abs_path = os.path.abspath(st_path)
|
|
468
|
-
try:
|
|
469
|
-
rel = os.path.relpath(abs_path, project_root)
|
|
470
|
-
except Exception:
|
|
471
|
-
rel = abs_path
|
|
472
|
-
selected_entry = f"{rel}::{st_func}"
|
|
473
|
-
selected_test_file_path = st_path
|
|
474
|
-
selected_test_func_name = st_func
|
|
475
|
-
# If still unresolved and multiple tests exist, fail fast to avoid uploading unintended evaluators
|
|
476
|
-
if selected_entry is None and len(tests) > 1:
|
|
477
|
-
print(
|
|
478
|
-
f"Error: Multiple evaluation tests found, and the selected evaluator {evaluator_id} does not match any discovered test.\n"
|
|
479
|
-
" Please re-run specifying the evaluator.\n"
|
|
480
|
-
" Hints:\n"
|
|
481
|
-
" - eval-protocol create rft --evaluator <existing-evaluator-id>\n"
|
|
482
|
-
)
|
|
483
|
-
return 1
|
|
344
|
+
# Caller provided an evaluator id or fully-qualified resource; try to resolve local test
|
|
345
|
+
short_id = evaluator_id
|
|
346
|
+
if evaluator_id.startswith("accounts/"):
|
|
347
|
+
short_id = _extract_terminal_segment(evaluator_id)
|
|
348
|
+
st_path, st_func = _resolve_selected_test(project_root, short_id)
|
|
349
|
+
if st_path and st_func:
|
|
350
|
+
selected_test_file_path = st_path
|
|
351
|
+
selected_test_func_name = st_func
|
|
352
|
+
evaluator_id = short_id
|
|
484
353
|
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
entry=selected_entry,
|
|
488
|
-
id=evaluator_id,
|
|
489
|
-
display_name=None,
|
|
490
|
-
description=None,
|
|
491
|
-
force=force, # Pass through the --force flag
|
|
492
|
-
yes=True,
|
|
493
|
-
env_file=None, # Add the new env_file parameter
|
|
494
|
-
)
|
|
495
|
-
|
|
496
|
-
if force:
|
|
497
|
-
print(f"🔄 Force flag enabled - will overwrite existing evaluator '{evaluator_id}'")
|
|
498
|
-
|
|
499
|
-
rc = upload_command(upload_args)
|
|
500
|
-
if rc == 0:
|
|
501
|
-
print(f"✓ Uploaded/ensured evaluator: {evaluator_id}")
|
|
354
|
+
if not evaluator_id:
|
|
355
|
+
return None, None, None, None
|
|
502
356
|
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
api_base=api_base,
|
|
509
|
-
timeout_minutes=10,
|
|
510
|
-
)
|
|
357
|
+
# Resolve evaluator resource name to fully-qualified format required by API.
|
|
358
|
+
if evaluator_arg and evaluator_arg.startswith("accounts/"):
|
|
359
|
+
evaluator_resource_name = evaluator_arg
|
|
360
|
+
else:
|
|
361
|
+
evaluator_resource_name = f"accounts/{account_id}/evaluators/{evaluator_id}"
|
|
511
362
|
|
|
512
|
-
|
|
513
|
-
# Print helpful message with dashboard link
|
|
514
|
-
app_base = _map_api_host_to_app_host(api_base)
|
|
515
|
-
evaluator_slug = _extract_terminal_segment(evaluator_id)
|
|
516
|
-
dashboard_url = f"{app_base}/dashboard/evaluators/{evaluator_slug}"
|
|
363
|
+
return evaluator_id, evaluator_resource_name, selected_test_file_path, selected_test_func_name
|
|
517
364
|
|
|
518
|
-
print("\n❌ Evaluator is not ready within the timeout period.")
|
|
519
|
-
print(f"📊 Please check the evaluator status at: {dashboard_url}")
|
|
520
|
-
print(" Wait for it to become ACTIVE, then run 'eval-protocol create rft' again.")
|
|
521
|
-
return 1
|
|
522
|
-
else:
|
|
523
|
-
# Evaluator ACTIVE; proceed
|
|
524
|
-
pass
|
|
525
|
-
else:
|
|
526
|
-
print("Warning: Evaluator upload did not complete successfully; proceeding to RFT creation.")
|
|
527
|
-
except Exception as e:
|
|
528
|
-
print(f"Warning: Failed to upload evaluator automatically: {e}")
|
|
529
365
|
|
|
530
|
-
|
|
366
|
+
def _resolve_dataset(
|
|
367
|
+
project_root: str,
|
|
368
|
+
account_id: str,
|
|
369
|
+
evaluator_id: str,
|
|
370
|
+
args: argparse.Namespace,
|
|
371
|
+
selected_test_file_path: Optional[str],
|
|
372
|
+
selected_test_func_name: Optional[str],
|
|
373
|
+
) -> tuple[Optional[str], Optional[str], Optional[str]]:
|
|
374
|
+
"""Resolve dataset source without performing any uploads.
|
|
375
|
+
|
|
376
|
+
Returns a tuple of:
|
|
377
|
+
- dataset_id: existing dataset id when using --dataset or fully-qualified dataset resource
|
|
378
|
+
- dataset_resource: fully-qualified dataset resource for existing datasets; None for JSONL sources
|
|
379
|
+
- dataset_jsonl: local JSONL path when using --dataset-jsonl or inferred sources; None for id-only datasets
|
|
380
|
+
"""
|
|
531
381
|
dataset_id = getattr(args, "dataset", None)
|
|
532
382
|
dataset_jsonl = getattr(args, "dataset_jsonl", None)
|
|
533
383
|
dataset_display_name = getattr(args, "dataset_display_name", None)
|
|
534
|
-
dataset_builder = getattr(args, "dataset_builder", None) # accepted but unused in simplified flow
|
|
535
384
|
dataset_resource_override: Optional[str] = None
|
|
385
|
+
|
|
536
386
|
if isinstance(dataset_id, str) and dataset_id.startswith("accounts/"):
|
|
537
387
|
# Caller passed a fully-qualified dataset; capture it for body and keep only terminal id for printing
|
|
538
388
|
dataset_resource_override = dataset_id
|
|
@@ -553,23 +403,21 @@ def create_rft_command(args) -> int:
|
|
|
553
403
|
test_file_for_infer = tests[0].file_path
|
|
554
404
|
func_for_infer = tests[0].qualname.split(".")[-1]
|
|
555
405
|
if test_file_for_infer and func_for_infer:
|
|
556
|
-
#
|
|
406
|
+
# Block using data loaders as a dataset source
|
|
557
407
|
dataset_jsonl = _extract_jsonl_from_dataloader(test_file_for_infer, func_for_infer)
|
|
408
|
+
if dataset_jsonl:
|
|
409
|
+
print(
|
|
410
|
+
"Error: Evaluation tests that use 'data_loaders' to provide a dataset JSONL are not supported for 'create rft'.\n"
|
|
411
|
+
" Please switch to a JSONL-based dataset via input_dataset arg in @evaluation_test decorator."
|
|
412
|
+
)
|
|
413
|
+
return None, None, None
|
|
414
|
+
dataset_jsonl = _extract_jsonl_from_input_dataset(test_file_for_infer, func_for_infer)
|
|
558
415
|
if dataset_jsonl:
|
|
559
416
|
try:
|
|
560
417
|
rel = os.path.relpath(dataset_jsonl, project_root)
|
|
561
418
|
except Exception:
|
|
562
419
|
rel = dataset_jsonl
|
|
563
|
-
print(f"✓ Using JSONL from
|
|
564
|
-
if not dataset_jsonl:
|
|
565
|
-
# Fall back to input_dataset (dataset_path)
|
|
566
|
-
dataset_jsonl = _extract_jsonl_from_input_dataset(test_file_for_infer, func_for_infer)
|
|
567
|
-
if dataset_jsonl:
|
|
568
|
-
try:
|
|
569
|
-
rel = os.path.relpath(dataset_jsonl, project_root)
|
|
570
|
-
except Exception:
|
|
571
|
-
rel = dataset_jsonl
|
|
572
|
-
print(f"✓ Using JSONL from input_dataset: {rel}")
|
|
420
|
+
print(f"✓ Using JSONL from input_dataset: {rel}")
|
|
573
421
|
if not dataset_jsonl:
|
|
574
422
|
# Last resort: attempt to detect and run a dataset builder in the test's directory
|
|
575
423
|
metric_dir = os.path.dirname(test_file_for_infer)
|
|
@@ -585,33 +433,182 @@ def create_rft_command(args) -> int:
|
|
|
585
433
|
print(
|
|
586
434
|
"Error: Could not determine dataset. Provide --dataset or --dataset-jsonl, or ensure a JSONL-based data loader or input_dataset is used in your single discovered test."
|
|
587
435
|
)
|
|
588
|
-
return
|
|
436
|
+
return None, None, None
|
|
589
437
|
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
438
|
+
# Build dataset resource for existing datasets; JSONL-based datasets will be uploaded later.
|
|
439
|
+
dataset_resource = None
|
|
440
|
+
if dataset_id:
|
|
441
|
+
dataset_resource = dataset_resource_override or f"accounts/{account_id}/datasets/{dataset_id}"
|
|
442
|
+
|
|
443
|
+
return dataset_id, dataset_resource, dataset_jsonl
|
|
444
|
+
|
|
445
|
+
|
|
446
|
+
def _upload_dataset(
|
|
447
|
+
project_root: str,
|
|
448
|
+
account_id: str,
|
|
449
|
+
api_key: str,
|
|
450
|
+
api_base: str,
|
|
451
|
+
evaluator_id: str,
|
|
452
|
+
dataset_id: Optional[str],
|
|
453
|
+
dataset_resource: Optional[str],
|
|
454
|
+
dataset_jsonl: Optional[str],
|
|
455
|
+
args: argparse.Namespace,
|
|
456
|
+
dry_run: bool,
|
|
457
|
+
) -> tuple[Optional[str], Optional[str]]:
|
|
458
|
+
"""Create/upload the dataset when using a local JSONL source.
|
|
459
|
+
|
|
460
|
+
For existing datasets (--dataset or fully-qualified ids), this is a no-op that
|
|
461
|
+
simply ensures dataset_id and dataset_resource are populated.
|
|
462
|
+
"""
|
|
463
|
+
# Existing dataset case: nothing to upload
|
|
464
|
+
if not dataset_jsonl:
|
|
465
|
+
if not dataset_id:
|
|
466
|
+
return None, None
|
|
467
|
+
if not dataset_resource:
|
|
468
|
+
dataset_resource = f"accounts/{account_id}/datasets/{dataset_id}"
|
|
469
|
+
return dataset_id, dataset_resource
|
|
470
|
+
|
|
471
|
+
# JSONL-based dataset: upload or simulate upload
|
|
472
|
+
inferred_dataset_id = _build_trimmed_dataset_id(evaluator_id)
|
|
473
|
+
dataset_display_name = getattr(args, "dataset_display_name", None) or inferred_dataset_id
|
|
474
|
+
|
|
475
|
+
# Resolve dataset_jsonl path relative to CWD if needed
|
|
476
|
+
jsonl_path_for_upload = (
|
|
477
|
+
dataset_jsonl if os.path.isabs(dataset_jsonl) else os.path.abspath(os.path.join(project_root, dataset_jsonl))
|
|
478
|
+
)
|
|
479
|
+
|
|
480
|
+
if dry_run:
|
|
481
|
+
print("--dry-run: would create dataset and upload JSONL")
|
|
482
|
+
dataset_id = inferred_dataset_id
|
|
483
|
+
dataset_resource = f"accounts/{account_id}/datasets/{dataset_id}"
|
|
484
|
+
return dataset_id, dataset_resource
|
|
485
|
+
|
|
486
|
+
try:
|
|
487
|
+
dataset_id, _ = create_dataset_from_jsonl(
|
|
488
|
+
account_id=account_id,
|
|
489
|
+
api_key=api_key,
|
|
490
|
+
api_base=api_base,
|
|
491
|
+
dataset_id=inferred_dataset_id,
|
|
492
|
+
display_name=dataset_display_name,
|
|
493
|
+
jsonl_path=jsonl_path_for_upload,
|
|
494
|
+
)
|
|
495
|
+
print(f"✓ Created and uploaded dataset: {dataset_id}")
|
|
496
|
+
dataset_resource = f"accounts/{account_id}/datasets/{dataset_id}"
|
|
497
|
+
return dataset_id, dataset_resource
|
|
498
|
+
except Exception as e:
|
|
499
|
+
print(f"Error creating/uploading dataset: {e}")
|
|
500
|
+
return None, None
|
|
501
|
+
|
|
502
|
+
|
|
503
|
+
def _upload_and_ensure_evaluator(
|
|
504
|
+
project_root: str,
|
|
505
|
+
evaluator_id: str,
|
|
506
|
+
evaluator_resource_name: str,
|
|
507
|
+
api_key: str,
|
|
508
|
+
api_base: str,
|
|
509
|
+
force: bool,
|
|
510
|
+
) -> bool:
|
|
511
|
+
"""Ensure the evaluator exists and is ACTIVE, uploading it if needed."""
|
|
512
|
+
# Optional short-circuit: if evaluator already exists and not forcing, skip upload path
|
|
513
|
+
if not force:
|
|
514
|
+
try:
|
|
515
|
+
headers = {
|
|
516
|
+
"Authorization": f"Bearer {api_key}",
|
|
517
|
+
"Content-Type": "application/json",
|
|
518
|
+
"User-Agent": get_user_agent(),
|
|
519
|
+
}
|
|
520
|
+
resp = requests.get(f"{api_base}/v1/{evaluator_resource_name}", headers=headers, timeout=10)
|
|
521
|
+
if resp.ok:
|
|
522
|
+
state = resp.json().get("state", "STATE_UNSPECIFIED")
|
|
523
|
+
print(f"✓ Evaluator exists (state: {state}). Skipping upload (use --force to overwrite).")
|
|
524
|
+
# Poll for ACTIVE before proceeding
|
|
525
|
+
print(f"Waiting for evaluator '{evaluator_id}' to become ACTIVE...")
|
|
526
|
+
if not _poll_evaluator_status(
|
|
527
|
+
evaluator_resource_name=evaluator_resource_name,
|
|
604
528
|
api_key=api_key,
|
|
605
529
|
api_base=api_base,
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
return
|
|
530
|
+
timeout_minutes=10,
|
|
531
|
+
):
|
|
532
|
+
dashboard_url = _build_evaluator_dashboard_url(evaluator_id)
|
|
533
|
+
print("\n❌ Evaluator is not ready within the timeout period.")
|
|
534
|
+
print(f"📊 Please check the evaluator status at: {dashboard_url}")
|
|
535
|
+
print(" Wait for it to become ACTIVE, then run 'eval-protocol create rft' again.")
|
|
536
|
+
return False
|
|
537
|
+
return True
|
|
538
|
+
except requests.exceptions.RequestException:
|
|
539
|
+
pass
|
|
614
540
|
|
|
541
|
+
# Ensure evaluator exists by invoking the upload flow programmatically
|
|
542
|
+
try:
|
|
543
|
+
tests = _discover_tests(project_root)
|
|
544
|
+
selected_entry: Optional[str] = None
|
|
545
|
+
st_path, st_func = _resolve_selected_test(project_root, evaluator_id, selected_tests=tests)
|
|
546
|
+
if st_path and st_func:
|
|
547
|
+
selected_entry = _build_entry_point(project_root, st_path, st_func)
|
|
548
|
+
# If still unresolved and multiple tests exist, fail fast to avoid uploading unintended evaluators
|
|
549
|
+
if selected_entry is None and len(tests) > 1:
|
|
550
|
+
print(
|
|
551
|
+
f"Error: Multiple evaluation tests found, and the selected evaluator {evaluator_id} does not match any discovered test.\n"
|
|
552
|
+
" Please re-run specifying the evaluator.\n"
|
|
553
|
+
" Hints:\n"
|
|
554
|
+
" - eval-protocol create rft --evaluator <existing-evaluator-id>\n"
|
|
555
|
+
)
|
|
556
|
+
return False
|
|
557
|
+
|
|
558
|
+
upload_args = argparse.Namespace(
|
|
559
|
+
path=project_root,
|
|
560
|
+
entry=selected_entry,
|
|
561
|
+
id=evaluator_id,
|
|
562
|
+
display_name=None,
|
|
563
|
+
description=None,
|
|
564
|
+
force=force, # Pass through the --force flag
|
|
565
|
+
yes=True,
|
|
566
|
+
env_file=None, # Add the new env_file parameter
|
|
567
|
+
)
|
|
568
|
+
|
|
569
|
+
if force:
|
|
570
|
+
print(f"🔄 Force flag enabled - will overwrite existing evaluator '{evaluator_id}'")
|
|
571
|
+
|
|
572
|
+
rc = upload_command(upload_args)
|
|
573
|
+
if rc == 0:
|
|
574
|
+
print(f"✓ Uploaded/ensured evaluator: {evaluator_id}")
|
|
575
|
+
|
|
576
|
+
# Poll for evaluator status
|
|
577
|
+
print(f"Waiting for evaluator '{evaluator_id}' to become ACTIVE...")
|
|
578
|
+
is_active = _poll_evaluator_status(
|
|
579
|
+
evaluator_resource_name=evaluator_resource_name,
|
|
580
|
+
api_key=api_key,
|
|
581
|
+
api_base=api_base,
|
|
582
|
+
timeout_minutes=10,
|
|
583
|
+
)
|
|
584
|
+
|
|
585
|
+
if not is_active:
|
|
586
|
+
dashboard_url = _build_evaluator_dashboard_url(evaluator_id)
|
|
587
|
+
print("\n❌ Evaluator is not ready within the timeout period.")
|
|
588
|
+
print(f"📊 Please check the evaluator status at: {dashboard_url}")
|
|
589
|
+
print(" Wait for it to become ACTIVE, then run 'eval-protocol create rft' again.")
|
|
590
|
+
return False
|
|
591
|
+
return True
|
|
592
|
+
else:
|
|
593
|
+
print("Warning: Evaluator upload did not complete successfully; proceeding to RFT creation.")
|
|
594
|
+
return False
|
|
595
|
+
except Exception as e:
|
|
596
|
+
print(f"Warning: Failed to upload evaluator automatically: {e}")
|
|
597
|
+
return False
|
|
598
|
+
|
|
599
|
+
|
|
600
|
+
def _create_rft_job(
|
|
601
|
+
account_id: str,
|
|
602
|
+
api_key: str,
|
|
603
|
+
api_base: str,
|
|
604
|
+
evaluator_id: str,
|
|
605
|
+
evaluator_resource_name: str,
|
|
606
|
+
dataset_id: str,
|
|
607
|
+
dataset_resource: str,
|
|
608
|
+
args: argparse.Namespace,
|
|
609
|
+
dry_run: bool,
|
|
610
|
+
) -> int:
|
|
611
|
+
"""Build and submit the RFT job request."""
|
|
615
612
|
# Build training config/body
|
|
616
613
|
# Exactly one of base-model or warm-start-from must be provided
|
|
617
614
|
base_model_raw = getattr(args, "base_model", None)
|
|
@@ -682,9 +679,6 @@ def create_rft_command(args) -> int:
|
|
|
682
679
|
"runId": getattr(args, "wandb_run_id", None),
|
|
683
680
|
}
|
|
684
681
|
|
|
685
|
-
# Build dataset resource (prefer override when provided)
|
|
686
|
-
dataset_resource = dataset_resource_override or f"accounts/{account_id}/datasets/{dataset_id}"
|
|
687
|
-
|
|
688
682
|
body: Dict[str, Any] = {
|
|
689
683
|
"displayName": getattr(args, "display_name", None),
|
|
690
684
|
"dataset": dataset_resource,
|
|
@@ -732,3 +726,107 @@ def create_rft_command(args) -> int:
|
|
|
732
726
|
except Exception as e:
|
|
733
727
|
print(f"Error creating RFT job: {e}")
|
|
734
728
|
return 1
|
|
729
|
+
|
|
730
|
+
|
|
731
|
+
def create_rft_command(args) -> int:
    """Drive the ``create rft`` CLI flow and return a process exit code.

    The command is a fixed pipeline; every stage bails out with ``1`` as soon
    as it cannot hand the next stage what it needs:

    1. resolve the evaluator and its associated local test,
    2. resolve the dataset source (existing id or JSONL path),
    3. unless ``skip_validation`` is set, validate dataset and evaluator,
    4. upload the dataset,
    5. upload the evaluator and make sure it is usable,
    6. build and submit the RFT job.

    Args:
        args: Parsed CLI namespace. The options ``evaluator``, ``yes``,
            ``dry_run``, ``force``, ``skip_validation``, ``ignore_docker``,
            ``docker_build_extra`` and ``docker_run_extra`` are read with
            ``getattr`` so missing attributes fall back to defaults. The
            namespace itself is also forwarded to the dataset and job helpers.

    Returns:
        ``1`` when any stage fails; otherwise the value returned by
        ``_create_rft_job``.
    """

    def _flag(name: str) -> bool:
        # Missing or falsy attributes all collapse to False.
        return bool(getattr(args, name, False))

    def _text(name: str) -> str:
        # Missing or None attributes collapse to the empty string.
        return getattr(args, name, "") or ""

    # Pre-flight: credentials come first so nothing else runs without them.
    api_key = get_fireworks_api_key()
    if not api_key:
        print("Error: FIREWORKS_API_KEY not set.")
        return 1

    account_id = _ensure_account_id()
    if not account_id:
        print("Error: FIREWORKS_ACCOUNT_ID not set and could not be resolved.")
        return 1

    api_base = get_fireworks_api_base()
    project_root = os.getcwd()

    requested_evaluator: Optional[str] = getattr(args, "evaluator", None)
    assume_yes = _flag("yes")
    dry_run = _flag("dry_run")
    force = _flag("force")
    skip_validation = _flag("skip_validation")
    ignore_docker = _flag("ignore_docker")
    build_extra = _text("docker_build_extra")
    run_extra = _text("docker_run_extra")

    # 1) Resolve evaluator and associated local test
    resolved = _resolve_evaluator(project_root, requested_evaluator, assume_yes, account_id)
    evaluator_id, evaluator_resource_name, test_file_path, test_func_name = resolved
    if not (evaluator_id and evaluator_resource_name):
        return 1

    # 2) Resolve dataset source (id or JSONL path)
    dataset_id, dataset_resource, dataset_jsonl = _resolve_dataset(
        project_root=project_root,
        account_id=account_id,
        evaluator_id=evaluator_id,
        args=args,
        selected_test_file_path=test_file_path,
        selected_test_func_name=test_func_name,
    )
    # Without an existing dataset id or a JSONL source there is nothing to train on.
    if not dataset_id and dataset_jsonl is None:
        return 1

    # 3) Optional local validation: dataset first, then the evaluator test
    #    (the evaluator check is skipped when the dataset check already failed).
    if not skip_validation:
        checks_passed = _validate_dataset(dataset_jsonl) and _validate_evaluator_locally(
            project_root=project_root,
            selected_test_file=test_file_path,
            selected_test_func=test_func_name,
            ignore_docker=ignore_docker,
            docker_build_extra=build_extra,
            docker_run_extra=run_extra,
        )
        if not checks_passed:
            return 1

    # 4) Upload dataset when using JSONL sources
    dataset_id, dataset_resource = _upload_dataset(
        project_root=project_root,
        account_id=account_id,
        api_key=api_key,
        api_base=api_base,
        evaluator_id=evaluator_id,
        dataset_id=dataset_id,
        dataset_resource=dataset_resource,
        dataset_jsonl=dataset_jsonl,
        args=args,
        dry_run=dry_run,
    )
    if not (dataset_id and dataset_resource):
        return 1

    # 5) Ensure the evaluator is uploaded and ready before creating the job
    evaluator_ready = _upload_and_ensure_evaluator(
        project_root=project_root,
        evaluator_id=evaluator_id,
        evaluator_resource_name=evaluator_resource_name,
        api_key=api_key,
        api_base=api_base,
        force=force,
    )
    if not evaluator_ready:
        return 1

    # 6) Create the RFT job; its return value is the command's exit code
    exit_code = _create_rft_job(
        account_id=account_id,
        api_key=api_key,
        api_base=api_base,
        evaluator_id=evaluator_id,
        evaluator_resource_name=evaluator_resource_name,
        dataset_id=dataset_id,
        dataset_resource=dataset_resource,
        args=args,
        dry_run=dry_run,
    )
    return exit_code
|