nemo-evaluator-launcher 0.1.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nemo-evaluator-launcher might be problematic. Click here for more details.

Files changed (60) hide show
  1. nemo_evaluator_launcher/__init__.py +79 -0
  2. nemo_evaluator_launcher/api/__init__.py +24 -0
  3. nemo_evaluator_launcher/api/functional.py +698 -0
  4. nemo_evaluator_launcher/api/types.py +98 -0
  5. nemo_evaluator_launcher/api/utils.py +19 -0
  6. nemo_evaluator_launcher/cli/__init__.py +15 -0
  7. nemo_evaluator_launcher/cli/export.py +267 -0
  8. nemo_evaluator_launcher/cli/info.py +512 -0
  9. nemo_evaluator_launcher/cli/kill.py +41 -0
  10. nemo_evaluator_launcher/cli/ls_runs.py +134 -0
  11. nemo_evaluator_launcher/cli/ls_tasks.py +136 -0
  12. nemo_evaluator_launcher/cli/main.py +226 -0
  13. nemo_evaluator_launcher/cli/run.py +200 -0
  14. nemo_evaluator_launcher/cli/status.py +164 -0
  15. nemo_evaluator_launcher/cli/version.py +55 -0
  16. nemo_evaluator_launcher/common/__init__.py +16 -0
  17. nemo_evaluator_launcher/common/execdb.py +283 -0
  18. nemo_evaluator_launcher/common/helpers.py +366 -0
  19. nemo_evaluator_launcher/common/logging_utils.py +357 -0
  20. nemo_evaluator_launcher/common/mapping.py +295 -0
  21. nemo_evaluator_launcher/common/printing_utils.py +93 -0
  22. nemo_evaluator_launcher/configs/__init__.py +15 -0
  23. nemo_evaluator_launcher/configs/default.yaml +28 -0
  24. nemo_evaluator_launcher/configs/deployment/generic.yaml +33 -0
  25. nemo_evaluator_launcher/configs/deployment/nim.yaml +32 -0
  26. nemo_evaluator_launcher/configs/deployment/none.yaml +16 -0
  27. nemo_evaluator_launcher/configs/deployment/sglang.yaml +38 -0
  28. nemo_evaluator_launcher/configs/deployment/trtllm.yaml +24 -0
  29. nemo_evaluator_launcher/configs/deployment/vllm.yaml +42 -0
  30. nemo_evaluator_launcher/configs/execution/lepton/default.yaml +92 -0
  31. nemo_evaluator_launcher/configs/execution/local.yaml +19 -0
  32. nemo_evaluator_launcher/configs/execution/slurm/default.yaml +34 -0
  33. nemo_evaluator_launcher/executors/__init__.py +22 -0
  34. nemo_evaluator_launcher/executors/base.py +120 -0
  35. nemo_evaluator_launcher/executors/lepton/__init__.py +16 -0
  36. nemo_evaluator_launcher/executors/lepton/deployment_helpers.py +609 -0
  37. nemo_evaluator_launcher/executors/lepton/executor.py +1004 -0
  38. nemo_evaluator_launcher/executors/lepton/job_helpers.py +398 -0
  39. nemo_evaluator_launcher/executors/local/__init__.py +15 -0
  40. nemo_evaluator_launcher/executors/local/executor.py +605 -0
  41. nemo_evaluator_launcher/executors/local/run.template.sh +103 -0
  42. nemo_evaluator_launcher/executors/registry.py +38 -0
  43. nemo_evaluator_launcher/executors/slurm/__init__.py +15 -0
  44. nemo_evaluator_launcher/executors/slurm/executor.py +1147 -0
  45. nemo_evaluator_launcher/exporters/__init__.py +36 -0
  46. nemo_evaluator_launcher/exporters/base.py +121 -0
  47. nemo_evaluator_launcher/exporters/gsheets.py +409 -0
  48. nemo_evaluator_launcher/exporters/local.py +502 -0
  49. nemo_evaluator_launcher/exporters/mlflow.py +619 -0
  50. nemo_evaluator_launcher/exporters/registry.py +40 -0
  51. nemo_evaluator_launcher/exporters/utils.py +624 -0
  52. nemo_evaluator_launcher/exporters/wandb.py +490 -0
  53. nemo_evaluator_launcher/package_info.py +38 -0
  54. nemo_evaluator_launcher/resources/mapping.toml +380 -0
  55. nemo_evaluator_launcher-0.1.28.dist-info/METADATA +494 -0
  56. nemo_evaluator_launcher-0.1.28.dist-info/RECORD +60 -0
  57. nemo_evaluator_launcher-0.1.28.dist-info/WHEEL +5 -0
  58. nemo_evaluator_launcher-0.1.28.dist-info/entry_points.txt +3 -0
  59. nemo_evaluator_launcher-0.1.28.dist-info/licenses/LICENSE +451 -0
  60. nemo_evaluator_launcher-0.1.28.dist-info/top_level.txt +1 -0
@@ -0,0 +1,36 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+ """Exporter registration and factory."""
17
+
18
+ from nemo_evaluator_launcher.exporters.gsheets import GSheetsExporter
19
+ from nemo_evaluator_launcher.exporters.local import LocalExporter
20
+ from nemo_evaluator_launcher.exporters.mlflow import MLflowExporter
21
+ from nemo_evaluator_launcher.exporters.registry import available_exporters, get_exporter
22
+ from nemo_evaluator_launcher.exporters.wandb import WandBExporter
23
+
24
+
25
def create_exporter(name: str, config: dict = None):
    """Look up the exporter registered under *name* and instantiate it.

    Args:
        name: Registry key of the exporter (e.g. "gsheets").
        config: Optional exporter configuration; an empty dict is used
            when None is given.

    Returns:
        A new exporter instance of the registered class.
    """
    exporter_cls = get_exporter(name)
    return exporter_cls(config if config is not None else {})
27
+
28
+
29
+ __all__ = [
30
+ "GSheetsExporter",
31
+ "LocalExporter",
32
+ "MLflowExporter",
33
+ "WandBExporter",
34
+ "available_exporters",
35
+ "get_exporter",
36
+ ]
@@ -0,0 +1,121 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+ """Base exporter interface for nemo-evaluator-launcher results."""
17
+
18
+ import os
19
+ from abc import ABC, abstractmethod
20
+ from dataclasses import asdict, dataclass, field
21
+ from pathlib import Path
22
+ from typing import Any, Dict
23
+
24
+ from nemo_evaluator_launcher.common.execdb import ExecutionDB, JobData
25
+
26
+
27
@dataclass
class ExportResult:
    """Result of an export operation.

    Returned by every exporter's ``export_job``; ``export_invocation``
    serializes instances with ``dataclasses.asdict`` into its summary dict.
    """

    # Whether the export completed successfully.
    success: bool
    # Destination identifier (e.g. a path or a backend name such as "gsheets").
    dest: str
    # Human-readable outcome description (success summary or failure reason).
    message: str
    # Backend-specific extras (URLs, counts, ...); defaults to an empty dict.
    metadata: Dict[str, Any] = field(default_factory=dict)
35
+
36
+
37
class BaseExporter(ABC):
    """Base interface for result exporters."""

    def __init__(self, config: Dict[str, Any] = None):
        """Store the exporter configuration and open the execution database.

        Args:
            config: Optional exporter settings; an empty dict is used when None.
        """
        self.config = config or {}
        self.db = ExecutionDB()

    def export_invocation(self, invocation_id: str) -> Dict[str, Any]:
        """Export all jobs in an invocation.

        Returns a summary dict that maps each job id to the serialized
        ``ExportResult`` of ``export_job``, or a failure dict when the
        invocation has no jobs.
        """
        jobs = self.db.get_jobs(invocation_id)
        if not jobs:
            return {
                "success": False,
                "error": f"No jobs found for invocation {invocation_id}",
            }

        per_job = {
            job_id: asdict(self.export_job(job_data))
            for job_id, job_data in jobs.items()
        }
        return {"success": True, "invocation_id": invocation_id, "jobs": per_job}

    @abstractmethod
    def export_job(self, job_data: JobData) -> ExportResult:
        """Export a single job's results."""

    @abstractmethod
    def supports_executor(self, executor_type: str) -> bool:
        """Check if this exporter supports the given executor type."""

    def get_job_paths(self, job_data: JobData) -> Dict[str, Any]:
        """Get result paths based on executor type from job metadata."""

        def _filesystem_paths(storage_type: str) -> Dict[str, Any]:
            # Shared layout for locally reachable results:
            # <output_dir>/artifacts and <output_dir>/logs.
            base = Path(job_data.data["output_dir"])
            return {
                "artifacts_dir": base / "artifacts",
                "logs_dir": base / "logs",
                "storage_type": storage_type,
            }

        # Special case: remote executor artifacts accessed locally
        # (remote auto-export staged the results onto this machine).
        if job_data.data.get("storage_type") == "remote_local":
            return _filesystem_paths("remote_local")

        executor = job_data.executor

        # Local and Lepton executors both leave results on the local filesystem.
        if executor in ("local", "lepton"):
            return _filesystem_paths("local_filesystem")

        if executor == "slurm":
            # Results live on the cluster; callers need SSH coordinates.
            return {
                "remote_path": job_data.data["remote_rundir_path"],
                "hostname": job_data.data["hostname"],
                "username": job_data.data["username"],
                "storage_type": "remote_ssh",
            }

        if executor == "gitlab":
            pipeline_id = job_data.data.get("pipeline_id")
            # Inside a CI job the pipeline's artifacts are checked out locally.
            if pipeline_id and os.getenv("CI"):
                return {
                    "artifacts_dir": Path(f"artifacts/{pipeline_id}"),
                    "storage_type": "gitlab_ci_local",
                }
            # Otherwise the artifacts must be fetched through the GitLab API.
            return {
                "pipeline_id": pipeline_id,
                "project_id": job_data.data.get("project_id", 155749),
                "storage_type": "gitlab_remote",
            }

        raise ValueError(f"Unknown executor: {executor}")
@@ -0,0 +1,409 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+ """Google Sheets evaluation results exporter."""
17
+
18
+ import os
19
+ import shutil
20
+ import tempfile
21
+ from pathlib import Path
22
+ from typing import Any, Dict, List
23
+
24
+ try:
25
+ import gspread
26
+
27
+ GSPREAD_AVAILABLE = True
28
+ except ImportError:
29
+ GSPREAD_AVAILABLE = False
30
+
31
+ from nemo_evaluator_launcher.common.execdb import JobData
32
+ from nemo_evaluator_launcher.common.logging_utils import logger
33
+ from nemo_evaluator_launcher.exporters.base import BaseExporter, ExportResult
34
+ from nemo_evaluator_launcher.exporters.local import LocalExporter
35
+ from nemo_evaluator_launcher.exporters.registry import register_exporter
36
+ from nemo_evaluator_launcher.exporters.utils import (
37
+ extract_accuracy_metrics,
38
+ extract_exporter_config,
39
+ get_model_name,
40
+ get_task_name,
41
+ )
42
+
43
+
44
@register_exporter("gsheets")
class GSheetsExporter(BaseExporter):
    """Export accuracy metrics to Google Sheets with multi-invocation support.

    Artifacts are first staged locally via ``LocalExporter``; accuracy metrics
    are then extracted and appended as one row per job to the first worksheet
    of the configured spreadsheet, with metric columns discovered dynamically.
    """

    def supports_executor(self, executor_type: str) -> bool:
        # Artifacts are always staged locally first, so any executor works.
        return True

    def is_available(self) -> bool:
        """Return True when the optional gspread dependency is importable."""
        return GSPREAD_AVAILABLE

    def _open_spreadsheet(self, gsheets_config: Dict[str, Any]):
        """Authenticate with gspread and open (or create) the target spreadsheet.

        Shared by ``export_invocation`` and ``export_job`` so the connection
        logic lives in one place.

        Returns:
            Tuple of (spreadsheet handle, spreadsheet name).

        Raises:
            gspread.SpreadsheetNotFound: when an explicit ``spreadsheet_id``
                does not exist — we never create a sheet for an explicit ID.
        """
        service_account_file = gsheets_config.get("service_account_file")
        spreadsheet_name = gsheets_config.get(
            "spreadsheet_name", "NeMo Evaluator Launcher Results"
        )

        if service_account_file:
            gc = gspread.service_account(
                filename=os.path.expanduser(service_account_file)
            )
        else:
            # Fall back to gspread's default service-account discovery.
            gc = gspread.service_account()

        spreadsheet_id = gsheets_config.get("spreadsheet_id")
        try:
            if spreadsheet_id:
                sh = gc.open_by_key(spreadsheet_id)
            else:
                sh = gc.open(spreadsheet_name)
                logger.info(f"Opened existing spreadsheet: {spreadsheet_name}")
        except gspread.SpreadsheetNotFound:
            if spreadsheet_id:
                raise  # Can't create with explicit ID
            sh = gc.create(spreadsheet_name)
            logger.info(f"Created new spreadsheet: {spreadsheet_name}")
        return sh, spreadsheet_name

    def _get_artifacts_locally(
        self, job_data: JobData
    ) -> "tuple[Path | None, str | None]":
        """Stage a job's artifacts on the local filesystem via LocalExporter.

        Returns:
            ``(artifacts_dir, temp_dir)`` on success, ``(None, None)`` on
            failure. The caller must remove ``temp_dir`` when done.
        """
        temp_dir = None
        try:
            temp_dir = tempfile.mkdtemp(prefix="gsheets_")
            local_exporter = LocalExporter({"output_dir": temp_dir})
            local_result = local_exporter.export_job(job_data)

            if not local_result.success:
                logger.warning(f"LocalExporter failed: {local_result.message}")
                shutil.rmtree(temp_dir)
                return None, None

            artifacts_dir = Path(local_result.dest) / "artifacts"
            if not artifacts_dir.exists():
                logger.warning(f"No artifacts directory found in {local_result.dest}")
                shutil.rmtree(temp_dir)
                return None, None

            return artifacts_dir, temp_dir

        except Exception as e:
            logger.error(f"Failed to get artifacts locally: {e}")
            # Best-effort cleanup: temp_dir may be partially removed already.
            if temp_dir:
                shutil.rmtree(temp_dir, ignore_errors=True)
            return None, None

    def export_invocation(self, invocation_id: str) -> Dict[str, Any]:
        """Export all jobs in an invocation to Google Sheets."""
        if not self.is_available():
            return {"success": False, "error": "gspread package not installed"}

        jobs = self.db.get_jobs(invocation_id)
        if not jobs:
            return {
                "success": False,
                "error": f"No jobs found for invocation {invocation_id}",
            }

        try:
            # Load exporter config from the first job (supports job-embedded
            # config and CLI overrides).
            first_job = next(iter(jobs.values()))
            gsheets_config = extract_exporter_config(first_job, "gsheets", self.config)

            sh, spreadsheet_name = self._open_spreadsheet(gsheets_config)
            worksheet = sh.sheet1

            # Honor the same metric filter export_job() uses, so single-job
            # and whole-invocation exports produce consistent columns.
            log_metrics = gsheets_config.get("log_metrics", [])

            # Extract metrics from ALL jobs first to determine headers.
            all_job_metrics = {}
            results = {}

            for job_id, job_data in jobs.items():
                try:
                    # Get artifacts locally first.
                    artifacts_dir, temp_dir = self._get_artifacts_locally(job_data)
                    if not artifacts_dir:
                        results[job_id] = {
                            "success": False,
                            "message": "Failed to get artifacts locally",
                        }
                        all_job_metrics[job_id] = {}
                        continue

                    try:
                        # Extract metrics from local artifacts.
                        accuracy_metrics = extract_accuracy_metrics(
                            job_data,
                            lambda jd: {
                                "artifacts_dir": artifacts_dir,
                                "storage_type": "local_filesystem",
                            },
                            log_metrics,
                        )
                        all_job_metrics[job_id] = accuracy_metrics

                        if accuracy_metrics:
                            results[job_id] = {
                                "success": True,
                                "message": f"Extracted {len(accuracy_metrics)} metrics",
                                "metadata": {"metrics_count": len(accuracy_metrics)},
                            }
                        else:
                            results[job_id] = {
                                "success": False,
                                "message": "No accuracy metrics found",
                            }
                    finally:
                        if temp_dir:
                            shutil.rmtree(temp_dir)

                except Exception as e:
                    logger.error(f"Failed to extract metrics for job {job_id}: {e}")
                    results[job_id] = {
                        "success": False,
                        "message": f"Metric extraction failed: {str(e)}",
                    }
                    all_job_metrics[job_id] = {}

            # Get/update headers based on all extracted metrics.
            headers = self._get_or_update_headers(worksheet, all_job_metrics)

            # Add rows for jobs with metrics.
            rows_added = 0
            for job_id, job_data in jobs.items():
                if results[job_id]["success"]:
                    row_data = self._prepare_row_data(
                        job_data, all_job_metrics[job_id], headers
                    )
                    worksheet.append_row(row_data)
                    rows_added += 1

            # Count only true metric columns: everything that is not one of
            # the fixed base headers emitted by _get_or_update_headers.
            base_cols = [
                "Model Name",
                "Task Name",
                "Invocation ID",
                "Job ID",
                "Executor",
            ]
            return {
                "success": True,
                "invocation_id": invocation_id,
                "jobs": results,
                "metadata": {
                    "spreadsheet_name": spreadsheet_name,
                    "spreadsheet_url": sh.url,
                    "rows_added": rows_added,
                    "total_columns": len(headers),
                    "metric_columns": len(
                        [h for h in headers if h not in base_cols]
                    ),
                },
            }

        except Exception as e:
            logger.error(f"Sheets export failed for invocation {invocation_id}: {e}")
            return {"success": False, "error": f"Sheets export failed: {str(e)}"}

    def export_job(self, job_data: JobData) -> ExportResult:
        """Export single job to Google Sheets."""
        if not self.is_available():
            return ExportResult(
                success=False, dest="gsheets", message="gspread package not installed"
            )

        try:
            # Extract config from job_data.
            gsheets_config = extract_exporter_config(job_data, "gsheets", self.config)

            # Get artifacts locally first.
            artifacts_dir, temp_dir = self._get_artifacts_locally(job_data)
            if not artifacts_dir:
                return ExportResult(
                    success=False,
                    dest="gsheets",
                    message="Failed to get artifacts locally",
                )

            try:
                sh, _ = self._open_spreadsheet(gsheets_config)
                worksheet = sh.sheet1

                # Extract metrics from local artifacts.
                log_metrics = gsheets_config.get("log_metrics", [])
                accuracy_metrics = extract_accuracy_metrics(
                    job_data,
                    lambda jd: {
                        "artifacts_dir": artifacts_dir,
                        "storage_type": "local_filesystem",
                    },
                    log_metrics,
                )

                if not accuracy_metrics:
                    return ExportResult(
                        success=False,
                        dest="gsheets",
                        message="No accuracy metrics found",
                    )

                # Get/update headers for this job's metrics.
                headers = self._get_or_update_headers(
                    worksheet, {job_data.job_id: accuracy_metrics}
                )

                # Prepare and add single row for this job.
                row_data = self._prepare_row_data(job_data, accuracy_metrics, headers)
                worksheet.append_row(row_data)

                return ExportResult(
                    success=True,
                    dest="gsheets",
                    message=f"Added 1 row for job {job_data.job_id}",
                    metadata={
                        "spreadsheet_url": sh.url,
                        "job_id": job_data.job_id,
                        "metrics_logged": len(accuracy_metrics),
                    },
                )

            finally:
                if temp_dir:
                    shutil.rmtree(temp_dir)

        except Exception as e:
            logger.error(f"GSheets export failed for job {job_data.job_id}: {e}")
            return ExportResult(
                success=False, dest="gsheets", message=f"Failed: {str(e)}"
            )

    def export_multiple_invocations(self, invocation_ids: List[str]) -> Dict[str, Any]:
        """Export multiple invocations to the same sheet."""
        if not self.is_available():
            return {"success": False, "error": "gspread package not installed"}

        all_results = {}
        total_rows_added = 0
        spreadsheet_url = None

        for invocation_id in invocation_ids:
            result = self.export_invocation(invocation_id)
            all_results[invocation_id] = result

            if result["success"]:
                total_rows_added += result.get("metadata", {}).get("rows_added", 0)
                # Remember the first URL we see; all invocations share a sheet.
                if not spreadsheet_url:
                    spreadsheet_url = result.get("metadata", {}).get("spreadsheet_url")

        return {
            "success": True,
            "invocations": all_results,
            "metadata": {
                "total_invocations": len(invocation_ids),
                "total_rows_added": total_rows_added,
                "spreadsheet_url": spreadsheet_url,
                "spreadsheet_name": self.config.get(
                    "spreadsheet_name", "NeMo Evaluator Launcher Results"
                ),
            },
        }

    def _get_or_update_headers(
        self, worksheet, all_metrics: Dict[str, Dict[str, float]]
    ) -> List[str]:
        """Get existing headers or create/update them dynamically.

        Metric header names are "clean" names: the raw metric key minus its
        task-name prefix (everything after the first underscore).
        """

        # Base columns.
        base_headers = [
            "Model Name",
            "Task Name",
            "Invocation ID",
            "Job ID",
            "Executor",
        ]

        # Get all unique clean metric names (everything after first underscore).
        all_clean_metrics = set()
        for job_metrics in all_metrics.values():
            for full_name in job_metrics.keys():
                clean_name = (
                    full_name.split("_", 1)[1] if "_" in full_name else full_name
                )
                all_clean_metrics.add(clean_name)

        target_headers = base_headers + sorted(all_clean_metrics)

        # Handle sheet creation/updating.
        existing_values = worksheet.get_all_values()
        if not existing_values:
            # Empty sheet - create headers.
            worksheet.update("1:1", [target_headers])
            worksheet.format("1:1", {"textFormat": {"bold": True}})
            return target_headers
        else:
            # Sheet exists - append any metric columns not seen before.
            existing_headers = existing_values[0]
            new_metrics = [
                m for m in sorted(all_clean_metrics) if m not in existing_headers
            ]
            if new_metrics:
                updated_headers = existing_headers + new_metrics
                worksheet.update("1:1", [updated_headers])
                return updated_headers
            return existing_headers

    def _prepare_row_data(
        self, job_data: JobData, accuracy_metrics: Dict[str, float], headers: List[str]
    ) -> List[str]:
        """Prepare one spreadsheet row, aligned with *headers*.

        Metric cells are looked up as "<task_name>_<header>"; missing metrics
        render as empty strings, while legitimate zero values are kept
        (the previous truthiness check silently blanked 0.0 scores).
        """

        task_name = get_task_name(job_data)
        model_name = get_model_name(job_data)

        base_values = {
            "Model Name": model_name,
            "Task Name": task_name,
            "Invocation ID": job_data.invocation_id,
            "Job ID": job_data.job_id,
            "Executor": job_data.executor,
        }

        row_data = []
        for header in headers:
            if header in base_values:
                row_data.append(base_values[header])
            else:
                # Find metric with this clean name.
                full_metric = f"{task_name}_{header}"
                value = accuracy_metrics.get(full_metric)
                # Only absent metrics become blank; 0 / 0.0 are real scores.
                row_data.append("" if value is None else str(value))

        return row_data