nemo_evaluator_launcher-0.1.0rc2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of nemo-evaluator-launcher has been flagged as a potentially problematic release.
Files changed (57)
  1. nemo_evaluator_launcher/__init__.py +65 -0
  2. nemo_evaluator_launcher/api/__init__.py +24 -0
  3. nemo_evaluator_launcher/api/functional.py +641 -0
  4. nemo_evaluator_launcher/api/types.py +89 -0
  5. nemo_evaluator_launcher/api/utils.py +19 -0
  6. nemo_evaluator_launcher/cli/__init__.py +15 -0
  7. nemo_evaluator_launcher/cli/export.py +148 -0
  8. nemo_evaluator_launcher/cli/info.py +117 -0
  9. nemo_evaluator_launcher/cli/kill.py +39 -0
  10. nemo_evaluator_launcher/cli/ls_runs.py +113 -0
  11. nemo_evaluator_launcher/cli/ls_tasks.py +34 -0
  12. nemo_evaluator_launcher/cli/main.py +136 -0
  13. nemo_evaluator_launcher/cli/run.py +135 -0
  14. nemo_evaluator_launcher/cli/status.py +118 -0
  15. nemo_evaluator_launcher/cli/version.py +52 -0
  16. nemo_evaluator_launcher/common/__init__.py +16 -0
  17. nemo_evaluator_launcher/common/execdb.py +189 -0
  18. nemo_evaluator_launcher/common/helpers.py +157 -0
  19. nemo_evaluator_launcher/common/logging_utils.py +349 -0
  20. nemo_evaluator_launcher/common/mapping.py +310 -0
  21. nemo_evaluator_launcher/configs/__init__.py +15 -0
  22. nemo_evaluator_launcher/configs/default.yaml +28 -0
  23. nemo_evaluator_launcher/configs/deployment/nim.yaml +32 -0
  24. nemo_evaluator_launcher/configs/deployment/none.yaml +16 -0
  25. nemo_evaluator_launcher/configs/deployment/sglang.yaml +38 -0
  26. nemo_evaluator_launcher/configs/deployment/vllm.yaml +41 -0
  27. nemo_evaluator_launcher/configs/execution/lepton/default.yaml +92 -0
  28. nemo_evaluator_launcher/configs/execution/local.yaml +17 -0
  29. nemo_evaluator_launcher/configs/execution/slurm/default.yaml +33 -0
  30. nemo_evaluator_launcher/executors/__init__.py +22 -0
  31. nemo_evaluator_launcher/executors/base.py +97 -0
  32. nemo_evaluator_launcher/executors/lepton/__init__.py +16 -0
  33. nemo_evaluator_launcher/executors/lepton/deployment_helpers.py +589 -0
  34. nemo_evaluator_launcher/executors/lepton/executor.py +905 -0
  35. nemo_evaluator_launcher/executors/lepton/job_helpers.py +394 -0
  36. nemo_evaluator_launcher/executors/local/__init__.py +15 -0
  37. nemo_evaluator_launcher/executors/local/executor.py +491 -0
  38. nemo_evaluator_launcher/executors/local/run.template.sh +88 -0
  39. nemo_evaluator_launcher/executors/registry.py +38 -0
  40. nemo_evaluator_launcher/executors/slurm/__init__.py +15 -0
  41. nemo_evaluator_launcher/executors/slurm/executor.py +982 -0
  42. nemo_evaluator_launcher/exporters/__init__.py +36 -0
  43. nemo_evaluator_launcher/exporters/base.py +112 -0
  44. nemo_evaluator_launcher/exporters/gsheets.py +391 -0
  45. nemo_evaluator_launcher/exporters/local.py +488 -0
  46. nemo_evaluator_launcher/exporters/mlflow.py +448 -0
  47. nemo_evaluator_launcher/exporters/registry.py +40 -0
  48. nemo_evaluator_launcher/exporters/utils.py +669 -0
  49. nemo_evaluator_launcher/exporters/wandb.py +376 -0
  50. nemo_evaluator_launcher/package_info.py +35 -0
  51. nemo_evaluator_launcher/resources/mapping.toml +344 -0
  52. nemo_evaluator_launcher-0.1.0rc2.dist-info/METADATA +35 -0
  53. nemo_evaluator_launcher-0.1.0rc2.dist-info/RECORD +57 -0
  54. nemo_evaluator_launcher-0.1.0rc2.dist-info/WHEEL +5 -0
  55. nemo_evaluator_launcher-0.1.0rc2.dist-info/entry_points.txt +3 -0
  56. nemo_evaluator_launcher-0.1.0rc2.dist-info/licenses/LICENSE +451 -0
  57. nemo_evaluator_launcher-0.1.0rc2.dist-info/top_level.txt +1 -0
nemo_evaluator_launcher/exporters/__init__.py
@@ -0,0 +1,36 @@
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+ # SPDX-License-Identifier: Apache-2.0
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+ """Exporter registration and factory."""
+
+ from nemo_evaluator_launcher.exporters.gsheets import GSheetsExporter
+ from nemo_evaluator_launcher.exporters.local import LocalExporter
+ from nemo_evaluator_launcher.exporters.mlflow import MLflowExporter
+ from nemo_evaluator_launcher.exporters.registry import available_exporters, get_exporter
+ from nemo_evaluator_launcher.exporters.wandb import WandBExporter
+
+
+ def create_exporter(name: str, config: dict = None):
+     return get_exporter(name)(config or {})
+
+
+ __all__ = [
+     "GSheetsExporter",
+     "LocalExporter",
+     "MLflowExporter",
+     "WandBExporter",
+     "available_exporters",
+     "get_exporter",
+ ]
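
For orientation, a minimal usage sketch of the factory above, assuming the package is installed and at least one run is already recorded in the execution DB. The invocation id and spreadsheet name are placeholders, and available_exporters is assumed to take no arguments (registry.py is listed above but its body is not shown in this diff):

    from nemo_evaluator_launcher.exporters import available_exporters, create_exporter

    # "gsheets" is the only registration name visible in this hunk;
    # query the registry at runtime for the full list.
    print(available_exporters())

    exporter = create_exporter("gsheets", {"spreadsheet_name": "My Eval Results"})
    result = exporter.export_invocation("<invocation_id>")
    print(result["success"], result.get("metadata", {}).get("spreadsheet_url"))
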
nemo_evaluator_launcher/exporters/base.py
@@ -0,0 +1,112 @@
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+ # SPDX-License-Identifier: Apache-2.0
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+ """Base exporter interface for nemo-evaluator-launcher results."""
+
+ import os
+ from abc import ABC, abstractmethod
+ from dataclasses import asdict, dataclass, field
+ from pathlib import Path
+ from typing import Any, Dict
+
+ from nemo_evaluator_launcher.common.execdb import ExecutionDB, JobData
+
+
+ @dataclass
+ class ExportResult:
+     """Result of an export operation."""
+
+     success: bool
+     dest: str
+     message: str
+     metadata: Dict[str, Any] = field(default_factory=dict)
+
+
+ class BaseExporter(ABC):
+     """Base interface for result exporters."""
+
+     def __init__(self, config: Dict[str, Any] = None):
+         self.config = config or {}
+         self.db = ExecutionDB()
+
+     def export_invocation(self, invocation_id: str) -> Dict[str, Any]:
+         """Export all jobs in an invocation."""
+         jobs = self.db.get_jobs(invocation_id)
+
+         if not jobs:
+             return {
+                 "success": False,
+                 "error": f"No jobs found for invocation {invocation_id}",
+             }
+
+         results = {}
+         for job_id, job_data in jobs.items():
+             result = self.export_job(job_data)
+             results[job_id] = asdict(result)
+
+         return {"success": True, "invocation_id": invocation_id, "jobs": results}
+
+     @abstractmethod
+     def export_job(self, job_data: JobData) -> ExportResult:
+         """Export a single job's results."""
+         pass
+
+     @abstractmethod
+     def supports_executor(self, executor_type: str) -> bool:
+         """Check if this exporter supports the given executor type."""
+         pass
+
+     def get_job_paths(self, job_data: JobData) -> Dict[str, Any]:
+         """Get result paths based on executor type from job metadata."""
+         if job_data.executor == "local":
+             output_dir = Path(job_data.data["output_dir"])
+             return {
+                 "artifacts_dir": output_dir / "artifacts",
+                 "logs_dir": output_dir / "logs",
+                 "storage_type": "local_filesystem",
+             }
+
+         elif job_data.executor == "slurm":
+             return {
+                 "remote_path": job_data.data["remote_rundir_path"],
+                 "hostname": job_data.data["hostname"],
+                 "username": job_data.data["username"],
+                 "storage_type": "remote_ssh",
+             }
+
+         elif job_data.executor == "gitlab":
+             pipeline_id = job_data.data.get("pipeline_id")
+             if pipeline_id and os.getenv("CI"):
+                 return {
+                     "artifacts_dir": Path(f"artifacts/{pipeline_id}"),
+                     "storage_type": "gitlab_ci_local",
+                 }
+             else:
+                 return {
+                     "pipeline_id": pipeline_id,
+                     "project_id": job_data.data.get("project_id", 155749),
+                     "storage_type": "gitlab_remote",
+                 }
+
+         elif job_data.executor == "lepton":
+             output_dir = Path(job_data.data["output_dir"])
+             return {
+                 "artifacts_dir": output_dir / "artifacts",
+                 "logs_dir": output_dir / "logs",
+                 "storage_type": "local_filesystem",
+             }
+
+         else:
+             raise ValueError(f"Unknown executor: {job_data.executor}")
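
The two abstract methods plus get_job_paths above are the entire contract a concrete exporter implements. A minimal subclass sketch follows; the "jsonl" registration name and the output_file config key are illustrative and not part of the package, and it assumes that decorating with register_exporter (as the bundled exporters do) is what makes a class reachable through create_exporter:

    import json
    from pathlib import Path

    from nemo_evaluator_launcher.common.execdb import JobData
    from nemo_evaluator_launcher.exporters.base import BaseExporter, ExportResult
    from nemo_evaluator_launcher.exporters.registry import register_exporter


    @register_exporter("jsonl")
    class JsonlExporter(BaseExporter):
        """Hypothetical exporter: append one JSON line per exported job."""

        def supports_executor(self, executor_type: str) -> bool:
            # Restrict to executors whose results live on the local filesystem.
            return executor_type in ("local", "lepton")

        def export_job(self, job_data: JobData) -> ExportResult:
            paths = self.get_job_paths(job_data)
            dest = Path(self.config.get("output_file", "results.jsonl"))
            record = {
                "job_id": job_data.job_id,
                "invocation_id": job_data.invocation_id,
                "paths": {k: str(v) for k, v in paths.items()},
            }
            with dest.open("a") as f:
                f.write(json.dumps(record) + "\n")
            return ExportResult(success=True, dest=str(dest), message="wrote 1 record")

The inherited export_invocation then loops over the invocation's jobs and calls export_job on each, as shown in the base class above.
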
nemo_evaluator_launcher/exporters/gsheets.py
@@ -0,0 +1,391 @@
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+ # SPDX-License-Identifier: Apache-2.0
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+ """Google Sheets evaluation results exporter."""
+
+ import shutil
+ import tempfile
+ from pathlib import Path
+ from typing import Any, Dict, List
+
+ try:
+     import gspread
+
+     GSPREAD_AVAILABLE = True
+ except ImportError:
+     GSPREAD_AVAILABLE = False
+
+ from nemo_evaluator_launcher.common.execdb import JobData
+ from nemo_evaluator_launcher.common.logging_utils import logger
+ from nemo_evaluator_launcher.exporters.base import BaseExporter, ExportResult
+ from nemo_evaluator_launcher.exporters.local import LocalExporter
+ from nemo_evaluator_launcher.exporters.registry import register_exporter
+ from nemo_evaluator_launcher.exporters.utils import (
+     extract_accuracy_metrics,
+     extract_exporter_config,
+     get_model_name,
+     get_task_name,
+ )
+
+
+ @register_exporter("gsheets")
+ class GSheetsExporter(BaseExporter):
+     """Export accuracy metrics to Google Sheets with multi-invocation support."""
+
+     def supports_executor(self, executor_type: str) -> bool:
+         return True
+
+     def is_available(self) -> bool:
+         return GSPREAD_AVAILABLE
+
+     def _get_artifacts_locally(self, job_data: JobData) -> tuple[Path, str]:
+         """Get artifacts locally using LocalExporter."""
+         try:
+             temp_dir = tempfile.mkdtemp(prefix="gsheets_")
+             local_exporter = LocalExporter({"output_dir": temp_dir})
+             local_result = local_exporter.export_job(job_data)
+
+             if not local_result.success:
+                 logger.warning(f"LocalExporter failed: {local_result.message}")
+                 shutil.rmtree(temp_dir)
+                 return None, None
+
+             artifacts_dir = Path(local_result.dest) / "artifacts"
+             if not artifacts_dir.exists():
+                 logger.warning(f"No artifacts directory found in {local_result.dest}")
+                 shutil.rmtree(temp_dir)
+                 return None, None
+
+             return artifacts_dir, temp_dir
+
+         except Exception as e:
+             logger.error(f"Failed to get artifacts locally: {e}")
+             if "temp_dir" in locals() and temp_dir:
+                 shutil.rmtree(temp_dir)
+             return None, None
+
+     def export_invocation(self, invocation_id: str) -> Dict[str, Any]:
+         """Export all jobs in an invocation to Google Sheets."""
+         if not self.is_available():
+             return {"success": False, "error": "gspread package not installed"}
+
+         jobs = self.db.get_jobs(invocation_id)
+         if not jobs:
+             return {
+                 "success": False,
+                 "error": f"No jobs found for invocation {invocation_id}",
+             }
+
+         try:
+             # Connect to Google Sheets
+             service_account_file = self.config.get("service_account_file")
+             spreadsheet_name = self.config.get(
+                 "spreadsheet_name", "NeMo Evaluator Launcher Results"
+             )
+
+             if service_account_file:
+                 gc = gspread.service_account(filename=service_account_file)
+             else:
+                 gc = gspread.service_account()
+
+             # Get or create spreadsheet
+             try:
+                 sh = gc.open(spreadsheet_name)
+                 logger.info(f"Opened existing spreadsheet: {spreadsheet_name}")
+             except gspread.SpreadsheetNotFound:
+                 sh = gc.create(spreadsheet_name)
+                 logger.info(f"Created new spreadsheet: {spreadsheet_name}")
+                 sh.share("", perm_type="anyone", role="reader")
+
+             worksheet = sh.sheet1
+
+             # Extract metrics from ALL jobs first to determine headers
+             all_job_metrics = {}
+             results = {}
+
+             for job_id, job_data in jobs.items():
+                 try:
+                     # Get artifacts locally first
+                     artifacts_dir, temp_dir = self._get_artifacts_locally(job_data)
+                     if not artifacts_dir:
+                         results[job_id] = {
+                             "success": False,
+                             "message": "Failed to get artifacts locally",
+                         }
+                         all_job_metrics[job_id] = {}
+                         continue
+
+                     try:
+                         # Extract metrics from local artifacts
+                         accuracy_metrics = extract_accuracy_metrics(
+                             job_data,
+                             lambda jd: {
+                                 "artifacts_dir": artifacts_dir,
+                                 "storage_type": "local_filesystem",
+                             },
+                         )
+                         all_job_metrics[job_id] = accuracy_metrics
+
+                         if accuracy_metrics:
+                             results[job_id] = {
+                                 "success": True,
+                                 "message": f"Extracted {len(accuracy_metrics)} metrics",
+                                 "metadata": {"metrics_count": len(accuracy_metrics)},
+                             }
+                         else:
+                             results[job_id] = {
+                                 "success": False,
+                                 "message": "No accuracy metrics found",
+                             }
+                     finally:
+                         if temp_dir:
+                             shutil.rmtree(temp_dir)
+
+                 except Exception as e:
+                     logger.error(f"Failed to extract metrics for job {job_id}: {e}")
+                     results[job_id] = {
+                         "success": False,
+                         "message": f"Metric extraction failed: {str(e)}",
+                     }
+                     all_job_metrics[job_id] = {}
+
+             # Get/update headers based on all extracted metrics
+             headers = self._get_or_update_headers(worksheet, all_job_metrics)
+
+             # Add rows for jobs with metrics
+             rows_added = 0
+             for job_id, job_data in jobs.items():
+                 if results[job_id]["success"]:
+                     row_data = self._prepare_row_data(
+                         job_data, all_job_metrics[job_id], headers
+                     )
+                     worksheet.append_row(row_data)
+                     rows_added += 1
+
+             return {
+                 "success": True,
+                 "invocation_id": invocation_id,
+                 "jobs": results,
+                 "metadata": {
+                     "spreadsheet_name": spreadsheet_name,
+                     "spreadsheet_url": sh.url,
+                     "rows_added": rows_added,
+                     "total_columns": len(headers),
+                     "metric_columns": len(
+                         [
+                             h
+                             for h in headers
+                             if h
+                             not in ["Timestamp", "Invocation ID", "Job ID", "Executor"]
+                         ]
+                     ),
+                 },
+             }
+
+         except Exception as e:
+             logger.error(f"Sheets export failed for invocation {invocation_id}: {e}")
+             return {"success": False, "error": f"Sheets export failed: {str(e)}"}
+
+     def export_job(self, job_data: JobData) -> ExportResult:
+         """Export single job to Google Sheets."""
+         if not self.is_available():
+             return ExportResult(
+                 success=False, dest="gsheets", message="gspread package not installed"
+             )
+
+         try:
+             # Extract config from job_data
+             gsheets_config = extract_exporter_config(job_data, "gsheets", self.config)
+
+             # Get artifacts locally first
+             artifacts_dir, temp_dir = self._get_artifacts_locally(job_data)
+             if not artifacts_dir:
+                 return ExportResult(
+                     success=False,
+                     dest="gsheets",
+                     message="Failed to get artifacts locally",
+                 )
+
+             try:
+                 # Connect to Google Sheets
+                 service_account_file = gsheets_config.get("service_account_file")
+                 spreadsheet_name = gsheets_config.get(
+                     "spreadsheet_name", "NeMo Evaluator Launcher Results"
+                 )
+
+                 if service_account_file:
+                     gc = gspread.service_account(filename=service_account_file)
+                 else:
+                     gc = gspread.service_account()
+
+                 # Get or create spreadsheet
+                 try:
+                     sh = gc.open(spreadsheet_name)
+                 except gspread.SpreadsheetNotFound:
+                     sh = gc.create(spreadsheet_name)
+                     sh.share("", perm_type="anyone", role="reader")
+
+                 worksheet = sh.sheet1
+
+                 # Extract metrics from local artifacts
+                 log_metrics = gsheets_config.get("log_metrics", [])
+                 accuracy_metrics = extract_accuracy_metrics(
+                     job_data,
+                     lambda jd: {
+                         "artifacts_dir": artifacts_dir,
+                         "storage_type": "local_filesystem",
+                     },
+                     log_metrics,
+                 )
+
+                 if not accuracy_metrics:
+                     return ExportResult(
+                         success=False,
+                         dest="gsheets",
+                         message="No accuracy metrics found",
+                     )
+
+                 # Get/update headers for this job's metrics
+                 headers = self._get_or_update_headers(
+                     worksheet, {job_data.job_id: accuracy_metrics}
+                 )
+
+                 # Prepare and add single row for this job
+                 row_data = self._prepare_row_data(job_data, accuracy_metrics, headers)
+                 worksheet.append_row(row_data)
+
+                 return ExportResult(
+                     success=True,
+                     dest="gsheets",
+                     message=f"Added 1 row for job {job_data.job_id}",
+                     metadata={
+                         "spreadsheet_url": sh.url,
+                         "job_id": job_data.job_id,
+                         "metrics_logged": len(accuracy_metrics),
+                     },
+                 )
+
+             finally:
+                 if temp_dir:
+                     shutil.rmtree(temp_dir)
+
+         except Exception as e:
+             logger.error(f"GSheets export failed for job {job_data.job_id}: {e}")
+             return ExportResult(
+                 success=False, dest="gsheets", message=f"Failed: {str(e)}"
+             )
+
+     def export_multiple_invocations(self, invocation_ids: List[str]) -> Dict[str, Any]:
+         """Export multiple invocations to the same sheet."""
+         if not self.is_available():
+             return {"success": False, "error": "gspread package not installed"}
+
+         all_results = {}
+         total_rows_added = 0
+         spreadsheet_url = None
+
+         for invocation_id in invocation_ids:
+             result = self.export_invocation(invocation_id)
+             all_results[invocation_id] = result
+
+             if result["success"]:
+                 total_rows_added += result.get("metadata", {}).get("rows_added", 0)
+                 if not spreadsheet_url:
+                     spreadsheet_url = result.get("metadata", {}).get("spreadsheet_url")
+
+         return {
+             "success": True,
+             "invocations": all_results,
+             "metadata": {
+                 "total_invocations": len(invocation_ids),
+                 "total_rows_added": total_rows_added,
+                 "spreadsheet_url": spreadsheet_url,
+                 "spreadsheet_name": self.config.get(
+                     "spreadsheet_name", "NeMo Evaluator Launcher Results"
+                 ),
+             },
+         }
+
+     def _get_or_update_headers(
+         self, worksheet, all_metrics: Dict[str, Dict[str, float]]
+     ) -> List[str]:
+         """Get existing headers or create/update them dynamically."""
+
+         # Base columns
+         base_headers = [
+             "Model Name",
+             "Task Name",
+             "Invocation ID",
+             "Job ID",
+             "Executor",
+         ]
+
+         # Get all unique clean metric names (everything after first underscore)
+         all_clean_metrics = set()
+         for job_metrics in all_metrics.values():
+             for full_name in job_metrics.keys():
+                 clean_name = (
+                     full_name.split("_", 1)[1] if "_" in full_name else full_name
+                 )
+                 all_clean_metrics.add(clean_name)
+
+         target_headers = base_headers + sorted(all_clean_metrics)
+
+         # Handle sheet creation/updating
+         existing_values = worksheet.get_all_values()
+         if not existing_values:
+             # Empty sheet - create headers
+             worksheet.update("1:1", [target_headers])
+             worksheet.format("1:1", {"textFormat": {"bold": True}})
+             return target_headers
+         else:
+             # Sheet exists - just update the entire header row
+             existing_headers = existing_values[0]
+             new_metrics = [
+                 m for m in sorted(all_clean_metrics) if m not in existing_headers
+             ]
+             if new_metrics:
+                 updated_headers = existing_headers + new_metrics
+                 worksheet.update("1:1", [updated_headers])
+                 return updated_headers
+             return existing_headers
+
+     def _prepare_row_data(
+         self, job_data: JobData, accuracy_metrics: Dict[str, float], headers: List[str]
+     ) -> List[str]:
+         """Prepare row data dynamically."""
+
+         task_name = get_task_name(job_data)
+         model_name = get_model_name(job_data)
+
+         row_data = []
+         for header in headers:
+             if header == "Model Name":
+                 row_data.append(model_name)
+             elif header == "Task Name":
+                 row_data.append(task_name)
+             elif header == "Invocation ID":
+                 row_data.append(job_data.invocation_id)
+             elif header == "Job ID":
+                 row_data.append(job_data.job_id)
+             elif header == "Executor":
+                 row_data.append(job_data.executor)
+             else:
+                 # Find metric with this clean name
+                 full_metric = f"{task_name}_{header}"
+                 value = accuracy_metrics.get(full_metric, "")
+                 row_data.append(str(value) if value else "")
+
+         return row_data
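
The configuration keys this exporter actually reads are service_account_file, spreadsheet_name, and (in export_job, via extract_exporter_config) log_metrics. A hedged end-to-end sketch with placeholder paths, placeholder invocation ids, and an illustrative metric filter:

    from nemo_evaluator_launcher.exporters.gsheets import GSheetsExporter

    exporter = GSheetsExporter(
        {
            # If omitted, gspread.service_account() falls back to its
            # default credential lookup.
            "service_account_file": "/path/to/service-account.json",
            "spreadsheet_name": "NeMo Evaluator Launcher Results",
            # Only consulted by the single-job export_job path shown above.
            "log_metrics": ["accuracy"],
        }
    )

    # One row is appended per successful job; repeated calls reuse the same
    # spreadsheet and extend the header row with any new metric columns.
    summary = exporter.export_multiple_invocations(["<invocation_id_1>", "<invocation_id_2>"])
    print(summary["metadata"]["spreadsheet_url"])
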