harbor-langsmith 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: harbor-langsmith
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: LangSmith plugin for Harbor jobs.
|
|
5
|
+
Author: Alex Shaw
|
|
6
|
+
Author-email: Alex Shaw <alexgshaw64@gmail.com>
|
|
7
|
+
License-Expression: Apache-2.0
|
|
8
|
+
Requires-Dist: harbor>=0.13.0
|
|
9
|
+
Requires-Dist: requests>=2.32.4
|
|
10
|
+
Requires-Python: >=3.12
|
|
11
|
+
Project-URL: Repository, https://github.com/harbor-framework/harbor
|
|
12
|
+
Project-URL: Issues, https://github.com/harbor-framework/harbor/issues
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
|
|
15
|
+
# harbor-langsmith
|
|
16
|
+
|
|
17
|
+
LangSmith plugin for Harbor jobs.
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install "harbor[langsmith]"
|
|
21
|
+
export LANGSMITH_API_KEY=...
|
|
22
|
+
harbor run ... --plugin langsmith
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
You can also pass the full import path:
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
harbor run ... --plugin harbor_langsmith:LangSmithPlugin
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
Optional environment variables:
|
|
32
|
+
|
|
33
|
+
- `HARBOR_LANGSMITH_DATASET`
|
|
34
|
+
- `HARBOR_LANGSMITH_EXPERIMENT`
|
|
35
|
+
- `LANGSMITH_ENDPOINT`
|
|
36
|
+
- `LANGSMITH_WORKSPACE_ID`
|
|
37
|
+
- `HARBOR_LANGSMITH_SYNC_DATASET=false`
|
|
38
|
+
- `HARBOR_LANGSMITH_FAIL_FAST=true`
|
|
39
|
+
|
|
40
|
+
Plugin kwargs (CLI `--pk` or job config `kwargs:`) mirror the constructor options:
|
|
41
|
+
`dataset_name`, `experiment_name`, `endpoint`, `api_key`, `workspace_id`,
|
|
42
|
+
`sync_dataset`, and `fail_fast`.
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# harbor-langsmith
|
|
2
|
+
|
|
3
|
+
LangSmith plugin for Harbor jobs.
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
pip install "harbor[langsmith]"
|
|
7
|
+
export LANGSMITH_API_KEY=...
|
|
8
|
+
harbor run ... --plugin langsmith
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
You can also pass the full import path:
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
harbor run ... --plugin harbor_langsmith:LangSmithPlugin
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
Optional environment variables:
|
|
18
|
+
|
|
19
|
+
- `HARBOR_LANGSMITH_DATASET`
|
|
20
|
+
- `HARBOR_LANGSMITH_EXPERIMENT`
|
|
21
|
+
- `LANGSMITH_ENDPOINT`
|
|
22
|
+
- `LANGSMITH_WORKSPACE_ID`
|
|
23
|
+
- `HARBOR_LANGSMITH_SYNC_DATASET=false`
|
|
24
|
+
- `HARBOR_LANGSMITH_FAIL_FAST=true`
|
|
25
|
+
|
|
26
|
+
Plugin kwargs (CLI `--pk` or job config `kwargs:`) mirror the constructor options:
|
|
27
|
+
`dataset_name`, `experiment_name`, `endpoint`, `api_key`, `workspace_id`,
|
|
28
|
+
`sync_dataset`, and `fail_fast`.
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "harbor-langsmith"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "LangSmith plugin for Harbor jobs."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
license = "Apache-2.0"
|
|
7
|
+
authors = [{ name = "Alex Shaw", email = "alexgshaw64@gmail.com" }]
|
|
8
|
+
requires-python = ">=3.12"
|
|
9
|
+
dependencies = [
|
|
10
|
+
"harbor>=0.13.0",
|
|
11
|
+
"requests>=2.32.4",
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
[project.entry-points."harbor.plugins"]
|
|
15
|
+
langsmith = "harbor_langsmith:LangSmithPlugin"
|
|
16
|
+
|
|
17
|
+
[project.urls]
|
|
18
|
+
Repository = "https://github.com/harbor-framework/harbor"
|
|
19
|
+
Issues = "https://github.com/harbor-framework/harbor/issues"
|
|
20
|
+
|
|
21
|
+
[tool.uv.sources]
|
|
22
|
+
harbor = { workspace = true }
|
|
23
|
+
|
|
24
|
+
[build-system]
|
|
25
|
+
requires = ["uv_build>=0.10.8,<0.11.0"]
|
|
26
|
+
build-backend = "uv_build"
|
|
27
|
+
|
|
28
|
+
[tool.uv.build-backend]
|
|
29
|
+
module-name = "harbor_langsmith"
|
|
@@ -0,0 +1,496 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import os
|
|
3
|
+
import tomllib
|
|
4
|
+
from datetime import datetime, timezone
|
|
5
|
+
from typing import Any
|
|
6
|
+
from uuid import NAMESPACE_URL, uuid5
|
|
7
|
+
|
|
8
|
+
import requests
|
|
9
|
+
|
|
10
|
+
from harbor.job import Job
|
|
11
|
+
from harbor.models.job.plugin import BaseJobPlugin
|
|
12
|
+
from harbor.models.job.result import JobResult
|
|
13
|
+
from harbor.trial.hooks import TrialEvent, TrialHookEvent
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class LangSmithPlugin(BaseJobPlugin):
|
|
17
|
+
def __init__(
    self,
    *,
    dataset_name: str | None = None,
    experiment_name: str | None = None,
    endpoint: str | None = None,
    api_key: str | None = None,
    workspace_id: str | None = None,
    sync_dataset: bool | None = None,
    fail_fast: bool | None = None,
):
    """Resolve plugin settings from keyword arguments and the environment.

    Each string option uses the explicit argument when it is truthy and
    otherwise falls back to its environment variable. The two boolean
    options fall back to the environment only when the argument is None.

    Args:
        dataset_name: Dataset name; env fallback HARBOR_LANGSMITH_DATASET.
        experiment_name: Experiment name; env fallback
            HARBOR_LANGSMITH_EXPERIMENT.
        endpoint: API endpoint; env fallback LANGSMITH_ENDPOINT.
        api_key: API key; env fallback LANGSMITH_API_KEY.
        workspace_id: Workspace id; env fallback LANGSMITH_WORKSPACE_ID.
        sync_dataset: Whether to sync the dataset and examples during
            setup; env fallback HARBOR_LANGSMITH_SYNC_DATASET (default True).
        fail_fast: Whether reporting errors are re-raised; env fallback
            HARBOR_LANGSMITH_FAIL_FAST (default False).

    Raises:
        ValueError: If HARBOR_LANGSMITH_REQUEST_TIMEOUT is set to a value
            that float() cannot parse.
    """
    super().__init__()

    env = os.getenv
    self.dataset_name = dataset_name or env("HARBOR_LANGSMITH_DATASET")
    self.experiment_name = experiment_name or env("HARBOR_LANGSMITH_EXPERIMENT")
    self.endpoint = endpoint or env("LANGSMITH_ENDPOINT")
    self.api_key = api_key or env("LANGSMITH_API_KEY")
    self.workspace_id = workspace_id or env("LANGSMITH_WORKSPACE_ID")

    # An explicit True/False argument always wins over the environment.
    if sync_dataset is None:
        sync_dataset = self._env_bool("HARBOR_LANGSMITH_SYNC_DATASET", default=True)
    self.sync_dataset = sync_dataset

    if fail_fast is None:
        fail_fast = self._env_bool("HARBOR_LANGSMITH_FAIL_FAST", default=False)
    self.fail_fast = fail_fast

    # Passed as the `timeout` of every HTTP request issued by _request.
    timeout_text = env("HARBOR_LANGSMITH_REQUEST_TIMEOUT", "120")
    self.request_timeout = float(timeout_text)

    self._session = requests.Session()
    self._base_url = ""  # filled in by _setup
    self._dataset_id: str | None = None
    self._experiment_id: str | None = None
    # task name (several aliases) -> example id
    self._example_ids: dict[str, str] = {}
    # trial name -> root run id
    self._run_ids: dict[str, str] = {}
    # (trial name, phase event) -> phase run id
    self._phase_run_ids: dict[tuple[str, TrialEvent], str] = {}
|
|
56
|
+
|
|
57
|
+
async def on_job_start(self, job: Job) -> None:
    """Run setup for the job, then subscribe to its trial hooks.

    `_setup` performs blocking HTTP calls, so it is executed through
    `asyncio.to_thread`. Hooks are registered only after setup returns;
    if setup raises, nothing is registered.

    Args:
        job: The job whose trial events should be reported.
    """
    await asyncio.to_thread(self._setup, job)

    # One handler serves every hook; _handle_event_sync dispatches on the
    # event type.
    on_event = self._handle_event
    job.on_trial_started(on_event)
    job.on_environment_started(on_event)
    job.on_agent_started(on_event)
    job.on_verification_started(on_event)
    job.on_trial_ended(on_event)
    job.on_trial_cancelled(on_event)
|
|
65
|
+
|
|
66
|
+
async def on_job_end(self, job_result: JobResult) -> None:
    """Stamp the experiment session with its end time.

    Does nothing when no experiment was created (``_experiment_id`` is
    None). The PATCH is sent from a worker thread via `asyncio.to_thread`.

    Args:
        job_result: Result of the finished job; its ``finished_at`` is used
            as the end time, falling back to the current UTC time.

    Raises:
        Exception: Whatever the request raised, but only when ``fail_fast``
            is set. Otherwise the failure is logged as a warning and
            suppressed so reporting problems do not fail the job.
    """
    # Function-scope import so the module's import block stays untouched.
    import logging

    if self._experiment_id is None:
        return
    try:
        await asyncio.to_thread(
            self._request,
            "PATCH",
            f"/sessions/{self._experiment_id}",
            json={
                "end_time": self._format_time(
                    job_result.finished_at or datetime.now(timezone.utc)
                )
            },
            ok_statuses={200, 202, 204},
        )
    except Exception:
        if self.fail_fast:
            raise
        # Best-effort mode: keep going, but leave a trace instead of
        # silently discarding the error.
        logging.getLogger(__name__).warning(
            "Failed to close LangSmith experiment %s; continuing",
            self._experiment_id,
            exc_info=True,
        )
|
|
84
|
+
|
|
85
|
+
def _setup(self, job: Any) -> None:
    """Configure the HTTP session and create the experiment for `job`.

    Steps, in order: validate the API key, derive the base URL (appending
    ``/api/v1`` unless the endpoint already ends with it), install auth
    headers on the session, optionally sync the dataset and its examples,
    then POST the experiment session. ``_experiment_id`` is assigned only
    after that POST returns.

    Args:
        job: Job object; ``id``, ``job_dir`` and ``config.job_name`` are read.

    Raises:
        RuntimeError: If no API key is configured.
    """
    if not self.api_key:
        raise RuntimeError("LANGSMITH_API_KEY is required for LangSmithPlugin")

    root = (self.endpoint or "https://api.smith.langchain.com").rstrip("/")
    if not root.endswith("/api/v1"):
        root = f"{root}/api/v1"
    self._base_url = root

    auth_headers = {"x-api-key": self.api_key}
    if self.workspace_id:
        auth_headers["LANGSMITH-WORKSPACE-ID"] = self.workspace_id
    self._session.headers.update(auth_headers)

    if self.sync_dataset:
        self._dataset_id = self._get_or_create_dataset(job)
        self._example_ids = self._get_or_create_examples(job)

    # Derived from the job id, so repeating setup for a job reuses the id.
    experiment_id = self._stable_uuid(job.id, "experiment")
    job_name = job.config.job_name
    payload: dict[str, Any] = {
        "id": experiment_id,
        "name": self.experiment_name or job_name,
        "start_time": self._format_time(datetime.now(timezone.utc)),
        "extra": {
            "metadata": {
                "harbor_job_id": str(job.id),
                "harbor_job_name": job_name,
                "harbor_job_dir": str(job.job_dir),
            }
        },
    }
    if self._dataset_id is not None:
        payload["reference_dataset_id"] = self._dataset_id

    # 409 is whitelisted alongside 200/201 so a conflict is not an error.
    self._request("POST", "/sessions", json=payload, ok_statuses={200, 201, 409})
    self._experiment_id = experiment_id
|
|
120
|
+
|
|
121
|
+
async def _handle_event(self, event: TrialHookEvent) -> None:
    """Async hook entry point: process `event` in a worker thread.

    Args:
        event: The trial hook event delivered by the job.

    Raises:
        Exception: Whatever the synchronous handler raised, but only when
            ``fail_fast`` is set. Otherwise the failure is logged as a
            warning and suppressed so reporting problems do not interrupt
            the trial.
    """
    # Function-scope import so the module's import block stays untouched.
    import logging

    try:
        await asyncio.to_thread(self._handle_event_sync, event)
    except Exception:
        if self.fail_fast:
            raise
        # Best-effort mode: record the failure rather than dropping it.
        logging.getLogger(__name__).warning(
            "LangSmith reporting failed for a trial hook event; continuing",
            exc_info=True,
        )
|
|
127
|
+
|
|
128
|
+
def _handle_event_sync(self, event: TrialHookEvent) -> None:
    """Dispatch a trial hook event to the matching reporting routine.

    Events are ignored until an experiment exists. START creates the root
    run, the three ``*_START`` phase events create child runs, and
    END/CANCEL finish the trial. Any other event type is ignored.

    Args:
        event: The trial hook event to process.
    """
    if self._experiment_id is None:
        return

    kind = event.event
    if kind == TrialEvent.START:
        self._create_root_run(event)
    elif kind in {
        TrialEvent.ENVIRONMENT_START,
        TrialEvent.AGENT_START,
        TrialEvent.VERIFICATION_START,
    }:
        self._create_phase_run(event)
    elif kind in {TrialEvent.END, TrialEvent.CANCEL}:
        self._finish_trial(event)
|
|
143
|
+
|
|
144
|
+
def _get_or_create_dataset(self, job: Any) -> str | None:
|
|
145
|
+
dataset_name = self.dataset_name or self._default_dataset_name(job)
|
|
146
|
+
if dataset_name is None:
|
|
147
|
+
return None
|
|
148
|
+
|
|
149
|
+
existing = self._find_dataset(dataset_name)
|
|
150
|
+
if existing is not None:
|
|
151
|
+
return existing
|
|
152
|
+
|
|
153
|
+
payload = {
|
|
154
|
+
"name": dataset_name,
|
|
155
|
+
"description": f"Harbor dataset synced from job {job.config.job_name}",
|
|
156
|
+
"metadata": {"source": "harbor"},
|
|
157
|
+
}
|
|
158
|
+
response = self._request(
|
|
159
|
+
"POST", "/datasets", json=payload, ok_statuses={200, 201, 409}
|
|
160
|
+
)
|
|
161
|
+
if response.status_code == 409:
|
|
162
|
+
return self._find_dataset(dataset_name)
|
|
163
|
+
return self._extract_id(response.json())
|
|
164
|
+
|
|
165
|
+
def _get_or_create_examples(self, job: Any) -> dict[str, str]:
    """Create or refresh one dataset example per task of `job`.

    Each example id is derived from the dataset id and the task's short
    name (last ``/`` segment). A 409 on create is followed by a PATCH that
    overwrites the example's inputs, outputs and metadata.

    Args:
        job: Job object; its ``_task_configs`` are iterated.

    Returns:
        Mapping from task-name aliases to example id. The aliases are the
        short name, the full name, and, when ``task.toml`` declares a name,
        that name and its short form. Empty when no dataset id is set.
    """
    if self._dataset_id is None:
        return {}

    example_ids: dict[str, str] = {}
    for task_config in job._task_configs:
        task_id = task_config.get_task_id()
        full_name = task_id.get_name()
        short_name = full_name.split("/")[-1]
        example_id = self._stable_uuid(self._dataset_id, "example", short_name)

        inputs = {
            "task_name": short_name,
            "instruction": self._read_instruction(task_config),
            "task_id": task_id.model_dump(mode="json"),
        }
        outputs: dict[str, Any] = {}
        metadata = {
            "source": "harbor",
            "task_config": task_config.model_dump(mode="json"),
        }

        created = self._request(
            "POST",
            "/examples",
            json={
                "id": example_id,
                "dataset_id": self._dataset_id,
                "inputs": inputs,
                "outputs": outputs,
                "metadata": metadata,
            },
            ok_statuses={200, 201, 409},
        )
        if created.status_code == 409:
            # Example already exists: bring its content up to date.
            self._request(
                "PATCH",
                f"/examples/{example_id}",
                json={"inputs": inputs, "outputs": outputs, "metadata": metadata},
                ok_statuses={200, 202, 204},
            )

        aliases = [short_name, full_name]
        configured = self._read_configured_task_name(task_config)
        if configured is not None:
            aliases += [configured, configured.split("/")[-1]]
        for alias in aliases:
            example_ids[alias] = example_id
    return example_ids
|
|
211
|
+
|
|
212
|
+
def _create_root_run(self, event: TrialHookEvent) -> None:
    """Create the top-level run that represents one trial.

    The run id is derived from the job id and trial name and remembered in
    ``_run_ids`` before the request is sent. When an example id is known
    for the task (by full name, then by last ``/`` segment) the run is
    linked to it through ``reference_example_id``.

    Args:
        event: Trial hook event; its config, task name and timestamp are read.
    """
    config = event.config
    trial_name = config.trial_name

    run_id = self._stable_uuid(config.job_id, "trial", trial_name)
    self._run_ids[trial_name] = run_id

    example_id = self._example_ids.get(event.task_name) or self._example_ids.get(
        event.task_name.split("/")[-1]
    )

    payload: dict[str, Any] = {
        "id": run_id,
        "name": trial_name,
        "run_type": "chain",
        "inputs": {
            "task_name": event.task_name,
            "instruction": self._read_instruction(config.task),
            "task_id": config.task.get_task_id().model_dump(mode="json"),
            "trial_name": trial_name,
            "agent": config.agent.name or config.agent.import_path,
            "model": config.agent.model_name,
        },
        "start_time": self._format_time(event.timestamp),
        "session_id": self._experiment_id,
    }
    # The key is left out entirely when there is no example to reference.
    if example_id is not None:
        payload["reference_example_id"] = example_id
    payload["extra"] = {
        "metadata": self._trial_metadata(event),
        "tags": ["harbor", "harbor-trial"],
    }

    self._request("POST", "/runs", json=payload, ok_statuses={200, 201, 409})
|
|
243
|
+
|
|
244
|
+
def _create_phase_run(self, event: TrialHookEvent) -> None:
    """Create a child run for one phase of a trial.

    If no root run is recorded for the trial yet, one is created first so
    the phase run always has a parent. The phase run id is stored in
    ``_phase_run_ids`` under ``(trial name, event type)``.

    Args:
        event: Phase-start hook event.
    """
    trial_name = event.config.trial_name

    parent_run_id = self._run_ids.get(trial_name)
    if parent_run_id is None:
        self._create_root_run(event)
        parent_run_id = self._run_ids[trial_name]

    phase = event.event
    phase_label = phase.value
    run_id = self._stable_uuid(
        event.config.job_id, "trial", trial_name, "phase", phase_label
    )
    self._phase_run_ids[(trial_name, phase)] = run_id

    body = {
        "id": run_id,
        "name": phase_label,
        "run_type": "chain",
        "inputs": {"phase": phase_label},
        "start_time": self._format_time(event.timestamp),
        "session_id": self._experiment_id,
        "parent_run_id": parent_run_id,
        "extra": {
            "metadata": self._trial_metadata(event),
            "tags": ["harbor", "harbor-phase", phase_label],
        },
    }
    self._request("POST", "/runs", json=body, ok_statuses={200, 201, 409})
|
|
272
|
+
|
|
273
|
+
def _finish_trial(self, event: TrialHookEvent) -> None:
    """Close the root run of a trial and report its results.

    Creates the root run first if none is recorded. The end time is the
    result's ``finished_at`` when available, otherwise the event timestamp.
    When a result is present, phase runs are closed and feedback is posted
    after the root run has been patched.

    Args:
        event: END or CANCEL hook event; ``event.result`` may be None.
    """
    trial_name = event.config.trial_name
    run_id = self._run_ids.get(trial_name)
    if run_id is None:
        self._create_root_run(event)
        run_id = self._run_ids[trial_name]

    result = event.result
    outputs = self._trial_outputs(result)
    if result and result.finished_at:
        ended_at = result.finished_at
    else:
        ended_at = event.timestamp
    payload: dict[str, Any] = {
        "outputs": outputs,
        "end_time": self._format_time(ended_at),
    }

    if result and result.exception_info is not None:
        failure = result.exception_info
        payload["error"] = (
            f"{failure.exception_type}: "
            f"{failure.exception_message}"
        )

    self._request(
        "PATCH", f"/runs/{run_id}", json=payload, ok_statuses={200, 202, 204}
    )

    if result is None:
        return
    self._finish_phase_runs(result)
    self._create_feedback(run_id, result)
|
|
298
|
+
|
|
299
|
+
def _finish_phase_runs(self, result: Any) -> None:
    """Set the end time on every phase run recorded for a trial.

    A phase is skipped when the result carries no timing for it, when no
    phase run was recorded for the trial, or when the timing has no
    ``finished_at``.

    Args:
        result: Trial result; ``environment_setup``, ``agent_execution``,
            ``verifier`` and ``trial_name`` are read.
    """
    phase_timings = {
        TrialEvent.ENVIRONMENT_START: result.environment_setup,
        TrialEvent.AGENT_START: result.agent_execution,
        TrialEvent.VERIFICATION_START: result.verifier,
    }
    for phase, timing in phase_timings.items():
        # No timing means nothing to close; the lookup is skipped as well.
        run_id = (
            None
            if timing is None
            else self._phase_run_ids.get((result.trial_name, phase))
        )
        if run_id is None or timing.finished_at is None:
            continue
        self._request(
            "PATCH",
            f"/runs/{run_id}",
            json={
                "outputs": {"phase": phase.value},
                "end_time": self._format_time(timing.finished_at),
            },
            ok_statuses={200, 202, 204},
        )
|
|
320
|
+
|
|
321
|
+
def _create_feedback(self, run_id: str, result: Any) -> None:
    """Post feedback entries for a finished trial run.

    One entry is posted per verifier reward (key and score taken from the
    rewards mapping), followed by a ``harbor_error`` entry when the result
    carries exception info. Feedback ids are derived from the run id and
    key.

    Args:
        run_id: Id of the root run the feedback is attached to.
        result: Trial result; ``verifier_result`` and ``exception_info``
            are read.
    """

    def post(body: dict[str, Any]) -> None:
        self._request(
            "POST", "/feedback", json=body, ok_statuses={200, 201, 409}
        )

    verifier_result = result.verifier_result
    if verifier_result is not None:
        for key, score in verifier_result.rewards.items():
            post(
                {
                    "id": self._stable_uuid(run_id, "feedback", key),
                    "run_id": run_id,
                    "key": key,
                    "score": score,
                    "feedback_source_type": "api",
                }
            )

    failure = result.exception_info
    if failure is not None:
        post(
            {
                "id": self._stable_uuid(run_id, "feedback", "harbor_error"),
                "run_id": run_id,
                "key": "harbor_error",
                "score": 1,
                "value": failure.exception_type,
                "comment": failure.exception_message,
                "feedback_source_type": "api",
            }
        )
|
|
351
|
+
|
|
352
|
+
def _trial_outputs(self, result: Any | None) -> dict[str, Any]:
|
|
353
|
+
if result is None:
|
|
354
|
+
return {}
|
|
355
|
+
n_input, n_cache, n_output, cost = result.compute_token_cost_totals()
|
|
356
|
+
agent_metadata = (
|
|
357
|
+
result.agent_result.metadata
|
|
358
|
+
if result.agent_result is not None
|
|
359
|
+
and result.agent_result.metadata is not None
|
|
360
|
+
else None
|
|
361
|
+
)
|
|
362
|
+
return {
|
|
363
|
+
"task_name": result.task_name,
|
|
364
|
+
"trial_name": result.trial_name,
|
|
365
|
+
"agent_output": (
|
|
366
|
+
agent_metadata.get("answer_written") if agent_metadata else None
|
|
367
|
+
),
|
|
368
|
+
"agent_metadata": agent_metadata,
|
|
369
|
+
"rewards": (
|
|
370
|
+
result.verifier_result.rewards
|
|
371
|
+
if result.verifier_result is not None
|
|
372
|
+
else None
|
|
373
|
+
),
|
|
374
|
+
"exception": (
|
|
375
|
+
result.exception_info.model_dump(mode="json")
|
|
376
|
+
if result.exception_info is not None
|
|
377
|
+
else None
|
|
378
|
+
),
|
|
379
|
+
"tokens": {
|
|
380
|
+
"input": n_input,
|
|
381
|
+
"cache": n_cache,
|
|
382
|
+
"output": n_output,
|
|
383
|
+
},
|
|
384
|
+
"cost_usd": cost,
|
|
385
|
+
}
|
|
386
|
+
|
|
387
|
+
def _trial_metadata(self, event: TrialHookEvent) -> dict[str, Any]:
    """Collect the Harbor metadata attached to every run of a trial.

    Args:
        event: Trial hook event whose ids, names and config are read.

    Returns:
        A dict with trial/task/job identifiers, the agent (its name, or its
        import path when the name is falsy), the model name, the serialized
        trial config, and a fixed ``source`` marker.
    """
    config = event.config
    agent = config.agent
    metadata: dict[str, Any] = {
        "harbor_trial_id": event.trial_id,
        "harbor_trial_name": config.trial_name,
        "harbor_task_name": event.task_name,
        "harbor_job_id": str(config.job_id),
        "harbor_agent": agent.name or agent.import_path,
        "harbor_model": agent.model_name,
        "harbor_trial_config": config.model_dump(mode="json"),
        "source": "harbor",
    }
    return metadata
|
|
398
|
+
|
|
399
|
+
@staticmethod
|
|
400
|
+
def _read_instruction(task_config: Any) -> str | None:
|
|
401
|
+
try:
|
|
402
|
+
instruction_path = task_config.get_local_path() / "instruction.md"
|
|
403
|
+
if instruction_path.exists():
|
|
404
|
+
return instruction_path.read_text()
|
|
405
|
+
except Exception:
|
|
406
|
+
return None
|
|
407
|
+
return None
|
|
408
|
+
|
|
409
|
+
@staticmethod
|
|
410
|
+
def _read_configured_task_name(task_config: Any) -> str | None:
|
|
411
|
+
try:
|
|
412
|
+
config_path = task_config.get_local_path() / "task.toml"
|
|
413
|
+
if not config_path.exists():
|
|
414
|
+
return None
|
|
415
|
+
data = tomllib.loads(config_path.read_text())
|
|
416
|
+
task = data.get("task")
|
|
417
|
+
if isinstance(task, dict):
|
|
418
|
+
name = task.get("name")
|
|
419
|
+
return name if isinstance(name, str) else None
|
|
420
|
+
except Exception:
|
|
421
|
+
return None
|
|
422
|
+
return None
|
|
423
|
+
|
|
424
|
+
def _default_dataset_name(self, job: Any) -> str | None:
|
|
425
|
+
if len(job.config.datasets) == 1:
|
|
426
|
+
dataset = job.config.datasets[0]
|
|
427
|
+
name = dataset.name or (dataset.path.name if dataset.path else None)
|
|
428
|
+
if name is not None and dataset.version:
|
|
429
|
+
return f"{name}@{dataset.version}"
|
|
430
|
+
return name
|
|
431
|
+
if len(job.config.tasks) == 1:
|
|
432
|
+
task_id = job.config.tasks[0].get_task_id()
|
|
433
|
+
return task_id.get_name().split("/")[-1]
|
|
434
|
+
if len(job.config.tasks) > 1:
|
|
435
|
+
return "harbor-adhoc-tasks"
|
|
436
|
+
return None
|
|
437
|
+
|
|
438
|
+
def _find_dataset(self, dataset_name: str) -> str | None:
|
|
439
|
+
for params in ({"name": dataset_name}, {"dataset_name": dataset_name}):
|
|
440
|
+
response = self._request(
|
|
441
|
+
"GET", "/datasets", params=params, ok_statuses={200, 404}
|
|
442
|
+
)
|
|
443
|
+
if response.status_code == 404:
|
|
444
|
+
continue
|
|
445
|
+
datasets = response.json()
|
|
446
|
+
if isinstance(datasets, dict):
|
|
447
|
+
datasets = datasets.get("datasets") or datasets.get("items") or []
|
|
448
|
+
if not isinstance(datasets, list):
|
|
449
|
+
continue
|
|
450
|
+
for dataset in datasets:
|
|
451
|
+
if dataset.get("name") == dataset_name:
|
|
452
|
+
return self._extract_id(dataset)
|
|
453
|
+
return None
|
|
454
|
+
|
|
455
|
+
def _request(
    self,
    method: str,
    path: str,
    *,
    ok_statuses: set[int],
    **kwargs: Any,
) -> requests.Response:
    """Send one HTTP request through the shared session.

    Args:
        method: HTTP method name.
        path: Path appended to the base URL (expected to start with ``/``).
        ok_statuses: Status codes accepted without any error check.
        **kwargs: Forwarded to ``Session.request`` (e.g. ``json``, ``params``).

    Returns:
        The response object.

    Raises:
        requests.HTTPError: When the status is not in `ok_statuses` and
            ``raise_for_status()`` treats it as an error.
    """
    url = f"{self._base_url}{path}"
    response = self._session.request(
        method, url, timeout=self.request_timeout, **kwargs
    )
    if response.status_code in ok_statuses:
        return response
    # NOTE(review): a status outside ok_statuses that raise_for_status()
    # does not reject (presumably other non-error codes) is still returned
    # to the caller; ok_statuses therefore acts as a whitelist of
    # tolerated error codes such as 404/409.
    response.raise_for_status()
    return response
|
|
472
|
+
|
|
473
|
+
@staticmethod
|
|
474
|
+
def _extract_id(payload: Any) -> str | None:
|
|
475
|
+
if isinstance(payload, dict):
|
|
476
|
+
value = payload.get("id") or payload.get("dataset_id")
|
|
477
|
+
return str(value) if value is not None else None
|
|
478
|
+
return None
|
|
479
|
+
|
|
480
|
+
@staticmethod
def _stable_uuid(*parts: Any) -> str:
    """Derive a deterministic UUID string from the given parts.

    Parts that are None are dropped; the rest are stringified, joined with
    ``:``, prefixed with ``harbor-langsmith:`` and hashed with `uuid5`
    under ``NAMESPACE_URL``. Equal inputs therefore always give equal ids.

    Args:
        *parts: Values identifying the entity.

    Returns:
        The UUID in its canonical string form.
    """
    present = [str(part) for part in parts if part is not None]
    seed = "harbor-langsmith:" + ":".join(present)
    return str(uuid5(NAMESPACE_URL, seed))
|
|
484
|
+
|
|
485
|
+
@staticmethod
def _format_time(value: datetime) -> str:
    """Render a datetime as an ISO-8601 UTC string with a ``Z`` suffix.

    Args:
        value: Datetime to format; a naive value is taken to be UTC.

    Returns:
        The UTC timestamp in ISO format with ``+00:00`` replaced by ``Z``.
    """
    aware = value if value.tzinfo is not None else value.replace(tzinfo=timezone.utc)
    stamp = aware.astimezone(timezone.utc).isoformat()
    return stamp.replace("+00:00", "Z")
|
|
490
|
+
|
|
491
|
+
@staticmethod
def _env_bool(name: str, *, default: bool) -> bool:
    """Read a boolean flag from the environment.

    Args:
        name: Environment variable name.
        default: Value returned when the variable is not set.

    Returns:
        `default` when the variable is unset. Otherwise True exactly when
        the value, ignoring case and surrounding whitespace, is one of
        ``1``, ``true``, ``yes`` or ``on``.
    """
    raw = os.getenv(name)
    if raw is None:
        return default
    # Strip first: values coming from env files or shell substitution often
    # carry a trailing newline or spaces, which must not flip the flag off.
    return raw.strip().lower() in {"1", "true", "yes", "on"}
|