docent-python 0.1.0a8__tar.gz → 0.1.1a0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. {docent_python-0.1.0a8 → docent_python-0.1.1a0}/PKG-INFO +1 -1
  2. {docent_python-0.1.0a8 → docent_python-0.1.1a0}/docent/data_models/__init__.py +6 -1
  3. {docent_python-0.1.0a8 → docent_python-0.1.1a0}/docent/data_models/metadata.py +1 -1
  4. docent_python-0.1.1a0/docent/loaders/load_inspect.py +76 -0
  5. {docent_python-0.1.0a8 → docent_python-0.1.1a0}/docent/sdk/client.py +73 -3
  6. {docent_python-0.1.0a8 → docent_python-0.1.1a0}/pyproject.toml +1 -1
  7. {docent_python-0.1.0a8 → docent_python-0.1.1a0}/.gitignore +0 -0
  8. {docent_python-0.1.0a8 → docent_python-0.1.1a0}/LICENSE.md +0 -0
  9. {docent_python-0.1.0a8 → docent_python-0.1.1a0}/README.md +0 -0
  10. {docent_python-0.1.0a8 → docent_python-0.1.1a0}/docent/__init__.py +0 -0
  11. {docent_python-0.1.0a8 → docent_python-0.1.1a0}/docent/_log_util/__init__.py +0 -0
  12. {docent_python-0.1.0a8 → docent_python-0.1.1a0}/docent/_log_util/logger.py +0 -0
  13. {docent_python-0.1.0a8 → docent_python-0.1.1a0}/docent/data_models/_tiktoken_util.py +0 -0
  14. {docent_python-0.1.0a8 → docent_python-0.1.1a0}/docent/data_models/agent_run.py +0 -0
  15. {docent_python-0.1.0a8 → docent_python-0.1.1a0}/docent/data_models/chat/__init__.py +0 -0
  16. {docent_python-0.1.0a8 → docent_python-0.1.1a0}/docent/data_models/chat/content.py +0 -0
  17. {docent_python-0.1.0a8 → docent_python-0.1.1a0}/docent/data_models/chat/message.py +0 -0
  18. {docent_python-0.1.0a8 → docent_python-0.1.1a0}/docent/data_models/chat/tool.py +0 -0
  19. {docent_python-0.1.0a8 → docent_python-0.1.1a0}/docent/data_models/citation.py +0 -0
  20. {docent_python-0.1.0a8 → docent_python-0.1.1a0}/docent/data_models/regex.py +0 -0
  21. {docent_python-0.1.0a8 → docent_python-0.1.1a0}/docent/data_models/shared_types.py +0 -0
  22. {docent_python-0.1.0a8 → docent_python-0.1.1a0}/docent/data_models/transcript.py +0 -0
  23. {docent_python-0.1.0a8 → docent_python-0.1.1a0}/docent/py.typed +0 -0
  24. {docent_python-0.1.0a8 → docent_python-0.1.1a0}/docent/samples/__init__.py +0 -0
  25. {docent_python-0.1.0a8 → docent_python-0.1.1a0}/docent/samples/load.py +0 -0
  26. {docent_python-0.1.0a8 → docent_python-0.1.1a0}/docent/samples/log.eval +0 -0
  27. {docent_python-0.1.0a8 → docent_python-0.1.1a0}/docent/samples/tb_airline.json +0 -0
  28. {docent_python-0.1.0a8 → docent_python-0.1.1a0}/docent/sdk/__init__.py +0 -0
  29. {docent_python-0.1.0a8 → docent_python-0.1.1a0}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docent-python
3
- Version: 0.1.0a8
3
+ Version: 0.1.1a0
4
4
  Summary: Docent SDK
5
5
  Project-URL: Homepage, https://github.com/TransluceAI/docent
6
6
  Project-URL: Issues, https://github.com/TransluceAI/docent/issues
@@ -1,6 +1,10 @@
1
1
  from docent.data_models.agent_run import AgentRun
2
2
  from docent.data_models.citation import Citation
3
- from docent.data_models.metadata import BaseAgentRunMetadata, BaseMetadata
3
+ from docent.data_models.metadata import (
4
+ BaseAgentRunMetadata,
5
+ BaseMetadata,
6
+ InspectAgentRunMetadata,
7
+ )
4
8
  from docent.data_models.regex import RegexSnippet
5
9
  from docent.data_models.transcript import Transcript
6
10
 
@@ -10,5 +14,6 @@ __all__ = [
10
14
  "RegexSnippet",
11
15
  "BaseAgentRunMetadata",
12
16
  "BaseMetadata",
17
+ "InspectAgentRunMetadata",
13
18
  "Transcript",
14
19
  ]
@@ -218,7 +218,7 @@ class InspectAgentRunMetadata(BaseAgentRunMetadata):
218
218
  # Parameters for the run
219
219
  model: str = Field(description="The model that was used to generate the transcript")
220
220
 
221
- # Outcome
221
+ # Scoring
222
222
  scoring_metadata: dict[str, Any] | None = Field(
223
223
  description="Additional metadata about the scoring process"
224
224
  )
@@ -0,0 +1,76 @@
1
+ from inspect_ai.log import EvalLog
2
+ from inspect_ai.scorer import CORRECT, INCORRECT, NOANSWER, PARTIAL, Score
3
+
4
+ from docent.data_models import AgentRun, InspectAgentRunMetadata, Transcript
5
+ from docent.data_models.chat import parse_chat_message
6
+
7
+
8
+ def _normalize_inspect_score(score: Score) -> float | None:
9
+ """
10
+ Normalize an inspect score to a float. This implements the same logic as inspect_ai.scorer._metric.value_to_float, but fails more conspicuously.
11
+
12
+ Args:
13
+ score: The inspect score to normalize.
14
+
15
+ Returns:
16
+ The normalized score as a float; an unrecognized score value raises ValueError instead of returning None.
17
+ """
18
+
19
+ if isinstance(score.value, int | float | bool):
20
+ return float(score.value)
21
+ elif score.value == CORRECT:
22
+ return 1.0
23
+ elif score.value == PARTIAL:
24
+ return 0.5
25
+ elif score.value == INCORRECT or score.value == NOANSWER:
26
+ return 0
27
+ elif isinstance(score.value, str):
28
+ value = score.value.lower()
29
+ if value in ["yes", "true"]:
30
+ return 1.0
31
+ elif value in ["no", "false"]:
32
+ return 0.0
33
+ elif value.replace(".", "").isnumeric():
34
+ return float(value)
35
+
36
+ raise ValueError(f"Unknown score value: {score.value}")
37
+
38
+
39
+ def load_inspect_log(log: EvalLog) -> list[AgentRun]:
40
+ if log.samples is None:
41
+ return []
42
+
43
+ agent_runs: list[AgentRun] = []
44
+
45
+ for s in log.samples:
46
+ sample_id = s.id
47
+ epoch_id = s.epoch
48
+
49
+ if s.scores is None:
50
+ sample_scores = {}
51
+ else:
52
+ sample_scores = {k: _normalize_inspect_score(v) for k, v in s.scores.items()}
53
+
54
+ metadata = InspectAgentRunMetadata(
55
+ task_id=log.eval.task,
56
+ sample_id=str(sample_id),
57
+ epoch_id=epoch_id,
58
+ model=log.eval.model,
59
+ additional_metadata=s.metadata,
60
+ scores=sample_scores,
61
+ # Scores could have answers, explanations, and other metadata besides the values we extract
62
+ scoring_metadata=s.scores,
63
+ )
64
+
65
+ agent_runs.append(
66
+ AgentRun(
67
+ transcripts={
68
+ "main": Transcript(
69
+ messages=[parse_chat_message(m.model_dump()) for m in s.messages]
70
+ )
71
+ },
72
+ metadata=metadata,
73
+ )
74
+ )
75
+
76
+ return agent_runs
@@ -4,7 +4,7 @@ from typing import Any
4
4
  import requests
5
5
 
6
6
  from docent._log_util.logger import get_logger
7
- from docent.data_models.agent_run import AgentRun
7
+ from docent.data_models.agent_run import AgentRun, AgentRunWithoutMetadataValidator
8
8
 
9
9
  logger = get_logger(__name__)
10
10
 
@@ -24,8 +24,8 @@ class Docent:
24
24
 
25
25
  def __init__(
26
26
  self,
27
- server_url: str = "https://aws-docent-backend.transluce.org",
28
- web_url: str = "https://docent-alpha.transluce.org",
27
+ server_url: str = "https://api.docent.transluce.org",
28
+ web_url: str = "https://docent.transluce.org",
29
29
  api_key: str | None = None,
30
30
  ):
31
31
  self._server_url = server_url.rstrip("/") + "/rest"
@@ -268,3 +268,73 @@ class Docent:
268
268
  response = self._session.post(url, json={"centroid": centroid})
269
269
  response.raise_for_status()
270
270
  return response.json()
271
+
272
+ def get_agent_run(self, collection_id: str, agent_run_id: str) -> AgentRun | None:
273
+ """Get a specific agent run by its ID.
274
+
275
+ Args:
276
+ collection_id: ID of the Collection.
277
+ agent_run_id: The ID of the agent run to retrieve.
278
+
279
+ Returns:
280
+ AgentRun | None: The parsed agent run, or None if the server returned a null JSON body.
281
+
282
+ Raises:
283
+ requests.exceptions.HTTPError: If the API request fails.
284
+ """
285
+ url = f"{self._server_url}/{collection_id}/agent_run"
286
+ response = self._session.get(url, params={"agent_run_id": agent_run_id})
287
+ response.raise_for_status()
288
+ if response.json() is None:
289
+ return None
290
+ else:
291
+ # We do this to avoid metadata validation failing
292
+ # TODO(mengk): kinda hacky
293
+ return AgentRunWithoutMetadataValidator.model_validate(response.json())
294
+
295
+ def make_collection_public(self, collection_id: str) -> dict[str, Any]:
296
+ """Make a collection publicly accessible to anyone with the link.
297
+
298
+ Args:
299
+ collection_id: ID of the Collection to make public.
300
+
301
+ Returns:
302
+ dict: API response data.
303
+
304
+ Raises:
305
+ requests.exceptions.HTTPError: If the API request fails.
306
+ """
307
+ url = f"{self._server_url}/{collection_id}/make_public"
308
+ response = self._session.post(url)
309
+ response.raise_for_status()
310
+
311
+ logger.info(f"Successfully made Collection '{collection_id}' public")
312
+ return response.json()
313
+
314
+ def share_collection_with_email(self, collection_id: str, email: str) -> dict[str, Any]:
315
+ """Share a collection with a specific user by email address.
316
+
317
+ Args:
318
+ collection_id: ID of the Collection to share.
319
+ email: Email address of the user to share with.
320
+
321
+ Returns:
322
+ dict: API response data.
323
+
324
+ Raises:
325
+ requests.exceptions.HTTPError: If the API request fails.
326
+ """
327
+ url = f"{self._server_url}/{collection_id}/share_with_email"
328
+ payload = {"email": email}
329
+ response = self._session.post(url, json=payload)
330
+
331
+ try:
332
+ response.raise_for_status()
333
+ except requests.exceptions.HTTPError:
334
+ if response.status_code == 404:
335
+ raise ValueError(f"The user you are trying to share with ({email}) does not exist.")
336
+ else:
337
+ raise # Re-raise the original exception
338
+
339
+ logger.info(f"Successfully shared Collection '{collection_id}' with {email}")
340
+ return response.json()
@@ -1,7 +1,7 @@
1
1
  [project]
2
2
  name = "docent-python"
3
3
  description = "Docent SDK"
4
- version = "0.1.0-alpha.8"
4
+ version = "0.1.1-alpha"
5
5
  authors = [
6
6
  { name="Transluce", email="info@transluce.org" },
7
7
  ]
File without changes