docent-python 0.1.5a0__py3-none-any.whl → 0.1.6a0__py3-none-any.whl

This diff shows the published contents of two package versions as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release.

docent/data_models/agent_run.py CHANGED
@@ -15,6 +15,7 @@ from pydantic import (
 from docent.data_models._tiktoken_util import get_token_count, group_messages_into_ranges
 from docent.data_models.transcript import (
     Transcript,
+    TranscriptGroup,
     TranscriptWithoutMetadataValidator,
     fake_model_dump,
 )
@@ -36,6 +37,7 @@ class AgentRun(BaseModel):
         name: Optional human-readable name for the agent run.
         description: Optional description of the agent run.
         transcripts: Dict mapping transcript IDs to Transcript objects.
+        transcript_groups: Dict mapping transcript group IDs to TranscriptGroup objects.
         metadata: Additional structured metadata about the agent run as a JSON-serializable dictionary.
     """
 
@@ -44,6 +46,7 @@ class AgentRun(BaseModel):
     description: str | None = None
 
     transcripts: dict[str, Transcript]
+    transcript_groups: dict[str, TranscriptGroup] = Field(default_factory=dict)
     metadata: dict[str, Any] = Field(default_factory=dict)
 
     @field_serializer("metadata")
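
Because the new transcript_groups field defaults to an empty dict, existing AgentRun call sites keep working. A minimal sketch of the updated model; the message dict passed to parse_chat_message is an assumed minimal chat payload, not taken from this diff:

    from docent.data_models import AgentRun, Transcript
    from docent.data_models.chat import parse_chat_message

    # Assumed minimal message dict; real logs carry full Inspect chat messages.
    messages = [parse_chat_message({"role": "user", "content": "Hello"})]

    run = AgentRun(
        transcripts={"main": Transcript(messages=messages, metadata={})},
        metadata={"task": "demo"},
    )
    assert run.transcript_groups == {}  # new field, defaults to empty
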
docent/data_models/transcript.py CHANGED
@@ -1,4 +1,5 @@
 import sys
+from datetime import datetime
 from typing import Any
 from uuid import uuid4
 
@@ -73,6 +74,8 @@ class TranscriptGroup(BaseModel):
         id: Unique identifier for the transcript group, auto-generated by default.
         name: Optional human-readable name for the transcript group.
         description: Optional description of the transcript group.
+        collection_id: ID of the collection this transcript group belongs to.
+        agent_run_id: ID of the agent run this transcript group belongs to.
         parent_transcript_group_id: Optional ID of the parent transcript group.
         metadata: Additional structured metadata about the transcript group.
     """
@@ -80,7 +83,10 @@ class TranscriptGroup(BaseModel):
     id: str = Field(default_factory=lambda: str(uuid4()))
     name: str | None = None
     description: str | None = None
+    collection_id: str
+    agent_run_id: str
     parent_transcript_group_id: str | None = None
+    created_at: datetime | None = None
     metadata: dict[str, Any] = Field(default_factory=dict)
 
     @field_serializer("metadata")
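
collection_id and agent_run_id are now required fields, while created_at stays optional. A minimal construction sketch with hypothetical IDs:

    from datetime import datetime, timezone
    from docent.data_models.transcript import TranscriptGroup

    group = TranscriptGroup(
        collection_id="col-123",   # hypothetical collection ID
        agent_run_id="run-456",    # hypothetical agent run ID
        name="exploration-phase",
        created_at=datetime.now(timezone.utc),
    )
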
@@ -129,6 +135,7 @@ class Transcript(BaseModel):
     name: str | None = None
     description: str | None = None
     transcript_group_id: str | None = None
+    created_at: datetime | None = None
 
     messages: list[ChatMessage]
     metadata: dict[str, Any] = Field(default_factory=dict)
docent/loaders/load_inspect.py CHANGED
@@ -1,4 +1,7 @@
-from typing import Any
+import json
+from pathlib import Path
+from typing import Any, BinaryIO, Generator, Tuple
+from zipfile import ZipFile
 
 from inspect_ai.log import EvalLog
 from inspect_ai.scorer import CORRECT, INCORRECT, NOANSWER, PARTIAL, Score
@@ -7,9 +10,9 @@ from docent.data_models import AgentRun, Transcript
 from docent.data_models.chat import parse_chat_message
 
 
-def _normalize_inspect_score(score: Score) -> Any:
+def _normalize_inspect_score(score: Score | dict[str, Any]) -> Any:
     """
-    Normalize an inspect score to a float. This implements the same logic as inspect_ai.scorer._metric.value_to_float, but fails more conspicuously.
+    Normalize an inspect score to a float. Logic mirrors inspect_ai.scorer._metric.value_to_float.
 
     Args:
         score: The inspect score to normalize.
@@ -18,7 +21,7 @@ def _normalize_inspect_score(score: Score) -> Any:
         The normalized score as a float, or None if the score is not a valid value.
     """
 
-    def _leaf_normalize(value: int | float | bool | str | None) -> float | str | None:
+    def _leaf_normalize(value: Any) -> Any:
         if value is None:
             return None
         if isinstance(value, int | float | bool):
@@ -38,12 +41,17 @@ def _normalize_inspect_score(score: Score) -> Any:
             return float(value)
         return value
 
-    if isinstance(score.value, int | float | bool | str):
-        return _leaf_normalize(score.value)
-    if isinstance(score.value, list):
-        return [_leaf_normalize(v) for v in score.value]
-    assert isinstance(score.value, dict), "Inspect score must be leaf value, list, or dict"
-    return {k: _leaf_normalize(v) for k, v in score.value.items()}
+    if isinstance(score, dict):
+        value = score["value"]
+    else:
+        value = score.value
+
+    if isinstance(value, int | float | bool | str):
+        return _leaf_normalize(value)
+    if isinstance(value, list):
+        return [_leaf_normalize(v) for v in value]  # type: ignore
+    assert isinstance(value, dict), "Inspect score must be leaf value, list, or dict"
+    return {k: _leaf_normalize(v) for k, v in value.items()}  # type: ignore
 
 
 def load_inspect_log(log: EvalLog) -> list[AgentRun]:
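
The normalizer now accepts either an inspect_ai Score object or the plain dict form read from sample JSON. A rough sketch of the intended behavior, assuming numeric and boolean leaves (the full leaf-normalization body is not shown in this diff):

    from docent.loaders.load_inspect import _normalize_inspect_score

    print(_normalize_inspect_score({"value": 1}))                # -> 1.0
    print(_normalize_inspect_score({"value": [True, 0.5]}))      # -> [1.0, 0.5]
    print(_normalize_inspect_score({"value": {"accuracy": 1}}))  # -> {"accuracy": 1.0}
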
@@ -86,3 +94,117 @@ def load_inspect_log(log: EvalLog) -> list[AgentRun]:
         )
 
     return agent_runs
+
+
+def _read_sample_as_run(data: dict[str, Any], header_metadata: dict[str, Any] = {}) -> AgentRun:
+    if "scores" in data:
+        normalized_scores = {k: _normalize_inspect_score(v) for k, v in data["scores"].items()}
+    else:
+        normalized_scores = {}
+
+    if "metadata" in data:
+        sample_metadata = data["metadata"]
+    else:
+        sample_metadata = {}
+
+    run_metadata: dict[str, Any] = {
+        "sample_id": data.get("id"),
+        "epoch": data.get("epoch"),
+        "target": data.get("target"),
+        # Scores could have answers, explanations, and other metadata besides the values we extract
+        "scoring_metadata": data.get("scores"),
+        "scores": normalized_scores,
+        # If a key exists in header and sample, sample takes precedence
+        **header_metadata,
+        **sample_metadata,
+    }
+
+    run = AgentRun(
+        transcripts={
+            "main": Transcript(
+                messages=[parse_chat_message(m) for m in data["messages"]], metadata={}
+            ),
+        },
+        metadata=run_metadata,
+    )
+    return run
+
+
+def _run_metadata_from_header(header: dict[str, Any]) -> dict[str, Any]:
+    """
+    Inspect logs often have a lot of metadata.
+    This function tries to get the most important stuff without adding clutter.
+    """
+    m: dict[str, Any] = {}
+    if e := header.get("eval"):
+        m["task"] = e["task"]
+        m["model"] = e["model"]
+    return m
+
+
+def get_total_samples(file_path: Path, format: str = "json") -> int:
+    """Return the total number of samples in the provided file."""
+    with open(file_path, "rb") as f:
+        if format == "json":
+            data = json.load(f)
+            return len(data.get("samples", []))
+        elif format == "eval":
+            z = ZipFile(f, mode="r")
+            try:
+                return sum(
+                    1
+                    for name in z.namelist()
+                    if name.startswith("samples/") and name.endswith(".json")
+                )
+            finally:
+                z.close()
+        else:
+            raise ValueError(f"Format must be 'json' or 'eval': {format}")
+
+
+def _runs_from_eval_file(
+    file: BinaryIO,
+) -> Tuple[dict[str, Any], Generator[AgentRun, None, None]]:
+    zip = ZipFile(file, mode="r")
+    header: dict[str, Any] = json.load(zip.open("header.json", "r"))
+    header_metadata = _run_metadata_from_header(header)
+
+    def _iter_runs() -> Generator[AgentRun, None, None]:
+        try:
+            for sample_file in zip.namelist():
+                if not (sample_file.startswith("samples/") and sample_file.endswith(".json")):
+                    continue
+                with zip.open(sample_file, "r") as f:
+                    data = json.load(f)
+                run: AgentRun = _read_sample_as_run(data, header_metadata)
+                yield run
+        finally:
+            zip.close()
+
+    return header_metadata, _iter_runs()
+
+
+def _runs_from_json_file(
+    file: BinaryIO,
+) -> Tuple[dict[str, Any], Generator[AgentRun, None, None]]:
+    data = json.load(file)
+    header_metadata = _run_metadata_from_header(data)
+
+    def _iter_runs() -> Generator[AgentRun, None, None]:
+        for sample in data["samples"]:
+            run: AgentRun = _read_sample_as_run(sample, header_metadata)
+            yield run
+
+    return header_metadata, _iter_runs()
+
+
+def runs_from_file(
+    file: BinaryIO, format: str = "json"
+) -> Tuple[dict[str, Any], Generator[AgentRun, None, None]]:
+    if format == "json":
+        result = _runs_from_json_file(file)
+    elif format == "eval":
+        result = _runs_from_eval_file(file)
+    else:
+        raise ValueError(f"Format must be 'json' or 'eval': {format}")
+    return result
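
A usage sketch for the new streaming loaders; the log path is hypothetical, and .eval logs are read as zip archives while .json logs are plain JSON:

    from pathlib import Path
    from docent.loaders.load_inspect import get_total_samples, runs_from_file

    log_path = Path("logs/example_task.eval")  # hypothetical Inspect log
    print(get_total_samples(log_path, format="eval"))

    with open(log_path, "rb") as f:
        header_metadata, runs = runs_from_file(f, format="eval")
        for run in runs:  # generator: samples are parsed lazily as you iterate
            print(run.metadata["sample_id"], run.metadata["scores"])
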
docent/trace.py CHANGED
@@ -12,6 +12,7 @@ from contextlib import asynccontextmanager, contextmanager
 from contextvars import ContextVar, Token
 from datetime import datetime, timezone
 from enum import Enum
+from importlib.metadata import Distribution, distributions
 from typing import Any, AsyncIterator, Callable, Dict, Iterator, List, Optional, Set, Union
 
 import requests
@@ -19,10 +20,6 @@ from opentelemetry import trace
 from opentelemetry.context import Context
 from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter as GRPCExporter
 from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter as HTTPExporter
-from opentelemetry.instrumentation.anthropic import AnthropicInstrumentor
-from opentelemetry.instrumentation.bedrock import BedrockInstrumentor
-from opentelemetry.instrumentation.langchain import LangchainInstrumentor
-from opentelemetry.instrumentation.openai import OpenAIInstrumentor
 from opentelemetry.instrumentation.threading import ThreadingInstrumentor
 from opentelemetry.sdk.resources import Resource
 from opentelemetry.sdk.trace import ReadableSpan, SpanProcessor, TracerProvider
@@ -34,9 +31,8 @@ from opentelemetry.sdk.trace.export import (
 from opentelemetry.trace import Span
 
 # Configure logging
-logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
-logger.disabled = True
+logger.setLevel(logging.ERROR)
 
 # Default configuration
 DEFAULT_ENDPOINT = "https://api.docent.transluce.org/rest/telemetry"
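
With the library no longer calling logging.basicConfig and its module logger set to ERROR, the host application controls verbosity. A sketch of opting back into info-level output using only the standard logging API:

    import logging

    logging.basicConfig(level=logging.INFO)                    # application-level handler config
    logging.getLogger("docent.trace").setLevel(logging.INFO)   # re-enable this module's info logs
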
@@ -223,7 +219,7 @@ class DocentTracer:
                 exporters.append(exporter)
                 logger.info(f"Initialized exporter for endpoint: {endpoint}")
             else:
-                logger.warning(f"Failed to initialize exporter for endpoint: {endpoint}")
+                logger.critical(f"Failed to initialize exporter for endpoint: {endpoint}")
 
         return exporters
 
@@ -326,8 +322,6 @@ class DocentTracer:
                 logger.info(
                     f"Added {len(otlp_exporters)} OTLP exporters for {len(self.endpoints)} endpoints"
                 )
-            else:
-                logger.warning("Failed to initialize OTLP exporter")
 
             if self.enable_console_export:
                 console_exporter: ConsoleSpanExporter = ConsoleSpanExporter()
@@ -355,32 +349,44 @@ class DocentTracer:
         # Instrument OpenAI with our isolated tracer provider
         if Instruments.OPENAI in enabled_instruments:
             try:
-                OpenAIInstrumentor().instrument(tracer_provider=self._tracer_provider)
-                logger.info("Instrumented OpenAI")
+                if is_package_installed("openai"):
+                    from opentelemetry.instrumentation.openai import OpenAIInstrumentor
+
+                    OpenAIInstrumentor().instrument(tracer_provider=self._tracer_provider)
+                    logger.info("Instrumented OpenAI")
             except Exception as e:
                 logger.warning(f"Failed to instrument OpenAI: {e}")
 
         # Instrument Anthropic with our isolated tracer provider
         if Instruments.ANTHROPIC in enabled_instruments:
             try:
-                AnthropicInstrumentor().instrument(tracer_provider=self._tracer_provider)
-                logger.info("Instrumented Anthropic")
+                if is_package_installed("anthropic"):
+                    from opentelemetry.instrumentation.anthropic import AnthropicInstrumentor
+
+                    AnthropicInstrumentor().instrument(tracer_provider=self._tracer_provider)
+                    logger.info("Instrumented Anthropic")
             except Exception as e:
                 logger.warning(f"Failed to instrument Anthropic: {e}")
 
         # Instrument Bedrock with our isolated tracer provider
         if Instruments.BEDROCK in enabled_instruments:
             try:
-                BedrockInstrumentor().instrument(tracer_provider=self._tracer_provider)
-                logger.info("Instrumented Bedrock")
+                if is_package_installed("boto3"):
+                    from opentelemetry.instrumentation.bedrock import BedrockInstrumentor
+
+                    BedrockInstrumentor().instrument(tracer_provider=self._tracer_provider)
+                    logger.info("Instrumented Bedrock")
             except Exception as e:
                 logger.warning(f"Failed to instrument Bedrock: {e}")
 
         # Instrument LangChain with our isolated tracer provider
         if Instruments.LANGCHAIN in enabled_instruments:
             try:
-                LangchainInstrumentor().instrument(tracer_provider=self._tracer_provider)
-                logger.info("Instrumented LangChain")
+                if is_package_installed("langchain") or is_package_installed("langgraph"):
+                    from opentelemetry.instrumentation.langchain import LangchainInstrumentor
+
+                    LangchainInstrumentor().instrument(tracer_provider=self._tracer_provider)
+                    logger.info("Instrumented LangChain")
             except Exception as e:
                 logger.warning(f"Failed to instrument LangChain: {e}")
 
@@ -812,9 +818,19 @@ class DocentTracer:
             metadata: Optional metadata to send
         """
         collection_id = self.collection_id
+
+        # Get agent_run_id from current context
+        agent_run_id = self.get_current_agent_run_id()
+        if not agent_run_id:
+            logger.error(
+                f"Cannot send transcript group metadata for {transcript_group_id} - no agent_run_id in context"
+            )
+            return
+
         payload: Dict[str, Any] = {
             "collection_id": collection_id,
             "transcript_group_id": transcript_group_id,
+            "agent_run_id": agent_run_id,
             "timestamp": datetime.now(timezone.utc).isoformat(),
         }
 
@@ -1019,6 +1035,22 @@ def initialize_tracing(
     return _global_tracer
 
 
+def _get_package_name(dist: Distribution) -> str | None:
+    try:
+        return dist.name.lower()
+    except (KeyError, AttributeError):
+        return None
+
+
+installed_packages = {
+    name for dist in distributions() if (name := _get_package_name(dist)) is not None
+}
+
+
+def is_package_installed(package_name: str) -> bool:
+    return package_name.lower() in installed_packages
+
+
 def get_tracer() -> DocentTracer:
     """Get the global Docent tracer."""
     if _global_tracer is None:
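
The new helper scans installed distributions once at import time so instrumentors are only loaded when their client library is actually present. A small usage sketch:

    from docent.trace import is_package_installed

    # True only if the distribution is installed in the current environment.
    if is_package_installed("openai"):
        print("openai found; the OpenAI instrumentor can be enabled")
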
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: docent-python
-Version: 0.1.5a0
+Version: 0.1.6a0
 Summary: Docent SDK
 Project-URL: Homepage, https://github.com/TransluceAI/docent
 Project-URL: Issues, https://github.com/TransluceAI/docent/issues
@@ -1,29 +1,29 @@
 docent/__init__.py,sha256=J2BbO6rzilfw9WXRUeolr439EGFezqbMU_kCpCCryRA,59
 docent/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docent/trace.py,sha256=aPbV9fy_JpFq9y3yuqhlHKPm2QiNh9SLsKOL2azxghs,62611
+docent/trace.py,sha256=oGhNizXcOU-FZJZkmnd8WQjlCvlRKWI3PncMHUHKQ_4,63667
 docent/trace_temp.py,sha256=Z0lAPwVzXjFvxpiU-CuvfWIslq9Q4alNkZMoQ77Xudk,40711
 docent/_log_util/__init__.py,sha256=3HXXrxrSm8PxwG4llotrCnSnp7GuroK1FNHsdg6f7aE,73
 docent/_log_util/logger.py,sha256=kwM0yRW1IJd6-XTorjWn48B4l8qvD2ZM6VDjY5eskQI,4422
 docent/data_models/__init__.py,sha256=4JbTDVzRhS5VZgo8MALwd_YI17GaN7X9E3rOc4Xl7kw,327
 docent/data_models/_tiktoken_util.py,sha256=hC0EDDWItv5-0cONBnHWgZtQOflDU7ZNEhXPFo4DvPc,3057
-docent/data_models/agent_run.py,sha256=lw-odD2zzFi-RGvkAFjz9x8l6XWPrGT6uRGqTj9h8qU,9621
+docent/data_models/agent_run.py,sha256=bDRToWUlY52PugoHWU1D9hasr5t_fnTmRLpkzWP1s_k,9811
 docent/data_models/citation.py,sha256=WsVQZcBT2EJD24ysyeVOC5Xfo165RI7P5_cOnJBgHj0,10015
 docent/data_models/metadata.py,sha256=r0SYC4i2x096dXMLfw_rAMtcJQCsoV6EOMPZuEngbGA,9062
 docent/data_models/regex.py,sha256=0ciIerkrNwb91bY5mTcyO5nDWH67xx2tZYObV52fmBo,1684
 docent/data_models/shared_types.py,sha256=jjm-Dh5S6v7UKInW7SEqoziOsx6Z7Uu4e3VzgCbTWvc,225
-docent/data_models/transcript.py,sha256=NDcpvil4dJ8YhG_JJ0X-w0prkXhwhsdO-zoL-CZMipM,15446
+docent/data_models/transcript.py,sha256=0iF2ujcWhTss8WkkpNMeIKJyKOfMEsiMoAQMGwY4ing,15753
 docent/data_models/chat/__init__.py,sha256=O04XQ2NmO8GTWqkkB_Iydj8j_CucZuLhoyMVTxJN_cs,570
 docent/data_models/chat/content.py,sha256=Co-jO8frQa_DSP11wJuhPX0s-GpJk8yqtKqPeiAIZ_U,1672
 docent/data_models/chat/message.py,sha256=iAo38kbV6wYbFh8S23cxLy6HY4C_i3PzQ6RpSQG5dxM,3861
 docent/data_models/chat/tool.py,sha256=x7NKINswPe0Kqvcx4ubjHzB-n0-i4DbFodvaBb2vitk,3042
-docent/loaders/load_inspect.py,sha256=yK6LZgprT8kc0Jg4N_cnbhsGCq9lINmMcgALXA9AibY,2812
+docent/loaders/load_inspect.py,sha256=_cK2Qd6gyLQuJVzOlsvEZz7TrqzNmH6ZsLTkSCWAPqQ,6628
 docent/samples/__init__.py,sha256=roDFnU6515l9Q8v17Es_SpWyY9jbm5d6X9lV01V0MZo,143
 docent/samples/load.py,sha256=ZGE07r83GBNO4A0QBh5aQ18WAu3mTWA1vxUoHd90nrM,207
 docent/samples/log.eval,sha256=orrW__9WBfANq7NwKsPSq9oTsQRcG6KohG5tMr_X_XY,397708
 docent/samples/tb_airline.json,sha256=eR2jFFRtOw06xqbEglh6-dPewjifOk-cuxJq67Dtu5I,47028
 docent/sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docent/sdk/client.py,sha256=fLdniy8JzMLoZpaS9SP2pHban_ToavgtI8VeHZLMNZo,12773
-docent_python-0.1.5a0.dist-info/METADATA,sha256=XRzyvevWQ36Mk6UB6HIGIuD6vCQBX0cazhZylY25Q24,1037
-docent_python-0.1.5a0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-docent_python-0.1.5a0.dist-info/licenses/LICENSE.md,sha256=vOHzq3K4Ndu0UV9hPrtXvlD7pHOjyDQmGjHuLSIkRQY,1087
-docent_python-0.1.5a0.dist-info/RECORD,,
+docent_python-0.1.6a0.dist-info/METADATA,sha256=ib_GqBFrOmPvacYb4uncrC5qLsoygIJ0wU852MOea_8,1037
+docent_python-0.1.6a0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+docent_python-0.1.6a0.dist-info/licenses/LICENSE.md,sha256=vOHzq3K4Ndu0UV9hPrtXvlD7pHOjyDQmGjHuLSIkRQY,1087
+docent_python-0.1.6a0.dist-info/RECORD,,