docent-python 0.1.20a0__py3-none-any.whl → 0.1.21a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docent-python might be problematic. Click here for more details.

@@ -0,0 +1,95 @@
1
+ import json
2
+ from typing import Any, cast
3
+
4
+ import jsonschema
5
+
6
+ from docent._llm_util.data_models.exceptions import ValidationFailedException
7
+ from docent._llm_util.data_models.llm_output import LLMOutput
8
+ from docent._log_util import get_logger
9
+ from docent.data_models.agent_run import AgentRun
10
+ from docent.data_models.remove_invalid_citation_ranges import remove_invalid_citation_ranges
11
+ from docent.judges.types import traverse_schema_and_transform
12
+ from docent.judges.util.forgiving_json import forgiving_json_loads
13
+
14
+ logger = get_logger(__name__)
15
+
16
+
17
def _validate_rubric_output(
    output: dict[str, Any], output_schema: dict[str, Any], agent_run: AgentRun
) -> dict[str, Any]:
    """Check a judge result against its schema, then filter citation text ranges.

    The result is first validated with ``jsonschema.validate``. It is then passed
    through ``traverse_schema_and_transform`` with a callback that runs
    ``remove_invalid_citation_ranges`` on each string it is handed and logs a
    warning whenever that call changes the text.

    Args:
        output: Raw results from LLM judge
        output_schema: JSON schema the output must conform to; also passed to
            ``traverse_schema_and_transform`` (presumably to select which strings
            are transformed -- confirm against that helper).
        agent_run: Agent run containing transcript data for validation

    Returns:
        Validated result dict with invalid citations removed

    Raises:
        ValidationFailedException: If the output does not conform to the schema, or
            if anything raises during the citation pass. The original exception is
            attached as ``__cause__``.

    Note:
        Only ``jsonschema.ValidationError`` is converted. A malformed
        ``output_schema`` (``jsonschema.SchemaError``) propagates unchanged.
    """

    def _validate_citation_string(text: str) -> str:
        # Callback for traverse_schema_and_transform: returns the filtered text and
        # leaves a trace in the logs whenever something was removed.
        validated_text = remove_invalid_citation_ranges(text, agent_run)
        if validated_text != text:
            logger.warning(
                f"Citation validation removed invalid text range from citation in judge result. "
                f"Agent run ID: {agent_run.id}, "
                f"Original text: {text}, "
                f"Validated text: {validated_text}, "
            )
        return validated_text

    try:
        jsonschema.validate(output, output_schema)
    except jsonschema.ValidationError as e:
        # Chain explicitly so the jsonschema error is preserved as the cause.
        raise ValidationFailedException(
            f"Schema validation failed: {e}", failed_output=str(output)
        ) from e

    try:
        return traverse_schema_and_transform(output, output_schema, _validate_citation_string)
    except Exception as e:
        # Deliberately broad: any failure in the citation pass is reported to the
        # caller as a validation failure, with the underlying error chained.
        raise ValidationFailedException(
            f"Citation validation failed: {e}", failed_output=str(output)
        ) from e
56
+
57
+
58
def parse_and_validate_llm_output(
    llm_output: LLMOutput,
    output_schema: dict[str, Any],
    agent_run: AgentRun,
) -> dict[str, Any]:
    """Parse and validate LLM output for rubric evaluation.

    The text in ``llm_output.first_text`` is parsed with ``forgiving_json_loads``,
    required to be a dict, and then handed to ``_validate_rubric_output``.

    Args:
        llm_output: The LLM output to parse
        output_schema: The schema to validate against
        agent_run: Agent run for citation validation

    Returns:
        Validated output dict

    Raises:
        ValidationFailedException: If there is no text, the text cannot be parsed as
            JSON, the parsed value is not a dict, or validation fails. A JSON decode
            error is attached as ``__cause__``.
    """
    # Read the attribute once so the None guard and every later use see the same value.
    raw_text = llm_output.first_text
    if raw_text is None:
        raise ValidationFailedException("LLM output has no text", failed_output=None)

    try:
        output = forgiving_json_loads(raw_text)
    except json.JSONDecodeError as e:
        raise ValidationFailedException(
            f"Failed to parse JSON: {e}. Raw text: `{raw_text}`",
            failed_output=raw_text,
        ) from e

    if not isinstance(output, dict):
        # Valid JSON but not an object (e.g. a list or scalar): log it and reject.
        logger.error(f"Expected dict output, got {type(output)}")
        logger.error(f"LLM output: {raw_text}")
        raise ValidationFailedException(
            f"Expected dict output, got {type(output)}. Raw text: {raw_text}",
            failed_output=raw_text,
        )

    return _validate_rubric_output(cast(dict[str, Any], output), output_schema, agent_run)
@@ -0,0 +1,84 @@
1
+ from collections import Counter
2
+ from typing import Any, cast
3
+
4
+
5
def get_agreement_keys(schema: dict[str, Any]) -> list[str]:
    """Get list of top-level keys in schema that we want to measure agreement on.

    A property is selected when its ``type`` is exactly ``"boolean"`` or
    ``"integer"``, or when it declares an ``enum`` (whatever its type, including
    no ``type`` at all). Floats and free-form strings are skipped. Properties whose
    ``type`` is a list (union types) are matched only through ``enum``.

    Args:
        schema: JSON schema dict; only its top-level ``properties`` mapping is read.

    Returns:
        List of field names (keys), in schema order, that should be used for
        measuring agreement. Empty if the schema has no ``properties``.

    Raises:
        TypeError: If ``properties`` is present but is not a dict.
    """
    properties = schema.get("properties", {})
    # Raise explicitly instead of asserting: asserts are stripped under ``python -O``.
    if not isinstance(properties, dict):
        raise TypeError(
            f"Schema 'properties' must be a dict, got {type(properties).__name__}"
        )
    properties = cast(dict[str, Any], properties)

    agreement_keys: list[str] = []
    for key, field_schema in properties.items():
        # JSON Schema allows boolean subschemas (``true`` / ``false``); they carry
        # no type or enum information, so there is nothing to measure agreement on.
        if not isinstance(field_schema, dict):
            continue
        field_schema = cast(dict[str, Any], field_schema)

        # ``type`` may be absent (enum-only property) or a list (union); a tuple
        # membership test handles both without raising.
        field_type = field_schema.get("type")
        if field_type in ("boolean", "integer") or "enum" in field_schema:
            agreement_keys.append(key)

    return agreement_keys
40
+
41
+
42
+ def find_modal_result(indep_results: list[dict[str, Any]], agreement_keys: list[str]):
43
+ """Find the result that best matches modal values across agreement keys.
44
+
45
+ Args:
46
+ indep_results: List of independent results to analyze
47
+ agreement_keys: Keys to measure agreement on
48
+
49
+ Returns:
50
+ Tuple of (max_idx, agt_key_modes_and_counts) where:
51
+ - max_idx is the index of the result that best matches modal values
52
+ - agt_key_modes_and_counts maps each key to (modal_value, count) or None if no values exist for that key
53
+
54
+ Raises:
55
+ ValueError: If no results are provided
56
+ """
57
+ if not indep_results:
58
+ raise ValueError("No results to score")
59
+
60
+ # For each agreement key, compute the mode and count (or None, if no values exist for that key)
61
+ agt_key_modes_and_counts: dict[str, tuple[str | bool | int, int] | None] = {}
62
+ for key in agreement_keys:
63
+ key_modes = Counter(v for r in indep_results if (v := r.get(key)) is not None)
64
+ if most_common_one := key_modes.most_common(1):
65
+ agt_key_modes_and_counts[key] = most_common_one[0]
66
+ else:
67
+ agt_key_modes_and_counts[key] = None
68
+
69
+ # Score each rollout based on how many agreement keys they match
70
+ # If there is no mode for a key, or if a certain result doesn't have that key, it doesn't count.
71
+ # TODO(mengk): This may bias towards results that have more keys.
72
+ indep_result_scores: list[int] = []
73
+ for r in indep_results:
74
+ score = 0
75
+ for key in agreement_keys:
76
+ mode_and_count = agt_key_modes_and_counts[key]
77
+ if mode_and_count and r.get(key) == mode_and_count[0]:
78
+ score += 1
79
+ indep_result_scores.append(score)
80
+
81
+ # Argmax
82
+ max_idx = indep_result_scores.index(max(indep_result_scores))
83
+
84
+ return max_idx, agt_key_modes_and_counts
docent/trace.py CHANGED
@@ -226,7 +226,7 @@ class DocentTracer:
226
226
  try:
227
227
 
228
228
  # Check for OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT environment variable
229
- default_attribute_limit = 1024
229
+ default_attribute_limit = 1024 * 16
230
230
  env_value = os.environ.get("OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT", "0")
231
231
  env_limit = int(env_value) if env_value.isdigit() else 0
232
232
  attribute_limit = max(env_limit, default_attribute_limit)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docent-python
3
- Version: 0.1.20a0
3
+ Version: 0.1.21a0
4
4
  Summary: Docent SDK
5
5
  Project-URL: Homepage, https://github.com/TransluceAI/docent
6
6
  Project-URL: Issues, https://github.com/TransluceAI/docent/issues
@@ -9,8 +9,12 @@ Author-email: Transluce <info@transluce.org>
9
9
  License-Expression: Apache-2.0
10
10
  License-File: LICENSE.md
11
11
  Requires-Python: >=3.11
12
+ Requires-Dist: anthropic>=0.47.0
12
13
  Requires-Dist: backoff>=2.2.1
14
+ Requires-Dist: google-genai>=1.16.1
13
15
  Requires-Dist: inspect-ai>=0.3.132
16
+ Requires-Dist: jsonschema>=4.24.0
17
+ Requires-Dist: openai>=1.68.0
14
18
  Requires-Dist: opentelemetry-api>=1.34.1
15
19
  Requires-Dist: opentelemetry-exporter-otlp-proto-grpc>=1.34.1
16
20
  Requires-Dist: opentelemetry-exporter-otlp-proto-http>=1.34.1
@@ -20,6 +24,7 @@ Requires-Dist: opentelemetry-instrumentation-langchain>=0.40.14
20
24
  Requires-Dist: opentelemetry-instrumentation-openai>=0.40.14
21
25
  Requires-Dist: opentelemetry-instrumentation-threading>=0.55b1
22
26
  Requires-Dist: opentelemetry-sdk>=1.34.1
27
+ Requires-Dist: orjson>=3.11.3
23
28
  Requires-Dist: pydantic>=2.11.7
24
29
  Requires-Dist: pyyaml>=6.0.2
25
30
  Requires-Dist: tiktoken>=0.7.0
@@ -0,0 +1,58 @@
1
+ docent/__init__.py,sha256=fuhETwJPcesiB76Zxa64HBJxeaaTyRalIH-fs77TWsU,112
2
+ docent/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ docent/trace.py,sha256=_JvDmtWVFARPYvXsNx8-RKRdev4mMxNK6iq9AARzoJE,66362
4
+ docent/trace_2.py,sha256=-OxzXF2kOFkhto1UGXHWVM797EN_BT_uwDSbzgMme8o,67145
5
+ docent/trace_temp.py,sha256=Z0lAPwVzXjFvxpiU-CuvfWIslq9Q4alNkZMoQ77Xudk,40711
6
+ docent/_llm_util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ docent/_llm_util/llm_cache.py,sha256=p8pJ-B1vyJQlNn48ce1Pqv2gLocBVD6fZLPsd2VC5LA,6386
8
+ docent/_llm_util/model_registry.py,sha256=8Y4VwrA2f2EX78cG1VBIBHVvT_p4qqBTdu9a9zJpfTo,3382
9
+ docent/_llm_util/prod_llms.py,sha256=HuGOg5Bhnpk_TijC3mOH8CTRIBy2C8w0_SebiEouNoE,16859
10
+ docent/_llm_util/data_models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
+ docent/_llm_util/data_models/exceptions.py,sha256=IW4BVMVp8r5TufNXyrhy3acgwJiQQQPQjB9VA4RVXw8,1489
12
+ docent/_llm_util/data_models/llm_output.py,sha256=fuYPJ-SwxZjB4XGATA6XpLyc42Ix-kXHgBqFr_jPhK8,10123
13
+ docent/_llm_util/data_models/simple_svc.py,sha256=0twuXP6aEU-jYY0obDSEgjT2lDSJCuZG_NgiqEzZIPM,2881
14
+ docent/_llm_util/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
+ docent/_llm_util/providers/anthropic.py,sha256=-1oPd5FB4aFwKSmNvXzG8PVewjhgsogLRX1SCpnCxoA,18720
16
+ docent/_llm_util/providers/common.py,sha256=dgcTuU4XkCKoAaM48UW8zMgRYUzj7TDBhvWqtnxBO7g,1166
17
+ docent/_llm_util/providers/google.py,sha256=2D9mDgenZW0pt0_V7koX-aoZzpl8jo8xE5EWOLK7I0k,20314
18
+ docent/_llm_util/providers/openai.py,sha256=4niQV9CNaJ-iiEwYG0BSFxCwcsCAWZz0JuUs4wBKu9M,25904
19
+ docent/_llm_util/providers/openrouter.py,sha256=sT2onpeQ1gAwJLjkQbzD2RodJWTm013Q-siTXezca10,11958
20
+ docent/_llm_util/providers/preference_types.py,sha256=z-TOxj_es1_cs5DzknZaganGyjMkmh5NgtiDYKMRI1I,3751
21
+ docent/_llm_util/providers/provider_registry.py,sha256=EPYGQlegYPtg4ogEusCftm_5PZP-_XVKH1qg3xjPFTU,6337
22
+ docent/_log_util/__init__.py,sha256=3HXXrxrSm8PxwG4llotrCnSnp7GuroK1FNHsdg6f7aE,73
23
+ docent/_log_util/logger.py,sha256=kwM0yRW1IJd6-XTorjWn48B4l8qvD2ZM6VDjY5eskQI,4422
24
+ docent/data_models/__init__.py,sha256=bE_Wy4Ql-9-0ZPcolMCPHhYvaE_Ug6h-jV7wOJ_DAi0,399
25
+ docent/data_models/_tiktoken_util.py,sha256=hC0EDDWItv5-0cONBnHWgZtQOflDU7ZNEhXPFo4DvPc,3057
26
+ docent/data_models/agent_run.py,sha256=7_37I9aS9rhDTkAvMPwoJGssQldvvKte8qVb93EnAiY,19329
27
+ docent/data_models/citation.py,sha256=2_M1-_olVOJtjCGGFx1GIwGYWl0ILHxRsW8-EFDS9j0,7844
28
+ docent/data_models/judge.py,sha256=zPbTqztn-yWu6tgD3R5JTyGnNiDhY6cWQ-gz3e_eM5k,340
29
+ docent/data_models/metadata_util.py,sha256=E-EClAP5vVm9xbfTlPSz0tUyCalOfN9Jujd6JGoRnBg,487
30
+ docent/data_models/regex.py,sha256=0ciIerkrNwb91bY5mTcyO5nDWH67xx2tZYObV52fmBo,1684
31
+ docent/data_models/remove_invalid_citation_ranges.py,sha256=3RSMsOzFO2cSjkxI549TAo12qdvD-AGHd05Jxu0amvs,6282
32
+ docent/data_models/shared_types.py,sha256=jjm-Dh5S6v7UKInW7SEqoziOsx6Z7Uu4e3VzgCbTWvc,225
33
+ docent/data_models/transcript.py,sha256=Hkj9-rQfRk6ywICpwM4P1vgpTJ_7T3jxjFtodfFcwPw,20087
34
+ docent/data_models/util.py,sha256=dK0dviDkDe8PiNCZisryctT-dzScWenLXQi6DOk9Ts4,7194
35
+ docent/data_models/chat/__init__.py,sha256=ws77P3raDiOv6XesAMycUwu-uT75D5f9aNgjFeJbUH8,631
36
+ docent/data_models/chat/content.py,sha256=Co-jO8frQa_DSP11wJuhPX0s-GpJk8yqtKqPeiAIZ_U,1672
37
+ docent/data_models/chat/message.py,sha256=_72xeTdgv8ogQd4WLl1P3yXfIDkIEQrHlWgdvObeQxY,4291
38
+ docent/data_models/chat/tool.py,sha256=MMglNHzkwHqUoK0xDWqs2FtelPsgHqwVpGpI1F8KZyw,3049
39
+ docent/judges/__init__.py,sha256=Sob1uxJRgmr2S2sz4J6skHP8iqcVoiUq7Jlh8S5Sj9Y,462
40
+ docent/judges/impl.py,sha256=qiItNKWPvB0KlB5b0rQoIfT-7m1xzyI028WtgvgvRhU,8864
41
+ docent/judges/types.py,sha256=NlLv42iLDORbPAHppCz-YWZ6ksR4QYDWAweGw75izJ0,8439
42
+ docent/judges/util/forgiving_json.py,sha256=zSh0LF3UVHdSjuMNvEiqUmSxpxPaqK1rSLiI6KCNihg,3549
43
+ docent/judges/util/meta_schema.json,sha256=g3MUa_6e38I3GqZryy8b1w_Y9Krx2xSiWIuaG8Zpszc,2055
44
+ docent/judges/util/meta_schema.py,sha256=6IrIRHERJ6tkRcUtUShJ84I68yUJgkwfFeBjgt42qEA,930
45
+ docent/judges/util/parse_output.py,sha256=qvqt7TEnrAqvzYHqip48boMQSUcoGa-1PA1gIGn-w4s,3381
46
+ docent/judges/util/voting.py,sha256=cAty9b4w7M1OWeW-j8t6vxpZn7VXyE3aBL9Ex2ERKcU,3071
47
+ docent/loaders/load_inspect.py,sha256=VLrtpvcVZ44n2DIPMwUivXqbvOWjaooGw6moY8UQ0VE,6789
48
+ docent/samples/__init__.py,sha256=roDFnU6515l9Q8v17Es_SpWyY9jbm5d6X9lV01V0MZo,143
49
+ docent/samples/load.py,sha256=ZGE07r83GBNO4A0QBh5aQ18WAu3mTWA1vxUoHd90nrM,207
50
+ docent/samples/log.eval,sha256=orrW__9WBfANq7NwKsPSq9oTsQRcG6KohG5tMr_X_XY,397708
51
+ docent/samples/tb_airline.json,sha256=eR2jFFRtOw06xqbEglh6-dPewjifOk-cuxJq67Dtu5I,47028
52
+ docent/sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
53
+ docent/sdk/agent_run_writer.py,sha256=0AWdxejoqZyuj9JSA39WlEwGcMSYTWNqnzIuluySY-M,11043
54
+ docent/sdk/client.py,sha256=K1NVkj_CFj0q-2mSFvWfh8NTqXqosED--dv5aLD7yOE,18239
55
+ docent_python-0.1.21a0.dist-info/METADATA,sha256=H0iEQ39cv90MW0lRZ94XhER6C_znvNU3DBPc6M72i9g,1277
56
+ docent_python-0.1.21a0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
57
+ docent_python-0.1.21a0.dist-info/licenses/LICENSE.md,sha256=QIMv2UiT6MppRasso4ymaA0w7ltkqmlL0HCt8CLD7Rc,580
58
+ docent_python-0.1.21a0.dist-info/RECORD,,
@@ -1,34 +0,0 @@
1
- docent/__init__.py,sha256=fuhETwJPcesiB76Zxa64HBJxeaaTyRalIH-fs77TWsU,112
2
- docent/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- docent/trace.py,sha256=CEDT7StBE6DaEffWEiW-Lntx5OxNmv9vyXFGI9UlW28,66357
4
- docent/trace_2.py,sha256=-OxzXF2kOFkhto1UGXHWVM797EN_BT_uwDSbzgMme8o,67145
5
- docent/trace_temp.py,sha256=Z0lAPwVzXjFvxpiU-CuvfWIslq9Q4alNkZMoQ77Xudk,40711
6
- docent/_log_util/__init__.py,sha256=3HXXrxrSm8PxwG4llotrCnSnp7GuroK1FNHsdg6f7aE,73
7
- docent/_log_util/logger.py,sha256=kwM0yRW1IJd6-XTorjWn48B4l8qvD2ZM6VDjY5eskQI,4422
8
- docent/data_models/__init__.py,sha256=bE_Wy4Ql-9-0ZPcolMCPHhYvaE_Ug6h-jV7wOJ_DAi0,399
9
- docent/data_models/_tiktoken_util.py,sha256=hC0EDDWItv5-0cONBnHWgZtQOflDU7ZNEhXPFo4DvPc,3057
10
- docent/data_models/agent_run.py,sha256=7_37I9aS9rhDTkAvMPwoJGssQldvvKte8qVb93EnAiY,19329
11
- docent/data_models/citation.py,sha256=2_M1-_olVOJtjCGGFx1GIwGYWl0ILHxRsW8-EFDS9j0,7844
12
- docent/data_models/judge.py,sha256=zPbTqztn-yWu6tgD3R5JTyGnNiDhY6cWQ-gz3e_eM5k,340
13
- docent/data_models/metadata_util.py,sha256=E-EClAP5vVm9xbfTlPSz0tUyCalOfN9Jujd6JGoRnBg,487
14
- docent/data_models/regex.py,sha256=0ciIerkrNwb91bY5mTcyO5nDWH67xx2tZYObV52fmBo,1684
15
- docent/data_models/remove_invalid_citation_ranges.py,sha256=3RSMsOzFO2cSjkxI549TAo12qdvD-AGHd05Jxu0amvs,6282
16
- docent/data_models/shared_types.py,sha256=jjm-Dh5S6v7UKInW7SEqoziOsx6Z7Uu4e3VzgCbTWvc,225
17
- docent/data_models/transcript.py,sha256=7cdj2KAO_e2k3rj7OPzJzmzrkxPHIW7fbHygKTr7EZg,19940
18
- docent/data_models/util.py,sha256=dK0dviDkDe8PiNCZisryctT-dzScWenLXQi6DOk9Ts4,7194
19
- docent/data_models/chat/__init__.py,sha256=ws77P3raDiOv6XesAMycUwu-uT75D5f9aNgjFeJbUH8,631
20
- docent/data_models/chat/content.py,sha256=Co-jO8frQa_DSP11wJuhPX0s-GpJk8yqtKqPeiAIZ_U,1672
21
- docent/data_models/chat/message.py,sha256=_72xeTdgv8ogQd4WLl1P3yXfIDkIEQrHlWgdvObeQxY,4291
22
- docent/data_models/chat/tool.py,sha256=MMglNHzkwHqUoK0xDWqs2FtelPsgHqwVpGpI1F8KZyw,3049
23
- docent/loaders/load_inspect.py,sha256=VLrtpvcVZ44n2DIPMwUivXqbvOWjaooGw6moY8UQ0VE,6789
24
- docent/samples/__init__.py,sha256=roDFnU6515l9Q8v17Es_SpWyY9jbm5d6X9lV01V0MZo,143
25
- docent/samples/load.py,sha256=ZGE07r83GBNO4A0QBh5aQ18WAu3mTWA1vxUoHd90nrM,207
26
- docent/samples/log.eval,sha256=orrW__9WBfANq7NwKsPSq9oTsQRcG6KohG5tMr_X_XY,397708
27
- docent/samples/tb_airline.json,sha256=eR2jFFRtOw06xqbEglh6-dPewjifOk-cuxJq67Dtu5I,47028
28
- docent/sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
- docent/sdk/agent_run_writer.py,sha256=0AWdxejoqZyuj9JSA39WlEwGcMSYTWNqnzIuluySY-M,11043
30
- docent/sdk/client.py,sha256=K1NVkj_CFj0q-2mSFvWfh8NTqXqosED--dv5aLD7yOE,18239
31
- docent_python-0.1.20a0.dist-info/METADATA,sha256=1XprRqUJ22jFi1WF_4X06nI3rL_fvQLQz7cjGqRXz-s,1114
32
- docent_python-0.1.20a0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
33
- docent_python-0.1.20a0.dist-info/licenses/LICENSE.md,sha256=QIMv2UiT6MppRasso4ymaA0w7ltkqmlL0HCt8CLD7Rc,580
34
- docent_python-0.1.20a0.dist-info/RECORD,,