docent-python 0.1.21a0__py3-none-any.whl → 0.1.22a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docent-python might be problematic. Click here for more details.
- docent/judges/impl.py +11 -1
- docent/judges/util/voting.py +35 -5
- docent/trace.py +18 -0
- {docent_python-0.1.21a0.dist-info → docent_python-0.1.22a0.dist-info}/METADATA +2 -1
- {docent_python-0.1.21a0.dist-info → docent_python-0.1.22a0.dist-info}/RECORD +7 -7
- {docent_python-0.1.21a0.dist-info → docent_python-0.1.22a0.dist-info}/WHEEL +0 -0
- {docent_python-0.1.21a0.dist-info → docent_python-0.1.22a0.dist-info}/licenses/LICENSE.md +0 -0
docent/judges/impl.py
CHANGED
|
@@ -8,7 +8,11 @@ from docent._log_util import get_logger
|
|
|
8
8
|
from docent.data_models.agent_run import AgentRun
|
|
9
9
|
from docent.judges.types import JudgeResult, ResultType, Rubric
|
|
10
10
|
from docent.judges.util.parse_output import parse_and_validate_llm_output
|
|
11
|
-
from docent.judges.util.voting import find_modal_result, get_agreement_keys
|
|
11
|
+
from docent.judges.util.voting import (
|
|
12
|
+
compute_output_distribution,
|
|
13
|
+
find_modal_result,
|
|
14
|
+
get_agreement_keys,
|
|
15
|
+
)
|
|
12
16
|
|
|
13
17
|
logger = get_logger(__name__)
|
|
14
18
|
|
|
@@ -74,6 +78,11 @@ class MajorityVotingJudge(BaseJudge):
|
|
|
74
78
|
)
|
|
75
79
|
final_output = indep_results[final_max_idx]
|
|
76
80
|
|
|
81
|
+
# Compute the distribution of the output across the agreement keys
|
|
82
|
+
final_output_distribution = compute_output_distribution(
|
|
83
|
+
indep_results, self.cfg.output_schema, agreement_keys
|
|
84
|
+
)
|
|
85
|
+
|
|
77
86
|
return JudgeResult(
|
|
78
87
|
agent_run_id=agent_run.id,
|
|
79
88
|
rubric_id=self.cfg.id,
|
|
@@ -85,6 +94,7 @@ class MajorityVotingJudge(BaseJudge):
|
|
|
85
94
|
"final_results": indep_results,
|
|
86
95
|
"final_agt_key_modes_and_counts": final_agt_key_modes_and_counts,
|
|
87
96
|
"final_max_idx": final_max_idx,
|
|
97
|
+
"final_output_distribution": final_output_distribution,
|
|
88
98
|
},
|
|
89
99
|
result_type=ResultType.DIRECT_RESULT,
|
|
90
100
|
)
|
docent/judges/util/voting.py
CHANGED
|
@@ -5,7 +5,7 @@ from typing import Any, cast
|
|
|
5
5
|
def get_agreement_keys(schema: dict[str, Any]) -> list[str]:
|
|
6
6
|
"""Get list of top-level keys in schema that we want to measure agreement on.
|
|
7
7
|
|
|
8
|
-
This includes enum, bool, and int fields.
|
|
8
|
+
This includes enum and bool fields.
|
|
9
9
|
|
|
10
10
|
Args:
|
|
11
11
|
schema: JSON schema dict
|
|
@@ -29,10 +29,7 @@ def get_agreement_keys(schema: dict[str, Any]) -> list[str]:
|
|
|
29
29
|
# Include boolean fields
|
|
30
30
|
if field_type == "boolean":
|
|
31
31
|
agreement_keys.append(key)
|
|
32
|
-
# Include integer fields
|
|
33
|
-
elif field_type == "integer":
|
|
34
|
-
agreement_keys.append(key)
|
|
35
|
-
# Include enum fields (even strings)
|
|
32
|
+
# Include enum fields (strings and numbers must be in this category)
|
|
36
33
|
elif "enum" in field_schema:
|
|
37
34
|
agreement_keys.append(key)
|
|
38
35
|
|
|
@@ -82,3 +79,36 @@ def find_modal_result(indep_results: list[dict[str, Any]], agreement_keys: list[
|
|
|
82
79
|
max_idx = indep_result_scores.index(max(indep_result_scores))
|
|
83
80
|
|
|
84
81
|
return max_idx, agt_key_modes_and_counts
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def compute_output_distribution(
|
|
85
|
+
indep_results: list[dict[str, Any]], output_schema: dict[str, Any], agreement_keys: list[str]
|
|
86
|
+
):
|
|
87
|
+
def _get_possible_values(key: str) -> list[str | bool | int | float]:
|
|
88
|
+
if "enum" in output_schema.get("properties", {}).get(key, {}):
|
|
89
|
+
return output_schema.get("properties", {}).get(key, {}).get("enum", [])
|
|
90
|
+
elif output_schema.get("properties", {}).get(key, {}).get("type") == "boolean":
|
|
91
|
+
return [True, False]
|
|
92
|
+
else:
|
|
93
|
+
return []
|
|
94
|
+
|
|
95
|
+
distributions: dict[str, dict[str | bool | int | float, float]] = {
|
|
96
|
+
key: {value: 0.0 for value in _get_possible_values(key)} for key in agreement_keys
|
|
97
|
+
}
|
|
98
|
+
# Collect counts for each possible value
|
|
99
|
+
for result in indep_results:
|
|
100
|
+
for key in agreement_keys:
|
|
101
|
+
if (value := result.get(key)) is not None: # Could be none if the key is optional
|
|
102
|
+
assert (
|
|
103
|
+
value in distributions[key]
|
|
104
|
+
), "this should never happen; the value must be in possible values, since judge results have been validated against the schema"
|
|
105
|
+
distributions[key][value] += 1
|
|
106
|
+
# Normalize
|
|
107
|
+
for key in distributions:
|
|
108
|
+
total = sum(distributions[key].values())
|
|
109
|
+
if total == 0:
|
|
110
|
+
continue
|
|
111
|
+
for value in distributions[key]:
|
|
112
|
+
distributions[key][value] /= total
|
|
113
|
+
|
|
114
|
+
return distributions
|
docent/trace.py
CHANGED
|
@@ -43,6 +43,7 @@ class Instruments(Enum):
|
|
|
43
43
|
ANTHROPIC = "anthropic"
|
|
44
44
|
BEDROCK = "bedrock"
|
|
45
45
|
LANGCHAIN = "langchain"
|
|
46
|
+
GOOGLE_GENERATIVEAI = "google_generativeai"
|
|
46
47
|
|
|
47
48
|
|
|
48
49
|
class DocentTracer:
|
|
@@ -392,6 +393,23 @@ class DocentTracer:
|
|
|
392
393
|
except Exception as e:
|
|
393
394
|
logger.warning(f"Failed to instrument LangChain: {e}")
|
|
394
395
|
|
|
396
|
+
# Instrument Google Generative AI with our isolated tracer provider
|
|
397
|
+
if Instruments.GOOGLE_GENERATIVEAI in enabled_instruments:
|
|
398
|
+
try:
|
|
399
|
+
if is_package_installed("google-generativeai") or is_package_installed(
|
|
400
|
+
"google-genai"
|
|
401
|
+
):
|
|
402
|
+
from opentelemetry.instrumentation.google_generativeai import (
|
|
403
|
+
GoogleGenerativeAiInstrumentor,
|
|
404
|
+
)
|
|
405
|
+
|
|
406
|
+
GoogleGenerativeAiInstrumentor().instrument(
|
|
407
|
+
tracer_provider=self._tracer_provider
|
|
408
|
+
)
|
|
409
|
+
logger.info("Instrumented Google Generative AI")
|
|
410
|
+
except Exception as e:
|
|
411
|
+
logger.warning(f"Failed to instrument Google Generative AI: {e}")
|
|
412
|
+
|
|
395
413
|
# Register cleanup handlers
|
|
396
414
|
self._register_cleanup()
|
|
397
415
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: docent-python
|
|
3
|
-
Version: 0.1.21a0
|
|
3
|
+
Version: 0.1.22a0
|
|
4
4
|
Summary: Docent SDK
|
|
5
5
|
Project-URL: Homepage, https://github.com/TransluceAI/docent
|
|
6
6
|
Project-URL: Issues, https://github.com/TransluceAI/docent/issues
|
|
@@ -20,6 +20,7 @@ Requires-Dist: opentelemetry-exporter-otlp-proto-grpc>=1.34.1
|
|
|
20
20
|
Requires-Dist: opentelemetry-exporter-otlp-proto-http>=1.34.1
|
|
21
21
|
Requires-Dist: opentelemetry-instrumentation-anthropic>=0.40.14
|
|
22
22
|
Requires-Dist: opentelemetry-instrumentation-bedrock>=0.40.14
|
|
23
|
+
Requires-Dist: opentelemetry-instrumentation-google-generativeai>=0.40.14
|
|
23
24
|
Requires-Dist: opentelemetry-instrumentation-langchain>=0.40.14
|
|
24
25
|
Requires-Dist: opentelemetry-instrumentation-openai>=0.40.14
|
|
25
26
|
Requires-Dist: opentelemetry-instrumentation-threading>=0.55b1
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
docent/__init__.py,sha256=fuhETwJPcesiB76Zxa64HBJxeaaTyRalIH-fs77TWsU,112
|
|
2
2
|
docent/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
-
docent/trace.py,sha256=
|
|
3
|
+
docent/trace.py,sha256=u_1M_B1ncCR5a0Yy5ppQY_0k0AFYe4UhSIGeTuZCH0w,67271
|
|
4
4
|
docent/trace_2.py,sha256=-OxzXF2kOFkhto1UGXHWVM797EN_BT_uwDSbzgMme8o,67145
|
|
5
5
|
docent/trace_temp.py,sha256=Z0lAPwVzXjFvxpiU-CuvfWIslq9Q4alNkZMoQ77Xudk,40711
|
|
6
6
|
docent/_llm_util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -37,13 +37,13 @@ docent/data_models/chat/content.py,sha256=Co-jO8frQa_DSP11wJuhPX0s-GpJk8yqtKqPei
|
|
|
37
37
|
docent/data_models/chat/message.py,sha256=_72xeTdgv8ogQd4WLl1P3yXfIDkIEQrHlWgdvObeQxY,4291
|
|
38
38
|
docent/data_models/chat/tool.py,sha256=MMglNHzkwHqUoK0xDWqs2FtelPsgHqwVpGpI1F8KZyw,3049
|
|
39
39
|
docent/judges/__init__.py,sha256=Sob1uxJRgmr2S2sz4J6skHP8iqcVoiUq7Jlh8S5Sj9Y,462
|
|
40
|
-
docent/judges/impl.py,sha256=
|
|
40
|
+
docent/judges/impl.py,sha256=Cqu-qJDfCnca7CZ_TgDoMPt6DijNv-SjK2RwExRo-9I,9199
|
|
41
41
|
docent/judges/types.py,sha256=NlLv42iLDORbPAHppCz-YWZ6ksR4QYDWAweGw75izJ0,8439
|
|
42
42
|
docent/judges/util/forgiving_json.py,sha256=zSh0LF3UVHdSjuMNvEiqUmSxpxPaqK1rSLiI6KCNihg,3549
|
|
43
43
|
docent/judges/util/meta_schema.json,sha256=g3MUa_6e38I3GqZryy8b1w_Y9Krx2xSiWIuaG8Zpszc,2055
|
|
44
44
|
docent/judges/util/meta_schema.py,sha256=6IrIRHERJ6tkRcUtUShJ84I68yUJgkwfFeBjgt42qEA,930
|
|
45
45
|
docent/judges/util/parse_output.py,sha256=qvqt7TEnrAqvzYHqip48boMQSUcoGa-1PA1gIGn-w4s,3381
|
|
46
|
-
docent/judges/util/voting.py,sha256=
|
|
46
|
+
docent/judges/util/voting.py,sha256=I0Ti0eP7DirdmEQlz-5plrvURUZGmJw0o9yGFgIzE9k,4377
|
|
47
47
|
docent/loaders/load_inspect.py,sha256=VLrtpvcVZ44n2DIPMwUivXqbvOWjaooGw6moY8UQ0VE,6789
|
|
48
48
|
docent/samples/__init__.py,sha256=roDFnU6515l9Q8v17Es_SpWyY9jbm5d6X9lV01V0MZo,143
|
|
49
49
|
docent/samples/load.py,sha256=ZGE07r83GBNO4A0QBh5aQ18WAu3mTWA1vxUoHd90nrM,207
|
|
@@ -52,7 +52,7 @@ docent/samples/tb_airline.json,sha256=eR2jFFRtOw06xqbEglh6-dPewjifOk-cuxJq67Dtu5
|
|
|
52
52
|
docent/sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
53
53
|
docent/sdk/agent_run_writer.py,sha256=0AWdxejoqZyuj9JSA39WlEwGcMSYTWNqnzIuluySY-M,11043
|
|
54
54
|
docent/sdk/client.py,sha256=K1NVkj_CFj0q-2mSFvWfh8NTqXqosED--dv5aLD7yOE,18239
|
|
55
|
-
docent_python-0.1.
|
|
56
|
-
docent_python-0.1.21a0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
57
|
-
docent_python-0.1.21a0.dist-info/licenses/LICENSE.md,sha256=QIMv2UiT6MppRasso4ymaA0w7ltkqmlL0HCt8CLD7Rc,580
|
|
58
|
-
docent_python-0.1.21a0.dist-info/RECORD,,
|
|
55
|
+
docent_python-0.1.22a0.dist-info/METADATA,sha256=4JAnMdDEsuaTCe48gr3wfNqU1SmdxrdAr1mfBQ3WtGQ,1351
|
|
56
|
+
docent_python-0.1.22a0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
57
|
+
docent_python-0.1.22a0.dist-info/licenses/LICENSE.md,sha256=QIMv2UiT6MppRasso4ymaA0w7ltkqmlL0HCt8CLD7Rc,580
|
|
58
|
+
docent_python-0.1.22a0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|