docent-python 0.1.21a0__tar.gz → 0.1.22a0__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docent-python might be problematic. Click here for more details.

Files changed (60)
  1. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/PKG-INFO +2 -1
  2. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/judges/impl.py +11 -1
  3. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/judges/util/voting.py +35 -5
  4. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/trace.py +18 -0
  5. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/pyproject.toml +3 -2
  6. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/.gitignore +0 -0
  7. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/LICENSE.md +0 -0
  8. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/README.md +0 -0
  9. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/__init__.py +0 -0
  10. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/_llm_util/__init__.py +0 -0
  11. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/_llm_util/data_models/__init__.py +0 -0
  12. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/_llm_util/data_models/exceptions.py +0 -0
  13. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/_llm_util/data_models/llm_output.py +0 -0
  14. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/_llm_util/data_models/simple_svc.py +0 -0
  15. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/_llm_util/llm_cache.py +0 -0
  16. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/_llm_util/model_registry.py +0 -0
  17. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/_llm_util/prod_llms.py +0 -0
  18. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/_llm_util/providers/__init__.py +0 -0
  19. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/_llm_util/providers/anthropic.py +0 -0
  20. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/_llm_util/providers/common.py +0 -0
  21. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/_llm_util/providers/google.py +0 -0
  22. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/_llm_util/providers/openai.py +0 -0
  23. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/_llm_util/providers/openrouter.py +0 -0
  24. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/_llm_util/providers/preference_types.py +0 -0
  25. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/_llm_util/providers/provider_registry.py +0 -0
  26. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/_log_util/__init__.py +0 -0
  27. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/_log_util/logger.py +0 -0
  28. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/data_models/__init__.py +0 -0
  29. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/data_models/_tiktoken_util.py +0 -0
  30. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/data_models/agent_run.py +0 -0
  31. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/data_models/chat/__init__.py +0 -0
  32. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/data_models/chat/content.py +0 -0
  33. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/data_models/chat/message.py +0 -0
  34. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/data_models/chat/tool.py +0 -0
  35. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/data_models/citation.py +0 -0
  36. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/data_models/judge.py +0 -0
  37. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/data_models/metadata_util.py +0 -0
  38. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/data_models/regex.py +0 -0
  39. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/data_models/remove_invalid_citation_ranges.py +0 -0
  40. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/data_models/shared_types.py +0 -0
  41. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/data_models/transcript.py +0 -0
  42. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/data_models/util.py +0 -0
  43. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/judges/__init__.py +0 -0
  44. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/judges/types.py +0 -0
  45. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/judges/util/forgiving_json.py +0 -0
  46. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/judges/util/meta_schema.json +0 -0
  47. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/judges/util/meta_schema.py +0 -0
  48. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/judges/util/parse_output.py +0 -0
  49. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/loaders/load_inspect.py +0 -0
  50. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/py.typed +0 -0
  51. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/samples/__init__.py +0 -0
  52. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/samples/load.py +0 -0
  53. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/samples/log.eval +0 -0
  54. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/samples/tb_airline.json +0 -0
  55. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/sdk/__init__.py +0 -0
  56. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/sdk/agent_run_writer.py +0 -0
  57. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/sdk/client.py +0 -0
  58. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/trace_2.py +0 -0
  59. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/docent/trace_temp.py +0 -0
  60. {docent_python-0.1.21a0 → docent_python-0.1.22a0}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docent-python
3
- Version: 0.1.21a0
3
+ Version: 0.1.22a0
4
4
  Summary: Docent SDK
5
5
  Project-URL: Homepage, https://github.com/TransluceAI/docent
6
6
  Project-URL: Issues, https://github.com/TransluceAI/docent/issues
@@ -20,6 +20,7 @@ Requires-Dist: opentelemetry-exporter-otlp-proto-grpc>=1.34.1
20
20
  Requires-Dist: opentelemetry-exporter-otlp-proto-http>=1.34.1
21
21
  Requires-Dist: opentelemetry-instrumentation-anthropic>=0.40.14
22
22
  Requires-Dist: opentelemetry-instrumentation-bedrock>=0.40.14
23
+ Requires-Dist: opentelemetry-instrumentation-google-generativeai>=0.40.14
23
24
  Requires-Dist: opentelemetry-instrumentation-langchain>=0.40.14
24
25
  Requires-Dist: opentelemetry-instrumentation-openai>=0.40.14
25
26
  Requires-Dist: opentelemetry-instrumentation-threading>=0.55b1
@@ -8,7 +8,11 @@ from docent._log_util import get_logger
8
8
  from docent.data_models.agent_run import AgentRun
9
9
  from docent.judges.types import JudgeResult, ResultType, Rubric
10
10
  from docent.judges.util.parse_output import parse_and_validate_llm_output
11
- from docent.judges.util.voting import find_modal_result, get_agreement_keys
11
+ from docent.judges.util.voting import (
12
+ compute_output_distribution,
13
+ find_modal_result,
14
+ get_agreement_keys,
15
+ )
12
16
 
13
17
  logger = get_logger(__name__)
14
18
 
@@ -74,6 +78,11 @@ class MajorityVotingJudge(BaseJudge):
74
78
  )
75
79
  final_output = indep_results[final_max_idx]
76
80
 
81
+ # Compute the distribution of the output across the agreement keys
82
+ final_output_distribution = compute_output_distribution(
83
+ indep_results, self.cfg.output_schema, agreement_keys
84
+ )
85
+
77
86
  return JudgeResult(
78
87
  agent_run_id=agent_run.id,
79
88
  rubric_id=self.cfg.id,
@@ -85,6 +94,7 @@ class MajorityVotingJudge(BaseJudge):
85
94
  "final_results": indep_results,
86
95
  "final_agt_key_modes_and_counts": final_agt_key_modes_and_counts,
87
96
  "final_max_idx": final_max_idx,
97
+ "final_output_distribution": final_output_distribution,
88
98
  },
89
99
  result_type=ResultType.DIRECT_RESULT,
90
100
  )
@@ -5,7 +5,7 @@ from typing import Any, cast
5
5
  def get_agreement_keys(schema: dict[str, Any]) -> list[str]:
6
6
  """Get list of top-level keys in schema that we want to measure agreement on.
7
7
 
8
- This includes enum, bool, and int fields. We skip float and strings.
8
+ This includes enum and bool fields.
9
9
 
10
10
  Args:
11
11
  schema: JSON schema dict
@@ -29,10 +29,7 @@ def get_agreement_keys(schema: dict[str, Any]) -> list[str]:
29
29
  # Include boolean fields
30
30
  if field_type == "boolean":
31
31
  agreement_keys.append(key)
32
- # Include integer fields
33
- elif field_type == "integer":
34
- agreement_keys.append(key)
35
- # Include enum fields (even strings)
32
+ # Include enum fields (strings and numbers must be in this category)
36
33
  elif "enum" in field_schema:
37
34
  agreement_keys.append(key)
38
35
 
@@ -82,3 +79,36 @@ def find_modal_result(indep_results: list[dict[str, Any]], agreement_keys: list[
82
79
  max_idx = indep_result_scores.index(max(indep_result_scores))
83
80
 
84
81
  return max_idx, agt_key_modes_and_counts
82
+
83
+
84
+ def compute_output_distribution(
85
+ indep_results: list[dict[str, Any]], output_schema: dict[str, Any], agreement_keys: list[str]
86
+ ):
87
+ def _get_possible_values(key: str) -> list[str | bool | int | float]:
88
+ if "enum" in output_schema.get("properties", {}).get(key, {}):
89
+ return output_schema.get("properties", {}).get(key, {}).get("enum", [])
90
+ elif output_schema.get("properties", {}).get(key, {}).get("type") == "boolean":
91
+ return [True, False]
92
+ else:
93
+ return []
94
+
95
+ distributions: dict[str, dict[str | bool | int | float, float]] = {
96
+ key: {value: 0.0 for value in _get_possible_values(key)} for key in agreement_keys
97
+ }
98
+ # Collect counts for each possible value
99
+ for result in indep_results:
100
+ for key in agreement_keys:
101
+ if (value := result.get(key)) is not None: # Could be none if the key is optional
102
+ assert (
103
+ value in distributions[key]
104
+ ), "this should never happen; the value must be in possible values, since judge results have been validated against the schema"
105
+ distributions[key][value] += 1
106
+ # Normalize
107
+ for key in distributions:
108
+ total = sum(distributions[key].values())
109
+ if total == 0:
110
+ continue
111
+ for value in distributions[key]:
112
+ distributions[key][value] /= total
113
+
114
+ return distributions
@@ -43,6 +43,7 @@ class Instruments(Enum):
43
43
  ANTHROPIC = "anthropic"
44
44
  BEDROCK = "bedrock"
45
45
  LANGCHAIN = "langchain"
46
+ GOOGLE_GENERATIVEAI = "google_generativeai"
46
47
 
47
48
 
48
49
  class DocentTracer:
@@ -392,6 +393,23 @@ class DocentTracer:
392
393
  except Exception as e:
393
394
  logger.warning(f"Failed to instrument LangChain: {e}")
394
395
 
396
+ # Instrument Google Generative AI with our isolated tracer provider
397
+ if Instruments.GOOGLE_GENERATIVEAI in enabled_instruments:
398
+ try:
399
+ if is_package_installed("google-generativeai") or is_package_installed(
400
+ "google-genai"
401
+ ):
402
+ from opentelemetry.instrumentation.google_generativeai import (
403
+ GoogleGenerativeAiInstrumentor,
404
+ )
405
+
406
+ GoogleGenerativeAiInstrumentor().instrument(
407
+ tracer_provider=self._tracer_provider
408
+ )
409
+ logger.info("Instrumented Google Generative AI")
410
+ except Exception as e:
411
+ logger.warning(f"Failed to instrument Google Generative AI: {e}")
412
+
395
413
  # Register cleanup handlers
396
414
  self._register_cleanup()
397
415
 
@@ -1,7 +1,7 @@
1
1
  [project]
2
2
  name = "docent-python"
3
3
  description = "Docent SDK"
4
- version = "0.1.21-alpha"
4
+ version = "0.1.22-alpha"
5
5
  authors = [
6
6
  { name="Transluce", email="info@transluce.org" },
7
7
  ]
@@ -28,11 +28,12 @@ dependencies = [
28
28
  "opentelemetry-exporter-otlp-proto-grpc>=1.34.1",
29
29
  "opentelemetry-exporter-otlp-proto-http>=1.34.1",
30
30
  # Instrumentation
31
+ "opentelemetry-instrumentation-threading>=0.55b1",
31
32
  "opentelemetry-instrumentation-anthropic>=0.40.14",
32
33
  "opentelemetry-instrumentation-bedrock>=0.40.14",
33
34
  "opentelemetry-instrumentation-langchain>=0.40.14",
34
35
  "opentelemetry-instrumentation-openai>=0.40.14",
35
- "opentelemetry-instrumentation-threading>=0.55b1",
36
+ "opentelemetry-instrumentation-google-generativeai>=0.40.14",
36
37
  ]
37
38
 
38
39
  [build-system]