docent-python 0.1.41a0__tar.gz → 0.1.43a0__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (65)
  1. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/.gitignore +3 -0
  2. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/PKG-INFO +1 -1
  3. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/_llm_util/data_models/exceptions.py +17 -0
  4. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/_llm_util/llm_svc.py +0 -37
  5. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/_llm_util/model_registry.py +16 -0
  6. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/_llm_util/providers/openai.py +1 -1
  7. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/_llm_util/providers/preference_types.py +4 -0
  8. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/data_models/_tiktoken_util.py +2 -2
  9. docent_python-0.1.43a0/docent/data_models/agent_run.py +605 -0
  10. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/data_models/citation.py +17 -1
  11. docent_python-0.1.43a0/docent/data_models/transcript.py +394 -0
  12. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/judges/impl.py +115 -53
  13. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/judges/runner.py +6 -7
  14. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/judges/types.py +7 -4
  15. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/sdk/client.py +118 -0
  16. docent_python-0.1.43a0/docent/sdk/llm_context.py +575 -0
  17. docent_python-0.1.43a0/docent/trace copy.py +2991 -0
  18. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/trace.py +16 -1
  19. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/pyproject.toml +1 -1
  20. docent_python-0.1.41a0/docent/data_models/agent_run.py +0 -473
  21. docent_python-0.1.41a0/docent/data_models/transcript.py +0 -305
  22. docent_python-0.1.41a0/docent/sdk/llm_context.py +0 -432
  23. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/LICENSE.md +0 -0
  24. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/README.md +0 -0
  25. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/__init__.py +0 -0
  26. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/_llm_util/__init__.py +0 -0
  27. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/_llm_util/data_models/__init__.py +0 -0
  28. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/_llm_util/data_models/llm_output.py +0 -0
  29. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/_llm_util/llm_cache.py +0 -0
  30. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/_llm_util/providers/__init__.py +0 -0
  31. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/_llm_util/providers/anthropic.py +0 -0
  32. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/_llm_util/providers/common.py +0 -0
  33. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/_llm_util/providers/google.py +0 -0
  34. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/_llm_util/providers/openrouter.py +0 -0
  35. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/_llm_util/providers/provider_registry.py +0 -0
  36. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/_log_util/__init__.py +0 -0
  37. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/_log_util/logger.py +0 -0
  38. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/data_models/__init__.py +0 -0
  39. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/data_models/chat/__init__.py +0 -0
  40. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/data_models/chat/content.py +0 -0
  41. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/data_models/chat/message.py +0 -0
  42. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/data_models/chat/tool.py +0 -0
  43. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/data_models/formatted_objects.py +0 -0
  44. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/data_models/judge.py +0 -0
  45. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/data_models/metadata_util.py +0 -0
  46. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/data_models/regex.py +0 -0
  47. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/data_models/util.py +0 -0
  48. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/judges/__init__.py +0 -0
  49. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/judges/analysis.py +0 -0
  50. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/judges/stats.py +0 -0
  51. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/judges/util/forgiving_json.py +0 -0
  52. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/judges/util/meta_schema.json +0 -0
  53. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/judges/util/meta_schema.py +0 -0
  54. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/judges/util/parse_output.py +0 -0
  55. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/judges/util/voting.py +0 -0
  56. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/loaders/load_inspect.py +0 -0
  57. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/py.typed +0 -0
  58. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/samples/__init__.py +0 -0
  59. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/samples/load.py +0 -0
  60. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/samples/log.eval +0 -0
  61. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/samples/tb_airline.json +0 -0
  62. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/sdk/__init__.py +0 -0
  63. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/sdk/agent_run_writer.py +0 -0
  64. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/docent/trace_temp.py +0 -0
  65. {docent_python-0.1.41a0 → docent_python-0.1.43a0}/uv.lock +0 -0
@@ -198,3 +198,6 @@ inspect_evals
198
198
 
199
199
  # test data cache
200
200
  data/cache
201
+
202
+ # dont commit package lock, force use of bun lock
203
+ package-lock.json
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docent-python
3
- Version: 0.1.41a0
3
+ Version: 0.1.43a0
4
4
  Summary: Docent SDK
5
5
  Project-URL: Homepage, https://github.com/TransluceAI/docent
6
6
  Project-URL: Issues, https://github.com/TransluceAI/docent/issues
@@ -1,7 +1,24 @@
1
+ from typing import Any
2
+
3
+
1
4
  class LLMException(Exception):
2
5
  error_type_id = "other"
3
6
  user_message = "The model failed to respond. Please try again later."
4
7
 
8
+ def serialize(self) -> dict[str, Any]:
9
+ data: dict[str, Any] = {
10
+ "type": self.__class__.__name__,
11
+ "user_message": getattr(self, "user_message", None),
12
+ "error_type_id": getattr(self, "error_type_id", None),
13
+ }
14
+ if failed_output := getattr(self, "failed_output", None):
15
+ data["failed_output"] = str(failed_output)
16
+ return data
17
+
18
+ @classmethod
19
+ def serialize_llm_errors(cls, errors: list["LLMException"]) -> list[dict[str, Any]]:
20
+ return [error.serialize() for error in errors]
21
+
5
22
 
6
23
  class CompletionTooLongException(LLMException):
7
24
  error_type_id = "completion_too_long"
@@ -433,40 +433,3 @@ class BaseLLMService:
433
433
  break
434
434
 
435
435
  return outputs
436
-
437
-
438
- async def get_llm_completions_async(
439
- inputs: list[MessagesInput],
440
- model_options: list[ModelOption],
441
- tools: list[ToolInfo] | None = None,
442
- tool_choice: Literal["auto", "required"] | None = None,
443
- max_new_tokens: int = 1024,
444
- temperature: float = 1.0,
445
- logprobs: bool = False,
446
- top_logprobs: int | None = None,
447
- timeout: float = 120.0,
448
- streaming_callback: AsyncLLMOutputStreamingCallback | None = None,
449
- validation_callback: AsyncLLMOutputStreamingCallback | None = None,
450
- completion_callback: AsyncLLMOutputStreamingCallback | None = None,
451
- use_cache: bool = False,
452
- _api_key_overrides: dict[str, str] = dict(),
453
- ) -> list[LLMOutput]:
454
- """Convenience method for backward compatibility"""
455
-
456
- svc = BaseLLMService()
457
- return await svc.get_completions(
458
- inputs=inputs,
459
- model_options=model_options,
460
- tools=tools,
461
- tool_choice=tool_choice,
462
- max_new_tokens=max_new_tokens,
463
- temperature=temperature,
464
- logprobs=logprobs,
465
- top_logprobs=top_logprobs,
466
- timeout=timeout,
467
- streaming_callback=streaming_callback,
468
- validation_callback=validation_callback,
469
- completion_callback=completion_callback,
470
- use_cache=use_cache,
471
- _api_key_overrides=_api_key_overrides,
472
- )
@@ -78,11 +78,27 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [
78
78
  ),
79
79
  (
80
80
  "gemini-2.5-pro",
81
+ # TODO(mengk, ryan): this is wrong for prompts > 200k
81
82
  ModelInfo(
82
83
  rate={"input": 1.25, "output": 10.00},
83
84
  context_window=1_000_000,
84
85
  ),
85
86
  ),
87
+ (
88
+ "gemini-3-pro-preview",
89
+ # TODO(mengk, ryan): this is wrong for prompts > 200k
90
+ ModelInfo(
91
+ rate={"input": 2.00, "output": 12.00},
92
+ context_window=1_048_576,
93
+ ),
94
+ ),
95
+ (
96
+ "gemini-3-flash-preview",
97
+ ModelInfo(
98
+ rate={"input": 0.50, "output": 3.00},
99
+ context_window=1_048_576,
100
+ ),
101
+ ),
86
102
  (
87
103
  "grok-4-fast",
88
104
  ModelInfo(
@@ -465,7 +465,7 @@ def chunk_and_tokenize(
465
465
  chunk_to_doc: list[int] = []
466
466
 
467
467
  for i, item in enumerate(text):
468
- tokens = encoding.encode(item)
468
+ tokens = encoding.encode(item, disallowed_special=())
469
469
  if len(tokens) <= window_size:
470
470
  chunks = [tokens]
471
471
  else:
@@ -98,6 +98,10 @@ class PublicProviderPreferences(BaseModel):
98
98
  model_name="claude-sonnet-4-5",
99
99
  reasoning_effort="medium",
100
100
  ),
101
+ ModelOption(
102
+ provider="google",
103
+ model_name="gemini-3-flash-preview",
104
+ ),
101
105
  ]
102
106
 
103
107
 
@@ -6,13 +6,13 @@ MAX_TOKENS = 100_000
6
6
  def get_token_count(text: str, model: str = "gpt-4") -> int:
7
7
  """Get the number of tokens in a text under the GPT-4 tokenization scheme."""
8
8
  encoding = tiktoken.encoding_for_model(model)
9
- return len(encoding.encode(text))
9
+ return len(encoding.encode(text, disallowed_special=()))
10
10
 
11
11
 
12
12
  def truncate_to_token_limit(text: str, max_tokens: int, model: str = "gpt-4") -> str:
13
13
  """Truncate text to stay within the specified token limit."""
14
14
  encoding = tiktoken.encoding_for_model(model)
15
- tokens = encoding.encode(text)
15
+ tokens = encoding.encode(text, disallowed_special=())
16
16
 
17
17
  if len(tokens) <= max_tokens:
18
18
  return text