docent-python 0.1.51a0__tar.gz → 0.1.53a0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/PKG-INFO +1 -1
  2. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/data_models/feedback.py +32 -8
  3. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/sdk/client.py +69 -10
  4. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/pyproject.toml +1 -1
  5. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/.gitignore +0 -0
  6. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/LICENSE.md +0 -0
  7. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/README.md +0 -0
  8. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/__init__.py +0 -0
  9. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/_llm_util/__init__.py +0 -0
  10. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/_llm_util/data_models/__init__.py +0 -0
  11. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/_llm_util/data_models/exceptions.py +0 -0
  12. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/_llm_util/data_models/llm_output.py +0 -0
  13. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/_llm_util/llm_cache.py +0 -0
  14. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/_llm_util/llm_svc.py +0 -0
  15. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/_llm_util/model_registry.py +0 -0
  16. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/_llm_util/providers/__init__.py +0 -0
  17. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/_llm_util/providers/anthropic.py +0 -0
  18. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/_llm_util/providers/common.py +0 -0
  19. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/_llm_util/providers/google.py +0 -0
  20. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/_llm_util/providers/openai.py +0 -0
  21. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/_llm_util/providers/openrouter.py +0 -0
  22. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/_llm_util/providers/preference_types.py +0 -0
  23. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/_llm_util/providers/provider_registry.py +0 -0
  24. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/_log_util/__init__.py +0 -0
  25. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/_log_util/logger.py +0 -0
  26. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/data_models/__init__.py +0 -0
  27. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/data_models/_tiktoken_util.py +0 -0
  28. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/data_models/agent_run.py +0 -0
  29. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/data_models/chat/__init__.py +0 -0
  30. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/data_models/chat/content.py +0 -0
  31. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/data_models/chat/message.py +0 -0
  32. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/data_models/chat/response_format.py +0 -0
  33. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/data_models/chat/tool.py +0 -0
  34. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/data_models/citation.py +0 -0
  35. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/data_models/formatted_objects.py +0 -0
  36. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/data_models/judge.py +0 -0
  37. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/data_models/metadata_util.py +0 -0
  38. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/data_models/regex.py +0 -0
  39. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/data_models/transcript.py +0 -0
  40. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/data_models/util.py +0 -0
  41. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/judges/__init__.py +0 -0
  42. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/judges/analysis.py +0 -0
  43. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/judges/impl.py +0 -0
  44. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/judges/runner.py +0 -0
  45. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/judges/stats.py +0 -0
  46. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/judges/types.py +0 -0
  47. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/judges/util/forgiving_json.py +0 -0
  48. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/judges/util/meta_schema.json +0 -0
  49. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/judges/util/meta_schema.py +0 -0
  50. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/judges/util/parse_output.py +0 -0
  51. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/judges/util/template_formatter.py +0 -0
  52. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/judges/util/voting.py +0 -0
  53. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/loaders/load_inspect.py +0 -0
  54. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/mcp/__init__.py +0 -0
  55. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/mcp/__main__.py +0 -0
  56. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/mcp/server.py +0 -0
  57. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/py.typed +0 -0
  58. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/samples/__init__.py +0 -0
  59. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/samples/load.py +0 -0
  60. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/samples/log.eval +0 -0
  61. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/samples/tb_airline.json +0 -0
  62. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/sdk/__init__.py +0 -0
  63. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/sdk/agent_run_writer.py +0 -0
  64. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/sdk/llm_context.py +0 -0
  65. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/sdk/llm_request.py +0 -0
  66. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/trace.py +0 -0
  67. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/trace_temp.py +0 -0
  68. {docent_python-0.1.51a0 → docent_python-0.1.53a0}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docent-python
3
- Version: 0.1.51a0
3
+ Version: 0.1.53a0
4
4
  Summary: Docent SDK
5
5
  Project-URL: Homepage, https://github.com/TransluceAI/docent
6
6
  Project-URL: Issues, https://github.com/TransluceAI/docent/issues
@@ -101,11 +101,7 @@ class QAPair(BaseModel):
101
101
  """A single review-focus answer captured for one run."""
102
102
 
103
103
  # What the user was shown
104
- focus_item: LabelingRequestFocusItem
105
-
106
- # Whether the user selected a sample answer or not
107
- selected_sample_index: int | None = None
108
- is_custom_response: bool = False
104
+ focus_index: int
109
105
 
110
106
  # What the user responded
111
107
  answer: str
@@ -115,9 +111,15 @@ class QAPair(BaseModel):
115
111
  status: Literal["answered", "skipped"]
116
112
  timestamp: datetime = Field(default_factory=datetime.now)
117
113
 
118
- def to_str(self, indent: int = 0) -> str:
114
+ def to_str(self, labeling_request: "LabelingRequest", indent: int = 0) -> str:
119
115
  """Render QA pair in a deterministic LLM-facing format."""
120
- lines = self.focus_item.to_str(indent=indent).splitlines()
116
+ if self.focus_index < 0 or self.focus_index >= len(labeling_request.review_focus):
117
+ raise ValueError(
118
+ f"focus_index={self.focus_index} is out of bounds for review_focus length "
119
+ f"{len(labeling_request.review_focus)}"
120
+ )
121
+ focus_item = labeling_request.review_focus[self.focus_index]
122
+ lines = focus_item.to_str(indent=indent).splitlines()
121
123
  lines.append(f"User answer: {_text_or_na(self.answer)}")
122
124
  lines.append(f"User explanation: {_text_or_na(self.explanation)}")
123
125
  return "\n".join(lines)
@@ -204,6 +206,7 @@ class LabeledRun(BaseModel):
204
206
  class AgentRunFeedbackContext(BaseModel):
205
207
  """All feedback collected for a single agent run."""
206
208
 
209
+ feedback_context_id: str | None = None
207
210
  agent_run_id: str
208
211
  round: int
209
212
  created_at: datetime = Field(default_factory=datetime.now)
@@ -232,7 +235,10 @@ class AgentRunFeedbackContext(BaseModel):
232
235
  qa_lines.append("N/A")
233
236
  else:
234
237
  for qa_idx, qa_pair in enumerate(self.qa_pairs, start=1):
235
- qa_entry_lines = qa_pair.to_str(indent=indent).splitlines()
238
+ qa_entry_lines = qa_pair.to_str(
239
+ labeling_request=self.labeling_request,
240
+ indent=indent,
241
+ ).splitlines()
236
242
  qa_lines.extend(_tag_block(f"QA {qa_idx}", qa_entry_lines, indent))
237
243
  lines.extend(_tag_block("Question Answer Pairs", qa_lines, indent))
238
244
 
@@ -280,6 +286,24 @@ class FeedbackContextsResponse(BaseModel):
280
286
  contexts: list[FeedbackContext] = Field(default_factory=list[FeedbackContext])
281
287
 
282
288
 
289
+ FeedbackJobStatus = Literal["pending", "running", "cancelling", "canceled", "completed"]
290
+
291
+
292
+ class StartFeedbackContextsJobResponse(BaseModel):
293
+ """Response for enqueueing or reusing a feedback contexts job."""
294
+
295
+ job_id: str
296
+
297
+
298
+ class FeedbackContextsJobStateResponse(BaseModel):
299
+ """Current feedback contexts job status and round-scoped contexts."""
300
+
301
+ job_id: str | None
302
+ job_status: FeedbackJobStatus | None
303
+ current_round: int
304
+ contexts: list[FeedbackContext] = Field(default_factory=list[FeedbackContext])
305
+
306
+
283
307
  class UserData(BaseModel):
284
308
  """User Data (U) for user-context inference and downstream evaluation."""
285
309
 
@@ -21,7 +21,11 @@ from tqdm import tqdm
21
21
  from docent._llm_util.providers.preference_types import ModelOption
22
22
  from docent._log_util.logger import LoggerAdapter, get_logger
23
23
  from docent.data_models.agent_run import AgentRun
24
- from docent.data_models.feedback import AgentRunFeedbackContext, FeedbackContextsResponse
24
+ from docent.data_models.feedback import (
25
+ AgentRunFeedbackContext,
26
+ FeedbackContextsJobStateResponse,
27
+ StartFeedbackContextsJobResponse,
28
+ )
25
29
  from docent.data_models.judge import Label
26
30
  from docent.judges.util.meta_schema import validate_judge_result_schema
27
31
  from docent.loaders import load_inspect
@@ -878,6 +882,44 @@ class Docent:
878
882
  llm_svc = BaseLLMService() # reads API keys from environment
879
883
  return build_judge(rubric, llm_svc)
880
884
 
885
+ def start_rubric_eval_job(
886
+ self,
887
+ collection_id: str,
888
+ rubric_id: str,
889
+ max_agent_runs: int | None = None,
890
+ n_rollouts_per_input: int = 1,
891
+ max_parallel: int | None = None,
892
+ ) -> str:
893
+ """Start or reuse a rubric evaluation job.
894
+
895
+ Args:
896
+ collection_id: ID of the Collection.
897
+ rubric_id: The ID of the rubric to evaluate.
898
+ max_agent_runs: Optional limit on the number of agent runs to evaluate.
899
+ n_rollouts_per_input: Number of judge rollouts to generate per agent run.
900
+ max_parallel: Optional backend concurrency override for the evaluation job.
901
+
902
+ Returns:
903
+ str: The ID of the created or reused job.
904
+
905
+ Raises:
906
+ requests.exceptions.HTTPError: If the API request fails.
907
+ ValueError: If the response does not contain a job ID.
908
+ """
909
+ url = f"{self._api_url}/rubric/{collection_id}/{rubric_id}/evaluate"
910
+ payload = {
911
+ "max_agent_runs": max_agent_runs,
912
+ "n_rollouts_per_input": n_rollouts_per_input,
913
+ "max_parallel": max_parallel,
914
+ }
915
+ response = self._session.post(url, json=payload)
916
+ self._handle_response_errors(response)
917
+
918
+ job_id = response.json().get("job_id")
919
+ if job_id is None:
920
+ raise ValueError("Failed to start rubric eval job: 'job_id' missing in response.")
921
+ return job_id
922
+
881
923
  def get_rubric_run_state(
882
924
  self,
883
925
  collection_id: str,
@@ -886,7 +928,7 @@ class Docent:
886
928
  filter_dict: dict[str, Any] | None = None,
887
929
  include_failures: bool = False,
888
930
  ) -> dict[str, Any]:
889
- """Get rubric run state for a given collection and rubric.
931
+ """Get rubric evaluation results and progress for a collection/rubric.
890
932
 
891
933
  Args:
892
934
  collection_id: ID of the Collection.
@@ -900,6 +942,10 @@ class Docent:
900
942
 
901
943
  Raises:
902
944
  requests.exceptions.HTTPError: If the API request fails.
945
+
946
+ Note:
947
+ This method does not start evaluation. Use `start_rubric_eval_job()` to
948
+ enqueue or reuse a rubric evaluation job.
903
949
  """
904
950
  url = f"{self._api_url}/rubric/{collection_id}/{rubric_id}/rubric_run_state"
905
951
  body = {
@@ -979,7 +1025,7 @@ class Docent:
979
1025
  self._handle_response_errors(response)
980
1026
  return response.json()["feedback_session_id"]
981
1027
 
982
- def get_feedback_contexts(
1028
+ def start_feedback_contexts_job(
983
1029
  self,
984
1030
  collection_id: str,
985
1031
  feedback_session_id: str,
@@ -989,8 +1035,8 @@ class Docent:
989
1035
  candidate_pool_limit: int = 1_000,
990
1036
  where_clause: str | None = None,
991
1037
  increment_round: bool = False,
992
- ) -> FeedbackContextsResponse:
993
- """Generate or fetch feedback contexts for the current session round."""
1038
+ ) -> StartFeedbackContextsJobResponse:
1039
+ """Start or reuse a background job to compute feedback contexts for a session."""
994
1040
  payload = {
995
1041
  "feedback_session_id": feedback_session_id,
996
1042
  "num_samples": num_samples,
@@ -1000,10 +1046,24 @@ class Docent:
1000
1046
  "where_clause": where_clause,
1001
1047
  "increment_round": increment_round,
1002
1048
  }
1003
- url = f"{self._api_url}/feedback/{collection_id}/contexts"
1049
+ url = f"{self._api_url}/feedback/{collection_id}/contexts/start"
1050
+ response = self._session.post(url, json=payload)
1051
+ self._handle_response_errors(response)
1052
+ return StartFeedbackContextsJobResponse.model_validate(response.json())
1053
+
1054
+ def get_feedback_contexts(
1055
+ self,
1056
+ collection_id: str,
1057
+ feedback_session_id: str,
1058
+ ) -> FeedbackContextsJobStateResponse:
1059
+ """Get feedback contexts state for a session, including job status and current round data."""
1060
+ payload = {
1061
+ "feedback_session_id": feedback_session_id,
1062
+ }
1063
+ url = f"{self._api_url}/feedback/{collection_id}/contexts/state"
1004
1064
  response = self._session.post(url, json=payload)
1005
1065
  self._handle_response_errors(response)
1006
- return FeedbackContextsResponse.model_validate(response.json())
1066
+ return FeedbackContextsJobStateResponse.model_validate(response.json())
1007
1067
 
1008
1068
  def get_agent_run_feedback_contexts_by_session(
1009
1069
  self,
@@ -1012,9 +1072,8 @@ class Docent:
1012
1072
  ) -> list[AgentRunFeedbackContext]:
1013
1073
  """Get all persisted AgentRun feedback contexts for a feedback session.
1014
1074
 
1015
- Unlike `get_feedback_contexts`, this method only reads existing session data and
1016
- returns fully hydrated AgentRun feedback context objects from the database,
1017
- including QA pairs and label (if present).
1075
+ Returns fully hydrated AgentRun feedback context objects from the database,
1076
+ including QA pairs and labels (if present), across all rounds in the session.
1018
1077
  """
1019
1078
  url = f"{self._api_url}/feedback/{collection_id}/session/{feedback_session_id}/contexts"
1020
1079
  response = self._session.get(url)
@@ -1,7 +1,7 @@
1
1
  [project]
2
2
  name = "docent-python"
3
3
  description = "Docent SDK"
4
- version = "0.1.51-alpha"
4
+ version = "0.1.53-alpha"
5
5
  authors = [
6
6
  { name="Transluce", email="info@transluce.org" },
7
7
  ]