docent-python 0.1.9a0__py3-none-any.whl → 0.1.11a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

docent/data_models/agent_run.py CHANGED
@@ -90,19 +90,36 @@ class AgentRun(BaseModel):
             raise ValueError("AgentRun must have at least one transcript")
         return self
 
-    def to_text(self, token_limit: int = sys.maxsize) -> list[str]:
+    def _to_text_impl(self, token_limit: int = sys.maxsize, use_blocks: bool = False) -> list[str]:
         """
-        Represents an agent run as a list of strings, each of which is at most token_limit tokens
-        under the GPT-4 tokenization scheme.
+        Core implementation for converting agent run to text representation.
 
-        We'll try to split up long AgentRuns along transcript boundaries and include metadata.
-        For very long transcripts, we'll have to split them up further and remove metadata.
+        Args:
+            token_limit: Maximum tokens per returned string under the GPT-4 tokenization scheme
+            use_blocks: If True, use individual message blocks. If False, use action units.
+
+        Returns:
+            List of strings, each at most token_limit tokens
         """
+        # Generate transcript strings using appropriate method
+        transcript_strs: list[str] = []
+        for i, (t_key, t) in enumerate(self.transcripts.items()):
+            if use_blocks:
+                transcript_content = t.to_str_blocks_with_token_limit(
+                    token_limit=sys.maxsize,
+                    transcript_idx=i,
+                    agent_run_idx=None,
+                )[0]
+            else:
+                transcript_content = t.to_str_with_token_limit(
+                    token_limit=sys.maxsize,
+                    transcript_idx=i,
+                    agent_run_idx=None,
+                )[0]
+            transcript_strs.append(
+                f"<transcript {t_key}>\n{transcript_content}\n</transcript {t_key}>"
+            )
 
-        transcript_strs: list[str] = [
-            f"<transcript {t_key}>\n{t.to_str(agent_run_idx=None, transcript_idx=i)}\n</transcript {t_key}>"
-            for i, (t_key, t) in enumerate(self.transcripts.items())
-        ]
         transcripts_str = "\n\n".join(transcript_strs)
 
         # Gather metadata
@@ -128,7 +145,6 @@ class AgentRun(BaseModel):
             return [f"{transcripts_str}" f"{metadata_str}"]
 
         # Otherwise, split up the transcript and metadata into chunks
-        # TODO(vincent, mengk): does this code account for multiple transcripts correctly? a little confused.
         else:
             results: list[str] = []
             transcript_token_counts = [get_token_count(t) for t in transcript_strs]
@@ -150,13 +166,23 @@ class AgentRun(BaseModel):
                 ), "Ranges without metadata should be a single message"
                 t_id, t = list(self.transcripts.items())[msg_range.start]
                 if msg_range.num_tokens < token_limit - 50:
-                    transcript = f"<transcript {t_id}>\n{t.to_str()}\n</transcript {t_id}>"
+                    if use_blocks:
+                        transcript = f"<transcript {t_id}>\n{t.to_str_blocks_with_token_limit(token_limit=sys.maxsize)[0]}\n</transcript {t_id}>"
+                    else:
+                        transcript = f"<transcript {t_id}>\n{t.to_str_with_token_limit(token_limit=sys.maxsize)[0]}\n</transcript {t_id}>"
                     result = (
                         f"Here is a partial agent run for analysis purposes only:\n{transcript}"
                     )
                     results.append(result)
                 else:
-                    transcript_fragments = t.to_str_with_token_limit(token_limit - 50)
+                    if use_blocks:
+                        transcript_fragments = t.to_str_blocks_with_token_limit(
+                            token_limit=token_limit - 50,
+                        )
+                    else:
+                        transcript_fragments = t.to_str_with_token_limit(
+                            token_limit=token_limit - 50,
+                        )
                     for fragment in transcript_fragments:
                         result = f"<transcript {t_id}>\n{fragment}\n</transcript {t_id}>"
                         result = (
@@ -165,6 +191,26 @@ class AgentRun(BaseModel):
                 results.append(result)
             return results
 
+    def to_text(self, token_limit: int = sys.maxsize) -> list[str]:
+        """
+        Represents an agent run as a list of strings, each of which is at most token_limit tokens
+        under the GPT-4 tokenization scheme.
+
+        We'll try to split up long AgentRuns along transcript boundaries and include metadata.
+        For very long transcripts, we'll have to split them up further and remove metadata.
+        """
+        return self._to_text_impl(token_limit=token_limit, use_blocks=False)
+
+    def to_text_blocks(self, token_limit: int = sys.maxsize) -> list[str]:
+        """
+        Represents an agent run as a list of strings using individual message blocks,
+        each of which is at most token_limit tokens under the GPT-4 tokenization scheme.
+
+        Unlike to_text() which uses action units, this method formats each message
+        as an individual block.
+        """
+        return self._to_text_impl(token_limit=token_limit, use_blocks=True)
+
     @property
     def text(self) -> str:
         """Concatenates all transcript texts with double newlines as separators.
@@ -172,7 +218,16 @@ class AgentRun(BaseModel):
         Returns:
            str: A string representation of all transcripts.
        """
-        return self.to_text()[0]
+        return self._to_text_impl(token_limit=sys.maxsize, use_blocks=False)[0]
+
+    @property
+    def text_blocks(self) -> str:
+        """Concatenates all transcript texts using individual blocks format.
+
+        Returns:
+            str: A string representation of all transcripts using individual message blocks.
+        """
+        return self._to_text_impl(token_limit=sys.maxsize, use_blocks=True)[0]
 
     def model_dump(self, *args: Any, **kwargs: Any) -> dict[str, Any]:
         """Extends the parent model_dump method to include the text property.
docent/data_models/chat/message.py CHANGED
@@ -5,6 +5,7 @@ from pydantic import BaseModel, Discriminator
 
 from docent.data_models.chat.content import Content
 from docent.data_models.chat.tool import ToolCall
+from docent.data_models.citation import Citation
 
 logger = getLogger(__name__)
 
@@ -66,11 +67,15 @@ class AssistantMessage(BaseChatMessage):
         role: Always set to "assistant".
         model: Optional identifier for the model that generated this message.
         tool_calls: Optional list of tool calls made by the assistant.
+        citations: Optional list of citations referenced in the message content.
+        suggested_messages: Optional list of suggested followup messages.
     """
 
     role: Literal["assistant"] = "assistant"  # type: ignore
     model: str | None = None
    tool_calls: list[ToolCall] | None = None
+    citations: list[Citation] | None = None
+    suggested_messages: list[str] | None = None
 
 
 class ToolMessage(BaseChatMessage):
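
Note: a sketch of constructing the extended AssistantMessage. The values are hypothetical, and the `content` field is assumed to be inherited from BaseChatMessage, which this diff does not show.

    from docent.data_models.chat.message import AssistantMessage
    from docent.data_models.citation import Citation

    # start_idx/end_idx are illustrative character offsets into the content;
    # "T0B3" spans characters 27..31 of the string below.
    msg = AssistantMessage(
        content="The agent verified the fix T0B3 before merging.",
        citations=[Citation(start_idx=27, end_idx=31, transcript_idx=0, block_idx=3)],
        suggested_messages=["Show me transcript 0, block 3"],
    )
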
docent/data_models/citation.py CHANGED
@@ -1,223 +1,152 @@
 import re
-from typing import TypedDict
 
+from pydantic import BaseModel
 
-class Citation(TypedDict):
+
+class Citation(BaseModel):
     start_idx: int
     end_idx: int
-    agent_run_idx: int | None
-    transcript_idx: int | None
+    agent_run_idx: int | None = None
+    transcript_idx: int | None = None
     block_idx: int
-    action_unit_idx: int | None
+    action_unit_idx: int | None = None
+    start_pattern: str | None = None
 
 
-def parse_citations_single_run(text: str) -> list[Citation]:
-    """
-    Parse citations from text in the format described by SINGLE_BLOCK_CITE_INSTRUCTION.
+RANGE_BEGIN = "<RANGE>"
+RANGE_END = "</RANGE>"
 
-    Supported formats:
-    - Single block: [T<key>B<idx>]
-    - Multiple blocks: [T<key1>B<idx1>, T<key2>B<idx2>, ...]
-    - Dash-separated blocks: [T<key1>B<idx1>-T<key2>B<idx2>]
+_SINGLE_RE = re.compile(r"T(\d+)B(\d+)")
+_RANGE_CONTENT_RE = re.compile(r":\s*" + re.escape(RANGE_BEGIN) + r".*?" + re.escape(RANGE_END))
 
-    Args:
-        text: The text to parse citations from
 
-    Returns:
-        A list of Citation objects with start_idx and end_idx representing
-        the character positions in the text (excluding brackets)
-    """
-    citations: list[Citation] = []
+def _extract_range_pattern(range_part: str) -> str | None:
+    start_pattern: str | None = None
+
+    if RANGE_BEGIN in range_part and RANGE_END in range_part:
+        range_begin_idx = range_part.find(RANGE_BEGIN)
+        range_end_idx = range_part.find(RANGE_END)
+        if range_begin_idx != -1 and range_end_idx != -1:
+            range_content = range_part[range_begin_idx + len(RANGE_BEGIN) : range_end_idx]
+            start_pattern = range_content if range_content else None
+
+    return start_pattern
 
-    # Find all bracketed content first
-    bracket_pattern = r"\[(.*?)\]"
-    bracket_matches = re.finditer(bracket_pattern, text)
-
-    for bracket_match in bracket_matches:
-        bracket_content = bracket_match.group(1)
-        # Starting position of the bracket content (excluding '[')
-        content_start_pos = bracket_match.start() + 1
-
-        # Split by commas if present
-        parts = [part.strip() for part in bracket_content.split(",")]
-
-        for part in parts:
-            # Check if this part contains a dash (range citation)
-            if "-" in part:
-                # Split by dash and process each sub-part
-                dash_parts = [dash_part.strip() for dash_part in part.split("-")]
-                for dash_part in dash_parts:
-                    # Check for single block citation: T<key>B<idx>
-                    single_match = re.match(r"T(\d+)B(\d+)", dash_part)
-                    if single_match:
-                        transcript_idx = int(single_match.group(1))
-                        block_idx = int(single_match.group(2))
-
-                        # Find position within the original text
-                        citation_text = f"T{transcript_idx}B{block_idx}"
-                        part_pos_in_content = bracket_content.find(dash_part)
-                        ref_pos = content_start_pos + part_pos_in_content
-                        ref_end = ref_pos + len(citation_text)
-
-                        # Check if this citation overlaps with any existing citation
-                        if not any(
-                            citation["start_idx"] <= ref_pos < citation["end_idx"]
-                            or citation["start_idx"] < ref_end <= citation["end_idx"]
-                            for citation in citations
-                        ):
-                            citations.append(
-                                Citation(
-                                    start_idx=ref_pos,
-                                    end_idx=ref_end,
-                                    agent_run_idx=None,
-                                    transcript_idx=transcript_idx,
-                                    block_idx=block_idx,
-                                    action_unit_idx=None,
-                                )
-                            )
+
+def scan_brackets(text: str) -> list[tuple[int, int, str]]:
+    """Scan text for bracketed segments, respecting RANGE markers and nested brackets.
+
+    Returns a list of (start_index, end_index_exclusive, inner_content).
+    """
+    matches: list[tuple[int, int, str]] = []
+    i = 0
+    while i < len(text):
+        if text[i] == "[":
+            start = i
+            bracket_count = 1
+            j = i + 1
+            in_range = False
+
+            while j < len(text) and bracket_count > 0:
+                if text[j : j + len(RANGE_BEGIN)] == RANGE_BEGIN:
+                    in_range = True
+                elif text[j : j + len(RANGE_END)] == RANGE_END:
+                    in_range = False
+                elif text[j] == "[" and not in_range:
+                    bracket_count += 1
+                elif text[j] == "]" and not in_range:
+                    bracket_count -= 1
+                j += 1
+
+            if bracket_count == 0:
+                end_exclusive = j
+                bracket_content = text[start + 1 : end_exclusive - 1]
+                matches.append((start, end_exclusive, bracket_content))
+                i = j
             else:
-                # Check for single block citation: T<key>B<idx>
-                single_match = re.match(r"T(\d+)B(\d+)", part)
-                if single_match:
-                    transcript_idx = int(single_match.group(1))
-                    block_idx = int(single_match.group(2))
-
-                    # Find position within the original text
-                    citation_text = f"T{transcript_idx}B{block_idx}"
-                    part_pos_in_content = bracket_content.find(part)
-                    ref_pos = content_start_pos + part_pos_in_content
-                    ref_end = ref_pos + len(citation_text)
-
-                    # Check if this citation overlaps with any existing citation
-                    if not any(
-                        citation["start_idx"] <= ref_pos < citation["end_idx"]
-                        or citation["start_idx"] < ref_end <= citation["end_idx"]
-                        for citation in citations
-                    ):
-                        citations.append(
-                            Citation(
-                                start_idx=ref_pos,
-                                end_idx=ref_end,
-                                agent_run_idx=None,
-                                transcript_idx=transcript_idx,
-                                block_idx=block_idx,
-                                action_unit_idx=None,
-                            )
-                        )
-
-    return citations
-
-
-def parse_citations_multi_run(text: str) -> list[Citation]:
+                i += 1
+        else:
+            i += 1
+    return matches
+
+
+def parse_single_citation(part: str) -> tuple[int, int, str | None] | None:
     """
-    Parse citations from text in the format described by MULTI_BLOCK_CITE_INSTRUCTION.
+    Parse a single citation token inside a bracket and return its components.
+
+    Returns (transcript_idx, block_idx, start_pattern) or None if invalid.
+    """
+    token = part.strip()
+    if not token:
+        return None
+
+    if ":" in token:
+        citation_part, range_part = token.split(":", 1)
+        single_match = _SINGLE_RE.match(citation_part.strip())
+        if not single_match:
+            return None
+        transcript_idx = int(single_match.group(1))
+        block_idx = int(single_match.group(2))
+        start_pattern = _extract_range_pattern(range_part)
+        return transcript_idx, block_idx, start_pattern
+    else:
+        single_match = _SINGLE_RE.match(token)
+        if not single_match:
+            return None
+        transcript_idx = int(single_match.group(1))
+        block_idx = int(single_match.group(2))
+        return transcript_idx, block_idx, None
+
+
+def parse_citations(text: str) -> tuple[str, list[Citation]]:
+    """
+    Parse citations from text in the format described by BLOCK_RANGE_CITE_INSTRUCTION.
 
     Supported formats:
-    - Single block in transcript: [R<idx>T<key>B<idx>] or ([R<idx>T<key>B<idx>])
-    - Multiple blocks: [R<idx1>T<key1>B<idx1>][R<idx2>T<key2>B<idx2>]
-    - Comma-separated blocks: [R<idx1>T<key1>B<idx1>, R<idx2>T<key2>B<idx2>, ...]
-    - Dash-separated blocks: [R<idx1>T<key1>B<idx1>-R<idx2>T<key2>B<idx2>]
+    - Single block: [T<key>B<idx>]
+    - Text range with start pattern: [T<key>B<idx>:<RANGE>start_pattern</RANGE>]
 
     Args:
         text: The text to parse citations from
 
     Returns:
-        A list of Citation objects with start_idx and end_idx representing
-        the character positions in the text (excluding brackets)
+        A tuple of (cleaned_text, citations) where cleaned_text has brackets and range markers removed
+        and citations have start_idx and end_idx representing character positions
+        in the cleaned text
     """
     citations: list[Citation] = []
-
-    # Find all content within brackets - this handles nested brackets too
-    bracket_pattern = r"\[([^\[\]]*(?:\[[^\[\]]*\][^\[\]]*)*)\]"
-    # Also handle optional parentheses around the brackets
-    paren_bracket_pattern = r"\(\[([^\[\]]*(?:\[[^\[\]]*\][^\[\]]*)*)\]\)"
-
-    # Single citation pattern
-    single_pattern = r"R(\d+)T(\d+)B(\d+)"
-
-    # Find all bracket matches
-    for pattern in [bracket_pattern, paren_bracket_pattern]:
-        matches = re.finditer(pattern, text)
-        for match in matches:
-            # Get the content inside brackets
-            if pattern == bracket_pattern:
-                content = match.group(1)
-                start_pos = match.start() + 1  # +1 to skip the opening bracket
-            else:
-                content = match.group(1)
-                start_pos = match.start() + 2  # +2 to skip the opening parenthesis and bracket
-
-            # Split by comma if present
-            items = [item.strip() for item in content.split(",")]
-
-            for item in items:
-                # Check if this item contains a dash (range citation)
-                if "-" in item:
-                    # Split by dash and process each sub-item
-                    dash_items = [dash_item.strip() for dash_item in item.split("-")]
-                    for dash_item in dash_items:
-                        # Check for single citation
-                        single_match = re.match(single_pattern, dash_item)
-                        if single_match:
-                            agent_run_idx = int(single_match.group(1))
-                            transcript_idx = int(single_match.group(2))
-                            block_idx = int(single_match.group(3))
-
-                            # Calculate position in the original text
-                            citation_text = f"R{agent_run_idx}T{transcript_idx}B{block_idx}"
-                            citation_start = text.find(citation_text, start_pos)
-                            citation_end = citation_start + len(citation_text)
-
-                            # Move start_pos for the next item if there are more items
-                            start_pos = citation_end
-
-                            # Avoid duplicate citations
-                            if not any(
-                                citation["start_idx"] == citation_start
-                                and citation["end_idx"] == citation_end
-                                for citation in citations
-                            ):
-                                citations.append(
-                                    Citation(
-                                        start_idx=citation_start,
-                                        end_idx=citation_end,
-                                        agent_run_idx=agent_run_idx,
-                                        transcript_idx=transcript_idx,
-                                        block_idx=block_idx,
-                                        action_unit_idx=None,
-                                    )
-                                )
-                else:
-                    # Check for single citation
-                    single_match = re.match(single_pattern, item)
-                    if single_match:
-                        agent_run_idx = int(single_match.group(1))
-                        transcript_idx = int(single_match.group(2))
-                        block_idx = int(single_match.group(3))
-
-                        # Calculate position in the original text
-                        citation_text = f"R{agent_run_idx}T{transcript_idx}B{block_idx}"
-                        citation_start = text.find(citation_text, start_pos)
-                        citation_end = citation_start + len(citation_text)
-
-                        # Move start_pos for the next item if there are more items
-                        start_pos = citation_end
-
-                        # Avoid duplicate citations
-                        if not any(
-                            citation["start_idx"] == citation_start
-                            and citation["end_idx"] == citation_end
-                            for citation in citations
-                        ):
-                            citations.append(
-                                Citation(
-                                    start_idx=citation_start,
-                                    end_idx=citation_end,
-                                    agent_run_idx=agent_run_idx,
-                                    transcript_idx=transcript_idx,
-                                    block_idx=block_idx,
-                                    action_unit_idx=None,
-                                )
-                            )
-
-    return citations
+    cleaned_text = ""
+
+    bracket_matches = scan_brackets(text)
+
+    last_end = 0
+    for start, end, bracket_content in bracket_matches:
+        # Append non-bracket text segment as-is
+        cleaned_text += text[last_end:start]
+
+        # Parse a single citation token inside the bracket
+        parsed = parse_single_citation(bracket_content)
+        if parsed:
+            transcript_idx, block_idx, start_pattern = parsed
+            replacement = f"T{transcript_idx}B{block_idx}"
+            # Current absolute start position for this replacement in the cleaned text
+            start_idx = len(cleaned_text)
+            end_idx = start_idx + len(replacement)
+            citations.append(
+                Citation(
+                    start_idx=start_idx,
+                    end_idx=end_idx,
+                    agent_run_idx=None,
+                    transcript_idx=transcript_idx,
+                    block_idx=block_idx,
+                    action_unit_idx=None,
+                    start_pattern=start_pattern,
+                )
+            )
+            cleaned_text += replacement
+        last_end = end
+
+    # Append any remaining tail after the last bracket
+    cleaned_text += text[last_end:]
+
+    return cleaned_text, citations
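
Note: a small worked example of the new parser, with behavior inferred from the diffed code above. The bracketed citation is replaced by its bare block ID in the cleaned text, and the range pattern is carried on the Citation.

    from docent.data_models.citation import parse_citations

    text = "The agent confirms the task [T0B1:<RANGE>I understand the task</RANGE>] before acting."
    cleaned, cites = parse_citations(text)

    # cleaned == "The agent confirms the task T0B1 before acting."
    # cites[0].transcript_idx == 0 and cites[0].block_idx == 1
    # cites[0].start_pattern == "I understand the task"
    # cleaned[cites[0].start_idx : cites[0].end_idx] == "T0B1"
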
docent/data_models/remove_invalid_citation_ranges.py ADDED
@@ -0,0 +1,166 @@
+import re
+
+from docent.data_models.agent_run import AgentRun
+from docent.data_models.citation import Citation, parse_single_citation, scan_brackets
+from docent.data_models.transcript import format_chat_message
+
+
+def build_whitespace_flexible_regex(pattern: str) -> re.Pattern[str]:
+    """Build regex that is flexible with whitespace matching."""
+    out = ""
+    i = 0
+    while i < len(pattern):
+        ch = pattern[i]
+        if ch.isspace():
+            # Skip all consecutive whitespace
+            while i < len(pattern) and pattern[i].isspace():
+                i += 1
+            out += r"\s+"
+            continue
+        out += re.escape(ch)
+        i += 1
+    return re.compile(out, re.DOTALL)
+
+
+def find_citation_matches_in_text(text: str, start_pattern: str) -> list[tuple[int, int]]:
+    """
+    Find all matches of a citation pattern in text.
+
+    Args:
+        text: The text to search in
+        start_pattern: The pattern to search for
+
+    Returns:
+        List of (start_index, end_index) tuples for matches
+    """
+    if not start_pattern:
+        return []
+
+    try:
+        regex = build_whitespace_flexible_regex(start_pattern)
+        matches: list[tuple[int, int]] = []
+
+        for match in regex.finditer(text):
+            if match.group().strip():  # Only count non-empty matches
+                matches.append((match.start(), match.end()))
+
+        return matches
+
+    except re.error:
+        return []
+
+
+def get_transcript_text_for_citation(agent_run: AgentRun, citation: Citation) -> str | None:
+    """
+    Get the text content of a specific transcript block from an AgentRun,
+    using the same formatting as shown to LLMs via format_chat_message.
+
+    Args:
+        agent_run: The agent run containing transcript data
+        citation: Citation with transcript_idx and block_idx
+
+    Returns:
+        Text content of the specified block (including tool calls), or None if not found
+    """
+    if citation.transcript_idx is None:
+        return None
+
+    try:
+        transcript_keys = list(agent_run.transcripts.keys())
+        if citation.transcript_idx >= len(transcript_keys):
+            return None
+
+        transcript_key = transcript_keys[citation.transcript_idx]
+
+        transcript = agent_run.transcripts[transcript_key]
+        if citation.block_idx >= len(transcript.messages):
+            return None
+
+        message = transcript.messages[citation.block_idx]
+
+        # Use the same formatting function that generates content for LLMs
+        # This ensures consistent formatting between citation validation and LLM serialization
+        return format_chat_message(
+            message, citation.block_idx, citation.transcript_idx, citation.agent_run_idx
+        )
+
+    except (KeyError, IndexError, AttributeError):
+        return None
+
+
+def validate_citation_text_range(agent_run: AgentRun, citation: Citation) -> bool:
+    """
+    Validate that a citation's text range exists in the referenced transcript.
+
+    Args:
+        agent_run: The agent run containing transcript data
+        citation: Citation to validate
+
+    Returns:
+        True if the citation's text range exists in the transcript, False otherwise
+    """
+    if not citation.start_pattern:
+        # Nothing to validate
+        return True
+
+    text = get_transcript_text_for_citation(agent_run, citation)
+    if text is None:
+        return False
+
+    matches = find_citation_matches_in_text(text, citation.start_pattern)
+
+    return len(matches) > 0
+
+
+def remove_invalid_citation_ranges(text: str, agent_run: AgentRun) -> str:
+    """
+    Remove invalid citation ranges from chat message/judge result. We do this as a separate step before normal citation parsing.
+    Normal citation parsing happens every time we load chat/results from db,
+    but invalid ranges should never make it to the db.
+
+    Args:
+        text: Original text containing citations
+        agent_run: Agent run with transcript data
+
+    Returns:
+        str: Text with invalid citation ranges replaced by plain block citations.
+    """
+    # Find all bracket positions in the original text
+    bracket_matches = scan_brackets(text)
+    citations: list[Citation] = []
+
+    for start, end, bracket_content in bracket_matches:
+        # Parse this bracket content to get citation info
+        parsed = parse_single_citation(bracket_content)
+        if parsed:
+            transcript_idx, block_idx, start_pattern = parsed
+            # The citation spans from start to end in the original text
+            citation = Citation(
+                start_idx=start,
+                end_idx=end,
+                agent_run_idx=None,
+                transcript_idx=transcript_idx,
+                block_idx=block_idx,
+                action_unit_idx=None,
+                start_pattern=start_pattern,
+            )
+            citations.append(citation)
+
+    # Filter to only citations with text ranges that need validation
+    citations_to_validate = [c for c in citations if c.start_pattern]
+
+    # Sort citations by start_idx in reverse order to avoid index shifting issues
+    sorted_citations = sorted(citations_to_validate, key=lambda c: c.start_idx, reverse=True)
+
+    invalid_citations: list[Citation] = [
+        c for c in sorted_citations if not validate_citation_text_range(agent_run, c)
+    ]
+
+    # Remove invalid text ranges from citations in the original text
+    modified_text = text
+    for citation in invalid_citations:
+        citation_without_range = f"[T{citation.transcript_idx}B{citation.block_idx}]"
+        before = modified_text[: citation.start_idx]
+        after = modified_text[citation.end_idx :]
+        modified_text = before + citation_without_range + after
+    return modified_text
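
Note: the whitespace-flexible matcher above collapses every whitespace run in the cited pattern to \s+, so a quote still validates when the transcript wraps lines differently. A verifiable example of that helper:

    from docent.data_models.remove_invalid_citation_ranges import (
        build_whitespace_flexible_regex,
    )

    regex = build_whitespace_flexible_regex("hello   world")
    # Runs of whitespace are collapsed into a single \s+ pattern
    assert regex.pattern == r"hello\s+world"
    # So a newline/tab in the transcript still matches the cited text
    assert regex.search("hello\n\tworld") is not None
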
docent/data_models/transcript.py CHANGED
@@ -12,6 +12,7 @@ from docent.data_models._tiktoken_util import (
     truncate_to_token_limit,
 )
 from docent.data_models.chat import AssistantMessage, ChatMessage, ContentReasoning
+from docent.data_models.citation import RANGE_BEGIN, RANGE_END
 
 # Template for formatting individual transcript blocks
 TRANSCRIPT_BLOCK_TEMPLATE = """
@@ -21,10 +22,20 @@ TRANSCRIPT_BLOCK_TEMPLATE = """
 """.strip()
 
 # Instructions for citing single transcript blocks
-SINGLE_RUN_CITE_INSTRUCTION = "Each transcript and each block has a unique index. Cite the relevant indices in brackets when relevant, like [T<idx>B<idx>]. Use multiple tags to cite multiple blocks, like [T<idx1>B<idx1>][T<idx2>B<idx2>]. Use an inner dash to cite a range of blocks, like [T<idx1>B<idx1>-T<idx2>B<idx2>]. Remember to cite specific blocks and NOT action units."
+TEXT_RANGE_CITE_INSTRUCTION = f"""Anytime you quote the transcript, or refer to something that happened in the transcript, or make any claim about the transcript, add an inline citation. Each transcript and each block has a unique index. Cite the relevant indices in brackets. For example, to cite the entirety of transcript 0, block 1, write [T0B1].
 
-# Instructions for citing multiple transcript blocks
-MULTI_RUN_CITE_INSTRUCTION = "Each run, each transcript, and each block has a unique index. Cite the relevant indices in brackets when relevant, like [R<idx>T<idx>B<idx>]. Use multiple tags to cite multiple blocks, like [R<idx1>T<idx1>B<idx1>][R<idx2>T<idx2>B<idx2>]. Use an inner dash to cite a range of blocks, like [R<idx1>T<idx1>B<idx1>-R<idx2>T<idx2>B<idx2>]. Remember to cite specific blocks and NOT action units."
+A citation may include a specific range of text within a block. Use {RANGE_BEGIN} and {RANGE_END} to mark the specific range of text. Add it after the block ID separated by a colon. For example, to cite the part of transcript 0, block 1, where the agent says "I understand the task", write [T0B1:{RANGE_BEGIN}I understand the task{RANGE_END}]. Citations must follow this exact format. The markers {RANGE_BEGIN} and {RANGE_END} must be used ONLY inside the brackets of a citation.
+
+Important notes:
+- You must include the full content of the text range between {RANGE_BEGIN} and {RANGE_END}, EXACTLY as it appears in the transcript, word-for-word, including any markers or punctuation that appear in the middle of the text.
+- Citations must be as specific as possible. This means you should usually cite a specific text range within a block.
+- A citation is not a quote. For brevity, text ranges will not be rendered inline. The user will have to click on the citation to see the full text range.
+- Citations are self-contained. Do NOT label them as citation or evidence. Just insert the citation by itself at the appropriate place in the text.
+- Citations must come immediately after the part of a claim that they support. This may be in the middle of a sentence.
+- Each pair of brackets must contain only one citation. To cite multiple blocks, use multiple pairs of brackets, like [T0B0] [T0B1].
+"""
+
+BLOCK_CITE_INSTRUCTION = f"""Each transcript and each block has a unique index. Cite the relevant indices in brackets when relevant, like [T<idx>B<idx>]. Use multiple tags to cite multiple blocks, like [T<idx1>B<idx1>][T<idx2>B<idx2>]. Remember to cite specific blocks and NOT action units."""
 
 
 def format_chat_message(
@@ -291,66 +302,105 @@ class Transcript(BaseModel):
         agent_run_idx: int | None = None,
         highlight_action_unit: int | None = None,
     ) -> str:
-        return self.to_str_with_token_limit(
+        return self._to_str_with_token_limit_impl(
             token_limit=sys.maxsize,
-            agent_run_idx=agent_run_idx,
             transcript_idx=transcript_idx,
+            agent_run_idx=agent_run_idx,
+            use_action_units=True,
             highlight_action_unit=highlight_action_unit,
         )[0]
 
-    def to_str_with_token_limit(
+    def _generate_formatted_blocks(
         self,
-        token_limit: int,
         transcript_idx: int = 0,
         agent_run_idx: int | None = None,
+        use_action_units: bool = True,
         highlight_action_unit: int | None = None,
     ) -> list[str]:
-        """Represents the transcript as a list of strings, each of which is at most token_limit tokens
-        under the GPT-4 tokenization scheme.
+        """Generate formatted blocks for transcript representation.
 
-        We'll try to split up long transcripts along message boundaries and include metadata.
-        For very long messages, we'll have to truncate them and remove metadata.
+        Args:
+            transcript_idx: Index of the transcript
+            agent_run_idx: Optional agent run index
+            use_action_units: If True, group messages into action units. If False, use individual blocks.
+            highlight_action_unit: Optional action unit to highlight (only used with action units)
 
         Returns:
-            list[str]: A list of strings, each of which is at most token_limit tokens
-            under the GPT-4 tokenization scheme.
+            list[str]: List of formatted blocks
         """
-        if highlight_action_unit is not None and not (
-            0 <= highlight_action_unit < len(self._units_of_action or [])
-        ):
-            raise ValueError(f"Invalid action unit index: {highlight_action_unit}")
-
-        # Format blocks by units of action
-        au_blocks: list[str] = []
-        for unit_idx, unit in enumerate(self._units_of_action or []):
-            unit_blocks: list[str] = []
-            for msg_idx in unit:
-                unit_blocks.append(
+        if use_action_units:
+            if highlight_action_unit is not None and not (
+                0 <= highlight_action_unit < len(self._units_of_action or [])
+            ):
+                raise ValueError(f"Invalid action unit index: {highlight_action_unit}")
+
+            blocks: list[str] = []
+            for unit_idx, unit in enumerate(self._units_of_action or []):
+                unit_blocks: list[str] = []
+                for msg_idx in unit:
+                    unit_blocks.append(
+                        format_chat_message(
+                            self.messages[msg_idx],
+                            msg_idx,
+                            transcript_idx,
+                            agent_run_idx,
+                        )
+                    )
+
+                unit_content = "\n".join(unit_blocks)
+
+                # Add highlighting if requested
+                if highlight_action_unit and unit_idx == highlight_action_unit:
+                    blocks_str_template = "<HIGHLIGHTED>\n{}\n</HIGHLIGHTED>"
+                else:
+                    blocks_str_template = "{}"
+                blocks.append(
+                    blocks_str_template.format(
+                        f"<action unit {unit_idx}>\n{unit_content}\n</action unit {unit_idx}>"
+                    )
+                )
+        else:
+            # Individual message blocks
+            blocks = []
+            for msg_idx, message in enumerate(self.messages):
+                blocks.append(
                     format_chat_message(
-                        self.messages[msg_idx],
+                        message,
                         msg_idx,
                         transcript_idx,
                         agent_run_idx,
                    )
                )
 
-            unit_content = "\n".join(unit_blocks)
+        return blocks
 
-            # Add highlighting if requested
-            if highlight_action_unit and unit_idx == highlight_action_unit:
-                blocks_str_template = "<HIGHLIGHTED>\n{}\n</HIGHLIGHTED>"
-            else:
-                blocks_str_template = "{}"
-            au_blocks.append(
-                blocks_str_template.format(
-                    f"<action unit {unit_idx}>\n{unit_content}\n</action unit {unit_idx}>"
-                )
-            )
-        blocks_str = "\n".join(au_blocks)
+    def _to_str_with_token_limit_impl(
+        self,
+        token_limit: int,
+        transcript_idx: int = 0,
+        agent_run_idx: int | None = None,
+        use_action_units: bool = True,
+        highlight_action_unit: int | None = None,
+    ) -> list[str]:
+        """Core implementation for string representation with token limits.
+
+        Args:
+            token_limit: Maximum tokens per returned string
+            transcript_idx: Index of the transcript
+            agent_run_idx: Optional agent run index
+            use_action_units: If True, group messages into action units. If False, use individual blocks.
+            highlight_action_unit: Optional action unit to highlight (only used with action units)
+
+        Returns:
+            list[str]: List of strings, each within token limit
+        """
+        blocks = self._generate_formatted_blocks(
+            transcript_idx, agent_run_idx, use_action_units, highlight_action_unit
+        )
+        blocks_str = "\n".join(blocks)
 
         # Gather metadata
         metadata_obj = fake_model_dump(self.metadata)
-
         yaml_width = float("inf")
         block_str = f"<blocks>\n{blocks_str}\n</blocks>\n"
         metadata_str = f"<metadata>\n{yaml.dump(metadata_obj, width=yaml_width)}\n</metadata>"
@@ -365,25 +415,75 @@ class Transcript(BaseModel):
             return [f"{block_str}" f"{metadata_str}"]
         else:
             results: list[str] = []
-            block_token_counts = [get_token_count(block) for block in au_blocks]
+            block_token_counts = [get_token_count(block) for block in blocks]
             ranges = group_messages_into_ranges(
                 block_token_counts, metadata_token_count, token_limit
            )
            for msg_range in ranges:
                if msg_range.include_metadata:
-                    cur_au_blocks = "\n".join(au_blocks[msg_range.start : msg_range.end])
-                    results.append(f"<blocks>\n{cur_au_blocks}\n</blocks>\n" f"{metadata_str}")
+                    cur_blocks = "\n".join(blocks[msg_range.start : msg_range.end])
+                    results.append(f"<blocks>\n{cur_blocks}\n</blocks>\n" f"{metadata_str}")
                else:
                    assert (
                        msg_range.end == msg_range.start + 1
                    ), "Ranges without metadata should be a single message"
-                    result = str(au_blocks[msg_range.start])
+                    result = str(blocks[msg_range.start])
                    if msg_range.num_tokens > token_limit - 10:
                        result = truncate_to_token_limit(result, token_limit - 10)
                    results.append(f"<blocks>\n{result}\n</blocks>\n")
 
            return results
 
+    def to_str_blocks(
+        self,
+        transcript_idx: int = 0,
+        agent_run_idx: int | None = None,
+    ) -> str:
+        """Represents the transcript as a string using individual message blocks.
+
+        Unlike to_str() which groups messages into action units, this method
+        formats each message as an individual block.
+
+        Returns:
+            str: A string representation with individual message blocks.
+        """
+        return self._to_str_with_token_limit_impl(
+            token_limit=sys.maxsize,
+            transcript_idx=transcript_idx,
+            agent_run_idx=agent_run_idx,
+            use_action_units=False,
+        )[0]
+
+    def to_str_with_token_limit(
+        self,
+        token_limit: int,
+        transcript_idx: int = 0,
+        agent_run_idx: int | None = None,
+        highlight_action_unit: int | None = None,
+    ) -> list[str]:
+        """Represents the transcript as a list of strings using action units with token limit handling."""
+        return self._to_str_with_token_limit_impl(
+            token_limit=token_limit,
+            transcript_idx=transcript_idx,
+            agent_run_idx=agent_run_idx,
+            use_action_units=True,
+            highlight_action_unit=highlight_action_unit,
+        )
+
+    def to_str_blocks_with_token_limit(
+        self,
+        token_limit: int,
+        transcript_idx: int = 0,
+        agent_run_idx: int | None = None,
+    ) -> list[str]:
+        """Represents the transcript as individual blocks with token limit handling."""
+        return self._to_str_with_token_limit_impl(
+            token_limit=token_limit,
+            transcript_idx=transcript_idx,
+            agent_run_idx=agent_run_idx,
+            use_action_units=False,
+        )
+
 
 class TranscriptWithoutMetadataValidator(Transcript):
     """
docent/sdk/client.py CHANGED
@@ -196,7 +196,7 @@ class Docent:
         response.raise_for_status()
         return response.json()
 
-    def list_searches(self, collection_id: str) -> list[dict[str, Any]]:
+    def list_rubrics(self, collection_id: str) -> list[dict[str, Any]]:
         """List all rubrics for a given collection.
 
         Args:
@@ -213,71 +213,73 @@ class Docent:
         response.raise_for_status()
         return response.json()
 
-    def get_search_results(
-        self, collection_id: str, rubric_id: str, rubric_version: int
-    ) -> list[dict[str, Any]]:
-        """Get rubric results for a given collection, rubric and version.
+    def get_rubric_run_state(self, collection_id: str, rubric_id: str) -> dict[str, Any]:
+        """Get rubric run state for a given collection and rubric.
 
         Args:
             collection_id: ID of the Collection.
-            rubric_id: The ID of the rubric to get results for.
-            rubric_version: The version of the rubric to get results for.
+            rubric_id: The ID of the rubric to get run state for.
 
         Returns:
-            list: List of dictionaries containing rubric result information.
+            dict: Dictionary containing rubric run state with results, job_id, and total_agent_runs.
 
         Raises:
             requests.exceptions.HTTPError: If the API request fails.
         """
-        url = f"{self._server_url}/rubric/{collection_id}/{rubric_id}/results"
-        response = self._session.get(url, params={"rubric_version": rubric_version})
+        url = f"{self._server_url}/rubric/{collection_id}/{rubric_id}/rubric_run_state"
+        response = self._session.get(url)
         response.raise_for_status()
         return response.json()
 
-    def list_search_clusters(
-        self, collection_id: str, rubric_id: str, rubric_version: int | None = None
-    ) -> list[dict[str, Any]]:
-        """List all centroids for a given collection and rubric.
+    def get_clustering_state(self, collection_id: str, rubric_id: str) -> dict[str, Any]:
+        """Get clustering state for a given collection and rubric.
 
         Args:
             collection_id: ID of the Collection.
-            rubric_id: The ID of the rubric to get centroids for.
-            rubric_version: Optional version of the rubric. If not provided, uses latest.
+            rubric_id: The ID of the rubric to get clustering state for.
 
         Returns:
-            list: List of dictionaries containing centroid information.
+            dict: Dictionary containing job_id, centroids, and assignments.
 
         Raises:
             requests.exceptions.HTTPError: If the API request fails.
         """
-        url = f"{self._server_url}/rubric/{collection_id}/{rubric_id}/centroids"
-        params: dict[str, int] = {}
-        if rubric_version is not None:
-            params["rubric_version"] = rubric_version
-        response = self._session.get(url, params=params)
+        url = f"{self._server_url}/rubric/{collection_id}/{rubric_id}/clustering_job"
+        response = self._session.get(url)
         response.raise_for_status()
         return response.json()
 
-    def get_cluster_matches(
-        self, collection_id: str, rubric_id: str, rubric_version: int
-    ) -> list[dict[str, Any]]:
+    def get_cluster_centroids(self, collection_id: str, rubric_id: str) -> list[dict[str, Any]]:
+        """Get centroids for a given collection and rubric.
+
+        Args:
+            collection_id: ID of the Collection.
+            rubric_id: The ID of the rubric to get centroids for.
+
+        Returns:
+            list: List of dictionaries containing centroid information.
+
+        Raises:
+            requests.exceptions.HTTPError: If the API request fails.
+        """
+        clustering_state = self.get_clustering_state(collection_id, rubric_id)
+        return clustering_state.get("centroids", [])
+
+    def get_cluster_assignments(self, collection_id: str, rubric_id: str) -> dict[str, list[str]]:
         """Get centroid assignments for a given rubric.
 
         Args:
             collection_id: ID of the Collection.
             rubric_id: The ID of the rubric to get assignments for.
-            rubric_version: The version of the rubric to get assignments for.
 
         Returns:
-            list: List of dictionaries containing centroid assignment information.
+            dict: Dictionary mapping centroid IDs to lists of judge result IDs.
 
         Raises:
             requests.exceptions.HTTPError: If the API request fails.
         """
-        url = f"{self._server_url}/rubric/{collection_id}/{rubric_id}/assignments"
-        response = self._session.get(url, params={"rubric_version": rubric_version})
-        response.raise_for_status()
-        return response.json()
+        clustering_state = self.get_clustering_state(collection_id, rubric_id)
+        return clustering_state.get("assignments", {})
 
     def get_agent_run(self, collection_id: str, agent_run_id: str) -> AgentRun | None:
         """Get a specific agent run by its ID.
@@ -348,3 +350,20 @@ class Docent:
 
         logger.info(f"Successfully shared Collection '{collection_id}' with {email}")
         return response.json()
+
+    def list_agent_run_ids(self, collection_id: str) -> list[str]:
+        """Get all agent run IDs for a collection.
+
+        Args:
+            collection_id: ID of the Collection.
+
+        Returns:
+            list[str]: List of agent run IDs.
+
+        Raises:
+            requests.exceptions.HTTPError: If the API request fails.
+        """
+        url = f"{self._server_url}/{collection_id}/agent_run_ids"
+        response = self._session.get(url)
+        response.raise_for_status()
+        return response.json()
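
Note: a sketch of walking the renamed SDK surface. This assumes a configured Docent client (constructor arguments are not shown in this diff) and assumes each rubric dict carries an "id" key; both are illustrative.

    from docent.sdk.client import Docent

    client = Docent()  # assumption: default configuration; real constructor args may differ
    collection_id = "my-collection"  # hypothetical ID

    for run_id in client.list_agent_run_ids(collection_id):
        run = client.get_agent_run(collection_id, run_id)

    for rubric in client.list_rubrics(collection_id):
        state = client.get_rubric_run_state(collection_id, rubric["id"])
        assignments = client.get_cluster_assignments(collection_id, rubric["id"])
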
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: docent-python
-Version: 0.1.9a0
+Version: 0.1.11a0
 Summary: Docent SDK
 Project-URL: Homepage, https://github.com/TransluceAI/docent
 Project-URL: Issues, https://github.com/TransluceAI/docent/issues
@@ -6,15 +6,16 @@ docent/_log_util/__init__.py,sha256=3HXXrxrSm8PxwG4llotrCnSnp7GuroK1FNHsdg6f7aE,
 docent/_log_util/logger.py,sha256=kwM0yRW1IJd6-XTorjWn48B4l8qvD2ZM6VDjY5eskQI,4422
 docent/data_models/__init__.py,sha256=4JbTDVzRhS5VZgo8MALwd_YI17GaN7X9E3rOc4Xl7kw,327
 docent/data_models/_tiktoken_util.py,sha256=hC0EDDWItv5-0cONBnHWgZtQOflDU7ZNEhXPFo4DvPc,3057
-docent/data_models/agent_run.py,sha256=bDRToWUlY52PugoHWU1D9hasr5t_fnTmRLpkzWP1s_k,9811
-docent/data_models/citation.py,sha256=WsVQZcBT2EJD24ysyeVOC5Xfo165RI7P5_cOnJBgHj0,10015
+docent/data_models/agent_run.py,sha256=AhokdyEscrlrg0q5aKaOv26cYvkA6LvAoQsz_WBg_pM,12240
+docent/data_models/citation.py,sha256=zpF9WuvVEfktltw1M9P3hwpg5yywizFUKF5zROBR2cY,5062
 docent/data_models/metadata.py,sha256=r0SYC4i2x096dXMLfw_rAMtcJQCsoV6EOMPZuEngbGA,9062
 docent/data_models/regex.py,sha256=0ciIerkrNwb91bY5mTcyO5nDWH67xx2tZYObV52fmBo,1684
+docent/data_models/remove_invalid_citation_ranges.py,sha256=0cn4Xg_tgg45nZvc-sjtqLgr1rywBBrsLJ_WBKEF0pY,5673
 docent/data_models/shared_types.py,sha256=jjm-Dh5S6v7UKInW7SEqoziOsx6Z7Uu4e3VzgCbTWvc,225
-docent/data_models/transcript.py,sha256=0iF2ujcWhTss8WkkpNMeIKJyKOfMEsiMoAQMGwY4ing,15753
+docent/data_models/transcript.py,sha256=Gmy4lYdlvC5SXzpnerFJ83lIMPPiYUPgjOUbwg6aWJQ,20238
 docent/data_models/chat/__init__.py,sha256=GleyRzYqKRkwwSRm_tQJw5BudCbgu9WRSa71Fntz0L0,610
 docent/data_models/chat/content.py,sha256=Co-jO8frQa_DSP11wJuhPX0s-GpJk8yqtKqPeiAIZ_U,1672
-docent/data_models/chat/message.py,sha256=iAo38kbV6wYbFh8S23cxLy6HY4C_i3PzQ6RpSQG5dxM,3861
+docent/data_models/chat/message.py,sha256=xGt09keA6HRxw40xB_toNzEqA9ip7k53dnhXrEbKGO8,4157
 docent/data_models/chat/tool.py,sha256=x7NKINswPe0Kqvcx4ubjHzB-n0-i4DbFodvaBb2vitk,3042
 docent/loaders/load_inspect.py,sha256=_cK2Qd6gyLQuJVzOlsvEZz7TrqzNmH6ZsLTkSCWAPqQ,6628
 docent/samples/__init__.py,sha256=roDFnU6515l9Q8v17Es_SpWyY9jbm5d6X9lV01V0MZo,143
@@ -22,8 +23,8 @@ docent/samples/load.py,sha256=ZGE07r83GBNO4A0QBh5aQ18WAu3mTWA1vxUoHd90nrM,207
 docent/samples/log.eval,sha256=orrW__9WBfANq7NwKsPSq9oTsQRcG6KohG5tMr_X_XY,397708
 docent/samples/tb_airline.json,sha256=eR2jFFRtOw06xqbEglh6-dPewjifOk-cuxJq67Dtu5I,47028
 docent/sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docent/sdk/client.py,sha256=fLdniy8JzMLoZpaS9SP2pHban_ToavgtI8VeHZLMNZo,12773
-docent_python-0.1.9a0.dist-info/METADATA,sha256=fgAhTw2bXGNLlU2Y6XFq2rvg7lloXipHXWRXXHLq4gw,1037
-docent_python-0.1.9a0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-docent_python-0.1.9a0.dist-info/licenses/LICENSE.md,sha256=vOHzq3K4Ndu0UV9hPrtXvlD7pHOjyDQmGjHuLSIkRQY,1087
-docent_python-0.1.9a0.dist-info/RECORD,,
+docent/sdk/client.py,sha256=rvOFXvyAr9QxCijN0_CWENbm8y3YQvR1msfFSBDZvOw,13309
+docent_python-0.1.11a0.dist-info/METADATA,sha256=6VpTCCXzOgvSPC3ox6eeIZepRxdcY9gP4SOh5QF5hQ4,1038
+docent_python-0.1.11a0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+docent_python-0.1.11a0.dist-info/licenses/LICENSE.md,sha256=vOHzq3K4Ndu0UV9hPrtXvlD7pHOjyDQmGjHuLSIkRQY,1087
+docent_python-0.1.11a0.dist-info/RECORD,,