docent-python 0.1.41a0__py3-none-any.whl
This diff represents the contents of publicly released package versions as they appear in the supported public registries, and is provided for informational purposes only.
Potentially problematic release: this version of docent-python has been flagged as potentially problematic.
- docent/__init__.py +4 -0
- docent/_llm_util/__init__.py +0 -0
- docent/_llm_util/data_models/__init__.py +0 -0
- docent/_llm_util/data_models/exceptions.py +48 -0
- docent/_llm_util/data_models/llm_output.py +331 -0
- docent/_llm_util/llm_cache.py +193 -0
- docent/_llm_util/llm_svc.py +472 -0
- docent/_llm_util/model_registry.py +134 -0
- docent/_llm_util/providers/__init__.py +0 -0
- docent/_llm_util/providers/anthropic.py +537 -0
- docent/_llm_util/providers/common.py +41 -0
- docent/_llm_util/providers/google.py +530 -0
- docent/_llm_util/providers/openai.py +745 -0
- docent/_llm_util/providers/openrouter.py +375 -0
- docent/_llm_util/providers/preference_types.py +104 -0
- docent/_llm_util/providers/provider_registry.py +164 -0
- docent/_log_util/__init__.py +3 -0
- docent/_log_util/logger.py +141 -0
- docent/data_models/__init__.py +14 -0
- docent/data_models/_tiktoken_util.py +91 -0
- docent/data_models/agent_run.py +473 -0
- docent/data_models/chat/__init__.py +37 -0
- docent/data_models/chat/content.py +56 -0
- docent/data_models/chat/message.py +191 -0
- docent/data_models/chat/tool.py +109 -0
- docent/data_models/citation.py +187 -0
- docent/data_models/formatted_objects.py +84 -0
- docent/data_models/judge.py +17 -0
- docent/data_models/metadata_util.py +16 -0
- docent/data_models/regex.py +56 -0
- docent/data_models/transcript.py +305 -0
- docent/data_models/util.py +170 -0
- docent/judges/__init__.py +23 -0
- docent/judges/analysis.py +77 -0
- docent/judges/impl.py +587 -0
- docent/judges/runner.py +129 -0
- docent/judges/stats.py +205 -0
- docent/judges/types.py +320 -0
- docent/judges/util/forgiving_json.py +108 -0
- docent/judges/util/meta_schema.json +86 -0
- docent/judges/util/meta_schema.py +29 -0
- docent/judges/util/parse_output.py +68 -0
- docent/judges/util/voting.py +139 -0
- docent/loaders/load_inspect.py +215 -0
- docent/py.typed +0 -0
- docent/samples/__init__.py +3 -0
- docent/samples/load.py +9 -0
- docent/samples/log.eval +0 -0
- docent/samples/tb_airline.json +1 -0
- docent/sdk/__init__.py +0 -0
- docent/sdk/agent_run_writer.py +317 -0
- docent/sdk/client.py +1186 -0
- docent/sdk/llm_context.py +432 -0
- docent/trace.py +2741 -0
- docent/trace_temp.py +1086 -0
- docent_python-0.1.41a0.dist-info/METADATA +33 -0
- docent_python-0.1.41a0.dist-info/RECORD +59 -0
- docent_python-0.1.41a0.dist-info/WHEEL +4 -0
- docent_python-0.1.41a0.dist-info/licenses/LICENSE.md +13 -0

docent/_log_util/logger.py
@@ -0,0 +1,141 @@
+import logging
+import sys
+from dataclasses import dataclass
+from typing import Any, Dict, Literal, MutableMapping, Optional, Tuple
+
+
+@dataclass
+class ColorCode:
+    fore: str
+    style: str = ""
+
+
+class Colors:
+    # Foreground colors
+    BLACK = ColorCode("\033[30m")
+    RED = ColorCode("\033[31m")
+    GREEN = ColorCode("\033[32m")
+    YELLOW = ColorCode("\033[33m")
+    BLUE = ColorCode("\033[34m")
+    MAGENTA = ColorCode("\033[35m")
+    CYAN = ColorCode("\033[36m")
+    WHITE = ColorCode("\033[37m")
+    BRIGHT_MAGENTA = ColorCode("\033[95m")
+    BRIGHT_CYAN = ColorCode("\033[96m")
+
+    # Styles
+    BOLD = "\033[1m"
+    RESET = "\033[0m"
+
+    @staticmethod
+    def apply(text: str, color: ColorCode) -> str:
+        return f"{color.style}{color.fore}{text}{Colors.RESET}"
+
+
+class ColoredFormatter(logging.Formatter):
+    COLORS: Dict[int, ColorCode] = {
+        logging.DEBUG: Colors.BLUE,
+        logging.INFO: Colors.GREEN,
+        logging.WARNING: Colors.YELLOW,
+        logging.ERROR: Colors.RED,
+        logging.CRITICAL: ColorCode("\033[31m", Colors.BOLD),
+    }
+
+    # Available highlight colors
+    HIGHLIGHT_COLORS: Dict[str, ColorCode] = {
+        "magenta": ColorCode(Colors.BRIGHT_MAGENTA.fore, Colors.BOLD),
+        "cyan": ColorCode(Colors.BRIGHT_CYAN.fore, Colors.BOLD),
+        "yellow": ColorCode(Colors.YELLOW.fore, Colors.BOLD),
+        "red": ColorCode(Colors.RED.fore, Colors.BOLD),
+    }
+
+    def __init__(self, fmt: Optional[str] = None) -> None:
+        super().__init__(
+            fmt or "%(asctime)s [%(levelname)s] %(namespace)s: %(message)s", datefmt="%H:%M:%S"
+        )
+
+    def format(self, record: logging.LogRecord) -> str:
+        # Add namespace to extra fields if not present
+        if not getattr(record, "namespace", None):
+            record.__dict__["namespace"] = record.name
+
+        # Color the level name
+        record.levelname = Colors.apply(record.levelname, self.COLORS[record.levelno])
+
+        # Color the namespace
+        record.__dict__["namespace"] = Colors.apply(record.__dict__["namespace"], Colors.CYAN)
+
+        # Check if highlight flag is set
+        highlight = getattr(record, "highlight", None)
+        if highlight:
+            # Get the highlight color or default to magenta
+            color_name = highlight if isinstance(highlight, str) else "magenta"
+            highlight_color = self.HIGHLIGHT_COLORS.get(
+                color_name, self.HIGHLIGHT_COLORS["magenta"]
+            )
+
+            # Apply highlight to the message
+            original_message = record.getMessage()
+            record.msg = Colors.apply(original_message, highlight_color)
+            if record.args:
+                record.args = ()
+
+        return super().format(record)
+
+
+class LoggerAdapter(logging.LoggerAdapter[logging.Logger]):
+    """
+    Logger adapter that allows highlighting specific log messages.
+    """
+
+    def process(
+        self, msg: Any, kwargs: MutableMapping[str, Any]
+    ) -> Tuple[Any, MutableMapping[str, Any]]:
+        # Pass highlight flag through to the record
+        return msg, kwargs
+
+    def highlight(
+        self,
+        msg: object,
+        *args: Any,
+        color: Literal["magenta", "cyan", "yellow", "red", "green"] = "magenta",
+        **kwargs: Any,
+    ) -> None:
+        """
+        Log a highlighted message.
+
+        Args:
+            msg: The message format string
+            color: The color to highlight with (magenta, cyan, yellow, red)
+            *args: The args for the message format string
+            **kwargs: Additional logging kwargs
+        """
+        kwargs.setdefault("extra", {})
+        if isinstance(kwargs["extra"], dict):
+            kwargs["extra"]["highlight"] = color
+        return self.info(msg, *args, **kwargs)
+
+
+def get_logger(namespace: str) -> LoggerAdapter:
+    """
+    Get a colored logger for the specified namespace.
+
+    Args:
+        namespace: The namespace for the logger
+
+    Returns:
+        A configured logger instance with highlighting support
+    """
+    logger = logging.getLogger(namespace)
+
+    # Only add handler if it doesn't exist
+    if not logger.handlers:
+        handler = logging.StreamHandler(sys.stdout)
+        handler.setFormatter(ColoredFormatter())
+        logger.addHandler(handler)
+
+    # Set default level to INFO
+    logger.setLevel(logging.INFO)
+
+    # Wrap with adapter to support highlighting
+    return LoggerAdapter(logger, {})
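
The adapter returned by get_logger behaves like a standard logging.Logger but adds a highlight method, which passes a color name through the record's extra dict so that ColoredFormatter wraps the whole message in that color. A minimal usage sketch follows; the namespace and messages are illustrative, not taken from the package:

    from docent._log_util.logger import get_logger

    # Hypothetical namespace; any dotted module path works the same way.
    logger = get_logger("docent.example")

    logger.info("level name rendered in green")      # INFO  -> Colors.GREEN
    logger.warning("level name rendered in yellow")  # WARNING -> Colors.YELLOW

    # highlight() logs at INFO level and colors the entire message, defaulting
    # to bold bright magenta; "cyan", "yellow", and "red" map to the other
    # entries in ColoredFormatter.HIGHLIGHT_COLORS.
    logger.highlight("something worth noticing", color="cyan")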

docent/data_models/__init__.py
@@ -0,0 +1,14 @@
+from docent.data_models.agent_run import AgentRun
+from docent.data_models.citation import InlineCitation
+from docent.data_models.judge import Label
+from docent.data_models.regex import RegexSnippet
+from docent.data_models.transcript import Transcript, TranscriptGroup
+
+__all__ = [
+    "AgentRun",
+    "InlineCitation",
+    "Label",
+    "RegexSnippet",
+    "Transcript",
+    "TranscriptGroup",
+]
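
These re-exports define the public surface of docent.data_models, so the core models can be imported from the package itself rather than from its submodules. For example (illustrative import only):

    from docent.data_models import AgentRun, Transcript, TranscriptGroup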

docent/data_models/_tiktoken_util.py
@@ -0,0 +1,91 @@
+import tiktoken
+
+MAX_TOKENS = 100_000
+
+
+def get_token_count(text: str, model: str = "gpt-4") -> int:
+    """Get the number of tokens in a text under the GPT-4 tokenization scheme."""
+    encoding = tiktoken.encoding_for_model(model)
+    return len(encoding.encode(text))
+
+
+def truncate_to_token_limit(text: str, max_tokens: int, model: str = "gpt-4") -> str:
+    """Truncate text to stay within the specified token limit."""
+    encoding = tiktoken.encoding_for_model(model)
+    tokens = encoding.encode(text)
+
+    if len(tokens) <= max_tokens:
+        return text
+
+    return encoding.decode(tokens[:max_tokens])
+
+
+class MessageRange:
+    """A range of messages in a transcript. start is inclusive, end is exclusive."""
+
+    start: int
+    end: int
+    include_metadata: bool
+    num_tokens: int
+
+    def __init__(self, start: int, end: int, include_metadata: bool, num_tokens: int):
+        self.start = start
+        self.end = end
+        self.include_metadata = include_metadata
+        self.num_tokens = num_tokens
+
+
+def group_messages_into_ranges(
+    token_counts: list[int], metadata_tokens: int, max_tokens: int, margin: int = 50
+) -> list[MessageRange]:
+    """Split a list of messages + metadata into ranges that stay within the specified token limit.
+
+    Always tries to create ranges with metadata included, unless a single message + metadata is too long,
+    in which case you get a lone message with no metadata
+    """
+    ranges: list[MessageRange] = []
+    start_index = 0
+    running_token_count = 0
+
+    i = 0
+    while i < len(token_counts):
+        new_token_count = token_counts[i]
+        if running_token_count + new_token_count + metadata_tokens > max_tokens - margin:
+            if start_index == i:  # a single message + metadata is already too long
+                ranges.append(
+                    MessageRange(
+                        start=i, end=i + 1, include_metadata=False, num_tokens=new_token_count
+                    )
+                )
+                i += 1
+            else:
+                # add all messages from start_index to i-1, with metadata included
+                ranges.append(
+                    MessageRange(
+                        start=start_index,
+                        end=i,
+                        include_metadata=True,
+                        num_tokens=running_token_count + metadata_tokens,
+                    )
+                )
+                running_token_count = 0
+                start_index = i
+        else:
+            running_token_count += new_token_count
+            i += 1
+
+    if running_token_count > 0:
+        include_metadata = running_token_count + metadata_tokens < max_tokens - margin
+        num_tokens = (
+            running_token_count + metadata_tokens if include_metadata else running_token_count
+        )
+        ranges.append(
+            MessageRange(
+                start=start_index,
+                end=len(token_counts),
+                include_metadata=include_metadata,
+                num_tokens=num_tokens,
+            )
+        )
+
+    return ranges
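
To make the grouping behaviour concrete, here is a small worked sketch of group_messages_into_ranges; the token counts and limits below are invented for illustration and are not taken from the package:

    from docent.data_models._tiktoken_util import group_messages_into_ranges

    # Three messages of 400, 300, and 500 tokens, plus 100 tokens of metadata,
    # packed under a 1,000-token limit (effective budget 950 after the default
    # 50-token margin).
    ranges = group_messages_into_ranges([400, 300, 500], metadata_tokens=100, max_tokens=1000)

    for r in ranges:
        print(r.start, r.end, r.include_metadata, r.num_tokens)
    # Expected output:
    #   0 2 True 800   (messages 0-1 plus metadata: 400 + 300 + 100)
    #   2 3 True 600   (message 2 plus metadata: 500 + 100)

A message whose own token count plus metadata already exceeds the budget is instead emitted as a lone range with include_metadata=False, via the start_index == i branch above.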