docent-python 0.1.41a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docent-python might be problematic. Click here for more details.

Files changed (59)
  1. docent/__init__.py +4 -0
  2. docent/_llm_util/__init__.py +0 -0
  3. docent/_llm_util/data_models/__init__.py +0 -0
  4. docent/_llm_util/data_models/exceptions.py +48 -0
  5. docent/_llm_util/data_models/llm_output.py +331 -0
  6. docent/_llm_util/llm_cache.py +193 -0
  7. docent/_llm_util/llm_svc.py +472 -0
  8. docent/_llm_util/model_registry.py +134 -0
  9. docent/_llm_util/providers/__init__.py +0 -0
  10. docent/_llm_util/providers/anthropic.py +537 -0
  11. docent/_llm_util/providers/common.py +41 -0
  12. docent/_llm_util/providers/google.py +530 -0
  13. docent/_llm_util/providers/openai.py +745 -0
  14. docent/_llm_util/providers/openrouter.py +375 -0
  15. docent/_llm_util/providers/preference_types.py +104 -0
  16. docent/_llm_util/providers/provider_registry.py +164 -0
  17. docent/_log_util/__init__.py +3 -0
  18. docent/_log_util/logger.py +141 -0
  19. docent/data_models/__init__.py +14 -0
  20. docent/data_models/_tiktoken_util.py +91 -0
  21. docent/data_models/agent_run.py +473 -0
  22. docent/data_models/chat/__init__.py +37 -0
  23. docent/data_models/chat/content.py +56 -0
  24. docent/data_models/chat/message.py +191 -0
  25. docent/data_models/chat/tool.py +109 -0
  26. docent/data_models/citation.py +187 -0
  27. docent/data_models/formatted_objects.py +84 -0
  28. docent/data_models/judge.py +17 -0
  29. docent/data_models/metadata_util.py +16 -0
  30. docent/data_models/regex.py +56 -0
  31. docent/data_models/transcript.py +305 -0
  32. docent/data_models/util.py +170 -0
  33. docent/judges/__init__.py +23 -0
  34. docent/judges/analysis.py +77 -0
  35. docent/judges/impl.py +587 -0
  36. docent/judges/runner.py +129 -0
  37. docent/judges/stats.py +205 -0
  38. docent/judges/types.py +320 -0
  39. docent/judges/util/forgiving_json.py +108 -0
  40. docent/judges/util/meta_schema.json +86 -0
  41. docent/judges/util/meta_schema.py +29 -0
  42. docent/judges/util/parse_output.py +68 -0
  43. docent/judges/util/voting.py +139 -0
  44. docent/loaders/load_inspect.py +215 -0
  45. docent/py.typed +0 -0
  46. docent/samples/__init__.py +3 -0
  47. docent/samples/load.py +9 -0
  48. docent/samples/log.eval +0 -0
  49. docent/samples/tb_airline.json +1 -0
  50. docent/sdk/__init__.py +0 -0
  51. docent/sdk/agent_run_writer.py +317 -0
  52. docent/sdk/client.py +1186 -0
  53. docent/sdk/llm_context.py +432 -0
  54. docent/trace.py +2741 -0
  55. docent/trace_temp.py +1086 -0
  56. docent_python-0.1.41a0.dist-info/METADATA +33 -0
  57. docent_python-0.1.41a0.dist-info/RECORD +59 -0
  58. docent_python-0.1.41a0.dist-info/WHEEL +4 -0
  59. docent_python-0.1.41a0.dist-info/licenses/LICENSE.md +13 -0
@@ -0,0 +1,141 @@
1
+ import logging
2
+ import sys
3
+ from dataclasses import dataclass
4
+ from typing import Any, Dict, Literal, MutableMapping, Optional, Tuple
5
+
6
+
7
@dataclass
class ColorCode:
    """ANSI escape sequences for one color: a foreground code plus an optional
    style prefix (e.g. bold)."""

    fore: str
    style: str = ""


class Colors:
    """Namespace of ANSI foreground/style escape constants, with a helper that
    wraps text in a color and resets afterwards."""

    # Foreground colors
    BLACK = ColorCode("\033[30m")
    RED = ColorCode("\033[31m")
    GREEN = ColorCode("\033[32m")
    YELLOW = ColorCode("\033[33m")
    BLUE = ColorCode("\033[34m")
    MAGENTA = ColorCode("\033[35m")
    CYAN = ColorCode("\033[36m")
    WHITE = ColorCode("\033[37m")
    BRIGHT_MAGENTA = ColorCode("\033[95m")
    BRIGHT_CYAN = ColorCode("\033[96m")

    # Styles
    BOLD = "\033[1m"
    RESET = "\033[0m"

    @staticmethod
    def apply(text: str, color: ColorCode) -> str:
        """Return *text* wrapped in the color's escape codes, ending with RESET."""
        prefix = color.style + color.fore
        return prefix + text + Colors.RESET
33
+
34
+
35
class ColoredFormatter(logging.Formatter):
    """Log formatter that colors the level name and namespace, and optionally
    highlights the whole message when the record carries a ``highlight``
    attribute (set via ``LoggerAdapter.highlight``).
    """

    # Level number -> color for the levelname field.
    COLORS: Dict[int, ColorCode] = {
        logging.DEBUG: Colors.BLUE,
        logging.INFO: Colors.GREEN,
        logging.WARNING: Colors.YELLOW,
        logging.ERROR: Colors.RED,
        logging.CRITICAL: ColorCode("\033[31m", Colors.BOLD),
    }

    # Available highlight colors
    HIGHLIGHT_COLORS: Dict[str, ColorCode] = {
        "magenta": ColorCode(Colors.BRIGHT_MAGENTA.fore, Colors.BOLD),
        "cyan": ColorCode(Colors.BRIGHT_CYAN.fore, Colors.BOLD),
        "yellow": ColorCode(Colors.YELLOW.fore, Colors.BOLD),
        "red": ColorCode(Colors.RED.fore, Colors.BOLD),
        # "green" is part of LoggerAdapter.highlight's accepted colors; it was
        # previously missing here and silently fell back to magenta.
        "green": ColorCode(Colors.GREEN.fore, Colors.BOLD),
    }

    def __init__(self, fmt: Optional[str] = None) -> None:
        super().__init__(
            fmt or "%(asctime)s [%(levelname)s] %(namespace)s: %(message)s", datefmt="%H:%M:%S"
        )

    def format(self, record: logging.LogRecord) -> str:
        # Add namespace to extra fields if not present
        if not getattr(record, "namespace", None):
            record.__dict__["namespace"] = record.name

        # Color the level name. Custom levels registered via logging.addLevelName
        # have no entry in COLORS; leave their levelname uncolored instead of
        # raising KeyError (previous behavior).
        level_color = self.COLORS.get(record.levelno)
        if level_color is not None:
            record.levelname = Colors.apply(record.levelname, level_color)

        # Color the namespace
        record.__dict__["namespace"] = Colors.apply(record.__dict__["namespace"], Colors.CYAN)

        # Check if highlight flag is set
        highlight = getattr(record, "highlight", None)
        if highlight:
            # Get the highlight color or default to magenta
            color_name = highlight if isinstance(highlight, str) else "magenta"
            highlight_color = self.HIGHLIGHT_COLORS.get(
                color_name, self.HIGHLIGHT_COLORS["magenta"]
            )

            # Apply highlight to the message.
            # NOTE(review): this mutates the shared LogRecord (msg/args), which
            # affects any other handlers formatting the same record — preserved
            # as-is to avoid changing observable behavior.
            original_message = record.getMessage()
            record.msg = Colors.apply(original_message, highlight_color)
            if record.args:
                record.args = ()

        return super().format(record)
84
+
85
+
86
+ class LoggerAdapter(logging.LoggerAdapter[logging.Logger]):
87
+ """
88
+ Logger adapter that allows highlighting specific log messages.
89
+ """
90
+
91
+ def process(
92
+ self, msg: Any, kwargs: MutableMapping[str, Any]
93
+ ) -> Tuple[Any, MutableMapping[str, Any]]:
94
+ # Pass highlight flag through to the record
95
+ return msg, kwargs
96
+
97
+ def highlight(
98
+ self,
99
+ msg: object,
100
+ *args: Any,
101
+ color: Literal["magenta", "cyan", "yellow", "red", "green"] = "magenta",
102
+ **kwargs: Any,
103
+ ) -> None:
104
+ """
105
+ Log a highlighted message.
106
+
107
+ Args:
108
+ msg: The message format string
109
+ color: The color to highlight with (magenta, cyan, yellow, red)
110
+ *args: The args for the message format string
111
+ **kwargs: Additional logging kwargs
112
+ """
113
+ kwargs.setdefault("extra", {})
114
+ if isinstance(kwargs["extra"], dict):
115
+ kwargs["extra"]["highlight"] = color
116
+ return self.info(msg, *args, **kwargs)
117
+
118
+
119
def get_logger(namespace: str) -> LoggerAdapter:
    """
    Get a colored logger for the specified namespace.

    Repeated calls with the same namespace return an adapter over the same
    underlying ``logging.Logger`` (standard ``logging.getLogger`` behavior),
    so the handler is only attached once.

    Args:
        namespace: The namespace for the logger

    Returns:
        A configured logger instance with highlighting support
    """
    logger = logging.getLogger(namespace)

    # Only add handler if it doesn't exist. Output goes to stdout (not stderr)
    # with the colored formatter.
    if not logger.handlers:
        handler = logging.StreamHandler(sys.stdout)
        handler.setFormatter(ColoredFormatter())
        logger.addHandler(handler)

        # Set default level to INFO (only on first configuration; an already-
        # configured logger's level and handlers are left untouched)
        logger.setLevel(logging.INFO)

    # Wrap with adapter to support highlighting
    return LoggerAdapter(logger, {})
@@ -0,0 +1,14 @@
1
"""Public data-model exports for ``docent.data_models``, as declared in
``__all__`` below."""

from docent.data_models.agent_run import AgentRun
from docent.data_models.citation import InlineCitation
from docent.data_models.judge import Label
from docent.data_models.regex import RegexSnippet
from docent.data_models.transcript import Transcript, TranscriptGroup

__all__ = [
    "AgentRun",
    "InlineCitation",
    "Label",
    "RegexSnippet",
    "Transcript",
    "TranscriptGroup",
]
@@ -0,0 +1,91 @@
1
from dataclasses import dataclass

import tiktoken
2
+
3
# Token-budget ceiling constant. NOTE(review): not referenced within this
# module; presumably consumed by callers — verify before removing.
MAX_TOKENS = 100_000
4
+
5
+
6
def get_token_count(text: str, model: str = "gpt-4") -> int:
    """Get the number of tokens in a text under the GPT-4 tokenization scheme."""
    return len(tiktoken.encoding_for_model(model).encode(text))
10
+
11
+
12
def truncate_to_token_limit(text: str, max_tokens: int, model: str = "gpt-4") -> str:
    """Truncate text to stay within the specified token limit."""
    enc = tiktoken.encoding_for_model(model)
    token_ids = enc.encode(text)

    # Over budget: keep only the first max_tokens tokens and decode them back.
    if len(token_ids) > max_tokens:
        return enc.decode(token_ids[:max_tokens])
    return text
21
+
22
+
23
@dataclass
class MessageRange:
    """A range of messages in a transcript. start is inclusive, end is exclusive."""

    start: int  # index of the first message in the range (inclusive)
    end: int  # index one past the last message (exclusive)
    include_metadata: bool  # whether the metadata block fits alongside these messages
    num_tokens: int  # tokens consumed by the range (plus metadata when included)


def group_messages_into_ranges(
    token_counts: list[int], metadata_tokens: int, max_tokens: int, margin: int = 50
) -> list[MessageRange]:
    """Split a list of messages + metadata into ranges that stay within the specified token limit.

    Always tries to create ranges with metadata included, unless a single message + metadata is too long,
    in which case you get a lone message with no metadata.

    Args:
        token_counts: Per-message token counts, in transcript order.
        metadata_tokens: Token cost of the metadata attached to a range.
        max_tokens: Hard token budget for any single range.
        margin: Safety buffer subtracted from ``max_tokens``.

    Returns:
        Non-overlapping MessageRange objects covering all of ``token_counts``.
    """
    ranges: list[MessageRange] = []
    start_index = 0
    running_token_count = 0
    budget = max_tokens - margin  # loop-invariant effective limit

    i = 0
    while i < len(token_counts):
        new_token_count = token_counts[i]
        if running_token_count + new_token_count + metadata_tokens > budget:
            if start_index == i:  # a single message + metadata is already too long
                ranges.append(
                    MessageRange(
                        start=i, end=i + 1, include_metadata=False, num_tokens=new_token_count
                    )
                )
                i += 1
                # BUGFIX: advance start_index past the lone message. Previously it
                # was left behind, so the next flushed range started at the old
                # index and re-included the already-emitted message, producing
                # overlapping ranges.
                start_index = i
            else:
                # Flush messages [start_index, i) with metadata included; message i
                # is reconsidered against a fresh budget on the next iteration.
                ranges.append(
                    MessageRange(
                        start=start_index,
                        end=i,
                        include_metadata=True,
                        num_tokens=running_token_count + metadata_tokens,
                    )
                )
                running_token_count = 0
                start_index = i
        else:
            running_token_count += new_token_count
            i += 1

    # Flush the trailing range; drop metadata if adding it would exceed the budget.
    if running_token_count > 0:
        include_metadata = running_token_count + metadata_tokens < budget
        num_tokens = (
            running_token_count + metadata_tokens if include_metadata else running_token_count
        )
        ranges.append(
            MessageRange(
                start=start_index,
                end=len(token_counts),
                include_metadata=include_metadata,
                num_tokens=num_tokens,
            )
        )

    return ranges