docent-python 0.1.0a1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docent-python might be problematic.
- docent_python-0.1.0a1/.gitignore +192 -0
- docent_python-0.1.0a1/LICENSE.md +7 -0
- docent_python-0.1.0a1/PKG-INFO +16 -0
- docent_python-0.1.0a1/docent/__init__.py +3 -0
- docent_python-0.1.0a1/docent/_log_util/__init__.py +3 -0
- docent_python-0.1.0a1/docent/_log_util/logger.py +141 -0
- docent_python-0.1.0a1/docent/data_models/__init__.py +25 -0
- docent_python-0.1.0a1/docent/data_models/_tiktoken_util.py +91 -0
- docent_python-0.1.0a1/docent/data_models/agent_run.py +231 -0
- docent_python-0.1.0a1/docent/data_models/chat/__init__.py +25 -0
- docent_python-0.1.0a1/docent/data_models/chat/content.py +56 -0
- docent_python-0.1.0a1/docent/data_models/chat/message.py +125 -0
- docent_python-0.1.0a1/docent/data_models/chat/tool.py +109 -0
- docent_python-0.1.0a1/docent/data_models/citation.py +223 -0
- docent_python-0.1.0a1/docent/data_models/filters.py +205 -0
- docent_python-0.1.0a1/docent/data_models/metadata.py +219 -0
- docent_python-0.1.0a1/docent/data_models/regex.py +56 -0
- docent_python-0.1.0a1/docent/data_models/shared_types.py +10 -0
- docent_python-0.1.0a1/docent/data_models/transcript.py +347 -0
- docent_python-0.1.0a1/docent/py.typed +0 -0
- docent_python-0.1.0a1/docent/sdk/__init__.py +0 -0
- docent_python-0.1.0a1/docent/sdk/client.py +285 -0
- docent_python-0.1.0a1/pyproject.toml +33 -0
- docent_python-0.1.0a1/uv.lock +438 -0
docent_python-0.1.0a1/.gitignore
@@ -0,0 +1,192 @@
+**/*_gitignore.*
+**/*_gitignore/
+*.db
+.stignore
+*syncthing*
+.DS_Store
+# *.sql (neil: disabled for ursid)
+*.gz
+
+*.tfstate
+*.tfstate.backup
+*/.terraform/
+*/*.terraform.*
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+.python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+# wandb
+**/wandb/
+
+# Marimo notebook outputs
+**/__marimo__/
+
+# yarn
+**/.yarn/
+**/.pnp.*
+
+# data
+*.npy
+*.csv
+*.pkl
+*.eval
+
+# personal
+personal/caden/*
+inspect_evals
docent_python-0.1.0a1/LICENSE.md
@@ -0,0 +1,7 @@
+Copyright 2025 Clarity AI Research, Inc. dba Transluce
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
docent_python-0.1.0a1/PKG-INFO
@@ -0,0 +1,16 @@
+Metadata-Version: 2.4
+Name: docent-python
+Version: 0.1.0a1
+Summary: Docent SDK
+Project-URL: Homepage, https://github.com/TransluceAI/docent
+Project-URL: Issues, https://github.com/TransluceAI/docent/issues
+Project-URL: Docs, https://transluce-docent.readthedocs-hosted.com/en/latest
+Author-email: Transluce AI <info@transluce.org>
+License-Expression: MIT
+License-File: LICENSE.md
+Requires-Python: >=3.11
+Requires-Dist: logging>=0.4.9.6
+Requires-Dist: pydantic>=2.11.7
+Requires-Dist: pyyaml>=6.0.2
+Requires-Dist: sqlalchemy>=2.0.41
+Requires-Dist: tiktoken>=0.7.0
docent_python-0.1.0a1/docent/_log_util/logger.py
@@ -0,0 +1,141 @@
+import logging
+import sys
+from dataclasses import dataclass
+from typing import Any, Dict, Literal, MutableMapping, Optional, Tuple
+
+
+@dataclass
+class ColorCode:
+    fore: str
+    style: str = ""
+
+
+class Colors:
+    # Foreground colors
+    BLACK = ColorCode("\033[30m")
+    RED = ColorCode("\033[31m")
+    GREEN = ColorCode("\033[32m")
+    YELLOW = ColorCode("\033[33m")
+    BLUE = ColorCode("\033[34m")
+    MAGENTA = ColorCode("\033[35m")
+    CYAN = ColorCode("\033[36m")
+    WHITE = ColorCode("\033[37m")
+    BRIGHT_MAGENTA = ColorCode("\033[95m")
+    BRIGHT_CYAN = ColorCode("\033[96m")
+
+    # Styles
+    BOLD = "\033[1m"
+    RESET = "\033[0m"
+
+    @staticmethod
+    def apply(text: str, color: ColorCode) -> str:
+        return f"{color.style}{color.fore}{text}{Colors.RESET}"
+
+
+class ColoredFormatter(logging.Formatter):
+    COLORS: Dict[int, ColorCode] = {
+        logging.DEBUG: Colors.BLUE,
+        logging.INFO: Colors.GREEN,
+        logging.WARNING: Colors.YELLOW,
+        logging.ERROR: Colors.RED,
+        logging.CRITICAL: ColorCode("\033[31m", Colors.BOLD),
+    }
+
+    # Available highlight colors
+    HIGHLIGHT_COLORS: Dict[str, ColorCode] = {
+        "magenta": ColorCode(Colors.BRIGHT_MAGENTA.fore, Colors.BOLD),
+        "cyan": ColorCode(Colors.BRIGHT_CYAN.fore, Colors.BOLD),
+        "yellow": ColorCode(Colors.YELLOW.fore, Colors.BOLD),
+        "red": ColorCode(Colors.RED.fore, Colors.BOLD),
+    }
+
+    def __init__(self, fmt: Optional[str] = None) -> None:
+        super().__init__(
+            fmt or "%(asctime)s [%(levelname)s] %(namespace)s: %(message)s", datefmt="%H:%M:%S"
+        )
+
+    def format(self, record: logging.LogRecord) -> str:
+        # Add namespace to extra fields if not present
+        if not getattr(record, "namespace", None):
+            record.__dict__["namespace"] = record.name
+
+        # Color the level name
+        record.levelname = Colors.apply(record.levelname, self.COLORS[record.levelno])
+
+        # Color the namespace
+        record.__dict__["namespace"] = Colors.apply(record.__dict__["namespace"], Colors.CYAN)
+
+        # Check if highlight flag is set
+        highlight = getattr(record, "highlight", None)
+        if highlight:
+            # Get the highlight color or default to magenta
+            color_name = highlight if isinstance(highlight, str) else "magenta"
+            highlight_color = self.HIGHLIGHT_COLORS.get(
+                color_name, self.HIGHLIGHT_COLORS["magenta"]
+            )
+
+            # Apply highlight to the message
+            original_message = record.getMessage()
+            record.msg = Colors.apply(original_message, highlight_color)
+            if record.args:
+                record.args = ()
+
+        return super().format(record)
+
+
+class LoggerAdapter(logging.LoggerAdapter[logging.Logger]):
+    """
+    Logger adapter that allows highlighting specific log messages.
+    """
+
+    def process(
+        self, msg: Any, kwargs: MutableMapping[str, Any]
+    ) -> Tuple[Any, MutableMapping[str, Any]]:
+        # Pass highlight flag through to the record
+        return msg, kwargs
+
+    def highlight(
+        self,
+        msg: object,
+        *args: Any,
+        color: Literal["magenta", "cyan", "yellow", "red", "green"] = "magenta",
+        **kwargs: Any,
+    ) -> None:
+        """
+        Log a highlighted message.
+
+        Args:
+            msg: The message format string
+            color: The color to highlight with (magenta, cyan, yellow, red)
+            *args: The args for the message format string
+            **kwargs: Additional logging kwargs
+        """
+        kwargs.setdefault("extra", {})
+        if isinstance(kwargs["extra"], dict):
+            kwargs["extra"]["highlight"] = color
+        return self.info(msg, *args, **kwargs)
+
+
+def get_logger(namespace: str) -> LoggerAdapter:
+    """
+    Get a colored logger for the specified namespace.
+
+    Args:
+        namespace: The namespace for the logger
+
+    Returns:
+        A configured logger instance with highlighting support
+    """
+    logger = logging.getLogger(namespace)
+
+    # Only add handler if it doesn't exist
+    if not logger.handlers:
+        handler = logging.StreamHandler(sys.stdout)
+        handler.setFormatter(ColoredFormatter())
+        logger.addHandler(handler)
+
+    # Set default level to INFO
+    logger.setLevel(logging.INFO)
+
+    # Wrap with adapter to support highlighting
+    return LoggerAdapter(logger, {})
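For orientation, a minimal usage sketch of the logger utilities in this hunk (not part of the diff; the namespace string and messages are made up for illustration):

from docent._log_util.logger import get_logger

logger = get_logger("docent.example")  # hypothetical namespace
logger.info("loaded %d agent runs", 3)  # standard logging methods pass through the adapter
logger.highlight("ingest finished", color="cyan")  # sets the highlight flag read by ColoredFormatter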
docent_python-0.1.0a1/docent/data_models/__init__.py
@@ -0,0 +1,25 @@
+from docent.data_models.agent_run import AgentRun
+from docent.data_models.citation import Citation
+from docent.data_models.filters import (
+    AgentRunIdFilter,
+    BaseFrameFilter,
+    ComplexFilter,
+    SearchResultPredicateFilter,
+)
+from docent.data_models.metadata import BaseAgentRunMetadata, BaseMetadata, FrameDimension
+from docent.data_models.regex import RegexSnippet
+from docent.data_models.transcript import Transcript
+
+__all__ = [
+    "AgentRun",
+    "Citation",
+    "RegexSnippet",
+    "AgentRunIdFilter",
+    "FrameDimension",
+    "BaseFrameFilter",
+    "SearchResultPredicateFilter",
+    "ComplexFilter",
+    "BaseAgentRunMetadata",
+    "BaseMetadata",
+    "Transcript",
+]
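These re-exports mean downstream code can import the public data models from the subpackage root; a one-line sketch, assuming the package is installed:

from docent.data_models import AgentRun, Citation, Transcript  # names taken from __all__ above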
docent_python-0.1.0a1/docent/data_models/_tiktoken_util.py
@@ -0,0 +1,91 @@
+import tiktoken
+
+MAX_TOKENS = 100_000
+
+
+def get_token_count(text: str, model: str = "gpt-4") -> int:
+    """Get the number of tokens in a text under the GPT-4 tokenization scheme."""
+    encoding = tiktoken.encoding_for_model(model)
+    return len(encoding.encode(text))
+
+
+def truncate_to_token_limit(text: str, max_tokens: int, model: str = "gpt-4") -> str:
+    """Truncate text to stay within the specified token limit."""
+    encoding = tiktoken.encoding_for_model(model)
+    tokens = encoding.encode(text)
+
+    if len(tokens) <= max_tokens:
+        return text
+
+    return encoding.decode(tokens[:max_tokens])
+
+
+class MessageRange:
+    """A range of messages in a transcript. start is inclusive, end is exclusive."""
+
+    start: int
+    end: int
+    include_metadata: bool
+    num_tokens: int
+
+    def __init__(self, start: int, end: int, include_metadata: bool, num_tokens: int):
+        self.start = start
+        self.end = end
+        self.include_metadata = include_metadata
+        self.num_tokens = num_tokens
+
+
+def group_messages_into_ranges(
+    token_counts: list[int], metadata_tokens: int, max_tokens: int, margin: int = 50
+) -> list[MessageRange]:
+    """Split a list of messages + metadata into ranges that stay within the specified token limit.
+
+    Always tries to create ranges with metadata included, unless a single message + metadata is too long,
+    in which case you get a lone message with no metadata
+    """
+    ranges: list[MessageRange] = []
+    start_index = 0
+    running_token_count = 0
+
+    i = 0
+    while i < len(token_counts):
+        new_token_count = token_counts[i]
+        if running_token_count + new_token_count + metadata_tokens > max_tokens - margin:
+            if start_index == i:  # a single message + metadata is already too long
+                ranges.append(
+                    MessageRange(
+                        start=i, end=i + 1, include_metadata=False, num_tokens=new_token_count
+                    )
+                )
+                i += 1
+            else:
+                # add all messages from start_index to i-1, with metadata included
+                ranges.append(
+                    MessageRange(
+                        start=start_index,
+                        end=i,
+                        include_metadata=True,
+                        num_tokens=running_token_count + metadata_tokens,
+                    )
+                )
+            running_token_count = 0
+            start_index = i
+        else:
+            running_token_count += new_token_count
+            i += 1
+
+    if running_token_count > 0:
+        include_metadata = running_token_count + metadata_tokens < max_tokens - margin
+        num_tokens = (
+            running_token_count + metadata_tokens if include_metadata else running_token_count
+        )
+        ranges.append(
+            MessageRange(
+                start=start_index,
+                end=len(token_counts),
+                include_metadata=include_metadata,
+                num_tokens=num_tokens,
+            )
+        )
+
+    return ranges
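A minimal sketch of how the chunking helper above might be driven, assuming the package is installed; the message strings and token budgets are made up for illustration:

from docent.data_models._tiktoken_util import get_token_count, group_messages_into_ranges

messages = ["short user turn", "a much longer assistant reply " * 50]  # placeholder messages
token_counts = [get_token_count(m) for m in messages]
# Reserve 200 tokens for metadata and keep each range under a hypothetical 8k budget.
ranges = group_messages_into_ranges(token_counts, metadata_tokens=200, max_tokens=8_000)
for r in ranges:
    print(r.start, r.end, r.include_metadata, r.num_tokens)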