alma-memory 0.5.0-py3-none-any.whl → 0.7.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alma/__init__.py +296 -194
- alma/compression/__init__.py +33 -0
- alma/compression/pipeline.py +980 -0
- alma/confidence/__init__.py +47 -47
- alma/confidence/engine.py +540 -540
- alma/confidence/types.py +351 -351
- alma/config/loader.py +157 -157
- alma/consolidation/__init__.py +23 -23
- alma/consolidation/engine.py +678 -678
- alma/consolidation/prompts.py +84 -84
- alma/core.py +1189 -322
- alma/domains/__init__.py +30 -30
- alma/domains/factory.py +359 -359
- alma/domains/schemas.py +448 -448
- alma/domains/types.py +272 -272
- alma/events/__init__.py +75 -75
- alma/events/emitter.py +285 -284
- alma/events/storage_mixin.py +246 -246
- alma/events/types.py +126 -126
- alma/events/webhook.py +425 -425
- alma/exceptions.py +49 -49
- alma/extraction/__init__.py +31 -31
- alma/extraction/auto_learner.py +265 -264
- alma/extraction/extractor.py +420 -420
- alma/graph/__init__.py +106 -81
- alma/graph/backends/__init__.py +32 -18
- alma/graph/backends/kuzu.py +624 -0
- alma/graph/backends/memgraph.py +432 -0
- alma/graph/backends/memory.py +236 -236
- alma/graph/backends/neo4j.py +417 -417
- alma/graph/base.py +159 -159
- alma/graph/extraction.py +198 -198
- alma/graph/store.py +860 -860
- alma/harness/__init__.py +35 -35
- alma/harness/base.py +386 -386
- alma/harness/domains.py +705 -705
- alma/initializer/__init__.py +37 -37
- alma/initializer/initializer.py +418 -418
- alma/initializer/types.py +250 -250
- alma/integration/__init__.py +62 -62
- alma/integration/claude_agents.py +444 -432
- alma/integration/helena.py +423 -423
- alma/integration/victor.py +471 -471
- alma/learning/__init__.py +101 -86
- alma/learning/decay.py +878 -0
- alma/learning/forgetting.py +1446 -1446
- alma/learning/heuristic_extractor.py +390 -390
- alma/learning/protocols.py +374 -374
- alma/learning/validation.py +346 -346
- alma/mcp/__init__.py +123 -45
- alma/mcp/__main__.py +156 -156
- alma/mcp/resources.py +122 -122
- alma/mcp/server.py +955 -591
- alma/mcp/tools.py +3254 -511
- alma/observability/__init__.py +91 -0
- alma/observability/config.py +302 -0
- alma/observability/guidelines.py +170 -0
- alma/observability/logging.py +424 -0
- alma/observability/metrics.py +583 -0
- alma/observability/tracing.py +440 -0
- alma/progress/__init__.py +21 -21
- alma/progress/tracker.py +607 -607
- alma/progress/types.py +250 -250
- alma/retrieval/__init__.py +134 -53
- alma/retrieval/budget.py +525 -0
- alma/retrieval/cache.py +1304 -1061
- alma/retrieval/embeddings.py +202 -202
- alma/retrieval/engine.py +850 -366
- alma/retrieval/modes.py +365 -0
- alma/retrieval/progressive.py +560 -0
- alma/retrieval/scoring.py +344 -344
- alma/retrieval/trust_scoring.py +637 -0
- alma/retrieval/verification.py +797 -0
- alma/session/__init__.py +19 -19
- alma/session/manager.py +442 -399
- alma/session/types.py +288 -288
- alma/storage/__init__.py +101 -61
- alma/storage/archive.py +233 -0
- alma/storage/azure_cosmos.py +1259 -1048
- alma/storage/base.py +1083 -525
- alma/storage/chroma.py +1443 -1443
- alma/storage/constants.py +103 -0
- alma/storage/file_based.py +614 -619
- alma/storage/migrations/__init__.py +21 -0
- alma/storage/migrations/base.py +321 -0
- alma/storage/migrations/runner.py +323 -0
- alma/storage/migrations/version_stores.py +337 -0
- alma/storage/migrations/versions/__init__.py +11 -0
- alma/storage/migrations/versions/v1_0_0.py +373 -0
- alma/storage/migrations/versions/v1_1_0_workflow_context.py +551 -0
- alma/storage/pinecone.py +1080 -1080
- alma/storage/postgresql.py +1948 -1452
- alma/storage/qdrant.py +1306 -1306
- alma/storage/sqlite_local.py +3041 -1358
- alma/testing/__init__.py +46 -0
- alma/testing/factories.py +301 -0
- alma/testing/mocks.py +389 -0
- alma/types.py +292 -264
- alma/utils/__init__.py +19 -0
- alma/utils/tokenizer.py +521 -0
- alma/workflow/__init__.py +83 -0
- alma/workflow/artifacts.py +170 -0
- alma/workflow/checkpoint.py +311 -0
- alma/workflow/context.py +228 -0
- alma/workflow/outcomes.py +189 -0
- alma/workflow/reducers.py +393 -0
- {alma_memory-0.5.0.dist-info → alma_memory-0.7.0.dist-info}/METADATA +244 -72
- alma_memory-0.7.0.dist-info/RECORD +112 -0
- alma_memory-0.5.0.dist-info/RECORD +0 -76
- {alma_memory-0.5.0.dist-info → alma_memory-0.7.0.dist-info}/WHEEL +0 -0
- {alma_memory-0.5.0.dist-info → alma_memory-0.7.0.dist-info}/top_level.txt +0 -0
alma/utils/tokenizer.py
ADDED
@@ -0,0 +1,521 @@
"""
ALMA Token Estimation Module.

Provides accurate token counting using tiktoken for OpenAI models
and configurable token budgets per model type.

This module addresses Issue #11 (LOW-001): Token Estimation is Rough.
"""

import logging
from dataclasses import dataclass
from enum import Enum
from typing import TYPE_CHECKING, Dict, Optional

if TYPE_CHECKING:
    import tiktoken

logger = logging.getLogger(__name__)


class ModelFamily(Enum):
    """Model families with different tokenization schemes."""

    GPT4 = "gpt4"  # GPT-4, GPT-4 Turbo, GPT-4o
    GPT35 = "gpt35"  # GPT-3.5 Turbo
    CLAUDE = "claude"  # Claude 3.x models
    GEMINI = "gemini"  # Google Gemini models
    LLAMA = "llama"  # Meta Llama models
    MISTRAL = "mistral"  # Mistral models
    LOCAL = "local"  # Local/open-source models
    UNKNOWN = "unknown"  # Fallback


@dataclass
class ModelTokenBudget:
    """
    Token budget configuration for a model.

    Attributes:
        context_window: Maximum context window size for the model
        memory_budget: Recommended tokens to allocate for ALMA memories
        response_reserve: Tokens to reserve for model response
        safety_margin: Additional safety margin (percentage, 0.0-1.0)
    """

    context_window: int
    memory_budget: int
    response_reserve: int = 4096
    safety_margin: float = 0.1

    @property
    def effective_memory_budget(self) -> int:
        """Calculate effective memory budget after safety margin."""
        return int(self.memory_budget * (1 - self.safety_margin))


# Default token budgets per model
DEFAULT_TOKEN_BUDGETS: Dict[str, ModelTokenBudget] = {
    # OpenAI GPT-4 family
    "gpt-4": ModelTokenBudget(
        context_window=8192,
        memory_budget=2000,
        response_reserve=2048,
    ),
    "gpt-4-32k": ModelTokenBudget(
        context_window=32768,
        memory_budget=4000,
        response_reserve=4096,
    ),
    "gpt-4-turbo": ModelTokenBudget(
        context_window=128000,
        memory_budget=8000,
        response_reserve=4096,
    ),
    "gpt-4o": ModelTokenBudget(
        context_window=128000,
        memory_budget=8000,
        response_reserve=4096,
    ),
    "gpt-4o-mini": ModelTokenBudget(
        context_window=128000,
        memory_budget=8000,
        response_reserve=4096,
    ),
    # OpenAI GPT-3.5 family
    "gpt-3.5-turbo": ModelTokenBudget(
        context_window=16385,
        memory_budget=2000,
        response_reserve=2048,
    ),
    "gpt-3.5-turbo-16k": ModelTokenBudget(
        context_window=16385,
        memory_budget=4000,
        response_reserve=4096,
    ),
    # Anthropic Claude family
    "claude-3-opus": ModelTokenBudget(
        context_window=200000,
        memory_budget=10000,
        response_reserve=4096,
    ),
    "claude-3-sonnet": ModelTokenBudget(
        context_window=200000,
        memory_budget=8000,
        response_reserve=4096,
    ),
    "claude-3-haiku": ModelTokenBudget(
        context_window=200000,
        memory_budget=6000,
        response_reserve=4096,
    ),
    "claude-3.5-sonnet": ModelTokenBudget(
        context_window=200000,
        memory_budget=8000,
        response_reserve=4096,
    ),
    "claude-3.5-haiku": ModelTokenBudget(
        context_window=200000,
        memory_budget=6000,
        response_reserve=4096,
    ),
    # Google Gemini family
    "gemini-pro": ModelTokenBudget(
        context_window=32768,
        memory_budget=4000,
        response_reserve=4096,
    ),
    "gemini-1.5-pro": ModelTokenBudget(
        context_window=1000000,
        memory_budget=10000,
        response_reserve=8192,
    ),
    "gemini-1.5-flash": ModelTokenBudget(
        context_window=1000000,
        memory_budget=8000,
        response_reserve=8192,
    ),
    # Local/open-source models (conservative defaults)
    "llama-2-7b": ModelTokenBudget(
        context_window=4096,
        memory_budget=1000,
        response_reserve=1024,
    ),
    "llama-2-70b": ModelTokenBudget(
        context_window=4096,
        memory_budget=1000,
        response_reserve=1024,
    ),
    "llama-3-8b": ModelTokenBudget(
        context_window=8192,
        memory_budget=2000,
        response_reserve=2048,
    ),
    "llama-3-70b": ModelTokenBudget(
        context_window=8192,
        memory_budget=2000,
        response_reserve=2048,
    ),
    "mistral-7b": ModelTokenBudget(
        context_window=8192,
        memory_budget=2000,
        response_reserve=2048,
    ),
    "mixtral-8x7b": ModelTokenBudget(
        context_window=32768,
        memory_budget=4000,
        response_reserve=4096,
    ),
    # Default fallback
    "default": ModelTokenBudget(
        context_window=8192,
        memory_budget=2000,
        response_reserve=2048,
    ),
}


class TokenEstimator:
    """
    Accurate token estimation using tiktoken for OpenAI-compatible tokenization.

    For non-OpenAI models, uses model-specific approximations based on
    documented token-to-character ratios.

    Usage:
        estimator = TokenEstimator(model="gpt-4")
        token_count = estimator.count_tokens("Hello, world!")
        budget = estimator.get_token_budget()
    """

    # Tiktoken encoding cache
    _encoding_cache: Dict[str, "tiktoken.Encoding"] = {}  # type: ignore

    # Approximate tokens-per-character ratios for fallback estimation
    # These are based on documented model characteristics
    TOKENS_PER_CHAR_RATIOS: Dict[ModelFamily, float] = {
        ModelFamily.GPT4: 0.25,  # ~4 chars per token on average
        ModelFamily.GPT35: 0.25,
        ModelFamily.CLAUDE: 0.28,  # Claude tends to be slightly more token-dense
        ModelFamily.GEMINI: 0.25,
        ModelFamily.LLAMA: 0.27,  # Llama tokenizer is similar to GPT
        ModelFamily.MISTRAL: 0.27,
        ModelFamily.LOCAL: 0.25,
        ModelFamily.UNKNOWN: 0.25,
    }

    def __init__(
        self,
        model: str = "gpt-4",
        custom_budget: Optional[ModelTokenBudget] = None,
    ):
        """
        Initialize token estimator.

        Args:
            model: Model name (e.g., "gpt-4", "claude-3-sonnet", "llama-3-8b")
            custom_budget: Optional custom token budget to override defaults
        """
        self.model = model.lower()
        self.model_family = self._detect_model_family(self.model)
        self._tiktoken_available = self._check_tiktoken()
        self._encoding = self._get_encoding() if self._tiktoken_available else None
        self._custom_budget = custom_budget

    def _check_tiktoken(self) -> bool:
        """Check if tiktoken is available."""
        try:
            import tiktoken  # noqa: F401

            return True
        except ImportError:
            logger.debug("tiktoken not available, using approximate token estimation")
            return False

    def _detect_model_family(self, model: str) -> ModelFamily:
        """Detect the model family from model name."""
        model_lower = model.lower()

        if any(x in model_lower for x in ["gpt-4", "gpt4"]):
            return ModelFamily.GPT4
        elif any(x in model_lower for x in ["gpt-3.5", "gpt35"]):
            return ModelFamily.GPT35
        elif "claude" in model_lower:
            return ModelFamily.CLAUDE
        elif "gemini" in model_lower:
            return ModelFamily.GEMINI
        elif "llama" in model_lower:
            return ModelFamily.LLAMA
        elif "mistral" in model_lower or "mixtral" in model_lower:
            return ModelFamily.MISTRAL
        else:
            return ModelFamily.UNKNOWN

    def _get_encoding(self) -> Optional["tiktoken.Encoding"]:  # type: ignore
        """Get tiktoken encoding for the model."""
        if not self._tiktoken_available:
            return None

        import tiktoken

        # Map model families to tiktoken encodings
        encoding_map = {
            ModelFamily.GPT4: "cl100k_base",
            ModelFamily.GPT35: "cl100k_base",
            ModelFamily.CLAUDE: "cl100k_base",  # Claude uses similar tokenization
            ModelFamily.GEMINI: "cl100k_base",  # Approximate
            ModelFamily.LLAMA: "cl100k_base",  # Approximate
            ModelFamily.MISTRAL: "cl100k_base",  # Approximate
            ModelFamily.LOCAL: "cl100k_base",
            ModelFamily.UNKNOWN: "cl100k_base",
        }

        encoding_name = encoding_map.get(self.model_family, "cl100k_base")

        # Use cached encoding if available
        if encoding_name not in self._encoding_cache:
            try:
                self._encoding_cache[encoding_name] = tiktoken.get_encoding(
                    encoding_name
                )
            except Exception as e:
                logger.warning(f"Failed to get tiktoken encoding: {e}")
                return None

        return self._encoding_cache[encoding_name]

    def count_tokens(self, text: str) -> int:
        """
        Count tokens in text using tiktoken or fallback estimation.

        Args:
            text: Text to count tokens for

        Returns:
            Estimated token count
        """
        if not text:
            return 0

        # Use tiktoken if available
        if self._encoding is not None:
            try:
                return len(self._encoding.encode(text))
            except Exception as e:
                logger.debug(f"tiktoken encoding failed, using fallback: {e}")

        # Fallback: character-based estimation
        ratio = self.TOKENS_PER_CHAR_RATIOS.get(self.model_family, 0.25)
        return int(len(text) * ratio)

    def count_tokens_for_messages(
        self,
        messages: list[dict[str, str]],
    ) -> int:
        """
        Count tokens for a list of messages (chat format).

        Accounts for message formatting overhead.

        Args:
            messages: List of message dicts with "role" and "content" keys

        Returns:
            Estimated token count including formatting overhead
        """
        total = 0

        # Per-message overhead varies by model
        # GPT-4/3.5: ~4 tokens per message for formatting
        # Claude: ~3 tokens per message
        overhead_per_message = (
            4 if self.model_family in (ModelFamily.GPT4, ModelFamily.GPT35) else 3
        )

        for message in messages:
            content = message.get("content", "")
            total += self.count_tokens(content)
            total += overhead_per_message

        # Add reply priming overhead
        total += 3

        return total

    def get_token_budget(self) -> ModelTokenBudget:
        """
        Get the token budget for the current model.

        Returns custom budget if set, otherwise returns default for model.
        """
        if self._custom_budget:
            return self._custom_budget

        # Try exact model match first
        if self.model in DEFAULT_TOKEN_BUDGETS:
            return DEFAULT_TOKEN_BUDGETS[self.model]

        # Try partial matches - prefer longer key matches
        best_match = None
        best_match_len = 0

        for key, budget in DEFAULT_TOKEN_BUDGETS.items():
            if key == "default":
                continue
            if key in self.model:
                if len(key) > best_match_len:
                    best_match = budget
                    best_match_len = len(key)
            elif self.model in key:
                if len(self.model) > best_match_len:
                    best_match = budget
                    best_match_len = len(self.model)

        if best_match:
            return best_match

        # Return default
        return DEFAULT_TOKEN_BUDGETS["default"]

    def truncate_to_token_limit(
        self,
        text: str,
        max_tokens: int,
        suffix: str = "\n[truncated]",
    ) -> str:
        """
        Truncate text to fit within a token limit.

        Args:
            text: Text to truncate
            max_tokens: Maximum tokens allowed
            suffix: Suffix to append if truncated

        Returns:
            Truncated text with suffix if it exceeded the limit
        """
        current_tokens = self.count_tokens(text)

        if current_tokens <= max_tokens:
            return text

        # Reserve tokens for suffix
        suffix_tokens = self.count_tokens(suffix)
        target_tokens = max_tokens - suffix_tokens

        if target_tokens <= 0:
            return suffix

        # Binary search for the right truncation point
        if self._encoding is not None:
            try:
                tokens = self._encoding.encode(text)
                truncated_tokens = tokens[:target_tokens]
                return self._encoding.decode(truncated_tokens) + suffix
            except Exception:
                pass

        # Fallback: character-based truncation
        ratio = self.TOKENS_PER_CHAR_RATIOS.get(self.model_family, 0.25)
        target_chars = int(target_tokens / ratio)
        return text[:target_chars] + suffix

    def estimate_remaining_budget(
        self,
        used_tokens: int,
        include_response_reserve: bool = True,
    ) -> int:
        """
        Estimate remaining token budget for memories.

        Args:
            used_tokens: Tokens already used in context
            include_response_reserve: Whether to subtract response reserve

        Returns:
            Remaining tokens available for memories
        """
        budget = self.get_token_budget()
        available = budget.context_window - used_tokens

        if include_response_reserve:
            available -= budget.response_reserve

        # Apply safety margin
        available = int(available * (1 - budget.safety_margin))

        return max(0, min(available, budget.effective_memory_budget))


def get_token_estimator(
    model: str = "gpt-4",
    custom_budget: Optional[ModelTokenBudget] = None,
) -> TokenEstimator:
    """
    Factory function to create a TokenEstimator.

    Args:
        model: Model name
        custom_budget: Optional custom token budget

    Returns:
        Configured TokenEstimator instance
    """
    return TokenEstimator(model=model, custom_budget=custom_budget)


def get_default_token_budget(model: str = "gpt-4") -> ModelTokenBudget:
    """
    Get the default token budget for a model.

    Args:
        model: Model name

    Returns:
        Token budget configuration
    """
    model_lower = model.lower()

    # Try exact match
    if model_lower in DEFAULT_TOKEN_BUDGETS:
        return DEFAULT_TOKEN_BUDGETS[model_lower]

    # Try partial match - prefer longer key matches to avoid e.g. "gpt-4" matching "gpt-4o"
    best_match = None
    best_match_len = 0

    for key, budget in DEFAULT_TOKEN_BUDGETS.items():
        if key == "default":
            continue
        if key in model_lower:
            if len(key) > best_match_len:
                best_match = budget
                best_match_len = len(key)
        elif model_lower in key:
            if len(model_lower) > best_match_len:
                best_match = budget
                best_match_len = len(model_lower)

    if best_match:
        return best_match

    return DEFAULT_TOKEN_BUDGETS["default"]


def estimate_tokens_simple(text: str) -> int:
    """
    Simple token estimation without model context.

    Uses the standard ~4 characters per token approximation.
    For more accurate estimation, use TokenEstimator.

    Args:
        text: Text to estimate tokens for

    Returns:
        Approximate token count
    """
    if not text:
        return 0
    # Standard approximation: 1 token ~ 4 characters
    return max(1, len(text) // 4)
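
A minimal usage sketch of the new tokenizer module, using only the classes and signatures visible in the hunk above; the model choice and sample strings are illustrative, not taken from the package:

from alma.utils.tokenizer import TokenEstimator, estimate_tokens_simple

# Counting and budgeting against a known model entry ("gpt-4o" is in DEFAULT_TOKEN_BUDGETS).
estimator = TokenEstimator(model="gpt-4o")
text = "Summarize the retrieved memories for the current task."

tokens = estimator.count_tokens(text)       # tiktoken count, or ~0.25 tokens/char fallback
budget = estimator.get_token_budget()       # ModelTokenBudget(context_window=128000, ...)
remaining = estimator.estimate_remaining_budget(used_tokens=1500)

# Clip an oversized memory block; the "\n[truncated]" suffix is appended only when clipping occurs.
clipped = estimator.truncate_to_token_limit(text * 100, max_tokens=200)

# Quick model-agnostic estimate (~4 characters per token).
approx = estimate_tokens_simple(text)

Note that when tiktoken is not installed the estimator silently degrades to the per-family character ratios shown in TOKENS_PER_CHAR_RATIOS.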
alma/workflow/__init__.py
ADDED
@@ -0,0 +1,83 @@
"""
ALMA Workflow Module.

Provides workflow context, checkpointing, state management, and artifact
linking for integration with workflow orchestration systems like AGtestari.

Sprint 1 Task 1.7
"""

# Context and scoping
# Artifact linking
from alma.workflow.artifacts import (
    ArtifactRef,
    ArtifactType,
    link_artifact,
)

# Checkpoints for crash recovery
from alma.workflow.checkpoint import (
    DEFAULT_MAX_STATE_SIZE,
    Checkpoint,
    CheckpointManager,
)
from alma.workflow.context import (
    RetrievalScope,
    WorkflowContext,
)

# Workflow outcomes for learning
from alma.workflow.outcomes import (
    WorkflowOutcome,
    WorkflowResult,
)

# State reducers for parallel merge
from alma.workflow.reducers import (
    BUILTIN_REDUCERS,
    AppendReducer,
    FirstValueReducer,
    LastValueReducer,
    MaxReducer,
    MergeDictReducer,
    MinReducer,
    ReducerConfig,
    StateMerger,
    StateReducer,
    SumReducer,
    UnionReducer,
    get_reducer,
    merge_states,
)

__all__ = [
    # Context
    "RetrievalScope",
    "WorkflowContext",
    # Checkpoints
    "Checkpoint",
    "CheckpointManager",
    "DEFAULT_MAX_STATE_SIZE",
    # Outcomes
    "WorkflowOutcome",
    "WorkflowResult",
    # Artifacts
    "ArtifactRef",
    "ArtifactType",
    "link_artifact",
    # Reducers
    "StateReducer",
    "AppendReducer",
    "MergeDictReducer",
    "LastValueReducer",
    "FirstValueReducer",
    "SumReducer",
    "MaxReducer",
    "MinReducer",
    "UnionReducer",
    "ReducerConfig",
    "StateMerger",
    "get_reducer",
    "merge_states",
    "BUILTIN_REDUCERS",
]
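
For orientation, a consumer-side sketch of the surface this __init__ re-exports; only the names come from the hunk above, and nothing is assumed about their signatures or behavior:

# These imports mirror the __all__ list above; how the objects are constructed
# and called is defined in the alma.workflow submodules, which are not shown in this hunk.
from alma.workflow import (
    CheckpointManager,
    RetrievalScope,
    WorkflowContext,
    WorkflowOutcome,
    merge_states,
)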