llmbuffer 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llmbuffer/__init__.py ADDED
@@ -0,0 +1,59 @@
1
+ """llmbuffer — cache-optimized LLM conversation history management.
2
+
3
+ Stateful::
4
+
5
+ from llmbuffer import PromptManager, PromptConfig
6
+
7
+ manager = PromptManager(PromptConfig(
8
+ static_system_prompt="You are a helpful assistant.",
9
+ transition_mode="agent_cycle",
10
+ max_tokens=8000,
11
+ ))
12
+ manager.append({"role": "user", "content": "Hi"})
13
+ messages = manager.build_messages(dynamic_system_prompt="Time: 12:00")
14
+
15
+ Stateless / functional::
16
+
17
+ from llmbuffer import functional, new_state, PromptConfig
18
+
19
+ config = PromptConfig(static_system_prompt="...")
20
+ state = new_state()
21
+ state = functional.append_message(state, {"role": "user", "content": "Hi"}, config)
22
+ messages = functional.build_messages(state, config)
23
+ """
24
+
25
+ from . import functional
26
+ from .adapters import (
27
+ AnthropicAdapter,
28
+ OpenAIAdapter,
29
+ ProviderAdapter,
30
+ TransformersAdapter,
31
+ )
32
+ from .config import PromptConfig, TransitionMode
33
+ from .hooks import (
34
+ drop_tool_messages_transition_hook,
35
+ identity_transition_hook,
36
+ truncation_compaction_hook,
37
+ )
38
+ from .manager import PromptManager
39
+ from .state import dumps, loads, new_state
40
+
41
# Version string of this package release.
__version__ = "0.1.0"

# Public API of the package: the names exported by ``from llmbuffer import *``.
# Every entry is either imported above or defined in this module.
__all__ = [
    # Provider adapters (from .adapters)
    "AnthropicAdapter",
    "OpenAIAdapter",
    "ProviderAdapter",
    "TransformersAdapter",
    # Configuration (from .config)
    "PromptConfig",
    "TransitionMode",
    # Stateful manager (from .manager)
    "PromptManager",
    # Stateless API module and state helpers (from .state)
    "functional",
    "new_state",
    "dumps",
    "loads",
    # Built-in hooks (from .hooks)
    "identity_transition_hook",
    "truncation_compaction_hook",
    "drop_tool_messages_transition_hook",
    "__version__",
]
llmbuffer/adapters.py ADDED
@@ -0,0 +1,127 @@
1
+ """Provider adapters: token counting and cache-marker injection.
2
+
3
+ The core library is provider-agnostic. An adapter supplies:
4
+
5
+ - ``count_tokens(messages)``: estimate the token cost of a message list.
6
+ - ``apply_cache_markers(messages, boundaries)``: inject provider-specific
7
+ cache-control hints at the static-system / long-lived-history boundaries.
8
+
9
+ ``boundaries`` is a list of indices into ``messages`` marking the last
10
+ message of each stable prefix segment (e.g. end of static system prompt,
11
+ end of long-lived history).
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import copy
17
+ import json
18
+ from typing import Any, Dict, List, Sequence
19
+
20
# A single chat message, represented as a plain dict. The adapters in this
# module read its "content" and "tool_calls" keys.
Message = Dict[str, Any]
21
+
22
+
23
class ProviderAdapter:
    """Base adapter. Subclass to support a new provider or tokenizer.

    Provides a dependency-free token estimate and a no-op cache-marker
    hook; subclasses override either method as needed.
    """

    name = "base"

    def count_tokens(self, messages: Sequence[Message]) -> int:
        """Return a rough token estimate: ~4 characters per token.

        Deliberately dependency-free; override with a real tokenizer for
        accuracy.

        Args:
            messages: Message dicts. Only the ``"content"`` and
                ``"tool_calls"`` keys are inspected.

        Returns:
            Total counted characters divided by 4 (floor division).
            Non-string content and tool calls are measured by the length
            of their JSON serialization.
        """
        total_chars = 0
        for msg in messages:
            content = msg.get("content")
            if content is None:
                # A missing or explicit ``None`` content (common on
                # tool-call messages) contributes nothing. Serializing it
                # would count the four characters of the string "null".
                content = ""
            elif not isinstance(content, str):
                # ``default=str`` keeps the estimate from failing on values
                # that are not natively JSON-serializable.
                content = json.dumps(content, default=str)
            total_chars += len(content)

            tool_calls = msg.get("tool_calls")
            if tool_calls:
                total_chars += len(json.dumps(tool_calls, default=str))
        return total_chars // 4

    def apply_cache_markers(
        self, messages: List[Message], boundaries: Sequence[int]
    ) -> List[Message]:
        """Inject cache markers at the given boundary indices.

        Base implementation is a no-op, which is correct for providers
        with automatic prefix caching (OpenAI).

        Args:
            messages: The message list to annotate.
            boundaries: Indices into ``messages``; ignored here.

        Returns:
            The same ``messages`` object, unchanged.
        """
        return messages
53
+
54
+
55
class OpenAIAdapter(ProviderAdapter):
    """Adapter for the OpenAI / LiteLLM chat-completions message format.

    OpenAI caches prompt prefixes automatically, keyed on the literal
    prefix, so this adapter injects no markers and inherits the base
    behavior unchanged. What matters for cache hits is keeping the
    prefix stable.
    """

    name = "openai"
63
+
64
+
65
class AnthropicAdapter(ProviderAdapter):
    """Anthropic Messages API format.

    Injects ``{"cache_control": {"type": "ephemeral"}}`` on the final
    content block of each boundary message.
    """

    name = "anthropic"

    def apply_cache_markers(
        self, messages: List[Message], boundaries: Sequence[int]
    ) -> List[Message]:
        """Return a copy of ``messages`` with cache markers at ``boundaries``.

        Args:
            messages: The message list to annotate. Neither the list nor
                any message in it is mutated.
            boundaries: Indices into ``messages``. Indices outside
                ``0 <= idx < len(messages)`` (including negative ones)
                are ignored.

        Returns:
            A new list. Each in-range boundary message is replaced by a deep
            copy in which:

            - non-empty string content becomes a single text block carrying
              the marker;
            - list content gets the marker on its last block;
            - anything else (missing/``None`` content, empty string, empty
              list, or a final block that is not dict-like) is left
              unmarked.
        """
        result = list(messages)
        for idx in boundaries:
            if not 0 <= idx < len(result):
                continue
            # A single deep copy isolates every mutation below from the
            # caller's message, so no further copying of the content list
            # is needed.
            msg = copy.deepcopy(result[idx])
            content = msg.get("content")
            if isinstance(content, str):
                # Empty text blocks cannot carry a cache marker, so an
                # empty string is left as-is rather than wrapped.
                if content:
                    msg["content"] = [
                        {
                            "type": "text",
                            "text": content,
                            "cache_control": {"type": "ephemeral"},
                        }
                    ]
            elif isinstance(content, list) and content:
                try:
                    block = dict(content[-1])
                except (TypeError, ValueError):
                    # The final block is not dict-like; leave the message
                    # unmarked instead of failing the whole build.
                    pass
                else:
                    block["cache_control"] = {"type": "ephemeral"}
                    content[-1] = block
            result[idx] = msg
        return result
98
+
99
+
100
class TransformersAdapter(ProviderAdapter):
    """Hugging Face tokenizer-backed adapter for local models.

    Pass any object with an ``encode`` or ``apply_chat_template`` method
    (e.g. a ``transformers.PreTrainedTokenizer``).
    """

    name = "transformers"

    def __init__(self, tokenizer: Any):
        """Store the tokenizer used for counting.

        Args:
            tokenizer: Object exposing ``apply_chat_template`` and/or
                ``encode``.
        """
        self.tokenizer = tokenizer

    def count_tokens(self, messages: Sequence[Message]) -> int:
        """Count tokens for ``messages`` using the wrapped tokenizer.

        Prefers ``tokenizer.apply_chat_template`` when available. If that
        call fails, or the tokenizer lacks the method, falls back to
        summing ``len(tokenizer.encode(content))`` over each message.

        Args:
            messages: Message dicts. The fallback path reads only the
                ``"content"`` key.

        Returns:
            The number of tokens reported by the tokenizer.
        """
        if hasattr(self.tokenizer, "apply_chat_template"):
            try:
                ids = self.tokenizer.apply_chat_template(
                    list(messages), tokenize=True, add_generation_prompt=False
                )
                # Some tokenizers return a mapping (for example when
                # ``return_dict`` is enabled) rather than a flat id list.
                # ``len()`` of the mapping would count its keys, so unwrap
                # the ids first.
                if hasattr(ids, "keys"):
                    ids = ids["input_ids"]
                return len(ids)
            except Exception:
                # Deliberate best-effort: a tokenizer without a usable chat
                # template raises here, and the per-message estimate below
                # is still meaningful.
                pass

        total = 0
        for msg in messages:
            content = msg.get("content")
            if content is None:
                # Missing or ``None`` content carries no tokens; do not
                # encode the JSON string "null".
                content = ""
            elif not isinstance(content, str):
                content = json.dumps(content, default=str)
            total += len(self.tokenizer.encode(content))
        return total