rollmem 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,28 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ .eggs/
6
+ build/
7
+ dist/
8
+ .venv/
9
+ venv/
10
+ env/
11
+
12
+ # Test / tooling
13
+ .pytest_cache/
14
+ .coverage
15
+ htmlcov/
16
+ .mypy_cache/
17
+ .ruff_cache/
18
+
19
+ # OS / editor
20
+ .DS_Store
21
+ .idea/
22
+ .vscode/
23
+
24
+ # Claude
25
+ .claude/
26
+ .claude.json
27
+ CLAUDE.md
28
+ CLAUDE.local.md
rollmem-0.0.1/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 손옥무 | son okmoo
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
rollmem-0.0.1/PKG-INFO ADDED
@@ -0,0 +1,145 @@
1
+ Metadata-Version: 2.4
2
+ Name: rollmem
3
+ Version: 0.0.1
4
+ Summary: Standalone, dependency-free rolling conversation memory (summary + buffer), inspired by LangChain's ConversationSummaryBufferMemory.
5
+ Project-URL: Homepage, https://github.com/okdoittttt/rollmem
6
+ Project-URL: Repository, https://github.com/okdoittttt/rollmem
7
+ Project-URL: Issues, https://github.com/okdoittttt/rollmem/issues
8
+ Author-email: son okmoo <sonokmoo@gmail.com>
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: chatbot,conversation,langchain,llm,memory,rag,summary
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3 :: Only
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
23
+ Classifier: Typing :: Typed
24
+ Requires-Python: >=3.9
25
+ Provides-Extra: dev
26
+ Requires-Dist: build; extra == 'dev'
27
+ Requires-Dist: pytest>=7; extra == 'dev'
28
+ Requires-Dist: twine; extra == 'dev'
29
+ Description-Content-Type: text/markdown
30
+
31
+ # rollmem
32
+
33
+ [![PyPI version](https://img.shields.io/pypi/v/rollmem.svg)](https://pypi.org/project/rollmem/)
34
+ [![Python versions](https://img.shields.io/pypi/pyversions/rollmem.svg)](https://pypi.org/project/rollmem/)
35
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
36
+
37
+ Standalone, **dependency-free** rolling conversation memory for LLM apps —
38
+ a running summary plus a recent-message buffer, inspired by LangChain's
39
+ `ConversationSummaryBufferMemory`, but with no LangChain (or any) dependency.
40
+
41
+ Handy for **conversation memory**, **context compression**, **summarization**,
42
+ and **gist**-style long-chat handling — a tiny **LangChain alternative** when
43
+ you only need the summary-buffer pattern.
44
+
45
+ ## Why
46
+
47
+ `ConversationSummaryBufferMemory` is a great pattern: keep recent turns
48
+ verbatim, fold older turns into a running summary so context stays bounded.
49
+ But pulling in all of LangChain just for that is heavy. `rollmem` extracts the
50
+ idea into a tiny, provider-agnostic package. You inject how to summarize and
51
+ how to count tokens — rollmem stays neutral.
52
+
53
+ ## Install
54
+
55
+ ```bash
56
+ pip install rollmem
57
+ ```
58
+
59
+ ## Usage
60
+
61
+ ```python
62
+ from rollmem import RollingMemory
63
+
64
+ def summarize(existing_summary, messages):
65
+ # plug in any LLM here; return the new summary string
66
+ folded = " ".join(m.content for m in messages)
67
+ return (existing_summary + " " + folded).strip()
68
+
69
+ mem = RollingMemory(
70
+ max_tokens=2000,
71
+ summarize_fn=summarize, # optional; without it, evicted turns are dropped
72
+ # token_counter=... # optional; defaults to a word-count estimate.
73
+ # # In production inject a model-accurate counter, e.g.
74
+ # # token_counter=lambda text: len(enc.encode(text))
75
+ )
76
+
77
+ mem.add_user_message("Hi, I'm planning a trip to Korea.")
78
+ mem.add_assistant_message("Great! When are you going?")
79
+
80
+ print(mem.get_context()) # -> str: summary (if any) + recent buffer, joined
81
+ print(mem.get_messages()) # -> list[Message]: summary prepended as a system turn
82
+ ```
83
+
84
+ `max_tokens` is the budget for the **verbatim recent-message buffer** — not the
85
+ running summary, and not a model's generation `max_tokens` (output limit). When
86
+ the buffer exceeds it, the oldest turns are folded into the summary.
87
+
88
+ `token_counter` takes a single message's text (`str`) and returns an `int`. The
89
+ default is a crude word count — fine for demos, but pass a model-accurate counter
90
+ (such as `tiktoken`) for real token budgets.
91
+
92
+ ## Persistence
93
+
94
+ `to_dict()` / `from_dict()` serialize the memory **state** (running summary plus
95
+ buffer) to and from a plain `dict` — you choose the storage format:
96
+
97
+ ```python
98
+ import json
99
+
100
+ raw = json.dumps(mem.to_dict()) # save anywhere: file, DB column, cache...
101
+
102
+ mem = RollingMemory.from_dict(
103
+ json.loads(raw),
104
+ max_tokens=2000,
105
+ summarize_fn=summarize, # callbacks are NOT serialized — re-inject them
106
+ # token_counter=...
107
+ )
108
+ ```
109
+
110
+ `max_tokens` and the callbacks are runtime configuration, not saved state, so you
111
+ pass them again on restore. The buffer is restored verbatim; the token budget is
112
+ re-applied on the next added message.
113
+
114
+ ## How it works
115
+
116
+ - New turns go into `buffer`.
117
+ - When `buffer` exceeds `max_tokens`, the oldest turns are folded into `summary`
118
+ via `summarize_fn` (or dropped if none is provided).
119
+ - `get_messages() -> list[Message]` returns the buffer with the summary
120
+ prepended as a `system` turn. `get_context() -> str` is the string form of
121
+ the same thing (prompt-ready), so the two never diverge. Neither adds a
122
+ language-specific label — relabel the summary in your own prompt assembly if
123
+ you need to.
124
+
125
+ ## Limitations
126
+
127
+ - **Lossy by design.** Older turns are folded into the summary repeatedly, so
128
+ each pass can blur or drop detail (a "telephone game" effect). Keep
129
+ `max_tokens` large enough that anything you can't afford to lose stays in the
130
+ verbatim buffer.
131
+ - **The summary is not bounded for you.** `max_tokens` limits only the verbatim
132
+ buffer, not the running summary. rollmem hands your `summarize_fn` the current
133
+ summary plus the evicted turns and stores whatever it returns — so keeping the
134
+ summary compact is your `summarize_fn`'s job. If it merely concatenates,
135
+ the summary (and thus `get_context()`) grows without limit. Have it prompt
136
+ the LLM to compress, or cap the summary length inside the callback.
137
+ - **Only as accurate as your counter.** The default token counter is a rough
138
+ word count; inject a model-accurate one (e.g. `tiktoken`) for real budgets.
139
+ - **In-memory by default.** State lives in memory, but `to_dict()` / `from_dict()`
140
+ let you persist and restore it (see [Persistence](#persistence)). Callbacks are
141
+ not serialized and must be re-injected on restore.
142
+
143
+ ## License
144
+
145
+ MIT
@@ -0,0 +1,115 @@
1
+ # rollmem
2
+
3
+ [![PyPI version](https://img.shields.io/pypi/v/rollmem.svg)](https://pypi.org/project/rollmem/)
4
+ [![Python versions](https://img.shields.io/pypi/pyversions/rollmem.svg)](https://pypi.org/project/rollmem/)
5
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
6
+
7
+ Standalone, **dependency-free** rolling conversation memory for LLM apps —
8
+ a running summary plus a recent-message buffer, inspired by LangChain's
9
+ `ConversationSummaryBufferMemory`, but with no LangChain (or any) dependency.
10
+
11
+ Handy for **conversation memory**, **context compression**, **summarization**,
12
+ and **gist**-style long-chat handling — a tiny **LangChain alternative** when
13
+ you only need the summary-buffer pattern.
14
+
15
+ ## Why
16
+
17
+ `ConversationSummaryBufferMemory` is a great pattern: keep recent turns
18
+ verbatim, fold older turns into a running summary so context stays bounded.
19
+ But pulling in all of LangChain just for that is heavy. `rollmem` extracts the
20
+ idea into a tiny, provider-agnostic package. You inject how to summarize and
21
+ how to count tokens — rollmem stays neutral.
22
+
23
+ ## Install
24
+
25
+ ```bash
26
+ pip install rollmem
27
+ ```
28
+
29
+ ## Usage
30
+
31
+ ```python
32
+ from rollmem import RollingMemory
33
+
34
+ def summarize(existing_summary, messages):
35
+ # plug in any LLM here; return the new summary string
36
+ folded = " ".join(m.content for m in messages)
37
+ return (existing_summary + " " + folded).strip()
38
+
39
+ mem = RollingMemory(
40
+ max_tokens=2000,
41
+ summarize_fn=summarize, # optional; without it, evicted turns are dropped
42
+ # token_counter=... # optional; defaults to a word-count estimate.
43
+ # # In production inject a model-accurate counter, e.g.
44
+ # # token_counter=lambda text: len(enc.encode(text))
45
+ )
46
+
47
+ mem.add_user_message("Hi, I'm planning a trip to Korea.")
48
+ mem.add_assistant_message("Great! When are you going?")
49
+
50
+ print(mem.get_context()) # -> str: summary (if any) + recent buffer, joined
51
+ print(mem.get_messages()) # -> list[Message]: summary prepended as a system turn
52
+ ```
53
+
54
+ `max_tokens` is the budget for the **verbatim recent-message buffer** — not the
55
+ running summary, and not a model's generation `max_tokens` (output limit). When
56
+ the buffer exceeds it, the oldest turns are folded into the summary.
57
+
58
+ `token_counter` takes a single message's text (`str`) and returns an `int`. The
59
+ default is a crude word count — fine for demos, but pass a model-accurate counter
60
+ (such as `tiktoken`) for real token budgets.
61
+
62
+ ## Persistence
63
+
64
+ `to_dict()` / `from_dict()` serialize the memory **state** (running summary plus
65
+ buffer) to and from a plain `dict` — you choose the storage format:
66
+
67
+ ```python
68
+ import json
69
+
70
+ raw = json.dumps(mem.to_dict()) # save anywhere: file, DB column, cache...
71
+
72
+ mem = RollingMemory.from_dict(
73
+ json.loads(raw),
74
+ max_tokens=2000,
75
+ summarize_fn=summarize, # callbacks are NOT serialized — re-inject them
76
+ # token_counter=...
77
+ )
78
+ ```
79
+
80
+ `max_tokens` and the callbacks are runtime configuration, not saved state, so you
81
+ pass them again on restore. The buffer is restored verbatim; the token budget is
82
+ re-applied on the next added message.
83
+
84
+ ## How it works
85
+
86
+ - New turns go into `buffer`.
87
+ - When `buffer` exceeds `max_tokens`, the oldest turns are folded into `summary`
88
+ via `summarize_fn` (or dropped if none is provided).
89
+ - `get_messages() -> list[Message]` returns the buffer with the summary
90
+ prepended as a `system` turn. `get_context() -> str` is the string form of
91
+ the same thing (prompt-ready), so the two never diverge. Neither adds a
92
+ language-specific label — relabel the summary in your own prompt assembly if
93
+ you need to.
94
+
95
+ ## Limitations
96
+
97
+ - **Lossy by design.** Older turns are folded into the summary repeatedly, so
98
+ each pass can blur or drop detail (a "telephone game" effect). Keep
99
+ `max_tokens` large enough that anything you can't afford to lose stays in the
100
+ verbatim buffer.
101
+ - **The summary is not bounded for you.** `max_tokens` limits only the verbatim
102
+ buffer, not the running summary. rollmem hands your `summarize_fn` the current
103
+ summary plus the evicted turns and stores whatever it returns — so keeping the
104
+ summary compact is your `summarize_fn`'s job. If it merely concatenates,
105
+ the summary (and thus `get_context()`) grows without limit. Have it prompt
106
+ the LLM to compress, or cap the summary length inside the callback.
107
+ - **Only as accurate as your counter.** The default token counter is a rough
108
+ word count; inject a model-accurate one (e.g. `tiktoken`) for real budgets.
109
+ - **In-memory by default.** State lives in memory, but `to_dict()` / `from_dict()`
110
+ let you persist and restore it (see [Persistence](#persistence)). Callbacks are
111
+ not serialized and must be re-injected on restore.
112
+
113
+ ## License
114
+
115
+ MIT
@@ -0,0 +1,45 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "rollmem"
7
+ version = "0.0.1"
8
+ description = "Standalone, dependency-free rolling conversation memory (summary + buffer), inspired by LangChain's ConversationSummaryBufferMemory."
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ requires-python = ">=3.9"
12
+ authors = [
13
+ { name = "son okmoo", email = "sonokmoo@gmail.com" },
14
+ ]
15
+ keywords = ["llm", "memory", "conversation", "summary", "chatbot", "rag", "langchain"]
16
+ classifiers = [
17
+ "Development Status :: 3 - Alpha",
18
+ "Intended Audience :: Developers",
19
+ "Programming Language :: Python :: 3",
20
+ "Programming Language :: Python :: 3 :: Only",
21
+ "Programming Language :: Python :: 3.9",
22
+ "Programming Language :: Python :: 3.10",
23
+ "Programming Language :: Python :: 3.11",
24
+ "Programming Language :: Python :: 3.12",
25
+ "Programming Language :: Python :: 3.13",
26
+ "Topic :: Software Development :: Libraries :: Python Modules",
27
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
28
+ "Typing :: Typed",
29
+ ]
30
+ dependencies = []
31
+
32
+ [project.urls]
33
+ Homepage = "https://github.com/okdoittttt/rollmem"
34
+ Repository = "https://github.com/okdoittttt/rollmem"
35
+ Issues = "https://github.com/okdoittttt/rollmem/issues"
36
+
37
+ [project.optional-dependencies]
38
+ dev = [
39
+ "pytest>=7",
40
+ "build",
41
+ "twine",
42
+ ]
43
+
44
+ [tool.hatch.build.targets.wheel]
45
+ packages = ["src/rollmem"]
@@ -0,0 +1,16 @@
1
+ """rollmem — standalone rolling conversation memory (summary + buffer)."""
2
+
3
+ from .memory import RollingMemory, SummarizeFn, TokenCounter
4
+ from .message import ASSISTANT, SYSTEM, USER, Message
5
+
6
+ __all__ = [
7
+ "RollingMemory",
8
+ "Message",
9
+ "SummarizeFn",
10
+ "TokenCounter",
11
+ "USER",
12
+ "ASSISTANT",
13
+ "SYSTEM",
14
+ ]
15
+
16
+ __version__ = "0.0.1"
@@ -0,0 +1,209 @@
1
+ """Core rolling memory: a running summary plus a recent-message buffer.
2
+
3
+ The behaviour mirrors LangChain's ConversationSummaryBufferMemory but with zero
4
+ dependencies. The two things that *would* tie us to an LLM provider — turning
5
+ messages into a summary, and counting tokens — are injected by the caller:
6
+
7
+ summarize_fn(existing_summary, messages_to_fold) -> new_summary
8
+ token_counter(text) -> int
9
+
10
+ This keeps rollmem usable with any model, or with no model at all (e.g. a fake
11
+ counter and a no-op summarizer in tests).
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from typing import Any, Callable, Dict, List, Mapping, Optional, Sequence
17
+
18
+ from .message import ASSISTANT, SYSTEM, USER, Message
19
+
20
+ SummarizeFn = Callable[[str, Sequence[Message]], str]
21
+ TokenCounter = Callable[[str], int]
22
+
23
+ SCHEMA_VERSION = 1
24
+
25
+
26
def _default_token_counter(text: str) -> int:
    """Estimate a token count as the number of whitespace-separated words.

    This is the fallback used when the caller supplies no counter. It is
    deliberately crude: enough to make the buffer roll in tests and demos,
    but real deployments should inject a model-accurate counter (for
    example one built on tiktoken).

    Args:
        text: The message text to measure.

    Returns:
        The number of words obtained by splitting ``text`` on whitespace.
    """
    words = text.split()
    return len(words)
33
+
34
+
35
class RollingMemory:
    """Keeps recent turns verbatim and folds older turns into a running summary.

    New turns are appended to ``buffer``. Whenever the buffer's token total
    exceeds ``max_tokens``, the oldest turns are removed and, if a
    ``summarize_fn`` was supplied, folded into ``summary``; without one they
    are simply dropped.

    The token budget applies only to the verbatim buffer. The running summary
    is whatever ``summarize_fn`` returns and is not bounded here, so keeping it
    compact is the caller's responsibility: a ``summarize_fn`` that compresses
    keeps ``get_context()`` bounded, while one that merely concatenates lets the
    summary grow without limit.

    Args:
        max_tokens: Token budget for the verbatim buffer. When the buffer
            exceeds this, the oldest messages are folded into the summary until
            it fits again. This bounds the buffer only, not the summary, and is
            unrelated to a model's generation ``max_tokens`` (output limit).
        summarize_fn: Callback ``(existing_summary, evicted_messages) ->
            new_summary``. If omitted, evicted messages are dropped
            (buffer-only mode). It should compress, not just append, to keep
            the summary bounded.
        token_counter: Callback ``(text) -> int`` returning a token count for a
            string. Defaults to a word-count estimate.

    Raises:
        ValueError: If ``max_tokens`` is not positive.
    """

    def __init__(
        self,
        max_tokens: int = 2000,
        summarize_fn: Optional[SummarizeFn] = None,
        token_counter: Optional[TokenCounter] = None,
    ) -> None:
        if max_tokens <= 0:
            raise ValueError("max_tokens must be positive")
        self.max_tokens = max_tokens
        self._summarize_fn = summarize_fn
        # Compare against None instead of relying on truthiness: a callable
        # object that defines __len__ or __bool__ (e.g. a counter wrapping an
        # empty cache) is falsy and would otherwise be silently replaced by
        # the default counter.
        self._token_counter = (
            token_counter if token_counter is not None else _default_token_counter
        )
        self.summary: str = ""
        self.buffer: List[Message] = []

    # -- adding turns -----------------------------------------------------

    def add_message(self, role: str, content: str) -> None:
        """Append a turn with the given role, then enforce the token budget.

        Args:
            role: Role label for the turn; stored as given.
            content: Text of the turn.

        Raises:
            Exception: Whatever ``summarize_fn`` raises while folding evicted
                turns. The new turn has already been appended at that point
                and nothing is removed from the buffer.
        """
        self.buffer.append(Message(role=role, content=content))
        self._prune()

    def add_user_message(self, content: str) -> None:
        """Append a turn with the ``USER`` role."""
        self.add_message(USER, content)

    def add_assistant_message(self, content: str) -> None:
        """Append a turn with the ``ASSISTANT`` role."""
        self.add_message(ASSISTANT, content)

    def add_system_message(self, content: str) -> None:
        """Append a turn with the ``SYSTEM`` role."""
        self.add_message(SYSTEM, content)

    # -- reading back -----------------------------------------------------

    def get_context(self) -> str:
        """Summary (if any) followed by the verbatim buffer, as one string.

        This is the string form of :meth:`get_messages`: each message is
        rendered with ``str()`` and the results are joined with newlines, so
        the running summary, when present, appears as a leading ``system``
        turn. No language-specific label is added — wrap or relabel the
        summary in your own prompt assembly if you need to.

        Returns:
            The newline-joined rendering; an empty string when there is no
            summary and the buffer is empty.
        """
        return "\n".join(str(m) for m in self.get_messages())

    def get_messages(self) -> List[Message]:
        """Buffer messages, with the running summary prepended as a system turn.

        Returns:
            A new list on every call. The summary turn is included only when
            the summary is non-empty.
        """
        messages: List[Message] = []
        if self.summary:
            messages.append(Message(role=SYSTEM, content=self.summary))
        messages.extend(self.buffer)
        return messages

    def clear(self) -> None:
        """Reset the summary to empty and empty the buffer in place."""
        self.summary = ""
        self.buffer.clear()

    # -- serialization ----------------------------------------------------

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the memory state to a plain ``dict``.

        Only conversation state is captured — the running summary and the
        verbatim buffer. The token budget and the injected callbacks are
        considered runtime configuration, not state, so they are not included
        and must be supplied again at :meth:`from_dict` time.

        Returns:
            A mapping with ``version``, ``summary``, and ``buffer`` keys,
            suitable for JSON serialization (the caller chooses the format).
        """
        return {
            "version": SCHEMA_VERSION,
            "summary": self.summary,
            "buffer": [m.to_dict() for m in self.buffer],
        }

    @classmethod
    def from_dict(
        cls,
        data: Mapping[str, Any],
        *,
        max_tokens: int = 2000,
        summarize_fn: Optional[SummarizeFn] = None,
        token_counter: Optional[TokenCounter] = None,
    ) -> RollingMemory:
        """Reconstruct a memory from its ``dict`` representation.

        The buffer is restored verbatim: this does not call the pruning logic,
        so loading never triggers an unexpected ``summarize_fn`` call or drops
        turns. The token budget is re-applied on the next ``add_message``; if
        ``max_tokens`` is smaller than when the state was saved, the buffer may
        momentarily exceed it until the next turn is added.

        Args:
            data: A mapping produced by :meth:`to_dict`. Missing ``summary``
                and ``buffer`` keys default to empty.
            max_tokens: Token budget for the restored buffer. Runtime
                configuration, not part of the saved state.
            summarize_fn: Summarizer callback to re-inject. Callbacks are not
                serialized, so pass it again to keep summarization working.
            token_counter: Token-counter callback to re-inject. Defaults to the
                word-count estimate when omitted.

        Returns:
            The reconstructed ``RollingMemory``.

        Raises:
            ValueError: If ``data`` has an unsupported (or missing)
                serialization version, or if ``max_tokens`` is not positive.
        """
        version = data.get("version")
        if version != SCHEMA_VERSION:
            raise ValueError(f"unsupported serialization version: {version!r}")
        memory = cls(
            max_tokens=max_tokens,
            summarize_fn=summarize_fn,
            token_counter=token_counter,
        )
        memory.summary = data.get("summary", "")
        memory.buffer = [Message.from_dict(m) for m in data.get("buffer", [])]
        return memory

    # -- internals --------------------------------------------------------

    def _buffer_tokens(self) -> int:
        """Return the summed token count of every message in the buffer."""
        return sum(self._token_counter(m.content) for m in self.buffer)

    def _prune(self) -> None:
        """Fold oldest messages into the summary until the buffer fits budget.

        Eviction is computed first, then summarized in a single ``summarize_fn``
        call, and only after that succeeds are the messages dropped from the
        buffer. This keeps the summarizer call cheap (one call, not one per
        message) and means a summarizer failure leaves the buffer untouched
        rather than silently losing turns.
        """
        # Figure out how many of the oldest messages must go, without mutating
        # the buffer yet. Always keep at least one message in the buffer, even
        # if that message alone exceeds the budget.
        tokens = self._buffer_tokens()
        evict_count = 0
        while (
            len(self.buffer) - evict_count > 1
            and tokens > self.max_tokens
        ):
            tokens -= self._token_counter(self.buffer[evict_count].content)
            evict_count += 1

        if evict_count == 0:
            return

        evicted = self.buffer[:evict_count]
        if self._summarize_fn is not None:
            # If this raises, we have not touched the buffer yet — no data loss.
            self.summary = self._summarize_fn(self.summary, evicted)

        del self.buffer[:evict_count]
@@ -0,0 +1,49 @@
1
+ """Provider-agnostic message representation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from typing import Dict, Mapping
7
+
8
+ # Conventional roles. rollmem does not enforce these — any string is accepted —
9
+ # but these are the values the built-in helpers (add_user_message, etc.) emit.
10
+ USER = "user"
11
+ ASSISTANT = "assistant"
12
+ SYSTEM = "system"
13
+
14
+
15
+ @dataclass(frozen=True)
16
+ class Message:
17
+ """A single turn in a conversation.
18
+
19
+ Deliberately minimal so rollmem stays free of any LLM-provider schema.
20
+ Adapters (OpenAI, Anthropic, LangChain, ...) convert to/from this type.
21
+ """
22
+
23
+ role: str
24
+ content: str
25
+
26
+ def __str__(self) -> str:
27
+ return f"{self.role}: {self.content}"
28
+
29
+ def to_dict(self) -> Dict[str, str]:
30
+ """Return a plain ``dict`` representation of this message.
31
+
32
+ Returns:
33
+ A mapping with ``role`` and ``content`` keys, suitable for JSON
34
+ serialization (the caller chooses the serialization format).
35
+ """
36
+ return {"role": self.role, "content": self.content}
37
+
38
+ @classmethod
39
+ def from_dict(cls, data: Mapping[str, str]) -> Message:
40
+ """Reconstruct a message from its ``dict`` representation.
41
+
42
+ Args:
43
+ data: A mapping with ``role`` and ``content`` keys, as produced by
44
+ :meth:`to_dict`.
45
+
46
+ Returns:
47
+ The reconstructed ``Message``.
48
+ """
49
+ return cls(role=data["role"], content=data["content"])
File without changes
@@ -0,0 +1,176 @@
1
+ from rollmem import RollingMemory, Message
2
+
3
+
4
def char_counter(text: str) -> int:
    """Test-only token counter: every character counts as one token."""
    n_chars = len(text)
    return n_chars
7
+
8
+
9
def fake_summarizer(existing: str, evicted) -> str:
    """Test-only summarizer: append the evicted contents to the summary.

    The evicted messages' ``content`` values are joined with single spaces,
    appended to ``existing`` after one space, and the result is stripped.
    """
    pieces = [m.content for m in evicted]
    combined = existing + " " + " ".join(pieces)
    return combined.strip()
12
+
13
+
14
def test_buffer_only_keeps_recent():
    """Two 5-token turns exactly fill a 10-token budget, so nothing is evicted."""
    memory = RollingMemory(token_counter=char_counter, max_tokens=10)
    memory.add_user_message("hello")  # 5 tokens
    memory.add_assistant_message("world")  # 10 tokens in total: still fits
    turn_count = len(memory.buffer)
    assert turn_count == 2
    assert memory.summary == ""
20
+
21
+
22
def test_eviction_without_summarizer_drops_messages():
    """With no summarize_fn, the evicted turn vanishes and the summary stays empty."""
    memory = RollingMemory(token_counter=char_counter, max_tokens=6)
    memory.add_user_message("aaaa")  # 4 tokens: within budget
    memory.add_assistant_message("bbbb")  # 8 tokens in total: "aaaa" is dropped
    assert memory.summary == ""
    remaining = [turn.content for turn in memory.buffer]
    assert remaining == ["bbbb"]
28
+
29
+
30
def test_eviction_folds_into_summary():
    """With a summarize_fn, the evicted turn's text ends up in the summary."""
    memory = RollingMemory(
        summarize_fn=fake_summarizer,
        token_counter=char_counter,
        max_tokens=6,
    )
    memory.add_user_message("aaaa")
    memory.add_assistant_message("bbbb")  # 8 > 6, so "aaaa" is folded away
    assert memory.summary == "aaaa"
    remaining = [turn.content for turn in memory.buffer]
    assert remaining == ["bbbb"]
40
+
41
+
42
def test_multiple_evictions_summarize_in_one_call():
    """Turns evicted by a single add are handed to the summarizer together."""
    batches = []

    def counting_summarizer(existing, evicted):
        # Record a snapshot of each batch so the call count can be checked.
        batches.append(list(evicted))
        folded = " ".join(m.content for m in evicted)
        return existing + " " + folded

    memory = RollingMemory(
        summarize_fn=counting_summarizer,
        token_counter=char_counter,
        max_tokens=4,
    )
    memory.add_user_message("aa")  # 2 tokens
    memory.add_assistant_message("bb")  # 4 tokens in total: fits
    memory.add_user_message("cccc")  # 8 tokens in total: "aa" and "bb" must go

    # One summarizer call covering both evicted messages.
    assert len(batches) == 1
    first_batch = [m.content for m in batches[0]]
    assert first_batch == ["aa", "bb"]
    remaining = [turn.content for turn in memory.buffer]
    assert remaining == ["cccc"]
62
+
63
+
64
def test_summarizer_failure_does_not_lose_messages():
    """A raising summarizer propagates its error and leaves every turn in place."""
    import pytest

    def failing_summarizer(existing, evicted):
        raise RuntimeError("LLM unavailable")

    memory = RollingMemory(
        summarize_fn=failing_summarizer,
        token_counter=char_counter,
        max_tokens=4,
    )
    memory.add_user_message("aaaa")  # 4 tokens: exactly at budget

    # The second turn forces a prune, which calls the failing summarizer.
    with pytest.raises(RuntimeError):
        memory.add_assistant_message("bbbb")

    # Nothing was summarized and nothing was removed.
    assert memory.summary == ""
    remaining = [turn.content for turn in memory.buffer]
    assert remaining == ["aaaa", "bbbb"]
82
+
83
+
84
def test_get_context_includes_summary():
    """get_context renders the summary as a leading system turn, like get_messages."""
    memory = RollingMemory(
        summarize_fn=fake_summarizer,
        token_counter=char_counter,
        max_tokens=6,
    )
    memory.add_user_message("aaaa")
    memory.add_assistant_message("bbbb")
    context = memory.get_context()
    # The summary shows up as a system turn, followed by the surviving turn.
    assert "system: aaaa" in context
    assert "assistant: bbbb" in context
    # The string is exactly the newline-joined rendering of get_messages().
    rendered = "\n".join(str(turn) for turn in memory.get_messages())
    assert context == rendered
98
+
99
+
100
def test_clear():
    """clear() empties the buffer and resets the summary."""
    memory = RollingMemory(token_counter=char_counter, max_tokens=6)
    memory.add_user_message("x")
    memory.clear()
    assert memory.buffer == []
    assert memory.summary == ""
106
+
107
+
108
def test_invalid_max_tokens():
    """A zero token budget is rejected with ValueError."""
    import pytest

    zero_budget = 0
    with pytest.raises(ValueError):
        RollingMemory(max_tokens=zero_budget)
113
+
114
+
115
def test_message_round_trips():
    """Message.to_dict / from_dict are inverses of each other."""
    original = Message(role="user", content="hello")
    exported = original.to_dict()
    assert exported == {"role": "user", "content": "hello"}
    assert Message.from_dict(original.to_dict()) == original
119
+
120
+
121
def test_memory_to_dict_shape():
    """to_dict exposes exactly version, summary and buffer, with dict entries."""
    memory = RollingMemory(token_counter=char_counter, max_tokens=100)
    memory.add_user_message("hi")
    memory.add_assistant_message("yo")

    snapshot = memory.to_dict()
    assert set(snapshot) == {"version", "summary", "buffer"}
    assert snapshot["version"] == 1
    assert all(isinstance(entry, dict) for entry in snapshot["buffer"])
    contents = [entry["content"] for entry in snapshot["buffer"]]
    assert contents == ["hi", "yo"]
131
+
132
+
133
def test_memory_round_trips():
    """A memory restored from to_dict() has the same summary and buffer."""
    source = RollingMemory(
        summarize_fn=fake_summarizer,
        token_counter=char_counter,
        max_tokens=6,
    )
    source.add_user_message("aaaa")
    source.add_assistant_message("bbbb")  # pushes "aaaa" into the summary
    assert source.summary == "aaaa"
    assert [turn.content for turn in source.buffer] == ["bbbb"]

    clone = RollingMemory.from_dict(
        source.to_dict(),
        summarize_fn=fake_summarizer,
        token_counter=char_counter,
        max_tokens=6,
    )
    assert clone.summary == source.summary
    clone_contents = [turn.content for turn in clone.buffer]
    source_contents = [turn.content for turn in source.buffer]
    assert clone_contents == source_contents
152
+
153
+
154
def test_from_dict_restores_working_memory():
    """A restored memory keeps its buffer and still summarizes on the next add."""
    saved = {
        "version": 1,
        "summary": "earlier",
        "buffer": [{"role": "user", "content": "bbbb"}],
    }

    memory = RollingMemory.from_dict(
        saved,
        summarize_fn=fake_summarizer,
        token_counter=char_counter,
        max_tokens=6,
    )
    # Loading does not prune: the buffer comes back exactly as saved.
    assert [turn.content for turn in memory.buffer] == ["bbbb"]

    # The re-injected summarizer runs when the next turn overflows the budget.
    memory.add_assistant_message("cccc")
    assert memory.summary == "earlier bbbb"
    remaining = [turn.content for turn in memory.buffer]
    assert remaining == ["cccc"]
170
+
171
+
172
def test_from_dict_rejects_unknown_version():
    """from_dict raises ValueError for a schema version it does not know."""
    import pytest

    unsupported = {"version": 999, "summary": "", "buffer": []}
    with pytest.raises(ValueError):
        RollingMemory.from_dict(unsupported)