rollmem 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rollmem/__init__.py ADDED
@@ -0,0 +1,16 @@
1
+ """rollmem — standalone rolling conversation memory (summary + buffer)."""
2
+
3
+ from .memory import RollingMemory, SummarizeFn, TokenCounter
4
+ from .message import ASSISTANT, SYSTEM, USER, Message
5
+
6
+ __all__ = [
7
+ "RollingMemory",
8
+ "Message",
9
+ "SummarizeFn",
10
+ "TokenCounter",
11
+ "USER",
12
+ "ASSISTANT",
13
+ "SYSTEM",
14
+ ]
15
+
16
+ __version__ = "0.0.1"
rollmem/memory.py ADDED
@@ -0,0 +1,209 @@
1
+ """Core rolling memory: a running summary plus a recent-message buffer.
2
+
3
+ The behaviour mirrors LangChain's ConversationSummaryBufferMemory but with zero
4
+ dependencies. The two things that *would* tie us to an LLM provider — turning
5
+ messages into a summary, and counting tokens — are injected by the caller:
6
+
7
+ summarize_fn(existing_summary, messages_to_fold) -> new_summary
8
+ token_counter(text) -> int
9
+
10
+ This keeps rollmem usable with any model, or with no model at all (e.g. a fake
11
+ counter and a no-op summarizer in tests).
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from typing import Any, Callable, Dict, List, Mapping, Optional, Sequence
17
+
18
+ from .message import ASSISTANT, SYSTEM, USER, Message
19
+
20
+ SummarizeFn = Callable[[str, Sequence[Message]], str]
21
+ TokenCounter = Callable[[str], int]
22
+
23
+ SCHEMA_VERSION = 1
24
+
25
+
26
+ def _default_token_counter(text: str) -> int:
27
+ """Rough word-based estimate used when the caller injects nothing.
28
+
29
+ Intentionally crude — real deployments should pass a model-accurate counter
30
+ (e.g. tiktoken). Good enough to make the buffer roll in tests and demos.
31
+ """
32
+ return len(text.split())
33
+
34
+
35
class RollingMemory:
    """Keep recent turns verbatim and fold older turns into a running summary.

    The token budget applies only to the verbatim buffer. The running summary
    is whatever ``summarize_fn`` returns and is not bounded here, so keeping it
    compact is the caller's responsibility: a ``summarize_fn`` that compresses
    keeps ``get_context()`` bounded, while one that merely concatenates lets the
    summary grow without limit.

    Args:
        max_tokens: Token budget for the verbatim buffer. When the buffer
            exceeds this, the oldest messages are folded into the summary until
            it fits again (the newest message is always kept). This bounds the
            buffer only, not the summary, and is unrelated to a model's
            generation ``max_tokens`` (output limit).
        summarize_fn: Callback that produces an updated summary from the current
            summary plus the messages being evicted. If omitted, evicted
            messages are dropped (buffer-only mode). It should compress, not
            just append, to keep the summary bounded.
        token_counter: Callback returning a token count for a string. Defaults
            to a word-count estimate when ``None``.

    Raises:
        ValueError: If ``max_tokens`` is not positive.
    """

    def __init__(
        self,
        max_tokens: int = 2000,
        summarize_fn: Optional[SummarizeFn] = None,
        token_counter: Optional[TokenCounter] = None,
    ) -> None:
        if max_tokens <= 0:
            raise ValueError("max_tokens must be positive")
        self.max_tokens = max_tokens
        self._summarize_fn = summarize_fn
        # Test against None, not truthiness: a callable object that defines
        # __len__/__bool__ may be falsy and must not be swapped for the default.
        self._token_counter = (
            token_counter if token_counter is not None else _default_token_counter
        )
        self.summary: str = ""
        self.buffer: List[Message] = []

    # -- adding turns -----------------------------------------------------

    def add_message(self, role: str, content: str) -> None:
        """Append a turn to the buffer, then re-apply the token budget.

        The message is appended before pruning, so if ``summarize_fn`` raises
        during pruning the new message is still in the buffer.

        Args:
            role: Role label for the turn; any string is accepted.
            content: Text of the turn.
        """
        self.buffer.append(Message(role=role, content=content))
        self._prune()

    def add_user_message(self, content: str) -> None:
        """Append a turn with the ``USER`` role."""
        self.add_message(USER, content)

    def add_assistant_message(self, content: str) -> None:
        """Append a turn with the ``ASSISTANT`` role."""
        self.add_message(ASSISTANT, content)

    def add_system_message(self, content: str) -> None:
        """Append a turn with the ``SYSTEM`` role."""
        self.add_message(SYSTEM, content)

    # -- reading back -----------------------------------------------------

    def get_context(self) -> str:
        """Return the summary (if any) followed by the buffer, as one string.

        This is the string form of :meth:`get_messages`: each message is
        rendered with ``str()`` and the results are joined with newlines. The
        running summary, when present, appears as a leading ``system`` turn, so
        both methods expose it identically. No language-specific label is
        added — wrap or relabel the summary in your own prompt assembly if you
        need to.
        """
        return "\n".join(str(m) for m in self.get_messages())

    def get_messages(self) -> List[Message]:
        """Return the buffer with the running summary prepended as a system turn.

        Returns:
            A new list; when ``summary`` is empty it contains only the buffered
            messages.
        """
        messages: List[Message] = []
        if self.summary:
            messages.append(Message(role=SYSTEM, content=self.summary))
        messages.extend(self.buffer)
        return messages

    def clear(self) -> None:
        """Reset the summary to an empty string and empty the buffer in place."""
        self.summary = ""
        self.buffer.clear()

    # -- serialization ----------------------------------------------------

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the memory state to a plain ``dict``.

        Only conversation state is captured — the running summary and the
        verbatim buffer. The token budget and the injected callbacks are
        considered runtime configuration, not state, so they are not included
        and must be supplied again at :meth:`from_dict` time.

        Returns:
            A mapping with ``version``, ``summary``, and ``buffer`` keys,
            suitable for JSON serialization (the caller chooses the format).
        """
        return {
            "version": SCHEMA_VERSION,
            "summary": self.summary,
            "buffer": [m.to_dict() for m in self.buffer],
        }

    @classmethod
    def from_dict(
        cls,
        data: Mapping[str, Any],
        *,
        max_tokens: int = 2000,
        summarize_fn: Optional[SummarizeFn] = None,
        token_counter: Optional[TokenCounter] = None,
    ) -> RollingMemory:
        """Reconstruct a memory from its ``dict`` representation.

        The buffer is restored verbatim: this does not call the pruning logic,
        so loading never triggers an unexpected ``summarize_fn`` call or drops
        turns. The token budget is re-applied on the next ``add_message``; if
        ``max_tokens`` is smaller than when the state was saved, the buffer may
        momentarily exceed it until the next turn is added.

        Args:
            data: A mapping produced by :meth:`to_dict`. Missing or ``None``
                ``summary`` / ``buffer`` entries are treated as empty.
            max_tokens: Token budget for the restored buffer. Runtime
                configuration, not part of the saved state.
            summarize_fn: Summarizer callback to re-inject. Callbacks are not
                serialized, so pass it again to keep summarization working.
            token_counter: Token-counter callback to re-inject. Defaults to the
                word-count estimate when omitted.

        Returns:
            The reconstructed ``RollingMemory``.

        Raises:
            ValueError: If ``data`` has an unsupported serialization version,
                or if ``max_tokens`` is not positive.
        """
        version = data.get("version")
        if version != SCHEMA_VERSION:
            raise ValueError(f"unsupported serialization version: {version!r}")
        memory = cls(
            max_tokens=max_tokens,
            summarize_fn=summarize_fn,
            token_counter=token_counter,
        )
        # ``or`` also covers an explicit null in the stored data, so ``summary``
        # stays a str and ``buffer`` stays a list as the rest of the class expects.
        memory.summary = data.get("summary") or ""
        memory.buffer = [Message.from_dict(m) for m in data.get("buffer") or []]
        return memory

    # -- internals --------------------------------------------------------

    def _buffer_tokens(self) -> int:
        """Return the summed token count of every message in the buffer."""
        return sum(self._token_counter(m.content) for m in self.buffer)

    def _prune(self) -> None:
        """Fold oldest messages into the summary until the buffer fits budget.

        Eviction is computed first, then summarized in a single ``summarize_fn``
        call, and only after that succeeds are the messages dropped from the
        buffer. This keeps the summarizer call cheap (one call, not one per
        message) and means a summarizer failure leaves the buffer untouched
        rather than silently losing turns. Without a ``summarize_fn`` the
        evicted messages are simply dropped.
        """
        # Count every message exactly once; the counter may be an expensive
        # tokenizer, and the same numbers serve both the total and the
        # per-message subtraction below.
        counts = [self._token_counter(m.content) for m in self.buffer]
        tokens = sum(counts)

        # Figure out how many of the oldest messages must go, without mutating
        # the buffer yet. Always keep at least one message in the buffer.
        evict_count = 0
        while len(counts) - evict_count > 1 and tokens > self.max_tokens:
            tokens -= counts[evict_count]
            evict_count += 1

        if evict_count == 0:
            return

        evicted = self.buffer[:evict_count]
        if self._summarize_fn is not None:
            # If this raises, we have not touched the buffer yet — no data loss.
            self.summary = self._summarize_fn(self.summary, evicted)

        del self.buffer[:evict_count]
rollmem/message.py ADDED
@@ -0,0 +1,49 @@
1
+ """Provider-agnostic message representation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from typing import Dict, Mapping
7
+
8
+ # Conventional roles. rollmem does not enforce these — any string is accepted —
9
+ # but these are the values the built-in helpers (add_user_message, etc.) emit.
10
+ USER = "user"
11
+ ASSISTANT = "assistant"
12
+ SYSTEM = "system"
13
+
14
+
15
@dataclass(frozen=True)
class Message:
    """One conversation turn: a role label plus its text.

    Kept intentionally small so rollmem carries no LLM-provider schema;
    provider adapters (OpenAI, Anthropic, LangChain, ...) translate to and from
    this type. Instances are immutable.
    """

    role: str
    content: str

    def __str__(self) -> str:
        """Render the turn as ``"<role>: <content>"``."""
        return "{}: {}".format(self.role, self.content)

    def to_dict(self) -> Dict[str, str]:
        """Convert this message to a plain ``dict``.

        Returns:
            A mapping with ``role`` and ``content`` keys; the caller decides
            how (or whether) to serialize it further, e.g. as JSON.
        """
        return dict(role=self.role, content=self.content)

    @classmethod
    def from_dict(cls, data: Mapping[str, str]) -> Message:
        """Build a message from the mapping produced by :meth:`to_dict`.

        Args:
            data: A mapping holding ``role`` and ``content`` keys.

        Returns:
            The reconstructed ``Message``.

        Raises:
            KeyError: If ``role`` or ``content`` is missing from ``data``.
        """
        role = data["role"]
        content = data["content"]
        return cls(role, content)
rollmem/py.typed ADDED
File without changes
@@ -0,0 +1,145 @@
1
+ Metadata-Version: 2.4
2
+ Name: rollmem
3
+ Version: 0.0.1
4
+ Summary: Standalone, dependency-free rolling conversation memory (summary + buffer), inspired by LangChain's ConversationSummaryBufferMemory.
5
+ Project-URL: Homepage, https://github.com/okdoittttt/rollmem
6
+ Project-URL: Repository, https://github.com/okdoittttt/rollmem
7
+ Project-URL: Issues, https://github.com/okdoittttt/rollmem/issues
8
+ Author-email: son okmoo <sonokmoo@gmail.com>
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: chatbot,conversation,langchain,llm,memory,rag,summary
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3 :: Only
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
23
+ Classifier: Typing :: Typed
24
+ Requires-Python: >=3.9
25
+ Provides-Extra: dev
26
+ Requires-Dist: build; extra == 'dev'
27
+ Requires-Dist: pytest>=7; extra == 'dev'
28
+ Requires-Dist: twine; extra == 'dev'
29
+ Description-Content-Type: text/markdown
30
+
31
+ # rollmem
32
+
33
+ [![PyPI version](https://img.shields.io/pypi/v/rollmem.svg)](https://pypi.org/project/rollmem/)
34
+ [![Python versions](https://img.shields.io/pypi/pyversions/rollmem.svg)](https://pypi.org/project/rollmem/)
35
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
36
+
37
+ Standalone, **dependency-free** rolling conversation memory for LLM apps —
38
+ a running summary plus a recent-message buffer, inspired by LangChain's
39
+ `ConversationSummaryBufferMemory`, but with no LangChain (or any) dependency.
40
+
41
+ Handy for **conversation memory**, **context compression**, **summarization**,
42
+ and **gist**-style long-chat handling — a tiny **LangChain alternative** when
43
+ you only need the summary-buffer pattern.
44
+
45
+ ## Why
46
+
47
+ `ConversationSummaryBufferMemory` is a great pattern: keep recent turns
48
+ verbatim, fold older turns into a running summary so context stays bounded.
49
+ But pulling in all of LangChain just for that is heavy. `rollmem` extracts the
50
+ idea into a tiny, provider-agnostic package. You inject how to summarize and
51
+ how to count tokens — rollmem stays neutral.
52
+
53
+ ## Install
54
+
55
+ ```bash
56
+ pip install rollmem
57
+ ```
58
+
59
+ ## Usage
60
+
61
+ ```python
62
+ from rollmem import RollingMemory
63
+
64
+ def summarize(existing_summary, messages):
65
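+ # Placeholder: plug in any LLM here and return the new summary string. This demo only concatenates, so its summary keeps growing; a real summarize_fn should compress.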
66
+ folded = " ".join(m.content for m in messages)
67
+ return (existing_summary + " " + folded).strip()
68
+
69
+ mem = RollingMemory(
70
+ max_tokens=2000,
71
+ summarize_fn=summarize, # optional; without it, evicted turns are dropped
72
+ # token_counter=... # optional; defaults to a word-count estimate.
73
+ # # In production inject a model-accurate counter, e.g.
74
+ # # token_counter=lambda text: len(enc.encode(text))
75
+ )
76
+
77
+ mem.add_user_message("Hi, I'm planning a trip to Korea.")
78
+ mem.add_assistant_message("Great! When are you going?")
79
+
80
+ print(mem.get_context()) # -> str: summary (if any) + recent buffer, joined
81
+ print(mem.get_messages()) # -> list[Message]: summary prepended as a system turn
82
+ ```
83
+
84
+ `max_tokens` is the budget for the **verbatim recent-message buffer** — not the
85
+ running summary, and not a model's generation `max_tokens` (output limit). When
86
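+ the buffer exceeds it, the oldest turns are folded into the summary (or dropped when no `summarize_fn` is given). The newest message is always kept, even if it alone exceeds the budget.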
87
+
88
+ `token_counter` takes a single message's text (`str`) and returns an `int`. The
89
+ default is a crude word count — fine for demos, but pass a model-accurate counter
90
+ (such as `tiktoken`) for real token budgets.
91
+
92
+ ## Persistence
93
+
94
+ `to_dict()` / `from_dict()` serialize the memory **state** (running summary plus
95
+ buffer) to and from a plain `dict` — you choose the storage format:
96
+
97
+ ```python
98
+ import json
99
+
100
+ raw = json.dumps(mem.to_dict()) # save anywhere: file, DB column, cache...
101
+
102
+ mem = RollingMemory.from_dict(
103
+ json.loads(raw),
104
+ max_tokens=2000,
105
+ summarize_fn=summarize, # callbacks are NOT serialized — re-inject them
106
+ # token_counter=...
107
+ )
108
+ ```
109
+
110
+ `max_tokens` and the callbacks are runtime configuration, not saved state, so you
111
+ pass them again on restore. The buffer is restored verbatim; the token budget is
112
+ re-applied on the next added message.
113
+
114
+ ## How it works
115
+
116
+ - New turns go into `buffer`.
117
+ - When `buffer` exceeds `max_tokens`, the oldest turns are folded into `summary`
118
+ via `summarize_fn` (or dropped if none is provided).
119
+ - `get_messages() -> list[Message]` returns the buffer with the summary
120
+ prepended as a `system` turn. `get_context() -> str` is the string form of
121
+ the same thing (prompt-ready), so the two never diverge. Neither adds a
122
+ language-specific label — relabel the summary in your own prompt assembly if
123
+ you need to.
124
+
125
+ ## Limitations
126
+
127
+ - **Lossy by design.** Older turns are folded into the summary repeatedly, so
128
+ each pass can blur or drop detail (a "telephone game" effect). Keep
129
+ `max_tokens` large enough that anything you can't afford to lose stays in the
130
+ verbatim buffer.
131
+ - **The summary is not bounded for you.** `max_tokens` limits only the verbatim
132
+ buffer, not the running summary. rollmem hands your `summarize_fn` the current
133
+ summary plus the evicted turns and stores whatever it returns — so keeping the
134
+ summary compact is your `summarize_fn`'s job. If it merely concatenates,
135
+ the summary (and thus `get_context()`) grows without limit. Prompt it to
136
+ compress, or cap the summary length inside the callback.
137
+ - **Only as accurate as your counter.** The default token counter is a rough
138
+ word count; inject a model-accurate one (e.g. `tiktoken`) for real budgets.
139
+ - **In-memory by default.** State lives in memory, but `to_dict()` / `from_dict()`
140
+ let you persist and restore it (see [Persistence](#persistence)). Callbacks are
141
+ not serialized and must be re-injected on restore.
142
+
143
+ ## License
144
+
145
+ MIT
@@ -0,0 +1,8 @@
1
+ rollmem/__init__.py,sha256=Xe6PMgyVT8TskAl5i544e5Tu5iT5jQzMXMfY9vbcKSs,349
2
+ rollmem/memory.py,sha256=bdGncefZWw33solluDxGIChJr1Y1YuFBHxyO5dBCdwM,8498
3
+ rollmem/message.py,sha256=8uamVUMQpdfY-mzNS-8aXDT3E8kbRJswV2_mJP4RwG4,1478
4
+ rollmem/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ rollmem-0.0.1.dist-info/METADATA,sha256=mKHVFX_gw90xT7pvNsm811B_AZ7p6ij0Exm2sEbTPjg,6135
6
+ rollmem-0.0.1.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
7
+ rollmem-0.0.1.dist-info/licenses/LICENSE,sha256=NBdRD4nIdWtt1GhhF6q-UKm15N-snL7ZKs3lF_RZ_9Y,1078
8
+ rollmem-0.0.1.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 손옥무 | son okmoo
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.