PyPI - coffloader - Versions diffs - 0.1.0__tar.gz - Mend

coffloader 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

coffloader-0.1.0/.gitignore +42 -0
coffloader-0.1.0/CHANGELOG.md +15 -0
coffloader-0.1.0/LICENSE +21 -0
coffloader-0.1.0/PKG-INFO +201 -0
coffloader-0.1.0/README.md +172 -0
coffloader-0.1.0/pyproject.toml +48 -0
coffloader-0.1.0/src/coffloader/__init__.py +17 -0
coffloader-0.1.0/src/coffloader/backends/__init__.py +8 -0
coffloader-0.1.0/src/coffloader/backends/base.py +23 -0
coffloader-0.1.0/src/coffloader/backends/composite.py +41 -0
coffloader-0.1.0/src/coffloader/backends/local.py +37 -0
coffloader-0.1.0/src/coffloader/backends/memory.py +25 -0
coffloader-0.1.0/src/coffloader/index/__init__.py +15 -0
coffloader-0.1.0/src/coffloader/index/embeddings.py +181 -0
coffloader-0.1.0/src/coffloader/index/fts.py +218 -0
coffloader-0.1.0/src/coffloader/index/hybrid.py +80 -0
coffloader-0.1.0/src/coffloader/py.typed +0 -0
coffloader-0.1.0/src/coffloader/store.py +279 -0
coffloader-0.1.0/src/coffloader/toc.py +67 -0
coffloader-0.1.0/tests/__init__.py +0 -0
coffloader-0.1.0/tests/test_basic.py +153 -0
coffloader-0.1.0/tests/test_conversation_simulator.py +241 -0
coffloader-0.1.0/tests/test_stage3_agent.py +190 -0

coffloader-0.1.0/.gitignore ADDED Viewed

@@ -0,0 +1,42 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+dist/
+*.egg-info/
+*.egg
+.eggs/
+# Virtual environments
+.venv/
+venv/
+ENV/
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+# Testing
+.pytest_cache/
+.coverage
+htmlcov/
+.mypy_cache/
+# OS
+.DS_Store
+Thumbs.db
+# Project specific
+*.db
+# Secrets
+.env
+# Build artifacts
+*.whl
+*.tar.gz

coffloader-0.1.0/CHANGELOG.md ADDED Viewed

@@ -0,0 +1,15 @@
+# Changelog
+All notable changes to this project will be documented in this file.
+## [0.1.0] - 2026-06-07
+### Added
+- Initial release
+- `Coffloader` main class with `write`, `search`, `read`, `inspect`, `delete` methods
+- `MemoryBackend`, `LocalBackend`, `CompositeBackend` for flexible storage
+- SQLite FTS5 index for BM25 keyword search
+- Optional semantic search with sentence-transformers (`pip install coffloader[embed]`)
+- Hybrid search combining BM25 + embeddings via Reciprocal Rank Fusion
+- Size limits with reject or metadata-only modes
+- Namespace filtering for multi-session/multi-agent isolation

coffloader-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 coffloader contributors
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

coffloader-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,201 @@
+Metadata-Version: 2.4
+Name: coffloader
+Version: 0.1.0
+Summary: External memory for AI agents — offload context to a VFS, index summaries, retrieve on demand.
+Project-URL: Homepage, https://github.com/mingyk/coffloader
+Project-URL: Repository, https://github.com/mingyk/coffloader
+Author: coffloader contributors
+License-Expression: MIT
+License-File: LICENSE
+Keywords: agent,context,llm,memory,rag,vfs
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Requires-Python: >=3.9
+Provides-Extra: dev
+Requires-Dist: mypy>=1.10; extra == 'dev'
+Requires-Dist: pytest>=8.0; extra == 'dev'
+Requires-Dist: ruff>=0.4; extra == 'dev'
+Provides-Extra: embed
+Requires-Dist: numpy>=1.21; extra == 'embed'
+Requires-Dist: sentence-transformers>=2.2; extra == 'embed'
+Description-Content-Type: text/markdown
+# coffloader
+**External memory for AI agents** — offload context to a VFS, index caller-provided summaries, retrieve on demand.
+[![Python](https://img.shields.io/badge/python-3.9%2B-blue.svg)](https://www.python.org/downloads/)
+[![License](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
+[![Status](https://img.shields.io/badge/status-pre--alpha-orange.svg)](#status)
+```bash
+pip install coffloader              # core (BM25 search)
+pip install coffloader[embed]       # + semantic search (sentence-transformers)
+```
+---
+## What it does
+Agents accumulate context faster than any window allows. coffloader offloads content to storage, keeps a searchable index of summaries, and retrieves full content on demand.
+```
+write(content, summary) → store blob + index summary
+search(query)           → top-k summaries + addresses
+read(address)           → full content
+```
+**Key constraints:**
+- `summary` is **required** on write — your agent/LLM provides it, not coffloader
+- No LLM calls inside the library — pure storage and retrieval
+- Caller handles contradiction detection, dedup, and reasoning
+---
+## Quick start
+```python
+from coffloader import Coffloader
+store = Coffloader()
+# 1. Offload a conversation segment (summary comes from your agent)
+store.write(
+    content="[Turn 1] User: I was charged twice for order #9910...",
+    summary="Customer reports duplicate charge on order #9910",
+    metadata={"session_id": "ticket_8842", "segment": 1},
+    path="/sessions/ticket_8842/seg_001.txt",
+)
+# 2. Later: search when user asks about earlier context
+hits = store.search("order number", namespace="/sessions/ticket_8842/")
+# 3. Load full content and inject into your LLM
+text = store.read_text(hits[0].address)
+```
+**The loop:** offload cold context → search when needed → read and inject.
+---
+## API
+```python
+store = Coffloader(
+    backend=None,           # default: in-memory VFS
+    max_bytes=512_000,      # default: 512 KB — reject oversized payloads
+    on_oversize="reject",   # "reject" or "metadata_only"
+    hybrid=True,            # default: True — use BM25 + embeddings if available
+    min_similarity=0.3,     # default: 0.3 — filter out weak embedding matches
+                            # lower = more results, less relevant
+                            # higher = fewer results, more relevant
+                            # set to 0.0 to disable filtering
+)
+# Store content with a caller-provided summary
+result = store.write(content, summary, metadata={}, path=None)
+# Search indexed summaries (returns TocEntry list, not full content)
+hits = store.search(query, k=5, filters={}, namespace=None)
+#                         ^^^ number of results to return
+# Load full content
+data = store.read(address)          # bytes
+text = store.read_text(address)     # str
+# Check size before writing
+check = store.inspect(content)      # .acceptable, .byte_count
+# Delete
+store.delete(address)
+```
+**Defaults are exposed as class attributes:**
+```python
+Coffloader.DEFAULT_MAX_BYTES       # 512_000
+Coffloader.DEFAULT_MIN_SIMILARITY  # 0.3
+```
+---
+## Composite backends
+Route paths to different storage:
+```python
+from coffloader import Coffloader, CompositeBackend, LocalBackend, MemoryBackend
+store = Coffloader(
+    backend=CompositeBackend(
+        default=MemoryBackend(),
+        routes={"/archive/": LocalBackend(root="./data")},
+    )
+)
+```
+---
+## Patterns
+**Long session (segmented):** Offload every ~15 turns. Search returns precise segments, not the whole transcript.
+```python
+store.write(content=turns_1_15, summary="...", path="/sessions/abc/seg_001.txt")
+store.write(content=turns_16_30, summary="...", path="/sessions/abc/seg_002.txt")
+```
+**Tool output:** Offload large grep/API results with a structural summary (no LLM needed).
+```python
+store.write(
+    content=grep_output,
+    summary=f"grep error src/ → {n} matches",
+    path=f"/active/{session}/tool_001.txt",
+)
+```
+**Multi-agent:** Use namespaces for isolation (`/agent/{id}/`) or sharing (`/shared/`).
+---
+## Limits
+- Max payload: 512 KB by default (configurable)
+- Oversized content is rejected or recorded as metadata-only
+- No silent truncation
+---
+## Status
+Pre-alpha. Core API is stable: `write`, `search`, `read`, `inspect`, `delete`.
+**Working:**
+- BM25 (keyword) search via SQLite FTS5
+- Semantic search via `[embed]` optional extra
+- Hybrid search (BM25 + embeddings) with Reciprocal Rank Fusion
+**Not yet implemented:**
+- Persistent index to disk
+- Sharded TOC for large corpora
+---
+## Non-goals
+- LLM calls from the library
+- Automatic dedup, contradiction detection, or memory merge
+- Knowledge graphs or hierarchical rollups
+---
+## License
+MIT

coffloader-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,172 @@
+# coffloader
+**External memory for AI agents** — offload context to a VFS, index caller-provided summaries, retrieve on demand.
+[![Python](https://img.shields.io/badge/python-3.9%2B-blue.svg)](https://www.python.org/downloads/)
+[![License](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
+[![Status](https://img.shields.io/badge/status-pre--alpha-orange.svg)](#status)
+```bash
+pip install coffloader              # core (BM25 search)
+pip install coffloader[embed]       # + semantic search (sentence-transformers)
+```
+---
+## What it does
+Agents accumulate context faster than any window allows. coffloader offloads content to storage, keeps a searchable index of summaries, and retrieves full content on demand.
+```
+write(content, summary) → store blob + index summary
+search(query)           → top-k summaries + addresses
+read(address)           → full content
+```
+**Key constraints:**
+- `summary` is **required** on write — your agent/LLM provides it, not coffloader
+- No LLM calls inside the library — pure storage and retrieval
+- Caller handles contradiction detection, dedup, and reasoning
+---
+## Quick start
+```python
+from coffloader import Coffloader
+store = Coffloader()
+# 1. Offload a conversation segment (summary comes from your agent)
+store.write(
+    content="[Turn 1] User: I was charged twice for order #9910...",
+    summary="Customer reports duplicate charge on order #9910",
+    metadata={"session_id": "ticket_8842", "segment": 1},
+    path="/sessions/ticket_8842/seg_001.txt",
+)
+# 2. Later: search when user asks about earlier context
+hits = store.search("order number", namespace="/sessions/ticket_8842/")
+# 3. Load full content and inject into your LLM
+text = store.read_text(hits[0].address)
+```
+**The loop:** offload cold context → search when needed → read and inject.
+---
+## API
+```python
+store = Coffloader(
+    backend=None,           # default: in-memory VFS
+    max_bytes=512_000,      # default: 512 KB — reject oversized payloads
+    on_oversize="reject",   # "reject" or "metadata_only"
+    hybrid=True,            # default: True — use BM25 + embeddings if available
+    min_similarity=0.3,     # default: 0.3 — filter out weak embedding matches
+                            # lower = more results, less relevant
+                            # higher = fewer results, more relevant
+                            # set to 0.0 to disable filtering
+)
+# Store content with a caller-provided summary
+result = store.write(content, summary, metadata={}, path=None)
+# Search indexed summaries (returns TocEntry list, not full content)
+hits = store.search(query, k=5, filters={}, namespace=None)
+#                         ^^^ number of results to return
+# Load full content
+data = store.read(address)          # bytes
+text = store.read_text(address)     # str
+# Check size before writing
+check = store.inspect(content)      # .acceptable, .byte_count
+# Delete
+store.delete(address)
+```
+**Defaults are exposed as class attributes:**
+```python
+Coffloader.DEFAULT_MAX_BYTES       # 512_000
+Coffloader.DEFAULT_MIN_SIMILARITY  # 0.3
+```
+---
+## Composite backends
+Route paths to different storage:
+```python
+from coffloader import Coffloader, CompositeBackend, LocalBackend, MemoryBackend
+store = Coffloader(
+    backend=CompositeBackend(
+        default=MemoryBackend(),
+        routes={"/archive/": LocalBackend(root="./data")},
+    )
+)
+```
+---
+## Patterns
+**Long session (segmented):** Offload every ~15 turns. Search returns precise segments, not the whole transcript.
+```python
+store.write(content=turns_1_15, summary="...", path="/sessions/abc/seg_001.txt")
+store.write(content=turns_16_30, summary="...", path="/sessions/abc/seg_002.txt")
+```
+**Tool output:** Offload large grep/API results with a structural summary (no LLM needed).
+```python
+store.write(
+    content=grep_output,
+    summary=f"grep error src/ → {n} matches",
+    path=f"/active/{session}/tool_001.txt",
+)
+```
+**Multi-agent:** Use namespaces for isolation (`/agent/{id}/`) or sharing (`/shared/`).
+---
+## Limits
+- Max payload: 512 KB by default (configurable)
+- Oversized content is rejected or recorded as metadata-only
+- No silent truncation
+---
+## Status
+Pre-alpha. Core API is stable: `write`, `search`, `read`, `inspect`, `delete`.
+**Working:**
+- BM25 (keyword) search via SQLite FTS5
+- Semantic search via `[embed]` optional extra
+- Hybrid search (BM25 + embeddings) with Reciprocal Rank Fusion
+**Not yet implemented:**
+- Persistent index to disk
+- Sharded TOC for large corpora
+---
+## Non-goals
+- LLM calls from the library
+- Automatic dedup, contradiction detection, or memory merge
+- Knowledge graphs or hierarchical rollups
+---
+## License
+MIT

coffloader-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,48 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+[project]
+name = "coffloader"
+version = "0.1.0"
+description = "External memory for AI agents — offload context to a VFS, index summaries, retrieve on demand."
+readme = "README.md"
+license = "MIT"
+requires-python = ">=3.9"
+authors = [{ name = "coffloader contributors" }]
+keywords = ["llm", "agent", "memory", "context", "rag", "vfs"]
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
+]
+dependencies = []
+[project.optional-dependencies]
+embed = ["sentence-transformers>=2.2", "numpy>=1.21"]
+dev = ["pytest>=8.0", "ruff>=0.4", "mypy>=1.10"]
+[project.urls]
+Homepage = "https://github.com/mingyk/coffloader"
+Repository = "https://github.com/mingyk/coffloader"
+[tool.hatch.build.targets.wheel]
+packages = ["src/coffloader"]
+[tool.ruff]
+line-length = 100
+target-version = "py39"
+[tool.ruff.lint]
+select = ["E", "F", "I", "UP"]
+[tool.mypy]
+python_version = "3.9"
+strict = true

coffloader-0.1.0/src/coffloader/__init__.py ADDED Viewed

@@ -0,0 +1,17 @@
+"""coffloader — External memory for AI agents."""
+from .backends import CompositeBackend, LocalBackend, MemoryBackend
+from .store import Coffloader
+from .toc import InspectResult, TocEntry, WriteResult
+# Package version string; pyproject.toml declares the same "0.1.0".
+__version__ = "0.1.0"
+# Public API of the top-level package: the store class, its result/entry
+# types from .toc, and the three storage backends re-exported from .backends.
+__all__ = [
+    "Coffloader",
+    "TocEntry",
+    "WriteResult",
+    "InspectResult",
+    "CompositeBackend",
+    "LocalBackend",
+    "MemoryBackend",
+]

coffloader-0.1.0/src/coffloader/backends/__init__.py ADDED Viewed

@@ -0,0 +1,8 @@
+"""Storage backends for coffloader."""
+from .base import BackendProtocol
+from .composite import CompositeBackend
+from .local import LocalBackend
+from .memory import MemoryBackend
+# Public names of the backends subpackage: the protocol plus the backend classes imported above.
+__all__ = ["BackendProtocol", "CompositeBackend", "LocalBackend", "MemoryBackend"]

coffloader-0.1.0/src/coffloader/backends/base.py ADDED Viewed

@@ -0,0 +1,23 @@
+"""Backend protocol for VFS storage."""
+from typing import Protocol
+class BackendProtocol(Protocol):
+    """Structural interface that every storage backend provides.
+    This is a ``typing.Protocol``: a class conforms by defining these four
+    methods with matching signatures and does not have to inherit from it.
+    """
+    def write(self, path: str, data: bytes) -> None:
+        """Store ``data`` at ``path``."""
+        ...
+    def read(self, path: str) -> bytes:
+        """Return the bytes stored at ``path``.
+        Raises:
+            KeyError: If nothing is stored at ``path``.
+        """
+        ...
+    def delete(self, path: str) -> bool:
+        """Delete the data at ``path``.
+        Returns:
+            True if something was deleted, False if ``path`` was not found.
+        """
+        ...
+    def exists(self, path: str) -> bool:
+        """Return whether ``path`` exists in the backend."""
+        ...

coffloader-0.1.0/src/coffloader/backends/composite.py ADDED Viewed

@@ -0,0 +1,41 @@
+"""Composite backend that routes by path prefix."""
+from __future__ import annotations
+from .base import BackendProtocol
+from .memory import MemoryBackend
+class CompositeBackend:
+    """Route paths to different backends based on prefix.
+    Longest matching prefix wins. Unmatched paths go to the default backend.
+    """
+    def __init__(
+        self,
+        default: BackendProtocol | None = None,
+        routes: dict[str, BackendProtocol] | None = None,
+    ) -> None:
+        self._default: BackendProtocol = default or MemoryBackend()
+        self._routes = routes or {}
+        # Sort routes by length descending for longest-prefix matching
+        self._sorted_prefixes = sorted(self._routes.keys(), key=len, reverse=True)
+    def _get_backend(self, path: str) -> BackendProtocol:
+        for prefix in self._sorted_prefixes:
+            if path.startswith(prefix):
+                return self._routes[prefix]
+        return self._default
+    def write(self, path: str, data: bytes) -> None:
+        self._get_backend(path).write(path, data)
+    def read(self, path: str) -> bytes:
+        return self._get_backend(path).read(path)
+    def delete(self, path: str) -> bool:
+        return self._get_backend(path).delete(path)
+    def exists(self, path: str) -> bool:
+        return self._get_backend(path).exists(path)

coffloader-0.1.0/src/coffloader/backends/local.py ADDED Viewed

@@ -0,0 +1,37 @@
+"""Local filesystem storage backend."""
+from pathlib import Path
+class LocalBackend:
+    """Store blobs on local disk under a root directory."""
+    def __init__(self, root: str | Path) -> None:
+        self._root = Path(root).resolve()
+        self._root.mkdir(parents=True, exist_ok=True)
+    def _resolve(self, path: str) -> Path:
+        # Strip leading slash for joining
+        relative = path.lstrip("/")
+        return self._root / relative
+    def write(self, path: str, data: bytes) -> None:
+        file_path = self._resolve(path)
+        file_path.parent.mkdir(parents=True, exist_ok=True)
+        file_path.write_bytes(data)
+    def read(self, path: str) -> bytes:
+        file_path = self._resolve(path)
+        if not file_path.exists():
+            raise KeyError(f"Path not found: {path}")
+        return file_path.read_bytes()
+    def delete(self, path: str) -> bool:
+        file_path = self._resolve(path)
+        if file_path.exists():
+            file_path.unlink()
+            return True
+        return False
+    def exists(self, path: str) -> bool:
+        return self._resolve(path).exists()

coffloader-0.1.0/src/coffloader/backends/memory.py ADDED Viewed

@@ -0,0 +1,25 @@
+"""In-memory storage backend."""
+class MemoryBackend:
+    """Store blobs in a Python dict. Data lost on process exit."""
+    def __init__(self) -> None:
+        self._store: dict[str, bytes] = {}
+    def write(self, path: str, data: bytes) -> None:
+        self._store[path] = data
+    def read(self, path: str) -> bytes:
+        if path not in self._store:
+            raise KeyError(f"Path not found: {path}")
+        return self._store[path]
+    def delete(self, path: str) -> bool:
+        if path in self._store:
+            del self._store[path]
+            return True
+        return False
+    def exists(self, path: str) -> bool:
+        return path in self._store

coffloader-0.1.0/src/coffloader/index/__init__.py ADDED Viewed

@@ -0,0 +1,15 @@
+"""Index implementations for TOC search."""
+from .fts import FTSIndex
+# Optional imports for embedding-based search
+try:
+    from .embeddings import EmbeddingIndex
+    from .hybrid import HybridIndex
+    EMBEDDINGS_AVAILABLE = True
+except ImportError:
+    EMBEDDINGS_AVAILABLE = False
+    EmbeddingIndex = None  # type: ignore
+    HybridIndex = None  # type: ignore
+__all__ = ["FTSIndex", "EmbeddingIndex", "HybridIndex", "EMBEDDINGS_AVAILABLE"]