langchain-mimir 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langchain_mimir-0.1.0/.github/workflows/publish.yml +45 -0
- langchain_mimir-0.1.0/.github/workflows/test.yml +28 -0
- langchain_mimir-0.1.0/.gitignore +13 -0
- langchain_mimir-0.1.0/LICENSE +21 -0
- langchain_mimir-0.1.0/PKG-INFO +125 -0
- langchain_mimir-0.1.0/README.md +97 -0
- langchain_mimir-0.1.0/langchain_mimir/__init__.py +34 -0
- langchain_mimir-0.1.0/langchain_mimir/client.py +327 -0
- langchain_mimir-0.1.0/langchain_mimir/integration.py +189 -0
- langchain_mimir-0.1.0/langchain_mimir/py.typed +0 -0
- langchain_mimir-0.1.0/pyproject.toml +42 -0
- langchain_mimir-0.1.0/tests/test_langchain_mimir.py +336 -0
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- 'v*'
|
|
7
|
+
workflow_dispatch:
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
build:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v4
|
|
14
|
+
|
|
15
|
+
- uses: actions/setup-python@v5
|
|
16
|
+
with:
|
|
17
|
+
python-version: '3.12'
|
|
18
|
+
|
|
19
|
+
- name: Install build
|
|
20
|
+
run: pip install build==1.2.2.post1
|
|
21
|
+
|
|
22
|
+
- name: Build
|
|
23
|
+
run: python -m build
|
|
24
|
+
|
|
25
|
+
- uses: actions/upload-artifact@v4
|
|
26
|
+
with:
|
|
27
|
+
name: dist
|
|
28
|
+
path: dist/
|
|
29
|
+
|
|
30
|
+
publish:
|
|
31
|
+
needs: build
|
|
32
|
+
runs-on: ubuntu-latest
|
|
33
|
+
environment:
|
|
34
|
+
name: pypi
|
|
35
|
+
url: https://pypi.org/p/langchain-mimir
|
|
36
|
+
permissions:
|
|
37
|
+
id-token: write
|
|
38
|
+
steps:
|
|
39
|
+
- uses: actions/download-artifact@v4
|
|
40
|
+
with:
|
|
41
|
+
name: dist
|
|
42
|
+
path: dist/
|
|
43
|
+
|
|
44
|
+
- name: Publish to PyPI
|
|
45
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
name: Test
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
workflow_dispatch:
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ${{ matrix.os }}
|
|
12
|
+
strategy:
|
|
13
|
+
fail-fast: false
|
|
14
|
+
matrix:
|
|
15
|
+
os: [ubuntu-latest, windows-latest, macos-latest]
|
|
16
|
+
python-version: ['3.10', '3.12']
|
|
17
|
+
steps:
|
|
18
|
+
- uses: actions/checkout@v4
|
|
19
|
+
|
|
20
|
+
- uses: actions/setup-python@v5
|
|
21
|
+
with:
|
|
22
|
+
python-version: ${{ matrix.python-version }}
|
|
23
|
+
|
|
24
|
+
- name: Install
|
|
25
|
+
run: pip install -e ".[test]"
|
|
26
|
+
|
|
27
|
+
- name: Run tests
|
|
28
|
+
run: python -m pytest tests/ -q
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Perseus Computing LLC
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: langchain-mimir
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Mimir persistent, local, encrypted memory for LangChain — tools and a retriever backed by the Mimir MCP engine.
|
|
5
|
+
Project-URL: Homepage, https://github.com/Perseus-Computing-LLC/langchain-mimir
|
|
6
|
+
Project-URL: Repository, https://github.com/Perseus-Computing-LLC/langchain-mimir
|
|
7
|
+
Project-URL: Bug Tracker, https://github.com/Perseus-Computing-LLC/langchain-mimir/issues
|
|
8
|
+
Project-URL: Mimir, https://github.com/Perseus-Computing-LLC/mimir
|
|
9
|
+
Author-email: Perseus Computing LLC <hermes@perseus.observer>
|
|
10
|
+
License: MIT
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Keywords: agents,langchain,llm,mcp,memory,mimir,retriever
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Requires-Dist: langchain-core>=0.3.0
|
|
24
|
+
Requires-Dist: pydantic>=2.0
|
|
25
|
+
Provides-Extra: test
|
|
26
|
+
Requires-Dist: pytest>=7.0; extra == 'test'
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
|
|
29
|
+
# langchain-mimir
|
|
30
|
+
|
|
31
|
+
Persistent, local-first, encrypted memory for [LangChain](https://www.langchain.com/),
|
|
32
|
+
backed by [Mimir](https://github.com/Perseus-Computing-LLC/mimir) — an open-source
|
|
33
|
+
(MIT) memory engine with FTS5 + dense hybrid search and optional AES-256-GCM
|
|
34
|
+
encryption, exposed over the Model Context Protocol (MCP) stdio transport.
|
|
35
|
+
|
|
36
|
+
It gives a LangChain agent durable memory that survives across runs and
|
|
37
|
+
processes, stored in a single local SQLite file you control — no external
|
|
38
|
+
service, no cloud.
|
|
39
|
+
|
|
40
|
+
## What you get
|
|
41
|
+
|
|
42
|
+
This package wraps Mimir using the modern `langchain-core` interfaces:
|
|
43
|
+
|
|
44
|
+
- **`create_mimir_tools(client)`** — a pair of `StructuredTool`s
|
|
45
|
+
(`mimir_remember` / `mimir_recall`) you give to an agent so it can manage its
|
|
46
|
+
own long-term memory via tool calls. This is the currently recommended LangChain
|
|
47
|
+
pattern (the legacy `Memory` / `ConversationBufferMemory` classes are
|
|
48
|
+
deprecated).
|
|
49
|
+
- **`MimirRetriever`** — a `BaseRetriever` returning `Document`s, for drop-in use
|
|
50
|
+
in RAG chains and anywhere LangChain accepts a retriever (`.invoke(query)`).
|
|
51
|
+
- **`MimirClient`** — the low-level MCP stdio client, if you want direct access.
|
|
52
|
+
|
|
53
|
+
## Prerequisite: the `mimir` binary
|
|
54
|
+
|
|
55
|
+
This package talks to a local `mimir` executable via JSON-RPC over stdio. You
|
|
56
|
+
must have it installed:
|
|
57
|
+
|
|
58
|
+
- Download a release from
|
|
59
|
+
<https://github.com/Perseus-Computing-LLC/mimir/releases>, or build from source
|
|
60
|
+
(`cargo build --release`), and put `mimir` on your `$PATH`.
|
|
61
|
+
- Or pass an absolute path: `MimirClient(mimir_binary="/path/to/mimir")`.
|
|
62
|
+
|
|
63
|
+
On Windows the binary may be named `mimir.exe`; ensure its directory is on
|
|
64
|
+
`PATH`, or pass the full path.
|
|
65
|
+
|
|
66
|
+
## Install
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
pip install langchain-mimir
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## Usage
|
|
73
|
+
|
|
74
|
+
### As agent tools
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
from langchain_mimir import MimirClient, create_mimir_tools
|
|
78
|
+
|
|
79
|
+
client = MimirClient(db_path="~/.langchain/mimir.db")
|
|
80
|
+
tools = create_mimir_tools(client) # [mimir_remember, mimir_recall]
|
|
81
|
+
|
|
82
|
+
# Bind to any tool-calling model / agent:
|
|
83
|
+
from langchain.chat_models import init_chat_model
|
|
84
|
+
|
|
85
|
+
llm = init_chat_model("anthropic:claude-sonnet-4-5")
|
|
86
|
+
llm_with_memory = llm.bind_tools(tools)
|
|
87
|
+
|
|
88
|
+
resp = llm_with_memory.invoke("Remember that my favorite language is Rust.")
|
|
89
|
+
# ... the model will call mimir_remember; execute the tool call as usual.
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### As a retriever
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
from langchain_mimir import MimirClient, MimirRetriever
|
|
96
|
+
|
|
97
|
+
client = MimirClient(db_path="~/.langchain/mimir.db")
|
|
98
|
+
client.remember("The capital of France is Paris.")
|
|
99
|
+
|
|
100
|
+
retriever = MimirRetriever(client=client, k=5)
|
|
101
|
+
docs = retriever.invoke("What is the capital of France?")
|
|
102
|
+
print(docs[0].page_content) # -> "The capital of France is Paris."
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
### Direct client
|
|
106
|
+
|
|
107
|
+
```python
|
|
108
|
+
from langchain_mimir import MimirClient
|
|
109
|
+
|
|
110
|
+
client = MimirClient(db_path="~/.langchain/mimir.db")
|
|
111
|
+
client.remember("Project deadline is July 15.", tags=["project", "deadline"])
|
|
112
|
+
items = client.recall("when is the deadline")
|
|
113
|
+
print(items[0]["text"])
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
## How it works
|
|
117
|
+
|
|
118
|
+
`MimirClient` spawns `mimir --db <path>` as a subprocess and speaks JSON-RPC 2.0
|
|
119
|
+
(MCP) over its stdin/stdout. A background reader thread and a lock make calls
|
|
120
|
+
thread-safe and timeout-bounded. Memories are stored via `mimir_remember` and
|
|
121
|
+
retrieved via `mimir_recall`.
|
|
122
|
+
|
|
123
|
+
## License
|
|
124
|
+
|
|
125
|
+
MIT © 2026 Perseus Computing LLC
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# langchain-mimir
|
|
2
|
+
|
|
3
|
+
Persistent, local-first, encrypted memory for [LangChain](https://www.langchain.com/),
|
|
4
|
+
backed by [Mimir](https://github.com/Perseus-Computing-LLC/mimir) — an open-source
|
|
5
|
+
(MIT) memory engine with FTS5 + dense hybrid search and optional AES-256-GCM
|
|
6
|
+
encryption, exposed over the Model Context Protocol (MCP) stdio transport.
|
|
7
|
+
|
|
8
|
+
It gives a LangChain agent durable memory that survives across runs and
|
|
9
|
+
processes, stored in a single local SQLite file you control — no external
|
|
10
|
+
service, no cloud.
|
|
11
|
+
|
|
12
|
+
## What you get
|
|
13
|
+
|
|
14
|
+
This package wraps Mimir using the modern `langchain-core` interfaces:
|
|
15
|
+
|
|
16
|
+
- **`create_mimir_tools(client)`** — a pair of `StructuredTool`s
|
|
17
|
+
(`mimir_remember` / `mimir_recall`) you give to an agent so it can manage its
|
|
18
|
+
own long-term memory via tool calls. This is the currently recommended LangChain
|
|
19
|
+
pattern (the legacy `Memory` / `ConversationBufferMemory` classes are
|
|
20
|
+
deprecated).
|
|
21
|
+
- **`MimirRetriever`** — a `BaseRetriever` returning `Document`s, for drop-in use
|
|
22
|
+
in RAG chains and anywhere LangChain accepts a retriever (`.invoke(query)`).
|
|
23
|
+
- **`MimirClient`** — the low-level MCP stdio client, if you want direct access.
|
|
24
|
+
|
|
25
|
+
## Prerequisite: the `mimir` binary
|
|
26
|
+
|
|
27
|
+
This package talks to a local `mimir` executable via JSON-RPC over stdio. You
|
|
28
|
+
must have it installed:
|
|
29
|
+
|
|
30
|
+
- Download a release from
|
|
31
|
+
<https://github.com/Perseus-Computing-LLC/mimir/releases>, or build from source
|
|
32
|
+
(`cargo build --release`), and put `mimir` on your `$PATH`.
|
|
33
|
+
- Or pass an absolute path: `MimirClient(mimir_binary="/path/to/mimir")`.
|
|
34
|
+
|
|
35
|
+
On Windows the binary may be named `mimir.exe`; ensure its directory is on
|
|
36
|
+
`PATH`, or pass the full path.
|
|
37
|
+
|
|
38
|
+
## Install
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
pip install langchain-mimir
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Usage
|
|
45
|
+
|
|
46
|
+
### As agent tools
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
from langchain_mimir import MimirClient, create_mimir_tools
|
|
50
|
+
|
|
51
|
+
client = MimirClient(db_path="~/.langchain/mimir.db")
|
|
52
|
+
tools = create_mimir_tools(client) # [mimir_remember, mimir_recall]
|
|
53
|
+
|
|
54
|
+
# Bind to any tool-calling model / agent:
|
|
55
|
+
from langchain.chat_models import init_chat_model
|
|
56
|
+
|
|
57
|
+
llm = init_chat_model("anthropic:claude-sonnet-4-5")
|
|
58
|
+
llm_with_memory = llm.bind_tools(tools)
|
|
59
|
+
|
|
60
|
+
resp = llm_with_memory.invoke("Remember that my favorite language is Rust.")
|
|
61
|
+
# ... the model will call mimir_remember; execute the tool call as usual.
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
### As a retriever
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
from langchain_mimir import MimirClient, MimirRetriever
|
|
68
|
+
|
|
69
|
+
client = MimirClient(db_path="~/.langchain/mimir.db")
|
|
70
|
+
client.remember("The capital of France is Paris.")
|
|
71
|
+
|
|
72
|
+
retriever = MimirRetriever(client=client, k=5)
|
|
73
|
+
docs = retriever.invoke("What is the capital of France?")
|
|
74
|
+
print(docs[0].page_content) # -> "The capital of France is Paris."
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### Direct client
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
from langchain_mimir import MimirClient
|
|
81
|
+
|
|
82
|
+
client = MimirClient(db_path="~/.langchain/mimir.db")
|
|
83
|
+
client.remember("Project deadline is July 15.", tags=["project", "deadline"])
|
|
84
|
+
items = client.recall("when is the deadline")
|
|
85
|
+
print(items[0]["text"])
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## How it works
|
|
89
|
+
|
|
90
|
+
`MimirClient` spawns `mimir --db <path>` as a subprocess and speaks JSON-RPC 2.0
|
|
91
|
+
(MCP) over its stdin/stdout. A background reader thread and a lock make calls
|
|
92
|
+
thread-safe and timeout-bounded. Memories are stored via `mimir_remember` and
|
|
93
|
+
retrieved via `mimir_recall`.
|
|
94
|
+
|
|
95
|
+
## License
|
|
96
|
+
|
|
97
|
+
MIT © 2026 Perseus Computing LLC
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""langchain-mimir — Mimir persistent memory for LangChain.
|
|
2
|
+
|
|
3
|
+
Mimir (github.com/Perseus-Computing-LLC/mimir) is an open-source (MIT),
|
|
4
|
+
local-first, encrypted persistent memory engine that speaks MCP JSON-RPC over
|
|
5
|
+
stdio. This package exposes it to LangChain via the modern ``langchain-core``
|
|
6
|
+
interfaces:
|
|
7
|
+
|
|
8
|
+
- :class:`MimirClient` — low-level stdio client for the ``mimir`` binary.
|
|
9
|
+
- :func:`create_mimir_tools` — ``StructuredTool``s (remember / recall) for agents.
|
|
10
|
+
- :class:`MimirRetriever` — a ``BaseRetriever`` for RAG chains.
|
|
11
|
+
|
|
12
|
+
Requirements:
|
|
13
|
+
A ``mimir`` binary must be on ``$PATH`` or passed via ``mimir_binary=``.
|
|
14
|
+
Download from https://github.com/Perseus-Computing-LLC/mimir/releases
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from .client import MimirClient, MimirError
|
|
18
|
+
from .integration import (
|
|
19
|
+
MimirRetriever,
|
|
20
|
+
create_mimir_tools,
|
|
21
|
+
create_recall_tool,
|
|
22
|
+
create_remember_tool,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
__version__ = "0.1.0"
|
|
26
|
+
|
|
27
|
+
__all__ = [
|
|
28
|
+
"MimirClient",
|
|
29
|
+
"MimirError",
|
|
30
|
+
"MimirRetriever",
|
|
31
|
+
"create_mimir_tools",
|
|
32
|
+
"create_remember_tool",
|
|
33
|
+
"create_recall_tool",
|
|
34
|
+
]
|
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
"""Mimir MCP stdio client.
|
|
2
|
+
|
|
3
|
+
Spawns a local ``mimir`` binary and speaks JSON-RPC 2.0 over its stdin/stdout
|
|
4
|
+
(MCP stdio transport). This is the low-level transport reused by the LangChain
|
|
5
|
+
tools and retriever; it has no LangChain dependency of its own.
|
|
6
|
+
|
|
7
|
+
The subprocess/JSON-RPC machinery here is adapted from the proven
|
|
8
|
+
``adk-mimir-memory`` client (github.com/Perseus-Computing-LLC/adk-mimir-memory):
|
|
9
|
+
a background reader thread pumps stdout lines into a queue so RPC calls can wait
|
|
10
|
+
with a timeout and correlate responses by id, and a lock serializes
|
|
11
|
+
request/response exchanges so they never interleave.
|
|
12
|
+
|
|
13
|
+
Requirements:
|
|
14
|
+
A ``mimir`` binary must be on ``$PATH`` or passed explicitly via
|
|
15
|
+
``mimir_binary``. Download from:
|
|
16
|
+
https://github.com/Perseus-Computing-LLC/mimir/releases
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import atexit
|
|
22
|
+
import json
|
|
23
|
+
import os
|
|
24
|
+
import queue
|
|
25
|
+
import shutil
|
|
26
|
+
import subprocess
|
|
27
|
+
import threading
|
|
28
|
+
import time
|
|
29
|
+
|
|
30
|
+
__all__ = ["MimirClient", "MimirError"]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class MimirError(RuntimeError):
    """Raised when the Mimir subprocess errors, crashes, or times out."""


class MimirClient:
    """Thread-safe JSON-RPC client for a local Mimir MCP stdio server.

    Starts ``mimir --db <db_path>`` as a subprocess, performs the MCP
    ``initialize`` handshake, and exposes :meth:`call_tool` for invoking any of
    Mimir's MCP tools (``mimir_remember``, ``mimir_recall``, ...).

    The client can be used as a context manager; leaving the ``with`` block
    calls :meth:`close`.

    Attributes:
        db_path: Filesystem path to the Mimir SQLite database.
    """

    def __init__(
        self,
        db_path: str = "~/.langchain/mimir.db",
        mimir_binary: str = "mimir",
        timeout_s: float = 30.0,
        encryption_key: str | None = None,
    ) -> None:
        """Initializes and starts the Mimir client.

        Args:
            db_path: Path to the Mimir database file. Defaults to
                ``~/.langchain/mimir.db``.
            mimir_binary: Name or path of the ``mimir`` executable. Defaults to
                ``mimir`` (resolved from ``$PATH``). A leading ``~`` is
                expanded, as it is for ``db_path``.
            timeout_s: Maximum time to wait for any single RPC response.
            encryption_key: Optional path to an AES-256-GCM key file; if given,
                passed to the binary via ``--encryption-key``.

        Raises:
            MimirError: If the binary cannot be found or the subprocess fails to
                start or complete the MCP handshake.
        """
        self.db_path = os.path.expanduser(db_path)
        self._timeout_s = timeout_s

        # Resolve the mimir binary.
        candidate = os.path.expanduser(mimir_binary)
        if os.path.isabs(candidate) and os.path.exists(candidate):
            self._mimir_binary = candidate
        else:
            resolved = shutil.which(candidate)
            if resolved is None and os.path.exists(candidate):
                resolved = candidate
            if resolved is None:
                raise MimirError(
                    f"mimir binary not found (looked for '{mimir_binary}'). "
                    "Install Mimir from "
                    "https://github.com/Perseus-Computing-LLC/mimir/releases "
                    "or pass the absolute path via mimir_binary=."
                )
            self._mimir_binary = resolved

        # Ensure the database directory exists.
        os.makedirs(os.path.dirname(self.db_path) or ".", exist_ok=True)

        argv = [self._mimir_binary, "--db", self.db_path]
        if encryption_key:
            argv += ["--encryption-key", encryption_key]

        self._lock = threading.Lock()
        self._request_id = 0
        self._closed = False
        self._recv: queue.Queue = queue.Queue()

        # Start the MCP stdio subprocess. stderr is discarded: nothing drains
        # it, so a chatty server filling the OS pipe buffer would block on its
        # stderr write while we wait on stdout (a two-pipe deadlock).
        # The encoding is pinned to UTF-8 because JSON-RPC payloads are UTF-8;
        # the locale default (e.g. cp1252 on Windows) would corrupt or fail to
        # decode non-ASCII memory text.
        try:
            self._proc = subprocess.Popen(
                argv,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.DEVNULL,
                text=True,
                encoding="utf-8",
            )
        except OSError as e:
            raise MimirError(f"failed to start mimir subprocess: {e}") from e

        # Background reader: pump stdout lines into a queue so _rpc can wait with
        # a timeout and correlate responses by id, rather than blocking forever
        # on a bare readline().
        proc_stdout = self._proc.stdout
        recv = self._recv

        def _pump() -> None:
            try:
                for line in proc_stdout:
                    recv.put(line)
            except (OSError, ValueError):
                # A read failure (closed pipe, undecodable bytes) is treated
                # the same as end-of-stream; the sentinel below reports it.
                pass
            finally:
                recv.put(None)  # EOF sentinel

        self._reader = threading.Thread(target=_pump, daemon=True)
        self._reader.start()

        # MCP handshake: initialize, then the required initialized notification
        # before any tools/call. If it fails, tear the subprocess down instead
        # of leaking it (no atexit hook has been registered yet).
        try:
            self._rpc(
                "initialize",
                {
                    "protocolVersion": "2024-11-05",
                    "capabilities": {},
                    "clientInfo": {"name": "langchain-mimir", "version": "0.1.0"},
                },
            )
            self._notify("notifications/initialized", {})
        except BaseException:
            self.close()
            raise

        atexit.register(self.close)

    # ── lifecycle ──────────────────────────────────────────────────────────

    def close(self) -> None:
        """Terminates the Mimir subprocess. Safe to call multiple times."""
        if self._closed:
            return
        self._closed = True
        # Drop the exit hook so a closed client is not kept alive until
        # interpreter shutdown; unregistering an unknown callback is a no-op.
        atexit.unregister(self.close)

        proc = self._proc
        try:
            proc.terminate()
            proc.wait(timeout=5)
        except Exception:
            # Shutdown is best-effort: escalate to kill and reap the child,
            # but never let cleanup raise.
            try:
                proc.kill()
                proc.wait(timeout=5)
            except Exception:
                pass

        # Give the reader a moment to observe EOF, then release the pipes.
        reader = getattr(self, "_reader", None)
        if reader is not None and reader is not threading.current_thread():
            reader.join(timeout=1)
        streams = [proc.stdin]
        if reader is None or not reader.is_alive():
            # Only close stdout once nothing is reading from it; closing it
            # under a blocked reader could itself block.
            streams.append(proc.stdout)
        for stream in streams:
            if stream is None:
                continue
            try:
                stream.close()
            except (OSError, ValueError):
                pass

    def __enter__(self) -> "MimirClient":
        return self

    def __exit__(self, *exc) -> None:
        self.close()

    # ── JSON-RPC core ──────────────────────────────────────────────────────

    def _next_id(self) -> int:
        """Returns the next request id. Callers must hold ``self._lock``."""
        self._request_id += 1
        return self._request_id

    def _rpc(self, method: str, params: object) -> dict:
        """Sends a JSON-RPC request and returns the ``result`` dict.

        The lock is held for the whole exchange so request/response pairs never
        interleave. Lines that are not JSON objects, and replies with a
        non-matching id (notifications, stale replies), are skipped.

        Raises:
            MimirError: On transport failure, RPC error, timeout, or if the
                client has already been closed.
        """
        with self._lock:
            if self._closed:
                raise MimirError("mimir client is closed.")
            req_id = self._next_id()
            req = {
                "jsonrpc": "2.0",
                "id": req_id,
                "method": method,
                "params": params,
            }
            payload = json.dumps(req, default=str)
            try:
                self._proc.stdin.write(payload + "\n")
                self._proc.stdin.flush()
            except (BrokenPipeError, OSError, ValueError) as e:
                raise MimirError(
                    f"mimir communication failed: {e}. The process may have crashed."
                ) from e

            timeout_msg = f"mimir RPC '{method}' timed out after {self._timeout_s}s."
            deadline = time.monotonic() + self._timeout_s
            while True:
                remaining = deadline - time.monotonic()
                if remaining <= 0:
                    raise MimirError(timeout_msg)
                try:
                    raw = self._recv.get(timeout=remaining)
                except queue.Empty:
                    raise MimirError(timeout_msg) from None
                if raw is None:
                    # Put the sentinel back so later calls also fail at once
                    # instead of waiting out the full timeout.
                    self._recv.put(None)
                    raise MimirError(
                        "mimir closed its output stream (it may have crashed)."
                    )
                raw = raw.strip()
                if not raw:
                    continue
                try:
                    resp = json.loads(raw)
                except json.JSONDecodeError:
                    continue  # non-JSON noise on stdout
                if not isinstance(resp, dict):
                    continue  # valid JSON but not a JSON-RPC message object
                if resp.get("id") != req_id:
                    continue  # notification or a stale/other reply

                if "error" in resp:
                    err = resp["error"]
                    if isinstance(err, dict):
                        raise MimirError(
                            f"mimir RPC error [{err.get('code')}]: {err.get('message')}"
                        )
                    raise MimirError(f"mimir RPC error: {err}")
                result = resp.get("result", {})
                return {} if result is None else result

    def _notify(self, method: str, params: object) -> None:
        """Sends a JSON-RPC notification (no id, no response expected).

        Delivery is best-effort: transport failures are ignored, and nothing is
        sent once the client is closed.
        """
        payload = json.dumps({"jsonrpc": "2.0", "method": method, "params": params})
        with self._lock:
            if self._closed:
                return
            try:
                self._proc.stdin.write(payload + "\n")
                self._proc.stdin.flush()
            except (BrokenPipeError, OSError, ValueError):
                pass

    # ── public API ─────────────────────────────────────────────────────────

    @staticmethod
    def _error_detail(content: object) -> str:
        """Returns the first text entry of an MCP ``content`` list, or ``""``."""
        if isinstance(content, list):
            for part in content:
                if isinstance(part, dict) and isinstance(part.get("text"), str):
                    return part["text"]
        return ""

    def call_tool(self, name: str, arguments: dict) -> dict:
        """Calls a Mimir MCP tool and returns its structured result.

        Args:
            name: The Mimir tool name (e.g. ``mimir_remember``).
            arguments: The tool's arguments dict.

        Returns:
            The tool's ``structuredContent`` if present, otherwise the parsed
            text content, otherwise ``{}``.

        Raises:
            MimirError: On transport/RPC failure, or when the tool result is
                flagged with ``isError`` (previously such failures were
                silently returned as if the call had succeeded).
        """
        result = self._rpc("tools/call", {"name": name, "arguments": arguments})
        if not isinstance(result, dict):
            return {}
        # MCP result: {content: [{type: "text", text: "..."}], structuredContent: {...}}
        content = result.get("content") or []
        if result.get("isError"):
            detail = self._error_detail(content) or "no error detail provided"
            raise MimirError(f"mimir tool '{name}' failed: {detail}")
        sc = result.get("structuredContent")
        if sc is not None:
            return sc
        if content:
            try:
                return json.loads(content[0].get("text", "{}"))
            except (
                json.JSONDecodeError,
                IndexError,
                KeyError,
                AttributeError,
                TypeError,  # e.g. "text" is null
            ):
                pass
        return {}

    # convenience wrappers ---------------------------------------------------

    def remember(
        self,
        text: str,
        *,
        category: str = "langchain-memory",
        key: str | None = None,
        tags: list[str] | None = None,
        extra_body: dict | None = None,
    ) -> dict:
        """Stores a memory. Returns the ``mimir_remember`` result.

        Args:
            text: The natural-language memory content.
            category: Mimir category (namespace) for the entity.
            key: Stable key within the category; autogenerated if omitted.
                Reusing a key updates that entity (idempotent upsert).
            tags: Optional tags stored on the entity.
            extra_body: Extra fields merged into the stored JSON body.
        """
        if key is None:
            # Take the sequence number under the lock so two threads storing in
            # the same millisecond cannot mint the same key and overwrite each
            # other's memory.
            with self._lock:
                seq = self._next_id()
            key = f"mem-{int(time.time() * 1000)}-{seq}"
        body = {"text": text}
        if extra_body:
            body.update(extra_body)
        args = {
            "category": category,
            "key": key,
            "body_json": json.dumps(body),
        }
        if tags:
            args["tags"] = tags
        return self.call_tool("mimir_remember", args)

    def recall(
        self,
        query: str,
        *,
        limit: int = 5,
        category: str | None = None,
    ) -> list[dict]:
        """Searches memories. Returns the list of raw Mimir items.

        Args:
            query: Natural-language / keyword query (FTS5; terms OR'd).
            limit: Maximum number of items to return.
            category: Optional category to scope the search.
        """
        args: dict = {"query": query, "limit": limit}
        if category is not None:
            args["category"] = category
        result = self.call_tool("mimir_recall", args)
        if isinstance(result, list):
            # The tool answered with a bare JSON array of items.
            return result
        if not isinstance(result, dict):
            return []
        items = result.get("items")
        if items is None:
            items = result.get("results", [])
        return items or []
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
"""LangChain integration surface for the Mimir memory engine.
|
|
2
|
+
|
|
3
|
+
Two complementary, currently recommended ``langchain-core`` surfaces are exposed:
|
|
4
|
+
|
|
5
|
+
1. **Tools** — :func:`create_mimir_tools` returns a pair of ``StructuredTool``s
|
|
6
|
+
(``mimir_remember`` / ``mimir_recall``) that an agent can call to persist and
|
|
7
|
+
retrieve long-term memory. Tool-calling is the modern LangChain pattern for
|
|
8
|
+
giving an agent agency over its own memory (the legacy ``Memory`` /
|
|
9
|
+
``ConversationBufferMemory`` classes are deprecated).
|
|
10
|
+
|
|
11
|
+
2. **Retriever** — :class:`MimirRetriever` is a ``BaseRetriever`` that turns a
|
|
12
|
+
query into ``Document`` objects, for drop-in use in RAG chains and anywhere a
|
|
13
|
+
LangChain retriever is accepted (``.invoke(query)``).
|
|
14
|
+
|
|
15
|
+
Both are thin wrappers over :class:`langchain_mimir.client.MimirClient`.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import json
|
|
21
|
+
from typing import Any
|
|
22
|
+
|
|
23
|
+
from langchain_core.callbacks import CallbackManagerForRetrieverRun
|
|
24
|
+
from langchain_core.documents import Document
|
|
25
|
+
from langchain_core.retrievers import BaseRetriever
|
|
26
|
+
from langchain_core.tools import StructuredTool
|
|
27
|
+
from pydantic import BaseModel, Field
|
|
28
|
+
|
|
29
|
+
from .client import MimirClient
|
|
30
|
+
|
|
31
|
+
__all__ = [
|
|
32
|
+
"MimirRetriever",
|
|
33
|
+
"create_mimir_tools",
|
|
34
|
+
"create_remember_tool",
|
|
35
|
+
"create_recall_tool",
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# ── helpers ──────────────────────────────────────────────────────────────────
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _item_to_text(item: dict) -> str:
|
|
43
|
+
"""Extracts the best human-readable text from a Mimir recall item."""
|
|
44
|
+
text = item.get("text")
|
|
45
|
+
if text:
|
|
46
|
+
return text
|
|
47
|
+
body = item.get("body_json")
|
|
48
|
+
if isinstance(body, str):
|
|
49
|
+
try:
|
|
50
|
+
body = json.loads(body)
|
|
51
|
+
except json.JSONDecodeError:
|
|
52
|
+
return body
|
|
53
|
+
if isinstance(body, dict):
|
|
54
|
+
return body.get("text") or body.get("content") or json.dumps(body)
|
|
55
|
+
return ""
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _item_to_document(item: dict) -> Document:
    """Builds a LangChain ``Document`` from one raw Mimir recall item.

    The page content is whatever ``_item_to_text`` extracts; a fixed set of
    item fields is copied into the metadata, with ``None`` for missing ones
    (``tags`` falls back to an empty list instead).
    """
    content = _item_to_text(item)
    metadata = {
        "id": item.get("id"),
        "category": item.get("category"),
        "key": item.get("key"),
        "tags": item.get("tags", []),
        "decay_score": item.get("decay_score"),
        "created_at_unix_ms": item.get("created_at_unix_ms"),
    }
    return Document(page_content=content, metadata=metadata)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# ── retriever ────────────────────────────────────────────────────────────────
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class MimirRetriever(BaseRetriever):
    """Retriever backed by Mimir's FTS5 keyword search.

    Example::

        from langchain_mimir import MimirClient, MimirRetriever

        client = MimirClient(db_path="~/.langchain/mimir.db")
        client.remember("The capital of France is Paris.")

        retriever = MimirRetriever(client=client)
        docs = retriever.invoke("What is the capital of France?")
    """

    # Client used to run the recall query.
    client: MimirClient
    # Passed to ``client.recall`` as ``limit``.
    k: int = 5
    # Passed to ``client.recall`` as ``category``; ``None`` leaves it unscoped.
    category: str | None = None

    # MimirClient is an arbitrary (non-pydantic) type.
    model_config = {"arbitrary_types_allowed": True}

    def _get_relevant_documents(
        self,
        query: str,
        *,
        run_manager: CallbackManagerForRetrieverRun | None = None,
    ) -> list[Document]:
        # ``run_manager`` is accepted for the retriever interface but unused.
        hits = self.client.recall(query, limit=self.k, category=self.category)
        documents = []
        for hit in hits:
            documents.append(_item_to_document(hit))
        return documents
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
# ── tools ────────────────────────────────────────────────────────────────────
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
# Argument schema used by the ``mimir_remember`` tool.
class _RememberInput(BaseModel):
    # Required: the content to store.
    text: str = Field(
        description="The fact or memory to store for later recall.",
    )
    # Optional: defaults to ``None`` when the caller supplies no tags.
    tags: list[str] | None = Field(
        description="Optional tags to label this memory.",
        default=None,
    )
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
# Argument schema used by the ``mimir_recall`` tool.
class _RecallInput(BaseModel):
    # Required: the search query.
    query: str = Field(
        description="What to search the memory for.",
    )
    # Optional: defaults to 5 results.
    limit: int = Field(
        description="Max number of memories to return.",
        default=5,
    )
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def create_remember_tool(
    client: MimirClient,
    *,
    category: str = "langchain-memory",
) -> StructuredTool:
    """Creates the ``mimir_remember`` tool bound to ``client``.

    Args:
        client: Client whose ``remember`` method performs the write.
        category: Category passed to ``remember`` for every call of this tool.

    Returns:
        A ``StructuredTool`` taking ``text`` and optional ``tags`` and
        returning a short confirmation string.
    """

    def _remember(text: str, tags: list[str] | None = None) -> str:
        outcome = client.remember(text, category=category, tags=tags)
        # Fall back to neutral wording when the response omits a field.
        verb = outcome.get("action", "stored")
        stored_key = outcome.get("key", "")
        return f"Memory {verb} (key={stored_key})."

    tool_description = (
        "Store a fact or memory in long-term persistent memory so it can be "
        "recalled in future conversations. Use this whenever the user shares "
        "durable information worth remembering."
    )
    return StructuredTool.from_function(
        func=_remember,
        name="mimir_remember",
        description=tool_description,
        args_schema=_RememberInput,
    )
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def create_recall_tool(
    client: MimirClient,
    *,
    category: str | None = "langchain-memory",
) -> StructuredTool:
    """Builds a ``StructuredTool`` that searches Mimir's memory.

    Args:
        client: Client whose ``recall`` method performs the search.
        category: Category passed to ``recall`` for every call of this tool;
            ``None`` is forwarded unchanged.

    Returns:
        A ``StructuredTool`` taking ``query`` and optional ``limit``. It
        returns one ``"- <text>"`` line per recalled item that has text, or
        ``"No relevant memories found."`` when there is nothing to show.
    """

    def _recall(query: str, limit: int = 5) -> str:
        items = client.recall(query, limit=limit, category=category)
        if not items:
            return "No relevant memories found."
        # Extract each item's text exactly once, then drop the empty ones.
        texts = (_item_to_text(it) for it in items)
        lines = [f"- {text}" for text in texts if text]
        return "\n".join(lines) if lines else "No relevant memories found."

    return StructuredTool.from_function(
        func=_recall,
        name="mimir_recall",
        description=(
            "Search long-term persistent memory for facts relevant to a query. "
            "Use this to recall things the user told you in past conversations."
        ),
        args_schema=_RecallInput,
    )
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def create_mimir_tools(
    client: MimirClient,
    *,
    category: str = "langchain-memory",
) -> list[StructuredTool]:
    """Returns ``[remember_tool, recall_tool]`` bound to ``client``.

    Pass the result to any LangChain agent / ``bind_tools`` call to give the
    model agency over its own persistent memory.

    Args:
        client: An initialized :class:`MimirClient`.
        category: The Mimir category (namespace) used for both tools.

    Returns:
        A two-element list: the remember tool first, the recall tool second.
    """
    remember_tool = create_remember_tool(client, category=category)
    recall_tool = create_recall_tool(client, category=category)
    return [remember_tool, recall_tool]
|
|
File without changes
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "langchain-mimir"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
authors = [
|
|
9
|
+
{ name = "Perseus Computing LLC", email = "hermes@perseus.observer" },
|
|
10
|
+
]
|
|
11
|
+
description = "Mimir persistent, local, encrypted memory for LangChain — tools and a retriever backed by the Mimir MCP engine."
|
|
12
|
+
readme = "README.md"
|
|
13
|
+
license = { text = "MIT" }
|
|
14
|
+
requires-python = ">=3.10"
|
|
15
|
+
keywords = ["langchain", "mimir", "memory", "mcp", "retriever", "agents", "llm"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"Programming Language :: Python :: 3.10",
|
|
19
|
+
"Programming Language :: Python :: 3.11",
|
|
20
|
+
"Programming Language :: Python :: 3.12",
|
|
21
|
+
"Programming Language :: Python :: 3.13",
|
|
22
|
+
"License :: OSI Approved :: MIT License",
|
|
23
|
+
"Operating System :: OS Independent",
|
|
24
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
25
|
+
"Intended Audience :: Developers",
|
|
26
|
+
]
|
|
27
|
+
dependencies = [
|
|
28
|
+
"langchain-core>=0.3.0",
|
|
29
|
+
"pydantic>=2.0",
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
[project.optional-dependencies]
|
|
33
|
+
test = ["pytest>=7.0"]
|
|
34
|
+
|
|
35
|
+
[project.urls]
|
|
36
|
+
Homepage = "https://github.com/Perseus-Computing-LLC/langchain-mimir"
|
|
37
|
+
Repository = "https://github.com/Perseus-Computing-LLC/langchain-mimir"
|
|
38
|
+
"Bug Tracker" = "https://github.com/Perseus-Computing-LLC/langchain-mimir/issues"
|
|
39
|
+
Mimir = "https://github.com/Perseus-Computing-LLC/mimir"
|
|
40
|
+
|
|
41
|
+
[tool.hatch.build.targets.wheel]
|
|
42
|
+
packages = ["langchain_mimir"]
|
|
@@ -0,0 +1,336 @@
|
|
|
1
|
+
"""Tests for langchain-mimir.
|
|
2
|
+
|
|
3
|
+
The unit tests monkeypatch ``subprocess.Popen`` with an in-process fake that
|
|
4
|
+
speaks JSON-RPC 2.0 over fake stdin/stdout pipes and models Mimir's
|
|
5
|
+
remember/recall behavior, so they run with no real ``mimir`` binary. They
|
|
6
|
+
exercise the real RPC, threading, tool, and retriever code paths.
|
|
7
|
+
|
|
8
|
+
A final smoke test runs a real remember->recall round trip if (and only if) a
|
|
9
|
+
``mimir`` binary is discoverable; otherwise it is skipped.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import json
|
|
15
|
+
import queue
|
|
16
|
+
import shutil
|
|
17
|
+
|
|
18
|
+
import pytest
|
|
19
|
+
|
|
20
|
+
from langchain_core.documents import Document
|
|
21
|
+
from langchain_core.retrievers import BaseRetriever
|
|
22
|
+
from langchain_core.tools import StructuredTool
|
|
23
|
+
|
|
24
|
+
import langchain_mimir.client as client_mod
|
|
25
|
+
from langchain_mimir import (
|
|
26
|
+
MimirClient,
|
|
27
|
+
MimirError,
|
|
28
|
+
MimirRetriever,
|
|
29
|
+
create_mimir_tools,
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# ── Fake Mimir MCP stdio server ──────────────────────────────────────────────
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class _FakeStdin:
|
|
37
|
+
def __init__(self, on_line):
|
|
38
|
+
self._on_line = on_line
|
|
39
|
+
|
|
40
|
+
def write(self, s):
|
|
41
|
+
for line in s.splitlines():
|
|
42
|
+
if line.strip():
|
|
43
|
+
self._on_line(line)
|
|
44
|
+
|
|
45
|
+
def flush(self):
|
|
46
|
+
pass
|
|
47
|
+
|
|
48
|
+
def close(self):
|
|
49
|
+
pass
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class _FakeStdout:
|
|
53
|
+
"""Blocking, iterable line source fed by the fake server."""
|
|
54
|
+
|
|
55
|
+
def __init__(self):
|
|
56
|
+
self._q = queue.Queue()
|
|
57
|
+
|
|
58
|
+
def put(self, line):
|
|
59
|
+
self._q.put(line)
|
|
60
|
+
|
|
61
|
+
def __iter__(self):
|
|
62
|
+
return self
|
|
63
|
+
|
|
64
|
+
def __next__(self):
|
|
65
|
+
item = self._q.get()
|
|
66
|
+
if item is None:
|
|
67
|
+
raise StopIteration
|
|
68
|
+
return item
|
|
69
|
+
|
|
70
|
+
def close(self):
|
|
71
|
+
self._q.put(None)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class FakeMimir:
    """Minimal Popen-compatible fake of the Mimir MCP stdio server.

    Requests arrive as JSON-RPC lines through ``stdin``; responses are queued
    on ``stdout``. ``mimir_remember`` is an upsert into ``self.store`` keyed
    by ``(category, key)``; ``mimir_recall`` is a naive OR-of-terms substring
    match over the stored text and returns Mimir-shaped items.

    With ``answer_tools`` false, ``tools/call`` requests get no response.
    """

    def __init__(self, *, answer_tools=True):
        self.store: dict[tuple, dict] = {}  # (category, key) -> item
        self._counter = 0
        self.stdout = _FakeStdout()
        self.stdin = _FakeStdin(self._handle)
        self._alive = True
        self._answer_tools = answer_tools

    # Popen-compatible surface -------------------------------------------------
    def terminate(self):
        """Marks the fake as dead and ends the stdout stream."""
        self._alive = False
        self.stdout.close()

    def wait(self, timeout=None):
        """Returns exit code 0 immediately."""
        return 0

    def kill(self):
        """Same effect as ``terminate``."""
        self._alive = False
        self.stdout.close()

    # JSON-RPC handling --------------------------------------------------------
    def _reply(self, rid, result):
        """Queues a JSON-RPC 2.0 success response for request ``rid``."""
        envelope = {"jsonrpc": "2.0", "id": rid, "result": result}
        self.stdout.put(json.dumps(envelope))

    def _handle(self, line):
        """Parses one request line and dispatches on its method."""
        request = json.loads(line)
        rid = request.get("id")
        method = request.get("method")
        if rid is None:
            return  # notification, no response
        if method == "initialize":
            self._reply(rid, {"protocolVersion": "2024-11-05", "capabilities": {}})
        elif method == "tools/call":
            # Staying silent simulates a hang -> RPC timeout on the client.
            if self._answer_tools:
                self._handle_tool(rid, request["params"])
        else:
            self._reply(rid, {})

    def _handle_tool(self, rid, params):
        """Routes a ``tools/call`` request to the matching tool handler."""
        name = params["name"]
        args = params["arguments"]
        if name == "mimir_remember":
            self._tool_remember(rid, args)
        elif name == "mimir_recall":
            self._tool_recall(rid, args)
        else:
            self._mcp_reply(rid, {})

    def _tool_remember(self, rid, args):
        """Upserts one item and replies with ``created`` or ``updated``."""
        self._counter += 1
        category, key = args["category"], args["key"]
        slot = (category, key)
        already_stored = slot in self.store
        raw_body = args.get("body_json", "{}")
        try:
            text = json.loads(raw_body).get("text", "")
        except json.JSONDecodeError:
            text = ""
        item_id = f"mem-{self._counter}"
        self.store[slot] = {
            "id": item_id,
            "category": category,
            "key": key,
            "text": text,
            "body_json": raw_body,
            "tags": args.get("tags", []),
            "decay_score": 0.5,
            "created_at_unix_ms": 1000 + self._counter,
        }
        self._mcp_reply(
            rid,
            {
                "action": "updated" if already_stored else "created",
                "category": category,
                "key": key,
                "id": item_id,
            },
        )

    def _tool_recall(self, rid, args):
        """Replies with stored items whose text contains any query term."""
        terms = args.get("query", "").lower().split()
        wanted_category = args.get("category")
        limit = args.get("limit", 5)
        matches = []
        for (item_category, _key), item in self.store.items():
            if wanted_category is None or item_category == wanted_category:
                haystack = item["text"].lower()
                if any(term in haystack for term in terms):
                    matches.append(item)
        matches = matches[:limit]
        self._mcp_reply(rid, {"items": matches, "total": len(matches)})

    def _mcp_reply(self, rid, structured):
        """Replies with ``structured`` in the MCP ``tools/call`` result shape."""
        # Mirror real MCP tools/call result shape.
        payload = {
            "content": [{"type": "text", "text": json.dumps(structured)}],
            "structuredContent": structured,
        }
        self._reply(rid, payload)
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
# ── fixtures ─────────────────────────────────────────────────────────────────
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
@pytest.fixture
def fake_client(monkeypatch, tmp_path):
    """Yields a ``MimirClient`` wired to an in-process ``FakeMimir``.

    ``subprocess.Popen`` and ``shutil.which`` are patched on the client
    module so no real binary is needed. The fake is exposed as
    ``client._fake`` for assertions, and the client is closed on teardown.
    """
    server = FakeMimir()

    monkeypatch.setattr(
        client_mod.subprocess, "Popen", lambda argv, **kwargs: server
    )
    # Make binary resolution succeed without a real executable.
    monkeypatch.setattr(client_mod.shutil, "which", lambda name: "/fake/mimir")

    db_file = tmp_path / "mimir.db"
    client = MimirClient(db_path=str(db_file))
    client._fake = server  # for assertions
    yield client
    client.close()
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
# ── client tests ─────────────────────────────────────────────────────────────
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def test_binary_not_found(monkeypatch, tmp_path):
    """Constructing a client raises ``MimirError`` when no binary resolves."""
    monkeypatch.setattr(client_mod.shutil, "which", lambda name: None)
    db_file = str(tmp_path / "x.db")
    with pytest.raises(MimirError, match="mimir binary not found"):
        MimirClient(db_path=db_file, mimir_binary="definitely-missing")
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def test_remember_then_recall(fake_client):
    """A stored memory is reported as created and found by a later recall."""
    stored = fake_client.remember("The capital of France is Paris.", key="k1")
    assert stored["action"] == "created"

    hits = fake_client.recall("capital France")
    assert len(hits) == 1
    first_hit = hits[0]
    assert "Paris" in first_hit["text"]
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def test_remember_is_idempotent_upsert(fake_client):
    """Remembering twice under one key updates the entry instead of adding one."""
    fake_client.remember("first", key="dup")
    second = fake_client.remember("second", key="dup")
    assert second["action"] == "updated"
    stored_items = fake_client._fake.store
    assert len(stored_items) == 1
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def test_recall_respects_limit(fake_client):
    """Five matching memories recalled with ``limit=2`` yield exactly two hits."""
    for index in range(5):
        fake_client.remember(f"alpha memory number {index}", key=f"k{index}")
    hits = fake_client.recall("alpha", limit=2)
    assert len(hits) == 2
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def test_recall_no_match_returns_empty(fake_client):
    """A query matching nothing returns an empty list."""
    fake_client.remember("something unrelated", key="k1")
    hits = fake_client.recall("nonexistent zebra")
    assert hits == []
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def test_rpc_timeout(monkeypatch, tmp_path):
    """A tool call that never receives a response raises ``MimirError``.

    The fake answers normally while the client is constructed, then stops
    answering ``tools/call`` so the next request has to time out.
    """
    fake = FakeMimir(answer_tools=True)
    monkeypatch.setattr(client_mod.subprocess, "Popen", lambda *a, **k: fake)
    monkeypatch.setattr(client_mod.shutil, "which", lambda name: "/fake/mimir")
    client = MimirClient(db_path=str(tmp_path / "m.db"), timeout_s=0.3)
    try:
        # Flip the fake to stop answering tool calls -> the next call must time out.
        fake._answer_tools = False
        with pytest.raises(MimirError, match="timed out"):
            client.recall("anything")
    finally:
        # Close even when the assertion fails so the client is never leaked.
        client.close()
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
# ── tools tests ──────────────────────────────────────────────────────────────
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def test_create_mimir_tools_shape(fake_client):
    """``create_mimir_tools`` returns two ``StructuredTool``s with the expected names."""
    tools = create_mimir_tools(fake_client)
    assert len(tools) == 2
    for tool in tools:
        assert isinstance(tool, StructuredTool)
    assert {tool.name for tool in tools} == {"mimir_remember", "mimir_recall"}
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def test_remember_tool_invoke(fake_client):
    """The remember tool confirms the write and the recall tool finds it."""
    remember_tool, recall_tool = create_mimir_tools(fake_client)
    confirmation = remember_tool.invoke({"text": "I love Rust.", "tags": ["pref"]})
    assert any(word in confirmation for word in ("created", "stored"))
    # And it is recallable through the recall tool.
    recalled = recall_tool.invoke({"query": "Rust"})
    assert "Rust" in recalled
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
def test_recall_tool_no_results(fake_client):
    """The recall tool returns its fixed message when nothing matches."""
    recall_tool = create_mimir_tools(fake_client)[1]
    answer = recall_tool.invoke({"query": "nothing here"})
    assert answer == "No relevant memories found."
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def test_tool_args_schema_present(fake_client):
    """Each tool exposes its main argument through ``.args``."""
    remember_tool, recall_tool = create_mimir_tools(fake_client)
    assert "text" in remember_tool.args
    assert "query" in recall_tool.args
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
# ── retriever tests ──────────────────────────────────────────────────────────
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def test_retriever_is_base_retriever(fake_client):
    """``MimirRetriever`` instances are LangChain ``BaseRetriever``s."""
    retriever = MimirRetriever(client=fake_client)
    assert isinstance(retriever, BaseRetriever)
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def test_retriever_returns_documents(fake_client):
    """A recalled memory comes back as one ``Document`` with text and metadata."""
    fake_client.remember("The capital of France is Paris.", key="k1")
    retriever = MimirRetriever(client=fake_client, k=3)

    docs = retriever.invoke("What is the capital of France?")

    assert len(docs) == 1
    doc = docs[0]
    assert isinstance(doc, Document)
    assert "Paris" in doc.page_content
    assert doc.metadata["key"] == "k1"
    assert doc.metadata["category"] == "langchain-memory"
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def test_retriever_empty(fake_client):
    """With nothing stored, the retriever returns an empty list."""
    retriever = MimirRetriever(client=fake_client)
    docs = retriever.invoke("zebra unicorn")
    assert docs == []
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
def test_retriever_category_scoping(fake_client):
    """A retriever bound to one category ignores matches in another."""
    fake_client.remember("scoped fact apple", category="catA", key="a")
    fake_client.remember("other fact apple", category="catB", key="b")

    scoped = MimirRetriever(client=fake_client, category="catA")
    docs = scoped.invoke("apple")

    assert len(docs) == 1
    assert docs[0].metadata["category"] == "catA"
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
# ── real binary smoke test (skipped if mimir is unavailable) ─────────────────
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
def _find_mimir():
|
|
317
|
+
return shutil.which("mimir") or shutil.which("mimir.exe")
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
@pytest.mark.skipif(_find_mimir() is None, reason="no real mimir binary on PATH")
def test_real_roundtrip(tmp_path):
    """Real remember -> recall against an actual mimir subprocess.

    Checks both the raw client and the retriever, and always closes the
    client afterwards.
    """
    binary = _find_mimir()
    db_file = str(tmp_path / "real.db")
    client = MimirClient(db_path=db_file, mimir_binary=binary)
    try:
        client.remember(
            "The capital of France is Paris.",
            category="lc-smoke",
            key="smoke1",
        )
        recalled = client.recall("capital France", category="lc-smoke")
        recalled_texts = [(it.get("text") or "") for it in recalled]
        assert any("Paris" in text for text in recalled_texts)

        retriever = MimirRetriever(client=client, category="lc-smoke")
        docs = retriever.invoke("capital of France")
        assert any("Paris" in doc.page_content for doc in docs)
    finally:
        client.close()
|