autogen-goodmem 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autogen_goodmem-0.1.0/PKG-INFO +126 -0
- autogen_goodmem-0.1.0/README.md +95 -0
- autogen_goodmem-0.1.0/autogen_goodmem/__init__.py +24 -0
- autogen_goodmem-0.1.0/autogen_goodmem/_client.py +500 -0
- autogen_goodmem-0.1.0/autogen_goodmem/_config.py +47 -0
- autogen_goodmem-0.1.0/autogen_goodmem/_context_provider.py +232 -0
- autogen_goodmem-0.1.0/autogen_goodmem/_tools.py +180 -0
- autogen_goodmem-0.1.0/pyproject.toml +50 -0
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: autogen-goodmem
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: GoodMem memory and tools for the AutoGen agent framework.
|
|
5
|
+
Keywords: autogen,goodmem,memory,rag,agents,llm
|
|
6
|
+
Author: PAIR Systems
|
|
7
|
+
Requires-Python: >=3.10
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
17
|
+
Requires-Dist: autogen-core>=0.7.5
|
|
18
|
+
Requires-Dist: httpx>=0.24.0,<1
|
|
19
|
+
Requires-Dist: pydantic>=2.0,<3
|
|
20
|
+
Requires-Dist: typing-extensions>=4.7
|
|
21
|
+
Requires-Dist: pytest>=7 ; extra == "dev"
|
|
22
|
+
Requires-Dist: pytest-asyncio>=0.23 ; extra == "dev"
|
|
23
|
+
Requires-Dist: pytest-timeout ; extra == "dev"
|
|
24
|
+
Requires-Dist: build ; extra == "dev"
|
|
25
|
+
Requires-Dist: twine ; extra == "dev"
|
|
26
|
+
Project-URL: Homepage, https://github.com/PAIR-Systems-Inc/autogen-goodmem
|
|
27
|
+
Project-URL: Issues, https://github.com/PAIR-Systems-Inc/autogen-goodmem/issues
|
|
28
|
+
Project-URL: Repository, https://github.com/PAIR-Systems-Inc/autogen-goodmem
|
|
29
|
+
Provides-Extra: dev
|
|
30
|
+
|
|
31
|
+
# autogen-goodmem
|
|
32
|
+
|
|
33
|
+
[GoodMem](https://goodmem.ai) memory and tools for the [AutoGen](https://github.com/microsoft/autogen) agent framework.
|
|
34
|
+
|
|
35
|
+
`autogen-goodmem` gives an AutoGen agent two things:
|
|
36
|
+
|
|
37
|
+
1. **`GoodMemContextProvider`** — an `autogen_core.memory.Memory` implementation backed by a GoodMem space, so context retrieval is automatic on every turn.
|
|
38
|
+
2. **`create_goodmem_tools(client)`** — 11 `FunctionTool`s the agent can call directly to manage spaces and memories.
|
|
39
|
+
|
|
40
|
+
## Install
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
pip install autogen-goodmem
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Quickstart
|
|
47
|
+
|
|
48
|
+
### As an AutoGen `Memory`
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
import asyncio
|
|
52
|
+
from autogen_core.memory import MemoryContent, MemoryMimeType
|
|
53
|
+
from autogen_goodmem import GoodMemContextProvider, GoodMemMemoryConfig
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
async def main() -> None:
|
|
57
|
+
provider = GoodMemContextProvider(
|
|
58
|
+
config=GoodMemMemoryConfig(
|
|
59
|
+
base_url="https://localhost:8080",
|
|
60
|
+
api_key="gm_...",
|
|
61
|
+
space_name="my-kb",
|
|
62
|
+
embedder_id="<embedder-uuid>",
|
|
63
|
+
verify_ssl=False,
|
|
64
|
+
)
|
|
65
|
+
)
|
|
66
|
+
await provider.add(MemoryContent(content="The capital of France is Paris.", mime_type=MemoryMimeType.TEXT))
|
|
67
|
+
results = await provider.query("What is the capital of France?")
|
|
68
|
+
for r in results.results:
|
|
69
|
+
print(r.content)
|
|
70
|
+
await provider.close()
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
asyncio.run(main())
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### As tools on an agent
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
from autogen_goodmem import GoodMemClient, create_goodmem_tools
|
|
80
|
+
|
|
81
|
+
client = GoodMemClient(base_url="https://localhost:8080", api_key="gm_...", verify_ssl=False)
|
|
82
|
+
tools = create_goodmem_tools(client) # 11 FunctionTools
|
|
83
|
+
# pass `tools=tools` to your AssistantAgent
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Tool surface
|
|
87
|
+
|
|
88
|
+
`create_goodmem_tools(client)` returns these 11 tools in order:
|
|
89
|
+
|
|
90
|
+
| Tool | Purpose |
|
|
91
|
+
| --- | --- |
|
|
92
|
+
| `goodmem_list_embedders` | List embedder models available on the server. |
|
|
93
|
+
| `goodmem_list_spaces` | List all spaces visible to the API key. |
|
|
94
|
+
| `goodmem_get_space` | Fetch one space by ID. |
|
|
95
|
+
| `goodmem_create_space` | Create a space (idempotent by name). |
|
|
96
|
+
| `goodmem_update_space` | Update name / publicRead / labels / chunking. |
|
|
97
|
+
| `goodmem_delete_space` | Delete a space and everything in it. |
|
|
98
|
+
| `goodmem_create_memory` | Add a memory from text or a local file. |
|
|
99
|
+
| `goodmem_list_memories` | Paginated listing of a space's memories. |
|
|
100
|
+
| `goodmem_retrieve_memories` | Semantic retrieval, optional reranker + LLM. |
|
|
101
|
+
| `goodmem_get_memory` | Fetch a memory's metadata and original content. |
|
|
102
|
+
| `goodmem_delete_memory` | Delete a memory and its embeddings. |
|
|
103
|
+
|
|
104
|
+
`goodmem_retrieve_memories` (and `GoodMemClient.retrieve_memories`) accept the optional post-processor params: `reranker_id`, `llm_id`, `relevance_threshold` (0-1), `llm_temperature` (0-2), `max_results`, `chronological_resort`.
|
|
105
|
+
|
|
106
|
+
## Integration tests
|
|
107
|
+
|
|
108
|
+
The tests in `tests/test_goodmem_integration.py` exercise every public method against a live GoodMem server. They are opt-in via the `integration` marker.
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
pip install -e ".[dev]"
|
|
112
|
+
|
|
113
|
+
export GOODMEM_API_KEY=gm_...
|
|
114
|
+
export GOODMEM_BASE_URL=https://localhost:8080
|
|
115
|
+
export GOODMEM_EMBEDDER_ID=<uuid>
|
|
116
|
+
export GOODMEM_RERANKER_ID=<uuid>
|
|
117
|
+
export GOODMEM_LLM_ID=<uuid>
|
|
118
|
+
export GOODMEM_PDF_PATH=/path/to/sample.pdf # optional
|
|
119
|
+
|
|
120
|
+
python -m pytest tests/test_goodmem_integration.py -v -s -m integration
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
## License
|
|
124
|
+
|
|
125
|
+
MIT
|
|
126
|
+
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# autogen-goodmem
|
|
2
|
+
|
|
3
|
+
[GoodMem](https://goodmem.ai) memory and tools for the [AutoGen](https://github.com/microsoft/autogen) agent framework.
|
|
4
|
+
|
|
5
|
+
`autogen-goodmem` gives an AutoGen agent two things:
|
|
6
|
+
|
|
7
|
+
1. **`GoodMemContextProvider`** — an `autogen_core.memory.Memory` implementation backed by a GoodMem space, so context retrieval is automatic on every turn.
|
|
8
|
+
2. **`create_goodmem_tools(client)`** — 11 `FunctionTool`s the agent can call directly to manage spaces and memories.
|
|
9
|
+
|
|
10
|
+
## Install
|
|
11
|
+
|
|
12
|
+
```bash
|
|
13
|
+
pip install autogen-goodmem
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
## Quickstart
|
|
17
|
+
|
|
18
|
+
### As an AutoGen `Memory`
|
|
19
|
+
|
|
20
|
+
```python
|
|
21
|
+
import asyncio
|
|
22
|
+
from autogen_core.memory import MemoryContent, MemoryMimeType
|
|
23
|
+
from autogen_goodmem import GoodMemContextProvider, GoodMemMemoryConfig
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
async def main() -> None:
|
|
27
|
+
provider = GoodMemContextProvider(
|
|
28
|
+
config=GoodMemMemoryConfig(
|
|
29
|
+
base_url="https://localhost:8080",
|
|
30
|
+
api_key="gm_...",
|
|
31
|
+
space_name="my-kb",
|
|
32
|
+
embedder_id="<embedder-uuid>",
|
|
33
|
+
verify_ssl=False,
|
|
34
|
+
)
|
|
35
|
+
)
|
|
36
|
+
await provider.add(MemoryContent(content="The capital of France is Paris.", mime_type=MemoryMimeType.TEXT))
|
|
37
|
+
results = await provider.query("What is the capital of France?")
|
|
38
|
+
for r in results.results:
|
|
39
|
+
print(r.content)
|
|
40
|
+
await provider.close()
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
asyncio.run(main())
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### As tools on an agent
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
from autogen_goodmem import GoodMemClient, create_goodmem_tools
|
|
50
|
+
|
|
51
|
+
client = GoodMemClient(base_url="https://localhost:8080", api_key="gm_...", verify_ssl=False)
|
|
52
|
+
tools = create_goodmem_tools(client) # 11 FunctionTools
|
|
53
|
+
# pass `tools=tools` to your AssistantAgent
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Tool surface
|
|
57
|
+
|
|
58
|
+
`create_goodmem_tools(client)` returns these 11 tools in order:
|
|
59
|
+
|
|
60
|
+
| Tool | Purpose |
|
|
61
|
+
| --- | --- |
|
|
62
|
+
| `goodmem_list_embedders` | List embedder models available on the server. |
|
|
63
|
+
| `goodmem_list_spaces` | List all spaces visible to the API key. |
|
|
64
|
+
| `goodmem_get_space` | Fetch one space by ID. |
|
|
65
|
+
| `goodmem_create_space` | Create a space (idempotent by name). |
|
|
66
|
+
| `goodmem_update_space` | Update name / publicRead / labels / chunking. |
|
|
67
|
+
| `goodmem_delete_space` | Delete a space and everything in it. |
|
|
68
|
+
| `goodmem_create_memory` | Add a memory from text or a local file. |
|
|
69
|
+
| `goodmem_list_memories` | Paginated listing of a space's memories. |
|
|
70
|
+
| `goodmem_retrieve_memories` | Semantic retrieval, optional reranker + LLM. |
|
|
71
|
+
| `goodmem_get_memory` | Fetch a memory's metadata and original content. |
|
|
72
|
+
| `goodmem_delete_memory` | Delete a memory and its embeddings. |
|
|
73
|
+
|
|
74
|
+
`goodmem_retrieve_memories` (and `GoodMemClient.retrieve_memories`) accept the optional post-processor params: `reranker_id`, `llm_id`, `relevance_threshold` (0-1), `llm_temperature` (0-2), `max_results`, `chronological_resort`.
|
|
75
|
+
|
|
76
|
+
## Integration tests
|
|
77
|
+
|
|
78
|
+
The tests in `tests/test_goodmem_integration.py` exercise every public method against a live GoodMem server. They are opt-in via the `integration` marker.
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
pip install -e ".[dev]"
|
|
82
|
+
|
|
83
|
+
export GOODMEM_API_KEY=gm_...
|
|
84
|
+
export GOODMEM_BASE_URL=https://localhost:8080
|
|
85
|
+
export GOODMEM_EMBEDDER_ID=<uuid>
|
|
86
|
+
export GOODMEM_RERANKER_ID=<uuid>
|
|
87
|
+
export GOODMEM_LLM_ID=<uuid>
|
|
88
|
+
export GOODMEM_PDF_PATH=/path/to/sample.pdf # optional
|
|
89
|
+
|
|
90
|
+
python -m pytest tests/test_goodmem_integration.py -v -s -m integration
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## License
|
|
94
|
+
|
|
95
|
+
MIT
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""autogen-goodmem: GoodMem memory + tools for the AutoGen agent framework."""

from ._client import GoodMemClient
from ._config import ChunkingConfig, GoodMemMemoryConfig, PostProcessorConfig
from ._context_provider import GoodMemContextProvider
from ._tools import TOOL_NAMES, create_goodmem_tools

# Backwards-compatible alias for code that imported GoodMemMemory from the
# autogen-ext monorepo location.
GoodMemMemory = GoodMemContextProvider

__version__ = "0.1.0"

# Public API surface; everything else in this package is underscore-private.
__all__ = [
    "GoodMemClient",
    "GoodMemContextProvider",
    "GoodMemMemory",
    "GoodMemMemoryConfig",
    "ChunkingConfig",
    "PostProcessorConfig",
    "create_goodmem_tools",
    "TOOL_NAMES",
    "__version__",
]
|
|
@@ -0,0 +1,500 @@
|
|
|
1
|
+
"""Pure-async REST client for the GoodMem v1 API.
|
|
2
|
+
|
|
3
|
+
This client is framework-agnostic: it returns plain dictionaries (and lists of
|
|
4
|
+
them) and never imports anything from AutoGen. :class:`GoodMemContextProvider`
|
|
5
|
+
and :func:`create_goodmem_tools` are built on top of it.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import asyncio
|
|
11
|
+
import base64
|
|
12
|
+
import json
|
|
13
|
+
import logging
|
|
14
|
+
import mimetypes
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
17
|
+
|
|
18
|
+
import httpx
|
|
19
|
+
|
|
20
|
+
from ._config import ChunkingConfig, PostProcessorConfig
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# ── MIME type detection ─────────────────────────────────────────────────
|
|
26
|
+
|
|
27
|
+
_EXTRA_MIME: Dict[str, str] = {
|
|
28
|
+
".md": "text/markdown",
|
|
29
|
+
".pdf": "application/pdf",
|
|
30
|
+
".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
31
|
+
".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
|
32
|
+
".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _guess_mime(path: Path) -> str:
|
|
37
|
+
mime, _ = mimetypes.guess_type(path.name)
|
|
38
|
+
if mime:
|
|
39
|
+
return mime
|
|
40
|
+
return _EXTRA_MIME.get(path.suffix.lower(), "application/octet-stream")
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# ── Retrieve-response NDJSON/SSE parser ────────────────────────────────
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _parse_retrieve_response(
|
|
47
|
+
response_text: str,
|
|
48
|
+
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]], Any]:
|
|
49
|
+
"""Parse the NDJSON / SSE body returned by ``POST /v1/memories:retrieve``.
|
|
50
|
+
|
|
51
|
+
Returns ``(results, memories, abstract_reply)`` where ``results`` is a list
|
|
52
|
+
of ``{chunkId, chunkText, memoryId, relevanceScore, memoryIndex}`` dicts.
|
|
53
|
+
"""
|
|
54
|
+
results: List[Dict[str, Any]] = []
|
|
55
|
+
memories: List[Dict[str, Any]] = []
|
|
56
|
+
abstract_reply: Any = None
|
|
57
|
+
|
|
58
|
+
for raw_line in response_text.splitlines():
|
|
59
|
+
line = raw_line.strip()
|
|
60
|
+
if not line:
|
|
61
|
+
continue
|
|
62
|
+
if line.startswith("event:"):
|
|
63
|
+
continue
|
|
64
|
+
if line.startswith("data:"):
|
|
65
|
+
line = line[5:].strip()
|
|
66
|
+
if not line:
|
|
67
|
+
continue
|
|
68
|
+
try:
|
|
69
|
+
item = json.loads(line)
|
|
70
|
+
except json.JSONDecodeError:
|
|
71
|
+
continue
|
|
72
|
+
|
|
73
|
+
if item.get("memoryDefinition"):
|
|
74
|
+
memories.append(item["memoryDefinition"])
|
|
75
|
+
elif item.get("abstractReply"):
|
|
76
|
+
abstract_reply = item["abstractReply"]
|
|
77
|
+
elif item.get("retrievedItem"):
|
|
78
|
+
chunk_outer = item["retrievedItem"].get("chunk", {})
|
|
79
|
+
chunk = chunk_outer.get("chunk", {})
|
|
80
|
+
results.append(
|
|
81
|
+
{
|
|
82
|
+
"chunkId": chunk.get("chunkId"),
|
|
83
|
+
"chunkText": chunk.get("chunkText"),
|
|
84
|
+
"memoryId": chunk.get("memoryId"),
|
|
85
|
+
"relevanceScore": chunk_outer.get("relevanceScore"),
|
|
86
|
+
"memoryIndex": chunk_outer.get("memoryIndex"),
|
|
87
|
+
}
|
|
88
|
+
)
|
|
89
|
+
|
|
90
|
+
return results, memories, abstract_reply
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
# ── Client ──────────────────────────────────────────────────────────────
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class GoodMemClient:
    """Async REST client for the GoodMem v1 API.

    All methods return plain dictionaries / lists so the client stays
    framework-agnostic. The underlying ``httpx.AsyncClient`` is created
    lazily, so this object can be constructed outside an event loop.

    Args:
        base_url: Base URL, e.g. ``https://localhost:8080``.
        api_key: API key sent as the ``X-API-Key`` header.
        verify_ssl: Pass ``False`` for self-signed local servers.
        timeout: Per-request timeout (seconds).
    """

    def __init__(
        self,
        base_url: str,
        api_key: str,
        *,
        verify_ssl: bool = True,
        timeout: float = 120.0,
    ) -> None:
        self._base_url = base_url.rstrip("/")
        self._api_key = api_key
        self._verify_ssl = verify_ssl
        self._timeout = timeout
        self._headers = {
            "X-API-Key": api_key,
            "Content-Type": "application/json",
            "Accept": "application/json",
        }
        # Created on first use (see _ensure_client); None after close().
        self._client: Optional[httpx.AsyncClient] = None

    # ── Lifecycle ──────────────────────────────────────────────────────

    def _ensure_client(self) -> httpx.AsyncClient:
        """Return the shared AsyncClient, (re)creating it if closed."""
        if self._client is None or self._client.is_closed:
            self._client = httpx.AsyncClient(timeout=self._timeout, verify=self._verify_ssl)
        return self._client

    async def close(self) -> None:
        """Close the underlying HTTP client. Safe to call more than once."""
        if self._client and not self._client.is_closed:
            await self._client.aclose()
        self._client = None

    async def __aenter__(self) -> "GoodMemClient":
        self._ensure_client()
        return self

    async def __aexit__(self, exc_type: Any, exc: Any, tb: Any) -> None:
        await self.close()

    # ── Embedders ──────────────────────────────────────────────────────

    async def list_embedders(self) -> List[Dict[str, Any]]:
        """``GET /v1/embedders`` -> list of embedder definitions."""
        client = self._ensure_client()
        resp = await client.get(f"{self._base_url}/v1/embedders", headers=self._headers)
        resp.raise_for_status()
        body = resp.json()
        # The server may answer with a bare list or {"embedders": [...]}.
        return body if isinstance(body, list) else body.get("embedders", [])

    # ── Spaces ─────────────────────────────────────────────────────────

    async def list_spaces(self) -> List[Dict[str, Any]]:
        """``GET /v1/spaces`` -> list of spaces."""
        client = self._ensure_client()
        resp = await client.get(f"{self._base_url}/v1/spaces", headers=self._headers)
        resp.raise_for_status()
        body = resp.json()
        # Same dual shape as list_embedders: bare list or wrapped.
        return body if isinstance(body, list) else body.get("spaces", [])

    async def get_space(self, space_id: str) -> Dict[str, Any]:
        """``GET /v1/spaces/{id}``."""
        client = self._ensure_client()
        resp = await client.get(f"{self._base_url}/v1/spaces/{space_id}", headers=self._headers)
        resp.raise_for_status()
        return resp.json()

    async def create_space(
        self,
        name: str,
        embedder_id: str,
        *,
        chunking: Optional[ChunkingConfig] = None,
    ) -> Dict[str, Any]:
        """Create a space, or reuse an existing one with the same name.

        Returns a dict that includes ``reused: True`` when a name collision was
        found. Handles a race condition where ``POST /v1/spaces`` returns 409
        between the list-check and the create call.
        """
        chunking = chunking or ChunkingConfig()
        client = self._ensure_client()

        # Best-effort idempotency: reuse an existing space with this name.
        try:
            existing = await self.list_spaces()
            for space in existing:
                if space.get("name") == name:
                    # Older/newer servers differ on the ID field name.
                    sid = space.get("spaceId") or space.get("id")
                    return {
                        "spaceId": sid,
                        "name": name,
                        "embedderId": embedder_id,
                        "reused": True,
                    }
        except Exception:
            logger.debug("list_spaces failed before create; will attempt create anyway", exc_info=True)

        body = {
            "name": name,
            "spaceEmbedders": [{"embedderId": embedder_id, "defaultRetrievalWeight": 1.0}],
            "defaultChunkingConfig": {
                "recursive": {
                    "chunkSize": chunking.chunk_size,
                    "chunkOverlap": chunking.chunk_overlap,
                    "separators": chunking.separators,
                    "keepStrategy": chunking.keep_strategy,
                    "separatorIsRegex": chunking.separator_is_regex,
                    "lengthMeasurement": chunking.length_measurement,
                },
            },
        }
        resp = await client.post(
            f"{self._base_url}/v1/spaces",
            headers=self._headers,
            json=body,
        )
        if resp.status_code == 409:
            # Race: someone else just created it. Re-list and find by name.
            for space in await self.list_spaces():
                if space.get("name") == name:
                    sid = space.get("spaceId") or space.get("id")
                    return {
                        "spaceId": sid,
                        "name": name,
                        "embedderId": embedder_id,
                        "reused": True,
                    }
            # 409 but the name is not listed: surface the HTTP error.
            resp.raise_for_status()

        resp.raise_for_status()
        result = resp.json()
        return {
            "spaceId": result.get("spaceId") or result.get("id"),
            "name": result.get("name", name),
            "embedderId": embedder_id,
            "chunkingConfig": body["defaultChunkingConfig"],
            "reused": False,
        }

    async def update_space(
        self,
        space_id: str,
        *,
        name: Optional[str] = None,
        public_read: Optional[bool] = None,
        replace_labels: Optional[Dict[str, str]] = None,
        merge_labels: Optional[Dict[str, str]] = None,
        chunking: Optional[ChunkingConfig] = None,
    ) -> Dict[str, Any]:
        """``PUT /v1/spaces/{id}`` with only the fields the API accepts.

        Note: the server rejects a bare ``labels`` field with 400 — use
        ``replace_labels`` (full replacement) or ``merge_labels`` (additive).
        """
        # Only include fields explicitly provided, so unset fields are left
        # untouched server-side.
        body: Dict[str, Any] = {}
        if name is not None:
            body["name"] = name
        if public_read is not None:
            body["publicRead"] = public_read
        if replace_labels is not None:
            body["replaceLabels"] = replace_labels
        if merge_labels is not None:
            body["mergeLabels"] = merge_labels
        if chunking is not None:
            body["defaultChunkingConfig"] = {
                "recursive": {
                    "chunkSize": chunking.chunk_size,
                    "chunkOverlap": chunking.chunk_overlap,
                    "separators": chunking.separators,
                    "keepStrategy": chunking.keep_strategy,
                    "separatorIsRegex": chunking.separator_is_regex,
                    "lengthMeasurement": chunking.length_measurement,
                },
            }

        client = self._ensure_client()
        resp = await client.put(
            f"{self._base_url}/v1/spaces/{space_id}",
            headers=self._headers,
            json=body,
        )
        resp.raise_for_status()
        return resp.json()

    async def delete_space(self, space_id: str) -> Dict[str, Any]:
        """``DELETE /v1/spaces/{id}``."""
        client = self._ensure_client()
        resp = await client.delete(
            f"{self._base_url}/v1/spaces/{space_id}",
            headers=self._headers,
        )
        resp.raise_for_status()
        return {"spaceId": space_id, "success": True}

    # ── Memories ───────────────────────────────────────────────────────

    async def create_memory(
        self,
        *,
        space_id: str,
        content: Optional[str] = None,
        file_path: Optional[str] = None,
        content_type: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """Create a memory from text or a file.

        Exactly one of ``content`` (inline text) or ``file_path`` (any file —
        binary content is base64-encoded into ``originalContentB64``) must be
        supplied. When ``file_path`` is used, the MIME type is auto-detected
        from the extension unless overridden via ``content_type``.

        Raises:
            ValueError: If neither or both of ``content``/``file_path`` given.
            FileNotFoundError: If ``file_path`` does not exist.
        """
        if (content is None) == (file_path is None):
            raise ValueError("create_memory requires exactly one of content or file_path")

        body: Dict[str, Any] = {"spaceId": space_id}

        if file_path is not None:
            path = Path(file_path)
            if not path.exists():
                raise FileNotFoundError(f"File not found: {file_path}")
            mime = content_type or _guess_mime(path)
            body["contentType"] = mime
            file_bytes = path.read_bytes()
            if mime.startswith("text/"):
                # Text payloads go inline; assumes UTF-8 — a mismatched
                # encoding will raise UnicodeDecodeError here.
                body["originalContent"] = file_bytes.decode("utf-8")
            else:
                body["originalContentB64"] = base64.b64encode(file_bytes).decode("ascii")
        else:
            body["contentType"] = content_type or "text/plain"
            body["originalContent"] = content  # type: ignore[assignment]

        if metadata:
            body["metadata"] = metadata

        client = self._ensure_client()
        resp = await client.post(
            f"{self._base_url}/v1/memories",
            headers=self._headers,
            json=body,
        )
        resp.raise_for_status()
        return resp.json()

    async def list_memories(
        self,
        space_id: str,
        *,
        next_token: Optional[str] = None,
    ) -> Dict[str, Any]:
        """``GET /v1/spaces/{id}/memories`` -> ``{memories, nextToken}``.

        There is no top-level ``GET /v1/memories`` — listing is always
        scoped to a space. Pass the returned ``nextToken`` back in to page.
        """
        params: Dict[str, Any] = {}
        if next_token:
            params["nextToken"] = next_token
        client = self._ensure_client()
        resp = await client.get(
            f"{self._base_url}/v1/spaces/{space_id}/memories",
            headers=self._headers,
            params=params,
        )
        resp.raise_for_status()
        body = resp.json()
        # Normalize both response shapes to the same dict.
        if isinstance(body, list):
            return {"memories": body, "nextToken": None}
        return {
            "memories": body.get("memories", []),
            "nextToken": body.get("nextToken"),
        }

    async def get_memory(
        self,
        memory_id: str,
        *,
        include_content: bool = True,
    ) -> Dict[str, Any]:
        """``GET /v1/memories/{id}`` (and optionally ``/content``).

        The ``/content`` endpoint may return plain text rather than JSON for
        text memories — both shapes are handled.
        """
        client = self._ensure_client()
        resp = await client.get(
            f"{self._base_url}/v1/memories/{memory_id}",
            headers=self._headers,
        )
        resp.raise_for_status()
        result: Dict[str, Any] = {"memory": resp.json()}

        if include_content:
            try:
                content_resp = await client.get(
                    f"{self._base_url}/v1/memories/{memory_id}/content",
                    headers=self._headers,
                )
                content_resp.raise_for_status()
                ctype = content_resp.headers.get("content-type", "")
                if "application/json" in ctype:
                    try:
                        result["content"] = content_resp.json()
                    except json.JSONDecodeError:
                        result["content"] = content_resp.text
                else:
                    result["content"] = content_resp.text
            except Exception as exc:
                # Content fetch is best-effort: metadata is still returned.
                result["contentError"] = f"Failed to fetch content: {exc}"

        return result

    async def delete_memory(self, memory_id: str) -> Dict[str, Any]:
        """``DELETE /v1/memories/{id}``."""
        client = self._ensure_client()
        resp = await client.delete(
            f"{self._base_url}/v1/memories/{memory_id}",
            headers=self._headers,
        )
        resp.raise_for_status()
        return {"memoryId": memory_id, "success": True}

    # ── Retrieve ────────────────────────────────────────────────────────

    async def retrieve_memories(
        self,
        *,
        message: str,
        space_ids: List[str],
        max_results: int = 5,
        fetch_memory: bool = True,
        wait_for_indexing: bool = False,
        wait_seconds: float = 60.0,
        poll_interval: float = 5.0,
        # Post-processor params
        reranker_id: Optional[str] = None,
        llm_id: Optional[str] = None,
        relevance_threshold: Optional[float] = None,
        llm_temperature: Optional[float] = None,
        chronological_resort: bool = False,
    ) -> Dict[str, Any]:
        """``POST /v1/memories:retrieve`` with optional reranker + LLM.

        When ``wait_for_indexing=True`` and no results come back yet, polls
        every ``poll_interval`` seconds for up to ``wait_seconds`` total —
        new memories take a few seconds to embed and become searchable.

        Returns ``{"results": [...], "memories": [...], "abstractReply": ...}``.
        """
        body: Dict[str, Any] = {
            "message": message,
            "spaceKeys": [{"spaceId": sid} for sid in space_ids],
            "requestedSize": max_results,
            "fetchMemory": fetch_memory,
        }

        # Only attach the post-processor when at least one of its knobs is set.
        use_post = any(
            v is not None for v in (reranker_id, llm_id, relevance_threshold, llm_temperature)
        ) or chronological_resort
        if use_post:
            cfg: Dict[str, Any] = {"max_results": max_results}
            if reranker_id is not None:
                cfg["reranker_id"] = reranker_id
            if llm_id is not None:
                cfg["llm_id"] = llm_id
            if relevance_threshold is not None:
                cfg["relevance_threshold"] = relevance_threshold
            if llm_temperature is not None:
                cfg["llm_temp"] = llm_temperature
            if chronological_resort:
                cfg["chronological_resort"] = True
            body["postProcessor"] = {
                "name": "com.goodmem.retrieval.postprocess.ChatPostProcessorFactory",
                "config": cfg,
            }

        headers = {**self._headers, "Accept": "application/x-ndjson"}
        client = self._ensure_client()

        # get_running_loop() is the documented call from inside a coroutine;
        # asyncio.get_event_loop() is deprecated for this use.
        start = asyncio.get_running_loop().time()
        while True:
            resp = await client.post(
                f"{self._base_url}/v1/memories:retrieve",
                headers=headers,
                json=body,
            )
            resp.raise_for_status()
            results, memories, abstract = _parse_retrieve_response(resp.text)

            if results or not wait_for_indexing:
                return {"results": results, "memories": memories, "abstractReply": abstract}

            if asyncio.get_running_loop().time() - start >= wait_seconds:
                logger.warning("retrieve_memories: no results after %.0fs of polling", wait_seconds)
                return {"results": results, "memories": memories, "abstractReply": abstract}

            await asyncio.sleep(poll_interval)
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""Configuration classes for the autogen-goodmem integration."""
|
|
2
|
+
|
|
3
|
+
from typing import Dict, List, Literal, Optional
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel, Field
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ChunkingConfig(BaseModel):
    """Recursive chunking settings applied when a GoodMem space is created."""

    chunk_size: int = Field(default=256, description="Characters per chunk")
    chunk_overlap: int = Field(default=25, description="Overlap between consecutive chunks")
    # Whether the remainder of an uneven split is kept at the end, the start,
    # or dropped entirely.
    keep_strategy: Literal["KEEP_END", "KEEP_START", "DISCARD"] = "KEEP_END"
    length_measurement: Literal["CHARACTER_COUNT", "TOKEN_COUNT"] = "CHARACTER_COUNT"
    # Split candidates; the trailing "" allows splitting between any two
    # characters as a last resort.
    separators: List[str] = ["\n\n", "\n", ". ", " ", ""]
    separator_is_regex: bool = False
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class PostProcessorConfig(BaseModel):
    """Optional reranker / LLM post-processing applied during retrieval.

    Consumed by ``retrieve_memories``; fields left as ``None`` are omitted
    from the request.
    """

    reranker_id: Optional[str] = None
    llm_id: Optional[str] = None
    relevance_threshold: Optional[float] = Field(default=None, description="0-1")
    llm_temperature: Optional[float] = Field(default=None, description="0-2")
    # NOTE(review): not forwarded by GoodMemContextProvider.query(), which
    # uses GoodMemMemoryConfig.max_results instead — confirm intent.
    max_results: Optional[int] = None
    chronological_resort: bool = False
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class GoodMemMemoryConfig(BaseModel):
    """Settings for :class:`GoodMemContextProvider`.

    Binds an AutoGen ``Memory`` to exactly one GoodMem space; the space is
    created lazily on first use, or reused when one with the same name
    already exists.
    """

    base_url: str = Field(description="GoodMem API base URL, e.g. https://localhost:8080")
    api_key: str = Field(description="GoodMem API key (sent as X-API-Key)")
    space_name: str = Field(description="Space to create or reuse")
    embedder_id: str = Field(description="Embedder model ID for this space")
    # Retrieval behaviour.
    max_results: int = 5
    include_memory_definition: bool = True
    wait_for_indexing: bool = Field(default=True, description="Poll up to ~60s for first results")
    # Space / request shaping.
    chunking: ChunkingConfig = Field(default_factory=ChunkingConfig)
    post_processor: Optional[PostProcessorConfig] = None
    # Default metadata merged into every memory created through the provider.
    metadata: Optional[Dict[str, str]] = None
    verify_ssl: bool = True
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
"""AutoGen-native memory provider backed by GoodMem.
|
|
2
|
+
|
|
3
|
+
In AutoGen, the extension point for retrieval-augmented context injection is
|
|
4
|
+
:class:`autogen_core.memory.Memory` — the equivalent of "BaseContextProvider"
|
|
5
|
+
in other frameworks. :class:`GoodMemContextProvider` plugs a single GoodMem
|
|
6
|
+
space into that surface so an AutoGen agent can call ``memory.add(...)`` /
|
|
7
|
+
``memory.query(...)`` and ``update_context`` will inject retrieved chunks as
|
|
8
|
+
a system message.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import logging
|
|
14
|
+
from typing import Any, Dict, List, Optional
|
|
15
|
+
|
|
16
|
+
from autogen_core import CancellationToken, Component
|
|
17
|
+
from autogen_core.memory import (
|
|
18
|
+
Memory,
|
|
19
|
+
MemoryContent,
|
|
20
|
+
MemoryMimeType,
|
|
21
|
+
MemoryQueryResult,
|
|
22
|
+
UpdateContextResult,
|
|
23
|
+
)
|
|
24
|
+
from autogen_core.model_context import ChatCompletionContext
|
|
25
|
+
from autogen_core.models import SystemMessage
|
|
26
|
+
from typing_extensions import Self
|
|
27
|
+
|
|
28
|
+
from ._client import GoodMemClient
|
|
29
|
+
from ._config import GoodMemMemoryConfig
|
|
30
|
+
|
|
31
|
+
logger = logging.getLogger(__name__)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class GoodMemContextProvider(Memory, Component[GoodMemMemoryConfig]):
    """AutoGen ``Memory`` provider backed by a GoodMem space.

    The configured space is created (or reused by name) lazily on the first
    ``add``/``query``; ``update_context`` injects retrieved chunks into the
    model context as a single system message.

    Args:
        config: Connection details and space configuration.

    Example:

        .. code-block:: python

            from autogen_goodmem import GoodMemContextProvider, GoodMemMemoryConfig

            provider = GoodMemContextProvider(
                config=GoodMemMemoryConfig(
                    base_url="https://localhost:8080",
                    api_key="gm_...",
                    space_name="my-kb",
                    embedder_id="<embedder-uuid>",
                    verify_ssl=False,
                )
            )
            await provider.add(MemoryContent(content="hi", mime_type=MemoryMimeType.TEXT))
            results = await provider.query("hello")
            await provider.close()
    """

    component_config_schema = GoodMemMemoryConfig
    component_provider_override = "autogen_goodmem.GoodMemContextProvider"

    def __init__(self, config: GoodMemMemoryConfig) -> None:
        self._config = config
        self._client = GoodMemClient(
            base_url=config.base_url,
            api_key=config.api_key,
            verify_ssl=config.verify_ssl,
        )
        # Resolved lazily by _ensure_space() on first use.
        self._space_id: Optional[str] = None

    # ── Internals ──────────────────────────────────────────────────────

    async def _ensure_space(self) -> str:
        """Create or reuse the configured space once, caching its ID."""
        if self._space_id is not None:
            return self._space_id
        result = await self._client.create_space(
            self._config.space_name,
            self._config.embedder_id,
            chunking=self._config.chunking,
        )
        self._space_id = result["spaceId"]
        verb = "Reusing" if result.get("reused") else "Created"
        logger.info("%s GoodMem space %s (%s)", verb, self._config.space_name, self._space_id)
        return self._space_id  # type: ignore[return-value]

    def _merge_metadata(self, extra: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
        """Overlay per-call metadata on config-level defaults (shared by add/add_file)."""
        merged: Dict[str, Any] = {}
        if self._config.metadata:
            merged.update(self._config.metadata)
        if extra:
            merged.update(extra)
        return merged or None

    # ── AutoGen Memory interface ───────────────────────────────────────

    async def update_context(self, model_context: ChatCompletionContext) -> UpdateContextResult:
        """Query GoodMem with the latest message; inject hits as a system message."""
        messages = await model_context.get_messages()
        if not messages:
            return UpdateContextResult(memories=MemoryQueryResult(results=[]))

        last = messages[-1]
        # Fix: stringify the message *content* — matching query()'s handling —
        # rather than the whole message object, when content is not a plain str.
        query_text = last.content if isinstance(last.content, str) else str(last.content)
        query_results = await self.query(query_text)

        if query_results.results:
            lines = [f"{i}. {str(m.content)}" for i, m in enumerate(query_results.results, 1)]
            await model_context.add_message(SystemMessage(content="\nRelevant memory content:\n" + "\n".join(lines)))

        return UpdateContextResult(memories=query_results)

    async def add(
        self,
        content: MemoryContent,
        cancellation_token: CancellationToken | None = None,
    ) -> None:
        """Store a text memory. Use :meth:`add_file` for binary files.

        Raises:
            ValueError: If ``content.content`` is not a string.
        """
        space_id = await self._ensure_space()
        text = content.content
        if not isinstance(text, str):
            raise ValueError(
                "GoodMemContextProvider.add() only supports text content. "
                "Use add_file() for binary files (PDF, images, etc.)."
            )

        await self._client.create_memory(
            space_id=space_id,
            content=text,
            content_type="text/plain",
            metadata=self._merge_metadata(content.metadata),
        )

    async def add_file(
        self,
        file_path: str,
        metadata: Optional[Dict[str, Any]] = None,
        cancellation_token: CancellationToken | None = None,
    ) -> Dict[str, Any]:
        """Upload a local file as a memory; returns a small status summary."""
        space_id = await self._ensure_space()
        result = await self._client.create_memory(
            space_id=space_id,
            file_path=file_path,
            metadata=self._merge_metadata(metadata),
        )
        return {
            "memoryId": result.get("memoryId"),
            "spaceId": result.get("spaceId"),
            "status": result.get("processingStatus", "PENDING"),
            "contentType": result.get("contentType"),
            # NOTE(review): assumes POSIX-style separators; switch to
            # os.path.basename if Windows paths must be supported.
            "fileName": file_path.rsplit("/", 1)[-1],
        }

    async def query(
        self,
        query: str | MemoryContent,
        cancellation_token: CancellationToken | None = None,
        **kwargs: Any,
    ) -> MemoryQueryResult:
        """Semantic retrieval from the configured space.

        Pass ``space_ids`` via ``kwargs`` to search other/additional spaces.
        """
        space_id = await self._ensure_space()
        space_ids: List[str] = kwargs.get("space_ids", [space_id])
        query_text = query if isinstance(query, str) else str(query.content)

        # Forward optional post-processor settings when configured.
        pp = self._config.post_processor
        kw: Dict[str, Any] = {}
        if pp is not None:
            kw.update(
                reranker_id=pp.reranker_id,
                llm_id=pp.llm_id,
                relevance_threshold=pp.relevance_threshold,
                llm_temperature=pp.llm_temperature,
                chronological_resort=pp.chronological_resort,
            )

        result = await self._client.retrieve_memories(
            message=query_text,
            space_ids=space_ids,
            max_results=self._config.max_results,
            fetch_memory=self._config.include_memory_definition,
            wait_for_indexing=self._config.wait_for_indexing,
            **kw,
        )

        abstract = result.get("abstractReply")
        out: List[MemoryContent] = []
        for item in result["results"]:
            meta: Dict[str, Any] = {
                "chunkId": item.get("chunkId"),
                "memoryId": item.get("memoryId"),
                "relevanceScore": item.get("relevanceScore"),
                "memoryIndex": item.get("memoryIndex"),
            }
            if abstract:
                meta["abstractReply"] = abstract
            out.append(
                MemoryContent(
                    content=item.get("chunkText", ""),
                    mime_type=MemoryMimeType.TEXT,
                    metadata=meta,
                )
            )
        return MemoryQueryResult(results=out)

    async def clear(self) -> None:
        """No-op: GoodMem exposes no bulk-clear endpoint; logs a warning instead."""
        logger.warning(
            "GoodMem does not support bulk clear; delete memories individually "
            "or recreate the space."
        )

    async def close(self) -> None:
        """Release the underlying HTTP client and forget the cached space ID."""
        await self._client.close()
        self._space_id = None

    # ── Convenience pass-throughs ──────────────────────────────────────

    @property
    def client(self) -> GoodMemClient:
        """The underlying :class:`GoodMemClient` (useful for advanced ops)."""
        return self._client

    # ── Serialization ──────────────────────────────────────────────────

    def _to_config(self) -> GoodMemMemoryConfig:
        """Return the live config for AutoGen component serialization."""
        return self._config

    @classmethod
    def _from_config(cls, config: GoodMemMemoryConfig) -> Self:
        """Rebuild a provider from a serialized config."""
        return cls(config=config)
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
"""Factory for the 11 GoodMem AutoGen ``FunctionTool`` instances.
|
|
2
|
+
|
|
3
|
+
Each tool is a thin wrapper around :class:`GoodMemClient`, exposed as an
|
|
4
|
+
AutoGen ``FunctionTool`` that an agent can call. Tool names are prefixed
|
|
5
|
+
with ``goodmem_`` so multiple memory backends can coexist on one agent.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from typing import Any, Dict, List, Optional
|
|
11
|
+
|
|
12
|
+
from autogen_core.tools import FunctionTool
|
|
13
|
+
|
|
14
|
+
from ._client import GoodMemClient
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Canonical registry of tool names: create_goodmem_tools() returns its
# FunctionTool instances in exactly this order, one per client operation.
TOOL_NAMES: List[str] = [
    "goodmem_list_embedders",
    "goodmem_list_spaces",
    "goodmem_get_space",
    "goodmem_create_space",
    "goodmem_update_space",
    "goodmem_delete_space",
    "goodmem_create_memory",
    "goodmem_list_memories",
    "goodmem_retrieve_memories",
    "goodmem_get_memory",
    "goodmem_delete_memory",
]
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def create_goodmem_tools(client: GoodMemClient) -> List[FunctionTool]:
    """Return the 11 GoodMem ``FunctionTool`` instances bound to ``client``.

    The returned list is in the order declared in :data:`TOOL_NAMES`.

    Each tool's name is the wrapped coroutine's ``__name__`` and its
    description is the first line of that coroutine's docstring, so the
    docstrings below are part of the agent-facing contract.
    """

    async def goodmem_list_embedders() -> List[Dict[str, Any]]:
        """List available embedder models on the GoodMem server."""
        return await client.list_embedders()

    async def goodmem_list_spaces() -> List[Dict[str, Any]]:
        """List all GoodMem spaces visible to the current API key."""
        return await client.list_spaces()

    async def goodmem_get_space(space_id: str) -> Dict[str, Any]:
        """Fetch a single GoodMem space by ID."""
        return await client.get_space(space_id)

    async def goodmem_create_space(
        name: str,
        embedder_id: str,
    ) -> Dict[str, Any]:
        """Create a GoodMem space, or reuse the existing one with the same name.

        Returns ``{spaceId, name, embedderId, reused}``.
        """
        return await client.create_space(name=name, embedder_id=embedder_id)

    async def goodmem_update_space(
        space_id: str,
        name: Optional[str] = None,
        public_read: Optional[bool] = None,
        replace_labels: Optional[Dict[str, str]] = None,
        merge_labels: Optional[Dict[str, str]] = None,
    ) -> Dict[str, Any]:
        """Update a GoodMem space.

        Only ``name``, ``publicRead``, ``replaceLabels``, ``mergeLabels``, and
        ``defaultChunkingConfig`` are accepted by the server — passing a bare
        ``labels`` field returns 400, so prefer ``replace_labels`` (full
        replacement) or ``merge_labels`` (additive merge).
        """
        return await client.update_space(
            space_id,
            name=name,
            public_read=public_read,
            replace_labels=replace_labels,
            merge_labels=merge_labels,
        )

    async def goodmem_delete_space(space_id: str) -> Dict[str, Any]:
        """Permanently delete a GoodMem space and every memory inside it."""
        return await client.delete_space(space_id)

    async def goodmem_create_memory(
        space_id: str,
        content: Optional[str] = None,
        file_path: Optional[str] = None,
        content_type: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """Create a memory in a space from inline text or a local file.

        Exactly one of ``content`` (inline text) or ``file_path`` must be
        provided. Binary files (PDF, images) are base64-encoded; text files
        are sent inline. MIME type is auto-detected from the extension when
        ``content_type`` is omitted.
        """
        return await client.create_memory(
            space_id=space_id,
            content=content,
            file_path=file_path,
            content_type=content_type,
            metadata=metadata,
        )

    async def goodmem_list_memories(
        space_id: str,
        next_token: Optional[str] = None,
    ) -> Dict[str, Any]:
        """List memories in a space (paginated via ``nextToken``).

        Note: GoodMem has no top-level ``GET /v1/memories`` — listing is
        always scoped to a space.
        """
        return await client.list_memories(space_id, next_token=next_token)

    async def goodmem_retrieve_memories(
        message: str,
        space_ids: List[str],
        max_results: int = 5,
        fetch_memory: bool = True,
        wait_for_indexing: bool = False,
        reranker_id: Optional[str] = None,
        llm_id: Optional[str] = None,
        relevance_threshold: Optional[float] = None,
        llm_temperature: Optional[float] = None,
        chronological_resort: bool = False,
    ) -> Dict[str, Any]:
        """Semantic retrieval against one or more GoodMem spaces.

        Optionally re-orders results with ``reranker_id`` and/or generates a
        natural-language ``abstractReply`` with ``llm_id``.
        """
        return await client.retrieve_memories(
            message=message,
            space_ids=space_ids,
            max_results=max_results,
            fetch_memory=fetch_memory,
            wait_for_indexing=wait_for_indexing,
            reranker_id=reranker_id,
            llm_id=llm_id,
            relevance_threshold=relevance_threshold,
            llm_temperature=llm_temperature,
            chronological_resort=chronological_resort,
        )

    async def goodmem_get_memory(
        memory_id: str,
        include_content: bool = True,
    ) -> Dict[str, Any]:
        """Fetch a memory's metadata, and optionally its original content."""
        return await client.get_memory(memory_id, include_content=include_content)

    async def goodmem_delete_memory(memory_id: str) -> Dict[str, Any]:
        """Permanently delete a memory and its chunks/embeddings."""
        return await client.delete_memory(memory_id)

    handlers = [
        goodmem_list_embedders,
        goodmem_list_spaces,
        goodmem_get_space,
        goodmem_create_space,
        goodmem_update_space,
        goodmem_delete_space,
        goodmem_create_memory,
        goodmem_list_memories,
        goodmem_retrieve_memories,
        goodmem_get_memory,
        goodmem_delete_memory,
    ]
    # Fix: the handler list used to mirror TOOL_NAMES by hand with nothing
    # enforcing agreement; verify the docstring's ordering promise holds.
    assert [h.__name__ for h in handlers] == TOOL_NAMES, "handlers out of sync with TOOL_NAMES"
    return [
        FunctionTool(
            handler,
            name=handler.__name__,
            description=(handler.__doc__ or handler.__name__).strip().splitlines()[0],
        )
        for handler in handlers
    ]
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "autogen-goodmem"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "GoodMem memory and tools for the AutoGen agent framework."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.10"
|
|
7
|
+
license = {text = "MIT"}
|
|
8
|
+
authors = [{name = "PAIR Systems"}]
|
|
9
|
+
keywords = ["autogen", "goodmem", "memory", "rag", "agents", "llm"]
|
|
10
|
+
classifiers = [
|
|
11
|
+
"Programming Language :: Python :: 3",
|
|
12
|
+
"Programming Language :: Python :: 3.10",
|
|
13
|
+
"Programming Language :: Python :: 3.11",
|
|
14
|
+
"Programming Language :: Python :: 3.12",
|
|
15
|
+
"License :: OSI Approved :: MIT License",
|
|
16
|
+
"Operating System :: OS Independent",
|
|
17
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
18
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
19
|
+
]
|
|
20
|
+
dependencies = [
|
|
21
|
+
"autogen-core>=0.7.5",
|
|
22
|
+
"httpx>=0.24.0,<1",
|
|
23
|
+
"pydantic>=2.0,<3",
|
|
24
|
+
"typing-extensions>=4.7",
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
[project.optional-dependencies]
|
|
28
|
+
dev = [
|
|
29
|
+
"pytest>=7",
|
|
30
|
+
"pytest-asyncio>=0.23",
|
|
31
|
+
"pytest-timeout",
|
|
32
|
+
"build",
|
|
33
|
+
"twine",
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
[project.urls]
|
|
37
|
+
Homepage = "https://github.com/PAIR-Systems-Inc/autogen-goodmem"
|
|
38
|
+
Repository = "https://github.com/PAIR-Systems-Inc/autogen-goodmem"
|
|
39
|
+
Issues = "https://github.com/PAIR-Systems-Inc/autogen-goodmem/issues"
|
|
40
|
+
|
|
41
|
+
[tool.pytest.ini_options]
|
|
42
|
+
asyncio_mode = "auto"
|
|
43
|
+
markers = ["integration: live-server tests"]
|
|
44
|
+
|
|
45
|
+
[build-system]
|
|
46
|
+
requires = ["flit-core>=3.11,<4.0"]
|
|
47
|
+
build-backend = "flit_core.buildapi"
|
|
48
|
+
|
|
49
|
+
[tool.flit.module]
|
|
50
|
+
name = "autogen_goodmem"
|