memorius 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- memorius/__init__.py +29 -0
- memorius/cli/main.py +306 -0
- memorius/config.py +133 -0
- memorius/embeddings.py +157 -0
- memorius/hooks/__init__.py +306 -0
- memorius/hooks/cli.py +226 -0
- memorius/hooks/engine.py +447 -0
- memorius/mcp_server.py +250 -0
- memorius/normalizers/__init__.py +467 -0
- memorius/normalizers/cli.py +218 -0
- memorius/plugin_gen/__init__.py +1 -0
- memorius/plugin_gen/cli.py +739 -0
- memorius/rest_server.py +81 -0
- memorius/vault.py +555 -0
- memorius-0.1.0.dist-info/METADATA +259 -0
- memorius-0.1.0.dist-info/RECORD +19 -0
- memorius-0.1.0.dist-info/WHEEL +5 -0
- memorius-0.1.0.dist-info/entry_points.txt +5 -0
- memorius-0.1.0.dist-info/top_level.txt +1 -0
memorius/__init__.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""memorius — self-contained memory vault for any AI agent.
|
|
2
|
+
|
|
3
|
+
Architecture:
|
|
4
|
+
|
|
5
|
+
Storage Layer:
|
|
6
|
+
chroma_db: Vector store for semantic search (ChromaDB)
|
|
7
|
+
sqlite_store: Metadata store for vaults, shelves, folders, notes, and diaries (SQLite)
|
|
8
|
+
Hierarchy: Vault > Shelf > Folder > Note
|
|
9
|
+
|
|
10
|
+
Vault Layer:
|
|
11
|
+
vault: Vault > Shelf > Folder > Note hierarchy
|
|
12
|
+
diary: Session diary entries with timestamps
|
|
13
|
+
mine: Extract memories from conversations/transcripts
|
|
14
|
+
search: Multi-modal search (vector + metadata + temporal)
|
|
15
|
+
|
|
16
|
+
Server Layer:
|
|
17
|
+
mcp: MCP protocol server (primary interface for tool-calling agents)
|
|
18
|
+
rest: FastAPI REST server (alternative interface for web/curl)
|
|
19
|
+
|
|
20
|
+
Integration Layer:
|
|
21
|
+
hooks: Agent-agnostic hook lifecycle system
|
|
22
|
+
plugin_gen: Per-agent plugin manifest generator
|
|
23
|
+
normalizers: Conversation format importers (Discord, Telegram, WhatsApp)
|
|
24
|
+
|
|
25
|
+
CLI Layer:
|
|
26
|
+
memorius: Main CLI — init, mine, search, diary, status, serve, hook
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
# Package version string.
__version__ = "0.1.0"
|
memorius/cli/main.py
ADDED
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
"""memorius CLI — interact with your memory vault from the terminal.
|
|
2
|
+
|
|
3
|
+
Usage:
|
|
4
|
+
memorius init Initialize a new vault
|
|
5
|
+
memorius status Show vault status
|
|
6
|
+
memorius store <text> Store a memory
|
|
7
|
+
memorius search <query> Semantic search across memories
|
|
8
|
+
memorius mine <file> Mine memories from a transcript file
|
|
9
|
+
memorius diary <session> Write a diary entry (interactive)
|
|
10
|
+
memorius diaries List recent diary entries
|
|
11
|
+
memorius ls [--vault NAME] Explore vault hierarchy
|
|
12
|
+
memorius serve Start the MCP server (stdio)
|
|
13
|
+
memorius serve-rest Start the REST API server
|
|
14
|
+
memorius config Show current config
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import argparse
|
|
20
|
+
import json
|
|
21
|
+
import logging
|
|
22
|
+
import sys
|
|
23
|
+
from pathlib import Path
|
|
24
|
+
|
|
25
|
+
from memorius import __version__
|
|
26
|
+
from memorius.config import load_config, DEFAULT_CONFIG_DIR, DEFAULT_CONFIG_PATH
|
|
27
|
+
from memorius.vault import VaultEngine
|
|
28
|
+
|
|
29
|
+
# Root logging defaults to WARNING so ordinary CLI output stays quiet.
logging.basicConfig(
    format="%(levelname)s [%(name)s] %(message)s",
    level=logging.WARNING,
)
# Module logger for the CLI.
logger = logging.getLogger("memorius.cli")
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _build_parser():
    """Build the top-level ``memorius`` argument parser with every subcommand."""
    parser = argparse.ArgumentParser(
        "memorius",
        description="Memory vault for any AI agent — store, search, and organize memories.",
    )
    parser.add_argument("--version", action="store_true", help="Show version")
    parser.add_argument("--config", default=None, help="Path to config file")
    parser.add_argument("--debug", action="store_true", help="Enable debug logging")

    subparsers = parser.add_subparsers(dest="command")

    subparsers.add_parser("init", help="Initialize a new vault")
    subparsers.add_parser("status", help="Show vault status")

    store_p = subparsers.add_parser("store", help="Store a memory")
    store_p.add_argument("content", nargs="?", default=None, help="Memory content")
    store_p.add_argument("--vault", default="main", help="Vault name")
    store_p.add_argument("--shelf", default="default", help="Shelf name")
    store_p.add_argument("--folder", default="default", help="Folder name")
    store_p.add_argument("--note", default="default", help="Note name")

    search_p = subparsers.add_parser("search", help="Semantic search")
    search_p.add_argument("query", nargs="?", default=None, help="Search query")
    search_p.add_argument("--n", type=int, default=10, help="Number of results")
    search_p.add_argument("--vault", default=None, help="Filter by vault")
    search_p.add_argument("--shelf", default=None, help="Filter by shelf")

    mine_p = subparsers.add_parser("mine", help="Mine memories from a transcript")
    mine_p.add_argument("file", nargs="?", default=None, help="Transcript file path")
    mine_p.add_argument("--vault", default="main", help="Vault name")
    mine_p.add_argument("--text", default=None, help="Transcript text (inline)")

    diary_p = subparsers.add_parser("diary", help="Write a diary entry")
    diary_p.add_argument("session_id", nargs="?", default=None, help="Session ID")
    diary_p.add_argument("--title", default="", help="Diary title")
    diary_p.add_argument("--summary", default="", help="Diary summary")
    diary_p.add_argument("--content", default="", help="Diary content")
    diary_p.add_argument("--vault", default="main", help="Vault name")

    subparsers.add_parser("diaries", help="List recent diary entries")
    p = subparsers.add_parser("ls", help="Explore vault hierarchy")
    p.add_argument("--vault", default=None, help="Vault to explore (default: all)")

    serve_p = subparsers.add_parser("serve", help="Start MCP server (stdio)")
    serve_p.add_argument("--port", type=int, default=8911, help="Not used for stdio")

    serve_rest_p = subparsers.add_parser("serve-rest", help="Start REST API server")
    # Defaults are None (not 8912 / 127.0.0.1) so that cmd_serve_rest's
    # ``args.port or config[...]`` fallback can actually reach the values from
    # the config file and environment; a hard-coded default always won.
    serve_rest_p.add_argument(
        "--port", type=int, default=None,
        help="Port (default: server.rest_port from config, else 8912)",
    )
    serve_rest_p.add_argument(
        "--host", default=None,
        help="Host (default: server.host from config, else 127.0.0.1)",
    )

    config_p = subparsers.add_parser("config", help="Show configuration")
    config_p.add_argument("--show", action="store_true", default=True, help="Show config")
    config_p.add_argument("--path", action="store_true", help="Show config file path")

    return parser


def main():
    """CLI entry point: parse ``sys.argv`` and dispatch to a ``cmd_*`` handler.

    ``--version`` and a missing subcommand are handled before any config is
    loaded or engine is created. Otherwise the config is loaded from
    ``--config`` (or the default search path), a ``VaultEngine`` is built from
    it, and the handler is called as ``handler(engine, args, config)``.
    """
    parser = _build_parser()
    args = parser.parse_args()

    if args.version:
        print(f"memorius v{__version__}")
        return

    if args.debug:
        logging.getLogger("memorius").setLevel(logging.DEBUG)

    if args.command is None:
        parser.print_help()
        return

    # Load config and create engine
    config = load_config(args.config)
    engine = VaultEngine(config)

    # Dispatch
    commands = {
        "init": cmd_init,
        "status": cmd_status,
        "store": cmd_store,
        "search": cmd_search,
        "mine": cmd_mine,
        "diary": cmd_diary,
        "diaries": cmd_diaries,
        "ls": cmd_ls,
        "serve": cmd_serve,
        "serve-rest": cmd_serve_rest,
        "config": cmd_config,
    }
    handler = commands.get(args.command)
    if handler:
        handler(engine, args, config)
    else:
        print(f"Unknown command: {args.command}")
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def cmd_init(engine, args, config):
    """Create the config directory/file if missing and ensure the "main" vault.

    Writes the default YAML template to the default config path only when no
    file exists there, then prints the storage path and embedding provider
    taken from ``config``.
    """
    DEFAULT_CONFIG_DIR.mkdir(parents=True, exist_ok=True)

    if DEFAULT_CONFIG_PATH.exists():
        print(f"Config exists: {DEFAULT_CONFIG_PATH}")
    else:
        from memorius.config import DEFAULT_CONFIG_YAML

        DEFAULT_CONFIG_PATH.write_text(DEFAULT_CONFIG_YAML)
        print(f"Created config: {DEFAULT_CONFIG_PATH}")

    engine._meta.ensure_vault("main", "Main vault")
    print("Vault initialized: main")

    storage_path = config.get('storage', {}).get('path', '~/.memorius/data')
    provider = config.get('embeddings', {}).get('provider', 'chroma-default')
    print(f"Storage: {storage_path}")
    print(f"Embeddings: {provider}")
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def cmd_status(engine, args, config):
    """Print vault/memory counts, the embedding provider, and recent diaries.

    Reads ``engine.status()`` for the summary lines and lists up to five
    diary entries from ``engine._meta.list_diaries``.
    """
    status = engine.status()
    print(f" Vaults: {status['vaults']}")
    print(f" Memories: {status['memories']}")
    print(f" Embeddings: {status['embedding_provider']} (dim={status['embedding_dimension']})")
    print()
    diaries = engine._meta.list_diaries(limit=5)
    if diaries:
        print("Recent diaries:")
        for d in diaries:
            # ``or`` (not a .get default) so an empty or None title also shows
            # as "untitled", matching cmd_diaries.
            title = d.get('title') or 'untitled'
            print(f" [{d['created_at'][:19]}] {title} ({d['session_id']})")
    else:
        print("No diaries yet.")
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def cmd_store(engine, args, config):
    """Store one memory, taking its text from the argument or from stdin.

    Prints an error and returns without storing when no text is available.
    """
    # Fall back to piped stdin only when no positional content was given.
    text = args.content or sys.stdin.read().strip()
    if not text:
        print("Error: content required (pass as argument or pipe to stdin)")
        return

    stored = engine.store(
        content=text,
        vault=args.vault,
        shelf=args.shelf,
        folder=args.folder,
        note=args.note,
    )
    location = f"{stored.vault}/{stored.shelf}/{stored.folder}/{stored.note}"
    print(f"Stored: {stored.id}")
    print(f" Path: {location}")
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def cmd_search(engine, args, config):
    """Run a semantic search and print each hit's path and a 200-char preview.

    The query comes from the positional argument or, failing that, stdin.
    Prints an error and returns when no query is available.
    """
    text = args.query or sys.stdin.read().strip()
    if not text:
        print("Error: query required (pass as argument or pipe to stdin)")
        return

    hits = engine.search(
        query=text,
        vault=args.vault,
        shelf=args.shelf,
        limit=args.n,
    )
    print(f'Search: "{text}"')
    print(f"Results: {len(hits)}")
    print()
    for rank, hit in enumerate(hits, start=1):
        preview = hit.content[:200]
        print(f"{rank}. [{hit.vault}/{hit.shelf}/{hit.folder}/{hit.note}]")
        print(f" {preview}")
        # Mark previews that were cut short.
        if len(hit.content) > 200:
            print(" ...")
        print()
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def cmd_mine(engine, args, config):
    """Mine memories from a transcript given via --text, a file, or stdin.

    Sources are tried in that order; the first non-empty one wins. Prints an
    error and returns when all of them are empty.
    """
    transcript = args.text
    if not transcript and args.file:
        transcript = Path(args.file).read_text()
    # Last resort: whatever was piped on stdin.
    transcript = transcript or sys.stdin.read().strip()
    if not transcript:
        print("Error: transcript required (--text, <file>, or stdin)")
        return

    mined = engine.mine(text=transcript, vault=args.vault)
    print(f"Mined {len(mined)} memories into {args.vault}/conversations/mined/transcript")
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def cmd_diary(engine, args, config):
    """Write a diary entry, prompting for any of session/title/summary not given.

    Prints an error and returns when the session ID is still empty after the
    prompt.
    """
    session = args.session_id or input("Session ID: ").strip()
    if not session:
        print("Error: session_id required")
        return

    fields = {
        "session_id": session,
        "vault": args.vault,
        "title": args.title or input("Title: ").strip(),
        "summary": args.summary or input("Summary: ").strip(),
        "content": args.content,
    }
    written = engine.write_diary(**fields)
    print(f"Diary written: {written['id']}")
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def cmd_diaries(engine, args, config):
    """Print up to ten diary entries: timestamp, title, session, vault, summary."""
    entries = engine._meta.list_diaries(limit=10)
    if not entries:
        print("No diary entries.")
        return

    for entry in entries:
        stamp = entry['created_at'][:19]
        heading = entry['title'] or 'untitled'
        print(f"[{stamp}] {heading}")
        print(f" Session: {entry['session_id']} | Vault: {entry['vault']}")
        summary = entry['summary']
        if summary:
            print(f" Summary: {summary[:200]}")
        print()
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
def cmd_ls(engine, args, config):
    """Print the vault > shelf > folder > note tree.

    When ``args.vault`` is set, only that vault is listed (previously the
    option was parsed but ignored); a name that matches no vault prints a
    "not found" message. At most five folders are shown per shelf, followed
    by a count of the rest.
    """
    vaults = engine._meta.list_vaults()
    if not vaults:
        print("No vaults. Run: memorius init")
        return

    # getattr keeps this working for callers whose args lack the attribute.
    wanted = getattr(args, "vault", None)
    if wanted:
        vaults = [v for v in vaults if v['name'] == wanted]
        if not vaults:
            print(f"Vault not found: {wanted}")
            return

    for v in vaults:
        print(f"{v['name']}/")
        shelves = engine._meta.list_shelves(v['name'])
        for sh in shelves:
            print(f" {sh['name']}/")
            folders = engine._meta.list_folders(v['name'], sh['name'])
            for f in folders[:5]:
                print(f" {f['name']}/")
                notes = engine._meta.list_notes(v['name'], sh['name'], f['name'])
                for n in notes:
                    print(f" {n['name']} ({n['memory_count']} memories)")
            if len(folders) > 5:
                print(f" ... and {len(folders) - 5} more folders")
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
def cmd_serve(engine, args, config):
    """Run the MCP server for ``engine`` over stdio."""
    from memorius.mcp_server import McpServer

    mcp = McpServer(engine)
    # The startup banner is written to stderr rather than stdout.
    print("memorius MCP server starting (stdio)...", file=sys.stderr)
    mcp.run()
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def cmd_serve_rest(engine, args, config):
    """Run the REST API server for ``engine``.

    Host and port come from the command-line arguments when they are truthy,
    otherwise from the ``server`` section of ``config`` (``host`` /
    ``rest_port``), with 127.0.0.1:8912 as the final fallback.
    """
    from memorius.rest_server import run_rest_server

    server_cfg = config.get("server", {})
    bind_host = args.host if args.host else server_cfg.get("host", "127.0.0.1")
    bind_port = args.port if args.port else server_cfg.get("rest_port", 8912)
    run_rest_server(engine, host=bind_host, port=bind_port)
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def cmd_config(engine, args, config):
    """Print the loaded config as JSON, or the default config path with --path."""
    if not args.path:
        # default=str stringifies values json cannot serialise natively.
        print(json.dumps(config, indent=2, default=str))
        return

    location = DEFAULT_CONFIG_PATH
    print(location)
    print(f" Exists: {location.exists()}")
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
# Run the CLI when this module is executed as a script.
if __name__ == "__main__":
    main()
|
memorius/config.py
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
"""Configuration loading for memorius — YAML config with env var overrides."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Per-user configuration directory and the config file inside it.
DEFAULT_CONFIG_DIR = Path.home() / ".memorius"
DEFAULT_CONFIG_PATH = DEFAULT_CONFIG_DIR / "config.yaml"

# Default configuration template. Keys are nested under their section so each
# section parses as a mapping, which is the shape _ensure_defaults() expects
# (storage.path, embeddings.provider, server.rest_port, ...).
DEFAULT_CONFIG_YAML = """
# memorius configuration
# Path: ~/.memorius/config.yaml

storage:
  type: chroma # chroma | sqlite (chroma is always primary for vectors)
  path: ~/.memorius/data

embeddings:
  provider: chroma-default # chroma-default | sentence-transformers | openai
  model: all-MiniLM-L6-v2
  # openai:
  #   api_key: ...
  #   model: text-embedding-3-small

server:
  mcp_port: 8911
  rest_port: 8912
  host: "127.0.0.1"

vault:
  default: "main"
  max_note_size: 1000

hooks:
  enabled: true
  config: ~/.memorius/hooks.yaml
"""
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def load_config(path: str | Path | None = None) -> dict[str, Any]:
    """Load config from a YAML file, then apply env overrides and defaults.

    Args:
        path: Config file to read. When omitted, ``_find_config()`` chooses
            the location. If the resolved file does not exist, the built-in
            default config is used instead.

    Returns:
        The config dict with env overrides applied and missing keys filled.

    Raises:
        ValueError: If the file's top-level YAML value is not a mapping.

    Env var overrides (prefixed with MEMORIUS_):
        MEMORIUS_STORAGE_PATH → storage.path
        MEMORIUS_EMBEDDINGS_PROVIDER → embeddings.provider
        MEMORIUS_OPENAI_API_KEY → embeddings.openai.api_key
        MEMORIUS_MCP_PORT → server.mcp_port
        MEMORIUS_REST_PORT → server.rest_port
        MEMORIUS_HOST → server.host
        MEMORIUS_DEFAULT_VAULT → vault.default
    """
    import yaml

    config_path = Path(path or _find_config()).expanduser()

    if config_path.exists():
        # Explicit encoding: the platform default is locale-dependent.
        with open(config_path, encoding="utf-8") as f:
            config = yaml.safe_load(f) or {}
        # A list or scalar document would otherwise fail later with an opaque
        # AttributeError on .setdefault().
        if not isinstance(config, dict):
            raise ValueError(
                f"Config file {config_path} must contain a YAML mapping, "
                f"got {type(config).__name__}"
            )
    else:
        config = _default_config()

    _apply_env_overrides(config)
    _ensure_defaults(config)
    return config
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _find_config() -> Path:
|
|
70
|
+
"""Find config file: cwd ./memorius.yaml, then ~/.memorius/config.yaml."""
|
|
71
|
+
cwd_config = Path.cwd() / "memorius.yaml"
|
|
72
|
+
if cwd_config.exists():
|
|
73
|
+
return cwd_config
|
|
74
|
+
return DEFAULT_CONFIG_PATH
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _default_config() -> dict[str, Any]:
    """Parse the built-in default YAML template into a config dict."""
    import yaml

    parsed = yaml.safe_load(DEFAULT_CONFIG_YAML)
    return parsed
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _ensure_defaults(config: dict[str, Any]):
|
|
83
|
+
"""Fill in missing keys with defaults."""
|
|
84
|
+
storage = config.setdefault("storage", {})
|
|
85
|
+
storage.setdefault("type", "chroma")
|
|
86
|
+
storage.setdefault("path", "~/.memorius/data")
|
|
87
|
+
|
|
88
|
+
embeddings = config.setdefault("embeddings", {})
|
|
89
|
+
embeddings.setdefault("provider", "chroma-default")
|
|
90
|
+
embeddings.setdefault("model", "all-MiniLM-L6-v2")
|
|
91
|
+
|
|
92
|
+
server = config.setdefault("server", {})
|
|
93
|
+
server.setdefault("mcp_port", 8911)
|
|
94
|
+
server.setdefault("rest_port", 8912)
|
|
95
|
+
server.setdefault("host", "127.0.0.1")
|
|
96
|
+
|
|
97
|
+
vault_cfg = config.setdefault("vault", {})
|
|
98
|
+
vault_cfg.setdefault("default", "main")
|
|
99
|
+
vault_cfg.setdefault("max_note_size", 1000)
|
|
100
|
+
|
|
101
|
+
hooks = config.setdefault("hooks", {})
|
|
102
|
+
hooks.setdefault("enabled", True)
|
|
103
|
+
hooks.setdefault("config", "~/.memorius/hooks.yaml")
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _apply_env_overrides(config: dict[str, Any]):
|
|
107
|
+
"""Override config values from environment variables."""
|
|
108
|
+
overrides = {
|
|
109
|
+
"MEMORIUS_STORAGE_PATH": ("storage", "path"),
|
|
110
|
+
"MEMORIUS_EMBEDDINGS_PROVIDER": ("embeddings", "provider"),
|
|
111
|
+
"MEMORIUS_MCP_PORT": ("server", "mcp_port"),
|
|
112
|
+
"MEMORIUS_REST_PORT": ("server", "rest_port"),
|
|
113
|
+
"MEMORIUS_HOST": ("server", "host"),
|
|
114
|
+
"MEMORIUS_DEFAULT_VAULT": ("vault", "default"),
|
|
115
|
+
}
|
|
116
|
+
for env_key, (section, key) in overrides.items():
|
|
117
|
+
val = os.environ.get(env_key)
|
|
118
|
+
if val is not None:
|
|
119
|
+
config.setdefault(section, {})[key] = _coerce(val)
|
|
120
|
+
|
|
121
|
+
# Special handling for OpenAI key
|
|
122
|
+
openai_key = os.environ.get("MEMORIUS_OPENAI_API_KEY") or os.environ.get("OPENAI_API_KEY")
|
|
123
|
+
if openai_key:
|
|
124
|
+
emb = config.setdefault("embeddings", {})
|
|
125
|
+
emb.setdefault("openai", {})["api_key"] = openai_key
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _coerce(val: str) -> int | str:
|
|
129
|
+
"""Coerce string to int if possible, else return as-is."""
|
|
130
|
+
try:
|
|
131
|
+
return int(val)
|
|
132
|
+
except ValueError:
|
|
133
|
+
return val
|
memorius/embeddings.py
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
"""Embedding providers for memorius — abstracted vector embedding interface."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from typing import Any, ClassVar
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class EmbeddingProvider(ABC):
    """Abstract embedding provider. Returns normalized float vectors.

    Subclasses implement ``embed``, the ``dimension`` property, and the
    ``from_config`` alternate constructor.
    """

    @abstractmethod
    def embed(self, texts: list[str]) -> list[list[float]]:
        """Embed a list of texts into vectors, one vector per input text."""
        ...

    @property
    @abstractmethod
    def dimension(self) -> int:
        """Return the embedding vector dimension."""
        ...

    @classmethod
    @abstractmethod
    def from_config(cls, config: dict[str, Any]) -> "EmbeddingProvider":
        """Create a provider from a config dict."""
        ...
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class SentenceTransformerProvider(EmbeddingProvider):
    """Local sentence-transformers embeddings (offline, no API key needed).

    The model is loaded lazily on first use, so constructing the provider
    does not import ``sentence_transformers``.
    """

    # Model used when the config does not name one.
    _model_key: ClassVar[str] = "all-MiniLM-L6-v2"

    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        self._model_name = model_name
        self._model = None  # lazy load
        self._dim = 384  # all-MiniLM-L6-v2 default

    def _lazy_load(self):
        """Load the model on first call and record its embedding dimension.

        Raises:
            ImportError: If ``sentence_transformers`` is not installed.
        """
        if self._model is not None:
            return
        # Only the import is guarded, so an ImportError raised while the
        # model itself loads is not misreported as "not installed".
        try:
            from sentence_transformers import SentenceTransformer
        except ImportError as err:
            raise ImportError(
                "sentence-transformers not installed. "
                "Install: pip install memorius[local-embeddings]"
            ) from err
        self._model = SentenceTransformer(self._model_name)
        self._dim = self._model.get_sentence_embedding_dimension()

    def embed(self, texts: list[str]) -> list[list[float]]:
        """Encode ``texts`` with normalized embeddings; returns plain lists."""
        self._lazy_load()
        embeddings = self._model.encode(texts, normalize_embeddings=True)
        return [emb.tolist() for emb in embeddings]

    @property
    def dimension(self) -> int:
        """Embedding dimension reported by the model (loads it if needed)."""
        self._lazy_load()
        return self._dim

    @classmethod
    def from_config(cls, config: dict[str, Any]) -> "SentenceTransformerProvider":
        """Build from config; ``model`` missing or empty selects the default."""
        # ``or`` also covers ``model: null`` / ``model: ""`` in YAML.
        model = config.get("model") or cls._model_key
        return cls(model_name=model)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class OpenAIEmbeddingProvider(EmbeddingProvider):
    """OpenAI API-based embeddings.

    The client is created lazily on first ``embed`` call.
    """

    # Output dimensions of the known OpenAI embedding models.
    _KNOWN_DIMS: ClassVar[dict[str, int]] = {
        "text-embedding-3-small": 1536,
        "text-embedding-3-large": 3072,
        "text-embedding-ada-002": 1536,
    }
    # Shared ``embeddings.model`` default that names a local model, not an
    # OpenAI one; from_config must not forward it to the API.
    _LOCAL_DEFAULT_MODEL: ClassVar[str] = "all-MiniLM-L6-v2"

    def __init__(self, api_key: str, model: str = "text-embedding-3-small"):
        self._api_key = api_key
        self._model = model
        # Known models use the table (the old substring test reported 3072
        # for text-embedding-ada-002); unknown names keep the old heuristic.
        self._dim = self._KNOWN_DIMS.get(model, 1536 if "3-small" in model else 3072)
        self._client = None

    def _lazy_load(self):
        """Create the OpenAI client on first call.

        Raises:
            ImportError: If the ``openai`` package is not installed.
        """
        if self._client is not None:
            return
        try:
            from openai import OpenAI
        except ImportError as err:
            raise ImportError(
                "openai package not installed. "
                "Install: pip install memorius[openai]"
            ) from err
        self._client = OpenAI(api_key=self._api_key)

    def embed(self, texts: list[str]) -> list[list[float]]:
        """Request embeddings for ``texts`` and return them in input order."""
        self._lazy_load()
        resp = self._client.embeddings.create(input=texts, model=self._model)
        # Sort by index to preserve order
        sorted_data = sorted(resp.data, key=lambda d: d.index)
        return [d.embedding for d in sorted_data]

    @property
    def dimension(self) -> int:
        """Embedding dimension assumed for the configured model."""
        return self._dim

    @classmethod
    def from_config(cls, config: dict[str, Any]) -> "OpenAIEmbeddingProvider":
        """Build from the ``embeddings`` config section.

        The API key and model are read from the nested ``openai`` mapping
        first, then from the section itself. The section-level ``model`` is
        ignored when it is the local default (``all-MiniLM-L6-v2``), in which
        case ``text-embedding-3-small`` is used.

        Raises:
            ValueError: If no API key is configured.
        """
        # ``or {}`` covers a bare ``openai:`` key, which loads as None.
        openai_cfg = config.get("openai") or {}
        api_key = openai_cfg.get("api_key", config.get("api_key", ""))
        model = openai_cfg.get("model")
        if not model:
            shared = config.get("model")
            if shared and shared != cls._LOCAL_DEFAULT_MODEL:
                model = shared
            else:
                model = "text-embedding-3-small"
        if not api_key:
            raise ValueError("OpenAI API key required. Set MEMORIUS_OPENAI_API_KEY or OPENAI_API_KEY")
        return cls(api_key=api_key, model=model)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class ChromaDefaultProvider(EmbeddingProvider):
    """ChromaDB's built-in embedding function (ONNX all-MiniLM-L6-v2).

    No extra dependencies — ChromaDB ships its own ONNX runtime.
    """

    def __init__(self):
        import chromadb.utils.embedding_functions as ef

        self._fn = ef.DefaultEmbeddingFunction()
        self._dim = 384

    def embed(self, texts: list[str]) -> list[list[float]]:
        """Embed ``texts`` and return plain lists of Python floats.

        The result of Chroma's function is normalised element by element so
        the declared return type holds whether it yields lists or array-like
        vectors (NOTE(review): exact upstream return type not verified here).
        """
        result = self._fn(texts)
        return [[float(x) for x in vec] for vec in result]

    @property
    def dimension(self) -> int:
        """Fixed embedding dimension (384)."""
        return self._dim

    @classmethod
    def from_config(cls, config: dict[str, Any]) -> "ChromaDefaultProvider":
        """Build the provider; ``config`` is not consulted."""
        return cls()
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
class EmbeddingFactory:
    """Create embedding providers from config."""

    # Provider name -> class. Shared at class level; register() mutates it.
    _registry: dict[str, type[EmbeddingProvider]] = {
        "chroma-default": ChromaDefaultProvider,
        "sentence-transformers": SentenceTransformerProvider,
        "openai": OpenAIEmbeddingProvider,
    }

    @classmethod
    def create(cls, config: dict[str, Any]) -> EmbeddingProvider:
        """Instantiate the provider named by ``config["provider"]``.

        A missing or empty provider falls back to ``chroma-default``, the
        same default the config loader and ``memorius init`` use.

        Raises:
            ValueError: If the provider name is not registered.
        """
        provider_name = config.get("provider") or "chroma-default"
        provider_cls = cls._registry.get(provider_name)
        if provider_cls is None:
            raise ValueError(
                f"Unknown embedding provider: {provider_name}. "
                f"Available: {list(cls._registry.keys())}"
            )
        return provider_cls.from_config(config)

    @classmethod
    def register(cls, name: str, provider_cls: type[EmbeddingProvider]):
        """Register a custom embedding provider."""
        cls._registry[name] = provider_cls
|