omni-agent-memory 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
omni/__init__.py ADDED
@@ -0,0 +1,10 @@
1
+ """OmniService — an independent, namespaced memory service.
2
+
3
+ OmniMemory packaged as a standalone FastAPI service with a local-LLM (Ollama)
4
+ memory pipeline. Clients (Claude Code via hooks + MCP, or any agent) push raw
5
+ interactions to /ingest and fetch context-scoped memory from /retrieve.
6
+
7
+ See external_docs/OmniMemory_design.md → "Claude Code Integration".
8
+ """
9
+
10
+ __version__ = "0.1.0"
omni/cli.py ADDED
@@ -0,0 +1,289 @@
1
+ """omni — thin CLI client for OmniService (invoked by Claude Code hooks).
2
+
3
+ Subcommands:
4
+ omni serve start the FastAPI service
5
+ omni mcp start the MCP stdio server
6
+ omni ingest [--ns N] [--transcript P] push new transcript turns (UserPromptSubmit + Stop hooks)
7
+ omni retrieve [--ns N] [--query Q | --session-start] fetch memory (SessionStart hook)
8
+ omni verify [--ns N] force a VERIFY flush
9
+ omni snapshot [--ns N] print full assembled memory
10
+
11
+ Hook usage reads the hook payload JSON from stdin (transcript_path, cwd, prompt).
12
+ Namespace resolution order: --ns > stdin cwd > $CLAUDE_PROJECT_DIR > cwd.
13
+ """
14
+
15
+ import argparse
16
+ import json
17
+ import os
18
+ import sys
19
+ from typing import List, Optional
20
+
21
+ import httpx
22
+
23
+ from omni import config, storage
24
+
25
+
26
def _base_url() -> str:
    """Return the base URL of the OmniService HTTP API.

    ``$OMNI_URL`` takes precedence when it is set; otherwise the URL is
    assembled from ``config.HOST`` and ``config.PORT``.
    """
    configured = f"http://{config.HOST}:{config.PORT}"
    return os.environ.get("OMNI_URL", configured)
28
+
29
+
30
def _read_stdin_json() -> dict:
    """Read the hook payload (a JSON object) from stdin.

    Returns:
        The decoded JSON object, or ``{}`` when stdin is missing, is a TTY,
        is empty, is not valid JSON, or decodes to something other than an
        object. Callers use ``.get(...)`` on the result, so a non-dict value
        (e.g. a JSON list or string) must never escape from here.
    """
    stream = sys.stdin
    # No piped input (interactive run, or stdin closed): nothing to parse.
    if stream is None or stream.isatty():
        return {}
    raw = stream.read().strip()
    if not raw:
        return {}
    try:
        payload = json.loads(raw)
    except (ValueError, RecursionError):
        # Malformed (or pathologically nested) payload: treat as "no hook data".
        return {}
    # Valid JSON that is not an object cannot serve as a hook payload.
    return payload if isinstance(payload, dict) else {}
40
+
41
+
42
def _resolve_ns(arg_ns: Optional[str], hook: dict) -> str:
    """Pick the namespace for a request.

    The first non-empty candidate wins, in this order: the ``--ns`` argument,
    the ``cwd`` field of the hook payload, ``$CLAUDE_PROJECT_DIR``, and
    finally the process working directory.
    """
    if arg_ns:
        return arg_ns
    hook_cwd = hook.get("cwd")
    if hook_cwd:
        return hook_cwd
    project_dir = os.environ.get("CLAUDE_PROJECT_DIR")
    if project_dir:
        return project_dir
    return os.getcwd()
45
+
46
+
47
def _resolve_client(arg_client: Optional[str]) -> str:
    """Return the client id: the ``--client`` value if given, else ``config.CLIENT_ID``."""
    if arg_client:
        return arg_client
    return config.CLIENT_ID
49
+
50
+
51
+ # ---------------------------------------------------------------------------
52
+ # Transcript parsing (Claude Code JSONL)
53
+ # ---------------------------------------------------------------------------
54
+
55
def _content_to_text(content) -> str:
    """Flatten a transcript message ``content`` value into plain text.

    A string is returned unchanged. A list is scanned for dict blocks: a
    ``type == "text"`` block with a non-empty ``text`` contributes that text;
    otherwise a block whose ``content`` is a string contributes that string.
    The pieces are joined with newlines. Any other input yields ``""``.
    """
    if isinstance(content, str):
        return content
    if not isinstance(content, list):
        return ""

    pieces = []
    for item in content:
        if not isinstance(item, dict):
            continue
        if item.get("type") == "text" and item.get("text"):
            pieces.append(item["text"])
            continue
        # Fallback: blocks that carry their payload as a plain-string "content".
        nested = item.get("content")
        if isinstance(nested, str):
            pieces.append(nested)
    return "\n".join(pieces)
68
+
69
+
70
def parse_transcript_turns(path: str) -> List[dict]:
    """Extract ordered user/assistant text turns from a Claude Code transcript JSONL.

    Args:
        path: Path to the transcript file (one JSON document per line).

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts in file order. Lines
        that are blank, are not valid JSON, lack a dict ``message``, have a
        role other than ``user``/``assistant``, or flatten to empty text are
        skipped. An empty/missing path, or a path that is not a regular file,
        yields ``[]``.
    """
    turns: List[dict] = []
    # isfile (not exists): a directory path must not reach open().
    if not path or not os.path.isfile(path):
        return turns
    # Read as UTF-8 regardless of the platform locale; undecodable bytes are
    # replaced so one bad byte cannot abort the whole (best-effort) parse.
    with open(path, encoding="utf-8", errors="replace") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                entry = json.loads(line)
            except (ValueError, RecursionError):
                continue
            msg = entry.get("message") if isinstance(entry, dict) else None
            if not isinstance(msg, dict):
                continue
            role = msg.get("role")
            if role not in ("user", "assistant"):
                continue
            text = _content_to_text(msg.get("content")).strip()
            if text:
                turns.append({"role": role, "content": text})
    return turns
94
+
95
+
96
+ # ---------------------------------------------------------------------------
97
+ # Commands
98
+ # ---------------------------------------------------------------------------
99
+
100
def cmd_serve(args):
    """Handle ``omni serve``: run the service entry point (``args`` is unused)."""
    # Imported inside the function, so the server module is only loaded when
    # this subcommand actually runs.
    from omni import server

    server.main()
103
+
104
+
105
def cmd_mcp(args):
    """Handle ``omni mcp``: run the MCP server entry point (``args`` is unused)."""
    # Imported inside the function, so the MCP module is only loaded when
    # this subcommand actually runs.
    from omni import mcp_server

    mcp_server.main()
108
+
109
+
110
def cmd_ingest(args):
    """Handle ``omni ingest``: push not-yet-ingested transcript turns to ``/ingest``.

    The transcript path comes from ``--transcript`` or the hook payload's
    ``transcript_path``. A per-transcript cursor (keyed by the absolute
    transcript path) records how many turns were already sent; only turns
    past that offset are posted, and the cursor is advanced only after the
    service accepted the request. A one-line JSON status is printed in every
    case; failures are reported, never raised.
    """
    payload = _read_stdin_json()
    namespace = _resolve_ns(args.ns, payload)
    client_id = _resolve_client(args.client)

    transcript_path = args.transcript or payload.get("transcript_path")
    if not transcript_path:
        print(json.dumps({"ok": False, "error": "no transcript_path"}))
        return

    turns = parse_transcript_turns(transcript_path)

    # Cursor lookup: skip everything already archived for this transcript.
    # (storage.ns_dir presumably maps client+namespace to a storage dir — confirm.)
    store_dir = storage.ns_dir(client_id, namespace)
    cursor = storage.load_cursor(store_dir)
    source_key = os.path.abspath(transcript_path)
    offset = int(cursor.get(source_key, 0))
    pending = turns[offset:]
    if not pending:
        print(json.dumps({"ok": True, "ingested": 0,
                          "client_id": client_id, "namespace": namespace}))
        return

    request_body = {
        "client_id": client_id,
        "namespace": namespace,
        "turns": pending,
        "timestamp": payload.get("timestamp", ""),
        "source": source_key,
    }
    try:
        response = httpx.post(f"{_base_url()}/ingest", json=request_body, timeout=30)
        response.raise_for_status()
    except Exception as exc:
        # Cursor is left untouched, so these turns are retried on the next run.
        print(json.dumps({"ok": False, "error": str(exc)}))
        return

    cursor[source_key] = len(turns)
    storage.save_cursor(store_dir, cursor)
    print(json.dumps({"ok": True, "ingested": len(pending),
                      "client_id": client_id, "namespace": namespace}))
146
+
147
+
148
def cmd_retrieve(args):
    """Handle ``omni retrieve``: fetch memory context from ``/retrieve`` and print it.

    With ``--session-start`` the request uses mode ``session-start`` and an
    empty query; otherwise mode ``search`` with ``--query`` or the hook
    payload's ``prompt``. On any failure a message goes to stderr and nothing
    is printed to stdout. With ``--session-start`` and without ``--raw`` the
    context is wrapped in a SessionStart hook JSON envelope; in every other
    case the bare context is printed.
    """
    payload = _read_stdin_json()
    namespace = _resolve_ns(args.ns, payload)
    client_id = _resolve_client(args.client)

    session_start = args.session_start
    mode = "session-start" if session_start else "search"
    query = "" if session_start else (args.query or payload.get("prompt", ""))

    request_body = {
        "client_id": client_id,
        "namespace": namespace,
        "query": query,
        "mode": mode,
    }
    try:
        response = httpx.post(f"{_base_url()}/retrieve", json=request_body, timeout=30)
        response.raise_for_status()
        context = response.json().get("context", "")
    except Exception as exc:
        # Never break the session on a memory miss.
        sys.stderr.write(f"omni retrieve failed: {exc}\n")
        return

    if not session_start or args.raw:
        print(context)
        return

    envelope = {"hookSpecificOutput": {
        "hookEventName": "SessionStart",
        "additionalContext": f"# Project memory (OmniService)\n\n{context}",
    }}
    print(json.dumps(envelope))
175
+
176
+
177
def cmd_verify(args):
    """Handle ``omni verify``: ask the service to run a VERIFY flush and print its reply.

    On success the service's JSON response is printed on one line. If the
    request fails (service unreachable, HTTP error status, non-JSON body) a
    ``{"ok": false, "error": ...}`` line is printed instead — the same shape
    ``omni ingest`` uses — and the process exits with status 1 rather than
    dumping a traceback.
    """
    hook = _read_stdin_json()
    ns = _resolve_ns(args.ns, hook)
    client = _resolve_client(args.client)
    try:
        # Long timeout: this call waits for the flush itself.
        resp = httpx.post(f"{_base_url()}/verify",
                          json={"client_id": client, "namespace": ns}, timeout=300)
        resp.raise_for_status()
        result = resp.json()
    except Exception as e:
        print(json.dumps({"ok": False, "error": str(e)}))
        sys.exit(1)
    print(json.dumps(result))
183
+
184
+
185
def cmd_snapshot(args):
    """Handle ``omni snapshot``: print the ``text`` field of the service's ``/snapshot`` reply.

    stdout carries only the snapshot text. If the request fails (service
    unreachable, HTTP error status, non-JSON body) the reason is written to
    stderr and the process exits with status 1, instead of a traceback or a
    silently empty snapshot.
    """
    hook = _read_stdin_json()
    ns = _resolve_ns(args.ns, hook)
    client = _resolve_client(args.client)
    try:
        resp = httpx.get(f"{_base_url()}/snapshot",
                         params={"client_id": client, "namespace": ns}, timeout=30)
        resp.raise_for_status()
        text = resp.json().get("text", "")
    except Exception as e:
        sys.stderr.write(f"omni snapshot failed: {e}\n")
        sys.exit(1)
    print(text)
192
+
193
+
194
# Claude Code wiring: which `omni` command runs on which hook event.
#   SessionStart     -> seed memory into the session context.
#   UserPromptSubmit -> capture the user turn as soon as it is submitted
#                       (earliest availability + crash resilience).
#   Stop             -> capture the completed exchange after each assistant response.
# Both capture points go through the same cursor-deduped `omni ingest`, so each
# transcript turn is ingested exactly once.
_HOOK_BLOCK = {
    event: [{"hooks": [{"type": "command", "command": command}]}]
    for event, command in (
        ("SessionStart", "omni retrieve --session-start"),
        ("UserPromptSubmit", "omni ingest"),
        ("Stop", "omni ingest"),
    )
}
# MCP server entry written under "mcpServers" -> "omni" in .mcp.json.
_MCP_OMNI = dict(command="omni", args=["mcp"])
204
+
205
+
206
def _load_json(path: str) -> dict:
    """Best-effort load of a JSON object from ``path``.

    Returns:
        The decoded object, or ``{}`` when the file is missing, unreadable,
        not valid JSON, or holds a non-object value (list, string, number…).
        Callers treat the result as a dict (``setdefault``), so a non-dict
        value must never be returned.
    """
    try:
        # JSON config files are UTF-8; do not depend on the platform locale.
        with open(path, encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError, RecursionError):
        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
        return {}
    return data if isinstance(data, dict) else {}
212
+
213
+
214
def _load_json_strict(path: str) -> dict:
    """Load ``path`` as a JSON object; ``{}`` if missing or blank; ValueError if unusable."""
    try:
        with open(path, encoding="utf-8") as f:
            raw = f.read()
    except FileNotFoundError:
        return {}
    if not raw.strip():
        return {}
    try:
        data = json.loads(raw)
    except ValueError as exc:
        raise ValueError(f"{path}: not valid JSON ({exc})") from exc
    if not isinstance(data, dict):
        raise ValueError(f"{path}: top-level JSON value is not an object")
    return data


def _has_hook_command(entries: list, command: str) -> bool:
    """Return True if any well-formed hook entry in ``entries`` already runs ``command``."""
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        inner = entry.get("hooks")
        if not isinstance(inner, list):
            continue
        if any(isinstance(h, dict) and h.get("command") == command for h in inner):
            return True
    return False


def cmd_install_hooks(args):
    """Idempotently wire Claude Code hooks + MCP into a project (merges, never clobbers).

    Target project: ``--project``, else ``$CLAUDE_PROJECT_DIR``, else the cwd.
    Hook entries from ``_HOOK_BLOCK`` are appended to
    ``<project>/.claude/settings.json`` for every event that does not already
    run the same command, and an ``omni`` server is added to
    ``<project>/.mcp.json`` unless one is present.

    Both files are read and validated *before* either is written. If an
    existing file is not valid JSON, is not a JSON object, or holds a
    wrong-typed ``hooks`` / ``mcpServers`` value, nothing is written: a
    ``{"ok": false, "error": ...}`` line is printed and the process exits with
    status 1. (Treating an unparseable file as empty would overwrite the
    user's configuration.) On success a one-line JSON summary is printed.
    """
    proj = os.path.abspath(args.project or os.environ.get("CLAUDE_PROJECT_DIR") or os.getcwd())
    sdir = os.path.join(proj, ".claude")
    spath = os.path.join(sdir, "settings.json")
    mpath = os.path.join(proj, ".mcp.json")

    try:
        settings = _load_json_strict(spath)
        mcfg = _load_json_strict(mpath)

        hooks = settings.setdefault("hooks", {})
        if not isinstance(hooks, dict):
            raise ValueError(f'{spath}: "hooks" is not a JSON object')
        added = []
        for event, entries in _HOOK_BLOCK.items():
            cur = hooks.setdefault(event, [])
            if not isinstance(cur, list):
                raise ValueError(f'{spath}: hooks["{event}"] is not a JSON array')
            # Each _HOOK_BLOCK event carries exactly one command; use it as the
            # identity for the "already installed?" check.
            cmd = entries[0]["hooks"][0]["command"]
            if not _has_hook_command(cur, cmd):
                cur.extend(entries)
                added.append(event)

        servers = mcfg.setdefault("mcpServers", {})
        if not isinstance(servers, dict):
            raise ValueError(f'{mpath}: "mcpServers" is not a JSON object')
        mcp_added = "omni" not in servers
        servers.setdefault("omni", dict(_MCP_OMNI))

        # Everything validated — only now touch the filesystem.
        os.makedirs(sdir, exist_ok=True)
        for path, data in ((spath, settings), (mpath, mcfg)):
            with open(path, "w", encoding="utf-8") as f:
                json.dump(data, f, indent=2)
    except (OSError, ValueError) as exc:
        print(json.dumps({"ok": False, "error": str(exc)}))
        sys.exit(1)

    print(json.dumps({"ok": True, "project": proj, "settings": spath,
                      "hooks_added": added, "mcp": mpath, "mcp_added": mcp_added}))
243
+
244
+
245
def build_parser() -> argparse.ArgumentParser:
    """Build the ``omni`` argument parser.

    A subcommand is required. Each subparser stores its handler in ``func``
    so the caller can dispatch with ``args.func(args)``.
    """
    parser = argparse.ArgumentParser(prog="omni", description="OmniService client")
    commands = parser.add_subparsers(dest="cmd", required=True)

    def add_command(name, help_text, handler):
        # Register one subcommand and bind its handler.
        subparser = commands.add_parser(name, help=help_text)
        subparser.set_defaults(func=handler)
        return subparser

    add_command("serve", "start the FastAPI service", cmd_serve)
    add_command("mcp", "start the MCP stdio server", cmd_mcp)

    ingest = add_command("ingest", "push new transcript turns", cmd_ingest)
    ingest.add_argument("--ns")
    ingest.add_argument("--client",
                        help="client id (default: $OMNI_CLIENT_ID or 'claude-code')")
    ingest.add_argument("--transcript")

    retrieve = add_command("retrieve", "fetch memory context", cmd_retrieve)
    for flag in ("--ns", "--client", "--query"):
        retrieve.add_argument(flag)
    retrieve.add_argument("--session-start", action="store_true")
    retrieve.add_argument("--raw", action="store_true",
                          help="print context only (no hook JSON)")

    # verify and snapshot take the same two scoping flags.
    for name, help_text, handler in (
        ("verify", "force a VERIFY flush", cmd_verify),
        ("snapshot", "print full assembled memory", cmd_snapshot),
    ):
        scoped = add_command(name, help_text, handler)
        scoped.add_argument("--ns")
        scoped.add_argument("--client")

    install = add_command(
        "install-hooks",
        "wire Claude Code hooks + MCP into a project (idempotent)",
        cmd_install_hooks,
    )
    install.add_argument("--project",
                         help="project dir (default: $CLAUDE_PROJECT_DIR or cwd)")
    return parser
281
+
282
+
283
def main():
    """CLI entry point: parse the command line and run the chosen subcommand handler."""
    parsed = build_parser().parse_args()
    handler = parsed.func
    handler(parsed)


if __name__ == "__main__":
    main()
omni/config.py ADDED
@@ -0,0 +1,64 @@
1
+ """Configuration for OmniService.
2
+
3
+ All values can be overridden via environment variables (OMNI_*). Defaults are
4
+ chosen to sit alongside Ollama (:11434) without collisions.
5
+ """
6
+
7
+ import os
8
+ from pathlib import Path
9
+
10
+
11
def _env(name: str, default: str) -> str:
    """Return the environment variable ``name``, or ``default`` when it is unset."""
    try:
        return os.environ[name]
    except KeyError:
        return default
13
+
14
+
15
# Storage root — service-owned, NOT under ~/.claude. One subdir per namespace.
STORAGE_ROOT = Path(_env("OMNI_STORAGE_ROOT", str(Path.home() / ".omni"))).expanduser()

# HTTP service
HOST = _env("OMNI_HOST", "127.0.0.1")
PORT = int(_env("OMNI_PORT", "11435"))

# Client identity (required on every request; partitions storage per client).
# CLI/MCP clients default to this; test runs use a distinct id (e.g. "test").
CLIENT_ID = _env("OMNI_CLIENT_ID", "claude-code")

# Local LLM (Ollama, OpenAI-compatible endpoint)
OLLAMA_BASE_URL = _env("OMNI_OLLAMA_BASE_URL", "http://localhost:11434/v1")
# Gemma 4 E4B. NOTE: the OpenAI /v1 path runs Ollama at its DEFAULT num_ctx (4096) and
# silently truncates long prompts; a baked-in-context variant is required to use more.
# Create once: printf 'FROM gemma4:e4b\nPARAMETER num_ctx 32768\n' > Modelfile && \
#              ollama create gemma4-ctx32k -f Modelfile
EXTRACT_MODEL = _env("OMNI_EXTRACT_MODEL", "gemma4-ctx32k")  # EXTRACT / RELATE / COMPRESS
VERIFY_MODEL = _env("OMNI_VERIFY_MODEL", "gemma4-ctx32k")  # VERIFY (stronger reasoning)

# Ensemble answering (docs/ensemble_design.md): K context strategies -> answers -> rerank.
ANSWER_MODEL = _env("OMNI_ANSWER_MODEL", "claude-code")
RERANK_MODEL = _env("OMNI_RERANK_MODEL", "claude-code")
# Comma-separated strategy names. Entries are stripped and empty ones dropped, so
# "search, raw" or a trailing comma does not produce names like " raw" or "".
ENSEMBLE_STRATEGIES = [
    name.strip()
    for name in _env("OMNI_ENSEMBLE_STRATEGIES", "search,raw,chain,uncertain").split(",")
    if name.strip()
]

# Fine-tuned student served via an OpenAI-compatible endpoint (training/serve_student.py).
# When a memory-op model name == STUDENT_MODEL and STUDENT_URL is set, call_local routes
# there instead of Ollama. Embeddings still use Ollama.
STUDENT_MODEL = _env("OMNI_STUDENT_MODEL", "omnimem-ops")
STUDENT_URL = _env("OMNI_STUDENT_URL", "")  # e.g. http://127.0.0.1:11437/v1

# Background pipeline tuning
# EXTRACT over a fact-dense session can emit many records; too small a cap truncates the
# JSON mid-stream. Parsing is now salvage-tolerant, but a roomier cap avoids the loss.
EXTRACT_MAX_TOKENS = int(_env("OMNI_EXTRACT_MAX_TOKENS", "4096"))
VERIFY_MAX_TOKENS = int(_env("OMNI_VERIFY_MAX_TOKENS", "3072"))
EXTRACT_WORKERS = int(_env("OMNI_EXTRACT_WORKERS", "4"))
VERIFY_DEBOUNCE_SECONDS = float(_env("OMNI_VERIFY_DEBOUNCE_SECONDS", "20"))
VERIFY_RAW_WINDOW = int(_env("OMNI_VERIFY_RAW_WINDOW", "8"))  # recent raw files VERIFY re-reads
VERIFY_CHUNK = int(_env("OMNI_VERIFY_CHUNK", "6"))  # raw sessions per focused VERIFY call

# Retrieval shaping
RETRIEVE_TOP_PAGES = int(_env("OMNI_RETRIEVE_TOP_PAGES", "8"))
RETRIEVE_TOP_HISTORY = int(_env("OMNI_RETRIEVE_TOP_HISTORY", "30"))

# Vector index (semantic retrieval + Tr revision-chain reconstruction)
# Enabled only when the variable is exactly "1"; any other value disables it.
VECTOR_ENABLED = _env("OMNI_VECTOR_ENABLED", "1") == "1"
EMBED_MODEL = _env("OMNI_EMBED_MODEL", "nomic-embed-text")
VECTOR_TOPK = int(_env("OMNI_VECTOR_TOPK", "12"))
RECONSTRUCT_MAX_ENTITIES = int(_env("OMNI_RECONSTRUCT_MAX_ENTITIES", "12"))