knowledge-worker 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- knowledge_worker-0.6.0.dist-info/METADATA +365 -0
- knowledge_worker-0.6.0.dist-info/RECORD +27 -0
- knowledge_worker-0.6.0.dist-info/WHEEL +5 -0
- knowledge_worker-0.6.0.dist-info/entry_points.txt +3 -0
- knowledge_worker-0.6.0.dist-info/licenses/LICENSE +21 -0
- knowledge_worker-0.6.0.dist-info/top_level.txt +2 -0
- mygraph/__init__.py +23 -0
- mygraph/anthropic_client.py +199 -0
- mygraph/audit.py +137 -0
- mygraph/check.py +273 -0
- mygraph/discover.py +654 -0
- mygraph/eval_log.py +36 -0
- mygraph/export_context.py +124 -0
- mygraph/extractor.py +243 -0
- mygraph/extractor_openai.py +165 -0
- mygraph/ingest.py +170 -0
- mygraph/memory_audit.py +1094 -0
- mygraph/merge.py +133 -0
- mygraph/mygraph.py +773 -0
- mygraph/owl_io.py +202 -0
- mygraph/review.py +151 -0
- mygraph/validator.py +149 -0
- mygraph/viz.py +409 -0
- ollama_proxy/eval_compare.py +185 -0
- ollama_proxy/extractor_adapter.py +168 -0
- ollama_proxy/proxy.py +143 -0
- ollama_proxy/server.py +194 -0
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
"""
|
|
2
|
+
extractor_adapter.py — local-Gemma drop-in for mygraph's extractor (v1.5).
|
|
3
|
+
|
|
4
|
+
mygraph/extractor.py uses Claude with native tool-use to emit a structured
|
|
5
|
+
candidates payload. Ollama models don't speak Anthropic tool-use; they speak
|
|
6
|
+
Ollama's `format` (JSON mode + optional JSON schema). This adapter:
|
|
7
|
+
|
|
8
|
+
- reuses the same prompt template + schema as mygraph/extractor.py
|
|
9
|
+
- calls Ollama with `format=<schema>` for constrained JSON output
|
|
10
|
+
- returns the same dict shape, so validator.py / review.py / merge.py work
|
|
11
|
+
without modification
|
|
12
|
+
|
|
13
|
+
Usage (drop-in):
|
|
14
|
+
from ollama_proxy.extractor_adapter import extract as gemma_extract
|
|
15
|
+
payload = gemma_extract(Path("notes.md"), out_path=Path("out.json"))
|
|
16
|
+
|
|
17
|
+
Or via the helper CLI:
|
|
18
|
+
python ollama_proxy/extractor_adapter.py path/to/file.md [out.json]
|
|
19
|
+
|
|
20
|
+
Env:
|
|
21
|
+
OLLAMA_DEFAULT_MODEL default model tag (gemma4:e4b)
|
|
22
|
+
OLLAMA_BASE_URL http://127.0.0.1:11434
|
|
23
|
+
GEMMA_NUM_CTX optional context window override (default 8192)
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import json
|
|
29
|
+
import os
|
|
30
|
+
import sys
|
|
31
|
+
from datetime import datetime, timezone
|
|
32
|
+
from pathlib import Path
|
|
33
|
+
|
|
34
|
+
# Make mygraph/ importable regardless of cwd.
|
|
35
|
+
_HERE = Path(__file__).resolve().parent
|
|
36
|
+
_MYGRAPH = _HERE.parent / "mygraph"
|
|
37
|
+
if str(_MYGRAPH) not in sys.path:
|
|
38
|
+
sys.path.insert(0, str(_MYGRAPH))
|
|
39
|
+
|
|
40
|
+
from mygraph import Graph, NODE_TYPES, EDGE_TYPES, slug # noqa: E402
|
|
41
|
+
from extractor import ( # noqa: E402
|
|
42
|
+
EXTRACTION_TOOL,
|
|
43
|
+
PROMPT_TEMPLATE,
|
|
44
|
+
build_source_decl,
|
|
45
|
+
ensure_provenance_edges,
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
try:
|
|
49
|
+
import httpx
|
|
50
|
+
except ImportError as e:
|
|
51
|
+
raise SystemExit(
|
|
52
|
+
"extractor_adapter: missing dep. Run: pip install httpx"
|
|
53
|
+
) from e
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "http://127.0.0.1:11434").rstrip("/")
|
|
57
|
+
DEFAULT_MODEL = os.environ.get("OLLAMA_DEFAULT_MODEL", "gemma4:e4b")
|
|
58
|
+
NUM_CTX = int(os.environ.get("GEMMA_NUM_CTX", "8192"))
|
|
59
|
+
|
|
60
|
+
# JSON schema mirrors mygraph/extractor.py's tool input_schema.
|
|
61
|
+
RESPONSE_SCHEMA = EXTRACTION_TOOL["input_schema"]
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _ollama_chat(prompt: str, model: str, schema: dict) -> dict:
    """Call Ollama's /api/chat with ``format=schema`` and return the parsed JSON.

    Args:
        prompt: Full prompt text, sent as a single ``user`` message.
        model: Ollama model tag to run.
        schema: JSON schema passed as ``format`` (Ollama structured-output mode).

    Returns:
        The JSON object decoded from the assistant message content.

    Raises:
        RuntimeError: on a non-200 response, an empty message, content that is
            not valid JSON even after removing a Markdown code fence, or JSON
            whose top level is not an object.
    """
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "stream": False,
        "format": schema,  # Ollama structured-output mode
        "options": {"num_ctx": NUM_CTX, "temperature": 0.2},
    }
    with httpx.Client(timeout=600.0) as c:
        r = c.post(f"{OLLAMA_BASE_URL}/api/chat", json=payload)
    if r.status_code != 200:
        raise RuntimeError(
            f"extractor_adapter: ollama returned {r.status_code}: {r.text[:500]}"
        )
    body = r.json()
    content = (body.get("message") or {}).get("content", "")
    if not content:
        raise RuntimeError(f"extractor_adapter: empty response from {model}")

    try:
        parsed = json.loads(content)
    except json.JSONDecodeError as e:
        # Fallback: strip code fences if the model wrapped output despite format hint.
        text = content.strip()
        if not text.startswith("```"):
            raise RuntimeError(
                f"extractor_adapter: model returned non-JSON: {content[:300]}"
            ) from e
        text = text[3:]
        if text.endswith("```"):
            text = text[:-3]
        # The opening fence line may carry a language tag ("json"). Drop that
        # line only when it is empty or a bare tag, so JSON that starts on the
        # fence line itself is not truncated.
        first_line, newline, remainder = text.partition("\n")
        tag = first_line.strip()
        if newline and (not tag or tag.isalnum()):
            text = remainder
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            # Report the fallback failure the same way as any other non-JSON reply.
            raise RuntimeError(
                f"extractor_adapter: model returned non-JSON: {content[:300]}"
            ) from e

    # Callers treat the result as a dict (they call .setdefault on it).
    if not isinstance(parsed, dict):
        raise RuntimeError(
            f"extractor_adapter: model returned non-object JSON: {content[:300]}"
        )
    return parsed
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def extract(md_path: Path, out_path: Path | None = None,
            model: str = DEFAULT_MODEL) -> dict:
    """Extract graph candidates from a markdown file using a local Ollama model.

    Builds the shared extraction prompt from the loaded graph and the source
    file, asks Ollama for a schema-constrained JSON payload, fills in any
    missing top-level keys, adds provenance edges where needed, stamps
    ``_meta``, and optionally writes the payload to ``out_path``.

    Args:
        md_path: Markdown file to extract from (read as UTF-8).
        out_path: If given, the payload is written there as indented JSON.
        model: Ollama model tag; defaults to ``DEFAULT_MODEL``.

    Returns:
        The candidates payload with ``source``, ``nodes``, ``edges`` and
        ``_meta`` keys present.
    """
    graph = Graph.load()
    source = build_source_decl(md_path)
    text = md_path.read_text(encoding="utf-8")
    known_ids = sorted(graph.nodes.keys())

    id_lines = [f" - {node_id}" for node_id in known_ids]
    template_fields = {
        "source_id": source["source_id"],
        "source_label": source["source_label"],
        "source_path": source["source_path"],
        "node_types": ", ".join(sorted(NODE_TYPES)),
        "edge_types": ", ".join(sorted(EDGE_TYPES)),
        "existing_ids": "\n".join(id_lines),
        "source_text": text,
    }
    payload = _ollama_chat(
        PROMPT_TEMPLATE.format(**template_fields),
        model=model,
        schema=RESPONSE_SCHEMA,
    )

    # Guarantee the top-level keys so downstream validation never hits a
    # missing one.
    fallback_source = {
        "id": source["source_id"],
        "label": source["source_label"],
        "body": "",
    }
    if "source" not in payload:
        payload["source"] = fallback_source
    for list_key in ("nodes", "edges"):
        if list_key not in payload:
            payload[list_key] = []

    added = ensure_provenance_edges(payload)
    if added:
        print(
            "extractor_adapter: gateway returned missing provenance edges; "
            f"synthesized {added} MENTIONED_IN edges.",
            file=sys.stderr,
        )

    meta = payload.setdefault("_meta", {})
    meta["source_path"] = source["source_path"]
    meta["ingested_at"] = source["ingested_at"]
    meta["model"] = model
    meta["backend"] = "ollama"

    if out_path:
        serialized = json.dumps(payload, indent=2)
        out_path.write_text(serialized, encoding="utf-8")
    return payload
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def main(argv: list[str]) -> int:
    """CLI entry point: ``<path/to/file.md> [out.json] [--model NAME]``.

    Args:
        argv: Command-line arguments WITHOUT the program name, i.e. what the
            ``__main__`` guard passes as ``sys.argv[1:]``. ``--model NAME``
            may appear anywhere among them.

    Returns:
        0 after a successful extraction, 1 on a usage error (no input file, or
        ``--model`` given without a value).
    """
    usage = "Usage: python extractor_adapter.py <path/to/file.md> [out.json] [--model NAME]"
    args = list(argv)  # work on a copy; never mutate the caller's list

    model = None
    if "--model" in args:
        i = args.index("--model")
        if i + 1 >= len(args):
            # "--model" was the last token: no value to read.
            print(usage)
            return 1
        model = args[i + 1]
        del args[i:i + 2]

    if not args:
        print(usage)
        return 1
    if model is None:
        model = DEFAULT_MODEL

    # First positional is the input file, the optional second is the output.
    md = Path(args[0]).expanduser().resolve()
    if len(args) > 1:
        out = Path(args[1]).expanduser().resolve()
    else:
        out = md.parent / f"{md.stem}.candidates.gemma.json"

    payload = extract(md, out, model=model)
    print(f"extractor_adapter: model={model} "
          f"nodes={len(payload.get('nodes', []))} "
          f"edges={len(payload.get('edges', []))} -> {out}")
    return 0
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
if __name__ == "__main__":
|
|
168
|
+
sys.exit(main(sys.argv[1:]))
|
ollama_proxy/proxy.py
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
"""
|
|
2
|
+
proxy.py — thin Ollama-API-compatible passthrough with logging (v1.5).
|
|
3
|
+
|
|
4
|
+
Why this exists alongside server.py (the MCP server):
|
|
5
|
+
- AnythingLLM and most external LLM clients speak Ollama's REST API directly,
|
|
6
|
+
NOT MCP. So the MCP server (server.py) is for Claude Code/Cowork; this
|
|
7
|
+
proxy is for AnythingLLM and other Ollama-compatible clients.
|
|
8
|
+
- Tailscale-expose THIS port (default 11435) instead of raw 11434 so we get:
|
|
9
|
+
* per-call logging (latency, model, prompt size, error)
|
|
10
|
+
* a single chokepoint for future auth/rate-limit if we ever leave the tailnet
|
|
11
|
+
- 100% pass-through: same request/response shape as Ollama. AnythingLLM points
|
|
12
|
+
at this URL and treats it as Ollama.
|
|
13
|
+
|
|
14
|
+
Usage:
|
|
15
|
+
python proxy.py # binds 127.0.0.1:11435 → 127.0.0.1:11434
|
|
16
|
+
python proxy.py --host 0.0.0.0 # listen on all interfaces (tailnet)
|
|
17
|
+
python proxy.py --port 8080
|
|
18
|
+
OLLAMA_BASE_URL=http://127.0.0.1:11434 python proxy.py
|
|
19
|
+
|
|
20
|
+
Deps: fastapi uvicorn httpx
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
import argparse
|
|
26
|
+
import json
|
|
27
|
+
import os
|
|
28
|
+
import sys
|
|
29
|
+
import time
|
|
30
|
+
from datetime import datetime, timezone
|
|
31
|
+
from pathlib import Path
|
|
32
|
+
|
|
33
|
+
try:
|
|
34
|
+
import httpx
|
|
35
|
+
from fastapi import FastAPI, Request
|
|
36
|
+
from fastapi.responses import JSONResponse, Response, StreamingResponse
|
|
37
|
+
import uvicorn
|
|
38
|
+
except ImportError as e:
|
|
39
|
+
print(
|
|
40
|
+
"proxy.py: missing deps. Install with:\n"
|
|
41
|
+
" pip install fastapi 'uvicorn[standard]' httpx",
|
|
42
|
+
file=sys.stderr,
|
|
43
|
+
)
|
|
44
|
+
raise SystemExit(1) from e
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "http://127.0.0.1:11434").rstrip("/")
|
|
48
|
+
LOG_PATH = Path(os.environ.get(
|
|
49
|
+
"OLLAMA_PROXY_LOG",
|
|
50
|
+
str(Path(__file__).parent / "proxy.log.jsonl"),
|
|
51
|
+
))
|
|
52
|
+
|
|
53
|
+
app = FastAPI(title="ollama-proxy", version="1.5.0")
|
|
54
|
+
_client = httpx.AsyncClient(base_url=OLLAMA_BASE_URL, timeout=300.0)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _log(record: dict) -> None:
    """Append ``record`` to the JSONL log at ``LOG_PATH``.

    A UTC ISO-8601 ``ts`` field is added to ``record`` (in place) when the
    caller did not supply one. Filesystem errors are ignored so that logging
    can never break a request.
    """
    if "ts" not in record:
        record["ts"] = datetime.now(timezone.utc).isoformat()
    try:
        with LOG_PATH.open("a", encoding="utf-8") as sink:
            sink.write(json.dumps(record, ensure_ascii=False) + "\n")
    except OSError:
        # Best effort only: an unwritable log must not fail the request.
        return
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@app.get("/healthz")
async def healthz() -> dict:
    """Report whether the upstream Ollama answers ``/api/tags``.

    Returns ``{"ok": True, "ollama": <base url>, "models": [...]}`` on
    success, or a 502 JSON response carrying the error text on any failure.
    """
    try:
        tags = await _client.get("/api/tags")
        tags.raise_for_status()
        installed = [entry["name"] for entry in tags.json().get("models", [])]
    except Exception as exc:
        # Any failure here means the upstream is not usable.
        return JSONResponse({"ok": False, "error": str(exc)}, status_code=502)
    return {"ok": True, "ollama": OLLAMA_BASE_URL, "models": installed}
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS"])
async def passthrough(path: str, request: Request):
    """Mirror a request to Ollama, relay the response, and log one record per call.

    POST bodies are inspected (best effort) for ``stream``, ``model`` and the
    prompt size. Streaming calls are relayed chunk by chunk; everything else
    is buffered. In both modes the upstream status code and content type are
    forwarded to the client.

    Args:
        path: Request path captured by the catch-all route (no leading slash).
        request: Incoming request; method, headers, query string and body are
            forwarded upstream.
    """
    body = await request.body()
    # Host and Content-Length are dropped so httpx sets them for the upstream call.
    headers = {k: v for k, v in request.headers.items()
               if k.lower() not in {"host", "content-length"}}
    target = f"/{path}"
    qs = request.url.query
    if qs:
        target = f"{target}?{qs}"

    # detect streaming vs buffered
    streaming = False
    model, prompt_len = None, 0
    if body and request.method == "POST":
        try:
            payload = json.loads(body)
            # Requests under api/ are treated as streaming unless "stream" says otherwise.
            streaming = bool(payload.get("stream", path.startswith("api/")))
            model = payload.get("model")
            prompt_len = len(json.dumps(payload.get("messages") or payload.get("prompt") or ""))
        except Exception:
            # Body is not a JSON object: forward it untouched and log its raw size.
            model, prompt_len = None, len(body)

    t0 = time.perf_counter()

    def log_call(status: int, stream: bool) -> None:
        """Write the per-request log record with the elapsed time so far."""
        _log({
            "kind": "request", "method": request.method, "path": target,
            "status": status, "model": model, "prompt_len": prompt_len,
            "duration_ms": int((time.perf_counter() - t0) * 1000),
            "stream": stream,
        })

    if streaming:
        # Open the upstream response BEFORE building our own response so the
        # real status code and content type can be forwarded instead of a
        # hard-coded 200 / ndjson.
        upstream_request = _client.build_request(
            request.method, target, content=body, headers=headers)
        upstream = await _client.send(upstream_request, stream=True)

        async def stream_iter():
            try:
                async for chunk in upstream.aiter_raw():
                    yield chunk
            finally:
                # Runs on normal completion, upstream errors and client
                # disconnects, so every call is logged and the connection
                # is released.
                log_call(upstream.status_code, True)
                await upstream.aclose()

        return StreamingResponse(
            stream_iter(),
            status_code=upstream.status_code,
            media_type=upstream.headers.get("content-type", "application/x-ndjson"),
        )

    r = await _client.request(request.method, target, content=body, headers=headers)
    log_call(r.status_code, False)
    return Response(content=r.content, status_code=r.status_code,
                    media_type=r.headers.get("content-type"))
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def main(argv: list[str]) -> int:
    """Parse CLI options and run the proxy under uvicorn.

    Args:
        argv: Command-line arguments without the program name.

    Returns:
        0 once the server has stopped.
    """
    parser = argparse.ArgumentParser(
        description="Logging passthrough proxy in front of Ollama")
    parser.add_argument(
        "--host",
        default=os.environ.get("PROXY_HOST", "127.0.0.1"),
        help="Bind host (set 0.0.0.0 to be reachable on the tailnet)",
    )
    parser.add_argument(
        "--port",
        type=int,
        default=int(os.environ.get("PROXY_PORT", "11435")),
        help="Bind port (default 11435, Ollama's 11434 + 1)",
    )
    opts = parser.parse_args(argv)

    # Startup banner goes to stderr: where we listen, where we forward, where we log.
    banner = (
        f"proxy: forwarding http://{opts.host}:{opts.port}/* -> {OLLAMA_BASE_URL}/*"
    )
    print(banner, file=sys.stderr)
    print(f"proxy: log -> {LOG_PATH}", file=sys.stderr)

    uvicorn.run(app, host=opts.host, port=opts.port, log_level="warning")
    return 0
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
if __name__ == "__main__":
|
|
143
|
+
sys.exit(main(sys.argv[1:]))
|
ollama_proxy/server.py
ADDED
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
"""
|
|
2
|
+
server.py — MCP server wrapping local Ollama (v1.5).
|
|
3
|
+
|
|
4
|
+
Stateless MCP server exposing four tools:
|
|
5
|
+
- chat(messages, model?, options?, format?) Chat completion via /api/chat
|
|
6
|
+
- generate(prompt, model?, options?, format?, system?) Single-shot completion via /api/generate
|
|
7
|
+
- list_models() Installed models via /api/tags
|
|
8
|
+
- embed(input, model?) Embeddings via /api/embed
|
|
9
|
+
|
|
10
|
+
Transports:
|
|
11
|
+
- stdio (default) — for Claude Code, Cowork, local MCP clients
|
|
12
|
+
- sse — for remote MCP clients over HTTP (optional)
|
|
13
|
+
|
|
14
|
+
Default model: gemma4:e4b (override with OLLAMA_DEFAULT_MODEL env var).
|
|
15
|
+
Ollama base : http://127.0.0.1:11434 (override with OLLAMA_BASE_URL).
|
|
16
|
+
|
|
17
|
+
Usage:
|
|
18
|
+
python server.py # stdio transport (default)
|
|
19
|
+
python server.py --sse --port 7421
|
|
20
|
+
OLLAMA_DEFAULT_MODEL=gemma4:latest python server.py
|
|
21
|
+
|
|
22
|
+
Deps: mcp[cli] httpx
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
import argparse
|
|
28
|
+
import os
|
|
29
|
+
import sys
|
|
30
|
+
from typing import Any
|
|
31
|
+
|
|
32
|
+
import httpx
|
|
33
|
+
|
|
34
|
+
try:
|
|
35
|
+
from mcp.server.fastmcp import FastMCP
|
|
36
|
+
except ImportError as e:
|
|
37
|
+
print(
|
|
38
|
+
"server.py: missing dependency `mcp`. Install with:\n"
|
|
39
|
+
" pip install 'mcp[cli]' httpx",
|
|
40
|
+
file=sys.stderr,
|
|
41
|
+
)
|
|
42
|
+
raise SystemExit(1) from e
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "http://127.0.0.1:11434").rstrip("/")
|
|
46
|
+
DEFAULT_MODEL = os.environ.get("OLLAMA_DEFAULT_MODEL", "gemma4:e4b")
|
|
47
|
+
TIMEOUT_S = float(os.environ.get("OLLAMA_TIMEOUT_S", "300"))
|
|
48
|
+
|
|
49
|
+
mcp = FastMCP("ollama-proxy")
|
|
50
|
+
_client = httpx.Client(base_url=OLLAMA_BASE_URL, timeout=TIMEOUT_S)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _post(path: str, payload: dict) -> dict:
    """POST ``payload`` as JSON to ``path`` on the shared client.

    Raises whatever ``raise_for_status`` raises for an error status, and
    otherwise returns the decoded JSON body.
    """
    response = _client.post(path, json=payload)
    response.raise_for_status()
    decoded = response.json()
    return decoded
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _get(path: str) -> dict:
    """GET ``path`` on the shared client.

    Raises whatever ``raise_for_status`` raises for an error status, and
    otherwise returns the decoded JSON body.
    """
    response = _client.get(path)
    response.raise_for_status()
    decoded = response.json()
    return decoded
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@mcp.tool()
def chat(
    messages: list[dict],
    model: str | None = None,
    options: dict | None = None,
    format: str | dict | None = None,
) -> dict:
    """Chat completion via Ollama /api/chat.

    Args:
        messages: list of {"role": "user|assistant|system", "content": str}
        model: Ollama model tag. Defaults to OLLAMA_DEFAULT_MODEL (gemma4:e4b).
        options: Ollama runtime options (temperature, num_ctx, etc.).
        format: "json" to force JSON-mode output, or a JSON-schema object to
            constrain the output to that schema.

    Returns the raw Ollama response: {message: {role, content}, done, ...}
    """
    payload: dict[str, Any] = {
        "model": model or DEFAULT_MODEL,
        "messages": messages,
        "stream": False,  # always request one complete response
    }
    # Optional fields are sent only when the caller supplied a non-empty value.
    if options:
        payload["options"] = options
    if format:
        payload["format"] = format
    return _post("/api/chat", payload)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@mcp.tool()
def generate(
    prompt: str,
    model: str | None = None,
    options: dict | None = None,
    format: str | dict | None = None,
    system: str | None = None,
) -> dict:
    """Single-shot completion via Ollama /api/generate.

    Args:
        prompt: the user prompt
        model: defaults to OLLAMA_DEFAULT_MODEL
        options: Ollama runtime options
        format: "json" to force JSON output, or a JSON-schema object to
            constrain the output to that schema
        system: optional system prompt

    Returns the raw Ollama response body.
    """
    payload: dict[str, Any] = {
        "model": model or DEFAULT_MODEL,
        "prompt": prompt,
        "stream": False,  # always request one complete response
    }
    # Optional fields are sent only when the caller supplied a non-empty value.
    if options:
        payload["options"] = options
    if format:
        payload["format"] = format
    if system:
        payload["system"] = system
    return _post("/api/generate", payload)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
@mcp.tool()
def list_models() -> dict:
    """List models installed in the local Ollama instance.

    Returns the decoded body of Ollama's /api/tags endpoint unchanged.
    """
    installed = _get("/api/tags")
    return installed
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
@mcp.tool()
def embed(input: str | list[str], model: str | None = None) -> dict:
    """Get embeddings via Ollama /api/embed.

    Args:
        input: a string or list of strings to embed
        model: embedding-capable model tag (e.g. "nomic-embed-text");
            falls back to DEFAULT_MODEL when omitted

    Returns the raw Ollama response body.
    """
    chosen_model = model or DEFAULT_MODEL
    return _post("/api/embed", {"model": chosen_model, "input": input})
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def main(argv: list[str]) -> int:
    """Parse CLI options, probe Ollama once, and run the MCP server.

    Args:
        argv: Command-line arguments without the program name.

    Returns:
        0 once the chosen transport has stopped running.
    """
    parser = argparse.ArgumentParser(description="MCP server wrapping local Ollama")
    parser.add_argument(
        "--sse",
        action="store_true",
        help="Use SSE/HTTP transport instead of stdio (for remote MCP clients)",
    )
    parser.add_argument(
        "--port",
        type=int,
        default=int(os.environ.get("MCP_PORT", "7421")),
        help="HTTP port for --sse mode (default 7421)",
    )
    parser.add_argument(
        "--host",
        default=os.environ.get("MCP_HOST", "127.0.0.1"),
        help="Bind host for --sse mode (default 127.0.0.1; set 0.0.0.0 for tailnet)",
    )
    opts = parser.parse_args(argv)

    # Reachability probe. A failure is only reported: the server still starts
    # so that MCP clients see the error on each call.
    try:
        _get("/api/tags")
    except Exception as e:
        print(
            f"server.py: cannot reach Ollama at {OLLAMA_BASE_URL} ({e}).\n"
            "Is the Ollama daemon running? Try: ollama serve",
            file=sys.stderr,
        )

    if not opts.sse:
        # stdio is meant for MCP clients, not people: if stdin is a terminal,
        # explain how to test the server interactively instead.
        if sys.stdin.isatty():
            print(
                "server.py: stdio transport ready. This server is meant to be\n"
                " spawned by an MCP client (Claude Code, Cowork, Claude Desktop)\n"
                " via subprocess pipes — running it in a terminal will produce\n"
                " JSON-RPC parse errors on every keystroke. To test interactively:\n"
                " npx @modelcontextprotocol/inspector python server.py\n"
                " Or run with --sse to expose an HTTP/SSE endpoint instead.\n"
                " Press Ctrl-C to exit.\n",
                file=sys.stderr,
            )
        mcp.run(transport="stdio")
        return 0

    # The SSE transport takes its bind address from the FastMCP settings object.
    mcp.settings.host = opts.host
    mcp.settings.port = opts.port
    print(f"server.py: SSE transport on http://{opts.host}:{opts.port}/sse",
          file=sys.stderr)
    mcp.run(transport="sse")
    return 0
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
if __name__ == "__main__":
|
|
194
|
+
sys.exit(main(sys.argv[1:]))
|