knowledge-worker 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,168 @@
1
+ """
2
+ extractor_adapter.py — local-Gemma drop-in for mygraph's extractor (v1.5).
3
+
4
+ mygraph/extractor.py uses Claude with native tool-use to emit a structured
5
+ candidates payload. Ollama models don't speak Anthropic tool-use; they speak
6
+ Ollama's `format` (JSON mode + optional JSON schema). This adapter:
7
+
8
+ - reuses the same prompt template + schema as mygraph/extractor.py
9
+ - calls Ollama with `format=<schema>` for constrained JSON output
10
+ - returns the same dict shape, so validator.py / review.py / merge.py work
11
+ without modification
12
+
13
+ Usage (drop-in):
14
+ from ollama_proxy.extractor_adapter import extract as gemma_extract
15
+ payload = gemma_extract(Path("notes.md"), out_path=Path("out.json"))
16
+
17
+ Or via the helper CLI:
18
+ python ollama_proxy/extractor_adapter.py path/to/file.md [out.json]
19
+
20
+ Env:
21
+ OLLAMA_DEFAULT_MODEL default model tag (gemma4:e4b)
22
+ OLLAMA_BASE_URL http://127.0.0.1:11434
23
+ GEMMA_NUM_CTX optional context window override (default 8192)
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import json
29
+ import os
30
+ import sys
31
+ from datetime import datetime, timezone
32
+ from pathlib import Path
33
+
34
+ # Make mygraph/ importable regardless of cwd.
35
+ _HERE = Path(__file__).resolve().parent
36
+ _MYGRAPH = _HERE.parent / "mygraph"
37
+ if str(_MYGRAPH) not in sys.path:
38
+ sys.path.insert(0, str(_MYGRAPH))
39
+
40
+ from mygraph import Graph, NODE_TYPES, EDGE_TYPES, slug # noqa: E402
41
+ from extractor import ( # noqa: E402
42
+ EXTRACTION_TOOL,
43
+ PROMPT_TEMPLATE,
44
+ build_source_decl,
45
+ ensure_provenance_edges,
46
+ )
47
+
48
+ try:
49
+ import httpx
50
+ except ImportError as e:
51
+ raise SystemExit(
52
+ "extractor_adapter: missing dep. Run: pip install httpx"
53
+ ) from e
54
+
55
+
56
# Runtime settings, resolved once at import time; each one can be overridden
# through the environment variable named in the lookup.
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://127.0.0.1:11434").rstrip("/")
DEFAULT_MODEL = os.getenv("OLLAMA_DEFAULT_MODEL", "gemma4:e4b")
NUM_CTX = int(os.getenv("GEMMA_NUM_CTX", "8192"))

# The schema handed to Ollama's `format` is the tool input_schema declared by
# mygraph/extractor.py, so both backends are constrained to the same structure.
RESPONSE_SCHEMA = EXTRACTION_TOOL["input_schema"]
62
+
63
+
64
def _ollama_chat(prompt: str, model: str, schema: dict) -> dict:
    """Send one non-streaming /api/chat request and return the decoded JSON object.

    Args:
        prompt: full prompt text, sent as a single "user" message.
        model: Ollama model tag to run.
        schema: JSON schema passed as Ollama's ``format`` so the reply is
            constrained to that structure.

    Returns:
        The reply's message content, parsed as a JSON object.

    Raises:
        RuntimeError: on a non-200 status, an undecodable HTTP body, an empty
            message, or message content that does not decode to a JSON object
            (even after removing a surrounding Markdown code fence).
    """
    request_body = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "stream": False,
        "format": schema,  # Ollama structured-output mode
        "options": {"num_ctx": NUM_CTX, "temperature": 0.2},
    }
    with httpx.Client(timeout=600.0) as c:
        r = c.post(f"{OLLAMA_BASE_URL}/api/chat", json=request_body)
    if r.status_code != 200:
        raise RuntimeError(
            f"extractor_adapter: ollama returned {r.status_code}: {r.text[:500]}"
        )
    try:
        body = r.json()
    except ValueError as e:
        raise RuntimeError(
            f"extractor_adapter: ollama returned a non-JSON body: {r.text[:300]}"
        ) from e
    message = body.get("message") if isinstance(body, dict) else None
    content = (message or {}).get("content", "")
    if not content:
        raise RuntimeError(f"extractor_adapter: empty response from {model}")

    text = content.strip()
    candidates = [text]
    if text.startswith("```"):
        # Fallback: the model wrapped its output in a code fence despite the
        # format hint. Remove the fence markers, then drop the first line only
        # when it is a language tag (or empty) rather than the start of JSON.
        inner = text[3:]
        if inner.endswith("```"):
            inner = inner[:-3]
        first_line, newline, remainder = inner.partition("\n")
        if newline and not first_line.strip().startswith(("{", "[")):
            inner = remainder
        candidates.append(inner.strip())

    last_error = None
    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError as e:
            last_error = e
            continue
        if isinstance(parsed, dict):
            return parsed
        # Callers treat the result as a dict (setdefault, key access), so a
        # bare list/string/number is reported here instead of failing later.
        raise RuntimeError(
            f"extractor_adapter: model returned JSON that is not an object: {content[:300]}"
        )
    raise RuntimeError(
        f"extractor_adapter: model returned non-JSON: {content[:300]}"
    ) from last_error
96
+
97
+
98
def extract(md_path: Path, out_path: Path | None = None,
            model: str = DEFAULT_MODEL) -> dict:
    """Run the extraction prompt for one Markdown file against local Ollama.

    Loads the graph, builds the source declaration for ``md_path``, fills
    PROMPT_TEMPLATE with the file text plus the known node/edge types and
    existing node ids, and asks ``model`` for a schema-constrained payload.
    Missing top-level keys ("source", "nodes", "edges") are filled in,
    provenance edges are synthesized where absent, and a ``_meta`` section
    records source path, ingest time, model and backend.

    Args:
        md_path: Markdown file to extract from (read as UTF-8).
        out_path: when given, the payload is also written there as indented JSON.
        model: Ollama model tag; defaults to DEFAULT_MODEL.

    Returns:
        The candidates payload dict — intended to have the same shape as
        mygraph.extractor.extract so validate()/review()/merge() accept it.
    """
    g = Graph.load()
    decl = build_source_decl(md_path)
    source_text = md_path.read_text(encoding="utf-8")
    id_listing = "\n".join(f" - {i}" for i in sorted(g.nodes.keys()))
    prompt = PROMPT_TEMPLATE.format(
        source_id=decl["source_id"],
        source_label=decl["source_label"],
        source_path=decl["source_path"],
        node_types=", ".join(sorted(NODE_TYPES)),
        edge_types=", ".join(sorted(EDGE_TYPES)),
        existing_ids=id_listing,
        source_text=source_text,
    )
    payload = _ollama_chat(prompt, model=model, schema=RESPONSE_SCHEMA)

    # Guarantee the top-level keys so the validator never crashes on a
    # missing section.
    fallback_source = {
        "id": decl["source_id"],
        "label": decl["source_label"],
        "body": "",
    }
    for key, default in (("source", fallback_source), ("nodes", []), ("edges", [])):
        payload.setdefault(key, default)

    injected = ensure_provenance_edges(payload)
    if injected:
        print(
            "extractor_adapter: gateway returned missing provenance edges; "
            f"synthesized {injected} MENTIONED_IN edges.",
            file=sys.stderr,
        )

    meta = payload.setdefault("_meta", {})
    meta["source_path"] = decl["source_path"]
    meta["ingested_at"] = decl["ingested_at"]
    meta["model"] = model
    meta["backend"] = "ollama"

    if out_path:
        out_path.write_text(json.dumps(payload, indent=2), encoding="utf-8")
    return payload
142
+
143
+
144
def main(argv: list[str]) -> int:
    """Command-line entry point: extract one Markdown file via local Ollama.

    Args:
        argv: command-line arguments WITHOUT the program name (the module's
            ``__main__`` guard passes ``sys.argv[1:]``), in the form
            ``<path/to/file.md> [out.json] [--model NAME]``. ``--model NAME``
            may appear anywhere in the list.

    Returns:
        0 after a successful extraction; 1 when the input path is missing or
        ``--model`` is given without a value (usage is printed in both cases).
    """
    usage = "Usage: python extractor_adapter.py <path/to/file.md> [out.json] [--model NAME]"
    positional = list(argv)  # work on a copy; the caller's list is left intact
    model = None
    if "--model" in positional:
        i = positional.index("--model")
        if i + 1 >= len(positional):
            # "--model" was the last token: there is no value to read.
            print(usage)
            return 1
        model = positional[i + 1]
        del positional[i:i + 2]
    if not positional:
        print(usage)
        return 1
    if model is None:
        model = DEFAULT_MODEL

    # argv carries no program name, so the first positional IS the input file.
    md = Path(positional[0]).expanduser().resolve()
    if len(positional) > 1:
        out = Path(positional[1]).expanduser().resolve()
    else:
        out = md.parent / f"{md.stem}.candidates.gemma.json"
    payload = extract(md, out, model=model)
    print(f"extractor_adapter: model={model} "
          f"nodes={len(payload.get('nodes', []))} "
          f"edges={len(payload.get('edges', []))} -> {out}")
    return 0
165
+
166
+
167
# Script entry: the exit status is whatever main() returns.
if __name__ == "__main__":
    raise SystemExit(main(sys.argv[1:]))
ollama_proxy/proxy.py ADDED
@@ -0,0 +1,143 @@
1
+ """
2
+ proxy.py — thin Ollama-API-compatible passthrough with logging (v1.5).
3
+
4
+ Why this exists alongside server.py (the MCP server):
5
+ - AnythingLLM and most external LLM clients speak Ollama's REST API directly,
6
+ NOT MCP. So the MCP server (server.py) is for Claude Code/Cowork; this
7
+ proxy is for AnythingLLM and other Ollama-compatible clients.
8
+ - Tailscale-expose THIS port (default 11435) instead of raw 11434 so we get:
9
+ * per-call logging (latency, model, prompt size, error)
10
+ * a single chokepoint for future auth/rate-limit if we ever leave the tailnet
11
+ - 100% pass-through: same request/response shape as Ollama. AnythingLLM points
12
+ at this URL and treats it as Ollama.
13
+
14
+ Usage:
15
+ python proxy.py # binds 127.0.0.1:11435 → 127.0.0.1:11434
16
+ python proxy.py --host 0.0.0.0 # listen on all interfaces (tailnet)
17
+ python proxy.py --port 8080
18
+ OLLAMA_BASE_URL=http://127.0.0.1:11434 python proxy.py
19
+
20
+ Deps: fastapi uvicorn httpx
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import argparse
26
+ import json
27
+ import os
28
+ import sys
29
+ import time
30
+ from datetime import datetime, timezone
31
+ from pathlib import Path
32
+
33
+ try:
34
+ import httpx
35
+ from fastapi import FastAPI, Request
36
+ from fastapi.responses import JSONResponse, Response, StreamingResponse
37
+ import uvicorn
38
+ except ImportError as e:
39
+ print(
40
+ "proxy.py: missing deps. Install with:\n"
41
+ " pip install fastapi 'uvicorn[standard]' httpx",
42
+ file=sys.stderr,
43
+ )
44
+ raise SystemExit(1) from e
45
+
46
+
47
# Upstream Ollama address (trailing slash removed) and the JSONL log location;
# both can be overridden through the environment.
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://127.0.0.1:11434").rstrip("/")
LOG_PATH = Path(
    os.getenv("OLLAMA_PROXY_LOG", str(Path(__file__).parent / "proxy.log.jsonl"))
)

# One FastAPI app and one shared async HTTP client for all forwarded calls.
app = FastAPI(title="ollama-proxy", version="1.5.0")
_client = httpx.AsyncClient(base_url=OLLAMA_BASE_URL, timeout=300.0)
55
+
56
+
57
def _log(record: dict) -> None:
    """Append ``record`` to LOG_PATH as a single JSON line.

    A UTC ISO-8601 "ts" field is added to ``record`` (in place) when the
    caller did not supply one. Filesystem errors are swallowed on purpose so
    that a logging failure can never break the request being served.
    """
    if "ts" not in record:
        record["ts"] = datetime.now(timezone.utc).isoformat()
    try:
        with LOG_PATH.open("a", encoding="utf-8") as sink:
            line = json.dumps(record, ensure_ascii=False)
            sink.write(line + "\n")
    except OSError:
        pass  # never let logging break a request
64
+
65
+
66
@app.get("/healthz")
async def healthz() -> dict:
    """Report whether the upstream Ollama answers /api/tags.

    Returns ``{"ok": True, "ollama": <base url>, "models": [<names>]}`` on
    success; any failure while querying or decoding the upstream reply yields
    a 502 JSON response carrying the error text.
    """
    try:
        upstream = await _client.get("/api/tags")
        upstream.raise_for_status()
        names = [entry["name"] for entry in upstream.json().get("models", [])]
    except Exception as e:
        return JSONResponse({"ok": False, "error": str(e)}, status_code=502)
    return {"ok": True, "ollama": OLLAMA_BASE_URL, "models": names}
75
+
76
+
77
@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS"])
async def passthrough(path: str, request: Request):
    """Mirror a request to Ollama and log one record per call.

    POST bodies that parse as a JSON object are inspected for ``stream``
    (defaulting to true for ``api/...`` paths), ``model`` and the prompt size.
    Streaming calls relay the upstream body chunk by chunk; everything else is
    buffered. In both modes the upstream status code and content type are
    passed back to the client.

    Args:
        path: request path without the leading slash, as captured by the route.
        request: the incoming request; method, query string, headers and body
            are forwarded (minus ``host`` and ``content-length``).

    Returns:
        A StreamingResponse or Response mirroring the upstream reply, or a
        502 JSON response ``{"error": ...}`` when the upstream request fails
        at the HTTP-client level.
    """
    body = await request.body()
    headers = {k: v for k, v in request.headers.items()
               if k.lower() not in {"host", "content-length"}}
    target = f"/{path}"
    qs = request.url.query
    if qs:
        target = f"{target}?{qs}"

    # Detect streaming vs buffered, and collect what the log record needs.
    streaming = False
    model = None
    prompt_len = 0
    if body and request.method == "POST":
        try:
            payload = json.loads(body)
            streaming = bool(payload.get("stream", path.startswith("api/")))
            model = payload.get("model")
            prompt_len = len(json.dumps(payload.get("messages") or payload.get("prompt") or ""))
        except Exception:
            # Best effort: a body that is not a JSON object is forwarded
            # untouched, buffered, with its raw size recorded instead.
            streaming, model, prompt_len = False, None, len(body)

    t0 = time.perf_counter()

    def log_call(status, stream, error=None):
        # One log line per call; "error" is present only on failures.
        record = {
            "kind": "request", "method": request.method, "path": target,
            "status": status, "model": model, "prompt_len": prompt_len,
            "duration_ms": int((time.perf_counter() - t0) * 1000),
            "stream": stream,
        }
        if error is not None:
            record["error"] = error
        _log(record)

    if streaming:
        # Open the upstream response BEFORE building our own response so the
        # real status code and content type can be relayed, not a blanket 200.
        upstream_request = _client.build_request(
            request.method, target, content=body, headers=headers)
        try:
            upstream = await _client.send(upstream_request, stream=True)
        except httpx.HTTPError as e:
            log_call(502, True, error=str(e))
            return JSONResponse(
                {"error": f"proxy: upstream request failed: {e}"}, status_code=502)

        async def stream_iter():
            failure = None
            try:
                async for chunk in upstream.aiter_raw():
                    yield chunk
            except httpx.HTTPError as e:
                failure = str(e)
                raise
            finally:
                # Runs on normal completion, upstream failure and client
                # disconnect alike, so the connection is always released and
                # every call leaves a log record.
                await upstream.aclose()
                log_call(upstream.status_code, True, error=failure)

        return StreamingResponse(
            stream_iter(),
            status_code=upstream.status_code,
            media_type=upstream.headers.get("content-type", "application/x-ndjson"),
        )

    try:
        r = await _client.request(request.method, target, content=body, headers=headers)
    except httpx.HTTPError as e:
        log_call(502, False, error=str(e))
        return JSONResponse(
            {"error": f"proxy: upstream request failed: {e}"}, status_code=502)
    log_call(r.status_code, False)
    return Response(content=r.content, status_code=r.status_code,
                    media_type=r.headers.get("content-type"))
126
+
127
+
128
def main(argv: list[str]) -> int:
    """Parse the bind options and serve the proxy with uvicorn.

    Args:
        argv: command-line arguments without the program name. ``--host`` and
            ``--port`` default to PROXY_HOST / PROXY_PORT from the environment,
            falling back to 127.0.0.1 and 11435.

    Returns:
        0 once uvicorn.run() returns.
    """
    default_host = os.environ.get("PROXY_HOST", "127.0.0.1")
    default_port = int(os.environ.get("PROXY_PORT", "11435"))

    parser = argparse.ArgumentParser(description="Logging passthrough proxy in front of Ollama")
    parser.add_argument(
        "--host",
        default=default_host,
        help="Bind host (set 0.0.0.0 to be reachable on the tailnet)",
    )
    parser.add_argument(
        "--port",
        type=int,
        default=default_port,
        help="Bind port (default 11435, Ollama's 11434 + 1)",
    )
    args = parser.parse_args(argv)

    # Startup banner goes to stderr: where we listen, where we forward, where we log.
    startup_lines = (
        f"proxy: forwarding http://{args.host}:{args.port}/* -> {OLLAMA_BASE_URL}/*",
        f"proxy: log -> {LOG_PATH}",
    )
    for line in startup_lines:
        print(line, file=sys.stderr)

    uvicorn.run(app, host=args.host, port=args.port, log_level="warning")
    return 0
140
+
141
+
142
# Script entry: the exit status is whatever main() returns.
if __name__ == "__main__":
    raise SystemExit(main(sys.argv[1:]))
ollama_proxy/server.py ADDED
@@ -0,0 +1,194 @@
1
+ """
2
+ server.py — MCP server wrapping local Ollama (v1.5).
3
+
4
+ Stateless MCP server exposing four tools:
5
+ - chat(messages, model?, options?, format?) Chat completion via /api/chat
6
+ - generate(prompt, model?, options?, format?, system?) Single-shot completion via /api/generate
7
+ - list_models() Installed models via /api/tags
8
+ - embed(input, model?) Embeddings via /api/embed
9
+
10
+ Transports:
11
+ - stdio (default) — for Claude Code, Cowork, local MCP clients
12
+ - sse — for remote MCP clients over HTTP (optional)
13
+
14
+ Default model: gemma4:e4b (override with OLLAMA_DEFAULT_MODEL env var).
15
+ Ollama base : http://127.0.0.1:11434 (override with OLLAMA_BASE_URL).
16
+
17
+ Usage:
18
+ python server.py # stdio transport (default)
19
+ python server.py --sse --port 7421
20
+ OLLAMA_DEFAULT_MODEL=gemma4:latest python server.py
21
+
22
+ Deps: mcp[cli] httpx
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ import argparse
28
+ import os
29
+ import sys
30
+ from typing import Any
31
+
32
+ import httpx
33
+
34
+ try:
35
+ from mcp.server.fastmcp import FastMCP
36
+ except ImportError as e:
37
+ print(
38
+ "server.py: missing dependency `mcp`. Install with:\n"
39
+ " pip install 'mcp[cli]' httpx",
40
+ file=sys.stderr,
41
+ )
42
+ raise SystemExit(1) from e
43
+
44
+
45
# Upstream address (trailing slash removed), default model tag and request
# timeout in seconds; each can be overridden through the environment.
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://127.0.0.1:11434").rstrip("/")
DEFAULT_MODEL = os.getenv("OLLAMA_DEFAULT_MODEL", "gemma4:e4b")
TIMEOUT_S = float(os.getenv("OLLAMA_TIMEOUT_S", "300"))

# The MCP server instance and one shared HTTP client used by every tool.
mcp = FastMCP("ollama-proxy")
_client = httpx.Client(base_url=OLLAMA_BASE_URL, timeout=TIMEOUT_S)
51
+
52
+
53
def _post(path: str, payload: dict) -> dict:
    """POST ``payload`` as JSON to ``path`` on Ollama and return the decoded reply.

    Raises whatever ``raise_for_status()`` raises for a 4xx/5xx response.
    """
    response = _client.post(path, json=payload)
    response.raise_for_status()
    decoded = response.json()
    return decoded
57
+
58
+
59
def _get(path: str) -> dict:
    """GET ``path`` from Ollama and return the decoded JSON reply.

    Raises whatever ``raise_for_status()`` raises for a 4xx/5xx response.
    """
    response = _client.get(path)
    response.raise_for_status()
    decoded = response.json()
    return decoded
63
+
64
+
65
@mcp.tool()
def chat(
    messages: list[dict],
    model: str | None = None,
    options: dict | None = None,
    format: str | dict | None = None,
) -> dict:
    """Chat completion via Ollama /api/chat.

    Args:
        messages: list of {"role": "user|assistant|system", "content": str}
        model: Ollama model tag. Defaults to OLLAMA_DEFAULT_MODEL (gemma4:e4b).
        options: Ollama runtime options (temperature, num_ctx, etc.).
        format: "json" to force JSON-mode output, or a JSON schema object to
            constrain the output to that structure.

    Returns the raw Ollama response: {message: {role, content}, done, ...}
    """
    payload: dict[str, Any] = {
        "model": model or DEFAULT_MODEL,
        "messages": messages,
        "stream": False,  # tools return one complete response, never a stream
    }
    # Optional fields are only sent when the caller supplied a non-empty value.
    if options:
        payload["options"] = options
    if format:
        payload["format"] = format
    return _post("/api/chat", payload)
92
+
93
+
94
@mcp.tool()
def generate(
    prompt: str,
    model: str | None = None,
    options: dict | None = None,
    format: str | dict | None = None,
    system: str | None = None,
) -> dict:
    """Single-shot completion via Ollama /api/generate.

    Args:
        prompt: the user prompt
        model: defaults to OLLAMA_DEFAULT_MODEL
        options: Ollama runtime options
        format: "json" to force JSON output, or a JSON schema object to
            constrain the output to that structure
        system: optional system prompt

    Returns the decoded JSON reply from /api/generate.
    """
    payload: dict[str, Any] = {
        "model": model or DEFAULT_MODEL,
        "prompt": prompt,
        "stream": False,  # tools return one complete response, never a stream
    }
    # Optional fields are only sent when the caller supplied a non-empty value.
    if options:
        payload["options"] = options
    if format:
        payload["format"] = format
    if system:
        payload["system"] = system
    return _post("/api/generate", payload)
123
+
124
+
125
@mcp.tool()
def list_models() -> dict:
    """Return the decoded /api/tags reply, i.e. the models installed in the local Ollama instance."""
    installed = _get("/api/tags")
    return installed
129
+
130
+
131
@mcp.tool()
def embed(input: str | list[str], model: str | None = None) -> dict:
    """Request embeddings from Ollama /api/embed.

    Args:
        input: one string, or a list of strings, to embed
        model: embedding-capable model tag (e.g. "nomic-embed-text");
            falls back to DEFAULT_MODEL when omitted or empty

    Returns the decoded JSON reply from /api/embed.
    """
    chosen_model = model if model else DEFAULT_MODEL
    return _post("/api/embed", {"model": chosen_model, "input": input})
144
+
145
+
146
def main(argv: list[str]) -> int:
    """Parse the transport options and run the MCP server.

    Args:
        argv: command-line arguments without the program name. ``--sse``
            selects the SSE/HTTP transport; ``--host`` / ``--port`` default to
            MCP_HOST / MCP_PORT from the environment (127.0.0.1 and 7421).

    Returns:
        0 once ``mcp.run()`` returns.
    """
    parser = argparse.ArgumentParser(description="MCP server wrapping local Ollama")
    parser.add_argument(
        "--sse",
        action="store_true",
        help="Use SSE/HTTP transport instead of stdio (for remote MCP clients)",
    )
    parser.add_argument(
        "--port",
        type=int,
        default=int(os.environ.get("MCP_PORT", "7421")),
        help="HTTP port for --sse mode (default 7421)",
    )
    parser.add_argument(
        "--host",
        default=os.environ.get("MCP_HOST", "127.0.0.1"),
        help="Bind host for --sse mode (default 127.0.0.1; set 0.0.0.0 for tailnet)",
    )
    args = parser.parse_args(argv)

    # Reachability probe. A failure is only reported on stderr: the server
    # still starts, so MCP clients see the error on each call instead.
    try:
        _get("/api/tags")
    except Exception as e:
        print(
            f"server.py: cannot reach Ollama at {OLLAMA_BASE_URL} ({e}).\n"
            "Is the Ollama daemon running? Try: ollama serve",
            file=sys.stderr,
        )

    if not args.sse:
        # stdio is meant for MCP clients, not humans: when stdin is a TTY the
        # user most likely wanted --sse or an MCP client config, so explain.
        if sys.stdin.isatty():
            hint = (
                "server.py: stdio transport ready. This server is meant to be\n"
                " spawned by an MCP client (Claude Code, Cowork, Claude Desktop)\n"
                " via subprocess pipes — running it in a terminal will produce\n"
                " JSON-RPC parse errors on every keystroke. To test interactively:\n"
                " npx @modelcontextprotocol/inspector python server.py\n"
                " Or run with --sse to expose an HTTP/SSE endpoint instead.\n"
                " Press Ctrl-C to exit.\n"
            )
            print(hint, file=sys.stderr)
        mcp.run(transport="stdio")
        return 0

    # SSE mode: FastMCP takes its bind address from the settings object.
    mcp.settings.host = args.host
    mcp.settings.port = args.port
    print(f"server.py: SSE transport on http://{args.host}:{args.port}/sse",
          file=sys.stderr)
    mcp.run(transport="sse")
    return 0
191
+
192
+
193
# Script entry: the exit status is whatever main() returns.
if __name__ == "__main__":
    raise SystemExit(main(sys.argv[1:]))