mcp-embedding-daemon 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,35 @@
1
+ # Python
2
+ __pycache__/
3
+ *.pyc
4
+ .venv/
5
+ .pytest_cache/
6
+ .ruff_cache/
7
+ *.egg-info/
8
+ dist/
9
+ build/
10
+
11
+ # Node
12
+ node_modules/
13
+ .pnpm-store/
14
+ *.tsbuildinfo
15
+
16
+ # IDE
17
+ .vscode/
18
+ .idea/
19
+
20
+ # OS
21
+ .DS_Store
22
+ Thumbs.db
23
+
24
+ # Local state
25
+ _backup/
26
+ .worktrees/
27
+ *.log
28
+ .env
29
+ .env.*
30
+ !.env.example
31
+ coverage/
32
+ *.lcov
33
+
34
+ # Code review graph
35
+ .code-review-graph/
@@ -0,0 +1,174 @@
1
+ Metadata-Version: 2.4
2
+ Name: mcp-embedding-daemon
3
+ Version: 1.0.0
4
+ Summary: Shared ONNX/GGUF embedding server for the n24q02m MCP ecosystem
5
+ Project-URL: Homepage, https://github.com/n24q02m/mcp-core
6
+ Project-URL: Repository, https://github.com/n24q02m/mcp-core
7
+ Project-URL: Issues, https://github.com/n24q02m/mcp-core/issues
8
+ Author-email: n24q02m <quangminh2422004@gmail.com>
9
+ License: MIT
10
+ Keywords: embedding,gguf,inference,mcp,onnx
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
+ Requires-Python: ==3.13.*
19
+ Requires-Dist: fastapi>=0.111.0
20
+ Requires-Dist: httpx>=0.28.1
21
+ Requires-Dist: numpy>=1.26.0
22
+ Requires-Dist: onnxruntime>=1.18.0
23
+ Requires-Dist: pydantic>=2.7.0
24
+ Requires-Dist: uvicorn>=0.30.0
25
+ Provides-Extra: cuda
26
+ Requires-Dist: onnxruntime-gpu>=1.18.0; extra == 'cuda'
27
+ Provides-Extra: gguf
28
+ Requires-Dist: llama-cpp-python>=0.2.70; extra == 'gguf'
29
+ Description-Content-Type: text/markdown
30
+
31
+ # mcp-core
32
+
33
+ Unified MCP Streamable HTTP 2025-11-25 transport, OAuth 2.1 Authorization
34
+ Server, lifecycle management, install automation, and shared embedding
35
+ daemon for the n24q02m MCP ecosystem.
36
+
37
+ `mcp-core` is the **functional successor** to the archived
38
+ [`mcp-relay-core`](https://github.com/n24q02m/mcp-relay-core). All crypto,
39
+ storage, OAuth, relay, and schema modules from `mcp-relay-core` ship under
40
+ the same paths in `mcp-core` (1:1 superset), so downstream MCP servers can
41
+ migrate with a pure import + dependency rename. See
42
+ [`docs/migration-from-mcp-relay-core.md`](docs/migration-from-mcp-relay-core.md)
43
+ for the rename table.
44
+
45
+ ## Packages
46
+
47
+ | Package | Language | Registry | Install |
48
+ |---------|----------|----------|---------|
49
+ | [`packages/core-py`](packages/core-py) | Python 3.13 | PyPI: [`n24q02m-mcp-core`](https://pypi.org/project/n24q02m-mcp-core/) | `pip install n24q02m-mcp-core` |
50
+ | [`packages/core-ts`](packages/core-ts) | TypeScript / Node 24 | npm: [`@n24q02m/mcp-core`](https://www.npmjs.com/package/@n24q02m/mcp-core) | `bun add @n24q02m/mcp-core` |
51
+ | [`packages/embedding-daemon`](packages/embedding-daemon) | Python 3.13 | PyPI: [`mcp-embedding-daemon`](https://pypi.org/project/mcp-embedding-daemon/) | `pip install mcp-embedding-daemon` |
52
+ | [`packages/stdio-proxy`](packages/stdio-proxy) | Python 3.13 | PyPI: [`mcp-stdio-proxy`](https://pypi.org/project/mcp-stdio-proxy/) | `pip install mcp-stdio-proxy` |
53
+
54
+ All four packages share the same version (`semantic-release.toml` bumps all
55
+ three Python `pyproject.toml` files plus the npm `package.json` in lockstep).
56
+
57
+ ## What you get
58
+
59
+ ### `n24q02m-mcp-core` (Python) and `@n24q02m/mcp-core` (TypeScript)
60
+
61
+ Identical public API in both languages:
62
+
63
+ - **`crypto/`** — ECDH P-256, AES-256-GCM, HKDF-SHA256 primitives.
64
+ Cross-language test vectors guarantee Python and TypeScript produce the
65
+ same ciphertext for the same input.
66
+ - **`storage/`** — encrypted config file (`config.enc`) backed by PBKDF2
67
+ 600k + machine-id key derivation, plus session lock files and config
68
+ resolver helpers.
69
+ - **`oauth/`** — OAuth 2.1 Authorization Server building blocks: `JWTIssuer`
70
+ (RS256), `OAuthProvider` (PKCE flow + relay session integration),
71
+ `SqliteUserStore` for multi-user mode.
72
+ - **`relay/`** — `RelaySession`, `create_session`, `poll_for_result`,
73
+ `send_message` plus the EFF Diceware wordlist for passphrase generation.
74
+ - **`schema/`** — `RelayConfigSchema` TypedDict that downstream servers use
75
+ to declare their config form.
76
+ - **`transport/`** — `StreamableHTTPServer` wrapper around FastMCP /
77
+ `@modelcontextprotocol/sdk` Streamable HTTP transport, plus
78
+ `OAuthMiddleware` (RFC 6750 + RFC 9728 compliant Bearer validation).
79
+ - **`lifecycle/`** — `LifecycleLock` cross-platform file lock that prevents
80
+ two server instances from binding the same `(name, port)` pair.
81
+ - **`install/`** (Python only) — `AgentInstaller` that writes MCP server
82
+ entries into Claude Code, Cursor, Codex, Windsurf, and OpenCode config
83
+ files.
84
+
85
+ ### `mcp-embedding-daemon`
86
+
87
+ FastAPI HTTP server scaffold for the upcoming shared ONNX/GGUF embedding
88
+ backend. v0.1.0 alpha exposes:
89
+
90
+ - `GET /health` — returns `{status, version}`
91
+ - `POST /embed` — returns 501 with a roadmap link (backend wiring lands in
92
+ the next release)
93
+ - `POST /rerank` — returns 501 with a roadmap link
94
+
95
+ CLI entry point: `mcp-embedding-daemon --host 127.0.0.1 --port 9800`.
96
+
97
+ ### `mcp-stdio-proxy`
98
+
99
+ Thin stdio-to-HTTP forwarder for agents that only support stdio MCP transport
100
+ (e.g., Antigravity). Reads JSON-RPC frames from stdin, POSTs them to a remote
101
+ MCP server, writes responses to stdout.
102
+
103
+ CLI entry point: `mcp-stdio-proxy --url https://my-mcp.example.com/mcp --token <bearer>`.
104
+ Falls back to `MCP_CORE_SERVER_URL` and `MCP_CORE_SERVER_TOKEN` env vars when
105
+ flags are not supplied.
106
+
107
+ ## Quick start (Python)
108
+
109
+ ```python
110
+ from mcp_core import RelaySession, create_session, decrypt
111
+ from mcp_core.transport.streamable_http import StreamableHTTPServer
112
+ from mcp_core.oauth import JWTIssuer
113
+ from mcp_core.transport.oauth_middleware import OAuthMiddleware
114
+ from fastmcp import FastMCP
115
+
116
+ mcp = FastMCP("my-server")
117
+
118
+ issuer = JWTIssuer("my-server")
119
+ # Mint and validate tokens with issuer.issue_access_token(sub) / issuer.verify_access_token(token).
120
+
121
+ middleware = [OAuthMiddleware(issuer=issuer, resource_metadata_url="http://127.0.0.1:9876/.well-known/oauth-protected-resource")]
122
+ server = StreamableHTTPServer(mcp, port=9876, middleware=middleware)
123
+ server.run()
124
+ ```
125
+
126
+ ## Quick start (TypeScript)
127
+
128
+ ```typescript
129
+ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'
130
+ import { JWTIssuer } from '@n24q02m/mcp-core/oauth'
131
+ import { OAuthMiddleware, StreamableHTTPServer } from '@n24q02m/mcp-core/transport'
132
+
133
+ const server = new McpServer({ name: 'my-server', version: '0.0.0' })
134
+ const issuer = new JWTIssuer('my-server')
135
+ await issuer.init()
136
+
137
+ const middleware = new OAuthMiddleware({
138
+ jwtIssuer: issuer,
139
+ resourceMetadataUrl: 'http://127.0.0.1:9876/.well-known/oauth-protected-resource'
140
+ })
141
+
142
+ const http = new StreamableHTTPServer({ server, port: 9876, oauthMiddleware: middleware })
143
+ await http.connect()
144
+ // Then mount http.handleRequest(req, res) on your http.Server / Express / Hono.
145
+ ```
146
+
147
+ ## Development
148
+
149
+ ```bash
150
+ mise run setup # install runtimes + deps + pre-commit hooks
151
+ bun install # root TypeScript workspace install
152
+
153
+ # Python (per package)
154
+ cd packages/core-py
155
+ uv sync --group dev
156
+ uv run pytest
157
+ uv run ty check
158
+ uv run ruff check .
159
+
160
+ # TypeScript
161
+ cd packages/core-ts
162
+ bun run test
163
+ bun run check
164
+ bun run build
165
+ ```
166
+
167
+ ## Spec
168
+
169
+ Architecture design lives in
170
+ [claude-plugins/docs/superpowers/specs/2026-04-10-mcp-core-unified-transport-design.md](https://github.com/n24q02m/claude-plugins/blob/feat/phase3-mcp-core-unified/docs/superpowers/specs/2026-04-10-mcp-core-unified-transport-design.md).
171
+
172
+ ## License
173
+
174
+ MIT
@@ -0,0 +1,144 @@
1
+ # mcp-core
2
+
3
+ Unified MCP Streamable HTTP 2025-11-25 transport, OAuth 2.1 Authorization
4
+ Server, lifecycle management, install automation, and shared embedding
5
+ daemon for the n24q02m MCP ecosystem.
6
+
7
+ `mcp-core` is the **functional successor** to the archived
8
+ [`mcp-relay-core`](https://github.com/n24q02m/mcp-relay-core). All crypto,
9
+ storage, OAuth, relay, and schema modules from `mcp-relay-core` ship under
10
+ the same paths in `mcp-core` (1:1 superset), so downstream MCP servers can
11
+ migrate with a pure import + dependency rename. See
12
+ [`docs/migration-from-mcp-relay-core.md`](docs/migration-from-mcp-relay-core.md)
13
+ for the rename table.
14
+
15
+ ## Packages
16
+
17
+ | Package | Language | Registry | Install |
18
+ |---------|----------|----------|---------|
19
+ | [`packages/core-py`](packages/core-py) | Python 3.13 | PyPI: [`n24q02m-mcp-core`](https://pypi.org/project/n24q02m-mcp-core/) | `pip install n24q02m-mcp-core` |
20
+ | [`packages/core-ts`](packages/core-ts) | TypeScript / Node 24 | npm: [`@n24q02m/mcp-core`](https://www.npmjs.com/package/@n24q02m/mcp-core) | `bun add @n24q02m/mcp-core` |
21
+ | [`packages/embedding-daemon`](packages/embedding-daemon) | Python 3.13 | PyPI: [`mcp-embedding-daemon`](https://pypi.org/project/mcp-embedding-daemon/) | `pip install mcp-embedding-daemon` |
22
+ | [`packages/stdio-proxy`](packages/stdio-proxy) | Python 3.13 | PyPI: [`mcp-stdio-proxy`](https://pypi.org/project/mcp-stdio-proxy/) | `pip install mcp-stdio-proxy` |
23
+
24
+ All four packages share the same version (`semantic-release.toml` bumps all
25
+ three Python `pyproject.toml` files plus the npm `package.json` in lockstep).
26
+
27
+ ## What you get
28
+
29
+ ### `n24q02m-mcp-core` (Python) and `@n24q02m/mcp-core` (TypeScript)
30
+
31
+ Identical public API in both languages:
32
+
33
+ - **`crypto/`** — ECDH P-256, AES-256-GCM, HKDF-SHA256 primitives.
34
+ Cross-language test vectors guarantee Python and TypeScript produce the
35
+ same ciphertext for the same input.
36
+ - **`storage/`** — encrypted config file (`config.enc`) backed by PBKDF2
37
+ 600k + machine-id key derivation, plus session lock files and config
38
+ resolver helpers.
39
+ - **`oauth/`** — OAuth 2.1 Authorization Server building blocks: `JWTIssuer`
40
+ (RS256), `OAuthProvider` (PKCE flow + relay session integration),
41
+ `SqliteUserStore` for multi-user mode.
42
+ - **`relay/`** — `RelaySession`, `create_session`, `poll_for_result`,
43
+ `send_message` plus the EFF Diceware wordlist for passphrase generation.
44
+ - **`schema/`** — `RelayConfigSchema` TypedDict that downstream servers use
45
+ to declare their config form.
46
+ - **`transport/`** — `StreamableHTTPServer` wrapper around FastMCP /
47
+ `@modelcontextprotocol/sdk` Streamable HTTP transport, plus
48
+ `OAuthMiddleware` (RFC 6750 + RFC 9728 compliant Bearer validation).
49
+ - **`lifecycle/`** — `LifecycleLock` cross-platform file lock that prevents
50
+ two server instances from binding the same `(name, port)` pair.
51
+ - **`install/`** (Python only) — `AgentInstaller` that writes MCP server
52
+ entries into Claude Code, Cursor, Codex, Windsurf, and OpenCode config
53
+ files.
54
+
55
+ ### `mcp-embedding-daemon`
56
+
57
+ FastAPI HTTP server scaffold for the upcoming shared ONNX/GGUF embedding
58
+ backend. v0.1.0 alpha exposes:
59
+
60
+ - `GET /health` — returns `{status, version}`
61
+ - `POST /embed` — returns 501 with a roadmap link (backend wiring lands in
62
+ the next release)
63
+ - `POST /rerank` — returns 501 with a roadmap link
64
+
65
+ CLI entry point: `mcp-embedding-daemon --host 127.0.0.1 --port 9800`.
66
+
67
+ ### `mcp-stdio-proxy`
68
+
69
+ Thin stdio-to-HTTP forwarder for agents that only support stdio MCP transport
70
+ (e.g., Antigravity). Reads JSON-RPC frames from stdin, POSTs them to a remote
71
+ MCP server, writes responses to stdout.
72
+
73
+ CLI entry point: `mcp-stdio-proxy --url https://my-mcp.example.com/mcp --token <bearer>`.
74
+ Falls back to `MCP_CORE_SERVER_URL` and `MCP_CORE_SERVER_TOKEN` env vars when
75
+ flags are not supplied.
76
+
77
+ ## Quick start (Python)
78
+
79
+ ```python
80
+ from mcp_core import RelaySession, create_session, decrypt
81
+ from mcp_core.transport.streamable_http import StreamableHTTPServer
82
+ from mcp_core.oauth import JWTIssuer
83
+ from mcp_core.transport.oauth_middleware import OAuthMiddleware
84
+ from fastmcp import FastMCP
85
+
86
+ mcp = FastMCP("my-server")
87
+
88
+ issuer = JWTIssuer("my-server")
89
+ # Mint and validate tokens with issuer.issue_access_token(sub) / issuer.verify_access_token(token).
90
+
91
+ middleware = [OAuthMiddleware(issuer=issuer, resource_metadata_url="http://127.0.0.1:9876/.well-known/oauth-protected-resource")]
92
+ server = StreamableHTTPServer(mcp, port=9876, middleware=middleware)
93
+ server.run()
94
+ ```
95
+
96
+ ## Quick start (TypeScript)
97
+
98
+ ```typescript
99
+ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'
100
+ import { JWTIssuer } from '@n24q02m/mcp-core/oauth'
101
+ import { OAuthMiddleware, StreamableHTTPServer } from '@n24q02m/mcp-core/transport'
102
+
103
+ const server = new McpServer({ name: 'my-server', version: '0.0.0' })
104
+ const issuer = new JWTIssuer('my-server')
105
+ await issuer.init()
106
+
107
+ const middleware = new OAuthMiddleware({
108
+ jwtIssuer: issuer,
109
+ resourceMetadataUrl: 'http://127.0.0.1:9876/.well-known/oauth-protected-resource'
110
+ })
111
+
112
+ const http = new StreamableHTTPServer({ server, port: 9876, oauthMiddleware: middleware })
113
+ await http.connect()
114
+ // Then mount http.handleRequest(req, res) on your http.Server / Express / Hono.
115
+ ```
116
+
117
+ ## Development
118
+
119
+ ```bash
120
+ mise run setup # install runtimes + deps + pre-commit hooks
121
+ bun install # root TypeScript workspace install
122
+
123
+ # Python (per package)
124
+ cd packages/core-py
125
+ uv sync --group dev
126
+ uv run pytest
127
+ uv run ty check
128
+ uv run ruff check .
129
+
130
+ # TypeScript
131
+ cd packages/core-ts
132
+ bun run test
133
+ bun run check
134
+ bun run build
135
+ ```
136
+
137
+ ## Spec
138
+
139
+ Architecture design lives in
140
+ [claude-plugins/docs/superpowers/specs/2026-04-10-mcp-core-unified-transport-design.md](https://github.com/n24q02m/claude-plugins/blob/feat/phase3-mcp-core-unified/docs/superpowers/specs/2026-04-10-mcp-core-unified-transport-design.md).
141
+
142
+ ## License
143
+
144
+ MIT
@@ -0,0 +1,58 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "mcp-embedding-daemon"
7
+ version = "1.0.0"
8
+ description = "Shared ONNX/GGUF embedding server for the n24q02m MCP ecosystem"
9
+ readme = "README.md"
10
+ requires-python = "==3.13.*"
11
+ license = { text = "MIT" }
12
+ authors = [{ name = "n24q02m", email = "quangminh2422004@gmail.com" }]
13
+ keywords = ["mcp", "embedding", "onnx", "gguf", "inference"]
14
+ classifiers = [
15
+ "Development Status :: 3 - Alpha",
16
+ "Intended Audience :: Developers",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Operating System :: OS Independent",
19
+ "Programming Language :: Python :: 3",
20
+ "Programming Language :: Python :: 3.13",
21
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
22
+ ]
23
+ dependencies = [
24
+ "fastapi>=0.111.0",
25
+ "uvicorn>=0.30.0",
26
+ "onnxruntime>=1.18.0",
27
+ "numpy>=1.26.0",
28
+ "httpx>=0.28.1",
29
+ "pydantic>=2.7.0",
30
+ ]
31
+
32
+ [project.optional-dependencies]
33
+ gguf = ["llama-cpp-python>=0.2.70"]
34
+ cuda = ["onnxruntime-gpu>=1.18.0"]
35
+
36
+ [project.urls]
37
+ Homepage = "https://github.com/n24q02m/mcp-core"
38
+ Repository = "https://github.com/n24q02m/mcp-core"
39
+ Issues = "https://github.com/n24q02m/mcp-core/issues"
40
+
41
+ [project.scripts]
42
+ mcp-embedding-daemon = "mcp_embedding_daemon.__main__:main"
43
+
44
+ [dependency-groups]
45
+ dev = [
46
+ "httpx>=0.28.1",
47
+ "pytest>=9.0.3",
48
+ "pytest-cov>=7.0.0",
49
+ "ruff>=0.15.7",
50
+ "ty>=0.0.1a22",
51
+ ]
52
+
53
+ [tool.hatch.build.targets.wheel]
54
+ packages = ["src/mcp_embedding_daemon"]
55
+
56
+ [tool.ruff]
57
+ line-length = 120
58
+ target-version = "py313"
@@ -0,0 +1 @@
1
# Resolve the version from the installed distribution metadata so it cannot
# drift from the release number in pyproject.toml; the previous hard-coded
# "0.1.0" no longer matched the published 1.0.0 package.
from importlib.metadata import PackageNotFoundError
from importlib.metadata import version as _distribution_version

try:
    __version__ = _distribution_version("mcp-embedding-daemon")
except PackageNotFoundError:
    # Imported from a source tree that was never installed, so no metadata exists.
    __version__ = "0.0.0+unknown"

__all__ = ["__version__"]
@@ -0,0 +1,53 @@
1
+ """CLI entry point for mcp-embedding-daemon."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import sys
7
+
8
+
9
def main(argv: list[str] | None = None) -> int:
    """Parse the command line and serve the embedding API with uvicorn.

    Args:
        argv: Arguments to parse instead of ``sys.argv[1:]``. The default of
            ``None`` keeps the console-script behaviour unchanged.

    Returns:
        ``0`` once ``uvicorn.run`` returns, or ``1`` when uvicorn cannot be
        imported.
    """
    parser = argparse.ArgumentParser(
        prog="mcp-embedding-daemon",
        description="Shared ONNX/GGUF embedding server for the n24q02m MCP ecosystem",
    )
    parser.add_argument(
        "--host",
        default="127.0.0.1",
        help="Bind address (default: 127.0.0.1)",
    )
    parser.add_argument(
        "--port",
        type=int,
        default=9800,
        help="Bind port (default: 9800)",
    )
    parser.add_argument(
        "--log-level",
        default="info",
        choices=["critical", "error", "warning", "info", "debug", "trace"],
        help="uvicorn log level (default: info)",
    )
    args = parser.parse_args(argv)

    # Imported only after argument parsing so that `--help` and usage errors
    # are handled before the server dependency is needed.
    try:
        import uvicorn
    except ImportError:
        # The package defines no `server` extra (only `gguf` and `cuda`), so
        # point the user at uvicorn itself rather than a non-existent extra.
        sys.stderr.write(
            "uvicorn is required to run mcp-embedding-daemon. "
            "Install it via `pip install uvicorn` "
            "or `uv add uvicorn`.\n"
        )
        return 1

    uvicorn.run(
        "mcp_embedding_daemon.api:app",
        host=args.host,
        port=args.port,
        log_level=args.log_level,
    )
    return 0


if __name__ == "__main__":
    sys.exit(main())
@@ -0,0 +1,76 @@
1
+ """HTTP API for shared embedding daemon.
2
+
3
+ Exposes /embed (text to vector), /rerank (query + docs to scores), /health.
4
+ Used by wet-mcp, mnemo-mcp, better-code-review-graph to share a single
5
+ ONNX/GGUF model instance instead of loading per-server.
6
+
7
+ v0.1.0 alpha: /health works. /embed and /rerank return 501 Not Implemented
8
+ with a pointer to the roadmap because the ONNX + GGUF backends ship as
9
+ thin adapters around qwen3-embed in a follow-up release.
10
+ """
11
+
12
from __future__ import annotations

from importlib.metadata import PackageNotFoundError, version

from fastapi import FastAPI, HTTPException, status
from pydantic import BaseModel
16
+
17
+
18
# Request body accepted by the POST /embed handler.
class EmbedRequest(BaseModel):
    model: str = "qwen3-0.6b"  # optional; falls back to this default
    input: list[str]  # required (no default)
    dims: int = 768  # optional; presumably the requested vector size -- TODO confirm once a backend exists
22
+
23
+
24
# Response schema declared as response_model for POST /embed.
class EmbedResponse(BaseModel):
    data: list[list[float]]  # presumably one vector per input string -- TODO confirm once a backend exists
    model: str
    dims: int
28
+
29
+
30
# Request body accepted by the POST /rerank handler.
class RerankRequest(BaseModel):
    model: str = "qwen3-rerank-0.6b"  # optional; falls back to this default
    query: str  # required (no default)
    documents: list[str]  # required (no default)
    top_n: int | None = None  # optional; presumably limits how many results are returned -- TODO confirm
35
+
36
+
37
# Response schema declared as response_model for POST /rerank.
class RerankResponse(BaseModel):
    results: list[dict]  # NOTE(review): the keys of each entry are not defined anywhere in this module
    model: str
40
+
41
+
42
# Response schema returned by GET /health.
class HealthResponse(BaseModel):
    status: str  # the health handler always sends "ok"
    version: str  # the health handler fills this from the module-level __version__
45
+
46
+
47
# Report the version of the installed distribution rather than a hard-coded
# literal: the previous "0.1.0" constant no longer matched the published
# package, so /health, the OpenAPI document and the 501 detail all advertised
# a stale release number.
try:
    __version__ = version("mcp-embedding-daemon")
except PackageNotFoundError:
    # Imported from a source tree that was never installed, so no metadata exists.
    __version__ = "0.0.0+unknown"

# ASGI application object; the CLI hands it to uvicorn as "mcp_embedding_daemon.api:app".
app = FastAPI(title="mcp-embedding-daemon", version=__version__)


# Shared `detail` text for the endpoints whose backends are not implemented yet.
NOT_IMPLEMENTED_DETAIL = (
    f"Embedding backend (ONNX / GGUF) is not yet wired in v{__version__}. "
    "Track progress at https://github.com/n24q02m/mcp-core/issues"
)
56
+
57
+
58
@app.get("/health", response_model=HealthResponse)
async def health() -> HealthResponse:
    # Liveness probe: the payload is built from constants only.
    payload = HealthResponse(status="ok", version=__version__)
    return payload
61
+
62
+
63
@app.post("/embed", response_model=EmbedResponse)
async def embed(req: EmbedRequest) -> EmbedResponse:
    # Stub endpoint: the validated request is not used; every call is answered
    # with HTTP 501 carrying the shared not-implemented detail.
    not_wired = HTTPException(
        detail=NOT_IMPLEMENTED_DETAIL,
        status_code=status.HTTP_501_NOT_IMPLEMENTED,
    )
    raise not_wired
69
+
70
+
71
@app.post("/rerank", response_model=RerankResponse)
async def rerank(req: RerankRequest) -> RerankResponse:
    # Stub endpoint: the validated request is not used; every call is answered
    # with HTTP 501 carrying the shared not-implemented detail.
    not_wired = HTTPException(
        detail=NOT_IMPLEMENTED_DETAIL,
        status_code=status.HTTP_501_NOT_IMPLEMENTED,
    )
    raise not_wired
@@ -0,0 +1,14 @@
1
+ """GGUF backend via llama-cpp-python.
2
+
3
+ Used when ONNX unavailable or quantized GGUF preferred for CPU inference.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+
9
class GGUFBackend:
    """Placeholder GGUF embedding backend; inference is not implemented yet."""

    def __init__(self, model_path: str) -> None:
        """Store *model_path* on the instance; nothing is loaded or validated."""
        self._model_path = model_path

    def embed(self, texts: list[str]) -> list[list[float]]:
        """Always raise ``NotImplementedError``; *texts* is not inspected."""
        pending = "Wire to llama-cpp-python in a follow-up Phase I task"
        raise NotImplementedError(pending)
@@ -0,0 +1,17 @@
1
+ """ONNX backend -- CPU or CUDA ExecutionProvider.
2
+
3
+ Planned: reuse the qwen3-embed repo model loader and auto-detect CUDA availability (not implemented yet; the class below is a stub).
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+
9
class ONNXBackend:
    """Placeholder ONNX backend; both operations are unimplemented stubs."""

    def __init__(self, model_path: str) -> None:
        """Store *model_path* on the instance; nothing is loaded or validated."""
        self._model_path = model_path

    def embed(self, texts: list[str]) -> list[list[float]]:
        """Always raise ``NotImplementedError``; *texts* is not inspected."""
        pending = "Wire to qwen3-embed in a follow-up Phase I task"
        raise NotImplementedError(pending)

    def rerank(self, query: str, docs: list[str]) -> list[tuple[int, float]]:
        """Always raise ``NotImplementedError``; *query* and *docs* are not inspected."""
        pending = "Wire to qwen3-embed in a follow-up Phase I task"
        raise NotImplementedError(pending)
@@ -0,0 +1,48 @@
1
+ """Tests for mcp_embedding_daemon.api."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from fastapi.testclient import TestClient
6
+
7
+ from mcp_embedding_daemon.api import __version__, app
8
+
9
+
10
def test_health_returns_ok() -> None:
    """GET /health answers 200 with the status/version payload."""
    response = TestClient(app).get("/health")
    expected_body = {"status": "ok", "version": __version__}
    assert response.status_code == 200
    assert response.json() == expected_body
15
+
16
+
17
def test_embed_returns_501_with_roadmap_link() -> None:
    """POST /embed with a valid body answers 501 and links to the repository."""
    payload = {"input": ["hello world"]}
    response = TestClient(app).post("/embed", json=payload)
    assert response.status_code == 501
    detail = response.json()["detail"]
    assert "not yet wired" in detail
    assert "github.com/n24q02m/mcp-core" in detail
24
+
25
+
26
def test_rerank_returns_501_with_roadmap_link() -> None:
    """POST /rerank with a valid body answers 501 with the not-wired detail."""
    payload = {"query": "test", "documents": ["doc a", "doc b"]}
    response = TestClient(app).post("/rerank", json=payload)
    assert response.status_code == 501
    detail = response.json()["detail"]
    assert "not yet wired" in detail
35
+
36
+
37
def test_embed_validates_input_schema() -> None:
    """POST /embed without the required `input` field is rejected with 422."""
    empty_body: dict = {}
    response = TestClient(app).post("/embed", json=empty_body)
    assert response.status_code == 422
42
+
43
+
44
def test_rerank_validates_input_schema() -> None:
    """POST /rerank without `query` and `documents` is rejected with 422."""
    empty_body: dict = {}
    response = TestClient(app).post("/rerank", json=empty_body)
    assert response.status_code == 422
@@ -0,0 +1,6 @@
1
+ from mcp_embedding_daemon import __version__
2
+
3
+
4
def test_version_exposed() -> None:
    """The package exposes `__version__` as a non-empty string."""
    exposed = __version__
    assert isinstance(exposed, str)
    assert exposed != ""
+ assert len(__version__) > 0