dify-mcp 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dify_mcp/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """Dify Knowledge Base MCP Server."""
2
+
3
+ __version__ = "0.1.0"
dify_mcp/config.py ADDED
@@ -0,0 +1,64 @@
1
+ from pathlib import Path
2
+
3
+ from pydantic_settings import BaseSettings, SettingsConfigDict
4
+
5
+
6
+ def _find_env_file() -> str | None:
7
+ """Locate an optional .env file.
8
+
9
+ Environment variables always take precedence; a .env file is only a
10
+ convenience fallback. We look in the current working directory first
11
+ (normal usage), then in the source-checkout project root. When the
12
+ package is pip-installed and no local .env exists, this returns None and
13
+ configuration comes entirely from the process environment (e.g. the
14
+ ``env`` block of an MCP client's ``mcp.json``).
15
+ """
16
+ candidates = [
17
+ Path.cwd() / ".env",
18
+ Path(__file__).resolve().parents[2] / ".env",
19
+ ]
20
+ for path in candidates:
21
+ if path.is_file():
22
+ return str(path)
23
+ return None
24
+
25
+
26
+ ENV_FILE = _find_env_file()
27
+
28
+
29
class Settings(BaseSettings):
    """Configuration for the Dify MCP server.

    Values are loaded by ``BaseSettings`` using ``model_config`` below; the
    optional ``.env`` path comes from the module-level ``ENV_FILE``. Unknown
    keys are ignored (``extra="ignore"``).
    """

    model_config = SettingsConfigDict(
        env_file=ENV_FILE,
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # Base URL of the Dify API; normalised by the ``api_base`` property.
    dify_api_base: str = "http://127.0.0.1/v1"
    # Bearer token sent with every request (no default: required).
    dify_api_key: str
    # Comma-separated dataset IDs that may be accessed (no default: required).
    dify_allowed_datasets: str
    # HTTP timeout handed to the HTTP client.
    dify_timeout: float = 30.0
    # TLS certificate verification flag handed to the HTTP client.
    dify_verify_ssl: bool = False

    @property
    def api_base(self) -> str:
        """Return the base URL without trailing slashes and with a scheme.

        ``http://`` is prepended when the configured value starts with
        neither ``http://`` nor ``https://``.
        """
        trimmed = self.dify_api_base.rstrip("/")
        has_scheme = trimmed.startswith(("http://", "https://"))
        return trimmed if has_scheme else f"http://{trimmed}"

    @property
    def allowed_dataset_ids(self) -> frozenset[str]:
        """Parse ``dify_allowed_datasets`` into a set of non-empty IDs.

        Raises:
            ValueError: If no non-blank ID remains after splitting on commas.
        """
        pieces = (chunk.strip() for chunk in self.dify_allowed_datasets.split(","))
        parsed = frozenset(piece for piece in pieces if piece)
        if not parsed:
            raise ValueError("DIFY_ALLOWED_DATASETS must contain at least one dataset UUID")
        return parsed

    def ensure_dataset_allowed(self, dataset_id: str) -> None:
        """Raise ``PermissionError`` unless ``dataset_id`` is allowlisted."""
        if dataset_id in self.allowed_dataset_ids:
            return
        allowed = ", ".join(sorted(self.allowed_dataset_ids))
        raise PermissionError(
            f"Dataset '{dataset_id}' is not in the allowed list: {allowed}"
        )
61
+
62
+
63
def load_settings() -> Settings:
    """Construct and return a fresh ``Settings`` instance."""
    # Required fields are supplied by the settings loader rather than as
    # constructor arguments, hence the type-checker suppression.
    settings = Settings()  # type: ignore[call-arg]
    return settings
@@ -0,0 +1,134 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ import httpx
6
+
7
+ from dify_mcp.config import Settings
8
+
9
+
10
+ class DifyAPIError(Exception):
11
+ def __init__(self, message: str, status_code: int | None = None) -> None:
12
+ super().__init__(message)
13
+ self.status_code = status_code
14
+
15
+
16
+ class DifyClient:
17
+ def __init__(self, settings: Settings) -> None:
18
+ self._settings = settings
19
+ self._client = httpx.AsyncClient(
20
+ base_url=settings.api_base,
21
+ headers={
22
+ "Authorization": f"Bearer {settings.dify_api_key}",
23
+ "Content-Type": "application/json",
24
+ },
25
+ timeout=settings.dify_timeout,
26
+ verify=settings.dify_verify_ssl,
27
+ )
28
+
29
+ async def close(self) -> None:
30
+ await self._client.aclose()
31
+
32
+ async def _request(self, method: str, path: str, **kwargs: Any) -> Any:
33
+ try:
34
+ response = await self._client.request(method, path, **kwargs)
35
+ except httpx.TimeoutException as exc:
36
+ raise DifyAPIError(
37
+ f"Request timed out after {self._settings.dify_timeout}s: {method} {path}"
38
+ ) from exc
39
+ except httpx.RequestError as exc:
40
+ raise DifyAPIError(f"Network error calling Dify API: {exc}") from exc
41
+
42
+ if response.status_code >= 400:
43
+ detail = response.text.strip() or response.reason_phrase
44
+ raise DifyAPIError(
45
+ f"Dify API error {response.status_code}: {detail}",
46
+ status_code=response.status_code,
47
+ )
48
+
49
+ if response.status_code == 204 or not response.content:
50
+ return {}
51
+ return response.json()
52
+
53
+ async def health_check(self) -> dict[str, Any]:
54
+ return await self.list_datasets(page=1, limit=1)
55
+
56
+ async def list_datasets(
57
+ self,
58
+ *,
59
+ page: int = 1,
60
+ limit: int = 20,
61
+ keyword: str | None = None,
62
+ ) -> dict[str, Any]:
63
+ params: dict[str, Any] = {"page": page, "limit": limit}
64
+ if keyword:
65
+ params["keyword"] = keyword
66
+ return await self._request("GET", "/datasets", params=params)
67
+
68
+ async def get_dataset(self, dataset_id: str) -> dict[str, Any]:
69
+ return await self._request("GET", f"/datasets/{dataset_id}")
70
+
71
+ async def retrieve(
72
+ self,
73
+ dataset_id: str,
74
+ query: str,
75
+ *,
76
+ top_k: int = 5,
77
+ search_method: str | None = None,
78
+ reranking_enable: bool | None = None,
79
+ score_threshold_enabled: bool | None = None,
80
+ score_threshold: float | None = None,
81
+ ) -> dict[str, Any]:
82
+ body: dict[str, Any] = {"query": query}
83
+ retrieval_model: dict[str, Any] = {"top_k": top_k}
84
+
85
+ if search_method is not None:
86
+ retrieval_model["search_method"] = search_method
87
+ if reranking_enable is not None:
88
+ retrieval_model["reranking_enable"] = reranking_enable
89
+ if score_threshold_enabled is not None:
90
+ retrieval_model["score_threshold_enabled"] = score_threshold_enabled
91
+ if score_threshold is not None:
92
+ retrieval_model["score_threshold"] = score_threshold
93
+
94
+ if len(retrieval_model) > 1 or "top_k" in retrieval_model:
95
+ body["retrieval_model"] = retrieval_model
96
+
97
+ return await self._request("POST", f"/datasets/{dataset_id}/retrieve", json=body)
98
+
99
+ async def list_documents(
100
+ self,
101
+ dataset_id: str,
102
+ *,
103
+ page: int = 1,
104
+ limit: int = 20,
105
+ keyword: str | None = None,
106
+ status: str | None = None,
107
+ ) -> dict[str, Any]:
108
+ params: dict[str, Any] = {"page": page, "limit": limit}
109
+ if keyword:
110
+ params["keyword"] = keyword
111
+ if status:
112
+ params["status"] = status
113
+ return await self._request("GET", f"/datasets/{dataset_id}/documents", params=params)
114
+
115
+ async def get_document(self, dataset_id: str, document_id: str) -> dict[str, Any]:
116
+ return await self._request("GET", f"/datasets/{dataset_id}/documents/{document_id}")
117
+
118
+ async def list_segments(
119
+ self,
120
+ dataset_id: str,
121
+ document_id: str,
122
+ *,
123
+ page: int = 1,
124
+ limit: int = 20,
125
+ keyword: str | None = None,
126
+ ) -> dict[str, Any]:
127
+ params: dict[str, Any] = {"page": page, "limit": limit}
128
+ if keyword:
129
+ params["keyword"] = keyword
130
+ return await self._request(
131
+ "GET",
132
+ f"/datasets/{dataset_id}/documents/{document_id}/segments",
133
+ params=params,
134
+ )
dify_mcp/formatters.py ADDED
@@ -0,0 +1,56 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+
6
def simplify_dataset(item: dict[str, Any]) -> dict[str, Any]:
    """Reduce a dataset record to a fixed set of summary fields.

    Missing keys yield ``None``. ``search_method`` and ``top_k`` are read from
    the nested ``retrieval_model_dict`` mapping, which may be absent or falsy.
    """
    summary: dict[str, Any] = {
        key: item.get(key)
        for key in ("id", "name", "description", "document_count", "indexing_technique")
    }
    retrieval_cfg = item.get("retrieval_model_dict") or {}
    summary["search_method"] = retrieval_cfg.get("search_method")
    summary["top_k"] = retrieval_cfg.get("top_k")
    summary["enable_api"] = item.get("enable_api")
    return summary
18
+
19
+
20
def simplify_document(item: dict[str, Any]) -> dict[str, Any]:
    """Reduce a document record to its summary fields (``None`` when missing)."""
    wanted = ("id", "name", "indexing_status", "word_count", "hit_count", "created_at")
    return {field: item.get(field) for field in wanted}
29
+
30
+
31
def format_retrieve_results(payload: dict[str, Any], dataset_id: str) -> dict[str, Any]:
    """Flatten a retrieve response into a compact result list.

    Args:
        payload: Decoded retrieve response; ``query`` may be a mapping with a
            ``content`` key or any other value, ``records`` a list of mappings.
        dataset_id: Echoed back in the result.

    Returns:
        A dict with ``dataset_id``, ``query``, ``result_count`` and ``results``.
    """
    raw_query = payload.get("query") or {}
    if isinstance(raw_query, dict):
        query_text = raw_query.get("content")
    else:
        query_text = raw_query

    def _flatten(record: dict[str, Any]) -> dict[str, Any]:
        # One output row per record; nested mappings may be missing or falsy.
        segment = record.get("segment") or {}
        document = segment.get("document") or {}
        return {
            "content": segment.get("content", ""),
            # Falsy answers (e.g. empty string) are normalised to None.
            "answer": segment.get("answer") or None,
            "score": record.get("score"),
            "segment_id": segment.get("id"),
            "document_id": segment.get("document_id") or document.get("id"),
            "document_name": document.get("name"),
            "position": segment.get("position"),
        }

    rows = [_flatten(record) for record in payload.get("records") or []]
    return {
        "dataset_id": dataset_id,
        "query": query_text,
        "result_count": len(rows),
        "results": rows,
    }
dify_mcp/server.py ADDED
@@ -0,0 +1,217 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from contextlib import asynccontextmanager
5
+ from typing import Any, AsyncIterator
6
+
7
+ from mcp.server.fastmcp import Context, FastMCP
8
+
9
+ from dify_mcp.config import Settings, load_settings
10
+ from dify_mcp.dify_client import DifyAPIError, DifyClient
11
+ from dify_mcp.formatters import format_retrieve_results, simplify_dataset, simplify_document
12
+
13
+ MAX_QUERY_LENGTH = 250
14
+
15
+
16
+ def _error(message: str) -> str:
17
+ return json.dumps({"error": message}, ensure_ascii=False)
18
+
19
+
20
+ def _ok(payload: Any) -> str:
21
+ return json.dumps(payload, ensure_ascii=False, indent=2)
22
+
23
+
24
@asynccontextmanager
async def app_lifespan(server: FastMCP) -> AsyncIterator[dict[str, Any]]:
    """Create the shared settings and Dify client for the server's lifetime.

    Loads settings, builds a ``DifyClient`` and runs a connectivity check
    before yielding ``{"settings": ..., "client": ...}``.

    Raises:
        RuntimeError: If the connectivity check raises ``DifyAPIError``.
    """
    settings = load_settings()
    client = DifyClient(settings)
    # try/finally guarantees the HTTP client is closed when the health check
    # fails or when the body of the context exits with an exception.
    try:
        try:
            await client.health_check()
        except DifyAPIError as exc:
            raise RuntimeError(f"Dify connectivity check failed: {exc}") from exc

        yield {"settings": settings, "client": client}
    finally:
        await client.close()
35
+
36
+
37
# Guidance text passed to FastMCP as the server's ``instructions``.
_SERVER_INSTRUCTIONS = (
    "Tools for querying Dify knowledge bases over the LAN. "
    "Only datasets listed in DIFY_ALLOWED_DATASETS are accessible. "
    "Use list_datasets to discover allowed knowledge bases, then search_knowledge to retrieve chunks."
)

# Server instance; the tool functions register themselves via ``@mcp.tool()``.
mcp = FastMCP(
    "dify-knowledge",
    lifespan=app_lifespan,
    instructions=_SERVER_INSTRUCTIONS,
)
46
+
47
+
48
def _ctx(ctx: Context) -> tuple[Settings, DifyClient]:
    """Return the ``(settings, client)`` pair stored in the lifespan context."""
    shared = ctx.request_context.lifespan_context
    settings: Settings = shared["settings"]
    client: DifyClient = shared["client"]
    return settings, client
52
+
53
+
54
@mcp.tool()
async def list_datasets(
    ctx: Context,
    keyword: str | None = None,
    page: int = 1,
    limit: int = 20,
) -> str:
    """List allowed Dify knowledge bases (filtered by DIFY_ALLOWED_DATASETS)."""
    # NOTE(review): the docstring is presumably surfaced to MCP clients as the
    # tool description, so it is left verbatim - confirm against FastMCP docs.
    settings, client = _ctx(ctx)

    # Parse the allowlist once instead of once per returned item, and report a
    # malformed allowlist as a JSON error like every other failure path.
    try:
        allowed = settings.allowed_dataset_ids
    except ValueError as exc:
        return _error(str(exc))

    try:
        payload = await client.list_datasets(page=page, limit=limit, keyword=keyword)
    except DifyAPIError as exc:
        return _error(str(exc))

    # Only datasets on the allowlist are exposed; "total" counts the entries
    # that survive filtering on this page.
    datasets = [
        simplify_dataset(item)
        for item in payload.get("data") or []
        if item.get("id") in allowed
    ]
    return _ok(
        {
            "allowed_dataset_ids": sorted(allowed),
            "datasets": datasets,
            "total": len(datasets),
            "has_more": payload.get("has_more", False),
        }
    )
81
+
82
+
83
@mcp.tool()
async def get_dataset(ctx: Context, dataset_id: str) -> str:
    """Get details for one allowed knowledge base."""
    settings, client = _ctx(ctx)
    try:
        # The allowlist check runs first so a disallowed ID never reaches the API.
        settings.ensure_dataset_allowed(dataset_id)
        details = await client.get_dataset(dataset_id)
    except (PermissionError, DifyAPIError) as failure:
        # Both failure kinds are reported to the caller as a JSON error string.
        return _error(str(failure))
    return _ok(simplify_dataset(details))
96
+
97
+
98
@mcp.tool()
async def search_knowledge(
    ctx: Context,
    dataset_id: str,
    query: str,
    top_k: int = 5,
    search_method: str | None = None,
) -> str:
    """Retrieve relevant chunks from an allowed Dify knowledge base."""
    settings, client = _ctx(ctx)

    # Validate the trimmed query before doing any permission or network work.
    cleaned = query.strip()
    if not cleaned:
        return _error("query must not be empty")
    if len(cleaned) > MAX_QUERY_LENGTH:
        return _error(f"query exceeds Dify limit of {MAX_QUERY_LENGTH} characters")

    try:
        settings.ensure_dataset_allowed(dataset_id)
        found = await client.retrieve(
            dataset_id,
            cleaned,
            top_k=top_k,
            search_method=search_method,
        )
    except (PermissionError, DifyAPIError) as failure:
        return _error(str(failure))

    return _ok(format_retrieve_results(found, dataset_id))
128
+
129
+
130
@mcp.tool()
async def list_documents(
    ctx: Context,
    dataset_id: str,
    keyword: str | None = None,
    page: int = 1,
    limit: int = 20,
    status: str | None = None,
) -> str:
    """List documents inside an allowed knowledge base."""
    settings, client = _ctx(ctx)
    try:
        settings.ensure_dataset_allowed(dataset_id)
        listing = await client.list_documents(
            dataset_id,
            page=page,
            limit=limit,
            keyword=keyword,
            status=status,
        )
    except (PermissionError, DifyAPIError) as failure:
        return _error(str(failure))

    # "total" and "has_more" are passed through from the API response.
    summary = {
        "dataset_id": dataset_id,
        "documents": [simplify_document(entry) for entry in listing.get("data") or []],
        "total": listing.get("total"),
        "has_more": listing.get("has_more", False),
    }
    return _ok(summary)
164
+
165
+
166
@mcp.tool()
async def list_document_segments(
    ctx: Context,
    dataset_id: str,
    document_id: str,
    keyword: str | None = None,
    page: int = 1,
    limit: int = 20,
) -> str:
    """List text segments (chunks) for a document in an allowed knowledge base."""
    settings, client = _ctx(ctx)
    try:
        settings.ensure_dataset_allowed(dataset_id)
        listing = await client.list_segments(
            dataset_id,
            document_id,
            page=page,
            limit=limit,
            keyword=keyword,
        )
    except (PermissionError, DifyAPIError) as failure:
        return _error(str(failure))

    # Keep only the fields worth returning; missing keys become None.
    segment_fields = ("id", "content", "position", "word_count", "enabled")
    segments = [
        {field: entry.get(field) for field in segment_fields}
        for entry in listing.get("data") or []
    ]
    summary = {
        "dataset_id": dataset_id,
        "document_id": document_id,
        "segments": segments,
        "total": listing.get("total"),
        "has_more": listing.get("has_more", False),
    }
    return _ok(summary)
210
+
211
+
212
def main() -> None:
    """Run the MCP server; this is the ``dify-mcp`` console-script target."""
    mcp.run()
214
+
215
+
216
+ if __name__ == "__main__":
217
+ main()
@@ -0,0 +1,231 @@
1
+ Metadata-Version: 2.4
2
+ Name: dify-mcp
3
+ Version: 0.1.0
4
+ Summary: MCP server exposing Dify knowledge base retrieval to Cursor and other MCP clients
5
+ Project-URL: Homepage, https://github.com/salted-butter-joshua/dify-mcp
6
+ Project-URL: Repository, https://github.com/salted-butter-joshua/dify-mcp
7
+ Project-URL: Issues, https://github.com/salted-butter-joshua/dify-mcp/issues
8
+ Author-email: salted-butter-joshua <chenshidatou@gmail.com>
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: dify,knowledge-base,llm,mcp,model-context-protocol,rag,retrieval
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
20
+ Requires-Python: >=3.11
21
+ Requires-Dist: httpx>=0.27.0
22
+ Requires-Dist: mcp>=1.6.0
23
+ Requires-Dist: pydantic-settings>=2.0.0
24
+ Description-Content-Type: text/markdown
25
+
26
+ # dify-mcp
27
+
28
+ <!-- mcp-name: io.github.salted-butter-joshua/dify-mcp -->
29
+
30
+ Expose [Dify](https://dify.ai) knowledge base retrieval capabilities via [MCP](https://modelcontextprotocol.io) (Model Context Protocol), for use in Cursor and other MCP-compatible clients.
31
+
32
+ Connect to a self-hosted or remote Dify instance over HTTP or HTTPS. Access is restricted to the datasets listed in the required `DIFY_ALLOWED_DATASETS` allowlist.
33
+
34
+ ## Features
35
+
36
+ - **Knowledge base discovery** — list and inspect allowed datasets
37
+ - **Semantic retrieval** — search chunks via Dify `POST /datasets/{dataset_id}/retrieve`
38
+ - **Document browsing** — list documents and segments within a dataset
39
+ - **Dataset allowlist** — restrict access to specific `dataset_id` values
40
+ - **stdio transport** — no manual server startup; the MCP client launches the process
41
+
42
+ ## Requirements
43
+
44
+ - Python **3.11+**
45
+ - A running Dify instance with Knowledge Base API enabled
46
+ - Network access from the machine running the MCP server to your Dify API endpoint
47
+ - A Dify **Knowledge Base API Key** (Dify → Knowledge → Service API → API Key)
48
+
49
+ ## Quick Start
50
+
51
+ ### 1. Clone and install
52
+
53
+ ```bash
54
+ git clone https://github.com/salted-butter-joshua/dify-mcp.git
55
+ cd dify-mcp
56
+
57
+ python3.11 -m venv .venv
58
+
59
+ # Windows
60
+ .venv\Scripts\python.exe -m pip install -e .
61
+
62
+ # macOS / Linux
63
+ .venv/bin/python -m pip install -e .
64
+ ```
65
+
66
+ Verify import:
67
+
68
+ ```bash
69
+ # Windows
70
+ .venv\Scripts\python.exe -c "import dify_mcp; print('OK')"
71
+
72
+ # macOS / Linux
73
+ .venv/bin/python -c "import dify_mcp; print('OK')"
74
+ ```
75
+
76
+ ### 2. Configure environment
77
+
78
+ Copy the example env file and edit it:
79
+
80
+ ```bash
81
+ cp .env.example .env
82
+ ```
83
+
84
+ ```env
85
+ DIFY_API_BASE=http://your-dify-host/v1
86
+ DIFY_API_KEY=dataset-your-api-key
87
+ DIFY_ALLOWED_DATASETS=dataset-uuid-1,dataset-uuid-2
88
+ DIFY_TIMEOUT=30
89
+ DIFY_VERIFY_SSL=false
90
+ ```
91
+
92
+ | Variable | Description |
93
+ |----------|-------------|
94
+ | `DIFY_API_BASE` | Dify Knowledge API base URL, e.g. `http://192.168.1.100/v1` (default: `http://127.0.0.1/v1`) |
95
+ | `DIFY_API_KEY` | Knowledge Base API key |
96
+ | `DIFY_ALLOWED_DATASETS` | Comma-separated dataset UUIDs to expose (required) |
97
+ | `DIFY_TIMEOUT` | HTTP timeout in seconds (default: `30`) |
98
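+ | `DIFY_VERIFY_SSL` | Whether to verify TLS certificates (default: `false`, which accepts self-signed certificates; set to `true` when Dify is served over HTTPS with a trusted certificate) |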
99
+
100
+ Run the health check:
101
+
102
+ ```bash
103
+ # Windows
104
+ .venv\Scripts\python.exe scripts/health_check.py
105
+
106
+ # macOS / Linux
107
+ .venv/bin/python scripts/health_check.py
108
+ ```
109
+
110
+ ### 3. Add to Cursor
111
+
112
+ You do **not** need to start the MCP server manually. Cursor launches it automatically via stdio.
113
+
114
+ 1. Open Cursor Settings → **MCP** → **Edit config**
115
+ 2. Add the following to `mcpServers` in your MCP config file:
116
+ - Windows: `%USERPROFILE%\.cursor\mcp.json`
117
+ - macOS / Linux: `~/.cursor/mcp.json`
118
+ 3. Replace paths and env values with your own
119
+ 4. Restart Cursor
120
+
121
+ See [`mcp.json.example`](mcp.json.example) for a full example.
122
+
123
+ **Windows example:**
124
+
125
+ ```json
126
+ {
127
+ "mcpServers": {
128
+ "dify-knowledge": {
129
+ "command": "/absolute/path/to/dify-mcp/.venv/Scripts/python.exe",
130
+ "args": ["-m", "dify_mcp.server"],
131
+ "cwd": "/absolute/path/to/dify-mcp",
132
+ "env": {
133
+ "DIFY_API_BASE": "http://your-dify-host/v1",
134
+ "DIFY_API_KEY": "dataset-your-api-key",
135
+ "DIFY_ALLOWED_DATASETS": "dataset-uuid-1,dataset-uuid-2",
136
+ "DIFY_TIMEOUT": "30",
137
+ "DIFY_VERIFY_SSL": "false"
138
+ }
139
+ }
140
+ }
141
+ }
142
+ ```
143
+
144
+ **macOS / Linux example:**
145
+
146
+ ```json
147
+ {
148
+ "mcpServers": {
149
+ "dify-knowledge": {
150
+ "command": "/absolute/path/to/dify-mcp/.venv/bin/python",
151
+ "args": ["-m", "dify_mcp.server"],
152
+ "cwd": "/absolute/path/to/dify-mcp",
153
+ "env": {
154
+ "DIFY_API_BASE": "http://your-dify-host/v1",
155
+ "DIFY_API_KEY": "dataset-your-api-key",
156
+ "DIFY_ALLOWED_DATASETS": "dataset-uuid-1,dataset-uuid-2"
157
+ }
158
+ }
159
+ }
160
+ }
161
+ ```
162
+
163
+ ### 4. Verify in Cursor
164
+
165
+ 1. Cursor Settings → **MCP** → confirm `dify-knowledge` shows as enabled
166
+ 2. In chat, try:
167
+ - *List available Dify knowledge bases*
168
+ - *Search the knowledge base for "your query"*
169
+
170
+ ## MCP Tools
171
+
172
+ | Tool | Description |
173
+ |------|-------------|
174
+ | `list_datasets` | List knowledge bases within the allowlist |
175
+ | `get_dataset` | Get metadata for one dataset |
176
+ | `search_knowledge` | Retrieve relevant chunks (primary retrieval tool) |
177
+ | `list_documents` | List documents in a dataset |
178
+ | `list_document_segments` | List text segments for a document |
179
+
180
+ ## Architecture
181
+
182
+ ```
183
+ Cursor (MCP client)
184
+ │ stdio
185
+
186
+ dify-mcp (local process)
187
+ │ HTTPS / HTTP
188
+
189
+ Dify Knowledge API (/v1/datasets/...)
190
+ ```
191
+
192
+ The MCP server runs locally on your machine and calls the Dify API over the network. Dify does not need to reach your machine.
193
+
194
+ ## Troubleshooting
195
+
196
+ | Issue | Cause | Fix |
197
+ |-------|-------|-----|
198
+ | `No matching distribution found for mcp` | Python < 3.11 | Use Python 3.11+ in `.venv` |
199
+ | MCP shows error (red) | Wrong Python path, invalid key, or network issue | Check `mcp.json` paths and env vars |
200
+ | `401 Unauthorized` | Invalid API key | Regenerate key in Dify Service API panel |
201
+ | `Dataset '<id>' is not in the allowed list` | UUID not in `DIFY_ALLOWED_DATASETS` | Add the dataset UUID to the allowlist |
202
+ | Health check missing env vars | `.env` not found | Ensure `.env` exists in the project root |
203
+ | Connection timeout | Dify unreachable | Check network / VPN / firewall |
204
+
205
+ ## Project Structure
206
+
207
+ ```
208
+ dify-mcp/
209
+ ├── src/dify_mcp/
210
+ │ ├── server.py # MCP entry point
211
+ │ ├── config.py # Settings and allowlist
212
+ │ ├── dify_client.py # Dify Knowledge API client
213
+ │ └── formatters.py # Response formatting
214
+ ├── scripts/
215
+ │ └── health_check.py # Connectivity test
216
+ ├── mcp.json.example # Cursor MCP config template
217
+ ├── .env.example # Environment variable template
218
+ └── Dify-API.md # Local API path reference
219
+ ```
220
+
221
+ ## API Reference
222
+
223
+ - [Dify Knowledge Base API (official)](https://docs.dify.ai/api-reference/knowledge-bases/list-knowledge-bases)
224
+ - [Retrieve chunks](https://docs.dify.ai/api-reference/knowledge-bases/retrieve-chunks-from-a-knowledge-base-test-retrieval)
225
+ - Local path reference: [`Dify-API.md`](Dify-API.md)
226
+
227
+ ## Security Notes
228
+
229
+ - Never commit `.env` or API keys to version control
230
+ - A single Knowledge Base API key can access all visible datasets under the account — use `DIFY_ALLOWED_DATASETS` to limit exposure
231
+ - Prefer running MCP locally; keep API keys in `mcp.json` env or `.env` on your machine only
@@ -0,0 +1,10 @@
1
+ dify_mcp/__init__.py,sha256=ctxkdpmhSdHV-NDl-ItAQkReUiY5hkWbXtjaAXBXPoA,64
2
+ dify_mcp/config.py,sha256=VHThs8k-uUOX3lxmjowTBjU9oDMaafWtAtBPd7a0g-o,2128
3
+ dify_mcp/dify_client.py,sha256=H6lq_KZEL8R6T39bIZsVrFWbEudM4id-1ks-AxSjdSE,4699
4
+ dify_mcp/formatters.py,sha256=bw-4ugvBpP6V_ZK23_s33NItNtvG3ZxbLfFA_wuYYHE,1977
5
+ dify_mcp/server.py,sha256=1P6FbmeTs2_PwkmaVvb3bTxyACnME4wEQhZD_wYN1ns,6216
6
+ dify_mcp-0.1.0.dist-info/METADATA,sha256=YFfpmwaXr015LhlizFd_COHv1ZnuHB1KIArYV7_D3wc,7495
7
+ dify_mcp-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
8
+ dify_mcp-0.1.0.dist-info/entry_points.txt,sha256=Sfbz8BEZk-jsHdF8IXTXWDKkFlEATlO7b7Yi2wscHwU,50
9
+ dify_mcp-0.1.0.dist-info/licenses/LICENSE,sha256=34DTZXciXb86yKtzo8a0njblbt6nm7wJeGkYg4qUk04,1077
10
+ dify_mcp-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ dify-mcp = dify_mcp.server:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 salted-butter-joshua
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.