dify-mcp 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dify_mcp/__init__.py +3 -0
- dify_mcp/config.py +64 -0
- dify_mcp/dify_client.py +134 -0
- dify_mcp/formatters.py +56 -0
- dify_mcp/server.py +217 -0
- dify_mcp-0.1.0.dist-info/METADATA +231 -0
- dify_mcp-0.1.0.dist-info/RECORD +10 -0
- dify_mcp-0.1.0.dist-info/WHEEL +4 -0
- dify_mcp-0.1.0.dist-info/entry_points.txt +2 -0
- dify_mcp-0.1.0.dist-info/licenses/LICENSE +21 -0
dify_mcp/__init__.py
ADDED
dify_mcp/config.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def _find_env_file() -> str | None:
|
|
7
|
+
"""Locate an optional .env file.
|
|
8
|
+
|
|
9
|
+
Environment variables always take precedence; a .env file is only a
|
|
10
|
+
convenience fallback. We look in the current working directory first
|
|
11
|
+
(normal usage), then in the source-checkout project root. When the
|
|
12
|
+
package is pip-installed and no local .env exists, this returns None and
|
|
13
|
+
configuration comes entirely from the process environment (e.g. the
|
|
14
|
+
``env`` block of an MCP client's ``mcp.json``).
|
|
15
|
+
"""
|
|
16
|
+
candidates = [
|
|
17
|
+
Path.cwd() / ".env",
|
|
18
|
+
Path(__file__).resolve().parents[2] / ".env",
|
|
19
|
+
]
|
|
20
|
+
for path in candidates:
|
|
21
|
+
if path.is_file():
|
|
22
|
+
return str(path)
|
|
23
|
+
return None
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# Resolved once at import time; None when no .env file was found in either probed location.
ENV_FILE = _find_env_file()
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class Settings(BaseSettings):
    """Runtime configuration for the Dify MCP server.

    Values are read from the process environment and, when one was located,
    from the ``.env`` file referenced by ``ENV_FILE``. Unknown keys are ignored.
    """

    model_config = SettingsConfigDict(
        env_file=ENV_FILE,
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # Base URL of the Dify API; use the ``api_base`` property for the normalized form.
    dify_api_base: str = "http://127.0.0.1/v1"
    # API key (required, no default).
    dify_api_key: str
    # Comma-separated dataset ids that may be accessed (required, no default).
    dify_allowed_datasets: str
    # HTTP timeout in seconds.
    dify_timeout: float = 30.0
    # NOTE(review): TLS certificate verification is off by default.
    dify_verify_ssl: bool = False

    @property
    def api_base(self) -> str:
        """Return the base URL without trailing slashes and with a scheme.

        ``http://`` is prepended when the configured value starts with
        neither ``http://`` nor ``https://``.
        """
        trimmed = self.dify_api_base.rstrip("/")
        has_scheme = trimmed.startswith("http://") or trimmed.startswith("https://")
        return trimmed if has_scheme else f"http://{trimmed}"

    @property
    def allowed_dataset_ids(self) -> frozenset[str]:
        """Return the allow-listed dataset ids parsed from ``dify_allowed_datasets``.

        Entries are split on commas and stripped; blank entries are dropped.

        Raises:
            ValueError: if no non-blank entry remains.
        """
        stripped_entries = (raw.strip() for raw in self.dify_allowed_datasets.split(","))
        dataset_ids = frozenset(entry for entry in stripped_entries if entry)
        if dataset_ids:
            return dataset_ids
        raise ValueError("DIFY_ALLOWED_DATASETS must contain at least one dataset UUID")

    def ensure_dataset_allowed(self, dataset_id: str) -> None:
        """Raise ``PermissionError`` unless ``dataset_id`` is on the allowlist.

        Raises:
            PermissionError: if ``dataset_id`` is not allow-listed; the message
                names the rejected id and lists the allowed ones.
            ValueError: propagated from ``allowed_dataset_ids`` when the
                allowlist is empty.
        """
        permitted = self.allowed_dataset_ids
        if dataset_id in permitted:
            return
        listing = ", ".join(sorted(permitted))
        raise PermissionError(
            f"Dataset '{dataset_id}' is not in the allowed list: {listing}"
        )
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def load_settings() -> Settings:
    """Construct and return a ``Settings`` instance with no explicit arguments.

    Required fields are expected to be supplied by the settings sources
    (environment / optional ``.env`` file), hence the type-checker suppression.
    """
    loaded = Settings()  # type: ignore[call-arg]
    return loaded
|
dify_mcp/dify_client.py
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
import httpx
|
|
6
|
+
|
|
7
|
+
from dify_mcp.config import Settings
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class DifyAPIError(Exception):
    """Error raised for failed calls to the Dify API.

    Attributes:
        status_code: HTTP status code of the failing response, or ``None``
            when none was supplied (the default).
    """

    def __init__(self, message: str, status_code: int | None = None) -> None:
        # Keep the status alongside the message so callers can branch on it.
        self.status_code = status_code
        super().__init__(message)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class DifyClient:
    """Async client for the Dify dataset (knowledge base) HTTP endpoints.

    Wraps a single ``httpx.AsyncClient`` configured from ``Settings`` (base
    URL, bearer token, timeout, TLS verification). Transport and HTTP errors
    are reported as ``DifyAPIError``. Call ``close()`` when done.
    """

    def __init__(self, settings: Settings) -> None:
        """Create the underlying HTTP client from ``settings``."""
        self._settings = settings
        self._client = httpx.AsyncClient(
            base_url=settings.api_base,
            headers={
                "Authorization": f"Bearer {settings.dify_api_key}",
                "Content-Type": "application/json",
            },
            timeout=settings.dify_timeout,
            verify=settings.dify_verify_ssl,
        )

    @staticmethod
    def _path_segment(value: str) -> str:
        """Percent-encode ``value`` so it occupies exactly one URL path segment.

        Identifiers come from tool callers and are interpolated into request
        paths. Encoding ``/``, ``?``, ``#`` and friends prevents a crafted id
        (e.g. ``"../../<other-dataset>/documents/x"``) from redirecting the
        request to a different resource than the one that was allow-listed.
        """
        from urllib.parse import quote

        return quote(str(value), safe="")

    async def close(self) -> None:
        """Close the underlying HTTP client and release its connections."""
        await self._client.aclose()

    async def _request(self, method: str, path: str, **kwargs: Any) -> Any:
        """Send one request and return the decoded JSON body.

        Args:
            method: HTTP method name.
            path: Path relative to the configured base URL.
            **kwargs: Forwarded unchanged to ``httpx.AsyncClient.request``.

        Returns:
            The parsed JSON payload, or ``{}`` for a 204 / empty body.

        Raises:
            DifyAPIError: on timeout, any other transport error, an HTTP
                status >= 400, or a success response whose body is not JSON.
        """
        try:
            response = await self._client.request(method, path, **kwargs)
        except httpx.TimeoutException as exc:
            # Must precede RequestError: timeouts are handled with a dedicated message.
            raise DifyAPIError(
                f"Request timed out after {self._settings.dify_timeout}s: {method} {path}"
            ) from exc
        except httpx.RequestError as exc:
            raise DifyAPIError(f"Network error calling Dify API: {exc}") from exc

        if response.status_code >= 400:
            detail = response.text.strip() or response.reason_phrase
            raise DifyAPIError(
                f"Dify API error {response.status_code}: {detail}",
                status_code=response.status_code,
            )

        if response.status_code == 204 or not response.content:
            return {}
        try:
            return response.json()
        except ValueError as exc:
            # e.g. an HTML page from a proxy; surface it as an API error
            # instead of leaking a decoding exception to callers.
            raise DifyAPIError(
                f"Dify API returned a non-JSON response for {method} {path}",
                status_code=response.status_code,
            ) from exc

    async def health_check(self) -> dict[str, Any]:
        """Probe connectivity by listing a single dataset."""
        return await self.list_datasets(page=1, limit=1)

    async def list_datasets(
        self,
        *,
        page: int = 1,
        limit: int = 20,
        keyword: str | None = None,
    ) -> dict[str, Any]:
        """GET ``/datasets`` with paging and an optional keyword filter."""
        params: dict[str, Any] = {"page": page, "limit": limit}
        if keyword:
            params["keyword"] = keyword
        return await self._request("GET", "/datasets", params=params)

    async def get_dataset(self, dataset_id: str) -> dict[str, Any]:
        """GET ``/datasets/{dataset_id}``."""
        return await self._request("GET", f"/datasets/{self._path_segment(dataset_id)}")

    async def retrieve(
        self,
        dataset_id: str,
        query: str,
        *,
        top_k: int = 5,
        search_method: str | None = None,
        reranking_enable: bool | None = None,
        score_threshold_enabled: bool | None = None,
        score_threshold: float | None = None,
    ) -> dict[str, Any]:
        """POST ``/datasets/{dataset_id}/retrieve`` for ``query``.

        ``retrieval_model`` is always sent and always contains ``top_k``; the
        other retrieval options are included only when not ``None``.
        """
        retrieval_model: dict[str, Any] = {"top_k": top_k}
        optional_fields = {
            "search_method": search_method,
            "reranking_enable": reranking_enable,
            "score_threshold_enabled": score_threshold_enabled,
            "score_threshold": score_threshold,
        }
        retrieval_model.update(
            {name: value for name, value in optional_fields.items() if value is not None}
        )

        body: dict[str, Any] = {"query": query, "retrieval_model": retrieval_model}
        return await self._request(
            "POST",
            f"/datasets/{self._path_segment(dataset_id)}/retrieve",
            json=body,
        )

    async def list_documents(
        self,
        dataset_id: str,
        *,
        page: int = 1,
        limit: int = 20,
        keyword: str | None = None,
        status: str | None = None,
    ) -> dict[str, Any]:
        """GET ``/datasets/{dataset_id}/documents`` with optional filters."""
        params: dict[str, Any] = {"page": page, "limit": limit}
        if keyword:
            params["keyword"] = keyword
        if status:
            params["status"] = status
        return await self._request(
            "GET",
            f"/datasets/{self._path_segment(dataset_id)}/documents",
            params=params,
        )

    async def get_document(self, dataset_id: str, document_id: str) -> dict[str, Any]:
        """GET ``/datasets/{dataset_id}/documents/{document_id}``."""
        return await self._request(
            "GET",
            f"/datasets/{self._path_segment(dataset_id)}"
            f"/documents/{self._path_segment(document_id)}",
        )

    async def list_segments(
        self,
        dataset_id: str,
        document_id: str,
        *,
        page: int = 1,
        limit: int = 20,
        keyword: str | None = None,
    ) -> dict[str, Any]:
        """GET ``/datasets/{dataset_id}/documents/{document_id}/segments``."""
        params: dict[str, Any] = {"page": page, "limit": limit}
        if keyword:
            params["keyword"] = keyword
        return await self._request(
            "GET",
            f"/datasets/{self._path_segment(dataset_id)}"
            f"/documents/{self._path_segment(document_id)}/segments",
            params=params,
        )
|
dify_mcp/formatters.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def simplify_dataset(item: dict[str, Any]) -> dict[str, Any]:
    """Reduce a dataset record to a fixed set of summary fields.

    Top-level fields are copied with ``item.get`` (missing keys become
    ``None``). ``search_method`` and ``top_k`` are read from the nested
    ``retrieval_model_dict`` mapping, which is treated as empty when it is
    absent or falsy.
    """
    retrieval_cfg = item.get("retrieval_model_dict") or {}
    summary: dict[str, Any] = {
        field: item.get(field)
        for field in ("id", "name", "description", "document_count", "indexing_technique")
    }
    # Insertion order is kept the same as the field order callers serialize.
    summary["search_method"] = retrieval_cfg.get("search_method")
    summary["top_k"] = retrieval_cfg.get("top_k")
    summary["enable_api"] = item.get("enable_api")
    return summary
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def simplify_document(item: dict[str, Any]) -> dict[str, Any]:
    """Reduce a document record to a fixed set of summary fields.

    Each field is copied with ``item.get``, so keys missing from ``item``
    appear in the result with the value ``None``.
    """
    kept_fields = (
        "id",
        "name",
        "indexing_status",
        "word_count",
        "hit_count",
        "created_at",
    )
    return {field: item.get(field) for field in kept_fields}
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def format_retrieve_results(payload: dict[str, Any], dataset_id: str) -> dict[str, Any]:
    """Flatten a retrieve response into a compact result dictionary.

    Args:
        payload: Response mapping; its ``query`` entry may be a mapping with a
            ``content`` key or a plain value, and ``records`` is a list of
            mappings with optional ``segment`` / ``score`` entries.
        dataset_id: Echoed back in the result.

    Returns:
        A dict with ``dataset_id``, ``query``, ``result_count`` and
        ``results`` (one flattened entry per record).
    """
    raw_query = payload.get("query") or {}
    if isinstance(raw_query, dict):
        query_text = raw_query.get("content")
    else:
        query_text = raw_query

    def _flatten(record: dict[str, Any]) -> dict[str, Any]:
        # Missing or null nested objects are treated as empty mappings.
        segment = record.get("segment") or {}
        document = segment.get("document") or {}
        return {
            "content": segment.get("content", ""),
            # Empty answers are normalized to None.
            "answer": segment.get("answer") or None,
            "score": record.get("score"),
            "segment_id": segment.get("id"),
            # Prefer the id stored on the segment; fall back to the nested document.
            "document_id": segment.get("document_id") or document.get("id"),
            "document_name": document.get("name"),
            "position": segment.get("position"),
        }

    results: list[dict[str, Any]] = [_flatten(record) for record in payload.get("records") or []]
    return {
        "dataset_id": dataset_id,
        "query": query_text,
        "result_count": len(results),
        "results": results,
    }
|
dify_mcp/server.py
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from contextlib import asynccontextmanager
|
|
5
|
+
from typing import Any, AsyncIterator
|
|
6
|
+
|
|
7
|
+
from mcp.server.fastmcp import Context, FastMCP
|
|
8
|
+
|
|
9
|
+
from dify_mcp.config import Settings, load_settings
|
|
10
|
+
from dify_mcp.dify_client import DifyAPIError, DifyClient
|
|
11
|
+
from dify_mcp.formatters import format_retrieve_results, simplify_dataset, simplify_document
|
|
12
|
+
|
|
13
|
+
# Longest query (in characters, after stripping) that search_knowledge forwards;
# longer queries are rejected locally with an error naming this limit.
MAX_QUERY_LENGTH = 250
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _error(message: str) -> str:
|
|
17
|
+
return json.dumps({"error": message}, ensure_ascii=False)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _ok(payload: Any) -> str:
|
|
21
|
+
return json.dumps(payload, ensure_ascii=False, indent=2)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@asynccontextmanager
async def app_lifespan(server: FastMCP) -> AsyncIterator[dict[str, Any]]:
    """Set up and tear down the resources shared by all tool calls.

    Loads settings, creates the Dify client and runs a connectivity check
    before yielding ``{"settings": ..., "client": ...}`` as the lifespan
    context. The client is closed on every exit path: normal shutdown, a
    failed health check, or an exception/cancellation raised while the
    server is running.

    Args:
        server: The FastMCP instance this lifespan is attached to (unused).

    Raises:
        RuntimeError: if the startup health check fails with ``DifyAPIError``.
    """
    settings = load_settings()
    client = DifyClient(settings)
    try:
        try:
            await client.health_check()
        except DifyAPIError as exc:
            raise RuntimeError(f"Dify connectivity check failed: {exc}") from exc

        yield {"settings": settings, "client": client}
    finally:
        # Without this, the HTTP client leaked whenever startup failed or
        # the context body raised instead of returning normally.
        await client.close()
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# Module-level FastMCP server instance named "dify-knowledge". The functions
# decorated with @mcp.tool() in this module register on it, and app_lifespan
# provides the settings/client pair that those tools read from the context.
mcp = FastMCP(
    "dify-knowledge",
    instructions=(
        "Tools for querying Dify knowledge bases over the LAN. "
        "Only datasets listed in DIFY_ALLOWED_DATASETS are accessible. "
        "Use list_datasets to discover allowed knowledge bases, then search_knowledge to retrieve chunks."
    ),
    lifespan=app_lifespan,
)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _ctx(ctx: Context) -> tuple[Settings, DifyClient]:
|
|
49
|
+
settings: Settings = ctx.request_context.lifespan_context["settings"]
|
|
50
|
+
client: DifyClient = ctx.request_context.lifespan_context["client"]
|
|
51
|
+
return settings, client
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@mcp.tool()
async def list_datasets(
    ctx: Context,
    keyword: str | None = None,
    page: int = 1,
    limit: int = 20,
) -> str:
    """List allowed Dify knowledge bases (filtered by DIFY_ALLOWED_DATASETS)."""
    # Returns a JSON string: on success an object with the allowlist, the
    # simplified datasets from this page that are allow-listed, their count
    # ("total") and the upstream "has_more" flag; on API failure an
    # {"error": ...} object.
    settings, client = _ctx(ctx)
    try:
        payload = await client.list_datasets(page=page, limit=limit, keyword=keyword)
    except DifyAPIError as exc:
        return _error(str(exc))

    # allowed_dataset_ids re-parses the configured string on every access, so
    # resolve it once instead of once per returned dataset.
    allowed_ids = settings.allowed_dataset_ids
    datasets = [
        simplify_dataset(item)
        for item in payload.get("data") or []
        if item.get("id") in allowed_ids
    ]
    return _ok(
        {
            "allowed_dataset_ids": sorted(allowed_ids),
            "datasets": datasets,
            # Counts only the allow-listed entries found on this page.
            "total": len(datasets),
            "has_more": payload.get("has_more", False),
        }
    )
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@mcp.tool()
async def get_dataset(ctx: Context, dataset_id: str) -> str:
    """Get details for one allowed knowledge base."""
    # Returns the simplified dataset as JSON, or an {"error": ...} object when
    # the id is not allow-listed or the API call fails.
    settings, client = _ctx(ctx)
    try:
        # The allowlist check runs first, so a rejected id is never sent to Dify.
        settings.ensure_dataset_allowed(dataset_id)
        details = await client.get_dataset(dataset_id)
    except (PermissionError, DifyAPIError) as failure:
        return _error(str(failure))
    else:
        return _ok(simplify_dataset(details))
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
@mcp.tool()
async def search_knowledge(
    ctx: Context,
    dataset_id: str,
    query: str,
    top_k: int = 5,
    search_method: str | None = None,
) -> str:
    """Retrieve relevant chunks from an allowed Dify knowledge base."""
    # Returns formatted retrieval results as JSON, or an {"error": ...} object
    # for an empty/over-long query, a non-allow-listed dataset, or an API failure.
    settings, client = _ctx(ctx)

    # Query validation happens on the stripped text, before the allowlist check.
    cleaned = query.strip()
    if len(cleaned) == 0:
        return _error("query must not be empty")
    if len(cleaned) > MAX_QUERY_LENGTH:
        return _error(f"query exceeds Dify limit of {MAX_QUERY_LENGTH} characters")

    try:
        settings.ensure_dataset_allowed(dataset_id)
        raw_results = await client.retrieve(
            dataset_id,
            cleaned,
            top_k=top_k,
            search_method=search_method,
        )
    except (PermissionError, DifyAPIError) as failure:
        return _error(str(failure))

    formatted = format_retrieve_results(raw_results, dataset_id)
    return _ok(formatted)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
@mcp.tool()
async def list_documents(
    ctx: Context,
    dataset_id: str,
    keyword: str | None = None,
    page: int = 1,
    limit: int = 20,
    status: str | None = None,
) -> str:
    """List documents inside an allowed knowledge base."""
    # Returns a JSON object with the dataset id, simplified documents and the
    # upstream "total" / "has_more" values, or an {"error": ...} object when
    # the dataset is not allow-listed or the API call fails.
    settings, client = _ctx(ctx)
    try:
        settings.ensure_dataset_allowed(dataset_id)
        listing = await client.list_documents(
            dataset_id,
            page=page,
            limit=limit,
            keyword=keyword,
            status=status,
        )
    except (PermissionError, DifyAPIError) as failure:
        return _error(str(failure))

    rows = listing.get("data") or []
    result = {
        "dataset_id": dataset_id,
        "documents": list(map(simplify_document, rows)),
        "total": listing.get("total"),
        "has_more": listing.get("has_more", False),
    }
    return _ok(result)
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
@mcp.tool()
async def list_document_segments(
    ctx: Context,
    dataset_id: str,
    document_id: str,
    keyword: str | None = None,
    page: int = 1,
    limit: int = 20,
) -> str:
    """List text segments (chunks) for a document in an allowed knowledge base."""
    # Returns a JSON object with the dataset/document ids, the reduced segment
    # records and the upstream "total" / "has_more" values, or an
    # {"error": ...} object when the dataset is not allow-listed or the API
    # call fails.
    settings, client = _ctx(ctx)
    try:
        settings.ensure_dataset_allowed(dataset_id)
        listing = await client.list_segments(
            dataset_id,
            document_id,
            page=page,
            limit=limit,
            keyword=keyword,
        )
    except (PermissionError, DifyAPIError) as failure:
        return _error(str(failure))

    # Only these fields of each segment are passed through, in this order.
    kept_fields = ("id", "content", "position", "word_count", "enabled")
    segments = [
        {field: entry.get(field) for field in kept_fields}
        for entry in listing.get("data") or []
    ]
    result = {
        "dataset_id": dataset_id,
        "document_id": document_id,
        "segments": segments,
        "total": listing.get("total"),
        "has_more": listing.get("has_more", False),
    }
    return _ok(result)
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def main() -> None:
    """Entry point: run the module-level ``mcp`` server with its default arguments."""
    mcp.run()
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
# Allow launching the server with `python -m dify_mcp.server`.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dify-mcp
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: MCP server exposing Dify knowledge base retrieval to Cursor and other MCP clients
|
|
5
|
+
Project-URL: Homepage, https://github.com/salted-butter-joshua/dify-mcp
|
|
6
|
+
Project-URL: Repository, https://github.com/salted-butter-joshua/dify-mcp
|
|
7
|
+
Project-URL: Issues, https://github.com/salted-butter-joshua/dify-mcp/issues
|
|
8
|
+
Author-email: salted-butter-joshua <chenshidatou@gmail.com>
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: dify,knowledge-base,llm,mcp,model-context-protocol,rag,retrieval
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
20
|
+
Requires-Python: >=3.11
|
|
21
|
+
Requires-Dist: httpx>=0.27.0
|
|
22
|
+
Requires-Dist: mcp>=1.6.0
|
|
23
|
+
Requires-Dist: pydantic-settings>=2.0.0
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
|
|
26
|
+
# dify-mcp
|
|
27
|
+
|
|
28
|
+
<!-- mcp-name: io.github.salted-butter-joshua/dify-mcp -->
|
|
29
|
+
|
|
30
|
+
Expose [Dify](https://dify.ai) knowledge base retrieval capabilities via [MCP](https://modelcontextprotocol.io) (Model Context Protocol), for use in Cursor and other MCP-compatible clients.
|
|
31
|
+
|
|
32
|
+
Connect to a self-hosted or remote Dify instance over HTTP or HTTPS. Access is restricted to the datasets listed in the required `DIFY_ALLOWED_DATASETS` allowlist.
|
|
33
|
+
|
|
34
|
+
## Features
|
|
35
|
+
|
|
36
|
+
- **Knowledge base discovery** — list and inspect allowed datasets
|
|
37
|
+
- **Semantic retrieval** — search chunks via Dify `POST /datasets/{dataset_id}/retrieve`
|
|
38
|
+
- **Document browsing** — list documents and segments within a dataset
|
|
39
|
+
- **Dataset allowlist** — restrict access to specific `dataset_id` values
|
|
40
|
+
- **stdio transport** — no manual server startup; the MCP client launches the process
|
|
41
|
+
|
|
42
|
+
## Requirements
|
|
43
|
+
|
|
44
|
+
- Python **3.11+**
|
|
45
|
+
- A running Dify instance with Knowledge Base API enabled
|
|
46
|
+
- Network access from the machine running the MCP server to your Dify API endpoint
|
|
47
|
+
- A Dify **Knowledge Base API Key** (Dify → Knowledge → Service API → API Key)
|
|
48
|
+
|
|
49
|
+
## Quick Start
|
|
50
|
+
|
|
51
|
+
### 1. Clone and install
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
git clone https://github.com/salted-butter-joshua/dify-mcp.git
|
|
55
|
+
cd dify-mcp
|
|
56
|
+
|
|
57
|
+
python3.11 -m venv .venv
|
|
58
|
+
|
|
59
|
+
# Windows
|
|
60
|
+
.venv\Scripts\python.exe -m pip install -e .
|
|
61
|
+
|
|
62
|
+
# macOS / Linux
|
|
63
|
+
.venv/bin/python -m pip install -e .
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
Verify import:
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
# Windows
|
|
70
|
+
.venv\Scripts\python.exe -c "import dify_mcp; print('OK')"
|
|
71
|
+
|
|
72
|
+
# macOS / Linux
|
|
73
|
+
.venv/bin/python -c "import dify_mcp; print('OK')"
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### 2. Configure environment
|
|
77
|
+
|
|
78
|
+
Copy the example env file and edit it:
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
cp .env.example .env
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
```env
|
|
85
|
+
DIFY_API_BASE=http://your-dify-host/v1
|
|
86
|
+
DIFY_API_KEY=dataset-your-api-key
|
|
87
|
+
DIFY_ALLOWED_DATASETS=dataset-uuid-1,dataset-uuid-2
|
|
88
|
+
DIFY_TIMEOUT=30
|
|
89
|
+
DIFY_VERIFY_SSL=false
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
| Variable | Description |
|
|
93
|
+
|----------|-------------|
|
|
94
|
+
| `DIFY_API_BASE` | Dify Knowledge API base URL, e.g. `http://192.168.1.100/v1` |
|
|
95
|
+
| `DIFY_API_KEY` | Knowledge Base API key |
|
|
96
|
+
| `DIFY_ALLOWED_DATASETS` | Comma-separated dataset UUIDs to expose (required) |
|
|
97
|
+
| `DIFY_TIMEOUT` | HTTP timeout in seconds (default: `30`) |
|
|
98
|
+
| `DIFY_VERIFY_SSL` | Verify TLS certificates (default: `false` for self-signed certs) |
|
|
99
|
+
|
|
100
|
+
Run the health check:
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
# Windows
|
|
104
|
+
.venv\Scripts\python.exe scripts/health_check.py
|
|
105
|
+
|
|
106
|
+
# macOS / Linux
|
|
107
|
+
.venv/bin/python scripts/health_check.py
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
### 3. Add to Cursor
|
|
111
|
+
|
|
112
|
+
You do **not** need to start the MCP server manually. Cursor launches it automatically via stdio.
|
|
113
|
+
|
|
114
|
+
1. Open Cursor Settings → **MCP** → **Edit config**
|
|
115
|
+
2. Add the following to `mcpServers` in your MCP config file:
|
|
116
|
+
- Windows: `%USERPROFILE%\.cursor\mcp.json`
|
|
117
|
+
- macOS / Linux: `~/.cursor/mcp.json`
|
|
118
|
+
3. Replace paths and env values with your own
|
|
119
|
+
4. Restart Cursor
|
|
120
|
+
|
|
121
|
+
See [`mcp.json.example`](mcp.json.example) for a full example.
|
|
122
|
+
|
|
123
|
+
**Windows example:**
|
|
124
|
+
|
|
125
|
+
```json
|
|
126
|
+
{
|
|
127
|
+
"mcpServers": {
|
|
128
|
+
"dify-knowledge": {
|
|
129
|
+
"command": "/absolute/path/to/dify-mcp/.venv/Scripts/python.exe",
|
|
130
|
+
"args": ["-m", "dify_mcp.server"],
|
|
131
|
+
"cwd": "/absolute/path/to/dify-mcp",
|
|
132
|
+
"env": {
|
|
133
|
+
"DIFY_API_BASE": "http://your-dify-host/v1",
|
|
134
|
+
"DIFY_API_KEY": "dataset-your-api-key",
|
|
135
|
+
"DIFY_ALLOWED_DATASETS": "dataset-uuid-1,dataset-uuid-2",
|
|
136
|
+
"DIFY_TIMEOUT": "30",
|
|
137
|
+
"DIFY_VERIFY_SSL": "false"
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
**macOS / Linux example:**
|
|
145
|
+
|
|
146
|
+
```json
|
|
147
|
+
{
|
|
148
|
+
"mcpServers": {
|
|
149
|
+
"dify-knowledge": {
|
|
150
|
+
"command": "/absolute/path/to/dify-mcp/.venv/bin/python",
|
|
151
|
+
"args": ["-m", "dify_mcp.server"],
|
|
152
|
+
"cwd": "/absolute/path/to/dify-mcp",
|
|
153
|
+
"env": {
|
|
154
|
+
"DIFY_API_BASE": "http://your-dify-host/v1",
|
|
155
|
+
"DIFY_API_KEY": "dataset-your-api-key",
|
|
156
|
+
"DIFY_ALLOWED_DATASETS": "dataset-uuid-1,dataset-uuid-2"
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
### 4. Verify in Cursor
|
|
164
|
+
|
|
165
|
+
1. Cursor Settings → **MCP** → confirm `dify-knowledge` shows as enabled
|
|
166
|
+
2. In chat, try:
|
|
167
|
+
- *List available Dify knowledge bases*
|
|
168
|
+
- *Search the knowledge base for "your query"*
|
|
169
|
+
|
|
170
|
+
## MCP Tools
|
|
171
|
+
|
|
172
|
+
| Tool | Description |
|
|
173
|
+
|------|-------------|
|
|
174
|
+
| `list_datasets` | List knowledge bases within the allowlist |
|
|
175
|
+
| `get_dataset` | Get metadata for one dataset |
|
|
176
|
+
| `search_knowledge` | Retrieve relevant chunks (primary retrieval tool) |
|
|
177
|
+
| `list_documents` | List documents in a dataset |
|
|
178
|
+
| `list_document_segments` | List text segments for a document |
|
|
179
|
+
|
|
180
|
+
## Architecture
|
|
181
|
+
|
|
182
|
+
```
|
|
183
|
+
Cursor (MCP client)
|
|
184
|
+
│ stdio
|
|
185
|
+
▼
|
|
186
|
+
dify-mcp (local process)
|
|
187
|
+
│ HTTPS / HTTP
|
|
188
|
+
▼
|
|
189
|
+
Dify Knowledge API (/v1/datasets/...)
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
The MCP server runs locally on your machine and calls the Dify API over the network. Dify does not need to reach your machine.
|
|
193
|
+
|
|
194
|
+
## Troubleshooting
|
|
195
|
+
|
|
196
|
+
| Issue | Cause | Fix |
|
|
197
|
+
|-------|-------|-----|
|
|
198
|
+
| `No matching distribution found for mcp` | Python < 3.11 | Use Python 3.11+ in `.venv` |
|
|
199
|
+
| MCP shows error (red) | Wrong Python path, invalid key, or network issue | Check `mcp.json` paths and env vars |
|
|
200
|
+
| `401 Unauthorized` | Invalid API key | Regenerate key in Dify Service API panel |
|
|
201
|
+
| `Dataset '<id>' is not in the allowed list: ...` | UUID not in `DIFY_ALLOWED_DATASETS` | Add the dataset UUID to the allowlist |
|
|
202
|
+
| Health check missing env vars | `.env` not found | Ensure `.env` exists in the project root |
|
|
203
|
+
| Connection timeout | Dify unreachable | Check network / VPN / firewall |
|
|
204
|
+
|
|
205
|
+
## Project Structure
|
|
206
|
+
|
|
207
|
+
```
|
|
208
|
+
dify-mcp/
|
|
209
|
+
├── src/dify_mcp/
|
|
210
|
+
│ ├── server.py # MCP entry point
|
|
211
|
+
│ ├── config.py # Settings and allowlist
|
|
212
|
+
│ ├── dify_client.py # Dify Knowledge API client
|
|
213
|
+
│ └── formatters.py # Response formatting
|
|
214
|
+
├── scripts/
|
|
215
|
+
│ └── health_check.py # Connectivity test
|
|
216
|
+
├── mcp.json.example # Cursor MCP config template
|
|
217
|
+
├── .env.example # Environment variable template
|
|
218
|
+
└── Dify-API.md # Local API path reference
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
## API Reference
|
|
222
|
+
|
|
223
|
+
- [Dify Knowledge Base API (official)](https://docs.dify.ai/api-reference/knowledge-bases/list-knowledge-bases)
|
|
224
|
+
- [Retrieve chunks](https://docs.dify.ai/api-reference/knowledge-bases/retrieve-chunks-from-a-knowledge-base-test-retrieval)
|
|
225
|
+
- Local path reference: [`Dify-API.md`](Dify-API.md)
|
|
226
|
+
|
|
227
|
+
## Security Notes
|
|
228
|
+
|
|
229
|
+
- Never commit `.env` or API keys to version control
|
|
230
|
+
- A single Knowledge Base API key can access all visible datasets under the account — use `DIFY_ALLOWED_DATASETS` to limit exposure
|
|
231
|
+
- Prefer running MCP locally; keep API keys in `mcp.json` env or `.env` on your machine only
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
dify_mcp/__init__.py,sha256=ctxkdpmhSdHV-NDl-ItAQkReUiY5hkWbXtjaAXBXPoA,64
|
|
2
|
+
dify_mcp/config.py,sha256=VHThs8k-uUOX3lxmjowTBjU9oDMaafWtAtBPd7a0g-o,2128
|
|
3
|
+
dify_mcp/dify_client.py,sha256=H6lq_KZEL8R6T39bIZsVrFWbEudM4id-1ks-AxSjdSE,4699
|
|
4
|
+
dify_mcp/formatters.py,sha256=bw-4ugvBpP6V_ZK23_s33NItNtvG3ZxbLfFA_wuYYHE,1977
|
|
5
|
+
dify_mcp/server.py,sha256=1P6FbmeTs2_PwkmaVvb3bTxyACnME4wEQhZD_wYN1ns,6216
|
|
6
|
+
dify_mcp-0.1.0.dist-info/METADATA,sha256=YFfpmwaXr015LhlizFd_COHv1ZnuHB1KIArYV7_D3wc,7495
|
|
7
|
+
dify_mcp-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
8
|
+
dify_mcp-0.1.0.dist-info/entry_points.txt,sha256=Sfbz8BEZk-jsHdF8IXTXWDKkFlEATlO7b7Yi2wscHwU,50
|
|
9
|
+
dify_mcp-0.1.0.dist-info/licenses/LICENSE,sha256=34DTZXciXb86yKtzo8a0njblbt6nm7wJeGkYg4qUk04,1077
|
|
10
|
+
dify_mcp-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 salted-butter-joshua
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|