liquid-api 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- liquid/__init__.py +60 -0
- liquid/_defaults.py +58 -0
- liquid/auth/__init__.py +8 -0
- liquid/auth/classifier.py +73 -0
- liquid/auth/manager.py +108 -0
- liquid/client.py +213 -0
- liquid/discovery/__init__.py +18 -0
- liquid/discovery/base.py +53 -0
- liquid/discovery/browser.py +175 -0
- liquid/discovery/diff.py +66 -0
- liquid/discovery/graphql.py +180 -0
- liquid/discovery/mcp.py +159 -0
- liquid/discovery/openapi.py +227 -0
- liquid/discovery/rest_heuristic.py +157 -0
- liquid/events.py +37 -0
- liquid/exceptions.py +51 -0
- liquid/mapping/__init__.py +9 -0
- liquid/mapping/learning.py +62 -0
- liquid/mapping/proposer.py +150 -0
- liquid/mapping/reviewer.py +84 -0
- liquid/models/__init__.py +36 -0
- liquid/models/adapter.py +35 -0
- liquid/models/llm.py +42 -0
- liquid/models/schema.py +84 -0
- liquid/models/sync.py +35 -0
- liquid/protocols.py +29 -0
- liquid/py.typed +0 -0
- liquid/sync/__init__.py +29 -0
- liquid/sync/auto_repair.py +64 -0
- liquid/sync/engine.py +176 -0
- liquid/sync/fetcher.py +92 -0
- liquid/sync/mapper.py +73 -0
- liquid/sync/pagination.py +102 -0
- liquid/sync/retry.py +47 -0
- liquid/sync/selector.py +32 -0
- liquid/sync/transform.py +103 -0
- liquid_api-0.2.0.dist-info/METADATA +177 -0
- liquid_api-0.2.0.dist-info/RECORD +39 -0
- liquid_api-0.2.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
"""Browser-based API discovery using Playwright.
|
|
2
|
+
|
|
3
|
+
This is the last-resort discovery strategy (Level 4). It launches a headless
|
|
4
|
+
browser, navigates the target URL, captures network requests, and uses an LLM
|
|
5
|
+
to classify discovered endpoints.
|
|
6
|
+
|
|
7
|
+
Requires the `browser` extra: pip install liquid[browser]
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import logging
|
|
13
|
+
from typing import TYPE_CHECKING, Any
|
|
14
|
+
|
|
15
|
+
from liquid.exceptions import DiscoveryError
|
|
16
|
+
from liquid.models.schema import APISchema, AuthRequirement, Endpoint
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from liquid.protocols import LLMBackend
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
_PLAYWRIGHT_AVAILABLE = False
|
|
24
|
+
try:
|
|
25
|
+
from playwright.async_api import async_playwright # type: ignore[import-untyped]
|
|
26
|
+
|
|
27
|
+
_PLAYWRIGHT_AVAILABLE = True
|
|
28
|
+
except ImportError:
|
|
29
|
+
pass
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class BrowserDiscovery:
    """Discovers APIs by browsing the target URL and capturing network traffic.

    This strategy:
    1. Launches a headless Chromium browser
    2. Navigates to the target URL
    3. Captures XHR/Fetch responses whose content type mentions JSON or XML
    4. Uses LLM to classify captured requests into API endpoints

    When the LLM reply yields no usable endpoints, endpoints are derived
    directly from the captured request URLs instead.
    """

    # Auth types accepted from the LLM reply; anything else is treated as "custom".
    _VALID_AUTH_TYPES = frozenset({"oauth2", "api_key", "bearer", "basic", "custom"})

    def __init__(self, llm: LLMBackend, timeout_ms: int = 30000) -> None:
        """Store the LLM backend and the navigation timeout (in milliseconds)."""
        self.llm = llm
        self.timeout_ms = timeout_ms

    async def discover(self, url: str) -> APISchema | None:
        """Browse ``url`` and return the API schema inferred from its traffic.

        Returns:
            ``None`` when Playwright is not installed or nothing was captured,
            otherwise the schema built from the LLM classification.

        Raises:
            DiscoveryError: if browsing or classification fails unexpectedly.
        """
        if not _PLAYWRIGHT_AVAILABLE:
            logger.debug("Playwright not installed, skipping BrowserDiscovery")
            return None

        try:
            captured = await self._browse_and_capture(url)
            if not captured:
                return None
            return await self._classify_with_llm(url, captured)
        except DiscoveryError:
            raise
        except Exception as e:
            raise DiscoveryError(f"Browser discovery failed for {url}: {e}") from e

    async def _browse_and_capture(self, url: str) -> list[dict[str, Any]]:
        """Load ``url`` in headless Chromium and record matching responses.

        Each record holds the request URL and method, the response status and
        content type, and the first 500 characters of the response body.
        Navigation errors are logged and whatever was captured so far is
        still returned.
        """
        captured: list[dict[str, Any]] = []

        async def on_response(response: Any) -> None:
            request = response.request
            if request.resource_type not in ("xhr", "fetch"):
                return
            content_type = response.headers.get("content-type", "")
            if "json" not in content_type and "xml" not in content_type:
                return
            try:
                body = await response.text()
            except Exception as e:
                # Best effort: a body that cannot be read is skipped, not fatal.
                logger.debug("Could not read response body for %s: %s", request.url, e)
                return
            captured.append(
                {
                    "url": request.url,
                    "method": request.method,
                    "status": response.status,
                    "content_type": content_type,
                    "body_preview": body[:500],
                }
            )

        async with async_playwright() as p:
            browser = await p.chromium.launch(headless=True)
            # Everything after launch sits inside try/finally so the browser is
            # closed even when creating the context or page fails.
            try:
                context = await browser.new_context()
                page = await context.new_page()
                page.on("response", on_response)

                try:
                    await page.goto(url, wait_until="networkidle", timeout=self.timeout_ms)
                    # Scroll to trigger lazy-loaded requests
                    await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
                    await page.wait_for_timeout(2000)
                except Exception as e:
                    logger.warning("Browser navigation error for %s: %s", url, e)
            finally:
                await browser.close()

        return captured

    async def _classify_with_llm(self, url: str, captured: list[dict[str, Any]]) -> APISchema:
        """Ask the LLM to turn the captured requests into an ``APISchema``.

        Only the first 20 captures are sent, each with at most 150 characters
        of its body preview.
        """
        from liquid.models.llm import Message

        captures_summary = "\n".join(
            f"- {c['method']} {c['url']} ({c['status']}): {c['body_preview'][:150]}" for c in captured[:20]
        )

        messages = [
            Message(
                role="system",
                content=(
                    "You are an API analyst. Given captured network requests from browsing a website, "
                    "identify the API endpoints. Respond with JSON: "
                    '{"service_name": "...", "endpoints": [{"path": "...", "method": "...", "description": "..."}], '
                    '"auth_type": "oauth2|api_key|bearer|basic|custom"}'
                ),
            ),
            Message(
                role="user",
                content=f"URL: {url}\n\nCaptured requests:\n{captures_summary}",
            ),
        ]

        response = await self.llm.chat(messages)
        return self._parse_response(response.content or "{}", url, captured)

    @staticmethod
    def _load_json_object(content: str) -> dict[str, Any]:
        """Return the JSON object found in ``content``, or ``{}`` if there is none.

        The text is parsed as-is first. If that fails, the span from the first
        ``{`` to the last ``}`` is tried, which covers replies wrapped in a
        markdown code fence or surrounded by prose. Anything that does not
        decode to a JSON object yields an empty dict.
        """
        import json

        text = content.strip()
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            start, end = text.find("{"), text.rfind("}")
            if not 0 <= start < end:
                return {}
            try:
                parsed = json.loads(text[start : end + 1])
            except json.JSONDecodeError:
                return {}
        return parsed if isinstance(parsed, dict) else {}

    def _parse_response(self, content: str, url: str, captured: list[dict[str, Any]]) -> APISchema:
        """Build an ``APISchema`` from the LLM reply, tolerating malformed replies.

        Malformed parts of the reply are ignored rather than raised: endpoint
        entries without a string ``path`` are skipped, a missing or non-string
        ``method`` defaults to ``GET``, and an unknown ``auth_type`` becomes
        ``custom``. When no endpoint survives, one endpoint per captured
        ``(path, method)`` pair is created instead.
        """
        from urllib.parse import urlparse

        data = self._load_json_object(content)

        raw_endpoints = data.get("endpoints")
        if not isinstance(raw_endpoints, list):
            raw_endpoints = []

        endpoints: list[Endpoint] = []
        for ep in raw_endpoints:
            if not isinstance(ep, dict) or not isinstance(ep.get("path"), str):
                continue
            method = ep.get("method")
            description = ep.get("description")
            endpoints.append(
                Endpoint(
                    path=ep["path"],
                    method=method.upper() if isinstance(method, str) and method else "GET",
                    description=description if isinstance(description, str) else "",
                )
            )

        if not endpoints:
            # Fallback: create endpoints from captured URLs. De-duplicate on
            # (path, method) so that e.g. GET and POST on one path both survive.
            seen: set[tuple[str, str]] = set()
            for c in captured:
                path = urlparse(c["url"]).path
                key = (path, c["method"])
                if key in seen:
                    continue
                seen.add(key)
                endpoints.append(
                    Endpoint(
                        path=path,
                        method=c["method"],
                        description=f"Captured via browser ({c['status']})",
                    )
                )

        auth_type = data.get("auth_type")
        if not isinstance(auth_type, str) or auth_type not in self._VALID_AUTH_TYPES:
            auth_type = "custom"
        tier = "A" if auth_type in ("oauth2", "bearer") else "C"

        service_name = data.get("service_name")
        if not isinstance(service_name, str) or not service_name:
            service_name = urlparse(url).hostname or "Unknown"

        return APISchema(
            source_url=url,
            service_name=service_name,
            discovery_method="browser",
            endpoints=endpoints,
            auth=AuthRequirement(type=auth_type, tier=tier),
        )
|
liquid/discovery/diff.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""Schema diff utility for detecting API changes."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from liquid.models.schema import APISchema, Endpoint, SchemaDiff
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def diff_schemas(old: APISchema, new: APISchema) -> SchemaDiff:
    """Compute a structured diff between two API schemas.

    Endpoints are matched on their ``(path, method)`` pair. Fields are the
    paths collected from the response schemas of all endpoints of a schema,
    compared as one set per schema. A removed endpoint or a removed field
    marks the diff as breaking.
    """
    previous = {(ep.path, ep.method): ep for ep in old.endpoints}
    current = {(ep.path, ep.method): ep for ep in new.endpoints}

    # Sorting the keys keeps the resulting lists in a stable order.
    introduced = [current[key] for key in sorted(current.keys() - previous.keys())]
    dropped = [previous[key] for key in sorted(previous.keys() - current.keys())]
    retained = [current[key] for key in sorted(previous.keys() & current.keys())]

    fields_before = _extract_all_fields(old.endpoints)
    fields_after = _extract_all_fields(new.endpoints)
    dropped_fields = sorted(fields_before - fields_after)

    return SchemaDiff(
        added_endpoints=introduced,
        removed_endpoints=dropped,
        unchanged_endpoints=retained,
        added_fields=sorted(fields_after - fields_before),
        removed_fields=dropped_fields,
        unchanged_fields=sorted(fields_before & fields_after),
        has_breaking_changes=bool(dropped or dropped_fields),
    )
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _extract_all_fields(endpoints: list[Endpoint]) -> set[str]:
    """Return the union of field paths found in the endpoints' response schemas.

    Endpoints whose ``response_schema`` is empty or unset contribute nothing.
    """
    collected: set[str] = set()
    for endpoint in endpoints:
        response_schema = endpoint.response_schema
        if not response_schema:
            continue
        _collect_fields(response_schema, "", collected)
    return collected
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _collect_fields(schema: dict[str, Any], prefix: str, fields: set[str]) -> None:
    """Recursively add the field paths described by a schema node to ``fields``.

    Object properties are recorded as dotted paths (``a.b``); array items
    extend the path with ``[]`` (``a[].b``, or ``[].b`` for a top-level array).

    Args:
        schema: JSON-Schema-style node. ``type`` may be a string or a list of
            strings such as ``["object", "null"]``. A node without ``type`` is
            treated as an object when it has a ``properties`` mapping and as an
            array when it has an ``items`` mapping.
        prefix: Path of ``schema`` itself; empty for the root node.
        fields: Set that is updated in place with every path found.
    """
    declared = schema.get("type", "")
    # ``type`` can be a list (nullable/union types); normalise to a list of kinds.
    kinds = list(declared) if isinstance(declared, (list, tuple)) else [declared]

    properties = schema.get("properties")
    items = schema.get("items")

    # Without an explicit type, infer the kind from the keywords that are present.
    is_object = "object" in kinds or (not declared and isinstance(properties, dict))
    is_array = "array" in kinds or (not declared and isinstance(items, dict))

    if is_object and isinstance(properties, dict):
        for prop_name, prop_schema in properties.items():
            full_path = f"{prefix}.{prop_name}" if prefix else prop_name
            fields.add(full_path)
            if isinstance(prop_schema, dict):
                _collect_fields(prop_schema, full_path, fields)

    if is_array and isinstance(items, dict):
        item_prefix = f"{prefix}[]" if prefix else "[]"
        _collect_fields(items, item_prefix, fields)
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
import httpx
|
|
7
|
+
|
|
8
|
+
from liquid.exceptions import DiscoveryError
|
|
9
|
+
from liquid.models.schema import (
|
|
10
|
+
APISchema,
|
|
11
|
+
AuthRequirement,
|
|
12
|
+
Endpoint,
|
|
13
|
+
Parameter,
|
|
14
|
+
ParameterLocation,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
_INTROSPECTION_QUERY = """
|
|
20
|
+
query IntrospectionQuery {
|
|
21
|
+
__schema {
|
|
22
|
+
queryType { name }
|
|
23
|
+
mutationType { name }
|
|
24
|
+
types {
|
|
25
|
+
kind
|
|
26
|
+
name
|
|
27
|
+
description
|
|
28
|
+
fields {
|
|
29
|
+
name
|
|
30
|
+
description
|
|
31
|
+
args {
|
|
32
|
+
name
|
|
33
|
+
description
|
|
34
|
+
type { kind name ofType { kind name ofType { kind name } } }
|
|
35
|
+
defaultValue
|
|
36
|
+
}
|
|
37
|
+
type { kind name ofType { kind name ofType { kind name } } }
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
"""
|
|
43
|
+
|
|
44
|
+
_GRAPHQL_PATHS = ["/graphql", "/api/graphql", "/graphql/v1", "/gql"]
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class GraphQLDiscovery:
    """Discovers APIs by running a GraphQL introspection query."""

    def __init__(self, http_client: httpx.AsyncClient | None = None) -> None:
        """Remember an optional caller-owned HTTP client.

        When no client is given, ``discover`` creates one per call and closes
        it afterwards; a caller-supplied client is never closed here.
        """
        self._external_client = http_client

    async def discover(self, url: str) -> APISchema | None:
        """Probe ``url`` for a GraphQL endpoint and convert its schema.

        Returns:
            ``None`` when no probed endpoint answers the introspection query.

        Raises:
            DiscoveryError: if the introspection result cannot be parsed.
        """
        # Ownership is decided by identity, not truthiness of the client object.
        owns_client = self._external_client is None
        client = httpx.AsyncClient() if owns_client else self._external_client
        try:
            introspection = await self._run_introspection(client, url)
            if introspection is None:
                return None
            return self._parse_introspection(introspection, url)
        finally:
            if owns_client:
                await client.aclose()

    async def _run_introspection(
        self,
        client: httpx.AsyncClient,
        base_url: str,
    ) -> dict[str, Any] | None:
        """POST the introspection query to candidate URLs and return ``__schema``.

        Candidates are ``base_url`` joined with each entry of ``_GRAPHQL_PATHS``.
        If ``base_url`` itself already ends in one of those paths it is probed
        first, so passing the endpoint URL directly works as well. Probes that
        fail or return an unexpected payload are skipped.
        """
        from urllib.parse import urlparse

        base = base_url.rstrip("/")
        candidates = [f"{base}{path}" for path in _GRAPHQL_PATHS]
        if urlparse(base).path.endswith(tuple(_GRAPHQL_PATHS)):
            candidates.insert(0, base)

        for endpoint in candidates:
            try:
                resp = await client.post(
                    endpoint,
                    json={"query": _INTROSPECTION_QUERY},
                    headers={"Content-Type": "application/json"},
                    timeout=10.0,
                )
                if not resp.is_success:
                    continue
                data = resp.json()
            except Exception as e:
                # Best effort: a failing candidate must not stop the remaining probes.
                logger.debug("GraphQL introspection probe failed at %s: %s", endpoint, e)
                continue

            payload = data.get("data") if isinstance(data, dict) else None
            schema = payload.get("__schema") if isinstance(payload, dict) else None
            if isinstance(schema, dict):
                logger.info("GraphQL introspection succeeded at %s", endpoint)
                return schema
        return None

    def _parse_introspection(self, schema: dict[str, Any], source_url: str) -> APISchema:
        """Wrap the extracted endpoints in an ``APISchema``.

        Raises:
            DiscoveryError: if endpoint extraction raises for any reason.
        """
        try:
            endpoints = self._extract_endpoints(schema)
        except Exception as e:
            raise DiscoveryError(f"Failed to parse GraphQL introspection: {e}") from e

        return APISchema(
            source_url=source_url,
            service_name=self._infer_service_name(source_url),
            discovery_method="graphql",
            endpoints=endpoints,
            auth=AuthRequirement(type="bearer", tier="A"),
        )

    def _extract_endpoints(self, schema: dict[str, Any]) -> list[Endpoint]:
        """Turn the fields of the query and mutation root types into endpoints.

        Every field becomes a ``POST`` endpoint with the path
        ``/graphql#<query|mutation>.<field>``. Fields whose name starts with
        ``_`` are skipped. Null or malformed ``types``, ``fields`` and ``args``
        entries are ignored instead of raising.
        """
        endpoints: list[Endpoint] = []
        types_map = {
            t["name"]: t for t in schema.get("types") or [] if isinstance(t, dict) and t.get("name")
        }

        query_type_name = (schema.get("queryType") or {}).get("name", "Query")
        mutation_type_name = (schema.get("mutationType") or {}).get("name", "Mutation")

        for type_name in (query_type_name, mutation_type_name):
            type_def = types_map.get(type_name)
            if not type_def:
                continue
            op_type = "query" if type_name == query_type_name else "mutation"

            # ``fields`` may be present but null, hence ``or []``.
            for field in type_def.get("fields") or []:
                if not isinstance(field, dict):
                    continue
                name = field.get("name") or ""
                if name.startswith("_"):
                    continue

                params = [
                    Parameter(
                        name=arg["name"],
                        location=ParameterLocation.BODY,
                        required=(arg.get("type") or {}).get("kind") == "NON_NULL",
                        description=arg.get("description"),
                    )
                    for arg in field.get("args") or []
                    if isinstance(arg, dict) and arg.get("name")
                ]

                endpoints.append(
                    Endpoint(
                        path=f"/graphql#{op_type}.{name}",
                        method="POST",
                        description=field.get("description", "") or "",
                        parameters=params,
                        response_schema=self._type_to_schema(field.get("type") or {}),
                    )
                )

        return endpoints

    def _type_to_schema(self, gql_type: dict[str, Any]) -> dict[str, Any]:
        """Map a GraphQL type reference to a small JSON-Schema-like dict.

        ``NON_NULL`` wrappers are unwrapped, ``LIST`` becomes an array of the
        wrapped type, scalars map through ``_scalar_to_json_type``, objects and
        interfaces become titled objects, and enums become titled strings.
        Anything else, including a missing or null type, is a plain object.
        """
        if not isinstance(gql_type, dict):
            return {"type": "object"}

        kind = gql_type.get("kind", "")
        name = gql_type.get("name", "")

        # ``ofType`` can be null (e.g. beyond the depth the query asked for).
        if kind == "NON_NULL":
            return self._type_to_schema(gql_type.get("ofType") or {})
        if kind == "LIST":
            return {"type": "array", "items": self._type_to_schema(gql_type.get("ofType") or {})}
        if kind == "SCALAR":
            return {"type": _scalar_to_json_type(name)}
        if kind in ("OBJECT", "INTERFACE"):
            return {"type": "object", "title": name}
        if kind == "ENUM":
            return {"type": "string", "title": name}
        return {"type": "object"}

    def _infer_service_name(self, url: str) -> str:
        """Derive a display name from the host of ``url``.

        The second-to-last dot-separated host label is capitalised
        (``api.github.com`` -> ``Github``); a single-label host is capitalised
        as a whole. IP-address hosts are returned unchanged, because picking
        one octet would not be a meaningful name.
        """
        import ipaddress
        from urllib.parse import urlparse

        host = urlparse(url).hostname or "unknown"
        try:
            ipaddress.ip_address(host)
        except ValueError:
            pass
        else:
            return host

        parts = host.split(".")
        if len(parts) >= 2:
            return parts[-2].capitalize()
        return host.capitalize()
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def _scalar_to_json_type(name: str) -> str:
    """Return the JSON Schema type name for a GraphQL scalar called ``name``.

    ``Int``, ``Float`` and ``Boolean`` map to ``integer``, ``number`` and
    ``boolean``. Every other scalar, including ``String``, ``ID``,
    ``DateTime``, ``Date`` and unknown custom scalars, maps to ``string``.
    """
    non_string_scalars = {
        "Int": "integer",
        "Float": "number",
        "Boolean": "boolean",
    }
    return non_string_scalars.get(name, "string")
|
liquid/discovery/mcp.py
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
"""MCP-based API discovery.
|
|
2
|
+
|
|
3
|
+
If the service publishes an MCP server, tools and resources are already
|
|
4
|
+
structured with types and descriptions — the cheapest and most reliable
|
|
5
|
+
discovery method (Level 1).
|
|
6
|
+
|
|
7
|
+
Requires the `mcp` extra: pip install liquid[mcp]
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import logging
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
from liquid.exceptions import DiscoveryError
|
|
16
|
+
from liquid.models.schema import (
|
|
17
|
+
APISchema,
|
|
18
|
+
AuthRequirement,
|
|
19
|
+
Endpoint,
|
|
20
|
+
Parameter,
|
|
21
|
+
ParameterLocation,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
logger = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
_MCP_AVAILABLE = False
|
|
27
|
+
try:
|
|
28
|
+
from mcp import ClientSession # type: ignore[import-untyped]
|
|
29
|
+
from mcp.client.streamable_http import streamable_http_client # type: ignore[import-untyped]
|
|
30
|
+
|
|
31
|
+
_MCP_AVAILABLE = True
|
|
32
|
+
except ImportError:
|
|
33
|
+
pass
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class MCPDiscovery:
    """Discovers APIs by connecting to an MCP server.

    MCP servers publish tools and resources with structured types
    and descriptions. This strategy connects via Streamable HTTP,
    lists available tools/resources, and maps them to APISchema.

    Falls back gracefully if the `mcp` package is not installed
    or the URL doesn't expose an MCP endpoint.
    """

    def __init__(self, mcp_path: str = "/mcp") -> None:
        """Store the path that is appended to the base URL to reach the MCP server."""
        self.mcp_path = mcp_path

    async def discover(self, url: str) -> APISchema | None:
        """Connect to the MCP endpoint under ``url`` and map it to an ``APISchema``.

        Returns:
            ``None`` when the MCP SDK is not installed, the connection fails,
            or the server lists neither tools nor resources.

        Raises:
            DiscoveryError: re-raised unchanged if raised during discovery.
        """
        if not _MCP_AVAILABLE:
            logger.debug("MCP SDK not installed, skipping MCPDiscovery")
            return None

        mcp_url = f"{url.rstrip('/')}{self.mcp_path}"
        try:
            return await self._connect_and_discover(mcp_url, url)
        except DiscoveryError:
            raise
        except Exception as e:
            logger.debug("MCP discovery failed for %s: %s", mcp_url, e)
            return None

    async def _connect_and_discover(self, mcp_url: str, source_url: str) -> APISchema | None:
        """Open a session at ``mcp_url`` and convert its tools and resources.

        Tools and resources are listed independently, so a failure of one
        listing does not discard the result of the other.
        """
        async with streamable_http_client(mcp_url) as streams:
            # Only the read/write streams are needed. Star-unpacking tolerates
            # transports that yield additional items after them.
            # NOTE(review): some SDK versions appear to yield a third
            # session-id callback here; confirm against the pinned mcp version.
            read, write, *_ = streams
            async with ClientSession(read, write) as session:
                await session.initialize()

                tools = await self._list_items(session.list_tools, "tools")
                resources = await self._list_items(session.list_resources, "resources")

                if not tools and not resources:
                    return None

                endpoints = self._tools_to_endpoints(tools)
                endpoints.extend(self._resources_to_endpoints(resources))

                return APISchema(
                    source_url=source_url,
                    service_name=self._infer_service_name(source_url),
                    discovery_method="mcp",
                    endpoints=endpoints,
                    auth=AuthRequirement(type="bearer", tier="A"),
                )

    @staticmethod
    async def _list_items(list_call: Any, attr: str) -> list[Any]:
        """Await ``list_call()`` and return its ``attr`` list, or ``[]`` on failure.

        A failing listing is logged at debug level and treated as empty.
        """
        try:
            result = await list_call()
        except Exception as e:
            logger.debug("MCP listing of %s failed: %s", attr, e)
            return []
        return list(getattr(result, attr, None) or [])

    def _tools_to_endpoints(self, tools: list[Any]) -> list[Endpoint]:
        """Map each MCP tool to a ``POST /mcp/tools/<name>`` endpoint.

        The tool's input schema properties become body parameters and the
        description is truncated to 500 characters.
        """
        endpoints: list[Endpoint] = []
        for tool in tools:
            name = getattr(tool, "name", str(tool))
            description = getattr(tool, "description", "") or ""
            input_schema = getattr(tool, "inputSchema", None) or {}

            endpoints.append(
                Endpoint(
                    path=f"/mcp/tools/{name}",
                    method="POST",
                    description=description[:500],
                    parameters=self._schema_to_parameters(input_schema),
                    response_schema={"type": "object"},
                )
            )
        return endpoints

    def _resources_to_endpoints(self, resources: list[Any]) -> list[Endpoint]:
        """Map each MCP resource to a ``GET /mcp/resources/<name>`` endpoint.

        A missing or empty name falls back to the resource URI, and a missing
        or ``None`` MIME type falls back to ``application/json``.
        """
        endpoints: list[Endpoint] = []
        for resource in resources:
            uri = str(getattr(resource, "uri", resource))
            name = getattr(resource, "name", None) or uri
            description = getattr(resource, "description", "") or ""
            # ``or`` (not a getattr default) so an attribute set to None also
            # falls back to the default MIME type.
            mime_type = getattr(resource, "mimeType", None) or "application/json"

            endpoints.append(
                Endpoint(
                    path=f"/mcp/resources/{name}",
                    method="GET",
                    description=description[:500] or f"Resource: {uri}",
                    response_schema={"type": "object", "mimeType": mime_type},
                )
            )
        return endpoints

    def _schema_to_parameters(self, input_schema: dict[str, Any]) -> list[Parameter]:
        """Convert the ``properties`` of a tool input schema into body parameters.

        Non-dict schemas, non-dict ``properties`` and non-dict property
        entries are ignored. A parameter is required when its name appears in
        the schema's ``required`` list.
        """
        if not isinstance(input_schema, dict):
            return []

        properties = input_schema.get("properties")
        if not isinstance(properties, dict):
            return []

        required = input_schema.get("required")
        required_fields = set(required) if isinstance(required, (list, tuple)) else set()

        return [
            Parameter(
                name=prop_name,
                location=ParameterLocation.BODY,
                required=prop_name in required_fields,
                schema=prop_schema,
                description=prop_schema.get("description"),
            )
            for prop_name, prop_schema in properties.items()
            if isinstance(prop_schema, dict)
        ]

    def _infer_service_name(self, url: str) -> str:
        """Derive a display name from the host of ``url``.

        The second-to-last dot-separated host label is capitalised
        (``mcp.linear.app`` -> ``Linear``); a single-label host is capitalised
        as a whole. IP-address hosts are returned unchanged, because picking
        one octet would not be a meaningful name.
        """
        import ipaddress
        from urllib.parse import urlparse

        host = urlparse(url).hostname or "unknown"
        try:
            ipaddress.ip_address(host)
        except ValueError:
            pass
        else:
            return host

        parts = host.split(".")
        if len(parts) >= 2:
            return parts[-2].capitalize()
        return host.capitalize()
|