liquid-api 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,175 @@
1
+ """Browser-based API discovery using Playwright.
2
+
3
+ This is the last-resort discovery strategy (Level 4). It launches a headless
4
+ browser, navigates the target URL, captures network requests, and uses an LLM
5
+ to classify discovered endpoints.
6
+
7
+ Requires the `browser` extra: pip install liquid[browser]
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import logging
13
+ from typing import TYPE_CHECKING, Any
14
+
15
+ from liquid.exceptions import DiscoveryError
16
+ from liquid.models.schema import APISchema, AuthRequirement, Endpoint
17
+
18
+ if TYPE_CHECKING:
19
+ from liquid.protocols import LLMBackend
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+ _PLAYWRIGHT_AVAILABLE = False
24
+ try:
25
+ from playwright.async_api import async_playwright # type: ignore[import-untyped]
26
+
27
+ _PLAYWRIGHT_AVAILABLE = True
28
+ except ImportError:
29
+ pass
30
+
31
+
32
class BrowserDiscovery:
    """Discovers APIs by browsing the target URL and capturing network traffic.

    This strategy:
    1. Launches a headless Chromium browser
    2. Navigates to the target URL
    3. Captures XHR/Fetch responses whose content type mentions JSON or XML
    4. Uses the LLM to classify captured requests into API endpoints

    Args:
        llm: Backend whose ``chat(messages)`` coroutine is awaited with the
            classification prompt; the ``content`` of its reply is parsed as JSON.
        timeout_ms: Navigation timeout passed to ``page.goto``, in milliseconds.
    """

    # Auth types accepted from the LLM reply; anything else becomes "custom".
    _VALID_AUTH_TYPES = frozenset({"oauth2", "api_key", "bearer", "basic", "custom"})

    def __init__(self, llm: LLMBackend, timeout_ms: int = 30000) -> None:
        self.llm = llm
        self.timeout_ms = timeout_ms

    async def discover(self, url: str) -> APISchema | None:
        """Browse ``url`` and build a schema from the traffic it generates.

        Returns:
            The discovered schema, or ``None`` when Playwright is not installed
            or no matching responses were captured.

        Raises:
            DiscoveryError: If browsing or classification fails for any reason.
        """
        if not _PLAYWRIGHT_AVAILABLE:
            logger.debug("Playwright not installed, skipping BrowserDiscovery")
            return None

        try:
            captured = await self._browse_and_capture(url)
            if not captured:
                return None
            return await self._classify_with_llm(url, captured)
        except DiscoveryError:
            raise
        except Exception as e:
            raise DiscoveryError(f"Browser discovery failed for {url}: {e}") from e

    async def _browse_and_capture(self, url: str) -> list[dict[str, Any]]:
        """Load ``url`` in headless Chromium and record JSON/XML XHR and fetch responses.

        Each record holds the request URL and method, the response status and
        content type, and the first 500 characters of the body. Navigation
        errors are logged as warnings; whatever was captured so far is returned.
        """
        captured: list[dict[str, Any]] = []

        async with async_playwright() as p:
            browser = await p.chromium.launch(headless=True)
            context = await browser.new_context()
            page = await context.new_page()

            async def on_response(response):
                request = response.request
                if request.resource_type not in ("xhr", "fetch"):
                    return
                content_type = response.headers.get("content-type", "")
                if "json" not in content_type and "xml" not in content_type:
                    return
                try:
                    body = await response.text()
                except Exception as e:
                    # Best effort: a body that cannot be read must not abort the
                    # crawl, but leave a trace for debugging.
                    logger.debug("Could not read response body for %s: %s", request.url, e)
                    return
                captured.append(
                    {
                        "url": request.url,
                        "method": request.method,
                        "status": response.status,
                        "content_type": content_type,
                        "body_preview": body[:500],
                    }
                )

            page.on("response", on_response)

            try:
                await page.goto(url, wait_until="networkidle", timeout=self.timeout_ms)
                # Scroll to trigger lazy-loaded requests
                await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
                await page.wait_for_timeout(2000)
            except Exception as e:
                logger.warning("Browser navigation error for %s: %s", url, e)
            finally:
                await browser.close()

        return captured

    async def _classify_with_llm(self, url: str, captured: list[dict[str, Any]]) -> APISchema:
        """Ask the LLM to name the service, its endpoints and its auth type.

        Only the first 20 captures are sent, each with a 150-character body
        preview, to keep the prompt small.
        """
        from liquid.models.llm import Message

        captures_summary = "\n".join(
            f"- {c['method']} {c['url']} ({c['status']}): {c['body_preview'][:150]}" for c in captured[:20]
        )

        messages = [
            Message(
                role="system",
                content=(
                    "You are an API analyst. Given captured network requests from browsing a website, "
                    "identify the API endpoints. Respond with JSON: "
                    '{"service_name": "...", "endpoints": [{"path": "...", "method": "...", "description": "..."}], '
                    '"auth_type": "oauth2|api_key|bearer|basic|custom"}'
                ),
            ),
            Message(
                role="user",
                content=f"URL: {url}\n\nCaptured requests:\n{captures_summary}",
            ),
        ]

        response = await self.llm.chat(messages)
        return self._parse_response(response.content or "{}", url, captured)

    @staticmethod
    def _load_llm_json(content: str) -> dict[str, Any]:
        """Return the JSON object contained in an LLM reply, or ``{}``.

        The reply is parsed as-is first. If that fails, the text between the
        first ``{`` and the last ``}`` is tried, which covers replies wrapped in
        a markdown fence or surrounded by prose. Anything that does not decode
        to a JSON object yields an empty dict.
        """
        import json

        if not isinstance(content, str):
            return {}

        candidates = [content]
        start, end = content.find("{"), content.rfind("}")
        if 0 <= start < end:
            candidates.append(content[start : end + 1])

        for candidate in candidates:
            try:
                data = json.loads(candidate)
            except ValueError:
                continue
            if isinstance(data, dict):
                return data
        return {}

    def _parse_response(self, content: str, url: str, captured: list[dict[str, Any]]) -> APISchema:
        """Turn the LLM reply into an ``APISchema``.

        Malformed parts of the reply are ignored rather than raised. When the
        reply yields no usable endpoint, one endpoint is created per distinct
        ``(path, method)`` pair among the captured requests.
        """
        from urllib.parse import urlparse

        data = self._load_llm_json(content)

        raw_endpoints = data.get("endpoints")
        if not isinstance(raw_endpoints, list):
            raw_endpoints = []

        endpoints: list[Endpoint] = []
        for ep in raw_endpoints:
            if not isinstance(ep, dict):
                continue
            path = ep.get("path")
            if not isinstance(path, str):
                continue
            endpoints.append(
                Endpoint(
                    path=path,
                    # `or` also covers an explicit JSON null.
                    method=str(ep.get("method") or "GET").upper(),
                    description=str(ep.get("description") or ""),
                )
            )

        if not endpoints:
            # Fallback: create endpoints from captured URLs. Key on (path, method)
            # so that e.g. GET and POST on the same path are both kept.
            seen: set[tuple[str, str]] = set()
            for c in captured:
                path = urlparse(c["url"]).path
                key = (path, c["method"])
                if key in seen:
                    continue
                seen.add(key)
                endpoints.append(
                    Endpoint(
                        path=path,
                        method=c["method"],
                        description=f"Captured via browser ({c['status']})",
                    )
                )

        auth_type = data.get("auth_type")
        if not isinstance(auth_type, str) or auth_type not in self._VALID_AUTH_TYPES:
            auth_type = "custom"
        tier = "A" if auth_type in ("oauth2", "bearer") else "C"

        service_name = data.get("service_name")
        if not isinstance(service_name, str) or not service_name:
            service_name = urlparse(url).hostname or "Unknown"

        return APISchema(
            source_url=url,
            service_name=service_name,
            discovery_method="browser",
            endpoints=endpoints,
            auth=AuthRequirement(type=auth_type, tier=tier),
        )
@@ -0,0 +1,66 @@
1
+ """Schema diff utility for detecting API changes."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from liquid.models.schema import APISchema, Endpoint, SchemaDiff
8
+
9
+
10
def diff_schemas(old: APISchema, new: APISchema) -> SchemaDiff:
    """Build a structured diff between two snapshots of an API schema.

    Endpoints are matched on their ``(path, method)`` pair and response-schema
    fields on their collected path strings. Every list in the result is sorted.
    The diff is flagged as breaking when any endpoint or field was removed.
    """
    previous = {(endpoint.path, endpoint.method): endpoint for endpoint in old.endpoints}
    current = {(endpoint.path, endpoint.method): endpoint for endpoint in new.endpoints}

    # dict key views support set algebra directly.
    fresh_endpoints = [current[key] for key in sorted(current.keys() - previous.keys())]
    gone_endpoints = [previous[key] for key in sorted(previous.keys() - current.keys())]
    kept_endpoints = [current[key] for key in sorted(previous.keys() & current.keys())]

    fields_before = _extract_all_fields(old.endpoints)
    fields_after = _extract_all_fields(new.endpoints)

    fresh_fields = sorted(fields_after - fields_before)
    gone_fields = sorted(fields_before - fields_after)
    kept_fields = sorted(fields_before & fields_after)

    return SchemaDiff(
        added_endpoints=fresh_endpoints,
        removed_endpoints=gone_endpoints,
        unchanged_endpoints=kept_endpoints,
        added_fields=fresh_fields,
        removed_fields=gone_fields,
        unchanged_fields=kept_fields,
        has_breaking_changes=len(gone_endpoints) > 0 or len(gone_fields) > 0,
    )
40
+
41
+
42
def _extract_all_fields(endpoints: list[Endpoint]) -> set[str]:
    """Gather the field paths found in the response schemas of ``endpoints``.

    Endpoints with a missing or empty ``response_schema`` contribute nothing.
    Paths from all endpoints are merged into one set.
    """
    collected: set[str] = set()
    for response_schema in (endpoint.response_schema for endpoint in endpoints):
        if not response_schema:
            continue
        _collect_fields(response_schema, "", collected)
    return collected
49
+
50
+
51
def _collect_fields(schema: dict[str, Any], prefix: str, fields: set[str]) -> None:
    """Recursively add the field paths described by ``schema`` to ``fields``.

    Object properties are joined to ``prefix`` with a dot; array items extend
    the prefix with ``[]``. Schemas of any other type add nothing. ``fields``
    is mutated in place.
    """
    kind = schema.get("type", "")

    if kind == "array":
        element = schema.get("items", {})
        element_prefix = f"{prefix}[]" if prefix else "[]"
        if isinstance(element, dict):
            _collect_fields(element, element_prefix, fields)
        return

    if kind != "object":
        return

    for key, child in schema.get("properties", {}).items():
        dotted = key if not prefix else f"{prefix}.{key}"
        fields.add(dotted)
        if isinstance(child, dict):
            _collect_fields(child, dotted, fields)
@@ -0,0 +1,180 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from typing import Any
5
+
6
+ import httpx
7
+
8
+ from liquid.exceptions import DiscoveryError
9
+ from liquid.models.schema import (
10
+ APISchema,
11
+ AuthRequirement,
12
+ Endpoint,
13
+ Parameter,
14
+ ParameterLocation,
15
+ )
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+ _INTROSPECTION_QUERY = """
20
+ query IntrospectionQuery {
21
+ __schema {
22
+ queryType { name }
23
+ mutationType { name }
24
+ types {
25
+ kind
26
+ name
27
+ description
28
+ fields {
29
+ name
30
+ description
31
+ args {
32
+ name
33
+ description
34
+ type { kind name ofType { kind name ofType { kind name } } }
35
+ defaultValue
36
+ }
37
+ type { kind name ofType { kind name ofType { kind name } } }
38
+ }
39
+ }
40
+ }
41
+ }
42
+ """
43
+
44
+ _GRAPHQL_PATHS = ["/graphql", "/api/graphql", "/graphql/v1", "/gql"]
45
+
46
+
47
class GraphQLDiscovery:
    """Discovers APIs by running a GraphQL introspection query.

    Args:
        http_client: Optional ``httpx.AsyncClient`` to reuse. When omitted, a
            client is created for each ``discover`` call and closed afterwards;
            a caller-supplied client is never closed here.
    """

    def __init__(self, http_client: httpx.AsyncClient | None = None) -> None:
        self._external_client = http_client

    async def discover(self, url: str) -> APISchema | None:
        """Probe ``url`` for a GraphQL endpoint and describe its operations.

        Returns:
            The discovered schema, or ``None`` if no probed path answered the
            introspection query.

        Raises:
            DiscoveryError: If the introspection result cannot be parsed.
        """
        client = self._external_client
        # Decide ownership by identity, not truthiness.
        owns_client = client is None
        if owns_client:
            client = httpx.AsyncClient()
        try:
            introspection = await self._run_introspection(client, url)
            if introspection is None:
                return None
            return self._parse_introspection(introspection, url)
        finally:
            if owns_client:
                await client.aclose()

    async def _run_introspection(
        self,
        client: httpx.AsyncClient,
        base_url: str,
    ) -> dict[str, Any] | None:
        """POST the introspection query to each candidate path in turn.

        Returns the ``__schema`` object of the first successful response, or
        ``None`` when every path fails. Failures on one path are logged at
        debug level and never prevent trying the next.
        """
        base = base_url.rstrip("/")
        for path in _GRAPHQL_PATHS:
            try:
                resp = await client.post(
                    f"{base}{path}",
                    json={"query": _INTROSPECTION_QUERY},
                    headers={"Content-Type": "application/json"},
                    timeout=10.0,
                )
                if not resp.is_success:
                    continue
                payload = resp.json()
                # Error replies may carry `"data": null`; check each level.
                data = payload.get("data") if isinstance(payload, dict) else None
                schema = data.get("__schema") if isinstance(data, dict) else None
                if isinstance(schema, dict):
                    logger.info("GraphQL introspection succeeded at %s%s", base, path)
                    return schema
            except Exception as e:
                logger.debug("GraphQL introspection probe failed at %s%s: %s", base, path, e)
                continue
        return None

    def _parse_introspection(self, schema: dict[str, Any], source_url: str) -> APISchema:
        """Wrap the extracted endpoints in an ``APISchema``.

        Raises:
            DiscoveryError: If endpoint extraction raises.
        """
        try:
            endpoints = self._extract_endpoints(schema)
        except Exception as e:
            raise DiscoveryError(f"Failed to parse GraphQL introspection: {e}") from e

        return APISchema(
            source_url=source_url,
            service_name=self._infer_service_name(source_url),
            discovery_method="graphql",
            endpoints=endpoints,
            auth=AuthRequirement(type="bearer", tier="A"),
        )

    def _extract_endpoints(self, schema: dict[str, Any]) -> list[Endpoint]:
        """Create one POST endpoint per field of the query and mutation root types.

        Fields whose name starts with an underscore are skipped. Introspection
        JSON uses explicit ``null`` for absent values, so lookups use ``or``
        fallbacks instead of ``dict.get`` defaults.
        """
        endpoints: list[Endpoint] = []
        types_map = {
            t["name"]: t
            for t in schema.get("types") or []
            if isinstance(t, dict) and t.get("name")
        }

        query_type_name = (schema.get("queryType") or {}).get("name") or "Query"
        mutation_type_name = (schema.get("mutationType") or {}).get("name") or "Mutation"

        roots = [(query_type_name, "query")]
        if mutation_type_name != query_type_name:
            # Identical root names would list every field twice.
            roots.append((mutation_type_name, "mutation"))

        for type_name, op_type in roots:
            type_def = types_map.get(type_name)
            if not type_def:
                continue
            for field in type_def.get("fields") or []:
                if not isinstance(field, dict):
                    continue
                name = field.get("name")
                if not isinstance(name, str) or not name or name.startswith("_"):
                    continue

                params = [
                    Parameter(
                        name=arg["name"],
                        location=ParameterLocation.BODY,
                        # A non-null argument that declares a default value may
                        # be omitted by the caller, so it is not required.
                        required=(
                            (arg.get("type") or {}).get("kind") == "NON_NULL"
                            and arg.get("defaultValue") is None
                        ),
                        description=arg.get("description"),
                    )
                    for arg in field.get("args") or []
                    if isinstance(arg, dict) and arg.get("name")
                ]

                endpoints.append(
                    Endpoint(
                        path=f"/graphql#{op_type}.{name}",
                        method="POST",
                        description=field.get("description", "") or "",
                        parameters=params,
                        response_schema=self._type_to_schema(field.get("type") or {}),
                    )
                )

        return endpoints

    def _type_to_schema(self, gql_type: dict[str, Any]) -> dict[str, Any]:
        """Convert a GraphQL type reference into a small JSON-Schema-like dict.

        ``NON_NULL`` wrappers are unwrapped, ``LIST`` becomes an array of the
        wrapped type, scalars map through ``_scalar_to_json_type``, objects and
        interfaces become titled objects, enums become titled strings. Anything
        else, including a missing or null reference, becomes a bare object.
        """
        if not isinstance(gql_type, dict):
            return {"type": "object"}

        kind = gql_type.get("kind") or ""
        name = gql_type.get("name") or ""

        if kind == "NON_NULL":
            return self._type_to_schema(gql_type.get("ofType") or {})
        if kind == "LIST":
            return {"type": "array", "items": self._type_to_schema(gql_type.get("ofType") or {})}
        if kind == "SCALAR":
            return {"type": _scalar_to_json_type(name)}
        if kind in ("OBJECT", "INTERFACE"):
            return {"type": "object", "title": name}
        if kind == "ENUM":
            return {"type": "string", "title": name}
        return {"type": "object"}

    def _infer_service_name(self, url: str) -> str:
        """Derive a display name from the URL's host.

        Uses the second-to-last dot-separated label of the hostname when there
        are at least two labels, otherwise the whole hostname, capitalised.
        """
        from urllib.parse import urlparse

        parsed = urlparse(url)
        host = parsed.hostname or "unknown"
        parts = host.split(".")
        if len(parts) >= 2:
            return parts[-2].capitalize()
        return host.capitalize()
168
+
169
+
170
def _scalar_to_json_type(name: str) -> str:
    """Translate a GraphQL scalar name into a JSON Schema type name.

    ``Int``, ``Float`` and ``Boolean`` become ``integer``, ``number`` and
    ``boolean``. Every other name (``String``, ``ID``, ``DateTime``, ``Date``
    and anything unrecognised) becomes ``string``.
    """
    non_string_scalars = {"Int": "integer", "Float": "number", "Boolean": "boolean"}
    try:
        return non_string_scalars[name]
    except KeyError:
        return "string"
@@ -0,0 +1,159 @@
1
+ """MCP-based API discovery.
2
+
3
+ If the service publishes an MCP server, tools and resources are already
4
+ structured with types and descriptions — the cheapest and most reliable
5
+ discovery method (Level 1).
6
+
7
+ Requires the `mcp` extra: pip install liquid[mcp]
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import logging
13
+ from typing import Any
14
+
15
+ from liquid.exceptions import DiscoveryError
16
+ from liquid.models.schema import (
17
+ APISchema,
18
+ AuthRequirement,
19
+ Endpoint,
20
+ Parameter,
21
+ ParameterLocation,
22
+ )
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+ _MCP_AVAILABLE = False
27
+ try:
28
+ from mcp import ClientSession # type: ignore[import-untyped]
29
+ from mcp.client.streamable_http import streamable_http_client # type: ignore[import-untyped]
30
+
31
+ _MCP_AVAILABLE = True
32
+ except ImportError:
33
+ pass
34
+
35
+
36
class MCPDiscovery:
    """Discovers APIs by connecting to an MCP server.

    MCP servers publish tools and resources with structured types
    and descriptions. This strategy connects via Streamable HTTP,
    lists available tools/resources, and maps them to APISchema.

    Falls back gracefully if the `mcp` package is not installed
    or the URL doesn't expose an MCP endpoint.

    Args:
        mcp_path: Path appended to the target URL to reach the MCP endpoint.
    """

    def __init__(self, mcp_path: str = "/mcp") -> None:
        self.mcp_path = mcp_path

    async def discover(self, url: str) -> APISchema | None:
        """Try to describe the service at ``url`` through its MCP endpoint.

        Returns:
            The discovered schema, or ``None`` when the MCP SDK is missing, the
            connection fails, or the server lists no tools and no resources.

        Raises:
            DiscoveryError: Re-raised unchanged if raised during discovery.
        """
        if not _MCP_AVAILABLE:
            logger.debug("MCP SDK not installed, skipping MCPDiscovery")
            return None

        mcp_url = f"{url.rstrip('/')}{self.mcp_path}"
        try:
            return await self._connect_and_discover(mcp_url, url)
        except DiscoveryError:
            raise
        except Exception as e:
            logger.debug("MCP discovery failed for %s: %s", mcp_url, e)
            return None

    async def _connect_and_discover(self, mcp_url: str, source_url: str) -> APISchema | None:
        """Open an MCP session, list tools and resources, and build the schema.

        Returns ``None`` when the server lists neither tools nor resources.
        """
        async with streamable_http_client(mcp_url) as streams:
            # Take the first two items by index so that any extra element in
            # the yielded tuple is tolerated.
            # NOTE(review): tuple length appears to differ between MCP SDK
            # releases -- confirm against the pinned version.
            read, write = streams[0], streams[1]
            async with ClientSession(read, write) as session:
                await session.initialize()

                tools = await self._list_or_empty(session.list_tools, "tools", mcp_url)
                resources = await self._list_or_empty(session.list_resources, "resources", mcp_url)

                if not tools and not resources:
                    return None

                endpoints = self._tools_to_endpoints(tools)
                endpoints.extend(self._resources_to_endpoints(resources))

                return APISchema(
                    source_url=source_url,
                    service_name=self._infer_service_name(source_url),
                    discovery_method="mcp",
                    endpoints=endpoints,
                    auth=AuthRequirement(type="bearer", tier="A"),
                )

    async def _list_or_empty(self, lister: Any, attr: str, mcp_url: str) -> list[Any]:
        """Await ``lister()`` and return its ``attr`` list, or ``[]`` on failure.

        A failing listing call is logged at debug level and treated as empty so
        that one unavailable listing does not discard the other.
        """
        try:
            result = await lister()
        except Exception as e:
            logger.debug("MCP %s listing failed for %s: %s", attr, mcp_url, e)
            return []
        return list(getattr(result, attr, None) or [])

    def _tools_to_endpoints(self, tools: list[Any]) -> list[Endpoint]:
        """Map each MCP tool to a POST endpoint under ``/mcp/tools/``.

        The tool's input schema properties become body parameters and its
        description is truncated to 500 characters.
        """
        endpoints: list[Endpoint] = []
        for tool in tools:
            name = getattr(tool, "name", str(tool))
            description = getattr(tool, "description", "") or ""
            input_schema = getattr(tool, "inputSchema", None) or {}

            params = self._schema_to_parameters(input_schema)

            endpoints.append(
                Endpoint(
                    path=f"/mcp/tools/{name}",
                    method="POST",
                    description=description[:500],
                    parameters=params,
                    response_schema={"type": "object"},
                )
            )
        return endpoints

    def _resources_to_endpoints(self, resources: list[Any]) -> list[Endpoint]:
        """Map each MCP resource to a GET endpoint under ``/mcp/resources/``.

        A missing or ``None`` name falls back to the resource URI, and a
        missing or ``None`` MIME type falls back to ``application/json``.
        """
        endpoints: list[Endpoint] = []
        for resource in resources:
            uri = str(getattr(resource, "uri", resource))
            # `or` also covers attributes that exist but are None, which a
            # getattr default alone would let through.
            name = getattr(resource, "name", None) or uri
            description = getattr(resource, "description", "") or ""
            mime_type = getattr(resource, "mimeType", None) or "application/json"

            endpoints.append(
                Endpoint(
                    path=f"/mcp/resources/{name}",
                    method="GET",
                    description=description[:500] or f"Resource: {uri}",
                    response_schema={"type": "object", "mimeType": mime_type},
                )
            )
        return endpoints

    def _schema_to_parameters(self, input_schema: dict[str, Any]) -> list[Parameter]:
        """Convert a tool's JSON Schema ``properties`` into body parameters.

        Returns an empty list when ``input_schema`` or its ``properties`` is
        not a dict. Properties whose schema is not a dict are skipped, and a
        ``required`` value that is not a list is treated as empty.
        """
        if not isinstance(input_schema, dict):
            return []

        properties = input_schema.get("properties")
        if not isinstance(properties, dict):
            return []

        required = input_schema.get("required")
        required_fields = (
            {item for item in required if isinstance(item, str)} if isinstance(required, (list, tuple)) else set()
        )

        params: list[Parameter] = []
        for prop_name, prop_schema in properties.items():
            if not isinstance(prop_schema, dict):
                continue
            params.append(
                Parameter(
                    name=prop_name,
                    location=ParameterLocation.BODY,
                    required=prop_name in required_fields,
                    schema=prop_schema,
                    description=prop_schema.get("description"),
                )
            )
        return params

    def _infer_service_name(self, url: str) -> str:
        """Derive a display name from the URL's host.

        Uses the second-to-last dot-separated label of the hostname when there
        are at least two labels, otherwise the whole hostname, capitalised.
        """
        from urllib.parse import urlparse

        parsed = urlparse(url)
        host = parsed.hostname or "unknown"
        parts = host.split(".")
        if len(parts) >= 2:
            return parts[-2].capitalize()
        return host.capitalize()