liquid-api 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- liquid/__init__.py +60 -0
- liquid/_defaults.py +58 -0
- liquid/auth/__init__.py +8 -0
- liquid/auth/classifier.py +73 -0
- liquid/auth/manager.py +108 -0
- liquid/client.py +213 -0
- liquid/discovery/__init__.py +18 -0
- liquid/discovery/base.py +53 -0
- liquid/discovery/browser.py +175 -0
- liquid/discovery/diff.py +66 -0
- liquid/discovery/graphql.py +180 -0
- liquid/discovery/mcp.py +159 -0
- liquid/discovery/openapi.py +227 -0
- liquid/discovery/rest_heuristic.py +157 -0
- liquid/events.py +37 -0
- liquid/exceptions.py +51 -0
- liquid/mapping/__init__.py +9 -0
- liquid/mapping/learning.py +62 -0
- liquid/mapping/proposer.py +150 -0
- liquid/mapping/reviewer.py +84 -0
- liquid/models/__init__.py +36 -0
- liquid/models/adapter.py +35 -0
- liquid/models/llm.py +42 -0
- liquid/models/schema.py +84 -0
- liquid/models/sync.py +35 -0
- liquid/protocols.py +29 -0
- liquid/py.typed +0 -0
- liquid/sync/__init__.py +29 -0
- liquid/sync/auto_repair.py +64 -0
- liquid/sync/engine.py +176 -0
- liquid/sync/fetcher.py +92 -0
- liquid/sync/mapper.py +73 -0
- liquid/sync/pagination.py +102 -0
- liquid/sync/retry.py +47 -0
- liquid/sync/selector.py +32 -0
- liquid/sync/transform.py +103 -0
- liquid_api-0.2.0.dist-info/METADATA +177 -0
- liquid_api-0.2.0.dist-info/RECORD +39 -0
- liquid_api-0.2.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
import httpx
|
|
7
|
+
import yaml
|
|
8
|
+
|
|
9
|
+
from liquid.exceptions import DiscoveryError
|
|
10
|
+
from liquid.models.schema import (
|
|
11
|
+
APISchema,
|
|
12
|
+
AuthRequirement,
|
|
13
|
+
Endpoint,
|
|
14
|
+
PaginationType,
|
|
15
|
+
Parameter,
|
|
16
|
+
ParameterLocation,
|
|
17
|
+
RateLimits,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
# Locations, relative to the base URL, that are probed for an OpenAPI/Swagger
# document. They are tried in this order and the first usable document wins.
_SPEC_PATHS = [
    # Conventional root-level spec files.
    "/openapi.json",
    "/openapi.yaml",
    "/swagger.json",
    # Framework- and tool-specific locations.
    "/swagger/v1/swagger.json",
    "/api-docs",
    "/api/swagger.json",
    # Well-known URIs.
    "/.well-known/openapi.yaml",
    "/.well-known/openapi.json",
    "/v3/api-docs",
]
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class OpenAPIDiscovery:
    """Discovers APIs by finding and parsing OpenAPI/Swagger specifications.

    ``discover`` probes the locations listed in ``_SPEC_PATHS`` under a base
    URL, takes the first response that parses to a mapping containing an
    ``openapi`` or ``swagger`` key, and converts it into an ``APISchema``.
    """

    # HTTP verbs that are turned into endpoints; other path-item keys are ignored.
    _HTTP_METHODS = ("get", "post", "put", "patch", "delete")

    class _BorrowedClient:
        """Async context manager around a caller-owned client.

        Leaving the ``async with`` block does nothing, so the caller's client
        is never closed here. Attribute access is forwarded to the client.
        """

        def __init__(self, client):
            self._client = client

        async def __aenter__(self):
            return self

        async def __aexit__(self, *args):
            pass

        def __getattr__(self, name):
            return getattr(self._client, name)

    def __init__(self, http_client: httpx.AsyncClient | None = None) -> None:
        """Store an optional caller-owned HTTP client to reuse for probing."""
        self._external_client = http_client

    async def discover(self, url: str) -> APISchema | None:
        """Locate and parse an OpenAPI/Swagger spec under ``url``.

        Returns:
            The parsed ``APISchema``, or ``None`` when no spec was found.

        Raises:
            DiscoveryError: If a spec was found but could not be converted.
        """
        async with self._get_client() as client:
            spec = await self._find_spec(client, url)

        if spec is None:
            return None

        try:
            return self._parse_spec(spec, url)
        except Exception as e:
            raise DiscoveryError(f"Failed to parse OpenAPI spec from {url}: {e}") from e

    async def _find_spec(self, client: httpx.AsyncClient, base_url: str) -> dict[str, Any] | None:
        """Return the first spec document found under ``base_url``, else ``None``.

        Probing is best-effort: a path that fails to load or parse is skipped.
        """
        base = base_url.rstrip("/")
        for path in _SPEC_PATHS:
            try:
                resp = await client.get(f"{base}{path}", follow_redirects=True, timeout=10.0)
                if resp.is_success:
                    content_type = resp.headers.get("content-type", "")
                    text = resp.text
                    is_yaml = "yaml" in content_type or path.endswith(".yaml")
                    spec = yaml.safe_load(text) if is_yaml else resp.json()
                    if isinstance(spec, dict) and ("openapi" in spec or "swagger" in spec):
                        logger.info("Found OpenAPI spec at %s%s", base, path)
                        return spec
            except Exception:
                # Still best-effort, but leave a trace so a misbehaving server
                # can be diagnosed at debug level.
                logger.debug("No usable OpenAPI spec at %s%s", base, path, exc_info=True)
                continue
        return None

    def _parse_spec(self, spec: dict[str, Any], source_url: str) -> APISchema:
        """Convert a loaded spec document into an ``APISchema``."""
        version = spec.get("openapi", spec.get("swagger", ""))
        is_v3 = str(version).startswith("3")

        # ``info`` or ``title`` may be present but null in a hand-written spec.
        info = spec.get("info")
        title = info.get("title") if isinstance(info, dict) else None
        service_name = str(title) if title else "Unknown"

        return APISchema(
            source_url=source_url,
            service_name=service_name,
            discovery_method="openapi",
            endpoints=self._extract_endpoints(spec, is_v3),
            auth=self._extract_auth(spec, is_v3),
            rate_limits=self._extract_rate_limits(spec),
        )

    @staticmethod
    def _as_list(value: Any) -> list[Any]:
        """Return ``value`` if it is a list, otherwise an empty list."""
        return value if isinstance(value, list) else []

    @staticmethod
    def _merge_raw_parameters(path_level: list[Any], operation_level: list[Any]) -> list[Any]:
        """Combine path-level and operation-level parameter objects.

        An operation-level parameter replaces a path-level one that has the
        same ``name`` and ``in``, so the pair is not reported twice.
        """
        overridden = {
            (str(p.get("name")), str(p.get("in")))
            for p in operation_level
            if isinstance(p, dict) and p.get("name")
        }
        inherited = [
            p
            for p in path_level
            if not (isinstance(p, dict) and (str(p.get("name")), str(p.get("in"))) in overridden)
        ]
        return inherited + list(operation_level)

    def _extract_endpoints(self, spec: dict[str, Any], is_v3: bool) -> list[Endpoint]:
        """Build one ``Endpoint`` per non-deprecated operation in ``paths``."""
        endpoints: list[Endpoint] = []
        paths = spec.get("paths")
        if not isinstance(paths, dict):
            return endpoints

        for path, path_item in paths.items():
            if not isinstance(path_item, dict):
                continue
            # ``parameters: null`` would otherwise make list concatenation fail.
            shared_params = self._as_list(path_item.get("parameters"))

            for method in self._HTTP_METHODS:
                operation = path_item.get(method)
                if not isinstance(operation, dict):
                    continue
                if operation.get("deprecated", False):
                    continue

                params = self._extract_parameters(
                    self._merge_raw_parameters(shared_params, self._as_list(operation.get("parameters")))
                )
                # Fall back to ``description`` when ``summary`` is missing or empty.
                description = operation.get("summary") or operation.get("description") or ""

                endpoints.append(
                    Endpoint(
                        path=path,
                        method=method.upper(),
                        description=str(description)[:500] if description else "",
                        parameters=params,
                        response_schema=self._extract_response_schema(operation, is_v3),
                        pagination=self._infer_pagination(params),
                    )
                )

        return endpoints

    def _extract_parameters(self, raw_params: list[dict[str, Any]]) -> list[Parameter]:
        """Convert raw parameter objects into ``Parameter`` models.

        Entries that are not mappings or have no ``name`` are skipped. A
        location that ``ParameterLocation`` does not accept falls back to
        ``ParameterLocation.QUERY``.
        """
        params: list[Parameter] = []
        for p in raw_params:
            if not isinstance(p, dict):
                continue
            name = p.get("name", "")
            if not name:
                continue

            try:
                location = ParameterLocation(p.get("in", "query"))
            except ValueError:
                location = ParameterLocation.QUERY

            raw_schema = p.get("schema")
            if raw_schema is None:
                # No ``schema`` object: use the parameter's own ``type`` if it has one.
                type_str = p.get("type")
                raw_schema = {"type": type_str} if type_str else None

            params.append(
                Parameter(
                    name=name,
                    location=location,
                    required=bool(p.get("required", False)),
                    schema=raw_schema,
                    description=p.get("description"),
                )
            )
        return params

    def _extract_response_schema(self, operation: dict[str, Any], is_v3: bool) -> dict[str, Any]:
        """Return the schema of the 200 (else 201) response, or ``{}``.

        Status codes are looked up both as strings and as integers, because an
        unquoted ``200:`` key in a YAML document is loaded as an ``int``.
        """
        responses = operation.get("responses")
        if not isinstance(responses, dict):
            return {}

        success_resp: Any = {}
        for status in ("200", 200, "201", 201):
            if status in responses:
                success_resp = responses[status]
                break
        if not isinstance(success_resp, dict):
            return {}

        if is_v3:
            content = success_resp.get("content")
            json_content = content.get("application/json") if isinstance(content, dict) else None
            schema = json_content.get("schema") if isinstance(json_content, dict) else None
        else:
            schema = success_resp.get("schema")
        return schema if isinstance(schema, dict) else {}

    def _extract_auth(self, spec: dict[str, Any], is_v3: bool) -> AuthRequirement:
        """Derive an ``AuthRequirement`` from the first recognised security scheme.

        Falls back to ``custom`` / tier ``C`` when nothing is declared or
        nothing is recognised.
        """
        if is_v3:
            components = spec.get("components")
            security_schemes = components.get("securitySchemes") if isinstance(components, dict) else None
        else:
            security_schemes = spec.get("securityDefinitions")

        if not isinstance(security_schemes, dict) or not security_schemes:
            return AuthRequirement(type="custom", tier="C")

        for scheme in security_schemes.values():
            if not isinstance(scheme, dict):
                continue
            scheme_type = str(scheme.get("type") or "").lower()

            if scheme_type == "oauth2":
                return AuthRequirement(type="oauth2", tier="A")
            if scheme_type == "apikey":
                return AuthRequirement(type="api_key", tier="C")
            if scheme_type == "basic":
                # Swagger 2.0 declares basic auth as ``type: basic``.
                return AuthRequirement(type="basic", tier="C")
            if scheme_type == "http":
                http_scheme = str(scheme.get("scheme") or "").lower()
                if http_scheme == "bearer":
                    return AuthRequirement(type="bearer", tier="A")
                if http_scheme == "basic":
                    return AuthRequirement(type="basic", tier="C")

        return AuthRequirement(type="custom", tier="C")

    @staticmethod
    def _coerce_rate(value: Any) -> float | None:
        """Return ``value`` as a float when it is a number or numeric string."""
        if isinstance(value, bool):
            # ``bool`` is an ``int`` subclass; ``True`` is not a rate.
            return None
        if isinstance(value, (int, float)):
            return float(value)
        if isinstance(value, str):
            try:
                return float(value.strip())
            except ValueError:
                return None
        return None

    def _extract_rate_limits(self, spec: dict[str, Any]) -> RateLimits | None:
        """Read a rate limit from the ``x-rateLimit-limit`` / ``x-rate-limit`` extensions.

        Returns ``None`` when neither extension holds a truthy value. When one
        does but is not numeric, a ``RateLimits`` with
        ``requests_per_minute=None`` is returned.
        """
        rate_limit = spec.get("x-rateLimit-limit") or spec.get("x-rate-limit")
        if not rate_limit:
            return None
        return RateLimits(requests_per_minute=self._coerce_rate(rate_limit))

    def _infer_pagination(self, params: list[Parameter]) -> PaginationType | None:
        """Guess the pagination style from parameter names (case-insensitive)."""
        param_names = {p.name.lower() for p in params}
        if param_names & {"cursor", "after", "before"}:
            return PaginationType.CURSOR
        if "offset" in param_names:
            return PaginationType.OFFSET
        if param_names & {"page", "page_number"}:
            return PaginationType.PAGE_NUMBER
        return None

    def _get_client(self) -> httpx.AsyncClient:
        """Return an async-context-manageable client.

        A caller-supplied client is wrapped so that it is not closed when the
        ``async with`` block exits; otherwise a fresh ``httpx.AsyncClient`` is
        returned.
        """
        if self._external_client is not None:
            return self._BorrowedClient(self._external_client)  # type: ignore[return-value]
        return httpx.AsyncClient()
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
5
|
+
|
|
6
|
+
import httpx
|
|
7
|
+
|
|
8
|
+
from liquid.exceptions import DiscoveryError
|
|
9
|
+
from liquid.models.schema import APISchema, AuthRequirement, Endpoint
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from liquid.protocols import LLMBackend
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
# Candidate API roots and documentation paths that are probed under the base URL.
_PROBE_PATHS = [
    # Versioned and unversioned API roots.
    "/api",
    "/api/v1",
    "/api/v2",
    "/v1",
    "/v2",
    # Documentation and generic REST entry points.
    "/docs",
    "/api-docs",
    "/rest",
]
|
|
26
|
+
|
|
27
|
+
# Resource collection names that are probed beneath the "/api/v1" prefix.
_COMMON_RESOURCE_PATHS = [
    "/users",
    "/items",
    "/orders",
    "/products",
    "/accounts",
    "/events",
    "/webhooks",
]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class RESTHeuristicDiscovery:
    """Discovers REST APIs by probing common patterns and using LLM to interpret.

    Paths from ``_PROBE_PATHS`` and ``_COMMON_RESOURCE_PATHS`` are requested
    under the base URL. Successful JSON responses are summarised and handed to
    the LLM, whose reply is turned into an ``APISchema``.
    """

    # Auth types accepted from the LLM reply; anything else becomes "custom".
    _VALID_AUTH_TYPES = frozenset({"oauth2", "api_key", "bearer", "basic", "custom"})

    def __init__(
        self,
        llm: LLMBackend,
        http_client: httpx.AsyncClient | None = None,
    ) -> None:
        """Store the LLM backend and an optional caller-owned HTTP client."""
        self.llm = llm
        self._external_client = http_client

    async def discover(self, url: str) -> APISchema | None:
        """Probe ``url`` and interpret the findings.

        Returns:
            An ``APISchema``, or ``None`` when no probe returned JSON.

        Raises:
            DiscoveryError: If probing or interpretation fails.
        """
        owns_client = self._external_client is None
        client = httpx.AsyncClient() if owns_client else self._external_client
        try:
            found_endpoints = await self._probe_endpoints(client, url)
            if not found_endpoints:
                return None

            return await self._interpret_with_llm(url, found_endpoints)
        except DiscoveryError:
            raise
        except Exception as e:
            raise DiscoveryError(f"REST heuristic discovery failed: {e}") from e
        finally:
            # Only close a client created here, never one the caller supplied.
            if owns_client:
                await client.aclose()

    async def _probe_endpoints(
        self,
        client: httpx.AsyncClient,
        base_url: str,
    ) -> list[dict]:
        """Request every candidate path and record the successful JSON responses.

        Each record holds ``path``, ``status``, ``content_type`` and the first
        500 characters of the body as ``body_preview``. Probing is best-effort:
        a path whose request fails is skipped.
        """
        base = base_url.rstrip("/")
        found: list[dict] = []

        all_paths = _PROBE_PATHS + [f"/api/v1{p}" for p in _COMMON_RESOURCE_PATHS]

        for path in all_paths:
            try:
                resp = await client.get(f"{base}{path}", timeout=5.0, follow_redirects=True)
                if resp.is_success:
                    content_type = resp.headers.get("content-type", "")
                    if "json" in content_type:
                        found.append(
                            {
                                "path": path,
                                "status": resp.status_code,
                                "content_type": content_type,
                                "body_preview": resp.text[:500],
                            }
                        )
            except Exception:
                # Still best-effort, but keep a debug trace of what failed.
                logger.debug("Probe of %s%s failed", base, path, exc_info=True)
                continue

        return found

    async def _interpret_with_llm(self, url: str, probed: list[dict]) -> APISchema:
        """Ask the LLM to describe the API behind the probe results."""
        from liquid.models.llm import Message

        probe_summary = "\n".join(f"- {p['path']} ({p['status']}): {p['body_preview'][:200]}" for p in probed)

        messages = [
            Message(
                role="system",
                content=(
                    "You are an API analyst. Given probe results from an unknown REST API, "
                    "identify the likely endpoints, HTTP methods, and data structure. "
                    "Respond with a JSON object containing: service_name (string), "
                    "endpoints (array of {path, method, description}), "
                    "auth_type (oauth2|api_key|bearer|basic|custom)."
                ),
            ),
            Message(
                role="user",
                content=f"Base URL: {url}\n\nProbe results:\n{probe_summary}",
            ),
        ]

        response = await self.llm.chat(messages)
        return self._parse_llm_response(response.content or "{}", url, probed)

    @staticmethod
    def _load_json_object(content: str) -> dict:
        """Parse ``content`` into a dict, tolerating prose or code fences.

        The whole string is tried first. If that is not valid JSON, the span
        from the first ``{`` to the last ``}`` is tried. Anything that does not
        end up as a JSON object yields ``{}``.
        """
        import json

        try:
            parsed = json.loads(content)
        except json.JSONDecodeError:
            start = content.find("{")
            end = content.rfind("}") + 1
            if start == -1 or end <= start:
                return {}
            try:
                parsed = json.loads(content[start:end])
            except json.JSONDecodeError:
                return {}
        return parsed if isinstance(parsed, dict) else {}

    def _parse_llm_response(self, content: str, url: str, probed: list[dict]) -> APISchema:
        """Build an ``APISchema`` from the LLM reply.

        If the reply yields no usable endpoints, the probed paths are used as
        GET endpoints. An unrecognised auth type becomes ``custom``.
        """
        data = self._load_json_object(content)

        raw_endpoints = data.get("endpoints")
        if not isinstance(raw_endpoints, list):
            raw_endpoints = []

        endpoints = [
            Endpoint(
                path=ep["path"],
                # Null or non-string values fall back to the defaults.
                method=str(ep.get("method") or "GET").upper(),
                description=str(ep.get("description") or ""),
            )
            for ep in raw_endpoints
            if isinstance(ep, dict) and isinstance(ep.get("path"), str) and ep["path"]
        ]

        if not endpoints:
            endpoints = [
                Endpoint(path=p["path"], method="GET", description=f"Discovered via probe ({p['status']})")
                for p in probed
            ]

        raw_auth = data.get("auth_type")
        auth_type = raw_auth.strip().lower() if isinstance(raw_auth, str) else "custom"
        if auth_type not in self._VALID_AUTH_TYPES:
            auth_type = "custom"

        tier = "A" if auth_type in ("oauth2", "bearer") else "C"

        service_name = data.get("service_name")
        return APISchema(
            source_url=url,
            service_name=str(service_name) if service_name else "Unknown",
            discovery_method="rest_heuristic",
            endpoints=endpoints,
            auth=AuthRequirement(type=auth_type, tier=tier),
        )
|
liquid/events.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from datetime import UTC, datetime
|
|
4
|
+
from typing import Protocol, runtime_checkable
|
|
5
|
+
|
|
6
|
+
from pydantic import BaseModel, Field
|
|
7
|
+
|
|
8
|
+
from liquid.models.schema import SchemaDiff # noqa: TC001
|
|
9
|
+
from liquid.models.sync import SyncError, SyncResult # noqa: TC001
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _utc_now() -> datetime:
    # Default factory for Event.timestamp: the current time, timezone-aware (UTC).
    return datetime.now(UTC)


# Base model for events: a creation timestamp plus an optional adapter id.
class Event(BaseModel):
    timestamp: datetime = Field(default_factory=_utc_now)
    adapter_id: str | None = Field(default=None)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Event carrying the SyncResult of a sync; `result` is required.
class SyncCompleted(Event):
    result: SyncResult
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Event carrying a SyncError (required) and a failure counter that defaults to 1.
class SyncFailed(Event):
    error: SyncError
    consecutive_failures: int = Field(default=1)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# Event carrying a required free-text `reason`.
class ReDiscoveryNeeded(Event):
    reason: str
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# Event carrying a SchemaDiff (required) and an `auto_approved` flag, False by default.
class AdapterRepaired(Event):
    diff: SchemaDiff
    auto_approved: bool = Field(default=False)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@runtime_checkable
class EventHandler(Protocol):
    """Structural interface for objects exposing an async ``handle(event)`` method.

    Being ``runtime_checkable``, it can be used with ``isinstance``; that check
    only verifies that a ``handle`` attribute exists.
    """

    async def handle(self, event: Event) -> None:
        """Process a single event."""
        ...
|
liquid/exceptions.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class LiquidError(Exception):
    """Root of this module's exception hierarchy.

    Every other exception class in this module derives from it, directly or
    through ``SyncRuntimeError``.
    """
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class DiscoveryError(LiquidError):
    """Raised when API discovery fails, e.g. a located spec cannot be parsed."""
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class AuthSetupError(LiquidError):
    """Error type for authentication setup problems.

    NOTE(review): described from the class name only; no raise site is visible
    in this module.
    """
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class MappingError(LiquidError):
    """Raised when proposing field mappings fails, e.g. the LLM call errors out."""
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class SyncRuntimeError(LiquidError):
    """Common base of the sync-related errors defined in this module.

    ``FieldNotFoundError``, ``AuthError``, ``RateLimitError``,
    ``ServiceDownError`` and ``EndpointGoneError`` all derive from it.
    """
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class FieldNotFoundError(SyncRuntimeError):
    """Sync error for a field that could not be found.

    NOTE(review): described from the class name only; no raise site is visible
    in this module.
    """
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class AuthError(SyncRuntimeError):
    """Sync error for an authentication failure.

    NOTE(review): described from the class name only; no raise site is visible
    in this module.
    """
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class RateLimitError(SyncRuntimeError):
    """Sync error for an exceeded rate limit.

    The optional ``retry_after`` value passed to the constructor is kept on
    the instance as the ``retry_after`` attribute.
    """

    def __init__(
        self,
        message: str = "Rate limit exceeded",
        retry_after: float | None = None,
    ) -> None:
        super().__init__(message)
        # None when the caller supplied no retry hint.
        self.retry_after = retry_after
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class ServiceDownError(SyncRuntimeError):
    """Sync error for an unavailable remote service.

    NOTE(review): described from the class name only; no raise site is visible
    in this module.
    """
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class EndpointGoneError(SyncRuntimeError):
    """Sync error for an endpoint that no longer exists.

    NOTE(review): described from the class name only; no raise site is visible
    in this module.
    """
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class ReDiscoveryNeededError(LiquidError):
    """Error signalling that an API needs to be discovered again.

    Derives from ``LiquidError`` directly, not from ``SyncRuntimeError``.
    NOTE(review): the meaning is inferred from the class name; no raise site
    is visible in this module.
    """
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class VaultError(LiquidError):
    """Error type for vault-related failures.

    NOTE(review): described from the class name only; no raise site is visible
    in this module.
    """
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
5
|
+
|
|
6
|
+
if TYPE_CHECKING:
|
|
7
|
+
from liquid.models.adapter import FieldMapping
|
|
8
|
+
from liquid.protocols import KnowledgeStore
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class MappingLearner:
    """Records corrections and retrieves known mappings for learning.

    Every operation is a no-op (or returns ``None``) when no knowledge store
    was supplied.
    """

    def __init__(self, knowledge: KnowledgeStore | None = None) -> None:
        self.knowledge = knowledge

    async def record_corrections(
        self,
        service: str,
        target_model: str,
        corrections: list[tuple[FieldMapping, FieldMapping]],
    ) -> None:
        """Persist the corrected side of each ``(original, corrected)`` pair.

        Corrections are merged into whatever the store already holds for the
        same service and target model; on a target-field clash the correction
        wins.
        """
        if not self.knowledge or not corrections:
            return

        corrected_mappings = [fixed for _proposed, fixed in corrections]
        existing = await self.knowledge.find_mapping(service, target_model)

        if existing:
            merged = self._merge_mappings(existing, corrected_mappings)
        else:
            merged = corrected_mappings

        await self.knowledge.store_mapping(service, target_model, merged)
        logger.info(
            "Stored %d corrected mappings for %s -> %s",
            len(corrected_mappings),
            service,
            target_model,
        )

    async def get_known_mappings(
        self,
        service: str,
        target_model: str,
    ) -> list[FieldMapping] | None:
        """Return the stored mappings, or ``None`` without a knowledge store."""
        if not self.knowledge:
            return None
        return await self.knowledge.find_mapping(service, target_model)

    def _merge_mappings(
        self,
        existing: list[FieldMapping],
        new: list[FieldMapping],
    ) -> list[FieldMapping]:
        """Merge new corrections into existing mappings, preferring corrections.

        Mappings are keyed by ``target_field``. A key keeps the position of its
        first occurrence, while its value is the last mapping seen for it.
        """
        by_target: dict[str, FieldMapping] = {m.target_field: m for m in existing}
        by_target.update((m.target_field, m) for m in new)
        return [*by_target.values()]
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
from typing import TYPE_CHECKING, Any
|
|
6
|
+
|
|
7
|
+
from liquid.exceptions import MappingError
|
|
8
|
+
from liquid.models.adapter import FieldMapping
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from liquid.models.schema import APISchema
|
|
12
|
+
from liquid.protocols import KnowledgeStore, LLMBackend
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class MappingProposer:
    """Proposes field mappings using KnowledgeStore (if available) then LLM."""

    def __init__(self, llm: LLMBackend, knowledge: KnowledgeStore | None = None) -> None:
        """Store the LLM backend and the optional knowledge store."""
        self.llm = llm
        self.knowledge = knowledge

    async def propose(
        self,
        schema: APISchema,
        target_model: dict[str, Any],
        existing_mappings: list[FieldMapping] | None = None,
        removed_fields: list[str] | None = None,
    ) -> list[FieldMapping]:
        """Return field mappings from ``schema`` onto ``target_model``.

        When both ``existing_mappings`` (non-empty) and ``removed_fields`` are
        given, only mappings whose source was removed are re-proposed.
        Otherwise mappings come from the knowledge store if it has any, else
        from the LLM.

        Raises:
            MappingError: If the LLM proposal fails.
        """
        if existing_mappings and removed_fields is not None:
            return await self._selective_repropose(
                schema,
                target_model,
                existing_mappings,
                set(removed_fields),
            )

        if self.knowledge:
            known = await self.knowledge.find_mapping(schema.service_name, json.dumps(target_model))
            if known:
                logger.info("Found existing mappings for %s in knowledge store", schema.service_name)
                return known

        return await self._propose_with_llm(schema, target_model)

    async def _selective_repropose(
        self,
        schema: APISchema,
        target_model: dict[str, Any],
        existing: list[FieldMapping],
        removed: set[str],
    ) -> list[FieldMapping]:
        """Keep mappings whose source still exists and re-propose the rest.

        Kept mappings are copied with ``confidence=1.0``. For each target whose
        source was removed, the first LLM proposal for that target is
        appended, unless the target is already covered.
        """
        kept: list[FieldMapping] = []
        broken_targets: set[str] = set()

        for m in existing:
            if m.source_path in removed:
                broken_targets.add(m.target_field)
                logger.info("Mapping %s → %s dropped (field removed)", m.source_path, m.target_field)
            else:
                kept.append(
                    FieldMapping(
                        source_path=m.source_path,
                        target_field=m.target_field,
                        transform=m.transform,
                        confidence=1.0,
                    )
                )

        if broken_targets:
            # Track covered targets in a set instead of rescanning ``kept``.
            covered = {m.target_field for m in kept}
            for proposal in await self._propose_with_llm(schema, target_model):
                target = proposal.target_field
                if target in broken_targets and target not in covered:
                    kept.append(proposal)
                    covered.add(target)

        return kept

    async def _propose_with_llm(
        self,
        schema: APISchema,
        target_model: dict[str, Any],
    ) -> list[FieldMapping]:
        """Ask the LLM for mappings and parse its reply.

        The prompt includes at most the first 20 endpoints and the response
        schemas (truncated to 300 characters each) of the first 10.

        Raises:
            MappingError: If the LLM call or the parsing fails.
        """
        from liquid.models.llm import Message

        endpoints_desc = "\n".join(f"- {ep.method} {ep.path}: {ep.description}" for ep in schema.endpoints[:20])
        response_schemas = "\n".join(
            f"  {ep.path}: {json.dumps(ep.response_schema)[:300]}" for ep in schema.endpoints[:10] if ep.response_schema
        )

        messages = [
            Message(
                role="system",
                content=(
                    "You are a data mapping expert. Given an API schema and a target data model, "
                    "propose field mappings. Respond with a JSON array of objects, each with: "
                    "source_path (string, dot-notation), target_field (string), "
                    "transform (string expression or null), confidence (float 0-1)."
                ),
            ),
            Message(
                role="user",
                content=(
                    f"API: {schema.service_name}\n"
                    f"Endpoints:\n{endpoints_desc}\n\n"
                    f"Response schemas:\n{response_schemas}\n\n"
                    f"Target model:\n{json.dumps(target_model, indent=2)}\n\n"
                    "Propose field mappings as a JSON array."
                ),
            ),
        ]

        try:
            response = await self.llm.chat(messages)
            return self._parse_mappings(response.content or "[]")
        except Exception as e:
            raise MappingError(f"LLM mapping proposal failed: {e}") from e

    def _parse_mappings(self, content: str) -> list[FieldMapping]:
        """Extract ``FieldMapping`` objects from an LLM reply.

        The span from the first ``[`` to the last ``]`` is parsed as JSON, so
        surrounding prose is tolerated. Items that are not objects, lack
        ``source_path`` or ``target_field``, or fail conversion are skipped.
        A missing or null ``confidence`` defaults to 0.5.
        """
        start = content.find("[")
        end = content.rfind("]") + 1
        if start == -1 or end == 0:
            return []
        try:
            raw = json.loads(content[start:end])
        except json.JSONDecodeError:
            logger.warning("Failed to parse LLM mapping response")
            return []

        mappings: list[FieldMapping] = []
        for item in raw:
            if not isinstance(item, dict):
                continue
            if "source_path" not in item or "target_field" not in item:
                continue
            # An explicit ``"confidence": null`` is treated like a missing key,
            # so the mapping is kept rather than dropped by ``float(None)``.
            confidence = item.get("confidence")
            try:
                mappings.append(
                    FieldMapping(
                        source_path=item["source_path"],
                        target_field=item["target_field"],
                        transform=item.get("transform"),
                        confidence=0.5 if confidence is None else float(confidence),
                    )
                )
            except (ValueError, TypeError):
                continue

        return mappings
|