patchr 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- apps/__init__.py +2 -0
- apps/api/__init__.py +2 -0
- apps/api/main.py +652 -0
- apps/benchmarks/__init__.py +1 -0
- apps/benchmarks/main.py +20 -0
- apps/sandbox/__init__.py +1 -0
- apps/sandbox/main.py +20 -0
- apps/worker/__init__.py +2 -0
- apps/worker/main.py +15 -0
- apps/worker/verify.py +14 -0
- patchr/__init__.py +12 -0
- patchr/sdk/__init__.py +20 -0
- patchr/sdk/client.py +12 -0
- patchr-0.1.0.dist-info/METADATA +137 -0
- patchr-0.1.0.dist-info/RECORD +116 -0
- patchr-0.1.0.dist-info/WHEEL +5 -0
- patchr-0.1.0.dist-info/entry_points.txt +5 -0
- patchr-0.1.0.dist-info/licenses/LICENSE +17 -0
- patchr-0.1.0.dist-info/top_level.txt +3 -0
- picux/__init__.py +6 -0
- picux/agents/__init__.py +5 -0
- picux/agents/registry.py +204 -0
- picux/api/__init__.py +5 -0
- picux/api/service.py +5075 -0
- picux/audit/__init__.py +31 -0
- picux/audit/activity.py +97 -0
- picux/audit/observability.py +55 -0
- picux/audit/verification/__init__.py +21 -0
- picux/audit/verification/ledger.py +633 -0
- picux/benchmarks/__init__.py +5 -0
- picux/benchmarks/local.py +286 -0
- picux/config.py +140 -0
- picux/contracts/__init__.py +22 -0
- picux/contracts/handshake.py +122 -0
- picux/contracts/integration.py +385 -0
- picux/contracts/openapi.py +187 -0
- picux/contracts/protocol_map.py +152 -0
- picux/contracts/routes.py +980 -0
- picux/contracts/schema_catalog.py +125 -0
- picux/core/__init__.py +17 -0
- picux/core/models.py +148 -0
- picux/core/router.py +131 -0
- picux/core/runtime.py +42 -0
- picux/core/state_machine.py +38 -0
- picux/domains/__init__.py +2 -0
- picux/domains/bridge/HostRun.py +1104 -0
- picux/domains/bridge/__init__.py +6 -0
- picux/domains/bridge/engine.py +345 -0
- picux/domains/hunt/__init__.py +6 -0
- picux/domains/hunt/engine.py +307 -0
- picux/domains/hunt/models.py +88 -0
- picux/domains/pay/__init__.py +16 -0
- picux/domains/pay/adapters.py +607 -0
- picux/domains/pay/engine.py +950 -0
- picux/domains/pay/models.py +95 -0
- picux/domains/proxy/__init__.py +5 -0
- picux/domains/proxy/engine.py +466 -0
- picux/domains/resolve/__init__.py +5 -0
- picux/domains/resolve/engine.py +546 -0
- picux/orchestrator/__init__.py +3 -0
- picux/orchestrator/engine.py +2840 -0
- picux/portals/__init__.py +17 -0
- picux/portals/templates.py +272 -0
- picux/protocols/__init__.py +1 -0
- picux/protocols/a2a/__init__.py +6 -0
- picux/protocols/a2a/client.py +51 -0
- picux/protocols/a2a/envelope.py +132 -0
- picux/protocols/mcp/__init__.py +7 -0
- picux/protocols/mcp/client.py +69 -0
- picux/protocols/mcp/contract.py +67 -0
- picux/protocols/mcp/server.py +76 -0
- picux/sandbox/__init__.py +6 -0
- picux/sandbox/midnight_arbitrage.py +215 -0
- picux/sandbox/models.py +90 -0
- picux/sdk/__init__.py +13 -0
- picux/sdk/client.py +768 -0
- picux/sdk/external.py +245 -0
- picux/security/__init__.py +18 -0
- picux/security/auth.py +86 -0
- picux/security/config_validator.py +58 -0
- picux/security/policy.py +158 -0
- picux/security/secrets.py +144 -0
- picux/signals/__init__.py +1 -0
- picux/signals/community/__init__.py +24 -0
- picux/signals/community/adapters/__init__.py +7 -0
- picux/signals/community/adapters/reddit.py +37 -0
- picux/signals/community/adapters/shopify.py +23 -0
- picux/signals/community/adapters/web.py +23 -0
- picux/signals/community/disambiguation.py +51 -0
- picux/signals/community/intake.py +227 -0
- picux/signals/community/models.py +102 -0
- picux/signals/community/rules.py +91 -0
- picux/signals/community/scoring.py +64 -0
- picux/storage/__init__.py +41 -0
- picux/storage/agents.py +50 -0
- picux/storage/cases.py +440 -0
- picux/storage/channels.py +476 -0
- picux/storage/connectors.py +411 -0
- picux/storage/envelopes.py +137 -0
- picux/storage/escrows.py +168 -0
- picux/storage/events.py +989 -0
- picux/storage/keyspace.py +60 -0
- picux/storage/mandates.py +107 -0
- picux/storage/portals.py +222 -0
- picux/storage/postgres.py +2049 -0
- picux/storage/providers.py +148 -0
- picux/storage/proxy.py +231 -0
- picux/storage/receipts.py +131 -0
- picux/storage/signals.py +147 -0
- picux/storage/tasks.py +179 -0
- picux/tools/__init__.py +11 -0
- picux/tools/shared.py +2048 -0
- picux/verification/__init__.py +5 -0
- picux/verification/rollout.py +183 -0
- picux/workflows/__init__.py +5 -0
- picux/workflows/templates.py +74 -0
picux/tools/shared.py
ADDED
|
@@ -0,0 +1,2048 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import base64
|
|
4
|
+
import hashlib
|
|
5
|
+
import html
|
|
6
|
+
import ipaddress
|
|
7
|
+
import json
|
|
8
|
+
import os
|
|
9
|
+
import re
|
|
10
|
+
import shutil
|
|
11
|
+
import subprocess
|
|
12
|
+
import tempfile
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Any
|
|
16
|
+
from urllib.parse import quote_plus, urlencode, unquote_to_bytes, urlparse
|
|
17
|
+
from urllib.request import Request, urlopen
|
|
18
|
+
|
|
19
|
+
from picux.core import PicuxIntentRouter
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# Names of the domains handled by the shared tool layer.
SHARED_DOMAINS = {
    "hunt",
    "bridge",
    "resolve",
    "proxy",
    "pay",
}

# Built-in exchange-rate table keyed by currency code.
# NOTE(review): USD is 1.0, so the values look like "units per one USD" —
# confirm against the code that consumes this table.
DEFAULT_FX_RATES = {
    "USD": 1.0,
    "SEK": 9.207515,
    "NGN": 1359.86,
}
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass(frozen=True)
class NLPTool:
    """Turn a raw request payload into an intent, a route and follow-up metadata."""

    # Classifier used to obtain the seed domain and the classification reason.
    router: PicuxIntentRouter = field(default_factory=PicuxIntentRouter)

    def run(self, payload: dict[str, Any]) -> dict[str, Any]:
        """Classify the query carried by ``payload``.

        Args:
            payload: Request mapping. ``context`` is honoured only when it is
                a dict; ``taskId`` is read from the payload first and from the
                context otherwise.

        Returns:
            A result dict with the keys ``ok``, ``tool``, ``input``,
            ``intent``, ``route``, ``isFollowUp``, ``followUp``, ``entities``
            and ``confidence``.
        """
        text = _query(payload)

        rawContext = payload.get("context")
        ctx = rawContext if isinstance(rawContext, dict) else {}

        contextTaskId = ctx.get("taskId", "")
        taskRef = str(payload.get("taskId", contextTaskId) or "")

        verdict = self.router.classify(text)
        path = _route(text, seed=verdict.domain.value)
        found = _entities(text, payload)

        # A follow-up needs both a known task and follow-up phrasing; the
        # phrasing check is skipped entirely when there is no task id.
        followsUp = bool(taskRef) and bool(_looksLikeFollowUp(text))

        # The first routed domain wins; the classifier's domain is the fallback.
        domain = path[0] if path else verdict.domain.value
        intent = {
            "domain": domain,
            "reason": verdict.reason,
            "kind": _intentKind(text),
        }

        contextStatus = ctx.get("status", "")
        followUp = {
            "taskId": taskRef,
            "state": str(ctx.get("state", contextStatus) or ""),
            "reason": "ongoingTaskReference" if followsUp else "",
        }

        return {
            "ok": True,
            "tool": "nlpTool",
            "input": text,
            "intent": intent,
            "route": path,
            "isFollowUp": followsUp,
            "followUp": followUp,
            "entities": found,
            "confidence": _confidence(text, path, found),
        }
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@dataclass(frozen=True)
class ImageReader:
    """Extract text signals from image inputs and package them as evidence records."""

    def read(self, payload: dict[str, Any]) -> dict[str, Any]:
        """Produce one reading and one evidence entry per image in ``payload``.

        Returns:
            A dict holding the ``query``, the ``readings`` and ``evidence``
            lists, their ``count`` and a ``status`` that is ``"read"`` when at
            least one image was processed and ``"noImages"`` otherwise.
        """
        question = _query(payload)
        readings = [
            self._reading(question, picture, position)
            for position, picture in enumerate(_imageInputs(payload), start=1)
        ]
        evidence = [self._evidence(entry) for entry in readings]
        return {
            "ok": True,
            "tool": "imageReader",
            "query": question,
            "count": len(readings),
            "readings": readings,
            "evidence": evidence,
            "status": "read" if readings else "noImages",
        }

    @staticmethod
    def _reading(question: str, picture: dict[str, Any], position: int) -> dict[str, Any]:
        """Describe a single image: identity, digest, redacted text and query matches."""
        payloadBytes = _imageBytes(picture)
        extracted = _imageText(picture, payloadBytes)

        # Fingerprint the raw bytes when present, otherwise the extracted text;
        # with neither there is nothing to bind the reading to.
        if payloadBytes or extracted:
            fingerprint = hashlib.sha256(payloadBytes or extracted.encode("utf-8")).hexdigest()
        else:
            fingerprint = ""

        hits = _matches(question, extracted)

        # Identifier preference: imageId, then artifactId, then name, then a
        # positional placeholder (also used when the chosen value is empty).
        placeholder = f"image_{position}"
        identifier = str(picture.get("imageId", picture.get("artifactId", picture.get("name", placeholder))) or placeholder)

        return {
            "imageId": identifier,
            "name": str(picture.get("name", identifier) or identifier),
            "mime": str(picture.get("mime", picture.get("contentType", "")) or ""),
            "sha256": fingerprint,
            "text": _redact(extracted[:4000]),
            "matches": hits,
            "summary": _summary(extracted, hits),
            "sourceBound": bool(fingerprint),
        }

    @staticmethod
    def _evidence(entry: dict[str, Any]) -> dict[str, Any]:
        """Derive the evidence record that accompanies a reading."""
        return {
            "kind": "imageText",
            "name": entry["name"],
            "sha256": entry["sha256"],
            "mime": entry["mime"],
            "label": f"image:{entry['name']}",
            "source": "picux://tools/imageReader",
            "summary": entry["summary"],
        }
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
@dataclass(frozen=True)
|
|
114
|
+
class BrowserReader:
|
|
115
|
+
"""Headless-browser compatible website reader with a safe stdlib fallback."""
|
|
116
|
+
|
|
117
|
+
timeoutSec: int = 6
|
|
118
|
+
renderedTimeoutMs: int = 12000
|
|
119
|
+
renderedCommand: tuple[str, ...] | None = None
|
|
120
|
+
|
|
121
|
+
def read(self, payload: dict[str, Any]) -> dict[str, Any]:
|
|
122
|
+
query = _query(payload)
|
|
123
|
+
payloadEntities = payload.get("entities", {}) if isinstance(payload.get("entities"), dict) else {}
|
|
124
|
+
entities = {**payloadEntities, **_entities(query, payload)}
|
|
125
|
+
fxRates = _fxRates(payload)
|
|
126
|
+
budget = _budgetContext(query, payload, fxRates)
|
|
127
|
+
urls = [str(item).strip() for item in payload.get("urls", []) if str(item).strip()] if isinstance(payload.get("urls"), list) else []
|
|
128
|
+
marketplaceSet = _marketplaceSet(query, entities, payload)
|
|
129
|
+
searchTargets = _searchTargets(query, entities, payload, marketplaceSet=marketplaceSet)
|
|
130
|
+
allowNetwork = bool(payload.get("allowNetwork", payload.get("live", False)))
|
|
131
|
+
suppliedTargets = [{"source": "clientUrl", "url": url, "kind": "supplied"} for url in urls]
|
|
132
|
+
liveTargets = searchTargets if allowNetwork else []
|
|
133
|
+
readTargets = _dedupeTargets([*suppliedTargets, *liveTargets])
|
|
134
|
+
allowPrivateNetwork = bool(payload.get("allowPrivateNetwork", payload.get("allowLocalNetwork", False)))
|
|
135
|
+
preferRendered = allowNetwork and str(payload.get("browserMode", payload.get("adapter", "auto")) or "auto").lower() not in {"http", "stdlib", "stdlibhttpreader"}
|
|
136
|
+
searchText = str(payload.get("searchText", entities.get("item", "") or query) or "").strip()
|
|
137
|
+
targetLimit = _targetLimit(payload)
|
|
138
|
+
selectedTargets = readTargets[:targetLimit]
|
|
139
|
+
blockedTargets = [
|
|
140
|
+
_blockedUrlObservation(
|
|
141
|
+
str(target.get("url", "") or ""),
|
|
142
|
+
query,
|
|
143
|
+
source=str(target.get("source", "source") or "source"),
|
|
144
|
+
error=blockedReason,
|
|
145
|
+
)
|
|
146
|
+
for target in selectedTargets
|
|
147
|
+
for blockedReason in [_privateTargetBlockReason(str(target.get("url", "") or ""), allowPrivateNetwork=allowPrivateNetwork)]
|
|
148
|
+
if blockedReason
|
|
149
|
+
]
|
|
150
|
+
safeTargets = [
|
|
151
|
+
target
|
|
152
|
+
for target in selectedTargets
|
|
153
|
+
if not _privateTargetBlockReason(str(target.get("url", "") or ""), allowPrivateNetwork=allowPrivateNetwork)
|
|
154
|
+
]
|
|
155
|
+
rendered = self._readRendered(safeTargets, query, searchText) if preferRendered and safeTargets else _renderedSkipped("notRequested" if safeTargets else "noTargets")
|
|
156
|
+
observations = [*blockedTargets, *rendered["observations"]]
|
|
157
|
+
adapter = "playwrightChromium" if observations and rendered["meta"].get("available") else "stdlibHttpReader"
|
|
158
|
+
if not rendered["observations"]:
|
|
159
|
+
observations = list(blockedTargets)
|
|
160
|
+
for target in safeTargets:
|
|
161
|
+
observations.append(self._readUrl(str(target.get("url", "")), query, source=str(target.get("source", "source") or "source"), allowPrivateNetwork=allowPrivateNetwork))
|
|
162
|
+
adapter = "stdlibHttpReader"
|
|
163
|
+
offers = _offersFromObservations(observations, query, entities, budget=budget, fxRates=fxRates)
|
|
164
|
+
errors = [item for item in observations if not item.get("ok")]
|
|
165
|
+
if observations and len(errors) == len(observations):
|
|
166
|
+
status = "ioError"
|
|
167
|
+
elif observations:
|
|
168
|
+
status = "read"
|
|
169
|
+
elif searchTargets:
|
|
170
|
+
status = "needsSourceUrls"
|
|
171
|
+
else:
|
|
172
|
+
status = "noSources"
|
|
173
|
+
return {
|
|
174
|
+
"ok": not observations or len(errors) < len(observations),
|
|
175
|
+
"tool": "browserReader",
|
|
176
|
+
"engine": "renderedBrowserCompatible",
|
|
177
|
+
"adapter": adapter,
|
|
178
|
+
"rendered": rendered["meta"],
|
|
179
|
+
"query": query,
|
|
180
|
+
"marketplaceSet": marketplaceSet,
|
|
181
|
+
"budget": budget,
|
|
182
|
+
"fx": {"base": "USD", "rates": fxRates, "mode": "fallbackOrOverride"},
|
|
183
|
+
"searchTargets": searchTargets,
|
|
184
|
+
"observations": observations,
|
|
185
|
+
"offers": offers,
|
|
186
|
+
"errors": errors,
|
|
187
|
+
"marketplaceAttempts": _attemptSummaries(observations),
|
|
188
|
+
"networkAttempted": allowNetwork,
|
|
189
|
+
"status": status,
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
def _readUrl(self, url: str, query: str, *, source: str = "source", allowPrivateNetwork: bool = False) -> dict[str, Any]:
|
|
193
|
+
parsed = urlparse(url)
|
|
194
|
+
if parsed.scheme == "data":
|
|
195
|
+
raw = unquote_to_bytes(parsed.path.split(",", 1)[1] if "," in parsed.path else parsed.path)
|
|
196
|
+
text = raw.decode("utf-8", errors="ignore")
|
|
197
|
+
return _pageObservation(url, text, query, "dataUrl", source=source, statusCode=200)
|
|
198
|
+
if parsed.scheme not in {"http", "https"}:
|
|
199
|
+
return {"source": source, "url": url, "ok": False, "adapter": "http", "error": "unsupportedScheme", "snippets": []}
|
|
200
|
+
blockedReason = _privateTargetBlockReason(url, allowPrivateNetwork=allowPrivateNetwork)
|
|
201
|
+
if blockedReason:
|
|
202
|
+
return _blockedUrlObservation(url, query, source=source, error=blockedReason)
|
|
203
|
+
try:
|
|
204
|
+
request = Request(
|
|
205
|
+
url,
|
|
206
|
+
headers={
|
|
207
|
+
"User-Agent": "Mozilla/5.0 (compatible; PicuxBrowserReader/0.1; +https://picux.ai)",
|
|
208
|
+
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
|
209
|
+
"Accept-Language": "en-US,en;q=0.9,sv;q=0.7",
|
|
210
|
+
},
|
|
211
|
+
)
|
|
212
|
+
with urlopen(request, timeout=self.timeoutSec) as response:
|
|
213
|
+
raw = response.read(500_000)
|
|
214
|
+
statusCode = int(getattr(response, "status", 200) or 200)
|
|
215
|
+
text = raw.decode("utf-8", errors="ignore")
|
|
216
|
+
return _pageObservation(url, text, query, "http", source=source, statusCode=statusCode)
|
|
217
|
+
except Exception as exc:
|
|
218
|
+
return {"source": source, "url": url, "ok": False, "adapter": "http", "statusCode": int(getattr(exc, "code", 0) or 0), "error": str(exc)[:180], "snippets": []}
|
|
219
|
+
|
|
220
|
+
def _readRendered(self, targets: list[dict[str, str]], query: str, searchText: str) -> dict[str, Any]:
|
|
221
|
+
command = self._renderedCommand()
|
|
222
|
+
if not command:
|
|
223
|
+
return _renderedSkipped("missingRenderedCommand")
|
|
224
|
+
payload = {
|
|
225
|
+
"query": query,
|
|
226
|
+
"searchText": searchText,
|
|
227
|
+
"targets": targets,
|
|
228
|
+
"timeoutMs": self.renderedTimeoutMs,
|
|
229
|
+
}
|
|
230
|
+
try:
|
|
231
|
+
completed = subprocess.run(
|
|
232
|
+
list(command),
|
|
233
|
+
input=json.dumps(payload, ensure_ascii=True),
|
|
234
|
+
capture_output=True,
|
|
235
|
+
check=False,
|
|
236
|
+
text=True,
|
|
237
|
+
timeout=max(10, int((self.renderedTimeoutMs / 1000) * max(1, len(targets)) + 8)),
|
|
238
|
+
)
|
|
239
|
+
except Exception as exc:
|
|
240
|
+
return _renderedSkipped("renderedCommandFailed", str(exc)[:220])
|
|
241
|
+
if completed.returncode != 0:
|
|
242
|
+
return _renderedSkipped("renderedCommandNonZero", (completed.stderr or completed.stdout)[:220])
|
|
243
|
+
try:
|
|
244
|
+
result = json.loads(completed.stdout or "{}")
|
|
245
|
+
except Exception as exc:
|
|
246
|
+
return _renderedSkipped("renderedCommandInvalidJson", str(exc)[:220])
|
|
247
|
+
meta = result.get("meta") if isinstance(result.get("meta"), dict) else {}
|
|
248
|
+
observations = _normalizeRenderedObservations(result.get("observations", []), query)
|
|
249
|
+
renderedMeta = {
|
|
250
|
+
"attempted": bool(meta.get("attempted", True)),
|
|
251
|
+
"available": bool(meta.get("available", bool(observations))),
|
|
252
|
+
"status": str(meta.get("status", "rendered" if observations else "unavailable") or ""),
|
|
253
|
+
"fallbackReason": str(meta.get("fallbackReason", "") or ""),
|
|
254
|
+
"adapter": str(result.get("adapter", "playwrightChromium") or "playwrightChromium"),
|
|
255
|
+
}
|
|
256
|
+
if meta.get("error"):
|
|
257
|
+
renderedMeta["error"] = str(meta.get("error", "") or "")[:220]
|
|
258
|
+
for key in ("browserProvider", "executablePath", "fallbackReasonDetail"):
|
|
259
|
+
if meta.get(key):
|
|
260
|
+
renderedMeta[key] = str(meta.get(key, "") or "")[:600]
|
|
261
|
+
return {
|
|
262
|
+
"observations": observations,
|
|
263
|
+
"meta": renderedMeta,
|
|
264
|
+
}
|
|
265
|
+
|
|
266
|
+
def _renderedCommand(self) -> tuple[str, ...]:
|
|
267
|
+
if self.renderedCommand:
|
|
268
|
+
return self.renderedCommand
|
|
269
|
+
script = Path(__file__).resolve().parents[3] / "scripts" / "picux-rendered-browser.mjs"
|
|
270
|
+
return ("node", str(script)) if script.exists() else ()
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
@dataclass(frozen=True)
|
|
274
|
+
class BrowserCheckout:
|
|
275
|
+
"""Source-bound browser checkout attempt that fails closed when purchase execution is not proven."""
|
|
276
|
+
|
|
277
|
+
renderedTimeoutMs: int = 20000
|
|
278
|
+
renderedCommand: tuple[str, ...] | None = None
|
|
279
|
+
|
|
280
|
+
def run(self, payload: dict[str, Any]) -> dict[str, Any]:
|
|
281
|
+
url = str(payload.get("url", payload.get("sourceUrl", "")) or "").strip()
|
|
282
|
+
candidateId = str(payload.get("candidateId", payload.get("sourceId", "")) or "")
|
|
283
|
+
source = str(payload.get("source", "") or "")
|
|
284
|
+
execute = bool(payload.get("executePurchase", payload.get("allowPurchaseExecution", False)))
|
|
285
|
+
if not url:
|
|
286
|
+
return self._blocked("missingListingUrl", url=url, candidateId=candidateId, source=source, execute=execute)
|
|
287
|
+
parsed = urlparse(url)
|
|
288
|
+
if not execute:
|
|
289
|
+
return self._blocked("purchaseExecutionNotAuthorized", url=url, candidateId=candidateId, source=source, execute=execute)
|
|
290
|
+
if parsed.scheme in {"file", "about", "javascript"}:
|
|
291
|
+
return self._blocked("unsupportedCheckoutUrlScheme", url=url, candidateId=candidateId, source=source, execute=execute)
|
|
292
|
+
blockedReason = _privateTargetBlockReason(url, allowPrivateNetwork=bool(payload.get("allowPrivateNetwork", payload.get("allowLocalNetwork", False))))
|
|
293
|
+
if blockedReason:
|
|
294
|
+
return self._blocked(blockedReason, url=url, candidateId=candidateId, source=source, execute=execute)
|
|
295
|
+
if parsed.scheme == "data":
|
|
296
|
+
return self._dataCheckout(url, candidateId=candidateId, source=source, execute=execute)
|
|
297
|
+
rendered = self._runRendered(payload, url=url, candidateId=candidateId, source=source, execute=execute)
|
|
298
|
+
if rendered:
|
|
299
|
+
return rendered
|
|
300
|
+
return self._blocked("marketplaceCheckoutAdapterMissing", url=url, candidateId=candidateId, source=source, execute=execute)
|
|
301
|
+
|
|
302
|
+
def _dataCheckout(self, url: str, *, candidateId: str, source: str, execute: bool) -> dict[str, Any]:
|
|
303
|
+
try:
|
|
304
|
+
raw = unquote_to_bytes(urlparse(url).path.split(",", 1)[1] if "," in urlparse(url).path else urlparse(url).path)
|
|
305
|
+
text = raw.decode("utf-8", errors="ignore")
|
|
306
|
+
except Exception as exc:
|
|
307
|
+
return self._blocked(f"dataCheckoutReadFailed:{str(exc)[:80]}", url=url, candidateId=candidateId, source=source, execute=execute)
|
|
308
|
+
lowered = text.lower()
|
|
309
|
+
if "data-picux-checkout=\"success\"" in lowered or "data-picux-checkout='success'" in lowered or "purchase complete" in lowered or "order confirmed" in lowered:
|
|
310
|
+
receiptId = _stableToolId("purchase", {"url": url, "candidateId": candidateId, "source": source})
|
|
311
|
+
return {
|
|
312
|
+
"ok": True,
|
|
313
|
+
"tool": "browserCheckout",
|
|
314
|
+
"status": "purchased",
|
|
315
|
+
"reason": "",
|
|
316
|
+
"message": "Item successfully purchased",
|
|
317
|
+
"candidateId": candidateId,
|
|
318
|
+
"source": source,
|
|
319
|
+
"url": url,
|
|
320
|
+
"executePurchase": execute,
|
|
321
|
+
"receipt": {"receiptId": receiptId, "status": "confirmed", "source": source, "candidateId": candidateId, "url": url},
|
|
322
|
+
}
|
|
323
|
+
if "out of stock" in lowered or "sold out" in lowered:
|
|
324
|
+
return self._blocked("itemUnavailable", url=url, candidateId=candidateId, source=source, execute=execute)
|
|
325
|
+
if "payment declined" in lowered or "payment failed" in lowered:
|
|
326
|
+
return self._blocked("paymentDeclined", url=url, candidateId=candidateId, source=source, execute=execute)
|
|
327
|
+
if "mfa required" in lowered or "verification code" in lowered or "one-time code" in lowered:
|
|
328
|
+
blocked = self._blocked("mfaRequired", url=url, candidateId=candidateId, source=source, execute=execute)
|
|
329
|
+
return {**blocked, "status": "needsInput", "needsInput": [{"key": "mfaCode", "label": "MFA code", "secret": True}]}
|
|
330
|
+
if "login required" in lowered or "sign in" in lowered or "account required" in lowered:
|
|
331
|
+
blocked = self._blocked("loginOrAccountRequired", url=url, candidateId=candidateId, source=source, execute=execute)
|
|
332
|
+
return {**blocked, "status": "needsInput", "needsInput": [{"key": "portalSessionId", "label": "Authenticated marketplace session", "secret": False}]}
|
|
333
|
+
return self._blocked("purchaseConfirmationNotFound", url=url, candidateId=candidateId, source=source, execute=execute)
|
|
334
|
+
|
|
335
|
+
def _runRendered(self, payload: dict[str, Any], *, url: str, candidateId: str, source: str, execute: bool) -> dict[str, Any]:
|
|
336
|
+
command = self._renderedCommand()
|
|
337
|
+
if not command:
|
|
338
|
+
return {}
|
|
339
|
+
try:
|
|
340
|
+
completed = subprocess.run(
|
|
341
|
+
list(command),
|
|
342
|
+
input=json.dumps({**payload, "url": url, "candidateId": candidateId, "source": source, "executePurchase": execute, "timeoutMs": self.renderedTimeoutMs}, ensure_ascii=True),
|
|
343
|
+
capture_output=True,
|
|
344
|
+
check=False,
|
|
345
|
+
text=True,
|
|
346
|
+
timeout=max(12, int(self.renderedTimeoutMs / 1000) + 10),
|
|
347
|
+
)
|
|
348
|
+
except Exception as exc:
|
|
349
|
+
return self._blocked(f"browserCheckoutCommandFailed:{str(exc)[:120]}", url=url, candidateId=candidateId, source=source, execute=execute)
|
|
350
|
+
if completed.returncode != 0:
|
|
351
|
+
return self._blocked(f"browserCheckoutCommandNonZero:{(completed.stderr or completed.stdout)[:120]}", url=url, candidateId=candidateId, source=source, execute=execute)
|
|
352
|
+
try:
|
|
353
|
+
result = json.loads(completed.stdout or "{}")
|
|
354
|
+
except Exception as exc:
|
|
355
|
+
return self._blocked(f"browserCheckoutInvalidJson:{str(exc)[:120]}", url=url, candidateId=candidateId, source=source, execute=execute)
|
|
356
|
+
if not isinstance(result, dict):
|
|
357
|
+
return self._blocked("browserCheckoutInvalidResult", url=url, candidateId=candidateId, source=source, execute=execute)
|
|
358
|
+
return {
|
|
359
|
+
"tool": "browserCheckout",
|
|
360
|
+
"candidateId": candidateId,
|
|
361
|
+
"source": source,
|
|
362
|
+
"url": url,
|
|
363
|
+
"executePurchase": execute,
|
|
364
|
+
**result,
|
|
365
|
+
}
|
|
366
|
+
|
|
367
|
+
def _renderedCommand(self) -> tuple[str, ...]:
|
|
368
|
+
if self.renderedCommand:
|
|
369
|
+
return self.renderedCommand
|
|
370
|
+
script = Path(__file__).resolve().parents[3] / "scripts" / "picux-rendered-checkout.mjs"
|
|
371
|
+
return ("node", str(script)) if script.exists() else ()
|
|
372
|
+
|
|
373
|
+
@staticmethod
|
|
374
|
+
def _blocked(reason: str, *, url: str, candidateId: str, source: str, execute: bool) -> dict[str, Any]:
|
|
375
|
+
return {
|
|
376
|
+
"ok": False,
|
|
377
|
+
"tool": "browserCheckout",
|
|
378
|
+
"status": "blocked",
|
|
379
|
+
"reason": reason,
|
|
380
|
+
"message": f"Sorry, I am unable to make the purchase due to {reason}.",
|
|
381
|
+
"candidateId": candidateId,
|
|
382
|
+
"source": source,
|
|
383
|
+
"url": url,
|
|
384
|
+
"executePurchase": execute,
|
|
385
|
+
}
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
@dataclass(frozen=True)
|
|
389
|
+
class PortalBrowser:
|
|
390
|
+
"""Authenticated portal action runner that fails closed and returns proof-ready artifacts."""
|
|
391
|
+
|
|
392
|
+
timeoutSec: int = 8
|
|
393
|
+
renderedTimeoutMs: int = 20000
|
|
394
|
+
renderedCommand: tuple[str, ...] | None = None
|
|
395
|
+
|
|
396
|
+
def run(self, payload: dict[str, Any]) -> dict[str, Any]:
|
|
397
|
+
targetUrl = str(payload.get("targetUrl", payload.get("url", "")) or "").strip()
|
|
398
|
+
actionId = str(payload.get("portalActionId", payload.get("actionId", "")) or "")
|
|
399
|
+
session = payload.get("session", {}) if isinstance(payload.get("session"), dict) else {}
|
|
400
|
+
credentialRef = str(payload.get("credentialRef", session.get("credentialRef", "")) or "")
|
|
401
|
+
storageStateRef = str(payload.get("storageStateRef", session.get("storageStateRef", "")) or "")
|
|
402
|
+
steps = _portalStepResults(payload.get("steps", []))
|
|
403
|
+
inputs = payload.get("inputs", {}) if isinstance(payload.get("inputs"), dict) else {}
|
|
404
|
+
if not targetUrl:
|
|
405
|
+
return {**self._blocked("missingTargetUrl", actionId=actionId, targetUrl=targetUrl), "steps": _portalStepsWithStatus(steps, "blocked", reason="missingTargetUrl")}
|
|
406
|
+
scheme = urlparse(targetUrl).scheme
|
|
407
|
+
if scheme in {"file", "about", "javascript"}:
|
|
408
|
+
return {**self._blocked("unsupportedPortalTargetScheme", actionId=actionId, targetUrl=targetUrl), "steps": _portalStepsWithStatus(steps, "blocked", reason="unsupportedPortalTargetScheme")}
|
|
409
|
+
blockedReason = _privateTargetBlockReason(targetUrl, allowPrivateNetwork=bool(payload.get("allowPrivateNetwork", payload.get("allowLocalNetwork", False))))
|
|
410
|
+
if blockedReason:
|
|
411
|
+
return {**self._blocked(blockedReason, actionId=actionId, targetUrl=targetUrl), "steps": _portalStepsWithStatus(steps, "blocked", reason=blockedReason)}
|
|
412
|
+
if not credentialRef and not storageStateRef and not targetUrl.startswith("data:"):
|
|
413
|
+
return {
|
|
414
|
+
**self._blocked("authRequired", actionId=actionId, targetUrl=targetUrl),
|
|
415
|
+
"status": "needsInput",
|
|
416
|
+
"needsInput": [{"key": "credentialRef", "label": "Credential reference", "secret": True}],
|
|
417
|
+
"steps": _portalStepsWithStatus(steps, "pending", reason="authRequired"),
|
|
418
|
+
}
|
|
419
|
+
missingInputs = _missingPortalInputs(steps, inputs, requiredKeys=_portalRequiredInputs(payload, steps))
|
|
420
|
+
if missingInputs:
|
|
421
|
+
return {
|
|
422
|
+
**self._blocked("missingPortalInputs", actionId=actionId, targetUrl=targetUrl),
|
|
423
|
+
"status": "needsInput",
|
|
424
|
+
"needsInput": missingInputs,
|
|
425
|
+
"steps": _portalStepsWithMissingInputs(steps, missingInputs),
|
|
426
|
+
}
|
|
427
|
+
allowNetwork = bool(payload.get("allowNetwork", payload.get("live", payload.get("executePortal", False))))
|
|
428
|
+
rendered = self._runRendered(payload, targetUrl=targetUrl, actionId=actionId, steps=steps, allowNetwork=allowNetwork)
|
|
429
|
+
if rendered:
|
|
430
|
+
return rendered
|
|
431
|
+
text = self._readTarget(targetUrl, allowNetwork=allowNetwork)
|
|
432
|
+
lowered = text.lower()
|
|
433
|
+
if any(token in lowered for token in ("mfa required", "2fa", "verification code", "one-time code")):
|
|
434
|
+
return {
|
|
435
|
+
**self._blocked("mfaRequired", actionId=actionId, targetUrl=targetUrl),
|
|
436
|
+
"status": "needsProxy",
|
|
437
|
+
"requiresProxy": True,
|
|
438
|
+
"needsInput": [{"key": "mfaCode", "label": "Verification code", "secret": True}],
|
|
439
|
+
"steps": _portalStepsWithStatus(steps, "pending", reason="mfaRequired"),
|
|
440
|
+
}
|
|
441
|
+
if any(token in lowered for token in ("login required", "sign in required", "authentication required")):
|
|
442
|
+
return {
|
|
443
|
+
**self._blocked("loginRequired", actionId=actionId, targetUrl=targetUrl),
|
|
444
|
+
"status": "needsInput",
|
|
445
|
+
"needsInput": [{"key": "credentialRef", "label": "Credential reference", "secret": True}],
|
|
446
|
+
"steps": _portalStepsWithStatus(steps, "pending", reason="loginRequired"),
|
|
447
|
+
}
|
|
448
|
+
if "data-picux-portal=\"success\"" in lowered or "data-picux-portal='success'" in lowered or "claim submitted" in lowered or "form submitted" in lowered or "record retrieved" in lowered:
|
|
449
|
+
proofId = _stableToolId("portalProof", {"actionId": actionId, "targetUrl": targetUrl, "text": text[:500]})
|
|
450
|
+
completedSteps = _portalStepsWithStatus(steps, "succeeded", reason="")
|
|
451
|
+
outcome = _portalOutcome(text)
|
|
452
|
+
return {
|
|
453
|
+
"ok": True,
|
|
454
|
+
"tool": "portalBrowser",
|
|
455
|
+
"status": "succeeded",
|
|
456
|
+
"message": "Portal action completed.",
|
|
457
|
+
"portalActionId": actionId,
|
|
458
|
+
"targetUrl": targetUrl,
|
|
459
|
+
"steps": completedSteps,
|
|
460
|
+
"proof": {
|
|
461
|
+
"proofId": proofId,
|
|
462
|
+
"kind": "domSnapshot",
|
|
463
|
+
"status": "captured",
|
|
464
|
+
"label": "Portal completion snapshot",
|
|
465
|
+
"artifactRef": proofId,
|
|
466
|
+
"sourceUrl": targetUrl,
|
|
467
|
+
"hash": "sha256:" + hashlib.sha256(text.encode("utf-8")).hexdigest(),
|
|
468
|
+
"meta": {"snippet": _redact(_stripTags(text)[:700]), "outcome": outcome},
|
|
469
|
+
},
|
|
470
|
+
"result": {**outcome, "sourceBound": True, "stepSummary": _portalStepSummary(completedSteps)},
|
|
471
|
+
}
|
|
472
|
+
return {**self._blocked("completionProofNotFound", actionId=actionId, targetUrl=targetUrl), "steps": _portalStepsWithStatus(steps, "attempted", reason="completionProofNotFound")}
|
|
473
|
+
|
|
474
|
+
def _runRendered(self, payload: dict[str, Any], *, targetUrl: str, actionId: str, steps: list[dict[str, Any]], allowNetwork: bool) -> dict[str, Any]:
|
|
475
|
+
parsed = urlparse(targetUrl)
|
|
476
|
+
if parsed.scheme not in {"http", "https"} or not allowNetwork:
|
|
477
|
+
return {}
|
|
478
|
+
command = self._renderedCommand()
|
|
479
|
+
if not command:
|
|
480
|
+
return {}
|
|
481
|
+
try:
|
|
482
|
+
completed = subprocess.run(
|
|
483
|
+
list(command),
|
|
484
|
+
input=json.dumps({**payload, "targetUrl": targetUrl, "portalActionId": actionId, "steps": steps, "timeoutMs": self.renderedTimeoutMs}, ensure_ascii=True),
|
|
485
|
+
capture_output=True,
|
|
486
|
+
check=False,
|
|
487
|
+
text=True,
|
|
488
|
+
timeout=max(12, int(self.renderedTimeoutMs / 1000) + 10),
|
|
489
|
+
)
|
|
490
|
+
except Exception as exc:
|
|
491
|
+
return {**self._blocked(f"renderedPortalCommandFailed:{str(exc)[:120]}", actionId=actionId, targetUrl=targetUrl), "steps": _portalStepsWithStatus(steps, "blocked", reason="renderedPortalCommandFailed")}
|
|
492
|
+
if completed.returncode != 0:
|
|
493
|
+
return {**self._blocked(f"renderedPortalCommandNonZero:{(completed.stderr or completed.stdout)[:120]}", actionId=actionId, targetUrl=targetUrl), "steps": _portalStepsWithStatus(steps, "blocked", reason="renderedPortalCommandNonZero")}
|
|
494
|
+
try:
|
|
495
|
+
result = json.loads(completed.stdout or "{}")
|
|
496
|
+
except Exception as exc:
|
|
497
|
+
return {**self._blocked(f"renderedPortalInvalidJson:{str(exc)[:120]}", actionId=actionId, targetUrl=targetUrl), "steps": _portalStepsWithStatus(steps, "blocked", reason="renderedPortalInvalidJson")}
|
|
498
|
+
if not isinstance(result, dict):
|
|
499
|
+
return {**self._blocked("renderedPortalInvalidResult", actionId=actionId, targetUrl=targetUrl), "steps": _portalStepsWithStatus(steps, "blocked", reason="renderedPortalInvalidResult")}
|
|
500
|
+
return {"tool": "portalBrowser", "portalActionId": actionId, "targetUrl": targetUrl, **result}
|
|
501
|
+
|
|
502
|
+
def _renderedCommand(self) -> tuple[str, ...]:
|
|
503
|
+
if self.renderedCommand:
|
|
504
|
+
return self.renderedCommand
|
|
505
|
+
script = Path(__file__).resolve().parents[3] / "scripts" / "picux-rendered-portal.mjs"
|
|
506
|
+
return ("node", str(script)) if script.exists() else ()
|
|
507
|
+
|
|
508
|
+
def _readTarget(self, targetUrl: str, *, allowNetwork: bool) -> str:
|
|
509
|
+
parsed = urlparse(targetUrl)
|
|
510
|
+
if parsed.scheme == "data":
|
|
511
|
+
raw = unquote_to_bytes(parsed.path.split(",", 1)[1] if "," in parsed.path else parsed.path)
|
|
512
|
+
return raw.decode("utf-8", errors="ignore")
|
|
513
|
+
if parsed.scheme in {"http", "https"} and allowNetwork:
|
|
514
|
+
req = Request(targetUrl, headers={"User-Agent": "PicuxPortalBrowser/0.1", "Accept": "text/html,application/xhtml+xml"})
|
|
515
|
+
with urlopen(req, timeout=self.timeoutSec) as response:
|
|
516
|
+
return response.read(500_000).decode("utf-8", errors="ignore")
|
|
517
|
+
return ""
|
|
518
|
+
|
|
519
|
+
@staticmethod
|
|
520
|
+
def _blocked(reason: str, *, actionId: str, targetUrl: str) -> dict[str, Any]:
|
|
521
|
+
return {
|
|
522
|
+
"ok": False,
|
|
523
|
+
"tool": "portalBrowser",
|
|
524
|
+
"status": "blocked",
|
|
525
|
+
"reason": reason,
|
|
526
|
+
"message": f"Portal action blocked: {reason}.",
|
|
527
|
+
"portalActionId": actionId,
|
|
528
|
+
"targetUrl": targetUrl,
|
|
529
|
+
}
|
|
530
|
+
|
|
531
|
+
|
|
532
|
+
@dataclass(frozen=True)
class MapTool:
    """Source-bound local place search for HUNT/RESOLVE without fabricating places.

    Places come, in order of preference, from the caller's own payload, the
    Google Places web service (when an API key is available) or
    OpenStreetMap Nominatim (when networking is allowed).
    """

    # Timeout in seconds handed to ``urlopen`` by ``_fetchJson``.
    timeoutSec: int = 6
    # Optional ``fetcher(url, headers)`` callable used instead of ``urlopen``.
    fetcher: Any | None = None

    def search(self, payload: dict[str, Any]) -> dict[str, Any]:
        """Resolve the payload into a place-search result envelope.

        The envelope (see ``_result``) reports the provider used, a status
        and the normalized places. Provider failures are reported as
        ``status="ioError"`` instead of being raised.
        """
        query = _query(payload)
        entity = _serviceEntity(query, payload)
        location = _placeLocation(query, payload)
        radiusKm = _radiusKm(query, payload)
        allowNetwork = bool(payload.get("allowNetwork", payload.get("live", False)))
        supplied = payload.get("places", payload.get("results", []))
        suppliedPlaces = supplied if isinstance(supplied, list) else []

        def respond(provider: str, places: list[dict[str, Any]], status: str, network: bool, errors: Any = None) -> dict[str, Any]:
            # Every branch shares the same query description.
            return self._result(
                query=query,
                provider=provider,
                entity=entity,
                location=location,
                radiusKm=radiusKm,
                places=places,
                status=status,
                allowNetwork=network,
                errors=errors,
            )

        def outcome(places: list[dict[str, Any]]) -> str:
            return "found" if places else "searchedNoPlaces"

        if not (entity and location):
            # Only the first missing part is reported.
            gap = "missing:entity" if not entity else "missing:location"
            return respond("mapTool", [], "needsPlaceQuery", allowNetwork, [gap])

        if suppliedPlaces:
            places = self._normalizeSuppliedPlaces(suppliedPlaces, entity=entity, location=location, radiusKm=radiusKm)
            return respond("clientPlaces", places, outcome(places), allowNetwork)

        keyCandidates = (
            payload.get("googlePlacesApiKey"),
            payload.get("googleMapsApiKey"),
            os.environ.get("PICUX_GOOGLE_PLACES_API_KEY", ""),
            os.environ.get("PICUX_GOOGLE_MAPS_API_KEY", ""),
            os.environ.get("GOOGLE_PLACES_API_KEY", ""),
            os.environ.get("GOOGLE_MAPS_API_KEY", ""),
        )
        googleKey = str(next((candidate for candidate in keyCandidates if candidate), "")).strip()
        if googleKey:
            # A usable key triggers the Google lookup regardless of allowNetwork.
            try:
                override = os.environ.get("PICUX_GOOGLE_PLACES_BASE_URL", "")
                baseUrl = str(override or "https://maps.googleapis.com/maps/api/place").rstrip("/")
                places = self._googlePlaces(query=query, entity=entity, location=location, radiusKm=radiusKm, apiKey=googleKey, baseUrl=baseUrl)
                return respond("googlePlaces", places, outcome(places), True)
            except Exception as exc:
                return respond("googlePlaces", [], "ioError", True, [str(exc)[:220]])

        if not allowNetwork:
            return respond("googlePlaces", [], "needsPlacesApiKeyOrNetwork", False, ["missing:GOOGLE_PLACES_API_KEY"])

        try:
            places = self._openStreetMap(query=query, entity=entity, location=location, radiusKm=radiusKm)
            return respond("openStreetMapNominatim", places, outcome(places), True)
        except Exception as exc:
            return respond("openStreetMapNominatim", [], "ioError", True, [str(exc)[:220]])

    def _googlePlaces(self, *, query: str, entity: str, location: str, radiusKm: float, apiKey: str, baseUrl: str) -> list[dict[str, Any]]:
        """Run a Google Places text search and normalize up to ten hits.

        Each hit that has a ``place_id`` costs one extra details request
        (phone, website, address). Hits that end up without a name are
        dropped.
        """
        searchText = _placeSearchText(entity, location)
        params = {
            "query": searchText,
            "radius": int(max(1.0, radiusKm) * 1000),
            "key": apiKey,
        }
        url = f"{baseUrl}/textsearch/json?" + urlencode(params)
        data = self._fetchJson(url, headers={"Accept": "application/json"})
        hits = data.get("results", []) if isinstance(data, dict) else []

        found: list[dict[str, Any]] = []
        for hit in hits[:10]:
            if not isinstance(hit, dict):
                continue
            geometry = hit.get("geometry")
            if not isinstance(geometry, dict):
                geometry = {}
            coords = geometry.get("location")
            if not isinstance(coords, dict):
                coords = {}
            name = str(hit.get("name", "") or "").strip()
            address = str(hit.get("formatted_address", hit.get("vicinity", "")) or "").strip()
            placeId = str(hit.get("place_id", "") or "").strip()
            details: dict[str, Any] = {}
            if placeId:
                details = self._googlePlaceDetails(placeId=placeId, apiKey=apiKey, baseUrl=baseUrl)
            mapsQuery = {"api": "1", "query": f"{name} {address}".strip() or searchText, "query_place_id": placeId}
            sourceUrl = "https://www.google.com/maps/search/?" + urlencode(mapsQuery)
            record = {
                "placeId": placeId or _stableToolId("place", {"provider": "googlePlaces", "name": name, "address": address}),
                "name": name or str(details.get("name", "") or "").strip(),
                "category": _first(hit.get("types", [])),
                "address": address or str(details.get("formatted_address", "") or "").strip(),
                "phone": details.get("international_phone_number") or details.get("formatted_phone_number") or "",
                "website": details.get("website") or "",
                "rating": hit.get("rating", 0),
                "lat": coords.get("lat", 0),
                "lng": coords.get("lng", 0),
                "source": "googlePlaces",
                "sourceUrl": sourceUrl,
                "sourceBound": bool(placeId or sourceUrl),
                "entity": entity,
            }
            cleaned = _cleanPlace(record)
            if cleaned.get("name"):
                found.append(cleaned)
        return found

    def _googlePlaceDetails(self, *, placeId: str, apiKey: str, baseUrl: str) -> dict[str, Any]:
        """Fetch the Place Details ``result`` object, or ``{}`` when unavailable."""
        if not placeId:
            return {}
        fields = "name,formatted_phone_number,international_phone_number,formatted_address,address_components,website"
        params = {"place_id": placeId, "fields": fields, "key": apiKey}
        data = self._fetchJson(f"{baseUrl}/details/json?" + urlencode(params), headers={"Accept": "application/json"})
        if not isinstance(data, dict):
            return {}
        result = data.get("result", {})
        return result if isinstance(result, dict) else {}

    def _openStreetMap(self, *, query: str, entity: str, location: str, radiusKm: float) -> list[dict[str, Any]]:
        """Search OpenStreetMap Nominatim and normalize up to ten hits.

        Hits that end up without a name are dropped.
        """
        searchText = _placeSearchText(entity, location)
        params = {
            "format": "jsonv2",
            "q": searchText,
            "limit": 10,
            "addressdetails": 1,
        }
        requestHeaders = {
            "Accept": "application/json",
            "User-Agent": "PicuxMapTool/0.1 (source-bound local search; https://picux.ai)",
        }
        data = self._fetchJson("https://nominatim.openstreetmap.org/search?" + urlencode(params), headers=requestHeaders)
        hits = data if isinstance(data, list) else []

        found: list[dict[str, Any]] = []
        for hit in hits[:10]:
            if not isinstance(hit, dict):
                continue
            osmType = str(hit.get("osm_type", "") or "").lower()
            osmId = str(hit.get("osm_id", "") or "")
            placeId = str(hit.get("place_id", "") or osmId or "")
            name = str(hit.get("name", "") or hit.get("display_name", "") or "").strip()
            address = str(hit.get("display_name", "") or "").strip()
            # Link straight to the OSM object when it is known, otherwise to a search.
            if osmId and osmType in {"node", "way", "relation"}:
                sourceUrl = f"https://www.openstreetmap.org/{osmType}/{osmId}"
            else:
                sourceUrl = "https://www.openstreetmap.org/search?" + urlencode({"query": searchText})
            record = {
                "placeId": placeId or _stableToolId("place", {"provider": "openStreetMapNominatim", "name": name, "address": address}),
                "name": name,
                "category": str(hit.get("type", hit.get("class", "")) or ""),
                "address": address,
                "lat": hit.get("lat", 0),
                "lng": hit.get("lon", 0),
                "source": "openStreetMapNominatim",
                "sourceUrl": sourceUrl,
                "sourceBound": bool(placeId or sourceUrl),
                "entity": entity,
            }
            cleaned = _cleanPlace(record)
            if cleaned.get("name"):
                found.append(cleaned)
        return found

    def _normalizeSuppliedPlaces(self, raw: list[Any], *, entity: str, location: str, radiusKm: float) -> list[dict[str, Any]]:
        """Normalize up to twenty caller-supplied places.

        Non-dict entries are skipped and nameless places are dropped. A
        place without an id gets a stable one derived from its position,
        name, address and source URL.
        """
        kept: list[dict[str, Any]] = []
        for position, entry in enumerate(raw[:20], start=1):
            if not isinstance(entry, dict):
                continue
            merged = dict(entry)
            merged["entity"] = str(entry.get("entity", entity) or entity)
            merged["source"] = str(entry.get("source", "clientPlaces") or "clientPlaces")
            place = _cleanPlace(merged)
            if not place.get("placeId"):
                seed = {
                    "index": position,
                    "name": place.get("name", ""),
                    "address": place.get("address", ""),
                    "sourceUrl": place.get("sourceUrl", ""),
                }
                place["placeId"] = _stableToolId("place", seed)
            place["sourceBound"] = bool(place.get("sourceUrl") or place.get("placeId"))
            if place.get("name"):
                kept.append(place)
        return kept

    def _fetchJson(self, url: str, *, headers: dict[str, str]) -> Any:
        """Return the decoded JSON document at ``url``.

        Delegates to ``fetcher`` when one is configured; otherwise performs
        the request itself, reading at most 500,000 bytes. An empty body
        decodes to ``None``.
        """
        fetch = self.fetcher
        if fetch:
            return fetch(url, headers)
        with urlopen(Request(url, headers=headers), timeout=self.timeoutSec) as response:
            body = response.read(500_000)
        text = body.decode("utf-8", errors="ignore")
        return json.loads(text or "null")

    @staticmethod
    def _result(*, query: str, provider: str, entity: str, location: str, radiusKm: float, places: list[dict[str, Any]], status: str, allowNetwork: bool, errors: list[str] | None = None) -> dict[str, Any]:
        """Assemble the result envelope returned by ``search``.

        ``ok`` is true when places were found, or when the status is one of
        the two non-error empty outcomes (``searchedNoPlaces``,
        ``needsPlacesApiKeyOrNetwork``).
        """
        benignEmpty = {"searchedNoPlaces", "needsPlacesApiKeyOrNetwork"}
        described = {
            "text": query,
            "entity": entity,
            "location": location,
            "radiusKm": radiusKm,
            "radiusMeters": int(max(1.0, radiusKm) * 1000),
        }
        envelope: dict[str, Any] = {"ok": bool(places) or status in benignEmpty}
        envelope["tool"] = "mapTool"
        envelope["provider"] = provider
        envelope["status"] = status
        envelope["query"] = described
        envelope["places"] = places
        envelope["errors"] = errors or []
        envelope["networkAttempted"] = allowNetwork
        return envelope
|
|
730
|
+
|
|
731
|
+
|
|
732
|
+
@dataclass(frozen=True)
class SharedToolbox:
    """Bundle of the shared tools, each exposed through one delegating method.

    Every field defaults to a fresh instance of its tool, so a bare
    ``SharedToolbox()`` is usable; pass a replacement to swap a tool out.
    """

    nlpTool: NLPTool = field(default_factory=NLPTool)
    imageReader: ImageReader = field(default_factory=ImageReader)
    browserReader: BrowserReader = field(default_factory=BrowserReader)
    browserCheckout: BrowserCheckout = field(default_factory=BrowserCheckout)
    portalBrowser: PortalBrowser = field(default_factory=PortalBrowser)
    mapTool: MapTool = field(default_factory=MapTool)

    def nlp(self, payload: dict[str, Any]) -> dict[str, Any]:
        """Hand ``payload`` to the NLP tool's ``run``."""
        tool = self.nlpTool
        return tool.run(payload)

    def readImage(self, payload: dict[str, Any]) -> dict[str, Any]:
        """Hand ``payload`` to the image reader's ``read``."""
        tool = self.imageReader
        return tool.read(payload)

    def readBrowser(self, payload: dict[str, Any]) -> dict[str, Any]:
        """Hand ``payload`` to the browser reader's ``read``."""
        tool = self.browserReader
        return tool.read(payload)

    def checkoutBrowser(self, payload: dict[str, Any]) -> dict[str, Any]:
        """Hand ``payload`` to the browser checkout tool's ``run``."""
        tool = self.browserCheckout
        return tool.run(payload)

    def runPortal(self, payload: dict[str, Any]) -> dict[str, Any]:
        """Hand ``payload`` to the portal browser's ``run``."""
        tool = self.portalBrowser
        return tool.run(payload)

    def searchMap(self, payload: dict[str, Any]) -> dict[str, Any]:
        """Hand ``payload`` to the map tool's ``search``."""
        tool = self.mapTool
        return tool.search(payload)
|
|
758
|
+
|
|
759
|
+
|
|
760
|
+
def _query(payload: dict[str, Any]) -> str:
|
|
761
|
+
return str(payload.get("query", payload.get("request", payload.get("goal", payload.get("text", "")))) or "").strip()
|
|
762
|
+
|
|
763
|
+
|
|
764
|
+
def _isLocalServiceQuery(query: str) -> bool:
    """Tell whether ``query`` asks for a service provider in some place.

    True when the spell-fixed, lower-cased text contains at least one
    service term and at least one place term. Matching is by plain
    substring.
    """
    text = _spellfixLocation(str(query or "")).lower()
    serviceTerms = (
        "mechanic",
        "plumber",
        "electrician",
        "lawyer",
        "consultant",
        "doctor",
        "dentist",
        "cleaner",
        "repair",
        "garage",
        "workshop",
        "contractor",
        "installer",
        "handyman",
        "agent",
        "broker",
        "small business",
        "local business",
    )
    placeTerms = ("near", "nearby", "within", "around", "close to", "km", "kilometer", "kilometre", "hisingen", "goteborg", "gothenburg")
    if not any(term in text for term in serviceTerms):
        return False
    return any(term in text for term in placeTerms)
|
|
788
|
+
|
|
789
|
+
|
|
790
|
+
def _isResolveCase(query: str) -> bool:
|
|
791
|
+
lowered = str(query or "").lower()
|
|
792
|
+
if any(token in lowered for token in ("dispute", "claim", "complaint", "broken", "damaged", "defective", "fake", "scam", "verify", "audit", "evidence", "receipt", "screenshot")):
|
|
793
|
+
return True
|
|
794
|
+
damagedDelivery = any(token in lowered for token in ("in-transit", "in transit", "transit", "delivery", "delivered")) and any(token in lowered for token in ("tv", "item", "goods", "package", "order"))
|
|
795
|
+
merchantNoFollowup = any(token in lowered for token in ("followup", "follow up", "follow-up", "did not follow", "no response", "customer representative", "customer represent"))
|
|
796
|
+
return damagedDelivery or merchantNoFollowup
|
|
797
|
+
|
|
798
|
+
|
|
799
|
+
def _serviceEntity(query: str, payload: dict[str, Any]) -> str:
    """Work out which kind of service provider is being asked for.

    An explicit ``entity``/``service``/``businessType`` in ``payload`` wins.
    Otherwise the query is scanned for a known trade (singular or plural,
    first hit in list order), and finally for the noun phrase that follows
    a verb such as "find" or "need". Returns ``""`` when nothing matches.
    """
    explicit = str(payload.get("entity", payload.get("service", payload.get("businessType", ""))) or "").strip()
    if explicit:
        return _cleanServiceEntity(explicit)

    text = _spellfixLocation(str(query or "")).lower()
    known = (
        "mechanic",
        "plumber",
        "electrician",
        "lawyer",
        "consultant",
        "doctor",
        "dentist",
        "cleaner",
        "garage",
        "workshop",
        "contractor",
        "installer",
        "handyman",
        "broker",
        "agent",
    )
    trade = next((term for term in known if re.search(rf"\b{re.escape(term)}s?\b", text)), None)
    if trade is not None:
        return trade

    # Free-form fallback: the phrase between the request verb and the place.
    subject = re.search(
        r"\b(?:need|find|search(?: for)?|locate|looking for|get)\s+(?:a|an|the|my|aa)?\s*(.+?)(?:\s+(?:within|near|nearby|around|close to|in)\b|$)",
        text,
        re.IGNORECASE,
    )
    if subject is None:
        return ""
    return _cleanServiceEntity(subject.group(1))
|
|
832
|
+
|
|
833
|
+
|
|
834
|
+
def _cleanServiceEntity(value: str) -> str:
|
|
835
|
+
clean = re.sub(r"\b(?:a|an|the|my|aa|local|nearby|best|good|trusted)\b", " ", str(value or ""), flags=re.IGNORECASE)
|
|
836
|
+
clean = re.sub(r"[^A-Za-z0-9 &/-]+", " ", clean)
|
|
837
|
+
clean = re.sub(r"\s+", " ", clean).strip().lower()
|
|
838
|
+
return clean[:80]
|
|
839
|
+
|
|
840
|
+
|
|
841
|
+
def _placeLocation(query: str, payload: dict[str, Any]) -> str:
    """Work out where the search should happen.

    An explicit ``location``/``near``/``area`` in ``payload`` wins (after
    spell-fixing). Otherwise the location is the tail of the query after
    "within N km of/from ..." or after "near/around/in ...", cut at the
    first trailing clause ("for", "with", "that", ...) and capped at 140
    characters. Returns ``""`` when none is found.
    """
    explicit = str(payload.get("location", payload.get("near", payload.get("area", ""))) or "").strip()
    if explicit:
        return _spellfixLocation(explicit)

    text = _spellfixLocation(str(query or ""))
    tailPatterns = (
        r"\bwithin\s+[0-9]+(?:[,.][0-9]+)?\s*(?:km|kilometers?|kilometres?)\s+(?:of|from)\s+(.+)$",
        r"\b(?:near|nearby|around|close to|in)\s+(.+)$",
    )
    for pattern in tailPatterns:
        found = re.search(pattern, text, re.IGNORECASE)
        if found is None:
            continue
        # Drop whatever follows the place itself, then trailing punctuation.
        tail = re.split(r"\b(?:for|with|that|who|and then)\b", found.group(1), maxsplit=1, flags=re.IGNORECASE)[0]
        tail = re.sub(r"[.?!]+$", "", tail).strip(" ,")
        if tail:
            return tail[:140]
    return ""
|
|
860
|
+
|
|
861
|
+
|
|
862
|
+
def _radiusKm(query: str, payload: dict[str, Any]) -> float:
|
|
863
|
+
raw = payload.get("radiusKm", payload.get("distanceKm", payload.get("radius", "")))
|
|
864
|
+
try:
|
|
865
|
+
if raw not in {"", None}:
|
|
866
|
+
value = float(str(raw).replace(",", "."))
|
|
867
|
+
if value > 0:
|
|
868
|
+
return round(min(100.0, value), 2)
|
|
869
|
+
except Exception:
|
|
870
|
+
pass
|
|
871
|
+
match = re.search(r"\b(?:within|inside|under|less than)?\s*([0-9]+(?:[,.][0-9]+)?)\s*(?:km|kilometers?|kilometres?)\b", str(query or ""), re.IGNORECASE)
|
|
872
|
+
if match:
|
|
873
|
+
return round(min(100.0, float(match.group(1).replace(",", "."))), 2)
|
|
874
|
+
return 10.0
|
|
875
|
+
|
|
876
|
+
|
|
877
|
+
def _spellfixLocation(value: str) -> str:
|
|
878
|
+
fixed = str(value or "")
|
|
879
|
+
replacements = {
|
|
880
|
+
"hisigen": "Hisingen",
|
|
881
|
+
"hissigen": "Hisingen",
|
|
882
|
+
"hissingen": "Hisingen",
|
|
883
|
+
"goteborg": "Goteborg",
|
|
884
|
+
"gothenburg": "Goteborg",
|
|
885
|
+
}
|
|
886
|
+
for wrong, right in replacements.items():
|
|
887
|
+
fixed = re.sub(rf"\b{wrong}\b", right, fixed, flags=re.IGNORECASE)
|
|
888
|
+
return fixed
|
|
889
|
+
|
|
890
|
+
|
|
891
|
+
def _placeSearchText(entity: str, location: str) -> str:
    """Build the free-text query sent to the place providers.

    "mechanic" is rewritten to a term that searches better: the Swedish
    "bilverkstad" when the location names one of the listed Swedish places,
    "car repair" otherwise. Every other entity becomes
    "<entity> near <location>".
    """
    cleanEntity = str(entity or "").strip().lower()
    cleanLocation = _spellfixLocation(str(location or "").strip())
    if cleanEntity != "mechanic":
        return f"{cleanEntity} near {cleanLocation}".strip()

    lowerLocation = cleanLocation.lower()
    swedishHints = ("hisingen", "goteborg", "sweden", "stockholm", "malmo")
    if any(hint in lowerLocation for hint in swedishHints):
        return f"bilverkstad {cleanLocation}".strip()
    return f"car repair {cleanLocation}".strip()
|
|
900
|
+
|
|
901
|
+
|
|
902
|
+
def _cleanPlace(raw: dict[str, Any]) -> dict[str, Any]:
    """Normalize a provider- or client-shaped place dict into one schema.

    Each output field is taken from the first alias key present in ``raw``,
    coerced to its type and capped in length. When no source URL is known
    but both coordinates are non-zero, a Google Maps search link for the
    coordinates is synthesized and the place is marked source-bound.
    """

    def pick(*keys: str, default: Any = "") -> Any:
        # Value of the first key that is present, even if that value is empty.
        for key in keys:
            if key in raw:
                return raw[key]
        return default

    def squeeze(value: Any) -> str:
        return re.sub(r"\s+", " ", str(value or "")).strip()

    place = {
        "placeId": str(pick("placeId", "place_id", "id") or ""),
        "name": squeeze(pick("name", "title"))[:220],
        "category": str(pick("category", "type") or "")[:120],
        "address": squeeze(pick("address", "formatted_address"))[:300],
        "phone": str(pick("phone", "phoneNumber", "international_phone_number") or "")[:80],
        "website": str(pick("website", "url") or "")[:600],
        "rating": _safeFloat(pick("rating", default=0.0)),
        "lat": _safeFloat(pick("lat", "latitude", default=0.0)),
        "lng": _safeFloat(pick("lng", "lon", "longitude", default=0.0)),
        "distanceKm": _safeFloat(pick("distanceKm", "distance_km", default=0.0)),
        "source": str(pick("source", "provider", default="mapTool") or "mapTool")[:80],
        "sourceUrl": str(pick("sourceUrl", "mapsUrl", "url") or "")[:600],
        "sourceBound": bool(pick("sourceBound", "sourceUrl", "placeId", "place_id")),
        "entity": str(pick("entity", "service") or "")[:80],
    }
    hasCoordinates = place["lat"] and place["lng"]
    if hasCoordinates and not place["sourceUrl"]:
        coordinates = f"{place['lat']},{place['lng']}"
        place["sourceUrl"] = "https://www.google.com/maps/search/?" + urlencode({"api": "1", "query": coordinates})
        place["sourceBound"] = True
    return place
|
|
923
|
+
|
|
924
|
+
|
|
925
|
+
def _first(value: Any) -> str:
|
|
926
|
+
if isinstance(value, list):
|
|
927
|
+
for item in value:
|
|
928
|
+
clean = str(item or "").strip()
|
|
929
|
+
if clean:
|
|
930
|
+
return clean
|
|
931
|
+
return str(value or "").strip()
|
|
932
|
+
|
|
933
|
+
|
|
934
|
+
def _route(query: str, *, seed: str) -> list[str]:
    """Choose the ordered list of domains that should handle ``query``.

    ``seed`` is the proposed starting domain. It is downgraded when the
    text does not back it up ("pay" without payment words, "proxy" without
    hand-off words) and upgraded to "resolve" for dispute-like queries.
    Further domains are added from keyword hits, "pay" always drags
    "resolve" along, and the result is emitted in a fixed order:
    resolve-first for dispute queries that do not involve hunting,
    hunt-first otherwise. Only members of ``SHARED_DOMAINS`` are returned.
    """
    lowered = query.lower()

    def mentions(*tokens: str) -> bool:
        return any(token in lowered for token in tokens)

    localService = _isLocalServiceQuery(query)
    explicitResolve = _isResolveCase(query)
    explicitProxy = mentions("place a call", "make a call", "call merchant", "call support", "human", "lawyer", "consultant", "manual review", "logistics", "pickup", "delivery")
    explicitPay = mentions("pay", "payment", "settle", "settlement", "escrow")

    # A seed the text does not support falls back to resolve/hunt.
    fallback = "resolve" if explicitResolve else "hunt"
    if (seed == "pay" and not explicitPay) or (seed == "proxy" and not explicitProxy):
        seed = fallback
    if explicitResolve and seed in {"hunt", "bridge"}:
        seed = "resolve"

    route = [seed] if seed in SHARED_DOMAINS else ["hunt"]

    wantsHunt = localService or mentions("find", "near", "within", "km", "buy", "shop", "shopping", "best", "option", "lead", "opportunity", "source", "market", "property", "apartment", "house", "mechanic", "plumber", "electrician", "repair")
    if not explicitResolve and wantsHunt and "hunt" not in route:
        route.insert(0, "hunt")

    wantsBridge = mentions("email", "merchant", "bank", "service", "support", "api", "webhook", "connector", "whatsapp", "chat", "gmail", "slack", "jira", "notion", "sap", "representative", "followup", "follow up")
    if wantsBridge and "bridge" not in route:
        route.append("bridge")

    for wanted, domain in ((explicitResolve, "resolve"), (explicitProxy, "proxy"), (explicitPay, "pay")):
        if wanted and domain not in route:
            route.append(domain)

    # Paying always goes through resolve first.
    if "pay" in route and "resolve" not in route:
        route.insert(max(1, route.index("pay")), "resolve")

    unique = list(dict.fromkeys([item for item in route if item in SHARED_DOMAINS]))
    if explicitResolve and "hunt" not in unique:
        order = ["resolve", "bridge", "proxy", "pay", "hunt"]
    else:
        order = ["hunt", "bridge", "resolve", "proxy", "pay"]
    return [domain for domain in order if domain in unique]
|
|
962
|
+
|
|
963
|
+
|
|
964
|
+
def _entities(query: str, payload: dict[str, Any]) -> dict[str, Any]:
    """Collect every entity extracted from ``query`` and ``payload``.

    The service entity is computed once and reported under both ``entity``
    and ``service``; ``locale`` is copied from the payload as a string.
    """
    serviceEntity = _serviceEntity(query, payload)
    return dict(
        item=_item(query),
        entity=serviceEntity,
        service=serviceEntity,
        location=_placeLocation(query, payload),
        radiusKm=_radiusKm(query, payload),
        budgetUsd=_budget(query, payload),
        target=_target(query),
        phone=_phone(query, payload),
        email=_email(query),
        market=_market(query),
        country=_country(query, payload),
        region=_region(query, payload),
        locale=str(payload.get("locale", "") or ""),
        budgetCurrency=_budgetCurrency(query, payload),
    )
|
|
982
|
+
|
|
983
|
+
|
|
984
|
+
def _intentKind(query: str) -> str:
    """Classify ``query`` into one coarse intent label.

    Checked in order: local service search, dispute, discovery (shopping or
    property words), dispute keywords, hand-off to a human; anything else
    is "general".
    """
    lowered = query.lower()
    if _isLocalServiceQuery(query):
        return "localService"
    if _isResolveCase(query):
        return "dispute"

    keywordRules = (
        ("discovery", ("buy", "shop", "shopping", "property", "house", "apartment")),
        # Kept as a fallback; these tokens are also on _isResolveCase's list.
        ("dispute", ("claim", "dispute", "damaged", "broken", "scam", "fake")),
        ("handoff", ("call merchant", "call support", "human", "logistics")),
    )
    for kind, tokens in keywordRules:
        if any(token in lowered for token in tokens):
            return kind
    return "general"
|
|
997
|
+
|
|
998
|
+
|
|
999
|
+
def _confidence(query: str, route: list[str], entities: dict[str, Any]) -> float:
|
|
1000
|
+
score = 0.45 + min(0.25, len(route) * 0.05)
|
|
1001
|
+
if entities.get("item") or entities.get("target"):
|
|
1002
|
+
score += 0.12
|
|
1003
|
+
if entities.get("budgetUsd") or entities.get("phone") or entities.get("email"):
|
|
1004
|
+
score += 0.10
|
|
1005
|
+
if len(query.split()) >= 5:
|
|
1006
|
+
score += 0.08
|
|
1007
|
+
return round(min(0.96, score), 3)
|
|
1008
|
+
|
|
1009
|
+
|
|
1010
|
+
def _stripTags(text: str) -> str:
|
|
1011
|
+
clean = re.sub(r"<script\b[^>]*>.*?</script>", " ", str(text or ""), flags=re.IGNORECASE | re.DOTALL)
|
|
1012
|
+
clean = re.sub(r"<style\b[^>]*>.*?</style>", " ", clean, flags=re.IGNORECASE | re.DOTALL)
|
|
1013
|
+
clean = re.sub(r"<[^>]+>", " ", clean)
|
|
1014
|
+
return html.unescape(re.sub(r"\s+", " ", clean)).strip()
|
|
1015
|
+
|
|
1016
|
+
|
|
1017
|
+
def _confirmation(text: str) -> str:
    """Extract a confirmation-style identifier from page text.

    An explicit "confirmation number/no/id/#" is preferred; otherwise the
    first case/claim/ticket/reference/order identifier is used. Returns
    ``""`` when neither is present.
    """
    clean = _stripTags(text)
    idPatterns = (
        r"\bconfirmation\s*(?:number|no|id|#)\s*[:#-]?\s*([A-Z0-9-]{4,32})\b",
        r"\b(?:case|claim|ticket|reference|order)\s*(?:number|no|id|#)\s*[:#-]?\s*([A-Z0-9-]{4,32})\b",
    )
    for pattern in idPatterns:
        found = re.search(pattern, clean, re.IGNORECASE)
        if found:
            return found.group(1)
    return ""
|
|
1024
|
+
|
|
1025
|
+
|
|
1026
|
+
def _portalOutcome(text: str) -> dict[str, Any]:
    """Summarize what a portal page reports after an action.

    Returns the confirmation id, a status label, a deadline, an amount and
    the de-duplicated record references (case/claim/ticket/reference/order
    and tracking/shipment numbers) found in the tag-stripped text. Fields
    that are not found are empty.
    """
    clean = _stripTags(text)
    deadlinePatterns = (r"\b(?:deadline|due date|respond by|follow[- ]?up by)\s*[:#-]?\s*([A-Z][a-z]{2,9}\s+\d{1,2},?\s+\d{4}|\d{4}-\d{2}-\d{2}|\d{1,2}/\d{1,2}/\d{2,4})",)
    amountPatterns = (r"\b(?:amount|refund|claim value|total)\s*[:#-]?\s*([A-Z]{3}\s*)?([$€£]?\s*\d+(?:[,.]\d{2})?)",)
    referencePatterns = (
        r"\b(?:case|claim|ticket|reference|order)\s*(?:number|no|id|#)\s*[:#-]?\s*([A-Z0-9-]{4,32})\b",
        r"\b(?:tracking|shipment)\s*(?:number|no|id|#)\s*[:#-]?\s*([A-Z0-9-]{4,32})\b",
    )

    outcome: dict[str, Any] = {"confirmation": _confirmation(clean)}
    outcome["status"] = _portalStatus(clean)
    outcome["deadline"] = _firstMatch(clean, deadlinePatterns)
    outcome["amount"] = _firstMatch(clean, amountPatterns)
    references = [
        found.group(1)
        for pattern in referencePatterns
        for found in re.finditer(pattern, clean, re.IGNORECASE)
    ]
    # dict.fromkeys drops repeats while keeping first-seen order.
    outcome["recordRefs"] = list(dict.fromkeys(references))
    return outcome
|
|
1046
|
+
|
|
1047
|
+
|
|
1048
|
+
def _portalStatus(text: str) -> str:
|
|
1049
|
+
match = re.search(r"\b(?:status|state)\s*[:#-]?\s*([A-Za-z][A-Za-z -]{2,32})\b", text, re.IGNORECASE)
|
|
1050
|
+
if match:
|
|
1051
|
+
return match.group(1).strip(" .").lower().replace(" ", "_")
|
|
1052
|
+
lowered = text.lower()
|
|
1053
|
+
for label, tokens in (
|
|
1054
|
+
("submitted", ("claim submitted", "form submitted", "submitted")),
|
|
1055
|
+
("approved", ("approved", "accepted")),
|
|
1056
|
+
("rejected", ("rejected", "denied")),
|
|
1057
|
+
("pending", ("pending", "under review", "in review", "awaiting")),
|
|
1058
|
+
("retrieved", ("record retrieved", "record found", "download ready")),
|
|
1059
|
+
):
|
|
1060
|
+
if any(token in lowered for token in tokens):
|
|
1061
|
+
return label
|
|
1062
|
+
return ""
|
|
1063
|
+
|
|
1064
|
+
|
|
1065
|
+
def _firstMatch(text: str, patterns: tuple[str, ...]) -> str:
|
|
1066
|
+
for pattern in patterns:
|
|
1067
|
+
match = re.search(pattern, text, re.IGNORECASE)
|
|
1068
|
+
if not match:
|
|
1069
|
+
continue
|
|
1070
|
+
groups = [item for item in match.groups() if item]
|
|
1071
|
+
value = " ".join(groups).strip() if groups else match.group(0).strip()
|
|
1072
|
+
return re.sub(r"\s+", " ", value)
|
|
1073
|
+
return ""
|
|
1074
|
+
|
|
1075
|
+
|
|
1076
|
+
def _portalStepResults(raw: Any) -> list[dict[str, Any]]:
|
|
1077
|
+
if not isinstance(raw, list):
|
|
1078
|
+
return []
|
|
1079
|
+
steps = []
|
|
1080
|
+
for index, item in enumerate(raw, start=1):
|
|
1081
|
+
if not isinstance(item, dict):
|
|
1082
|
+
continue
|
|
1083
|
+
stepId = str(item.get("stepId", "") or _stableToolId("portalStep", {"index": index, "step": item}))
|
|
1084
|
+
steps.append(
|
|
1085
|
+
{
|
|
1086
|
+
"stepId": stepId,
|
|
1087
|
+
"label": str(item.get("label", item.get("action", f"Step {index}")) or f"Step {index}"),
|
|
1088
|
+
"action": str(item.get("action", "") or ""),
|
|
1089
|
+
"selector": str(item.get("selector", "") or ""),
|
|
1090
|
+
"valueKey": str(item.get("valueKey", "") or ""),
|
|
1091
|
+
"status": str(item.get("status", "pending") or "pending"),
|
|
1092
|
+
"proofRequired": bool(item.get("proofRequired", False)),
|
|
1093
|
+
"meta": item.get("meta", {}) if isinstance(item.get("meta"), dict) else {},
|
|
1094
|
+
}
|
|
1095
|
+
)
|
|
1096
|
+
return steps
|
|
1097
|
+
|
|
1098
|
+
|
|
1099
|
+
def _portalRequiredInputs(payload: dict[str, Any], steps: list[dict[str, Any]]) -> list[str]:
|
|
1100
|
+
meta = payload.get("meta", {}) if isinstance(payload.get("meta"), dict) else {}
|
|
1101
|
+
required = meta.get("requiredInputs", payload.get("requiredInputs", []))
|
|
1102
|
+
if isinstance(required, list):
|
|
1103
|
+
clean = [str(item) for item in required if str(item)]
|
|
1104
|
+
if clean:
|
|
1105
|
+
return clean
|
|
1106
|
+
return [str(step.get("valueKey", "") or "") for step in steps if str(step.get("valueKey", "") or "")]
|
|
1107
|
+
|
|
1108
|
+
|
|
1109
|
+
def _missingPortalInputs(steps: list[dict[str, Any]], inputs: dict[str, Any], *, requiredKeys: list[str]) -> list[dict[str, Any]]:
|
|
1110
|
+
missing = []
|
|
1111
|
+
required = set(requiredKeys)
|
|
1112
|
+
for step in steps:
|
|
1113
|
+
key = str(step.get("valueKey", "") or "")
|
|
1114
|
+
if not key or key not in required:
|
|
1115
|
+
continue
|
|
1116
|
+
if key not in inputs or _emptyPortalInput(inputs.get(key)):
|
|
1117
|
+
missing.append({"key": key, "label": str(step.get("label", key) or key), "secret": False})
|
|
1118
|
+
deduped: dict[str, dict[str, Any]] = {}
|
|
1119
|
+
for item in missing:
|
|
1120
|
+
deduped[item["key"]] = item
|
|
1121
|
+
return list(deduped.values())
|
|
1122
|
+
|
|
1123
|
+
|
|
1124
|
+
def _emptyPortalInput(value: Any) -> bool:
|
|
1125
|
+
return value is None or value == "" or value == []
|
|
1126
|
+
|
|
1127
|
+
|
|
1128
|
+
def _portalStepsWithMissingInputs(steps: list[dict[str, Any]], missing: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
|
1129
|
+
missingKeys = {str(item.get("key", "") or "") for item in missing}
|
|
1130
|
+
result = []
|
|
1131
|
+
for step in steps:
|
|
1132
|
+
valueKey = str(step.get("valueKey", "") or "")
|
|
1133
|
+
status = "needsInput" if valueKey and valueKey in missingKeys else str(step.get("status", "pending") or "pending")
|
|
1134
|
+
result.append({**step, "status": status, "meta": {**(step.get("meta", {}) if isinstance(step.get("meta"), dict) else {}), "missingInput": valueKey in missingKeys}})
|
|
1135
|
+
return result
|
|
1136
|
+
|
|
1137
|
+
|
|
1138
|
+
def _portalStepsWithStatus(steps: list[dict[str, Any]], status: str, *, reason: str) -> list[dict[str, Any]]:
|
|
1139
|
+
result = []
|
|
1140
|
+
for step in steps:
|
|
1141
|
+
meta = step.get("meta", {}) if isinstance(step.get("meta"), dict) else {}
|
|
1142
|
+
if reason:
|
|
1143
|
+
meta = {**meta, "reason": reason}
|
|
1144
|
+
result.append({**step, "status": status, "meta": meta})
|
|
1145
|
+
return result
|
|
1146
|
+
|
|
1147
|
+
|
|
1148
|
+
def _portalStepSummary(steps: list[dict[str, Any]]) -> dict[str, Any]:
|
|
1149
|
+
counts: dict[str, int] = {}
|
|
1150
|
+
for step in steps:
|
|
1151
|
+
status = str(step.get("status", "") or "")
|
|
1152
|
+
counts[status] = counts.get(status, 0) + 1
|
|
1153
|
+
return {"total": len(steps), "byStatus": counts}
|
|
1154
|
+
|
|
1155
|
+
|
|
1156
|
+
def _stableToolId(prefix: str, payload: dict[str, Any]) -> str:
|
|
1157
|
+
digest = hashlib.sha256(json.dumps(payload, ensure_ascii=True, sort_keys=True).encode("utf-8")).hexdigest()
|
|
1158
|
+
return f"{prefix}_{digest[:24]}"
|
|
1159
|
+
|
|
1160
|
+
|
|
1161
|
+
def _looksLikeFollowUp(query: str) -> bool:
|
|
1162
|
+
lowered = query.lower()
|
|
1163
|
+
return any(token in lowered for token in ("yes", "no", "continue", "approve", "use that", "same", "also", "the second", "that one", "go ahead"))
|
|
1164
|
+
|
|
1165
|
+
|
|
1166
|
+
def _imageInputs(payload: dict[str, Any]) -> list[dict[str, Any]]:
|
|
1167
|
+
raw = []
|
|
1168
|
+
for key in ("images", "attachments", "documents"):
|
|
1169
|
+
value = payload.get(key)
|
|
1170
|
+
if isinstance(value, list):
|
|
1171
|
+
raw.extend(item for item in value if isinstance(item, dict))
|
|
1172
|
+
images = []
|
|
1173
|
+
seen = set()
|
|
1174
|
+
for item in raw:
|
|
1175
|
+
mime = str(item.get("mime", item.get("contentType", "")) or "").lower()
|
|
1176
|
+
kind = str(item.get("kind", item.get("type", "")) or "").lower()
|
|
1177
|
+
if mime.startswith("image/") or kind in {"image", "photo", "screenshot"} or any(key in item for key in ("base64", "contentBase64", "dataUrl")):
|
|
1178
|
+
key = "|".join(str(item.get(name, "") or "") for name in ("artifactId", "name", "dataUrl", "base64", "contentBase64", "text", "ocr"))
|
|
1179
|
+
if key and key in seen:
|
|
1180
|
+
continue
|
|
1181
|
+
if key:
|
|
1182
|
+
seen.add(key)
|
|
1183
|
+
images.append(item)
|
|
1184
|
+
return images
|
|
1185
|
+
|
|
1186
|
+
|
|
1187
|
+
def _imageBytes(image: dict[str, Any]) -> bytes:
|
|
1188
|
+
raw = str(image.get("base64", image.get("contentBase64", image.get("data", ""))) or "")
|
|
1189
|
+
dataUrl = str(image.get("dataUrl", "") or "")
|
|
1190
|
+
if dataUrl and "," in dataUrl:
|
|
1191
|
+
raw = dataUrl.split(",", 1)[1]
|
|
1192
|
+
if not raw:
|
|
1193
|
+
return b""
|
|
1194
|
+
try:
|
|
1195
|
+
return base64.b64decode(raw, validate=False)
|
|
1196
|
+
except Exception:
|
|
1197
|
+
return raw.encode("utf-8", errors="ignore")
|
|
1198
|
+
|
|
1199
|
+
|
|
1200
|
+
def _imageText(image: dict[str, Any], rawBytes: bytes) -> str:
    """Gather every piece of text available for ``image`` into one string.

    Combines the descriptive fields on the record, runs of five or more
    printable ASCII characters found in ``rawBytes``, and the result of
    ``_ocrText``. Empty pieces are skipped.
    """
    described = [str(image.get(field, "") or "") for field in ("text", "alt", "caption", "ocr", "description", "name")]
    embedded = ""
    if rawBytes:
        runs = re.findall(rb"[ -~]{5,}", rawBytes)
        embedded = " ".join(run.decode("utf-8", errors="ignore") for run in runs)
    recognized = _ocrText(image, rawBytes)
    pieces = [piece for piece in (*described, embedded, recognized) if piece]
    return " ".join(pieces).strip()
|
|
1207
|
+
|
|
1208
|
+
|
|
1209
|
+
def _ocrText(image: dict[str, Any], rawBytes: bytes) -> str:
|
|
1210
|
+
if not rawBytes or not shutil.which("tesseract"):
|
|
1211
|
+
return ""
|
|
1212
|
+
mime = str(image.get("mime", image.get("contentType", "")) or "").lower()
|
|
1213
|
+
name = str(image.get("name", image.get("filename", "")) or "").lower()
|
|
1214
|
+
suffix = ".png"
|
|
1215
|
+
if "jpeg" in mime or "jpg" in mime or name.endswith((".jpg", ".jpeg")):
|
|
1216
|
+
suffix = ".jpg"
|
|
1217
|
+
elif "webp" in mime or name.endswith(".webp"):
|
|
1218
|
+
suffix = ".webp"
|
|
1219
|
+
elif "tiff" in mime or name.endswith((".tif", ".tiff")):
|
|
1220
|
+
suffix = ".tif"
|
|
1221
|
+
path = ""
|
|
1222
|
+
try:
|
|
1223
|
+
with tempfile.NamedTemporaryFile(prefix="picux-image-", suffix=suffix, delete=False) as tmp:
|
|
1224
|
+
tmp.write(rawBytes)
|
|
1225
|
+
path = tmp.name
|
|
1226
|
+
completed = subprocess.run(
|
|
1227
|
+
["tesseract", path, "stdout", "--dpi", "150"],
|
|
1228
|
+
capture_output=True,
|
|
1229
|
+
check=False,
|
|
1230
|
+
text=True,
|
|
1231
|
+
timeout=8,
|
|
1232
|
+
)
|
|
1233
|
+
if completed.returncode == 0:
|
|
1234
|
+
return re.sub(r"\s+", " ", completed.stdout or "").strip()
|
|
1235
|
+
return ""
|
|
1236
|
+
except Exception:
|
|
1237
|
+
return ""
|
|
1238
|
+
finally:
|
|
1239
|
+
if path:
|
|
1240
|
+
try:
|
|
1241
|
+
Path(path).unlink(missing_ok=True)
|
|
1242
|
+
except Exception:
|
|
1243
|
+
pass
|
|
1244
|
+
|
|
1245
|
+
|
|
1246
|
+
def _matches(query: str, text: str) -> list[str]:
|
|
1247
|
+
qTokens = {token for token in re.findall(r"[a-z0-9]+", query.lower()) if len(token) > 2}
|
|
1248
|
+
lowered = text.lower()
|
|
1249
|
+
matches = [token for token in sorted(qTokens) if token in lowered]
|
|
1250
|
+
for label, tokens in {
|
|
1251
|
+
"receipt": ("receipt", "invoice", "order", "total"),
|
|
1252
|
+
"damage": ("broken", "damaged", "crack", "scratch"),
|
|
1253
|
+
"identity": ("name", "seller", "merchant", "store"),
|
|
1254
|
+
}.items():
|
|
1255
|
+
if any(token in lowered for token in tokens) and label not in matches:
|
|
1256
|
+
matches.append(label)
|
|
1257
|
+
return matches[:12]
|
|
1258
|
+
|
|
1259
|
+
|
|
1260
|
+
def _summary(text: str, matches: list[str]) -> str:
    """Produce a short, redacted summary line for extracted image text.

    With no text a fixed placeholder sentence is returned. Otherwise the
    result is an optional "Matched a, b, c, d. " prefix (first four matches)
    followed by the first 220 characters of the whitespace-collapsed text
    after it has been passed through ``_redact``.
    """
    if not text:
        return "No readable image text was available; preserve the image as source evidence."
    if matches:
        lead = "Matched " + ", ".join(matches[:4]) + ". "
    else:
        lead = ""
    collapsed = re.sub(r"\s+", " ", _redact(text)).strip()
    return (lead + collapsed[:220]).strip()
|
|
1266
|
+
|
|
1267
|
+
|
|
1268
|
+
def _privateTargetBlockReason(url: str, *, allowPrivateNetwork: bool = False) -> str:
|
|
1269
|
+
if allowPrivateNetwork:
|
|
1270
|
+
return ""
|
|
1271
|
+
parsed = urlparse(str(url or ""))
|
|
1272
|
+
if parsed.scheme not in {"http", "https"}:
|
|
1273
|
+
return ""
|
|
1274
|
+
host = (parsed.hostname or "").strip().lower()
|
|
1275
|
+
if not host:
|
|
1276
|
+
return "privateNetworkTargetBlocked"
|
|
1277
|
+
if host == "localhost" or host.endswith(".localhost") or host.endswith(".local"):
|
|
1278
|
+
return "privateNetworkTargetBlocked"
|
|
1279
|
+
try:
|
|
1280
|
+
ip = ipaddress.ip_address(host.strip("[]"))
|
|
1281
|
+
except ValueError:
|
|
1282
|
+
return ""
|
|
1283
|
+
if ip.is_loopback or ip.is_private or ip.is_link_local or ip.is_unspecified or ip.is_reserved:
|
|
1284
|
+
return "privateNetworkTargetBlocked"
|
|
1285
|
+
return ""
|
|
1286
|
+
|
|
1287
|
+
|
|
1288
|
+
def _blockedUrlObservation(url: str, query: str, *, source: str, error: str) -> dict[str, Any]:
|
|
1289
|
+
return {
|
|
1290
|
+
"source": source,
|
|
1291
|
+
"url": url,
|
|
1292
|
+
"ok": False,
|
|
1293
|
+
"adapter": "http",
|
|
1294
|
+
"statusCode": 0,
|
|
1295
|
+
"title": "",
|
|
1296
|
+
"snippets": [],
|
|
1297
|
+
"textSample": "",
|
|
1298
|
+
"matched": False,
|
|
1299
|
+
"error": error,
|
|
1300
|
+
}
|
|
1301
|
+
|
|
1302
|
+
|
|
1303
|
+
def _pageObservation(url: str, text: str, query: str, adapter: str, *, source: str, statusCode: int) -> dict[str, Any]:
    """Build the successful-observation record for a fetched page.

    The title is taken from the markup, the body is the tag-stripped text,
    snippets are the query hits in that body, and ``textSample`` is the first
    4000 body characters after ``_redact``. ``matched`` is true when at least
    one snippet was found.
    """
    pageTitle = _title(text)
    plainText = _stripHtml(text)
    hits = _snippets(plainText, query)
    observation: dict[str, Any] = {
        "source": source,
        "url": url,
        "ok": True,
        "adapter": adapter,
        "statusCode": statusCode,
        "title": pageTitle,
        "snippets": hits,
    }
    observation["textSample"] = _redact(plainText[:4000])
    observation["matched"] = bool(hits)
    return observation
|
|
1318
|
+
|
|
1319
|
+
|
|
1320
|
+
def _title(text: str) -> str:
|
|
1321
|
+
match = re.search(r"<title[^>]*>(.*?)</title>", text, flags=re.IGNORECASE | re.DOTALL)
|
|
1322
|
+
if not match:
|
|
1323
|
+
return ""
|
|
1324
|
+
return html.unescape(re.sub(r"\s+", " ", match.group(1))).strip()[:160]
|
|
1325
|
+
|
|
1326
|
+
|
|
1327
|
+
def _stripHtml(text: str) -> str:
|
|
1328
|
+
clean = re.sub(r"<script\b.*?</script>", " ", text, flags=re.IGNORECASE | re.DOTALL)
|
|
1329
|
+
clean = re.sub(r"<style\b.*?</style>", " ", clean, flags=re.IGNORECASE | re.DOTALL)
|
|
1330
|
+
clean = re.sub(r"<[^>]+>", " ", clean)
|
|
1331
|
+
return html.unescape(re.sub(r"\s+", " ", clean)).strip()
|
|
1332
|
+
|
|
1333
|
+
|
|
1334
|
+
def _snippets(text: str, query: str) -> list[str]:
|
|
1335
|
+
tokens = [token for token in re.findall(r"[a-z0-9]+", query.lower()) if len(token) > 2]
|
|
1336
|
+
lowered = text.lower()
|
|
1337
|
+
snippets = []
|
|
1338
|
+
for token in tokens[:8]:
|
|
1339
|
+
idx = lowered.find(token)
|
|
1340
|
+
if idx < 0:
|
|
1341
|
+
continue
|
|
1342
|
+
start = max(0, idx - 90)
|
|
1343
|
+
end = min(len(text), idx + 180)
|
|
1344
|
+
snippet = text[start:end].strip()
|
|
1345
|
+
if snippet and snippet not in snippets:
|
|
1346
|
+
snippets.append(snippet)
|
|
1347
|
+
return snippets[:5]
|
|
1348
|
+
|
|
1349
|
+
|
|
1350
|
+
def _marketplaceSet(query: str, entities: dict[str, Any], payload: dict[str, Any]) -> dict[str, Any]:
    """Decide which marketplaces to search for this request.

    Sources named explicitly in ``payload["marketplaces"]`` (or
    ``payload["sources"]``) are alias-normalised and used as-is; otherwise
    they are chosen by ``_marketplaceSources`` from the market and country.
    The market defaults to ``"shopping"`` in the returned record.
    """
    explicit = payload.get("marketplaces", payload.get("sources", []))
    sources = []
    if isinstance(explicit, list):
        sources = [_sourceAlias(str(name).strip().lower()) for name in explicit if str(name).strip()]
    market = str(entities.get("market", "") or "")
    country = _country(query, {**payload, **entities})
    if not sources:
        sources = _marketplaceSources(market, country)
    localSources = _localSources(market, country)
    return {
        "market": market or "shopping",
        "country": country,
        "localSources": localSources,
        "sources": sources,
    }
|
|
1363
|
+
|
|
1364
|
+
|
|
1365
|
+
def _targetLimit(payload: dict[str, Any]) -> int:
|
|
1366
|
+
try:
|
|
1367
|
+
raw = int(payload.get("targetLimit", payload.get("sourceLimit", 12)) or 12)
|
|
1368
|
+
except Exception:
|
|
1369
|
+
raw = 12
|
|
1370
|
+
return max(1, min(raw, 20))
|
|
1371
|
+
|
|
1372
|
+
|
|
1373
|
+
def _searchTargets(query: str, entities: dict[str, Any], payload: dict[str, Any], *, marketplaceSet: dict[str, Any] | None = None) -> list[dict[str, str]]:
|
|
1374
|
+
marketplaceSet = marketplaceSet or _marketplaceSet(query, entities, payload)
|
|
1375
|
+
requested = [str(item) for item in marketplaceSet.get("sources", []) if str(item)]
|
|
1376
|
+
item = quote_plus(str(entities.get("item", "") or query))
|
|
1377
|
+
templates = {
|
|
1378
|
+
"ebay": "https://www.ebay.com/sch/i.html?_nkw={q}",
|
|
1379
|
+
"facebook": "https://www.facebook.com/marketplace/search/?query={q}",
|
|
1380
|
+
"alibaba": "https://www.alibaba.com/trade/search?SearchText={q}",
|
|
1381
|
+
"amazon": "https://www.amazon.com/s?k={q}",
|
|
1382
|
+
"shopify": "https://shop.app/search?q={q}",
|
|
1383
|
+
"blocket": "https://www.blocket.se/annonser/hela_sverige?q={q}",
|
|
1384
|
+
"tradera": "https://www.tradera.com/search?q={q}",
|
|
1385
|
+
"jumia": "https://www.jumia.com.ng/catalog/?q={q}",
|
|
1386
|
+
"konga": "https://www.konga.com/search?search={q}",
|
|
1387
|
+
"jiji": "https://jiji.ng/search?query={q}",
|
|
1388
|
+
"craigslist": "https://www.craigslist.org/search/sss?query={q}",
|
|
1389
|
+
"gumtree": "https://www.gumtree.com/search?search_category=all&q={q}",
|
|
1390
|
+
"zillow": "https://www.zillow.com/homes/{q}_rb/",
|
|
1391
|
+
"redfin": "https://www.redfin.com/city/30749/CA/{q}",
|
|
1392
|
+
"hemnet": "https://www.hemnet.se/bostader?query={q}",
|
|
1393
|
+
}
|
|
1394
|
+
homeUrls = {
|
|
1395
|
+
"amazon": "https://www.amazon.com/",
|
|
1396
|
+
"ebay": "https://www.ebay.com/",
|
|
1397
|
+
"facebook": "https://www.facebook.com/marketplace/",
|
|
1398
|
+
"alibaba": "https://www.alibaba.com/",
|
|
1399
|
+
"shopify": "https://shop.app/",
|
|
1400
|
+
"blocket": "https://www.blocket.se/",
|
|
1401
|
+
"tradera": "https://www.tradera.com/",
|
|
1402
|
+
"jumia": "https://www.jumia.com.ng/",
|
|
1403
|
+
"konga": "https://www.konga.com/",
|
|
1404
|
+
"jiji": "https://jiji.ng/",
|
|
1405
|
+
}
|
|
1406
|
+
return [
|
|
1407
|
+
{
|
|
1408
|
+
"source": source,
|
|
1409
|
+
"url": templates.get(source, f"https://www.google.com/search?q={item}+{quote_plus(source)}").format(q=item),
|
|
1410
|
+
"homeUrl": homeUrls.get(source, ""),
|
|
1411
|
+
"kind": "marketplaceSearch",
|
|
1412
|
+
}
|
|
1413
|
+
for source in requested[:10]
|
|
1414
|
+
]
|
|
1415
|
+
|
|
1416
|
+
|
|
1417
|
+
def _sourceAlias(source: str) -> str:
|
|
1418
|
+
aliases = {
|
|
1419
|
+
"fb": "facebook",
|
|
1420
|
+
"facebook marketplace": "facebook",
|
|
1421
|
+
"facebook_marketplace": "facebook",
|
|
1422
|
+
"ali": "alibaba",
|
|
1423
|
+
"aliexpress": "alibaba",
|
|
1424
|
+
"shop": "shopify",
|
|
1425
|
+
"shop.app": "shopify",
|
|
1426
|
+
"blocket.se": "blocket",
|
|
1427
|
+
"tradera.se": "tradera",
|
|
1428
|
+
"jumia nigeria": "jumia",
|
|
1429
|
+
"jumia.com.ng": "jumia",
|
|
1430
|
+
"konga.com": "konga",
|
|
1431
|
+
"jiji.ng": "jiji",
|
|
1432
|
+
}
|
|
1433
|
+
return aliases.get(source, source)
|
|
1434
|
+
|
|
1435
|
+
|
|
1436
|
+
def _marketplaceSources(market: str, country: str) -> list[str]:
    """Pick the default marketplaces for a market and country.

    Property searches use the country's local property sites followed by
    zillow, redfin and hemnet. Everything else uses the country's local
    shopping sites followed by the global ones; when the country has no local
    sites, the global list plus blocket is returned.
    """
    if market == "property":
        return _unique([*_localSources(market, country), "zillow", "redfin", "hemnet"])
    nearby = _localSources("shopping", country)
    worldwide = ["ebay", "facebook", "alibaba", "amazon", "shopify"]
    if nearby:
        return _unique([*nearby, *worldwide])
    return [*worldwide, "blocket"]
|
|
1445
|
+
|
|
1446
|
+
|
|
1447
|
+
def _localSources(market: str, country: str) -> list[str]:
|
|
1448
|
+
if market == "property":
|
|
1449
|
+
return {
|
|
1450
|
+
"se": ["hemnet", "blocket"],
|
|
1451
|
+
"ng": ["propertypro", "jiji"],
|
|
1452
|
+
"uk": ["rightmove", "zoopla", "gumtree"],
|
|
1453
|
+
"us": ["zillow", "redfin", "craigslist"],
|
|
1454
|
+
}.get(country, [])
|
|
1455
|
+
return {
|
|
1456
|
+
"se": ["blocket", "tradera", "facebook"],
|
|
1457
|
+
"ng": ["jumia", "konga", "jiji", "facebook"],
|
|
1458
|
+
"uk": ["gumtree", "facebook", "ebay"],
|
|
1459
|
+
"us": ["craigslist", "facebook", "ebay", "amazon"],
|
|
1460
|
+
}.get(country, [])
|
|
1461
|
+
|
|
1462
|
+
|
|
1463
|
+
def _unique(items: list[str]) -> list[str]:
|
|
1464
|
+
return list(dict.fromkeys(item for item in items if item))
|
|
1465
|
+
|
|
1466
|
+
|
|
1467
|
+
def _dedupeTargets(targets: list[dict[str, str]]) -> list[dict[str, str]]:
|
|
1468
|
+
seen = set()
|
|
1469
|
+
out = []
|
|
1470
|
+
for target in targets:
|
|
1471
|
+
url = str(target.get("url", "") or "")
|
|
1472
|
+
if not url or url in seen:
|
|
1473
|
+
continue
|
|
1474
|
+
seen.add(url)
|
|
1475
|
+
out.append(target)
|
|
1476
|
+
return out
|
|
1477
|
+
|
|
1478
|
+
|
|
1479
|
+
def _attemptSummaries(observations: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
|
1480
|
+
summaries = []
|
|
1481
|
+
for observation in observations:
|
|
1482
|
+
summaries.append(
|
|
1483
|
+
{
|
|
1484
|
+
"source": observation.get("source", ""),
|
|
1485
|
+
"url": observation.get("url", ""),
|
|
1486
|
+
"ok": bool(observation.get("ok")),
|
|
1487
|
+
"adapter": observation.get("adapter", ""),
|
|
1488
|
+
"statusCode": observation.get("statusCode", 0),
|
|
1489
|
+
"title": observation.get("title", ""),
|
|
1490
|
+
"matched": bool(observation.get("matched")),
|
|
1491
|
+
"listingCount": len(observation.get("listings", [])) if isinstance(observation.get("listings"), list) else 0,
|
|
1492
|
+
"filteredListingCount": int(observation.get("filteredListingCount", 0) or 0),
|
|
1493
|
+
"search": observation.get("search", {}) if isinstance(observation.get("search"), dict) else {},
|
|
1494
|
+
"error": observation.get("error", ""),
|
|
1495
|
+
}
|
|
1496
|
+
)
|
|
1497
|
+
return summaries
|
|
1498
|
+
|
|
1499
|
+
|
|
1500
|
+
def _renderedSkipped(reason: str, error: str = "") -> dict[str, Any]:
|
|
1501
|
+
attempted = reason not in {"notRequested", "noTargets"}
|
|
1502
|
+
meta = {
|
|
1503
|
+
"attempted": attempted,
|
|
1504
|
+
"available": False,
|
|
1505
|
+
"status": "skipped" if not attempted else "unavailable",
|
|
1506
|
+
"fallbackReason": reason,
|
|
1507
|
+
"adapter": "playwrightChromium",
|
|
1508
|
+
}
|
|
1509
|
+
if error:
|
|
1510
|
+
meta["error"] = error
|
|
1511
|
+
return {"observations": [], "meta": meta}
|
|
1512
|
+
|
|
1513
|
+
|
|
1514
|
+
def _normalizeRenderedObservations(raw: Any, query: str) -> list[dict[str, Any]]:
|
|
1515
|
+
if not isinstance(raw, list):
|
|
1516
|
+
return []
|
|
1517
|
+
observations: list[dict[str, Any]] = []
|
|
1518
|
+
for item in raw:
|
|
1519
|
+
if not isinstance(item, dict):
|
|
1520
|
+
continue
|
|
1521
|
+
source = str(item.get("source", "source") or "source")
|
|
1522
|
+
url = str(item.get("url", "") or "")
|
|
1523
|
+
adapter = str(item.get("adapter", "playwrightChromium") or "playwrightChromium")
|
|
1524
|
+
statusCode = _safeInt(item.get("statusCode", 0))
|
|
1525
|
+
title = str(item.get("title", "") or "")[:180]
|
|
1526
|
+
text = re.sub(r"\s+", " ", str(item.get("textSample", item.get("text", "")) or "")).strip()
|
|
1527
|
+
textSample = _redact(text[:8000])
|
|
1528
|
+
snippets = _snippets(f"{title} {textSample}", query)
|
|
1529
|
+
listings = _normalizeListings(item.get("listings", []))
|
|
1530
|
+
search = _normalizeSearch(item.get("search", {}))
|
|
1531
|
+
if not item.get("ok"):
|
|
1532
|
+
observations.append(
|
|
1533
|
+
{
|
|
1534
|
+
"source": source,
|
|
1535
|
+
"url": url,
|
|
1536
|
+
"ok": False,
|
|
1537
|
+
"adapter": adapter,
|
|
1538
|
+
"statusCode": statusCode,
|
|
1539
|
+
"title": title,
|
|
1540
|
+
"error": str(item.get("error", "") or "")[:220],
|
|
1541
|
+
"snippets": snippets,
|
|
1542
|
+
"textSample": textSample,
|
|
1543
|
+
"listings": listings,
|
|
1544
|
+
"search": search,
|
|
1545
|
+
"matched": bool(snippets),
|
|
1546
|
+
}
|
|
1547
|
+
)
|
|
1548
|
+
continue
|
|
1549
|
+
observations.append(
|
|
1550
|
+
{
|
|
1551
|
+
"source": source,
|
|
1552
|
+
"url": url,
|
|
1553
|
+
"ok": True,
|
|
1554
|
+
"adapter": adapter,
|
|
1555
|
+
"statusCode": statusCode,
|
|
1556
|
+
"title": title,
|
|
1557
|
+
"snippets": snippets,
|
|
1558
|
+
"textSample": textSample,
|
|
1559
|
+
"listings": listings,
|
|
1560
|
+
"search": search,
|
|
1561
|
+
"matched": bool(snippets),
|
|
1562
|
+
}
|
|
1563
|
+
)
|
|
1564
|
+
return observations
|
|
1565
|
+
|
|
1566
|
+
|
|
1567
|
+
def _safeInt(value: Any) -> int:
|
|
1568
|
+
try:
|
|
1569
|
+
return int(value or 0)
|
|
1570
|
+
except Exception:
|
|
1571
|
+
return 0
|
|
1572
|
+
|
|
1573
|
+
|
|
1574
|
+
def _safeFloat(value: Any) -> float:
|
|
1575
|
+
try:
|
|
1576
|
+
return float(value or 0.0)
|
|
1577
|
+
except Exception:
|
|
1578
|
+
return 0.0
|
|
1579
|
+
|
|
1580
|
+
|
|
1581
|
+
def _normalizeListings(raw: Any) -> list[dict[str, Any]]:
|
|
1582
|
+
if not isinstance(raw, list):
|
|
1583
|
+
return []
|
|
1584
|
+
listings = []
|
|
1585
|
+
for item in raw:
|
|
1586
|
+
if not isinstance(item, dict):
|
|
1587
|
+
continue
|
|
1588
|
+
title = re.sub(r"\s+", " ", str(item.get("title", "") or "")).strip()
|
|
1589
|
+
priceText = re.sub(r"\s+", " ", str(item.get("priceText", "") or "")).strip()
|
|
1590
|
+
url = str(item.get("url", "") or "").strip()
|
|
1591
|
+
textSample = re.sub(r"\s+", " ", str(item.get("textSample", "") or "")).strip()
|
|
1592
|
+
if not priceText:
|
|
1593
|
+
priceText = _listingPriceText(textSample)
|
|
1594
|
+
if not title:
|
|
1595
|
+
title = _listingTitleText(textSample, priceText)
|
|
1596
|
+
if not title and not priceText:
|
|
1597
|
+
continue
|
|
1598
|
+
listings.append(
|
|
1599
|
+
{
|
|
1600
|
+
"title": _redact(title[:220]),
|
|
1601
|
+
"priceText": _redact(priceText[:120]),
|
|
1602
|
+
"url": url[:600],
|
|
1603
|
+
"textSample": _redact(textSample[:900]),
|
|
1604
|
+
}
|
|
1605
|
+
)
|
|
1606
|
+
return listings[:18]
|
|
1607
|
+
|
|
1608
|
+
|
|
1609
|
+
def _listingTitleText(text: str, priceText: str = "") -> str:
|
|
1610
|
+
raw = re.sub(r"\s+", " ", str(text or "")).strip()
|
|
1611
|
+
if not raw:
|
|
1612
|
+
return ""
|
|
1613
|
+
if priceText:
|
|
1614
|
+
priceAt = raw.find(priceText)
|
|
1615
|
+
if priceAt > 0:
|
|
1616
|
+
raw = raw[:priceAt]
|
|
1617
|
+
raw = re.sub(r"^(?:NEW LOW PRICE|SPONSORED|Shop on eBay|Brand New)+", "", raw, flags=re.IGNORECASE).strip()
|
|
1618
|
+
for marker in (
|
|
1619
|
+
"Pre-Owned",
|
|
1620
|
+
"Open Box",
|
|
1621
|
+
"Brand New",
|
|
1622
|
+
"New Other",
|
|
1623
|
+
"Buy It Now",
|
|
1624
|
+
"or Best Offer",
|
|
1625
|
+
"out of 5 stars",
|
|
1626
|
+
"product ratings",
|
|
1627
|
+
):
|
|
1628
|
+
index = raw.lower().find(marker.lower())
|
|
1629
|
+
if index > 8:
|
|
1630
|
+
raw = raw[:index]
|
|
1631
|
+
break
|
|
1632
|
+
raw = re.sub(r"\s+", " ", raw).strip(" -·")
|
|
1633
|
+
return _redact(raw[:220])
|
|
1634
|
+
|
|
1635
|
+
|
|
1636
|
+
def _listingPriceText(text: str) -> str:
|
|
1637
|
+
raw = str(text or "")
|
|
1638
|
+
patterns = (
|
|
1639
|
+
r"(?:USD|SEK|NGN|NOK|EUR|GBP|\$|€|£|₦)\s*[0-9][0-9\s,.]*(?:[,.][0-9]{1,2})?",
|
|
1640
|
+
r"[0-9][0-9\s,.]*(?:[,.][0-9]{1,2})?\s*(?:kr|kronor|sek|usd|dollars?|ngn|naira|€|£|₦)",
|
|
1641
|
+
)
|
|
1642
|
+
for pattern in patterns:
|
|
1643
|
+
match = re.search(pattern, raw, flags=re.IGNORECASE)
|
|
1644
|
+
if match:
|
|
1645
|
+
value = re.sub(r"\s+", " ", match.group(0)).strip()
|
|
1646
|
+
return value[:120]
|
|
1647
|
+
return ""
|
|
1648
|
+
|
|
1649
|
+
|
|
1650
|
+
def _normalizeSearch(raw: Any) -> dict[str, Any]:
    """Coerce adapter-reported search metadata to a fixed, bounded shape.

    Non-dict input is treated as empty. String fields are length-limited
    (``mode`` is not); ``query`` is additionally passed through ``_redact``.
    """
    data = raw if isinstance(raw, dict) else {}

    def field(name: str) -> str:
        return str(data.get(name, "") or "")

    return {
        "mode": field("mode"),
        "query": _redact(field("query")[:220]),
        "submitted": bool(data.get("submitted")),
        "inputSelector": field("inputSelector")[:160],
        "entryUrl": field("entryUrl")[:600],
        "fallbackUrl": field("fallbackUrl")[:600],
        "fallbackReason": field("fallbackReason")[:220],
    }
|
|
1661
|
+
|
|
1662
|
+
|
|
1663
|
+
def _offersFromObservations(observations: list[dict[str, Any]], query: str, entities: dict[str, Any], *, budget: dict[str, Any], fxRates: dict[str, float]) -> list[dict[str, Any]]:
    """Turn browser observations into at most ten priced offers.

    For every successful observation, each listing that matches the requested
    item and yields at least one parseable price becomes an offer: the
    cheapest quote is the offer price, the most expensive the list price.
    Offers above ``budget["usd"]`` (when positive) are dropped and counted.

    Observations without listings are only considered when their source is
    ``"clientUrl"``; the price is then parsed from the page title, snippets
    and text sample.

    Side effect: ``filteredListingCount`` on the passed-in observation dicts
    is incremented by the number of offers dropped for exceeding the budget.
    """
    offers: list[dict[str, Any]] = []
    itemId = str(entities.get("item", "") or _item(query) or "requestedItem")
    used = "used" in query.lower() or "second hand" in query.lower()
    budgetUsd = float(budget.get("usd", 0.0) or 0.0)
    for index, observation in enumerate(observations, start=1):
        if not observation.get("ok"):
            continue
        defaultCurrency = _sourceCurrency(str(observation.get("source", "") or ""), str(entities.get("country", "") or ""))
        filteredCount = 0
        listings = observation.get("listings", []) if isinstance(observation.get("listings"), list) else []
        for listingIndex, listing in enumerate(listings, start=1):
            if not _listingMatches(listing, itemId, query):
                continue
            text = " ".join(str(listing.get(key, "") or "") for key in ("title", "priceText", "textSample"))
            # Prefer the dedicated price field; fall back to the whole listing text.
            quotes = _priceQuotes(str(listing.get("priceText", "") or text), defaultCurrency, fxRates)
            if not quotes:
                continue
            offerQuote = min(quotes, key=lambda item: item["usd"])
            listedQuote = max(quotes, key=lambda item: item["usd"])
            offer = float(offerQuote["usd"])
            listed = float(listedQuote["usd"])
            if listed <= offer:
                listed = offer
            if budgetUsd > 0 and offer > budgetUsd:
                filteredCount += 1
                continue
            sourceUrl = str(listing.get("url", "") or observation.get("url", "") or "")
            # The id is derived from URL, position, title and price of the listing.
            sourceKey = "|".join(
                [
                    sourceUrl,
                    str(listingIndex),
                    str(listing.get("title", "") or ""),
                    str(listing.get("priceText", "") or ""),
                    str(offerQuote.get("currency", "")),
                    str(offerQuote.get("amount", "")),
                ]
            )
            digest = hashlib.sha256(sourceKey.encode("utf-8")).hexdigest()[:12]
            offers.append(
                {
                    "sourceId": f"browserSource_{digest}",
                    "itemId": itemId,
                    "listUsd": round(listed, 2),
                    "offerUsd": round(offer, 2),
                    "feesUsd": 0.0,
                    "stock": 1,
                    "meta": {
                        "condition": "used" if used else "unknown",
                        "reader": "browserReader",
                        "sourceUrl": sourceUrl,
                        "listingIndex": listingIndex,
                        "source": str(observation.get("source", "") or ""),
                        "listingTitle": str(listing.get("title", "") or "")[:220],
                        "price": {
                            "amount": round(float(offerQuote["amount"]), 2),
                            "currency": offerQuote["currency"],
                            "usd": round(offer, 2),
                            "fxRate": offerQuote["rate"],
                        },
                        "budget": budget,
                        "sourceBound": True,
                    },
                }
            )
        if filteredCount:
            observation["filteredListingCount"] = int(observation.get("filteredListingCount", 0) or 0) + filteredCount
        # Observations that had listings never fall through to page-level parsing.
        if listings:
            continue
        # Page-level price parsing is only done for sources named "clientUrl".
        if str(observation.get("source", "") or "") != "clientUrl":
            continue
        text = " ".join(
            [
                str(observation.get("title", "") or ""),
                " ".join(str(item) for item in observation.get("snippets", []) if item),
                str(observation.get("textSample", "") or ""),
            ]
        )
        quotes = _priceQuotes(text, defaultCurrency, fxRates)
        if not quotes:
            continue
        offerQuote = min(quotes, key=lambda item: item["usd"])
        listedQuote = max(quotes, key=lambda item: item["usd"])
        offer = float(offerQuote["usd"])
        listed = float(listedQuote["usd"])
        if listed <= offer:
            listed = offer
        if budgetUsd > 0 and offer > budgetUsd:
            observation["filteredListingCount"] = int(observation.get("filteredListingCount", 0) or 0) + 1
            continue
        fallbackKey = "|".join([str(observation.get("url", f"source_{index}") or f"source_{index}"), str(index), str(offerQuote.get("currency", "")), str(offerQuote.get("amount", ""))])
        digest = hashlib.sha256(fallbackKey.encode("utf-8")).hexdigest()[:12]
        offers.append(
            {
                "sourceId": f"browserSource_{digest}",
                "itemId": itemId,
                "listUsd": round(listed, 2),
                "offerUsd": round(offer, 2),
                "feesUsd": 0.0,
                # Stock is 1 only when the page text contains one of these availability words.
                "stock": 1 if re.search(r"\b(?:available|in stock|stock|listing)\b", text.lower()) else 0,
                "meta": {
                    "condition": "used" if used else "unknown",
                    "reader": "browserReader",
                    "source": str(observation.get("source", "") or ""),
                    "listingTitle": str(observation.get("title", "") or "")[:220],
                    "sourceUrl": str(observation.get("url", "") or ""),
                    "price": {
                        "amount": round(float(offerQuote["amount"]), 2),
                        "currency": offerQuote["currency"],
                        "usd": round(offer, 2),
                        "fxRate": offerQuote["rate"],
                    },
                    "budget": budget,
                    "sourceBound": True,
                },
            }
        )
    return offers[:10]
|
|
1781
|
+
|
|
1782
|
+
|
|
1783
|
+
def _listingMatches(listing: dict[str, Any], itemId: str, query: str) -> bool:
|
|
1784
|
+
text = " ".join(str(listing.get(key, "") or "") for key in ("title", "textSample")).lower()
|
|
1785
|
+
compact = re.sub(r"[^a-z0-9]+", "", text)
|
|
1786
|
+
if itemId and itemId != "requestedItem":
|
|
1787
|
+
itemCompact = re.sub(r"[^a-z0-9]+", "", itemId.lower())
|
|
1788
|
+
if itemCompact and itemCompact in compact:
|
|
1789
|
+
return True
|
|
1790
|
+
tokens = [
|
|
1791
|
+
token
|
|
1792
|
+
for token in re.findall(r"[a-z0-9]+", query.lower())
|
|
1793
|
+
if len(token) > 2 and token not in {"buy", "used", "less", "than", "under", "dollar", "usd", "best", "find"}
|
|
1794
|
+
]
|
|
1795
|
+
return bool(tokens and any(token in text for token in tokens[:6]))
|
|
1796
|
+
|
|
1797
|
+
|
|
1798
|
+
def _priceQuotes(text: str, defaultCurrency: str, fxRates: dict[str, float]) -> list[dict[str, Any]]:
|
|
1799
|
+
raw = re.sub(r"\s+", " ", str(text or "")).strip()
|
|
1800
|
+
if not raw:
|
|
1801
|
+
return []
|
|
1802
|
+
amountPattern = r"[0-9][0-9\s,.]*(?:[,.][0-9]{1,2})?"
|
|
1803
|
+
tokenPattern = r"USD|SEK|NGN|NOK|EUR|GBP|dollars?|usd|sek|ngn|kr|krona|naira|\$|€|£|₦"
|
|
1804
|
+
matches: list[tuple[str, str]] = []
|
|
1805
|
+
for token, amount in re.findall(rf"({tokenPattern})\s*({amountPattern})", raw, flags=re.IGNORECASE):
|
|
1806
|
+
matches.append((amount, token))
|
|
1807
|
+
for amount, token in re.findall(rf"({amountPattern})\s*({tokenPattern})", raw, flags=re.IGNORECASE):
|
|
1808
|
+
matches.append((amount, token))
|
|
1809
|
+
quotes = []
|
|
1810
|
+
seen = set()
|
|
1811
|
+
for amountRaw, token in matches:
|
|
1812
|
+
amount = _parseAmount(amountRaw)
|
|
1813
|
+
currency = _currencyFromToken(token, defaultCurrency)
|
|
1814
|
+
if amount <= 0 or currency not in fxRates:
|
|
1815
|
+
continue
|
|
1816
|
+
key = (currency, round(amount, 2))
|
|
1817
|
+
if key in seen:
|
|
1818
|
+
continue
|
|
1819
|
+
seen.add(key)
|
|
1820
|
+
rate = float(fxRates.get(currency, 0.0) or 0.0)
|
|
1821
|
+
usd = _toUsd(amount, currency, fxRates)
|
|
1822
|
+
if usd <= 0 or usd > 1_000_000:
|
|
1823
|
+
continue
|
|
1824
|
+
quotes.append({"amount": amount, "currency": currency, "usd": round(usd, 2), "rate": rate})
|
|
1825
|
+
return quotes[:12]
|
|
1826
|
+
|
|
1827
|
+
|
|
1828
|
+
def _parseAmount(value: str) -> float:
|
|
1829
|
+
clean = re.sub(r"\s+", "", str(value or ""))
|
|
1830
|
+
if "," in clean and "." in clean:
|
|
1831
|
+
if clean.rfind(",") > clean.rfind("."):
|
|
1832
|
+
clean = clean.replace(".", "").replace(",", ".")
|
|
1833
|
+
else:
|
|
1834
|
+
clean = clean.replace(",", "")
|
|
1835
|
+
elif "," in clean:
|
|
1836
|
+
parts = clean.split(",")
|
|
1837
|
+
clean = "".join(parts) if len(parts[-1]) == 3 else ".".join(parts)
|
|
1838
|
+
elif "." in clean:
|
|
1839
|
+
parts = clean.split(".")
|
|
1840
|
+
clean = "".join(parts) if len(parts[-1]) == 3 and len(parts) > 1 else clean
|
|
1841
|
+
try:
|
|
1842
|
+
return float(clean)
|
|
1843
|
+
except Exception:
|
|
1844
|
+
return 0.0
|
|
1845
|
+
|
|
1846
|
+
|
|
1847
|
+
def _currencyFromToken(token: str, defaultCurrency: str) -> str:
|
|
1848
|
+
lowered = str(token or "").strip().lower()
|
|
1849
|
+
if lowered in {"$", "usd", "dollar", "dollars"}:
|
|
1850
|
+
return "USD"
|
|
1851
|
+
if lowered in {"sek", "kr", "krona"}:
|
|
1852
|
+
return "SEK"
|
|
1853
|
+
if lowered in {"ngn", "₦", "naira"}:
|
|
1854
|
+
return "NGN"
|
|
1855
|
+
if lowered == "eur" or lowered == "€":
|
|
1856
|
+
return "EUR"
|
|
1857
|
+
if lowered == "gbp" or lowered == "£":
|
|
1858
|
+
return "GBP"
|
|
1859
|
+
return defaultCurrency.upper() or "USD"
|
|
1860
|
+
|
|
1861
|
+
|
|
1862
|
+
def _toUsd(amount: float, currency: str, fxRates: dict[str, float]) -> float:
|
|
1863
|
+
currency = currency.upper() or "USD"
|
|
1864
|
+
rate = float(fxRates.get(currency, 0.0) or 0.0)
|
|
1865
|
+
if currency == "USD":
|
|
1866
|
+
return amount
|
|
1867
|
+
if rate <= 0:
|
|
1868
|
+
return 0.0
|
|
1869
|
+
return amount / rate
|
|
1870
|
+
|
|
1871
|
+
|
|
1872
|
+
def _sourceCurrency(source: str, country: str) -> str:
|
|
1873
|
+
source = source.lower()
|
|
1874
|
+
country = country.lower()
|
|
1875
|
+
if source in {"blocket", "tradera", "hemnet"} or country == "se":
|
|
1876
|
+
return "SEK"
|
|
1877
|
+
if source in {"jumia", "konga", "jiji", "propertypro"} or country == "ng":
|
|
1878
|
+
return "NGN"
|
|
1879
|
+
return "USD"
|
|
1880
|
+
|
|
1881
|
+
|
|
1882
|
+
def _item(query: str) -> str:
    """Extract the thing the user is looking for from a free-text query.

    Tried in order:
    1. the service entity, when the query is a local-service query and an
       entity was found;
    2. a known keyword (iphone N, tv, laptop, car, ...), lower-cased with
       inner whitespace removed;
    3. the phrase following a verb such as "buy"/"find"/"need", with
       qualifiers like "used"/"cheap" and punctuation removed, lower-cased
       and capped at 80 characters.

    Returns "" when nothing matches.
    """
    entity = _serviceEntity(query, {})
    if _isLocalServiceQuery(query) and entity:
        return entity

    keyword = re.search(
        r"\b(i\s*phone\s*\d+|iphone\s*\d+|tv|laptop|car|apartment|house|property|mechanic|plumber|electrician|repair|garage|workshop)\b",
        query,
        re.IGNORECASE,
    )
    if keyword is not None:
        return re.sub(r"\s+", "", keyword.group(1).lower())

    phrase = re.search(
        r"\b(?:buy|find|get|need|locate|search(?: for)?|shop(?: for)?|source)\s+(?:a|an|the|my|aa)?\s*(.+?)(?:\s+(?:under|less than|below|budget|for|within)\b|\s+(?:in|near|around)\s+[A-Z][A-Za-z]+|$)",
        query,
        re.IGNORECASE,
    )
    if phrase is None:
        return ""

    # Drop marketing qualifiers, then anything that is not alphanumeric.
    text = re.sub(r"\b(?:used|best|cheap|affordable|new)\b", " ", phrase.group(1), flags=re.IGNORECASE)
    text = re.sub(r"[^A-Za-z0-9 ]+", " ", text)
    text = re.sub(r"\s+", " ", text).strip().lower()
    # Slicing an empty string yields "", which is also the "no item" result.
    return text[:80]
|
|
1901
|
+
|
|
1902
|
+
|
|
1903
|
+
def _budget(query: str, payload: dict[str, Any]) -> float:
    """Return the request's budget in USD, or 0.0 when none is resolved."""
    context = _budgetContext(query, payload, _fxRates(payload))
    return float(context.get("usd", 0.0) or 0.0)
|
|
1905
|
+
|
|
1906
|
+
|
|
1907
|
+
def _budgetContext(query: str, payload: dict[str, Any], fxRates: dict[str, float]) -> dict[str, Any]:
    """Resolve the spending limit of a request into a budget map.

    Sources are tried in order:
    1. ``payload["budget"]`` (or ``payload["maxSpend"]``) as a dict carrying
       ``amount``/``value`` and optionally ``currency``;
    2. the same field as a number or numeric string, including formatted
       strings such as "1,500";
    3. the first budget-like phrase found in ``query``.

    When nothing is found, a zero budget in the payload/query currency is
    returned. The result is whatever ``_budgetMap`` builds.
    """
    raw = payload.get("budget", payload.get("maxSpend", 0))
    payloadCurrency = str(payload.get("currency", payload.get("budgetCurrency", "")) or "").upper()

    if isinstance(raw, dict):
        amount = _parseAmount(str(raw.get("amount", raw.get("value", 0)) or "0"))
        currency = str(raw.get("currency", payloadCurrency or "USD") or "USD").upper()
        usd = _toUsd(amount, currency, fxRates)
        return _budgetMap(amount, currency, usd, fxRates)

    if raw:
        explicit = None
        # Only the conversion is guarded; errors from the helpers below are
        # no longer swallowed together with it.
        try:
            explicit = float(raw)
        except (TypeError, ValueError, OverflowError):
            # "1,500" or "1 500" are not float literals but are valid budgets.
            # Text that still yields nothing falls through to query parsing.
            if isinstance(raw, str):
                parsed = _parseAmount(raw)
                if parsed > 0:
                    explicit = parsed
        if explicit is not None:
            currency = payloadCurrency or _budgetCurrency(query, payload) or "USD"
            usd = _toUsd(explicit, currency, fxRates)
            return _budgetMap(explicit, currency, usd, fxRates)

    # NOTE(review): these patterns only know $, USD, SEK, NGN, ₦ and the word
    # forms listed; "€"/"£" budgets in the query are not picked up here.
    patterns = (
        r"(?:under|less than|below|budget|for)\s*(?P<prefix>\$|USD|SEK|NGN|₦)?\s*(?P<amount>[0-9][0-9\s,.]*(?:[,.][0-9]{1,2})?)\s*(?P<suffix>dollars?|usd|sek|kr|krona|ngn|naira)?",
        r"(?P<prefix>\$|USD|SEK|NGN|₦)\s*(?P<amount>[0-9][0-9\s,.]*(?:[,.][0-9]{1,2})?)",
        r"\b(?P<amount>[0-9][0-9\s,.]*(?:[,.][0-9]{1,2})?)\s*(?P<suffix>dollars?|usd|sek|kr|krona|ngn|naira)\b",
    )
    for pattern in patterns:
        match = re.search(pattern, query, re.IGNORECASE)
        if not match:
            continue
        groups = match.groupdict()
        amount = _parseAmount(match.group("amount"))
        # A currency written next to the number wins over the payload default.
        token = groups.get("prefix") or groups.get("suffix") or payloadCurrency or "USD"
        currency = _currencyFromToken(token, payloadCurrency or "USD")
        usd = _toUsd(amount, currency, fxRates)
        return _budgetMap(amount, currency, usd, fxRates)

    return _budgetMap(0.0, payloadCurrency or _budgetCurrency(query, payload) or "USD", 0.0, fxRates)
|
|
1938
|
+
|
|
1939
|
+
|
|
1940
|
+
def _budgetCurrency(query: str, payload: dict[str, Any]) -> str:
|
|
1941
|
+
explicit = str(payload.get("currency", payload.get("budgetCurrency", "")) or "").upper()
|
|
1942
|
+
if explicit:
|
|
1943
|
+
return explicit
|
|
1944
|
+
lowered = query.lower()
|
|
1945
|
+
if "$" in query or "usd" in lowered or "dollar" in lowered:
|
|
1946
|
+
return "USD"
|
|
1947
|
+
if "₦" in query or "ngn" in lowered or "naira" in lowered:
|
|
1948
|
+
return "NGN"
|
|
1949
|
+
if re.search(r"\b(?:sek|kr|krona)\b", lowered):
|
|
1950
|
+
return "SEK"
|
|
1951
|
+
return "USD"
|
|
1952
|
+
|
|
1953
|
+
|
|
1954
|
+
def _budgetMap(amount: float, currency: str, usd: float, fxRates: dict[str, float]) -> dict[str, Any]:
|
|
1955
|
+
currency = currency.upper() or "USD"
|
|
1956
|
+
nativeMax = amount
|
|
1957
|
+
return {
|
|
1958
|
+
"amount": round(amount, 2),
|
|
1959
|
+
"currency": currency,
|
|
1960
|
+
"usd": round(usd, 2),
|
|
1961
|
+
"fxRate": float(fxRates.get(currency, 1.0) or 1.0),
|
|
1962
|
+
"nativeMax": round(nativeMax, 2),
|
|
1963
|
+
}
|
|
1964
|
+
|
|
1965
|
+
|
|
1966
|
+
def _fxRates(payload: dict[str, Any]) -> dict[str, float]:
    """Return the FX table: built-in defaults overlaid with payload overrides.

    Overrides are read from ``payload["fxRates"]`` (or ``"exchangeRates"``).
    Keys are upper-cased and stripped of a ``USD_TO_`` / ``USD_`` prefix; only
    USD, SEK, NGN, EUR, GBP and NOK are accepted. Values that are not
    positive, finite numbers are ignored so the default rate stays in effect.
    """
    rates = {key: float(value) for key, value in DEFAULT_FX_RATES.items()}
    overrides = payload.get("fxRates", payload.get("exchangeRates", {}))
    if not isinstance(overrides, dict):
        return rates

    supported = {"USD", "SEK", "NGN", "EUR", "GBP", "NOK"}
    for key, value in overrides.items():
        try:
            rate = float(value)
        except (TypeError, ValueError, OverflowError):
            continue
        # NaN fails every comparison, so a plain `rate <= 0` guard would let
        # it through and turn every converted amount into NaN. The chained
        # comparison rejects NaN, zero, negatives and infinity in one go.
        if not 0 < rate < float("inf"):
            continue
        normalized = str(key).upper().replace("USD_TO_", "").replace("USD_", "")
        if normalized in supported:
            rates[normalized] = rate
    return rates
|
|
1981
|
+
|
|
1982
|
+
|
|
1983
|
+
def _target(query: str) -> str:
|
|
1984
|
+
domainMerchant = re.search(r"\b(?:to|from|with|at)\s*([A-Z]?[A-Za-z0-9-]+\.(?:se|com|net|org|io|co|ng|uk))\b", query, re.IGNORECASE)
|
|
1985
|
+
if domainMerchant:
|
|
1986
|
+
return domainMerchant.group(1).strip()
|
|
1987
|
+
match = re.search(r"\bfrom\s+([A-Z][A-Za-z0-9.& -]{2,40})", query)
|
|
1988
|
+
return match.group(1).strip() if match else ""
|
|
1989
|
+
|
|
1990
|
+
|
|
1991
|
+
def _phone(query: str, payload: dict[str, Any]) -> str:
|
|
1992
|
+
raw = str(payload.get("phone", "") or "")
|
|
1993
|
+
if raw:
|
|
1994
|
+
return raw
|
|
1995
|
+
match = re.search(r"\+?\d[\d\s().-]{7,}\d", query)
|
|
1996
|
+
return match.group(0).strip() if match else ""
|
|
1997
|
+
|
|
1998
|
+
|
|
1999
|
+
def _email(query: str) -> str:
|
|
2000
|
+
match = re.search(r"\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b", query, flags=re.IGNORECASE)
|
|
2001
|
+
return match.group(0) if match else ""
|
|
2002
|
+
|
|
2003
|
+
|
|
2004
|
+
def _market(query: str) -> str:
    """Classify a query as "localService", "property", "shopping" or "".

    Local-service detection takes priority; property and shopping are decided
    by case-insensitive substring keywords, checked in that order.
    """
    text = query.lower()
    if _isLocalServiceQuery(query):
        return "localService"
    keywordGroups = (
        ("property", ("property", "apartment", "house", "rent", "buy home")),
        ("shopping", ("buy", "shop", "shopping", "iphone", "used", "dollar")),
    )
    for market, keywords in keywordGroups:
        if any(keyword in text for keyword in keywords):
            return market
    return ""
|
|
2013
|
+
|
|
2014
|
+
|
|
2015
|
+
def _country(query: str, payload: dict[str, Any]) -> str:
|
|
2016
|
+
raw = " ".join(
|
|
2017
|
+
str(payload.get(key, "") or "")
|
|
2018
|
+
for key in ("country", "region", "locale", "market", "location")
|
|
2019
|
+
).lower()
|
|
2020
|
+
lowered = f"{query.lower()} {raw}"
|
|
2021
|
+
if any(token in lowered for token in ("sweden", "swedish", "stockholm", "goteborg", "gothenburg", "malmo", "backebol", "power.se", " se ", "sv-se")):
|
|
2022
|
+
return "se"
|
|
2023
|
+
if any(token in lowered for token in ("nigeria", "lagos", "abuja", "naira", "ngn", " ng ", "en-ng")):
|
|
2024
|
+
return "ng"
|
|
2025
|
+
if any(token in lowered for token in ("united kingdom", "england", "london", " uk ", "en-gb")):
|
|
2026
|
+
return "uk"
|
|
2027
|
+
if any(token in lowered for token in ("united states", "america", "usa", " us ", "en-us")):
|
|
2028
|
+
return "us"
|
|
2029
|
+
compact = raw.replace("_", "-")
|
|
2030
|
+
if compact in {"se", "sv-se"}:
|
|
2031
|
+
return "se"
|
|
2032
|
+
if compact in {"ng", "en-ng"}:
|
|
2033
|
+
return "ng"
|
|
2034
|
+
if compact in {"uk", "gb", "en-gb"}:
|
|
2035
|
+
return "uk"
|
|
2036
|
+
if compact in {"us", "usa", "en-us"}:
|
|
2037
|
+
return "us"
|
|
2038
|
+
return ""
|
|
2039
|
+
|
|
2040
|
+
|
|
2041
|
+
def _region(query: str, payload: dict[str, Any]) -> str:
    """Return the upper-cased country code, else the payload's raw region."""
    code = _country(query, payload)
    if code:
        return code.upper()
    return str(payload.get("region", "") or "")
|
|
2044
|
+
|
|
2045
|
+
|
|
2046
|
+
def _redact(text: str) -> str:
|
|
2047
|
+
masked = re.sub(r"\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b", "[email]", str(text or ""), flags=re.IGNORECASE)
|
|
2048
|
+
return re.sub(r"\+?\b(?:\d[\s().-]?){8,15}\d\b", "[phone]", masked)
|