patchr 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. apps/__init__.py +2 -0
  2. apps/api/__init__.py +2 -0
  3. apps/api/main.py +652 -0
  4. apps/benchmarks/__init__.py +1 -0
  5. apps/benchmarks/main.py +20 -0
  6. apps/sandbox/__init__.py +1 -0
  7. apps/sandbox/main.py +20 -0
  8. apps/worker/__init__.py +2 -0
  9. apps/worker/main.py +15 -0
  10. apps/worker/verify.py +14 -0
  11. patchr/__init__.py +12 -0
  12. patchr/sdk/__init__.py +20 -0
  13. patchr/sdk/client.py +12 -0
  14. patchr-0.1.0.dist-info/METADATA +137 -0
  15. patchr-0.1.0.dist-info/RECORD +116 -0
  16. patchr-0.1.0.dist-info/WHEEL +5 -0
  17. patchr-0.1.0.dist-info/entry_points.txt +5 -0
  18. patchr-0.1.0.dist-info/licenses/LICENSE +17 -0
  19. patchr-0.1.0.dist-info/top_level.txt +3 -0
  20. picux/__init__.py +6 -0
  21. picux/agents/__init__.py +5 -0
  22. picux/agents/registry.py +204 -0
  23. picux/api/__init__.py +5 -0
  24. picux/api/service.py +5075 -0
  25. picux/audit/__init__.py +31 -0
  26. picux/audit/activity.py +97 -0
  27. picux/audit/observability.py +55 -0
  28. picux/audit/verification/__init__.py +21 -0
  29. picux/audit/verification/ledger.py +633 -0
  30. picux/benchmarks/__init__.py +5 -0
  31. picux/benchmarks/local.py +286 -0
  32. picux/config.py +140 -0
  33. picux/contracts/__init__.py +22 -0
  34. picux/contracts/handshake.py +122 -0
  35. picux/contracts/integration.py +385 -0
  36. picux/contracts/openapi.py +187 -0
  37. picux/contracts/protocol_map.py +152 -0
  38. picux/contracts/routes.py +980 -0
  39. picux/contracts/schema_catalog.py +125 -0
  40. picux/core/__init__.py +17 -0
  41. picux/core/models.py +148 -0
  42. picux/core/router.py +131 -0
  43. picux/core/runtime.py +42 -0
  44. picux/core/state_machine.py +38 -0
  45. picux/domains/__init__.py +2 -0
  46. picux/domains/bridge/HostRun.py +1104 -0
  47. picux/domains/bridge/__init__.py +6 -0
  48. picux/domains/bridge/engine.py +345 -0
  49. picux/domains/hunt/__init__.py +6 -0
  50. picux/domains/hunt/engine.py +307 -0
  51. picux/domains/hunt/models.py +88 -0
  52. picux/domains/pay/__init__.py +16 -0
  53. picux/domains/pay/adapters.py +607 -0
  54. picux/domains/pay/engine.py +950 -0
  55. picux/domains/pay/models.py +95 -0
  56. picux/domains/proxy/__init__.py +5 -0
  57. picux/domains/proxy/engine.py +466 -0
  58. picux/domains/resolve/__init__.py +5 -0
  59. picux/domains/resolve/engine.py +546 -0
  60. picux/orchestrator/__init__.py +3 -0
  61. picux/orchestrator/engine.py +2840 -0
  62. picux/portals/__init__.py +17 -0
  63. picux/portals/templates.py +272 -0
  64. picux/protocols/__init__.py +1 -0
  65. picux/protocols/a2a/__init__.py +6 -0
  66. picux/protocols/a2a/client.py +51 -0
  67. picux/protocols/a2a/envelope.py +132 -0
  68. picux/protocols/mcp/__init__.py +7 -0
  69. picux/protocols/mcp/client.py +69 -0
  70. picux/protocols/mcp/contract.py +67 -0
  71. picux/protocols/mcp/server.py +76 -0
  72. picux/sandbox/__init__.py +6 -0
  73. picux/sandbox/midnight_arbitrage.py +215 -0
  74. picux/sandbox/models.py +90 -0
  75. picux/sdk/__init__.py +13 -0
  76. picux/sdk/client.py +768 -0
  77. picux/sdk/external.py +245 -0
  78. picux/security/__init__.py +18 -0
  79. picux/security/auth.py +86 -0
  80. picux/security/config_validator.py +58 -0
  81. picux/security/policy.py +158 -0
  82. picux/security/secrets.py +144 -0
  83. picux/signals/__init__.py +1 -0
  84. picux/signals/community/__init__.py +24 -0
  85. picux/signals/community/adapters/__init__.py +7 -0
  86. picux/signals/community/adapters/reddit.py +37 -0
  87. picux/signals/community/adapters/shopify.py +23 -0
  88. picux/signals/community/adapters/web.py +23 -0
  89. picux/signals/community/disambiguation.py +51 -0
  90. picux/signals/community/intake.py +227 -0
  91. picux/signals/community/models.py +102 -0
  92. picux/signals/community/rules.py +91 -0
  93. picux/signals/community/scoring.py +64 -0
  94. picux/storage/__init__.py +41 -0
  95. picux/storage/agents.py +50 -0
  96. picux/storage/cases.py +440 -0
  97. picux/storage/channels.py +476 -0
  98. picux/storage/connectors.py +411 -0
  99. picux/storage/envelopes.py +137 -0
  100. picux/storage/escrows.py +168 -0
  101. picux/storage/events.py +989 -0
  102. picux/storage/keyspace.py +60 -0
  103. picux/storage/mandates.py +107 -0
  104. picux/storage/portals.py +222 -0
  105. picux/storage/postgres.py +2049 -0
  106. picux/storage/providers.py +148 -0
  107. picux/storage/proxy.py +231 -0
  108. picux/storage/receipts.py +131 -0
  109. picux/storage/signals.py +147 -0
  110. picux/storage/tasks.py +179 -0
  111. picux/tools/__init__.py +11 -0
  112. picux/tools/shared.py +2048 -0
  113. picux/verification/__init__.py +5 -0
  114. picux/verification/rollout.py +183 -0
  115. picux/workflows/__init__.py +5 -0
  116. picux/workflows/templates.py +74 -0
picux/tools/shared.py ADDED
@@ -0,0 +1,2048 @@
1
+ from __future__ import annotations
2
+
3
+ import base64
4
+ import hashlib
5
+ import html
6
+ import ipaddress
7
+ import json
8
+ import os
9
+ import re
10
+ import shutil
11
+ import subprocess
12
+ import tempfile
13
+ from dataclasses import dataclass, field
14
+ from pathlib import Path
15
+ from typing import Any
16
+ from urllib.parse import quote_plus, urlencode, unquote_to_bytes, urlparse
17
+ from urllib.request import Request, urlopen
18
+
19
+ from picux.core import PicuxIntentRouter
20
+
21
+
22
# Domain identifiers referenced by the shared tools in this module.
SHARED_DOMAINS = set(("hunt", "bridge", "resolve", "proxy", "pay"))

# Built-in currency table keyed by ISO 4217 code.
# NOTE(review): values look like "units per 1 USD" (USD is 1.0) - confirm
# against the helper that consumes this table.
DEFAULT_FX_RATES = dict(
    USD=1.0,
    SEK=9.207515,
    NGN=1359.86,
)
28
+
29
+
30
@dataclass(frozen=True)
class NLPTool:
    """Turn raw user input into a routing decision plus follow-up metadata."""

    # Classifier consulted for the initial domain decision.
    router: PicuxIntentRouter = field(default_factory=PicuxIntentRouter)

    def run(self, payload: dict[str, Any]) -> dict[str, Any]:
        """Classify the payload's query and describe how it should be routed.

        Args:
            payload: Request mapping. ``context`` is honoured only when it is a
                dict; ``taskId`` is read from the payload first and from the
                context second.

        Returns:
            A result dict with ``ok``, ``tool``, ``input``, ``intent``,
            ``route``, ``isFollowUp``, ``followUp``, ``entities`` and
            ``confidence`` keys.
        """
        query = _query(payload)

        rawContext = payload.get("context")
        context = rawContext if isinstance(rawContext, dict) else {}
        contextTaskId = context.get("taskId", "")
        taskId = str(payload.get("taskId", contextTaskId) or "")

        classified = self.router.classify(query)
        domains = _route(query, seed=classified.domain.value)
        found = _entities(query, payload)

        # A follow-up needs both a task to refer to and follow-up phrasing;
        # the phrasing check is skipped entirely when there is no task id.
        followsUp = bool(taskId) and bool(_looksLikeFollowUp(query))

        intent = {
            "domain": domains[0] if domains else classified.domain.value,
            "reason": classified.reason,
            "kind": _intentKind(query),
        }
        priorState = context.get("state", context.get("status", ""))
        followUp = {
            "taskId": taskId,
            "state": str(priorState or ""),
            "reason": "ongoingTaskReference" if followsUp else "",
        }
        score = _confidence(query, domains, found)

        return {
            "ok": True,
            "tool": "nlpTool",
            "input": query,
            "intent": intent,
            "route": domains,
            "isFollowUp": followsUp,
            "followUp": followUp,
            "entities": found,
            "confidence": score,
        }
63
+
64
+
65
@dataclass(frozen=True)
class ImageReader:
    """Extract text signals from image evidence, scoped to the user's query."""

    def read(self, payload: dict[str, Any]) -> dict[str, Any]:
        """Read every image input in the payload and summarise what was found.

        Args:
            payload: Request mapping from which the query and the image inputs
                are derived.

        Returns:
            A result dict holding one ``readings`` entry and one ``evidence``
            entry per image, plus ``count`` and a ``status`` of ``"read"`` or
            ``"noImages"``.
        """
        query = _query(payload)
        readings: list[dict[str, Any]] = []
        evidence: list[dict[str, Any]] = []

        for position, image in enumerate(_imageInputs(payload), start=1):
            fallbackId = f"image_{position}"
            rawBytes = _imageBytes(image)
            text = _imageText(image, rawBytes)

            # Hash the raw bytes when available, otherwise the extracted text;
            # an image with neither gets an empty digest (not source-bound).
            if rawBytes:
                digest = hashlib.sha256(rawBytes).hexdigest()
            elif text:
                digest = hashlib.sha256(text.encode("utf-8")).hexdigest()
            else:
                digest = ""

            matches = _matches(query, text)

            # Identifier precedence: imageId, then artifactId, then name.
            nameOrDefault = image.get("name", fallbackId)
            artifactOrName = image.get("artifactId", nameOrDefault)
            imageId = str(image.get("imageId", artifactOrName) or fallbackId)

            displayName = str(image.get("name", imageId) or imageId)
            mime = str(image.get("mime", image.get("contentType", "")) or "")
            redactedText = _redact(text[:4000])
            summary = _summary(text, matches)

            readings.append(
                {
                    "imageId": imageId,
                    "name": displayName,
                    "mime": mime,
                    "sha256": digest,
                    "text": redactedText,
                    "matches": matches,
                    "summary": summary,
                    "sourceBound": bool(digest),
                }
            )
            evidence.append(
                {
                    "kind": "imageText",
                    "name": displayName,
                    "sha256": digest,
                    "mime": mime,
                    "label": f"image:{displayName}",
                    "source": "picux://tools/imageReader",
                    "summary": summary,
                }
            )

        status = "read" if readings else "noImages"
        return {
            "ok": True,
            "tool": "imageReader",
            "query": query,
            "count": len(readings),
            "readings": readings,
            "evidence": evidence,
            "status": status,
        }
111
+
112
+
113
+ @dataclass(frozen=True)
114
+ class BrowserReader:
115
+ """Headless-browser compatible website reader with a safe stdlib fallback."""
116
+
117
+ timeoutSec: int = 6
118
+ renderedTimeoutMs: int = 12000
119
+ renderedCommand: tuple[str, ...] | None = None
120
+
121
+ def read(self, payload: dict[str, Any]) -> dict[str, Any]:
122
+ query = _query(payload)
123
+ payloadEntities = payload.get("entities", {}) if isinstance(payload.get("entities"), dict) else {}
124
+ entities = {**payloadEntities, **_entities(query, payload)}
125
+ fxRates = _fxRates(payload)
126
+ budget = _budgetContext(query, payload, fxRates)
127
+ urls = [str(item).strip() for item in payload.get("urls", []) if str(item).strip()] if isinstance(payload.get("urls"), list) else []
128
+ marketplaceSet = _marketplaceSet(query, entities, payload)
129
+ searchTargets = _searchTargets(query, entities, payload, marketplaceSet=marketplaceSet)
130
+ allowNetwork = bool(payload.get("allowNetwork", payload.get("live", False)))
131
+ suppliedTargets = [{"source": "clientUrl", "url": url, "kind": "supplied"} for url in urls]
132
+ liveTargets = searchTargets if allowNetwork else []
133
+ readTargets = _dedupeTargets([*suppliedTargets, *liveTargets])
134
+ allowPrivateNetwork = bool(payload.get("allowPrivateNetwork", payload.get("allowLocalNetwork", False)))
135
+ preferRendered = allowNetwork and str(payload.get("browserMode", payload.get("adapter", "auto")) or "auto").lower() not in {"http", "stdlib", "stdlibhttpreader"}
136
+ searchText = str(payload.get("searchText", entities.get("item", "") or query) or "").strip()
137
+ targetLimit = _targetLimit(payload)
138
+ selectedTargets = readTargets[:targetLimit]
139
+ blockedTargets = [
140
+ _blockedUrlObservation(
141
+ str(target.get("url", "") or ""),
142
+ query,
143
+ source=str(target.get("source", "source") or "source"),
144
+ error=blockedReason,
145
+ )
146
+ for target in selectedTargets
147
+ for blockedReason in [_privateTargetBlockReason(str(target.get("url", "") or ""), allowPrivateNetwork=allowPrivateNetwork)]
148
+ if blockedReason
149
+ ]
150
+ safeTargets = [
151
+ target
152
+ for target in selectedTargets
153
+ if not _privateTargetBlockReason(str(target.get("url", "") or ""), allowPrivateNetwork=allowPrivateNetwork)
154
+ ]
155
+ rendered = self._readRendered(safeTargets, query, searchText) if preferRendered and safeTargets else _renderedSkipped("notRequested" if safeTargets else "noTargets")
156
+ observations = [*blockedTargets, *rendered["observations"]]
157
+ adapter = "playwrightChromium" if observations and rendered["meta"].get("available") else "stdlibHttpReader"
158
+ if not rendered["observations"]:
159
+ observations = list(blockedTargets)
160
+ for target in safeTargets:
161
+ observations.append(self._readUrl(str(target.get("url", "")), query, source=str(target.get("source", "source") or "source"), allowPrivateNetwork=allowPrivateNetwork))
162
+ adapter = "stdlibHttpReader"
163
+ offers = _offersFromObservations(observations, query, entities, budget=budget, fxRates=fxRates)
164
+ errors = [item for item in observations if not item.get("ok")]
165
+ if observations and len(errors) == len(observations):
166
+ status = "ioError"
167
+ elif observations:
168
+ status = "read"
169
+ elif searchTargets:
170
+ status = "needsSourceUrls"
171
+ else:
172
+ status = "noSources"
173
+ return {
174
+ "ok": not observations or len(errors) < len(observations),
175
+ "tool": "browserReader",
176
+ "engine": "renderedBrowserCompatible",
177
+ "adapter": adapter,
178
+ "rendered": rendered["meta"],
179
+ "query": query,
180
+ "marketplaceSet": marketplaceSet,
181
+ "budget": budget,
182
+ "fx": {"base": "USD", "rates": fxRates, "mode": "fallbackOrOverride"},
183
+ "searchTargets": searchTargets,
184
+ "observations": observations,
185
+ "offers": offers,
186
+ "errors": errors,
187
+ "marketplaceAttempts": _attemptSummaries(observations),
188
+ "networkAttempted": allowNetwork,
189
+ "status": status,
190
+ }
191
+
192
+ def _readUrl(self, url: str, query: str, *, source: str = "source", allowPrivateNetwork: bool = False) -> dict[str, Any]:
193
+ parsed = urlparse(url)
194
+ if parsed.scheme == "data":
195
+ raw = unquote_to_bytes(parsed.path.split(",", 1)[1] if "," in parsed.path else parsed.path)
196
+ text = raw.decode("utf-8", errors="ignore")
197
+ return _pageObservation(url, text, query, "dataUrl", source=source, statusCode=200)
198
+ if parsed.scheme not in {"http", "https"}:
199
+ return {"source": source, "url": url, "ok": False, "adapter": "http", "error": "unsupportedScheme", "snippets": []}
200
+ blockedReason = _privateTargetBlockReason(url, allowPrivateNetwork=allowPrivateNetwork)
201
+ if blockedReason:
202
+ return _blockedUrlObservation(url, query, source=source, error=blockedReason)
203
+ try:
204
+ request = Request(
205
+ url,
206
+ headers={
207
+ "User-Agent": "Mozilla/5.0 (compatible; PicuxBrowserReader/0.1; +https://picux.ai)",
208
+ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
209
+ "Accept-Language": "en-US,en;q=0.9,sv;q=0.7",
210
+ },
211
+ )
212
+ with urlopen(request, timeout=self.timeoutSec) as response:
213
+ raw = response.read(500_000)
214
+ statusCode = int(getattr(response, "status", 200) or 200)
215
+ text = raw.decode("utf-8", errors="ignore")
216
+ return _pageObservation(url, text, query, "http", source=source, statusCode=statusCode)
217
+ except Exception as exc:
218
+ return {"source": source, "url": url, "ok": False, "adapter": "http", "statusCode": int(getattr(exc, "code", 0) or 0), "error": str(exc)[:180], "snippets": []}
219
+
220
+ def _readRendered(self, targets: list[dict[str, str]], query: str, searchText: str) -> dict[str, Any]:
221
+ command = self._renderedCommand()
222
+ if not command:
223
+ return _renderedSkipped("missingRenderedCommand")
224
+ payload = {
225
+ "query": query,
226
+ "searchText": searchText,
227
+ "targets": targets,
228
+ "timeoutMs": self.renderedTimeoutMs,
229
+ }
230
+ try:
231
+ completed = subprocess.run(
232
+ list(command),
233
+ input=json.dumps(payload, ensure_ascii=True),
234
+ capture_output=True,
235
+ check=False,
236
+ text=True,
237
+ timeout=max(10, int((self.renderedTimeoutMs / 1000) * max(1, len(targets)) + 8)),
238
+ )
239
+ except Exception as exc:
240
+ return _renderedSkipped("renderedCommandFailed", str(exc)[:220])
241
+ if completed.returncode != 0:
242
+ return _renderedSkipped("renderedCommandNonZero", (completed.stderr or completed.stdout)[:220])
243
+ try:
244
+ result = json.loads(completed.stdout or "{}")
245
+ except Exception as exc:
246
+ return _renderedSkipped("renderedCommandInvalidJson", str(exc)[:220])
247
+ meta = result.get("meta") if isinstance(result.get("meta"), dict) else {}
248
+ observations = _normalizeRenderedObservations(result.get("observations", []), query)
249
+ renderedMeta = {
250
+ "attempted": bool(meta.get("attempted", True)),
251
+ "available": bool(meta.get("available", bool(observations))),
252
+ "status": str(meta.get("status", "rendered" if observations else "unavailable") or ""),
253
+ "fallbackReason": str(meta.get("fallbackReason", "") or ""),
254
+ "adapter": str(result.get("adapter", "playwrightChromium") or "playwrightChromium"),
255
+ }
256
+ if meta.get("error"):
257
+ renderedMeta["error"] = str(meta.get("error", "") or "")[:220]
258
+ for key in ("browserProvider", "executablePath", "fallbackReasonDetail"):
259
+ if meta.get(key):
260
+ renderedMeta[key] = str(meta.get(key, "") or "")[:600]
261
+ return {
262
+ "observations": observations,
263
+ "meta": renderedMeta,
264
+ }
265
+
266
+ def _renderedCommand(self) -> tuple[str, ...]:
267
+ if self.renderedCommand:
268
+ return self.renderedCommand
269
+ script = Path(__file__).resolve().parents[3] / "scripts" / "picux-rendered-browser.mjs"
270
+ return ("node", str(script)) if script.exists() else ()
271
+
272
+
273
+ @dataclass(frozen=True)
274
+ class BrowserCheckout:
275
+ """Source-bound browser checkout attempt that fails closed when purchase execution is not proven."""
276
+
277
+ renderedTimeoutMs: int = 20000
278
+ renderedCommand: tuple[str, ...] | None = None
279
+
280
+ def run(self, payload: dict[str, Any]) -> dict[str, Any]:
281
+ url = str(payload.get("url", payload.get("sourceUrl", "")) or "").strip()
282
+ candidateId = str(payload.get("candidateId", payload.get("sourceId", "")) or "")
283
+ source = str(payload.get("source", "") or "")
284
+ execute = bool(payload.get("executePurchase", payload.get("allowPurchaseExecution", False)))
285
+ if not url:
286
+ return self._blocked("missingListingUrl", url=url, candidateId=candidateId, source=source, execute=execute)
287
+ parsed = urlparse(url)
288
+ if not execute:
289
+ return self._blocked("purchaseExecutionNotAuthorized", url=url, candidateId=candidateId, source=source, execute=execute)
290
+ if parsed.scheme in {"file", "about", "javascript"}:
291
+ return self._blocked("unsupportedCheckoutUrlScheme", url=url, candidateId=candidateId, source=source, execute=execute)
292
+ blockedReason = _privateTargetBlockReason(url, allowPrivateNetwork=bool(payload.get("allowPrivateNetwork", payload.get("allowLocalNetwork", False))))
293
+ if blockedReason:
294
+ return self._blocked(blockedReason, url=url, candidateId=candidateId, source=source, execute=execute)
295
+ if parsed.scheme == "data":
296
+ return self._dataCheckout(url, candidateId=candidateId, source=source, execute=execute)
297
+ rendered = self._runRendered(payload, url=url, candidateId=candidateId, source=source, execute=execute)
298
+ if rendered:
299
+ return rendered
300
+ return self._blocked("marketplaceCheckoutAdapterMissing", url=url, candidateId=candidateId, source=source, execute=execute)
301
+
302
+ def _dataCheckout(self, url: str, *, candidateId: str, source: str, execute: bool) -> dict[str, Any]:
303
+ try:
304
+ raw = unquote_to_bytes(urlparse(url).path.split(",", 1)[1] if "," in urlparse(url).path else urlparse(url).path)
305
+ text = raw.decode("utf-8", errors="ignore")
306
+ except Exception as exc:
307
+ return self._blocked(f"dataCheckoutReadFailed:{str(exc)[:80]}", url=url, candidateId=candidateId, source=source, execute=execute)
308
+ lowered = text.lower()
309
+ if "data-picux-checkout=\"success\"" in lowered or "data-picux-checkout='success'" in lowered or "purchase complete" in lowered or "order confirmed" in lowered:
310
+ receiptId = _stableToolId("purchase", {"url": url, "candidateId": candidateId, "source": source})
311
+ return {
312
+ "ok": True,
313
+ "tool": "browserCheckout",
314
+ "status": "purchased",
315
+ "reason": "",
316
+ "message": "Item successfully purchased",
317
+ "candidateId": candidateId,
318
+ "source": source,
319
+ "url": url,
320
+ "executePurchase": execute,
321
+ "receipt": {"receiptId": receiptId, "status": "confirmed", "source": source, "candidateId": candidateId, "url": url},
322
+ }
323
+ if "out of stock" in lowered or "sold out" in lowered:
324
+ return self._blocked("itemUnavailable", url=url, candidateId=candidateId, source=source, execute=execute)
325
+ if "payment declined" in lowered or "payment failed" in lowered:
326
+ return self._blocked("paymentDeclined", url=url, candidateId=candidateId, source=source, execute=execute)
327
+ if "mfa required" in lowered or "verification code" in lowered or "one-time code" in lowered:
328
+ blocked = self._blocked("mfaRequired", url=url, candidateId=candidateId, source=source, execute=execute)
329
+ return {**blocked, "status": "needsInput", "needsInput": [{"key": "mfaCode", "label": "MFA code", "secret": True}]}
330
+ if "login required" in lowered or "sign in" in lowered or "account required" in lowered:
331
+ blocked = self._blocked("loginOrAccountRequired", url=url, candidateId=candidateId, source=source, execute=execute)
332
+ return {**blocked, "status": "needsInput", "needsInput": [{"key": "portalSessionId", "label": "Authenticated marketplace session", "secret": False}]}
333
+ return self._blocked("purchaseConfirmationNotFound", url=url, candidateId=candidateId, source=source, execute=execute)
334
+
335
+ def _runRendered(self, payload: dict[str, Any], *, url: str, candidateId: str, source: str, execute: bool) -> dict[str, Any]:
336
+ command = self._renderedCommand()
337
+ if not command:
338
+ return {}
339
+ try:
340
+ completed = subprocess.run(
341
+ list(command),
342
+ input=json.dumps({**payload, "url": url, "candidateId": candidateId, "source": source, "executePurchase": execute, "timeoutMs": self.renderedTimeoutMs}, ensure_ascii=True),
343
+ capture_output=True,
344
+ check=False,
345
+ text=True,
346
+ timeout=max(12, int(self.renderedTimeoutMs / 1000) + 10),
347
+ )
348
+ except Exception as exc:
349
+ return self._blocked(f"browserCheckoutCommandFailed:{str(exc)[:120]}", url=url, candidateId=candidateId, source=source, execute=execute)
350
+ if completed.returncode != 0:
351
+ return self._blocked(f"browserCheckoutCommandNonZero:{(completed.stderr or completed.stdout)[:120]}", url=url, candidateId=candidateId, source=source, execute=execute)
352
+ try:
353
+ result = json.loads(completed.stdout or "{}")
354
+ except Exception as exc:
355
+ return self._blocked(f"browserCheckoutInvalidJson:{str(exc)[:120]}", url=url, candidateId=candidateId, source=source, execute=execute)
356
+ if not isinstance(result, dict):
357
+ return self._blocked("browserCheckoutInvalidResult", url=url, candidateId=candidateId, source=source, execute=execute)
358
+ return {
359
+ "tool": "browserCheckout",
360
+ "candidateId": candidateId,
361
+ "source": source,
362
+ "url": url,
363
+ "executePurchase": execute,
364
+ **result,
365
+ }
366
+
367
+ def _renderedCommand(self) -> tuple[str, ...]:
368
+ if self.renderedCommand:
369
+ return self.renderedCommand
370
+ script = Path(__file__).resolve().parents[3] / "scripts" / "picux-rendered-checkout.mjs"
371
+ return ("node", str(script)) if script.exists() else ()
372
+
373
+ @staticmethod
374
+ def _blocked(reason: str, *, url: str, candidateId: str, source: str, execute: bool) -> dict[str, Any]:
375
+ return {
376
+ "ok": False,
377
+ "tool": "browserCheckout",
378
+ "status": "blocked",
379
+ "reason": reason,
380
+ "message": f"Sorry, I am unable to make the purchase due to {reason}.",
381
+ "candidateId": candidateId,
382
+ "source": source,
383
+ "url": url,
384
+ "executePurchase": execute,
385
+ }
386
+
387
+
388
+ @dataclass(frozen=True)
389
+ class PortalBrowser:
390
+ """Authenticated portal action runner that fails closed and returns proof-ready artifacts."""
391
+
392
+ timeoutSec: int = 8
393
+ renderedTimeoutMs: int = 20000
394
+ renderedCommand: tuple[str, ...] | None = None
395
+
396
+ def run(self, payload: dict[str, Any]) -> dict[str, Any]:
397
+ targetUrl = str(payload.get("targetUrl", payload.get("url", "")) or "").strip()
398
+ actionId = str(payload.get("portalActionId", payload.get("actionId", "")) or "")
399
+ session = payload.get("session", {}) if isinstance(payload.get("session"), dict) else {}
400
+ credentialRef = str(payload.get("credentialRef", session.get("credentialRef", "")) or "")
401
+ storageStateRef = str(payload.get("storageStateRef", session.get("storageStateRef", "")) or "")
402
+ steps = _portalStepResults(payload.get("steps", []))
403
+ inputs = payload.get("inputs", {}) if isinstance(payload.get("inputs"), dict) else {}
404
+ if not targetUrl:
405
+ return {**self._blocked("missingTargetUrl", actionId=actionId, targetUrl=targetUrl), "steps": _portalStepsWithStatus(steps, "blocked", reason="missingTargetUrl")}
406
+ scheme = urlparse(targetUrl).scheme
407
+ if scheme in {"file", "about", "javascript"}:
408
+ return {**self._blocked("unsupportedPortalTargetScheme", actionId=actionId, targetUrl=targetUrl), "steps": _portalStepsWithStatus(steps, "blocked", reason="unsupportedPortalTargetScheme")}
409
+ blockedReason = _privateTargetBlockReason(targetUrl, allowPrivateNetwork=bool(payload.get("allowPrivateNetwork", payload.get("allowLocalNetwork", False))))
410
+ if blockedReason:
411
+ return {**self._blocked(blockedReason, actionId=actionId, targetUrl=targetUrl), "steps": _portalStepsWithStatus(steps, "blocked", reason=blockedReason)}
412
+ if not credentialRef and not storageStateRef and not targetUrl.startswith("data:"):
413
+ return {
414
+ **self._blocked("authRequired", actionId=actionId, targetUrl=targetUrl),
415
+ "status": "needsInput",
416
+ "needsInput": [{"key": "credentialRef", "label": "Credential reference", "secret": True}],
417
+ "steps": _portalStepsWithStatus(steps, "pending", reason="authRequired"),
418
+ }
419
+ missingInputs = _missingPortalInputs(steps, inputs, requiredKeys=_portalRequiredInputs(payload, steps))
420
+ if missingInputs:
421
+ return {
422
+ **self._blocked("missingPortalInputs", actionId=actionId, targetUrl=targetUrl),
423
+ "status": "needsInput",
424
+ "needsInput": missingInputs,
425
+ "steps": _portalStepsWithMissingInputs(steps, missingInputs),
426
+ }
427
+ allowNetwork = bool(payload.get("allowNetwork", payload.get("live", payload.get("executePortal", False))))
428
+ rendered = self._runRendered(payload, targetUrl=targetUrl, actionId=actionId, steps=steps, allowNetwork=allowNetwork)
429
+ if rendered:
430
+ return rendered
431
+ text = self._readTarget(targetUrl, allowNetwork=allowNetwork)
432
+ lowered = text.lower()
433
+ if any(token in lowered for token in ("mfa required", "2fa", "verification code", "one-time code")):
434
+ return {
435
+ **self._blocked("mfaRequired", actionId=actionId, targetUrl=targetUrl),
436
+ "status": "needsProxy",
437
+ "requiresProxy": True,
438
+ "needsInput": [{"key": "mfaCode", "label": "Verification code", "secret": True}],
439
+ "steps": _portalStepsWithStatus(steps, "pending", reason="mfaRequired"),
440
+ }
441
+ if any(token in lowered for token in ("login required", "sign in required", "authentication required")):
442
+ return {
443
+ **self._blocked("loginRequired", actionId=actionId, targetUrl=targetUrl),
444
+ "status": "needsInput",
445
+ "needsInput": [{"key": "credentialRef", "label": "Credential reference", "secret": True}],
446
+ "steps": _portalStepsWithStatus(steps, "pending", reason="loginRequired"),
447
+ }
448
+ if "data-picux-portal=\"success\"" in lowered or "data-picux-portal='success'" in lowered or "claim submitted" in lowered or "form submitted" in lowered or "record retrieved" in lowered:
449
+ proofId = _stableToolId("portalProof", {"actionId": actionId, "targetUrl": targetUrl, "text": text[:500]})
450
+ completedSteps = _portalStepsWithStatus(steps, "succeeded", reason="")
451
+ outcome = _portalOutcome(text)
452
+ return {
453
+ "ok": True,
454
+ "tool": "portalBrowser",
455
+ "status": "succeeded",
456
+ "message": "Portal action completed.",
457
+ "portalActionId": actionId,
458
+ "targetUrl": targetUrl,
459
+ "steps": completedSteps,
460
+ "proof": {
461
+ "proofId": proofId,
462
+ "kind": "domSnapshot",
463
+ "status": "captured",
464
+ "label": "Portal completion snapshot",
465
+ "artifactRef": proofId,
466
+ "sourceUrl": targetUrl,
467
+ "hash": "sha256:" + hashlib.sha256(text.encode("utf-8")).hexdigest(),
468
+ "meta": {"snippet": _redact(_stripTags(text)[:700]), "outcome": outcome},
469
+ },
470
+ "result": {**outcome, "sourceBound": True, "stepSummary": _portalStepSummary(completedSteps)},
471
+ }
472
+ return {**self._blocked("completionProofNotFound", actionId=actionId, targetUrl=targetUrl), "steps": _portalStepsWithStatus(steps, "attempted", reason="completionProofNotFound")}
473
+
474
+ def _runRendered(self, payload: dict[str, Any], *, targetUrl: str, actionId: str, steps: list[dict[str, Any]], allowNetwork: bool) -> dict[str, Any]:
475
+ parsed = urlparse(targetUrl)
476
+ if parsed.scheme not in {"http", "https"} or not allowNetwork:
477
+ return {}
478
+ command = self._renderedCommand()
479
+ if not command:
480
+ return {}
481
+ try:
482
+ completed = subprocess.run(
483
+ list(command),
484
+ input=json.dumps({**payload, "targetUrl": targetUrl, "portalActionId": actionId, "steps": steps, "timeoutMs": self.renderedTimeoutMs}, ensure_ascii=True),
485
+ capture_output=True,
486
+ check=False,
487
+ text=True,
488
+ timeout=max(12, int(self.renderedTimeoutMs / 1000) + 10),
489
+ )
490
+ except Exception as exc:
491
+ return {**self._blocked(f"renderedPortalCommandFailed:{str(exc)[:120]}", actionId=actionId, targetUrl=targetUrl), "steps": _portalStepsWithStatus(steps, "blocked", reason="renderedPortalCommandFailed")}
492
+ if completed.returncode != 0:
493
+ return {**self._blocked(f"renderedPortalCommandNonZero:{(completed.stderr or completed.stdout)[:120]}", actionId=actionId, targetUrl=targetUrl), "steps": _portalStepsWithStatus(steps, "blocked", reason="renderedPortalCommandNonZero")}
494
+ try:
495
+ result = json.loads(completed.stdout or "{}")
496
+ except Exception as exc:
497
+ return {**self._blocked(f"renderedPortalInvalidJson:{str(exc)[:120]}", actionId=actionId, targetUrl=targetUrl), "steps": _portalStepsWithStatus(steps, "blocked", reason="renderedPortalInvalidJson")}
498
+ if not isinstance(result, dict):
499
+ return {**self._blocked("renderedPortalInvalidResult", actionId=actionId, targetUrl=targetUrl), "steps": _portalStepsWithStatus(steps, "blocked", reason="renderedPortalInvalidResult")}
500
+ return {"tool": "portalBrowser", "portalActionId": actionId, "targetUrl": targetUrl, **result}
501
+
502
+ def _renderedCommand(self) -> tuple[str, ...]:
503
+ if self.renderedCommand:
504
+ return self.renderedCommand
505
+ script = Path(__file__).resolve().parents[3] / "scripts" / "picux-rendered-portal.mjs"
506
+ return ("node", str(script)) if script.exists() else ()
507
+
508
+ def _readTarget(self, targetUrl: str, *, allowNetwork: bool) -> str:
509
+ parsed = urlparse(targetUrl)
510
+ if parsed.scheme == "data":
511
+ raw = unquote_to_bytes(parsed.path.split(",", 1)[1] if "," in parsed.path else parsed.path)
512
+ return raw.decode("utf-8", errors="ignore")
513
+ if parsed.scheme in {"http", "https"} and allowNetwork:
514
+ req = Request(targetUrl, headers={"User-Agent": "PicuxPortalBrowser/0.1", "Accept": "text/html,application/xhtml+xml"})
515
+ with urlopen(req, timeout=self.timeoutSec) as response:
516
+ return response.read(500_000).decode("utf-8", errors="ignore")
517
+ return ""
518
+
519
+ @staticmethod
520
+ def _blocked(reason: str, *, actionId: str, targetUrl: str) -> dict[str, Any]:
521
+ return {
522
+ "ok": False,
523
+ "tool": "portalBrowser",
524
+ "status": "blocked",
525
+ "reason": reason,
526
+ "message": f"Portal action blocked: {reason}.",
527
+ "portalActionId": actionId,
528
+ "targetUrl": targetUrl,
529
+ }
530
+
531
+
532
@dataclass(frozen=True)
class MapTool:
    """Source-bound local place search for HUNT/RESOLVE without fabricating places.

    Each ``search`` call answers from exactly one provider, chosen in this
    order: places supplied in the payload, the Google Places web service (when
    an API key is found), then OpenStreetMap Nominatim (when network access is
    allowed). Every place returned comes from one of those sources.
    """

    # Timeout in seconds handed to ``urlopen`` by ``_fetchJson``.
    timeoutSec: int = 6
    # Optional stand-in for the HTTP fetch; ``_fetchJson`` calls it as ``fetcher(url, headers)``.
    fetcher: Any | None = None

    def search(self, payload: dict[str, Any]) -> dict[str, Any]:
        """Run a place search described by ``payload`` and return a result dict.

        The entity, location and radius are derived from the payload and its
        query text. The result always has the shape produced by ``_result``;
        provider failures are reported as ``status="ioError"`` rather than raised.
        """
        query = _query(payload)
        entity = _serviceEntity(query, payload)
        location = _placeLocation(query, payload)
        radiusKm = _radiusKm(query, payload)
        allowNetwork = bool(payload.get("allowNetwork", payload.get("live", False)))
        supplied = payload.get("places", payload.get("results", []))
        suppliedPlaces = supplied if isinstance(supplied, list) else []
        if not entity or not location:
            # Only one error is reported: a missing entity masks a missing location.
            return self._result(
                query=query,
                provider="mapTool",
                entity=entity,
                location=location,
                radiusKm=radiusKm,
                places=[],
                status="needsPlaceQuery",
                allowNetwork=allowNetwork,
                errors=["missing:entity" if not entity else "missing:location"],
            )
        if suppliedPlaces:
            # Caller-provided places win over any remote provider.
            places = self._normalizeSuppliedPlaces(suppliedPlaces, entity=entity, location=location, radiusKm=radiusKm)
            return self._result(query=query, provider="clientPlaces", entity=entity, location=location, radiusKm=radiusKm, places=places, status="found" if places else "searchedNoPlaces", allowNetwork=allowNetwork)

        # First non-empty key wins: payload fields, then PICUX_* env vars, then generic env vars.
        googleKey = str(
            payload.get("googlePlacesApiKey")
            or payload.get("googleMapsApiKey")
            or os.environ.get("PICUX_GOOGLE_PLACES_API_KEY", "")
            or os.environ.get("PICUX_GOOGLE_MAPS_API_KEY", "")
            or os.environ.get("GOOGLE_PLACES_API_KEY", "")
            or os.environ.get("GOOGLE_MAPS_API_KEY", "")
            or ""
        ).strip()
        if googleKey:
            # NOTE: this branch runs before the ``allowNetwork`` check below, so a
            # configured key leads to a Google request even when allowNetwork is false.
            try:
                baseUrl = str(
                    os.environ.get("PICUX_GOOGLE_PLACES_BASE_URL", "")
                    or "https://maps.googleapis.com/maps/api/place"
                ).rstrip("/")
                places = self._googlePlaces(query=query, entity=entity, location=location, radiusKm=radiusKm, apiKey=googleKey, baseUrl=baseUrl)
                return self._result(query=query, provider="googlePlaces", entity=entity, location=location, radiusKm=radiusKm, places=places, status="found" if places else "searchedNoPlaces", allowNetwork=True)
            except Exception as exc:
                # Any failure (including a single details lookup) becomes an ioError result;
                # the error text is truncated to 220 characters.
                return self._result(query=query, provider="googlePlaces", entity=entity, location=location, radiusKm=radiusKm, places=[], status="ioError", allowNetwork=True, errors=[str(exc)[:220]])

        if not allowNetwork:
            return self._result(query=query, provider="googlePlaces", entity=entity, location=location, radiusKm=radiusKm, places=[], status="needsPlacesApiKeyOrNetwork", allowNetwork=False, errors=["missing:GOOGLE_PLACES_API_KEY"])
        try:
            # No key but network allowed: fall back to Nominatim.
            places = self._openStreetMap(query=query, entity=entity, location=location, radiusKm=radiusKm)
            return self._result(query=query, provider="openStreetMapNominatim", entity=entity, location=location, radiusKm=radiusKm, places=places, status="found" if places else "searchedNoPlaces", allowNetwork=True)
        except Exception as exc:
            return self._result(query=query, provider="openStreetMapNominatim", entity=entity, location=location, radiusKm=radiusKm, places=[], status="ioError", allowNetwork=True, errors=[str(exc)[:220]])

    def _googlePlaces(self, *, query: str, entity: str, location: str, radiusKm: float, apiKey: str, baseUrl: str) -> list[dict[str, Any]]:
        """Query ``{baseUrl}/textsearch/json`` and return up to 10 cleaned places.

        ``query`` is accepted but not used here; the search text is built from
        ``entity`` and ``location``. One details request is made per result
        that carries a ``place_id``. Exceptions from ``_fetchJson`` propagate.
        """
        searchText = _placeSearchText(entity, location)
        url = f"{baseUrl}/textsearch/json?" + urlencode(
            {
                "query": searchText,
                # Radius is sent in metres, with a floor of 1 km.
                "radius": int(max(1.0, radiusKm) * 1000),
                "key": apiKey,
            }
        )
        data = self._fetchJson(url, headers={"Accept": "application/json"})
        raw = data.get("results", []) if isinstance(data, dict) else []
        places = []
        for item in raw[:10]:
            if not isinstance(item, dict):
                continue
            geo = item.get("geometry", {}) if isinstance(item.get("geometry"), dict) else {}
            loc = geo.get("location", {}) if isinstance(geo.get("location"), dict) else {}
            name = str(item.get("name", "") or "").strip()
            address = str(item.get("formatted_address", item.get("vicinity", "")) or "").strip()
            placeId = str(item.get("place_id", "") or "").strip()
            details = self._googlePlaceDetails(placeId=placeId, apiKey=apiKey, baseUrl=baseUrl) if placeId else {}
            sourceUrl = "https://www.google.com/maps/search/?" + urlencode({"api": "1", "query": f"{name} {address}".strip() or searchText, "query_place_id": placeId})
            places.append(
                _cleanPlace(
                    {
                        # Without a provider id, a deterministic id is derived from name and address.
                        "placeId": placeId or _stableToolId("place", {"provider": "googlePlaces", "name": name, "address": address}),
                        "name": name or str(details.get("name", "") or "").strip(),
                        "category": _first(item.get("types", [])),
                        "address": address or str(details.get("formatted_address", "") or "").strip(),
                        "phone": details.get("international_phone_number") or details.get("formatted_phone_number") or "",
                        "website": details.get("website") or "",
                        "rating": item.get("rating", 0),
                        "lat": loc.get("lat", 0),
                        "lng": loc.get("lng", 0),
                        "source": "googlePlaces",
                        "sourceUrl": sourceUrl,
                        # sourceUrl is always a non-empty string here, so this is always True.
                        "sourceBound": bool(placeId or sourceUrl),
                        "entity": entity,
                    }
                )
            )
        # Entries that ended up without a name are dropped.
        return [place for place in places if place.get("name")]

    def _googlePlaceDetails(self, *, placeId: str, apiKey: str, baseUrl: str) -> dict[str, Any]:
        """Fetch ``{baseUrl}/details/json`` for ``placeId`` and return its ``result`` dict.

        Returns ``{}`` when ``placeId`` is empty or the response has no dict ``result``.
        """
        if not placeId:
            return {}
        fields = "name,formatted_phone_number,international_phone_number,formatted_address,address_components,website"
        url = f"{baseUrl}/details/json?" + urlencode({"place_id": placeId, "fields": fields, "key": apiKey})
        data = self._fetchJson(url, headers={"Accept": "application/json"})
        result = data.get("result", {}) if isinstance(data, dict) else {}
        return result if isinstance(result, dict) else {}

    def _openStreetMap(self, *, query: str, entity: str, location: str, radiusKm: float) -> list[dict[str, Any]]:
        """Search Nominatim for ``entity`` at ``location`` and return up to 10 cleaned places.

        ``query`` and ``radiusKm`` are accepted but not used in this method.
        Exceptions from ``_fetchJson`` propagate to the caller.
        """
        searchText = _placeSearchText(entity, location)
        url = "https://nominatim.openstreetmap.org/search?" + urlencode(
            {
                "format": "jsonv2",
                "q": searchText,
                "limit": 10,
                "addressdetails": 1,
            }
        )
        data = self._fetchJson(
            url,
            headers={
                "Accept": "application/json",
                "User-Agent": "PicuxMapTool/0.1 (source-bound local search; https://picux.ai)",
            },
        )
        raw = data if isinstance(data, list) else []
        places = []
        for item in raw[:10]:
            if not isinstance(item, dict):
                continue
            osmType = str(item.get("osm_type", "") or "").lower()
            osmId = str(item.get("osm_id", "") or "")
            placeId = str(item.get("place_id", "") or osmId or "")
            name = str(item.get("name", "") or item.get("display_name", "") or "").strip()
            address = str(item.get("display_name", "") or "").strip()
            if osmType in {"node", "way", "relation"} and osmId:
                # Link straight to the OSM object when its type and id are known.
                sourceUrl = f"https://www.openstreetmap.org/{osmType}/{osmId}"
            else:
                sourceUrl = "https://www.openstreetmap.org/search?" + urlencode({"query": searchText})
            places.append(
                _cleanPlace(
                    {
                        "placeId": placeId or _stableToolId("place", {"provider": "openStreetMapNominatim", "name": name, "address": address}),
                        "name": name,
                        "category": str(item.get("type", item.get("class", "")) or ""),
                        "address": address,
                        "lat": item.get("lat", 0),
                        "lng": item.get("lon", 0),
                        "source": "openStreetMapNominatim",
                        "sourceUrl": sourceUrl,
                        # sourceUrl is always non-empty here, so this is always True.
                        "sourceBound": bool(placeId or sourceUrl),
                        "entity": entity,
                    }
                )
            )
        return [place for place in places if place.get("name")]

    def _normalizeSuppliedPlaces(self, raw: list[Any], *, entity: str, location: str, radiusKm: float) -> list[dict[str, Any]]:
        """Clean up to the first 20 caller-supplied places; non-dict items are skipped.

        ``location`` and ``radiusKm`` are accepted but not used in this method.
        Places without a name after cleaning are dropped.
        """
        places = []
        for index, item in enumerate(raw[:20], start=1):
            if not isinstance(item, dict):
                continue
            # The item's own entity/source are kept when non-empty; otherwise defaults apply.
            place = _cleanPlace({**item, "entity": str(item.get("entity", entity) or entity), "source": str(item.get("source", "clientPlaces") or "clientPlaces")})
            if not place.get("placeId"):
                place["placeId"] = _stableToolId("place", {"index": index, "name": place.get("name", ""), "address": place.get("address", ""), "sourceUrl": place.get("sourceUrl", "")})
            place["sourceBound"] = bool(place.get("sourceUrl") or place.get("placeId"))
            places.append(place)
        return [place for place in places if place.get("name")]

    def _fetchJson(self, url: str, *, headers: dict[str, str]) -> Any:
        """Return the decoded JSON document at ``url``.

        When ``self.fetcher`` is set it is called instead of the network and its
        return value is passed through. Otherwise at most 500,000 bytes are read;
        an empty body parses as ``null`` (``None``).
        """
        if self.fetcher:
            return self.fetcher(url, headers)
        request = Request(url, headers=headers)
        with urlopen(request, timeout=self.timeoutSec) as response:
            raw = response.read(500_000)
        return json.loads(raw.decode("utf-8", errors="ignore") or "null")

    @staticmethod
    def _result(*, query: str, provider: str, entity: str, location: str, radiusKm: float, places: list[dict[str, Any]], status: str, allowNetwork: bool, errors: list[str] | None = None) -> dict[str, Any]:
        """Assemble the uniform ``mapTool`` result dict.

        ``ok`` is true when there are places, or when ``status`` is
        ``searchedNoPlaces`` or ``needsPlacesApiKeyOrNetwork``.
        """
        return {
            "ok": bool(places) or status in {"searchedNoPlaces", "needsPlacesApiKeyOrNetwork"},
            "tool": "mapTool",
            "provider": provider,
            "status": status,
            "query": {
                "text": query,
                "entity": entity,
                "location": location,
                "radiusKm": radiusKm,
                # Metres, with the same 1 km floor used for the Google request.
                "radiusMeters": int(max(1.0, radiusKm) * 1000),
            },
            "places": places,
            "errors": errors or [],
            # Mirrors the ``allowNetwork`` argument passed by the caller.
            "networkAttempted": allowNetwork,
        }
730
+
731
+
732
@dataclass(frozen=True)
class SharedToolbox:
    """Frozen bundle of the shared tools with one delegating method per tool.

    Each field defaults to a freshly constructed instance of its tool class.
    """

    nlpTool: NLPTool = field(default_factory=NLPTool)
    imageReader: ImageReader = field(default_factory=ImageReader)
    browserReader: BrowserReader = field(default_factory=BrowserReader)
    browserCheckout: BrowserCheckout = field(default_factory=BrowserCheckout)
    portalBrowser: PortalBrowser = field(default_factory=PortalBrowser)
    mapTool: MapTool = field(default_factory=MapTool)

    def nlp(self, payload: dict[str, Any]) -> dict[str, Any]:
        """Delegate to ``self.nlpTool.run(payload)``."""
        return self.nlpTool.run(payload)

    def readImage(self, payload: dict[str, Any]) -> dict[str, Any]:
        """Delegate to ``self.imageReader.read(payload)``."""
        return self.imageReader.read(payload)

    def readBrowser(self, payload: dict[str, Any]) -> dict[str, Any]:
        """Delegate to ``self.browserReader.read(payload)``."""
        return self.browserReader.read(payload)

    def checkoutBrowser(self, payload: dict[str, Any]) -> dict[str, Any]:
        """Delegate to ``self.browserCheckout.run(payload)``."""
        return self.browserCheckout.run(payload)

    def runPortal(self, payload: dict[str, Any]) -> dict[str, Any]:
        """Delegate to ``self.portalBrowser.run(payload)``."""
        return self.portalBrowser.run(payload)

    def searchMap(self, payload: dict[str, Any]) -> dict[str, Any]:
        """Delegate to ``self.mapTool.search(payload)``."""
        return self.mapTool.search(payload)
758
+
759
+
760
def _query(payload: dict[str, Any]) -> str:
    """Return the request text carried by ``payload``, stripped of outer whitespace.

    The first key that is *present* among ``query``, ``request``, ``goal`` and
    ``text`` is used, even if its value is empty; later keys are not consulted
    in that case. Falsy values become ``""``.
    """
    text: Any = ""
    for key in ("query", "request", "goal", "text"):
        if key in payload:
            text = payload[key]
            break
    return str(text or "").strip()
762
+
763
+
764
def _isLocalServiceQuery(query: str) -> bool:
    """Tell whether ``query`` mentions both a service term and a place term.

    Matching is by plain substring on the spell-fixed, lower-cased query.
    """
    text = _spellfixLocation(str(query or "")).lower()
    serviceTerms = (
        "mechanic",
        "plumber",
        "electrician",
        "lawyer",
        "consultant",
        "doctor",
        "dentist",
        "cleaner",
        "repair",
        "garage",
        "workshop",
        "contractor",
        "installer",
        "handyman",
        "agent",
        "broker",
        "small business",
        "local business",
    )
    if not any(term in text for term in serviceTerms):
        return False
    placeTerms = ("near", "nearby", "within", "around", "close to", "km", "kilometer", "kilometre", "hisingen", "goteborg", "gothenburg")
    for term in placeTerms:
        if term in text:
            return True
    return False
788
+
789
+
790
def _isResolveCase(query: str) -> bool:
    """Tell whether ``query`` reads like a dispute or verification case.

    True when the lower-cased text contains a dispute keyword, a merchant
    no-follow-up phrase, or both a delivery word and a goods word. All checks
    are plain substring tests.
    """
    text = str(query or "").lower()

    def mentions(*tokens: str) -> bool:
        return any(token in text for token in tokens)

    if mentions("dispute", "claim", "complaint", "broken", "damaged", "defective", "fake", "scam", "verify", "audit", "evidence", "receipt", "screenshot"):
        return True
    if mentions("followup", "follow up", "follow-up", "did not follow", "no response", "customer representative", "customer represent"):
        return True
    aboutDelivery = mentions("in-transit", "in transit", "transit", "delivery", "delivered")
    return aboutDelivery and mentions("tv", "item", "goods", "package", "order")
797
+
798
+
799
def _serviceEntity(query: str, payload: dict[str, Any]) -> str:
    """Return the kind of service or business being asked for, or ``""``.

    Resolution order:
      1. An explicit ``entity`` / ``service`` / ``businessType`` payload value
         (first key present), cleaned with ``_cleanServiceEntity``.
      2. The first known service word found as a whole word (optionally plural)
         in the spell-fixed, lower-cased query.
      3. The phrase following a request verb ("need", "find", ...) up to a
         place preposition or the end of the text, cleaned.

    Args:
        query: Free-text request.
        payload: Request payload that may carry an explicit entity.
    """
    raw = str(payload.get("entity", payload.get("service", payload.get("businessType", ""))) or "").strip()
    if raw:
        return _cleanServiceEntity(raw)
    lowered = _spellfixLocation(str(query or "")).lower()
    known = (
        "mechanic",
        "plumber",
        "electrician",
        "lawyer",
        "consultant",
        "doctor",
        "dentist",
        "cleaner",
        "garage",
        "workshop",
        "contractor",
        "installer",
        "handyman",
        "broker",
        "agent",
    )
    for item in known:
        if re.search(rf"\b{re.escape(item)}s?\b", lowered):
            return item
    # The optional article must end on a word boundary. Without it the first
    # alternative "a" matched the leading letter of the subject itself, so
    # "find an architect" captured "n architect" and "find apartment"
    # captured "partment".
    subject = re.search(
        r"\b(?:need|find|search(?: for)?|locate|looking for|get)\s+(?:(?:a|an|the|my|aa)\b)?\s*(.+?)(?:\s+(?:within|near|nearby|around|close to|in)\b|$)",
        lowered,
        re.IGNORECASE,
    )
    if subject:
        return _cleanServiceEntity(subject.group(1))
    return ""
832
+
833
+
834
def _cleanServiceEntity(value: str) -> str:
    """Normalise a service/entity phrase to a short lower-case label.

    Filler words are blanked out, every character outside
    ``A-Z a-z 0-9 space & / -`` becomes a space, runs of whitespace collapse,
    and the result is lower-cased and cut to 80 characters.
    """
    text = str(value or "")
    substitutions = (
        (r"\b(?:a|an|the|my|aa|local|nearby|best|good|trusted)\b", re.IGNORECASE),
        (r"[^A-Za-z0-9 &/-]+", 0),
    )
    for pattern, flags in substitutions:
        text = re.sub(pattern, " ", text, flags=flags)
    collapsed = re.sub(r"\s+", " ", text).strip()
    return collapsed.lower()[:80]
839
+
840
+
841
def _placeLocation(query: str, payload: dict[str, Any]) -> str:
    """Return the location the request refers to, or ``""`` when none is found.

    An explicit ``location`` / ``near`` / ``area`` payload value (first key
    present) is spell-fixed and returned. Otherwise the spell-fixed query is
    scanned for "within N km of/from <place>" and then for
    "near/nearby/around/close to/in <place>". The captured tail is cut at the
    first connector word, stripped of trailing punctuation, and limited to
    140 characters.
    """
    explicit: Any = ""
    for key in ("location", "near", "area"):
        if key in payload:
            explicit = payload[key]
            break
    explicit = str(explicit or "").strip()
    if explicit:
        return _spellfixLocation(explicit)

    text = _spellfixLocation(str(query or ""))
    locationPatterns = (
        r"\bwithin\s+[0-9]+(?:[,.][0-9]+)?\s*(?:km|kilometers?|kilometres?)\s+(?:of|from)\s+(.+)$",
        r"\b(?:near|nearby|around|close to|in)\s+(.+)$",
    )
    for pattern in locationPatterns:
        found = re.search(pattern, text, re.IGNORECASE)
        if found is None:
            continue
        tail = found.group(1)
        # Drop everything from the first connector word onwards.
        pieces = re.split(r"\b(?:for|with|that|who|and then)\b", tail, maxsplit=1, flags=re.IGNORECASE)
        candidate = re.sub(r"[.?!]+$", "", pieces[0]).strip(" ,")
        if candidate:
            return candidate[:140]
    return ""
860
+
861
+
862
+ def _radiusKm(query: str, payload: dict[str, Any]) -> float:
863
+ raw = payload.get("radiusKm", payload.get("distanceKm", payload.get("radius", "")))
864
+ try:
865
+ if raw not in {"", None}:
866
+ value = float(str(raw).replace(",", "."))
867
+ if value > 0:
868
+ return round(min(100.0, value), 2)
869
+ except Exception:
870
+ pass
871
+ match = re.search(r"\b(?:within|inside|under|less than)?\s*([0-9]+(?:[,.][0-9]+)?)\s*(?:km|kilometers?|kilometres?)\b", str(query or ""), re.IGNORECASE)
872
+ if match:
873
+ return round(min(100.0, float(match.group(1).replace(",", "."))), 2)
874
+ return 10.0
875
+
876
+
877
def _spellfixLocation(value: str) -> str:
    """Replace known misspellings and variants of place names in ``value``.

    Only whole words are replaced, case-insensitively; the rest of the text is
    returned unchanged. ``None`` and other falsy input yield ``""``.
    """
    canonical = {
        "hisigen": "Hisingen",
        "hissigen": "Hisingen",
        "hissingen": "Hisingen",
        "goteborg": "Goteborg",
        "gothenburg": "Goteborg",
    }
    # One pass over all variants; no replacement value is itself a different
    # variant, so this gives the same text as replacing them one after another.
    anyVariant = r"\b(?:" + "|".join(canonical) + r")\b"
    return re.sub(
        anyVariant,
        lambda found: canonical[found.group(0).lower()],
        str(value or ""),
        flags=re.IGNORECASE,
    )
889
+
890
+
891
def _placeSearchText(entity: str, location: str) -> str:
    """Build the free-text search string sent to a place provider.

    ``mechanic`` is special-cased: it becomes "bilverkstad <location>" when the
    location mentions one of a few Swedish place names, otherwise
    "car repair <location>". Every other entity becomes "<entity> near <location>".
    """
    service = str(entity or "").strip().lower()
    place = _spellfixLocation(str(location or "").strip())
    if service != "mechanic":
        return f"{service} near {place}".strip()
    placeLower = place.lower()
    swedishHints = ("hisingen", "goteborg", "sweden", "stockholm", "malmo")
    if any(hint in placeLower for hint in swedishHints):
        return f"bilverkstad {place}".strip()
    return f"car repair {place}".strip()
900
+
901
+
902
def _cleanPlace(raw: dict[str, Any]) -> dict[str, Any]:
    """Normalise a provider- or client-supplied place record to the common shape.

    For each output field the first alias key *present* in ``raw`` is used
    (even if its value is empty). Strings are length-limited, numbers go
    through ``_safeFloat``. When no source URL is available but both
    coordinates are non-zero, a Google Maps search URL for the coordinates is
    filled in and the place is marked source-bound.
    """

    def pick(*keys: str, default: Any = "") -> Any:
        for key in keys:
            if key in raw:
                return raw[key]
        return default

    def squash(text: str) -> str:
        return re.sub(r"\s+", " ", text).strip()

    place = {
        "placeId": str(pick("placeId", "place_id", "id") or ""),
        "name": squash(str(pick("name", "title") or ""))[:220],
        "category": str(pick("category", "type") or "")[:120],
        "address": squash(str(pick("address", "formatted_address") or ""))[:300],
        "phone": str(pick("phone", "phoneNumber", "international_phone_number") or "")[:80],
        "website": str(pick("website", "url") or "")[:600],
        "rating": _safeFloat(pick("rating", default=0.0)),
        "lat": _safeFloat(pick("lat", "latitude", default=0.0)),
        "lng": _safeFloat(pick("lng", "lon", "longitude", default=0.0)),
        "distanceKm": _safeFloat(pick("distanceKm", "distance_km", default=0.0)),
        "source": str(pick("source", "provider", default="mapTool") or "mapTool")[:80],
        "sourceUrl": str(pick("sourceUrl", "mapsUrl", "url") or "")[:600],
        "sourceBound": bool(pick("sourceBound", "sourceUrl", "placeId", "place_id")),
        "entity": str(pick("entity", "service") or "")[:80],
    }
    hasCoordinates = place["lat"] and place["lng"]
    if not place["sourceUrl"] and hasCoordinates:
        coordinates = f"{place['lat']},{place['lng']}"
        place["sourceUrl"] = "https://www.google.com/maps/search/?" + urlencode({"api": "1", "query": coordinates})
        place["sourceBound"] = True
    return place
923
+
924
+
925
def _first(value: Any) -> str:
    """Return the first non-blank entry of a list, or the stripped value itself.

    Args:
        value: A list of candidates, or a single scalar.

    Returns:
        For a list, the first item whose string form is non-empty after
        stripping, or ``""`` when there is none. For anything else, its
        stripped string form (``""`` for falsy values).
    """
    if isinstance(value, list):
        candidates = (str(item or "").strip() for item in value)
        # A list holding only blank items used to fall through to str(value)
        # and leak the list's repr (e.g. "['', None]") as the result.
        return next((text for text in candidates if text), "")
    return str(value or "").strip()
932
+
933
+
934
def _route(query: str, *, seed: str) -> list[str]:
    """Return the ordered list of domains that should handle ``query``.

    ``seed`` is the proposed starting domain. It is overridden when the query
    text does not support it, further domains are added from keyword matches
    (plain substring tests on the lower-cased query), and the result is put in
    a fixed order. Only names contained in ``SHARED_DOMAINS`` are returned.
    """
    lowered = query.lower()
    localService = _isLocalServiceQuery(query)
    explicitResolve = _isResolveCase(query)
    explicitProxy = any(token in lowered for token in ("place a call", "make a call", "call merchant", "call support", "human", "lawyer", "consultant", "manual review", "logistics", "pickup", "delivery"))
    explicitPay = any(token in lowered for token in ("pay", "payment", "settle", "settlement", "escrow"))
    # A "pay" or "proxy" seed is only kept when the text itself asks for it.
    if seed == "pay" and not explicitPay:
        seed = "resolve" if explicitResolve else "hunt"
    if seed == "proxy" and not explicitProxy:
        seed = "resolve" if explicitResolve else "hunt"
    # A resolve case replaces a hunt/bridge seed.
    if explicitResolve and seed in {"hunt", "bridge"}:
        seed = "resolve"
    route = [seed] if seed in SHARED_DOMAINS else ["hunt"]
    if not explicitResolve and (localService or any(token in lowered for token in ("find", "near", "within", "km", "buy", "shop", "shopping", "best", "option", "lead", "opportunity", "source", "market", "property", "apartment", "house", "mechanic", "plumber", "electrician", "repair"))) and "hunt" not in route:
        route.insert(0, "hunt")
    if any(token in lowered for token in ("email", "merchant", "bank", "service", "support", "api", "webhook", "connector", "whatsapp", "chat", "gmail", "slack", "jira", "notion", "sap", "representative", "followup", "follow up")) and "bridge" not in route:
        route.append("bridge")
    if explicitResolve and "resolve" not in route:
        route.append("resolve")
    if explicitProxy and "proxy" not in route:
        route.append("proxy")
    if explicitPay and "pay" not in route:
        route.append("pay")
    # Any route containing "pay" also gets "resolve"; its position here does
    # not matter because the final order is imposed below.
    if "pay" in route and "resolve" not in route:
        route.insert(max(1, route.index("pay")), "resolve")
    unique = list(dict.fromkeys([item for item in route if item in SHARED_DOMAINS]))
    # Resolve-first ordering applies only to resolve cases without a hunt step.
    order = ["resolve", "bridge", "proxy", "pay", "hunt"] if explicitResolve and "hunt" not in unique else ["hunt", "bridge", "resolve", "proxy", "pay"]
    return [domain for domain in order if domain in unique]
962
+
963
+
964
def _entities(query: str, payload: dict[str, Any]) -> dict[str, Any]:
    """Collect the entities extracted from ``query`` and ``payload`` into one dict.

    ``entity`` and ``service`` carry the same value. ``locale`` is taken
    straight from the payload; every other field comes from its extractor helper.
    """
    serviceEntity = _serviceEntity(query, payload)
    found: dict[str, Any] = {"item": _item(query)}
    found["entity"] = serviceEntity
    found["service"] = serviceEntity
    found["location"] = _placeLocation(query, payload)
    found["radiusKm"] = _radiusKm(query, payload)
    found["budgetUsd"] = _budget(query, payload)
    found["target"] = _target(query)
    found["phone"] = _phone(query, payload)
    found["email"] = _email(query)
    found["market"] = _market(query)
    found["country"] = _country(query, payload)
    found["region"] = _region(query, payload)
    found["locale"] = str(payload.get("locale", "") or "")
    found["budgetCurrency"] = _budgetCurrency(query, payload)
    return found
982
+
983
+
984
def _intentKind(query: str) -> str:
    """Classify ``query`` as localService, dispute, discovery, handoff or general.

    Local-service and resolve-case detection take precedence, in that order;
    the remaining kinds are decided by the first keyword group with a
    substring hit in the lower-cased query.
    """
    lowered = query.lower()
    if _isLocalServiceQuery(query):
        return "localService"
    if _isResolveCase(query):
        return "dispute"
    keywordKinds = (
        (("buy", "shop", "shopping", "property", "house", "apartment"), "discovery"),
        # NOTE: every token in this group is also checked by _isResolveCase,
        # so this entry is currently redundant; it is kept as a safety net.
        (("claim", "dispute", "damaged", "broken", "scam", "fake"), "dispute"),
        (("call merchant", "call support", "human", "logistics"), "handoff"),
    )
    for tokens, kind in keywordKinds:
        if any(token in lowered for token in tokens):
            return kind
    return "general"
997
+
998
+
999
def _confidence(query: str, route: list[str], entities: dict[str, Any]) -> float:
    """Score how well a request was understood, as a value between 0.45 and 0.96.

    Starts at 0.45, adds 0.05 per routed domain (at most 0.25), 0.12 when an
    item or target was found, 0.10 when a budget, phone or e-mail was found,
    and 0.08 when the query has at least five words. The total is capped at
    0.96 and rounded to three decimals.
    """
    routeBonus = min(0.25, len(route) * 0.05)
    score = 0.45 + routeBonus
    hasSubject = any(entities.get(key) for key in ("item", "target"))
    hasContact = any(entities.get(key) for key in ("budgetUsd", "phone", "email"))
    isDetailed = len(query.split()) >= 5
    if hasSubject:
        score += 0.12
    if hasContact:
        score += 0.10
    if isDetailed:
        score += 0.08
    capped = min(0.96, score)
    return round(capped, 3)
1008
+
1009
+
1010
def _stripTags(text: str) -> str:
    """Reduce HTML to plain text.

    ``<script>`` and ``<style>`` elements are removed with their content,
    remaining tags are replaced by spaces, whitespace runs collapse to one
    space, and HTML entities are unescaped last.
    """
    remaining = str(text or "")
    wholeElement = re.IGNORECASE | re.DOTALL
    for pattern in (r"<script\b[^>]*>.*?</script>", r"<style\b[^>]*>.*?</style>"):
        remaining = re.sub(pattern, " ", remaining, flags=wholeElement)
    withoutTags = re.sub(r"<[^>]+>", " ", remaining)
    collapsed = re.sub(r"\s+", " ", withoutTags)
    return html.unescape(collapsed).strip()
1015
+
1016
+
1017
def _confirmation(text: str) -> str:
    """Extract a confirmation-style reference from portal text, or ``""``.

    After stripping tags, a "confirmation number/no/id/#" value is preferred;
    failing that, the first "case/claim/ticket/reference/order number/no/id/#"
    value is used. Values are 4 to 32 characters from ``A-Z 0-9 -``, matched
    case-insensitively.
    """
    clean = _stripTags(text)
    referencePatterns = (
        r"\bconfirmation\s*(?:number|no|id|#)\s*[:#-]?\s*([A-Z0-9-]{4,32})\b",
        r"\b(?:case|claim|ticket|reference|order)\s*(?:number|no|id|#)\s*[:#-]?\s*([A-Z0-9-]{4,32})\b",
    )
    for pattern in referencePatterns:
        found = re.search(pattern, clean, re.IGNORECASE)
        if found:
            return found.group(1)
    return ""
1024
+
1025
+
1026
def _portalOutcome(text: str) -> dict[str, Any]:
    """Summarise what a portal page reports after an action.

    Returns a dict with the confirmation reference, status label, deadline,
    amount, and the de-duplicated record references (case/claim/ticket/
    reference/order and tracking/shipment numbers) in order of appearance
    per pattern.
    """
    clean = _stripTags(text)
    deadlinePattern = r"\b(?:deadline|due date|respond by|follow[- ]?up by)\s*[:#-]?\s*([A-Z][a-z]{2,9}\s+\d{1,2},?\s+\d{4}|\d{4}-\d{2}-\d{2}|\d{1,2}/\d{1,2}/\d{2,4})"
    amountPattern = r"\b(?:amount|refund|claim value|total)\s*[:#-]?\s*([A-Z]{3}\s*)?([$€£]?\s*\d+(?:[,.]\d{2})?)"
    referencePatterns = (
        r"\b(?:case|claim|ticket|reference|order)\s*(?:number|no|id|#)\s*[:#-]?\s*([A-Z0-9-]{4,32})\b",
        r"\b(?:tracking|shipment)\s*(?:number|no|id|#)\s*[:#-]?\s*([A-Z0-9-]{4,32})\b",
    )

    outcome: dict[str, Any] = {"confirmation": _confirmation(clean)}
    outcome["status"] = _portalStatus(clean)
    outcome["deadline"] = _firstMatch(clean, (deadlinePattern,))
    outcome["amount"] = _firstMatch(clean, (amountPattern,))
    references = [
        found.group(1)
        for pattern in referencePatterns
        for found in re.finditer(pattern, clean, re.IGNORECASE)
    ]
    # dict.fromkeys drops duplicates while keeping first-seen order.
    outcome["recordRefs"] = list(dict.fromkeys(references))
    return outcome
1046
+
1047
+
1048
def _portalStatus(text: str) -> str:
    """Derive a status label from portal text, or ``""`` when none is found.

    An explicit "status"/"state" value wins and is returned lower-cased with
    spaces turned into underscores. Otherwise the first label whose phrases
    occur in the lower-cased text is returned.
    """
    explicit = re.search(r"\b(?:status|state)\s*[:#-]?\s*([A-Za-z][A-Za-z -]{2,32})\b", text, re.IGNORECASE)
    if explicit is not None:
        return explicit.group(1).strip(" .").lower().replace(" ", "_")
    haystack = text.lower()
    phraseTable = {
        "submitted": ("claim submitted", "form submitted", "submitted"),
        "approved": ("approved", "accepted"),
        "rejected": ("rejected", "denied"),
        "pending": ("pending", "under review", "in review", "awaiting"),
        "retrieved": ("record retrieved", "record found", "download ready"),
    }
    return next(
        (label for label, phrases in phraseTable.items() if any(phrase in haystack for phrase in phrases)),
        "",
    )
1063
+
1064
+
1065
def _firstMatch(text: str, patterns: tuple[str, ...]) -> str:
    """Return the value captured by the first pattern that matches ``text``.

    Patterns are tried in order, case-insensitively. Non-empty capture groups
    are joined with a space; a pattern without (non-empty) groups contributes
    its whole match. Whitespace runs in the value collapse to one space.
    Returns ``""`` when nothing matches.
    """
    attempts = (re.search(pattern, text, re.IGNORECASE) for pattern in patterns)
    found = next((attempt for attempt in attempts if attempt), None)
    if found is None:
        return ""
    captured = [group for group in found.groups() if group]
    if captured:
        value = " ".join(captured).strip()
    else:
        value = found.group(0).strip()
    return re.sub(r"\s+", " ", value)
1074
+
1075
+
1076
def _portalStepResults(raw: Any) -> list[dict[str, Any]]:
    """Normalise a list of raw portal steps into uniform step dicts.

    Non-list input yields ``[]`` and non-dict entries are skipped. A step
    without ``stepId`` gets a deterministic id derived from its 1-based
    position in ``raw`` and its content. The label falls back to the action
    and then to "Step <n>".
    """
    if not isinstance(raw, list):
        return []

    def normalise(index: int, item: dict[str, Any]) -> dict[str, Any]:
        fallbackLabel = f"Step {index}"
        meta = item.get("meta", {})
        return {
            "stepId": str(item.get("stepId", "") or _stableToolId("portalStep", {"index": index, "step": item})),
            "label": str(item.get("label", item.get("action", fallbackLabel)) or fallbackLabel),
            "action": str(item.get("action", "") or ""),
            "selector": str(item.get("selector", "") or ""),
            "valueKey": str(item.get("valueKey", "") or ""),
            "status": str(item.get("status", "pending") or "pending"),
            "proofRequired": bool(item.get("proofRequired", False)),
            "meta": meta if isinstance(meta, dict) else {},
        }

    return [
        normalise(index, item)
        for index, item in enumerate(raw, start=1)
        if isinstance(item, dict)
    ]
1097
+
1098
+
1099
def _portalRequiredInputs(payload: dict[str, Any], steps: list[dict[str, Any]]) -> list[str]:
    """Return the input keys a portal action requires.

    A ``requiredInputs`` list under ``payload["meta"]`` (or, when that key is
    absent, directly on the payload) is used when it has at least one
    non-empty entry. Otherwise the non-empty ``valueKey`` of every step is returned.
    """
    meta = payload.get("meta")
    if not isinstance(meta, dict):
        meta = {}
    if "requiredInputs" in meta:
        declared = meta["requiredInputs"]
    else:
        declared = payload.get("requiredInputs", [])
    if isinstance(declared, list):
        names = [name for name in map(str, declared) if name]
        if names:
            return names
    stepKeys = (str(step.get("valueKey", "") or "") for step in steps)
    return [key for key in stepKeys if key]
1107
+
1108
+
1109
def _missingPortalInputs(steps: list[dict[str, Any]], inputs: dict[str, Any], *, requiredKeys: list[str]) -> list[dict[str, Any]]:
    """List the required step inputs that are absent or empty in ``inputs``.

    Only steps whose ``valueKey`` is in ``requiredKeys`` are considered. Each
    key appears once, in order of first appearance; when several steps share a
    key, the label of the last such step is reported.
    """
    wanted = set(requiredKeys)
    byKey: dict[str, dict[str, Any]] = {}
    for step in steps:
        key = str(step.get("valueKey", "") or "")
        if not key or key not in wanted:
            continue
        supplied = key in inputs and not _emptyPortalInput(inputs.get(key))
        if supplied:
            continue
        # Re-assigning an existing key keeps its position but updates the entry.
        byKey[key] = {"key": key, "label": str(step.get("label", key) or key), "secret": False}
    return list(byKey.values())
1122
+
1123
+
1124
def _emptyPortalInput(value: Any) -> bool:
    """Tell whether a supplied portal input counts as not provided.

    ``None``, the empty string and the empty list are empty; everything else
    (including ``0`` and ``False``) is a real value.
    """
    if value is None:
        return True
    return value == "" or value == []
1126
+
1127
+
1128
def _portalStepsWithMissingInputs(steps: list[dict[str, Any]], missing: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Return copies of ``steps`` flagged with whether their input is missing.

    A step whose non-empty ``valueKey`` is among the missing keys gets status
    ``"needsInput"``; other steps keep their status (default ``"pending"``).
    Every copy's ``meta`` gains a boolean ``missingInput``. Inputs are not mutated.
    """
    missingKeys = {str(entry.get("key", "") or "") for entry in missing}

    def flagged(step: dict[str, Any]) -> dict[str, Any]:
        valueKey = str(step.get("valueKey", "") or "")
        isMissing = valueKey in missingKeys
        if valueKey and isMissing:
            status = "needsInput"
        else:
            status = str(step.get("status", "pending") or "pending")
        meta = step.get("meta", {})
        if not isinstance(meta, dict):
            meta = {}
        return {**step, "status": status, "meta": {**meta, "missingInput": isMissing}}

    return [flagged(step) for step in steps]
1136
+
1137
+
1138
def _portalStepsWithStatus(steps: list[dict[str, Any]], status: str, *, reason: str) -> list[dict[str, Any]]:
    """Return copies of ``steps`` with every status set to ``status``.

    A non-empty ``reason`` is recorded under ``meta["reason"]`` of each copy.
    A non-dict ``meta`` is replaced by ``{}``.
    """

    def restamped(step: dict[str, Any]) -> dict[str, Any]:
        meta = step.get("meta", {})
        if not isinstance(meta, dict):
            meta = {}
        if reason:
            meta = {**meta, "reason": reason}
        return {**step, "status": status, "meta": meta}

    return [restamped(step) for step in steps]
1146
+
1147
+
1148
def _portalStepSummary(steps: list[dict[str, Any]]) -> dict[str, Any]:
    """Count portal steps overall and per status.

    Returns ``{"total": <n>, "byStatus": {<status>: <count>, ...}}``; a step
    without a status is counted under ``""``.
    """
    tally: dict[str, int] = {}
    for step in steps:
        label = str(step.get("status", "") or "")
        tally.setdefault(label, 0)
        tally[label] += 1
    return {"total": len(steps), "byStatus": tally}
1154
+
1155
+
1156
def _stableToolId(prefix: str, payload: dict[str, Any]) -> str:
    """Derive a deterministic identifier ``<prefix>_<24 hex chars>`` from ``payload``.

    The payload is serialised as ASCII JSON with sorted keys, so key order
    does not affect the id. The hex part is the start of its SHA-256 digest.
    """
    canonical = json.dumps(payload, ensure_ascii=True, sort_keys=True)
    fingerprint = hashlib.sha256(canonical.encode("utf-8")).hexdigest()
    return "{}_{}".format(prefix, fingerprint[:24])
1159
+
1160
+
1161
def _looksLikeFollowUp(query: str) -> bool:
    """Tell whether ``query`` reads like a reply to a previous turn.

    True when the lower-cased text contains one of the follow-up cues
    ("yes", "no", "continue", "approve", "use that", "same", "also",
    "the second", "that one", "go ahead") as a whole word or phrase.

    Cues are matched on word boundaries. Plain substring matching treated
    almost any request as a follow-up, because "no" occurs inside words such
    as "notebook" or "know" and "yes" inside "eyes".
    """
    lowered = query.lower()
    cues = r"\b(?:yes|no|continue|approve|use that|same|also|the second|that one|go ahead)\b"
    return re.search(cues, lowered) is not None
1164
+
1165
+
1166
def _imageInputs(payload: dict[str, Any]) -> list[dict[str, Any]]:
    """Collect the image-like entries from a payload, without duplicates.

    Dict entries of the ``images``, ``attachments`` and ``documents`` lists are
    considered. An entry counts as an image when its MIME type starts with
    ``image/``, its kind/type is ``image``, ``photo`` or ``screenshot``, or it
    carries inline data (``base64``, ``contentBase64`` or ``dataUrl``).

    Entries are de-duplicated on their identifying fields (artifact id, name,
    inline data, text, OCR). Entries with none of those fields set are never
    treated as duplicates of one another.

    Returns:
        The original dict objects, in payload order.
    """
    identityFields = ("artifactId", "name", "dataUrl", "base64", "contentBase64", "text", "ocr")
    candidates: list[dict[str, Any]] = []
    for key in ("images", "attachments", "documents"):
        value = payload.get(key)
        if isinstance(value, list):
            candidates.extend(item for item in value if isinstance(item, dict))

    images: list[dict[str, Any]] = []
    seen: set[str] = set()
    for item in candidates:
        mime = str(item.get("mime", item.get("contentType", "")) or "").lower()
        kind = str(item.get("kind", item.get("type", "")) or "").lower()
        hasInlineData = any(name in item for name in ("base64", "contentBase64", "dataUrl"))
        if not (mime.startswith("image/") or kind in {"image", "photo", "screenshot"} or hasInlineData):
            continue
        parts = [str(item.get(name, "") or "") for name in identityFields]
        # The joined key is "||||||" (truthy) even when every field is empty,
        # so decide on the parts themselves: without any identity there is
        # nothing to de-duplicate on and the entry is always kept.
        if any(parts):
            identity = "|".join(parts)
            if identity in seen:
                continue
            seen.add(identity)
        images.append(item)
    return images
1185
+
1186
+
1187
def _imageBytes(image: dict[str, Any]) -> bytes:
    """Return the raw bytes of an image entry, or ``b""`` when it carries none.

    The encoded text comes from ``base64`` / ``contentBase64`` / ``data``
    (first key present); a ``dataUrl`` containing a comma overrides it with
    the part after the first comma. If base64 decoding fails, the text itself
    is returned encoded as UTF-8.
    """
    encoded: Any = ""
    for key in ("base64", "contentBase64", "data"):
        if key in image:
            encoded = image[key]
            break
    encoded = str(encoded or "")
    dataUrl = str(image.get("dataUrl", "") or "")
    _, comma, afterComma = dataUrl.partition(",")
    if comma:
        encoded = afterComma
    if encoded == "":
        return b""
    try:
        return base64.b64decode(encoded, validate=False)
    except Exception:
        return encoded.encode("utf-8", errors="ignore")
1198
+
1199
+
1200
def _imageText(image: dict[str, Any], rawBytes: bytes) -> str:
    """Gather every piece of text available for an image into one string.

    Combines, in order: the entry's descriptive fields (``text``, ``alt``,
    ``caption``, ``ocr``, ``description``, ``name``), runs of five or more
    printable ASCII characters found in ``rawBytes``, and the output of
    ``_ocrText``. Empty pieces are skipped.
    """
    pieces = [str(image.get(key, "") or "") for key in ("text", "alt", "caption", "ocr", "description", "name")]
    embedded = ""
    if rawBytes:
        runs = re.findall(rb"[ -~]{5,}", rawBytes)
        embedded = " ".join(run.decode("utf-8", errors="ignore") for run in runs)
    pieces.append(embedded)
    pieces.append(_ocrText(image, rawBytes))
    return " ".join(filter(None, pieces)).strip()
1207
+
1208
+
1209
def _ocrText(image: dict[str, Any], rawBytes: bytes) -> str:
    """Run the ``tesseract`` CLI over ``rawBytes`` and return the recognised text.

    This is best-effort: ``""`` is returned when there are no bytes, when
    ``tesseract`` is not on ``PATH``, when it exits non-zero, or when anything
    goes wrong (including the 8 second timeout).

    Args:
        image: Image entry; its MIME type and file name pick the temp-file suffix
            (``.jpg``, ``.webp``, ``.tif``, default ``.png``).
        rawBytes: Encoded image data written to a temporary file for OCR.

    Returns:
        The OCR output with whitespace runs collapsed to single spaces.
    """
    if not rawBytes or not shutil.which("tesseract"):
        return ""
    mime = str(image.get("mime", image.get("contentType", "")) or "").lower()
    name = str(image.get("name", image.get("filename", "")) or "").lower()
    suffix = ".png"
    if "jpeg" in mime or "jpg" in mime or name.endswith((".jpg", ".jpeg")):
        suffix = ".jpg"
    elif "webp" in mime or name.endswith(".webp"):
        suffix = ".webp"
    elif "tiff" in mime or name.endswith((".tif", ".tiff")):
        suffix = ".tif"
    path = ""
    try:
        with tempfile.NamedTemporaryFile(prefix="picux-image-", suffix=suffix, delete=False) as tmp:
            # Record the path before writing: the file is created with
            # delete=False, so a failed write would otherwise leave it behind.
            path = tmp.name
            tmp.write(rawBytes)
        completed = subprocess.run(
            ["tesseract", path, "stdout", "--dpi", "150"],
            capture_output=True,
            check=False,
            text=True,
            timeout=8,
        )
        if completed.returncode == 0:
            return re.sub(r"\s+", " ", completed.stdout or "").strip()
        return ""
    except Exception:
        # Deliberately broad: OCR is optional and must never break the caller.
        return ""
    finally:
        if path:
            try:
                Path(path).unlink(missing_ok=True)
            except OSError:
                # Cleanup is best-effort; a leftover temp file is not fatal.
                pass
1244
+
1245
+
1246
def _matches(query: str, text: str) -> list[str]:
    """List what in ``text`` relates to ``query``, at most 12 entries.

    First come the query tokens (lower-case alphanumeric runs longer than two
    characters, sorted) that occur in the lower-cased text; then the category
    labels ``receipt``, ``damage`` and ``identity`` whose cue words occur in
    the text and that are not already listed.
    """
    haystack = text.lower()
    queryTokens = {token for token in re.findall(r"[a-z0-9]+", query.lower()) if len(token) > 2}
    found: list[str] = []
    for token in sorted(queryTokens):
        if token in haystack:
            found.append(token)
    categoryCues = (
        ("receipt", ("receipt", "invoice", "order", "total")),
        ("damage", ("broken", "damaged", "crack", "scratch")),
        ("identity", ("name", "seller", "merchant", "store")),
    )
    for label, cues in categoryCues:
        if label in found:
            continue
        if any(cue in haystack for cue in cues):
            found.append(label)
    return found[:12]
1258
+
1259
+
1260
+ def _summary(text: str, matches: list[str]) -> str:
1261
+ if not text:
1262
+ return "No readable image text was available; preserve the image as source evidence."
1263
+ prefix = "Matched " + ", ".join(matches[:4]) + ". " if matches else ""
1264
+ clean = re.sub(r"\s+", " ", _redact(text)).strip()
1265
+ return (prefix + clean[:220]).strip()
1266
+
1267
+
1268
+ def _privateTargetBlockReason(url: str, *, allowPrivateNetwork: bool = False) -> str:
1269
+ if allowPrivateNetwork:
1270
+ return ""
1271
+ parsed = urlparse(str(url or ""))
1272
+ if parsed.scheme not in {"http", "https"}:
1273
+ return ""
1274
+ host = (parsed.hostname or "").strip().lower()
1275
+ if not host:
1276
+ return "privateNetworkTargetBlocked"
1277
+ if host == "localhost" or host.endswith(".localhost") or host.endswith(".local"):
1278
+ return "privateNetworkTargetBlocked"
1279
+ try:
1280
+ ip = ipaddress.ip_address(host.strip("[]"))
1281
+ except ValueError:
1282
+ return ""
1283
+ if ip.is_loopback or ip.is_private or ip.is_link_local or ip.is_unspecified or ip.is_reserved:
1284
+ return "privateNetworkTargetBlocked"
1285
+ return ""
1286
+
1287
+
1288
+ def _blockedUrlObservation(url: str, query: str, *, source: str, error: str) -> dict[str, Any]:
1289
+ return {
1290
+ "source": source,
1291
+ "url": url,
1292
+ "ok": False,
1293
+ "adapter": "http",
1294
+ "statusCode": 0,
1295
+ "title": "",
1296
+ "snippets": [],
1297
+ "textSample": "",
1298
+ "matched": False,
1299
+ "error": error,
1300
+ }
1301
+
1302
+
1303
def _pageObservation(url: str, text: str, query: str, adapter: str, *, source: str, statusCode: int) -> dict[str, Any]:
    """Build a successful observation from fetched page markup.

    The title is taken from the raw markup. Snippets and the redacted
    4000-character sample come from the tag-stripped body. ``matched`` is
    true when at least one snippet was found for ``query``.
    """
    pageTitle = _title(text)
    plainBody = _stripHtml(text)
    hits = _snippets(plainBody, query)
    observation: dict[str, Any] = {"source": source, "url": url, "ok": True}
    observation["adapter"] = adapter
    observation["statusCode"] = statusCode
    observation["title"] = pageTitle
    observation["snippets"] = hits
    observation["textSample"] = _redact(plainBody[:4000])
    observation["matched"] = bool(hits)
    return observation
1318
+
1319
+
1320
+ def _title(text: str) -> str:
1321
+ match = re.search(r"<title[^>]*>(.*?)</title>", text, flags=re.IGNORECASE | re.DOTALL)
1322
+ if not match:
1323
+ return ""
1324
+ return html.unescape(re.sub(r"\s+", " ", match.group(1))).strip()[:160]
1325
+
1326
+
1327
+ def _stripHtml(text: str) -> str:
1328
+ clean = re.sub(r"<script\b.*?</script>", " ", text, flags=re.IGNORECASE | re.DOTALL)
1329
+ clean = re.sub(r"<style\b.*?</style>", " ", clean, flags=re.IGNORECASE | re.DOTALL)
1330
+ clean = re.sub(r"<[^>]+>", " ", clean)
1331
+ return html.unescape(re.sub(r"\s+", " ", clean)).strip()
1332
+
1333
+
1334
+ def _snippets(text: str, query: str) -> list[str]:
1335
+ tokens = [token for token in re.findall(r"[a-z0-9]+", query.lower()) if len(token) > 2]
1336
+ lowered = text.lower()
1337
+ snippets = []
1338
+ for token in tokens[:8]:
1339
+ idx = lowered.find(token)
1340
+ if idx < 0:
1341
+ continue
1342
+ start = max(0, idx - 90)
1343
+ end = min(len(text), idx + 180)
1344
+ snippet = text[start:end].strip()
1345
+ if snippet and snippet not in snippets:
1346
+ snippets.append(snippet)
1347
+ return snippets[:5]
1348
+
1349
+
1350
def _marketplaceSet(query: str, entities: dict[str, Any], payload: dict[str, Any]) -> dict[str, Any]:
    """Resolve which marketplaces to search for a request.

    Sources listed in ``payload["marketplaces"]`` (or ``payload["sources"]``)
    are alias-normalized and used as given. When none are supplied, defaults
    are derived from the market and detected country.
    """
    raw = payload.get("marketplaces", payload.get("sources", []))
    requested: list[str] = []
    if isinstance(raw, list):
        for entry in raw:
            label = str(entry).strip()
            if label:
                requested.append(_sourceAlias(label.lower()))
    market = str(entities.get("market", "") or "")
    country = _country(query, {**payload, **entities})
    if not requested:
        requested = _marketplaceSources(market, country)
    result: dict[str, Any] = {"market": market or "shopping", "country": country}
    result["localSources"] = _localSources(market, country)
    result["sources"] = requested
    return result
1363
+
1364
+
1365
+ def _targetLimit(payload: dict[str, Any]) -> int:
1366
+ try:
1367
+ raw = int(payload.get("targetLimit", payload.get("sourceLimit", 12)) or 12)
1368
+ except Exception:
1369
+ raw = 12
1370
+ return max(1, min(raw, 20))
1371
+
1372
+
1373
+ def _searchTargets(query: str, entities: dict[str, Any], payload: dict[str, Any], *, marketplaceSet: dict[str, Any] | None = None) -> list[dict[str, str]]:
1374
+ marketplaceSet = marketplaceSet or _marketplaceSet(query, entities, payload)
1375
+ requested = [str(item) for item in marketplaceSet.get("sources", []) if str(item)]
1376
+ item = quote_plus(str(entities.get("item", "") or query))
1377
+ templates = {
1378
+ "ebay": "https://www.ebay.com/sch/i.html?_nkw={q}",
1379
+ "facebook": "https://www.facebook.com/marketplace/search/?query={q}",
1380
+ "alibaba": "https://www.alibaba.com/trade/search?SearchText={q}",
1381
+ "amazon": "https://www.amazon.com/s?k={q}",
1382
+ "shopify": "https://shop.app/search?q={q}",
1383
+ "blocket": "https://www.blocket.se/annonser/hela_sverige?q={q}",
1384
+ "tradera": "https://www.tradera.com/search?q={q}",
1385
+ "jumia": "https://www.jumia.com.ng/catalog/?q={q}",
1386
+ "konga": "https://www.konga.com/search?search={q}",
1387
+ "jiji": "https://jiji.ng/search?query={q}",
1388
+ "craigslist": "https://www.craigslist.org/search/sss?query={q}",
1389
+ "gumtree": "https://www.gumtree.com/search?search_category=all&q={q}",
1390
+ "zillow": "https://www.zillow.com/homes/{q}_rb/",
1391
+ "redfin": "https://www.redfin.com/city/30749/CA/{q}",
1392
+ "hemnet": "https://www.hemnet.se/bostader?query={q}",
1393
+ }
1394
+ homeUrls = {
1395
+ "amazon": "https://www.amazon.com/",
1396
+ "ebay": "https://www.ebay.com/",
1397
+ "facebook": "https://www.facebook.com/marketplace/",
1398
+ "alibaba": "https://www.alibaba.com/",
1399
+ "shopify": "https://shop.app/",
1400
+ "blocket": "https://www.blocket.se/",
1401
+ "tradera": "https://www.tradera.com/",
1402
+ "jumia": "https://www.jumia.com.ng/",
1403
+ "konga": "https://www.konga.com/",
1404
+ "jiji": "https://jiji.ng/",
1405
+ }
1406
+ return [
1407
+ {
1408
+ "source": source,
1409
+ "url": templates.get(source, f"https://www.google.com/search?q={item}+{quote_plus(source)}").format(q=item),
1410
+ "homeUrl": homeUrls.get(source, ""),
1411
+ "kind": "marketplaceSearch",
1412
+ }
1413
+ for source in requested[:10]
1414
+ ]
1415
+
1416
+
1417
+ def _sourceAlias(source: str) -> str:
1418
+ aliases = {
1419
+ "fb": "facebook",
1420
+ "facebook marketplace": "facebook",
1421
+ "facebook_marketplace": "facebook",
1422
+ "ali": "alibaba",
1423
+ "aliexpress": "alibaba",
1424
+ "shop": "shopify",
1425
+ "shop.app": "shopify",
1426
+ "blocket.se": "blocket",
1427
+ "tradera.se": "tradera",
1428
+ "jumia nigeria": "jumia",
1429
+ "jumia.com.ng": "jumia",
1430
+ "konga.com": "konga",
1431
+ "jiji.ng": "jiji",
1432
+ }
1433
+ return aliases.get(source, source)
1434
+
1435
+
1436
def _marketplaceSources(market: str, country: str) -> list[str]:
    """Return the default source list for a market and country.

    Property searches use the local property sources followed by the global
    property portals. Everything else uses local shopping sources followed
    by the global marketplaces, or the global list plus ``blocket`` when no
    local sources are known.
    """
    if market == "property":
        return _unique([*_localSources(market, country), "zillow", "redfin", "hemnet"])
    nearby = _localSources("shopping", country)
    worldwide = ["ebay", "facebook", "alibaba", "amazon", "shopify"]
    if nearby:
        return _unique([*nearby, *worldwide])
    return [*worldwide, "blocket"]
1445
+
1446
+
1447
+ def _localSources(market: str, country: str) -> list[str]:
1448
+ if market == "property":
1449
+ return {
1450
+ "se": ["hemnet", "blocket"],
1451
+ "ng": ["propertypro", "jiji"],
1452
+ "uk": ["rightmove", "zoopla", "gumtree"],
1453
+ "us": ["zillow", "redfin", "craigslist"],
1454
+ }.get(country, [])
1455
+ return {
1456
+ "se": ["blocket", "tradera", "facebook"],
1457
+ "ng": ["jumia", "konga", "jiji", "facebook"],
1458
+ "uk": ["gumtree", "facebook", "ebay"],
1459
+ "us": ["craigslist", "facebook", "ebay", "amazon"],
1460
+ }.get(country, [])
1461
+
1462
+
1463
+ def _unique(items: list[str]) -> list[str]:
1464
+ return list(dict.fromkeys(item for item in items if item))
1465
+
1466
+
1467
+ def _dedupeTargets(targets: list[dict[str, str]]) -> list[dict[str, str]]:
1468
+ seen = set()
1469
+ out = []
1470
+ for target in targets:
1471
+ url = str(target.get("url", "") or "")
1472
+ if not url or url in seen:
1473
+ continue
1474
+ seen.add(url)
1475
+ out.append(target)
1476
+ return out
1477
+
1478
+
1479
+ def _attemptSummaries(observations: list[dict[str, Any]]) -> list[dict[str, Any]]:
1480
+ summaries = []
1481
+ for observation in observations:
1482
+ summaries.append(
1483
+ {
1484
+ "source": observation.get("source", ""),
1485
+ "url": observation.get("url", ""),
1486
+ "ok": bool(observation.get("ok")),
1487
+ "adapter": observation.get("adapter", ""),
1488
+ "statusCode": observation.get("statusCode", 0),
1489
+ "title": observation.get("title", ""),
1490
+ "matched": bool(observation.get("matched")),
1491
+ "listingCount": len(observation.get("listings", [])) if isinstance(observation.get("listings"), list) else 0,
1492
+ "filteredListingCount": int(observation.get("filteredListingCount", 0) or 0),
1493
+ "search": observation.get("search", {}) if isinstance(observation.get("search"), dict) else {},
1494
+ "error": observation.get("error", ""),
1495
+ }
1496
+ )
1497
+ return summaries
1498
+
1499
+
1500
+ def _renderedSkipped(reason: str, error: str = "") -> dict[str, Any]:
1501
+ attempted = reason not in {"notRequested", "noTargets"}
1502
+ meta = {
1503
+ "attempted": attempted,
1504
+ "available": False,
1505
+ "status": "skipped" if not attempted else "unavailable",
1506
+ "fallbackReason": reason,
1507
+ "adapter": "playwrightChromium",
1508
+ }
1509
+ if error:
1510
+ meta["error"] = error
1511
+ return {"observations": [], "meta": meta}
1512
+
1513
+
1514
+ def _normalizeRenderedObservations(raw: Any, query: str) -> list[dict[str, Any]]:
1515
+ if not isinstance(raw, list):
1516
+ return []
1517
+ observations: list[dict[str, Any]] = []
1518
+ for item in raw:
1519
+ if not isinstance(item, dict):
1520
+ continue
1521
+ source = str(item.get("source", "source") or "source")
1522
+ url = str(item.get("url", "") or "")
1523
+ adapter = str(item.get("adapter", "playwrightChromium") or "playwrightChromium")
1524
+ statusCode = _safeInt(item.get("statusCode", 0))
1525
+ title = str(item.get("title", "") or "")[:180]
1526
+ text = re.sub(r"\s+", " ", str(item.get("textSample", item.get("text", "")) or "")).strip()
1527
+ textSample = _redact(text[:8000])
1528
+ snippets = _snippets(f"{title} {textSample}", query)
1529
+ listings = _normalizeListings(item.get("listings", []))
1530
+ search = _normalizeSearch(item.get("search", {}))
1531
+ if not item.get("ok"):
1532
+ observations.append(
1533
+ {
1534
+ "source": source,
1535
+ "url": url,
1536
+ "ok": False,
1537
+ "adapter": adapter,
1538
+ "statusCode": statusCode,
1539
+ "title": title,
1540
+ "error": str(item.get("error", "") or "")[:220],
1541
+ "snippets": snippets,
1542
+ "textSample": textSample,
1543
+ "listings": listings,
1544
+ "search": search,
1545
+ "matched": bool(snippets),
1546
+ }
1547
+ )
1548
+ continue
1549
+ observations.append(
1550
+ {
1551
+ "source": source,
1552
+ "url": url,
1553
+ "ok": True,
1554
+ "adapter": adapter,
1555
+ "statusCode": statusCode,
1556
+ "title": title,
1557
+ "snippets": snippets,
1558
+ "textSample": textSample,
1559
+ "listings": listings,
1560
+ "search": search,
1561
+ "matched": bool(snippets),
1562
+ }
1563
+ )
1564
+ return observations
1565
+
1566
+
1567
+ def _safeInt(value: Any) -> int:
1568
+ try:
1569
+ return int(value or 0)
1570
+ except Exception:
1571
+ return 0
1572
+
1573
+
1574
+ def _safeFloat(value: Any) -> float:
1575
+ try:
1576
+ return float(value or 0.0)
1577
+ except Exception:
1578
+ return 0.0
1579
+
1580
+
1581
+ def _normalizeListings(raw: Any) -> list[dict[str, Any]]:
1582
+ if not isinstance(raw, list):
1583
+ return []
1584
+ listings = []
1585
+ for item in raw:
1586
+ if not isinstance(item, dict):
1587
+ continue
1588
+ title = re.sub(r"\s+", " ", str(item.get("title", "") or "")).strip()
1589
+ priceText = re.sub(r"\s+", " ", str(item.get("priceText", "") or "")).strip()
1590
+ url = str(item.get("url", "") or "").strip()
1591
+ textSample = re.sub(r"\s+", " ", str(item.get("textSample", "") or "")).strip()
1592
+ if not priceText:
1593
+ priceText = _listingPriceText(textSample)
1594
+ if not title:
1595
+ title = _listingTitleText(textSample, priceText)
1596
+ if not title and not priceText:
1597
+ continue
1598
+ listings.append(
1599
+ {
1600
+ "title": _redact(title[:220]),
1601
+ "priceText": _redact(priceText[:120]),
1602
+ "url": url[:600],
1603
+ "textSample": _redact(textSample[:900]),
1604
+ }
1605
+ )
1606
+ return listings[:18]
1607
+
1608
+
1609
+ def _listingTitleText(text: str, priceText: str = "") -> str:
1610
+ raw = re.sub(r"\s+", " ", str(text or "")).strip()
1611
+ if not raw:
1612
+ return ""
1613
+ if priceText:
1614
+ priceAt = raw.find(priceText)
1615
+ if priceAt > 0:
1616
+ raw = raw[:priceAt]
1617
+ raw = re.sub(r"^(?:NEW LOW PRICE|SPONSORED|Shop on eBay|Brand New)+", "", raw, flags=re.IGNORECASE).strip()
1618
+ for marker in (
1619
+ "Pre-Owned",
1620
+ "Open Box",
1621
+ "Brand New",
1622
+ "New Other",
1623
+ "Buy It Now",
1624
+ "or Best Offer",
1625
+ "out of 5 stars",
1626
+ "product ratings",
1627
+ ):
1628
+ index = raw.lower().find(marker.lower())
1629
+ if index > 8:
1630
+ raw = raw[:index]
1631
+ break
1632
+ raw = re.sub(r"\s+", " ", raw).strip(" -·")
1633
+ return _redact(raw[:220])
1634
+
1635
+
1636
+ def _listingPriceText(text: str) -> str:
1637
+ raw = str(text or "")
1638
+ patterns = (
1639
+ r"(?:USD|SEK|NGN|NOK|EUR|GBP|\$|€|£|₦)\s*[0-9][0-9\s,.]*(?:[,.][0-9]{1,2})?",
1640
+ r"[0-9][0-9\s,.]*(?:[,.][0-9]{1,2})?\s*(?:kr|kronor|sek|usd|dollars?|ngn|naira|€|£|₦)",
1641
+ )
1642
+ for pattern in patterns:
1643
+ match = re.search(pattern, raw, flags=re.IGNORECASE)
1644
+ if match:
1645
+ value = re.sub(r"\s+", " ", match.group(0)).strip()
1646
+ return value[:120]
1647
+ return ""
1648
+
1649
+
1650
def _normalizeSearch(raw: Any) -> dict[str, Any]:
    """Normalize search metadata into a fixed-shape dict.

    Non-dict input is treated as empty. String fields are truncated and the
    query is redacted.
    """
    data = raw if isinstance(raw, dict) else {}

    def field(key: str, limit: int | None = None) -> str:
        value = str(data.get(key, "") or "")
        return value if limit is None else value[:limit]

    return {
        "mode": field("mode"),
        "query": _redact(field("query", 220)),
        "submitted": bool(data.get("submitted")),
        "inputSelector": field("inputSelector", 160),
        "entryUrl": field("entryUrl", 600),
        "fallbackUrl": field("fallbackUrl", 600),
        "fallbackReason": field("fallbackReason", 220),
    }
1661
+
1662
+
1663
def _offersFromObservations(observations: list[dict[str, Any]], query: str, entities: dict[str, Any], *, budget: dict[str, Any], fxRates: dict[str, float]) -> list[dict[str, Any]]:
    """Turn successful observations into priced offer records (at most 10).

    Each matching listing with a parsable price yields one offer. Observations
    from the ``clientUrl`` source that have no listings fall back to prices
    found in their title, snippets and text sample.

    Offers whose lowest USD price exceeds ``budget["usd"]`` (when positive)
    are skipped and counted. NOTE: the count is written back into the input
    observation under ``filteredListingCount``, so this function mutates
    ``observations``.

    Args:
        observations: Normalized observation dicts.
        query: Original user query; used for item fallback, listing matching
            and the "used" condition flag.
        entities: Extracted entities; ``item`` and ``country`` are read.
        budget: Budget mapping; ``usd`` is read and the mapping is embedded
            as-is in every offer's ``meta``.
        fxRates: Currency code -> rate table passed to ``_priceQuotes``.

    Returns:
        Offer dicts in discovery order, truncated to ten.
    """
    offers: list[dict[str, Any]] = []
    itemId = str(entities.get("item", "") or _item(query) or "requestedItem")
    used = "used" in query.lower() or "second hand" in query.lower()
    budgetUsd = float(budget.get("usd", 0.0) or 0.0)
    for index, observation in enumerate(observations, start=1):
        if not observation.get("ok"):
            continue
        defaultCurrency = _sourceCurrency(str(observation.get("source", "") or ""), str(entities.get("country", "") or ""))
        filteredCount = 0
        listings = observation.get("listings", []) if isinstance(observation.get("listings"), list) else []
        for listingIndex, listing in enumerate(listings, start=1):
            if not _listingMatches(listing, itemId, query):
                continue
            text = " ".join(str(listing.get(key, "") or "") for key in ("title", "priceText", "textSample"))
            # Prefer the dedicated price field; fall back to the combined text.
            quotes = _priceQuotes(str(listing.get("priceText", "") or text), defaultCurrency, fxRates)
            if not quotes:
                continue
            # Lowest quote is the offer price, highest is the list price.
            offerQuote = min(quotes, key=lambda item: item["usd"])
            listedQuote = max(quotes, key=lambda item: item["usd"])
            offer = float(offerQuote["usd"])
            listed = float(listedQuote["usd"])
            if listed <= offer:
                listed = offer
            if budgetUsd > 0 and offer > budgetUsd:
                filteredCount += 1
                continue
            sourceUrl = str(listing.get("url", "") or observation.get("url", "") or "")
            # Key for the sourceId digest: URL, position, title and price.
            sourceKey = "|".join(
                [
                    sourceUrl,
                    str(listingIndex),
                    str(listing.get("title", "") or ""),
                    str(listing.get("priceText", "") or ""),
                    str(offerQuote.get("currency", "")),
                    str(offerQuote.get("amount", "")),
                ]
            )
            digest = hashlib.sha256(sourceKey.encode("utf-8")).hexdigest()[:12]
            offers.append(
                {
                    "sourceId": f"browserSource_{digest}",
                    "itemId": itemId,
                    "listUsd": round(listed, 2),
                    "offerUsd": round(offer, 2),
                    "feesUsd": 0.0,
                    "stock": 1,
                    "meta": {
                        "condition": "used" if used else "unknown",
                        "reader": "browserReader",
                        "sourceUrl": sourceUrl,
                        "listingIndex": listingIndex,
                        "source": str(observation.get("source", "") or ""),
                        "listingTitle": str(listing.get("title", "") or "")[:220],
                        "price": {
                            "amount": round(float(offerQuote["amount"]), 2),
                            "currency": offerQuote["currency"],
                            "usd": round(offer, 2),
                            "fxRate": offerQuote["rate"],
                        },
                        "budget": budget,
                        "sourceBound": True,
                    },
                }
            )
        if filteredCount:
            # Side effect: record over-budget listings on the input observation.
            observation["filteredListingCount"] = int(observation.get("filteredListingCount", 0) or 0) + filteredCount
        if listings:
            # Listings were present (matched or not): skip the page-level fallback.
            continue
        if str(observation.get("source", "") or "") != "clientUrl":
            # The page-level fallback applies only to the "clientUrl" source.
            continue
        text = " ".join(
            [
                str(observation.get("title", "") or ""),
                " ".join(str(item) for item in observation.get("snippets", []) if item),
                str(observation.get("textSample", "") or ""),
            ]
        )
        quotes = _priceQuotes(text, defaultCurrency, fxRates)
        if not quotes:
            continue
        offerQuote = min(quotes, key=lambda item: item["usd"])
        listedQuote = max(quotes, key=lambda item: item["usd"])
        offer = float(offerQuote["usd"])
        listed = float(listedQuote["usd"])
        if listed <= offer:
            listed = offer
        if budgetUsd > 0 and offer > budgetUsd:
            observation["filteredListingCount"] = int(observation.get("filteredListingCount", 0) or 0) + 1
            continue
        fallbackKey = "|".join([str(observation.get("url", f"source_{index}") or f"source_{index}"), str(index), str(offerQuote.get("currency", "")), str(offerQuote.get("amount", ""))])
        digest = hashlib.sha256(fallbackKey.encode("utf-8")).hexdigest()[:12]
        offers.append(
            {
                "sourceId": f"browserSource_{digest}",
                "itemId": itemId,
                "listUsd": round(listed, 2),
                "offerUsd": round(offer, 2),
                "feesUsd": 0.0,
                # Stock is 1 only when the page text contains an availability word.
                "stock": 1 if re.search(r"\b(?:available|in stock|stock|listing)\b", text.lower()) else 0,
                "meta": {
                    "condition": "used" if used else "unknown",
                    "reader": "browserReader",
                    "source": str(observation.get("source", "") or ""),
                    "listingTitle": str(observation.get("title", "") or "")[:220],
                    "sourceUrl": str(observation.get("url", "") or ""),
                    "price": {
                        "amount": round(float(offerQuote["amount"]), 2),
                        "currency": offerQuote["currency"],
                        "usd": round(offer, 2),
                        "fxRate": offerQuote["rate"],
                    },
                    "budget": budget,
                    "sourceBound": True,
                },
            }
        )
    return offers[:10]
1781
+
1782
+
1783
+ def _listingMatches(listing: dict[str, Any], itemId: str, query: str) -> bool:
1784
+ text = " ".join(str(listing.get(key, "") or "") for key in ("title", "textSample")).lower()
1785
+ compact = re.sub(r"[^a-z0-9]+", "", text)
1786
+ if itemId and itemId != "requestedItem":
1787
+ itemCompact = re.sub(r"[^a-z0-9]+", "", itemId.lower())
1788
+ if itemCompact and itemCompact in compact:
1789
+ return True
1790
+ tokens = [
1791
+ token
1792
+ for token in re.findall(r"[a-z0-9]+", query.lower())
1793
+ if len(token) > 2 and token not in {"buy", "used", "less", "than", "under", "dollar", "usd", "best", "find"}
1794
+ ]
1795
+ return bool(tokens and any(token in text for token in tokens[:6]))
1796
+
1797
+
1798
+ def _priceQuotes(text: str, defaultCurrency: str, fxRates: dict[str, float]) -> list[dict[str, Any]]:
1799
+ raw = re.sub(r"\s+", " ", str(text or "")).strip()
1800
+ if not raw:
1801
+ return []
1802
+ amountPattern = r"[0-9][0-9\s,.]*(?:[,.][0-9]{1,2})?"
1803
+ tokenPattern = r"USD|SEK|NGN|NOK|EUR|GBP|dollars?|usd|sek|ngn|kr|krona|naira|\$|€|£|₦"
1804
+ matches: list[tuple[str, str]] = []
1805
+ for token, amount in re.findall(rf"({tokenPattern})\s*({amountPattern})", raw, flags=re.IGNORECASE):
1806
+ matches.append((amount, token))
1807
+ for amount, token in re.findall(rf"({amountPattern})\s*({tokenPattern})", raw, flags=re.IGNORECASE):
1808
+ matches.append((amount, token))
1809
+ quotes = []
1810
+ seen = set()
1811
+ for amountRaw, token in matches:
1812
+ amount = _parseAmount(amountRaw)
1813
+ currency = _currencyFromToken(token, defaultCurrency)
1814
+ if amount <= 0 or currency not in fxRates:
1815
+ continue
1816
+ key = (currency, round(amount, 2))
1817
+ if key in seen:
1818
+ continue
1819
+ seen.add(key)
1820
+ rate = float(fxRates.get(currency, 0.0) or 0.0)
1821
+ usd = _toUsd(amount, currency, fxRates)
1822
+ if usd <= 0 or usd > 1_000_000:
1823
+ continue
1824
+ quotes.append({"amount": amount, "currency": currency, "usd": round(usd, 2), "rate": rate})
1825
+ return quotes[:12]
1826
+
1827
+
1828
+ def _parseAmount(value: str) -> float:
1829
+ clean = re.sub(r"\s+", "", str(value or ""))
1830
+ if "," in clean and "." in clean:
1831
+ if clean.rfind(",") > clean.rfind("."):
1832
+ clean = clean.replace(".", "").replace(",", ".")
1833
+ else:
1834
+ clean = clean.replace(",", "")
1835
+ elif "," in clean:
1836
+ parts = clean.split(",")
1837
+ clean = "".join(parts) if len(parts[-1]) == 3 else ".".join(parts)
1838
+ elif "." in clean:
1839
+ parts = clean.split(".")
1840
+ clean = "".join(parts) if len(parts[-1]) == 3 and len(parts) > 1 else clean
1841
+ try:
1842
+ return float(clean)
1843
+ except Exception:
1844
+ return 0.0
1845
+
1846
+
1847
+ def _currencyFromToken(token: str, defaultCurrency: str) -> str:
1848
+ lowered = str(token or "").strip().lower()
1849
+ if lowered in {"$", "usd", "dollar", "dollars"}:
1850
+ return "USD"
1851
+ if lowered in {"sek", "kr", "krona"}:
1852
+ return "SEK"
1853
+ if lowered in {"ngn", "₦", "naira"}:
1854
+ return "NGN"
1855
+ if lowered == "eur" or lowered == "€":
1856
+ return "EUR"
1857
+ if lowered == "gbp" or lowered == "£":
1858
+ return "GBP"
1859
+ return defaultCurrency.upper() or "USD"
1860
+
1861
+
1862
+ def _toUsd(amount: float, currency: str, fxRates: dict[str, float]) -> float:
1863
+ currency = currency.upper() or "USD"
1864
+ rate = float(fxRates.get(currency, 0.0) or 0.0)
1865
+ if currency == "USD":
1866
+ return amount
1867
+ if rate <= 0:
1868
+ return 0.0
1869
+ return amount / rate
1870
+
1871
+
1872
+ def _sourceCurrency(source: str, country: str) -> str:
1873
+ source = source.lower()
1874
+ country = country.lower()
1875
+ if source in {"blocket", "tradera", "hemnet"} or country == "se":
1876
+ return "SEK"
1877
+ if source in {"jumia", "konga", "jiji", "propertypro"} or country == "ng":
1878
+ return "NGN"
1879
+ return "USD"
1880
+
1881
+
1882
def _item(query: str) -> str:
    """Extract the item or service a query is about.

    Order of precedence: the service entity for local-service queries, a
    known product/service keyword (whitespace removed), then the phrase
    after a buying verb with qualifiers and punctuation stripped (capped at
    80 characters). Returns ``""`` when nothing is recognized.
    """
    serviceEntity = _serviceEntity(query, {})
    if _isLocalServiceQuery(query) and serviceEntity:
        return serviceEntity
    keyword = re.search(r"\b(i\s*phone\s*\d+|iphone\s*\d+|tv|laptop|car|apartment|house|property|mechanic|plumber|electrician|repair|garage|workshop)\b", query, re.IGNORECASE)
    if keyword:
        return re.sub(r"\s+", "", keyword.group(1).lower())
    subject = re.search(
        r"\b(?:buy|find|get|need|locate|search(?: for)?|shop(?: for)?|source)\s+(?:a|an|the|my|aa)?\s*(.+?)(?:\s+(?:under|less than|below|budget|for|within)\b|\s+(?:in|near|around)\s+[A-Z][A-Za-z]+|$)",
        query,
        re.IGNORECASE,
    )
    if subject is None:
        return ""
    phrase = subject.group(1)
    cleanupSteps = (
        (r"\b(?:used|best|cheap|affordable|new)\b", re.IGNORECASE),
        (r"[^A-Za-z0-9 ]+", 0),
        (r"\s+", 0),
    )
    for pattern, flags in cleanupSteps:
        phrase = re.sub(pattern, " ", phrase, flags=flags)
    phrase = phrase.strip().lower()
    return phrase[:80]
1901
+
1902
+
1903
def _budget(query: str, payload: dict[str, Any]) -> float:
    """Return the request budget in USD, or ``0.0`` when none is given."""
    context = _budgetContext(query, payload, _fxRates(payload))
    usd = context.get("usd", 0.0)
    return float(usd or 0.0)
1905
+
1906
+
1907
def _budgetContext(query: str, payload: dict[str, Any], fxRates: dict[str, float]) -> dict[str, Any]:
    """Resolve the request budget and return it as a ``_budgetMap`` mapping.

    Sources are tried in order:
      1. ``payload["budget"]`` (or ``"maxSpend"``) as a dict with
         ``amount``/``value`` and an optional ``currency``;
      2. the same key as a truthy scalar convertible with ``float``;
      3. the first budget-looking amount found in ``query``.
    When none applies, a zero budget in the inferred currency is returned.

    Args:
        query: Free-text user query.
        payload: Request payload; ``budget``/``maxSpend`` and
            ``currency``/``budgetCurrency`` are read.
        fxRates: Currency code -> rate table used for the USD conversion.
    """
    raw = payload.get("budget", payload.get("maxSpend", 0))
    payloadCurrency = str(payload.get("currency", payload.get("budgetCurrency", "")) or "").upper()
    if isinstance(raw, dict):
        amount = _parseAmount(str(raw.get("amount", raw.get("value", 0)) or "0"))
        currency = str(raw.get("currency", payloadCurrency or "USD") or "USD").upper()
        usd = _toUsd(amount, currency, fxRates)
        return _budgetMap(amount, currency, usd, fxRates)
    try:
        if raw:
            amount = float(raw)
            currency = payloadCurrency or _budgetCurrency(query, payload) or "USD"
            usd = _toUsd(amount, currency, fxRates)
            return _budgetMap(amount, currency, usd, fxRates)
    except Exception:
        # Unusable scalar budget: fall through to parsing the query text.
        pass
    # Tried in order: keyword-led amount, currency-prefixed amount,
    # currency-suffixed amount.
    patterns = (
        r"(?:under|less than|below|budget|for)\s*(?P<prefix>\$|USD|SEK|NGN|₦)?\s*(?P<amount>[0-9][0-9\s,.]*(?:[,.][0-9]{1,2})?)\s*(?P<suffix>dollars?|usd|sek|kr|krona|ngn|naira)?",
        r"(?P<prefix>\$|USD|SEK|NGN|₦)\s*(?P<amount>[0-9][0-9\s,.]*(?:[,.][0-9]{1,2})?)",
        r"\b(?P<amount>[0-9][0-9\s,.]*(?:[,.][0-9]{1,2})?)\s*(?P<suffix>dollars?|usd|sek|kr|krona|ngn|naira)\b",
    )
    for pattern in patterns:
        match = re.search(pattern, query, re.IGNORECASE)
        if not match:
            continue
        amount = _parseAmount(match.group("amount"))
        # groupdict().get is used because not every pattern defines both groups.
        token = match.groupdict().get("prefix") or match.groupdict().get("suffix") or payloadCurrency or "USD"
        currency = _currencyFromToken(token, payloadCurrency or "USD")
        usd = _toUsd(amount, currency, fxRates)
        return _budgetMap(amount, currency, usd, fxRates)
    return _budgetMap(0.0, payloadCurrency or _budgetCurrency(query, payload) or "USD", 0.0, fxRates)
1938
+
1939
+
1940
+ def _budgetCurrency(query: str, payload: dict[str, Any]) -> str:
1941
+ explicit = str(payload.get("currency", payload.get("budgetCurrency", "")) or "").upper()
1942
+ if explicit:
1943
+ return explicit
1944
+ lowered = query.lower()
1945
+ if "$" in query or "usd" in lowered or "dollar" in lowered:
1946
+ return "USD"
1947
+ if "₦" in query or "ngn" in lowered or "naira" in lowered:
1948
+ return "NGN"
1949
+ if re.search(r"\b(?:sek|kr|krona)\b", lowered):
1950
+ return "SEK"
1951
+ return "USD"
1952
+
1953
+
1954
+ def _budgetMap(amount: float, currency: str, usd: float, fxRates: dict[str, float]) -> dict[str, Any]:
1955
+ currency = currency.upper() or "USD"
1956
+ nativeMax = amount
1957
+ return {
1958
+ "amount": round(amount, 2),
1959
+ "currency": currency,
1960
+ "usd": round(usd, 2),
1961
+ "fxRate": float(fxRates.get(currency, 1.0) or 1.0),
1962
+ "nativeMax": round(nativeMax, 2),
1963
+ }
1964
+
1965
+
1966
def _fxRates(payload: dict[str, Any]) -> dict[str, float]:
    """Return the FX table: module defaults overlaid with payload overrides.

    Overrides come from ``payload["fxRates"]`` (or ``"exchangeRates"``). Keys
    may be plain codes or carry a ``USD_TO_``/``USD_`` prefix. Only the
    supported codes are accepted, and a rate must be a finite positive number.
    NaN in particular passes a plain ``rate <= 0`` check and would then
    propagate through every later conversion and comparison.
    """
    import math

    rates = {key: float(value) for key, value in DEFAULT_FX_RATES.items()}
    raw = payload.get("fxRates", payload.get("exchangeRates", {}))
    if not isinstance(raw, dict):
        return rates
    for key, value in raw.items():
        try:
            rate = float(value)
        except (TypeError, ValueError, OverflowError):
            # Ignore overrides that are not numeric.
            continue
        if not math.isfinite(rate) or rate <= 0:
            continue
        normalized = str(key).upper().replace("USD_TO_", "").replace("USD_", "")
        if normalized in {"USD", "SEK", "NGN", "EUR", "GBP", "NOK"}:
            rates[normalized] = rate
    return rates
1981
+
1982
+
1983
+ def _target(query: str) -> str:
1984
+ domainMerchant = re.search(r"\b(?:to|from|with|at)\s*([A-Z]?[A-Za-z0-9-]+\.(?:se|com|net|org|io|co|ng|uk))\b", query, re.IGNORECASE)
1985
+ if domainMerchant:
1986
+ return domainMerchant.group(1).strip()
1987
+ match = re.search(r"\bfrom\s+([A-Z][A-Za-z0-9.& -]{2,40})", query)
1988
+ return match.group(1).strip() if match else ""
1989
+
1990
+
1991
+ def _phone(query: str, payload: dict[str, Any]) -> str:
1992
+ raw = str(payload.get("phone", "") or "")
1993
+ if raw:
1994
+ return raw
1995
+ match = re.search(r"\+?\d[\d\s().-]{7,}\d", query)
1996
+ return match.group(0).strip() if match else ""
1997
+
1998
+
1999
+ def _email(query: str) -> str:
2000
+ match = re.search(r"\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b", query, flags=re.IGNORECASE)
2001
+ return match.group(0) if match else ""
2002
+
2003
+
2004
def _market(query: str) -> str:
    """Classify a query as ``localService``, ``property``, ``shopping`` or ``""``.

    Local-service detection wins. Property keywords are checked before
    shopping keywords, and all keywords match as substrings.
    """
    if _isLocalServiceQuery(query):
        return "localService"
    wording = query.lower()
    keywordsByMarket = (
        ("property", ("property", "apartment", "house", "rent", "buy home")),
        ("shopping", ("buy", "shop", "shopping", "iphone", "used", "dollar")),
    )
    for market, keywords in keywordsByMarket:
        if any(keyword in wording for keyword in keywords):
            return market
    return ""
2013
+
2014
+
2015
+ def _country(query: str, payload: dict[str, Any]) -> str:
2016
+ raw = " ".join(
2017
+ str(payload.get(key, "") or "")
2018
+ for key in ("country", "region", "locale", "market", "location")
2019
+ ).lower()
2020
+ lowered = f"{query.lower()} {raw}"
2021
+ if any(token in lowered for token in ("sweden", "swedish", "stockholm", "goteborg", "gothenburg", "malmo", "backebol", "power.se", " se ", "sv-se")):
2022
+ return "se"
2023
+ if any(token in lowered for token in ("nigeria", "lagos", "abuja", "naira", "ngn", " ng ", "en-ng")):
2024
+ return "ng"
2025
+ if any(token in lowered for token in ("united kingdom", "england", "london", " uk ", "en-gb")):
2026
+ return "uk"
2027
+ if any(token in lowered for token in ("united states", "america", "usa", " us ", "en-us")):
2028
+ return "us"
2029
+ compact = raw.replace("_", "-")
2030
+ if compact in {"se", "sv-se"}:
2031
+ return "se"
2032
+ if compact in {"ng", "en-ng"}:
2033
+ return "ng"
2034
+ if compact in {"uk", "gb", "en-gb"}:
2035
+ return "uk"
2036
+ if compact in {"us", "usa", "en-us"}:
2037
+ return "us"
2038
+ return ""
2039
+
2040
+
2041
def _region(query: str, payload: dict[str, Any]) -> str:
    """Return the upper-cased detected country, else the payload's ``region``."""
    detected = _country(query, payload)
    if detected:
        return detected.upper()
    return str(payload.get("region", "") or "")
2044
+
2045
+
2046
+ def _redact(text: str) -> str:
2047
+ masked = re.sub(r"\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b", "[email]", str(text or ""), flags=re.IGNORECASE)
2048
+ return re.sub(r"\+?\b(?:\d[\s().-]?){8,15}\d\b", "[phone]", masked)