plati-mcp-server 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -3
- package/mcp_server.py +202 -24
- package/package.json +1 -1
- package/plati_scrape.py +91 -3
package/README.md
CHANGED
|
@@ -43,13 +43,16 @@ plati-mcp-server
|
|
|
43
43
|
Input arguments:
|
|
44
44
|
|
|
45
45
|
- `query` (required): Search phrase (for example `claude code`)
|
|
46
|
-
- `limit` (default `
|
|
46
|
+
- `limit` (default `20`)
|
|
47
47
|
- `currency` (default `RUB`)
|
|
48
48
|
- `lang` (default `ru-RU`)
|
|
49
|
-
- `min_reviews` (default `
|
|
50
|
-
- `min_positive_ratio` (default `0.
|
|
49
|
+
- `min_reviews` (default `0`)
|
|
50
|
+
- `min_positive_ratio` (default `0.0`)
|
|
51
51
|
- `max_pages` (default `6`)
|
|
52
52
|
- `per_page` (default `30`)
|
|
53
|
+
- `sort_by` (default `price_asc`): one of `price_asc`, `price_desc`, `seller_reviews_desc`, `reliability_desc`, `title_asc`, `title_desc`
|
|
54
|
+
- `min_price` / `max_price` (optional numeric range; default `0` leaves that bound disabled)
|
|
55
|
+
- `include_terms` / `exclude_terms` (optional token filters separated by spaces, commas, `;` or `|`; matched as case-insensitive substrings of the title/options text)
|
|
53
56
|
|
|
54
57
|
## Local development
|
|
55
58
|
|
package/mcp_server.py
CHANGED
|
@@ -2,10 +2,11 @@
|
|
|
2
2
|
import json
|
|
3
3
|
import os
|
|
4
4
|
import pathlib
|
|
5
|
+
import re
|
|
5
6
|
import sys
|
|
6
7
|
import warnings
|
|
7
8
|
from typing import Any, Dict, List
|
|
8
|
-
from urllib.parse import
|
|
9
|
+
from urllib.parse import parse_qs, unquote, urlparse
|
|
9
10
|
|
|
10
11
|
# Ensure local imports work even when launcher does not pass PYTHONPATH/cwd.
|
|
11
12
|
_HERE = pathlib.Path(__file__).resolve().parent
|
|
@@ -85,33 +86,166 @@ def _parse_good_bad(value: str) -> Dict[str, int]:
|
|
|
85
86
|
return {"good": 0, "bad": 0}
|
|
86
87
|
|
|
87
88
|
|
|
89
|
+
def _parse_query_input(query: str) -> Dict[str, str]:
    """Normalize a free-text query or a pasted URL into search parameters.

    Args:
        query: Either plain search text or an ``http(s)://`` URL.

    Returns:
        A dict with three string keys:

        * ``product_query`` - text to search for (may be empty),
        * ``category_id``   - numeric id taken from a category-like URL, else "",
        * ``source_url``    - the original URL when one was passed, else "".
    """
    q = (query or "").strip()
    out = {"product_query": q, "category_id": "", "source_url": ""}
    # URL schemes are case-insensitive, so "HTTPS://..." must be treated as a URL too.
    if not q.lower().startswith(("http://", "https://")):
        return out

    parsed = urlparse(q)
    path = parsed.path or ""
    qs = parse_qs(parsed.query or "")
    out["source_url"] = q

    # Search root URL with optional query params.
    if path.rstrip("/") == "/search":
        for key in ("q", "query", "text", "term", "search", "searchString", "SearchStr"):
            v = (qs.get(key) or [""])[0].strip()
            if v:
                # parse_qs has already percent-decoded the value; decoding it a
                # second time would corrupt literal '%' characters in the query.
                out["product_query"] = v
                return out
        out["product_query"] = ""
        return out

    # Standard search URL: /search/<term>
    # (path segments are NOT decoded by urlparse, hence the explicit unquote).
    m = re.search(r"/search/([^/?#]+)", path)
    if m:
        out["product_query"] = unquote(m.group(1)).replace("-", " ").strip()
        return out

    # Category-like URL: /games/<slug>/<id>/ or similar.
    cat = re.search(r"/([^/]+)/([^/]+)/(\d+)/?$", path)
    if cat:
        slug = unquote(cat.group(2)).replace("-", " ").strip()
        out["product_query"] = slug or out["product_query"]
        out["category_id"] = cat.group(3)
        return out

    # Fallback: use last non-empty path part as query.
    parts = [unquote(p) for p in path.split("/") if p]
    if parts:
        out["product_query"] = parts[-1].replace("-", " ").strip()
    return out
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _split_terms(value: str) -> List[str]:
    """Break a filter string into lower-cased tokens.

    Tokens are separated by any run of whitespace, commas, semicolons or
    pipes. A falsy ``value`` yields an empty list.
    """
    if not value:
        return []
    pieces = re.split(r"[\s,;|]+", value.lower())
    # Leading/trailing separators produce empty strings; drop them.
    return list(filter(None, pieces))
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _build_offer_search_text(title: str, options: List[Dict[str, Any]]) -> str:
    """Build the lower-cased text blob that term filters are matched against.

    The blob is the title followed, for each option, by its ``name``, its
    ``label`` and the ``text`` of every entry in ``variants``, all joined
    with single spaces. Missing or falsy fields contribute an empty string.
    """
    chunks = [title or ""]
    for option in options:
        chunks += [str(option.get("name") or ""), str(option.get("label") or "")]
        chunks.extend(str(variant.get("text") or "") for variant in (option.get("variants") or []))
    return " ".join(chunks).lower()
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _sort_lots(items: List[Dict[str, Any]], sort_by: str) -> List[Dict[str, Any]]:
    """Return a new list of lots ordered according to ``sort_by``.

    Recognised modes: ``price_desc``, ``seller_reviews_desc``,
    ``reliability_desc``, ``title_asc``, ``title_desc``. Any other value
    (including ``price_asc``) sorts by ascending price, breaking ties in
    favour of the seller with more reviews.
    """

    def price(lot: Dict[str, Any]) -> float:
        return float(lot.get("min_option_price", 0.0))

    def reviews(lot: Dict[str, Any]) -> int:
        return int(lot.get("seller_reviews", 0))

    def ratio(lot: Dict[str, Any]) -> float:
        return float(lot.get("positive_ratio", 0.0))

    def title(lot: Dict[str, Any]) -> str:
        return str(lot.get("title", "")).lower()

    # mode -> (key function, descending?)
    strategies = {
        "price_desc": (lambda lot: (price(lot), reviews(lot)), True),
        # Price is negated so that, under reverse=True, cheaper lots win ties.
        "seller_reviews_desc": (lambda lot: (reviews(lot), ratio(lot), -price(lot)), True),
        "reliability_desc": (lambda lot: (ratio(lot), reviews(lot), -price(lot)), True),
        "title_asc": (title, False),
        "title_desc": (title, True),
    }
    # default and "price_asc"
    fallback = (lambda lot: (price(lot), -reviews(lot)), False)
    key_fn, descending = strategies.get(sort_by, fallback)
    return sorted(items, key=key_fn, reverse=descending)
|
|
160
|
+
|
|
161
|
+
|
|
88
162
|
def find_cheapest_reliable_options(
|
|
89
163
|
query: str,
|
|
90
|
-
limit: int =
|
|
164
|
+
limit: int = 20,
|
|
91
165
|
currency: str = "RUB",
|
|
92
166
|
lang: str = "ru-RU",
|
|
93
167
|
min_reviews: int = 0,
|
|
94
168
|
min_positive_ratio: float = 0.0,
|
|
95
169
|
max_pages: int = 6,
|
|
96
170
|
per_page: int = 30,
|
|
171
|
+
sort_by: str = "price_asc",
|
|
172
|
+
min_price: float = 0.0,
|
|
173
|
+
max_price: float = 0.0,
|
|
174
|
+
include_terms: str = "",
|
|
175
|
+
exclude_terms: str = "",
|
|
97
176
|
) -> Dict[str, Any]:
|
|
98
177
|
if plati_scrape is None:
|
|
99
178
|
raise RuntimeError(f"plati_scrape import failed: {_PLATI_IMPORT_ERROR}")
|
|
100
179
|
|
|
101
|
-
|
|
180
|
+
parsed_q = _parse_query_input(query)
|
|
181
|
+
q = parsed_q["product_query"]
|
|
182
|
+
category_id = parsed_q["category_id"]
|
|
183
|
+
source_url = parsed_q["source_url"]
|
|
184
|
+
if not q and not category_id:
|
|
185
|
+
raise ValueError("Empty search query. Use /search/<term> or pass text query, e.g. 'chatgpt plus'.")
|
|
102
186
|
lots: List[Dict[str, Any]] = []
|
|
103
187
|
seller_cache: Dict[int, Dict[str, Any]] = {}
|
|
104
188
|
page = 1
|
|
189
|
+
include_tokens = _split_terms(include_terms)
|
|
190
|
+
exclude_tokens = _split_terms(exclude_terms)
|
|
191
|
+
sort_norm = (sort_by or "price_asc").strip().lower()
|
|
192
|
+
if sort_norm not in {
|
|
193
|
+
"price_asc",
|
|
194
|
+
"price_desc",
|
|
195
|
+
"seller_reviews_desc",
|
|
196
|
+
"reliability_desc",
|
|
197
|
+
"title_asc",
|
|
198
|
+
"title_desc",
|
|
199
|
+
}:
|
|
200
|
+
sort_norm = "price_asc"
|
|
201
|
+
seen_product_ids: set[int] = set()
|
|
202
|
+
|
|
203
|
+
while page <= max_pages:
|
|
204
|
+
has_next_page = False
|
|
205
|
+
source_items: List[Dict[str, Any]] = []
|
|
206
|
+
if category_id:
|
|
207
|
+
block_url = plati_scrape.build_category_block_url(
|
|
208
|
+
category_id=category_id,
|
|
209
|
+
page=page,
|
|
210
|
+
rows=per_page,
|
|
211
|
+
currency=currency,
|
|
212
|
+
lang=lang,
|
|
213
|
+
sort_by=sort_norm,
|
|
214
|
+
subcategory_id=0,
|
|
215
|
+
)
|
|
216
|
+
block_html = plati_scrape.fetch_text(block_url)
|
|
217
|
+
parsed_items = plati_scrape.parse_category_block_items(block_html)
|
|
218
|
+
source_items = [
|
|
219
|
+
{
|
|
220
|
+
"product_id": int(it.get("product_id") or 0),
|
|
221
|
+
"seller_id": 0,
|
|
222
|
+
"seller_name": str(it.get("seller_name") or ""),
|
|
223
|
+
"price": float(it.get("price") or 0.0),
|
|
224
|
+
"name": [{"locale": lang, "value": str(it.get("title") or "")}],
|
|
225
|
+
"link": str(it.get("link") or ""),
|
|
226
|
+
}
|
|
227
|
+
for it in parsed_items
|
|
228
|
+
]
|
|
229
|
+
has_next_page = bool(parsed_items)
|
|
230
|
+
else:
|
|
231
|
+
search_url = plati_scrape.build_search_url(
|
|
232
|
+
q,
|
|
233
|
+
page,
|
|
234
|
+
per_page,
|
|
235
|
+
currency,
|
|
236
|
+
lang,
|
|
237
|
+
"popular",
|
|
238
|
+
category_id="",
|
|
239
|
+
)
|
|
240
|
+
payload = plati_scrape.fetch_json(search_url)
|
|
241
|
+
content = payload.get("content") or {}
|
|
242
|
+
source_items = content.get("items") or []
|
|
243
|
+
has_next_page = bool(content.get("has_next_page"))
|
|
105
244
|
|
|
106
|
-
|
|
107
|
-
search_url = plati_scrape.build_search_url(q, page, per_page, currency, lang, "popular")
|
|
108
|
-
payload = plati_scrape.fetch_json(search_url)
|
|
109
|
-
content = payload.get("content") or {}
|
|
110
|
-
items = content.get("items") or []
|
|
111
|
-
if not items:
|
|
245
|
+
if not source_items:
|
|
112
246
|
break
|
|
113
247
|
|
|
114
|
-
for item in
|
|
248
|
+
for item in source_items:
|
|
115
249
|
pid = int(item.get("product_id") or 0)
|
|
116
250
|
seller_id = int(item.get("seller_id") or 0)
|
|
117
251
|
if pid <= 0:
|
|
@@ -130,9 +264,11 @@ def find_cheapest_reliable_options(
|
|
|
130
264
|
if str(product.get("is_available", 1)).lower() in {"0", "false"}:
|
|
131
265
|
continue
|
|
132
266
|
|
|
267
|
+
if seller_id <= 0:
|
|
268
|
+
seller_id = int(((product.get("seller") or {}).get("id")) or 0)
|
|
133
269
|
base_price = float(product.get("price") or item.get("price") or 0.0)
|
|
134
270
|
title = plati_scrape.clean_text(str(product.get("name") or plati_scrape.pick_name(item.get("name") or [], lang)))
|
|
135
|
-
link = f"https://plati.market/itm/i/{pid}"
|
|
271
|
+
link = str(item.get("link") or f"https://plati.market/itm/i/{pid}")
|
|
136
272
|
seller_name = str((product.get("seller") or {}).get("name") or item.get("seller_name") or "")
|
|
137
273
|
|
|
138
274
|
if seller_id > 0 and seller_id not in seller_cache:
|
|
@@ -211,19 +347,47 @@ def find_cheapest_reliable_options(
|
|
|
211
347
|
"options": options_payload,
|
|
212
348
|
}
|
|
213
349
|
)
|
|
214
|
-
|
|
215
|
-
if
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
350
|
+
offer_search_text = _build_offer_search_text(title, options_payload)
|
|
351
|
+
if include_tokens and not all(tok in offer_search_text for tok in include_tokens):
|
|
352
|
+
lots.pop()
|
|
353
|
+
continue
|
|
354
|
+
if exclude_tokens and any(tok in offer_search_text for tok in exclude_tokens):
|
|
355
|
+
lots.pop()
|
|
356
|
+
continue
|
|
357
|
+
if min_price > 0 and float(min_option_price) < float(min_price):
|
|
358
|
+
lots.pop()
|
|
359
|
+
continue
|
|
360
|
+
if max_price > 0 and float(min_option_price) > float(max_price):
|
|
361
|
+
lots.pop()
|
|
362
|
+
continue
|
|
363
|
+
if pid in seen_product_ids:
|
|
364
|
+
lots.pop()
|
|
365
|
+
continue
|
|
366
|
+
seen_product_ids.add(pid)
|
|
367
|
+
if not has_next_page:
|
|
220
368
|
break
|
|
221
369
|
page += 1
|
|
222
370
|
|
|
223
|
-
lots
|
|
371
|
+
lots = _sort_lots(lots, sort_norm)
|
|
224
372
|
top = lots[: max(1, int(limit))]
|
|
225
373
|
return {
|
|
226
374
|
"query": query,
|
|
375
|
+
"normalized_query": parsed_q["product_query"],
|
|
376
|
+
"category_id": category_id,
|
|
377
|
+
"source_url": source_url,
|
|
378
|
+
"applied_filters": {
|
|
379
|
+
"sort_by": sort_norm,
|
|
380
|
+
"min_reviews": int(min_reviews),
|
|
381
|
+
"min_positive_ratio": float(min_positive_ratio),
|
|
382
|
+
"min_price": float(min_price),
|
|
383
|
+
"max_price": float(max_price),
|
|
384
|
+
"include_terms": include_tokens,
|
|
385
|
+
"exclude_terms": exclude_tokens,
|
|
386
|
+
"max_pages": int(max_pages),
|
|
387
|
+
"per_page": int(per_page),
|
|
388
|
+
"currency": currency,
|
|
389
|
+
"lang": lang,
|
|
390
|
+
},
|
|
227
391
|
"total_candidates": len(lots),
|
|
228
392
|
"reliable_candidates": len(lots),
|
|
229
393
|
"returned": len(top),
|
|
@@ -233,18 +397,27 @@ def find_cheapest_reliable_options(
|
|
|
233
397
|
|
|
234
398
|
TOOL_SCHEMA = {
|
|
235
399
|
"name": "find_cheapest_reliable_options",
|
|
236
|
-
"description": "Find
|
|
400
|
+
"description": "Find Plati offers by text query or Plati URL, returning lots with links and full option variants.",
|
|
237
401
|
"inputSchema": {
|
|
238
402
|
"type": "object",
|
|
239
403
|
"properties": {
|
|
240
|
-
"query": {"type": "string", "description": "
|
|
241
|
-
"limit": {"type": "integer", "default":
|
|
404
|
+
"query": {"type": "string", "description": "Text query (e.g. 'claude code') or Plati URL (/search/<term>, /games/.../<id>/, /cat/.../<id>/)."},
|
|
405
|
+
"limit": {"type": "integer", "default": 20, "minimum": 1, "maximum": 100},
|
|
242
406
|
"currency": {"type": "string", "default": "RUB"},
|
|
243
407
|
"lang": {"type": "string", "default": "ru-RU"},
|
|
244
408
|
"min_reviews": {"type": "integer", "default": 0, "minimum": 0},
|
|
245
409
|
"min_positive_ratio": {"type": "number", "default": 0.0, "minimum": 0, "maximum": 1},
|
|
246
410
|
"max_pages": {"type": "integer", "default": 6, "minimum": 1, "maximum": 30},
|
|
247
411
|
"per_page": {"type": "integer", "default": 30, "minimum": 5, "maximum": 100},
|
|
412
|
+
"sort_by": {
|
|
413
|
+
"type": "string",
|
|
414
|
+
"default": "price_asc",
|
|
415
|
+
"enum": ["price_asc", "price_desc", "seller_reviews_desc", "reliability_desc", "title_asc", "title_desc"],
|
|
416
|
+
},
|
|
417
|
+
"min_price": {"type": "number", "default": 0},
|
|
418
|
+
"max_price": {"type": "number", "default": 0},
|
|
419
|
+
"include_terms": {"type": "string", "default": "", "description": "Space/comma-separated terms that must appear in lot title/options."},
|
|
420
|
+
"exclude_terms": {"type": "string", "default": "", "description": "Space/comma-separated terms to exclude from lot title/options."},
|
|
248
421
|
},
|
|
249
422
|
"required": ["query"],
|
|
250
423
|
},
|
|
@@ -279,13 +452,18 @@ def _handle_request(msg: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
279
452
|
try:
|
|
280
453
|
result = find_cheapest_reliable_options(
|
|
281
454
|
query=str(args["query"]),
|
|
282
|
-
limit=int(args.get("limit",
|
|
455
|
+
limit=int(args.get("limit", 20)),
|
|
283
456
|
currency=str(args.get("currency", "RUB")),
|
|
284
457
|
lang=str(args.get("lang", "ru-RU")),
|
|
285
|
-
min_reviews=int(args.get("min_reviews",
|
|
286
|
-
min_positive_ratio=float(args.get("min_positive_ratio", 0.
|
|
458
|
+
min_reviews=int(args.get("min_reviews", 0)),
|
|
459
|
+
min_positive_ratio=float(args.get("min_positive_ratio", 0.0)),
|
|
287
460
|
max_pages=int(args.get("max_pages", 6)),
|
|
288
461
|
per_page=int(args.get("per_page", 30)),
|
|
462
|
+
sort_by=str(args.get("sort_by", "price_asc")),
|
|
463
|
+
min_price=float(args.get("min_price", 0.0)),
|
|
464
|
+
max_price=float(args.get("max_price", 0.0)),
|
|
465
|
+
include_terms=str(args.get("include_terms", "")),
|
|
466
|
+
exclude_terms=str(args.get("exclude_terms", "")),
|
|
289
467
|
)
|
|
290
468
|
return _ok(
|
|
291
469
|
req_id,
|
package/package.json
CHANGED
package/plati_scrape.py
CHANGED
|
@@ -13,6 +13,7 @@ from urllib.request import Request, urlopen
|
|
|
13
13
|
SEARCH_ENDPOINT = "https://api.digiseller.com/api/cataloguer/front/products"
|
|
14
14
|
PRODUCT_DATA_ENDPOINT = "https://api.digiseller.com/api/products/{product_id}/data"
|
|
15
15
|
REVIEWS_ENDPOINT = "https://api.digiseller.com/api/reviews"
|
|
16
|
+
CATEGORY_BLOCK_ENDPOINT = "https://plati.market/asp/block_goods_category_2.asp"
|
|
16
17
|
USER_AGENT = (
|
|
17
18
|
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
|
|
18
19
|
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36"
|
|
@@ -40,6 +41,12 @@ SEARCH_SORT_MAP = {
|
|
|
40
41
|
"price_desc": "popular",
|
|
41
42
|
"new": "popular",
|
|
42
43
|
}
|
|
44
|
+
# Sort name -> value sent as the ``sort`` query parameter of the category
# block URL. An empty string means no explicit sort value is sent.
CATEGORY_SORT_MAP = dict(
    popular="",
    price_asc="price",
    price_desc="-price",
    new="",
)
|
|
43
50
|
|
|
44
51
|
|
|
45
52
|
def fetch_json(url: str, timeout: int = 30) -> Dict:
|
|
@@ -48,6 +55,12 @@ def fetch_json(url: str, timeout: int = 30) -> Dict:
|
|
|
48
55
|
return json.loads(resp.read().decode("utf-8", errors="replace"))
|
|
49
56
|
|
|
50
57
|
|
|
58
|
+
def fetch_text(url: str, timeout: int = 30) -> str:
    """Download ``url`` and return the response body as text.

    The body is decoded with the charset declared in the response's
    Content-Type header when there is one, falling back to UTF-8. Bytes that
    cannot be decoded are replaced rather than raising.

    Args:
        url: Address to fetch.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        The decoded response body.
    """
    req = Request(url, headers={"User-Agent": USER_AGENT, "Accept": "text/html,*/*"})
    with urlopen(req, timeout=timeout) as resp:
        raw = resp.read()
        # Honour the server-declared encoding instead of assuming UTF-8.
        charset = resp.headers.get_content_charset() or "utf-8"
    try:
        return raw.decode(charset, errors="replace")
    except LookupError:
        # The header named a codec Python does not know; keep the old behaviour.
        return raw.decode("utf-8", errors="replace")
|
|
62
|
+
|
|
63
|
+
|
|
51
64
|
def parse_search_query(search_url: str) -> str:
|
|
52
65
|
parsed = urlparse(search_url)
|
|
53
66
|
m = re.search(r"/search/([^/?#]+)", parsed.path)
|
|
@@ -56,9 +69,17 @@ def parse_search_query(search_url: str) -> str:
|
|
|
56
69
|
return unquote(m.group(1))
|
|
57
70
|
|
|
58
71
|
|
|
59
|
-
def build_search_url(
|
|
72
|
+
def build_search_url(
|
|
73
|
+
query: str,
|
|
74
|
+
page: int,
|
|
75
|
+
count: int,
|
|
76
|
+
currency: str,
|
|
77
|
+
lang: str,
|
|
78
|
+
sort_by: str,
|
|
79
|
+
category_id: str = "",
|
|
80
|
+
) -> str:
|
|
60
81
|
params = {
|
|
61
|
-
"categoryId":
|
|
82
|
+
"categoryId": category_id,
|
|
62
83
|
"getProductsRecursive": "true",
|
|
63
84
|
"sellerCategoryId": "",
|
|
64
85
|
"productId": "",
|
|
@@ -108,6 +129,72 @@ def normalize_search_sort(sort_by: str) -> str:
|
|
|
108
129
|
return SEARCH_SORT_MAP.get(sort_by, "popular")
|
|
109
130
|
|
|
110
131
|
|
|
132
|
+
def normalize_category_sort(sort_by: str) -> str:
    """Translate a sort name into the category endpoint's ``sort`` value.

    Names that are not present in ``CATEGORY_SORT_MAP`` map to an empty string.
    """
    if sort_by in CATEGORY_SORT_MAP:
        return CATEGORY_SORT_MAP[sort_by]
    return ""
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def build_category_block_url(
    category_id: str,
    page: int,
    rows: int,
    currency: str,
    lang: str,
    sort_by: str,
    subcategory_id: int = 0,
) -> str:
    """Compose the URL of one page of the category goods block.

    Args:
        category_id: Sent as ``id_cb``.
        page: Page number, sent as ``page``.
        rows: Page size, sent as ``rows``.
        currency: Currency code; sent lower-cased as ``curr``.
        lang: Locale such as ``ru-RU``; only the part before the first ``-``
            is sent as ``lang`` (a falsy value falls back to ``ru-RU``).
        sort_by: Sort name, translated by ``normalize_category_sort``.
        subcategory_id: Sent as ``id_c``.

    Returns:
        ``CATEGORY_BLOCK_ENDPOINT`` followed by the URL-encoded query string.
    """
    query_pairs = [
        ("id_cb", str(category_id)),
        ("id_c", str(subcategory_id)),
        ("sort", normalize_category_sort(sort_by)),
        ("page", str(page)),
        ("rows", str(rows)),
        ("curr", currency.lower()),
        ("lang", (lang or "ru-RU").split("-")[0]),
    ]
    return CATEGORY_BLOCK_ENDPOINT + "?" + urlencode(query_pairs)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _strip_tags(value: str) -> str:
    """Reduce an HTML fragment to plain text.

    Each tag is replaced by a space, HTML entities are unescaped, and the
    result is passed through ``clean_text``. A falsy ``value`` is treated as
    an empty string.
    """
    without_markup = re.sub(r"<[^>]+>", " ", value or "", flags=re.DOTALL)
    decoded = html.unescape(without_markup)
    return clean_text(decoded)
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _parse_block_price(raw: str) -> float:
    """Convert a displayed price such as ``1 234,56`` into a float (0.0 if unparsable)."""
    cleaned = re.sub(r"[^\d,.\-]", "", raw or "")
    last_comma = cleaned.rfind(",")
    last_dot = cleaned.rfind(".")
    if last_comma >= 0 and last_dot >= 0:
        # Both separators present: the right-most one is the decimal mark,
        # the other one is a thousands separator ("1,234.56" / "1.234,56").
        decimal_sep = "," if last_comma > last_dot else "."
        thousands_sep = "." if decimal_sep == "," else ","
        cleaned = cleaned.replace(thousands_sep, "").replace(decimal_sep, ".")
    elif cleaned.count(",") > 1 or cleaned.count(".") > 1:
        # One separator repeated can only be thousands grouping ("1,234,567").
        cleaned = cleaned.replace(",", "").replace(".", "")
    else:
        # A single separator is treated as the decimal mark.
        cleaned = cleaned.replace(",", ".")
    try:
        return float(cleaned)
    except ValueError:
        return 0.0


def parse_category_block_items(block_html: str) -> List[Dict[str, Union[int, float, str]]]:
    """Extract product cards from the HTML of a category goods block.

    Every ``<a ... product_id="N" ...>...</a>`` element is treated as one
    card. From each card the function reads the ``href`` and ``title``
    attributes, the seller name (text of the span whose markup contains
    ``text-truncate``) and the price (text of the span whose markup contains
    ``h5-bold``).

    Args:
        block_html: Raw HTML of the block; a falsy value yields an empty list.

    Returns:
        One dict per card with keys ``product_id`` (int), ``link`` (absolute
        URL), ``title``, ``seller_name`` (str) and ``price`` (float, 0.0 when
        absent or unparsable).
    """
    # Local imports keep this block self-contained with respect to the
    # module's top-level import list.
    from html import unescape
    from urllib.parse import urljoin

    items: List[Dict[str, Union[int, float, str]]] = []
    if not block_html:
        return items
    for m in re.finditer(r"<a[^>]*product_id=\"(\d+)\"[^>]*>(.*?)</a>", block_html, flags=re.DOTALL | re.IGNORECASE):
        pid = int(m.group(1))
        card_html = m.group(0)
        href_m = re.search(r'href=\"([^\"]+)\"', card_html, flags=re.IGNORECASE)
        title_m = re.search(r'title=\"([^\"]+)\"', card_html, flags=re.IGNORECASE)
        seller_m = re.search(
            r"text-truncate[^>]*>([^<]+)</span>",
            card_html,
            flags=re.DOTALL | re.IGNORECASE,
        )
        price_m = re.search(r"h5-bold[^>]*>([^<]+)</span>", card_html, flags=re.DOTALL | re.IGNORECASE)
        price_value = _parse_block_price(price_m.group(1)) if price_m else 0.0
        # Attribute values are HTML-escaped in markup ("&amp;" -> "&").
        href = unescape(href_m.group(1)) if href_m else f"/itm/i/{pid}"
        # urljoin resolves absolute, root-relative, relative and
        # protocol-relative hrefs correctly against the site root.
        link = urljoin("https://plati.market/", href)
        items.append(
            {
                "product_id": pid,
                "link": link,
                "title": _strip_tags(title_m.group(1) if title_m else ""),
                "seller_name": _strip_tags(seller_m.group(1) if seller_m else ""),
                "price": price_value,
            }
        )
    return items
|
|
196
|
+
|
|
197
|
+
|
|
111
198
|
def pick_name(name_entries: List[Dict], lang: str) -> str:
|
|
112
199
|
if not name_entries:
|
|
113
200
|
return ""
|
|
@@ -465,6 +552,7 @@ def search_all_products(
|
|
|
465
552
|
max_pages: int,
|
|
466
553
|
request_text: str = "pro",
|
|
467
554
|
return_all_choices: bool = False,
|
|
555
|
+
category_id: str = "",
|
|
468
556
|
) -> List[Dict]:
|
|
469
557
|
query = parse_search_query(search_url)
|
|
470
558
|
rows = []
|
|
@@ -475,7 +563,7 @@ def search_all_products(
|
|
|
475
563
|
warned_sort_fallback = False
|
|
476
564
|
|
|
477
565
|
while len(rows) < max_items and page <= max_pages:
|
|
478
|
-
api_url = build_search_url(query, page, per_page, currency, lang, api_sort)
|
|
566
|
+
api_url = build_search_url(query, page, per_page, currency, lang, api_sort, category_id=category_id)
|
|
479
567
|
try:
|
|
480
568
|
payload = fetch_json(api_url)
|
|
481
569
|
except HTTPError as e:
|