klydo-mcp 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,480 @@
1
+ """
2
+ Klydo brand scraper.
3
+
4
+ Uses the public klydo.in catalog API to fetch products.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import time
10
+ from decimal import Decimal
11
+ from typing import Any
12
+
13
+ import httpx
14
+
15
+ from klydo.config import settings
16
+ from klydo.logging import (
17
+ logger,
18
+ log_cache_hit,
19
+ log_cache_miss,
20
+ log_api_call,
21
+ log_api_error,
22
+ )
23
+ from klydo.models.product import Price, Product, ProductImage, ProductSummary
24
+ from klydo.scrapers.cache import Cache
25
+
26
+
27
+ class KlydoStoreScraper:
28
+ """Scraper for klydo.in."""
29
+
30
+ BASE_URL = "https://api.klydo.in"
31
+ WEB_BASE_URL = "https://www.klydo.in"
32
+
33
def __init__(self) -> None:
    """Set up the session id, the async HTTP client, and the result cache.

    The session id comes from ``settings.klydo_session_id`` when that is
    set; otherwise a new one is generated. It has to be assigned before the
    client is created because ``_get_headers`` reads ``self._session_id``.
    """
    configured_session = settings.klydo_session_id
    self._session_id = configured_session or self._generate_session_id()

    client_options = {
        "base_url": self.BASE_URL,
        "headers": self._get_headers(),
        "timeout": settings.request_timeout,
        "follow_redirects": True,
    }
    self._client = httpx.AsyncClient(**client_options)

    self._cache = Cache(namespace="klydo-store", default_ttl=settings.cache_ttl)
    logger.debug(f"KlydoStoreScraper initialized | session_id={self._session_id}")
43
+
44
@property
def source_name(self) -> str:
    """Name used to label this scraper in products and log entries."""
    label = "Klydo"
    return label
48
+
49
+ def _generate_session_id(self) -> str:
50
+ now_ms = int(time.time() * 1000)
51
+ suffix = int(time.time_ns() % 1_000_000)
52
+ return f"{now_ms}-{suffix}"
53
+
54
def _get_headers(self) -> dict[str, str]:
    """Return the default headers attached to every API request.

    The set imitates a mobile Chrome browser talking to the Klydo web app.
    The bearer token is read from ``settings.klydo_api_token``; when no
    token is configured the ``authorization`` header is omitted.
    """
    # Token must be provided via KLYDO_KLYDO_API_TOKEN environment variable
    api_token = settings.klydo_api_token
    bearer = f"Bearer {api_token}" if api_token else ""

    browser_agent = (
        "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/143.0.0.0 Mobile Safari/537.36"
    )

    # Insertion order is kept identical to the order the headers are sent in.
    candidates = {
        "accept": "*/*",
        "accept-language": "en-GB,en-US;q=0.9,en;q=0.8,kn;q=0.7",
        "authorization": bearer,
        "content-type": "application/json",
        "dnt": "1",
        "origin": self.WEB_BASE_URL,
        "priority": "u=1, i",
        "referer": f"{self.WEB_BASE_URL}/",
        "sec-ch-ua": '"Google Chrome";v="143", "Chromium";v="143", "Not A(Brand";v="24"',
        "sec-ch-ua-mobile": "?1",
        "sec-ch-ua-platform": '"Android"',
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-site",
        "user-agent": browser_agent,
        "x-app-buildnumber": "63",
        "x-app-name": "Customer",
        "x-app-platform": "Web",
        "x-session-id": self._session_id,
    }

    # Any header whose value is empty (notably a missing token) is left out.
    headers: dict[str, str] = {}
    for header_name, header_value in candidates.items():
        if header_value:
            headers[header_name] = header_value
    return headers
86
+
87
async def search(
    self,
    query: str,
    *,
    category: str | None = None,
    gender: str | None = None,
    min_price: int | None = None,
    max_price: int | None = None,
    limit: int = 20,
) -> list[ProductSummary]:
    """Search the catalog and return up to ``limit`` product summaries.

    Results are cached per distinct argument combination; only non-empty
    result lists are stored. On a cache miss ``/catalog/search`` is queried.
    Transport errors, non-2xx responses and bodies that are not valid JSON
    are logged and yield an empty list instead of raising.

    Args:
        query: Free-text search terms.
        category: Optional category; appended to the query text.
        gender: Optional audience; lower-cased and sent as ``audience``.
        min_price: Optional inclusive lower bound on the current price.
        max_price: Optional inclusive upper bound on the current price.
        limit: Maximum number of results; the API request asks for at
            most 50.

    Returns:
        The matching summaries, possibly empty.
    """
    # Distinguish "no bound" from a bound of 0: ``max_price=0`` must not
    # share a cache entry with an unfiltered search.
    min_key = "" if min_price is None else str(min_price)
    max_key = "" if max_price is None else str(max_price)
    cache_key = self._cache.cache_key(
        "search",
        query,
        category or "",
        gender or "",
        min_key,
        max_key,
        str(limit),
    )

    if cached := await self._cache.get(cache_key):
        log_cache_hit(cache_key)
        products = [ProductSummary.model_validate(item) for item in cached][:limit]
        await self._warm_summary_cache(products)
        return products

    log_cache_miss(cache_key)

    params: dict[str, Any] = {
        "query": query,
        "limit": min(limit, 50),
        "includeFilters": "true",
    }

    if gender:
        params["audience"] = gender.lower()
    if category:
        params["query"] = f"{query} {category}"

    try:
        log_api_call(self.source_name, "/catalog/search")
        response = await self._client.get("/catalog/search", params=params)
        response.raise_for_status()
        data = response.json()
    except httpx.HTTPError as exc:
        # Only status errors carry a response; other HTTP errors log None.
        failed_response = getattr(exc, "response", None)
        log_api_error(
            self.source_name,
            "/catalog/search",
            str(exc),
            status_code=getattr(failed_response, "status_code", None),
        )
        return []
    except ValueError as exc:
        # A 2xx response whose body is not JSON (json.JSONDecodeError is a
        # ValueError) is treated like any other failed call.
        log_api_error(
            self.source_name,
            "/catalog/search",
            f"Invalid JSON response: {exc}",
        )
        return []

    # Tolerate a payload that is not an object or has a null/odd "products".
    raw_items = data.get("products") if isinstance(data, dict) else None
    if not isinstance(raw_items, list):
        raw_items = []

    products: list[ProductSummary] = []
    for item in raw_items[:limit]:
        product = self._parse_product_summary(item)
        if not product:
            continue
        # Apply price filters in rupees
        if min_price is not None and product.price.current < min_price:
            continue
        if max_price is not None and product.price.current > max_price:
            continue
        products.append(product)

    if products:
        await self._cache.set(
            cache_key,
            [p.model_dump(mode="json") for p in products],
        )
        await self._warm_summary_cache(products)

    logger.debug(f"Klydo search complete | query={query} | results={len(products)}")
    return products
163
+
164
+ async def _warm_summary_cache(self, products: list[ProductSummary]) -> None:
165
+ """Store summaries keyed by styleId for PDP fallback."""
166
+ for product in products:
167
+ try:
168
+ await self._cache.set(
169
+ self._cache.cache_key("summary", product.id),
170
+ product.model_dump(mode="json"),
171
+ )
172
+ except Exception as e:
173
+ logger.debug(f"Summary cache warm failed for {product.id}: {e}")
174
+
175
+ def _parse_product_summary(self, item: dict[str, Any]) -> ProductSummary | None:
176
+ style_id = item.get("styleId")
177
+ image_url = item.get("imageUrl")
178
+ name = item.get("title") or "Unknown Product"
179
+ brand = item.get("brand") or "Klydo"
180
+
181
+ if not style_id or not image_url:
182
+ return None
183
+
184
+ selling_price = self._to_rupees(item.get("sellingPrice"))
185
+ mrp_value = self._to_rupees(item.get("mrp"))
186
+ discount_percent = self._discount_percent(
187
+ selling_price, mrp_value, item.get("discountPercentage")
188
+ )
189
+
190
+ original_price = mrp_value if mrp_value and mrp_value > 0 else None
191
+ category_value = item.get("category") or "Fashion"
192
+
193
+ return ProductSummary(
194
+ id=str(style_id),
195
+ name=name,
196
+ brand=brand,
197
+ price=Price(
198
+ current=selling_price,
199
+ original=original_price,
200
+ currency="INR",
201
+ discount_percent=discount_percent,
202
+ ),
203
+ image_url=image_url,
204
+ category=category_value,
205
+ source=self.source_name,
206
+ url=self._build_product_url(style_id, item.get("slug")),
207
+ )
208
+
209
async def get_product(self, product_id: str) -> Product | None:
    """Return full details for ``product_id``, or ``None`` if nothing is found.

    Lookup order: the product cache, the detail endpoints, a summary cached
    by an earlier search, and finally a fresh search using the id as the
    query. Ids starting with ``SKU_`` are treated as SKU ids. Whatever
    product is produced is written back to the product cache.
    """
    product_key = self._cache.cache_key("product", product_id)
    if hit := await self._cache.get(product_key):
        log_cache_hit(product_key)
        return Product.model_validate(hit)

    log_cache_miss(product_key)
    sku_lookup = product_id.startswith("SKU_")

    # If we already cached a summary from a previous search, keep it for fallback
    summary_payload = await self._cache.get(
        self._cache.cache_key("summary", product_id)
    )
    fallback_summary = None
    if summary_payload:
        fallback_summary = ProductSummary.model_validate(summary_payload)

    result = None
    detail = await self._fetch_product_detail(product_id, is_sku=sku_lookup)
    if detail:
        result = self._parse_product_detail(
            detail,
            product_id,
            target_sku=product_id if sku_lookup else None,
        )

    # Fallback to summary-only product from cache
    if not result and fallback_summary:
        logger.debug(f"Using cached summary fallback for {product_id}")
        result = self._product_from_summary(fallback_summary)

    if not result:
        # As a last resort, try a search using the ID as query
        logger.debug(f"Attempting search fallback for {product_id}")
        candidates = await self.search(query=product_id, limit=10)
        match = None
        for candidate in candidates:
            if candidate.id == product_id:
                match = candidate
                break
        if match:
            result = self._product_from_summary(match)

    if result:
        await self._cache.set(product_key, result.model_dump(mode="json"))

    return result
253
+
254
async def _fetch_product_detail(
    self, identifier: str, is_sku: bool = False
) -> dict[str, Any] | None:
    """Try a handful of likely PDP endpoints and return the first usable payload.

    Args:
        identifier: Style id, or SKU id when ``is_sku`` is true.
        is_sku: Selects the SKU-oriented endpoint list.

    Returns:
        The first non-empty JSON object returned by any endpoint, or
        ``None`` when every endpoint fails or returns nothing usable.
        No exception escapes: failures are logged and the next endpoint
        is tried.
    """
    if is_sku:
        endpoints = [
            (f"/catalog/product/{identifier}", None),
            ("/catalog/product", {"skuId": identifier}),
            ("/catalog/pdp", {"skuId": identifier}),
        ]
    else:
        endpoints = [
            ("/catalog/pdp", {"styleId": identifier}),
            ("/catalog/product", {"styleId": identifier}),
            (f"/catalog/products/{identifier}", None),
            (f"/catalog/styles/{identifier}", None),
        ]

    for path, params in endpoints:
        try:
            log_api_call(self.source_name, path)
            response = await self._client.get(path, params=params)
            response.raise_for_status()
            data = response.json()
        except httpx.HTTPStatusError as exc:
            # 400/404 just mean "wrong guess" for this endpoint; stay quiet.
            if exc.response.status_code in (400, 404):
                continue
            log_api_error(
                self.source_name,
                path,
                str(exc),
                status_code=exc.response.status_code,
            )
        except Exception as exc:  # noqa: BLE001
            # Deliberately broad: probing is best-effort, so transport and
            # JSON decoding problems only get logged.
            log_api_error(self.source_name, path, str(exc))
        else:
            # Only a non-empty JSON object is usable by the detail parser;
            # a list or scalar payload would break its ``.get`` calls.
            if isinstance(data, dict) and data:
                return data
    return None
294
+
295
+ def _parse_product_detail(
296
+ self,
297
+ data: dict[str, Any],
298
+ requested_style_id: str,
299
+ target_sku: str | None = None,
300
+ ) -> Product | None:
301
+ styles = data.get("styles") or []
302
+ if not styles and data.get("styleId"):
303
+ styles = [data]
304
+
305
+ style = next(
306
+ (s for s in styles if s.get("styleId") == requested_style_id),
307
+ styles[0] if styles else None,
308
+ )
309
+
310
+ if target_sku:
311
+ for candidate in styles:
312
+ for sz in candidate.get("sizes", []):
313
+ if sz.get("skuId") == target_sku:
314
+ style = candidate
315
+ break
316
+ if style == candidate:
317
+ break
318
+
319
+ if not style:
320
+ return None
321
+
322
+ title = style.get("title") or data.get("title") or "Unknown Product"
323
+ brand = data.get("brandName") or style.get("brand") or "Klydo"
324
+ slug = style.get("slug") or requested_style_id
325
+
326
+ images = []
327
+ for media in style.get("media", []):
328
+ url = media.get("url")
329
+ if url:
330
+ images.append(ProductImage(url=url, alt=title))
331
+
332
+ # Choose price from selected SKU or first available size
333
+ sizes_data = style.get("sizes", [])
334
+ selected_sku = target_sku or data.get("selectedSkuId")
335
+ size_entry = next(
336
+ (s for s in sizes_data if s.get("skuId") == selected_sku),
337
+ None,
338
+ )
339
+ if not size_entry:
340
+ size_entry = next(
341
+ (s for s in sizes_data if s.get("inventory", {}).get("available")),
342
+ sizes_data[0] if sizes_data else None,
343
+ )
344
+
345
+ price = self._price_from_size(size_entry)
346
+
347
+ description = style.get("description", "") or ""
348
+ if not description and style.get("specifications"):
349
+ description = "; ".join(
350
+ f"{spec.get('name')}: {spec.get('value')}"
351
+ for spec in style["specifications"]
352
+ if spec.get("name") and spec.get("value")
353
+ )
354
+
355
+ primary_image = images[0].url if images else None
356
+ if not primary_image:
357
+ fallback = style.get("imageUrl") or data.get("imageUrl")
358
+ if fallback:
359
+ primary_image = fallback
360
+
361
+ if not primary_image:
362
+ return None
363
+
364
+ sizes = [
365
+ s.get("size")
366
+ for s in sizes_data
367
+ if s.get("size") and s.get("inventory", {}).get("available", True)
368
+ ]
369
+
370
+ specifications = {
371
+ spec.get("name"): spec.get("value")
372
+ for spec in style.get("specifications", [])
373
+ if spec.get("name") and spec.get("value")
374
+ }
375
+
376
+ in_stock = (
377
+ any(s.get("inventory", {}).get("available") for s in sizes_data)
378
+ if sizes_data
379
+ else True
380
+ )
381
+
382
+ colors = []
383
+ label = style.get("label")
384
+ if label:
385
+ colors.append(label)
386
+
387
+ return Product(
388
+ id=str(style.get("styleId", requested_style_id)),
389
+ name=title,
390
+ brand=brand,
391
+ price=price,
392
+ image_url=primary_image,
393
+ category="Fashion",
394
+ source=self.source_name,
395
+ url=self._build_product_url(requested_style_id, slug),
396
+ description=description,
397
+ images=images or [ProductImage(url=primary_image, alt=title)],
398
+ sizes=sizes,
399
+ colors=colors,
400
+ rating=None,
401
+ review_count=0,
402
+ in_stock=in_stock,
403
+ specifications=specifications,
404
+ )
405
+
406
def _product_from_summary(self, summary: ProductSummary) -> Product:
    """Promote a ``ProductSummary`` to a minimal ``Product``.

    Identity, price, image, category, source and URL are copied across.
    Detail-only fields get neutral placeholders: empty description, sizes,
    colors and specifications, no rating, zero reviews, and ``in_stock``
    set to true. The summary image becomes the only gallery image.
    """
    copied_fields = ("id", "name", "brand", "price", "image_url", "category", "source", "url")
    carried_over = {field: getattr(summary, field) for field in copied_fields}
    gallery = [ProductImage(url=summary.image_url, alt=summary.name)]

    return Product(
        **carried_over,
        description="",
        images=gallery,
        sizes=[],
        colors=[],
        rating=None,
        review_count=0,
        in_stock=True,
        specifications={},
    )
426
+
427
+ def _build_product_url(self, style_id: str, slug: str | None) -> str:
428
+ if slug:
429
+ return f"{self.WEB_BASE_URL}/p/{slug}"
430
+ return f"{self.WEB_BASE_URL}/style/{style_id}"
431
+
432
+ def _discount_percent(
433
+ self, selling_price: Decimal, mrp: Decimal, provided_discount: int | None
434
+ ) -> int | None:
435
+ if provided_discount is not None:
436
+ try:
437
+ return int(provided_discount)
438
+ except (TypeError, ValueError):
439
+ pass
440
+ if mrp and mrp > selling_price and selling_price > 0:
441
+ return int(((mrp - selling_price) / mrp) * 100)
442
+ return None
443
+
444
def _price_from_size(self, size_entry: dict[str, Any] | None) -> Price:
    """Build a ``Price`` from one size/SKU entry.

    A missing or empty entry yields a zero INR price with no original
    price and no discount. Otherwise ``sellingPrice`` and ``mrp`` are
    converted with ``_to_rupees``, and the original price is kept only
    when the MRP is positive.
    """
    if not size_entry:
        return Price(current=Decimal("0"), currency="INR")

    current = self._to_rupees(size_entry.get("sellingPrice"))
    listed = self._to_rupees(size_entry.get("mrp"))
    discount = self._discount_percent(
        current, listed, size_entry.get("discountPercentage")
    )

    if listed and listed > 0:
        original = listed
    else:
        original = None

    return Price(
        current=current,
        original=original,
        currency="INR",
        discount_percent=discount,
    )
460
+
461
+ def _to_rupees(self, value: Any) -> Decimal:
462
+ try:
463
+ return Decimal(str(value)) / Decimal("100")
464
+ except (TypeError, ValueError, ArithmeticError):
465
+ return Decimal("0")
466
+
467
async def get_trending(
    self,
    category: str | None = None,
    limit: int = 20,
) -> list[ProductSummary]:
    """Return a stand-in "trending" list obtained through ``search``.

    The category, when given, is used as the search query; without one the
    query defaults to "T-Shirts".
    """
    # The API does not expose a dedicated trending endpoint publicly;
    # reuse search with a sensible default query.
    query = category if category else "T-Shirts"
    logger.debug(f"Klydo get_trending | query={query} | limit={limit}")
    results = await self.search(query=query, limit=limit)
    return results
477
+
478
async def close(self) -> None:
    """Log the shutdown and close the underlying async HTTP client."""
    logger.debug("Closing KlydoStoreScraper")
    http_client = self._client
    await http_client.aclose()