klydo-mcp 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- klydo/__init__.py +8 -0
- klydo/config.py +47 -0
- klydo/logging.py +141 -0
- klydo/models/__init__.py +19 -0
- klydo/models/product.py +107 -0
- klydo/scrapers/__init__.py +73 -0
- klydo/scrapers/base.py +107 -0
- klydo/scrapers/cache.py +198 -0
- klydo/scrapers/klydo_store.py +480 -0
- klydo/scrapers/myntra.py +759 -0
- klydo/server.py +219 -0
- klydo_mcp-0.1.3.dist-info/METADATA +262 -0
- klydo_mcp-0.1.3.dist-info/RECORD +16 -0
- klydo_mcp-0.1.3.dist-info/WHEEL +4 -0
- klydo_mcp-0.1.3.dist-info/entry_points.txt +2 -0
- klydo_mcp-0.1.3.dist-info/licenses/LICENSE +21 -0
klydo/__init__.py
ADDED
klydo/config.py
ADDED
@@ -0,0 +1,47 @@
"""
Application configuration using Pydantic Settings.

All settings can be overridden via environment variables
with the KLYDO_ prefix.
"""

from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """
    Application settings.

    Loaded from environment variables or .env file.
    All env vars should be prefixed with KLYDO_.

    Example:
        KLYDO_DEBUG=true
        KLYDO_DEFAULT_SCRAPER=myntra
    """

    model_config = SettingsConfigDict(
        env_prefix="KLYDO_",
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # Scraper settings
    default_scraper: str = "myntra"
    request_timeout: int = 30
    cache_ttl: int = 3600  # 1 hour

    # Rate limiting (be nice to servers)
    requests_per_minute: int = 30

    # Klydo brand API auth
    klydo_api_token: str | None = None
    klydo_session_id: str | None = None

    # Debug mode
    debug: bool = False


# Singleton instance
settings = Settings()
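For reference, a minimal usage sketch of the settings above (hypothetical, not part of the package; assumes klydo-mcp and pydantic-settings are installed). Any KLYDO_-prefixed environment variable overrides the corresponding default:

# Hypothetical sketch, not shipped with the wheel.
import os

os.environ["KLYDO_DEBUG"] = "true"
os.environ["KLYDO_REQUEST_TIMEOUT"] = "10"

from klydo.config import Settings

s = Settings()
print(s.debug)            # True (overridden via env)
print(s.request_timeout)  # 10 (overridden via env)
print(s.default_scraper)  # "myntra" (default unchanged)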
klydo/logging.py
ADDED
@@ -0,0 +1,141 @@
"""
Logging configuration using Loguru.

Provides structured, human-readable logging for the Klydo MCP server.
Logs are essential for tracking requests, debugging issues, and monitoring
the server in production.

Usage:
    from klydo.logging import logger

    logger.info("Search request", query="black dress", limit=10)
    logger.debug("Cache hit", key="search:dress")
    logger.exception("API error")
"""

import sys
from typing import Any

from loguru import logger

from klydo.config import settings

# Remove default handler
logger.remove()

# Configure log format based on debug mode
if settings.debug:
    # Detailed format for development
    log_format = (
        "<green>{time:YYYY-MM-DD HH:mm:ss.SSS}</green> | "
        "<level>{level: <8}</level> | "
        "<cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> | "
        "<level>{message}</level>"
    )
    log_level = "DEBUG"
else:
    # Cleaner format for production
    log_format = (
        "{time:YYYY-MM-DD HH:mm:ss} | {level: <8} | {name}:{function} | {message}"
    )
    log_level = "INFO"

# Add stderr handler
logger.add(
    sys.stderr,
    format=log_format,
    level=log_level,
    colorize=True,
    backtrace=settings.debug,
    diagnose=settings.debug,
)


def log_request(
    action: str,
    **kwargs: Any,
) -> None:
    """
    Log an incoming MCP tool request.

    Args:
        action: The action being performed (e.g., "search", "get_product")
        **kwargs: Additional context to log
    """
    logger.info(f"Request: {action}", **kwargs)


def log_response(
    action: str,
    duration_ms: float,
    result_count: int | None = None,
    **kwargs: Any,
) -> None:
    """
    Log a completed MCP tool response.

    Args:
        action: The action that was performed
        duration_ms: Time taken in milliseconds
        result_count: Number of results returned (if applicable)
        **kwargs: Additional context to log
    """
    msg = f"Response: {action} completed in {duration_ms:.0f}ms"
    if result_count is not None:
        msg += f" ({result_count} results)"
    logger.info(msg, **kwargs)


def log_cache_hit(key: str) -> None:
    """Log a cache hit."""
    logger.debug(f"Cache HIT: {key}")


def log_cache_miss(key: str) -> None:
    """Log a cache miss."""
    logger.debug(f"Cache MISS: {key}")


def log_api_call(
    source: str,
    endpoint: str,
    method: str = "GET",
) -> None:
    """Log an outgoing API call."""
    logger.debug(f"API call: {method} {source} {endpoint}")


def log_api_error(
    source: str,
    endpoint: str,
    error: str,
    status_code: int | None = None,
) -> None:
    """Log an API error."""
    msg = f"API error: {source} {endpoint}"
    if status_code:
        msg += f" (HTTP {status_code})"
    msg += f" - {error}"
    logger.warning(msg)


def log_scraper_error(
    scraper: str,
    operation: str,
    error: Exception,
) -> None:
    """Log a scraper error with exception details."""
    logger.error(f"Scraper error: {scraper}.{operation} - {error}")


# Export logger and helper functions
__all__ = [
    "logger",
    "log_request",
    "log_response",
    "log_cache_hit",
    "log_cache_miss",
    "log_api_call",
    "log_api_error",
    "log_scraper_error",
]
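A short usage sketch of these helpers (hypothetical, not part of the package; assumes loguru is installed). It mirrors a typical request/response cycle around an MCP tool call:

# Hypothetical sketch, not shipped with the wheel.
import time

from klydo.logging import log_cache_miss, log_request, log_response, logger

start = time.perf_counter()
log_request("search", query="black dress", limit=10)
log_cache_miss("search:black dress")
# ... perform the actual search here ...
elapsed_ms = (time.perf_counter() - start) * 1000
log_response("search", duration_ms=elapsed_ms, result_count=7)
logger.debug("done")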
klydo/models/__init__.py
ADDED
@@ -0,0 +1,19 @@
"""
Domain models for Klydo MCP Server.

All Pydantic models representing fashion products and related entities.
"""

from klydo.models.product import (
    Price,
    Product,
    ProductImage,
    ProductSummary,
)

__all__ = [
    "Price",
    "Product",
    "ProductImage",
    "ProductSummary",
]
klydo/models/product.py
ADDED
@@ -0,0 +1,107 @@
"""
Product models for fashion e-commerce.

These models define the structure of product data returned
by scrapers and exposed through MCP tools.
"""

from __future__ import annotations

from decimal import Decimal

from pydantic import BaseModel, Field, HttpUrl


class ProductImage(BaseModel):
    """
    Product image with URL and alt text.

    Attributes:
        url: Direct URL to the image
        alt: Alt text for accessibility
    """

    url: HttpUrl
    alt: str = ""


class Price(BaseModel):
    """
    Price with optional discount information.

    Attributes:
        current: Current selling price
        original: Original price before discount (if applicable)
        currency: Currency code (default INR)
        discount_percent: Discount percentage (if applicable)
    """

    current: Decimal
    original: Decimal | None = None
    currency: str = "INR"
    discount_percent: int | None = None

    @property
    def has_discount(self) -> bool:
        """Check if product is discounted."""
        return self.original is not None and self.original > self.current


class ProductSummary(BaseModel):
    """
    Lightweight product for search results.

    Contains enough info to display in a list without
    fetching full product details.

    Attributes:
        id: Unique product identifier (from source site)
        name: Product name/title
        brand: Brand name
        price: Price information
        image_url: Primary product image URL
        category: Product category
        source: Scraper source name (e.g., 'myntra')
        url: Direct link to product page for purchase
    """

    id: str = Field(..., description="Unique product identifier")
    name: str = Field(..., description="Product name/title")
    brand: str = Field(..., description="Brand name")
    price: Price = Field(..., description="Price information")
    image_url: HttpUrl = Field(..., description="Primary product image URL")
    category: str = Field(..., description="Product category")
    source: str = Field(..., description="Scraper source (e.g., 'myntra')")
    url: HttpUrl = Field(..., description="Direct link to product page")


class Product(ProductSummary):
    """
    Full product details.

    Extends ProductSummary with complete information including
    all images, sizes, colors, ratings, and specifications.

    Attributes:
        description: Full product description
        images: All product images
        sizes: Available sizes
        colors: Available colors
        rating: Average rating (0-5)
        review_count: Number of reviews
        in_stock: Whether product is in stock
        specifications: Additional product specifications
    """

    description: str = Field(..., description="Full product description")
    images: list[ProductImage] = Field(
        default_factory=list, description="All product images"
    )
    sizes: list[str] = Field(default_factory=list, description="Available sizes")
    colors: list[str] = Field(default_factory=list, description="Available colors")
    rating: float | None = Field(None, ge=0, le=5, description="Average rating (0-5)")
    review_count: int = Field(default=0, ge=0, description="Number of reviews")
    in_stock: bool = Field(default=True, description="Whether product is in stock")
    specifications: dict[str, str] = Field(
        default_factory=dict, description="Additional product specifications"
    )
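A brief sketch of how these models compose (hypothetical values, not part of the package; assumes pydantic v2, which the cache module's model_validate_json/model_dump_json calls imply):

# Hypothetical sketch, not shipped with the wheel. All product data below is made up.
from decimal import Decimal

from klydo.models import Price, Product, ProductImage

price = Price(current=Decimal("1299"), original=Decimal("1999"), discount_percent=35)
print(price.has_discount)  # True, since original > current

product = Product(
    id="12345",
    name="Cotton Midi Dress",
    brand="ExampleBrand",
    price=price,
    image_url="https://example.com/img/12345.jpg",
    category="dresses",
    source="myntra",
    url="https://example.com/p/12345",
    description="A lightweight cotton midi dress.",
    images=[ProductImage(url="https://example.com/img/12345.jpg", alt="front view")],
    sizes=["S", "M", "L"],
    rating=4.2,
)
print(product.model_dump_json()[:80])  # serialized summary, truncated for display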
klydo/scrapers/__init__.py
ADDED
@@ -0,0 +1,73 @@
"""
Scraper layer for fashion e-commerce sites.

This module provides a unified interface for scraping
fashion products from various Indian e-commerce sites.

Usage:
    from klydo.scrapers import get_scraper

    scraper = get_scraper("myntra")
    products = await scraper.search("black dress")
"""

from klydo.scrapers.base import ScraperProtocol

# Registry of available scrapers
# Import here to avoid circular imports
_SCRAPERS: dict[str, type[ScraperProtocol]] = {}


def _register_scrapers() -> None:
    """Lazy registration of scrapers to avoid circular imports."""
    global _SCRAPERS
    if not _SCRAPERS:
        from klydo.scrapers.myntra import MyntraScraper
        from klydo.scrapers.klydo_store import KlydoStoreScraper

        _SCRAPERS["myntra"] = MyntraScraper
        _SCRAPERS["klydo"] = KlydoStoreScraper


def get_scraper(name: str = "myntra") -> ScraperProtocol:
    """
    Factory function to get a scraper instance by name.

    Args:
        name: Scraper name (e.g., 'myntra', 'klydo')

    Returns:
        Scraper instance implementing ScraperProtocol

    Raises:
        ValueError: If scraper name is not registered

    Example:
        scraper = get_scraper("myntra")
        products = await scraper.search("dress")
    """
    _register_scrapers()

    if name not in _SCRAPERS:
        available = list(_SCRAPERS.keys())
        raise ValueError(f"Unknown scraper: '{name}'. Available scrapers: {available}")

    return _SCRAPERS[name]()


def list_scrapers() -> list[str]:
    """
    List all available scraper names.

    Returns:
        List of registered scraper names
    """
    _register_scrapers()
    return list(_SCRAPERS.keys())


__all__ = [
    "ScraperProtocol",
    "get_scraper",
    "list_scrapers",
]
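The factory in context, as an async usage sketch (hypothetical, not part of the package; the live search call would need network access to the scraped site):

# Hypothetical sketch, not shipped with the wheel.
import asyncio

from klydo.scrapers import get_scraper, list_scrapers


async def main() -> None:
    print(list_scrapers())        # expected: ['myntra', 'klydo']
    scraper = get_scraper("myntra")
    try:
        results = await scraper.search("black dress", limit=5)
        for item in results:
            print(item.name, item.price.current)
    finally:
        await scraper.close()     # release the underlying HTTP client


# asyncio.run(main())  # commented out: requires network access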
klydo/scrapers/base.py
ADDED
@@ -0,0 +1,107 @@
"""
Abstract scraper interface using Python Protocol.

This defines the contract that all scrapers must implement.
Using Protocol (structural subtyping) instead of ABC for:
- Duck typing with type safety
- No inheritance required
- Easy to mock in tests
- Clear interface for AI agents to understand
"""

from typing import Protocol, runtime_checkable

from klydo.models.product import Product, ProductSummary


@runtime_checkable
class ScraperProtocol(Protocol):
    """
    Abstract scraper interface.

    Implement this protocol for any fashion e-commerce site.
    All methods are async to support non-blocking I/O.

    Example implementation:
        class MyScraper:
            @property
            def source_name(self) -> str:
                return "mysite"

            async def search(self, query: str, **kwargs) -> list[ProductSummary]:
                # Implementation here
                ...
    """

    @property
    def source_name(self) -> str:
        """
        Human-readable source name.

        Returns:
            Source name (e.g., 'Myntra', 'Ajio')
        """
        ...

    async def search(
        self,
        query: str,
        *,
        category: str | None = None,
        gender: str | None = None,
        min_price: int | None = None,
        max_price: int | None = None,
        limit: int = 20,
    ) -> list[ProductSummary]:
        """
        Search for products matching criteria.

        Args:
            query: Search terms (e.g., "black dress", "nike shoes")
            category: Filter by category (e.g., "dresses", "shoes")
            gender: Filter by gender ("men", "women", "unisex")
            min_price: Minimum price in INR
            max_price: Maximum price in INR
            limit: Maximum number of results to return

        Returns:
            List of matching products (lightweight summaries)
        """
        ...

    async def get_product(self, product_id: str) -> Product | None:
        """
        Get full product details by ID.

        Args:
            product_id: Unique product identifier from search results

        Returns:
            Full product details, or None if not found
        """
        ...

    async def get_trending(
        self,
        category: str | None = None,
        limit: int = 20,
    ) -> list[ProductSummary]:
        """
        Get trending/popular products.

        Args:
            category: Optional category filter
            limit: Maximum number of results

        Returns:
            List of trending products
        """
        ...

    async def close(self) -> None:
        """
        Clean up resources (HTTP clients, etc.).

        Should be called when done with the scraper.
        """
        ...
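Because the protocol is runtime_checkable, a stub that merely provides these members passes an isinstance check, which is convenient for tests. A minimal hypothetical stub (not part of the package):

# Hypothetical test stub, not shipped with the wheel.
from klydo.models.product import Product, ProductSummary
from klydo.scrapers.base import ScraperProtocol


class DummyScraper:
    """In-memory scraper stub; returns no results."""

    @property
    def source_name(self) -> str:
        return "dummy"

    async def search(self, query: str, *, category=None, gender=None,
                     min_price=None, max_price=None, limit=20) -> list[ProductSummary]:
        return []

    async def get_product(self, product_id: str) -> Product | None:
        return None

    async def get_trending(self, category=None, limit=20) -> list[ProductSummary]:
        return []

    async def close(self) -> None:
        pass


# Structural check only: runtime_checkable verifies member presence, not signatures.
print(isinstance(DummyScraper(), ScraperProtocol))  # True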
klydo/scrapers/cache.py
ADDED
@@ -0,0 +1,198 @@
"""
Simple file-based cache with TTL support.

This provides caching for scraper responses to:
- Reduce load on target sites
- Improve response times
- Work offline with cached data

Can be swapped for Redis or other backends later.
"""

import hashlib
import json
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any

from pydantic import BaseModel


class CacheEntry(BaseModel):
    """
    Cache entry with data and expiration.

    Attributes:
        data: Cached data (dict or list)
        expires_at: When this entry expires (UTC)
    """

    data: dict[str, Any] | list[Any]
    expires_at: datetime


class Cache:
    """
    File-based cache with TTL support.

    Each cache entry is stored as a JSON file with expiration metadata.
    Expired entries are cleaned up on read.

    Usage:
        cache = Cache(namespace="myntra")

        # Try cache first
        if cached := await cache.get("search:dress"):
            return cached

        # Fetch and cache
        result = await fetch_data()
        await cache.set("search:dress", result, ttl=3600)

    Attributes:
        namespace: Cache namespace (used in key hashing)
        cache_dir: Directory for cache files
        default_ttl: Default TTL in seconds
    """

    def __init__(
        self,
        namespace: str,
        cache_dir: Path | None = None,
        default_ttl: int = 3600,
    ):
        """
        Initialize cache.

        Args:
            namespace: Cache namespace (e.g., 'myntra')
            cache_dir: Directory for cache files (default: ~/.cache/klydo)
            default_ttl: Default TTL in seconds (default: 1 hour)
        """
        self.namespace = namespace
        self.cache_dir = cache_dir or Path.home() / ".cache" / "klydo"
        self.default_ttl = default_ttl

        # Ensure cache directory exists
        self.cache_dir.mkdir(parents=True, exist_ok=True)

    def _key_to_path(self, key: str) -> Path:
        """
        Convert cache key to file path.

        Uses MD5 hash to create safe filenames.

        Args:
            key: Cache key

        Returns:
            Path to cache file
        """
        full_key = f"{self.namespace}:{key}"
        hashed = hashlib.md5(full_key.encode()).hexdigest()
        return self.cache_dir / f"{hashed}.json"

    async def get(self, key: str) -> dict[str, Any] | list[Any] | None:
        """
        Get cached value if it exists and has not expired.

        Args:
            key: Cache key

        Returns:
            Cached data, or None if not found/expired
        """
        path = self._key_to_path(key)

        if not path.exists():
            return None

        try:
            raw = path.read_text(encoding="utf-8")
            entry = CacheEntry.model_validate_json(raw)

            # Check expiration
            now = datetime.now(timezone.utc)
            if entry.expires_at < now:
                # Expired, clean up
                path.unlink(missing_ok=True)
                return None

            return entry.data

        except (json.JSONDecodeError, ValueError):
            # Corrupted cache file, remove it
            path.unlink(missing_ok=True)
            return None

    async def set(
        self,
        key: str,
        value: dict[str, Any] | list[Any],
        ttl: int | None = None,
    ) -> None:
        """
        Cache a value with TTL.

        Args:
            key: Cache key
            value: Data to cache (must be JSON-serializable)
            ttl: TTL in seconds (default: use default_ttl)
        """
        ttl = ttl or self.default_ttl
        expires_at = datetime.now(timezone.utc) + timedelta(seconds=ttl)

        entry = CacheEntry(data=value, expires_at=expires_at)

        path = self._key_to_path(key)
        path.write_text(entry.model_dump_json(), encoding="utf-8")

    async def invalidate(self, key: str) -> bool:
        """
        Remove a cached value.

        Args:
            key: Cache key

        Returns:
            True if entry was removed, False if not found
        """
        path = self._key_to_path(key)

        if path.exists():
            path.unlink()
            return True

        return False

    async def clear(self) -> int:
        """
        Clear all cached entries.

        Note: This clears ALL entries in the cache directory,
        not just this namespace. Use with caution.

        Returns:
            Number of entries cleared
        """
        count = 0
        for path in self.cache_dir.glob("*.json"):
            path.unlink()
            count += 1
        return count

    def cache_key(self, *parts: str) -> str:
        """
        Build a cache key from parts.

        Args:
            *parts: Key parts to join

        Returns:
            Cache key string

        Example:
            key = cache.cache_key("search", "dress", "women")
            # Returns: "search:dress:women"
        """
        return ":".join(str(p) for p in parts if p)
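An end-to-end sketch of the cache lifecycle (hypothetical, not part of the package; the cache_dir path assumes a Unix-like system):

# Hypothetical sketch, not shipped with the wheel.
import asyncio
from pathlib import Path

from klydo.scrapers.cache import Cache


async def demo() -> None:
    cache = Cache(namespace="demo", cache_dir=Path("/tmp/klydo-cache"), default_ttl=60)
    key = cache.cache_key("search", "dress", "women")   # "search:dress:women"

    await cache.set(key, [{"id": "1", "name": "Example dress"}])
    print(await cache.get(key))   # [{'id': '1', 'name': 'Example dress'}]

    await cache.invalidate(key)
    print(await cache.get(key))   # None, entry removed


asyncio.run(demo())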