seoslug 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
seoslug-1.0.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 seoslug contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
seoslug-1.0.0/PKG-INFO ADDED
@@ -0,0 +1,77 @@
1
+ Metadata-Version: 2.4
2
+ Name: seoslug
3
+ Version: 1.0.0
4
+ Summary: Framework-agnostic canonical URL normalization and SEO payload generation
5
+ Author: seoslug contributors
6
+ License-Expression: MIT
7
+ Project-URL: Documentation, https://deepwiki.com/emiliano-gandini-outeda/seoslug/
8
+ Keywords: seo,canonical,urls,metadata,open-graph,twitter-cards
9
+ Classifier: Development Status :: 5 - Production/Stable
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3 :: Only
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Topic :: Internet :: WWW/HTTP
17
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
18
+ Requires-Python: >=3.10
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Dynamic: license-file
22
+
23
+ # seoslug
24
+
25
+ [![DeepWiki](https://img.shields.io/badge/DeepWiki-Documentation-blue)](https://deepwiki.com/emiliano-gandini-outeda/seoslug/)
26
+
27
+ Canonical URL normalization and deterministic SEO payload generation for content platforms.
28
+
29
+ ## Installation
30
+
31
+ ```bash
32
+ pip install seoslug
33
+ ```
34
+
35
+ For local development:
36
+
37
+ ```bash
38
+ pip install -e .
39
+ ```
40
+
41
+ ## Quick usage
42
+
43
+ ```python
44
+ from seoslug import SEOConfig, URLPolicy, SEOEntity, build_seo_payload
45
+
46
+ config = SEOConfig(
47
+ canonical_host="portal.example.com",
48
+ public_base_url="https://portal.example.com",
49
+ url_policy=URLPolicy(
50
+ enforce_https=True,
51
+ lowercase_paths=True,
52
+ trailing_slash="never",
53
+ collapse_duplicate_slashes=True,
54
+ strip_tracking_params=True,
55
+ allowed_query_params=["page", "q"],
56
+ ),
57
+ default_og_image="https://cdn.example.com/default.jpg",
58
+ )
59
+
60
+ entity = SEOEntity(
61
+ entity_type="post",
62
+ slug="my-post",
63
+ title="My Post",
64
+ excerpt="Example excerpt",
65
+ body_html="<p>Body content</p>",
66
+ status="published",
67
+ featured_image="https://cdn.example.com/post.jpg",
68
+ )
69
+
70
+ payload = build_seo_payload(entity, "/posts/my-post", config)
71
+ ```
72
+
73
+ Full docs, API reference, and usage examples are in `docs/` and published with MkDocs.
74
+
75
+ ## License
76
+
77
+ MIT, see `LICENSE`.
@@ -0,0 +1,55 @@
1
+ # seoslug
2
+
3
+ [![DeepWiki](https://img.shields.io/badge/DeepWiki-Documentation-blue)](https://deepwiki.com/emiliano-gandini-outeda/seoslug/)
4
+
5
+ Canonical URL normalization and deterministic SEO payload generation for content platforms.
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ pip install seoslug
11
+ ```
12
+
13
+ For local development:
14
+
15
+ ```bash
16
+ pip install -e .
17
+ ```
18
+
19
+ ## Quick usage
20
+
21
+ ```python
22
+ from seoslug import SEOConfig, URLPolicy, SEOEntity, build_seo_payload
23
+
24
+ config = SEOConfig(
25
+ canonical_host="portal.example.com",
26
+ public_base_url="https://portal.example.com",
27
+ url_policy=URLPolicy(
28
+ enforce_https=True,
29
+ lowercase_paths=True,
30
+ trailing_slash="never",
31
+ collapse_duplicate_slashes=True,
32
+ strip_tracking_params=True,
33
+ allowed_query_params=["page", "q"],
34
+ ),
35
+ default_og_image="https://cdn.example.com/default.jpg",
36
+ )
37
+
38
+ entity = SEOEntity(
39
+ entity_type="post",
40
+ slug="my-post",
41
+ title="My Post",
42
+ excerpt="Example excerpt",
43
+ body_html="<p>Body content</p>",
44
+ status="published",
45
+ featured_image="https://cdn.example.com/post.jpg",
46
+ )
47
+
48
+ payload = build_seo_payload(entity, "/posts/my-post", config)
49
+ ```
50
+
51
+ Full docs, API reference, and usage examples are in `docs/` and published with MkDocs.
52
+
53
+ ## License
54
+
55
+ MIT, see `LICENSE`.
@@ -0,0 +1,35 @@
1
[build-system]
# The [project] table in this file uses PEP 639 metadata (`license` as an SPDX
# string plus `license-files`), which older setuptools releases reject, so
# require a setuptools version that understands it.
requires = ["setuptools>=77.0.3", "wheel"]
build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "seoslug"
7
+ version = "1.0.0"
8
+ description = "Framework-agnostic canonical URL normalization and SEO payload generation"
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ authors = [{ name = "seoslug contributors" }]
12
+ license = "MIT"
13
+ keywords = ["seo", "canonical", "urls", "metadata", "open-graph", "twitter-cards"]
14
+ classifiers = [
15
+ "Development Status :: 5 - Production/Stable",
16
+ "Intended Audience :: Developers",
17
+ "Programming Language :: Python :: 3",
18
+ "Programming Language :: Python :: 3 :: Only",
19
+ "Programming Language :: Python :: 3.10",
20
+ "Programming Language :: Python :: 3.11",
21
+ "Programming Language :: Python :: 3.12",
22
+ "Topic :: Internet :: WWW/HTTP",
23
+ "Topic :: Software Development :: Libraries :: Python Modules",
24
+ ]
25
+ dependencies = []
26
+ license-files = ["LICENSE"]
27
+
28
+ [project.urls]
29
+ Documentation = "https://deepwiki.com/emiliano-gandini-outeda/seoslug/"
30
+
31
+ [tool.setuptools]
32
+ package-dir = {"" = "src"}
33
+
34
+ [tool.setuptools.packages.find]
35
+ where = ["src"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,16 @@
1
+ """Public API for seoslug."""
2
+
3
+ from .builder import build_seo_payload
4
+ from .config import SEOConfig, URLPolicy
5
+ from .normalization import normalize_path, normalize_public_url
6
+ from .schemas import SEOEntity, SEOOverrides
7
+
8
+ __all__ = [
9
+ "SEOConfig",
10
+ "URLPolicy",
11
+ "SEOEntity",
12
+ "SEOOverrides",
13
+ "normalize_public_url",
14
+ "normalize_path",
15
+ "build_seo_payload",
16
+ ]
@@ -0,0 +1,81 @@
1
+ """SEO payload builder for seoslug."""
2
+
3
+ from .config import SEOConfig
4
+ from .jsonld import normalize_schema_jsonld
5
+ from .normalization import normalize_public_url
6
+ from .schemas import SEOEntity, SEOOverrides
7
+ from .text import build_description_snippet
8
+
9
+
10
+ def _pick(*values: str | None) -> str | None:
11
+ for value in values:
12
+ if isinstance(value, str) and value.strip():
13
+ return value.strip()
14
+ return None
15
+
16
+
17
def _entity_default_robots(entity: SEOEntity, config: SEOConfig) -> str:
    """Choose the robots directive used when no override is supplied.

    Search entities get ``config.search_robots``; entities whose status is
    "published" (case-insensitive) get ``"index,follow"``; everything else
    falls back to ``config.default_robots``.
    """
    is_search = entity.entity_type == "search"
    if not is_search:
        status = (entity.status or "").lower()
        return "index,follow" if status == "published" else config.default_robots
    return config.search_robots
23
+
24
+
25
def _og_type(entity: SEOEntity) -> str:
    """Map the entity type to the value emitted as the Open Graph type.

    Posts and videos are reported as ``"article"``; every other entity type
    is reported as ``"website"``.
    """
    article_like = {"post", "video"}
    return "article" if entity.entity_type in article_like else "website"
29
+
30
+
31
def build_seo_payload(
    entity: SEOEntity,
    route_path: str,
    config: SEOConfig,
    overrides: SEOOverrides | None = None,
) -> dict:
    """Assemble the SEO payload for one entity.

    Each field is resolved through a fallback chain: explicit override
    first, then entity data, then configuration defaults.

    Args:
        entity: Content being described.
        route_path: Path or URL of the page. It is normalized with
            ``normalize_public_url`` to produce the canonical URL unless
            ``overrides.canonical_url`` is set.
        config: Site configuration.
        overrides: Optional per-page overrides; ``None`` means no overrides.

    Returns:
        A dict with the keys ``title``, ``description``, ``canonical``,
        ``robots``, ``og``, ``twitter`` and ``schema_jsonld``. Description
        fields are always strings (``""`` when nothing is available); image
        fields may be ``None``.

    Raises:
        ValueError: Propagated from URL normalization or JSON-LD validation.
    """
    ov = overrides or SEOOverrides()

    title = _pick(ov.meta_title, entity.title, "Untitled")
    if config.title_template:
        title = config.title_template.format(title=title)

    # _pick() skips blank strings, so an "" passed as the last candidate can
    # never be selected; apply the empty-string fallback explicitly instead
    # of letting the description silently become None.
    description = (
        _pick(
            ov.meta_description,
            entity.excerpt,
            build_description_snippet(entity.body_html),
        )
        or ""
    )

    canonical = _pick(ov.canonical_url, normalize_public_url(route_path, config))
    robots = _pick(ov.robots, _entity_default_robots(entity, config))

    og_title = _pick(ov.og_title, title)
    # Fall back with "or" so an empty page description propagates as ""
    # rather than being turned back into None by _pick().
    og_description = _pick(ov.og_description) or description
    og_image = _pick(ov.og_image, entity.featured_image, config.default_og_image)

    twitter_title = _pick(ov.twitter_title, og_title)
    twitter_description = _pick(ov.twitter_description) or og_description
    twitter_image = _pick(ov.twitter_image, og_image)
    twitter_card = _pick(ov.twitter_card, "summary_large_image")

    return {
        "title": title,
        "description": description,
        "canonical": canonical,
        "robots": robots,
        "og": {
            "type": _og_type(entity),
            "title": og_title,
            "description": og_description,
            "url": canonical,
            "image": og_image,
        },
        "twitter": {
            "card": twitter_card,
            "title": twitter_title,
            "description": twitter_description,
            "image": twitter_image,
        },
        "schema_jsonld": normalize_schema_jsonld(ov.schema_jsonld),
    }
@@ -0,0 +1,97 @@
1
+ """Configuration models for seoslug."""
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Literal
5
+ from urllib.parse import urlparse
6
+
7
+
8
+ @dataclass(slots=True)
9
+ class URLPolicy:
10
+ enforce_https: bool = True
11
+ lowercase_paths: bool = True
12
+ trailing_slash: Literal["always", "never", "preserve"] = "never"
13
+ collapse_duplicate_slashes: bool = True
14
+ strip_tracking_params: bool = True
15
+ allowed_query_params: list[str] = field(default_factory=list)
16
+
17
+ def __post_init__(self) -> None:
18
+ if self.trailing_slash not in {"always", "never", "preserve"}:
19
+ raise ValueError(
20
+ "trailing_slash must be one of: 'always', 'never', 'preserve'"
21
+ )
22
+
23
+ cleaned_params: list[str] = []
24
+ seen: set[str] = set()
25
+ for param in self.allowed_query_params:
26
+ if not isinstance(param, str):
27
+ raise ValueError("allowed_query_params must contain only strings")
28
+ normalized = param.strip()
29
+ if not normalized:
30
+ continue
31
+ if normalized not in seen:
32
+ seen.add(normalized)
33
+ cleaned_params.append(normalized)
34
+ self.allowed_query_params = cleaned_params
35
+
36
+
37
@dataclass(slots=True)
class SEOConfig:
    """Site-wide settings consumed by the URL normalizer and payload builder."""

    canonical_host: str
    public_base_url: str
    url_policy: URLPolicy
    default_robots: str = "index,follow"
    default_og_image: str | None = None
    site_name: str | None = None
    title_template: str | None = "{title}"
    search_robots: str = "noindex,follow"

    def __post_init__(self) -> None:
        """Normalize the host and base URL, then validate the other fields.

        Raises:
            ValueError: If any field fails its validation rule.
        """
        self.canonical_host = _validate_canonical_host(self.canonical_host)
        self.public_base_url = _validate_public_base_url(self.public_base_url)

        if not isinstance(self.url_policy, URLPolicy):
            raise ValueError("url_policy must be a URLPolicy instance")

        # Fields that must always hold a non-blank string.
        required = (
            (self.default_robots, "default_robots must be a non-empty string"),
            (self.search_robots, "search_robots must be a non-empty string"),
        )
        for current, message in required:
            if not _is_nonempty_string(current):
                raise ValueError(message)

        # Fields that may be None, but must be non-blank strings otherwise.
        optional = (
            (
                self.default_og_image,
                "default_og_image must be a non-empty string when set",
            ),
            (self.site_name, "site_name must be a non-empty string when set"),
        )
        for current, message in optional:
            if current is not None and not _is_nonempty_string(current):
                raise ValueError(message)

        template = self.title_template
        if template is None:
            return
        if not _is_nonempty_string(template):
            raise ValueError("title_template must be a non-empty string when set")
        if "{title}" not in template:
            raise ValueError("title_template must include '{title}' placeholder")
73
+
74
+
75
+ def _is_nonempty_string(value: object) -> bool:
76
+ return isinstance(value, str) and bool(value.strip())
77
+
78
+
79
def _validate_canonical_host(canonical_host: str) -> str:
    """Return the host stripped and lower-cased, rejecting non-host input.

    Raises:
        ValueError: If the value is not a non-blank string, or contains a
            scheme separator, a slash, a ``?`` or a ``#``.
    """
    if not _is_nonempty_string(canonical_host):
        raise ValueError("canonical_host must be a non-empty string")

    host = canonical_host.strip().lower()
    forbidden = ("://", "/", "?", "#")
    if any(marker in host for marker in forbidden):
        raise ValueError("canonical_host must be host-only (no scheme/path/query)")
    return host
87
+
88
+
89
def _validate_public_base_url(public_base_url: str) -> str:
    """Return the stripped base URL after checking it is absolute http(s).

    Raises:
        ValueError: If the value is not a non-blank string, its scheme is
            not ``http``/``https``, or it has no network location.
    """
    if not _is_nonempty_string(public_base_url):
        raise ValueError("public_base_url must be a non-empty string")

    url = public_base_url.strip()
    parts = urlparse(url)
    has_http_scheme = parts.scheme in {"http", "https"}
    if not (has_http_scheme and parts.netloc):
        raise ValueError("public_base_url must be an absolute http(s) URL")
    return url
@@ -0,0 +1,13 @@
1
+ """JSON-LD helpers for seoslug."""
2
+
3
+ from copy import deepcopy
4
+
5
+
6
+ def normalize_schema_jsonld(value: dict | list[dict] | None) -> dict | list[dict]:
7
+ if value is None:
8
+ return {}
9
+ if isinstance(value, dict):
10
+ return deepcopy(value)
11
+ if isinstance(value, list) and all(isinstance(item, dict) for item in value):
12
+ return deepcopy(value)
13
+ raise ValueError("schema_jsonld must be dict, list[dict], or None")
@@ -0,0 +1,85 @@
1
+ """URL normalization functions for seoslug."""
2
+
3
+ from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit
4
+
5
+ from .config import SEOConfig, URLPolicy
6
+
7
+ _TRACKING_KEYS = {"gclid", "fbclid"}
8
+
9
+
10
+ def _collapse_duplicate_slashes(path: str) -> str:
11
+ out: list[str] = []
12
+ prev_slash = False
13
+ for char in path:
14
+ if char == "/":
15
+ if prev_slash:
16
+ continue
17
+ prev_slash = True
18
+ out.append(char)
19
+ else:
20
+ prev_slash = False
21
+ out.append(char)
22
+ return "".join(out)
23
+
24
+
25
+ def _apply_trailing_slash(path: str, mode: str) -> str:
26
+ if mode == "preserve":
27
+ return path
28
+ if path == "/":
29
+ return path
30
+ if mode == "always":
31
+ return path if path.endswith("/") else path + "/"
32
+ return path.rstrip("/") or "/"
33
+
34
+
35
def normalize_path(path: str, policy: URLPolicy) -> str:
    """Normalize a URL path according to ``policy``.

    The path is stripped, defaulted to ``"/"`` when blank, given a leading
    slash, then optionally slash-collapsed and lower-cased before the
    trailing-slash mode is applied.

    Raises:
        ValueError: If ``path`` is not a string.
    """
    if not isinstance(path, str):
        raise ValueError("path must be a string")

    result = path.strip()
    if not result:
        result = "/"
    elif not result.startswith("/"):
        result = "/" + result

    if policy.collapse_duplicate_slashes:
        result = _collapse_duplicate_slashes(result)
    if policy.lowercase_paths:
        result = result.lower()
    return _apply_trailing_slash(result, policy.trailing_slash)
47
+
48
+
49
def _filter_query(query: str, policy: URLPolicy) -> str:
    """Return ``query`` re-encoded with disallowed parameters removed.

    When ``policy.strip_tracking_params`` is set, keys starting with
    ``utm_`` or listed in ``_TRACKING_KEYS`` are dropped (compared
    case-insensitively). When the allow-list is non-empty, only keys that
    appear in it (compared exactly) are kept.
    """
    parsed_pairs = parse_qsl(query, keep_blank_values=True)
    allowed = set(policy.allowed_query_params)

    def _keep(name: str) -> bool:
        lowered = name.lower()
        is_tracking = lowered.startswith("utm_") or lowered in _TRACKING_KEYS
        if policy.strip_tracking_params and is_tracking:
            return False
        # An empty allow-list means no allow-list filtering at all.
        return not allowed or name in allowed

    survivors = [(name, val) for name, val in parsed_pairs if _keep(name)]
    return urlencode(survivors, doseq=True)
61
+
62
+
63
def normalize_public_url(url_or_path: str, config: SEOConfig) -> str:
    """Build the canonical public URL for a path or absolute URL.

    The result always uses ``config.canonical_host`` as its host. The scheme
    is ``https`` when ``config.url_policy.enforce_https`` is set, otherwise
    the scheme of ``config.public_base_url`` (``https`` if that is empty).
    The path and query are normalized with the configured URL policy, and
    any fragment is discarded.

    Args:
        url_or_path: A site-relative path (optionally with query/fragment)
            or an absolute URL.
        config: Site configuration supplying host, base URL and URL policy.

    Returns:
        The normalized absolute URL, without a fragment.

    Raises:
        ValueError: If the input is not a non-blank string, or it has a
            scheme but no network location (e.g. ``mailto:x``).
    """
    if not isinstance(url_or_path, str) or not url_or_path.strip():
        raise ValueError("url_or_path must be a non-empty string")

    value = url_or_path.strip()
    parsed_input = urlsplit(value)
    parsed_base = urlsplit(config.public_base_url)

    if parsed_input.scheme and not parsed_input.netloc:
        raise ValueError("Malformed URL input")

    path = parsed_input.path
    query = parsed_input.query
    if not parsed_input.scheme and not parsed_input.netloc:
        # Plain path input is split by hand. Drop any "#fragment" first so it
        # cannot leak into the path or the last query value; absolute URLs
        # already lose their fragment through urlsplit().
        without_fragment = value.partition("#")[0]
        path, _, query = without_fragment.partition("?")

    normalized_path = normalize_path(path or "/", config.url_policy)
    normalized_query = _filter_query(query, config.url_policy)

    scheme = "https" if config.url_policy.enforce_https else (parsed_base.scheme or "https")
    netloc = config.canonical_host
    return urlunsplit((scheme, netloc, normalized_path, normalized_query, ""))
@@ -0,0 +1,87 @@
1
+ """Input schemas for seoslug."""
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Literal
5
+
6
+
7
+ _ENTITY_TYPES = {"home", "post", "page", "video", "taxonomy", "search", "other"}
8
+
9
+
10
+ def _normalize_optional_string(value: object, field_name: str) -> str | None:
11
+ if value is None:
12
+ return None
13
+ if not isinstance(value, str):
14
+ raise ValueError(f"{field_name} must be a string or None")
15
+ normalized = value.strip()
16
+ return normalized or None
17
+
18
+
19
@dataclass(slots=True)
class SEOEntity:
    """Content item for which an SEO payload is built.

    On construction every optional text field is stripped, and blank values
    become ``None``.
    """

    entity_type: Literal["home", "post", "page", "video", "taxonomy", "search", "other"]
    slug: str | None = None
    title: str | None = None
    excerpt: str | None = None
    body_html: str | None = None
    status: str | None = None
    featured_image: str | None = None
    published_at: str | None = None
    updated_at: str | None = None

    def __post_init__(self) -> None:
        """Validate ``entity_type`` and normalize the optional text fields.

        Raises:
            ValueError: If ``entity_type`` is unknown or a text field is
                neither a string nor ``None``.
        """
        if self.entity_type not in _ENTITY_TYPES:
            raise ValueError("entity_type must be one of home/post/page/video/taxonomy/search/other")

        text_fields = (
            "slug",
            "title",
            "excerpt",
            "body_html",
            "status",
            "featured_image",
            "published_at",
            "updated_at",
        )
        for name in text_fields:
            setattr(self, name, _normalize_optional_string(getattr(self, name), name))
43
+
44
+
45
@dataclass(slots=True)
class SEOOverrides:
    """Per-page values that take precedence over entity and config data.

    On construction every text field is stripped, and blank values become
    ``None``; ``schema_jsonld`` is type-checked but left untouched.
    """

    meta_title: str | None = None
    meta_description: str | None = None
    canonical_url: str | None = None
    robots: str | None = None
    og_title: str | None = None
    og_description: str | None = None
    og_image: str | None = None
    twitter_card: str | None = None
    twitter_title: str | None = None
    twitter_description: str | None = None
    twitter_image: str | None = None
    schema_jsonld: dict | list[dict] | None = None

    def __post_init__(self) -> None:
        """Normalize the text fields and validate ``schema_jsonld``.

        Raises:
            ValueError: If a text field is neither a string nor ``None``, or
                ``schema_jsonld`` is not a dict, a list of dicts, or ``None``.
        """
        text_fields = (
            "meta_title",
            "meta_description",
            "canonical_url",
            "robots",
            "og_title",
            "og_description",
            "og_image",
            "twitter_card",
            "twitter_title",
            "twitter_description",
            "twitter_image",
        )
        for name in text_fields:
            setattr(self, name, _normalize_optional_string(getattr(self, name), name))

        schema = self.schema_jsonld
        if schema is None or isinstance(schema, dict):
            return
        is_dict_list = isinstance(schema, list) and all(
            isinstance(item, dict) for item in schema
        )
        if not is_dict_list:
            raise ValueError("schema_jsonld must be dict, list[dict], or None")
@@ -0,0 +1,34 @@
1
+ """Text extraction utilities for seoslug."""
2
+
3
+ import re
4
+ from html import unescape
5
+
6
+ _SCRIPT_STYLE_RE = re.compile(
7
+ r"<(script|style)\b[^>]*>.*?</\1>", re.IGNORECASE | re.DOTALL
8
+ )
9
+ _TAG_RE = re.compile(r"<[^>]+>")
10
+ _WS_RE = re.compile(r"\s+")
11
+
12
+
13
def html_to_text(html: str | None) -> str:
    """Reduce an HTML fragment to whitespace-collapsed plain text.

    Script/style elements and then all remaining tags are replaced by
    spaces, HTML entities are unescaped, and whitespace runs are collapsed
    to single spaces. ``None`` and the empty string yield ``""``.

    Raises:
        ValueError: If ``html`` is neither a string nor ``None``.
    """
    if html is None:
        return ""
    if not isinstance(html, str):
        raise ValueError("html must be a string or None")
    if not html:
        return ""

    without_blocks = _SCRIPT_STYLE_RE.sub(" ", html)
    without_tags = _TAG_RE.sub(" ", without_blocks)
    decoded = unescape(without_tags)
    collapsed = _WS_RE.sub(" ", decoded)
    return collapsed.strip()
24
+
25
+
26
def build_description_snippet(body_html: str | None, max_length: int = 160) -> str:
    """Produce a plain-text snippet of at most ``max_length`` characters.

    Text that already fits is returned unchanged. Longer text is cut, right
    stripped, and suffixed with ``"..."``; when ``max_length`` is 3 or less
    the result is just that many dots.

    Raises:
        ValueError: If ``max_length`` is not a positive integer.
    """
    if not isinstance(max_length, int) or max_length <= 0:
        raise ValueError("max_length must be a positive integer")

    plain = html_to_text(body_html)
    if len(plain) <= max_length:
        return plain

    ellipsis = "..."
    if max_length <= len(ellipsis):
        return "." * max_length
    head = plain[: max_length - len(ellipsis)]
    return head.rstrip() + ellipsis
@@ -0,0 +1,77 @@
1
+ Metadata-Version: 2.4
2
+ Name: seoslug
3
+ Version: 1.0.0
4
+ Summary: Framework-agnostic canonical URL normalization and SEO payload generation
5
+ Author: seoslug contributors
6
+ License-Expression: MIT
7
+ Project-URL: Documentation, https://deepwiki.com/emiliano-gandini-outeda/seoslug/
8
+ Keywords: seo,canonical,urls,metadata,open-graph,twitter-cards
9
+ Classifier: Development Status :: 5 - Production/Stable
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3 :: Only
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Topic :: Internet :: WWW/HTTP
17
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
18
+ Requires-Python: >=3.10
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Dynamic: license-file
22
+
23
+ # seoslug
24
+
25
+ [![DeepWiki](https://img.shields.io/badge/DeepWiki-Documentation-blue)](https://deepwiki.com/emiliano-gandini-outeda/seoslug/)
26
+
27
+ Canonical URL normalization and deterministic SEO payload generation for content platforms.
28
+
29
+ ## Installation
30
+
31
+ ```bash
32
+ pip install seoslug
33
+ ```
34
+
35
+ For local development:
36
+
37
+ ```bash
38
+ pip install -e .
39
+ ```
40
+
41
+ ## Quick usage
42
+
43
+ ```python
44
+ from seoslug import SEOConfig, URLPolicy, SEOEntity, build_seo_payload
45
+
46
+ config = SEOConfig(
47
+ canonical_host="portal.example.com",
48
+ public_base_url="https://portal.example.com",
49
+ url_policy=URLPolicy(
50
+ enforce_https=True,
51
+ lowercase_paths=True,
52
+ trailing_slash="never",
53
+ collapse_duplicate_slashes=True,
54
+ strip_tracking_params=True,
55
+ allowed_query_params=["page", "q"],
56
+ ),
57
+ default_og_image="https://cdn.example.com/default.jpg",
58
+ )
59
+
60
+ entity = SEOEntity(
61
+ entity_type="post",
62
+ slug="my-post",
63
+ title="My Post",
64
+ excerpt="Example excerpt",
65
+ body_html="<p>Body content</p>",
66
+ status="published",
67
+ featured_image="https://cdn.example.com/post.jpg",
68
+ )
69
+
70
+ payload = build_seo_payload(entity, "/posts/my-post", config)
71
+ ```
72
+
73
+ Full docs, API reference, and usage examples are in `docs/` and published with MkDocs.
74
+
75
+ ## License
76
+
77
+ MIT, see `LICENSE`.
@@ -0,0 +1,22 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ src/seoslug/__init__.py
5
+ src/seoslug/builder.py
6
+ src/seoslug/config.py
7
+ src/seoslug/jsonld.py
8
+ src/seoslug/normalization.py
9
+ src/seoslug/schemas.py
10
+ src/seoslug/text.py
11
+ src/seoslug.egg-info/PKG-INFO
12
+ src/seoslug.egg-info/SOURCES.txt
13
+ src/seoslug.egg-info/dependency_links.txt
14
+ src/seoslug.egg-info/top_level.txt
15
+ tests/test_builder.py
16
+ tests/test_fallbacks.py
17
+ tests/test_jsonld.py
18
+ tests/test_normalization.py
19
+ tests/test_regression_fixtures.py
20
+ tests/test_robots_rules.py
21
+ tests/test_schemas.py
22
+ tests/test_text.py
@@ -0,0 +1 @@
1
+ seoslug
@@ -0,0 +1,105 @@
1
+ """Tests for SEO payload builder."""
2
+
3
+ from seoslug import SEOConfig, SEOEntity, SEOOverrides, URLPolicy, build_seo_payload
4
+
5
+
6
def _config() -> SEOConfig:
    """Build the SEOConfig fixture shared by the builder tests."""
    settings = {
        "canonical_host": "portal.example.com",
        "public_base_url": "https://portal.example.com",
        "url_policy": URLPolicy(),
        "default_og_image": "https://cdn.example.com/default.jpg",
        "site_name": "Portal",
        "title_template": "{title}",
    }
    return SEOConfig(**settings)
15
+
16
+
17
def test_payload_contract_shape() -> None:
    """The payload has exactly the expected top-level keys and core values."""
    post = SEOEntity(
        entity_type="post",
        slug="my-post",
        title="My Post",
        excerpt="Example excerpt",
        body_html="<p>Body</p>",
        status="published",
        featured_image="https://cdn.example.com/post.jpg",
    )
    result = build_seo_payload(post, "/posts/my-post", _config())

    expected_keys = {
        "title",
        "description",
        "canonical",
        "robots",
        "og",
        "twitter",
        "schema_jsonld",
    }
    assert set(result.keys()) == expected_keys
    assert result["canonical"] == "https://portal.example.com/posts/my-post"
    assert result["og"]["url"] == result["canonical"]
    assert result["twitter"]["card"] == "summary_large_image"
41
+
42
+
43
def test_canonical_override_and_schema_passthrough() -> None:
    """An explicit canonical URL and JSON-LD dict reach the payload."""
    custom_url = "https://portal.example.com/custom-about"
    page = SEOEntity(entity_type="page", title="About")
    ov = SEOOverrides(
        canonical_url=custom_url,
        schema_jsonld={"@context": "https://schema.org", "@type": "WebPage"},
    )

    result = build_seo_payload(page, "/about", _config(), ov)

    assert result["canonical"] == custom_url
    assert result["schema_jsonld"]["@type"] == "WebPage"
52
+
53
+
54
def test_twitter_override_precedence() -> None:
    """Twitter overrides win over the Open Graph override and entity data."""
    post = SEOEntity(entity_type="post", title="Entity Title", excerpt="Entity Excerpt")
    ov = SEOOverrides(
        og_title="OG Title",
        twitter_title="Twitter Title",
        twitter_description="Twitter Description",
    )

    result = build_seo_payload(post, "/posts/t", _config(), ov)
    twitter = result["twitter"]

    assert result["og"]["title"] == "OG Title"
    assert twitter["title"] == "Twitter Title"
    assert twitter["description"] == "Twitter Description"
65
+
66
+
67
def test_schema_list_passthrough() -> None:
    """A list of JSON-LD dicts is returned with equal content."""
    schema = [{"@type": "BreadcrumbList"}, {"@type": "WebPage"}]
    page = SEOEntity(entity_type="page", title="Docs")
    ov = SEOOverrides(schema_jsonld=schema)

    result = build_seo_payload(page, "/docs", _config(), ov)

    assert result["schema_jsonld"] == schema
72
+
73
+
74
def test_title_template_is_applied() -> None:
    """The configured title template wraps the resolved title."""
    templated = SEOConfig(
        canonical_host="portal.example.com",
        public_base_url="https://portal.example.com",
        url_policy=URLPolicy(),
        title_template="{title} | Portal",
    )
    page = SEOEntity(entity_type="page", title="About")

    result = build_seo_payload(page, "/about", templated)

    assert result["title"] == "About | Portal"
83
+
84
+
85
def test_description_prefers_excerpt_over_body_snippet() -> None:
    """The excerpt is used as description even when body HTML is present."""
    post = SEOEntity(
        entity_type="post",
        excerpt="Excerpt text",
        body_html="<p>Body fallback text</p>",
    )

    result = build_seo_payload(post, "/posts/p", _config())

    assert result["description"] == "Excerpt text"
93
+
94
+
95
def test_twitter_falls_back_to_og_values() -> None:
    """Without Twitter overrides, Twitter fields mirror the OG overrides."""
    og_image_url = "https://cdn.example.com/og.jpg"
    post = SEOEntity(entity_type="post", title="Entity")
    ov = SEOOverrides(og_title="OG T", og_description="OG D", og_image=og_image_url)

    twitter = build_seo_payload(post, "/posts/p", _config(), ov)["twitter"]

    assert twitter["title"] == "OG T"
    assert twitter["description"] == "OG D"
    assert twitter["image"] == og_image_url
@@ -0,0 +1,79 @@
1
+ """Tests for fallback hierarchy behavior."""
2
+
3
+ from seoslug import SEOConfig, SEOEntity, SEOOverrides, URLPolicy, build_seo_payload
4
+
5
+
6
def _config() -> SEOConfig:
    """Build the SEOConfig fixture shared by the fallback tests."""
    settings = {
        "canonical_host": "portal.example.com",
        "public_base_url": "https://portal.example.com",
        "url_policy": URLPolicy(),
        "default_og_image": "https://cdn.example.com/default.jpg",
    }
    return SEOConfig(**settings)
13
+
14
+
15
def test_title_and_description_fallbacks() -> None:
    """Missing title/excerpt fall back to "Untitled" and the body text."""
    post = SEOEntity(
        entity_type="post",
        title=None,
        excerpt=None,
        body_html="<p>Hello body</p>",
    )

    result = build_seo_payload(post, "/x", _config())

    assert result["title"] == "Untitled"
    assert result["description"] == "Hello body"
20
+
21
+
22
def test_override_precedence() -> None:
    """Meta title/description overrides beat the entity's own values."""
    post = SEOEntity(entity_type="post", title="Entity title", excerpt="Entity desc")
    meta = SEOOverrides(meta_title="Override title", meta_description="Override desc")

    result = build_seo_payload(post, "/x", _config(), meta)

    assert result["title"] == "Override title"
    assert result["description"] == "Override desc"
28
+
29
+
30
def test_og_and_twitter_image_fallbacks() -> None:
    """With no entity image, OG and Twitter use the config default image."""
    default_image = "https://cdn.example.com/default.jpg"
    post = SEOEntity(entity_type="post", featured_image=None)

    result = build_seo_payload(post, "/x", _config())

    assert result["og"]["image"] == default_image
    assert result["twitter"]["image"] == default_image
35
+
36
+
37
def test_canonical_fallback_uses_normalized_route() -> None:
    """Without an override, the canonical URL is the normalized route."""
    page = SEOEntity(entity_type="page", title="About")
    messy_route = "/About//Team?utm_source=x"

    result = build_seo_payload(page, messy_route, _config())

    assert result["canonical"] == "https://portal.example.com/about/team"
41
+
42
+
43
def test_og_image_precedence_override_then_entity_then_default() -> None:
    """og image resolves override first, then entity image, then default."""
    entity_with_image = SEOEntity(
        entity_type="post",
        featured_image="https://cdn.example.com/entity.jpg",
    )

    overridden = build_seo_payload(
        entity_with_image,
        "/x",
        _config(),
        SEOOverrides(og_image="https://cdn.example.com/override.jpg"),
    )
    assert overridden["og"]["image"] == "https://cdn.example.com/override.jpg"

    from_entity = build_seo_payload(entity_with_image, "/x", _config())
    assert from_entity["og"]["image"] == "https://cdn.example.com/entity.jpg"

    bare_entity = SEOEntity(entity_type="post")
    from_default = build_seo_payload(bare_entity, "/x", _config())
    assert from_default["og"]["image"] == "https://cdn.example.com/default.jpg"
58
+
59
+
60
def test_twitter_override_fields_take_highest_precedence() -> None:
    """Every Twitter override wins even when OG overrides are also set."""
    post = SEOEntity(entity_type="post", title="Entity", excerpt="Excerpt")
    ov = SEOOverrides(
        twitter_card="summary",
        twitter_title="Tw Title",
        twitter_description="Tw Desc",
        twitter_image="https://cdn.example.com/tw.jpg",
        og_title="OG Title",
        og_description="OG Desc",
        og_image="https://cdn.example.com/og.jpg",
    )

    twitter = build_seo_payload(post, "/x", _config(), ov)["twitter"]

    assert twitter["card"] == "summary"
    assert twitter["title"] == "Tw Title"
    assert twitter["description"] == "Tw Desc"
    assert twitter["image"] == "https://cdn.example.com/tw.jpg"
@@ -0,0 +1,28 @@
1
+ """Tests for JSON-LD normalization helpers."""
2
+
3
+ import pytest
4
+
5
+ from seoslug.jsonld import normalize_schema_jsonld
6
+
7
+
8
def test_normalize_schema_jsonld_none_to_empty_dict() -> None:
    """A missing schema (``None``) normalizes to an empty dict."""
    result = normalize_schema_jsonld(None)
    assert result == {}
10
+
11
+
12
def test_normalize_schema_jsonld_returns_copy_for_dict() -> None:
    """A dict schema comes back equal in value but as a distinct object."""
    original = {"@type": "WebPage", "name": "Home"}

    result = normalize_schema_jsonld(original)

    assert result == original
    assert result is not original
17
+
18
+
19
def test_normalize_schema_jsonld_returns_copy_for_list() -> None:
    """A list schema comes back equal in value but as a distinct list object."""
    original = [{"@type": "WebPage"}, {"@type": "BreadcrumbList"}]

    result = normalize_schema_jsonld(original)

    assert result == original
    assert result is not original
24
+
25
+
26
def test_normalize_schema_jsonld_rejects_invalid_type() -> None:
    """A plain string is not an accepted schema and raises ``ValueError``."""
    with pytest.raises(ValueError):
        # Deliberately wrong argument type; silence the static checker only.
        normalize_schema_jsonld("bad")  # type: ignore[arg-type]
@@ -0,0 +1,86 @@
1
+ """Tests for URL normalization."""
2
+
3
+ from seoslug import SEOConfig, URLPolicy, normalize_path, normalize_public_url
4
+ import pytest
5
+
6
+
7
def _config(policy: URLPolicy | None = None) -> SEOConfig:
    """Build the SEOConfig shared by the URL tests.

    Args:
        policy: URL policy to install; a falsy value selects ``URLPolicy()``.
    """
    effective_policy = policy if policy else URLPolicy()
    return SEOConfig(
        canonical_host="portal.example.com",
        public_base_url="https://portal.example.com",
        url_policy=effective_policy,
    )
13
+
14
+
15
def test_normalize_path_lowercase_and_slashes() -> None:
    """Lowercasing and slash collapsing apply together on a messy path."""
    rules = URLPolicy(lowercase_paths=True, collapse_duplicate_slashes=True)
    cleaned = normalize_path("//Blog//My-Post//", rules)
    assert cleaned == "/blog/my-post"
18
+
19
+
20
def test_trailing_slash_modes() -> None:
    """Cover the "always", "never" and "preserve" trailing-slash settings."""
    # "always" appends a missing slash.
    appended = normalize_path("/blog/post", URLPolicy(trailing_slash="always"))
    assert appended == "/blog/post/"

    # "never" removes an existing slash.
    stripped = normalize_path("/blog/post/", URLPolicy(trailing_slash="never"))
    assert stripped == "/blog/post"

    # "preserve" leaves the input's trailing slash untouched.
    untouched = normalize_path("/blog/post/", URLPolicy(trailing_slash="preserve"))
    assert untouched == "/blog/post/"
24
+
25
+
26
def test_normalize_public_url_enforces_host_https_and_query_rules() -> None:
    """Host, scheme, path and query rules are all applied in one pass.

    Only the allow-listed ``page`` and ``q`` parameters are expected to
    survive; tracking and unknown parameters are dropped.
    """
    cfg = _config(URLPolicy(allowed_query_params=["page", "q"]))
    messy_url = "http://other.example.com//Blog/Post?utm_source=x&gclid=1&page=2&q=abc&bad=1"

    result = normalize_public_url(messy_url, cfg)

    assert result == "https://portal.example.com/blog/post?page=2&q=abc"
33
+
34
+
35
def test_normalize_public_url_idempotent() -> None:
    """Normalizing an already-normalized URL returns it unchanged."""
    cfg = _config()
    once = normalize_public_url("/A//B/?utm_campaign=x", cfg)
    twice = normalize_public_url(once, cfg)
    assert once == twice
40
+
41
+
42
def test_accepts_relative_path_with_query() -> None:
    """A path without a leading slash is accepted and resolved on the host."""
    cfg = _config()
    result = normalize_public_url("posts/My-Post?fbclid=123&page=1", cfg)
    assert result == "https://portal.example.com/posts/my-post?page=1"
48
+
49
+
50
def test_enforce_https_toggle_uses_public_base_scheme() -> None:
    """With ``enforce_https`` off, the result uses the public base URL's scheme."""
    http_only = SEOConfig(
        canonical_host="portal.example.com",
        public_base_url="http://portal.example.com",
        url_policy=URLPolicy(enforce_https=False),
    )

    result = normalize_public_url("https://other.example.com/a", http_only)

    assert result == "http://portal.example.com/a"
57
+
58
+
59
def test_tracking_strip_toggle() -> None:
    """Tracking parameters are kept when ``strip_tracking_params`` is off."""
    cfg = _config(URLPolicy(strip_tracking_params=False))
    result = normalize_public_url("/p?utm_source=x&gclid=1&fbclid=2", cfg)
    assert result == "https://portal.example.com/p?utm_source=x&gclid=1&fbclid=2"
65
+
66
+
67
def test_no_allowlist_keeps_non_tracking_params() -> None:
    """Under a default policy, ordinary parameters stay and ``utm_campaign`` goes."""
    cfg = _config(URLPolicy())
    result = normalize_public_url("/p?a=1&b=2&utm_campaign=x", cfg)
    assert result == "https://portal.example.com/p?a=1&b=2"
70
+
71
+
72
def test_malformed_url_raises_value_error() -> None:
    """An absolute URL with an empty host is rejected with ``ValueError``."""
    cfg = _config()
    with pytest.raises(ValueError):
        normalize_public_url("https:///broken", cfg)
76
+
77
+
78
def test_host_is_always_enforced_for_absolute_input() -> None:
    """A foreign host in an absolute URL is replaced by the canonical host."""
    cfg = _config()
    result = normalize_public_url("https://evil.example.org/path", cfg)
    assert result == "https://portal.example.com/path"
82
+
83
+
84
def test_can_disable_duplicate_slash_collapse_and_lowercase() -> None:
    """With every rewrite disabled, the path is returned exactly as given."""
    raw_path = "//Blog//Post//"
    hands_off = URLPolicy(
        collapse_duplicate_slashes=False,
        lowercase_paths=False,
        trailing_slash="preserve",
    )
    assert normalize_path(raw_path, hands_off) == "//Blog//Post//"
@@ -0,0 +1,179 @@
1
+ """Regression fixtures for representative entity types."""
2
+
3
+ import pytest
4
+
5
+ from seoslug import SEOConfig, SEOEntity, URLPolicy, build_seo_payload
6
+
7
+
8
def _config() -> SEOConfig:
    """Return the SEOConfig used by the regression snapshots.

    It sets a default OG image and a ``noindex,follow`` robots value for
    search pages, on top of a default ``URLPolicy``.
    """
    settings = {
        "canonical_host": "portal.example.com",
        "public_base_url": "https://portal.example.com",
        "url_policy": URLPolicy(),
        "default_og_image": "https://cdn.example.com/default.jpg",
        "search_robots": "noindex,follow",
    }
    return SEOConfig(**settings)
16
+
17
+
18
def _expected_snapshot(entity_type: str, canonical: str, og_type: str, robots: str) -> dict:
    """Build the full expected payload for one entity-type snapshot.

    The snapshots differ only in entity type, canonical URL, ``og:type`` and
    robots value; every other field is derived from those or is constant.
    """
    title = f"{entity_type} title"
    description = f"{entity_type} excerpt"
    default_image = "https://cdn.example.com/default.jpg"
    return {
        "title": title,
        "description": description,
        "canonical": canonical,
        "robots": robots,
        "og": {
            "type": og_type,
            "title": title,
            "description": description,
            "url": canonical,
            "image": default_image,
        },
        "twitter": {
            "card": "summary_large_image",
            "title": title,
            "description": description,
            "image": default_image,
        },
        "schema_jsonld": {},
    }


# One row per snapshot: (entity_type, route, canonical URL, og:type, robots).
_SNAPSHOT_CASES = [
    ("home", "/", "https://portal.example.com/", "website", "index,follow"),
    ("post", "/posts/p", "https://portal.example.com/posts/p", "article", "index,follow"),
    ("page", "/about", "https://portal.example.com/about", "website", "index,follow"),
    ("video", "/videos/v", "https://portal.example.com/videos/v", "article", "index,follow"),
    (
        "taxonomy",
        "/topics/python",
        "https://portal.example.com/topics/python",
        "website",
        "index,follow",
    ),
    (
        "search",
        "/search?q=x",
        "https://portal.example.com/search?q=x",
        "website",
        "noindex,follow",
    ),
]


@pytest.mark.parametrize(
    ("entity_type", "route", "expected"),
    [
        (kind, path, _expected_snapshot(kind, canonical, og_type, robots))
        for kind, path, canonical, og_type, robots in _SNAPSHOT_CASES
    ],
)
def test_regression_entity_type_snapshots(
    entity_type: str,
    route: str,
    expected: dict,
) -> None:
    """Compare the whole payload of a published entity against its snapshot."""
    subject = SEOEntity(
        entity_type=entity_type,
        title=f"{entity_type} title",
        excerpt=f"{entity_type} excerpt",
        status="published",
    )
    actual = build_seo_payload(subject, route, _config())
    assert actual == expected
@@ -0,0 +1,38 @@
1
+ """Tests for robots rule behavior."""
2
+
3
+ from seoslug import SEOConfig, SEOEntity, SEOOverrides, URLPolicy, build_seo_payload
4
+
5
+
6
def _config() -> SEOConfig:
    """Return the SEOConfig used by the robots tests.

    Default robots is ``index,follow``; search pages use ``noindex,follow``.
    """
    settings = {
        "canonical_host": "portal.example.com",
        "public_base_url": "https://portal.example.com",
        "url_policy": URLPolicy(),
        "default_robots": "index,follow",
        "search_robots": "noindex,follow",
    }
    return SEOConfig(**settings)
14
+
15
+
16
def test_published_content_defaults_to_index_follow() -> None:
    """A published post without overrides gets ``index,follow``."""
    published_post = SEOEntity(entity_type="post", status="published")
    result = build_seo_payload(published_post, "/posts/x", _config())
    assert result["robots"] == "index,follow"
20
+
21
+
22
def test_search_uses_search_robots() -> None:
    """A search entity gets the configured search robots value."""
    search_page = SEOEntity(entity_type="search", status="published")
    result = build_seo_payload(search_page, "/search?q=x", _config())
    assert result["robots"] == "noindex,follow"
26
+
27
+
28
def test_override_robots_wins() -> None:
    """An explicit robots override is returned verbatim for a draft post."""
    draft_post = SEOEntity(entity_type="post", status="draft")
    forced = SEOOverrides(robots="noindex,nofollow")

    result = build_seo_payload(draft_post, "/posts/x", _config(), forced)

    assert result["robots"] == "noindex,nofollow"
33
+
34
+
35
def test_non_published_uses_default_robots() -> None:
    """A draft post without overrides still gets the default robots value."""
    draft_post = SEOEntity(entity_type="post", status="draft")
    result = build_seo_payload(draft_post, "/posts/x", _config())
    assert result["robots"] == "index,follow"
@@ -0,0 +1,36 @@
1
+ """Tests for SEOEntity and SEOOverrides models."""
2
+
3
+ import pytest
4
+
5
+ from seoslug import SEOEntity, SEOOverrides
6
+
7
+
8
def test_entity_normalizes_optional_string_fields() -> None:
    """Optional strings are stripped, and a blank one becomes ``None``."""
    raw_fields = {
        "slug": " my-post ",
        "title": " Hello ",
        "excerpt": " ",
    }

    post = SEOEntity(entity_type="post", **raw_fields)

    assert post.slug == "my-post"
    assert post.title == "Hello"
    assert post.excerpt is None
18
+
19
+
20
def test_entity_rejects_invalid_entity_type() -> None:
    """Constructing an entity with an unknown type raises ``ValueError``."""
    unknown_type = "invalid"
    with pytest.raises(ValueError):
        SEOEntity(entity_type=unknown_type)
23
+
24
+
25
def test_entity_rejects_non_string_optional_field() -> None:
    """A non-string value for an optional text field raises ``ValueError``."""
    with pytest.raises(ValueError):
        # Deliberately wrong argument type; silence the static checker only.
        SEOEntity(entity_type="post", title=123)  # type: ignore[arg-type]
28
+
29
+
30
def test_overrides_normalize_and_validate_schema_type() -> None:
    """Overrides strip their strings and reject a schema list with a non-dict item."""
    # String fields: surrounding whitespace is removed, blank becomes None.
    cleaned = SEOOverrides(meta_title=" A ", robots=" ")
    assert cleaned.meta_title == "A"
    assert cleaned.robots is None

    # Schema list containing a plain string is invalid.
    with pytest.raises(ValueError):
        SEOOverrides(schema_jsonld=[{"@type": "WebPage"}, "bad"])  # type: ignore[list-item]
@@ -0,0 +1,29 @@
1
+ """Tests for text extraction utilities."""
2
+
3
+ import pytest
4
+
5
+ from seoslug.text import build_description_snippet, html_to_text
6
+
7
+
8
def test_html_to_text_strips_tags_and_script_content() -> None:
    """Markup is removed and the ``<script>`` body does not reach the output."""
    markup = "<h1>Hello</h1><script>alert('x')</script><p>World</p>"
    plain = html_to_text(markup)
    assert plain == "Hello World"
11
+
12
+
13
def test_snippet_truncation() -> None:
    """An over-long text is cut to exactly ``max_length``, ending in ``...``."""
    limit = 20
    long_markup = f"<p>{'a' * 200}</p>"

    result = build_description_snippet(long_markup, max_length=limit)

    assert result.endswith("...")
    assert len(result) == 20
18
+
19
+
20
def test_html_to_text_normalizes_whitespace_and_style() -> None:
    """``<style>`` content is dropped and whitespace runs collapse to one space."""
    markup = "<style>.x{color:red;}</style><p> Hello\n\tWorld </p>"
    plain = html_to_text(markup)
    assert plain == "Hello World"
23
+
24
+
25
def test_invalid_inputs_raise_value_error() -> None:
    """Both text helpers raise ``ValueError`` on invalid arguments."""
    # Non-string HTML input.
    with pytest.raises(ValueError):
        html_to_text(123)  # type: ignore[arg-type]

    # A max_length of zero.
    with pytest.raises(ValueError):
        build_description_snippet("<p>ok</p>", max_length=0)