seoslug 1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
seoslug/__init__.py ADDED
@@ -0,0 +1,16 @@
1
+ """Public API for seoslug."""
2
+
3
+ from .builder import build_seo_payload
4
+ from .config import SEOConfig, URLPolicy
5
+ from .normalization import normalize_path, normalize_public_url
6
+ from .schemas import SEOEntity, SEOOverrides
7
+
8
+ __all__ = [
9
+ "SEOConfig",
10
+ "URLPolicy",
11
+ "SEOEntity",
12
+ "SEOOverrides",
13
+ "normalize_public_url",
14
+ "normalize_path",
15
+ "build_seo_payload",
16
+ ]
seoslug/builder.py ADDED
@@ -0,0 +1,81 @@
1
+ """SEO payload builder for seoslug."""
2
+
3
+ from .config import SEOConfig
4
+ from .jsonld import normalize_schema_jsonld
5
+ from .normalization import normalize_public_url
6
+ from .schemas import SEOEntity, SEOOverrides
7
+ from .text import build_description_snippet
8
+
9
+
10
+ def _pick(*values: str | None) -> str | None:
11
+ for value in values:
12
+ if isinstance(value, str) and value.strip():
13
+ return value.strip()
14
+ return None
15
+
16
+
17
def _entity_default_robots(entity: SEOEntity, config: SEOConfig) -> str:
    """Choose the robots directive used when no override is supplied.

    Search entities get ``config.search_robots``.  Any other entity whose
    status equals "published" (case-insensitive) gets "index,follow"; every
    remaining case falls back to ``config.default_robots``.
    """
    if entity.entity_type == "search":
        return config.search_robots
    is_published = (entity.status or "").lower() == "published"
    return "index,follow" if is_published else config.default_robots
23
+
24
+
25
def _og_type(entity: SEOEntity) -> str:
    """Return the Open Graph type: "article" for posts and videos, else "website"."""
    article_like = {"post", "video"}
    return "article" if entity.entity_type in article_like else "website"
29
+
30
+
31
def build_seo_payload(
    entity: SEOEntity,
    route_path: str,
    config: SEOConfig,
    overrides: SEOOverrides | None = None,
) -> dict:
    """Assemble the SEO metadata payload for one entity.

    Each field is resolved by precedence: explicit override first, then the
    entity's own data, then a config-level or hard-coded default.

    Args:
        entity: Content item being described.
        route_path: Path or URL of the page; it is normalized into the
            canonical URL unless ``overrides.canonical_url`` is set.
        config: Site-wide SEO settings.
        overrides: Optional per-page overrides; ``None`` means "no overrides".

    Returns:
        A dict with the keys ``title``, ``description``, ``canonical``,
        ``robots``, ``og`` (type/title/description/url/image), ``twitter``
        (card/title/description/image) and ``schema_jsonld``.  The three
        description values are always strings; they are ``""`` when neither
        an override, the excerpt, nor the body yields any text.

    Raises:
        ValueError: Propagated from ``normalize_public_url``,
            ``build_description_snippet`` or ``normalize_schema_jsonld``.
    """
    ov = overrides or SEOOverrides()

    title = _pick(ov.meta_title, entity.title, "Untitled")
    if config.title_template:
        title = config.title_template.format(title=title)

    # _pick() skips blank strings, so an "" argument can never be selected as
    # a fallback; apply the empty-string default explicitly so the payload
    # carries "" rather than None when there is no description text.
    description = (
        _pick(
            ov.meta_description,
            entity.excerpt,
            build_description_snippet(entity.body_html),
        )
        or ""
    )

    # The route is normalized even when an override exists so that an invalid
    # route_path is still reported to the caller.
    canonical = _pick(ov.canonical_url, normalize_public_url(route_path, config))
    robots = _pick(ov.robots, _entity_default_robots(entity, config))

    og_title = _pick(ov.og_title, title)
    og_description = _pick(ov.og_description) or description
    og_image = _pick(ov.og_image, entity.featured_image, config.default_og_image)

    # Twitter fields inherit from the Open Graph values unless overridden.
    twitter_title = _pick(ov.twitter_title, og_title)
    twitter_description = _pick(ov.twitter_description) or og_description
    twitter_image = _pick(ov.twitter_image, og_image)
    twitter_card = _pick(ov.twitter_card, "summary_large_image")

    return {
        "title": title,
        "description": description,
        "canonical": canonical,
        "robots": robots,
        "og": {
            "type": _og_type(entity),
            "title": og_title,
            "description": og_description,
            "url": canonical,
            "image": og_image,
        },
        "twitter": {
            "card": twitter_card,
            "title": twitter_title,
            "description": twitter_description,
            "image": twitter_image,
        },
        "schema_jsonld": normalize_schema_jsonld(ov.schema_jsonld),
    }
seoslug/config.py ADDED
@@ -0,0 +1,97 @@
1
+ """Configuration models for seoslug."""
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Literal
5
+ from urllib.parse import urlparse
6
+
7
+
8
+ @dataclass(slots=True)
9
+ class URLPolicy:
10
+ enforce_https: bool = True
11
+ lowercase_paths: bool = True
12
+ trailing_slash: Literal["always", "never", "preserve"] = "never"
13
+ collapse_duplicate_slashes: bool = True
14
+ strip_tracking_params: bool = True
15
+ allowed_query_params: list[str] = field(default_factory=list)
16
+
17
+ def __post_init__(self) -> None:
18
+ if self.trailing_slash not in {"always", "never", "preserve"}:
19
+ raise ValueError(
20
+ "trailing_slash must be one of: 'always', 'never', 'preserve'"
21
+ )
22
+
23
+ cleaned_params: list[str] = []
24
+ seen: set[str] = set()
25
+ for param in self.allowed_query_params:
26
+ if not isinstance(param, str):
27
+ raise ValueError("allowed_query_params must contain only strings")
28
+ normalized = param.strip()
29
+ if not normalized:
30
+ continue
31
+ if normalized not in seen:
32
+ seen.add(normalized)
33
+ cleaned_params.append(normalized)
34
+ self.allowed_query_params = cleaned_params
35
+
36
+
37
@dataclass(slots=True)
class SEOConfig:
    """Site-wide SEO settings, validated on construction.

    ``canonical_host`` and ``public_base_url`` are replaced by their validated
    forms.  All other fields are checked but left untouched.

    Raises:
        ValueError: If any field fails validation.
    """

    canonical_host: str
    public_base_url: str
    url_policy: URLPolicy
    default_robots: str = "index,follow"
    default_og_image: str | None = None
    site_name: str | None = None
    title_template: str | None = "{title}"
    search_robots: str = "noindex,follow"

    def __post_init__(self) -> None:
        self.canonical_host = _validate_canonical_host(self.canonical_host)
        self.public_base_url = _validate_public_base_url(self.public_base_url)

        if not isinstance(self.url_policy, URLPolicy):
            raise ValueError("url_policy must be a URLPolicy instance")

        # Fields that must always hold a non-blank string.
        required = (
            (self.default_robots, "default_robots must be a non-empty string"),
            (self.search_robots, "search_robots must be a non-empty string"),
        )
        for candidate, message in required:
            if not _is_nonempty_string(candidate):
                raise ValueError(message)

        # Fields that may be None but must be non-blank strings otherwise.
        optional = (
            (
                self.default_og_image,
                "default_og_image must be a non-empty string when set",
            ),
            (self.site_name, "site_name must be a non-empty string when set"),
            (
                self.title_template,
                "title_template must be a non-empty string when set",
            ),
        )
        for candidate, message in optional:
            if candidate is not None and not _is_nonempty_string(candidate):
                raise ValueError(message)

        template = self.title_template
        if template is not None and "{title}" not in template:
            raise ValueError("title_template must include '{title}' placeholder")
73
+
74
+
75
def _is_nonempty_string(value: object) -> bool:
    """Return True only for strings containing at least one non-whitespace character."""
    if not isinstance(value, str):
        return False
    return value.strip() != ""
77
+
78
+
79
def _validate_canonical_host(canonical_host: str) -> str:
    """Validate a bare host name and return it stripped and lower-cased.

    Raises:
        ValueError: If the value is not a non-blank string, or if it contains
            "://", "/", "?" or "#" (i.e. anything beyond a host).
    """
    if not isinstance(canonical_host, str) or not canonical_host.strip():
        raise ValueError("canonical_host must be a non-empty string")

    host = canonical_host.strip().lower()
    forbidden_markers = ("://", "/", "?", "#")
    if any(marker in host for marker in forbidden_markers):
        raise ValueError("canonical_host must be host-only (no scheme/path/query)")
    return host
87
+
88
+
89
def _validate_public_base_url(public_base_url: str) -> str:
    """Validate an absolute http(s) base URL and return it stripped.

    Raises:
        ValueError: If the value is not a non-blank string, its scheme is not
            "http" or "https", or it has no network location.
    """
    if not isinstance(public_base_url, str) or not public_base_url.strip():
        raise ValueError("public_base_url must be a non-empty string")

    candidate = public_base_url.strip()
    parts = urlparse(candidate)
    has_http_scheme = parts.scheme in {"http", "https"}
    if not (has_http_scheme and parts.netloc):
        raise ValueError("public_base_url must be an absolute http(s) URL")
    return candidate
seoslug/jsonld.py ADDED
@@ -0,0 +1,13 @@
1
+ """JSON-LD helpers for seoslug."""
2
+
3
+ from copy import deepcopy
4
+
5
+
6
+ def normalize_schema_jsonld(value: dict | list[dict] | None) -> dict | list[dict]:
7
+ if value is None:
8
+ return {}
9
+ if isinstance(value, dict):
10
+ return deepcopy(value)
11
+ if isinstance(value, list) and all(isinstance(item, dict) for item in value):
12
+ return deepcopy(value)
13
+ raise ValueError("schema_jsonld must be dict, list[dict], or None")
@@ -0,0 +1,85 @@
1
+ """URL normalization functions for seoslug."""
2
+
3
+ from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit
4
+
5
+ from .config import SEOConfig, URLPolicy
6
+
7
# Click-identifier query keys removed when tracking parameters are stripped
# (compared against the lower-cased key; "utm_*" keys are handled separately).
_TRACKING_KEYS = {
    "fbclid",
    "gclid",
}
8
+
9
+
10
def _collapse_duplicate_slashes(path: str) -> str:
    """Replace every run of consecutive "/" characters with a single "/"."""
    collapsed = path
    # Each pass halves the remaining runs; loop until no "//" is left.
    while "//" in collapsed:
        collapsed = collapsed.replace("//", "/")
    return collapsed
23
+
24
+
25
def _apply_trailing_slash(path: str, mode: str) -> str:
    """Apply the trailing-slash policy to ``path``.

    "preserve" and the root path "/" are returned unchanged.  "always"
    guarantees one trailing slash is present.  Any other mode strips all
    trailing slashes, falling back to "/" if nothing remains.
    """
    if mode == "preserve" or path == "/":
        return path
    if mode != "always":
        return path.rstrip("/") or "/"
    return path if path.endswith("/") else f"{path}/"
33
+
34
+
35
def normalize_path(path: str, policy: URLPolicy) -> str:
    """Normalize a URL path according to ``policy``.

    The path is stripped, defaulted to "/" when blank, given a leading slash,
    then optionally slash-collapsed and lower-cased before the trailing-slash
    rule is applied.

    Raises:
        ValueError: If ``path`` is not a string.
    """
    if not isinstance(path, str):
        raise ValueError("path must be a string")

    cleaned = path.strip()
    if not cleaned:
        cleaned = "/"
    elif not cleaned.startswith("/"):
        cleaned = f"/{cleaned}"

    if policy.collapse_duplicate_slashes:
        cleaned = _collapse_duplicate_slashes(cleaned)
    if policy.lowercase_paths:
        cleaned = cleaned.lower()
    return _apply_trailing_slash(cleaned, policy.trailing_slash)
47
+
48
+
49
def _filter_query(query: str, policy: URLPolicy) -> str:
    """Return ``query`` re-encoded with disallowed parameters removed.

    When ``policy.strip_tracking_params`` is set, keys that start with "utm_"
    or appear in ``_TRACKING_KEYS`` are dropped (case-insensitive).  When the
    allow-list is non-empty, only keys present in it (case-sensitive) survive.
    Blank values are kept.
    """
    pairs = parse_qsl(query, keep_blank_values=True)
    allowed = set(policy.allowed_query_params)

    def _is_kept(name: str) -> bool:
        lowered = name.lower()
        is_tracking = lowered.startswith("utm_") or lowered in _TRACKING_KEYS
        if policy.strip_tracking_params and is_tracking:
            return False
        # An empty allow-list means "no allow-list filtering".
        return not allowed or name in allowed

    survivors = [(name, item) for name, item in pairs if _is_kept(name)]
    return urlencode(survivors, doseq=True)
61
+
62
+
63
def normalize_public_url(url_or_path: str, config: SEOConfig) -> str:
    """Build the canonical public URL for a path or URL.

    The input's own scheme and host are discarded: the result always uses
    ``config.canonical_host`` and either "https" (when
    ``config.url_policy.enforce_https`` is set) or the scheme of
    ``config.public_base_url``.  The path and query are normalized according
    to ``config.url_policy``.  Any ``#fragment`` in the input is dropped, for
    relative paths as well as absolute URLs, so it can never end up inside
    the canonical path or query.

    Args:
        url_or_path: Absolute URL or site-relative path, optionally with a
            query string and/or fragment.
        config: SEO configuration providing host, base URL and URL policy.

    Returns:
        The normalized absolute URL, without a fragment.

    Raises:
        ValueError: If ``url_or_path`` is not a non-blank string, or if it has
            a scheme but no network location (e.g. ``"mailto:x"``).
    """
    if not isinstance(url_or_path, str) or not url_or_path.strip():
        raise ValueError("url_or_path must be a non-empty string")

    parsed_input = urlsplit(url_or_path.strip())
    if parsed_input.scheme and not parsed_input.netloc:
        raise ValueError("Malformed URL input")

    # urlsplit already separates path, query and fragment for relative input;
    # using its result (instead of splitting on "?" by hand) keeps fragments
    # out of the path and out of the last query value.
    normalized_path = normalize_path(parsed_input.path or "/", config.url_policy)
    normalized_query = _filter_query(parsed_input.query, config.url_policy)

    if config.url_policy.enforce_https:
        scheme = "https"
    else:
        scheme = urlsplit(config.public_base_url).scheme or "https"

    return urlunsplit(
        (scheme, config.canonical_host, normalized_path, normalized_query, "")
    )
seoslug/schemas.py ADDED
@@ -0,0 +1,87 @@
1
+ """Input schemas for seoslug."""
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Literal
5
+
6
+
7
# Accepted values for SEOEntity.entity_type.
_ENTITY_TYPES = {
    "home",
    "post",
    "page",
    "video",
    "taxonomy",
    "search",
    "other",
}
8
+
9
+
10
+ def _normalize_optional_string(value: object, field_name: str) -> str | None:
11
+ if value is None:
12
+ return None
13
+ if not isinstance(value, str):
14
+ raise ValueError(f"{field_name} must be a string or None")
15
+ normalized = value.strip()
16
+ return normalized or None
17
+
18
+
19
@dataclass(slots=True)
class SEOEntity:
    """Content item for which SEO metadata is generated.

    On construction ``entity_type`` is checked against ``_ENTITY_TYPES`` and
    every optional text field is stripped, with blank values becoming ``None``.

    Raises:
        ValueError: If ``entity_type`` is not accepted or a text field is
            neither a string nor ``None``.
    """

    entity_type: Literal["home", "post", "page", "video", "taxonomy", "search", "other"]
    slug: str | None = None
    title: str | None = None
    excerpt: str | None = None
    body_html: str | None = None
    status: str | None = None
    featured_image: str | None = None
    published_at: str | None = None
    updated_at: str | None = None

    def __post_init__(self) -> None:
        if self.entity_type not in _ENTITY_TYPES:
            raise ValueError(
                "entity_type must be one of home/post/page/video/taxonomy/search/other"
            )

        # Normalized in declaration order so the first invalid field is the
        # one reported.
        text_fields = (
            "slug",
            "title",
            "excerpt",
            "body_html",
            "status",
            "featured_image",
            "published_at",
            "updated_at",
        )
        for name in text_fields:
            setattr(self, name, _normalize_optional_string(getattr(self, name), name))
43
+
44
+
45
@dataclass(slots=True)
class SEOOverrides:
    """Per-page values that take precedence over derived SEO metadata.

    On construction every text field is stripped, with blank values becoming
    ``None``, and ``schema_jsonld`` is checked to be ``None``, a dict, or a
    list of dicts.

    Raises:
        ValueError: If a text field is neither a string nor ``None``, or
            ``schema_jsonld`` has an unsupported shape.
    """

    meta_title: str | None = None
    meta_description: str | None = None
    canonical_url: str | None = None
    robots: str | None = None
    og_title: str | None = None
    og_description: str | None = None
    og_image: str | None = None
    twitter_card: str | None = None
    twitter_title: str | None = None
    twitter_description: str | None = None
    twitter_image: str | None = None
    schema_jsonld: dict | list[dict] | None = None

    def __post_init__(self) -> None:
        # Normalized in declaration order so the first invalid field is the
        # one reported.
        text_fields = (
            "meta_title",
            "meta_description",
            "canonical_url",
            "robots",
            "og_title",
            "og_description",
            "og_image",
            "twitter_card",
            "twitter_title",
            "twitter_description",
            "twitter_image",
        )
        for name in text_fields:
            setattr(self, name, _normalize_optional_string(getattr(self, name), name))

        payload = self.schema_jsonld
        if payload is None or isinstance(payload, dict):
            return
        if isinstance(payload, list) and all(
            isinstance(entry, dict) for entry in payload
        ):
            return
        raise ValueError("schema_jsonld must be dict, list[dict], or None")
seoslug/text.py ADDED
@@ -0,0 +1,34 @@
1
+ """Text extraction utilities for seoslug."""
2
+
3
+ import re
4
+ from html import unescape
5
+
6
+ _SCRIPT_STYLE_RE = re.compile(
7
+ r"<(script|style)\b[^>]*>.*?</\1>", re.IGNORECASE | re.DOTALL
8
+ )
9
+ _TAG_RE = re.compile(r"<[^>]+>")
10
+ _WS_RE = re.compile(r"\s+")
11
+
12
+
13
+ def html_to_text(html: str | None) -> str:
14
+ if html is None:
15
+ return ""
16
+ if not isinstance(html, str):
17
+ raise ValueError("html must be a string or None")
18
+ if not html:
19
+ return ""
20
+ text = _SCRIPT_STYLE_RE.sub(" ", html)
21
+ text = _TAG_RE.sub(" ", text)
22
+ text = unescape(text)
23
+ return _WS_RE.sub(" ", text).strip()
24
+
25
+
26
def build_description_snippet(body_html: str | None, max_length: int = 160) -> str:
    """Return the body's plain text, truncated to at most ``max_length`` characters.

    Text that already fits is returned unchanged.  Longer text is cut, right-
    stripped, and suffixed with "..."; when ``max_length`` is 3 or less the
    result is just ``max_length`` dots.

    Raises:
        ValueError: If ``max_length`` is not a positive integer.
    """
    if not isinstance(max_length, int) or max_length <= 0:
        raise ValueError("max_length must be a positive integer")

    plain = html_to_text(body_html)
    if len(plain) <= max_length:
        return plain

    ellipsis = "..."
    if max_length <= len(ellipsis):
        # No room for any text next to the ellipsis.
        return "." * max_length
    keep = max_length - len(ellipsis)
    return plain[:keep].rstrip() + ellipsis
@@ -0,0 +1,77 @@
1
+ Metadata-Version: 2.4
2
+ Name: seoslug
3
+ Version: 1.0.0
4
+ Summary: Framework-agnostic canonical URL normalization and SEO payload generation
5
+ Author: seoslug contributors
6
+ License-Expression: MIT
7
+ Project-URL: Documentation, https://deepwiki.com/emiliano-gandini-outeda/seoslug/
8
+ Keywords: seo,canonical,urls,metadata,open-graph,twitter-cards
9
+ Classifier: Development Status :: 5 - Production/Stable
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3 :: Only
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Topic :: Internet :: WWW/HTTP
17
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
18
+ Requires-Python: >=3.10
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Dynamic: license-file
22
+
23
+ # seoslug
24
+
25
+ [![DeepWiki](https://img.shields.io/badge/DeepWiki-Documentation-blue)](https://deepwiki.com/emiliano-gandini-outeda/seoslug/)
26
+
27
+ Canonical URL normalization and deterministic SEO payload generation for content platforms.
28
+
29
+ ## Installation
30
+
31
+ ```bash
32
+ pip install seoslug
33
+ ```
34
+
35
+ For local development:
36
+
37
+ ```bash
38
+ pip install -e .
39
+ ```
40
+
41
+ ## Quick usage
42
+
43
+ ```python
44
+ from seoslug import SEOConfig, URLPolicy, SEOEntity, build_seo_payload
45
+
46
+ config = SEOConfig(
47
+ canonical_host="portal.example.com",
48
+ public_base_url="https://portal.example.com",
49
+ url_policy=URLPolicy(
50
+ enforce_https=True,
51
+ lowercase_paths=True,
52
+ trailing_slash="never",
53
+ collapse_duplicate_slashes=True,
54
+ strip_tracking_params=True,
55
+ allowed_query_params=["page", "q"],
56
+ ),
57
+ default_og_image="https://cdn.example.com/default.jpg",
58
+ )
59
+
60
+ entity = SEOEntity(
61
+ entity_type="post",
62
+ slug="my-post",
63
+ title="My Post",
64
+ excerpt="Example excerpt",
65
+ body_html="<p>Body content</p>",
66
+ status="published",
67
+ featured_image="https://cdn.example.com/post.jpg",
68
+ )
69
+
70
+ payload = build_seo_payload(entity, "/posts/my-post", config)
71
+ ```
72
+
73
+ Full docs, API reference, and usage examples are in `docs/` and published with MkDocs.
74
+
75
+ ## License
76
+
77
+ MIT, see `LICENSE`.
@@ -0,0 +1,12 @@
1
+ seoslug/__init__.py,sha256=W-wZtWeyuRjrllG9i3DJWje6AbNMbld7ILw8E__LXSA,381
2
+ seoslug/builder.py,sha256=3h8X119A1i0p-U1CKq6pRs4XK6jr9RSW81dLXBDHitc,2483
3
+ seoslug/config.py,sha256=aNdazN3bYG-dL8r0ZC0V6BXJWnARCH6SCY6_ndn1bjw,3724
4
+ seoslug/jsonld.py,sha256=nCMPcBfpxiBhIaIdTM6qVRE7vqWtwOut3JvdJdyRC2o,435
5
+ seoslug/normalization.py,sha256=7JB5-Xy3j_aWL4KbVlNA8qiREsEHL8t0rLS5i_boytE,2796
6
+ seoslug/schemas.py,sha256=cWpp6r67y3SqvwTKyVjZrEVKaTgsqX5eS6bmO0Cwh50,3619
7
+ seoslug/text.py,sha256=8oiAZy6hmHYqcxreLRwB4gxXBZJ6Q87T-HtCdIKT88Y,1003
8
+ seoslug-1.0.0.dist-info/licenses/LICENSE,sha256=THz31zp4msobGOcb9cCbBH0SxVMqS7pjNlBIRkNM7Nw,1077
9
+ seoslug-1.0.0.dist-info/METADATA,sha256=Em3husAu55rXe1oO0ehjlT0SPA9blltiXOn-Xbm_L1M,2187
10
+ seoslug-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
11
+ seoslug-1.0.0.dist-info/top_level.txt,sha256=Mx56Mld7Hi20eo5D6Ykk2uR2IQnHVwMv2-NZxrNFyd0,8
12
+ seoslug-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 seoslug contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ seoslug