modern-python-guidance 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45):
  1. modern_python_guidance/__init__.py +3 -0
  2. modern_python_guidance/__main__.py +5 -0
  3. modern_python_guidance/cli.py +202 -0
  4. modern_python_guidance/compat.py +22 -0
  5. modern_python_guidance/frontmatter.py +166 -0
  6. modern_python_guidance/guide_index.py +96 -0
  7. modern_python_guidance/retrieve.py +56 -0
  8. modern_python_guidance/search.py +149 -0
  9. modern_python_guidance/skills/modern-python-guidance/SKILL.md +104 -0
  10. modern_python_guidance/skills/modern-python-guidance/guides/async/async-timeout-context.md +65 -0
  11. modern_python_guidance/skills/modern-python-guidance/guides/async/exception-groups.md +70 -0
  12. modern_python_guidance/skills/modern-python-guidance/guides/async/taskgroup-over-gather.md +63 -0
  13. modern_python_guidance/skills/modern-python-guidance/guides/data-structures/dataclass-modern.md +73 -0
  14. modern_python_guidance/skills/modern-python-guidance/guides/data-structures/dict-merge-operator.md +63 -0
  15. modern_python_guidance/skills/modern-python-guidance/guides/data-structures/match-case-patterns.md +70 -0
  16. modern_python_guidance/skills/modern-python-guidance/guides/fastapi/fastapi-annotated-depends.md +80 -0
  17. modern_python_guidance/skills/modern-python-guidance/guides/fastapi/fastapi-lifespan.md +77 -0
  18. modern_python_guidance/skills/modern-python-guidance/guides/fastapi/fastapi-typed-state.md +76 -0
  19. modern_python_guidance/skills/modern-python-guidance/guides/httpx/httpx-async-client-reuse.md +70 -0
  20. modern_python_guidance/skills/modern-python-guidance/guides/httpx/httpx-streaming.md +66 -0
  21. modern_python_guidance/skills/modern-python-guidance/guides/pydantic/pydantic-v2-config.md +73 -0
  22. modern_python_guidance/skills/modern-python-guidance/guides/pydantic/pydantic-v2-model-api.md +79 -0
  23. modern_python_guidance/skills/modern-python-guidance/guides/pydantic/pydantic-v2-serialization.md +71 -0
  24. modern_python_guidance/skills/modern-python-guidance/guides/pydantic/pydantic-v2-validators.md +83 -0
  25. modern_python_guidance/skills/modern-python-guidance/guides/stdlib/datetime-utc.md +56 -0
  26. modern_python_guidance/skills/modern-python-guidance/guides/stdlib/pathlib-over-os-path.md +68 -0
  27. modern_python_guidance/skills/modern-python-guidance/guides/stdlib/removeprefix-removesuffix.md +64 -0
  28. modern_python_guidance/skills/modern-python-guidance/guides/stdlib/tomllib-builtin.md +59 -0
  29. modern_python_guidance/skills/modern-python-guidance/guides/toolchain/no-pickle.md +79 -0
  30. modern_python_guidance/skills/modern-python-guidance/guides/toolchain/pyproject-toml-over-setup.md +69 -0
  31. modern_python_guidance/skills/modern-python-guidance/guides/toolchain/ruff-over-flake8.md +90 -0
  32. modern_python_guidance/skills/modern-python-guidance/guides/toolchain/safe-subprocess.md +79 -0
  33. modern_python_guidance/skills/modern-python-guidance/guides/toolchain/uv-over-pip.md +68 -0
  34. modern_python_guidance/skills/modern-python-guidance/guides/typing/override-decorator.md +65 -0
  35. modern_python_guidance/skills/modern-python-guidance/guides/typing/paramspec-decorators.md +81 -0
  36. modern_python_guidance/skills/modern-python-guidance/guides/typing/type-parameter-syntax.md +66 -0
  37. modern_python_guidance/skills/modern-python-guidance/guides/typing/typeis-vs-typeguard.md +66 -0
  38. modern_python_guidance/skills/modern-python-guidance/guides/typing/union-syntax.md +59 -0
  39. modern_python_guidance/skills/modern-python-guidance/guides/typing/use-builtin-generics.md +61 -0
  40. modern_python_guidance/version_detect.py +136 -0
  41. modern_python_guidance-0.1.0.dist-info/METADATA +180 -0
  42. modern_python_guidance-0.1.0.dist-info/RECORD +45 -0
  43. modern_python_guidance-0.1.0.dist-info/WHEEL +4 -0
  44. modern_python_guidance-0.1.0.dist-info/entry_points.txt +3 -0
  45. modern_python_guidance-0.1.0.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,3 @@
1
+ """Modern Python Guidance — version-aware BAD/GOOD pattern guides for AI coding agents."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1,5 @@
1
+ """Allow running as `python -m modern_python_guidance`."""
2
+
3
+ from modern_python_guidance.cli import main
4
+
5
+ main()
@@ -0,0 +1,202 @@
1
+ """CLI entry point for modern-python-guidance."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import contextlib
7
+ import json
8
+ import signal
9
+ import sys
10
+ from pathlib import Path
11
+
12
+ from modern_python_guidance import __version__
13
+ from modern_python_guidance.compat import VERSION_RE, version_compatible
14
+ from modern_python_guidance.guide_index import build_index
15
+ from modern_python_guidance.retrieve import retrieve
16
+ from modern_python_guidance.search import search as do_search
17
+ from modern_python_guidance.version_detect import detect_version
18
+
19
+
20
def main(argv: list[str] | None = None) -> None:
    """Parse command-line arguments and dispatch to the chosen subcommand.

    Args:
        argv: Arguments to parse; ``None`` lets argparse fall back to
            ``sys.argv``.

    Prints help and exits with status 2 when no subcommand is given, reports
    an argparse error for a malformed ``--python-version``, and exits with
    status 0 if the output pipe is closed while a subcommand is printing.
    """
    # Restoring the default SIGPIPE action is best-effort: failures to look up
    # or install the handler are ignored.
    with contextlib.suppress(AttributeError, OSError):
        signal.signal(signal.SIGPIPE, signal.SIG_DFL)

    def add_format_option(sub: argparse.ArgumentParser) -> None:
        # Every listing-style subcommand shares the same --format switch.
        sub.add_argument(
            "--format",
            choices=["json", "human"],
            default=None,
            help="Output format (default: json when piped, human when TTY)",
        )

    parser = argparse.ArgumentParser(
        prog="modern-python-guidance",
        description="Version-aware BAD/GOOD pattern guides for modern Python",
    )
    parser.add_argument("--version", action="version", version=f"%(prog)s {__version__}")
    subparsers = parser.add_subparsers(dest="command")

    search_parser = subparsers.add_parser("search", help="Search guides by keyword")
    search_parser.add_argument("query", help="Search query")
    search_parser.add_argument("--python-version", help="Target Python version (e.g. 3.11)")
    search_parser.add_argument("--category", help="Filter by category")
    search_parser.add_argument("--limit", type=int, default=10, help="Max results (default: 10)")
    add_format_option(search_parser)

    retrieve_parser = subparsers.add_parser("retrieve", help="Retrieve guide(s) by ID")
    retrieve_parser.add_argument("ids", help="Comma-separated guide IDs")
    retrieve_parser.add_argument("--python-version", help="Target Python version")
    add_format_option(retrieve_parser)

    list_parser = subparsers.add_parser("list", help="List available guides")
    list_parser.add_argument("--category", help="Filter by category")
    list_parser.add_argument("--python-version", help="Filter by Python version")
    add_format_option(list_parser)

    detect_parser = subparsers.add_parser("detect-version", help="Detect project Python version")
    detect_parser.add_argument("--project-dir", type=Path, help="Project directory (default: cwd)")

    args = parser.parse_args(argv)

    if args.command is None:
        parser.print_help()
        sys.exit(2)

    # Not every subcommand defines --python-version, hence getattr.
    pv = getattr(args, "python_version", None)
    if pv is not None and not VERSION_RE.match(pv):
        parser.error(f"invalid --python-version format: {pv!r} (expected N.N, e.g. 3.11)")

    handlers = {
        "search": _cmd_search,
        "retrieve": _cmd_retrieve,
        "list": _cmd_list,
        "detect-version": _cmd_detect_version,
    }
    try:
        handler = handlers.get(args.command)
        if handler is not None:
            handler(args)
    except BrokenPipeError:
        # The reader went away (e.g. output piped into `head`); not an error.
        sys.exit(0)
86
+
87
+
88
def _resolve_format(args: argparse.Namespace) -> str:
    """Return the output format to use for this invocation.

    An explicit ``--format`` value wins; otherwise ``"human"`` is used when
    stdout is a terminal and ``"json"`` when it is not.
    """
    chosen = args.format
    if chosen is None:
        chosen = "human" if sys.stdout.isatty() else "json"
    return chosen
92
+
93
+
94
def _cmd_search(args: argparse.Namespace) -> None:
    """Run the ``search`` subcommand and print the ranked matches.

    When nothing matches, prints an empty-result marker in the selected
    format and exits with status 1.
    """
    hits = do_search(
        build_index(),
        args.query,
        python_version=args.python_version,
        category=args.category,
        limit=args.limit,
    )
    fmt = _resolve_format(args)

    if not hits:
        print("No guides found." if fmt == "human" else "[]")
        sys.exit(1)

    if fmt != "json":
        # Human output: one aligned line per hit.
        for hit in hits:
            suffix = " (fuzzy)" if hit.fuzzy else ""
            print(f" {hit.guide_id:<40} score={hit.score:<6.1f} [{hit.meta.category}]{suffix}")
        return

    payload = []
    for hit in hits:
        payload.append(
            {
                "id": hit.guide_id,
                "title": hit.meta.title,
                "category": hit.meta.category,
                "layer": hit.meta.layer,
                "score": hit.score,
                "token_estimate": hit.token_estimate,
                "fuzzy": hit.fuzzy,
            }
        )
    print(json.dumps(payload, indent=2, ensure_ascii=False))
131
+
132
+
133
def _cmd_retrieve(args: argparse.Namespace) -> None:
    """Run the ``retrieve`` subcommand and print the requested guides.

    ``args.ids`` is a comma-separated list; surrounding whitespace on each ID
    is ignored. When no guide is returned, prints an empty-result marker in
    the selected format and exits with status 1.
    """
    index = build_index()
    requested = [part.strip() for part in args.ids.split(",")]
    rendered = retrieve(index, requested, python_version=args.python_version)
    fmt = _resolve_format(args)

    if not rendered:
        print("No guides found." if fmt == "human" else "[]")
        sys.exit(1)

    if fmt == "json":
        print(json.dumps(rendered, indent=2, ensure_ascii=False))
        return

    # Human output: a header line, the guide body, then a blank separator.
    for entry in rendered:
        verdict = "YES" if entry["version_match"] else "NO"
        print(f"--- {entry['id']} (version match: {verdict}) ---")
        print(entry["content"])
        print()
155
+
156
+
157
def _cmd_list(args: argparse.Namespace) -> None:
    """Run the ``list`` subcommand: print guides, optionally filtered.

    Guides are filtered by ``--category`` and ``--python-version`` when given,
    then ordered by (layer, category, id). When nothing remains, prints an
    empty-result marker in the selected format and exits with status 1.
    """

    def wanted(meta) -> bool:
        # Category is checked first so the version check only runs on guides
        # that already passed the category filter.
        if args.category and meta.category != args.category:
            return False
        if args.python_version and not version_compatible(meta.python, args.python_version):
            return False
        return True

    selected = sorted(
        (meta for meta in build_index().all_meta() if wanted(meta)),
        key=lambda meta: (meta.layer, meta.category, meta.id),
    )
    fmt = _resolve_format(args)

    if not selected:
        print("No guides found." if fmt == "human" else "[]")
        sys.exit(1)

    if fmt == "json":
        payload = [
            {
                "id": meta.id,
                "title": meta.title,
                "category": meta.category,
                "layer": meta.layer,
                "python": meta.python,
                "frequency": meta.frequency,
            }
            for meta in selected
        ]
        print(json.dumps(payload, indent=2, ensure_ascii=False))
        return

    # Human output: emit a heading each time the category changes.
    previous_category = None
    for meta in selected:
        if meta.category != previous_category:
            previous_category = meta.category
            print(f"\n[{previous_category}] (layer {meta.layer})")
        print(f" {meta.id:<40} {meta.title}")
198
+
199
+
200
def _cmd_detect_version(args: argparse.Namespace) -> None:
    """Run the ``detect-version`` subcommand and print the detected version."""
    print(detect_version(project_dir=args.project_dir))
@@ -0,0 +1,22 @@
1
+ """Shared helpers used by search, retrieve, and CLI."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
+ from packaging.specifiers import InvalidSpecifier, SpecifierSet
8
+ from packaging.version import Version
9
+
10
+ VERSION_RE = re.compile(r"^\d+\.\d+$")
11
+
12
+
13
def version_compatible(guide_python: str, target: str) -> bool:
    """Return whether *target* satisfies a guide's Python version specifier.

    Args:
        guide_python: Specifier string taken from the guide's ``python`` field.
        target: Target Python version; ``.0`` is appended before comparison.

    Returns:
        ``True`` when the target version is contained in the specifier set.
        A specifier or target that cannot be parsed is treated as compatible.
    """
    try:
        spec = SpecifierSet(guide_python)
        return Version(f"{target}.0") in spec
    except (InvalidSpecifier, ValueError):
        # Only parse failures fall back to "compatible": packaging's
        # InvalidVersion derives from ValueError. Unrelated errors propagate
        # instead of being silently swallowed.
        return True
19
+
20
+
21
def token_estimate(body: str) -> int:
    """Estimate the token count of *body* as one token per four characters."""
    char_count = len(body)
    return char_count // 4
@@ -0,0 +1,166 @@
1
+ """Strict YAML-subset frontmatter parser for guide files.
2
+
3
+ Supports only:
4
+ - key: value (string, quoted string, integer)
5
+ - key:\\n - item\\n - item (indented list)
6
+
7
+ Rejects all other YAML constructs with FrontmatterError + line number.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import re
13
+ from dataclasses import dataclass, field
14
+ from typing import Any
15
+
16
+ _KEY_RE = re.compile(r"^([a-z][a-z0-9_-]*)\s*:\s*(.*)")
17
+ _LIST_ITEM_RE = re.compile(r"^ - (.+)")
18
+ _FENCE = "---"
19
+
20
+ REQUIRED_FIELDS = frozenset({"id", "title", "category", "layer", "tags", "python", "frequency"})
21
+ VALID_FREQUENCIES = frozenset({"high", "medium", "low"})
22
+
23
+
24
class FrontmatterError(Exception):
    """Raised when a guide's frontmatter cannot be parsed or validated.

    The offending line number, when known, is kept on ``line`` and prefixed
    to the exception message.
    """

    def __init__(self, message: str, line: int | None = None):
        self.line = line
        text = message if line is None else f"line {line}: {message}"
        super().__init__(text)
29
+
30
+
31
@dataclass
class GuideMeta:
    """Metadata describing one guide, as read from its frontmatter."""

    # Required fields (see REQUIRED_FIELDS).
    id: str
    title: str
    category: str
    layer: int  # _build_meta accepts only 1, 2 or 3
    tags: list[str]
    python: str  # version specifier string, checked via version_compatible
    frequency: str  # one of VALID_FREQUENCIES

    # Optional fields; each instance gets its own fresh list.
    aliases: list[str] = field(default_factory=list)
    pep: list[int] = field(default_factory=list)
42
+
43
+
44
def parse_frontmatter(text: str) -> tuple[GuideMeta, str]:
    """Split a guide file into validated metadata and its body.

    Args:
        text: Full file contents; the first line must be the ``---`` fence.

    Returns:
        ``(meta, body)`` where *body* is everything after the closing fence,
        stripped of surrounding whitespace.

    Raises:
        FrontmatterError: If either fence is missing, or the frontmatter
            cannot be parsed or validated.
    """
    lines = text.split("\n")
    if not lines or lines[0].strip() != _FENCE:
        raise FrontmatterError("file must start with ---", line=1)

    # Index of the first fence after the opening one, if any.
    closing = next(
        (idx for idx in range(1, len(lines)) if lines[idx].strip() == _FENCE),
        None,
    )
    if closing is None:
        raise FrontmatterError("closing --- not found")

    header_lines = lines[1:closing]
    body_lines = lines[closing + 1 :]
    meta = _build_meta(_parse_raw(header_lines))
    return meta, "\n".join(body_lines).strip()
63
+
64
+
65
def _parse_raw(lines: list[str]) -> dict[str, Any]:
    """Parse frontmatter lines into a dict of scalars and lists.

    A ``key: value`` line stores a scalar. A ``key:`` line with no value opens
    a list that the following list-item lines append to. Blank lines are
    skipped.

    Raises:
        FrontmatterError: On a list item with no open list key, a duplicate
            key, or a line matching neither supported form.
    """
    parsed: dict[str, Any] = {}
    open_list_key: str | None = None

    # Numbering starts at 2 because the caller strips the opening fence,
    # which is line 1 of the file.
    for lineno, raw_line in enumerate(lines, start=2):
        if not raw_line.strip():
            continue

        item = _LIST_ITEM_RE.match(raw_line)
        if item is not None:
            if open_list_key is None:
                raise FrontmatterError("list item without preceding key", line=lineno)
            target = parsed[open_list_key]
            if not isinstance(target, list):
                raise FrontmatterError(
                    f"list item for non-list key '{open_list_key}'", line=lineno
                )
            target.append(_parse_scalar(item.group(1).strip()))
            continue

        entry = _KEY_RE.match(raw_line)
        if entry is None:
            raise FrontmatterError(f"unsupported syntax: {raw_line!r}", line=lineno)

        key = entry.group(1)
        value_str = entry.group(2).strip()
        if key in parsed:
            raise FrontmatterError(f"duplicate key '{key}'", line=lineno)

        if value_str:
            parsed[key] = _parse_scalar(value_str)
            open_list_key = None
        else:
            # An empty value starts a list; items are collected from the
            # lines that follow.
            parsed[key] = []
            open_list_key = key

    return parsed
103
+
104
+
105
def _parse_scalar(value: str) -> str | int:
    """Convert one raw frontmatter scalar to ``str`` or ``int``.

    Args:
        value: The already-stripped text of a value or list item.

    Returns:
        The text between the quotes if *value* is wrapped in a matching pair
        of double or single quotes; an ``int`` if *value* consists solely of
        ASCII digits; otherwise *value* unchanged.
    """
    # Require at least two characters: a lone quote character both starts and
    # ends with a quote, and must not be collapsed into an empty string.
    if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
        return value[1:-1]

    # isascii() keeps non-ASCII digit characters as plain strings.
    if value.isascii() and value.isdigit():
        return int(value)

    return value
116
+
117
+
118
def _build_meta(raw: dict[str, Any]) -> GuideMeta:
    """Validate parsed frontmatter and convert it into a ``GuideMeta``.

    Args:
        raw: Mapping of frontmatter keys to scalars or lists.

    Returns:
        The populated ``GuideMeta``. ``pep`` is normalised to a list of ints
        and ``aliases`` defaults to an empty list.

    Raises:
        FrontmatterError: If a required field is missing, or a field has an
            invalid type or value.
    """
    missing = REQUIRED_FIELDS - raw.keys()
    if missing:
        raise FrontmatterError(f"missing required fields: {', '.join(sorted(missing))}")

    for str_field in ("id", "title", "category", "python", "frequency"):
        if isinstance(raw[str_field], list):
            raise FrontmatterError(f"'{str_field}' must be a scalar value, not a list")

    freq = raw["frequency"]
    if freq not in VALID_FREQUENCIES:
        # Sort the choices so the message is stable and readable rather than
        # a frozenset repr with arbitrary ordering.
        allowed = ", ".join(sorted(VALID_FREQUENCIES))
        raise FrontmatterError(f"invalid frequency '{freq}', must be one of: {allowed}")

    layer = raw["layer"]
    if not isinstance(layer, int) or layer not in (1, 2, 3):
        raise FrontmatterError(f"layer must be 1, 2, or 3, got {layer!r}")

    # "pep" is optional and may be a single int or a list of int-like items.
    pep_raw = raw.get("pep")
    if pep_raw is None:
        pep = []
    elif isinstance(pep_raw, int):
        pep = [pep_raw]
    elif isinstance(pep_raw, list):
        try:
            pep = [int(p) for p in pep_raw]
        except (ValueError, TypeError) as e:
            raise FrontmatterError(f"pep list items must be integers: {e}") from e
    else:
        raise FrontmatterError(f"pep must be int or list of ints, got {pep_raw!r}")

    aliases_raw = raw.get("aliases", [])
    if not isinstance(aliases_raw, list):
        raise FrontmatterError(f"aliases must be a list, got {aliases_raw!r}")

    tags = raw["tags"]
    if not isinstance(tags, list) or not tags:
        raise FrontmatterError("tags must be a non-empty list")

    # Scalars may have been parsed as int (e.g. an all-digit id), so string
    # fields are coerced with str().
    return GuideMeta(
        id=str(raw["id"]),
        title=str(raw["title"]),
        category=str(raw["category"]),
        layer=layer,
        tags=[str(t) for t in tags],
        python=str(raw["python"]),
        frequency=freq,
        aliases=[str(a) for a in aliases_raw],
        pep=pep,
    )
@@ -0,0 +1,96 @@
1
+ """Dynamic guide scanner — builds in-memory index from guides/ directory."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import importlib.resources
6
+ import logging
7
+ from dataclasses import dataclass, field
8
+ from pathlib import Path
9
+
10
+ from modern_python_guidance.frontmatter import FrontmatterError, GuideMeta, parse_frontmatter
11
+
12
+ log = logging.getLogger(__name__)
13
+
14
+
15
@dataclass
class Guide:
    """One loaded guide: its metadata, its body text and where it came from."""

    meta: GuideMeta  # validated frontmatter
    body: str  # file content after the frontmatter
    source_path: str  # path of the file the guide was read from
20
+
21
+
22
@dataclass
class GuideIndex:
    """In-memory collection of guides keyed by guide ID."""

    guides: dict[str, Guide] = field(default_factory=dict)

    def __len__(self) -> int:
        """Return the number of guides held by the index."""
        # Because __len__ is defined, an empty index is falsy.
        return len(self.guides)

    def get(self, guide_id: str) -> Guide | None:
        """Return the guide stored under *guide_id*, or ``None`` if absent."""
        return self.guides.get(guide_id)

    def all_meta(self) -> list[GuideMeta]:
        """Return the metadata of every guide, in insertion order."""
        return [entry.meta for entry in self.guides.values()]

    def categories(self) -> list[str]:
        """Return the distinct guide categories, sorted."""
        distinct = {entry.meta.category for entry in self.guides.values()}
        return sorted(distinct)
37
+
38
+
39
def build_index(guides_dir: Path | None = None) -> GuideIndex:
    """Scan a directory tree for ``*.md`` guides and index them by ID.

    Args:
        guides_dir: Directory to scan; located via ``_find_guides_dir()``
            when ``None``.

    Returns:
        The populated index. It is empty if the directory does not exist.
        Files that fail to load are logged and skipped; for a repeated guide
        ID the first file in sorted order is kept.
    """
    root = _find_guides_dir() if guides_dir is None else guides_dir
    index = GuideIndex()

    if not root.is_dir():
        log.warning("Guides directory not found: %s", root)
        return index

    # Sorted traversal makes duplicate-ID resolution deterministic.
    for md_file in sorted(root.rglob("*.md")):
        try:
            meta, body = parse_frontmatter(md_file.read_text(encoding="utf-8"))

            stem = md_file.stem
            if meta.id != stem:
                log.warning(
                    "Guide ID mismatch: frontmatter says '%s', filename is '%s'"
                    " — using frontmatter ID",
                    meta.id,
                    stem,
                )

            if meta.id in index.guides:
                log.warning("Duplicate guide ID '%s', skipping %s", meta.id, md_file)
                continue

            index.guides[meta.id] = Guide(meta=meta, body=body, source_path=str(md_file))
        except FrontmatterError as e:
            log.warning("Skipping %s: %s", md_file, e)
        except Exception as e:
            # One unreadable file must not prevent the rest from loading.
            log.warning("Unexpected error loading %s: %s", md_file, e)

    log.debug("Loaded %d guides from %s", len(index), root)
    return index
79
+
80
+
81
def _find_guides_dir() -> Path:
    """Locate the guides directory.

    Tries, in order: the ``skills`` directory inside the
    ``modern_python_guidance`` package, a ``skills`` directory three levels
    above this file, and finally a path relative to the current working
    directory (returned without checking that it exists).
    """
    relative = Path("skills") / "modern-python-guidance" / "guides"

    try:
        package_root = importlib.resources.files("modern_python_guidance")
        resource = package_root / "skills" / "modern-python-guidance" / "guides"
        # The traversable resource is converted to a filesystem path via str().
        packaged = Path(str(resource))
        if packaged.is_dir():
            return packaged
    except (TypeError, FileNotFoundError):
        pass

    checkout_root = Path(__file__).resolve().parent.parent.parent
    candidate = checkout_root / relative
    return candidate if candidate.is_dir() else relative
@@ -0,0 +1,56 @@
1
+ """Guide retrieval and JSON rendering."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from typing import Any
7
+
8
+ from modern_python_guidance import __version__
9
+ from modern_python_guidance.compat import token_estimate, version_compatible
10
+ from modern_python_guidance.guide_index import Guide, GuideIndex
11
+
12
+
13
def retrieve(
    index: GuideIndex,
    guide_ids: list[str],
    *,
    python_version: str | None = None,
) -> list[dict[str, Any]]:
    """Render the guides named by *guide_ids*, in the order requested.

    IDs that are not present in *index* are skipped without error.

    Args:
        index: Guide index to look the IDs up in.
        guide_ids: Guide IDs to fetch.
        python_version: Optional target version passed to the renderer.

    Returns:
        One rendered dict per guide that was found.
    """
    # Lazy lookup keeps the get/render calls interleaved per ID.
    looked_up = (index.get(guide_id) for guide_id in guide_ids)
    return [
        _render(guide, python_version=python_version)
        for guide in looked_up
        if guide is not None
    ]
28
+
29
+
30
def retrieve_json(
    index: GuideIndex,
    guide_ids: list[str],
    *,
    python_version: str | None = None,
) -> str:
    """Return the output of ``retrieve`` serialised as indented JSON.

    Non-ASCII characters are written as-is rather than escaped.
    """
    payload = retrieve(index, guide_ids, python_version=python_version)
    serialized = json.dumps(payload, ensure_ascii=False, indent=2)
    return serialized
38
+
39
+
40
def _render(guide: Guide, *, python_version: str | None = None) -> dict[str, Any]:
    """Build the JSON-serialisable representation of one guide.

    ``version_match`` is ``True`` when no target version is given; otherwise
    it reports whether the guide's specifier accepts *python_version*.
    """
    meta = guide.meta
    matches = version_compatible(meta.python, python_version) if python_version else True

    rendered: dict[str, Any] = {
        "id": meta.id,
        "title": meta.title,
        "category": meta.category,
        "layer": meta.layer,
        "python": meta.python,
        "frequency": meta.frequency,
        "version_match": matches,
        "content": guide.body,
        "token_estimate": token_estimate(guide.body),
        "source": f"modern-python-guidance v{__version__}",
    }
    return rendered
@@ -0,0 +1,149 @@
1
+ """Weighted keyword search engine for guides."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import difflib
6
+ from dataclasses import dataclass
7
+
8
+ from modern_python_guidance.compat import token_estimate, version_compatible
9
+ from modern_python_guidance.frontmatter import GuideMeta
10
+ from modern_python_guidance.guide_index import Guide, GuideIndex
11
+
12
+ WEIGHT_TAG = 10
13
+ WEIGHT_ALIAS = 8
14
+ WEIGHT_TITLE = 5
15
+ WEIGHT_CATEGORY = 3
16
+
17
+ FREQ_BOOST = {"high": 1.0, "medium": 0.5, "low": 0.0}
18
+
19
+ MAX_QUERY_LEN = 500
20
+ FUZZY_CUTOFF = 0.4
21
+ FUZZY_MAX = 3
22
+
23
+
24
@dataclass
class SearchResult:
    """One search hit together with its ranking information."""

    guide_id: str
    score: float  # keyword score, or similarity ratio for fuzzy hits
    meta: GuideMeta
    token_estimate: int  # estimated size of the guide body
    fuzzy: bool = False  # True when produced by the fuzzy fallback
31
+
32
+
33
def search(
    index: GuideIndex,
    query: str,
    *,
    python_version: str | None = None,
    category: str | None = None,
    limit: int = 10,
) -> list[SearchResult]:
    """Rank guides against a keyword query.

    The query is truncated to ``MAX_QUERY_LEN`` characters, lower-cased and
    split on whitespace. Each guide passing the optional filters is scored
    with ``_score``; guides with a positive score get a frequency boost and
    are returned best-first, ties broken by guide ID. If no guide scores
    above zero, the fuzzy fallback is used instead.

    Args:
        index: Guide index to search.
        query: Free-text query.
        python_version: If given, only guides compatible with it are kept.
        category: If given, only guides in exactly this category are kept.
        limit: Maximum number of results; a value of zero or less yields no
            results.

    Returns:
        At most *limit* results, or an empty list for an empty query.
    """
    # Without this guard a negative limit would reach ``results[:limit]`` and
    # drop matches from the end of the list instead of limiting them.
    if limit <= 0:
        return []

    query = query[:MAX_QUERY_LEN].lower()
    tokens = query.split()

    if not tokens:
        return []

    results: list[SearchResult] = []

    for guide_id, guide in index.guides.items():
        meta = guide.meta

        if category and meta.category != category:
            continue

        if python_version and not version_compatible(meta.python, python_version):
            continue

        score = _score(meta, tokens)

        if score > 0:
            # The boost only orders real matches; it never creates one.
            score += FREQ_BOOST.get(meta.frequency, 0.0)
            results.append(SearchResult(
                guide_id=guide_id,
                score=score,
                meta=meta,
                token_estimate=token_estimate(guide.body),
            ))

    results.sort(key=lambda r: (-r.score, r.guide_id))

    if not results:
        return _fuzzy_fallback(
            index, query, python_version=python_version, category=category, limit=limit,
        )

    return results[:limit]
77
+
78
+
79
def _score(meta: GuideMeta, tokens: list[str]) -> float:
    """Return the weighted keyword score of *meta* for the query *tokens*.

    Comparisons are made against lower-cased metadata. For each token the
    following are added: ``WEIGHT_TAG`` for an exact tag match,
    ``WEIGHT_ALIAS`` for an exact alias match (or half of it when the token
    is only a substring of some alias), ``WEIGHT_TITLE`` when it equals a
    whitespace-separated title word, and ``WEIGHT_CATEGORY`` when it equals
    the category.
    """
    tags = [tag.lower() for tag in meta.tags]
    aliases = [alias.lower() for alias in meta.aliases]
    title_words = meta.title.lower().split()
    category = meta.category.lower()

    total = 0.0
    for token in tokens:
        if token in tags:
            total += WEIGHT_TAG

        if token in aliases:
            total += WEIGHT_ALIAS
        elif any(token in alias for alias in aliases):
            # Partial alias hit: counts for half of an exact alias match.
            total += WEIGHT_ALIAS * 0.5

        if token in title_words:
            total += WEIGHT_TITLE

        if token == category:
            total += WEIGHT_CATEGORY

    return total
98
+
99
+
100
def _fuzzy_fallback(
    index: GuideIndex,
    query: str,
    *,
    python_version: str | None = None,
    category: str | None = None,
    limit: int = FUZZY_MAX,
) -> list[SearchResult]:
    """Suggest guides whose ID, title or tags closely resemble *query*.

    The whole query string is compared against each guide's ID, lower-cased
    title and lower-cased tags using ``difflib``. Every guide is reported
    once, scored with the similarity ratio of the first matching term that
    referenced it, and flagged ``fuzzy=True``.

    Returns:
        At most ``min(limit, FUZZY_MAX)`` results, best first, with ties
        broken by guide ID.
    """

    def eligible(guide: Guide) -> bool:
        if category and guide.meta.category != category:
            return False
        return not (python_version and not version_compatible(guide.meta.python, python_version))

    candidates: dict[str, Guide] = {
        guide_id: guide for guide_id, guide in index.guides.items() if eligible(guide)
    }
    if not candidates:
        return []

    # Map each searchable term to the guides it came from. Dict insertion
    # order doubles as the ordered pool handed to difflib.
    term_owners: dict[str, list[str]] = {}
    for guide_id, guide in candidates.items():
        terms = (guide_id, guide.meta.title.lower(), *(tag.lower() for tag in guide.meta.tags))
        for term in terms:
            term_owners.setdefault(term, []).append(guide_id)

    close_terms = difflib.get_close_matches(
        query, list(term_owners), n=FUZZY_MAX * 2, cutoff=FUZZY_CUTOFF
    )

    reported: set[str] = set()
    results: list[SearchResult] = []
    for term in close_terms:
        similarity = difflib.SequenceMatcher(None, query, term).ratio()
        for guide_id in term_owners[term]:
            if guide_id in reported:
                continue
            reported.add(guide_id)
            guide = candidates[guide_id]
            results.append(
                SearchResult(
                    guide_id=guide_id,
                    score=round(similarity, 3),
                    meta=guide.meta,
                    token_estimate=token_estimate(guide.body),
                    fuzzy=True,
                )
            )

    results.sort(key=lambda r: (-r.score, r.guide_id))
    return results[: min(limit, FUZZY_MAX)]